[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merge (only) xen-ia64-unstable with latest xen-unstable
# HG changeset patch # User djm@xxxxxxxxxxxxxxx # Node ID b2f4823b6ff09017e21c0d44f73c58118f50ea46 # Parent 10b1d30d3f66beac5a8275d108461da558c38d1d # Parent ec4a3f2d060e1dcc137cca136309ca67d46fbb85 Merge (only) xen-ia64-unstable with latest xen-unstable diff -r 10b1d30d3f66 -r b2f4823b6ff0 .hgignore --- a/.hgignore Thu Sep 8 15:18:40 2005 +++ b/.hgignore Fri Sep 9 16:30:54 2005 @@ -82,6 +82,7 @@ ^tools/blktap/parallax/vdi_validate$ ^tools/blktap/parallax/parallax$ ^tools/blktap/parallax/blockstored$ +^tools/blktap/ublkback/ublkback$ ^tools/blktap/xen/.*$ ^tools/check/\..*$ ^tools/cmdline/.*$ @@ -141,24 +142,28 @@ ^tools/vnet/vnet-module/\.tmp_versions/.*$ ^tools/vnet/vnet-module/vnet_module\.mod\..*$ ^tools/vnetd/vnetd$ +^tools/vtpm/vtpm* +^tools/vtpm/tpm_emulator-* +^tools/vtpm_manager/manager/vtpm_managerd ^tools/web-shutdown\.tap$ ^tools/x2d2/minixend$ -^tools/xcs/xcs$ -^tools/xcs/xcsdump$ ^tools/xcutils/xc_restore$ ^tools/xcutils/xc_save$ ^tools/xenstat/xentop/xentop$ ^tools/xenstore/testsuite/tmp/.*$ ^tools/xenstore/xen$ +^tools/xenstore/xenbus_dev.h$ ^tools/xenstore/xenstored$ ^tools/xenstore/xenstored_test$ +^tools/xenstore/xenstore-read$ +^tools/xenstore/xenstore-rm$ +^tools/xenstore/xenstore-write$ ^tools/xenstore/xs_dom0_test$ ^tools/xenstore/xs_random$ ^tools/xenstore/xs_stress$ ^tools/xenstore/xs_test$ ^tools/xenstore/xs_watch_stress$ ^tools/xentrace/xentrace$ -^tools/xfrd/xfrd$ ^xen/BLOG$ ^xen/TAGS$ ^xen/arch/x86/asm-offsets\.s$ diff -r 10b1d30d3f66 -r b2f4823b6ff0 Config.mk --- a/Config.mk Thu Sep 8 15:18:40 2005 +++ b/Config.mk Fri Sep 9 16:30:54 2005 @@ -48,3 +48,4 @@ # Optional components XENSTAT_XENTOP ?= y +VTPM_TOOLS ?= n diff -r 10b1d30d3f66 -r b2f4823b6ff0 Makefile --- a/Makefile Thu Sep 8 15:18:40 2005 +++ b/Makefile Fri Sep 9 16:30:54 2005 @@ -35,11 +35,11 @@ export pae=y endif -.PHONY: all dist install xen tools kernels docs world clean mkpatches mrproper +.PHONY: all dist install xen kernels tools docs world clean mkpatches mrproper 
.PHONY: kbuild kdelete kclean # build and install everything into the standard system directories -install: install-xen install-tools install-kernels install-docs +install: install-xen install-kernels install-tools install-docs build: kernels $(MAKE) -C xen build @@ -47,7 +47,7 @@ $(MAKE) -C docs build # build and install everything into local dist directory -dist: xen tools kernels docs +dist: xen kernels tools docs $(INSTALL_DIR) $(DISTDIR)/check $(INSTALL_DATA) ./COPYING $(DISTDIR) $(INSTALL_DATA) ./README $(DISTDIR) @@ -178,6 +178,8 @@ rm -rf $(D)/usr/share/man/man1/xen* rm -rf $(D)/usr/share/man/man8/xen* rm -rf $(D)/usr/lib/xen + rm -rf $(D)/etc/hotplug.d/xen-backend + rm -rf $(D)/etc/hotplug/xen-backend.agent # Legacy targets for compatibility linux24: diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/Makefile --- a/extras/mini-os/Makefile Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/Makefile Fri Sep 9 16:30:54 2005 @@ -31,16 +31,18 @@ OBJS := $(TARGET_ARCH).o OBJS += $(patsubst %.c,%.o,$(wildcard *.c)) OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c)) - +OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c)) + HDRS := $(wildcard include/*.h) HDRS += $(wildcard include/xen/*.h) default: $(TARGET) -xen-public: +links: [ -e include/xen ] || ln -sf ../../../xen/include/public include/xen - -$(TARGET): xen-public $(OBJS) + [ -e xenbus/xenstored.h ] || ln -sf ../../../tools/xenstore/xenstored.h xenbus/xenstored.h + +$(TARGET): links $(OBJS) $(LD) -N -T minios-$(TARGET_ARCH).lds $(OBJS) -o $@.elf gzip -f -9 -c $@.elf >$@.gz @@ -55,3 +57,4 @@ %.o: %.S $(HDRS) Makefile $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ + diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/README --- a/extras/mini-os/README Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/README Fri Sep 9 16:30:54 2005 @@ -23,13 +23,8 @@ - to build it just type make. 
-- copy image.final somewhere where dom0 can access it +- to start it do the following in domain0 (assuming xend is running) + # xm create domain_config -- in dom0 - # xi_create 16000 test - <domid> - # xi_build <domid> image.final 0 - # xi_start <domid> - -this prints out a bunch of stuff and then every 1000 timer interrupts the -system time. +this starts the kernel and prints out a bunch of stuff and then every +1000 timer interrupts the system time. diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/events.c --- a/extras/mini-os/events.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/events.c Fri Sep 9 16:30:54 2005 @@ -17,13 +17,13 @@ */ #include <os.h> +#include <mm.h> #include <hypervisor.h> #include <events.h> #include <lib.h> -#include <xen/event_channel.h> static ev_action_t ev_actions[NR_EVS]; -void default_handler(u32 port, struct pt_regs *regs); +void default_handler(int port, struct pt_regs *regs); /* @@ -32,7 +32,6 @@ int do_event(u32 port, struct pt_regs *regs) { ev_action_t *action; - if (port >= NR_EVS) { printk("Port number too large: %d\n", port); return 0; @@ -57,11 +56,23 @@ } +void bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) ) +{ + if(ev_actions[port].handler) + printk("WARN: Handler for port %d already registered, replacing\n", + port); + + ev_actions[port].handler = handler; + ev_actions[port].status &= ~EVS_DISABLED; + + /* Finally unmask the port */ + unmask_evtchn(port); +} + int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) ) { evtchn_op_t op; int ret = 0; - u32 port; /* Try to bind the virq to a port */ op.cmd = EVTCHNOP_bind_virq; @@ -73,21 +84,12 @@ printk("Failed to bind virtual IRQ %d\n", virq); goto out; } - - port = op.u.bind_virq.port; - - if(ev_actions[port].handler) - printk("WARN: Handler for port %d already registered, replacing\n", - port); - - ev_actions[port].handler = handler; - ev_actions[port].status &= ~EVS_DISABLED; - - /* Finally unmask the port */ - unmask_evtchn(port); + 
bind_evtchn(op.u.bind_virq.port, handler); out: return ret; } + + /* * Initially all events are without a handler and disabled @@ -100,10 +102,10 @@ for ( i = 0; i < NR_EVS; i++ ) { ev_actions[i].status = EVS_DISABLED; - ev_actions[i].handler = NULL; + ev_actions[i].handler = default_handler; } } -void default_handler(u32 port, struct pt_regs *regs) { +void default_handler(int port, struct pt_regs *regs) { printk("[Port %d] - event received\n", port); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/events.h --- a/extras/mini-os/include/events.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/events.h Fri Sep 9 16:30:54 2005 @@ -20,6 +20,7 @@ #define _EVENTS_H_ #include<traps.h> +#include <xen/event_channel.h> #define NR_EVS 1024 @@ -39,6 +40,16 @@ /* prototypes */ int do_event(u32 port, struct pt_regs *regs); int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) ); +void bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) ); void init_events(void); +static inline int notify_via_evtchn(int port) +{ + evtchn_op_t op; + op.cmd = EVTCHNOP_send; + op.u.send.local_port = port; + return HYPERVISOR_event_channel_op(&op); +} + + #endif /* _EVENTS_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/hypervisor.h Fri Sep 9 16:30:54 2005 @@ -13,7 +13,6 @@ #define _HYPERVISOR_H_ #include <types.h> - #include <xen/xen.h> #include <xen/io/domain_controller.h> @@ -40,48 +39,26 @@ /* * Assembler stubs for hyper-calls. 
*/ - -#ifdef __i386__ -#define _a1 "b" -#define _a2 "c" -#define _a3 "d" -#define _a4 "S" -#else -#define _a1 "D" -#define _a2 "S" -#define _a3 "d" -#define _a4 "b" -#endif - -static __inline__ int HYPERVISOR_event_channel_op( - void *op) +#if defined(__i386__) +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) { int ret; unsigned long ignore; + __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_event_channel_op), "1" (op) - : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_set_trap_table(trap_info_t *table) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), - _a1 (table) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_mmu_update(mmu_update_t *req, - int count, - int *success_count, - domid_t domid) + : "0" (__HYPERVISOR_set_trap_table), "1" (table) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) { int ret; unsigned long ign1, ign2, ign3, ign4; @@ -89,18 +66,16 @@ __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; -} - - -static __inline__ int HYPERVISOR_mmuext_op(struct mmuext_op *op, - int count, - int *success_count, - domid_t domid) + : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), + "3" (success_count), "4" (domid) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) { int ret; unsigned long ign1, ign2, ign3, ign4; @@ -108,70 +83,65 @@ __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - 
return ret; -} - - - -static __inline__ int HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), - _a1 (frame_list), _a2 (entries) : "memory" ); - - - return ret; -} - -static __inline__ int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), - _a1 (ss), _a2 (esp) : "memory" ); - - return ret; -} - -#ifdef __i386__ -static __inline__ int HYPERVISOR_set_callbacks( + : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), + "3" (success_count), "4" (domid) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + int ret; + unsigned long ign1, ign2; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries) + : "memory" ); + + + return ret; +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + int ret; + unsigned long ign1, ign2; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_set_callbacks( unsigned long event_selector, unsigned long event_address, unsigned long failsafe_selector, unsigned long failsafe_address) { int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), - _a1 (event_selector), _a2 (event_address), - _a3 (failsafe_selector), _a4 (failsafe_address) : "memory" ); - - return ret; -} -#else -static __inline__ int HYPERVISOR_set_callbacks( - unsigned long event_address, - unsigned long failsafe_address, - unsigned long syscall_address) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_callbacks), - _a1 (event_address), _a2 
(failsafe_address), - _a3 (syscall_address) : "memory" ); - - return ret; -} -#endif - -static __inline__ int + unsigned long ign1, ign2, ign3, ign4; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) + : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector), + "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address) + : "memory" ); + + return ret; +} + +static inline int HYPERVISOR_fpu_taskswitch( int set) { @@ -187,67 +157,106 @@ return ret; } -static __inline__ int HYPERVISOR_yield(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - _a1 (SCHEDOP_yield) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_block(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - _a1 (SCHEDOP_block) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_shutdown(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - _a1 (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_reboot(void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - _a1 (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_suspend(unsigned long srec) -{ - int ret; +static inline int +HYPERVISOR_yield( + void) +{ + int ret; + unsigned long ign; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign) + : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield) + : "memory", "ecx" ); + + return ret; +} + +static inline int +HYPERVISOR_block( + void) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block) + : "memory", "ecx" ); + + return ret; +} + +static 
inline int +HYPERVISOR_shutdown( + void) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) + : "memory", "ecx" ); + + return ret; +} + +static inline int +HYPERVISOR_reboot( + void) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) + : "memory", "ecx" ); + + return ret; +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + int ret; + unsigned long ign1, ign2; + /* NB. On suspend, control software expects a suspend record in %esi. */ __asm__ __volatile__ ( TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), - _a1 (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory" ); - - return ret; -} - -#ifdef __i386__ -static __inline__ long HYPERVISOR_set_timer_op( u64 timeout ) + : "=a" (ret), "=b" (ign1), "=S" (ign2) + : "0" (__HYPERVISOR_sched_op), + "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), + "S" (srec) : "memory", "ecx"); + + return ret; +} + +static inline int +HYPERVISOR_crash( + void) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift)) + : "memory", "ecx" ); + + return ret; +} + +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) { int ret; unsigned long timeout_hi = (unsigned long)(timeout>>32); @@ -262,8 +271,516 @@ return ret; } + +#if 0 +static inline int +HYPERVISOR_dom0_op( + dom0_op_t *dom0_op) +{ + int ret; + unsigned long ign1; + + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op) + : "memory"); + + return 
ret; +} +#endif + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + int ret; + unsigned long ign1, ign2; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value) + : "memory" ); + + return ret; +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + unsigned long ret; + unsigned long ign; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign) + : "0" (__HYPERVISOR_get_debugreg), "1" (reg) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_update_descriptor( + u64 ma, u64 desc) +{ + int ret; + unsigned long ign1, ign2, ign3, ign4; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) + : "0" (__HYPERVISOR_update_descriptor), + "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)), + "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32)) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_dom_mem_op( + unsigned int op, unsigned long *extent_list, + unsigned long nr_extents, unsigned int extent_order) +{ + int ret; + unsigned long ign1, ign2, ign3, ign4, ign5; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4), + "=D" (ign5) + : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list), + "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + int ret; + unsigned long ign1, ign2; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + int ret; + unsigned long ign1, ign2, ign3, ign4; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" 
(ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) + : "0" (__HYPERVISOR_update_va_mapping), + "1" (va), "2" ((new_val).pte_low), +#ifdef CONFIG_X86_PAE + "3" ((new_val).pte_high), #else -static __inline__ long HYPERVISOR_set_timer_op( u64 timeout ) + "3" (0), +#endif + "4" (flags) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_event_channel_op( + void *op) +{ + int ret; + unsigned long ignore; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ignore) + : "0" (__HYPERVISOR_event_channel_op), "1" (op) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_xen_version( + int cmd) +{ + int ret; + unsigned long ignore; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ignore) + : "0" (__HYPERVISOR_xen_version), "1" (cmd) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + int ret; + unsigned long ign1, ign2, ign3; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) + : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) +{ + int ret; + unsigned long ign; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign) + : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + int ret; + unsigned long ign1, ign2, ign3; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) + : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + int ret; + unsigned long ign1, ign2, ign3, ign4, ign5; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" 
(ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), + "=S" (ign4), "=D" (ign5) + : "0" (__HYPERVISOR_update_va_mapping_otherdomain), + "1" (va), "2" ((new_val).pte_low), +#ifdef CONFIG_X86_PAE + "3" ((new_val).pte_high), +#else + "3" (0), +#endif + "4" (flags), "5" (domid) : + "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + int ret; + unsigned long ign1, ign2; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type) + : "memory" ); + + return ret; +} + +static inline int +HYPERVISOR_boot_vcpu( + unsigned long vcpu, vcpu_guest_context_t *ctxt) +{ + int ret; + unsigned long ign1, ign2; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) + : "memory"); + + return ret; +} + +static inline int +HYPERVISOR_vcpu_down( + int vcpu) +{ + int ret; + unsigned long ign1; + /* Yes, I really do want to clobber edx here: when we resume a + vcpu after unpickling a multi-processor domain, it returns + here, but clobbers all of the call clobbered registers. 
*/ + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) + : "memory", "ecx", "edx" ); + + return ret; +} + +static inline int +HYPERVISOR_vcpu_up( + int vcpu) +{ + int ret; + unsigned long ign1; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift)) + : "memory", "ecx" ); + + return ret; +} + +static inline int +HYPERVISOR_vcpu_pickle( + int vcpu, vcpu_guest_context_t *ctxt) +{ + int ret; + unsigned long ign1, ign2; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=c" (ign2) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), + "2" (ctxt) + : "memory" ); + + return ret; +} +#elif defined(__x86_64__) + +#define __syscall_clobber "r11","rcx","memory" + +/* + * Assembler stubs for hyper-calls. + */ +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + int ret; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + int ret; + + __asm__ __volatile__ ( + "movq %5, %%r10;" TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" ((long)count), + "d" (success_count), "g" ((unsigned long)domid) + : __syscall_clobber, "r10" ); + + return ret; +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + int ret; + + __asm__ __volatile__ ( + "movq %5, %%r10;" TRAP_INSTR + : "=a" (ret) + : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count), + "d" (success_count), "g" ((unsigned long)domid) + : __syscall_clobber, "r10" ); + + return ret; +} + +static inline int +HYPERVISOR_set_gdt( 
+ unsigned long *frame_list, int entries) +{ + int ret; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" ((long)entries) + : __syscall_clobber ); + + + return ret; +} +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + int ret; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" (ss), "S" (esp) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + int ret; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address), + "S" (failsafe_address), "d" (syscall_address) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch), + "D" ((unsigned long) set) : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_yield( + void) +{ + int ret; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_yield) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_block( + void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_block) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_shutdown( + void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_sched_op), + "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_reboot( + void) +{ + int ret; + + __asm__ 
__volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_sched_op), + "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))) + : __syscall_clobber ); + + return ret; +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + int ret; + + /* NB. On suspend, control software expects a suspend record in %esi. */ + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) + : "0" ((unsigned long)__HYPERVISOR_sched_op), + "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift))), + "S" (srec) + : __syscall_clobber ); + + return ret; +} + +/* + * We can have the timeout value in a single argument for the hypercall, but + * that will break the common code. + */ +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) { int ret; @@ -329,7 +846,7 @@ int ret; __asm__ __volatile__ ( TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom_mem_op), + : "=a" (ret) : "0" (__HYPERVISOR_memory_op), _a1 (dom_mem_op) : "memory" ); return ret; diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/lib.h Fri Sep 9 16:30:54 2005 @@ -60,10 +60,22 @@ /* printing */ #define printk printf #define kprintf printf -int printf(const char *fmt, ...); -int vprintf(const char *fmt, va_list ap); -int sprintf(char *buf, const char *cfmt, ...); -int vsprintf(char *buf, const char *cfmt, va_list ap); +#define _p(_x) ((void *)(unsigned long)(_x)) +void printf(const char *fmt, ...); +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); +int snprintf(char * buf, size_t size, const char *fmt, ...); +int scnprintf(char * buf, size_t size, const char *fmt, ...); +int vsprintf(char *buf, const char *fmt, va_list args); +int sprintf(char * buf, const char *fmt, ...); +int vsscanf(const char * buf, const char * fmt, va_list args); +int 
sscanf(const char * buf, const char * fmt, ...); + +long simple_strtol(const char *cp,char **endp,unsigned int base); +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base); +long long simple_strtoll(const char *cp,char **endp,unsigned int base); +unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base); + /* string and memory manipulation */ int memcmp(const void *cs, const void *ct, size_t count); @@ -77,6 +89,16 @@ size_t strlen(const char *s); char *strchr(const char *s, int c); char *strstr(const char *s1, const char *s2); +char * strcat(char * dest, const char * src); + + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +struct kvec { + void *iov_base; + size_t iov_len; +}; + #endif /* _LIB_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/mm.h Fri Sep 9 16:30:54 2005 @@ -24,6 +24,15 @@ #ifndef _MM_H_ #define _MM_H_ + +#ifdef __i386__ +#include <xen/arch-x86_32.h> +#endif + +#ifdef __x86_64__ +#include <xen/arch-x86_64.h> +#endif + #ifdef __x86_64__ @@ -56,6 +65,8 @@ #define L1_PAGETABLE_ENTRIES 1024 #define L2_PAGETABLE_ENTRIES 1024 + +#elif defined(__x86_64__) #endif /* Given a virtual address, get an entry offset into a page table. 
*/ @@ -97,13 +108,15 @@ extern unsigned long *phys_to_machine_mapping; #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) -#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) static __inline__ unsigned long phys_to_machine(unsigned long phys) { unsigned long machine = pfn_to_mfn(phys >> L1_PAGETABLE_SHIFT); machine = (machine << L1_PAGETABLE_SHIFT) | (phys & ~PAGE_MASK); return machine; } + + +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) static __inline__ unsigned long machine_to_phys(unsigned long machine) { unsigned long phys = mfn_to_pfn(machine >> L1_PAGETABLE_SHIFT); @@ -119,16 +132,15 @@ #define to_phys(x) ((unsigned long)(x)-VIRT_START) #define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) -#define __va to_virt -#define __pa to_phys #define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define mfn_to_virt(_mfn) (mach_to_virt(_mfn << PAGE_SHIFT)) void init_mm(void); unsigned long alloc_pages(int order); #define alloc_page() alloc_pages(0); void free_pages(void *pointer, int order); -//int is_mfn_mapped(unsigned long mfn); static __inline__ int get_order(unsigned long size) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/os.h --- a/extras/mini-os/include/os.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/os.h Fri Sep 9 16:30:54 2005 @@ -15,15 +15,16 @@ #define unlikely(x) __builtin_expect((x),0) #define smp_processor_id() 0 -#define preempt_disable() ((void)0) -#define preempt_enable() ((void)0) - -#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0)) + #ifndef __ASSEMBLY__ #include <types.h> +#include <hypervisor.h> #endif #include <xen/xen.h> + + +#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0)) #define __KERNEL_CS FLAT_KERNEL_CS #define __KERNEL_DS FLAT_KERNEL_DS @@ -54,8 +55,6 @@ /* Everything below this point is not included by assembler (.S) files. 
*/ #ifndef __ASSEMBLY__ -#define pt_regs xen_regs - void trap_init(void); /* @@ -69,10 +68,8 @@ #define __cli() \ do { \ vcpu_info_t *_vcpu; \ - preempt_disable(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ _vcpu->evtchn_upcall_mask = 1; \ - preempt_enable_no_resched(); \ barrier(); \ } while (0) @@ -80,13 +77,11 @@ do { \ vcpu_info_t *_vcpu; \ barrier(); \ - preempt_disable(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ _vcpu->evtchn_upcall_mask = 0; \ barrier(); /* unmask then check (avoid races) */ \ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ force_evtchn_callback(); \ - preempt_enable(); \ } while (0) #define __save_flags(x) \ @@ -100,15 +95,12 @@ do { \ vcpu_info_t *_vcpu; \ barrier(); \ - preempt_disable(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ barrier(); /* unmask then check (avoid races) */ \ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ force_evtchn_callback(); \ - preempt_enable(); \ - } else \ - preempt_enable_no_resched(); \ + }\ } while (0) #define safe_halt() ((void)0) @@ -116,11 +108,9 @@ #define __save_and_cli(x) \ do { \ vcpu_info_t *_vcpu; \ - preempt_disable(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_data[smp_processor_id()]; \ (x) = _vcpu->evtchn_upcall_mask; \ _vcpu->evtchn_upcall_mask = 1; \ - preempt_enable_no_resched(); \ barrier(); \ } while (0) @@ -135,6 +125,15 @@ /* This is a barrier for the compiler only, NOT the processor! 
*/ #define barrier() __asm__ __volatile__("": : :"memory") + +#if defined(__i386__) +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#elif defined(__x86_64__) +#define mb() __asm__ __volatile__ ("mfence":::"memory") +#define rmb() __asm__ __volatile__ ("lfence":::"memory") +#endif + #define LOCK_PREFIX "" #define LOCK "" @@ -147,69 +146,71 @@ typedef struct { volatile int counter; } atomic_t; -#define xchg(ptr,v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +/************************** i386 *******************************/ +#if defined (__i386__) + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) -static __inline__ unsigned long __xchg(unsigned long x, volatile void * ptr, - int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; +#define __xg(x) ((struct __xchg_dummy *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; } /** * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to set + * @nr: Bit to clear * @addr: Address to count from * - * This 
operation is atomic and cannot be reordered. + * This operation is atomic and cannot be reordered. + * It can be reorderdered on other architectures other than x86. * It also implies a memory barrier. */ -static __inline__ int test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( LOCK_PREFIX - "btrl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit),"=m" (ADDR) - :"Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int constant_test_bit(int nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int variable_test_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__( - "btl %2,%1\n\tsbbl %0,%0" - :"=r" (oldbit) - :"m" (ADDR),"Ir" (nr)); - return oldbit; +static inline int test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static inline int constant_test_bit(int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0; +} + +static inline int variable_test_bit(int nr, const volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; } #define test_bit(nr,addr) \ @@ -217,6 +218,152 @@ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writting portable code, + * make sure not to rely on its reordering guarantees. 
+ * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( LOCK + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static inline void clear_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( LOCK + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
+ */ +#define ADDR (*(volatile long *) addr) + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + + +#elif defined(__x86_64__)/* ifdef __i386__ */ +/************************** x86_84 *******************************/ + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +#define __xg(x) ((volatile long *)(x)) +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %k0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 8: + __asm__ __volatile__("xchgq %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"dIr" (nr) : "memory"); + return oldbit; +} + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile const void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"dIr" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + /** * set_bit - Atomically set a bit in memory @@ -230,10 +377,10 @@ */ static __inline__ void set_bit(int nr, volatile void * addr) { - __asm__ __volatile__( LOCK_PREFIX - "btsl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"dIr" (nr) : "memory"); } /** @@ -248,40 +395,43 @@ */ static __inline__ void clear_bit(int nr, volatile void * addr) { - __asm__ __volatile__( LOCK_PREFIX - "btrl %1,%0" - :"=m" (ADDR) - :"Ir" (nr)); -} - -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_inc(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "incl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) - + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"dIr" (nr)); +} + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. 
+ */ static __inline__ unsigned long __ffs(unsigned long word) { - __asm__("bsfl %1,%0" - :"=r" (word) - :"rm" (word)); - return word; + __asm__("bsfq %1,%0" + :"=r" (word) + :"rm" (word)); + return word; } #define ADDR (*(volatile long *) addr) + +#define rdtscll(val) do { \ + unsigned int __a,__d; \ + asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ + (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ +} while(0) + + +#else /* ifdef __x86_64__ */ +#error "Unsupported architecture" +#endif + + +/********************* common i386 and x86_64 ****************************/ + + static __inline__ void synch_set_bit(int nr, volatile void * addr) { @@ -306,6 +456,14 @@ return oldbit; } +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) { @@ -326,9 +484,8 @@ (__builtin_constant_p(nr) ? 
\ synch_const_test_bit((nr),(addr)) : \ synch_var_test_bit((nr),(addr))) -#endif /* !__ASSEMBLY__ */ - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - + + + +#endif /* not assembly */ #endif /* _OS_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/time.h --- a/extras/mini-os/include/time.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/time.h Fri Sep 9 16:30:54 2005 @@ -58,6 +58,6 @@ s_time_t get_s_time(void); s_time_t get_v_time(void); void gettimeofday(struct timeval *tv); -void block(u32 millisecs); +void block_domain(u32 millisecs); #endif /* _TIME_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/traps.h --- a/extras/mini-os/include/traps.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/traps.h Fri Sep 9 16:30:54 2005 @@ -17,6 +17,7 @@ #ifndef _TRAPS_H_ #define _TRAPS_H_ +#ifdef __i386__ struct pt_regs { long ebx; long ecx; @@ -34,7 +35,38 @@ long esp; int xss; }; +#elif __x86_64__ +struct pt_regs { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long rbp; + unsigned long rbx; +/* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; + unsigned long rcx; + unsigned long rdx; + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +/* end of arguments */ +/* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; + unsigned long eflags; + unsigned long rsp; + unsigned long ss; +/* top of stack page */ +}; + + +#endif void dump_regs(struct pt_regs *regs); diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/types.h --- a/extras/mini-os/include/types.h Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/types.h Fri Sep 9 16:30:54 2005 @@ -44,11 +44,19 @@ typedef long long quad_t; typedef unsigned long long u_quad_t; typedef unsigned int uintptr_t; + +typedef struct { unsigned 
long pte_low; } pte_t; #elif defined(__x86_64__) typedef long quad_t; typedef unsigned long u_quad_t; typedef unsigned long uintptr_t; + +typedef struct { unsigned long pte; } pte_t; #endif + + + +#define INT_MAX ((int)(~0U>>1)) #define UINT_MAX (~0U) #endif /* _TYPES_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/kernel.c Fri Sep 9 16:30:54 2005 @@ -33,6 +33,8 @@ #include <time.h> #include <types.h> #include <lib.h> +#include <sched.h> +#include <xenbus.h> /* * Shared page for communicating with the hypervisor. @@ -59,10 +61,12 @@ extern char shared_info[PAGE_SIZE]; +#define __pte(x) ((pte_t) { (0) } ) + static shared_info_t *map_shared_info(unsigned long pa) { if ( HYPERVISOR_update_va_mapping( - (unsigned long)shared_info, pa | 7, UVMF_INVLPG) ) + (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) ) { printk("Failed to map shared_info!!\n"); *(int*)0=0; @@ -77,7 +81,6 @@ void start_kernel(start_info_t *si) { static char hello[] = "Bootstrapping...\n"; - int i; (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(hello), hello); /* Copy the start_info struct to a globally-accessible area. */ @@ -96,7 +99,6 @@ (unsigned long)hypervisor_callback, (unsigned long)failsafe_callback, 0); #endif - trap_init(); /* ENABLE EVENT DELIVERY. This is disabled at start of day. */ @@ -119,7 +121,6 @@ * If used for porting another OS, start here to figure out your * guest os entry point. Otherwise continue below... 
*/ - /* init memory management */ init_mm(); @@ -127,15 +128,15 @@ init_events(); /* init time and timers */ init_time(); + + /* init scheduler */ + init_sched(); - /* do nothing */ - i = 0; - for ( ; ; ) - { -// HYPERVISOR_yield(); - block(100); - i++; - } + /* init xenbus */ + xs_init(); + + /* Everything initialised, start idle thread */ + run_idle_thread(); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/lib/printf.c --- a/extras/mini-os/lib/printf.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/lib/printf.c Fri Sep 9 16:30:54 2005 @@ -1,20 +1,18 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- +/* **************************************************************************** * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge **************************************************************************** * * File: printf.c * Author: Rolf Neugebauer (neugebar@xxxxxxxxxxxxx) - * Changes: + * Changes: Grzegorz Milos (gm281@xxxxxxxxx) * - * Date: Aug 2003 + * Date: Aug 2003, Aug 2005 * * Environment: Xen Minimal OS * Description: Library functions for printing * (freebsd port, mainly sys/subr_prf.c) * - **************************************************************************** - * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $ **************************************************************************** * *- @@ -60,409 +58,748 @@ #include <types.h> #include <hypervisor.h> #include <lib.h> - -/**************************************************************************** - * RN: printf family of routines - * taken mainly from sys/subr_prf.c - ****************************************************************************/ -char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz"; -#define hex2ascii(hex) (hex2ascii_data[hex]) -#define NBBY 8 /* number of bits in a byte */ -#define MAXNBUF (sizeof(quad_t) * NBBY + 1) - -static int kvprintf(char const *fmt, void *arg, int radix, va_list ap); - - -int -printf(const char *fmt, ...) 
-{ - va_list ap; - int retval; - static char printk_buf[1024]; - - va_start(ap, fmt); - retval = kvprintf(fmt, printk_buf, 10, ap); - printk_buf[retval] = '\0'; - va_end(ap); - (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(printk_buf), - printk_buf); - return retval; -} - -int -vprintf(const char *fmt, va_list ap) -{ - int retval; - static char printk_buf[1024]; - retval = kvprintf(fmt, printk_buf, 10, ap); - printk_buf[retval] = '\0'; - (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(printk_buf), - printk_buf); - return retval; -} - -int -sprintf(char *buf, const char *cfmt, ...) -{ - int retval; - va_list ap; - - va_start(ap, cfmt); - retval = kvprintf(cfmt, (void *)buf, 10, ap); - buf[retval] = '\0'; - va_end(ap); - return retval; -} - -int -vsprintf(char *buf, const char *cfmt, va_list ap) -{ - int retval; - - retval = kvprintf(cfmt, (void *)buf, 10, ap); - buf[retval] = '\0'; - return retval; -} - - -/* - * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse - * order; return an optional length and a pointer to the last character - * written in the buffer (i.e., the first character of the string). - * The buffer pointed to by `nbuf' must have length >= MAXNBUF. - */ -static char * -ksprintn(char *nbuf, u_long ul, int base, int *lenp) -{ - char *p; - - p = nbuf; - *p = '\0'; - do { - *++p = hex2ascii(ul % base); - } while (ul /= base); - if (lenp) - *lenp = p - nbuf; - return (p); -} -/* ksprintn, but for a quad_t. */ -static char * -ksprintqn(char *nbuf, u_quad_t uq, int base, int *lenp) -{ - char *p; - - p = nbuf; - *p = '\0'; - do { - *++p = hex2ascii(uq % base); - } while (uq /= base); - if (lenp) - *lenp = p - nbuf; - return (p); -} - -/* - * Scaled down version of printf(3). 
+#include <mm.h> +#include <ctype.h> + +/** + * simple_strtoul - convert a string to an unsigned long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +/** + * simple_strtol - convert a string to a signed long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base) +{ + unsigned long long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp) + ? 
toupper(*cp) : *cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +/** + * simple_strtoll - convert a string to a signed long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long long simple_strtoll(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoull(cp+1,endp,base); + return simple_strtoull(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & LARGE) ? large_digits : small_digits; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return buf; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else + { + /* XXX KAF: force unsigned mod and div. 
*/ + unsigned long long num2=(unsigned long long)num; + unsigned int base2=(unsigned int)base; + while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; } + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +/** +* vsnprintf - Format a string and place it in a buffer +* @buf: The buffer to place the result into +* @size: The size of the buffer, including the trailing null space +* @fmt: The format string to use +* @args: Arguments for the format string +* +* Call this function if you are already dealing with a va_list. +* You probably want snprintf instead. + */ +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. 
*/ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } + } + if (*fmt == 'q') { + qualifier = 'L'; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = "<NULL>"; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) { + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + 
if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? */ + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +/** + * snprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result 
into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} + +/** + * vsprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @args: Arguments for the format string * - * Two additional formats: - * - * The format %b is supported to decode error registers. - * Its usage is: - * - * printf("reg=%b\n", regval, "<base><arg>*"); - * - * where <base> is the output base expressed as a control character, e.g. - * \10 gives octal; \20 gives hex. Each arg is a sequence of characters, - * the first of which gives the bit number to be inspected (origin 1), and - * the next characters (up to a control character, i.e. a character <= 32), - * give the name of the register. Thus: - * - * kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE\n"); - * - * would produce output: - * - * reg=3<BITTWO,BITONE> - * - * XXX: %D -- Hexdump, takes pointer and separator string: - * ("%6D", ptr, ":") -> XX:XX:XX:XX:XX:XX - * ("%*D", len, ptr, " " -> XX XX XX XX ... - */ - -/* RN: This normally takes a function for output. 
- * we always print to a string and the use HYPERCALL for write to console */ -static int -kvprintf(char const *fmt, void *arg, int radix, va_list ap) -{ - -#define PCHAR(c) {int cc=(c); *d++ = cc; retval++; } - - char nbuf[MAXNBUF]; - char *p, *q, *d; - u_char *up; - int ch, n; - u_long ul; - u_quad_t uq; - int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot; - int dwidth; - char padc; - int retval = 0; - - ul = 0; - uq = 0; - d = (char *) arg; - - if (fmt == NULL) - fmt = "(fmt null)\n"; - - if (radix < 2 || radix > 36) - radix = 10; - - for (;;) { - padc = ' '; - width = 0; - while ((ch = (u_char)*fmt++) != '%') { - if (ch == '\0') - return retval; - PCHAR(ch); - } - qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; - sign = 0; dot = 0; dwidth = 0; -reswitch: switch (ch = (u_char)*fmt++) { - case '.': - dot = 1; - goto reswitch; - case '#': - sharpflag = 1; - goto reswitch; - case '+': - sign = 1; - goto reswitch; - case '-': - ladjust = 1; - goto reswitch; - case '%': - PCHAR(ch); - break; - case '*': - if (!dot) { - width = va_arg(ap, int); - if (width < 0) { - ladjust = !ladjust; - width = -width; + * Call this function if you are already dealing with a va_list. + * You probably want sprintf instead. + */ +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args); +} + + +/** + * sprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + + +void printf(const char *fmt, ...) 
+{ + static char buf[1024]; + va_list args; + + va_start(args, fmt); + (void)vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(buf), buf); +} + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +int vsscanf(const char * buf, const char * fmt, va_list args) +{ + const char *str = buf; + char *next; + char digit; + int num = 0; + int qualifier; + int base; + int field_width; + int is_sign = 0; + + while(*fmt && *str) { + /* skip any white space in format */ + /* white space in format matchs any amount of + * white space, including none, in the input. + */ + if (isspace(*fmt)) { + while (isspace(*fmt)) + ++fmt; + while (isspace(*str)) + ++str; + } + + /* anything that is not a conversion must match exactly */ + if (*fmt != '%' && *fmt) { + if (*fmt++ != *str++) + break; + continue; + } + + if (!*fmt) + break; + ++fmt; + + /* skip this conversion. 
+ * advance both strings to next white space + */ + if (*fmt == '*') { + while (!isspace(*fmt) && *fmt) + fmt++; + while (!isspace(*str) && *str) + str++; + continue; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + + /* get conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || + *fmt == 'Z' || *fmt == 'z') { + qualifier = *fmt++; + if (unlikely(qualifier == *fmt)) { + if (qualifier == 'h') { + qualifier = 'H'; + fmt++; + } else if (qualifier == 'l') { + qualifier = 'L'; + fmt++; } - } else { - dwidth = va_arg(ap, int); } - goto reswitch; - case '0': - if (!dot) { - padc = '0'; - goto reswitch; + } + base = 10; + is_sign = 0; + + if (!*fmt || !*str) + break; + + switch(*fmt++) { + case 'c': + { + char *s = (char *) va_arg(args,char*); + if (field_width == -1) + field_width = 1; + do { + *s++ = *str++; + } while (--field_width > 0 && *str); + num++; + } + continue; + case 's': + { + char *s = (char *) va_arg(args, char *); + if(field_width == -1) + field_width = INT_MAX; + /* first, skip leading white space in buffer */ + while (isspace(*str)) + str++; + + /* now copy until next white space */ + while (*str && !isspace(*str) && field_width--) { + *s++ = *str++; } - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - for (n = 0;; ++fmt) { - n = n * 10 + ch - '0'; - ch = *fmt; - if (ch < '0' || ch > '9') - break; - } - if (dot) - dwidth = n; - else - width = n; - goto reswitch; - case 'b': - ul = va_arg(ap, int); - p = va_arg(ap, char *); - for (q = ksprintn(nbuf, ul, *p++, NULL); *q;) - PCHAR(*q--); - - if (!ul) - break; - - for (tmp = 0; *p;) { - n = *p++; - if (ul & (1 << (n - 1))) { - PCHAR(tmp ? 
',' : '<'); - for (; (n = *p) > ' '; ++p) - PCHAR(n); - tmp = 1; - } else - for (; *p > ' '; ++p) - continue; - } - if (tmp) - PCHAR('>'); - break; - case 'c': - PCHAR(va_arg(ap, int)); - break; - case 'D': - up = va_arg(ap, u_char *); - p = va_arg(ap, char *); - if (!width) - width = 16; - while(width--) { - PCHAR(hex2ascii(*up >> 4)); - PCHAR(hex2ascii(*up & 0x0f)); - up++; - if (width) - for (q=p;*q;q++) - PCHAR(*q); - } - break; - case 'd': - if (qflag) - uq = va_arg(ap, quad_t); - else if (lflag) - ul = va_arg(ap, long); - else - ul = va_arg(ap, int); - sign = 1; - base = 10; - goto number; - case 'l': - if (lflag) { - lflag = 0; - qflag = 1; - } else - lflag = 1; - goto reswitch; + *s = '\0'; + num++; + } + continue; + case 'n': + /* return number of characters read so far */ + { + int *i = (int *)va_arg(args,int*); + *i = str - buf; + } + continue; case 'o': - if (qflag) - uq = va_arg(ap, u_quad_t); - else if (lflag) - ul = va_arg(ap, u_long); - else - ul = va_arg(ap, u_int); base = 8; - goto nosign; - case 'p': - ul = (uintptr_t)va_arg(ap, void *); - base = 16; - sharpflag = 0; - padc = '0'; - width = sizeof(uintptr_t)*2; - goto nosign; - case 'q': - qflag = 1; - goto reswitch; - case 'n': - case 'r': - if (qflag) - uq = va_arg(ap, u_quad_t); - else if (lflag) - ul = va_arg(ap, u_long); - else - ul = sign ? 
- (u_long)va_arg(ap, int) : va_arg(ap, u_int); - base = radix; - goto number; - case 's': - p = va_arg(ap, char *); - if (p == NULL) - p = "(null)"; - if (!dot) - n = strlen (p); - else - for (n = 0; n < dwidth && p[n]; n++) - continue; - - width -= n; - - if (!ladjust && width > 0) - while (width--) - PCHAR(padc); - while (n--) - PCHAR(*p++); - if (ladjust && width > 0) - while (width--) - PCHAR(padc); - break; - case 'u': - if (qflag) - uq = va_arg(ap, u_quad_t); - else if (lflag) - ul = va_arg(ap, u_long); - else - ul = va_arg(ap, u_int); - base = 10; - goto nosign; + break; case 'x': case 'X': - if (qflag) - uq = va_arg(ap, u_quad_t); - else if (lflag) - ul = va_arg(ap, u_long); - else - ul = va_arg(ap, u_int); base = 16; - goto nosign; + break; + case 'i': + base = 0; + case 'd': + is_sign = 1; + case 'u': + break; + case '%': + /* looking for '%' in str */ + if (*str++ != '%') + return num; + continue; + default: + /* invalid format; stop here */ + return num; + } + + /* have some sort of integer conversion. + * first, skip white space in buffer. 
+ */ + while (isspace(*str)) + str++; + + digit = *str; + if (is_sign && digit == '-') + digit = *(str + 1); + + if (!digit + || (base == 16 && !isxdigit(digit)) + || (base == 10 && !isdigit(digit)) + || (base == 8 && (!isdigit(digit) || digit > '7')) + || (base == 0 && !isdigit(digit))) + break; + + switch(qualifier) { + case 'H': /* that's 'hh' in format */ + if (is_sign) { + signed char *s = (signed char *) va_arg(args,signed char *); + *s = (signed char) simple_strtol(str,&next,base); + } else { + unsigned char *s = (unsigned char *) va_arg(args, unsigned char *); + *s = (unsigned char) simple_strtoul(str, &next, base); + } + break; + case 'h': + if (is_sign) { + short *s = (short *) va_arg(args,short *); + *s = (short) simple_strtol(str,&next,base); + } else { + unsigned short *s = (unsigned short *) va_arg(args, unsigned short *); + *s = (unsigned short) simple_strtoul(str, &next, base); + } + break; + case 'l': + if (is_sign) { + long *l = (long *) va_arg(args,long *); + *l = simple_strtol(str,&next,base); + } else { + unsigned long *l = (unsigned long*) va_arg(args,unsigned long*); + *l = simple_strtoul(str,&next,base); + } + break; + case 'L': + if (is_sign) { + long long *l = (long long*) va_arg(args,long long *); + *l = simple_strtoll(str,&next,base); + } else { + unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*); + *l = simple_strtoull(str,&next,base); + } + break; + case 'Z': case 'z': - if (qflag) - uq = va_arg(ap, u_quad_t); - else if (lflag) - ul = va_arg(ap, u_long); - else - ul = sign ? 
- (u_long)va_arg(ap, int) : va_arg(ap, u_int); - base = 16; - goto number; -nosign: sign = 0; -number: - if (qflag) { - if (sign && (quad_t)uq < 0) { - neg = 1; - uq = -(quad_t)uq; - } - p = ksprintqn(nbuf, uq, base, &tmp); + { + size_t *s = (size_t*) va_arg(args,size_t*); + *s = (size_t) simple_strtoul(str,&next,base); + } + break; + default: + if (is_sign) { + int *i = (int *) va_arg(args, int*); + *i = (int) simple_strtol(str,&next,base); } else { - if (sign && (long)ul < 0) { - neg = 1; - ul = -(long)ul; - } - p = ksprintn(nbuf, ul, base, &tmp); + unsigned int *i = (unsigned int*) va_arg(args, unsigned int*); + *i = (unsigned int) simple_strtoul(str,&next,base); } - if (sharpflag && (qflag ? uq != 0 : ul != 0)) { - if (base == 8) - tmp++; - else if (base == 16) - tmp += 2; - } - if (neg) - tmp++; - - if (!ladjust && width && (width -= tmp) > 0) - while (width--) - PCHAR(padc); - if (neg) - PCHAR('-'); - if (sharpflag && (qflag ? uq != 0 : ul != 0)) { - if (base == 8) { - PCHAR('0'); - } else if (base == 16) { - PCHAR('0'); - PCHAR('x'); - } - } - - while (*p) - PCHAR(*p--); - - if (ladjust && width && (width -= tmp) > 0) - while (width--) - PCHAR(padc); - - break; - default: - PCHAR('%'); - if (lflag) - PCHAR('l'); - PCHAR(ch); - break; - } + break; + } + num++; + + if (!next) + break; + str = next; } -#undef PCHAR -} - + return num; +} + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char * buf, const char * fmt, ...) 
+{ + va_list args; + int i; + + va_start(args,fmt); + i = vsscanf(buf,fmt,args); + va_end(args); + return i; +} + + diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/lib/string.c --- a/extras/mini-os/lib/string.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/lib/string.c Fri Sep 9 16:30:54 2005 @@ -107,6 +107,19 @@ return sc - s; } + +char * strcat(char * dest, const char * src) +{ + char *tmp = dest; + + while (*dest) + dest++; + + while ((*dest++ = *src++) != '\0'); + + return tmp; +} + size_t strlen(const char * s) { const char *sc; diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/mm.c --- a/extras/mini-os/mm.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/mm.c Fri Sep 9 16:30:54 2005 @@ -198,7 +198,6 @@ #endif - /* * Initialise allocator, placing addresses [@min,@max] in free pool. * @min and @max are PHYSICAL addresses. @@ -486,16 +485,17 @@ phys_to_machine_mapping = (unsigned long *)start_info.mfn_list; /* First page follows page table pages and 3 more pages (store page etc) */ - start_pfn = PFN_UP(__pa(start_info.pt_base)) + start_info.nr_pt_frames + 3; + start_pfn = PFN_UP(to_phys(start_info.pt_base)) + start_info.nr_pt_frames + 3; max_pfn = start_info.nr_pages; printk(" start_pfn: %lx\n", start_pfn); printk(" max_pfn: %lx\n", max_pfn); +#ifdef __i386__ build_pagetable(&start_pfn, &max_pfn); - -#ifdef __i386__ +#endif + /* * now we can initialise the page allocator */ @@ -503,7 +503,5 @@ (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn)); init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn)); -#endif - printk("MM: done\n"); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/time.c --- a/extras/mini-os/time.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/time.c Fri Sep 9 16:30:54 2005 @@ -203,7 +203,7 @@ } -void block(u32 millisecs) +void block_domain(u32 millisecs) { struct timeval tv; gettimeofday(&tv); @@ -232,5 +232,6 @@ void init_time(void) { + printk("Initialising timer 
interface\n"); bind_virq(VIRQ_TIMER, &timer_handler); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/traps.c --- a/extras/mini-os/traps.c Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/traps.c Fri Sep 9 16:30:54 2005 @@ -33,36 +33,7 @@ void dump_regs(struct pt_regs *regs) { - unsigned long esp; - unsigned short ss; - -#ifdef __x86_64__ - esp = regs->rsp; - ss = regs->xss; -#else - esp = (unsigned long) (®s->esp); - ss = __KERNEL_DS; - if (regs->xcs & 2) { -printk("CS is true, esp is %x\n", regs->esp); - esp = regs->esp; - ss = regs->xss & 0xffff; - } -#endif - printf("EIP: %04x:[<%p>]\n", - 0xffff & regs->xcs , regs->eip); - printf("EFLAGS: %p\n",regs->eflags); - printf("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printf("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); -#ifdef __x86_64__ - printf("r8 : %p r9 : %p r10: %p r11: %p\n", - regs->r8, regs->r9, regs->r10, regs->r11); - printf("r12: %p r13: %p r14: %p r15: %p\n", - regs->r12, regs->r13, regs->r14, regs->r15); -#endif - printf("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk("FIXME: proper register dump (with the stack dump)\n"); } @@ -105,6 +76,7 @@ printk("Page fault at linear address %p\n", addr); dump_regs(regs); #ifdef __x86_64__ + /* FIXME: _PAGE_PSE */ { unsigned long *tab = (unsigned long *)start_info.pt_base; unsigned long page; @@ -112,23 +84,16 @@ printk("Pagetable walk from %p:\n", tab); page = tab[l4_table_offset(addr)]; - tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); + tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); printk(" L4 = %p (%p)\n", page, tab); - if ( !(page & AGERESENT) ) - goto out; page = tab[l3_table_offset(addr)]; - tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); + tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); printk(" L3 = %p (%p)\n", page, tab); - if ( !(page & AGERESENT) ) - goto out; page = 
tab[l2_table_offset(addr)]; - tab = __va(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); - printk(" L2 = %p (%p) %s\n", page, tab, - (page & AGESE) ? "(2MB)" : ""); - if ( !(page & AGERESENT) || (page & AGESE) ) - goto out; + tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); + printk(" L2 = %p (%p)\n", page, tab); page = tab[l1_table_offset(addr)]; printk(" L1 = %p\n", page); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.4-xen-sparse/mkbuildtree --- a/linux-2.4-xen-sparse/mkbuildtree Thu Sep 8 15:18:40 2005 +++ b/linux-2.4-xen-sparse/mkbuildtree Fri Sep 9 16:30:54 2005 @@ -102,9 +102,9 @@ relative_lndir ${RS} rm -f mkbuildtree -set ${RS}/../linux-2.6-xen-sparse -[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; } -LINUX_26="$1" +LINUX_26=${RS}/../linux-2.6-xen-sparse +[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; } + # Create links to the shared definitions of the Xen interfaces. rm -rf ${AD}/include/asm-xen/xen-public diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/Kconfig Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Fri Sep 9 16:30:54 2005 @@ -70,6 +70,27 @@ network devices to other guests via a high-performance shared-memory interface. +config XEN_TPMDEV_FRONTEND + bool "TPM-device frontend driver" + default n + help + The TPM-device frontend driver. + +config XEN_TPMDEV_BACKEND + bool "TPM-device backend driver" + default n + help + The TPM-device backend driver + +config XEN_TPMDEV_CLOSE_IF_VTPM_FAILS + bool "TPM backend closes upon vTPM failure" + depends on XEN_TPMDEV_BACKEND + default n + help + The TPM backend closes the channel if the vTPM in userspace indicates + a failure. The corresponding domain's channel will be closed. + Say Y if you want this feature. 
+ config XEN_BLKDEV_FRONTEND bool "Block-device frontend driver" default y @@ -88,15 +109,8 @@ dedicated device-driver domain, or your master control domain (domain 0), then you almost certainly want to say Y here. -config XEN_NETDEV_GRANT_TX - bool "Grant table substrate for net drivers tx path (DANGEROUS)" - default n - help - This introduces the use of grant tables as a data exhange mechanism - between the frontend and backend network drivers. - -config XEN_NETDEV_GRANT_RX - bool "Grant table substrate for net drivers rx path (DANGEROUS)" +config XEN_NETDEV_GRANT + bool "Grant table substrate for network drivers (DANGEROUS)" default n help This introduces the use of grant tables as a data exhange mechanism diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/Kconfig.drivers --- a/linux-2.6-xen-sparse/arch/xen/Kconfig.drivers Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig.drivers Fri Sep 9 16:30:54 2005 @@ -49,6 +49,10 @@ endif if !XEN_PHYSDEV_ACCESS +source "drivers/char/tpm/Kconfig.domU" +endif + +if !XEN_PHYSDEV_ACCESS menu "Character devices" diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/Makefile --- a/linux-2.6-xen-sparse/arch/xen/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/Makefile Fri Sep 9 16:30:54 2005 @@ -65,6 +65,7 @@ XINSTALL_NAME ?= $(KERNELRELEASE) install: vmlinuz +install kernel_install: mkdir -p $(INSTALL_PATH)/boot ln -f -s vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) $(INSTALL_PATH)/boot/vmlinuz-$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(XENGUEST)$(INSTALL_SUFFIX) rm -f $(INSTALL_PATH)/boot/vmlinuz-$(XINSTALL_NAME)$(INSTALL_SUFFIX) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Fri Sep 9 16:30:54 2005 @@ -15,10 +15,11 @@ CONFIG_XEN_BLKDEV_BACKEND=y # 
CONFIG_XEN_BLKDEV_TAP_BE is not set CONFIG_XEN_NETDEV_BACKEND=y +# CONFIG_XEN_TPMDEV_FRONTEND is not set +# CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_NETDEV_GRANT_TX=y -CONFIG_XEN_NETDEV_GRANT_RX=y +CONFIG_XEN_NETDEV_GRANT=y # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -1122,7 +1123,7 @@ # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set +CONFIG_CRAMFS=y # CONFIG_VXFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Fri Sep 9 16:30:54 2005 @@ -15,10 +15,11 @@ CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP_BE is not set CONFIG_XEN_NETDEV_BACKEND=y +# CONFIG_XEN_TPMDEV_FRONTEND is not set +# CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_NETDEV_GRANT_TX=y -CONFIG_XEN_NETDEV_GRANT_RX=y +CONFIG_XEN_NETDEV_GRANT=y # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -195,6 +196,7 @@ # CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=16384 @@ -1030,7 +1032,7 @@ # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set +CONFIG_CRAMFS=y # CONFIG_VXFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 --- 
a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Fri Sep 9 16:30:54 2005 @@ -12,10 +12,11 @@ # # CONFIG_XEN_PRIVILEGED_GUEST is not set # CONFIG_XEN_PHYSDEV_ACCESS is not set +# CONFIG_XEN_TPMDEV_FRONTEND is not set +# CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_NETDEV_GRANT_TX=y -CONFIG_XEN_NETDEV_GRANT_RX=y +CONFIG_XEN_NETDEV_GRANT=y # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -336,6 +337,7 @@ CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_TCG_TPM is not set # # Character devices diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Fri Sep 9 16:30:54 2005 @@ -12,10 +12,11 @@ # # CONFIG_XEN_PRIVILEGED_GUEST is not set # CONFIG_XEN_PHYSDEV_ACCESS is not set +# CONFIG_XEN_TPMDEV_FRONTEND is not set +# CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_NETDEV_GRANT_TX=y -CONFIG_XEN_NETDEV_GRANT_RX=y +CONFIG_XEN_NETDEV_GRANT=y # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -662,6 +663,7 @@ CONFIG_INPUT=m CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set +# CONFIG_TCG_TPM is not set # # Character devices diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Fri Sep 9 16:30:54 2005 @@ -15,10 +15,11 @@ CONFIG_XEN_BLKDEV_BACKEND=y # 
CONFIG_XEN_BLKDEV_TAP_BE is not set CONFIG_XEN_NETDEV_BACKEND=y +# CONFIG_XEN_TPMDEV_FRONTEND is not set +# CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_NETDEV_GRANT_TX=y -CONFIG_XEN_NETDEV_GRANT_RX=y +CONFIG_XEN_NETDEV_GRANT=y # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -1855,9 +1856,7 @@ # # TPM devices # -CONFIG_TCG_TPM=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m +# CONFIG_TCG_TPM is not set # # I2C support diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Fri Sep 9 16:30:54 2005 @@ -15,10 +15,11 @@ CONFIG_XEN_BLKDEV_BACKEND=y # CONFIG_XEN_BLKDEV_TAP_BE is not set CONFIG_XEN_NETDEV_BACKEND=y +# CONFIG_XEN_TPMDEV_FRONTEND is not set +# CONFIG_XEN_TPMDEV_BACKEND is not set CONFIG_XEN_BLKDEV_FRONTEND=y CONFIG_XEN_NETDEV_FRONTEND=y -CONFIG_XEN_NETDEV_GRANT_TX=y -CONFIG_XEN_NETDEV_GRANT_RX=y +CONFIG_XEN_NETDEV_GRANT=y # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set # CONFIG_XEN_BLKDEV_TAP is not set # CONFIG_XEN_SHADOW_MODE is not set @@ -2201,7 +2202,7 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set +CONFIG_DEVFS_FS=y CONFIG_DEVPTS_FS_XATTR=y CONFIG_DEVPTS_FS_SECURITY=y CONFIG_TMPFS=y @@ -2231,7 +2232,7 @@ CONFIG_JFFS2_ZLIB=y CONFIG_JFFS2_RTIME=y # CONFIG_JFFS2_RUBIN is not set -CONFIG_CRAMFS=m +CONFIG_CRAMFS=y CONFIG_VXFS_FS=m # CONFIG_HPFS_FS is not set CONFIG_QNX4FS_FS=m diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Fri Sep 9 16:30:54 2005 @@ -379,18 +379,18 @@ If you don't know what to do here, say N. 
config SMP_ALTERNATIVES - bool "SMP alternatives support (EXPERIMENTAL)" - depends on SMP && EXPERIMENTAL - help - Try to reduce the overhead of running an SMP kernel on a uniprocessor - host slightly by replacing certain key instruction sequences - according to whether we currently have more than one CPU available. - This should provide a noticeable boost to performance when - running SMP kernels on UP machines, and have negligible impact - when running on an true SMP host. + bool "SMP alternatives support (EXPERIMENTAL)" + depends on SMP && EXPERIMENTAL + help + Try to reduce the overhead of running an SMP kernel on a uniprocessor + host slightly by replacing certain key instruction sequences + according to whether we currently have more than one CPU available. + This should provide a noticeable boost to performance when + running SMP kernels on UP machines, and have negligible impact + when running on an true SMP host. If unsure, say N. - + config NR_CPUS int "Maximum number of CPUs (2-255)" range 2 255 @@ -807,8 +807,8 @@ direct access method and falls back to the BIOS if that doesn't work. If unsure, go with the default, which is "Any". 
-config PCI_GOBIOS - bool "BIOS" +#config PCI_GOBIOS +# bool "BIOS" config PCI_GOMMCONFIG bool "MMConfig" @@ -821,10 +821,10 @@ endchoice -config PCI_BIOS - bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) - default y +#config PCI_BIOS +# bool +# depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) +# default y config PCI_DIRECT bool diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Fri Sep 9 16:30:54 2005 @@ -5,6 +5,7 @@ XENARCH := $(subst ",,$(CONFIG_XENARCH)) CFLAGS += -Iarch/$(XENARCH)/kernel +AFLAGS += -Iarch/$(XENARCH)/kernel extra-y := head.o init_task.o @@ -32,7 +33,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o -c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o +c-obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o c-obj-$(CONFIG_X86_NUMAQ) += numaq.o c-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o c-obj-$(CONFIG_MODULES) += module.o @@ -69,7 +70,7 @@ $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \ $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \ - $(obj)/vsyscall-%.o FORCE + $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE $(call if_changed,syscall) # We also create a special relocatable object that should mirror the symbol @@ -81,20 +82,17 @@ SYSCFLAGS_vsyscall-syms.o = -r $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ - $(obj)/vsyscall-sysenter.o FORCE + $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE $(call if_changed,syscall) c-link := -s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o +s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o vsyscall-note.o $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)): @ln -fsn 
$(srctree)/arch/i386/kernel/$(notdir $@) $@ $(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S -EXTRA_AFLAGS += -I$(obj) -$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S - obj-y += $(c-obj-y) $(s-obj-y) obj-m += $(c-obj-m) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Fri Sep 9 16:30:54 2005 @@ -469,6 +469,18 @@ unsigned int irq; unsigned int plat_gsi = gsi; +#ifdef CONFIG_PCI + /* + * Make sure all (legacy) PCI IRQs are set as level-triggered. + */ + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { + extern void eisa_set_level_irq(unsigned int irq); + + if (edge_level == ACPI_LEVEL_SENSITIVE) + eisa_set_level_irq(gsi); + } +#endif + #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low); @@ -610,7 +622,7 @@ acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode; #endif -#ifdef CONFIG_X86_PM_TIMER +#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN) /* detect the location of the ACPI PM Timer */ if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 
2 */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c Fri Sep 9 16:30:54 2005 @@ -147,7 +147,7 @@ { struct cpuinfo_x86 *c = &boot_cpu_data; - if (!(xen_start_info.flags & SIF_PRIVILEGED)) + if (!(xen_start_info->flags & SIF_PRIVILEGED)) return -ENODEV; if ((!cpu_has(c, X86_FEATURE_MTRR)) && diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Fri Sep 9 16:30:54 2005 @@ -47,6 +47,7 @@ #include <asm/segment.h> #include <asm/smp.h> #include <asm/page.h> +#include <asm/desc.h> #include "irq_vectors.h" #include <asm-xen/xen-public/xen.h> @@ -112,7 +113,7 @@ XEN_BLOCK_EVENTS(%esi) #else #define preempt_stop -#define resume_kernel restore_all +#define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -161,11 +162,9 @@ addl $4, %esp; \ 1: iret; \ .section .fixup,"ax"; \ -2: movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ - movl $11,%eax; \ - call do_exit; \ +2: pushl $0; \ + pushl $do_iret_error; \ + jmp error_code; \ .previous; \ .section __ex_table,"a";\ .align 4; \ @@ -196,7 +195,7 @@ movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al testl $(VM_MASK | 2), %eax - jz resume_kernel # returning to kernel or vm86-space + jz resume_kernel ENTRY(resume_userspace) XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -211,7 +210,7 @@ ENTRY(resume_kernel) XEN_BLOCK_EVENTS(%esi) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all + jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? 
testb $_TIF_NEED_RESCHED, %cl @@ -252,7 +251,8 @@ SAVE_ALL GET_THREAD_INFO(%ebp) - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ + testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -276,7 +276,8 @@ SAVE_ALL GET_THREAD_INFO(%ebp) # system call tracing in operation - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ + testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -290,7 +291,20 @@ movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work + restore_all: +#if 0 /* XEN */ + movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS + # Warning: OLDSS(%esp) contains the wrong/random values if we + # are returning to the kernel. + # See comments in process.c:copy_thread() for details. + movb OLDSS(%esp), %ah + movb CS(%esp), %al + andl $(VM_MASK | (4 << 8) | 3), %eax + cmpl $((4 << 8) | 3), %eax + je ldt_ss # returning to user-space with LDT SS +#endif /* XEN */ +restore_nocheck: testl $VM_MASK, EFLAGS(%esp) jnz resume_vm86 movb EVENT_MASK(%esp), %al @@ -300,7 +314,19 @@ andb $1,%al # %al == mask & ~saved_mask jnz restore_all_enable_events # != 0 => reenable event delivery XEN_PUT_VCPU_INFO(%esi) - RESTORE_ALL + RESTORE_REGS + addl $4, %esp +1: iret +.section .fixup,"ax" +iret_exc: + pushl $0 # no error code + pushl $do_iret_error + jmp error_code +.previous +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous resume_vm86: XEN_UNBLOCK_EVENTS(%esi) @@ -309,6 +335,33 @@ movl $__HYPERVISOR_switch_vm86,%eax int $0x82 ud2 + +#if 0 /* XEN */ +ldt_ss: + larl OLDSS(%esp), %eax + jnz restore_nocheck + testl $0x00400000, %eax # returning to 32bit stack? 
+ jnz restore_nocheck # allright, normal return + /* If returning to userspace with 16bit stack, + * try to fix the higher word of ESP, as the CPU + * won't restore it. + * This is an "official" bug of all the x86-compatible + * CPUs, which we can try to work around to make + * dosemu and wine happy. */ + subl $8, %esp # reserve space for switch16 pointer + cli + movl %esp, %eax + /* Set up the 16bit stack frame with switch32 pointer on top, + * and a switch16 pointer on top of the current frame. */ + call setup_x86_bogus_stack + RESTORE_REGS + lss 20+4(%esp), %esp # switch to 16bit stack +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous +#endif /* XEN */ # perform work that needs to be done immediately before resumption ALIGN @@ -385,6 +438,27 @@ jmp resume_userspace #if 0 /* XEN */ +#define FIXUP_ESPFIX_STACK \ + movl %esp, %eax; \ + /* switch to 32bit stack using the pointer on top of 16bit stack */ \ + lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ + /* copy data from 16bit stack to 32bit stack */ \ + call fixup_x86_bogus_stack; \ + /* put ESP to the proper location */ \ + movl %eax, %esp; +#define UNWIND_ESPFIX_STACK \ + pushl %eax; \ + movl %ss, %eax; \ + /* see if on 16bit stack */ \ + cmpw $__ESPFIX_SS, %ax; \ + jne 28f; \ + movl $__KERNEL_DS, %edx; \ + movl %edx, %ds; \ + movl %edx, %es; \ + /* switch to 32bit stack */ \ + FIXUP_ESPFIX_STACK \ +28: popl %eax; + /* * Build the entry stubs and pointer table with * some assembler magic. @@ -440,7 +514,9 @@ pushl %ecx pushl %ebx cld - movl %es, %ecx + pushl %es +# UNWIND_ESPFIX_STACK + popl %ecx movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code movl %eax, ORIG_EAX(%esp) @@ -625,6 +701,11 @@ * fault happened on the sysenter path. 
*/ ENTRY(nmi) + pushl %eax + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + je nmi_16bit_stack cmpl $sysenter_entry,(%esp) je nmi_stack_fixup pushl %eax @@ -644,7 +725,7 @@ xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - RESTORE_ALL + jmp restore_all nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -659,6 +740,29 @@ nmi_debug_stack_fixup: FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct + +nmi_16bit_stack: + /* create the pointer to lss back */ + pushl %ss + pushl %esp + movzwl %sp, %esp + addw $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) + .endr + pushl %eax + SAVE_ALL + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi + RESTORE_REGS + lss 12+4(%esp), %esp # back to 16bit stack +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous #endif /* XEN */ ENTRY(int3) @@ -725,7 +829,9 @@ pushl %ecx pushl %ebx cld - movl %es,%edi + pushl %es +# UNWIND_ESPFIX_STACK + popl %edi movl ES(%esp), %ecx /* get the faulting address */ movl ORIG_EAX(%esp), %edx /* get the error code */ movl %eax, ORIG_EAX(%esp) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Fri Sep 9 16:30:54 2005 @@ -38,17 +38,13 @@ #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id ENTRY(startup_32) - cld - - /* Copy the necessary stuff from xen_start_info structure. 
*/ - mov $xen_start_info_union,%edi - mov $512,%ecx - rep movsl + movl %esi,xen_start_info #ifdef CONFIG_SMP ENTRY(startup_32_smp) +#endif /* CONFIG_SMP */ + cld -#endif /* CONFIG_SMP */ /* Set up the stack pointer */ lss stack_start,%esp @@ -179,7 +175,7 @@ .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ .quad 0x0000000000000000 /* 0xc8 APM DS data */ - .quad 0x0000000000000000 /* 0xd0 - unused */ + .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c Fri Sep 9 16:30:54 2005 @@ -242,12 +242,12 @@ } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); + seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); for_each_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).apic_timer_irqs); + seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); @@ -263,6 +263,7 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; + static int warned; for (irq = 0; irq < NR_IRQS; irq++) { cpumask_t mask; @@ -276,7 +277,7 @@ } if (irq_desc[irq].handler->set_affinity) irq_desc[irq].handler->set_affinity(irq, mask); - else if (irq_desc[irq].action) + else if (irq_desc[irq].action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Fri Sep 9 16:30:54 2005 @@ -115,9 +115,9 @@ if (swiotlb) return 
swiotlb_dma_supported(dev, mask); /* - * By default we'll BUG when an infeasible DMA is requested, and - * request swiotlb=force (see IOMMU_BUG_ON). - */ + * By default we'll BUG when an infeasible DMA is requested, and + * request swiotlb=force (see IOMMU_BUG_ON). + */ return 1; } EXPORT_SYMBOL(dma_supported); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Fri Sep 9 16:30:54 2005 @@ -135,6 +135,10 @@ * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ +#ifdef CONFIG_SMP +extern void smp_suspend(void); +extern void smp_resume(void); +#endif void cpu_idle (void) { int cpu = _smp_processor_id(); @@ -149,6 +153,9 @@ if (cpu_is_offline(cpu)) { local_irq_disable(); +#ifdef CONFIG_SMP + smp_suspend(); +#endif #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU) /* Ack it. From this point on until we get woken up, we're not allowed @@ -159,6 +166,9 @@ HYPERVISOR_vcpu_down(cpu); #endif play_dead(); +#ifdef CONFIG_SMP + smp_resume(); +#endif local_irq_enable(); } @@ -456,7 +466,6 @@ boot_option_idle_override = 1; return 1; } - /* * switch_to(x,yn) should switch tasks from x to y. @@ -789,10 +798,3 @@ sp -= get_random_int() % 8192; return sp & ~0xf; } - - -#ifndef CONFIG_X86_SMP -void _restore_vcpu(void) -{ -} -#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Sep 9 16:30:54 2005 @@ -55,6 +55,7 @@ #include <asm/io.h> #include <asm-xen/hypervisor.h> #include <asm-xen/xen-public/physdev.h> +#include <asm-xen/xen-public/memory.h> #include "setup_arch_pre.h" #include <bios_ebda.h> @@ -288,7 +289,7 @@ int i; /* Nothing to do if not running in dom0. 
*/ - if (!(xen_start_info.flags & SIF_INITDOMAIN)) + if (!(xen_start_info->flags & SIF_INITDOMAIN)) return; /* video rom */ @@ -358,11 +359,12 @@ shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; EXPORT_SYMBOL(HYPERVISOR_shared_info); -unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list; +unsigned long *phys_to_machine_mapping; +unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16]; EXPORT_SYMBOL(phys_to_machine_mapping); /* Raw start-of-day parameters from the hypervisor. */ -union xen_start_info_union xen_start_info_union; +start_info_t *xen_start_info; static void __init limit_regions(unsigned long long size) { @@ -702,7 +704,7 @@ if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) max_cmdline = COMMAND_LINE_SIZE; - memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline); + memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); /* Save unparsed command line copy for /proc/cmdline */ saved_command_line[max_cmdline-1] = '\0'; @@ -933,8 +935,8 @@ /* We don't use the fake e820 because we need to respond to user override. 
*/ void __init find_max_pfn(void) { - if ( xen_override_max_pfn < xen_start_info.nr_pages ) - xen_override_max_pfn = xen_start_info.nr_pages; + if ( xen_override_max_pfn < xen_start_info->nr_pages ) + xen_override_max_pfn = xen_start_info->nr_pages; max_pfn = xen_override_max_pfn; } #endif /* XEN */ @@ -1077,12 +1079,12 @@ void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) { - /* * partially used pages are not usable - thus * we are rounding upwards: */ - min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames; + min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) + + xen_start_info->nr_pt_frames; find_max_pfn(); @@ -1188,7 +1190,7 @@ #endif /* !CONFIG_XEN */ #ifdef CONFIG_BLK_DEV_INITRD - if (xen_start_info.mod_start) { + if (xen_start_info->mod_start) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/ initrd_start = INITRD_START + PAGE_OFFSET; @@ -1205,7 +1207,7 @@ } #endif - phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list; + phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; } /* @@ -1234,10 +1236,64 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) { int i; +#ifdef CONFIG_XEN + dom0_op_t op; + struct dom0_memory_map_entry *map; + unsigned long gapstart, gapsize; + unsigned long long last; +#endif #ifdef CONFIG_XEN_PRIVILEGED_GUEST probe_roms(); #endif + +#ifdef CONFIG_XEN + map = alloc_bootmem_low_pages(PAGE_SIZE); + op.cmd = DOM0_PHYSICAL_MEMORY_MAP; + op.u.physical_memory_map.memory_map = map; + op.u.physical_memory_map.max_map_entries = + PAGE_SIZE / sizeof(struct dom0_memory_map_entry); + BUG_ON(HYPERVISOR_dom0_op(&op)); + + last = 0x100000000ULL; + gapstart = 0x10000000; + gapsize = 0x400000; + + for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) { + struct resource *res; + + if ((last > map[i].end) && ((last - map[i].end) > gapsize)) 
{ + gapsize = last - map[i].end; + gapstart = map[i].end; + } + if (map[i].start < last) + last = map[i].start; + + if (map[i].end > 0x100000000ULL) + continue; + res = alloc_bootmem_low(sizeof(struct resource)); + res->name = map[i].is_ram ? "System RAM" : "reserved"; + res->start = map[i].start; + res->end = map[i].end - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + request_resource(&iomem_resource, res); + } + + free_bootmem(__pa(map), PAGE_SIZE); + + /* + * Start allocating dynamic PCI memory a bit into the gap, + * aligned up to the nearest megabyte. + * + * Question: should we try to pad it up a bit (do something + * like " + (gapsize >> 3)" in there too?). We now have the + * technology. + */ + pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", + pci_mem_start, gapstart, gapsize); +#else for (i = 0; i < e820.nr_map; i++) { struct resource *res; if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) @@ -1263,6 +1319,7 @@ request_resource(res, data_resource); } } +#endif } /* @@ -1270,23 +1327,29 @@ */ static void __init register_memory(void) { +#ifndef CONFIG_XEN unsigned long gapstart, gapsize; unsigned long long last; +#endif int i; + + /* Nothing to do if not running in dom0. 
*/ + if (!(xen_start_info->flags & SIF_INITDOMAIN)) + return; if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else legacy_init_iomem_resources(&code_resource, &data_resource); - if (xen_start_info.flags & SIF_INITDOMAIN) - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); + /* EFI systems may still have VGA */ + request_resource(&iomem_resource, &video_ram_resource); /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); +#ifndef CONFIG_XEN /* * Search for the bigest gap in the low 32 bits of the e820 * memory space. @@ -1327,6 +1390,7 @@ printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", pci_mem_start, gapstart, gapsize); +#endif } /* Use inline assembly to define this because the nops are defined @@ -1456,7 +1520,7 @@ */ void __init setup_arch(char **cmdline_p) { - int i, j; + int i, j, k, fpp; physdev_op_t op; unsigned long max_low_pfn; @@ -1535,8 +1599,8 @@ init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; - init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) + - xen_start_info.nr_pt_frames) << PAGE_SHIFT; + init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) + + xen_start_info->nr_pt_frames) << PAGE_SHIFT; /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */ /*code_resource.start = virt_to_phys(_text);*/ @@ -1573,42 +1637,64 @@ #endif /* Make sure we have a correctly sized P->M table. 
*/ - if (max_pfn != xen_start_info.nr_pages) { + if (max_pfn != xen_start_info->nr_pages) { phys_to_machine_mapping = alloc_bootmem_low_pages( - max_pfn * sizeof(unsigned int)); - - if (max_pfn > xen_start_info.nr_pages) { + max_pfn * sizeof(unsigned long)); + + if (max_pfn > xen_start_info->nr_pages) { /* set to INVALID_P2M_ENTRY */ memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned int)); + max_pfn * sizeof(unsigned long)); memcpy(phys_to_machine_mapping, - (unsigned int *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(unsigned int)); + (unsigned long *)xen_start_info->mfn_list, + xen_start_info->nr_pages * sizeof(unsigned long)); } else { + struct xen_memory_reservation reservation = { + .extent_start = (unsigned long *)xen_start_info->mfn_list + max_pfn, + .nr_extents = xen_start_info->nr_pages - max_pfn, + .extent_order = 0, + .domid = DOMID_SELF + }; + memcpy(phys_to_machine_mapping, - (unsigned int *)xen_start_info.mfn_list, - max_pfn * sizeof(unsigned int)); - /* N.B. below relies on sizeof(int) == sizeof(long). 
*/ - if (HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, - (unsigned long *)xen_start_info.mfn_list + max_pfn, - xen_start_info.nr_pages - max_pfn, 0) != - (xen_start_info.nr_pages - max_pfn)) BUG(); + (unsigned long *)xen_start_info->mfn_list, + max_pfn * sizeof(unsigned long)); + BUG_ON(HYPERVISOR_memory_op( + XENMEM_decrease_reservation, + &reservation) != + (xen_start_info->nr_pages - max_pfn)); } free_bootmem( - __pa(xen_start_info.mfn_list), - PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(unsigned int)))); - } - - pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned int)), j++ ) - { - pfn_to_mfn_frame_list[j] = - virt_to_mfn(&phys_to_machine_mapping[i]); - } - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = - virt_to_mfn(pfn_to_mfn_frame_list); + __pa(xen_start_info->mfn_list), + PFN_PHYS(PFN_UP(xen_start_info->nr_pages * + sizeof(unsigned long)))); + } + + + /* + * Initialise the list of the frames that specify the list of + * frames that make up the p2m table. Used by save/restore + */ + pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE); + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(pfn_to_mfn_frame_list_list); + + fpp = PAGE_SIZE/sizeof(unsigned long); + for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ ) + { + if ( (j % fpp) == 0 ) + { + k++; + BUG_ON(k>=16); + pfn_to_mfn_frame_list[k] = alloc_bootmem_low_pages(PAGE_SIZE); + pfn_to_mfn_frame_list_list[k] = + virt_to_mfn(pfn_to_mfn_frame_list[k]); + j=0; + } + pfn_to_mfn_frame_list[k][j] = + virt_to_mfn(&phys_to_machine_mapping[i]); + } + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; /* * NOTE: at this point the bootmem allocator is fully available. 
@@ -1626,8 +1712,8 @@ } #endif - - dmi_scan_machine(); + if (xen_start_info->flags & SIF_INITDOMAIN) + dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(*cmdline_p); @@ -1640,7 +1726,7 @@ HYPERVISOR_physdev_op(&op); #ifdef CONFIG_ACPI_BOOT - if (!(xen_start_info.flags & SIF_INITDOMAIN)) { + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); acpi_disabled = 1; acpi_ht = 0; @@ -1666,8 +1752,8 @@ register_memory(); - if (xen_start_info.flags & SIF_INITDOMAIN) { - if (!(xen_start_info.flags & SIF_PRIVILEGED)) + if (xen_start_info->flags & SIF_INITDOMAIN) { + if (!(xen_start_info->flags & SIF_PRIVILEGED)) panic("Xen granted us console access " "but not privileged status"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Sep 9 16:30:54 2005 @@ -856,9 +856,6 @@ cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO); BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE); cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size; - printk("GDT: copying %d bytes from %lx to %lx\n", - cpu_gdt_descr[0].size, cpu_gdt_descr[0].address, - cpu_gdt_descr[cpu].address); memcpy((void *)cpu_gdt_descr[cpu].address, (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size); @@ -1274,6 +1271,7 @@ printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); smp_num_siblings = siblings; } + if (c->x86_num_cores > 1) { for (i = 0; i < NR_CPUS; i++) { if (!cpu_isset(i, cpu_callout_map)) @@ -1601,32 +1599,71 @@ void smp_suspend(void) { - /* XXX todo: take down time and ipi's on all cpus */ local_teardown_timer_irq(); smp_intr_exit(); } void smp_resume(void) { - /* XXX todo: restore time and ipi's on all cpus */ smp_intr_init(); local_setup_timer_irq(); } -DECLARE_PER_CPU(int, timer_irq); - -void 
_restore_vcpu(void) -{ - int cpu = smp_processor_id(); - extern atomic_t vcpus_rebooting; - - /* We are the first thing the vcpu runs when it comes back, - and we are supposed to restore the IPIs and timer - interrupts etc. When we return, the vcpu's idle loop will - start up again. */ - _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu)); - _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu)); - _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) ); - _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) ); +static atomic_t vcpus_rebooting; + +static void restore_vcpu_ready(void) +{ + atomic_dec(&vcpus_rebooting); } + +void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ + int r; + int gdt_pages; + r = HYPERVISOR_vcpu_pickle(vcpu, ctxt); + if (r != 0) + panic("pickling vcpu %d -> %d!\n", vcpu, r); + + /* Translate from machine to physical addresses where necessary, + so that they can be translated to our new machine address space + after resume. libxc is responsible for doing this to vcpu0, + but we do it to the others. */ + gdt_pages = (ctxt->gdt_ents + 511) / 512; + ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]); + for (r = 0; r < gdt_pages; r++) + ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]); +} + +int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ + int r; + int gdt_pages = (ctxt->gdt_ents + 511) / 512; + + /* This is kind of a hack, and implicitly relies on the fact that + the vcpu stops in a place where all of the call clobbered + registers are already dead. */ + ctxt->user_regs.esp -= 4; + ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; + ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready; + + /* De-canonicalise. libxc handles this for vcpu 0, but we need + to do it for the other vcpus. 
*/ + ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]); + for (r = 0; r < gdt_pages; r++) + ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]); + + atomic_set(&vcpus_rebooting, 1); + r = HYPERVISOR_boot_vcpu(vcpu, ctxt); + if (r != 0) { + printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r); + return -1; + } + + /* Make sure we wait for the new vcpu to come up before trying to do + anything with it or starting the next one. */ + while (atomic_read(&vcpus_rebooting)) + barrier(); + + return 0; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Fri Sep 9 16:30:54 2005 @@ -51,7 +51,7 @@ * swiotlb_sync_single_*, to see if the memory was in fact allocated by this * API. */ -static dma_addr_t iotlb_bus_start, iotlb_bus_mask; +static dma_addr_t iotlb_bus_start, iotlb_bus_end, iotlb_bus_mask; /* Does the given dma address reside within the swiotlb aperture? */ #define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask)) @@ -157,6 +157,7 @@ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); iotlb_bus_start = virt_to_bus(iotlb_virt_start); + iotlb_bus_end = iotlb_bus_start + bytes; iotlb_bus_mask = ~(dma_addr_t)(bytes - 1); printk(KERN_INFO "Software IO TLB enabled: \n" @@ -165,7 +166,7 @@ " Kernel range: 0x%016lx - 0x%016lx\n", bytes >> 20, (unsigned long)iotlb_bus_start, - (unsigned long)iotlb_bus_start + bytes, + (unsigned long)iotlb_bus_end, (unsigned long)iotlb_virt_start, (unsigned long)iotlb_virt_start + bytes); } @@ -181,7 +182,7 @@ * Otherwise, enable for domain 0 if the machine has 'lots of memory', * which we take to mean more than 2GB. 
*/ - if (xen_start_info.flags & SIF_INITDOMAIN) { + if (xen_start_info->flags & SIF_INITDOMAIN) { dom0_op_t op; op.cmd = DOM0_PHYSINFO; if ((HYPERVISOR_dom0_op(&op) == 0) && @@ -191,6 +192,8 @@ if (swiotlb) swiotlb_init_with_default_size(64 * (1<<20)); + else + printk(KERN_INFO "Software IO TLB disabled\n"); } static void @@ -424,13 +427,6 @@ } dev_addr = virt_to_bus(map); - - /* - * Ensure that the address returned is DMA'ble - */ - if (address_needs_mapping(hwdev, dev_addr)) - panic("map_single: bounce buffer is not DMA'ble"); - return dev_addr; } @@ -632,7 +628,7 @@ int swiotlb_dma_supported (struct device *hwdev, u64 mask) { - return (mask >= 0xffffffffUL); + return (mask >= (iotlb_bus_end - 1)); } EXPORT_SYMBOL(swiotlb_init); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Fri Sep 9 16:30:54 2005 @@ -445,7 +445,7 @@ sec = tv->tv_sec; __normalize_time(&sec, &nsec); - if ((xen_start_info.flags & SIF_INITDOMAIN) && + if ((xen_start_info->flags & SIF_INITDOMAIN) && !independent_wallclock) { op.cmd = DOM0_SETTIME; op.u.settime.secs = sec; @@ -476,7 +476,7 @@ WARN_ON(irqs_disabled()); - if (!(xen_start_info.flags & SIF_INITDOMAIN)) + if (!(xen_start_info->flags & SIF_INITDOMAIN)) return 0; /* gets recalled with irq locally disabled */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Fri Sep 9 16:30:54 2005 @@ -449,10 +449,10 @@ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) #ifdef CONFIG_X86_MCE DO_ERROR(18, 
SIGBUS, "machine check", machine_check) #endif +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) fastcall void do_general_protection(struct pt_regs * regs, long error_code) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c Fri Sep 9 16:30:54 2005 @@ -588,7 +588,15 @@ pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto no_context; +#ifndef CONFIG_XEN set_pmd(pmd, *pmd_k); +#else + /* + * When running on Xen we must launder *pmd_k through + * pmd_val() to ensure that _PAGE_PRESENT is correctly set. + */ + set_pmd(pmd, __pmd(pmd_val(*pmd_k))); +#endif pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Fri Sep 9 16:30:54 2005 @@ -35,6 +35,7 @@ #include <asm/pgtable.h> #include <asm-xen/hypervisor.h> #include <asm-xen/balloon.h> +#include <asm-xen/xen-public/memory.h> #include <linux/module.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #include <linux/percpu.h> @@ -105,7 +106,7 @@ void xen_machphys_update(unsigned long mfn, unsigned long pfn) { mmu_update_t u; - u.ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; u.val = pfn; BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0); } @@ -320,6 +321,12 @@ pmd_t *pmd; pte_t *pte; unsigned long mfn, i, flags; + struct xen_memory_reservation reservation = { + .extent_start = &mfn, + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; scrub_pages(vstart, 1 << order); @@ -336,13 +343,15 @@ vstart + (i*PAGE_SIZE), __pte_ma(0), 0)); phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = 
INVALID_P2M_ENTRY; - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, &mfn, 1, 0) != 1); + BUG_ON(HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Get a new contiguous memory extent. */ - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_increase_reservation, &mfn, 1, order | (32<<8)) != 1); + reservation.extent_order = order; + reservation.address_bits = 31; /* aacraid limitation */ + BUG_ON(HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation) != 1); /* 3. Map the new extent in place of old pages. */ for (i = 0; i < (1<<order); i++) { @@ -367,6 +376,12 @@ pmd_t *pmd; pte_t *pte; unsigned long mfn, i, flags; + struct xen_memory_reservation reservation = { + .extent_start = &mfn, + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; scrub_pages(vstart, 1 << order); @@ -385,14 +400,14 @@ vstart + (i*PAGE_SIZE), __pte_ma(0), 0)); phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = INVALID_P2M_ENTRY; - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, &mfn, 1, 0) != 1); + BUG_ON(HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Map new pages in place of old pages. 
*/ for (i = 0; i < (1<<order); i++) { - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_increase_reservation, &mfn, 1, 0) != 1); + BUG_ON(HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation) != 1); BUG_ON(HYPERVISOR_update_va_mapping( vstart + (i*PAGE_SIZE), pfn_pte_ma(mfn, PAGE_KERNEL), 0)); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Sep 9 16:30:54 2005 @@ -159,7 +159,7 @@ pte_t *pte; int pgd_idx, pmd_idx, pte_ofs; - unsigned long max_ram_pfn = xen_start_info.nr_pages; + unsigned long max_ram_pfn = xen_start_info->nr_pages; if (max_ram_pfn > max_low_pfn) max_ram_pfn = max_low_pfn; @@ -219,6 +219,8 @@ } } +#ifndef CONFIG_XEN + static inline int page_kills_ppro(unsigned long pagenr) { if (pagenr >= 0x70000 && pagenr <= 0x7003F) @@ -266,6 +268,13 @@ return 0; } +#else /* CONFIG_XEN */ + +#define page_kills_ppro(p) 0 +#define page_is_ram(p) 1 + +#endif + #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -308,7 +317,7 @@ ClearPageReserved(page); set_bit(PG_highmem, &page->flags); set_page_count(page, 1); - if (pfn < xen_start_info.nr_pages) + if (pfn < xen_start_info->nr_pages) __free_page(page); totalhigh_pages++; } else @@ -347,7 +356,7 @@ static void __init pagetable_init (void) { unsigned long vaddr; - pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base; + pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base; int i; swapper_pg_dir = pgd_base; @@ -526,14 +535,14 @@ kmap_init(); /* Switch to the real shared_info page, and clear the dummy page. 
*/ - set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); memset(empty_zero_page, 0, sizeof(empty_zero_page)); #ifdef CONFIG_XEN_PHYSDEV_ACCESS /* Setup mapping of lower 1st MB */ for (i = 0; i < NR_FIX_ISAMAPS; i++) - if (xen_start_info.flags & SIF_PRIVILEGED) + if (xen_start_info->flags & SIF_PRIVILEGED) set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); else __set_fixmap(FIX_ISAMAP_BEGIN - i, @@ -630,7 +639,7 @@ /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); /* XEN: init and count low-mem pages outside initial allocation. */ - for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) { + for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) { ClearPageReserved(&mem_map[pfn]); set_page_count(&mem_map[pfn], 1); totalram_pages++; diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Fri Sep 9 16:30:54 2005 @@ -19,295 +19,17 @@ #include <asm/pgtable.h> #include <asm/pgalloc.h> -#ifndef CONFIG_XEN_PHYSDEV_ACCESS - -void * __ioremap(unsigned long phys_addr, unsigned long size, - unsigned long flags) -{ - return NULL; -} - -void *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - return NULL; -} - -void iounmap(volatile void __iomem *addr) -{ -} - -#ifdef __i386__ - -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - return NULL; -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ -} - -#endif /* __i386__ */ - -#else - -/* - * Does @address reside within a non-highmem page that is local to this virtual - * machine (i.e., not an I/O page, nor a memory page belonging to another VM). 
- * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand - * why this works. - */ -static inline int is_local_lowmem(unsigned long address) -{ - extern unsigned long max_low_pfn; - unsigned long mfn = address >> PAGE_SHIFT; - unsigned long pfn = mfn_to_pfn(mfn); - return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn)); -} - -/* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) -{ - void __iomem * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - domid_t domid = DOMID_IO; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= 0x0 && last_addr < 0x100000) - return isa_bus_to_virt(phys_addr); -#endif - - /* - * Don't allow anybody to remap normal RAM that we're using.. - */ - if (is_local_lowmem(phys_addr)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = bus_to_virt(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; - - domid = DOMID_SELF; - } - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - /* - * Ok, go for it.. 
- */ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); - if (!area) - return NULL; - area->phys_addr = phys_addr; - addr = (void __iomem *) area->addr; - flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; -#ifdef __x86_64__ - flags |= _PAGE_USER; -#endif - if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, - size, __pgprot(flags), domid)) { - vunmap((void __force *) addr); - return NULL; - } - return (void __iomem *) (offset + (char __iomem *)addr); -} - - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - * - * Must be freed with iounmap. - */ - -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - unsigned long last_addr; - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); - if (!p) - return p; - - /* Guaranteed to be > phys_addr, as per __ioremap() */ - last_addr = phys_addr + size - 1; - - if (is_local_lowmem(last_addr)) { - struct page *ppage = virt_to_page(bus_to_virt(phys_addr)); - unsigned long npages; - - phys_addr &= PAGE_MASK; - - /* This might overflow and become zero.. */ - last_addr = PAGE_ALIGN(last_addr); - - /* .. 
but that's ok, because modulo-2**n arithmetic will make - * the page-aligned "last - first" come out right. - */ - npages = (last_addr - phys_addr) >> PAGE_SHIFT; - - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { - iounmap(p); - p = NULL; - } - global_flush_tlb(); - } - - return p; -} - -void iounmap(volatile void __iomem *addr) -{ - struct vm_struct *p; - if ((void __force *) addr <= high_memory) - return; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) - return; -#endif - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); - if (!p) { - printk("__iounmap: bad address %p\n", addr); - return; - } - - if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { - /* p->size includes the guard page, but cpa doesn't like that */ - change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), - (p->size - PAGE_SIZE) >> PAGE_SHIFT, - PAGE_KERNEL); - global_flush_tlb(); - } - kfree(p); -} - -#ifdef __i386__ - -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - unsigned long offset, last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= 0x0 && last_addr < 0x100000) - return isa_bus_to_virt(phys_addr); -#endif - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) - return NULL; - - /* - * Ok, go for it.. 
- */ - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - set_fixmap(idx, phys_addr); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) - return; -#ifdef CONFIG_XEN_PRIVILEGED_GUEST - if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) - return; -#endif - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - clear_fixmap(idx); - --idx; - --nrpages; - } -} - -#endif /* __i386__ */ - -#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ - +#define ISA_START_ADDRESS 0x0 +#define ISA_END_ADDRESS 0x100000 + +#if 0 /* not PAE safe */ /* These hacky macros avoid phys->machine translations. */ #define __direct_pte(x) ((pte_t) { (x) } ) #define __direct_mk_pte(page_nr,pgprot) \ __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) #define direct_mk_pte_phys(physpage, pgprot) \ __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) - +#endif static int direct_remap_area_pte_fn(pte_t *pte, struct page *pte_page, @@ -316,16 +38,16 @@ { mmu_update_t **v = (mmu_update_t **)data; - (*v)->ptr = ((maddr_t)pfn_to_mfn(page_to_pfn(pte_page)) << + (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK); (*v)++; return 0; } -int direct_remap_area_pages(struct mm_struct *mm, +int direct_remap_pfn_range(struct mm_struct *mm, unsigned long address, - unsigned long machine_addr, + unsigned long mfn, unsigned long size, pgprot_t prot, domid_t domid) @@ -356,9 +78,9 @@ * Fill in the machine address: PTE ptr is done later by * __direct_remap_area_pages(). 
*/ - v->val = pte_val_ma(pfn_pte_ma(machine_addr >> PAGE_SHIFT, prot)); - - machine_addr += PAGE_SIZE; + v->val = pte_val_ma(pfn_pte_ma(mfn, prot)); + + mfn++; address += PAGE_SIZE; v++; } @@ -376,8 +98,10 @@ return 0; } -EXPORT_SYMBOL(direct_remap_area_pages); - +EXPORT_SYMBOL(direct_remap_pfn_range); + + +/* FIXME: This is horribly broken on PAE */ static int lookup_pte_fn( pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { @@ -412,6 +136,292 @@ } EXPORT_SYMBOL(touch_pte_range); + +#ifdef CONFIG_XEN_PHYSDEV_ACCESS + +/* + * Does @address reside within a non-highmem page that is local to this virtual + * machine (i.e., not an I/O page, nor a memory page belonging to another VM). + * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand + * why this works. + */ +static inline int is_local_lowmem(unsigned long address) +{ + extern unsigned long max_low_pfn; + unsigned long mfn = address >> PAGE_SHIFT; + unsigned long pfn = mfn_to_pfn(mfn); + return ((pfn < max_low_pfn) && (phys_to_machine_mapping[pfn] == mfn)); +} + +/* + * Generic mapping function (not visible outside): + */ + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +{ + void __iomem * addr; + struct vm_struct * area; + unsigned long offset, last_addr; + domid_t domid = DOMID_IO; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Don't remap the low PCI/ISA area, it's always mapped.. 
+ */ + if (xen_start_info->flags & SIF_PRIVILEGED && + phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + return (void __iomem *) isa_bus_to_virt(phys_addr); + + /* + * Don't allow anybody to remap normal RAM that we're using.. + */ + if (is_local_lowmem(phys_addr)) { + char *t_addr, *t_end; + struct page *page; + + t_addr = bus_to_virt(phys_addr); + t_end = t_addr + (size - 1); + + for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) + if(!PageReserved(page)) + return NULL; + + domid = DOMID_SELF; + } + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - phys_addr; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP | (flags << 20)); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = (void __iomem *) area->addr; + flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; +#ifdef __x86_64__ + flags |= _PAGE_USER; +#endif + if (direct_remap_pfn_range(&init_mm, (unsigned long) addr, phys_addr>>PAGE_SHIFT, + size, __pgprot(flags), domid)) { + vunmap((void __force *) addr); + return NULL; + } + return (void __iomem *) (offset + (char __iomem *)addr); +} + + +/** + * ioremap_nocache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncachable + * on the CPU as well as honouring existing caching rules from things like + * the PCI bus. Note that there are other caches and buffers on many + * busses. 
In particular driver authors should read up on PCI writes + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + * + * Must be freed with iounmap. + */ + +void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) +{ + unsigned long last_addr; + void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); + if (!p) + return p; + + /* Guaranteed to be > phys_addr, as per __ioremap() */ + last_addr = phys_addr + size - 1; + + if (is_local_lowmem(last_addr)) { + struct page *ppage = virt_to_page(bus_to_virt(phys_addr)); + unsigned long npages; + + phys_addr &= PAGE_MASK; + + /* This might overflow and become zero.. */ + last_addr = PAGE_ALIGN(last_addr); + + /* .. but that's ok, because modulo-2**n arithmetic will make + * the page-aligned "last - first" come out right. + */ + npages = (last_addr - phys_addr) >> PAGE_SHIFT; + + if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { + iounmap(p); + p = NULL; + } + global_flush_tlb(); + } + + return p; +} + +void iounmap(volatile void __iomem *addr) +{ + struct vm_struct *p; + if ((void __force *) addr <= high_memory) + return; + + /* + * __ioremap special-cases the PCI/ISA range by not instantiating a + * vm_area and by simply returning an address into the kernel mapping + * of ISA space. So handle that here. 
+ */ + if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) + return; + + write_lock(&vmlist_lock); + p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); + if (!p) { + printk("iounmap: bad address %p\n", addr); + goto out_unlock; + } + + if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { + /* p->size includes the guard page, but cpa doesn't like that */ + change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), + (p->size - PAGE_SIZE) >> PAGE_SHIFT, + PAGE_KERNEL); + global_flush_tlb(); + } +out_unlock: + write_unlock(&vmlist_lock); + kfree(p); +} + +#ifdef __i386__ + +void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) +{ + unsigned long offset, last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Don't remap the low PCI/ISA area, it's always mapped.. + */ + if (xen_start_info->flags & SIF_PRIVILEGED && + phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + return isa_bus_to_virt(phys_addr); + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - phys_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (nrpages > NR_FIX_BTMAPS) + return NULL; + + /* + * Ok, go for it.. 
+ */ + idx = FIX_BTMAP_BEGIN; + while (nrpages > 0) { + set_fixmap(idx, phys_addr); + phys_addr += PAGE_SIZE; + --idx; + --nrpages; + } + return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); +} + +void __init bt_iounmap(void *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + + virt_addr = (unsigned long)addr; + if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) + return; + if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN)) + return; + offset = virt_addr & ~PAGE_MASK; + nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN; + while (nrpages > 0) { + clear_fixmap(idx); + --idx; + --nrpages; + } +} + +#endif /* __i386__ */ + +#else /* CONFIG_XEN_PHYSDEV_ACCESS */ + +void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, + unsigned long flags) +{ + return NULL; +} + +void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) +{ + return NULL; +} + +void iounmap(volatile void __iomem *addr) +{ +} + +#ifdef __i386__ + +void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) +{ + return NULL; +} + +void __init bt_iounmap(void *addr, unsigned long size) +{ +} + +#endif /* __i386__ */ + +#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ /* * Local variables: diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Fri Sep 9 16:30:54 2005 @@ -4,7 +4,7 @@ c-obj-y := i386.o -c-obj-$(CONFIG_PCI_BIOS) += pcbios.o +#c-obj-$(CONFIG_PCI_BIOS) += pcbios.o c-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o c-obj-$(CONFIG_PCI_DIRECT) += direct.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Fri Sep 9 16:30:54 2005 @@ -11,7 +11,7 @@ 
extra-y += vmlinux.lds -obj-y := ctrl_if.o evtchn.o fixup.o reboot.o gnttab.o devmem.o +obj-y := evtchn.o fixup.o reboot.o gnttab.o devmem.o obj-$(CONFIG_PROC_FS) += xen_proc.o obj-$(CONFIG_NET) += skbuff.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Fri Sep 9 16:30:54 2005 @@ -40,16 +40,8 @@ #include <asm-xen/synch_bitops.h> #include <asm-xen/xen-public/event_channel.h> #include <asm-xen/xen-public/physdev.h> -#include <asm-xen/ctrl_if.h> #include <asm-xen/hypervisor.h> #include <asm-xen/evtchn.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -EXPORT_SYMBOL(force_evtchn_callback); -EXPORT_SYMBOL(evtchn_do_upcall); -EXPORT_SYMBOL(bind_evtchn_to_irq); -EXPORT_SYMBOL(unbind_evtchn_from_irq); -#endif /* * This lock protects updates to the following mapping and reference-count @@ -133,6 +125,7 @@ { (void)HYPERVISOR_xen_version(0); } +EXPORT_SYMBOL(force_evtchn_callback); /* NB. Interrupts are disabled on entry. */ asmlinkage void evtchn_do_upcall(struct pt_regs *regs) @@ -165,6 +158,7 @@ } } } +EXPORT_SYMBOL(evtchn_do_upcall); static int find_unbound_irq(void) { @@ -211,6 +205,7 @@ return irq; } +EXPORT_SYMBOL(bind_virq_to_irq); void unbind_virq_from_irq(int virq) { @@ -244,74 +239,7 @@ spin_unlock(&irq_mapping_update_lock); } - -/* This is only used when a vcpu from an xm save. The ipi is expected - to have been bound before we suspended, and so all of the xenolinux - state is set up; we only need to restore the Xen side of things. - The irq number has to be the same, but the evtchn number can - change. 
*/ -void _bind_ipi_to_irq(int ipi, int vcpu, int irq) -{ - evtchn_op_t op; - int evtchn; - - spin_lock(&irq_mapping_update_lock); - - op.cmd = EVTCHNOP_bind_ipi; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu); - evtchn = op.u.bind_ipi.port; - - printk("<0>IPI %d, old evtchn %d, evtchn %d.\n", - ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi], - evtchn); - - evtchn_to_irq[irq_to_evtchn[irq]] = -1; - irq_to_evtchn[irq] = -1; - - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn, - evtchn_to_irq[evtchn]); - per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn; - - bind_evtchn_to_cpu(evtchn, vcpu); - - spin_unlock(&irq_mapping_update_lock); - - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask); - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending); -} - -void _bind_virq_to_irq(int virq, int cpu, int irq) -{ - evtchn_op_t op; - int evtchn; - - spin_lock(&irq_mapping_update_lock); - - op.cmd = EVTCHNOP_bind_virq; - op.u.bind_virq.virq = virq; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to bind virtual IRQ %d\n", virq); - evtchn = op.u.bind_virq.port; - - evtchn_to_irq[irq_to_evtchn[irq]] = -1; - irq_to_evtchn[irq] = -1; - - evtchn_to_irq[evtchn] = irq; - irq_to_evtchn[irq] = evtchn; - - per_cpu(virq_to_irq, cpu)[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - - spin_unlock(&irq_mapping_update_lock); - - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask); - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending); -} +EXPORT_SYMBOL(unbind_virq_from_irq); int bind_ipi_to_irq(int ipi) { @@ -347,6 +275,7 @@ return irq; } +EXPORT_SYMBOL(bind_ipi_to_irq); void unbind_ipi_from_irq(int ipi) { @@ -374,6 +303,7 @@ spin_unlock(&irq_mapping_update_lock); } +EXPORT_SYMBOL(unbind_ipi_from_irq); int bind_evtchn_to_irq(unsigned int evtchn) { @@ -394,6 +324,7 @@ return irq; } 
+EXPORT_SYMBOL(bind_evtchn_to_irq); void unbind_evtchn_from_irq(unsigned int evtchn) { @@ -409,6 +340,7 @@ spin_unlock(&irq_mapping_update_lock); } +EXPORT_SYMBOL(unbind_evtchn_from_irq); int bind_evtchn_to_irqhandler( unsigned int evtchn, @@ -427,6 +359,7 @@ return retval; } +EXPORT_SYMBOL(bind_evtchn_to_irqhandler); void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id) { @@ -434,6 +367,7 @@ free_irq(irq, dev_id); unbind_evtchn_from_irq(evtchn); } +EXPORT_SYMBOL(unbind_evtchn_from_irqhandler); #ifdef CONFIG_SMP static void do_nothing_function(void *ign) @@ -797,7 +731,4 @@ irq_desc[pirq_to_irq(i)].depth = 1; irq_desc[pirq_to_irq(i)].handler = &pirq_type; } - - /* This needs to be done early, but after the IRQ subsystem is alive. */ - ctrl_if_init(); -} +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Sep 9 16:30:54 2005 @@ -12,10 +12,8 @@ #include <asm-xen/evtchn.h> #include <asm-xen/hypervisor.h> #include <asm-xen/xen-public/dom0_ops.h> -#include <asm-xen/linux-public/suspend.h> #include <asm-xen/queues.h> #include <asm-xen/xenbus.h> -#include <asm-xen/ctrl_if.h> #include <linux/cpu.h> #include <linux/kthread.h> @@ -65,69 +63,10 @@ #define cpu_up(x) (-EOPNOTSUPP) #endif -static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - int r; - int gdt_pages; - r = HYPERVISOR_vcpu_pickle(vcpu, ctxt); - if (r != 0) - panic("pickling vcpu %d -> %d!\n", vcpu, r); - - /* Translate from machine to physical addresses where necessary, - so that they can be translated to our new machine address space - after resume. libxc is responsible for doing this to vcpu0, - but we do it to the others. 
*/ - gdt_pages = (ctxt->gdt_ents + 511) / 512; - ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]); - for (r = 0; r < gdt_pages; r++) - ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]); -} - -void _restore_vcpu(int cpu); - -atomic_t vcpus_rebooting; - -static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) -{ - int r; - int gdt_pages = (ctxt->gdt_ents + 511) / 512; - - /* This is kind of a hack, and implicitly relies on the fact that - the vcpu stops in a place where all of the call clobbered - registers are already dead. */ - ctxt->user_regs.esp -= 4; - ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; - ctxt->user_regs.eip = (unsigned long)_restore_vcpu; - - /* De-canonicalise. libxc handles this for vcpu 0, but we need - to do it for the other vcpus. */ - ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]); - for (r = 0; r < gdt_pages; r++) - ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]); - - atomic_set(&vcpus_rebooting, 1); - r = HYPERVISOR_boot_vcpu(vcpu, ctxt); - if (r != 0) { - printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r); - return -1; - } - - /* Make sure we wait for the new vcpu to come up before trying to do - anything with it or starting the next one. */ - while (atomic_read(&vcpus_rebooting)) - barrier(); - - return 0; -} static int __do_suspend(void *ignore) { - int i, j; - suspend_record_t *suspend_record; - static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; - - /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. 
*/ - /* XXX SMH: yes it would :-( */ + int i, j, k, fpp; #ifdef CONFIG_XEN_USB_FRONTEND extern void usbif_resume(); @@ -138,16 +77,25 @@ extern int gnttab_suspend(void); extern int gnttab_resume(void); -#ifdef CONFIG_SMP - extern void smp_suspend(void); - extern void smp_resume(void); -#endif extern void time_suspend(void); extern void time_resume(void); extern unsigned long max_pfn; - extern unsigned int *pfn_to_mfn_frame_list; - + extern unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[]; + +#ifdef CONFIG_SMP + extern void smp_suspend(void); + extern void smp_resume(void); + + static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; cpumask_t prev_online_cpus, prev_present_cpus; + + void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); + int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); +#endif + + extern void xencons_suspend(void); + extern void xencons_resume(void); + int err = 0; BUG_ON(smp_processor_id() != 0); @@ -155,15 +103,14 @@ #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU) if (num_online_cpus() > 1) { - printk(KERN_WARNING "Can't suspend SMP guests without CONFIG_HOTPLUG_CPU\n"); + printk(KERN_WARNING + "Can't suspend SMP guests without CONFIG_HOTPLUG_CPU\n"); return -EOPNOTSUPP; } #endif - suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL); - if ( suspend_record == NULL ) - goto out; - + preempt_disable(); +#ifdef CONFIG_SMP /* Take all of the other cpus offline. 
We need to be careful not to get preempted between the final test for num_online_cpus() == 1 and disabling interrupts, since otherwise userspace could @@ -175,7 +122,6 @@ since by the time num_online_cpus() == 1, there aren't any other cpus) */ cpus_clear(prev_online_cpus); - preempt_disable(); while (num_online_cpus() > 1) { preempt_enable(); for_each_online_cpu(i) { @@ -190,13 +136,13 @@ } preempt_disable(); } - - suspend_record->nr_pfns = max_pfn; /* final number of pfns */ +#endif __cli(); preempt_enable(); +#ifdef CONFIG_SMP cpus_clear(prev_present_cpus); for_each_present_cpu(i) { if (i == 0) @@ -204,6 +150,7 @@ save_vcpu_context(i, &suspended_cpu_records[i]); cpu_set(i, prev_present_cpus); } +#endif #ifdef __i386__ mm_pin_all(); @@ -218,7 +165,7 @@ xenbus_suspend(); - ctrl_if_suspend(); + xencons_suspend(); irq_suspend(); @@ -227,37 +174,44 @@ HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; clear_fixmap(FIX_SHARED_INFO); - memcpy(&suspend_record->resume_info, &xen_start_info, - sizeof(xen_start_info)); + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn); /* We'll stop somewhere inside this hypercall. When it returns, we'll start resuming after the restore. 
*/ - HYPERVISOR_suspend(virt_to_mfn(suspend_record)); + HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); shutting_down = SHUTDOWN_INVALID; - memcpy(&xen_start_info, &suspend_record->resume_info, - sizeof(xen_start_info)); - - set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); memset(empty_zero_page, 0, PAGE_SIZE); - - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) + + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(pfn_to_mfn_frame_list_list); + + fpp = PAGE_SIZE/sizeof(unsigned long); + for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ ) { - pfn_to_mfn_frame_list[j] = - virt_to_mfn(&phys_to_machine_mapping[i]); - } - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = - virt_to_mfn(pfn_to_mfn_frame_list); + if ( (j % fpp) == 0 ) + { + k++; + pfn_to_mfn_frame_list_list[k] = + virt_to_mfn(pfn_to_mfn_frame_list[k]); + j=0; + } + pfn_to_mfn_frame_list[k][j] = + virt_to_mfn(&phys_to_machine_mapping[i]); + } + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); irq_resume(); - ctrl_if_resume(); + xencons_resume(); xenbus_resume(); @@ -269,12 +223,14 @@ usbif_resume(); - for_each_cpu_mask(i, prev_present_cpus) { +#ifdef CONFIG_SMP + for_each_cpu_mask(i, prev_present_cpus) restore_vcpu_context(i, &suspended_cpu_records[i]); - } +#endif __sti(); +#ifdef CONFIG_SMP out_reenable_cpus: for_each_cpu_mask(i, prev_online_cpus) { j = cpu_up(i); @@ -284,10 +240,8 @@ err = j; } } - - out: - if ( suspend_record != NULL ) - free_page((unsigned long)suspend_record); +#endif + return err; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig Fri Sep 9 16:30:54 2005 @@ -21,12 +21,12 @@ classical 32-bit x86 architecture. 
For details see <http://www.x86-64.org/>. -config X86 - bool - default y - config 64BIT def_bool y + +config X86 + bool + default y config MMU bool @@ -89,10 +89,11 @@ # Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs. config MPSC - bool "Intel x86-64" + bool "Intel EM64T" help - Optimize for Intel IA32 with 64bit extension CPUs - (Prescott/Nocona/Potomac) + Optimize for Intel Pentium 4 and Xeon CPUs with Intel + Extended Memory 64 Technology(EM64T). For details see + <http://www.intel.com/technology/64bitextensions/>. config GENERIC_CPU bool "Generic-x86-64" @@ -367,7 +368,6 @@ If unsure, say Y. Only embedded should say N here. - endmenu # diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Fri Sep 9 16:30:54 2005 @@ -40,7 +40,7 @@ i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o +#obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o c-obj-$(CONFIG_MODULES) += module.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c Fri Sep 9 16:30:54 2005 @@ -20,6 +20,9 @@ #include <asm/e820.h> #include <asm/proto.h> #include <asm/bootsetup.h> +#include <asm-xen/xen-public/memory.h> + +unsigned long pci_mem_start = 0xaeedbabe; /* * PFN of last memory page. @@ -517,14 +520,13 @@ } #else /* CONFIX_XEN */ + extern unsigned long xen_override_max_pfn; extern union xen_start_info_union xen_start_info_union; -/* - * Guest physical starts from 0. 
- */ + unsigned long __init e820_end_of_ram(void) { - unsigned long max_end_pfn = xen_start_info.nr_pages; + unsigned long max_end_pfn = xen_start_info->nr_pages; if ( xen_override_max_pfn < max_end_pfn) xen_override_max_pfn = max_end_pfn; @@ -532,64 +534,53 @@ return xen_override_max_pfn; } - - void __init e820_reserve_resources(void) { - return; /* Xen won't have reserved entries */ -} - -#endif - -void __init parse_memopt(char *p, char **from) -{ - end_user_pfn = memparse(p, from); - end_user_pfn >>= PAGE_SHIFT; - xen_override_max_pfn = (unsigned long) end_user_pfn; -} - -unsigned long pci_mem_start = 0xaeedbabe; - -/* - * Search for the biggest gap in the low 32 bits of the e820 - * memory space. We pass this space to PCI to assign MMIO resources - * for hotplug or unconfigured devices in. - * Hopefully the BIOS let enough space left. - */ -__init void e820_setup_gap(void) -{ - unsigned long gapstart, gapsize; - unsigned long last; - int i; - int found = 0; - - last = 0x100000000ull; + dom0_op_t op; + struct dom0_memory_map_entry *map; + unsigned long gapstart, gapsize, last; + int i, found = 0; + + if (!(xen_start_info->flags & SIF_INITDOMAIN)) + return; + + map = alloc_bootmem_low_pages(PAGE_SIZE); + op.cmd = DOM0_PHYSICAL_MEMORY_MAP; + op.u.physical_memory_map.memory_map = map; + op.u.physical_memory_map.max_map_entries = + PAGE_SIZE / sizeof(struct dom0_memory_map_entry); + BUG_ON(HYPERVISOR_dom0_op(&op)); + + last = 0x100000000ULL; gapstart = 0x10000000; gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - found = 1; - } - } - if (start < last) - last = start; - } + + for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) { 
+ struct resource *res; + + if ((last > map[i].end) && ((last - map[i].end) > gapsize)) { + gapsize = last - map[i].end; + gapstart = map[i].end; + found = 1; + } + if (map[i].start < last) + last = map[i].start; + + if (map[i].end > 0x100000000ULL) + continue; + res = alloc_bootmem_low(sizeof(struct resource)); + res->name = map[i].is_ram ? "System RAM" : "reserved"; + res->start = map[i].start; + res->end = map[i].end - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + request_resource(&iomem_resource, res); + } + + free_bootmem(__pa(map), PAGE_SIZE); if (!found) { - gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; + HYPERVISOR_memory_op(XENMEM_maximum_ram_page, &gapstart); + gapstart = (gapstart << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); } @@ -607,3 +598,72 @@ printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); } + +#endif + +void __init parse_memopt(char *p, char **from) +{ + end_user_pfn = memparse(p, from); + end_user_pfn >>= PAGE_SHIFT; + xen_override_max_pfn = (unsigned long) end_user_pfn; +} + +/* + * Search for the biggest gap in the low 32 bits of the e820 + * memory space. We pass this space to PCI to assign MMIO resources + * for hotplug or unconfigured devices in. + * Hopefully the BIOS let enough space left. 
+ */ +__init void e820_setup_gap(void) +{ +#ifndef CONFIG_XEN + unsigned long gapstart, gapsize; + unsigned long last; + int i; + int found = 0; + + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + found = 1; + } + } + if (start < last) + last = start; + } + + if (!found) { + gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); + } + + /* + * Start allocating dynamic PCI memory a bit into the gap, + * aligned up to the nearest megabyte. + * + * Question: should we try to pad it up a bit (do something + * like " + (gapsize >> 3)" in there too?). We now have the + * technology. + */ + pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + + printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); +#endif +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S Fri Sep 9 16:30:54 2005 @@ -40,16 +40,13 @@ .globl startup_64 startup_64: ENTRY(_start) - cld - /* Copy the necessary stuff from xen_start_info structure. 
*/ - movq $xen_start_info_union,%rdi - movq $256,%rcx - rep movsq + movq %rsi,xen_start_info(%rip) #ifdef CONFIG_SMP - ENTRY(startup_64_smp) +ENTRY(startup_64_smp) +#endif /* CONFIG_SMP */ + cld -#endif /* CONFIG_SMP */ movq init_rsp(%rip),%rsp /* zero EFLAGS after setting rsp */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c Fri Sep 9 16:30:54 2005 @@ -90,8 +90,9 @@ { int i; - phys_to_machine_mapping = (u32 *)xen_start_info.mfn_list; - start_pfn = (__pa(xen_start_info.pt_base) >> PAGE_SHIFT) + xen_start_info.nr_pt_frames; + phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + + xen_start_info->nr_pt_frames; for (i = 0; i < 256; i++) set_intr_gate(i, early_idt_handler); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Fri Sep 9 16:30:54 2005 @@ -76,7 +76,8 @@ /* Allows setting of maximum possible memory size */ unsigned long xen_override_max_pfn; -u32 *phys_to_machine_mapping, *pfn_to_mfn_frame_list; +unsigned long *phys_to_machine_mapping; +unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512]; EXPORT_SYMBOL(phys_to_machine_mapping); @@ -84,7 +85,7 @@ DEFINE_PER_CPU(int, nr_multicall_ents); /* Raw start-of-day parameters from the hypervisor. 
*/ -union xen_start_info_union xen_start_info_union; +start_info_t *xen_start_info; #endif /* @@ -314,7 +315,7 @@ if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) max_cmdline = COMMAND_LINE_SIZE; - memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline); + memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); saved_command_line[max_cmdline-1] = '\0'; #else memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); @@ -687,7 +688,7 @@ #endif #ifdef CONFIG_XEN #ifdef CONFIG_BLK_DEV_INITRD - if (xen_start_info.mod_start) { + if (xen_start_info->mod_start) { if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/ initrd_start = INITRD_START + PAGE_OFFSET; @@ -730,29 +731,50 @@ #endif #ifdef CONFIG_XEN { - int i, j; + int i, j, k, fpp; /* Make sure we have a large enough P->M table. */ - if (end_pfn > xen_start_info.nr_pages) { + if (end_pfn > xen_start_info->nr_pages) { phys_to_machine_mapping = alloc_bootmem( - max_pfn * sizeof(u32)); + end_pfn * sizeof(unsigned long)); memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(u32)); + end_pfn * sizeof(unsigned long)); memcpy(phys_to_machine_mapping, - (u32 *)xen_start_info.mfn_list, - xen_start_info.nr_pages * sizeof(u32)); + (unsigned long *)xen_start_info->mfn_list, + xen_start_info->nr_pages * sizeof(unsigned long)); free_bootmem( - __pa(xen_start_info.mfn_list), - PFN_PHYS(PFN_UP(xen_start_info.nr_pages * - sizeof(u32)))); - } - - pfn_to_mfn_frame_list = alloc_bootmem(PAGE_SIZE); - - for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(u32)), j++ ) - { - pfn_to_mfn_frame_list[j] = + __pa(xen_start_info->mfn_list), + PFN_PHYS(PFN_UP(xen_start_info->nr_pages * + sizeof(unsigned long)))); + } + + /* + * Initialise the list of the frames that specify the list of + * frames that make up the p2m table. 
Used by save/restore + */ + pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE); + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + virt_to_mfn(pfn_to_mfn_frame_list_list); + + fpp = PAGE_SIZE/sizeof(unsigned long); + for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ ) + { + if ( (j % fpp) == 0 ) + { + k++; + BUG_ON(k>=fpp); + pfn_to_mfn_frame_list[k] = alloc_bootmem(PAGE_SIZE); + pfn_to_mfn_frame_list_list[k] = + virt_to_mfn(pfn_to_mfn_frame_list[k]); + j=0; + } + pfn_to_mfn_frame_list[k][j] = virt_to_mfn(&phys_to_machine_mapping[i]); } + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; + + + + } #endif @@ -817,8 +839,8 @@ op.u.set_iopl.iopl = 1; HYPERVISOR_physdev_op(&op); - if (xen_start_info.flags & SIF_INITDOMAIN) { - if (!(xen_start_info.flags & SIF_PRIVILEGED)) + if (xen_start_info->flags & SIF_INITDOMAIN) { + if (!(xen_start_info->flags & SIF_PRIVILEGED)) panic("Xen granted us console access " "but not privileged status"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Fri Sep 9 16:30:54 2005 @@ -1277,21 +1277,23 @@ void smp_suspend(void) { - /* XXX todo: take down time and ipi's on all cpus */ local_teardown_timer_irq(); smp_intr_exit(); } void smp_resume(void) { - /* XXX todo: restore time and ipi's on all cpus */ smp_intr_init(); local_setup_timer_irq(); } -void _restore_vcpu(void) -{ - /* XXX need to write this */ -} - -#endif +void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ +} + +int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) +{ + return 0; +} + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Fri Sep 9 16:30:54 2005 @@ -149,7 +149,7 @@ pmd_t *pmd; pte_t 
*pte; - pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id()); + pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id()); pgd += pgd_index(address); printk("PGD %lx ", pgd_val(*pgd)); @@ -296,9 +296,9 @@ #define MEM_VERBOSE 1 #ifdef MEM_VERBOSE -#define MEM_LOG(_f, _a...) \ - printk("fault.c:[%d]-> " _f "\n", \ - __LINE__ , ## _a ) +#define MEM_LOG(_f, _a...) \ + printk("fault.c:[%d]-> " _f "\n", \ + __LINE__ , ## _a ) #else #define MEM_LOG(_f, _a...) ((void)0) #endif @@ -325,7 +325,7 @@ siginfo_t info; if (!user_mode(regs)) - error_code &= ~4; /* means kernel */ + error_code &= ~4; /* means kernel */ #ifdef CONFIG_CHECKING { diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Sep 9 16:30:54 2005 @@ -62,14 +62,16 @@ * avaialble in init_memory_mapping(). */ -#define addr_to_page(addr, page) \ - (addr) &= PHYSICAL_PAGE_MASK; \ - (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map))) +#define addr_to_page(addr, page) \ + (addr) &= PHYSICAL_PAGE_MASK; \ + (page) = ((unsigned long *) ((unsigned long) \ + (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + \ + __START_KERNEL_map))) static void __make_page_readonly(unsigned long va) { - unsigned long addr; - pte_t pte, *ptep; + unsigned long addr; + pte_t pte, *ptep; unsigned long *page = (unsigned long *) init_level4_pgt; addr = (unsigned long) page[pgd_index(va)]; @@ -89,22 +91,22 @@ static void __make_page_writable(unsigned long va) { - unsigned long addr; - pte_t pte, *ptep; - unsigned long *page = (unsigned long *) init_level4_pgt; - - addr = (unsigned long) page[pgd_index(va)]; - addr_to_page(addr, page); - - addr = page[pud_index(va)]; - addr_to_page(addr, page); - - addr = page[pmd_index(va)]; - addr_to_page(addr, page); - - ptep = (pte_t *) &page[pte_index(va)]; + unsigned long addr; + 
pte_t pte, *ptep; + unsigned long *page = (unsigned long *) init_level4_pgt; + + addr = (unsigned long) page[pgd_index(va)]; + addr_to_page(addr, page); + + addr = page[pud_index(va)]; + addr_to_page(addr, page); + + addr = page[pmd_index(va)]; + addr_to_page(addr, page); + + ptep = (pte_t *) &page[pte_index(va)]; pte.pte = (ptep->pte | _PAGE_RW); - xen_l1_entry_update(ptep, pte); + xen_l1_entry_update(ptep, pte); __flush_tlb_one(addr); } @@ -115,55 +117,55 @@ void make_page_readonly(void *va) { pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep; - unsigned long addr = (unsigned long) va; - - if (!init_mapping_done) { - __make_page_readonly(addr); - return; - } - - pgd = pgd_offset_k(addr); - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - ptep = pte_offset_kernel(pmd, addr); + unsigned long addr = (unsigned long) va; + + if (!init_mapping_done) { + __make_page_readonly(addr); + return; + } + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + ptep = pte_offset_kernel(pmd, addr); pte.pte = (ptep->pte & ~_PAGE_RW); - xen_l1_entry_update(ptep, pte); + xen_l1_entry_update(ptep, pte); __flush_tlb_one(addr); } void make_page_writable(void *va) { - pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep; - unsigned long addr = (unsigned long) va; - - if (!init_mapping_done) { - __make_page_writable(addr); - return; - } - - pgd = pgd_offset_k(addr); - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - ptep = pte_offset_kernel(pmd, addr); + pgd_t* pgd; pud_t *pud; pmd_t* pmd; pte_t pte, *ptep; + unsigned long addr = (unsigned long) va; + + if (!init_mapping_done) { + __make_page_writable(addr); + return; + } + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + ptep = pte_offset_kernel(pmd, addr); pte.pte = (ptep->pte | _PAGE_RW); - xen_l1_entry_update(ptep, pte); + xen_l1_entry_update(ptep, pte); __flush_tlb_one(addr); } void make_pages_readonly(void* va, unsigned nr) { 
- while ( nr-- != 0 ) { - make_page_readonly(va); - va = (void*)((unsigned long)va + PAGE_SIZE); - } + while (nr-- != 0) { + make_page_readonly(va); + va = (void*)((unsigned long)va + PAGE_SIZE); + } } void make_pages_writable(void* va, unsigned nr) { - while ( nr-- != 0 ) { - make_page_writable(va); - va = (void*)((unsigned long)va + PAGE_SIZE); - } + while (nr-- != 0) { + make_page_writable(va); + va = (void*)((unsigned long)va + PAGE_SIZE); + } } /* @@ -389,7 +391,7 @@ set_pte_phys(address, phys, prot, SET_FIXMAP_USER); } -unsigned long __initdata table_start, table_end, tables_space; +unsigned long __initdata table_start, tables_space; unsigned long get_machine_pfn(unsigned long addr) { @@ -400,40 +402,15 @@ return pte_mfn(*pte); } -#define ALIGN_TO_4K __attribute__((section(".data.page_aligned"))) -#define MAX_LOW_PAGES 0x20 -static unsigned long __init_pgt[MAX_LOW_PAGES][512] ALIGN_TO_4K; -static int __init_pgt_index; - -/* - * We start using from start_pfn - */ static __init void *alloc_static_page(unsigned long *phys) { - int i = __init_pgt_index++; - - if (__init_pgt_index >= MAX_LOW_PAGES) { - printk("Need to increase MAX_LOW_PAGES"); - BUG(); - } - - *phys = __pa(__init_pgt[i]); - - return (void *) __init_pgt[i]; + unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map; + *phys = start_pfn << PAGE_SHIFT; + start_pfn++; + memset((void *)va, 0, PAGE_SIZE); + return (void *)va; } -/* - * Get RO page - */ -static void __init *alloc_low_page(unsigned long *phys) -{ - unsigned long pfn = table_end++; - - *phys = (pfn << PAGE_SHIFT); - memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE); - return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map); -} - #define PTE_SIZE PAGE_SIZE static inline void __set_pte(pte_t *dst, pte_t val) @@ -443,30 +420,24 @@ static inline int make_readonly(unsigned long paddr) { - int readonly = 0; - - /* Make new page tables read-only. 
*/ - if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) && - (paddr >= (table_start << PAGE_SHIFT))) - readonly = 1; - - /* Make old page tables read-only. */ - if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) + - (xen_start_info.nr_pt_frames << PAGE_SHIFT))) && - (paddr >= (xen_start_info.pt_base - __START_KERNEL_map))) - readonly = 1; - - /* - * No need for writable mapping of kernel image. This also ensures that - * page and descriptor tables embedded inside don't have writable mappings. - */ - if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))) - readonly = 1; - - return readonly; -} - -void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) + int readonly = 0; + + /* Make old and new page tables read-only. */ + if ((paddr >= (xen_start_info->pt_base - __START_KERNEL_map)) + && (paddr < ((table_start << PAGE_SHIFT) + tables_space))) + readonly = 1; + /* + * No need for writable mapping of kernel image. This also ensures that + * page and descriptor tables embedded inside don't have writable + * mappings. 
+ */ + if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))) + readonly = 1; + + return readonly; +} + +static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) { long i, j, k; unsigned long paddr; @@ -485,7 +456,7 @@ break; } - pmd = alloc_low_page(&pmd_phys); + pmd = alloc_static_page(&pmd_phys); make_page_readonly(pmd); xen_pmd_pin(pmd_phys); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); @@ -499,18 +470,19 @@ set_pmd(pmd, __pmd(0)); break; } - pte = alloc_low_page(&pte_phys); + pte = alloc_static_page(&pte_phys); pte_save = pte; for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) { + if ((paddr >= end) || + ((paddr >> PAGE_SHIFT) >= + xen_start_info->nr_pages)) { + __set_pte(pte, __pte(0)); + continue; + } if (make_readonly(paddr)) { __set_pte(pte, __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW))); continue; - } - if (paddr >= end) { - for (; k < PTRS_PER_PTE; k++, pte++) - __set_pte(pte, __pte(0)); - break; } __set_pte(pte, __pte(paddr | _KERNPG_TABLE)); } @@ -525,15 +497,16 @@ static void __init find_early_table_space(unsigned long end) { - unsigned long puds, pmds, ptes; + unsigned long puds, pmds, ptes; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT; - - tables_space = round_up(puds * 8, PAGE_SIZE) + - round_up(pmds * 8, PAGE_SIZE) + - round_up(ptes * 8, PAGE_SIZE); + ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT; + + tables_space = + round_up(puds * 8, PAGE_SIZE) + + round_up(pmds * 8, PAGE_SIZE) + + round_up(ptes * 8, PAGE_SIZE); } void __init xen_init_pt(void) @@ -549,7 +522,7 @@ memset((void *)level2_kernel_pgt, 0, PAGE_SIZE); /* Find the initial pte page that was built for us. 
*/ - page = (unsigned long *)xen_start_info.pt_base; + page = (unsigned long *)xen_start_info->pt_base; addr = page[pgd_index(__START_KERNEL_map)]; addr_to_page(addr, page); addr = page[pud_index(__START_KERNEL_map)]; @@ -579,65 +552,58 @@ mk_kernel_pgd(__pa_symbol(level3_user_pgt))); } -/* - * Extend kernel mapping to access pages for page tables. The initial - * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the - * mapping for early initialization. - */ -static unsigned long current_size, extended_size; - void __init extend_init_mapping(void) { unsigned long va = __START_KERNEL_map; unsigned long phys, addr, *pte_page; - pmd_t *pmd; + pmd_t *pmd; pte_t *pte, new_pte; - unsigned long *page = (unsigned long *) init_level4_pgt; - int i; + unsigned long *page = (unsigned long *)init_level4_pgt; addr = page[pgd_index(va)]; addr_to_page(addr, page); addr = page[pud_index(va)]; addr_to_page(addr, page); - for (;;) { + /* Kill mapping of low 1MB. */ + while (va < (unsigned long)&_text) { + HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0); + va += PAGE_SIZE; + } + + /* Ensure init mappings cover kernel text/data and initial tables. 
*/ + while (va < (__START_KERNEL_map + + (start_pfn << PAGE_SHIFT) + + tables_space)) { pmd = (pmd_t *)&page[pmd_index(va)]; - if (!pmd_present(*pmd)) - break; - addr = page[pmd_index(va)]; - addr_to_page(addr, pte_page); - for (i = 0; i < PTRS_PER_PTE; i++) { - pte = (pte_t *) &pte_page[pte_index(va)]; - if (!pte_present(*pte)) - break; - va += PAGE_SIZE; - current_size += PAGE_SIZE; + if (pmd_none(*pmd)) { + pte_page = alloc_static_page(&phys); + make_page_readonly(pte_page); + xen_pte_pin(phys); + set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER)); + } else { + addr = page[pmd_index(va)]; + addr_to_page(addr, pte_page); } - } - - while (va < __START_KERNEL_map + current_size + tables_space) { - pmd = (pmd_t *) &page[pmd_index(va)]; - if (!pmd_none(*pmd)) - continue; - pte_page = (unsigned long *) alloc_static_page(&phys); - make_page_readonly(pte_page); - xen_pte_pin(phys); - set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER)); - for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) { + pte = (pte_t *)&pte_page[pte_index(va)]; + if (pte_none(*pte)) { new_pte = pfn_pte( (va - __START_KERNEL_map) >> PAGE_SHIFT, __pgprot(_KERNPG_TABLE | _PAGE_USER)); - pte = (pte_t *)&pte_page[pte_index(va)]; xen_l1_entry_update(pte, new_pte); - extended_size += PAGE_SIZE; } - } - - /* Kill mapping of low 1MB. */ - for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += PAGE_SIZE) + va += PAGE_SIZE; + } + + /* Finally, blow away any spurious initial mappings. */ + while (1) { + pmd = (pmd_t *)&page[pmd_index(va)]; + if (pmd_none(*pmd)) + break; HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0); -} - + va += PAGE_SIZE; + } +} /* Setup the direct mapping of the physical memory at PAGE_OFFSET. 
This runs before bootmem is initialized and gets pages directly from the @@ -650,34 +616,31 @@ find_early_table_space(end); extend_init_mapping(); - start_pfn = current_size >> PAGE_SHIFT; table_start = start_pfn; - table_end = table_start; start = (unsigned long)__va(start); end = (unsigned long)__va(end); for (; start < end; start = next) { unsigned long pud_phys; - pud_t *pud = alloc_low_page(&pud_phys); - make_page_readonly(pud); - xen_pud_pin(pud_phys); + pud_t *pud = alloc_static_page(&pud_phys); + make_page_readonly(pud); + xen_pud_pin(pud_phys); next = start + PGDIR_SIZE; if (next > end) next = end; phys_pud_init(pud, __pa(start), __pa(next)); set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); - } - - printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, - table_start<<PAGE_SHIFT, - table_end<<PAGE_SHIFT); - - start_pfn = ((current_size + extended_size) >> PAGE_SHIFT); + } + + printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", + __pa(end), table_start<<PAGE_SHIFT, start_pfn<<PAGE_SHIFT); + + BUG_ON(start_pfn != (table_start + (tables_space >> PAGE_SHIFT))); __flush_tlb_all(); - init_mapping_done = 1; + init_mapping_done = 1; } extern struct x8664_pda cpu_pda[NR_CPUS]; @@ -708,7 +671,7 @@ free_area_init(zones_size); } - set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info); + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); memset(empty_zero_page, 0, sizeof(empty_zero_page)); @@ -719,7 +682,7 @@ int i; /* Setup mapping of lower 1st MB */ for (i = 0; i < NR_FIX_ISAMAPS; i++) - if (xen_start_info.flags & SIF_PRIVILEGED) + if (xen_start_info->flags & SIF_PRIVILEGED) set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); else __set_fixmap(FIX_ISAMAP_BEGIN - i, @@ -767,9 +730,6 @@ static inline int page_is_ram (unsigned long pagenr) { - if (pagenr < start_pfn || pagenr >= end_pfn) - return 0; - return 1; } @@ -1005,3 +965,13 @@ { return (addr >= VSYSCALL_START) && 
(addr < VSYSCALL_END); } + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/mem.c --- a/linux-2.6-xen-sparse/drivers/char/mem.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/mem.c Fri Sep 9 16:30:54 2005 @@ -231,7 +231,7 @@ } #endif -static int mmap_kmem(struct file * file, struct vm_area_struct * vma) +static int mmap_mem(struct file * file, struct vm_area_struct * vma) { #if defined(__HAVE_PHYS_MEM_ACCESS_PROT) unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; @@ -258,7 +258,6 @@ return 0; } -#if 0 static int mmap_kmem(struct file * file, struct vm_area_struct * vma) { unsigned long long val; @@ -275,7 +274,6 @@ vma->vm_pgoff = __pa(val) >> PAGE_SHIFT; return mmap_mem(file, vma); } -#endif extern long vread(char *buf, char *addr, unsigned long count); extern long vwrite(char *buf, char *addr, unsigned long count); @@ -731,7 +729,7 @@ .llseek = memory_lseek, .read = read_mem, .write = write_mem, - .mmap = mmap_kmem, + .mmap = mmap_mem, .open = open_mem, }; #else diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Fri Sep 9 16:30:54 2005 @@ -8,7 +8,9 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ +obj-$(CONFIG_XEN_TPMDEV_FRONTEND) += tpmfront/ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Sep 9 16:30:54 2005 @@ -44,6 +44,7 @@ 
#include <asm-xen/xen_proc.h> #include <asm-xen/hypervisor.h> #include <asm-xen/balloon.h> +#include <asm-xen/xen-public/memory.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -57,6 +58,12 @@ static struct proc_dir_entry *balloon_pde; static DECLARE_MUTEX(balloon_mutex); + +/* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and driver_pages, and + * balloon lists. + */ spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED; /* We aim for 'current allocation' == 'target allocation'. */ @@ -156,6 +163,146 @@ return target; } +static int increase_reservation(unsigned long nr_pages) +{ + unsigned long *mfn_list, pfn, i, flags; + struct page *page; + long rc; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > (PAGE_SIZE / sizeof(unsigned long))) + nr_pages = PAGE_SIZE / sizeof(unsigned long); + + mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL); + if (mfn_list == NULL) + return -ENOMEM; + + balloon_lock(flags); + + reservation.extent_start = mfn_list; + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); + if (rc < nr_pages) { + /* We hit the Xen hard limit: reprobe. */ + reservation.extent_start = mfn_list; + reservation.nr_extents = rc; + BUG_ON(HYPERVISOR_memory_op( + XENMEM_decrease_reservation, + &reservation) != rc); + hard_limit = current_pages + rc - driver_pages; + goto out; + } + + for (i = 0; i < nr_pages; i++) { + page = balloon_retrieve(); + BUG_ON(page == NULL); + + pfn = page - mem_map; + BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + + /* Update P->M and M->P tables. */ + phys_to_machine_mapping[pfn] = mfn_list[i]; + xen_machphys_update(mfn_list[i], pfn); + + /* Link back into the page tables if not highmem. 
*/ + if (pfn < max_low_pfn) + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + pfn_pte_ma(mfn_list[i], PAGE_KERNEL), + 0)); + + /* Relinquish the page back to the allocator. */ + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + } + + current_pages += nr_pages; + + out: + balloon_unlock(flags); + + free_page((unsigned long)mfn_list); + + return 0; +} + +static int decrease_reservation(unsigned long nr_pages) +{ + unsigned long *mfn_list, pfn, i, flags; + struct page *page; + void *v; + int need_sleep = 0; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > (PAGE_SIZE / sizeof(unsigned long))) + nr_pages = PAGE_SIZE / sizeof(unsigned long); + + mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL); + if (mfn_list == NULL) + return -ENOMEM; + + for (i = 0; i < nr_pages; i++) { + if ((page = alloc_page(GFP_HIGHUSER)) == NULL) { + nr_pages = i; + need_sleep = 1; + break; + } + + pfn = page - mem_map; + mfn_list[i] = phys_to_machine_mapping[pfn]; + + if (!PageHighMem(page)) { + v = phys_to_virt(pfn << PAGE_SHIFT); + scrub_pages(v, 1); + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)v, __pte_ma(0), 0)); + } +#ifdef CONFIG_XEN_SCRUB_PAGES + else { + v = kmap(page); + scrub_pages(v, 1); + kunmap(page); + } +#endif + } + + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); + + balloon_lock(flags); + + /* No more mappings: invalidate P2M and add to balloon. 
*/ + for (i = 0; i < nr_pages; i++) { + pfn = mfn_to_pfn(mfn_list[i]); + phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; + balloon_append(pfn_to_page(pfn)); + } + + reservation.extent_start = mfn_list; + reservation.nr_extents = nr_pages; + BUG_ON(HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation) != nr_pages); + + current_pages -= nr_pages; + + balloon_unlock(flags); + + free_page((unsigned long)mfn_list); + + return need_sleep; +} + /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected @@ -164,112 +311,23 @@ */ static void balloon_process(void *unused) { - unsigned long *mfn_list, pfn, i, flags; - struct page *page; - long credit, debt, rc; - void *v; + int need_sleep = 0; + long credit; down(&balloon_mutex); - retry: - mfn_list = NULL; - - if ((credit = current_target() - current_pages) > 0) { - mfn_list = vmalloc(credit * sizeof(*mfn_list)); - if (mfn_list == NULL) - goto out; - - balloon_lock(flags); - rc = HYPERVISOR_dom_mem_op( - MEMOP_increase_reservation, mfn_list, credit, 0); - balloon_unlock(flags); - if (rc < credit) { - /* We hit the Xen hard limit: reprobe. */ - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, - mfn_list, rc, 0) != rc); - hard_limit = current_pages + rc - driver_pages; - vfree(mfn_list); - goto retry; - } - - for (i = 0; i < credit; i++) { - page = balloon_retrieve(); - BUG_ON(page == NULL); - - pfn = page - mem_map; - if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) - BUG(); - - /* Update P->M and M->P tables. */ - phys_to_machine_mapping[pfn] = mfn_list[i]; - xen_machphys_update(mfn_list[i], pfn); - - /* Link back into the page tables if not highmem. */ - if (pfn < max_low_pfn) - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte_ma(mfn_list[i], PAGE_KERNEL), - 0)); - - /* Relinquish the page back to the allocator. 
*/ - ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - } - - current_pages += credit; - } else if (credit < 0) { - debt = -credit; - - mfn_list = vmalloc(debt * sizeof(*mfn_list)); - if (mfn_list == NULL) - goto out; - - for (i = 0; i < debt; i++) { - if ((page = alloc_page(GFP_HIGHUSER)) == NULL) { - debt = i; - break; - } - - pfn = page - mem_map; - mfn_list[i] = phys_to_machine_mapping[pfn]; - - if (!PageHighMem(page)) { - v = phys_to_virt(pfn << PAGE_SHIFT); - scrub_pages(v, 1); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)v, __pte_ma(0), 0)); - } -#ifdef CONFIG_XEN_SCRUB_PAGES - else { - v = kmap(page); - scrub_pages(v, 1); - kunmap(page); - } + do { + credit = current_target() - current_pages; + if (credit > 0) + need_sleep = (increase_reservation(credit) != 0); + if (credit < 0) + need_sleep = (decrease_reservation(-credit) != 0); + +#ifndef CONFIG_PREEMPT + if (need_resched()) + schedule(); #endif - } - - /* Ensure that ballooned highmem pages don't have kmaps. */ - kmap_flush_unused(); - flush_tlb_all(); - - /* No more mappings: invalidate P2M and add to balloon. */ - for (i = 0; i < debt; i++) { - pfn = mfn_to_pfn(mfn_list[i]); - phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; - balloon_append(pfn_to_page(pfn)); - } - - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation,mfn_list, debt, 0) != debt); - - current_pages -= debt; - } - - out: - if (mfn_list != NULL) - vfree(mfn_list); + } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. 
*/ if (current_target() != current_pages) @@ -295,10 +353,10 @@ /* React to a change in the target key */ static void watch_target(struct xenbus_watch *watch, const char *node) { - unsigned long new_target; + unsigned long long new_target; int err; - err = xenbus_scanf("memory", "target", "%lu", &new_target); + err = xenbus_scanf("memory", "target", "%llu", &new_target); if (err != 1) { printk(KERN_ERR "Unable to read memory/target\n"); return; @@ -390,7 +448,7 @@ IPRINTK("Initialising balloon driver.\n"); - current_pages = min(xen_start_info.nr_pages, max_pfn); + current_pages = min(xen_start_info->nr_pages, max_pfn); target_pages = current_pages; balloon_low = 0; balloon_high = 0; @@ -410,7 +468,7 @@ balloon_pde->write_proc = balloon_write; /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) { + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { page = &mem_map[pfn]; if (!PageReserved(page)) balloon_append(page); @@ -429,8 +487,9 @@ void balloon_update_driver_allowance(long delta) { unsigned long flags; + balloon_lock(flags); - driver_pages += delta; /* non-atomic update */ + driver_pages += delta; balloon_unlock(flags); } @@ -438,11 +497,17 @@ pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { unsigned long mfn = pte_mfn(*pte); + struct xen_memory_reservation reservation = { + .extent_start = &mfn, + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; set_pte(pte, __pte_ma(0)); phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = INVALID_P2M_ENTRY; - BUG_ON(HYPERVISOR_dom_mem_op( - MEMOP_decrease_reservation, &mfn, 1, 0) != 1); + BUG_ON(HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation) != 1); return 0; } @@ -457,9 +522,10 @@ scrub_pages(vstart, 1 << order); - balloon_lock(flags); BUG_ON(generic_page_range( &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL)); + + balloon_lock(flags); current_pages -= 1UL << order; balloon_unlock(flags); 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Sep 9 16:30:54 2005 @@ -504,8 +504,8 @@ int i; struct page *page; - if ( !(xen_start_info.flags & SIF_INITDOMAIN) && - !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) + if ( !(xen_start_info->flags & SIF_INITDOMAIN) && + !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) ) return 0; blkif_interface_init(); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Sep 9 16:30:54 2005 @@ -32,23 +32,15 @@ */ #if 1 -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ - __LINE__, __FILE__); *(int*)0=0; } +#define ASSERT(p) \ + if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \ + __LINE__, __FILE__); *(int*)0=0; } #else #define ASSERT(_p) #endif #include <linux/version.h> - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #include "block.h" -#else -#include "common.h" -#include <linux/blk.h> -#include <linux/tqueue.h> -#endif - #include <linux/cdrom.h> #include <linux/sched.h> #include <linux/interrupt.h> @@ -58,90 +50,57 @@ #include <asm-xen/xen-public/grant_table.h> #include <asm-xen/gnttab.h> -typedef unsigned char byte; /* from linux/ide.h */ - -/* Control whether runtime update of vbds is enabled. 
*/ -#define ENABLE_VBD_UPDATE 1 - #define BLKIF_STATE_DISCONNECTED 0 #define BLKIF_STATE_CONNECTED 1 static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; - -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) #define GRANTREF_INVALID (1<<15) - -static struct blk_shadow { - blkif_request_t req; - unsigned long request; - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -} blk_shadow[BLK_RING_SIZE]; -unsigned long blk_shadow_free; +#define GRANT_INVALID_REF (0xFFFF) static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ static void kick_pending_request_queues(struct blkfront_info *info); -static int __init xlblk_init(void); - static void blkif_completion(struct blk_shadow *s); -static inline int GET_ID_FROM_FREELIST(void) -{ - unsigned long free = blk_shadow_free; - BUG_ON(free > BLK_RING_SIZE); - blk_shadow_free = blk_shadow[free].req.id; - blk_shadow[free].req.id = 0x0fffffee; /* debug */ - return free; -} - -static inline void ADD_ID_TO_FREELIST(unsigned long id) -{ - blk_shadow[id].req.id = blk_shadow_free; - blk_shadow[id].request = 0; - blk_shadow_free = id; -} - - -/************************ COMMON CODE (inlined) ************************/ - -/* Kernel-specific definitions used in the common code */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define DISABLE_SCATTERGATHER() -#else -static int sg_operation = -1; -#define DISABLE_SCATTERGATHER() (sg_operation = -1) -#endif +static inline int GET_ID_FROM_FREELIST( + struct blkfront_info *info) +{ + unsigned long free = info->shadow_free; + BUG_ON(free > BLK_RING_SIZE); + info->shadow_free = info->shadow[free].req.id; + info->shadow[free].req.id = 0x0fffffee; /* debug */ + return free; +} + +static inline void ADD_ID_TO_FREELIST( + struct blkfront_info *info, unsigned long id) +{ + info->shadow[id].req.id = info->shadow_free; + info->shadow[id].request = 0; + 
info->shadow_free = id; +} static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) { - s->req = *r; + s->req = *r; } static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) { - *r = s->req; -} - + *r = s->req; +} static inline void flush_requests(struct blkfront_info *info) { - DISABLE_SCATTERGATHER(); - RING_PUSH_REQUESTS(&info->ring); - notify_via_evtchn(info->evtchn); -} - - -/************************** KERNEL VERSION 2.6 **************************/ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) - -module_init(xlblk_init); + RING_PUSH_REQUESTS(&info->ring); + notify_via_evtchn(info->evtchn); +} static void kick_pending_request_queues(struct blkfront_info *info) { @@ -169,50 +128,44 @@ int blkif_open(struct inode *inode, struct file *filep) { - // struct gendisk *gd = inode->i_bdev->bd_disk; - // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; - - /* Update of usage count is protected by per-device semaphore. */ - // di->mi->usage++; - return 0; } int blkif_release(struct inode *inode, struct file *filep) { - /* FIXME: This is where we can actually free up majors, etc. 
--RR */ - return 0; + return 0; } int blkif_ioctl(struct inode *inode, struct file *filep, unsigned command, unsigned long argument) { - int i; - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long)argument, inode->i_rdev); - - switch ( command ) - { - case HDIO_GETGEO: - /* return ENOSYS to use defaults */ - return -ENOSYS; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ - return -EINVAL; /* same return as native Linux */ - } - - return 0; + int i; + + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long)argument, inode->i_rdev); + + switch ( command ) + { + case HDIO_GETGEO: + /* return ENOSYS to use defaults */ + return -ENOSYS; + + case CDROMMULTISESSION: + DPRINTK("FIXME: support multisession CDs later\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char *)(argument + i))) + return -EFAULT; + return 0; + + default: + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", + command);*/ + return -EINVAL; /* same return as native Linux */ + } + + return 0; } @@ -228,76 +181,77 @@ */ static int blkif_queue_request(struct request *req) { - struct blkfront_info *info = req->rq_disk->private_data; - unsigned long buffer_ma; - blkif_request_t *ring_req; - struct bio *bio; - struct bio_vec *bvec; - int idx; - unsigned long id; - unsigned int fsect, lsect; - int ref; - grant_ref_t gref_head; - - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) - return 1; - - if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, - &gref_head) < 0) { - gnttab_request_free_callback(&info->callback, - blkif_restart_queue_callback, info, - BLKIF_MAX_SEGMENTS_PER_REQUEST); - return 1; - } - - /* Fill out a 
communications ring structure. */ - ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - id = GET_ID_FROM_FREELIST(); - blk_shadow[id].request = (unsigned long)req; - - ring_req->id = id; - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->handle = info->handle; - - ring_req->nr_segments = 0; - rq_for_each_bio(bio, req) - { - bio_for_each_segment(bvec, bio, idx) - { - if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST ) - BUG(); - buffer_ma = page_to_phys(bvec->bv_page); - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - info->backend_id, - buffer_ma >> PAGE_SHIFT, - rq_data_dir(req) ); - - blk_shadow[id].frame[ring_req->nr_segments] = - buffer_ma >> PAGE_SHIFT; - - ring_req->frame_and_sects[ring_req->nr_segments] = - blkif_fas_from_gref(ref, fsect, lsect); - - ring_req->nr_segments++; - } - } - - info->ring.req_prod_pvt++; - - /* Keep a private copy so we can reissue requests when recovering. */ - pickle_request(&blk_shadow[id], ring_req); - - gnttab_free_grant_references(gref_head); - - return 0; + struct blkfront_info *info = req->rq_disk->private_data; + unsigned long buffer_mfn; + blkif_request_t *ring_req; + struct bio *bio; + struct bio_vec *bvec; + int idx; + unsigned long id; + unsigned int fsect, lsect; + int ref; + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Fill out a communications ring structure. 
*/ + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + id = GET_ID_FROM_FREELIST(info); + info->shadow[id].request = (unsigned long)req; + + ring_req->id = id; + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->handle = info->handle; + + ring_req->nr_segments = 0; + rq_for_each_bio (bio, req) { + bio_for_each_segment (bvec, bio, idx) { + BUG_ON(ring_req->nr_segments + == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT; + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + ASSERT(ref != -ENOSPC); + + gnttab_grant_foreign_access_ref( + ref, + info->backend_id, + buffer_mfn, + rq_data_dir(req) ); + + info->shadow[id].frame[ring_req->nr_segments] = + buffer_mfn; + + ring_req->frame_and_sects[ring_req->nr_segments] = + blkif_fas_from_gref(ref, fsect, lsect); + + ring_req->nr_segments++; + } + } + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. */ + pickle_request(&info->shadow[id], ring_req); + + gnttab_free_grant_references(gref_head); + + return 0; } /* @@ -306,756 +260,200 @@ */ void do_blkif_request(request_queue_t *rq) { - struct blkfront_info *info = NULL; - struct request *req; - int queued; - - DPRINTK("Entered do_blkif_request\n"); - - queued = 0; - - while ( (req = elv_next_request(rq)) != NULL ) - { - info = req->rq_disk->private_data; - - if ( !blk_fs_request(req) ) - { - end_request(req, 0); - continue; - } - - if (RING_FULL(&info->ring)) - goto wait; - - DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", - req, req->cmd, req->sector, req->current_nr_sectors, - req->nr_sectors, req->buffer, - rq_data_dir(req) ? 
"write" : "read"); - - blkdev_dequeue_request(req); - if (blkif_queue_request(req)) { - blk_requeue_request(rq, req); - wait: - /* Avoid pointless unplugs. */ - blk_stop_queue(rq); - break; - } - - queued++; - } - - if ( queued != 0 ) - flush_requests(info); + struct blkfront_info *info = NULL; + struct request *req; + int queued; + + DPRINTK("Entered do_blkif_request\n"); + + queued = 0; + + while ((req = elv_next_request(rq)) != NULL) { + info = req->rq_disk->private_data; + + if (!blk_fs_request(req)) { + end_request(req, 0); + continue; + } + + if (RING_FULL(&info->ring)) + goto wait; + + DPRINTK("do_blk_req %p: cmd %p, sec %lx, " + "(%u/%li) buffer:%p [%s]\n", + req, req->cmd, req->sector, req->current_nr_sectors, + req->nr_sectors, req->buffer, + rq_data_dir(req) ? "write" : "read"); + + blkdev_dequeue_request(req); + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); + wait: + /* Avoid pointless unplugs. */ + blk_stop_queue(rq); + break; + } + + queued++; + } + + if (queued != 0) + flush_requests(info); } static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { - struct request *req; - blkif_response_t *bret; - RING_IDX i, rp; - unsigned long flags; - struct blkfront_info *info = (struct blkfront_info *)dev_id; - - spin_lock_irqsave(&blkif_io_lock, flags); - - if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { - spin_unlock_irqrestore(&blkif_io_lock, flags); - return IRQ_HANDLED; - } - - rp = info->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ - - for ( i = info->ring.rsp_cons; i != rp; i++ ) - { - unsigned long id; - - bret = RING_GET_RESPONSE(&info->ring, i); - id = bret->id; - req = (struct request *)blk_shadow[id].request; - - blkif_completion(&blk_shadow[id]); - - ADD_ID_TO_FREELIST(id); - - switch ( bret->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) - DPRINTK("Bad return from blkdev data request: %x\n", - bret->status); - - if ( unlikely(end_that_request_first - (req, - (bret->status == BLKIF_RSP_OKAY), - req->hard_nr_sectors)) ) - BUG(); - end_that_request_last(req); - - break; - default: - BUG(); - } - } - - info->ring.rsp_cons = i; - - kick_pending_request_queues(info); - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - return IRQ_HANDLED; -} - -#else -/************************** KERNEL VERSION 2.4 **************************/ - -static kdev_t sg_dev; -static unsigned long sg_next_sect; - -/* - * Request queues with outstanding work, but ring is currently full. - * We need no special lock here, as we always access this with the - * blkif_io_lock held. We only need a small maximum list. - */ -#define MAX_PENDING 8 -static request_queue_t *pending_queues[MAX_PENDING]; -static int nr_pending; - - -#define blkif_io_lock io_request_lock - -/*============================================================================*/ -static void kick_pending_request_queues(void) -{ - /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) ) - { - /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ - while ( (nr_pending != 0) && !RING_FULL(&info->ring) ) - do_blkif_request(pending_queues[--nr_pending]); - } -} - -int blkif_open(struct inode *inode, struct file *filep) -{ - short xldev = inode->i_rdev; - struct gendisk *gd = get_gendisk(xldev); - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); - - if ( gd->part[minor].nr_sects == 0 ) - { - /* - * Device either doesn't exist, or has zero capacity; we use a few - * cheesy heuristics to return the relevant error code - */ - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || - ((minor & (gd->max_p - 1)) != 0) ) - { - /* - * We have a real device, but no such partition, or we just have a - * partition number so guess this is the problem. - */ - return -ENXIO; /* no such device or address */ - } - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) - { - /* This is a removable device => assume that media is missing. */ - return -ENOMEDIUM; /* media not present (this is a guess) */ - } - else - { - /* Just go for the general 'no such device' error. */ - return -ENODEV; /* no such device */ - } - } - - /* Update of usage count is protected by per-device semaphore. */ - disk->usage++; - - return 0; -} - - -int blkif_release(struct inode *inode, struct file *filep) -{ - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --disk->usage == 0 ) { - vbd_update(); - } - - return 0; -} - - -int blkif_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument) -{ - kdev_t dev = inode->i_rdev; - struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; - int i; - unsigned short cylinders; - byte heads, sectors; - - /* NB. No need to check permissions. That is done for us. 
*/ - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - - gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; - - switch ( command ) - { - case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); - return put_user(part->nr_sects, (unsigned long *) argument); - - case BLKGETSIZE64: - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, - (u64)part->nr_sects * 512); - return put_user((u64)part->nr_sects * 512, (u64 *) argument); - - case BLKRRPART: /* re-read partition table */ - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); - return blkif_revalidate(dev); - - case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - - case BLKBSZGET: /* get block size */ - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); - break; - - case BLKBSZSET: /* set block size */ - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); - break; - - case BLKRASET: /* set read-ahead */ - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); - break; - - case BLKRAGET: /* get read-ahead */ - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); - break; - - case HDIO_GETGEO: - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); - if (!argument) return -EINVAL; - - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - - heads = 0xff; - sectors = 0x3f; - cylinders = part->nr_sects / (heads * sectors); - - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT; - - return 0; - - case HDIO_GETGEO_BIG: - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); - if (!argument) return -EINVAL; - - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - - heads = 0xff; - sectors = 0x3f; - cylinders = 
part->nr_sects / (heads * sectors); - - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT; - - return 0; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_BUS_NUMBER: - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); - return -ENOSYS; - - default: - WPRINTK("ioctl %08x not supported by XL blkif\n", command); - return -ENOSYS; - } - - return 0; -} - - - -/* check media change: should probably do something here in some cases :-) */ -int blkif_check(kdev_t dev) -{ - DPRINTK("blkif_check\n"); - return 0; -} - -int blkif_revalidate(kdev_t dev) -{ - struct block_device *bd; - struct gendisk *gd; - xl_disk_t *disk; - unsigned long capacity; - int i, rc = 0; - - if ( (bd = bdget(dev)) == NULL ) - return -EINVAL; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(dev)) == NULL) || - ((disk = xldev_to_xldisk(dev)) == NULL) || - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) - { - rc = -EINVAL; - goto out; - } - - if ( disk->usage > 1 ) - { - rc = -EBUSY; - goto out; - } - - /* Only reread partition table if VBDs aren't mapped to partitions. 
*/ - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) - { - for ( i = gd->max_p - 1; i >= 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); - } - - out: - up(&bd->bd_sem); - bdput(bd); - return rc; -} - - -/* - * blkif_queue_request - * - * request block io - * - * id: for guest use only. - * operation: BLKIF_OP_{READ,WRITE,PROBE} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. - */ -static int blkif_queue_request(unsigned long id, - int operation, - char * buffer, - unsigned long sector_number, - unsigned short nr_sectors, - kdev_t device, - blkif_vdev_t handle) -{ - unsigned long buffer_ma = virt_to_bus(buffer); - unsigned long xid; - struct gendisk *gd; - blkif_request_t *req; - struct buffer_head *bh; - unsigned int fsect, lsect; - int ref; - - fsect = (buffer_ma & ~PAGE_MASK) >> 9; - lsect = fsect + nr_sectors - 1; - - /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) - BUG(); - if ( lsect > ((PAGE_SIZE/512)-1) ) - BUG(); - - buffer_ma &= PAGE_MASK; - - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) - return 1; - - switch ( operation ) - { - - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - gd = get_gendisk(device); - - /* - * Update the sector_number we'll pass down as appropriate; note that - * we could sanity check that resulting sector will be in this - * partition, but this will happen in driver backend anyhow. - */ - sector_number += gd->part[MINOR(device)].start_sect; - - /* - * If this unit doesn't consist of virtual partitions then we clear - * the partn bits from the device number. 
- */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & - GENHD_FL_VIRT_PARTNS) ) - device &= ~(gd->max_p - 1); - - if ( (sg_operation == operation) && - (sg_dev == device) && - (sg_next_sect == sector_number) ) - { - req = RING_GET_REQUEST(&info->ring, - info->ring.req_prod_pvt - 1); - bh = (struct buffer_head *)id; - - bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; - blk_shadow[req->id].request = (unsigned long)id; - - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - info->backend_id, - buffer_ma >> PAGE_SHIFT, - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); - - blk_shadow[req->id].frame[req->nr_segments] = - buffer_ma >> PAGE_SHIFT; - - req->frame_and_sects[req->nr_segments] = - blkif_fas_from_gref(ref, fsect, lsect); - if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) - sg_next_sect += nr_sectors; - else - DISABLE_SCATTERGATHER(); - - /* Update the copy of the request in the recovery ring. */ - pickle_request(&blk_shadow[req->id], req ); - - return 0; - } - else if ( RING_FULL(&info->ring) ) - { - return 1; - } - else - { - sg_operation = operation; - sg_dev = device; - sg_next_sect = sector_number + nr_sectors; - } - break; - - default: - panic("unknown op %d\n", operation); - } - - /* Fill out a communications ring structure. */ - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); - - xid = GET_ID_FROM_FREELIST(); - blk_shadow[xid].request = (unsigned long)id; - - req->id = xid; - req->operation = operation; - req->sector_number = (blkif_sector_t)sector_number; - req->handle = handle; - req->nr_segments = 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - info->backend_id, - buffer_ma >> PAGE_SHIFT, - ( operation == BLKIF_OP_WRITE ? 
1 : 0 ) ); - - blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; - - req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); - - /* Keep a private copy so we can reissue requests when recovering. */ - pickle_request(&blk_shadow[xid], req); - - info->ring.req_prod_pvt++; - - return 0; -} - - -/* - * do_blkif_request - * read a block; request is in a request queue - */ -void do_blkif_request(request_queue_t *rq) -{ - struct request *req; - struct buffer_head *bh, *next_bh; - int rw, nsect, full, queued = 0; - - DPRINTK("Entered do_blkif_request\n"); - - while ( !rq->plugged && !list_empty(&rq->queue_head)) - { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) - goto out; - - DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", - req, req->cmd, req->sector, - req->current_nr_sectors, req->nr_sectors, req->bh); - - rw = req->cmd; - if ( rw == READA ) - rw = READ; - if ( unlikely((rw != READ) && (rw != WRITE)) ) - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); - - req->errors = 0; - - bh = req->bh; - while ( bh != NULL ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - - full = blkif_queue_request( - (unsigned long)bh, - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); - - if ( full ) - { - bh->b_reqnext = next_bh; - pending_queues[nr_pending++] = rq; - if ( unlikely(nr_pending >= MAX_PENDING) ) - BUG(); - goto out; - } - - queued++; - - /* Dequeue the buffer head from the request. */ - nsect = bh->b_size >> 9; - bh = req->bh = next_bh; - - if ( bh != NULL ) - { - /* There's another buffer head to do. Update the request. */ - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; - req->buffer = bh->b_data; - } - else - { - /* That was the last buffer head. Finalise the request. 
*/ - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) - BUG(); - blkdev_dequeue_request(req); - end_that_request_last(req); - } - } - } - - out: - if ( queued != 0 ) - flush_requests(); -} - - -static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - RING_IDX i, rp; - unsigned long flags; - struct buffer_head *bh, *next_bh; - - spin_lock_irqsave(&io_request_lock, flags); - - if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) ) - { - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } - - rp = info->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ - - for ( i = info->ring.rsp_cons; i != rp; i++ ) - { - unsigned long id; - blkif_response_t *bret; - - bret = RING_GET_RESPONSE(&info->ring, i); - id = bret->id; - bh = (struct buffer_head *)blk_shadow[id].request; - - blkif_completion(&blk_shadow[id]); - - ADD_ID_TO_FREELIST(id); - - switch ( bret->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) - DPRINTK("Bad return from blkdev data request: %lx\n", - bret->status); - for ( ; bh != NULL; bh = next_bh ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); - } - - break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); - blkif_control_rsp_valid = 1; - break; - default: - BUG(); - } - - } - info->ring.rsp_cons = i; - - kick_pending_request_queues(); - - spin_unlock_irqrestore(&io_request_lock, flags); -} - -#endif - -/***************************** COMMON CODE *******************************/ + struct request *req; + blkif_response_t *bret; + RING_IDX i, rp; + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { + spin_unlock_irqrestore(&blkif_io_lock, flags); + return IRQ_HANDLED; + } + + rp = 
info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + req = (struct request *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + ADD_ID_TO_FREELIST(info, id); + + switch (bret->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(bret->status != BLKIF_RSP_OKAY)) + DPRINTK("Bad return from blkdev data " + "request: %x\n", bret->status); + + BUG_ON(end_that_request_first( + req, (bret->status == BLKIF_RSP_OKAY), + req->hard_nr_sectors)); + end_that_request_last(req); + break; + default: + BUG(); + } + } + + info->ring.rsp_cons = i; + + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} static void blkif_free(struct blkfront_info *info) { - /* Prevent new requests being issued until we fix things up. */ - spin_lock_irq(&blkif_io_lock); - info->connected = BLKIF_STATE_DISCONNECTED; - spin_unlock_irq(&blkif_io_lock); - - /* Free resources associated with old device channel. */ - if ( info->ring.sring != NULL ) - { - free_page((unsigned long)info->ring.sring); - info->ring.sring = NULL; - } - unbind_evtchn_from_irqhandler(info->evtchn, NULL); - info->evtchn = 0; + /* Prevent new requests being issued until we fix things up. */ + spin_lock_irq(&blkif_io_lock); + info->connected = BLKIF_STATE_DISCONNECTED; + spin_unlock_irq(&blkif_io_lock); + + /* Free resources associated with old device channel. 
*/ + if (info->ring.sring != NULL) { + free_page((unsigned long)info->ring.sring); + info->ring.sring = NULL; + } + if (info->ring_ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(info->ring_ref, 0); + info->ring_ref = GRANT_INVALID_REF; + unbind_evtchn_from_irqhandler(info->evtchn, info); + info->evtchn = 0; } static void blkif_recover(struct blkfront_info *info) { - int i; - blkif_request_t *req; - struct blk_shadow *copy; - int j; - - /* Stage 1: Make a safe copy of the shadow state. */ - copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); - BUG_ON(copy == NULL); - memcpy(copy, blk_shadow, sizeof(blk_shadow)); - - /* Stage 2: Set up free list. */ - memset(&blk_shadow, 0, sizeof(blk_shadow)); - for ( i = 0; i < BLK_RING_SIZE; i++ ) - blk_shadow[i].req.id = i+1; - blk_shadow_free = info->ring.req_prod_pvt; - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - - /* Stage 3: Find pending requests and requeue them. */ - for ( i = 0; i < BLK_RING_SIZE; i++ ) - { - /* Not in use? */ - if ( copy[i].request == 0 ) - continue; - - /* Grab a request slot and unpickle shadow state into it. */ - req = RING_GET_REQUEST( - &info->ring, info->ring.req_prod_pvt); - unpickle_request(req, ©[i]); - - /* We get a new request id, and must reset the shadow state. */ - req->id = GET_ID_FROM_FREELIST(); - memcpy(&blk_shadow[req->id], ©[i], sizeof(copy[i])); - - /* Rewrite any grant references invalidated by suspend/resume. 
*/ - for ( j = 0; j < req->nr_segments; j++ ) - { - if ( req->frame_and_sects[j] & GRANTREF_INVALID ) - gnttab_grant_foreign_access_ref( - blkif_gref_from_fas(req->frame_and_sects[j]), - info->backend_id, - blk_shadow[req->id].frame[j], - rq_data_dir((struct request *) - blk_shadow[req->id].request)); - req->frame_and_sects[j] &= ~GRANTREF_INVALID; - } - blk_shadow[req->id].req = *req; - - info->ring.req_prod_pvt++; - } - - kfree(copy); - - recovery = 0; - - /* info->ring->req_prod will be set when we flush_requests().*/ - wmb(); - - /* Kicks things back into life. */ - flush_requests(info); - - /* Now safe to left other people use the interface. */ - info->connected = BLKIF_STATE_CONNECTED; + int i; + blkif_request_t *req; + struct blk_shadow *copy; + int j; + + /* Stage 1: Make a safe copy of the shadow state. */ + copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL); + BUG_ON(copy == NULL); + memcpy(copy, info->shadow, sizeof(info->shadow)); + + /* Stage 2: Set up free list. */ + memset(&info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow_free = info->ring.req_prod_pvt; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + /* Not in use? */ + if (copy[i].request == 0) + continue; + + /* Grab a request slot and unpickle shadow state into it. */ + req = RING_GET_REQUEST( + &info->ring, info->ring.req_prod_pvt); + unpickle_request(req, ©[i]); + + /* We get a new request id, and must reset the shadow state. */ + req->id = GET_ID_FROM_FREELIST(info); + memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); + + /* Rewrite any grant references invalidated by susp/resume. 
*/ + for (j = 0; j < req->nr_segments; j++) { + if ( req->frame_and_sects[j] & GRANTREF_INVALID ) + gnttab_grant_foreign_access_ref( + blkif_gref_from_fas( + req->frame_and_sects[j]), + info->backend_id, + info->shadow[req->id].frame[j], + rq_data_dir( + (struct request *) + info->shadow[req->id].request)); + req->frame_and_sects[j] &= ~GRANTREF_INVALID; + } + info->shadow[req->id].req = *req; + + info->ring.req_prod_pvt++; + } + + kfree(copy); + + recovery = 0; + + /* info->ring->req_prod will be set when we flush_requests().*/ + wmb(); + + /* Kicks things back into life. */ + flush_requests(info); + + /* Now safe to left other people use the interface. */ + info->connected = BLKIF_STATE_CONNECTED; } static void blkif_connect(struct blkfront_info *info, u16 evtchn) { - int err = 0; - - info->evtchn = evtchn; - - err = bind_evtchn_to_irqhandler( - info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); - if ( err != 0 ) - { - WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); - return; - } + int err = 0; + + info->evtchn = evtchn; + + err = bind_evtchn_to_irqhandler( + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); + if (err != 0) { + WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); + return; + } } @@ -1107,6 +505,8 @@ blkif_sring_t *sring; evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; int err; + + info->ring_ref = GRANT_INVALID_REF; sring = (void *)__get_free_page(GFP_KERNEL); if (!sring) { @@ -1130,6 +530,7 @@ err = HYPERVISOR_event_channel_op(&op); if (err) { gnttab_end_foreign_access(info->ring_ref, 0); + info->ring_ref = GRANT_INVALID_REF; free_page((unsigned long)info->ring.sring); info->ring.sring = 0; xenbus_dev_error(dev, err, "allocating event channel"); @@ -1227,9 +628,8 @@ static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err; + int err, vdevice, i; struct blkfront_info *info; - int vdevice; /* FIXME: Use dynamic device id if this is not set. 
*/ err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); @@ -1250,6 +650,12 @@ info->connected = BLKIF_STATE_DISCONNECTED; info->mi = NULL; INIT_WORK(&info->work, blkif_restart_queue, (void *)info); + + info->shadow_free = 0; + memset(info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); @@ -1329,55 +735,57 @@ static int wait_for_blkif(void) { - int err = 0; - int i; - - /* - * We should figure out how many and which devices we need to - * proceed and only wait for those. For now, continue once the - * first device is around. - */ - for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - if ( blkif_state != BLKIF_STATE_CONNECTED ) - { - WPRINTK("Timeout connecting to device!\n"); - err = -ENOSYS; - } - return err; + int err = 0; + int i; + + /* + * We should figure out how many and which devices we need to + * proceed and only wait for those. For now, continue once the + * first device is around. 
+ */ + for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } + + if (blkif_state != BLKIF_STATE_CONNECTED) { + WPRINTK("Timeout connecting to device!\n"); + err = -ENOSYS; + } + return err; } static int __init xlblk_init(void) { - int i; - - if ( (xen_start_info.flags & SIF_INITDOMAIN) || - (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) - return 0; - - IPRINTK("Initialising virtual block device driver\n"); - - blk_shadow_free = 0; - memset(blk_shadow, 0, sizeof(blk_shadow)); - for ( i = 0; i < BLK_RING_SIZE; i++ ) - blk_shadow[i].req.id = i+1; - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - - init_blk_xenbus(); - - wait_for_blkif(); - - return 0; -} + if ((xen_start_info->flags & SIF_INITDOMAIN) || + (xen_start_info->flags & SIF_BLK_BE_DOMAIN) ) + return 0; + + IPRINTK("Initialising virtual block device driver\n"); + + init_blk_xenbus(); + + wait_for_blkif(); + + return 0; +} + +module_init(xlblk_init); static void blkif_completion(struct blk_shadow *s) { - int i; - for ( i = 0; i < s->req.nr_segments; i++ ) - gnttab_free_grant_reference( - blkif_gref_from_fas(s->req.frame_and_sects[i])); -} + int i; + for (i = 0; i < s->req.nr_segments; i++) + gnttab_end_foreign_access( + blkif_gref_from_fas(s->req.frame_and_sects[i]), 0); +} + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Sep 9 16:30:54 2005 @@ -96,6 +96,14 @@ struct xlbd_type_info *type; }; +struct blk_shadow { + blkif_request_t req; + unsigned long request; + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + /* * We have 
one of these per vbd, whether ide, scsi or 'other'. They * hang in private_data off the gendisk structure. We may end up @@ -116,11 +124,11 @@ blkif_front_ring_t ring; unsigned int evtchn; struct xlbd_major_info *mi; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) request_queue_t *rq; -#endif struct work_struct work; struct gnttab_free_callback callback; + struct blk_shadow shadow[BLK_RING_SIZE]; + unsigned long shadow_free; }; extern spinlock_t blkif_io_lock; diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile Fri Sep 9 16:30:54 2005 @@ -1,3 +1,3 @@ -obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o +obj-y := xenbus.o interface.o blktap.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Sep 9 16:30:54 2005 @@ -1,90 +1,916 @@ /****************************************************************************** - * blktap.c + * arch/xen/drivers/blkif/blktap/blktap.c * - * XenLinux virtual block-device tap. + * This is a modified version of the block backend driver that remaps requests + * to a user-space memory region. It is intended to be used to write + * application-level servers that provide block interfaces to client VMs. * - * Copyright (c) 2004, Andrew Warfield + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <asm-xen/balloon.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/errno.h> +#include <linux/major.h> +#include <linux/gfp.h> +#include <linux/poll.h> +#include <asm/tlbflush.h> +#include "common.h" + +/* Only one process may open /dev/xen/blktap at any time. 
*/ +static unsigned long blktap_dev_inuse; +unsigned long blktap_ring_ok; /* make this ring->state */ + +/* Rings up to user space. */ +static blkif_front_ring_t blktap_ufe_ring; + +/* for poll: */ +static wait_queue_head_t blktap_wait; + +/* current switching mode */ +static unsigned long blktap_mode; + +/* local prototypes */ +static int blktap_read_ufe_ring(void); + + +/* /dev/xen/blktap resides at device number major=10, minor=200 */ +#define BLKTAP_MINOR 202 + +/* blktap IOCTLs: */ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */ +#define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_PRINT_IDXS 100 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */ +#define BLKTAP_MODE_COPY_FE 0x00000004 /* unimp. */ +#define BLKTAP_MODE_COPY_BE 0x00000008 /* unimp. */ +#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 /* unimp. */ +#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 /* unimp. 
*/ + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + +#define BLKTAP_MODE_COPY_BOTH \ + (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) + +#define BLKTAP_MODE_COPY_BOTH_PAGES \ + (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) ); +/* + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERCEPT_BE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) || + ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || + ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || + ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) + ); +*/ +} + + +/****************************************************************** + * MMAP REGION + */ + +/* + * We use a big chunk of address space to map in-flight requests into, + * and export this region up to user-space. See the comments in blkback + * about this -- the two must be kept in sync if the tap is used as a + * passthrough. + */ + +#define MAX_PENDING_REQS 64 +#define BATCH_PER_DOMAIN 16 + +/* immediately before the mmap area, we have a bunch of pages reserved + * for shared memory rings. + */ +#define RING_PAGES 1 /* Front */ + +/* Where things are inside the device mapping. */ +struct vm_area_struct *blktap_vma = NULL; +unsigned long mmap_vstart; /* Kernel pages for mapping in data. 
*/ +unsigned long rings_vstart; /* start of mmaped vma */ +unsigned long user_vstart; /* start of user mappings */ + +#define MMAP_PAGES \ + (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) +#define MMAP_VADDR(_start, _req,_seg) \ + (_start + \ + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + + + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +typedef struct { + blkif_t *blkif; + unsigned long id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; +} pending_req_t; + +/* + * We can't allocate pending_req's in order, since they may complete out of + * order. We therefore maintain an allocation ring. This ring also indicates + * when enough work has been passed down -- at that point the allocation ring + * will be empty. + */ +static pending_req_t pending_reqs[MAX_PENDING_REQS]; +static unsigned char pending_ring[MAX_PENDING_REQS]; +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; +/* NB. We use a different index type to differentiate from shared blk rings. */ +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +/* Requests passing through the tap to the backend hijack the id field + * in the request message. In it we put the AR index _AND_ the fe domid. + * the domid is used by the backend to map the pages properly. 
+ */ + +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx) +{ + return ( (fe_dom << 16) | MASK_PEND_IDX(idx) ); +} + +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id) +{ + return (PEND_RING_IDX)( id & 0x0000ffff ); +} + +extern inline domid_t ID_TO_DOM(unsigned long id) +{ + return (domid_t)(id >> 16); +} + + + +/****************************************************************** + * GRANT HANDLES + */ + +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. This is needed to + * drop the ref count on the frame. + */ +struct grant_handle_pair +{ + u16 kernel; + u16 user; +}; +static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; +#define pending_handle(_idx, _i) \ + (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) +#define BLKTAP_INVALID_HANDLE(_g) \ + (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ + (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ + } while(0) + + +/****************************************************************** + * BLKTAP VM OPS + */ + +static struct page *blktap_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + /* + * if the page has not been mapped in by the driver then generate + * a SIGBUS to the domain. + */ + + force_sig(SIGBUS, current); + + return 0; +} + +struct vm_operations_struct blktap_vm_ops = { + nopage: blktap_nopage, +}; + +/****************************************************************** + * BLKTAP FILE OPS + */ + +static int blktap_open(struct inode *inode, struct file *filp) +{ + blkif_sring_t *sring; + + if ( test_and_set_bit(0, &blktap_dev_inuse) ) + return -EBUSY; + + /* Allocate the fe ring. 
*/ + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) + goto fail_nomem; + + SetPageReserved(virt_to_page(sring)); + + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE); + + return 0; + + fail_nomem: + return -ENOMEM; +} + +static int blktap_release(struct inode *inode, struct file *filp) +{ + blktap_dev_inuse = 0; + blktap_ring_ok = 0; + + /* Free the ring page. */ + ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); + free_page((unsigned long) blktap_ufe_ring.sring); + + /* Clear any active mappings and free foreign map table */ + if (blktap_vma != NULL) { + zap_page_range(blktap_vma, blktap_vma->vm_start, + blktap_vma->vm_end - blktap_vma->vm_start, NULL); + blktap_vma = NULL; + } + + return 0; +} + + +/* Note on mmap: + * We need to map pages to user space in a way that will allow the block + * subsystem set up direct IO to them. This couldn't be done before, because + * there isn't really a sane way to translate a user virtual address down to a + * physical address when the page belongs to another domain. * - * Based on the original split block driver: - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge - * Copyright (c) 2004, Christian Limpach - * - * Note that unlike the split block driver code, this driver has been developed - * strictly for Linux 2.6 - */ - -#include "blktap.h" - -int __init xlblktap_init(void) -{ - ctrl_msg_t cmsg; - blkif_fe_driver_status_t fe_st; - blkif_be_driver_status_t be_st; - - printk(KERN_INFO "Initialising Xen block tap device\n"); -#ifdef CONFIG_XEN_BLKDEV_GRANT - printk(KERN_INFO "Block tap is using grant tables.\n"); -#endif - - DPRINTK(" tap - Backend connection init:\n"); - - - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. 
*/ - cmsg.type = CMSG_BLKIF_FE; - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_fe_driver_status_t); - fe_st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &fe_st, sizeof(fe_st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - - DPRINTK(" tap - Frontend connection init:\n"); + * My first approach was to map the page in to kernel memory, add an entry + * for it in the physical frame list (using alloc_lomem_region as in blkback) + * and then attempt to map that page up to user space. This is disallowed + * by xen though, which realizes that we don't really own the machine frame + * underlying the physical page. + * + * The new approach is to provide explicit support for this in xen linux. + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages + * mapped from other vms. vma->vm_private_data is set up as a mapping + * from pages to actual page structs. There is a new clause in get_user_pages + * that does the right thing for this sort of mapping. + */ +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int size; + struct page **map; + int i; + + DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n", + vma->vm_start, vma->vm_end); + + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &blktap_vm_ops; + + size = vma->vm_end - vma->vm_start; + if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { + printk(KERN_INFO + "blktap: you _must_ map exactly %d pages!\n", + MMAP_PAGES + RING_PAGES); + return -EAGAIN; + } + + size >>= PAGE_SHIFT; + DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); - active_reqs_init(); + rings_vstart = vma->vm_start; + user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); + + /* Map the ring pages to the start of the region and reserve it. */ + + /* not sure if I really need to do this... 
*/ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (remap_pfn_range(vma, vma->vm_start, + __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) + { + WPRINTK("Mapping user ring failed!\n"); + goto fail; + } + + /* Mark this VM as containing foreign pages, and set up mappings. */ + map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + * sizeof(struct page_struct*), + GFP_KERNEL); + if (map == NULL) + { + WPRINTK("Couldn't alloc VM_FOREIGH map.\n"); + goto fail; + } + + for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) + map[i] = NULL; + + vma->vm_private_data = map; + vma->vm_flags |= VM_FOREIGN; + + blktap_vma = vma; + blktap_ring_ok = 1; + + return 0; + fail: + /* Clear any active mappings. */ + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); + + return -ENOMEM; +} + +static int blktap_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch(cmd) { + case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ + return blktap_read_ufe_ring(); + + case BLKTAP_IOCTL_SETMODE: + if (BLKTAP_MODE_VALID(arg)) { + blktap_mode = arg; + /* XXX: may need to flush rings here. 
*/ + printk(KERN_INFO "blktap: set mode to %lx\n", arg); + return 0; + } + case BLKTAP_IOCTL_PRINT_IDXS: + { + //print_fe_ring_idxs(); + WPRINTK("User Rings: \n-----------\n"); + WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + blktap_ufe_ring.rsp_cons, + blktap_ufe_ring.req_prod_pvt, + blktap_ufe_ring.sring->req_prod, + blktap_ufe_ring.sring->rsp_prod); + + } + } + return -ENOIOCTLCMD; +} + +static unsigned int blktap_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &blktap_wait, wait); + if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ) + { + flush_tlb_all(); + + RING_PUSH_REQUESTS(&blktap_ufe_ring); + return POLLIN | POLLRDNORM; + } + + return 0; +} + +void blktap_kick_user(void) +{ + /* blktap_ring->req_prod = blktap_req_prod; */ + wake_up_interruptible(&blktap_wait); +} + +static struct file_operations blktap_fops = { + owner: THIS_MODULE, + poll: blktap_poll, + ioctl: blktap_ioctl, + open: blktap_open, + release: blktap_release, + mmap: blktap_mmap, +}; + + + +static int do_block_io_op(blkif_t *blkif, int max_to_do); +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); +static void make_response(blkif_t *blkif, unsigned long id, + unsigned short op, int st); + + +static void fast_flush_area(int idx, int nr_pages) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + unsigned int i, op = 0; + struct grant_handle_pair *handle; + unsigned long ptep; + + for (i=0; i<nr_pages; i++) + { + handle = &pending_handle(idx, i); + if (!BLKTAP_INVALID_HANDLE(handle)) + { + + unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); + unmap[op].dev_bus_addr = 0; + unmap[op].handle = handle->kernel; + op++; + + if (create_lookup_pte_addr(blktap_vma->vm_mm, + MMAP_VADDR(user_vstart, idx, i), + &ptep) !=0) { + DPRINTK("Couldn't get a pte addr!\n"); + return; + } + unmap[op].host_addr = ptep; + unmap[op].dev_bus_addr = 0; + unmap[op].handle = handle->user; + op++; + + 
BLKTAP_INVALIDATE_HANDLE(handle); + } + } + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, op))) + BUG(); + + if (blktap_vma != NULL) + zap_page_range(blktap_vma, + MMAP_VADDR(user_vstart, idx, 0), + nr_pages << PAGE_SHIFT, NULL); +} + +/****************************************************************** + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE + */ + +static struct list_head blkio_schedule_list; +static spinlock_t blkio_schedule_list_lock; + +static int __on_blkdev_list(blkif_t *blkif) +{ + return blkif->blkdev_list.next != NULL; +} + +static void remove_from_blkdev_list(blkif_t *blkif) +{ + unsigned long flags; + if ( !__on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&blkio_schedule_list_lock, flags); + if ( __on_blkdev_list(blkif) ) + { + list_del(&blkif->blkdev_list); + blkif->blkdev_list.next = NULL; + blkif_put(blkif); + } + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); +} + +static void add_to_blkdev_list_tail(blkif_t *blkif) +{ + unsigned long flags; + if ( __on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&blkio_schedule_list_lock, flags); + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) + { + list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); + blkif_get(blkif); + } + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); +} + + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); + +static int blkio_schedule(void *arg) +{ + DECLARE_WAITQUEUE(wq, current); + + blkif_t *blkif; + struct list_head *ent; + + daemonize("xenblkd"); + + for ( ; ; ) + { + /* Wait for work to do. */ + add_wait_queue(&blkio_schedule_wait, &wq); + set_current_state(TASK_INTERRUPTIBLE); + if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || + list_empty(&blkio_schedule_list) ) + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&blkio_schedule_wait, &wq); + + /* Queue up a batch of requests. 
*/ + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && + !list_empty(&blkio_schedule_list) ) + { + ent = blkio_schedule_list.next; + blkif = list_entry(ent, blkif_t, blkdev_list); + blkif_get(blkif); + remove_from_blkdev_list(blkif); + if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) + add_to_blkdev_list_tail(blkif); + blkif_put(blkif); + } + } +} + +static void maybe_trigger_blkio_schedule(void) +{ + /* + * Needed so that two processes, who together make the following predicate + * true, don't both read stale values and evaluate the predicate + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... + */ + smp_mb(); + + if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && + !list_empty(&blkio_schedule_list) ) + wake_up(&blkio_schedule_wait); +} + + + +/****************************************************************** + * COMPLETION CALLBACK -- Called as bh->b_end_io() + */ + + +static int blktap_read_ufe_ring(void) +{ + /* This is called to read responses from the UFE ring. */ + + RING_IDX i, j, rp; + blkif_response_t *resp; + blkif_t *blkif; + int pending_idx; + pending_req_t *pending_req; + unsigned long flags; + + /* if we are forwarding from UFERring to FERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { + + /* for each outstanding message on the UFEring */ + rp = blktap_ufe_ring.sring->rsp_prod; + rmb(); + + for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) + { + resp = RING_GET_RESPONSE(&blktap_ufe_ring, i); + pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id)); + pending_req = &pending_reqs[pending_idx]; + + blkif = pending_req->blkif; + for (j = 0; j < pending_req->nr_pages; j++) { + unsigned long vaddr; + struct page **map = blktap_vma->vm_private_data; + int offset; + + vaddr = MMAP_VADDR(user_vstart, pending_idx, j); + offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT; + + //ClearPageReserved(virt_to_page(vaddr)); + ClearPageReserved((struct page *)map[offset]); + map[offset] = NULL; + } + + fast_flush_area(pending_idx, 
pending_req->nr_pages); + make_response(blkif, pending_req->id, resp->operation, + resp->status); + blkif_put(pending_req->blkif); + spin_lock_irqsave(&pend_prod_lock, flags); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + spin_unlock_irqrestore(&pend_prod_lock, flags); + } + blktap_ufe_ring.rsp_cons = i; + maybe_trigger_blkio_schedule(); + } + return 0; +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_t *blkif = dev_id; + add_to_blkdev_list_tail(blkif); + maybe_trigger_blkio_schedule(); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ + +static int do_block_io_op(blkif_t *blkif, int max_to_do) +{ + blkif_back_ring_t *blk_ring = &blkif->blk_ring; + blkif_request_t *req; + RING_IDX i, rp; + int more_to_do = 0; + + rp = blk_ring->sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + for ( i = blk_ring->req_cons; + (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i); + i++ ) + { + if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) + { + more_to_do = 1; + break; + } + + req = RING_GET_REQUEST(blk_ring, i); + switch ( req->operation ) + { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + dispatch_rw_block_io(blkif, req); + break; + + default: + DPRINTK("error: unknown block io operation [%d]\n", + req->operation); + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + break; + } + } + + blk_ring->req_cons = i; + blktap_kick_user(); + + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) +{ + blkif_request_t *target; + int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + pending_req_t *pending_req; + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; + int op, ret; + unsigned int nseg; + + /* Check that number of segments is sane. */ + nseg = req->nr_segments; + if ( unlikely(nseg == 0) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) + { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + goto bad_descriptor; + } + + /* Make sure userspace is ready. */ + if (!blktap_ring_ok) { + DPRINTK("blktap: ring not ready for requests!\n"); + goto bad_descriptor; + } + + + if ( RING_FULL(&blktap_ufe_ring) ) { + WPRINTK("blktap: fe_ring is full, can't add (very broken!).\n"); + goto bad_descriptor; + } + + flush_cache_all(); /* a noop on intel... */ + + /* Map the foreign pages directly in to the application */ + op = 0; + for (i=0; i<req->nr_segments; i++) { + + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long ptep; + + uvaddr = MMAP_VADDR(user_vstart, pending_idx, i); + kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i); + + /* Map the remote page to kernel. 
*/ + map[op].host_addr = kvaddr; + map[op].dom = blkif->domid; + map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]); + map[op].flags = GNTMAP_host_map; + /* This needs a bit more thought in terms of interposition: + * If we want to be able to modify pages during write using + * grant table mappings, the guest will either need to allow + * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */ + if (req->operation == BLKIF_OP_WRITE) + map[op].flags |= GNTMAP_readonly; + op++; + + /* Now map it to user. */ + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); + if (ret) + { + DPRINTK("Couldn't get a pte addr!\n"); + fast_flush_area(pending_idx, req->nr_segments); + goto bad_descriptor; + } + + map[op].host_addr = ptep; + map[op].dom = blkif->domid; + map[op].ref = blkif_gref_from_fas(req->frame_and_sects[i]); + map[op].flags = GNTMAP_host_map | GNTMAP_application_map + | GNTMAP_contains_pte; + /* Above interposition comment applies here as well. */ + if (req->operation == BLKIF_OP_WRITE) + map[op].flags |= GNTMAP_readonly; + op++; + } + + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, map, op))) + BUG(); + + op = 0; + for (i=0; i<(req->nr_segments*2); i+=2) { + unsigned long uvaddr; + unsigned long kvaddr; + unsigned long offset; + int cancel = 0; + + uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2); + kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2); + + if ( unlikely(map[i].handle < 0) ) + { + DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); + ret = map[i].handle; + cancel = 1; + } + + if ( unlikely(map[i+1].handle < 0) ) + { + DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); + ret = map[i+1].handle; + cancel = 1; + } + + if (cancel) + { + fast_flush_area(pending_idx, req->nr_segments); + goto bad_descriptor; + } + + /* Set the necessary mappings in p2m and in the VM_FOREIGN + * vm_area_struct to allow user vaddr -> struct page lookups + * to work. 
This is needed for direct IO to foreign pages. */ + phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] = + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); + + offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; + ((struct page **)blktap_vma->vm_private_data)[offset] = + pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); + + /* Save handles for unmapping later. */ + pending_handle(pending_idx, i/2).kernel = map[i].handle; + pending_handle(pending_idx, i/2).user = map[i+1].handle; + } + + /* Mark mapped pages as reserved: */ + for ( i = 0; i < req->nr_segments; i++ ) + { + unsigned long kvaddr; + + kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i); + SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT)); + } + + pending_req = &pending_reqs[pending_idx]; + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + req->id = MAKE_ID(blkif->domid, pending_idx); + //atomic_set(&pending_req->pendcnt, nbio); + pending_cons++; + blkif_get(blkif); + + /* Finally, write the request message to the user ring. */ + target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); + memcpy(target, req, sizeof(*req)); + blktap_ufe_ring.req_prod_pvt++; + return; + + bad_descriptor: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(blkif_t *blkif, unsigned long id, + unsigned short op, int st) +{ + blkif_response_t *resp; + unsigned long flags; + blkif_back_ring_t *blk_ring = &blkif->blk_ring; + + /* Place on the response ring for the relevant domain. */ + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); + resp->id = id; + resp->operation = op; + resp->status = st; + wmb(); /* Ensure other side can see the response fields. 
*/ + blk_ring->rsp_prod_pvt++; + RING_PUSH_RESPONSES(blk_ring); + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + /* Kick the relevant domain. */ + notify_via_evtchn(blkif->evtchn); +} + +static struct miscdevice blktap_miscdev = { + .minor = BLKTAP_MINOR, + .name = "blktap", + .fops = &blktap_fops, + .devfs_name = "misc/blktap", +}; + +void blkif_deschedule(blkif_t *blkif) +{ + remove_from_blkdev_list(blkif); +} + +static int __init blkif_init(void) +{ + int i, j, err; + struct page *page; +/* + if ( !(xen_start_info->flags & SIF_INITDOMAIN) && + !(xen_start_info->flags & SIF_BLK_BE_DOMAIN) ) + return 0; +*/ blkif_interface_init(); - blkdev_schedule_init(); + + page = balloon_alloc_empty_page_range(MMAP_PAGES); + BUG_ON(page == NULL); + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + + pending_cons = 0; + pending_prod = MAX_PENDING_REQS; + memset(pending_reqs, 0, sizeof(pending_reqs)); + for ( i = 0; i < MAX_PENDING_REQS; i++ ) + pending_ring[i] = i; - (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); - - /* Send a driver-UP notification to the domain controller. 
*/ - cmsg.type = CMSG_BLKIF_BE; - cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_be_driver_status_t); - be_st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &be_st, sizeof(be_st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); - - DPRINTK(" tap - Userland channel init:\n"); - - blktap_init(); - - DPRINTK("Blkif tap device initialized.\n"); + spin_lock_init(&blkio_schedule_list_lock); + INIT_LIST_HEAD(&blkio_schedule_list); + + if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) + BUG(); + + blkif_xenbus_init(); + + for (i=0; i<MAX_PENDING_REQS ; i++) + for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) + BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); + + err = misc_register(&blktap_miscdev); + if ( err != 0 ) + { + printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); + return err; + } + + init_waitqueue_head(&blktap_wait); return 0; } -#if 0 /* tap doesn't handle suspend/resume */ -void blkdev_suspend(void) -{ -} - -void blkdev_resume(void) -{ - ctrl_msg_t cmsg; - blkif_fe_driver_status_t st; - - /* Send a driver-UP notification to the domain controller. 
*/ - cmsg.type = CMSG_BLKIF_FE; - cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS; - cmsg.length = sizeof(blkif_fe_driver_status_t); - st.status = BLKIF_DRIVER_STATUS_UP; - memcpy(cmsg.msg, &st, sizeof(st)); - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} -#endif - -__initcall(xlblktap_init); +__initcall(blkif_init); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Fri Sep 9 16:30:54 2005 @@ -15,7 +15,6 @@ #include <linux/config.h> #include <linux/sched.h> #include <linux/interrupt.h> -#include <asm-xen/ctrl_if.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <asm/io.h> diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/console/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/console/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/console/Makefile Fri Sep 9 16:30:54 2005 @@ -1,2 +1,2 @@ -obj-y := console.o +obj-y := console.o xencons_ring.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Fri Sep 9 16:30:54 2005 @@ -45,14 +45,15 @@ #include <linux/init.h> #include <linux/console.h> #include <linux/bootmem.h> +#include <linux/sysrq.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/uaccess.h> #include <asm-xen/xen-public/event_channel.h> #include <asm-xen/hypervisor.h> #include <asm-xen/evtchn.h> -#include <asm-xen/ctrl_if.h> - + +#include "xencons_ring.h" /* * Modes: * 'xencons=off' [XC_OFF]: Console is disabled. 
@@ -66,6 +67,11 @@ static enum { XC_OFF, XC_DEFAULT, XC_TTY, XC_SERIAL } xc_mode = XC_DEFAULT; static int xc_num = -1; +#ifdef CONFIG_MAGIC_SYSRQ +static unsigned long sysrq_requested; +extern int sysrq_enabled; +#endif + static int __init xencons_setup(char *str) { char *q; @@ -118,13 +124,6 @@ /* Common transmit-kick routine. */ static void __xencons_tx_flush(void); -/* This task is used to defer sending console data until there is space. */ -static void xencons_tx_flush_task_routine(void *data); - -static DECLARE_TQUEUE(xencons_tx_flush_task, - xencons_tx_flush_task_routine, - NULL); - #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) static struct tty_driver *xencons_driver; #else @@ -196,7 +195,7 @@ void xen_console_init(void) #endif { - if ( xen_start_info.flags & SIF_INITDOMAIN ) + if ( xen_start_info->flags & SIF_INITDOMAIN ) { if ( xc_mode == XC_DEFAULT ) xc_mode = XC_SERIAL; @@ -264,39 +263,22 @@ /*** Forcibly flush console data before dying. ***/ void xencons_force_flush(void) { - ctrl_msg_t msg; int sz; /* Emergency console is synchronous, so there's nothing to flush. */ - if ( xen_start_info.flags & SIF_INITDOMAIN ) - return; - - /* - * We use dangerous control-interface functions that require a quiescent - * system and no interrupts. Try to ensure this with a global cli(). - */ - local_irq_disable(); /* XXXsmp */ + if ( xen_start_info->flags & SIF_INITDOMAIN ) + return; + /* Spin until console data is flushed through to the domain controller. */ - while ( (wc != wp) && !ctrl_if_transmitter_empty() ) - { - /* Interrupts are disabled -- we must manually reap responses. 
*/ - ctrl_if_discard_responses(); - + while ( (wc != wp) ) + { + int sent = 0; if ( (sz = wp - wc) == 0 ) continue; - if ( sz > sizeof(msg.msg) ) - sz = sizeof(msg.msg); - if ( sz > (wbuf_size - WBUF_MASK(wc)) ) - sz = wbuf_size - WBUF_MASK(wc); - - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; - msg.length = sz; - memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); - - if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) - wc += sz; + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if (sent > 0) + wc += sent; } } @@ -320,7 +302,7 @@ static char x_char; /* Non-privileged receive callback. */ -static void xencons_rx(ctrl_msg_t *msg, unsigned long id) +static void xencons_rx(char *buf, unsigned len, struct pt_regs *regs) { int i; unsigned long flags; @@ -328,23 +310,39 @@ spin_lock_irqsave(&xencons_lock, flags); if ( xencons_tty != NULL ) { - for ( i = 0; i < msg->length; i++ ) - tty_insert_flip_char(xencons_tty, msg->msg[i], 0); + for ( i = 0; i < len; i++ ) { +#ifdef CONFIG_MAGIC_SYSRQ + if (sysrq_enabled) { + if (buf[i] == '\x0f') { /* ^O */ + sysrq_requested = jiffies; + continue; /* don't print the sysrq key */ + } else if (sysrq_requested) { + unsigned long sysrq_timeout = sysrq_requested + HZ*2; + sysrq_requested = 0; + /* if it's been less than a timeout, do the sysrq */ + if (time_before(jiffies, sysrq_timeout)) { + spin_unlock_irqrestore(&xencons_lock, flags); + handle_sysrq(buf[i], regs, xencons_tty); + spin_lock_irqsave(&xencons_lock, flags); + continue; + } + } + } +#endif + tty_insert_flip_char(xencons_tty, buf[i], 0); + } tty_flip_buffer_push(xencons_tty); } spin_unlock_irqrestore(&xencons_lock, flags); - msg->length = 0; - ctrl_if_send_response(msg); } /* Privileged and non-privileged transmit worker. 
*/ static void __xencons_tx_flush(void) { int sz, work_done = 0; - ctrl_msg_t msg; - - if ( xen_start_info.flags & SIF_INITDOMAIN ) + + if ( xen_start_info->flags & SIF_INITDOMAIN ) { if ( x_char ) { @@ -367,38 +365,23 @@ { while ( x_char ) { - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; - msg.length = 1; - msg.msg[0] = x_char; - - if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) - x_char = 0; - else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) ) - break; - - work_done = 1; + if (xencons_ring_send(&x_char, 1) == 1) { + x_char = 0; + work_done = 1; + } } while ( wc != wp ) { + int sent; sz = wp - wc; - if ( sz > sizeof(msg.msg) ) - sz = sizeof(msg.msg); - if ( sz > (wbuf_size - WBUF_MASK(wc)) ) - sz = wbuf_size - WBUF_MASK(wc); - - msg.type = CMSG_CONSOLE; - msg.subtype = CMSG_CONSOLE_DATA; - msg.length = sz; - memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz); - - if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 ) - wc += sz; - else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) ) - break; - - work_done = 1; + if ( sz > (wbuf_size - WBUF_MASK(wc)) ) + sz = wbuf_size - WBUF_MASK(wc); + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if ( sent > 0 ) { + wc += sent; + work_done = 1; + } } } @@ -409,15 +392,6 @@ (xencons_tty->ldisc.write_wakeup != NULL) ) (xencons_tty->ldisc.write_wakeup)(xencons_tty); } -} - -/* Non-privileged transmit kicker. */ -static void xencons_tx_flush_task_routine(void *data) -{ - unsigned long flags; - spin_lock_irqsave(&xencons_lock, flags); - __xencons_tx_flush(); - spin_unlock_irqrestore(&xencons_lock, flags); } /* Privileged receive callback and transmit kicker. */ @@ -726,6 +700,8 @@ if ( xc_mode == XC_OFF ) return 0; + xencons_ring_init(); + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 
1 : MAX_NR_CONSOLES); @@ -794,7 +770,7 @@ tty_register_device(xencons_driver, 0, NULL); #endif - if ( xen_start_info.flags & SIF_INITDOMAIN ) + if ( xen_start_info->flags & SIF_INITDOMAIN ) { xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE); (void)request_irq(xencons_priv_irq, @@ -802,7 +778,8 @@ } else { - (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0); + + xencons_ring_register_receiver(xencons_rx); } printk("Xen virtual console successfully installed as %s%d\n", diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Fri Sep 9 16:30:54 2005 @@ -350,6 +350,8 @@ spin_unlock_irq(&port_user_lock); + kfree(u); + return 0; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/netback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Sep 9 16:30:54 2005 @@ -14,15 +14,17 @@ #include <linux/in.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <asm-xen/ctrl_if.h> #include <asm-xen/evtchn.h> #include <asm-xen/xen-public/io/netif.h> #include <asm/io.h> #include <asm/pgalloc.h> -#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) +#ifdef CONFIG_XEN_NETDEV_GRANT #include <asm-xen/xen-public/grant_table.h> #include <asm-xen/gnttab.h> + +#define GRANT_INVALID_REF (0xFFFF) + #endif @@ -37,6 +39,11 @@ #define ASSERT(_p) ((void)0) #define DPRINTK(_f, _a...) ((void)0) #endif +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_net: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "xen_net: " fmt, ##args) + typedef struct netif_st { /* Unique identifier for this interface. */ @@ -47,13 +54,13 @@ /* Physical parameters of the comms window. 
*/ unsigned long tx_shmem_frame; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT u16 tx_shmem_handle; unsigned long tx_shmem_vaddr; grant_ref_t tx_shmem_ref; #endif unsigned long rx_shmem_frame; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT u16 rx_shmem_handle; unsigned long rx_shmem_vaddr; grant_ref_t rx_shmem_ref; @@ -68,7 +75,7 @@ /* Private indexes into shared ring. */ NETIF_RING_IDX rx_req_cons; NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */ -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */ #endif NETIF_RING_IDX tx_req_cons; diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Fri Sep 9 16:30:54 2005 @@ -111,91 +111,81 @@ return netif; } -static int map_frontend_page(netif_t *netif, unsigned long localaddr, - unsigned long tx_ring_ref, unsigned long rx_ring_ref) -{ -#if !defined(CONFIG_XEN_NETDEV_GRANT_TX)||!defined(CONFIG_XEN_NETDEV_GRANT_RX) +static int map_frontend_pages(netif_t *netif, unsigned long localaddr, + unsigned long tx_ring_ref, + unsigned long rx_ring_ref) +{ +#ifdef CONFIG_XEN_NETDEV_GRANT + struct gnttab_map_grant_ref op; + + /* Map: Use the Grant table reference */ + op.host_addr = localaddr; + op.flags = GNTMAP_host_map; + op.ref = tx_ring_ref; + op.dom = netif->domid; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + if (op.handle < 0) { + DPRINTK(" Grant table operation failure mapping tx_ring_ref!\n"); + return op.handle; + } + + netif->tx_shmem_ref = tx_ring_ref; + netif->tx_shmem_handle = op.handle; + netif->tx_shmem_vaddr = localaddr; + + /* Map: Use the Grant table reference */ + op.host_addr = localaddr + PAGE_SIZE; + op.flags = GNTMAP_host_map; + op.ref = rx_ring_ref; + op.dom = 
netif->domid; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + if (op.handle < 0) { + DPRINTK(" Grant table operation failure mapping rx_ring_ref!\n"); + return op.handle; + } + + netif->rx_shmem_ref = rx_ring_ref; + netif->rx_shmem_handle = op.handle; + netif->rx_shmem_vaddr = localaddr + PAGE_SIZE; + +#else pgprot_t prot = __pgprot(_KERNPG_TABLE); int err; -#endif -#if defined(CONFIG_XEN_NETDEV_GRANT_TX) - { - struct gnttab_map_grant_ref op; - - /* Map: Use the Grant table reference */ - op.host_addr = localaddr; - op.flags = GNTMAP_host_map; - op.ref = tx_ring_ref; - op.dom = netif->domid; - - BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); - if (op.handle < 0) { - DPRINTK(" Grant table operation failure !\n"); - return op.handle; - } - - netif->tx_shmem_ref = tx_ring_ref; - netif->tx_shmem_handle = op.handle; - netif->tx_shmem_vaddr = localaddr; - } -#else - err = direct_remap_area_pages(&init_mm, localaddr, - tx_ring_ref<<PAGE_SHIFT, PAGE_SIZE, + + err = direct_remap_pfn_range(&init_mm, localaddr, + tx_ring_ref, PAGE_SIZE, prot, netif->domid); + + err |= direct_remap_pfn_range(&init_mm, localaddr + PAGE_SIZE, + rx_ring_ref, PAGE_SIZE, + prot, netif->domid); + if (err) return err; #endif -#if defined(CONFIG_XEN_NETDEV_GRANT_RX) - { - struct gnttab_map_grant_ref op; - - /* Map: Use the Grant table reference */ - op.host_addr = localaddr + PAGE_SIZE; - op.flags = GNTMAP_host_map; - op.ref = rx_ring_ref; - op.dom = netif->domid; - - BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); - if (op.handle < 0) { - DPRINTK(" Grant table operation failure !\n"); - return op.handle; - } - - netif->rx_shmem_ref = rx_ring_ref; - netif->rx_shmem_handle = op.handle; - netif->rx_shmem_vaddr = localaddr + PAGE_SIZE; - } -#else - err = direct_remap_area_pages(&init_mm, localaddr + PAGE_SIZE, - rx_ring_ref<<PAGE_SHIFT, PAGE_SIZE, - prot, netif->domid); - if (err) - return err; -#endif - return 0; } -static void 
unmap_frontend_page(netif_t *netif) -{ -#if defined(CONFIG_XEN_NETDEV_GRANT_RX) || defined(CONFIG_XEN_NETDEV_GRANT_TX) +static void unmap_frontend_pages(netif_t *netif) +{ +#ifdef CONFIG_XEN_NETDEV_GRANT struct gnttab_unmap_grant_ref op; -#endif - -#ifdef CONFIG_XEN_NETDEV_GRANT_TX + op.host_addr = netif->tx_shmem_vaddr; op.handle = netif->tx_shmem_handle; op.dev_bus_addr = 0; BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); -#endif - -#ifdef CONFIG_XEN_NETDEV_GRANT_RX + op.host_addr = netif->rx_shmem_vaddr; op.handle = netif->rx_shmem_handle; op.dev_bus_addr = 0; BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); #endif + + return; } int netif_map(netif_t *netif, unsigned long tx_ring_ref, @@ -209,8 +199,8 @@ if (vma == NULL) return -ENOMEM; - err = map_frontend_page(netif, (unsigned long)vma->addr, tx_ring_ref, - rx_ring_ref); + err = map_frontend_pages(netif, (unsigned long)vma->addr, tx_ring_ref, + rx_ring_ref); if (err) { vfree(vma->addr); return err; @@ -222,7 +212,7 @@ op.u.bind_interdomain.port2 = evtchn; err = HYPERVISOR_event_channel_op(&op); if (err) { - unmap_frontend_page(netif); + unmap_frontend_pages(netif); vfree(vma->addr); return err; } @@ -267,7 +257,7 @@ unregister_netdev(netif->dev); if (netif->tx) { - unmap_frontend_page(netif); + unmap_frontend_pages(netif); vfree(netif->tx); /* Frees netif->rx as well. 
*/ } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Fri Sep 9 16:30:54 2005 @@ -29,136 +29,163 @@ #include <linux/skbuff.h> #include <net/dst.h> +static int nloopbacks = 1; +module_param(nloopbacks, int, 0); +MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create"); + struct net_private { - struct net_device *loopback_dev; - struct net_device_stats stats; + struct net_device *loopback_dev; + struct net_device_stats stats; }; static int loopback_open(struct net_device *dev) { - struct net_private *np = netdev_priv(dev); - memset(&np->stats, 0, sizeof(np->stats)); - netif_start_queue(dev); - return 0; + struct net_private *np = netdev_priv(dev); + memset(&np->stats, 0, sizeof(np->stats)); + netif_start_queue(dev); + return 0; } static int loopback_close(struct net_device *dev) { - netif_stop_queue(dev); - return 0; + netif_stop_queue(dev); + return 0; } static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_private *np = netdev_priv(dev); + struct net_private *np = netdev_priv(dev); - dst_release(skb->dst); - skb->dst = NULL; + dst_release(skb->dst); + skb->dst = NULL; - skb_orphan(skb); + skb_orphan(skb); - np->stats.tx_bytes += skb->len; - np->stats.tx_packets++; + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; - /* Switch to loopback context. */ - dev = np->loopback_dev; - np = netdev_priv(dev); + /* Switch to loopback context. */ + dev = np->loopback_dev; + np = netdev_priv(dev); - np->stats.rx_bytes += skb->len; - np->stats.rx_packets++; + np->stats.rx_bytes += skb->len; + np->stats.rx_packets++; - if ( skb->ip_summed == CHECKSUM_HW ) - { - /* Defer checksum calculation. */ - skb->proto_csum_blank = 1; - /* Must be a local packet: assert its integrity. 
*/ - skb->proto_csum_valid = 1; - } + if (skb->ip_summed == CHECKSUM_HW) { + /* Defer checksum calculation. */ + skb->proto_csum_blank = 1; + /* Must be a local packet: assert its integrity. */ + skb->proto_csum_valid = 1; + } - skb->ip_summed = skb->proto_csum_valid ? - CHECKSUM_UNNECESSARY : CHECKSUM_NONE; + skb->ip_summed = skb->proto_csum_valid ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE; - skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */ - skb->protocol = eth_type_trans(skb, dev); - skb->dev = dev; - dev->last_rx = jiffies; - netif_rx(skb); + skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */ + skb->protocol = eth_type_trans(skb, dev); + skb->dev = dev; + dev->last_rx = jiffies; + netif_rx(skb); - return 0; + return 0; } static struct net_device_stats *loopback_get_stats(struct net_device *dev) { - struct net_private *np = netdev_priv(dev); - return &np->stats; + struct net_private *np = netdev_priv(dev); + return &np->stats; } static void loopback_construct(struct net_device *dev, struct net_device *lo) { - struct net_private *np = netdev_priv(dev); + struct net_private *np = netdev_priv(dev); - np->loopback_dev = lo; + np->loopback_dev = lo; - dev->open = loopback_open; - dev->stop = loopback_close; - dev->hard_start_xmit = loopback_start_xmit; - dev->get_stats = loopback_get_stats; + dev->open = loopback_open; + dev->stop = loopback_close; + dev->hard_start_xmit = loopback_start_xmit; + dev->get_stats = loopback_get_stats; - dev->tx_queue_len = 0; + dev->tx_queue_len = 0; - dev->features = NETIF_F_HIGHDMA | NETIF_F_LLTX; + dev->features = NETIF_F_HIGHDMA | NETIF_F_LLTX; - /* - * We do not set a jumbo MTU on the interface. Otherwise the network - * stack will try to send large packets that will get dropped by the - * Ethernet bridge (unless the physical Ethernet interface is configured - * to transfer jumbo packets). If a larger MTU is desired then the system - * administrator can specify it using the 'ifconfig' command. 
- */ - /*dev->mtu = 16*1024;*/ + /* + * We do not set a jumbo MTU on the interface. Otherwise the network + * stack will try to send large packets that will get dropped by the + * Ethernet bridge (unless the physical Ethernet interface is + * configured to transfer jumbo packets). If a larger MTU is desired + * then the system administrator can specify it using the 'ifconfig' + * command. + */ + /*dev->mtu = 16*1024;*/ +} + +static int __init make_loopback(int i) +{ + struct net_device *dev1, *dev2; + char dev_name[IFNAMSIZ]; + int err = -ENOMEM; + + sprintf(dev_name, "vif0.%d", i); + dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup); + sprintf(dev_name, "veth%d", i); + dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup); + if ((dev1 == NULL) || (dev2 == NULL)) + goto fail; + + loopback_construct(dev1, dev2); + loopback_construct(dev2, dev1); + + dev1->features |= NETIF_F_NO_CSUM; + dev2->features |= NETIF_F_IP_CSUM; + + /* + * Initialise a dummy MAC address for the 'dummy backend' interface. We + * choose the numerically largest non-broadcast address to prevent the + * address getting stolen by an Ethernet bridge for STP purposes. 
+ */ + memset(dev1->dev_addr, 0xFF, ETH_ALEN); + dev1->dev_addr[0] &= ~0x01; + + if ((err = register_netdev(dev1)) != 0) + goto fail; + + if ((err = register_netdev(dev2)) != 0) { + unregister_netdev(dev1); + goto fail; + } + + return 0; + + fail: + if (dev1 != NULL) + kfree(dev1); + if (dev2 != NULL) + kfree(dev2); + return err; } static int __init loopback_init(void) { - struct net_device *dev1, *dev2; - int err = -ENOMEM; + int i, err = 0; - dev1 = alloc_netdev(sizeof(struct net_private), "vif0.0", ether_setup); - dev2 = alloc_netdev(sizeof(struct net_private), "veth0", ether_setup); - if ( (dev1 == NULL) || (dev2 == NULL) ) - goto fail; + for (i = 0; i < nloopbacks; i++) + if ((err = make_loopback(i)) != 0) + break; - loopback_construct(dev1, dev2); - loopback_construct(dev2, dev1); - - dev1->features |= NETIF_F_NO_CSUM; - dev2->features |= NETIF_F_IP_CSUM; - - /* - * Initialise a dummy MAC address for the 'dummy backend' interface. We - * choose the numerically largest non-broadcast address to prevent the - * address getting stolen by an Ethernet bridge for STP purposes. 
- */ - memset(dev1->dev_addr, 0xFF, ETH_ALEN); - dev1->dev_addr[0] &= ~0x01; - - if ( (err = register_netdev(dev1)) != 0 ) - goto fail; - - if ( (err = register_netdev(dev2)) != 0 ) - { - unregister_netdev(dev1); - goto fail; - } - - return 0; - - fail: - if ( dev1 != NULL ) - kfree(dev1); - if ( dev2 != NULL ) - kfree(dev2); - return err; + return err; } module_init(loopback_init); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Sep 9 16:30:54 2005 @@ -12,24 +12,8 @@ #include "common.h" #include <asm-xen/balloon.h> - -#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) -#include <asm-xen/xen-public/grant_table.h> -#include <asm-xen/gnttab.h> -#ifdef GRANT_DEBUG -static void -dump_packet(int tag, u32 addr, unsigned char *p) -{ - int i; - - printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr); - for (i = 0; i < 20; i++) { - printk("%02x", p[i]); - } - printk("\n"); -} -#endif -#endif +#include <asm-xen/xen-public/memory.h> + static void netif_idx_release(u16 pending_idx); static void netif_page_release(struct page *page); @@ -56,7 +40,8 @@ static struct sk_buff_head rx_queue; static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1]; static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE]; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX + +#ifdef CONFIG_XEN_NETDEV_GRANT static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS]; #else static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE]; @@ -87,16 +72,13 @@ static struct sk_buff_head tx_queue; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT static u16 grant_tx_ref[MAX_PENDING_REQS]; static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; static 
gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; + #else static multicall_entry_t tx_mcl[MAX_PENDING_REQS]; -#endif - -#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) -#define GRANT_INVALID_REF (0xFFFF) #endif static struct list_head net_schedule_list; @@ -110,25 +92,37 @@ static unsigned long alloc_mfn(void) { unsigned long mfn = 0, flags; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = MAX_MFN_ALLOC, + .extent_order = 0, + .domid = DOMID_SELF + }; spin_lock_irqsave(&mfn_lock, flags); if ( unlikely(alloc_index == 0) ) - alloc_index = HYPERVISOR_dom_mem_op( - MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0); + alloc_index = HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); if ( alloc_index != 0 ) mfn = mfn_list[--alloc_index]; spin_unlock_irqrestore(&mfn_lock, flags); return mfn; } -#ifndef CONFIG_XEN_NETDEV_GRANT_RX +#ifndef CONFIG_XEN_NETDEV_GRANT static void free_mfn(unsigned long mfn) { unsigned long flags; + struct xen_memory_reservation reservation = { + .extent_start = &mfn, + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; spin_lock_irqsave(&mfn_lock, flags); if ( alloc_index != MAX_MFN_ALLOC ) mfn_list[alloc_index++] = mfn; - else if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, - &mfn, 1, 0) != 1 ) + else if ( HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) + != 1 ) BUG(); spin_unlock_irqrestore(&mfn_lock, flags); } @@ -187,7 +181,7 @@ dev_kfree_skb(skb); skb = nskb; } -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT #ifdef DEBUG_GRANT printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n", netif->rx->req_prod, @@ -233,12 +227,12 @@ static void net_rx_action(unsigned long unused) { - netif_t *netif; + netif_t *netif = NULL; s8 status; u16 size, id, evtchn; multicall_entry_t *mcl; mmu_update_t *mmu; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT 
gnttab_donate_t *gop; #else struct mmuext_op *mmuext; @@ -253,7 +247,7 @@ mcl = rx_mcl; mmu = rx_mmu; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT gop = grant_rx_op; #else mmuext = rx_mmuext; @@ -269,7 +263,7 @@ if ( (new_mfn = alloc_mfn()) == 0 ) { if ( net_ratelimit() ) - printk(KERN_WARNING "Memory squeeze in netback driver.\n"); + WPRINTK("Memory squeeze in netback driver.\n"); mod_timer(&net_timer, jiffies + HZ); skb_queue_head(&rx_queue, skb); break; @@ -284,7 +278,7 @@ pfn_pte_ma(new_mfn, PAGE_KERNEL), 0); mcl++; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT gop->mfn = old_mfn; gop->domid = netif->domid; gop->handle = netif->rx->ring[ @@ -303,7 +297,7 @@ mmuext->mfn = old_mfn; mmuext++; #endif - mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu->ptr = ((unsigned long long)new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = __pa(vdata) >> PAGE_SHIFT; mmu++; @@ -327,7 +321,7 @@ mcl->args[3] = DOMID_SELF; mcl++; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; #else mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; @@ -336,9 +330,17 @@ BUG(); mcl = rx_mcl; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_donate, grant_rx_op, gop - grant_rx_op)); +#ifdef CONFIG_XEN_NETDEV_GRANT + if(HYPERVISOR_grant_table_op(GNTTABOP_donate, grant_rx_op, + gop - grant_rx_op)) { + /* + ** The other side has given us a bad grant ref, or has no headroom, + ** or has gone away. Unfortunately the current grant table code + ** doesn't inform us which is the case, so not much we can do. + */ + DPRINTK("net_rx: donate to DOM%u failed; dropping (up to) %d " + "packets.\n", grant_rx_op[0].domid, gop - grant_rx_op); + } gop = grant_rx_op; #else mmuext = rx_mmuext; @@ -350,7 +352,7 @@ /* Rederive the machine addresses. 
*/ new_mfn = mcl[0].args[1] >> PAGE_SHIFT; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */ #else old_mfn = mmuext[0].mfn; @@ -367,8 +369,13 @@ /* Check the reassignment error code. */ status = NETIF_RSP_OKAY; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - BUG_ON(gop->status != 0); /* XXX */ +#ifdef CONFIG_XEN_NETDEV_GRANT + if(gop->status != 0) { + DPRINTK("Bad status %d from grant donate to DOM%u\n", + gop->status, netif->domid); + /* XXX SMH: should free 'old_mfn' here */ + status = NETIF_RSP_ERROR; + } #else if ( unlikely(mcl[1].result != 0) ) { @@ -391,7 +398,7 @@ netif_put(netif); dev_kfree_skb(skb); -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT mcl++; gop++; #else @@ -407,6 +414,7 @@ notify_via_evtchn(evtchn); } + out: /* More work to do? */ if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) ) tasklet_schedule(&net_rx_tasklet); @@ -483,7 +491,7 @@ inline static void net_tx_action_dealloc(void) { -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT gnttab_unmap_grant_ref_t *gop; #else multicall_entry_t *mcl; @@ -495,7 +503,7 @@ dc = dealloc_cons; dp = dealloc_prod; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT /* * Free up any grants we have finished using */ @@ -529,7 +537,7 @@ #endif while ( dealloc_cons != dp ) { -#ifndef CONFIG_XEN_NETDEV_GRANT_TX +#ifndef CONFIG_XEN_NETDEV_GRANT /* The update_va_mapping() must not fail. 
*/ BUG_ON(mcl[0].result != 0); #endif @@ -556,7 +564,7 @@ netif_put(netif); -#ifndef CONFIG_XEN_NETDEV_GRANT_TX +#ifndef CONFIG_XEN_NETDEV_GRANT mcl++; #endif } @@ -572,7 +580,7 @@ netif_tx_request_t txreq; u16 pending_idx; NETIF_RING_IDX i; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT gnttab_map_grant_ref_t *mop; #else multicall_entry_t *mcl; @@ -582,7 +590,7 @@ if ( dealloc_cons != dealloc_prod ) net_tx_action_dealloc(); -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT mop = tx_map_ops; #else mcl = tx_mcl; @@ -683,7 +691,7 @@ /* Packets passed to netif_rx() must have some headroom. */ skb_reserve(skb, 16); -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT mop->host_addr = MMAP_VADDR(pending_idx); mop->dom = netif->domid; mop->ref = txreq.addr >> PAGE_SHIFT; @@ -706,7 +714,7 @@ pending_cons++; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) ) break; #else @@ -716,7 +724,7 @@ #endif } -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT if ( mop == tx_map_ops ) return; @@ -739,7 +747,7 @@ memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq)); /* Check the remap error code. 
*/ -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT /* XXX SMH: error returns from grant operations are pretty poorly specified/thought out, but the below at least conforms with @@ -813,7 +821,7 @@ netif_rx(skb); netif->dev->last_rx = jiffies; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT mop++; #else mcl++; @@ -932,16 +940,13 @@ int i; struct page *page; - if ( !(xen_start_info.flags & SIF_NET_BE_DOMAIN) && - !(xen_start_info.flags & SIF_INITDOMAIN) ) + if ( !(xen_start_info->flags & SIF_NET_BE_DOMAIN) && + !(xen_start_info->flags & SIF_INITDOMAIN) ) return 0; - printk("Initialising Xen netif backend\n"); -#ifdef CONFIG_XEN_NETDEV_GRANT_TX - printk("#### netback tx using grant tables\n"); -#endif -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - printk("#### netback rx using grant tables\n"); + IPRINTK("Initialising Xen netif backend.\n"); +#ifdef CONFIG_XEN_NETDEV_GRANT + IPRINTK("Using grant tables.\n"); #endif /* We can increase reservation by this much in net_rx_action(). */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 9 16:30:54 2005 @@ -160,9 +160,47 @@ } #endif + kobject_hotplug(&dev->dev.kobj, KOBJ_ONLINE); + /* Pass in NULL node to skip exist test. 
*/ frontend_changed(&be->watch, NULL); } +} + +static int netback_hotplug(struct xenbus_device *xdev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + struct backend_info *be; + netif_t *netif; + char **key, *val; + int i = 0, length = 0; + static char *env_vars[] = { "script", "domain", "mac", "bridge", "ip", + NULL }; + + be = xdev->data; + netif = be->netif; + + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "vif=%s", netif->dev->name); + + key = env_vars; + while (*key != NULL) { + val = xenbus_read(xdev->nodename, *key, NULL); + if (!IS_ERR(val)) { + char buf[strlen(*key) + 4]; + sprintf(buf, "%s=%%s", *key); + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + buf, val); + kfree(val); + } + key++; + } + + envp[i] = NULL; + + return 0; } static int netback_probe(struct xenbus_device *dev, @@ -249,6 +287,7 @@ .ids = netback_ids, .probe = netback_probe, .remove = netback_remove, + .hotplug = netback_hotplug, }; void netif_xenbus_init(void) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Sep 9 16:30:54 2005 @@ -50,13 +50,23 @@ #include <asm-xen/evtchn.h> #include <asm-xen/xenbus.h> #include <asm-xen/xen-public/io/netif.h> +#include <asm-xen/xen-public/memory.h> #include <asm-xen/balloon.h> #include <asm/page.h> #include <asm/uaccess.h> -#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) +#ifdef CONFIG_XEN_NETDEV_GRANT #include <asm-xen/xen-public/grant_table.h> #include <asm-xen/gnttab.h> + +static grant_ref_t gref_tx_head; +static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; + +static grant_ref_t gref_rx_head; +static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1]; + +#define GRANT_INVALID_REF (0xFFFF) + #ifdef GRANT_DEBUG static void dump_packet(int tag, void *addr, u32 ap) @@ 
-70,8 +80,17 @@ } printk("\n"); } -#endif -#endif + +#define GDPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define dump_packet(x,y,z) ((void)0) +#define GDPRINTK(_f, _a...) ((void)0) +#endif + +#endif + + #ifndef __GFP_NOWARN #define __GFP_NOWARN 0 @@ -101,22 +120,10 @@ #define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */ #endif -#ifdef CONFIG_XEN_NETDEV_GRANT_TX -static grant_ref_t gref_tx_head; -static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1]; -#endif - -#ifdef CONFIG_XEN_NETDEV_GRANT_RX -static grant_ref_t gref_rx_head; -static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1]; -#endif - -#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX) -#define GRANT_INVALID_REF (0xFFFF) -#endif #define NETIF_STATE_DISCONNECTED 0 #define NETIF_STATE_CONNECTED 1 + static unsigned int netif_state = NETIF_STATE_DISCONNECTED; @@ -278,7 +285,7 @@ for (i = np->tx_resp_cons; i != prod; i++) { id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id; skb = np->tx_skbs[id]; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) { /* other domain is still using this grant - shouldn't happen but if it does, we'll try to reclaim the grant later */ @@ -309,7 +316,7 @@ mb(); } while (prod != np->tx->resp_prod); -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT out: #endif @@ -328,8 +335,9 @@ struct sk_buff *skb; int i, batch_target; NETIF_RING_IDX req_prod = np->rx->req_prod; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - int ref; + struct xen_memory_reservation reservation; +#ifdef CONFIG_XEN_NETDEV_GRANT + grant_ref_t ref; #endif if (unlikely(np->backend_state != BEST_CONNECTED)) @@ -363,9 +371,9 @@ np->rx_skbs[id] = skb; np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT ref = gnttab_claim_grant_reference(&gref_rx_head); - 
if (unlikely(ref < 0)) { + if (unlikely((signed short)ref < 0)) { printk(KERN_ALERT "#### netfront can't claim rx reference\n"); BUG(); } @@ -388,12 +396,15 @@ rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ - rx_mcl[i].op = __HYPERVISOR_dom_mem_op; - rx_mcl[i].args[0] = MEMOP_decrease_reservation; - rx_mcl[i].args[1] = (unsigned long)rx_pfn_array; - rx_mcl[i].args[2] = (unsigned long)i; - rx_mcl[i].args[3] = 0; - rx_mcl[i].args[4] = DOMID_SELF; + rx_mcl[i].op = __HYPERVISOR_memory_op; + rx_mcl[i].args[0] = XENMEM_decrease_reservation; + rx_mcl[i].args[1] = (unsigned long)&reservation; + + reservation.extent_start = rx_pfn_array; + reservation.nr_extents = i; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; /* Tell the ballon driver what is going on. */ balloon_update_driver_allowance(i); @@ -401,7 +412,7 @@ /* Zap PTEs and give away pages in one big multicall. */ (void)HYPERVISOR_multicall(rx_mcl, i+1); - /* Check return status of HYPERVISOR_dom_mem_op(). */ + /* Check return status of HYPERVISOR_memory_op(). 
*/ if (unlikely(rx_mcl[i].result != i)) panic("Unable to reduce memory reservation\n"); @@ -421,8 +432,8 @@ struct net_private *np = netdev_priv(dev); netif_tx_request_t *tx; NETIF_RING_IDX i; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX - unsigned int ref; +#ifdef CONFIG_XEN_NETDEV_GRANT + grant_ref_t ref; unsigned long mfn; #endif @@ -459,9 +470,9 @@ tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req; tx->id = id; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT ref = gnttab_claim_grant_reference(&gref_tx_head); - if (unlikely(ref < 0)) { + if (unlikely((signed short)ref < 0)) { printk(KERN_ALERT "#### netfront can't claim tx grant reference\n"); BUG(); } @@ -514,7 +525,7 @@ network_tx_buf_gc(dev); spin_unlock_irqrestore(&np->tx_lock, flags); - if ((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN)) + if((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN)) netif_rx_schedule(dev); return IRQ_HANDLED; @@ -532,7 +543,7 @@ int work_done, budget, more_to_do = 1; struct sk_buff_head rxq; unsigned long flags; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT unsigned long mfn; grant_ref_t ref; #endif @@ -569,8 +580,19 @@ continue; } -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - ref = grant_rx_ref[rx->id]; +#ifdef CONFIG_XEN_NETDEV_GRANT + ref = grant_rx_ref[rx->id]; + + if(ref == GRANT_INVALID_REF) { + printk(KERN_WARNING "Bad rx grant reference %d from dom %d.\n", + ref, np->backend_id); + np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id; + wmb(); + np->rx->req_prod++; + work_done--; + continue; + } + grant_rx_ref[rx->id] = GRANT_INVALID_REF; mfn = gnttab_end_foreign_transfer_ref(ref); gnttab_release_grant_reference(&gref_rx_head, ref); @@ -580,7 +602,7 @@ ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); /* NB. We handle skb overflow later. 
*/ -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT skb->data = skb->head + rx->addr; #else skb->data = skb->head + (rx->addr & ~PAGE_MASK); @@ -595,14 +617,14 @@ np->stats.rx_bytes += rx->status; /* Remap the page. */ -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE; +#ifdef CONFIG_XEN_NETDEV_GRANT + mmu->ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; #else mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE; #endif mmu->val = __pa(skb->head) >> PAGE_SHIFT; mmu++; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT MULTI_update_va_mapping(mcl, (unsigned long)skb->head, pfn_pte_ma(mfn, PAGE_KERNEL), 0); #else @@ -612,19 +634,19 @@ #endif mcl++; -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn; + GDPRINTK("#### rx_poll enqueue vdata=%p mfn=%lu ref=%x\n", + skb->data, mfn, ref); #else phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = rx->addr >> PAGE_SHIFT; -#endif - -#ifdef GRANT_DEBUG - printk(KERN_ALERT "#### rx_poll enqueue vdata=%p mfn=%lu ref=%x\n", - skb->data, mfn, ref); -#endif +#endif + + __skb_queue_tail(&rxq, skb); } + /* Some pages are no longer absent... 
*/ balloon_update_driver_allowance(-work_done); @@ -641,9 +663,9 @@ } while ((skb = __skb_dequeue(&rxq)) != NULL) { -#ifdef GRANT_DEBUG - printk(KERN_ALERT "#### rx_poll dequeue vdata=%p mfn=%lu\n", - skb->data, virt_to_mfn(skb->data)); +#ifdef CONFIG_XEN_NETDEV_GRANT + GDPRINTK("#### rx_poll dequeue vdata=%p mfn=%lu\n", + skb->data, virt_to_mfn(skb->data)); dump_packet('d', skb->data, (unsigned long)skb->data); #endif /* @@ -742,7 +764,6 @@ return &np->stats; } - static void network_connect(struct net_device *dev) { struct net_private *np; @@ -782,8 +803,11 @@ tx = &np->tx->ring[requeue_idx++].req; tx->id = i; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX - tx->addr = 0; /*(ref << PAGE_SHIFT) |*/ +#ifdef CONFIG_XEN_NETDEV_GRANT + gnttab_grant_foreign_access_ref(grant_tx_ref[i], np->backend_id, + virt_to_mfn(np->tx_skbs[i]->data), + GNTMAP_readonly); + tx->addr = grant_tx_ref[i] << PAGE_SHIFT; #else tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT; #endif @@ -798,9 +822,20 @@ np->tx->req_prod = requeue_idx; /* Rebuild the RX buffer freelist and the RX ring itself. */ - for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) - if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) - np->rx->ring[requeue_idx++].req.id = i; + for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++) { + if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET) { +#ifdef CONFIG_XEN_NETDEV_GRANT + /* Reinstate the grant ref so backend can 'donate' mfn to us. */ + gnttab_grant_foreign_transfer_ref(grant_rx_ref[i], np->backend_id, + virt_to_mfn(np->rx_skbs[i]->head) + ); + np->rx->ring[requeue_idx].req.gref = grant_rx_ref[i]; +#endif + np->rx->ring[requeue_idx].req.id = i; + requeue_idx++; + } + } + wmb(); np->rx->req_prod = requeue_idx; @@ -896,13 +931,14 @@ /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
*/ for (i = 0; i <= NETIF_TX_RING_SIZE; i++) { np->tx_skbs[i] = (void *)((unsigned long) i+1); -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT grant_tx_ref[i] = GRANT_INVALID_REF; #endif } + for (i = 0; i <= NETIF_RX_RING_SIZE; i++) { np->rx_skbs[i] = (void *)((unsigned long) i+1); -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT grant_rx_ref[i] = GRANT_INVALID_REF; #endif } @@ -986,10 +1022,8 @@ evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; int err; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT info->tx_ring_ref = GRANT_INVALID_REF; -#endif -#ifdef CONFIG_XEN_NETDEV_GRANT_RX info->rx_ring_ref = GRANT_INVALID_REF; #endif @@ -1009,7 +1043,7 @@ memset(info->rx, 0, PAGE_SIZE); info->backend_state = BEST_DISCONNECTED; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT err = gnttab_grant_foreign_access(info->backend_id, virt_to_mfn(info->tx), 0); if (err < 0) { @@ -1017,11 +1051,7 @@ goto out; } info->tx_ring_ref = err; -#else - info->tx_ring_ref = virt_to_mfn(info->tx); -#endif - -#ifdef CONFIG_XEN_NETDEV_GRANT_RX + err = gnttab_grant_foreign_access(info->backend_id, virt_to_mfn(info->rx), 0); if (err < 0) { @@ -1029,7 +1059,9 @@ goto out; } info->rx_ring_ref = err; + #else + info->tx_ring_ref = virt_to_mfn(info->tx); info->rx_ring_ref = virt_to_mfn(info->rx); #endif @@ -1049,16 +1081,17 @@ if (info->rx) free_page((unsigned long)info->rx); info->rx = 0; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX + +#ifdef CONFIG_XEN_NETDEV_GRANT if (info->tx_ring_ref != GRANT_INVALID_REF) gnttab_end_foreign_access(info->tx_ring_ref, 0); info->tx_ring_ref = GRANT_INVALID_REF; -#endif -#ifdef CONFIG_XEN_NETDEV_GRANT_RX + if (info->rx_ring_ref != GRANT_INVALID_REF) gnttab_end_foreign_access(info->rx_ring_ref, 0); info->rx_ring_ref = GRANT_INVALID_REF; #endif + return err; } @@ -1070,16 +1103,17 @@ if (info->rx) free_page((unsigned long)info->rx); info->rx = 0; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX + +#ifdef 
CONFIG_XEN_NETDEV_GRANT if (info->tx_ring_ref != GRANT_INVALID_REF) gnttab_end_foreign_access(info->tx_ring_ref, 0); info->tx_ring_ref = GRANT_INVALID_REF; -#endif -#ifdef CONFIG_XEN_NETDEV_GRANT_RX + if (info->rx_ring_ref != GRANT_INVALID_REF) gnttab_end_foreign_access(info->rx_ring_ref, 0); info->rx_ring_ref = GRANT_INVALID_REF; #endif + unbind_evtchn_from_irqhandler(info->evtchn, info->netdev); info->evtchn = 0; } @@ -1272,25 +1306,25 @@ static int netfront_suspend(struct xenbus_device *dev) { - struct net_private *np = dev->data; - /* Avoid having tx/rx stuff happen until we're ready. */ - unbind_evtchn_from_irqhandler(np->evtchn, np->netdev); - return 0; + struct netfront_info *info = dev->data; + + unregister_xenbus_watch(&info->watch); + kfree(info->backend); + info->backend = NULL; + + netif_free(info); + + return 0; } static int netfront_resume(struct xenbus_device *dev) { - struct net_private *np = dev->data; - /* - * Connect regardless of whether IFF_UP flag set. - * Stop bad things from happening until we're back up. 
- */ - np->backend_state = BEST_DISCONNECTED; - memset(np->tx, 0, PAGE_SIZE); - memset(np->rx, 0, PAGE_SIZE); - - // send_interface_connect(np); - return 0; + struct net_private *np = dev->data; + int err; + + err = talk_to_backend(dev, np); + + return err; } static struct xenbus_driver netfront = { @@ -1335,32 +1369,31 @@ { int err = 0; - if (xen_start_info.flags & SIF_INITDOMAIN) + if (xen_start_info->flags & SIF_INITDOMAIN) return 0; -#ifdef CONFIG_XEN_NETDEV_GRANT_TX - /* A grant for every ring slot */ + if ((err = xennet_proc_init()) != 0) + return err; + + IPRINTK("Initialising virtual ethernet driver.\n"); + +#ifdef CONFIG_XEN_NETDEV_GRANT + IPRINTK("Using grant tables.\n"); + + /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE, &gref_tx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); return 1; } - printk(KERN_ALERT "Netdev frontend (TX) is using grant tables.\n"); -#endif -#ifdef CONFIG_XEN_NETDEV_GRANT_RX - /* A grant for every ring slot */ + /* A grant for every rx ring slot */ if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE, &gref_rx_head) < 0) { printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); return 1; } - printk(KERN_ALERT "Netdev frontend (RX) is using grant tables.\n"); -#endif - - if ((err = xennet_proc_init()) != 0) - return err; - - IPRINTK("Initialising virtual ethernet driver.\n"); +#endif + (void)register_inetaddr_notifier(¬ifier_inetdev); @@ -1373,10 +1406,8 @@ static void netif_exit(void) { -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT gnttab_free_grant_references(gref_tx_head); -#endif -#ifdef CONFIG_XEN_NETDEV_GRANT_RX gnttab_free_grant_references(gref_rx_head); #endif } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri Sep 9 16:30:54 2005 @@ -63,27 
+63,23 @@ "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" : "=a" (ret) : "0" (&hypercall) : "memory" ); #elif defined (__x86_64__) - __asm__ __volatile__ ( - "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR - : "=a" (ret) - : "a" ((unsigned long)hypercall.op), - "D" ((unsigned long)hypercall.arg[0]), - "S" ((unsigned long)hypercall.arg[1]), - "d" ((unsigned long)hypercall.arg[2]), - "g" ((unsigned long)hypercall.arg[3]), - "g" ((unsigned long)hypercall.arg[4]) - : "r11","rcx","r8","r10","memory"); + { + long ign1, ign2, ign3; + __asm__ __volatile__ ( + "movq %8,%%r10; movq %9,%%r8;" TRAP_INSTR + : "=a" (ret), "=D" (ign1), "=S" (ign2), "=d" (ign3) + : "0" ((unsigned long)hypercall.op), + "1" ((unsigned long)hypercall.arg[0]), + "2" ((unsigned long)hypercall.arg[1]), + "3" ((unsigned long)hypercall.arg[2]), + "g" ((unsigned long)hypercall.arg[3]), + "g" ((unsigned long)hypercall.arg[4]) + : "r11","rcx","r8","r10","memory"); + } #endif } break; - case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN: - { - extern int initdom_ctrlif_domcontroller_port; - ret = initdom_ctrlif_domcontroller_port; - } - break; - #if defined(CONFIG_XEN_PRIVILEGED_GUEST) case IOCTL_PRIVCMD_MMAP: { @@ -120,9 +116,9 @@ if ( (msg[j].va + (msg[j].npages<<PAGE_SHIFT)) > vma->vm_end ) return -EINVAL; - if ( (rc = direct_remap_area_pages(vma->vm_mm, + if ( (rc = direct_remap_pfn_range(vma->vm_mm, msg[j].va&PAGE_MASK, - msg[j].mfn<<PAGE_SHIFT, + msg[j].mfn, msg[j].npages<<PAGE_SHIFT, vma->vm_page_prot, mmapcmd.dom)) < 0 ) @@ -202,8 +198,8 @@ extern int do_xenbus_probe(void*); unsigned long page; - if (xen_start_info.store_evtchn != 0) { - ret = xen_start_info.store_mfn; + if (xen_start_info->store_evtchn != 0) { + ret = xen_start_info->store_mfn; break; } @@ -219,10 +215,10 @@ SetPageReserved(virt_to_page(page)); /* Initial connect. Setup channel and page. 
*/ - xen_start_info.store_evtchn = data; - xen_start_info.store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >> + xen_start_info->store_evtchn = data; + xen_start_info->store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >> PAGE_SHIFT); - ret = xen_start_info.store_mfn; + ret = xen_start_info->store_mfn; /* We'll return then this will wait for daemon to answer */ kthread_run(do_xenbus_probe, NULL, "xenbus_probe"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/usbback/common.h --- a/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/usbback/common.h Fri Sep 9 16:30:54 2005 @@ -12,7 +12,6 @@ #include <asm/io.h> #include <asm/setup.h> #include <asm/pgalloc.h> -#include <asm-xen/ctrl_if.h> #include <asm-xen/hypervisor.h> #include <asm-xen/xen-public/io/usbif.h> diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/usbback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/usbback/interface.c Fri Sep 9 16:30:54 2005 @@ -161,8 +161,8 @@ } prot = __pgprot(_KERNPG_TABLE); - error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), - shmem_frame<<PAGE_SHIFT, PAGE_SIZE, + error = direct_remap_pfn_range(&init_mm, VMALLOC_VMADDR(vma->addr), + shmem_frame, PAGE_SIZE, prot, domid); if ( error != 0 ) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c --- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Sep 9 16:30:54 2005 @@ -1029,8 +1029,8 @@ int i; struct page *page; - if ( !(xen_start_info.flags & SIF_INITDOMAIN) && - !(xen_start_info.flags & SIF_USB_BE_DOMAIN) ) + if ( !(xen_start_info->flags & SIF_INITDOMAIN) && + !(xen_start_info->flags & SIF_USB_BE_DOMAIN) ) return 0; page = balloon_alloc_empty_page_range(MMAP_PAGES); diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c --- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Fri Sep 9 16:30:54 2005 @@ -70,7 +70,6 @@ #include "../../../../../drivers/usb/hcd.h" #include <asm-xen/xen-public/io/usbif.h> -#include <asm/ctrl_if.h> #include <asm/xen-public/io/domain_controller.h> /* @@ -1675,8 +1674,8 @@ { int retval = -ENOMEM, i; - if ( (xen_start_info.flags & SIF_INITDOMAIN) - || (xen_start_info.flags & SIF_USB_BE_DOMAIN) ) + if ( (xen_start_info->flags & SIF_INITDOMAIN) || + (xen_start_info->flags & SIF_USB_BE_DOMAIN) ) return 0; info(DRIVER_DESC " " DRIVER_VERSION); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Fri Sep 9 16:30:54 2005 @@ -4,3 +4,4 @@ xenbus-objs += xenbus_comms.o xenbus-objs += xenbus_xs.o xenbus-objs += xenbus_probe.o +xenbus-objs += xenbus_dev.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri Sep 9 16:30:54 2005 @@ -48,12 +48,12 @@ static inline struct ringbuf_head *outbuf(void) { - return mfn_to_virt(xen_start_info.store_mfn); + return mfn_to_virt(xen_start_info->store_mfn); } static inline struct ringbuf_head *inbuf(void) { - return mfn_to_virt(xen_start_info.store_mfn) + PAGE_SIZE/2; + return mfn_to_virt(xen_start_info->store_mfn) + PAGE_SIZE/2; } static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs) @@ -145,7 +145,7 @@ data += avail; len -= avail; update_output_chunk(out, avail); - notify_via_evtchn(xen_start_info.store_evtchn); + notify_via_evtchn(xen_start_info->store_evtchn); } while (len != 0); return 0; @@ -190,7 +190,7 @@ 
pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); /* If it was full, tell them we've taken some. */ if (was_full) - notify_via_evtchn(xen_start_info.store_evtchn); + notify_via_evtchn(xen_start_info->store_evtchn); } /* If we left something, wake watch thread to deal with it. */ @@ -205,20 +205,20 @@ { int err; - if (!xen_start_info.store_evtchn) + if (!xen_start_info->store_evtchn) return 0; err = bind_evtchn_to_irqhandler( - xen_start_info.store_evtchn, wake_waiting, + xen_start_info->store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); if (err) { printk(KERN_ERR "XENBUS request irq failed %i\n", err); - unbind_evtchn_from_irq(xen_start_info.store_evtchn); + unbind_evtchn_from_irq(xen_start_info->store_evtchn); return err; } /* FIXME zero out page -- domain builder should probably do this*/ - memset(mfn_to_virt(xen_start_info.store_mfn), 0, PAGE_SIZE); + memset(mfn_to_virt(xen_start_info->store_mfn), 0, PAGE_SIZE); return 0; } @@ -226,8 +226,8 @@ void xb_suspend_comms(void) { - if (!xen_start_info.store_evtchn) + if (!xen_start_info->store_evtchn) return; - unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq); -} + unbind_evtchn_from_irqhandler(xen_start_info->store_evtchn, &xb_waitq); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Sep 9 16:30:54 2005 @@ -147,6 +147,39 @@ return 0; } +static int xenbus_hotplug_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + struct xenbus_device *xdev; + int i = 0; + int length = 0; + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ + add_hotplug_env_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); + + /* 
terminate, set to next free slot, shrink available space */ + envp[i] = NULL; + envp = &envp[i]; + num_envp -= i; + buffer = &buffer[length]; + buffer_size -= length; + + if (dev->driver && to_xenbus_driver(dev->driver)->hotplug) + return to_xenbus_driver(dev->driver)->hotplug + (xdev, envp, num_envp, buffer, buffer_size); + + return 0; +} + static int xenbus_probe_backend(const char *type, const char *uuid); static struct xen_bus_type xenbus_backend = { .root = "backend", @@ -156,6 +189,7 @@ .bus = { .name = "xen-backend", .match = xenbus_match, + .hotplug = xenbus_hotplug_backend, }, .dev = { .bus_id = "xen-backend", @@ -209,6 +243,7 @@ { return xenbus_register_driver(drv, &xenbus_frontend); } +EXPORT_SYMBOL(xenbus_register_device); int xenbus_register_backend(struct xenbus_driver *drv) { @@ -586,7 +621,7 @@ down(&xenbus_lock); - if (xen_start_info.store_evtchn) { + if (xen_start_info->store_evtchn) { ret = nb->notifier_call(nb, 0, NULL); } else { notifier_chain_register(&xenstore_chain, nb); @@ -612,7 +647,7 @@ int err = 0; /* Initialize xenstore comms unless already done. 
*/ - printk("store_evtchn = %i\n", xen_start_info.store_evtchn); + printk("store_evtchn = %i\n", xen_start_info->store_evtchn); err = xs_init(); if (err) { printk("XENBUS: Error initializing xenstore comms:" @@ -640,7 +675,7 @@ device_register(&xenbus_frontend.dev); device_register(&xenbus_backend.dev); - if (!xen_start_info.store_evtchn) + if (!xen_start_info->store_evtchn) return 0; do_xenbus_probe(NULL); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Fri Sep 9 16:30:54 2005 @@ -45,7 +45,9 @@ static char printf_buffer[4096]; static LIST_HEAD(watches); + DECLARE_MUTEX(xenbus_lock); +EXPORT_SYMBOL(xenbus_lock); static int get_error(const char *errorstring) { @@ -104,10 +106,10 @@ } /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ -static void *xs_talkv(enum xsd_sockmsg_type type, - const struct kvec *iovec, - unsigned int num_vecs, - unsigned int *len) +void *xs_talkv(enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) { struct xsd_sockmsg msg; void *ret = NULL; @@ -224,6 +226,7 @@ ret[(*num)++] = p; return ret; } +EXPORT_SYMBOL(xenbus_directory); /* Check if a path exists. Return 1 if it does. */ int xenbus_exists(const char *dir, const char *node) @@ -237,6 +240,7 @@ kfree(d); return 1; } +EXPORT_SYMBOL(xenbus_exists); /* Get the value of a single file. * Returns a kmalloced value: call free() on it after use. @@ -246,6 +250,7 @@ { return xs_single(XS_READ, join(dir, node), len); } +EXPORT_SYMBOL(xenbus_read); /* Write the value of a single file. * Returns -err on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL. @@ -276,18 +281,21 @@ return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); } +EXPORT_SYMBOL(xenbus_write); /* Create a new directory. 
*/ int xenbus_mkdir(const char *dir, const char *node) { return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL)); } +EXPORT_SYMBOL(xenbus_mkdir); /* Destroy a file or directory (directories must be empty). */ int xenbus_rm(const char *dir, const char *node) { return xs_error(xs_single(XS_RM, join(dir, node), NULL)); } +EXPORT_SYMBOL(xenbus_rm); /* Start a transaction: changes by others will not be seen during this * transaction, and changes will not be visible to others until end. @@ -298,6 +306,7 @@ { return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL)); } +EXPORT_SYMBOL(xenbus_transaction_start); /* End a transaction. * If abandon is true, transaction is discarded instead of committed. @@ -312,6 +321,7 @@ strcpy(abortstr, "T"); return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL)); } +EXPORT_SYMBOL(xenbus_transaction_end); /* Single read and scanf: returns -errno or num scanned. */ int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) @@ -333,6 +343,7 @@ return -ERANGE; return ret; } +EXPORT_SYMBOL(xenbus_scanf); /* Single printf and write: returns -errno or 0. */ int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) @@ -348,6 +359,7 @@ BUG_ON(ret > sizeof(printf_buffer)-1); return xenbus_write(dir, node, printf_buffer, O_CREAT); } +EXPORT_SYMBOL(xenbus_printf); /* Report a (negative) errno into the store, with explanation. */ void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) @@ -369,6 +381,7 @@ printk("xenbus: failed to write error node for %s (%s)\n", dev->nodename, printf_buffer); } +EXPORT_SYMBOL(xenbus_dev_error); /* Clear any error. */ void xenbus_dev_ok(struct xenbus_device *dev) @@ -381,6 +394,7 @@ dev->has_error = 0; } } +EXPORT_SYMBOL(xenbus_dev_ok); /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ int xenbus_gather(const char *dir, ...) 
@@ -410,6 +424,7 @@ va_end(ap); return ret; } +EXPORT_SYMBOL(xenbus_gather); static int xs_watch(const char *path, const char *token) { @@ -482,6 +497,7 @@ list_add(&watch->list, &watches); return err; } +EXPORT_SYMBOL(register_xenbus_watch); void unregister_xenbus_watch(struct xenbus_watch *watch) { @@ -499,6 +515,7 @@ "XENBUS Failed to release watch %s: %i\n", watch->node, err); } +EXPORT_SYMBOL(unregister_xenbus_watch); /* Re-register callbacks to all watches. */ void reregister_xenbus_watches(void) @@ -540,7 +557,7 @@ BUG_ON(!w); w->callback(w, node); kfree(node); - } else + } else if (node) printk(KERN_WARNING "XENBUS xs_read_watch: %li\n", PTR_ERR(node)); up(&xenbus_lock); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h Fri Sep 9 16:30:54 2005 @@ -29,551 +29,360 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ + #include <asm-xen/xen-public/xen.h> -/* - * Assembler stubs for hyper-calls. 
- */ +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res) \ + : "0" (__HYPERVISOR_##name) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "4" ((long)(a4)), "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) static inline int HYPERVISOR_set_trap_table( - trap_info_t *table) -{ - int ret; - unsigned long ignore; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" 
(__HYPERVISOR_set_trap_table), "1" (table) - : "memory" ); - - return ret; + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); } static inline int HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int HYPERVISOR_set_gdt( - unsigned long *frame_list, int entries) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries) - : "memory" ); - - - return ret; + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); } static inline int HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp) - : "memory" ); - - return ret; + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, 
stack_switch, ss, esp); } static inline int HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector), - "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address) - : "memory" ); - - return ret; + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); } static inline int HYPERVISOR_fpu_taskswitch( - int set) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set) - : "memory" ); - - return ret; + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); } static inline int HYPERVISOR_yield( - void) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield) - : "memory", "ecx" ); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_yield, 0); } static inline int HYPERVISOR_block( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block) - : "memory", "ecx" ); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_block, 0); } static inline int HYPERVISOR_shutdown( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; + void) +{ + return 
_hypercall2(int, sched_op, SCHEDOP_shutdown | + (SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0); } static inline int HYPERVISOR_reboot( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_suspend( - unsigned long srec) -{ - int ret; - unsigned long ign1, ign2; - - /* NB. On suspend, control software expects a suspend record in %esi. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=S" (ign2) - : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory", "ecx"); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_shutdown | + (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0); } static inline int HYPERVISOR_crash( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_shutdown | + (SHUTDOWN_crash << SCHEDOP_reasonshift), 0); } static inline long HYPERVISOR_set_timer_op( - u64 timeout) -{ - int ret; - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi) - : "memory"); - - return ret; + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); } static inline int HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - int ret; - unsigned 
long ign1; - - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op) - : "memory"); - - return ret; + dom0_op_t *dom0_op) +{ + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, dom0_op); } static inline int HYPERVISOR_set_debugreg( - int reg, unsigned long value) -{ - int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value) - : "memory" ); - - return ret; + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long HYPERVISOR_get_debugreg( - int reg) -{ - unsigned long ret; - unsigned long ign; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_get_debugreg), "1" (reg) - : "memory" ); - - return ret; + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); } static inline int HYPERVISOR_update_descriptor( - u64 ma, u64 desc) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_descriptor), - "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)), - "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_dom_mem_op( - unsigned int op, unsigned long *extent_list, - unsigned long nr_extents, unsigned int extent_order) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4), - "=D" (ign5) - : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list), - "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF) - : "memory" ); - - return ret; + u64 ma, u64 desc) +{ + return _hypercall4(int, 
update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); } static inline int HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls) - : "memory" ); - - return ret; + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); } static inline int HYPERVISOR_update_va_mapping( - unsigned long va, pte_t new_val, unsigned long flags) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_va_mapping), - "1" (va), "2" ((new_val).pte_low), + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; #ifdef CONFIG_X86_PAE - "3" ((new_val).pte_high), -#else - "3" (0), + pte_hi = new_val.pte_high; #endif - "4" (flags) - : "memory" ); - - return ret; + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); } static inline int HYPERVISOR_event_channel_op( - void *op) -{ - int ret; - unsigned long ignore; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_event_channel_op), "1" (op) - : "memory" ); - - return ret; + void *op) +{ + return _hypercall1(int, event_channel_op, op); } static inline int HYPERVISOR_xen_version( - int cmd) -{ - int ret; - unsigned long ignore; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_xen_version), "1" (cmd) - : "memory" ); - - return ret; + int cmd) +{ + return _hypercall1(int, xen_version, cmd); } static inline int HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - int ret; - unsigned long ign1, ign2, ign3; - __asm__ __volatile__ ( - 
TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str) - : "memory" ); - - return ret; + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); } static inline int HYPERVISOR_physdev_op( - void *physdev_op) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op) - : "memory" ); - - return ret; + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); } static inline int HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count) - : "memory" ); - - return ret; + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int HYPERVISOR_update_va_mapping_otherdomain( - unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), - "=S" (ign4), "=D" (ign5) - : "0" (__HYPERVISOR_update_va_mapping_otherdomain), - "1" (va), "2" ((new_val).pte_low), + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; #ifdef CONFIG_X86_PAE - "3" ((new_val).pte_high), -#else - "3" (0), + pte_hi = new_val.pte_high; #endif - "4" (flags), "5" (domid) : - "memory" ); - - return ret; + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); } static inline int HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" 
(ign1), "=c" (ign2) - : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type) - : "memory" ); - - return ret; + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); } static inline int HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) - : "memory"); - - return ret; + unsigned long vcpu, vcpu_guest_context_t *ctxt) +{ + return _hypercall2(int, boot_vcpu, vcpu, ctxt); +} + +static inline int +HYPERVISOR_vcpu_up( + int vcpu) +{ + return _hypercall2(int, sched_op, SCHEDOP_vcpu_up | + (vcpu << SCHEDOP_vcpushift), 0); +} + +static inline int +HYPERVISOR_vcpu_pickle( + int vcpu, vcpu_guest_context_t *ctxt) +{ + return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle | + (vcpu << SCHEDOP_vcpushift), ctxt); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + int ret; + unsigned long ign1, ign2; + + /* On suspend, control software expects a suspend record in %esi. */ + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1), "=S" (ign2) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_shutdown | (SHUTDOWN_suspend << + SCHEDOP_reasonshift)), + "2" (srec) : "memory", "ecx"); + + return ret; } static inline int HYPERVISOR_vcpu_down( - int vcpu) -{ - int ret; - unsigned long ign1; - /* Yes, I really do want to clobber edx here: when we resume a - vcpu after unpickling a multi-processor domain, it returns - here, but clobbers all of the call clobbered registers. 
*/ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx", "edx" ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_up( - int vcpu) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_pickle( - int vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), - "2" (ctxt) - : "memory" ); - - return ret; + int vcpu) +{ + int ret; + unsigned long ign1; + /* Yes, I really do want to clobber edx here: when we resume a + vcpu after unpickling a multi-processor domain, it returns + here, but clobbers all of the call clobbered registers. 
*/ + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret), "=b" (ign1) + : "0" (__HYPERVISOR_sched_op), + "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) + : "memory", "ecx", "edx" ); + return ret; } #endif /* __HYPERCALL_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h Fri Sep 9 16:30:54 2005 @@ -8,7 +8,7 @@ static char * __init machine_specific_memory_setup(void) { - unsigned long max_pfn = xen_start_info.nr_pages; + unsigned long max_pfn = xen_start_info->nr_pages; e820.nr_map = 0; add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); @@ -23,7 +23,7 @@ clear_bit(X86_FEATURE_PSE, c->x86_capability); clear_bit(X86_FEATURE_PGE, c->x86_capability); clear_bit(X86_FEATURE_SEP, c->x86_capability); - if (!(xen_start_info.flags & SIF_PRIVILEGED)) + if (!(xen_start_info->flags & SIF_PRIVILEGED)) clear_bit(X86_FEATURE_MTRR, c->x86_capability); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h Fri Sep 9 16:30:54 2005 @@ -35,9 +35,9 @@ * happen before reload of cr3/ldt (i.e., not in __switch_to). 
*/ asm volatile ( "mov %%fs,%0 ; mov %%gs,%1" - : "=m" (*(int *)¤t->thread.fs), - "=m" (*(int *)¤t->thread.gs)); - asm volatile ( "mov %0,%%fs ; mov %0,%%gs" + : "=m" (current->thread.fs), + "=m" (current->thread.gs)); + asm volatile ( "movl %0,%%fs ; movl %0,%%gs" : : "r" (0) ); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Fri Sep 9 16:30:54 2005 @@ -60,14 +60,14 @@ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(m) ((m) | 0x80000000U) -extern unsigned int *phys_to_machine_mapping; +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME(m) ((m) | (1UL<<31)) +extern unsigned long *phys_to_machine_mapping; #define pfn_to_mfn(pfn) \ -((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL) +(phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL<<31)) static inline unsigned long mfn_to_pfn(unsigned long mfn) { - unsigned int pfn; + unsigned long pfn; /* * The array access can fail (e.g., device space beyond end of RAM). @@ -83,7 +83,7 @@ ".previous" : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) ); - return (unsigned long)pfn; + return pfn; } /* Definitions for machine and pseudophysical addresses. */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Fri Sep 9 16:30:54 2005 @@ -43,8 +43,32 @@ struct pci_dev; +#ifdef CONFIG_SWIOTLB + + /* On Xen we use SWIOTLB instead of blk-specific bounce buffers. 
*/ #define PCI_DMA_BUS_IS_PHYS (0) + +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) + +#else + +/* The PCI address space does equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (1) /* pci_unmap_{page,single} is a nop so... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) @@ -53,6 +77,8 @@ #define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) #define pci_unmap_len(PTR, LEN_NAME) (0) #define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) + +#endif /* This is always fine. */ #define pci_dac_dma_supported(pci_dev, mask) (1) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Fri Sep 9 16:30:54 2005 @@ -460,9 +460,9 @@ #define kern_addr_valid(addr) (1) #endif /* !CONFIG_DISCONTIGMEM */ -int direct_remap_area_pages(struct mm_struct *mm, +int direct_remap_pfn_range(struct mm_struct *mm, unsigned long address, - unsigned long machine_addr, + unsigned long mfn, unsigned long size, pgprot_t prot, domid_t domid); @@ -474,10 +474,10 @@ unsigned long size); #define io_remap_page_range(vma,from,phys,size,prot) \ -direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO) +direct_remap_pfn_range(vma->vm_mm,from,phys>>PAGE_SHIFT,size,prot,DOMID_IO) #define io_remap_pfn_range(vma,from,pfn,size,prot) \ -direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO) 
+direct_remap_pfn_range(vma->vm_mm,from,pfn,size,prot,DOMID_IO) #define MK_IOSPACE_PFN(space, pfn) (pfn) #define GET_IOSPACE(pfn) 0 diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h Fri Sep 9 16:30:54 2005 @@ -517,8 +517,8 @@ * This special macro can be used to load a debugging register */ #define loaddebug(thread,register) \ - HYPERVISOR_set_debugreg((register), \ - ((thread)->debugreg[register])) + HYPERVISOR_set_debugreg((register), \ + ((thread)->debugreg[register])) /* Forward declaration, a strange C thing */ struct task_struct; diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h Fri Sep 9 16:30:54 2005 @@ -53,8 +53,8 @@ #define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF)) #define LOADER_TYPE (*(unsigned char *) (PARAM+0x210)) #define KERNEL_START (*(unsigned long *) (PARAM+0x214)) -#define INITRD_START (__pa(xen_start_info.mod_start)) -#define INITRD_SIZE (xen_start_info.mod_len) +#define INITRD_START (__pa(xen_start_info->mod_start)) +#define INITRD_SIZE (xen_start_info->mod_len) #define EDID_INFO (*(struct edid_info *) (PARAM+0x440)) #define EDD_NR (*(unsigned char *) (PARAM+EDDNR)) #define EDD_MBR_SIG_NR (*(unsigned char *) (PARAM+EDD_MBR_SIG_NR_BUF)) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h Fri Sep 9 16:30:54 2005 @@ -561,8 +561,14 @@ #define local_irq_disable() __cli() #define local_irq_enable() __sti() +/* Don't use smp_processor_id: this is called in debug versions of that fn. 
*/ +#ifdef CONFIG_SMP #define irqs_disabled() \ - HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask + HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask +#else +#define irqs_disabled() \ + HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask +#endif /* * disable hlt during certain critical i/o operations diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h Fri Sep 9 16:30:54 2005 @@ -25,8 +25,8 @@ #define LOADER_TYPE (*(unsigned char *) (PARAM+0x210)) #define KERNEL_START (*(unsigned int *) (PARAM+0x214)) -#define INITRD_START (__pa(xen_start_info.mod_start)) -#define INITRD_SIZE (xen_start_info.mod_len) +#define INITRD_START (__pa(xen_start_info->mod_start)) +#define INITRD_SIZE (xen_start_info->mod_len) #define EDID_INFO (*(struct edid_info *) (PARAM+0x440)) #define EDD_NR (*(unsigned char *) (PARAM+EDDNR)) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Fri Sep 9 16:30:54 2005 @@ -4,6 +4,10 @@ * Linux-specific hypervisor handling. * * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu <benjamin.liu@xxxxxxxxx> + * Jun Nakajima <jun.nakajima@xxxxxxxxx> * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -26,497 +30,329 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -/* - * Benjamin Liu <benjamin.liu@xxxxxxxxx> - * Jun Nakajima <jun.nakajima@xxxxxxxxx> - * Ported to x86-64. 
- * - */ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ + #include <asm-xen/xen-public/xen.h> #define __syscall_clobber "r11","rcx","memory" -/* - * Assembler stubs for hyper-calls. - */ +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res) \ + : "0" (__HYPERVISOR_##name) \ + : __syscall_clobber ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=D" (__ign1) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \ + : __syscall_clobber ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)) \ + : __syscall_clobber ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)) \ + : __syscall_clobber ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %8,%%r10; " TRAP_INSTR \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "g" ((long)(a4)) \ + : __syscall_clobber, "r10" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "movq %8,%%r10; movq %9,%%r8; " TRAP_INSTR \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "g" ((long)(a4)), "g" ((long)(a5)) \ + : 
__syscall_clobber, "r10", "r8" ); \ + (type)__res; \ +}) + static inline int HYPERVISOR_set_trap_table( - trap_info_t *table) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_trap_table), "D" (table) - : __syscall_clobber ); - - return ret; + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); } static inline int HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count, domid_t domid) -{ - int ret; - - __asm__ __volatile__ ( - "movq %5, %%r10;" TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_mmu_update), "D" (req), "S" ((long)count), - "d" (success_count), "g" ((unsigned long)domid) - : __syscall_clobber, "r10" ); - - return ret; + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - int ret; - - __asm__ __volatile__ ( - "movq %5, %%r10;" TRAP_INSTR - : "=a" (ret) - : "0" (__HYPERVISOR_mmuext_op), "D" (op), "S" ((long)count), - "d" (success_count), "g" ((unsigned long)domid) - : __syscall_clobber, "r10" ); - - return ret; + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int HYPERVISOR_set_gdt( - unsigned long *frame_list, int entries) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_gdt), "D" (frame_list), "S" ((long)entries) - : __syscall_clobber ); - - - return ret; -} + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + static inline int HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_stack_switch), "D" 
(ss), "S" (esp) - : __syscall_clobber ); - - return ret; + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); } static inline int HYPERVISOR_set_callbacks( - unsigned long event_address, unsigned long failsafe_address, - unsigned long syscall_address) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_callbacks), "D" (event_address), - "S" (failsafe_address), "d" (syscall_address) - : __syscall_clobber ); - - return ret; + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, syscall_address); } static inline int HYPERVISOR_fpu_taskswitch( - int set) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_fpu_taskswitch), - "D" ((unsigned long) set) : __syscall_clobber ); - - return ret; + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); } static inline int HYPERVISOR_yield( - void) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_yield) - : __syscall_clobber ); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_yield, 0); } static inline int HYPERVISOR_block( - void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), "D" ((unsigned long)SCHEDOP_block) - : __syscall_clobber ); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_block, 0); } static inline int HYPERVISOR_shutdown( - void) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))) - : __syscall_clobber ); - - return ret; + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_shutdown | + 
(SHUTDOWN_poweroff << SCHEDOP_reasonshift), 0); } static inline int HYPERVISOR_reboot( - void) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_suspend( - unsigned long srec) -{ - int ret; - - /* NB. On suspend, control software expects a suspend record in %esi. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)(SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift))), - "S" (srec) - : __syscall_clobber ); - - return ret; -} - -/* - * We can have the timeout value in a single argument for the hypercall, but - * that will break the common code. - */ + void) +{ + return _hypercall2(int, sched_op, SCHEDOP_shutdown | + (SHUTDOWN_reboot << SCHEDOP_reasonshift), 0); +} + static inline long HYPERVISOR_set_timer_op( - u64 timeout) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_timer_op), - "D" (timeout) - : __syscall_clobber ); - - return ret; + u64 timeout) +{ + return _hypercall1(long, set_timer_op, timeout); } static inline int HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - int ret; - - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_dom0_op), "D" (dom0_op) - : __syscall_clobber ); - - return ret; + dom0_op_t *dom0_op) +{ + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, dom0_op); } static inline int HYPERVISOR_set_debugreg( - int reg, unsigned long value) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_debugreg), "D" ((unsigned long)reg), "S" (value) - : __syscall_clobber ); - - return ret; + int reg, 
unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long HYPERVISOR_get_debugreg( - int reg) -{ - unsigned long ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_get_debugreg), "D" ((unsigned long)reg) - : __syscall_clobber ); - - return ret; + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); } static inline int HYPERVISOR_update_descriptor( - unsigned long ma, unsigned long word) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_update_descriptor), "D" (ma), - "S" (word) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_dom_mem_op( - unsigned int op, unsigned long *extent_list, - unsigned long nr_extents, unsigned int extent_order) -{ - int ret; - - __asm__ __volatile__ ( - "movq %5,%%r10; movq %6,%%r8;" TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_dom_mem_op), "D" ((unsigned long)op), "S" (extent_list), - "d" (nr_extents), "g" ((unsigned long) extent_order), "g" ((unsigned long) DOMID_SELF) - : __syscall_clobber,"r8","r10"); - - return ret; + unsigned long ma, unsigned long word) +{ + return _hypercall2(int, update_descriptor, ma, word); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); } static inline int HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_multicall), "D" (call_list), "S" ((unsigned long)nr_calls) - : __syscall_clobber); - - return ret; + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); } static inline int HYPERVISOR_update_va_mapping( - unsigned long page_nr, pte_t new_val, unsigned long flags) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned 
long)__HYPERVISOR_update_va_mapping), - "D" (page_nr), "S" (new_val.pte), "d" (flags) - : __syscall_clobber); - - return ret; + unsigned long va, pte_t new_val, unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val.pte, flags); } static inline int HYPERVISOR_event_channel_op( - void *op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_event_channel_op), "D" (op) - : __syscall_clobber); - - return ret; + void *op) +{ + return _hypercall1(int, event_channel_op, op); } static inline int HYPERVISOR_xen_version( - int cmd) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_xen_version), "D" ((unsigned long)cmd) - : __syscall_clobber); - - return ret; + int cmd) +{ + return _hypercall1(int, xen_version, cmd); } static inline int HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_console_io), "D" ((unsigned long)cmd), "S" ((unsigned long)count), "d" (str) - : __syscall_clobber); - - return ret; + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); } static inline int HYPERVISOR_physdev_op( - void *physdev_op) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_physdev_op), "D" (physdev_op) - : __syscall_clobber); - - return ret; + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); } static inline int HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_grant_table_op), "D" ((unsigned long)cmd), "S" ((unsigned long)uop), "d" (count) - : __syscall_clobber); - - return ret; + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); } 
static inline int HYPERVISOR_update_va_mapping_otherdomain( - unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid) -{ - int ret; - - __asm__ __volatile__ ( - "movq %5, %%r10;" TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_update_va_mapping_otherdomain), - "D" (page_nr), "S" (new_val.pte), "d" (flags), "g" ((unsigned long)domid) - : __syscall_clobber,"r10"); - - return ret; + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val.pte, flags, domid); } static inline int HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_vm_assist), "D" ((unsigned long)cmd), "S" ((unsigned long)type) - : __syscall_clobber); - - return ret; + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_boot_vcpu( + unsigned long vcpu, vcpu_guest_context_t *ctxt) +{ + return _hypercall2(int, boot_vcpu, vcpu, ctxt); +} + +static inline int +HYPERVISOR_vcpu_up( + int vcpu) +{ + return _hypercall2(int, sched_op, SCHEDOP_vcpu_up | + (vcpu << SCHEDOP_vcpushift), 0); +} + +static inline int +HYPERVISOR_vcpu_pickle( + int vcpu, vcpu_guest_context_t *ctxt) +{ + return _hypercall2(int, sched_op, SCHEDOP_vcpu_pickle | + (vcpu << SCHEDOP_vcpushift), ctxt); } static inline int HYPERVISOR_switch_to_user(void) { - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" ((unsigned long)__HYPERVISOR_switch_to_user) : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" (__HYPERVISOR_boot_vcpu), "D" (vcpu), "S" (ctxt) - : __syscall_clobber); - - return ret; + return _hypercall0(int, switch_to_user); } static inline int 
HYPERVISOR_set_segment_base( - int reg, unsigned long value) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_set_segment_base), "D" ((unsigned long)reg), "S" (value) - : __syscall_clobber ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_pickle( - int vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) - : "0" ((unsigned long)__HYPERVISOR_sched_op), - "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), - "S" ((unsigned long)ctxt) - : __syscall_clobber ); - - return ret; + int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall2(int, sched_op, SCHEDOP_shutdown | + (SHUTDOWN_suspend << SCHEDOP_reasonshift), srec); } #endif /* __HYPERCALL_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h Fri Sep 9 16:30:54 2005 @@ -14,7 +14,7 @@ who = "Xen"; start_pfn = 0; - max_pfn = xen_start_info.nr_pages; + max_pfn = xen_start_info->nr_pages; e820.nr_map = 0; add_memory_region(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn) - PFN_PHYS(start_pfn), E820_RAM); @@ -29,7 +29,7 @@ clear_bit(X86_FEATURE_PSE, c->x86_capability); clear_bit(X86_FEATURE_PGE, c->x86_capability); clear_bit(X86_FEATURE_SEP, c->x86_capability); - if (!(xen_start_info.flags & SIF_PRIVILEGED)) + if (!(xen_start_info->flags & SIF_PRIVILEGED)) clear_bit(X86_FEATURE_MTRR, c->x86_capability); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Fri Sep 9 16:30:54 2005 @@ -62,14 +62,14 @@ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -#define INVALID_P2M_ENTRY (~0U) -#define FOREIGN_FRAME(m) ((m) | 0x80000000U) -extern u32 *phys_to_machine_mapping; +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME(m) ((m) | (1UL<<63)) +extern unsigned long *phys_to_machine_mapping; #define pfn_to_mfn(pfn) \ -((unsigned long)phys_to_machine_mapping[(unsigned int)(pfn)] & 0x7FFFFFFFUL) +(phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL << 63)) static inline unsigned long mfn_to_pfn(unsigned long mfn) { - unsigned int pfn; + unsigned long pfn; /* * The array access can fail (e.g., device space beyond end of RAM). @@ -77,7 +77,7 @@ * but we must handle the fault without crashing! */ asm ( - "1: movl %1,%k0\n" + "1: movq %1,%0\n" "2:\n" ".section __ex_table,\"a\"\n" " .align 8\n" @@ -85,7 +85,7 @@ ".previous" : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) ); - return (unsigned long)pfn; + return pfn; } /* Definitions for machine and pseudophysical addresses. 
*/ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Fri Sep 9 16:30:54 2005 @@ -76,12 +76,29 @@ #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) +#elif defined(CONFIG_SWIOTLB) + +#define PCI_DMA_BUS_IS_PHYS 0 + +#define pci_dac_dma_supported(pci_dev, mask) 1 + +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) + #else /* No IOMMU */ -/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */ -#define PCI_DMA_BUS_IS_PHYS (0) - +#define PCI_DMA_BUS_IS_PHYS 1 #define pci_dac_dma_supported(pci_dev, mask) 1 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Fri Sep 9 16:30:54 2005 @@ -307,7 +307,7 @@ #define pte_pfn(_pte) \ ({ \ unsigned long mfn = pte_mfn(_pte); \ - unsigned pfn = mfn_to_pfn(mfn); \ + unsigned long pfn = mfn_to_pfn(mfn); \ if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ @@ -526,28 +526,26 @@ #define DOMID_LOCAL (0xFFFFU) -int direct_remap_area_pages(struct mm_struct *mm, +int direct_remap_pfn_range(struct mm_struct *mm, unsigned long address, - unsigned long machine_addr, + unsigned long mfn, unsigned long size, pgprot_t prot, domid_t domid); -int 
__direct_remap_area_pages(struct mm_struct *mm, - unsigned long address, - unsigned long size, - mmu_update_t *v); + int create_lookup_pte_addr(struct mm_struct *mm, unsigned long address, unsigned long *ptep); + int touch_pte_range(struct mm_struct *mm, unsigned long address, unsigned long size); #define io_remap_page_range(vma, vaddr, paddr, size, prot) \ - direct_remap_area_pages((vma)->vm_mm,vaddr,paddr,size,prot,DOMID_IO) + direct_remap_pfn_range((vma)->vm_mm,vaddr,paddr>>PAGE_SHIFT,size,prot,DOMID_IO) #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - direct_remap_area_pages((vma)->vm_mm,vaddr,(pfn)<<PAGE_SHIFT,size,prot,DOMID_IO) + direct_remap_pfn_range((vma)->vm_mm,vaddr,pfn,size,prot,DOMID_IO) #define MK_IOSPACE_PFN(space, pfn) (pfn) #define GET_IOSPACE(pfn) 0 diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h Fri Sep 9 16:30:54 2005 @@ -387,8 +387,14 @@ #define local_irq_disable() __cli() #define local_irq_enable() __sti() +/* Don't use smp_processor_id: this is called in debug versions of that fn. 
*/ +#ifdef CONFIG_SMP #define irqs_disabled() \ - HYPERVISOR_shared_info->vcpu_data[smp_processor_id()].evtchn_upcall_mask + HYPERVISOR_shared_info->vcpu_data[__smp_processor_id()].evtchn_upcall_mask +#else +#define irqs_disabled() \ + HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask +#endif /* * disable hlt during certain critical i/o operations diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Sep 9 16:30:54 2005 @@ -52,13 +52,7 @@ #endif /* arch/xen/i386/kernel/setup.c */ -union xen_start_info_union -{ - start_info_t xen_start_info; - char padding[2048]; -}; -extern union xen_start_info_union xen_start_info_union; -#define xen_start_info (xen_start_info_union.xen_start_info) +extern start_info_t *xen_start_info; /* arch/xen/kernel/evtchn.c */ /* Force a proper event-channel callback from Xen. */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h --- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Fri Sep 9 16:30:54 2005 @@ -70,14 +70,6 @@ #define IOCTL_PRIVCMD_HYPERCALL \ _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) -/* - * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN - * @arg: n/a - * Return: Port associated with domain-controller end of control event channel - * for the initial domain. 
- */ -#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \ - _IOC(_IOC_NONE, 'P', 1, 0) #define IOCTL_PRIVCMD_MMAP \ _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) #define IOCTL_PRIVCMD_MMAPBATCH \ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/xenbus.h --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Fri Sep 9 16:30:54 2005 @@ -64,6 +64,7 @@ int (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); + int (*hotplug)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; }; diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/mm/memory.c --- a/linux-2.6-xen-sparse/mm/memory.c Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/mm/memory.c Fri Sep 9 16:30:54 2005 @@ -954,10 +954,8 @@ i++; start += PAGE_SIZE; len--; -printk(KERN_ALERT "HIT 0x%lx\n", start); continue; } -else printk(KERN_ALERT "MISS 0x%lx\n", start); } if (!vma || (vma->vm_flags & VM_IO) @@ -1367,20 +1365,15 @@ struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); pte_t entry; + struct page invalid_page; if (unlikely(!pfn_valid(pfn))) { - /* - * This should really halt the system so it can be debugged or - * at least the kernel stops what it's doing before it corrupts - * data, but for the moment just pretend this is OOM. - */ - pte_unmap(page_table); - printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", - address); - spin_unlock(&mm->page_table_lock); - return VM_FAULT_OOM; - } - old_page = pfn_to_page(pfn); + /* This can happen with /dev/mem (PROT_WRITE, MAP_PRIVATE). 
*/ + invalid_page.flags = (1<<PG_reserved) | (1<<PG_locked); + old_page = &invalid_page; + } else { + old_page = pfn_to_page(pfn); + } if (!TestSetPageLocked(old_page)) { int reuse = can_share_swap_page(old_page); @@ -1416,7 +1409,13 @@ new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); if (!new_page) goto no_new_page; - copy_user_highpage(new_page, old_page, address); + if (old_page == &invalid_page) { + char *vto = kmap_atomic(new_page, KM_USER1); + copy_page(vto, (void *)(address & PAGE_MASK)); + kunmap_atomic(vto, KM_USER1); + } else { + copy_user_highpage(new_page, old_page, address); + } } /* * Re-check the pte - we dropped the lock diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/Makefile --- a/tools/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/Makefile Fri Sep 9 16:30:54 2005 @@ -7,14 +7,23 @@ SUBDIRS += misc SUBDIRS += examples SUBDIRS += xentrace -SUBDIRS += python -SUBDIRS += xcs SUBDIRS += xcutils -#SUBDIRS += pygrub SUBDIRS += firmware SUBDIRS += security SUBDIRS += console +ifeq ($(VTPM_TOOLS),y) +SUBDIRS += vtpm_manager +SUBDIRS += vtpm +endif SUBDIRS += xenstat + +.PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean + +# These don't cross-compile +ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) +SUBDIRS += python +#SUBDIRS += pygrub +endif .PHONY: all install clean check check_clean ioemu eioemuinstall ioemuclean diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/Rules.mk --- a/tools/Rules.mk Thu Sep 8 15:18:40 2005 +++ b/tools/Rules.mk Fri Sep 9 16:30:54 2005 @@ -4,7 +4,6 @@ XEN_XC = $(XEN_ROOT)/tools/python/xen/lowlevel/xc XEN_LIBXC = $(XEN_ROOT)/tools/libxc -XEN_XCS = $(XEN_ROOT)/tools/xcs XEN_XENSTORE = $(XEN_ROOT)/tools/xenstore XEN_LIBXENSTAT = $(XEN_ROOT)/tools/xenstat/libxenstat/src diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/Makefile --- a/tools/blktap/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/Makefile Fri Sep 9 16:30:54 2005 @@ -6,7 +6,8 @@ include $(XEN_ROOT)/tools/Rules.mk SUBDIRS := -SUBDIRS += parallax 
+SUBDIRS += ublkback +#SUBDIRS += parallax BLKTAP_INSTALL_DIR = /usr/sbin @@ -14,12 +15,12 @@ INSTALL_PROG = $(INSTALL) -m0755 INSTALL_DIR = $(INSTALL) -d -m0755 -INCLUDES += -I. -I $(XEN_LIBXC) +INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE) LIBS := -lpthread -lz SRCS := -SRCS += blktaplib.c +SRCS += blktaplib.c xenbus.c blkif.c CFLAGS += -Wall CFLAGS += -Werror @@ -28,17 +29,20 @@ CFLAGS += -g3 CFLAGS += -fno-strict-aliasing CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE +# get asprintf(): +CFLAGS += -D _GNU_SOURCE # Get gcc to generate the dependencies for us. CFLAGS += -Wp,-MD,.$(@F).d CFLAGS += $(INCLUDES) DEPS = .*.d OBJS = $(patsubst %.c,%.o,$(SRCS)) -IBINS = blkdump +IBINS := +#IBINS += blkdump LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) -all: mk-symlinks libblktap.so blkdump +all: mk-symlinks libblktap.so #blkdump @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done @@ -59,7 +63,7 @@ $(INSTALL_DIR) -p $(DESTDIR)/usr/include $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR) $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include - $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR) + #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR) @set -e; for subdir in $(SUBDIRS); do \ $(MAKE) -C $$subdir $@; \ done @@ -79,14 +83,16 @@ mv staging/i386/*.rpm . rm -rf staging -libblktap.so: $(OBJS) - $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared -o \ - libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) +libblktap.so: $(OBJS) + $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \ + -L$(XEN_XENSTORE) -l xenstore \ + -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) ln -sf libblktap.so.$(MAJOR) $@ blkdump: libblktap.so - $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. -l blktap blkdump.c + $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. 
\ + -l blktap blkdump.c .PHONY: TAGS clean install mk-symlinks rpm diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blkdump.c --- a/tools/blktap/blkdump.c Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/blkdump.c Fri Sep 9 16:30:54 2005 @@ -8,85 +8,18 @@ #include <stdio.h> #include "blktaplib.h" -int control_print(control_msg_t *msg) -{ - if (msg->type != CMSG_BLKIF_BE) - { - printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); - return 0; - } - - switch(msg->subtype) - { - case CMSG_BLKIF_BE_CREATE: - if ( msg->length != sizeof(blkif_be_create_t) ) - goto parse_error; - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", - ((blkif_be_create_t *)msg->msg)->domid, - ((blkif_be_create_t *)msg->msg)->blkif_handle); - break; - case CMSG_BLKIF_BE_DESTROY: - if ( msg->length != sizeof(blkif_be_destroy_t) ) - goto parse_error; - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", - ((blkif_be_destroy_t *)msg->msg)->domid, - ((blkif_be_destroy_t *)msg->msg)->blkif_handle); - break; - case CMSG_BLKIF_BE_CONNECT: - if ( msg->length != sizeof(blkif_be_connect_t) ) - goto parse_error; - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n", - ((blkif_be_connect_t *)msg->msg)->domid, - ((blkif_be_connect_t *)msg->msg)->blkif_handle); - break; - case CMSG_BLKIF_BE_DISCONNECT: - if ( msg->length != sizeof(blkif_be_disconnect_t) ) - goto parse_error; - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n", - ((blkif_be_disconnect_t *)msg->msg)->domid, - ((blkif_be_disconnect_t *)msg->msg)->blkif_handle); - break; - case CMSG_BLKIF_BE_VBD_CREATE: - if ( msg->length != sizeof(blkif_be_vbd_create_t) ) - goto parse_error; - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n", - ((blkif_be_vbd_create_t *)msg->msg)->domid, - ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle, - ((blkif_be_vbd_create_t *)msg->msg)->vdevice); - break; - case CMSG_BLKIF_BE_VBD_DESTROY: - if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) - goto 
parse_error; - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n", - ((blkif_be_vbd_destroy_t *)msg->msg)->domid, - ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle, - ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice); - break; - default: - goto parse_error; - } - - return 0; - -parse_error: - printf("[CONTROL_MSG] Bad message type or length!\n"); - return 0; -} - int request_print(blkif_request_t *req) { int i; unsigned long fas; - if ( req->operation == BLKIF_OP_PROBE ) { - printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id), - blkif_op_name[req->operation]); - return BLKTAP_PASS; - } else { + if ( (req->operation == BLKIF_OP_READ) || + (req->operation == BLKIF_OP_WRITE) ) + { printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id), blkif_op_name[req->operation], - req->nr_segments, req->device, + req->nr_segments, req->handle, req->sector_number); @@ -99,6 +32,8 @@ ); } + } else { + printf("Unknown request message type.\n"); } return BLKTAP_PASS; @@ -106,23 +41,22 @@ int response_print(blkif_response_t *rsp) { - if ( rsp->operation == BLKIF_OP_PROBE ) { - printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), - blkif_op_name[rsp->operation]); - return BLKTAP_PASS; - } else { + if ( (rsp->operation == BLKIF_OP_READ) || + (rsp->operation == BLKIF_OP_WRITE) ) + { printf("[%2u:%2u>%5s] (status: %d)\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), blkif_op_name[rsp->operation], rsp->status); + } else { + printf("Unknown request message type.\n"); } return BLKTAP_PASS; } int main(int argc, char *argv[]) { - blktap_register_ctrl_hook("control_print", control_print); blktap_register_request_hook("request_print", request_print); blktap_register_response_hook("response_print", response_print); blktap_listen(); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blktaplib.c --- a/tools/blktap/blktaplib.c Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/blktaplib.c Fri Sep 9 16:30:54 2005 @@ -24,7 +24,7 @@ 
#include <string.h> #include <unistd.h> #include <pthread.h> - +#include <xs.h> #define __COMPILING_BLKTAP_LIB #include "blktaplib.h" @@ -34,11 +34,12 @@ #else #define DPRINTF(_f, _a...) ((void)0) #endif -#define DEBUG_RING_IDXS 1 +#define DEBUG_RING_IDXS 0 #define POLLRDNORM 0x040 #define BLKTAP_IOCTL_KICK 1 + void got_sig_bus(); void got_sig_int(); @@ -46,17 +47,13 @@ /* in kernel these are opposite, but we are a consumer now. */ blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */ blkif_front_ring_t be_ring; -ctrl_back_ring_t ctrl_ring; unsigned long mmap_vstart = 0; char *blktap_mem; int fd = 0; -#define BLKTAP_RING_PAGES 3 /* Ctrl, Back, Front */ -/*#define BLKTAP_MMAP_PAGES ((11 + 1) * 64)*/ -#define BLKTAP_MMAP_PAGES \ - ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE) -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES) +#define BLKTAP_RING_PAGES 1 /* Front */ +#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES) int bad_count = 0; void bad(void) @@ -79,126 +76,13 @@ } inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } -/* + static int (*request_hook)(blkif_request_t *req) = NULL; static int (*response_hook)(blkif_response_t *req) = NULL; -*/ - -/*-----[ Request/Response hook chains.]----------------------------------*/ - -#define HOOK_NAME_MAX 50 - -typedef struct ctrl_hook_st { - char name[HOOK_NAME_MAX]; - int (*func)(control_msg_t *); - struct ctrl_hook_st *next; -} ctrl_hook_t; - -typedef struct request_hook_st { - char name[HOOK_NAME_MAX]; - int (*func)(blkif_request_t *); - struct request_hook_st *next; -} request_hook_t; - -typedef struct response_hook_st { - char name[HOOK_NAME_MAX]; - int (*func)(blkif_response_t *); - struct response_hook_st *next; -} response_hook_t; - -static ctrl_hook_t *ctrl_hook_chain = NULL; -static request_hook_t *request_hook_chain = NULL; -static response_hook_t *response_hook_chain = NULL; - -void blktap_register_ctrl_hook(char *name, int 
(*ch)(control_msg_t *)) -{ - ctrl_hook_t *ch_ent, **c; - - ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t)); - if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } - - ch_ent->func = ch; - ch_ent->next = NULL; - strncpy(ch_ent->name, name, HOOK_NAME_MAX); - ch_ent->name[HOOK_NAME_MAX-1] = '\0'; - - c = &ctrl_hook_chain; - while (*c != NULL) { - c = &(*c)->next; - } - *c = ch_ent; -} - -void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)) -{ - request_hook_t *rh_ent, **c; - - rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); - if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } - - rh_ent->func = rh; - rh_ent->next = NULL; - strncpy(rh_ent->name, name, HOOK_NAME_MAX); - - c = &request_hook_chain; - while (*c != NULL) { - c = &(*c)->next; - } - *c = rh_ent; -} - -void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)) -{ - response_hook_t *rh_ent, **c; - - rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); - if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } - - rh_ent->func = rh; - rh_ent->next = NULL; - strncpy(rh_ent->name, name, HOOK_NAME_MAX); - - c = &response_hook_chain; - while (*c != NULL) { - c = &(*c)->next; - } - *c = rh_ent; -} - -void print_hooks(void) -{ - request_hook_t *req_hook; - response_hook_t *rsp_hook; - ctrl_hook_t *ctrl_hook; - - DPRINTF("Control Hooks:\n"); - ctrl_hook = ctrl_hook_chain; - while (ctrl_hook != NULL) - { - DPRINTF(" [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name); - ctrl_hook = ctrl_hook->next; - } - - DPRINTF("Request Hooks:\n"); - req_hook = request_hook_chain; - while (req_hook != NULL) - { - DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name); - req_hook = req_hook->next; - } - - DPRINTF("Response Hooks:\n"); - rsp_hook = response_hook_chain; - while (rsp_hook != NULL) - { - DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); - rsp_hook = rsp_hook->next; - } -} /*-----[ Data to/from Backend 
(server) VM ]------------------------------*/ - +/* inline int write_req_to_be_ring(blkif_request_t *req) { @@ -214,6 +98,7 @@ return 0; } +*/ inline int write_rsp_to_fe_ring(blkif_response_t *rsp) { @@ -230,14 +115,14 @@ return 0; } -static void apply_rsp_hooks(blkif_response_t *rsp) +static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp) { response_hook_t *rsp_hook; - rsp_hook = response_hook_chain; + rsp_hook = blkif->response_hook_chain; while (rsp_hook != NULL) { - switch(rsp_hook->func(rsp)) + switch(rsp_hook->func(blkif, rsp, 1)) { case BLKTAP_PASS: break; @@ -248,15 +133,19 @@ } } + static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER; -void blktap_inject_response(blkif_response_t *rsp) -{ - - apply_rsp_hooks(rsp); - +void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp) +{ + + apply_rsp_hooks(blkif, rsp); + write_rsp_to_fe_ring(rsp); - +} + +void blktap_kick_responses(void) +{ pthread_mutex_lock(&push_mutex); RING_PUSH_RESPONSES(&fe_ring); @@ -277,7 +166,7 @@ int active; } pollhook_t; -static struct pollfd pfd[MAX_POLLFDS+1]; +static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */ static pollhook_t pollhooks[MAX_POLLFDS]; static unsigned int ph_freelist[MAX_POLLFDS]; static unsigned int ph_cons, ph_prod; @@ -344,65 +233,65 @@ int blktap_listen(void) { - int notify_be, notify_fe, tap_pfd; - + int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret; + struct xs_handle *h; + blkif_t *blkif; + /* comms rings: */ blkif_request_t *req; blkif_response_t *rsp; - control_msg_t *msg; blkif_sring_t *sring; - ctrl_sring_t *csring; RING_IDX rp, i, pfd_count; /* pending rings */ blkif_request_t req_pending[BLKIF_RING_SIZE]; - blkif_response_t rsp_pending[BLKIF_RING_SIZE]; + /* blkif_response_t rsp_pending[BLKIF_RING_SIZE] */; /* handler hooks: */ request_hook_t *req_hook; response_hook_t *rsp_hook; - ctrl_hook_t *ctrl_hook; signal (SIGBUS, got_sig_bus); signal (SIGINT, got_sig_int); - print_hooks(); - + __init_blkif(); + 
fd = open("/dev/blktap", O_RDWR); - if (fd == -1) { - printf("open failed! (%d)\n", errno); - goto open_failed; - } + if (fd == -1) + err(-1, "open failed!"); blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if ((int)blktap_mem == -1) { - printf("mmap failed! (%d)\n", errno); - goto mmap_failed; - } + if ((int)blktap_mem == -1) + err(-1, "mmap failed!"); /* assign the rings to the mapped memory */ - csring = (ctrl_sring_t *)blktap_mem; - BACK_RING_INIT(&ctrl_ring, csring, PAGE_SIZE); - +/* sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE); FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE); - - sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE)); +*/ + sring = (blkif_sring_t *)((unsigned long)blktap_mem); BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE); mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT); + + /* Set up store connection and watch. */ + h = xs_daemon_open(); + if (h == NULL) + err(-1, "xs_daemon_open"); + + ret = add_blockdevice_probe_watch(h, "Domain-0"); + if (ret != 0) + err(0, "adding device probewatch"); + ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); while(1) { int ret; /* build the poll list */ - - DPRINTF("Building poll list.\n"); - pfd_count = 0; for ( i=0; i < MAX_POLLFDS; i++ ) { pollhook_t *ph = &pollhooks[i]; @@ -415,49 +304,31 @@ } } - tap_pfd = pfd_count; + tap_pfd = pfd_count++; pfd[tap_pfd].fd = fd; pfd[tap_pfd].events = POLLIN; - DPRINTF("poll() %d fds.\n", pfd_count); + store_pfd = pfd_count++; + pfd[store_pfd].fd = xs_fileno(h); + pfd[store_pfd].events = POLLIN; - if ( (ret = (poll(pfd, pfd_count+1, 10000)) == 0) ) { + if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) { if (DEBUG_RING_IDXS) ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS); continue; } - DPRINTF("poll returned %d\n", ret); - for (i=0; i < MAX_POLLFDS; i++) { if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) ) pollhooks[i].func(pollhooks[i].pfd->fd); } - if 
(pfd[tap_pfd].revents) { - - /* empty the control ring */ - rp = ctrl_ring.sring->req_prod; - rmb(); - for (i = ctrl_ring.req_cons; i < rp; i++) - { - msg = RING_GET_REQUEST(&ctrl_ring, i); - - ctrl_hook = ctrl_hook_chain; - while (ctrl_hook != NULL) - { - DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name); - /* We currently don't respond to ctrl messages. */ - ctrl_hook->func(msg); - ctrl_hook = ctrl_hook->next; - } - } - /* Using this as a unidirectional ring. */ - ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i; -pthread_mutex_lock(&push_mutex); - RING_PUSH_RESPONSES(&ctrl_ring); -pthread_mutex_unlock(&push_mutex); - + if (pfd[store_pfd].revents) { + ret = xs_fire_next_watch(h); + } + + if (pfd[tap_pfd].revents) + { /* empty the fe_ring */ notify_fe = 0; notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring); @@ -465,44 +336,62 @@ rmb(); for (i = fe_ring.req_cons; i != rp; i++) { - int done = 0; /* stop forwarding this request */ + int done = 0; req = RING_GET_REQUEST(&fe_ring, i); memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req)); req = &req_pending[ID_TO_IDX(req->id)]; - DPRINTF("copying an fe request\n"); - - req_hook = request_hook_chain; - while (req_hook != NULL) + blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle); + + if (blkif != NULL) { - DPRINTF("REQ_HOOK: %s\n", req_hook->name); - switch(req_hook->func(req)) + req_hook = blkif->request_hook_chain; + while (req_hook != NULL) { - case BLKTAP_RESPOND: - apply_rsp_hooks((blkif_response_t *)req); - write_rsp_to_fe_ring((blkif_response_t *)req); - notify_fe = 1; - done = 1; - break; - case BLKTAP_STOLEN: - done = 1; - break; - case BLKTAP_PASS: - break; - default: - printf("Unknown request hook return value!\n"); + switch(req_hook->func(blkif, req, ((i+1) == rp))) + { + case BLKTAP_RESPOND: + apply_rsp_hooks(blkif, (blkif_response_t *)req); + write_rsp_to_fe_ring((blkif_response_t *)req); + notify_fe = 1; + done = 1; + break; + case BLKTAP_STOLEN: + done = 1; + break; + case BLKTAP_PASS: + break; 
+ default: + printf("Unknown request hook return value!\n"); + } + if (done) break; + req_hook = req_hook->next; } - if (done) break; - req_hook = req_hook->next; } - if (done == 0) write_req_to_be_ring(req); + if (done == 0) + { + /* this was: */ + /* write_req_to_be_ring(req); */ + + unsigned long id = req->id; + unsigned short operation = req->operation; + printf("Unterminated request!\n"); + rsp = (blkif_response_t *)req; + rsp->id = id; + rsp->operation = operation; + rsp->status = BLKIF_RSP_ERROR; + write_rsp_to_fe_ring(rsp); + notify_fe = 1; + done = 1; + } } fe_ring.req_cons = i; /* empty the be_ring */ +/* notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring); rp = be_ring.sring->rsp_prod; rmb(); @@ -519,9 +408,9 @@ write_rsp_to_fe_ring(rsp); } be_ring.rsp_cons = i; - +*/ /* notify the domains */ - +/* if (notify_be) { DPRINTF("notifying be\n"); pthread_mutex_lock(&push_mutex); @@ -529,13 +418,13 @@ ioctl(fd, BLKTAP_IOCTL_KICK_BE); pthread_mutex_unlock(&push_mutex); } - +*/ if (notify_fe) { DPRINTF("notifying fe\n"); -pthread_mutex_lock(&push_mutex); + pthread_mutex_lock(&push_mutex); RING_PUSH_RESPONSES(&fe_ring); ioctl(fd, BLKTAP_IOCTL_KICK_FE); -pthread_mutex_unlock(&push_mutex); + pthread_mutex_unlock(&push_mutex); } } } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blktaplib.h --- a/tools/blktap/blktaplib.h Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/blktaplib.h Fri Sep 9 16:30:54 2005 @@ -2,6 +2,9 @@ * * userland accessors to the block tap. * + * Sept 2/05 -- I'm scaling this back to only support block remappings + * to user in a backend domain. Passthrough and interposition can be readded + * once transitive grants are available. 
*/ #ifndef __BLKTAPLIB_H__ @@ -13,6 +16,7 @@ #include <xen/io/blkif.h> #include <xen/io/ring.h> #include <xen/io/domain_controller.h> +#include <xs.h> /* /dev/xen/blktap resides at device number major=10, minor=202 */ #define BLKTAP_MINOR 202 @@ -49,12 +53,18 @@ return ( ( arg == BLKTAP_MODE_PASSTHROUGH ) || ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) ); +/* + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || ( arg == BLKTAP_MODE_INTERCEPT_BE ) || ( arg == BLKTAP_MODE_INTERPOSE ) || ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) ); +*/ } /* Return values for handling messages in hooks. */ @@ -62,29 +72,88 @@ #define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */ #define BLKTAP_STOLEN 2 /* Hook has stolen request. */ -#define domid_t unsigned short +//#define domid_t unsigned short inline unsigned int ID_TO_IDX(unsigned long id); inline domid_t ID_TO_DOM(unsigned long id); -void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)); -void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)); -void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); -void blktap_inject_response(blkif_response_t *); int blktap_attach_poll(int fd, short events, int (*func)(int)); void blktap_detach_poll(int fd); int blktap_listen(void); +struct blkif; + +typedef struct request_hook_st { + char *name; + int (*func)(struct blkif *, blkif_request_t *, int); + struct request_hook_st *next; +} request_hook_t; + +typedef struct response_hook_st { + char *name; + int (*func)(struct blkif *, blkif_response_t *, int); + struct response_hook_st *next; +} response_hook_t; + +struct blkif_ops { + long int (*get_size)(struct blkif *blkif); + long int (*get_secsize)(struct blkif *blkif); + unsigned (*get_info)(struct blkif 
*blkif); +}; + +typedef struct blkif { + domid_t domid; + long int handle; + + long int pdev; + long int readonly; + + enum { DISCONNECTED, CONNECTED } state; + + struct blkif_ops *ops; + request_hook_t *request_hook_chain; + response_hook_t *response_hook_chain; + + struct blkif *hash_next; + + void *prv; /* device-specific data */ +} blkif_t; + +void register_new_blkif_hook(int (*fn)(blkif_t *blkif)); +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); +blkif_t *alloc_blkif(domid_t domid); +int blkif_init(blkif_t *blkif, long int handle, long int pdev, + long int readonly); +void free_blkif(blkif_t *blkif); +void __init_blkif(void); + + +/* xenstore/xenbus: */ +extern int add_blockdevice_probe_watch(struct xs_handle *h, + const char *domname); +int xs_fire_next_watch(struct xs_handle *h); + + +void blkif_print_hooks(blkif_t *blkif); +void blkif_register_request_hook(blkif_t *blkif, char *name, + int (*rh)(blkif_t *, blkif_request_t *, int)); +void blkif_register_response_hook(blkif_t *blkif, char *name, + int (*rh)(blkif_t *, blkif_response_t *, int)); +void blkif_inject_response(blkif_t *blkif, blkif_response_t *); +void blktap_kick_responses(void); + +/* this must match the underlying driver... 
*/ +#define MAX_PENDING_REQS 64 + /* Accessing attached data page mappings */ -#define MMAP_PAGES_PER_REQUEST \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) -#define MMAP_VADDR(_req,_seg) \ - (mmap_vstart + \ - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ +#define MMAP_PAGES \ + (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) +#define MMAP_VADDR(_req,_seg) \ + (mmap_vstart + \ + ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ ((_seg) * PAGE_SIZE)) extern unsigned long mmap_vstart; - /* Defines that are only used by library clients */ @@ -93,7 +162,6 @@ static char *blkif_op_name[] = { [BLKIF_OP_READ] = "READ", [BLKIF_OP_WRITE] = "WRITE", - [BLKIF_OP_PROBE] = "PROBE", }; #endif /* __COMPILING_BLKTAP_LIB */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_brctl --- a/tools/check/check_brctl Thu Sep 8 15:18:40 2005 +++ b/tools/check/check_brctl Fri Sep 9 16:30:54 2005 @@ -2,8 +2,9 @@ # CHECK-INSTALL function error { - echo 'Check for the bridge control utils (brctl) failed.' + echo + echo ' *** Check for the bridge control utils (brctl) FAILED' exit 1 } -brctl show || error \ No newline at end of file +which brctl 1>/dev/null 2>&1 || error diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_iproute --- a/tools/check/check_iproute Thu Sep 8 15:18:40 2005 +++ b/tools/check/check_iproute Fri Sep 9 16:30:54 2005 @@ -2,9 +2,10 @@ # CHECK-INSTALL function error { - echo 'Check for iproute (ip addr) failed.' 
+ echo + echo ' *** Check for iproute (ip addr) FAILED' exit 1 } -ip addr list || error +ip addr list 1>/dev/null 2>&1 || error diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_logging --- a/tools/check/check_logging Thu Sep 8 15:18:40 2005 +++ b/tools/check/check_logging Fri Sep 9 16:30:54 2005 @@ -18,11 +18,12 @@ import logging except ImportError: hline() - msg("Python logging is not installed.") - msg("Use 'make install-logging' at the xen root to install.") msg("") - msg("Alternatively download and install from") - msg("http://www.red-dove.com/python_logging.html") + msg(" *** Python logging is not installed.") + msg(" *** Use 'make install-logging' at the xen root to install.") + msg(" *** ") + msg(" *** Alternatively download and install from") + msg(" *** http://www.red-dove.com/python_logging.html") hline() sys.exit(1) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_python --- a/tools/check/check_python Thu Sep 8 15:18:40 2005 +++ b/tools/check/check_python Fri Sep 9 16:30:54 2005 @@ -2,9 +2,9 @@ # CHECK-BUILD CHECK-INSTALL function error { - echo "Check for Python version 2.2 or higher failed." + echo + echo " *** Check for Python version >= 2.2 FAILED" exit 1 } -python -V python -V 2>&1 | cut -d ' ' -f 2 | grep -q -E '^2.2|^2.3|^2.4' || error diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_zlib_devel --- a/tools/check/check_zlib_devel Thu Sep 8 15:18:40 2005 +++ b/tools/check/check_zlib_devel Fri Sep 9 16:30:54 2005 @@ -2,9 +2,10 @@ # CHECK-BUILD function error { - echo 'Check for zlib includes failed.' 
+ echo + echo " *** Check for zlib headers FAILED" exit 1 } set -e -[ -e /usr/include/zlib.h ] || error \ No newline at end of file +[ -e /usr/include/zlib.h ] || error diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_zlib_lib --- a/tools/check/check_zlib_lib Thu Sep 8 15:18:40 2005 +++ b/tools/check/check_zlib_lib Fri Sep 9 16:30:54 2005 @@ -2,9 +2,10 @@ # CHECK-BUILD CHECK-INSTALL function error { - echo 'Check for zlib library failed.' + echo + echo " *** Check for zlib library FAILED" exit 1 } set -e -ldconfig -p | grep libz.so || error \ No newline at end of file +ldconfig -p | grep -q libz.so || error diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/chk --- a/tools/check/chk Thu Sep 8 15:18:40 2005 +++ b/tools/check/chk Fri Sep 9 16:30:54 2005 @@ -17,14 +17,11 @@ case $1 in build) check="CHECK-BUILD" - info=".chkbuild" ;; install) check="CHECK-INSTALL" - info=".chkinstall" ;; clean) - rm -f .chkbuild .chkinstall exit 0 ;; *) @@ -34,7 +31,7 @@ failed=0 -echo "Xen ${check} " $(date) > ${info} +echo "Xen ${check} " $(date) for f in check_* ; do case $f in *~) @@ -49,24 +46,12 @@ if ! grep -q ${check} $f ; then continue fi - echo ' ' >> ${info} - echo "Checking $f" >> ${info} - if ./$f 1>>${info} 2>&1 ; then - echo OK >> ${info} + echo -n "Checking $f: " + if ./$f 2>&1 ; then + echo OK else failed=1 - echo "FAILED $f" - echo FAILED >> ${info} fi done -echo >> ${info} - -if [ "$failed" == "1" ] ; then - echo "Checks failed. See `pwd`/${info} for details." 
- echo "FAILED" >> ${info} - exit 1 -else - echo "OK" >> ${info} - exit 0 -fi +exit $failed diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/Makefile --- a/tools/console/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/console/Makefile Fri Sep 9 16:30:54 2005 @@ -9,10 +9,8 @@ INSTALL_PROG = $(INSTALL) -m0755 INSTALL_DIR = $(INSTALL) -d -m0755 -CC = gcc -CFLAGS = -Wall -Werror -g3 +CFLAGS += -Wall -Werror -g3 -CFLAGS += -I $(XEN_XCS) CFLAGS += -I $(XEN_LIBXC) CFLAGS += -I $(XEN_XENSTORE) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/client/main.c --- a/tools/console/client/main.c Thu Sep 8 15:18:40 2005 +++ b/tools/console/client/main.c Fri Sep 9 16:30:54 2005 @@ -170,12 +170,12 @@ { 0 }, }; - char *str_pty; - char path[1024]; + char *str_pty, *path; int spty; unsigned int len = 0; struct xs_handle *xs; char *end; + time_t now; while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch(ch) { @@ -213,20 +213,51 @@ signal(SIGTERM, sighandler); - snprintf(path, sizeof(path), "/console/%d/tty", domid); + path = xs_get_domain_path(xs, domid); + if (path == NULL) + err(errno, "xs_get_domain_path()"); + path = realloc(path, strlen(path) + strlen("/console/tty") + 1); + if (path == NULL) + err(ENOMEM, "realloc"); + strcat(path, "/console/tty"); str_pty = xs_read(xs, path, &len); + /* FIXME consoled currently does not assume domain-0 doesn't have a console which is good when we break domain-0 up. To keep us user friendly, we'll bail out here since no data will ever show up on domain-0. */ - if (domid == 0 || str_pty == NULL) { + if (domid == 0) { err(errno, "Could not read tty from store"); } + + /* Wait a little bit for tty to appear. There is a race + condition that occurs after xend creates a domain. This + code might be running before consoled has noticed the new + domain and setup a pty for it. 
+ + A xenstore watch would slightly improve responsiveness but + a timeout would still be needed since we don't want to + block forever if given an invalid domain or worse yet, a + domain that someone else has connected to. */ + + now = time(0); + while (str_pty == NULL && (now + 5) > time(0)) { + struct timeval tv = { 0, 500 }; + select(0, NULL, NULL, NULL, &tv); /* pause briefly */ + + str_pty = xs_read(xs, path, &len); + } + + if (str_pty == NULL) { + err(errno, "Could not read tty from store"); + } + spty = open(str_pty, O_RDWR | O_NOCTTY); if (spty == -1) { err(errno, "Could not open tty `%s'", str_pty); } free(str_pty); + free(path); init_term(STDIN_FILENO, &attr); console_loop(xc_handle, domid, spty); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/io.c --- a/tools/console/daemon/io.c Thu Sep 8 15:18:40 2005 +++ b/tools/console/daemon/io.c Fri Sep 9 16:30:54 2005 @@ -26,7 +26,6 @@ #include "xenctrl.h" #include "xs.h" #include "xen/io/domain_controller.h" -#include "xcs_proto.h" #include <malloc.h> #include <stdlib.h> @@ -36,9 +35,15 @@ #include <fcntl.h> #include <unistd.h> #include <termios.h> +#include <stdarg.h> +#include <sys/ioctl.h> +#include <sys/mman.h> #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. 
*/ +#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2) struct buffer { @@ -48,41 +53,6 @@ size_t max_capacity; }; -static void buffer_append(struct buffer *buffer, const void *data, size_t size) -{ - if ((buffer->capacity - buffer->size) < size) { - buffer->capacity += (size + 1024); - buffer->data = realloc(buffer->data, buffer->capacity); - if (buffer->data == NULL) { - dolog(LOG_ERR, "Memory allocation failed"); - exit(ENOMEM); - } - } - - memcpy(buffer->data + buffer->size, data, size); - buffer->size += size; - - if (buffer->max_capacity && - buffer->size > buffer->max_capacity) { - memmove(buffer->data + (buffer->size - buffer->max_capacity), - buffer->data, buffer->max_capacity); - buffer->data = realloc(buffer->data, buffer->max_capacity); - buffer->capacity = buffer->max_capacity; - } -} - -static bool buffer_empty(struct buffer *buffer) -{ - return buffer->size == 0; -} - -static void buffer_advance(struct buffer *buffer, size_t size) -{ - size = MIN(size, buffer->size); - memmove(buffer->data, buffer + size, buffer->size - size); - buffer->size -= size; -} - struct domain { int domid; @@ -90,9 +60,74 @@ bool is_dead; struct buffer buffer; struct domain *next; + char *conspath; + int ring_ref; + int local_port; + char *page; + int evtchn_fd; }; static struct domain *dom_head; + +struct ring_head +{ + u32 cons; + u32 prod; + char buf[0]; +} __attribute__((packed)); + +#define PAGE_SIZE (getpagesize()) +#define XENCONS_RING_SIZE (PAGE_SIZE/2 - sizeof (struct ring_head)) +#define XENCONS_IDX(cnt) ((cnt) % XENCONS_RING_SIZE) +#define XENCONS_FULL(ring) (((ring)->prod - (ring)->cons) == XENCONS_RING_SIZE) +#define XENCONS_SPACE(ring) (XENCONS_RING_SIZE - ((ring)->prod - (ring)->cons)) + +static void buffer_append(struct domain *dom) +{ + struct buffer *buffer = &dom->buffer; + struct ring_head *ring = (struct ring_head *)dom->page; + size_t size; + + while ((size = ring->prod - ring->cons) != 0) { + if ((buffer->capacity - buffer->size) < 
size) { + buffer->capacity += (size + 1024); + buffer->data = realloc(buffer->data, buffer->capacity); + if (buffer->data == NULL) { + dolog(LOG_ERR, "Memory allocation failed"); + exit(ENOMEM); + } + } + + while (ring->cons < ring->prod) { + buffer->data[buffer->size] = + ring->buf[XENCONS_IDX(ring->cons)]; + buffer->size++; + ring->cons++; + } + + if (buffer->max_capacity && + buffer->size > buffer->max_capacity) { + memmove(buffer->data + (buffer->size - + buffer->max_capacity), + buffer->data, buffer->max_capacity); + buffer->data = realloc(buffer->data, + buffer->max_capacity); + buffer->capacity = buffer->max_capacity; + } + } +} + +static bool buffer_empty(struct buffer *buffer) +{ + return buffer->size == 0; +} + +static void buffer_advance(struct buffer *buffer, size_t size) +{ + size = MIN(size, buffer->size); + memmove(buffer->data, buffer + size, buffer->size - size); + buffer->size -= size; +} static bool domain_is_valid(int domid) { @@ -107,8 +142,9 @@ static int domain_create_tty(struct domain *dom) { - char path[1024]; + char *path; int master; + bool success; if ((master = getpt()) == -1 || grantpt(master) == -1 || unlockpt(master) == -1) { @@ -126,27 +162,134 @@ tcsetattr(master, TCSAFLUSH, &term); } - xs_mkdir(xs, "/console"); - snprintf(path, sizeof(path), "/console/%d", dom->domid); - xs_mkdir(xs, path); - strcat(path, "/tty"); - - xs_write(xs, path, slave, strlen(slave), O_CREAT); - - snprintf(path, sizeof(path), "/console/%d/limit", dom->domid); + success = asprintf(&path, "%s/tty", dom->conspath) != -1; + if (!success) + goto out; + success = xs_write(xs, path, slave, strlen(slave), O_CREAT); + free(path); + if (!success) + goto out; + + success = asprintf(&path, "%s/limit", dom->conspath) != -1; + if (!success) + goto out; data = xs_read(xs, path, &len); if (data) { dom->buffer.max_capacity = strtoul(data, 0, 0); free(data); } + free(path); } return master; + out: + close(master); + return -1; +} + +/* Takes tuples of names, scanf-style 
args, and void **, NULL terminated. */ +int xs_gather(struct xs_handle *xs, const char *dir, ...) +{ + va_list ap; + const char *name; + char *path; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + asprintf(&path, "%s/%s", dir, name); + p = xs_read(xs, path, NULL); + free(path); + if (p == NULL) { + ret = ENOENT; + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = EINVAL; + free(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} + +#define EVENTCHN_BIND _IO('E', 2) +#define EVENTCHN_UNBIND _IO('E', 3) + +static int domain_create_ring(struct domain *dom) +{ + int err, local_port, ring_ref; + + err = xs_gather(xs, dom->conspath, + "ring-ref", "%u", &ring_ref, + "port", "%i", &local_port, + NULL); + if (err) + goto out; + + if (ring_ref != dom->ring_ref) { + if (dom->page) + munmap(dom->page, getpagesize()); + dom->page = xc_map_foreign_range(xc, dom->domid, getpagesize(), + PROT_READ|PROT_WRITE, + (unsigned long)ring_ref); + if (dom->page == NULL) { + err = EINVAL; + goto out; + } + dom->ring_ref = ring_ref; + } + + if (local_port != dom->local_port) { + dom->local_port = -1; + if (dom->evtchn_fd != -1) + close(dom->evtchn_fd); + /* Opening evtchn independently for each console is a bit + * wastefule, but that's how the code is structured... 
*/ + dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR); + if (dom->evtchn_fd == -1) { + err = errno; + goto out; + } + + if (ioctl(dom->evtchn_fd, EVENTCHN_BIND, local_port) == -1) { + err = errno; + close(dom->evtchn_fd); + dom->evtchn_fd = -1; + goto out; + } + dom->local_port = local_port; + } + + out: + return err; +} + +static bool watch_domain(struct domain *dom, bool watch) +{ + char domid_str[3 + MAX_STRLEN(dom->domid)]; + bool success; + + sprintf(domid_str, "dom%u", dom->domid); + if (watch) + success = xs_watch(xs, dom->conspath, domid_str); + else + success = xs_unwatch(xs, dom->conspath, domid_str); + if (success) + domain_create_ring(dom); + return success; } static struct domain *create_domain(int domid) { struct domain *dom; + char *s; dom = (struct domain *)malloc(sizeof(struct domain)); if (dom == NULL) { @@ -156,99 +299,145 @@ } dom->domid = domid; + + dom->conspath = xs_get_domain_path(xs, dom->domid); + if (dom->conspath == NULL) + goto out; + s = realloc(dom->conspath, strlen(dom->conspath) + + strlen("/console") + 1); + if (s == NULL) + goto out; + dom->conspath = s; + strcat(dom->conspath, "/console"); + dom->tty_fd = domain_create_tty(dom); dom->is_dead = false; dom->buffer.data = 0; dom->buffer.size = 0; dom->buffer.capacity = 0; dom->buffer.max_capacity = 0; - dom->next = 0; + dom->next = NULL; + + dom->ring_ref = -1; + dom->local_port = -1; + dom->page = NULL; + dom->evtchn_fd = -1; + + if (!watch_domain(dom, true)) + goto out; + + dom->next = dom_head; + dom_head = dom; dolog(LOG_DEBUG, "New domain %d", domid); return dom; + out: + if (dom->conspath) + free(dom->conspath); + free(dom); + return NULL; } static struct domain *lookup_domain(int domid) { + struct domain *dom; + + for (dom = dom_head; dom; dom = dom->next) + if (dom->domid == domid) + return dom; + return NULL; +} + +static void remove_domain(struct domain *dom) +{ struct domain **pp; + dolog(LOG_DEBUG, "Removing domain-%d", dom->domid); + for (pp = &dom_head; *pp; pp = 
&(*pp)->next) { - struct domain *dom = *pp; - - if (dom->domid == domid) { - return dom; - } else if (dom->domid > domid) { - *pp = create_domain(domid); - (*pp)->next = dom; - return *pp; - } - } - - *pp = create_domain(domid); - return *pp; -} - -static void remove_domain(struct domain *dom) -{ - struct domain **pp; - - dolog(LOG_DEBUG, "Removing domain-%d", dom->domid); - - for (pp = &dom_head; *pp; pp = &(*pp)->next) { - struct domain *d = *pp; - - if (dom->domid == d->domid) { - *pp = d->next; - if (d->buffer.data) { - free(d->buffer.data); - } - free(d); + if (dom == *pp) { + *pp = dom->next; + free(dom); break; } } } -static void remove_dead_domains(struct domain *dom) -{ - if (dom == NULL) return; - remove_dead_domains(dom->next); - - if (dom->is_dead) { - remove_domain(dom); +static void cleanup_domain(struct domain *d) +{ + if (!buffer_empty(&d->buffer)) + return; + + if (d->buffer.data) + free(d->buffer.data); + d->buffer.data = NULL; + if (d->tty_fd != -1) + close(d->tty_fd); + d->tty_fd = -1; + remove_domain(d); +} + +static void shutdown_domain(struct domain *d) +{ + d->is_dead = true; + watch_domain(d, false); + if (d->page) + munmap(d->page, getpagesize()); + d->page = NULL; + if (d->evtchn_fd != -1) + close(d->evtchn_fd); + d->evtchn_fd = -1; + cleanup_domain(d); +} + +void enum_domains(void) +{ + int domid = 1; + xc_dominfo_t dominfo; + struct domain *dom; + + while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) { + dom = lookup_domain(dominfo.domid); + if (dominfo.dying || dominfo.crashed || dominfo.shutdown) { + if (dom) + shutdown_domain(dom); + } else { + if (dom == NULL) + create_domain(dominfo.domid); + } + domid = dominfo.domid + 1; } } static void handle_tty_read(struct domain *dom) { ssize_t len; - xcs_msg_t msg; - - msg.type = XCS_REQUEST; - msg.u.control.remote_dom = dom->domid; - msg.u.control.msg.type = CMSG_CONSOLE; - msg.u.control.msg.subtype = CMSG_CONSOLE_DATA; - msg.u.control.msg.id = 1; - - len = read(dom->tty_fd, 
msg.u.control.msg.msg, 60); + char msg[80]; + struct ring_head *inring = + (struct ring_head *)(dom->page + PAGE_SIZE/2); + int i; + + len = read(dom->tty_fd, msg, MIN(XENCONS_SPACE(inring), sizeof(msg))); if (len < 1) { close(dom->tty_fd); + dom->tty_fd = -1; if (domain_is_valid(dom->domid)) { dom->tty_fd = domain_create_tty(dom); } else { - dom->is_dead = true; + shutdown_domain(dom); } } else if (domain_is_valid(dom->domid)) { - msg.u.control.msg.length = len; - - if (!write_sync(xcs_data_fd, &msg, sizeof(msg))) { - dolog(LOG_ERR, "Write to xcs failed: %m"); - exit(1); - } + for (i = 0; i < len; i++) { + inring->buf[XENCONS_IDX(inring->prod)] = msg[i]; + inring->prod++; + } + xc_evtchn_send(xc, dom->local_port); } else { close(dom->tty_fd); - dom->is_dead = true; + dom->tty_fd = -1; + shutdown_domain(dom); } } @@ -259,104 +448,105 @@ len = write(dom->tty_fd, dom->buffer.data, dom->buffer.size); if (len < 1) { close(dom->tty_fd); + dom->tty_fd = -1; if (domain_is_valid(dom->domid)) { dom->tty_fd = domain_create_tty(dom); } else { - dom->is_dead = true; + shutdown_domain(dom); } } else { buffer_advance(&dom->buffer, len); } } -static void handle_xcs_msg(int fd) -{ - xcs_msg_t msg; - - if (!read_sync(fd, &msg, sizeof(msg))) { - dolog(LOG_ERR, "read from xcs failed! 
%m"); - exit(1); - } else if (msg.type == XCS_REQUEST) { - struct domain *dom; - - dom = lookup_domain(msg.u.control.remote_dom); - buffer_append(&dom->buffer, - msg.u.control.msg.msg, - msg.u.control.msg.length); - } -} - -static void enum_domains(void) -{ - int domid = 0; - xc_dominfo_t dominfo; - - while (xc_domain_getinfo(xc, domid, 1, &dominfo) == 1) { - lookup_domain(dominfo.domid); - domid = dominfo.domid + 1; - } +static void handle_ring_read(struct domain *dom) +{ + u16 v; + + if (!read_sync(dom->evtchn_fd, &v, sizeof(v))) + return; + + buffer_append(dom); + + (void)write_sync(dom->evtchn_fd, &v, sizeof(v)); +} + +static void handle_xs(int fd) +{ + char **vec; + int domid; + struct domain *dom; + + vec = xs_read_watch(xs); + if (!vec) + return; + + if (!strcmp(vec[1], "domlist")) + enum_domains(); + else if (sscanf(vec[1], "dom%u", &domid) == 1) { + dom = lookup_domain(domid); + if (dom->is_dead == false) + domain_create_ring(dom); + } + + xs_acknowledge_watch(xs, vec[1]); + free(vec); } void handle_io(void) { fd_set readfds, writefds; int ret; - int max_fd = -1; - int num_of_writes = 0; do { - struct domain *d; - struct timeval tv = { 1, 0 }; + struct domain *d, *n; + struct timeval tv = { 100, 0 }; + int max_fd = -1; FD_ZERO(&readfds); FD_ZERO(&writefds); - FD_SET(xcs_data_fd, &readfds); - max_fd = MAX(xcs_data_fd, max_fd); + FD_SET(xs_fileno(xs), &readfds); + max_fd = MAX(xs_fileno(xs), max_fd); for (d = dom_head; d; d = d->next) { + if (d->evtchn_fd != -1) { + FD_SET(d->evtchn_fd, &readfds); + max_fd = MAX(d->evtchn_fd, max_fd); + } + if (d->tty_fd != -1) { - FD_SET(d->tty_fd, &readfds); + if (!d->is_dead) + FD_SET(d->tty_fd, &readfds); + + if (!buffer_empty(&d->buffer)) + FD_SET(d->tty_fd, &writefds); + max_fd = MAX(d->tty_fd, max_fd); } - - if (d->tty_fd != -1 && !buffer_empty(&d->buffer)) { - FD_SET(d->tty_fd, &writefds); + } + + ret = select(max_fd + 1, &readfds, &writefds, 0, &tv); + + if (FD_ISSET(xs_fileno(xs), &readfds)) + 
handle_xs(xs_fileno(xs)); + + for (d = dom_head; d; d = n) { + n = d->next; + if (d->evtchn_fd != -1 && + FD_ISSET(d->evtchn_fd, &readfds)) + handle_ring_read(d); + + if (d->tty_fd != -1) { + if (FD_ISSET(d->tty_fd, &readfds)) + handle_tty_read(d); + + if (FD_ISSET(d->tty_fd, &writefds)) + handle_tty_write(d); + + if (d->is_dead) + cleanup_domain(d); } - - max_fd = MAX(d->tty_fd, max_fd); - } - - ret = select(max_fd + 1, &readfds, &writefds, 0, &tv); - if (tv.tv_sec == 1 && (++num_of_writes % 100) == 0) { -#if 0 - /* FIXME */ - /* This is a nasty hack. xcs does not handle the - control channels filling up well at all. We'll - throttle ourselves here since we do proper - queueing to give the domains a shot at pulling out - the data. Fixing xcs is not worth it as it's - going away */ - tv.tv_usec = 1000; - select(0, 0, 0, 0, &tv); -#endif - } - enum_domains(); - - if (FD_ISSET(xcs_data_fd, &readfds)) { - handle_xcs_msg(xcs_data_fd); - } - - for (d = dom_head; d; d = d->next) { - if (!d->is_dead && FD_ISSET(d->tty_fd, &readfds)) { - handle_tty_read(d); - } - - if (!d->is_dead && FD_ISSET(d->tty_fd, &writefds)) { - handle_tty_write(d); - } - } - - remove_dead_domains(dom_head); + } } while (ret > -1); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/io.h --- a/tools/console/daemon/io.h Thu Sep 8 15:18:40 2005 +++ b/tools/console/daemon/io.h Fri Sep 9 16:30:54 2005 @@ -21,6 +21,7 @@ #ifndef CONSOLED_IO_H #define CONSOLED_IO_H +void enum_domains(void); void handle_io(void); #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/main.c --- a/tools/console/daemon/main.c Thu Sep 8 15:18:40 2005 +++ b/tools/console/daemon/main.c Fri Sep 9 16:30:54 2005 @@ -26,8 +26,6 @@ #include <sys/types.h> #include "xenctrl.h" -#include "xen/io/domain_controller.h" -#include "xcs_proto.h" #include "utils.h" #include "io.h" @@ -83,7 +81,10 @@ daemonize("/var/run/xenconsoled.pid"); } - xen_setup(); + if (!xen_setup()) + exit(1); + + enum_domains(); handle_io(); diff -r 
10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/utils.c --- a/tools/console/daemon/utils.c Thu Sep 8 15:18:40 2005 +++ b/tools/console/daemon/utils.c Fri Sep 9 16:30:54 2005 @@ -35,15 +35,11 @@ #include "xenctrl.h" #include "xen/io/domain_controller.h" -#include "xcs_proto.h" #include "utils.h" struct xs_handle *xs; int xc; - -int xcs_ctrl_fd = -1; -int xcs_data_fd = -1; bool _read_write_sync(int fd, void *data, size_t size, bool do_read) { @@ -69,32 +65,6 @@ } return true; -} - -static int open_domain_socket(const char *path) -{ - struct sockaddr_un addr; - int sock; - size_t addr_len; - - if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) { - goto out; - } - - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, path); - addr_len = sizeof(addr.sun_family) + strlen(XCS_SUN_PATH) + 1; - - if (connect(sock, (struct sockaddr *)&addr, addr_len) == -1) { - goto out_close_sock; - } - - return sock; - - out_close_sock: - close(sock); - out: - return -1; } static void child_exit(int sig) @@ -155,34 +125,8 @@ signal(SIGTTIN, SIG_IGN); } -/* synchronized send/recv strictly for setting up xcs */ -/* always use asychronize callbacks any other time */ -static bool xcs_send_recv(int fd, xcs_msg_t *msg) -{ - bool ret = false; - - if (!write_sync(fd, msg, sizeof(*msg))) { - dolog(LOG_ERR, "Write failed at %s:%s():L%d? Possible bug.", - __FILE__, __FUNCTION__, __LINE__); - goto out; - } - - if (!read_sync(fd, msg, sizeof(*msg))) { - dolog(LOG_ERR, "Read failed at %s:%s():L%d? Possible bug.", - __FILE__, __FUNCTION__, __LINE__); - goto out; - } - - ret = true; - - out: - return ret; -} - bool xen_setup(void) { - int sock; - xcs_msg_t msg; xs = xs_daemon_open(); if (xs == NULL) { @@ -197,57 +141,23 @@ goto out; } - sock = open_domain_socket(XCS_SUN_PATH); - if (sock == -1) { - dolog(LOG_ERR, "Failed to contact xcs (%m). 
Is it running?"); - goto out_close_store; + if (!xs_watch(xs, "@introduceDomain", "domlist")) { + dolog(LOG_ERR, "xenstore watch on @introduceDomain fails."); + goto out; } - xcs_ctrl_fd = sock; - - sock = open_domain_socket(XCS_SUN_PATH); - if (sock == -1) { - dolog(LOG_ERR, "Failed to contact xcs (%m). Is it running?"); - goto out_close_ctrl; - } - - xcs_data_fd = sock; - - memset(&msg, 0, sizeof(msg)); - msg.type = XCS_CONNECT_CTRL; - if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) { - dolog(LOG_ERR, "xcs control connect failed. Possible bug."); - goto out_close_data; + if (!xs_watch(xs, "@releaseDomain", "domlist")) { + dolog(LOG_ERR, "xenstore watch on @releaseDomain fails."); + goto out; } - msg.type = XCS_CONNECT_DATA; - if (!xcs_send_recv(xcs_data_fd, &msg) || msg.result != XCS_RSLT_OK) { - dolog(LOG_ERR, "xcs data connect failed. Possible bug."); - goto out_close_data; - } - - /* Since the vast majority of control messages are console messages - it's just easier to ignore other messages that try to bind to - a specific type. */ - msg.type = XCS_MSG_BIND; - msg.u.bind.port = PORT_WILDCARD; - msg.u.bind.type = TYPE_WILDCARD; - if (!xcs_send_recv(xcs_ctrl_fd, &msg) || msg.result != XCS_RSLT_OK) { - dolog(LOG_ERR, "xcs vind failed. 
Possible bug."); - goto out_close_data; - } - return true; - out_close_data: - close(xcs_ctrl_fd); - xcs_data_fd = -1; - out_close_ctrl: - close(xcs_ctrl_fd); - xcs_ctrl_fd = -1; - out_close_store: - xs_daemon_close(xs); out: + if (xs) + xs_daemon_close(xs); + if (xc != -1) + xc_interface_close(xc); return false; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/console/daemon/utils.h --- a/tools/console/daemon/utils.h Thu Sep 8 15:18:40 2005 +++ b/tools/console/daemon/utils.h Fri Sep 9 16:30:54 2005 @@ -33,13 +33,15 @@ #define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false) bool _read_write_sync(int fd, void *data, size_t size, bool do_read); -extern int xcs_ctrl_fd; -extern int xcs_data_fd; extern struct xs_handle *xs; extern int xc; #if 1 -#define dolog(val, fmt, ...) syslog(val, fmt, ## __VA_ARGS__) +#define dolog(val, fmt, ...) do { \ + if ((val) == LOG_ERR) \ + fprintf(stderr, fmt "\n", ## __VA_ARGS__); \ + syslog(val, fmt, ## __VA_ARGS__); \ +} while (/* CONSTCOND */0) #else #define dolog(val, fmt, ...) 
fprintf(stderr, fmt "\n", ## __VA_ARGS__) #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/Makefile --- a/tools/examples/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/examples/Makefile Fri Sep 9 16:30:54 2005 @@ -1,3 +1,6 @@ +XEN_ROOT = ../../ +include $(XEN_ROOT)/tools/Rules.mk + INSTALL = install INSTALL_DIR = $(INSTALL) -d -m0755 INSTALL_PROG = $(INSTALL) -m0755 @@ -24,8 +27,8 @@ XEN_BOOT_DIR = /usr/lib/xen/boot XEN_BOOT = mem-map.sxp -XEN_HOTPLUG_DIR = /etc/hotplug.d/xen-backend -XEN_HOTPLUG_SCRIPTS = backend.hotplug +XEN_HOTPLUG_DIR = /etc/hotplug +XEN_HOTPLUG_SCRIPTS = xen-backend.agent all: build: @@ -68,7 +71,7 @@ [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \ $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR) for i in $(XEN_HOTPLUG_SCRIPTS); \ - do [ -a $(DESTDIR)$(XEN_HOTPLUG_DIR)/$$i ] || \ + do \ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \ done diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/init.d/xend --- a/tools/examples/init.d/xend Thu Sep 8 15:18:40 2005 +++ b/tools/examples/init.d/xend Fri Sep 9 16:30:54 2005 @@ -11,7 +11,7 @@ exit 0 fi -# Wait for Xend and xcs to be up +# Wait for Xend to be up function await_daemons_up { i=1 diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/vif-bridge --- a/tools/examples/vif-bridge Thu Sep 8 15:18:40 2005 +++ b/tools/examples/vif-bridge Fri Sep 9 16:30:54 2005 @@ -80,7 +80,7 @@ fi ifconfig ${vif} $OP -if [ ${ip} ] ; then +if [ "${ip}" ] ; then # If we've been given a list of IP networks, allow pkts with these src addrs. for addr in ${ip} ; do diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/vif-route --- a/tools/examples/vif-route Thu Sep 8 15:18:40 2005 +++ b/tools/examples/vif-route Fri Sep 9 16:30:54 2005 @@ -63,7 +63,7 @@ ;; esac -if [ ${ip} ] ; then +if [ "${ip}" ] ; then # If we've been given a list of IP networks, allow pkts with these src addrs. 
for addr in ${ip} ; do diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample.vmx --- a/tools/examples/xmexample.vmx Thu Sep 8 15:18:40 2005 +++ b/tools/examples/xmexample.vmx Fri Sep 9 16:30:54 2005 @@ -73,6 +73,10 @@ vnc=1 #---------------------------------------------------------------------------- +# enable spawning vncviewer(only valid when vnc=1), default = 1 +vncviewer=1 + +#---------------------------------------------------------------------------- # no graphics, use serial port #nographic=0 diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample1 --- a/tools/examples/xmexample1 Thu Sep 8 15:18:40 2005 +++ b/tools/examples/xmexample1 Fri Sep 9 16:30:54 2005 @@ -48,6 +48,20 @@ disk = [ 'phy:hda1,hda1,w' ] #---------------------------------------------------------------------------- +# Define to which TPM instance the user domain should communicate. +# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM' +# where INSTANCE indicates the instance number of the TPM the VM +# should be talking to and DOM provides the domain where the backend +# is located. +# Note that no two virtual machines should try to connect to the same +# TPM instance. The handling of all TPM instances does require +# some management effort in so far that VM configration files (and thus +# a VM) should be associated with a TPM instance throughout the lifetime +# of the VM / VM configuration file. The instance number must be +# greater or equal to 1. +#vtpm = [ 'instance=1,backend=0' ] + +#---------------------------------------------------------------------------- # Set the kernel command line for the new domain. # You only need to define the IP parameters and hostname if the domain's # IP config doesn't, e.g. in ifcfg-eth0 or via DHCP. 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample2 --- a/tools/examples/xmexample2 Thu Sep 8 15:18:40 2005 +++ b/tools/examples/xmexample2 Fri Sep 9 16:30:54 2005 @@ -84,6 +84,20 @@ 'phy:sda6,sda6,r' ] #---------------------------------------------------------------------------- +# Define to which TPM instance the user domain should communicate. +# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM' +# where INSTANCE indicates the instance number of the TPM the VM +# should be talking to and DOM provides the domain where the backend +# is located. +# Note that no two virtual machines should try to connect to the same +# TPM instance. The handling of all TPM instances does require +# some management effort in so far that VM configration files (and thus +# a VM) should be associated with a TPM instance throughout the lifetime +# of the VM / VM configuration file. The instance number must be +# greater or equal to 1. +#vtpm = ['instance=%d,backend=0' % (vmid) ] + +#---------------------------------------------------------------------------- # Set the kernel command line for the new domain. # You only need to define the IP parameters and hostname if the domain's # IP config doesn't, e.g. in ifcfg-eth0 or via DHCP. diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xmexample3 --- a/tools/examples/xmexample3 Thu Sep 8 15:18:40 2005 +++ b/tools/examples/xmexample3 Fri Sep 9 16:30:54 2005 @@ -80,6 +80,20 @@ disk = [ 'phy:hda%d,hda1,w' % (vmid)] #---------------------------------------------------------------------------- +# Define to which TPM instance the user domain should communicate. +# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM' +# where INSTANCE indicates the instance number of the TPM the VM +# should be talking to and DOM provides the domain where the backend +# is located. +# Note that no two virtual machines should try to connect to the same +# TPM instance. 
The handling of all TPM instances does require +# some management effort in so far that VM configration files (and thus +# a VM) should be associated with a TPM instance throughout the lifetime +# of the VM / VM configuration file. The instance number must be +# greater or equal to 1. +#vtpm = ['instance=%d,backend=0' % (vmid) ] + +#---------------------------------------------------------------------------- # Set the kernel command line for the new domain. # You only need to define the IP parameters and hostname if the domain's # IP config doesn't, e.g. in ifcfg-eth0 or via DHCP. diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/rombios/rombios.c --- a/tools/firmware/rombios/rombios.c Thu Sep 8 15:18:40 2005 +++ b/tools/firmware/rombios/rombios.c Fri Sep 9 16:30:54 2005 @@ -31,7 +31,7 @@ // Xen full virtualization does not handle unaligned IO with page crossing. // Disable 32-bit PIO as a workaround. -#define NO_PIO32 +#undef NO_PIO32 // ROM BIOS compatability entry points: diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/Makefile --- a/tools/firmware/vmxassist/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/firmware/vmxassist/Makefile Fri Sep 9 16:30:54 2005 @@ -24,7 +24,7 @@ # The emulator code lives in ROM space TEXTADDR=0x000D0000 -DEFINES=-DDEBUG -DENABLE_VME -DTEXTADDR=${TEXTADDR} +DEFINES=-DDEBUG -DTEXTADDR=${TEXTADDR} XENINC=-I$(XEN_ROOT)/xen/include -I$(XEN_ROOT)/tools/libxc #DEFINES=-DDEBUG -DTEST -DTEXTADDR=${TEXTADDR} #XENINC=-I/home/leendert/xen/xeno-unstable.bk/xen/include diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/setup.c --- a/tools/firmware/vmxassist/setup.c Thu Sep 8 15:18:40 2005 +++ b/tools/firmware/vmxassist/setup.c Fri Sep 9 16:30:54 2005 @@ -353,7 +353,7 @@ #endif setup_gdt(); setup_idt(); -#ifdef ENABLE_VME +#ifndef TEST set_cr4(get_cr4() | CR4_VME); #endif setup_ctx(); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Thu Sep 8 15:18:40 2005 +++ 
b/tools/firmware/vmxassist/vm86.c Fri Sep 9 16:30:54 2005 @@ -465,8 +465,7 @@ * Emulate a segment load in protected mode */ int -load_seg(unsigned long sel, unsigned long *base, unsigned long *limit, - union vmcs_arbytes *arbytes) +load_seg(unsigned long sel, u32 *base, u32 *limit, union vmcs_arbytes *arbytes) { unsigned long long entry; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/vmxloader.c --- a/tools/firmware/vmxassist/vmxloader.c Thu Sep 8 15:18:40 2005 +++ b/tools/firmware/vmxassist/vmxloader.c Fri Sep 9 16:30:54 2005 @@ -110,8 +110,8 @@ } #ifdef _ACPI_ puts("Loading ACPI ...\n"); - if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){ - /* make sure acpi table does not overlap rombios + if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){ + /* make sure acpi table does not overlap rombios * currently acpi less than 8K will be OK. */ memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi)); @@ -122,5 +122,6 @@ memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist)); puts("Go ...\n"); ((void (*)())TEXTADDR)(); + return 0; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/cpu-all.h --- a/tools/ioemu/cpu-all.h Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/cpu-all.h Fri Sep 9 16:30:54 2005 @@ -672,6 +672,8 @@ int cpu_memory_rw_debug(CPUState *env, target_ulong addr, uint8_t *buf, int len, int is_write); +#define VGA_DIRTY_FLAG 0x01 + /* read dirty bit (return 0 or 1) */ static inline int cpu_physical_memory_is_dirty(target_ulong addr) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/exec.c --- a/tools/ioemu/exec.c Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/exec.c Fri Sep 9 16:30:54 2005 @@ -142,6 +142,10 @@ #else setvbuf(logfile, NULL, _IOLBF, 0); #endif +/* + stdout = logfile; + stderr = logfile; +*/ } } @@ -386,9 +390,6 @@ io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val); l = 2; } else { - if (l!=1){ - fprintf(logfile, "ERROR 8 bit mmio\n"); - } /* 8 bit access */ val = ldub_raw(buf); 
io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val); @@ -461,4 +462,14 @@ void cpu_physical_memory_reset_dirty(target_ulong start, target_ulong end) { -} + uint8_t *p; + int len; + + if ((len = (end - start)) <= 0) + return; + p = phys_ram_dirty + (start >> TARGET_PAGE_BITS); + len = len >> TARGET_PAGE_BITS; + while (len > 0) + p[--len] &= ~VGA_DIRTY_FLAG; + return; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/hw/pc.c Fri Sep 9 16:30:54 2005 @@ -540,7 +540,10 @@ if (pci_enabled) { for(i = 0; i < nb_nics; i++) { - pci_ne2000_init(pci_bus, &nd_table[i]); + if (nic_pcnet) + pci_pcnet_init(pci_bus, &nd_table[i]); + else + pci_ne2000_init(pci_bus, &nd_table[i]); } pci_piix3_ide_init(pci_bus, bs_table); #ifdef APIC_SUPPORT diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/hw/vga.c Fri Sep 9 16:30:54 2005 @@ -1620,7 +1620,6 @@ static void vga_save(QEMUFile *f, void *opaque) { VGAState *s = opaque; - int i; qemu_put_be32s(f, &s->latch); qemu_put_8s(f, &s->sr_index); @@ -1661,7 +1660,7 @@ static int vga_load(QEMUFile *f, void *opaque, int version_id) { VGAState *s = opaque; - int is_vbe, i; + int is_vbe; if (version_id != 1) return -EINVAL; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/target-i386-dm/Makefile --- a/tools/ioemu/target-i386-dm/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/target-i386-dm/Makefile Fri Sep 9 16:30:54 2005 @@ -272,7 +272,7 @@ # Hardware support VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pc.o port-e9.o -VL_OBJS+= cirrus_vga.o +VL_OBJS+= cirrus_vga.o pcnet.o ifeq ($(TARGET_ARCH), ppc) VL_OBJS+= ppc.o ide.o ne2000.o pckbd.o vga.o $(SOUND_HW) dma.o $(AUDIODRV) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/target-i386-dm/helper2.c --- a/tools/ioemu/target-i386-dm/helper2.c Thu Sep 8 15:18:40 2005 +++ 
b/tools/ioemu/target-i386-dm/helper2.c Fri Sep 9 16:30:54 2005 @@ -169,133 +169,217 @@ unsigned long do_inp(CPUState *env, unsigned long addr, unsigned long size) { - switch(size) { - case 1: - return cpu_inb(env, addr); - case 2: - return cpu_inw(env, addr); - case 4: - return cpu_inl(env, addr); - default: - fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size); - exit(-1); - } + switch(size) { + case 1: + return cpu_inb(env, addr); + case 2: + return cpu_inw(env, addr); + case 4: + return cpu_inl(env, addr); + default: + fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size); + exit(-1); + } } void do_outp(CPUState *env, unsigned long addr, unsigned long size, unsigned long val) { - switch(size) { - case 1: - return cpu_outb(env, addr, val); - case 2: - return cpu_outw(env, addr, val); - case 4: - return cpu_outl(env, addr, val); - default: - fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size); - exit(-1); - } + switch(size) { + case 1: + return cpu_outb(env, addr, val); + case 2: + return cpu_outw(env, addr, val); + case 4: + return cpu_outl(env, addr, val); + default: + fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size); + exit(-1); + } } extern void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, int len, int is_write); static inline void -read_physical(target_phys_addr_t addr, unsigned long size, void *val) -{ - return cpu_physical_memory_rw(addr, val, size, 0); +read_physical(u64 addr, unsigned long size, void *val) +{ + return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 0); } static inline void -write_physical(target_phys_addr_t addr, unsigned long size, void *val) -{ - return cpu_physical_memory_rw(addr, val, size, 1); -} - -//send the ioreq to device model -void cpu_dispatch_ioreq(CPUState *env, ioreq_t *req) -{ - int i; - int sign; - - sign = (req->df) ? 
-1 : 1; - - if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) { - if (req->size != 4) { - // Bochs expects higher bits to be 0 - req->u.data &= (1UL << (8 * req->size))-1; - } - } - - if (req->port_mm == 0){//port io - if(req->dir == IOREQ_READ){//read - if (!req->pdata_valid) { - req->u.data = do_inp(env, req->addr, req->size); - } else { - unsigned long tmp; - - for (i = 0; i < req->count; i++) { - tmp = do_inp(env, req->addr, req->size); - write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), - req->size, &tmp); - } - } - } else if(req->dir == IOREQ_WRITE) { - if (!req->pdata_valid) { - do_outp(env, req->addr, req->size, req->u.data); - } else { - for (i = 0; i < req->count; i++) { - unsigned long tmp; - - read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, - &tmp); - do_outp(env, req->addr, req->size, tmp); - } - } - - } - } else if (req->port_mm == 1){//memory map io +write_physical(u64 addr, unsigned long size, void *val) +{ + return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 1); +} + +void +cpu_ioreq_pio(CPUState *env, ioreq_t *req) +{ + int i, sign; + + sign = req->df ? 
-1 : 1; + + if (req->dir == IOREQ_READ) { if (!req->pdata_valid) { - //handle stos - if(req->dir == IOREQ_READ) { //read - for (i = 0; i < req->count; i++) { - read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &req->u.data); - } - } else if(req->dir == IOREQ_WRITE) { //write - for (i = 0; i < req->count; i++) { - write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &req->u.data); - } - } + req->u.data = do_inp(env, req->addr, req->size); } else { - //handle movs - unsigned long tmp; - if (req->dir == IOREQ_READ) { - for (i = 0; i < req->count; i++) { - read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &tmp); - write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, &tmp); - } - } else if (req->dir == IOREQ_WRITE) { - for (i = 0; i < req->count; i++) { - read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, &tmp); - write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &tmp); - } - } - } - } - /* No state change if state = STATE_IORESP_HOOK */ - if (req->state == STATE_IOREQ_INPROCESS) - req->state = STATE_IORESP_READY; - env->send_event = 1; + unsigned long tmp; + + for (i = 0; i < req->count; i++) { + tmp = do_inp(env, req->addr, req->size); + write_physical((target_phys_addr_t) req->u.pdata + + (sign * i * req->size), + req->size, &tmp); + } + } + } else if (req->dir == IOREQ_WRITE) { + if (!req->pdata_valid) { + do_outp(env, req->addr, req->size, req->u.data); + } else { + for (i = 0; i < req->count; i++) { + unsigned long tmp; + + read_physical((target_phys_addr_t) req->u.pdata + + (sign * i * req->size), + req->size, &tmp); + do_outp(env, req->addr, req->size, tmp); + } + } + } +} + +void +cpu_ioreq_move(CPUState *env, ioreq_t *req) +{ + int i, sign; + + sign = req->df ? 
-1 : 1; + + if (!req->pdata_valid) { + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + read_physical(req->addr + + (sign * i * req->size), + req->size, &req->u.data); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + write_physical(req->addr + + (sign * i * req->size), + req->size, &req->u.data); + } + } + } else { + unsigned long tmp; + + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + read_physical(req->addr + + (sign * i * req->size), + req->size, &tmp); + write_physical((target_phys_addr_t )req->u.pdata + + (sign * i * req->size), + req->size, &tmp); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + read_physical((target_phys_addr_t) req->u.pdata + + (sign * i * req->size), + req->size, &tmp); + write_physical(req->addr + + (sign * i * req->size), + req->size, &tmp); + } + } + } +} + +void +cpu_ioreq_and(CPUState *env, ioreq_t *req) +{ + unsigned long tmp1, tmp2; + + if (req->pdata_valid != 0) + hw_error("expected scalar value"); + + read_physical(req->addr, req->size, &tmp1); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 & (unsigned long) req->u.data; + write_physical(req->addr, req->size, &tmp2); + } + req->u.data = tmp1; +} + +void +cpu_ioreq_or(CPUState *env, ioreq_t *req) +{ + unsigned long tmp1, tmp2; + + if (req->pdata_valid != 0) + hw_error("expected scalar value"); + + read_physical(req->addr, req->size, &tmp1); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 | (unsigned long) req->u.data; + write_physical(req->addr, req->size, &tmp2); + } + req->u.data = tmp1; +} + +void +cpu_ioreq_xor(CPUState *env, ioreq_t *req) +{ + unsigned long tmp1, tmp2; + + if (req->pdata_valid != 0) + hw_error("expected scalar value"); + + read_physical(req->addr, req->size, &tmp1); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 ^ (unsigned long) req->u.data; + write_physical(req->addr, req->size, &tmp2); + } + req->u.data = tmp1; } void cpu_handle_ioreq(CPUState *env) 
{ ioreq_t *req = cpu_get_ioreq(); - if (req) - cpu_dispatch_ioreq(env, req); + + if (req) { + if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) { + if (req->size != 4) + req->u.data &= (1UL << (8 * req->size))-1; + } + + switch (req->type) { + case IOREQ_TYPE_PIO: + cpu_ioreq_pio(env, req); + break; + case IOREQ_TYPE_COPY: + cpu_ioreq_move(env, req); + break; + case IOREQ_TYPE_AND: + cpu_ioreq_and(env, req); + break; + case IOREQ_TYPE_OR: + cpu_ioreq_or(env, req); + break; + case IOREQ_TYPE_XOR: + cpu_ioreq_xor(env, req); + break; + default: + hw_error("Invalid ioreq type 0x%x", req->type); + } + + /* No state change if state = STATE_IORESP_HOOK */ + if (req->state == STATE_IOREQ_INPROCESS) + req->state = STATE_IORESP_READY; + env->send_event = 1; + } } void @@ -321,7 +405,7 @@ // Send a message on the event channel. Add the vector to the shared mem // page. - intr = &(shared_page->sp_global.pic_intr[0]); + intr = (unsigned long *) &(shared_page->sp_global.pic_intr[0]); atomic_set_bit(vector, intr); if (loglevel & CPU_LOG_INT) fprintf(logfile, "injecting vector: %x\n", vector); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/vl.c Fri Sep 9 16:30:54 2005 @@ -125,6 +125,7 @@ QEMUTimer *polling_timer; int vm_running; int audio_enabled = 0; +int nic_pcnet = 1; int sb16_enabled = 1; int adlib_enabled = 1; int gus_enabled = 1; @@ -412,6 +413,11 @@ fprintf(stderr, "qemu: hardware error: "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); + if (logfile) { + fprintf(logfile, "qemu: hardware error: "); + vfprintf(logfile, fmt, ap); + fprintf(logfile, "\n"); + } va_end(ap); abort(); } @@ -2115,6 +2121,7 @@ "-prep Simulate a PREP system (default is PowerMAC)\n" "-g WxH[xDEPTH] Set the initial VGA graphic mode\n" #endif + "-nic-pcnet simulate an AMD PC-Net PCI ethernet adaptor\n" "\n" "Network options:\n" "-nics n simulate 'n' network cards [default=1]\n" @@ -2229,6 +2236,7 @@ QEMU_OPTION_L, 
QEMU_OPTION_no_code_copy, QEMU_OPTION_pci, + QEMU_OPTION_nic_pcnet, QEMU_OPTION_isa, QEMU_OPTION_prep, QEMU_OPTION_k, @@ -2313,6 +2321,7 @@ /* temporary options */ { "pci", 0, QEMU_OPTION_pci }, + { "nic-pcnet", 0, QEMU_OPTION_nic_pcnet }, { "cirrusvga", 0, QEMU_OPTION_cirrusvga }, { NULL }, }; @@ -2639,6 +2648,9 @@ break; case QEMU_OPTION_pci: pci_enabled = 1; + break; + case QEMU_OPTION_nic_pcnet: + nic_pcnet = 1; break; case QEMU_OPTION_isa: pci_enabled = 0; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/vl.h Fri Sep 9 16:30:54 2005 @@ -600,6 +600,12 @@ void isa_ne2000_init(int base, int irq, NetDriverState *nd); void pci_ne2000_init(PCIBus *bus, NetDriverState *nd); +/* pcnet.c */ + +extern int nic_pcnet; + +void pci_pcnet_init(PCIBus *bus, NetDriverState *nd); + /* pckbd.c */ void kbd_init(void); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/Makefile --- a/tools/libxc/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/Makefile Fri Sep 9 16:30:54 2005 @@ -139,7 +139,7 @@ libxenguest.so.$(MAJOR): libxenguest.so.$(MAJOR).$(MINOR) ln -sf $< $@ -libxenguest.so.$(MAJOR).$(MINOR): $(PIC_BUILD_OBJS) +libxenguest.so.$(MAJOR).$(MINOR): $(PIC_BUILD_OBJS) libxenctrl.so $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-soname -Wl,libxenguest.so.$(MAJOR) -shared -o $@ $^ -lz -lxenctrl -include $(DEPS) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_core.c Fri Sep 9 16:30:54 2005 @@ -2,6 +2,7 @@ #define ELFSIZE 32 #include "xc_elf.h" #include <stdlib.h> +#include <unistd.h> #include <zlib.h> /* number of pages to write at a time */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_domain.c Fri Sep 9 16:30:54 2005 @@ -7,6 +7,7 @@ */ #include "xc_private.h" +#include <xen/memory.h> int xc_domain_create(int xc_handle, u32 ssidref, @@ -261,19 +262,66 @@ 
int xc_domain_memory_increase_reservation(int xc_handle, u32 domid, - unsigned int mem_kb) + unsigned long nr_extents, + unsigned int extent_order, + unsigned int address_bits, + unsigned long *extent_start) { int err; - unsigned int npages = mem_kb / (PAGE_SIZE/1024); - - err = xc_dom_mem_op(xc_handle, MEMOP_increase_reservation, NULL, - npages, 0, domid); - if (err == npages) + struct xen_memory_reservation reservation = { + .extent_start = extent_start, /* may be NULL */ + .nr_extents = nr_extents, + .extent_order = extent_order, + .address_bits = address_bits, + .domid = domid + }; + + err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation); + if (err == nr_extents) return 0; if (err > 0) { + fprintf(stderr,"Failed alocation for dom %d : %ld pages order %d addr_bits %d\n", + domid, nr_extents, extent_order, address_bits); errno = ENOMEM; err = -1; } return err; } + +int xc_domain_memory_decrease_reservation(int xc_handle, + u32 domid, + unsigned long nr_extents, + unsigned int extent_order, + unsigned long *extent_start) +{ + int err; + struct xen_memory_reservation reservation = { + .extent_start = extent_start, + .nr_extents = nr_extents, + .extent_order = extent_order, + .address_bits = 0, + .domid = domid + }; + + if (extent_start == NULL) + { + fprintf(stderr,"decrease_reservation extent_start is NULL!\n"); + errno = EINVAL; + err = -1; + goto out; + } + + err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation); + if (err == nr_extents) + return 0; + + if (err > 0) { + fprintf(stderr,"Failed de-alocation for dom %d : %ld pages order %d\n", + domid, nr_extents, extent_order); + errno = EBUSY; + err = -1; + } +out: + return err; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_linux_build.c Fri Sep 9 16:30:54 2005 @@ -17,6 +17,7 @@ #include "xc_elf.h" #include "xc_aout9.h" #include <stdlib.h> +#include <unistd.h> #include 
<zlib.h> #if defined(__i386__) @@ -56,7 +57,7 @@ } #define alloc_pt(ltab, vltab) \ - ltab = page_array[ppt_alloc++] << PAGE_SHIFT; \ + ltab = (unsigned long long)(page_array[ppt_alloc++]) << PAGE_SHIFT; \ if (vltab != NULL) { \ munmap(vltab, PAGE_SIZE); \ } \ @@ -127,18 +128,37 @@ l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL; l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL; l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL; - unsigned long l1tab = 0; - unsigned long l2tab = 0; - unsigned long l3tab = 0; + unsigned long long l1tab = 0; + unsigned long long l2tab = 0; + unsigned long long l3tab = 0; unsigned long ppt_alloc; unsigned long count; /* First allocate page for page dir. */ ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT; + + if ( page_array[ppt_alloc] > 0xfffff ) + { + unsigned long nmfn; + nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] ); + if ( nmfn == 0 ) + { + fprintf(stderr, "Couldn't get a page below 4GB :-(\n"); + goto error_out; + } + page_array[ppt_alloc] = nmfn; + } + alloc_pt(l3tab, vl3tab); vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)]; ctxt->ctrlreg[3] = l3tab; - + + if(l3tab>0xfffff000ULL) + { + fprintf(stderr,"L3TAB = %llx above 4GB!\n",l3tab); + goto error_out; + } + for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++) { if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) ) @@ -274,7 +294,6 @@ unsigned long *pvss, vcpu_guest_context_t *ctxt, const char *cmdline, unsigned long shared_info_frame, - unsigned int control_evtchn, unsigned long flags, unsigned int vcpus, unsigned int store_evtchn, unsigned long *store_mfn) @@ -332,10 +351,10 @@ unsigned long *pvss, vcpu_guest_context_t *ctxt, const char *cmdline, unsigned long shared_info_frame, - unsigned int control_evtchn, unsigned long flags, unsigned int vcpus, - unsigned int store_evtchn, unsigned long *store_mfn) + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn) { unsigned long *page_array = NULL; unsigned long 
count, i; @@ -346,7 +365,7 @@ unsigned long nr_pt_pages; unsigned long physmap_pfn; - u32 *physmap, *physmap_e; + unsigned long *physmap, *physmap_e; struct load_funcs load_funcs; struct domain_setup_info dsi; @@ -358,6 +377,8 @@ unsigned long vstartinfo_end; unsigned long vstoreinfo_start; unsigned long vstoreinfo_end; + unsigned long vconsole_start; + unsigned long vconsole_end; unsigned long vstack_start; unsigned long vstack_end; unsigned long vpt_start; @@ -391,16 +412,18 @@ vinitrd_end = vinitrd_start + initrd_len; vphysmap_start = round_pgup(vinitrd_end); vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); - vstoreinfo_start = round_pgup(vphysmap_end); + vstartinfo_start = round_pgup(vphysmap_end); + vstartinfo_end = vstartinfo_start + PAGE_SIZE; + vstoreinfo_start = vstartinfo_end; vstoreinfo_end = vstoreinfo_start + PAGE_SIZE; - vpt_start = vstoreinfo_end; + vconsole_start = vstoreinfo_end; + vconsole_end = vconsole_start + PAGE_SIZE; + vpt_start = vconsole_end; for ( nr_pt_pages = 2; ; nr_pt_pages++ ) { vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); - vstartinfo_start = vpt_end; - vstartinfo_end = vstartinfo_start + PAGE_SIZE; - vstack_start = vstartinfo_end; + vstack_start = vpt_end; vstack_end = vstack_start + PAGE_SIZE; v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); if ( (v_end - vstack_end) < (512UL << 10) ) @@ -436,17 +459,19 @@ " Loaded kernel: %p->%p\n" " Init. 
ramdisk: %p->%p\n" " Phys-Mach map: %p->%p\n" + " Start info: %p->%p\n" " Store page: %p->%p\n" + " Console page: %p->%p\n" " Page tables: %p->%p\n" - " Start info: %p->%p\n" " Boot stack: %p->%p\n" " TOTAL: %p->%p\n", _p(dsi.v_kernstart), _p(dsi.v_kernend), _p(vinitrd_start), _p(vinitrd_end), _p(vphysmap_start), _p(vphysmap_end), + _p(vstartinfo_start), _p(vstartinfo_end), _p(vstoreinfo_start), _p(vstoreinfo_end), + _p(vconsole_start), _p(vconsole_end), _p(vpt_start), _p(vpt_end), - _p(vstartinfo_start), _p(vstartinfo_end), _p(vstack_start), _p(vstack_end), _p(dsi.v_start), _p(v_end)); printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); @@ -519,12 +544,14 @@ physmap = physmap_e = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[physmap_pfn++]); + for ( count = 0; count < nr_pages; count++ ) { if ( xc_add_mmu_update(xc_handle, mmu, - (page_array[count] << PAGE_SHIFT) | + ((unsigned long long)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, count) ) { + fprintf(stderr,"m2p update failure p=%lx m=%lx\n",count,page_array[count] ); munmap(physmap, PAGE_SIZE); goto error_out; } @@ -566,6 +593,8 @@ #endif *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT]; + *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT]; + start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, @@ -577,9 +606,10 @@ start_info->pt_base = vpt_start; start_info->nr_pt_frames = nr_pt_pages; start_info->mfn_list = vphysmap_start; - start_info->domain_controller_evtchn = control_evtchn; start_info->store_mfn = *store_mfn; start_info->store_evtchn = store_evtchn; + start_info->console_mfn = *console_mfn; + start_info->console_evtchn = console_evtchn; if ( initrd_len != 0 ) { start_info->mod_start = vinitrd_start; @@ -627,11 +657,12 @@ const char *image_name, const char *ramdisk_name, const char *cmdline, - unsigned int control_evtchn, unsigned long flags, unsigned int vcpus, unsigned int store_evtchn, 
- unsigned long *store_mfn) + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) { dom0_op_t launch_op, op; int initrd_fd = -1; @@ -706,8 +737,9 @@ &vstartinfo_start, &vkern_entry, &vstack_start, ctxt, cmdline, op.u.getdomaininfo.shared_info_frame, - control_evtchn, flags, vcpus, - store_evtchn, store_mfn) < 0 ) + flags, vcpus, + store_evtchn, store_mfn, + console_evtchn, console_mfn) < 0 ) { ERROR("Error constructing guest OS"); goto error_out; @@ -727,7 +759,6 @@ ctxt->regs.ar_fpsr = FPSR_DEFAULT; /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */ ctxt->vcpu.privregs = 0; - ctxt->shared.domain_controller_evtchn = control_evtchn; ctxt->shared.flags = flags; i = 0; /* silence unused variable warning */ #else /* x86 */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_linux_restore.c Fri Sep 9 16:30:54 2005 @@ -8,24 +8,22 @@ #include <stdlib.h> #include <unistd.h> - #include "xg_private.h" #include <xenctrl.h> - -#include <xen/linux/suspend.h> +#include <xen/memory.h> #define MAX_BATCH_SIZE 1024 #define DEBUG 0 #if 1 -#define ERR(_f, _a...) fprintf ( stderr, _f , ## _a ); fflush(stderr) +#define ERR(_f, _a...) do { fprintf ( stderr, _f , ## _a ); fflush(stderr); } while(0) #else #define ERR(_f, _a...) ((void)0) #endif #if DEBUG -#define DPRINTF(_f, _a...) fprintf ( stdout, _f , ## _a ); fflush(stdout) +#define DPRINTF(_f, _a...) do { fprintf ( stdout, _f , ## _a ); fflush(stdout); } while (0) #else #define DPRINTF(_f, _a...) 
((void)0) #endif @@ -54,7 +52,8 @@ } int xc_linux_restore(int xc_handle, int io_fd, u32 dom, unsigned long nr_pfns, - unsigned int store_evtchn, unsigned long *store_mfn) + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn) { dom0_op_t op; int rc = 1, i, n, k; @@ -89,8 +88,8 @@ /* used by mapper for updating the domain's copy of the table */ unsigned long *live_pfn_to_mfn_table = NULL; - /* A temporary mapping of the guest's suspend record. */ - suspend_record_t *p_srec; + /* A temporary mapping of the guest's start_info page. */ + start_info_t *start_info; char *region_base; @@ -103,7 +102,7 @@ struct mmuext_op pin[MAX_PIN_BATCH]; unsigned int nr_pins = 0; - DPRINTF("xc_linux_restore start\n"); + DPRINTF("xc_linux_restore start: nr_pfns = %lx\n", nr_pfns); if (mlock(&ctxt, sizeof(ctxt))) { /* needed for when we do the build dom0 op, @@ -150,8 +149,10 @@ } err = xc_domain_memory_increase_reservation(xc_handle, dom, - nr_pfns * PAGE_SIZE / 1024); + nr_pfns, 0, 0, NULL); if (err != 0) { + ERR("Failed to increase reservation by %lx\n", + nr_pfns * PAGE_SIZE / 1024); errno = ENOMEM; goto out; } @@ -409,7 +410,8 @@ /* Get the list of PFNs that are not in the psuedo-phys map */ { - unsigned int count, *pfntab; + unsigned int count; + unsigned long *pfntab; int rc; if ( read_exact(io_fd, &count, sizeof(count)) != sizeof(count) ) @@ -441,9 +443,15 @@ if ( count > 0 ) { - if ( (rc = xc_dom_mem_op( xc_handle, - MEMOP_decrease_reservation, - pfntab, count, 0, dom )) <0 ) + struct xen_memory_reservation reservation = { + .extent_start = pfntab, + .nr_extents = count, + .extent_order = 0, + .domid = dom + }; + if ( (rc = xc_memory_op(xc_handle, + XENMEM_decrease_reservation, + &reservation)) != count ) { ERR("Could not decrease reservation : %d",rc); goto out; @@ -470,15 +478,18 @@ goto out; } ctxt.user_regs.esi = mfn = pfn_to_mfn_table[pfn]; - p_srec = xc_map_foreign_range( + start_info = xc_map_foreign_range( 
xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); - p_srec->resume_info.nr_pages = nr_pfns; - p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT; - p_srec->resume_info.flags = 0; - *store_mfn = p_srec->resume_info.store_mfn = - pfn_to_mfn_table[p_srec->resume_info.store_mfn]; - p_srec->resume_info.store_evtchn = store_evtchn; - munmap(p_srec, PAGE_SIZE); + start_info->nr_pages = nr_pfns; + start_info->shared_info = shared_info_frame << PAGE_SHIFT; + start_info->flags = 0; + *store_mfn = start_info->store_mfn = + pfn_to_mfn_table[start_info->store_mfn]; + start_info->store_evtchn = store_evtchn; + *console_mfn = start_info->console_mfn = + pfn_to_mfn_table[start_info->console_mfn]; + start_info->console_evtchn = console_evtchn; + munmap(start_info, PAGE_SIZE); /* Uncanonicalise each GDT frame number. */ if ( ctxt.gdt_ents > 8192 ) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_linux_save.c Fri Sep 9 16:30:54 2005 @@ -14,12 +14,29 @@ #include "xg_private.h" -#include <xen/linux/suspend.h> #include <xen/io/domain_controller.h> #define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */ #define MAX_MBIT_RATE 500 + + +/* +** Default values for important tuning parameters. Can override by passing +** non-zero replacement values to xc_linux_save(). +** +** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too. 
+** +*/ +#define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */ +#define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */ + + + +/* Flags to control behaviour of xc_linux_save */ +#define XCFLAGS_LIVE 1 +#define XCFLAGS_DEBUG 2 + #define DEBUG 0 @@ -320,18 +337,18 @@ xc_dominfo_t *info, vcpu_guest_context_t *ctxt) { - int i=0; + int i = 0; char ans[30]; printf("suspend\n"); fflush(stdout); if (fgets(ans, sizeof(ans), stdin) == NULL) { - ERR("failed reading suspend reply"); - return -1; + ERR("failed reading suspend reply"); + return -1; } if (strncmp(ans, "done\n", 5)) { - ERR("suspend reply incorrect: %s", ans); - return -1; + ERR("suspend reply incorrect: %s", ans); + return -1; } retry: @@ -377,19 +394,16 @@ return -1; } -int xc_linux_save(int xc_handle, int io_fd, u32 dom) +int xc_linux_save(int xc_handle, int io_fd, u32 dom, u32 max_iters, + u32 max_factor, u32 flags) { xc_dominfo_t info; int rc = 1, i, j, k, last_iter, iter = 0; unsigned long mfn; - int live = 0; // (ioctxt->flags & XCFLAGS_LIVE); - int debug = 0; // (ioctxt->flags & XCFLAGS_DEBUG); + int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; - - /* Important tuning parameters */ - int max_iters = 29; /* limit us to 30 times round loop */ - int max_factor = 3; /* never send more than 3x nr_pfns */ /* The new domain's shared-info frame number. */ unsigned long shared_info_frame; @@ -405,6 +419,7 @@ unsigned long page[1024]; /* A copy of the pfn-to-mfn table frame list. */ + unsigned long *live_pfn_to_mfn_frame_list_list = NULL; unsigned long *live_pfn_to_mfn_frame_list = NULL; unsigned long pfn_to_mfn_frame_list[1024]; @@ -420,9 +435,6 @@ /* base of the region in which domain memory is mapped */ unsigned char *region_base = NULL; - /* A temporary mapping, and a copy, of the guest's suspend record. 
*/ - suspend_record_t *p_srec = NULL; - /* number of pages we're dealing with */ unsigned long nr_pfns; @@ -442,8 +454,16 @@ MBIT_RATE = START_MBIT_RATE; - DPRINTF("xc_linux_save start %d\n", dom); - + + /* If no explicit control parameters given, use defaults */ + if(!max_iters) + max_iters = DEF_MAX_ITERS; + if(!max_factor) + max_factor = DEF_MAX_FACTOR; + + + DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live?"true":"false"); + if (mlock(&ctxt, sizeof(ctxt))) { ERR("Unable to mlock ctxt"); return 1; @@ -487,11 +507,20 @@ goto out; } - /* the pfn_to_mfn_frame_list fits in a single page */ + live_pfn_to_mfn_frame_list_list = xc_map_foreign_range(xc_handle, dom, + PAGE_SIZE, PROT_READ, + live_shinfo->arch.pfn_to_mfn_frame_list_list); + + if (!live_pfn_to_mfn_frame_list_list){ + ERR("Couldn't map pfn_to_mfn_frame_list_list"); + goto out; + } + live_pfn_to_mfn_frame_list = - xc_map_foreign_range(xc_handle, dom, - PAGE_SIZE, PROT_READ, - live_shinfo->arch.pfn_to_mfn_frame_list ); + xc_map_foreign_batch(xc_handle, dom, + PROT_READ, + live_pfn_to_mfn_frame_list_list, + (nr_pfns+(1024*1024)-1)/(1024*1024) ); if (!live_pfn_to_mfn_frame_list){ ERR("Couldn't map pfn_to_mfn_frame_list"); @@ -647,22 +676,6 @@ goto out; } - /* Map the suspend-record MFN to pin it. The page must be owned by - dom for this to succeed. */ - p_srec = xc_map_foreign_range(xc_handle, dom, - sizeof(*p_srec), PROT_READ | PROT_WRITE, - ctxt.user_regs.esi); - if (!p_srec){ - ERR("Couldn't map suspend record"); - goto out; - } - - /* Canonicalize store mfn. */ - if ( !translate_mfn_to_pfn(&p_srec->resume_info.store_mfn) ) { - ERR("Store frame is not in range of pseudophys map"); - goto out; - } - print_stats( xc_handle, dom, 0, &stats, 0 ); /* Now write out each data page, canonicalising page tables as we go... */ @@ -763,8 +776,6 @@ batch++; } -// DPRINTF("batch %d:%d (n=%d)\n", iter, batch, n); - if ( batch == 0 ) goto skip; /* vanishingly unlikely... 
*/ @@ -915,7 +926,7 @@ continue; } - if ( last_iter ) break; + if ( last_iter ) break; if ( live ) { @@ -1003,13 +1014,6 @@ } } - if (nr_pfns != p_srec->nr_pfns ) - { - ERR("Suspend record nr_pfns unexpected (%ld != %ld)", - p_srec->nr_pfns, nr_pfns); - goto out; - } - /* Canonicalise the suspend-record frame number. */ if ( !translate_mfn_to_pfn(&ctxt.user_regs.esi) ){ ERR("Suspend record is not in range of pseudophys map"); @@ -1043,9 +1047,6 @@ if(live_shinfo) munmap(live_shinfo, PAGE_SIZE); - if(p_srec) - munmap(p_srec, sizeof(*p_srec)); - if(live_pfn_to_mfn_frame_list) munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_load_aout9.c --- a/tools/libxc/xc_load_aout9.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_load_aout9.c Fri Sep 9 16:30:54 2005 @@ -14,7 +14,8 @@ #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) -#define round_pgdown(_p) ((_p)&PAGE_MASK) +#define KZERO 0x80000000 +#define KOFFSET(_p) ((_p)&~KZERO) static int parseaout9image(char *, unsigned long, struct domain_setup_info *); static int loadaout9image(char *, unsigned long, int, u32, unsigned long *, struct domain_setup_info *); @@ -47,7 +48,7 @@ struct domain_setup_info *dsi) { struct Exec ehdr; - unsigned long start, txtsz, end; + unsigned long start, dstart, end; if (!get_header(image, image_size, &ehdr)) { ERROR("Kernel image does not have a a.out9 header."); @@ -59,11 +60,11 @@ return -EINVAL; } - start = round_pgdown(ehdr.entry); - txtsz = round_pgup(ehdr.text); - end = start + txtsz + ehdr.data + ehdr.bss; + start = ehdr.entry; + dstart = round_pgup(start + ehdr.text); + end = dstart + ehdr.data + ehdr.bss; - dsi->v_start = start; + dsi->v_start = KZERO; dsi->v_kernstart = start; dsi->v_kernend = end; dsi->v_kernentry = ehdr.entry; @@ -83,19 +84,18 @@ struct domain_setup_info *dsi) { struct Exec ehdr; - unsigned long txtsz; + unsigned long start, dstart; if (!get_header(image, image_size, &ehdr)) { ERROR("Kernel image does not have a 
a.out9 header."); return -EINVAL; } - txtsz = round_pgup(ehdr.text); - copyout(xch, dom, parray, - 0, image, sizeof ehdr + ehdr.text); - copyout(xch, dom, parray, - txtsz, image + sizeof ehdr + ehdr.text, ehdr.data); - /* XXX zeroing of BSS needed? */ + start = ehdr.entry; + dstart = round_pgup(start + ehdr.text); + copyout(xch, dom, parray, start, image + sizeof ehdr, ehdr.text); + copyout(xch, dom, parray, dstart, + image + sizeof ehdr + ehdr.text, ehdr.data); /* XXX load symbols */ @@ -110,13 +110,14 @@ copyout( int xch, u32 dom, unsigned long *parray, - unsigned long off, + unsigned long addr, void *buf, int sz) { - unsigned long pgoff, chunksz; + unsigned long pgoff, chunksz, off; void *pg; + off = KOFFSET(addr); while (sz > 0) { pgoff = off & (PAGE_SIZE-1); chunksz = sz; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_private.c Fri Sep 9 16:30:54 2005 @@ -6,6 +6,7 @@ #include <zlib.h> #include "xc_private.h" +#include <xen/memory.h> void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot, unsigned long *arr, int num ) @@ -115,7 +116,7 @@ if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 ) { - fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to" + fprintf(stderr, "Dom_mmuext operation failed (rc=%ld errno=%d)-- need to" " rebuild the user-space tool set?\n",ret,errno); } @@ -171,7 +172,7 @@ } int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, - unsigned long ptr, unsigned long val) + unsigned long long ptr, unsigned long long val) { mmu->updates[mmu->idx].ptr = ptr; mmu->updates[mmu->idx].val = val; @@ -187,38 +188,64 @@ return flush_mmu_updates(xc_handle, mmu); } -int xc_dom_mem_op(int xc_handle, - unsigned int memop, - unsigned int *extent_list, - unsigned int nr_extents, - unsigned int extent_order, - domid_t domid) +int xc_memory_op(int xc_handle, + int cmd, + void *arg) { privcmd_hypercall_t hypercall; + struct xen_memory_reservation 
*reservation = arg; long ret = -EINVAL; - hypercall.op = __HYPERVISOR_dom_mem_op; - hypercall.arg[0] = (unsigned long)memop; - hypercall.arg[1] = (unsigned long)extent_list; - hypercall.arg[2] = (unsigned long)nr_extents; - hypercall.arg[3] = (unsigned long)extent_order; - hypercall.arg[4] = (unsigned long)domid; - - if ( (extent_list != NULL) && - (mlock(extent_list, nr_extents*sizeof(unsigned long)) != 0) ) - { - PERROR("Could not lock memory for Xen hypercall"); - goto out1; + hypercall.op = __HYPERVISOR_memory_op; + hypercall.arg[0] = (unsigned long)cmd; + hypercall.arg[1] = (unsigned long)arg; + + switch ( cmd ) + { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + if ( mlock(reservation, sizeof(*reservation)) != 0 ) + { + PERROR("Could not mlock"); + goto out1; + } + if ( (reservation->extent_start != NULL) && + (mlock(reservation->extent_start, + reservation->nr_extents * sizeof(unsigned long)) != 0) ) + { + PERROR("Could not mlock"); + safe_munlock(reservation, sizeof(*reservation)); + goto out1; + } + break; + case XENMEM_maximum_ram_page: + if ( mlock(arg, sizeof(unsigned long)) != 0 ) + { + PERROR("Could not mlock"); + goto out1; + } + break; } if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 ) { - fprintf(stderr, "Dom_mem operation failed (rc=%ld errno=%d)-- need to" + fprintf(stderr, "hypercall failed (rc=%ld errno=%d)-- need to" " rebuild the user-space tool set?\n",ret,errno); } - if ( extent_list != NULL ) - safe_munlock(extent_list, nr_extents*sizeof(unsigned long)); + switch ( cmd ) + { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + safe_munlock(reservation, sizeof(*reservation)); + if ( reservation->extent_start != NULL ) + safe_munlock(reservation->extent_start, + reservation->nr_extents * sizeof(unsigned long)); + break; + case XENMEM_maximum_ram_page: + safe_munlock(arg, sizeof(unsigned long)); + break; + } out1: return ret; @@ -395,3 +422,26 @@ { return do_dom0_op(xc_handle, op); } + 
+int xc_version(int xc_handle, int cmd, void *arg) +{ + return do_xen_version(xc_handle, cmd, arg); +} + +unsigned long xc_make_page_below_4G(int xc_handle, u32 domid, + unsigned long mfn) +{ + unsigned long new_mfn; + if ( xc_domain_memory_decrease_reservation( + xc_handle, domid, 1, 0, &mfn ) != 1 ) + { + fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn); + return 0; + } + if ( xc_domain_memory_increase_reservation( xc_handle, domid, 1, 0, 32, &new_mfn ) != 1 ) + { + fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn); + return 0; + } + return new_mfn; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_private.h --- a/tools/libxc/xc_private.h Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_private.h Fri Sep 9 16:30:54 2005 @@ -59,6 +59,17 @@ (unsigned long)hypercall); } +static inline int do_xen_version(int xc_handle, int cmd, void *dest) +{ + privcmd_hypercall_t hypercall; + + hypercall.op = __HYPERVISOR_xen_version; + hypercall.arg[0] = (unsigned long) cmd; + hypercall.arg[1] = (unsigned long) dest; + + return do_xen_hypercall(xc_handle, &hypercall); +} + static inline int do_dom0_op(int xc_handle, dom0_op_t *op) { int ret = -1; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xc_vmx_build.c --- a/tools/libxc/xc_vmx_build.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xc_vmx_build.c Fri Sep 9 16:30:54 2005 @@ -7,6 +7,7 @@ #define ELFSIZE 32 #include "xc_elf.h" #include <stdlib.h> +#include <unistd.h> #include <zlib.h> #include <xen/io/ioreq.h> #include "linux_boot_params.h" diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xenctrl.h Fri Sep 9 16:30:54 2005 @@ -23,6 +23,7 @@ #include <sys/ptrace.h> #include <xen/xen.h> #include <xen/dom0_ops.h> +#include <xen/version.h> #include <xen/event_channel.h> #include <xen/sched_ctl.h> #include <xen/acm.h> @@ -386,7 +387,19 @@ int xc_domain_memory_increase_reservation(int xc_handle, u32 domid, - unsigned 
int mem_kb); + unsigned long nr_extents, + unsigned int extent_order, + unsigned int address_bits, + unsigned long *extent_start); + +int xc_domain_memory_decrease_reservation(int xc_handle, + u32 domid, + unsigned long nr_extents, + unsigned int extent_order, + unsigned long *extent_start); + +unsigned long xc_make_page_below_4G(int xc_handle, u32 domid, + unsigned long mfn); typedef dom0_perfc_desc_t xc_perfc_desc_t; /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */ @@ -430,9 +443,7 @@ int xc_mmuext_op(int xc_handle, struct mmuext_op *op, unsigned int nr_ops, domid_t dom); -int xc_dom_mem_op(int xc_handle, unsigned int memop, unsigned int *extent_list, - unsigned int nr_extents, unsigned int extent_order, - domid_t domid); +int xc_memory_op(int xc_handle, int cmd, void *arg); int xc_get_pfn_type_batch(int xc_handle, u32 dom, int num, unsigned long *arr); @@ -498,6 +509,8 @@ /* Execute a privileged dom0 operation. */ int xc_dom0_op(int xc_handle, dom0_op_t *op); + +int xc_version(int xc_handle, int cmd, void *arg); /* Initializes the store (for dom0) remote_port should be the remote end of a bound interdomain channel between @@ -520,7 +533,7 @@ typedef struct xc_mmu xc_mmu_t; xc_mmu_t *xc_init_mmu_updates(int xc_handle, domid_t dom); int xc_add_mmu_update(int xc_handle, xc_mmu_t *mmu, - unsigned long ptr, unsigned long val); + unsigned long long ptr, unsigned long long val); int xc_finish_mmu_updates(int xc_handle, xc_mmu_t *mmu); #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xenguest.h Fri Sep 9 16:30:54 2005 @@ -6,13 +6,12 @@ * Copyright (c) 2003-2004, K A Fraser. 
*/ -#ifndef XENBUILD_H -#define XENBUILD_H +#ifndef XENGUEST_H +#define XENGUEST_H -#define XCFLAGS_VERBOSE 1 -#define XCFLAGS_LIVE 2 -#define XCFLAGS_DEBUG 4 -#define XCFLAGS_CONFIGURE 8 +#define XCFLAGS_LIVE 1 +#define XCFLAGS_DEBUG 2 + /** * This function will save a domain running Linux. @@ -22,7 +21,8 @@ * @parm dom the id of the domain * @return 0 on success, -1 on failure */ -int xc_linux_save(int xc_handle, int fd, uint32_t dom); +int xc_linux_save(int xc_handle, int fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */); /** * This function will restore a saved domain running Linux. @@ -35,19 +35,22 @@ * @parm store_mfn returned with the mfn of the store page * @return 0 on success, -1 on failure */ -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns, - unsigned int store_evtchn, unsigned long *store_mfn); +int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, + unsigned long nr_pfns, unsigned int store_evtchn, + unsigned long *store_mfn, unsigned int console_evtchn, + unsigned long *console_mfn); int xc_linux_build(int xc_handle, uint32_t domid, const char *image_name, const char *ramdisk_name, const char *cmdline, - unsigned int control_evtchn, unsigned long flags, unsigned int vcpus, unsigned int store_evtchn, - unsigned long *store_mfn); + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn); struct mem_map; int xc_vmx_build(int xc_handle, @@ -63,4 +66,4 @@ unsigned int store_evtchn, unsigned long *store_mfn); -#endif +#endif // XENGUEST_H diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Thu Sep 8 15:18:40 2005 +++ b/tools/libxc/xg_private.c Fri Sep 9 16:30:54 2005 @@ -5,6 +5,7 @@ */ #include <stdlib.h> +#include <unistd.h> #include <zlib.h> #include "xg_private.h" diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/cpuperf/cpuperf.c --- a/tools/misc/cpuperf/cpuperf.c Thu Sep 8 15:18:40 2005 +++ 
b/tools/misc/cpuperf/cpuperf.c Fri Sep 9 16:30:54 2005 @@ -243,16 +243,12 @@ } if (read) { - while((cpu_mask&1)) { - int i; - for (i=0x300;i<0x312;i++) { - printf("%010llu ",cpus_rdmsr( cpu_mask, i ) ); - } - printf("\n"); - cpu_mask>>=1; - } + int i; + for (i=0x300;i<0x312;i++) + printf("%010llu ",cpus_rdmsr( cpu_mask, i ) ); + printf("\n"); exit(1); - } + } if (!escr) { fprintf(stderr, "Need an ESCR.\n"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/Makefile --- a/tools/misc/mbootpack/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/misc/mbootpack/Makefile Fri Sep 9 16:30:54 2005 @@ -20,8 +20,7 @@ INCS := -I. -I- DEFS := LDFLAGS := -CC := gcc -CFLAGS := -Wall -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format +CFLAGS := -Wall -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format CFLAGS += -Wmissing-prototypes #CFLAGS += -pipe -g -O0 -Wcast-align CFLAGS += -pipe -O3 @@ -34,7 +33,7 @@ DEPS = .*.d mbootpack: $(OBJS) - $(CC) -o $@ $(filter-out %.a, $^) $(LDFLAGS) + $(HOSTCC) -o $@ $(filter-out %.a, $^) $(LDFLAGS) clean: $(RM) mbootpack *.o $(DEPS) bootsect setup bzimage_header.c bin2c @@ -48,7 +47,7 @@ $(LD) -m elf_i386 -Ttext 0x0 -s --oformat binary setup.o -o $@ bin2c: bin2c.o - $(CC) -o $@ $^ + $(HOSTCC) -o $@ $^ bzimage_header.c: bootsect setup bin2c ./bin2c -n 8 -b1 -a bzimage_bootsect bootsect > bzimage_header.c @@ -58,10 +57,10 @@ @ %.o: %.S - $(CC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@ + $(HOSTCC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@ %.o: %.c - $(CC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@ + $(HOSTCC) $(DEPFLAGS) $(CFLAGS) $(INCS) $(DEFS) -c $< -o $@ .PHONY: all clean gdb .PRECIOUS: $(OBJS) $(OBJS:.o=.c) $(DEPS) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/buildimage.c --- a/tools/misc/mbootpack/buildimage.c Thu Sep 8 15:18:40 2005 +++ b/tools/misc/mbootpack/buildimage.c Fri Sep 9 16:30:54 2005 @@ -42,6 +42,7 @@ #include "mbootpack.h" #include "mb_header.h" + /* We will build an image that a 
bzImage-capable bootloader will load like * this: @@ -105,8 +106,8 @@ section_t *s; /* Patch the kernel and mbi addresses into the setup code */ - *(address_t *)(bzimage_setup + BZ_ENTRY_OFFSET) = entry; - *(address_t *)(bzimage_setup + BZ_MBI_OFFSET) = mbi; + *(address_t *)(bzimage_setup + BZ_ENTRY_OFFSET) = eswap(entry); + *(address_t *)(bzimage_setup + BZ_MBI_OFFSET) = eswap(mbi); if (!quiet) printf("Kernel entry is %p, MBI is %p.\n", entry, mbi); /* Write out header and trampoline */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/mbootpack.c --- a/tools/misc/mbootpack/mbootpack.c Thu Sep 8 15:18:40 2005 +++ b/tools/misc/mbootpack/mbootpack.c Fri Sep 9 16:30:54 2005 @@ -252,20 +252,21 @@ for (i = 0; i <= MIN(len - 12, MULTIBOOT_SEARCH - 12); i += 4) { mbh = (struct multiboot_header *)(headerbuf + i); - if (mbh->magic != MULTIBOOT_MAGIC - || ((mbh->magic+mbh->flags+mbh->checksum) & 0xffffffff)) + if (eswap(mbh->magic) != MULTIBOOT_MAGIC + || ((eswap(mbh->magic)+eswap(mbh->flags)+eswap(mbh->checksum)) + & 0xffffffff)) { /* Not a multiboot header */ continue; } - if (mbh->flags & MULTIBOOT_UNSUPPORTED) { + if (eswap(mbh->flags) & MULTIBOOT_UNSUPPORTED) { /* Requires options we don't support */ printf("Fatal: found a multiboot header, but it " "requires multiboot options that I\n" "don't understand. Sorry.\n"); exit(1); } - if (mbh->flags & MULTIBOOT_VIDEO_MODE) { + if (eswap(mbh->flags) & MULTIBOOT_VIDEO_MODE) { /* Asked for screen mode information */ /* XXX carry on regardless */ printf("Warning: found a multiboot header which asks " @@ -275,22 +276,22 @@ } /* This kernel will do: place and load it */ - if (mbh->flags & MULTIBOOT_AOUT_KLUDGE) { + if (eswap(mbh->flags) & MULTIBOOT_AOUT_KLUDGE) { /* Load using the offsets in the multiboot header */ if(!quiet) printf("Loading %s using multiboot header.\n", filename); /* How much is there? 
*/ - start = mbh->load_addr; - if (mbh->load_end_addr != 0) - loadsize = mbh->load_end_addr - mbh->load_addr; + start = eswap(mbh->load_addr); + if (eswap(mbh->load_end_addr) != 0) + loadsize = eswap(mbh->load_end_addr) - eswap(mbh->load_addr); else loadsize = sb.st_size; /* How much memory will it take up? */ - if (mbh->bss_end_addr != 0) - size = mbh->bss_end_addr - mbh->load_addr; + if (eswap(mbh->bss_end_addr) != 0) + size = eswap(mbh->bss_end_addr) - eswap(mbh->load_addr); else size = loadsize; @@ -335,32 +336,34 @@ /* Done. */ if (!quiet) printf("Loaded kernel from %s\n", filename); - return mbh->entry_addr; + return eswap(mbh->entry_addr); } else { /* Now look for an ELF32 header */ ehdr = (Elf32_Ehdr *)headerbuf; - if (*(unsigned long *)ehdr != 0x464c457f + if (*(unsigned long *)ehdr != eswap(0x464c457f) || ehdr->e_ident[EI_DATA] != ELFDATA2LSB || ehdr->e_ident[EI_CLASS] != ELFCLASS32 - || ehdr->e_machine != EM_386) + || eswap(ehdr->e_machine) != EM_386) { printf("Fatal: kernel has neither ELF32/x86 nor multiboot load" " headers.\n"); exit(1); } - if (ehdr->e_phoff + ehdr->e_phnum*sizeof(*phdr) > HEADERBUF_SIZE) { + if (eswap(ehdr->e_phoff) + eswap(ehdr->e_phnum)*sizeof(*phdr) + > HEADERBUF_SIZE) { /* Don't expect this will happen with sane kernels */ printf("Fatal: too much ELF for me. 
Try increasing " "HEADERBUF_SIZE in mbootpack.\n"); exit(1); } - if (ehdr->e_phoff + ehdr->e_phnum*sizeof (*phdr) > len) { + if (eswap(ehdr->e_phoff) + eswap(ehdr->e_phnum)*sizeof (*phdr) + > len) { printf("Fatal: malformed ELF header overruns EOF.\n"); exit(1); } - if (ehdr->e_phnum <= 0) { + if (eswap(ehdr->e_phnum) <= 0) { printf("Fatal: ELF kernel has no program headers.\n"); exit(1); } @@ -368,22 +371,22 @@ if(!quiet) printf("Loading %s using ELF header.\n", filename); - if (ehdr->e_type != ET_EXEC - || ehdr->e_version != EV_CURRENT - || ehdr->e_phentsize != sizeof (Elf32_Phdr)) { + if (eswap(ehdr->e_type) != ET_EXEC + || eswap(ehdr->e_version) != EV_CURRENT + || eswap(ehdr->e_phentsize) != sizeof (Elf32_Phdr)) { printf("Warning: funny-looking ELF header.\n"); } - phdr = (Elf32_Phdr *)(headerbuf + ehdr->e_phoff); + phdr = (Elf32_Phdr *)(headerbuf + eswap(ehdr->e_phoff)); /* Obey the program headers to load the kernel */ - for(i = 0; i < ehdr->e_phnum; i++) { - - start = phdr[i].p_paddr; - size = phdr[i].p_memsz; - if (phdr[i].p_type != PT_LOAD) + for(i = 0; i < eswap(ehdr->e_phnum); i++) { + + start = eswap(phdr[i].p_paddr); + size = eswap(phdr[i].p_memsz); + if (eswap(phdr[i].p_type) != PT_LOAD) loadsize = 0; else - loadsize = MIN((long int)phdr[i].p_filesz, size); + loadsize = MIN((long int)eswap(phdr[i].p_filesz), size); if ((buffer = malloc(size)) == NULL) { printf("Fatal: malloc() for kernel load failed: %s\n", @@ -396,7 +399,7 @@ /* Load section from file */ if (loadsize > 0) { - if (fseek(fp, phdr[i].p_offset, SEEK_SET) != 0) { + if (fseek(fp, eswap(phdr[i].p_offset), SEEK_SET) != 0) { printf("Fatal: seek failed in %s\n", strerror(errno)); exit(1); @@ -452,7 +455,7 @@ /* Done! 
*/ if (!quiet) printf("Loaded kernel from %s\n", filename); - return ehdr->e_entry; + return eswap(ehdr->e_entry); } } @@ -568,12 +571,12 @@ /* Command line */ p = (char *)(mbi + 1); sprintf(p, "%s %s", imagename, command_line); - mbi->cmdline = ((address_t)p) + mbi_reloc_offset; + mbi->cmdline = eswap(((address_t)p) + mbi_reloc_offset); p += command_line_len; /* Bootloader ID */ sprintf(p, version_string); - mbi->boot_loader_name = ((address_t)p) + mbi_reloc_offset; + mbi->boot_loader_name = eswap(((address_t)p) + mbi_reloc_offset); p += strlen(version_string) + 1; /* Next is space for the module command lines */ @@ -582,17 +585,17 @@ /* Last come the module info structs */ modp = (struct mod_list *) ((((address_t)p + mod_command_line_space) + 3) & ~3); - mbi->mods_count = modules; - mbi->mods_addr = ((address_t)modp) + mbi_reloc_offset; + mbi->mods_count = eswap(modules); + mbi->mods_addr = eswap(((address_t)modp) + mbi_reloc_offset); /* Memory information will be added at boot time, by setup.S * or trampoline.S. 
*/ - mbi->flags = MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME; + mbi->flags = eswap(MB_INFO_CMDLINE | MB_INFO_BOOT_LOADER_NAME); /* Load the modules */ if (modules) { - mbi->flags |= MB_INFO_MODS; + mbi->flags = eswap(eswap(mbi->flags) | MB_INFO_MODS); /* Go back and parse the module command lines */ optind = opterr = 1; @@ -652,10 +655,10 @@ if (p != NULL) *p = ' '; /* Fill in the module info struct */ - modp->mod_start = start; - modp->mod_end = start + size; - modp->cmdline = (address_t)mod_clp + mbi_reloc_offset; - modp->pad = 0; + modp->mod_start = eswap(start); + modp->mod_end = eswap(start + size); + modp->cmdline = eswap((address_t)mod_clp + mbi_reloc_offset); + modp->pad = eswap(0); modp++; /* Store the module command line */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/mbootpack/mbootpack.h --- a/tools/misc/mbootpack/mbootpack.h Thu Sep 8 15:18:40 2005 +++ b/tools/misc/mbootpack/mbootpack.h Fri Sep 9 16:30:54 2005 @@ -31,6 +31,24 @@ #undef NDEBUG #include <stdio.h> + +#include <endian.h> +#include <byteswap.h> +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define eswap(x) (x) +#else +#define eswap(x) \ + ({ \ + typeof(x) y = (x); \ + switch(sizeof(y)) \ + { \ + case 2: y = __bswap_16(y); break; \ + case 4: y = __bswap_32(y); break; \ + case 8: y = __bswap_64(y); break; \ + } \ + y; \ + }) +#endif /* Flags */ extern int quiet; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/misc/xend --- a/tools/misc/xend Thu Sep 8 15:18:40 2005 +++ b/tools/misc/xend Fri Sep 9 16:30:54 2005 @@ -25,11 +25,6 @@ import signal import time import commands - -XCS_PATH = "/var/lib/xen/xcs_socket" -XCS_EXEC = "/usr/sbin/xcs" -XCS_PIDFILE = "/var/run/xcs.pid" -XCS_ARGS = (XCS_EXEC, "-p", XCS_PIDFILE) # add fallback path for non-native python path installs if needed sys.path.append('/usr/lib/python') @@ -70,52 +65,6 @@ hline() raise CheckError("invalid user") -def xcs_running(): - """ See if the control switch is running. 
- """ - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - try: - s.connect( (XCS_PATH) ) - s.close() - except: - try: - os.remove(XCS_PIDFILE) - except: - pass - return 0 - return 1 - -def start_xcs(): - if (not xcs_running()): - if os.fork() == 0 : - if not os.path.isdir(os.path.dirname(XCS_PATH)): - os.makedirs(os.path.dirname(XCS_PATH)) - try: - os.execvp(XCS_EXEC, XCS_ARGS) - except: - hline() - msg("Tried to start xcs, but failed. Is it installed?") - hline() - raise CheckError("couldn't start xcs") - for n in range(10) : - if (xcs_running()): - break - time.sleep(0.1) - else : - hline() - msg("Failed to start the control interface switch.") - hline() - raise CheckError("xcs not running") - -def stop_xcs(): - try: - xcs_pidfile = open(XCS_PIDFILE) - xcs_pid = int(xcs_pidfile.read().strip()) - os.kill(xcs_pid, signal.SIGTERM) - xcs_pidfile.close() - except: - return - def start_xenstored(): XENSTORED_TRACE = os.getenv("XENSTORED_TRACE") cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid" @@ -141,21 +90,16 @@ pid, status = os.wait() return status >> 8 elif sys.argv[1] == 'start': - start_xcs() start_xenstored() start_consoled() return daemon.start() elif sys.argv[1] == 'trace_start': - start_xcs() start_xenstored() start_consoled() return daemon.start(trace=1) elif sys.argv[1] == 'stop': - stop_xcs() return daemon.stop() elif sys.argv[1] == 'restart': - stop_xcs() - start_xcs() start_xenstored() start_consoled() return daemon.stop() or daemon.start() diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/setup.py --- a/tools/python/setup.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/setup.py Fri Sep 9 16:30:54 2005 @@ -7,10 +7,8 @@ extra_compile_args = [ "-fno-strict-aliasing", "-Wall", "-Werror" ] -include_dirs = [ XEN_ROOT + "/tools/python/xen/lowlevel/xu", - XEN_ROOT + "/tools/libxc", +include_dirs = [ XEN_ROOT + "/tools/libxc", XEN_ROOT + "/tools/xenstore", - XEN_ROOT + "/tools/xcs", ] library_dirs = [ XEN_ROOT + "/tools/libxc", @@ -25,13 +23,6 @@ 
library_dirs = library_dirs, libraries = libraries, sources = [ "xen/lowlevel/xc/xc.c" ]) - -xu = Extension("xu", - extra_compile_args = extra_compile_args, - include_dirs = include_dirs + [ "xen/lowlevel/xu" ], - library_dirs = library_dirs, - libraries = libraries, - sources = [ "xen/lowlevel/xu/xu.c" ]) xs = Extension("xs", extra_compile_args = extra_compile_args, @@ -51,10 +42,10 @@ 'xen.xend.xenstore', 'xen.xm', 'xen.web', - 'xen.sv' + 'xen.sv' ], ext_package = "xen.lowlevel", - ext_modules = [ xc, xu, xs ] + ext_modules = [ xc, xs ] ) os.chdir('logging') diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Sep 9 16:30:54 2005 @@ -268,25 +268,33 @@ u32 dom; char *image, *ramdisk = NULL, *cmdline = ""; int flags = 0, vcpus = 1; - int control_evtchn, store_evtchn; + int store_evtchn, console_evtchn; unsigned long store_mfn = 0; - - static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn", - "image", "ramdisk", "cmdline", "flags", + unsigned long console_mfn = 0; + + static char *kwd_list[] = { "dom", "store_evtchn", + "console_evtchn", "image", + /* optional */ + "ramdisk", "cmdline", "flags", "vcpus", NULL }; if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssii", kwd_list, - &dom, &control_evtchn, &store_evtchn, - &image, &ramdisk, &cmdline, &flags, + &dom, &store_evtchn, + &console_evtchn, &image, + /* optional */ + &ramdisk, &cmdline, &flags, &vcpus) ) return NULL; if ( xc_linux_build(xc->xc_handle, dom, image, - ramdisk, cmdline, control_evtchn, flags, vcpus, - store_evtchn, &store_mfn) != 0 ) - return PyErr_SetFromErrno(xc_error); - - return Py_BuildValue("{s:i}", "store_mfn", store_mfn); + ramdisk, cmdline, flags, vcpus, + store_evtchn, &store_mfn, + console_evtchn, &console_mfn) != 0 ) + return PyErr_SetFromErrno(xc_error); + + return Py_BuildValue("{s:i,s:i}", + "store_mfn", store_mfn, + "console_mfn", console_mfn); 
} static PyObject *pyxc_vmx_build(PyObject *self, @@ -682,6 +690,8 @@ { XcObject *xc = (XcObject *)self; xc_physinfo_t info; + char cpu_cap[128], *p=cpu_cap, *q=cpu_cap; + int i; if ( !PyArg_ParseTuple(args, "") ) return NULL; @@ -689,15 +699,72 @@ if ( xc_physinfo(xc->xc_handle, &info) != 0 ) return PyErr_SetFromErrno(xc_error); - return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i}", + *q=0; + for(i=0;i<sizeof(info.hw_cap)/4;i++) + { + p+=sprintf(p,"%08x:",info.hw_cap[i]); + if(info.hw_cap[i]) + q=p; + } + if(q>cpu_cap) + *(q-1)=0; + + return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:i,s:s}", "threads_per_core", info.threads_per_core, "cores_per_socket", info.cores_per_socket, "sockets_per_node", info.sockets_per_node, "nr_nodes", info.nr_nodes, "total_pages", info.total_pages, "free_pages", info.free_pages, - "cpu_khz", info.cpu_khz); -} + "cpu_khz", info.cpu_khz, + "hw_caps", cpu_cap); +} + +static PyObject *pyxc_xeninfo(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + xen_extraversion_t xen_extra; + xen_compile_info_t xen_cc; + xen_changeset_info_t xen_chgset; + xen_capabilities_info_t xen_caps; + xen_parameters_info_t xen_parms; + long xen_version; + char str[128]; + + xen_version = xc_version(xc->xc_handle, XENVER_version, NULL); + + if ( xc_version(xc->xc_handle, XENVER_extraversion, &xen_extra) != 0 ) + return PyErr_SetFromErrno(xc_error); + + if ( xc_version(xc->xc_handle, XENVER_compile_info, &xen_cc) != 0 ) + return PyErr_SetFromErrno(xc_error); + + if ( xc_version(xc->xc_handle, XENVER_changeset, &xen_chgset) != 0 ) + return PyErr_SetFromErrno(xc_error); + + if ( xc_version(xc->xc_handle, XENVER_capabilities, &xen_caps) != 0 ) + return PyErr_SetFromErrno(xc_error); + + if ( xc_version(xc->xc_handle, XENVER_parameters, &xen_parms) != 0 ) + return PyErr_SetFromErrno(xc_error); + + sprintf(str,"virt_start=0x%lx",xen_parms.virt_start); + + return Py_BuildValue("{s:i,s:i,s:s,s:s,s:s,s:s,s:s,s:s,s:s,s:s}", + 
"xen_major", xen_version >> 16, + "xen_minor", (xen_version & 0xffff), + "xen_extra", xen_extra, + "xen_caps", xen_caps.caps, + "xen_params", str, + "xen_changeset", xen_chgset, + "cc_compiler", xen_cc.compiler, + "cc_compile_by", xen_cc.compile_by, + "cc_compile_domain", xen_cc.compile_domain, + "cc_compile_date", xen_cc.compile_date); +} + static PyObject *pyxc_sedf_domain_set(PyObject *self, PyObject *args, @@ -800,14 +867,21 @@ u32 dom; unsigned long mem_kb; - - static char *kwd_list[] = { "dom", "mem_kb", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list, - &dom, &mem_kb) ) - return NULL; - - if ( xc_domain_memory_increase_reservation(xc->xc_handle, dom, mem_kb) ) + unsigned int extent_order = 0 , address_bits = 0; + unsigned long nr_extents; + + static char *kwd_list[] = { "dom", "mem_kb", "extent_order", "address_bits", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "il|ii", kwd_list, + &dom, &mem_kb, &extent_order, &address_bits) ) + return NULL; + + /* round down to nearest power of 2. 
Assume callers using extent_order>0 + know what they are doing */ + nr_extents = (mem_kb / (XC_PAGE_SIZE/1024)) >> extent_order; + if ( xc_domain_memory_increase_reservation(xc->xc_handle, dom, + nr_extents, extent_order, + address_bits, NULL) ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); @@ -1081,6 +1155,13 @@ "Returns [dict]: information about the hardware" " [None]: on failure.\n" }, + { "xeninfo", + (PyCFunction)pyxc_xeninfo, + METH_VARARGS, "\n" + "Get information about the Xen host\n" + "Returns [dict]: information about Xen" + " [None]: on failure.\n" }, + { "shadow_control", (PyCFunction)pyxc_shadow_control, METH_VARARGS | METH_KEYWORDS, "\n" diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/lowlevel/xs/xs.c --- a/tools/python/xen/lowlevel/xs/xs.c Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/lowlevel/xs/xs.c Fri Sep 9 16:30:54 2005 @@ -15,6 +15,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Copyright (C) 2005 Mike Wray Hewlett-Packard + * Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> * */ @@ -45,6 +46,7 @@ typedef struct XsHandle { PyObject_HEAD; struct xs_handle *xh; + PyObject *watches; } XsHandle; static inline struct xs_handle *xshandle(PyObject *self) @@ -87,19 +89,21 @@ PyObject *val = NULL; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_read(xh, path, &xsval_n); - if (!xsval) { - val = pyvalue_int(0); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); goto exit; } val = PyString_FromStringAndSize(xsval, xsval_n); exit: if (xsval) - free(xsval); + free(xsval); return val; } @@ -110,7 +114,7 @@ " create [int] : create flag, default 0.\n" \ " excl [int] : exclusive flag, default 0.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" @@ -130,16 +134,23 @@ int xsval = 0; if 
(!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path, &data, &data_n, &create, &excl)) goto exit; if (create) - flags |= O_CREAT; + flags |= O_CREAT; if (excl) - flags |= O_EXCL; + flags |= O_EXCL; + Py_BEGIN_ALLOW_THREADS xsval = xs_write(xh, path, data, data_n, flags); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -165,12 +176,14 @@ int i; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_directory(xh, path, &xsval_n); - if (!xsval) { - val = pyvalue_int(0); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); goto exit; } val = PyList_New(xsval_n); @@ -184,7 +197,7 @@ "Make a directory.\n" \ " path [string]: path to directory to create.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" @@ -199,11 +212,18 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_mkdir(xh, path); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -212,7 +232,7 @@ "Remove a path.\n" \ " path [string] : path to remove\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" @@ -227,11 +247,18 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_rm(xh, path); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + 
PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -245,7 +272,7 @@ "\n" static PyObject *xspy_get_permissions(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "path", NULL }; static char *arg_spec = "s|"; @@ -258,10 +285,12 @@ int i; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS perms = xs_get_permissions(xh, path, &perms_n); + Py_END_ALLOW_THREADS if (!perms) { PyErr_SetFromErrno(PyExc_RuntimeError); goto exit; @@ -283,12 +312,12 @@ " path [string] : xenstore path.\n" \ " perms : permissions.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_set_permissions(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "path", "perms", NULL }; static char *arg_spec = "sO"; @@ -321,7 +350,7 @@ } tuple0 = PyTuple_New(0); if (!tuple0) - goto exit; + goto exit; for (i = 0; i < xsperms_n; i++) { /* Domain the permissions apply to. 
*/ int dom = 0; @@ -329,20 +358,27 @@ int p_read = 0, p_write = 0; PyObject *p = PyList_GetItem(perms, i); if (!PyArg_ParseTupleAndKeywords(tuple0, p, perm_spec, perm_names, - &dom, &p_read, &p_write)) + &dom, &p_read, &p_write)) goto exit; xsperms[i].id = dom; if (p_read) - xsperms[i].perms |= XS_PERM_READ; + xsperms[i].perms |= XS_PERM_READ; if (p_write) - xsperms[i].perms |= XS_PERM_WRITE; - } + xsperms[i].perms |= XS_PERM_WRITE; + } + Py_BEGIN_ALLOW_THREADS xsval = xs_set_permissions(xh, path, xsperms, xsperms_n); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: Py_XDECREF(tuple0); if (xsperms) - free(xsperms); + free(xsperms); return val; } @@ -351,28 +387,53 @@ " path [string] : xenstore path.\n" \ " token [string] : returned in watch notification.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" +/* Each 10 bits takes ~ 3 digits, plus one, plus one for nul terminator. 
*/ +#define MAX_STRLEN(x) ((sizeof(x) * CHAR_BIT + CHAR_BIT-1) / 10 * 3 + 2) + static PyObject *xspy_watch(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwd_spec[] = { "path", "token", NULL }; - static char *arg_spec = "s|is"; - char *path = NULL; - char *token = ""; - - struct xs_handle *xh = xshandle(self); - PyObject *val = NULL; - int xsval = 0; - - if (!xh) - goto exit; + static char *arg_spec = "sO"; + char *path = NULL; + PyObject *token; + char token_str[MAX_STRLEN(unsigned long) + 1]; + int i; + + XsHandle *xsh = (XsHandle *)self; + struct xs_handle *xh = xshandle(self); + PyObject *val = NULL; + int xsval = 0; + + if (!xh) + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path, &token)) goto exit; - xsval = xs_watch(xh, path, token); - val = pyvalue_int(xsval); + Py_INCREF(token); + sprintf(token_str, "%li", (unsigned long)token); + Py_BEGIN_ALLOW_THREADS + xsval = xs_watch(xh, path, token_str); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + Py_DECREF(token); + goto exit; + } + + for (i = 0; i < PyList_Size(xsh->watches); i++) { + if (PyList_GetItem(xsh->watches, i) == Py_None) { + PyList_SetItem(xsh->watches, i, token); + break; + } + } + if (i == PyList_Size(xsh->watches)) + PyList_Append(xsh->watches, token); + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -388,29 +449,46 @@ "\n" static PyObject *xspy_read_watch(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { NULL }; static char *arg_spec = ""; + XsHandle *xsh = (XsHandle *)self; struct xs_handle *xh = xshandle(self); PyObject *val = NULL; char **xsval = NULL; - - if (!xh) - goto exit; + PyObject *token; + int i; + + if (!xh) + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_read_watch(xh); - if (!xsval) { - val = PyErr_SetFromErrno(PyExc_RuntimeError); - goto exit; + 
Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + if (sscanf(xsval[1], "%li", (unsigned long *)&token) != 1) { + PyErr_SetString(PyExc_RuntimeError, "invalid token"); + goto exit; + } + for (i = 0; i < PyList_Size(xsh->watches); i++) { + if (token == PyList_GetItem(xsh->watches, i)) + break; + } + if (i == PyList_Size(xsh->watches)) { + PyErr_SetString(PyExc_RuntimeError, "invalid token"); + goto exit; } /* Create tuple (path, token). */ - val = Py_BuildValue("(ss)", xsval[0], xsval[1]); + val = Py_BuildValue("(sO)", xsval[0], token); exit: if (xsval) - free(xsval); + free(xsval); return val; } @@ -418,27 +496,36 @@ "Acknowledge a watch notification that has been read.\n" \ " token [string] : from the watch notification\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_acknowledge_watch(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "token", NULL }; - static char *arg_spec = "s"; - char *token; - - struct xs_handle *xh = xshandle(self); - PyObject *val = NULL; - int xsval = 0; - - if (!xh) - goto exit; + static char *arg_spec = "O"; + PyObject *token; + char token_str[MAX_STRLEN(unsigned long) + 1]; + + struct xs_handle *xh = xshandle(self); + PyObject *val = NULL; + int xsval = 0; + + if (!xh) + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &token)) goto exit; - xsval = xs_acknowledge_watch(xh, token); - val = pyvalue_int(xsval); + sprintf(token_str, "%li", (unsigned long)token); + Py_BEGIN_ALLOW_THREADS + xsval = xs_acknowledge_watch(xh, token_str); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -448,28 +535,46 @@ " path [string] : xenstore path.\n" \ " token [string] : token from the watch.\n" \ "\n" \ - "Returns: [int] 0 on 
success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_unwatch(PyObject *self, PyObject *args, PyObject *kwds) { static char *kwd_spec[] = { "path", "token", NULL }; - static char *arg_spec = "s|s"; - char *path = NULL; - char *token = ""; - - struct xs_handle *xh = xshandle(self); - PyObject *val = NULL; - int xsval = 0; - - if (!xh) - goto exit; + static char *arg_spec = "sO"; + char *path = NULL; + PyObject *token; + char token_str[MAX_STRLEN(unsigned long) + 1]; + int i; + + XsHandle *xsh = (XsHandle *)self; + struct xs_handle *xh = xshandle(self); + PyObject *val = NULL; + int xsval = 0; + + if (!xh) + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path, - &token)) - goto exit; - xsval = xs_unwatch(xh, path, token); - val = pyvalue_int(xsval); + &token)) + goto exit; + sprintf(token_str, "%li", (unsigned long)token); + Py_BEGIN_ALLOW_THREADS + xsval = xs_unwatch(xh, path, token_str); + Py_END_ALLOW_THREADS + if (!xsval) + PyErr_SetFromErrno(PyExc_RuntimeError); + else { + Py_INCREF(Py_None); + val = Py_None; + } + for (i = 0; i < PyList_Size(xsh->watches); i++) { + if (token == PyList_GetItem(xsh->watches, i)) { + Py_INCREF(Py_None); + PyList_SetItem(xsh->watches, i, Py_None); + break; + } + } exit: return val; } @@ -479,12 +584,12 @@ "Only one transaction can be active at a time.\n" \ " path [string]: xenstore path.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_transaction_start(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "path", NULL }; static char *arg_spec = "s|"; @@ -495,11 +600,18 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_transaction_start(xh, path); - val = pyvalue_int(xsval); + 
Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -509,12 +621,12 @@ "Attempts to commit the transaction unless abort is true.\n" \ " abort [int]: abort flag (default 0).\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_transaction_end(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "abort", NULL }; static char *arg_spec = "|i"; @@ -525,11 +637,18 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &abort)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_transaction_end(xh, abort); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -541,12 +660,12 @@ " port [int] : port the domain is using for xenstore\n" \ " path [string]: path to the domain's data in xenstore\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_introduce_domain(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "dom", "page", "port", "path", NULL }; static char *arg_spec = "iiis|"; @@ -560,12 +679,19 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &dom, &page, &port, &path)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_introduce_domain(xh, dom, page, port, path); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -575,12 +701,12 @@ "Unless this is done the domain will not be released.\n" \ " dom [int]: 
domain id\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" static PyObject *xspy_release_domain(PyObject *self, PyObject *args, - PyObject *kwds) + PyObject *kwds) { static char *kwd_spec[] = { "dom", NULL }; static char *arg_spec = "i|"; @@ -591,12 +717,19 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &dom)) goto exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_release_domain(xh, dom); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -604,7 +737,7 @@ #define xspy_close_doc "\n" \ "Close the connection to xenstore.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" @@ -612,18 +745,25 @@ { static char *kwd_spec[] = { NULL }; static char *arg_spec = ""; - - struct xs_handle *xh = xshandle(self); - PyObject *val = NULL; - int xsval = 1; - - if (!xh) - goto exit; + int i; + + XsHandle *xsh = (XsHandle *)self; + struct xs_handle *xh = xshandle(self); + PyObject *val = NULL; + + if (!xh) + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec)) goto exit; + for (i = 0; i < PyList_Size(xsh->watches); i++) { + /* TODO: xs_unwatch watches */ + Py_INCREF(Py_None); + PyList_SetItem(xsh->watches, i, Py_None); + } xs_daemon_close(xh); - ((XsHandle*)self)->xh = NULL; - val = pyvalue_int(xsval); + xsh->xh = NULL; + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -631,7 +771,7 @@ #define xspy_shutdown_doc "\n" \ "Shutdown the xenstore daemon.\n" \ "\n" \ - "Returns: [int] 0 on success.\n" \ + "Returns None on success.\n" \ "Raises RuntimeError on error.\n" \ "\n" @@ -645,11 +785,18 @@ int xsval = 0; if (!xh) - goto exit; + goto exit; if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec)) goto 
exit; + Py_BEGIN_ALLOW_THREADS xsval = xs_shutdown(xh); - val = pyvalue_int(xsval); + Py_END_ALLOW_THREADS + if (!xsval) { + PyErr_SetFromErrno(PyExc_RuntimeError); + goto exit; + } + Py_INCREF(Py_None); + val = Py_None; exit: return val; } @@ -750,20 +897,25 @@ if (!PyArg_ParseTupleAndKeywords(args, kwds, arg_spec, kwd_spec, &readonly)) - goto exit; + return NULL; xsh = PyObject_New(XsHandle, &xshandle_type); if (!xsh) - goto exit; + return NULL; + xsh->watches = PyList_New(0); + if (!xsh->watches) + goto exit; xsh->xh = (readonly ? xs_daemon_open_readonly() : xs_daemon_open()); if (!xsh->xh) { - PyObject_Del(xsh); - val = pyvalue_int(0); + Py_DECREF(xsh->watches); + PyErr_SetFromErrno(PyExc_RuntimeError); goto exit; } val = (PyObject *)xsh; - exit: - return val; + return val; + exit: + PyObject_Del(xsh); + return NULL; } static PyMethodDef xs_methods[] = { diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/XendCheckpoint.py Fri Sep 9 16:30:54 2005 @@ -34,7 +34,7 @@ raise XendError(errmsg) return buf -def save(xd, fd, dominfo): +def save(xd, fd, dominfo, live): write_exact(fd, SIGNATURE, "could not write guest state file: signature") config = sxp.to_string(dominfo.sxpr()) @@ -42,8 +42,13 @@ "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") + # xc_save takes three customization parameters: maxit, max_f, and flags + # the last controls whether or not save is 'live', while the first two + # further customize behaviour when 'live' save is enabled. Passing "0" + # simply uses the defaults compiled into libxenguest; see the comments + # and/or code in xc_linux_save() for more information. 
cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd), - str(dominfo.id)] + str(dominfo.id), "0", "0", str(int(live)) ] log.info("[xc_save] " + join(cmd)) child = xPopen3(cmd, True, -1, [fd, xc.handle()]) @@ -51,7 +56,7 @@ p = select.poll() p.register(child.fromchild.fileno()) p.register(child.childerr.fileno()) - while True: + while True: r = p.poll() for (fd, event) in r: if not event & select.POLLIN: @@ -65,15 +70,16 @@ if l.rstrip() == "suspend": log.info("suspending %d" % dominfo.id) xd.domain_shutdown(dominfo.id, reason='suspend') + dominfo.state_wait("suspended") + log.info("suspend %d done" % dominfo.id) if dominfo.store_channel: try: dominfo.db.releaseDomain(dominfo.id) except Exception, ex: - log.warning("error in domain release on xenstore: %s", - ex) + log.warning( + "error in domain release on xenstore: %s", + ex) pass - dominfo.state_wait("suspended") - log.info("suspend %d done" % dominfo.id) child.tochild.write("done\n") child.tochild.flush() if filter(lambda (fd, event): event & select.POLLHUP, r): @@ -121,12 +127,18 @@ "not a valid guest state file: pfn count out of range") if dominfo.store_channel: - evtchn = dominfo.store_channel.port2 + store_evtchn = dominfo.store_channel.port2 else: - evtchn = 0 + store_evtchn = 0 + + if dominfo.console_channel: + console_evtchn = dominfo.console_channel.port2 + else: + console_evtchn = 0 cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd), - str(dominfo.id), str(nr_pfns), str(evtchn)] + str(dominfo.id), str(nr_pfns), + str(store_evtchn), str(console_evtchn)] log.info("[xc_restore] " + join(cmd)) child = xPopen3(cmd, True, -1, [fd, xc.handle()]) child.tochild.close() @@ -147,6 +159,7 @@ if fd == child.fromchild.fileno(): l = child.fromchild.readline() while l: + log.info(l.rstrip()) m = re.match(r"^(store-mfn) (\d+)\n$", l) if m: if dominfo.store_channel: @@ -156,7 +169,10 @@ dominfo.store_mfn, dominfo.store_channel) dominfo.exportToDB(save=True, sync=True) - log.info(l.rstrip()) + m = re.match(r"^(console-mfn) 
(\d+)\n$", l) + if m: + dominfo.console_mfn = int(m.group(2)) + dominfo.exportToDB(save=True, sync=True) try: l = child.fromchild.readline() except: diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/XendDomain.py Fri Sep 9 16:30:54 2005 @@ -36,7 +36,6 @@ from xen.xend.XendError import XendError from xen.xend.XendLogging import log from xen.xend import scheduler -from xen.xend.server import channel from xen.xend.server import relocate from xen.xend.uuid import getUuid from xen.xend.xenstore import XenNode, DBMap @@ -67,7 +66,7 @@ xroot.add_component("xen.xend.XendDomain", self) self.domains = XendDomainDict() self.dbmap = DBMap(db=XenNode("/domain")) - eserver.subscribe('xend.virq', self.onVirq) + self.watchReleaseDomain() self.initial_refresh() def list(self): @@ -75,12 +74,32 @@ @return: domain objects """ + self.refresh() return self.domains.values() - - def onVirq(self, event, val): - """Event handler for virq. - """ + + def list_sorted(self): + """Get list of domain objects, sorted by name. + + @return: domain objects + """ + doms = self.list() + doms.sort(lambda x, y: cmp(x.name, y.name)) + return doms + + def list_names(self): + """Get list of domain names. + + @return: domain names + """ + doms = self.list_sorted() + return map(lambda x: x.name, doms) + + def onReleaseDomain(self): self.refresh(cleanup=True) + + def watchReleaseDomain(self): + from xen.xend.xenstore.xswatch import xswatch + self.releaseDomain = xswatch("@releaseDomain", self.onReleaseDomain) def xen_domains(self): """Get table of domains indexed by id from xc. @@ -265,24 +284,6 @@ else: self._delete_domain(id) - def domain_ls(self): - """Get list of domain names. 
- - @return: domain names - """ - self.refresh() - doms = self.domains.values() - doms.sort(lambda x, y: cmp(x.name, y.name)) - return map(lambda x: x.name, doms) - - def domain_ls_ids(self): - """Get list of domain ids. - - @return: domain names - """ - self.refresh() - return self.domains.keys() - def domain_create(self, config): """Create a domain from a configuration. @@ -542,7 +543,7 @@ dominfo.name = "tmp-" + dominfo.name try: - XendCheckpoint.save(self, sock.fileno(), dominfo) + XendCheckpoint.save(self, sock.fileno(), dominfo, live) except: if dst == "localhost": dominfo.name = string.replace(dominfo.name, "tmp-", "", 1) @@ -563,7 +564,8 @@ fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) - return XendCheckpoint.save(self, fd, dominfo) + # For now we don't support 'live checkpoint' + return XendCheckpoint.save(self, fd, dominfo, False) except OSError, ex: raise XendError("can't write guest state file %s: %s" % diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Sep 9 16:30:54 2005 @@ -34,8 +34,7 @@ from xen.xend.server import controller from xen.xend.server import SrvDaemon; xend = SrvDaemon.instance() -from xen.xend.server import messages -from xen.xend.server.channel import EventChannel, channelFactory +from xen.xend.server.channel import EventChannel from xen.util.blkif import blkdev_name_to_number, expand_dev_name from xen.xend import sxp @@ -47,7 +46,7 @@ from xen.xend.XendRoot import get_component from xen.xend.uuid import getUuid -from xen.xend.xenstore import DBVar +from xen.xend.xenstore import DBVar, XenNode, DBMap """Shutdown code for poweroff.""" DOMAIN_POWEROFF = 0 @@ -231,6 +230,7 @@ DBVar('start_time', ty='float'), DBVar('state', ty='str'), DBVar('store_mfn', ty='long'), + DBVar('console_mfn', ty='long', path="console/ring-ref"), DBVar('restart_mode', ty='str'), DBVar('restart_state', 
ty='str'), DBVar('restart_time', ty='float'), @@ -257,15 +257,17 @@ self.target = None - self.channel = None self.store_channel = None self.store_mfn = None + self.console_channel = None + self.console_mfn = None self.controllers = {} self.info = None self.blkif_backend = False self.netif_backend = False self.netif_idx = 0 + self.tpmif_backend = False #todo: state: running, suspended self.state = STATE_VM_OK @@ -292,18 +294,18 @@ self.db.saveDB(save=save, sync=sync) def exportToDB(self, save=False, sync=False): - if self.channel: - self.channel.saveToDB(self.db.addChild("channel"), save=save) if self.store_channel: self.store_channel.saveToDB(self.db.addChild("store_channel"), save=save) + if self.console_channel: + self.db['console/port'] = "%i" % self.console_channel.port1 if self.image: self.image.exportToDB(save=save, sync=sync) self.db.exportToDB(self, fields=self.__exports__, save=save, sync=sync) def importFromDB(self): self.db.importFromDB(self, fields=self.__exports__) - self.store_channel = self.eventChannel("store_channel") + self.store_channel = self.eventChannelOld("store_channel") def setdom(self, dom): """Set the domain id. @@ -323,16 +325,16 @@ def getName(self): return self.name - def getChannel(self): - return self.channel - def getStoreChannel(self): return self.store_channel - def update(self, info): + def getConsoleChannel(self): + return self.console_channel + + def update(self, info=None): """Update with info from xc.domain_getinfo(). 
""" - self.info = info + self.info = info or dom_get(self.id) self.memory = self.info['mem_kb'] / 1024 self.ssidref = self.info['ssidref'] self.target = self.info['mem_kb'] * 1024 @@ -384,6 +386,8 @@ return ctrl def createDevice(self, type, devconfig, change=False): + if self.recreate: + return if type == 'vbd': typedev = sxp.child_value(devconfig, 'dev') if re.match('^ioemu:', typedev): @@ -420,6 +424,15 @@ return if type == 'vif': + from xen.xend import XendRoot + xroot = XendRoot.instance() + + def _get_config_ipaddr(config): + val = [] + for ipaddr in sxp.children(config, elt='ip'): + val.append(sxp.child0(ipaddr)) + return val + backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0')) log.error(devconfig) @@ -427,6 +440,14 @@ devnum = self.netif_idx self.netif_idx += 1 + script = sxp.child_value(devconfig, 'script', + xroot.get_vif_script()) + script = os.path.join(xroot.network_script_dir, script) + bridge = sxp.child_value(devconfig, 'bridge', + xroot.get_vif_bridge()) + mac = sxp.child_value(devconfig, 'mac') + ipaddr = _get_config_ipaddr(devconfig) + # create backend db backdb = backdom.db.addChild("/backend/%s/%s/%d" % (type, self.uuid, devnum)) @@ -434,6 +455,12 @@ # create frontend db db = self.db.addChild("/device/%s/%d" % (type, devnum)) + backdb['script'] = script + backdb['domain'] = self.name + backdb['mac'] = mac + backdb['bridge'] = bridge + if ipaddr: + backdb['ip'] = ' '.join(ipaddr) backdb['frontend'] = db.getPath() backdb['frontend-id'] = "%i" % self.id backdb['handle'] = "%i" % devnum @@ -442,13 +469,37 @@ db['backend'] = backdb.getPath() db['backend-id'] = "%i" % backdom.id db['handle'] = "%i" % devnum - log.error(sxp.child_value(devconfig, 'mac')) - db['mac'] = sxp.child_value(devconfig, 'mac') + db['mac'] = mac db.saveDB(save=True) return + if type == 'vtpm': + backdom = domain_exists(sxp.child_value(devconfig, 'backend', '0')) + + devnum = int(sxp.child_value(devconfig, 'instance', '0')) + log.error("The domain has a TPM with 
instance %d." % devnum) + + # create backend db + backdb = backdom.db.addChild("/backend/%s/%s/%d" % + (type, self.uuid, devnum)) + # create frontend db + db = self.db.addChild("/device/%s/%d" % (type, devnum)) + + backdb['frontend'] = db.getPath() + backdb['frontend-id'] = "%i" % self.id + backdb['instance'] = sxp.child_value(devconfig, 'instance', '0') + backdb.saveDB(save=True) + + db['handle'] = "%i" % devnum + db['backend'] = backdb.getPath() + db['backend-id'] = "%i" % int(sxp.child_value(devconfig, + 'backend', '0')) + db.saveDB(save=True) + + return + ctrl = self.findDeviceController(type) return ctrl.createDevice(devconfig, recreate=self.recreate, change=change) @@ -512,12 +563,18 @@ sxpr.append(['up_time', str(up_time) ]) sxpr.append(['start_time', str(self.start_time) ]) - if self.channel: - sxpr.append(self.channel.sxpr()) if self.store_channel: sxpr.append(self.store_channel.sxpr()) if self.store_mfn: sxpr.append(['store_mfn', self.store_mfn]) + if self.console_channel: + sxpr.append(['console_channel', self.console_channel.sxpr()]) + if self.console_mfn: + sxpr.append(['console_mfn', self.console_mfn]) +# already in (devices) +# console = self.getConsole() +# if console: +# sxpr.append(console.sxpr()) if self.restart_count: sxpr.append(['restart_count', self.restart_count]) @@ -695,12 +752,6 @@ """ self.state = STATE_VM_TERMINATED self.release_devices() - if self.channel: - try: - self.channel.close() - self.channel = None - except: - pass if self.store_channel: try: self.store_channel.close() @@ -712,6 +763,13 @@ except Exception, ex: log.warning("error in domain release on xenstore: %s", ex) pass + if self.console_channel: + # notify processes using this cosole? + try: + self.console_channel.close() + self.console_channel = None + except: + pass if self.image: try: self.device_model_pid = 0 @@ -723,8 +781,8 @@ def destroy(self): """Clenup vm and destroy domain. 
""" + self.destroy_domain() self.cleanup() - self.destroy_domain() self.saveToDB() return 0 @@ -755,6 +813,11 @@ for dev in typedb.keys(): typedb[dev].delete() typedb.saveDB(save=True) + if type == 'vtpm': + typedb = ddb.addChild(type) + for dev in typedb.keys(): + typedb[dev].delete() + typedb.saveDB(save=True) def show(self): """Print virtual machine info. @@ -780,21 +843,7 @@ id, self.name, self.memory) self.setdom(id) - def openChannel(self, key, local, remote): - """Create a control channel to the domain. - If saved info is available recreate the channel. - - @param key db key for the saved data (if any) - @param local default local port - @param remote default remote port - """ - db = self.db.addChild(key) - chan = channelFactory().restoreFromDB(db, self.id, local, remote) - #todo: save here? - #chan.saveToDB(db) - return chan - - def eventChannel(self, key): + def eventChannelOld(self, key): """Create an event channel to the domain. If saved info is available recreate the channel. @@ -803,11 +852,27 @@ db = self.db.addChild(key) return EventChannel.restoreFromDB(db, 0, self.id) + def eventChannel(self, path=None, key=None): + """Create an event channel to the domain. + + @param path under which port is stored in db + """ + port = 0 + try: + if path and key: + if path: + db = self.db.addChild(path) + else: + db = self.db + port = int(db[key].getData()) + except: pass + return EventChannel.interdomain(0, self.id, port1=port, port2=0) + def create_channel(self): """Create the channels to the domain. 
""" - self.channel = self.openChannel("channel", 0, 1) - self.store_channel = self.eventChannel("store_channel") + self.store_channel = self.eventChannelOld("store_channel") + self.console_channel = self.eventChannel("console", "port") def create_configured_devices(self): devices = sxp.children(self.config, 'device') @@ -950,6 +1015,7 @@ """ try: + self.clear_shutdown() self.state = STATE_VM_OK self.shutdown_pending = None self.restart_check() @@ -993,6 +1059,8 @@ self.netif_backend = True elif name == 'usbif': self.usbif_backend = True + elif name == 'tpmif': + self.tpmif_backend = True else: raise VmError('invalid backend type:' + str(name)) @@ -1084,7 +1152,7 @@ def dom0_init_store(self): if not self.store_channel: - self.store_channel = self.eventChannel("store_channel") + self.store_channel = self.eventChannelOld("store_channel") self.store_mfn = xc.init_store(self.store_channel.port2) if self.store_mfn >= 0: self.db.introduceDomain(self.id, self.store_mfn, @@ -1158,6 +1226,10 @@ controller.addDevControllerClass("vif", netif.NetifController) add_device_handler("vif", "vif") +from server import tpmif +controller.addDevControllerClass("vtpm", tpmif.TPMifController) +add_device_handler("vtpm", "vtpm") + from server import pciif controller.addDevControllerClass("pci", pciif.PciController) add_device_handler("pci", "pci") diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/XendNode.py --- a/tools/python/xen/xend/XendNode.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/XendNode.py Fri Sep 9 16:30:54 2005 @@ -46,7 +46,7 @@ return self.xc.bvtsched_global_get() def info(self): - return self.nodeinfo() + self.physinfo() + return self.nodeinfo() + self.physinfo() + self.xeninfo() def nodeinfo(self): (sys, host, rel, ver, mch) = os.uname() @@ -58,14 +58,29 @@ def physinfo(self): pinfo = self.xc.physinfo() - info = [['cores_per_socket', pinfo['cores_per_socket']], + info = [['nr_cpus', 
pinfo['nr_nodes']*pinfo['sockets_per_node']*pinfo['cores_per_socket']*pinfo['threads_per_core']], + ['nr_nodes', pinfo['nr_nodes']], + ['sockets_per_node', pinfo['sockets_per_node']], + ['cores_per_socket', pinfo['cores_per_socket']], ['threads_per_core', pinfo['threads_per_core']], - ['cpu_mhz', pinfo['cpu_khz']/1000], - ['memory', pinfo['total_pages']/256], - ['free_memory', pinfo['free_pages']/256]] + ['cpu_mhz', pinfo['cpu_khz']/1000], + ['hw_caps', pinfo['hw_caps']], + ['memory', pinfo['total_pages']/256], + ['free_memory', pinfo['free_pages']/256]] return info - + def xeninfo(self): + xinfo = self.xc.xeninfo() + return [['xen_major', xinfo['xen_major']], + ['xen_minor', xinfo['xen_minor']], + ['xen_extra', xinfo['xen_extra']], + ['xen_caps', xinfo['xen_caps']], + ['xen_params',xinfo['xen_params']], + ['xen_changeset', xinfo['xen_changeset']], + ['cc_compiler', xinfo['cc_compiler']], + ['cc_compile_by', xinfo['cc_compile_by']], + ['cc_compile_domain', xinfo['cc_compile_domain']], + ['cc_compile_date', xinfo['cc_compile_date']]] def instance(): global inst diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/image.py Fri Sep 9 16:30:54 2005 @@ -32,6 +32,9 @@ """Flag for a net device backend domain.""" SIF_NET_BE_DOMAIN = (1<<5) +"""Flag for a TPM device backend domain.""" +SIF_TPM_BE_DOMAIN = (1<<7) + class ImageHandler: """Abstract base class for image handlers. 
@@ -156,7 +159,12 @@ xc.domain_setmaxmem(dom, mem_kb) try: - xc.domain_memory_increase_reservation(dom, mem_kb) + # Give the domain some memory below 4GB + lmem_kb = 0 + if lmem_kb > 0: + xc.domain_memory_increase_reservation(dom, min(lmem_kb,mem_kb), 0, 32) + if mem_kb > lmem_kb: + xc.domain_memory_increase_reservation(dom, mem_kb-lmem_kb, 0, 0) except: xc.domain_destroy(dom) raise @@ -194,6 +202,7 @@ self.flags = 0 if self.vm.netif_backend: self.flags |= SIF_NET_BE_DOMAIN if self.vm.blkif_backend: self.flags |= SIF_BLK_BE_DOMAIN + if self.vm.tpmif_backend: self.flags |= SIF_TPM_BE_DOMAIN if self.vm.recreate or self.vm.restore: return @@ -238,16 +247,31 @@ store_evtchn = self.vm.store_channel.port2 else: store_evtchn = 0 + if self.vm.console_channel: + console_evtchn = self.vm.console_channel.port2 + else: + console_evtchn = 0 + + log.debug("dom = %d", self.vm.getDomain()) + log.debug("image = %s", self.kernel) + log.debug("store_evtchn = %d", store_evtchn) + log.debug("console_evtchn = %d", console_evtchn) + log.debug("cmdline = %s", self.cmdline) + log.debug("ramdisk = %s", self.ramdisk) + log.debug("flags = %d", self.flags) + log.debug("vcpus = %d", self.vm.vcpus) + ret = xc.linux_build(dom = self.vm.getDomain(), image = self.kernel, - control_evtchn = self.vm.channel.getRemotePort(), store_evtchn = store_evtchn, + console_evtchn = console_evtchn, cmdline = self.cmdline, ramdisk = self.ramdisk, flags = self.flags, vcpus = self.vm.vcpus) if isinstance(ret, dict): self.vm.store_mfn = ret.get('store_mfn') + self.vm.console_mfn = ret.get('console_mfn') return 0 return ret @@ -349,6 +373,11 @@ mac = sxp.child_value(vifinfo, 'mac') ret.append("-macaddr") ret.append("%s" % mac) + if name == 'vtpm': + vtpminfo = sxp.child(device, 'vtpm') + instance = sxp.child_value(vtpminfo, 'instance') + ret.append("-instance") + ret.append("%s" % instance) # Handle graphics library related options vnc = sxp.child_value(self.vm.config, 'vnc') diff -r 10b1d30d3f66 -r b2f4823b6ff0 
tools/python/xen/xend/server/SrvDaemon.py --- a/tools/python/xen/xend/server/SrvDaemon.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/SrvDaemon.py Fri Sep 9 16:30:54 2005 @@ -17,8 +17,6 @@ import time import glob -from xen.lowlevel import xu - from xen.xend import sxp from xen.xend import PrettyPrint from xen.xend import EventServer; eserver = EventServer.instance() @@ -27,7 +25,6 @@ from xen.xend.XendLogging import log from xen.xend import XendRoot; xroot = XendRoot.instance() -import channel import controller import event import relocate @@ -37,12 +34,12 @@ """The xend daemon. """ def __init__(self): - self.channelF = None self.shutdown = 0 self.traceon = 0 self.tracefile = None self.traceindent = 0 - + self.child = 0 + def daemon_pids(self): pids = [] pidex = '(?P<pid>\d+)' @@ -140,15 +137,12 @@ else: return 0 - def install_child_reaper(self): - #signal.signal(signal.SIGCHLD, self.onSIGCHLD) - # Ensure that zombie children are automatically reaped. - xu.autoreap() - def onSIGCHLD(self, signum, frame): - code = 1 - while code > 0: - code = os.waitpid(-1, os.WNOHANG) + if self.child > 0: + try: + pid, sts = os.waitpid(self.child, os.WNOHANG) + except os.error, ex: + pass def fork_pid(self, pidfile): """Fork and write the pid of the child to 'pidfile'. @@ -156,13 +150,16 @@ @param pidfile: pid file @return: pid of child in parent, 0 in child """ - pid = os.fork() - if pid: + + self.child = os.fork() + + if self.child: # Parent pidfile = open(pidfile, 'w') - pidfile.write(str(pid)) + pidfile.write(str(self.child)) pidfile.close() - return pid + + return self.child def daemonize(self): if not XEND_DAEMONIZE: return @@ -203,8 +200,7 @@ # Trying to run an already-running service is a success. return 0 - self.install_child_reaper() - + signal.signal(signal.SIGCHLD, self.onSIGCHLD) if self.fork_pid(XEND_PID_FILE): #Parent. Sleep to give child time to start. 
time.sleep(1) @@ -298,10 +294,8 @@ _enforce_dom0_cpus() try: log.info("Xend Daemon started") - self.createFactories() event.listenEvent(self) relocate.listenRelocation() - self.listenChannels() servers = SrvServer.create() self.daemonize() servers.start() @@ -309,22 +303,10 @@ print >>sys.stderr, 'Exception starting xend:', ex if XEND_DEBUG: traceback.print_exc() - log.exception("Exception starting xend") + log.exception("Exception starting xend (%s)" % ex) self.exit(1) - def createFactories(self): - self.channelF = channel.channelFactory() - - def listenChannels(self): - def virqReceived(virq): - eserver.inject('xend.virq', virq) - - self.channelF.setVirqHandler(virqReceived) - self.channelF.start() - def exit(self, rc=0): - if self.channelF: - self.channelF.stop() # Calling sys.exit() raises a SystemExit exception, which only # kills the current thread. Calling os._exit() makes the whole # Python process exit immediately. There doesn't seem to be another diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/SrvDomain.py --- a/tools/python/xen/xend/server/SrvDomain.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/SrvDomain.py Fri Sep 9 16:30:54 2005 @@ -221,6 +221,7 @@ # # if op and op[0] in ['vifs', 'vif', 'vbds', 'vbd', 'mem_target_set']: # return self.perform(req) + self.dom.update() if self.use_sxp(req): req.setHeader("Content-Type", sxp.mime_type) sxp.show(self.dom.sxpr(), out=req) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/SrvDomainDir.py --- a/tools/python/xen/xend/server/SrvDomainDir.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/SrvDomainDir.py Fri Sep 9 16:30:54 2005 @@ -146,11 +146,10 @@ if not url.endswith('/'): url += '/' if use_sxp: - domains = self.xd.domain_ls() + domains = self.xd.list_names() sxp.show(domains, out=req) else: - domains = self.xd.list() - domains.sort(lambda x, y: cmp(x.name, y.name)) + domains = self.xd.list_sorted() req.write('<ul>') for d in domains: 
req.write('<li><a href="%s%s"> Domain %s</a>' diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/SrvServer.py --- a/tools/python/xen/xend/server/SrvServer.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/SrvServer.py Fri Sep 9 16:30:54 2005 @@ -61,9 +61,14 @@ def start(self): Vifctl.network('start') + threads = [] for server in self.servers: thread = Thread(target=server.run) thread.start() + threads.append(thread) + + for t in threads: + t.join() def create(): root = SrvDir() diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/blkif.py Fri Sep 9 16:30:54 2005 @@ -27,9 +27,7 @@ from xen.xend import Blkctl from xen.xend.xenstore import DBVar -from xen.xend.server import channel -from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController -from xen.xend.server.messages import * +from xen.xend.server.controller import Dev, DevController class BlkifBackend: """ Handler for the 'back-end' channel to a block device driver domain @@ -41,21 +39,15 @@ self.controller = controller self.id = id self.frontendDomain = self.controller.getDomain() - self.frontendChannel = None self.backendDomain = dom - self.backendChannel = None self.destroyed = False self.connected = False - self.evtchn = None self.status = None def init(self, recreate=False, reboot=False): self.destroyed = False self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED self.frontendDomain = self.controller.getDomain() - self.frontendChannel = self.controller.getChannel() - cf = channel.channelFactory() - self.backendChannel = cf.openChannel(self.backendDomain) def __str__(self): return ('<BlkifBackend frontend=%d backend=%d id=%d>' @@ -66,29 +58,6 @@ def getId(self): return self.id - def getEvtchn(self): - return self.evtchn - - def closeEvtchn(self): - if self.evtchn: - channel.eventChannelClose(self.evtchn) - self.evtchn = None - - def openEvtchn(self): 
- self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain) - - def getEventChannelBackend(self): - val = 0 - if self.evtchn: - val = self.evtchn['port1'] - return val - - def getEventChannelFrontend(self): - val = 0 - if self.evtchn: - val = self.evtchn['port2'] - return val - def connect(self, recreate=False): """Connect to the blkif control interface. @@ -98,83 +67,19 @@ if recreate or self.connected: self.connected = True pass - else: - self.send_be_create() - - def send_be_create(self): - log.debug("send_be_create %s", str(self)) - msg = packMsg('blkif_be_create_t', - { 'domid' : self.frontendDomain, - 'blkif_handle' : self.id }) - msg = self.backendChannel.requestResponse(msg) - #todo: check return status - self.connected = True - + def destroy(self, change=False, reboot=False): """Disconnect from the blkif control interface and destroy it. """ - self.send_be_disconnect() - self.send_be_destroy() - self.closeEvtchn() self.destroyed = True # For change true need to notify front-end, or back-end will do it? 
- def send_be_disconnect(self): - msg = packMsg('blkif_be_disconnect_t', - { 'domid' : self.frontendDomain, - 'blkif_handle' : self.id }) - self.backendChannel.requestResponse(msg) - #todo: check return status - self.connected = False - - def send_be_destroy(self): - msg = packMsg('blkif_be_destroy_t', - { 'domid' : self.frontendDomain, - 'blkif_handle' : self.id }) - self.backendChannel.requestResponse(msg) - #todo: check return status - def connectInterface(self, val): - self.openEvtchn() - log.debug("Connecting blkif to event channel %s ports=%d:%d", - str(self), self.evtchn['port1'], self.evtchn['port2']) - msg = packMsg('blkif_be_connect_t', - { 'domid' : self.frontendDomain, - 'blkif_handle' : self.id, - 'evtchn' : self.getEventChannelBackend(), - 'shmem_frame' : val['shmem_frame'], - 'shmem_ref' : val['shmem_ref'] }) - msg = self.backendChannel.requestResponse(msg) - #todo: check return status - val = unpackMsg('blkif_be_connect_t', msg) self.status = BLKIF_INTERFACE_STATUS_CONNECTED - self.send_fe_interface_status() - def send_fe_interface_status(self): - msg = packMsg('blkif_fe_interface_status_t', - { 'handle' : self.id, - 'status' : self.status, - 'domid' : self.backendDomain, - 'evtchn' : self.getEventChannelFrontend() }) - self.frontendChannel.writeRequest(msg) - def interfaceDisconnected(self): self.status = BLKIF_INTERFACE_STATUS_DISCONNECTED - #todo?: Close evtchn: - #self.closeEvtchn() - self.send_fe_interface_status() - - def interfaceChanged(self): - """Notify the front-end that devices have been added or removed. - The front-end should then probe for devices. - """ - msg = packMsg('blkif_fe_interface_status_t', - { 'handle' : self.id, - 'status' : BLKIF_INTERFACE_STATUS_CHANGED, - 'domid' : self.backendDomain, - 'evtchn' : 0 }) - self.frontendChannel.writeRequest(msg) - + class BlkDev(Dev): """Info record for a block device. 
""" @@ -207,24 +112,17 @@ self.nr_sectors = None self.frontendDomain = self.getDomain() - self.frontendChannel = None self.backendDomain = None - self.backendChannel = None self.backendId = 0 self.configure(self.config, recreate=recreate) def exportToDB(self, save=False): Dev.exportToDB(self, save=save) backend = self.getBackend() - if backend and backend.evtchn: - db = self.db.addChild("evtchn") - backend.evtchn.saveToDB(db, save=save) def init(self, recreate=False, reboot=False): self.frontendDomain = self.getDomain() - self.frontendChannel = self.getChannel() backend = self.getBackend() - self.backendChannel = backend.backendChannel self.backendId = backend.id def configure(self, config, change=False, recreate=False): @@ -351,7 +249,6 @@ self.destroyed = True log.debug("Destroying vbd domain=%d id=%s", self.frontendDomain, self.id) - self.send_be_vbd_destroy() if change: self.interfaceChanged() self.unbind() @@ -367,30 +264,6 @@ """ self.getBackend().connect() - self.send_be_vbd_create() - - def send_be_vbd_create(self): - msg = packMsg('blkif_be_vbd_create_t', - { 'domid' : self.frontendDomain, - 'blkif_handle' : self.backendId, - 'pdevice' : self.device, - 'dev_handle' : self.dev_handle, - 'vdevice' : self.vdev, - 'readonly' : self.readonly() }) - msg = self.backendChannel.requestResponse(msg) - - val = unpackMsg('blkif_be_vbd_create_t', msg) - status = val['status'] - if status != BLKIF_BE_STATUS_OKAY: - raise XendError("Creating vbd failed: device %s, error %d" - % (sxp.to_string(self.config), status)) - - def send_be_vbd_destroy(self): - msg = packMsg('blkif_be_vbd_destroy_t', - { 'domid' : self.frontendDomain, - 'blkif_handle' : self.backendId, - 'vdevice' : self.vdev }) - return self.backendChannel.writeRequest(msg) class BlkifController(DevController): """Block device interface controller. 
Handles all block devices @@ -403,19 +276,9 @@ DevController.__init__(self, vm, recreate=recreate) self.backends = {} self.backendId = 0 - self.rcvr = None def initController(self, recreate=False, reboot=False): self.destroyed = False - # Add our handlers for incoming requests. - self.rcvr = CtrlMsgRcvr(self.getChannel()) - self.rcvr.addHandler(CMSG_BLKIF_FE, - CMSG_BLKIF_FE_DRIVER_STATUS, - self.recv_fe_driver_status) - self.rcvr.addHandler(CMSG_BLKIF_FE, - CMSG_BLKIF_FE_INTERFACE_CONNECT, - self.recv_fe_interface_connect) - self.rcvr.registerChannel() if reboot: self.rebootBackends() self.rebootDevices() @@ -465,26 +328,7 @@ log.debug("Destroying blkif domain=%d", self.getDomain()) self.destroyDevices(reboot=reboot) self.destroyBackends(reboot=reboot) - self.rcvr.deregisterChannel() def destroyBackends(self, reboot=False): for backend in self.backends.values(): backend.destroy(reboot=reboot) - - def recv_fe_driver_status(self, msg): - val = unpackMsg('blkif_fe_driver_status_t', msg) - for backend in self.backends.values(): - backend.interfaceDisconnected() - - def recv_fe_interface_connect(self, msg): - val = unpackMsg('blkif_fe_interface_connect_t', msg) - id = val['handle'] - backend = self.getBackendById(id) - if backend: - try: - backend.connectInterface(val) - except IOError, ex: - log.error("Exception connecting backend: %s", ex) - else: - log.error('interface connect on unknown interface: id=%d', id) - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/channel.py --- a/tools/python/xen/xend/server/channel.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/channel.py Fri Sep 9 16:30:54 2005 @@ -19,11 +19,8 @@ import select import xen.lowlevel.xc; xc = xen.lowlevel.xc.new() -from xen.lowlevel import xu from xen.xend.XendLogging import log - -from messages import * DEBUG = 0 @@ -132,501 +129,3 @@ """ if not evtchn: return evtchn.close() - -class ChannelFactory: - """Factory for creating control channels. 
- Maintains a table of channels. - """ - - """ Channels indexed by index. """ - channels = None - - thread = None - - notifier = None - - """Map of ports to the virq they signal.""" - virqPorts = None - - def __init__(self): - """Constructor - do not use. Use the channelFactory function.""" - self.channels = {} - self.virqPorts = {} - self.notifier = xu.notifier() - # Register interest in virqs. - self.bind_virq(xen.lowlevel.xc.VIRQ_DOM_EXC) - self.virqHandler = None - - def bind_virq(self, virq): - port = self.notifier.bind_virq(virq) - self.virqPorts[port] = virq - log.info("Virq %s on port %s", virq, port) - - def start(self): - """Fork a thread to read messages. - """ - if self.thread: return - self.thread = threading.Thread(name="ChannelFactory", - target=self.main) - self.thread.setDaemon(True) - self.thread.start() - - def stop(self): - """Signal the thread to stop. - """ - self.thread = None - - def main(self): - """Main routine for the thread. - Reads the notifier and dispatches to channels. - """ - while True: - if self.thread == None: return - port = self.notifier.read() - if port: - virq = self.virqPorts.get(port) - if virq is not None: - self.virqReceived(virq) - else: - self.msgReceived(port) - else: - select.select([self.notifier], [], [], 1.0) - - def msgReceived(self, port): - # We run the message handlers in their own threads. - # Note we use keyword args to lambda to save the values - - # otherwise lambda will use the variables, which will get - # assigned by the loop and the lambda will get the changed values. 
- received = 0 - for chan in self.channels.values(): - if self.thread == None: return - msg = chan.readResponse() - if msg: - received += 1 - chan.responseReceived(msg) - for chan in self.channels.values(): - if self.thread == None: return - msg = chan.readRequest() - if msg: - received += 1 - self.runInThread(lambda chan=chan, msg=msg: chan.requestReceived(msg)) - if port and received == 0: - log.warning("Port %s notified, but no messages found", port) - - def runInThread(self, thunk): - thread = threading.Thread(target = thunk) - thread.setDaemon(True) - thread.start() - - def setVirqHandler(self, virqHandler): - self.virqHandler = virqHandler - - def virqReceived(self, virq): - if DEBUG: - print 'virqReceived>', virq - if not self.virqHandler: return - self.runInThread(lambda virq=virq: self.virqHandler(virq)) - - def newChannel(self, dom, local_port, remote_port): - """Create a new channel. - """ - return self.addChannel(Channel(self, dom, local_port, remote_port)) - - def addChannel(self, channel): - """Add a channel. - """ - self.channels[channel.getKey()] = channel - return channel - - def delChannel(self, channel): - """Remove the channel. - """ - key = channel.getKey() - if key in self.channels: - del self.channels[key] - - def getChannel(self, dom, local_port, remote_port): - """Get the channel with the given domain and ports (if any). - """ - key = (dom, local_port, remote_port) - return self.channels.get(key) - - def findChannel(self, dom, local_port=0, remote_port=0): - """Find a channel. Ports given as zero are wildcards. 
- - dom domain - - returns channel - """ - chan = self.getChannel(dom, local_port, remote_port) - if chan: return chan - if local_port and remote_port: - return None - for c in self.channels.values(): - if c.dom != dom: continue - if local_port and local_port != c.getLocalPort(): continue - if remote_port and remote_port != c.getRemotePort(): continue - return c - return None - - def openChannel(self, dom, local_port=0, remote_port=0): - chan = self.findChannel(dom, local_port=local_port, - remote_port=remote_port) - if chan: - return chan - chan = self.newChannel(dom, local_port, remote_port) - return chan - - - def createPort(self, dom, local_port=0, remote_port=0): - """Create a port for a channel to the given domain. - If only the domain is specified, a new channel with new port ids is - created. If one port id is specified and the given port id is in use, - the other port id is filled. If one port id is specified and the - given port id is not in use, a new channel is created with one port - id equal to the given id and a new id for the other end. If both - port ids are specified, a port is reconnected using the given port - ids. - - @param dom: domain - @param local: local port id to use - @type local: int - @param remote: remote port id to use - @type remote: int - @return: port object - """ - return xu.port(dom, local_port=local_port, remote_port=remote_port) - - def restoreFromDB(self, db, dom, local, remote): - """Create a channel using ports restored from the db (if available). - Otherwise use the given ports. This is the inverse operation to - saveToDB() on a channel. 
- - @param db db - @param dom domain the channel connects to - @param local default local port - @param remote default remote port - """ - try: - local_port = int(db['local_port']) - except: - local_port = local - try: - remote_port = int(db['remote_port']) - except: - remote_port = remote - try: - chan = self.openChannel(dom, local_port, remote_port) - except: - return None - return chan - -def channelFactory(): - """Singleton constructor for the channel factory. - Use this instead of the class constructor. - """ - global inst - try: - inst - except: - inst = ChannelFactory() - return inst - -class Channel: - """Control channel to a domain. - Maintains a list of device handlers to dispatch requests to, based - on the request type. - """ - - def __init__(self, factory, dom, local_port, remote_port): - self.factory = factory - self.dom = int(dom) - # Registered device handlers. - self.devs = [] - # Handlers indexed by the message types they handle. - self.devs_by_type = {} - self.port = self.factory.createPort(self.dom, - local_port=local_port, - remote_port=remote_port) - self.closed = False - # Queue of waiters for responses to requests. - self.queue = ResponseQueue(self) - # Make sure the port will deliver all the messages. - self.port.register(TYPE_WILDCARD) - - def saveToDB(self, db, save=False): - """Save the channel ports to the db so the channel can be restored later, - using restoreFromDB() on the factory. - - @param db db - """ - if self.closed: return - db['local_port'] = str(self.getLocalPort()) - db['remote_port'] = str(self.getRemotePort()) - db.saveDB(save=save) - - def getKey(self): - """Get the channel key. - """ - return (self.dom, self.getLocalPort(), self.getRemotePort()) - - def sxpr(self): - val = ['channel'] - val.append(['domain', self.dom]) - if self.port: - val.append(['local_port', self.port.local_port]) - val.append(['remote_port', self.port.remote_port]) - return val - - def close(self): - """Close the channel. 
- """ - if DEBUG: - print 'Channel>close>', self - if self.closed: return - self.closed = True - self.factory.delChannel(self) - for d in self.devs[:]: - d.lostChannel(self) - self.devs = [] - self.devs_by_type = {} - if self.port: - self.port.close() - #self.port = None - - def getDomain(self): - return self.dom - - def getLocalPort(self): - """Get the local port. - - @return: local port - @rtype: int - """ - if self.closed: return -1 - return self.port.local_port - - def getRemotePort(self): - """Get the remote port. - - @return: remote port - @rtype: int - """ - if self.closed: return -1 - return self.port.remote_port - - def __repr__(self): - return ('<Channel dom=%d ports=%d:%d>' - % (self.dom, - self.getLocalPort(), - self.getRemotePort())) - - - def registerDevice(self, types, dev): - """Register a device message handler. - - @param types: message types handled - @type types: array of ints - @param dev: device handler - """ - if self.closed: return - self.devs.append(dev) - for ty in types: - self.devs_by_type[ty] = dev - - def deregisterDevice(self, dev): - """Remove the registration for a device handler. - - @param dev: device handler - """ - if dev in self.devs: - self.devs.remove(dev) - types = [ ty for (ty, d) in self.devs_by_type.items() if d == dev ] - for ty in types: - del self.devs_by_type[ty] - - def getDevice(self, type): - """Get the handler for a message type. - - @param type: message type - @type type: int - @return: controller or None - @rtype: device handler - """ - return self.devs_by_type.get(type) - - def requestReceived(self, msg): - """A request has been received on the channel. - Disptach it to the device handlers. - Called from the channel factory thread. 
- """ - if DEBUG: - print 'Channel>requestReceived>', self, - printMsg(msg) - (ty, subty) = getMessageType(msg) - responded = False - dev = self.getDevice(ty) - if dev: - responded = dev.requestReceived(msg, ty, subty) - elif DEBUG: - print "Channel>requestReceived> No device handler", self, - printMsg(msg) - else: - pass - if not responded: - self.writeResponse(msg) - - def writeRequest(self, msg): - """Write a request to the channel. - """ - if DEBUG: - print 'Channel>writeRequest>', self, - printMsg(msg, all=True) - if self.closed: return -1 - self.port.write_request(msg) - return 1 - - def writeResponse(self, msg): - """Write a response to the channel. - """ - if DEBUG: - print 'Channel>writeResponse>', self, - printMsg(msg, all=True) - if self.port: - self.port.write_response(msg) - return 1 - - def readRequest(self): - """Read a request from the channel. - Called internally. - """ - if self.closed: - val = None - else: - val = self.port.read_request() - return val - - def readResponse(self): - """Read a response from the channel. - Called internally. - """ - if self.closed: - val = None - else: - val = self.port.read_response() - if DEBUG and val: - print 'Channel>readResponse>', self, - printMsg(val, all=True) - return val - - def requestResponse(self, msg, timeout=None): - """Write a request and wait for a response. - Raises IOError on timeout. - - @param msg request message - @param timeout timeout (0 is forever) - @return response message - """ - if self.closed: - raise IOError("closed") - if self.closed: - return None - if timeout is None: - timeout = RESPONSE_TIMEOUT - elif timeout <= 0: - timeout = None - return self.queue.call(msg, timeout) - - def responseReceived(self, msg): - """A response has been received, look for a waiter to - give it to. - Called internally. 
- """ - if DEBUG: - print 'Channel>responseReceived>', self, - printMsg(msg) - self.queue.response(getMessageId(msg), msg) - - def virq(self): - self.factory.virq() - -class Response: - """Entry in the response queue. - Used to signal a response to a message. - """ - - def __init__(self, mid): - self.mid = mid - self.msg = None - self.ready = threading.Event() - - def response(self, msg): - """Signal arrival of a response to a waiting thread. - Passing msg None cancels the wait with an IOError. - """ - if msg: - self.msg = msg - else: - self.mid = -1 - self.ready.set() - - def wait(self, timeout): - """Wait up to 'timeout' seconds for a response. - Returns the response or raises an IOError. - """ - self.ready.wait(timeout) - if self.mid < 0: - raise IOError("wait canceled") - if self.msg is None: - raise IOError("response timeout") - return self.msg - -class ResponseQueue: - """Response queue. Manages waiters for responses to messages. - """ - - def __init__(self, channel): - self.channel = channel - self.lock = threading.Lock() - self.responses = {} - - def add(self, mid): - r = Response(mid) - self.responses[mid] = r - return r - - def get(self, mid): - return self.responses.get(mid) - - def remove(self, mid): - r = self.responses.get(mid) - if r: - del self.responses[mid] - return r - - def response(self, mid, msg): - """Process a response - signals any waiter that a response - has arrived. - """ - try: - self.lock.acquire() - r = self.remove(mid) - finally: - self.lock.release() - if r: - r.response(msg) - - def call(self, msg, timeout): - """Send the message and wait for 'timeout' seconds for a response. - Returns the response. - Raises IOError on timeout. 
- """ - mid = getMessageId(msg) - try: - self.lock.acquire() - r = self.add(mid) - finally: - self.lock.release() - self.channel.writeRequest(msg) - return r.wait(timeout) - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/controller.py --- a/tools/python/xen/xend/server/controller.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/controller.py Fri Sep 9 16:30:54 2005 @@ -21,106 +21,8 @@ from xen.xend.XendError import XendError from xen.xend.xenstore import DBVar -from xen.xend.server.messages import msgTypeName, printMsg, getMessageType DEBUG = 0 - -class CtrlMsgRcvr: - """Utility class to dispatch messages on a control channel. - Once I{registerChannel} has been called, our message types are registered - with the channel. The channel will call I{requestReceived} - when a request arrives if it has one of our message types. - - @ivar channel: channel to a domain - @type channel: Channel - @ivar majorTypes: major message types we are interested in - @type majorTypes: {int:{int:method}} - - """ - - def __init__(self, channel): - self.majorTypes = {} - self.channel = channel - - def getHandler(self, type, subtype): - """Get the method for a type and subtype. - - @param type: major message type - @param subtype: minor message type - @return: method or None - """ - method = None - subtypes = self.majorTypes.get(type) - if subtypes: - method = subtypes.get(subtype) - return method - - def addHandler(self, type, subtype, method): - """Add a method to handle a message type and subtype. - - @param type: major message type - @param subtype: minor message type - @param method: method - """ - subtypes = self.majorTypes.get(type) - if not subtypes: - subtypes = {} - self.majorTypes[type] = subtypes - subtypes[subtype] = method - - def getMajorTypes(self): - """Get the list of major message types handled. - """ - return self.majorTypes.keys() - - def requestReceived(self, msg, type, subtype): - """Dispatch a request message to handlers. 
- Called by the channel for requests with one of our types. - - @param msg: message - @type msg: xu message - @param type: major message type - @type type: int - @param subtype: minor message type - @type subtype: int - """ - if DEBUG: - print 'requestReceived>', - printMsg(msg, all=True) - responded = 0 - method = self.getHandler(type, subtype) - if method: - responded = method(msg) - elif DEBUG: - print ('requestReceived> No handler: Message type %s %d:%d' - % (msgTypeName(type, subtype), type, subtype)), self - return responded - - - def lostChannel(self): - """Called when the channel to the domain is lost. - """ - if DEBUG: - print 'CtrlMsgRcvr>lostChannel>', - self.channel = None - - def registerChannel(self): - """Register interest in our major message types with the - channel to our domain. Once we have registered, the channel - will call requestReceived for our messages. - """ - if DEBUG: - print 'CtrlMsgRcvr>registerChannel>', self.channel, self.getMajorTypes() - if self.channel: - self.channel.registerDevice(self.getMajorTypes(), self) - - def deregisterChannel(self): - """Deregister interest in our major message types with the - channel to our domain. After this the channel won't call - us any more. - """ - if self.channel: - self.channel.deregisterDevice(self) class DevControllerTable: """Table of device controller classes, indexed by type name. 
@@ -232,10 +134,6 @@ def getDomainName(self): return self.vm.getName() - def getChannel(self): - chan = self.vm.getChannel() - return chan - def getDomainInfo(self): return self.vm @@ -433,9 +331,6 @@ def getDomainName(self): return self.controller.getDomainName() - def getChannel(self): - return self.controller.getChannel() - def getDomainInfo(self): return self.controller.getDomainInfo() diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/event.py --- a/tools/python/xen/xend/server/event.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/event.py Fri Sep 9 16:30:54 2005 @@ -174,11 +174,6 @@ else: logging.removeLogStderr() - def op_debug_msg(self, name, v): - mode = v[1] - import messages - messages.DEBUG = (mode == 'on') - def op_debug_controller(self, name, v): mode = v[1] import controller @@ -186,7 +181,7 @@ def op_domain_ls(self, name, v): xd = xroot.get_component("xen.xend.XendDomain") - return xd.domain_ls() + return xd.list_names() def op_domain_configure(self, name, v): domid = sxp.child_value(v, "dom") diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/netif.py Fri Sep 9 16:30:54 2005 @@ -30,9 +30,7 @@ from xen.xend.XendRoot import get_component from xen.xend.xenstore import DBVar -from xen.xend.server import channel -from xen.xend.server.controller import CtrlMsgRcvr, Dev, DevController -from xen.xend.server.messages import * +from xen.xend.server.controller import Dev, DevController class NetDev(Dev): """A network device. 
@@ -90,12 +88,9 @@ def __init__(self, controller, id, config, recreate=False): Dev.__init__(self, controller, id, config, recreate=recreate) self.vif = int(self.id) - self.evtchn = None self.status = None self.frontendDomain = self.getDomain() - self.frontendChannel = None self.backendDomain = None - self.backendChannel = None self.credit = None self.period = None self.mac = None @@ -109,17 +104,11 @@ def exportToDB(self, save=False): Dev.exportToDB(self, save=save) - if self.evtchn: - db = self.db.addChild("evtchn") - self.evtchn.saveToDB(db, save=save) def init(self, recreate=False, reboot=False): self.destroyed = False self.status = NETIF_INTERFACE_STATUS_DISCONNECTED self.frontendDomain = self.getDomain() - self.frontendChannel = self.getChannel() - cf = channel.channelFactory() - self.backendChannel = cf.openChannel(self.backendDomain) def _get_config_mac(self, config): vmac = sxp.child_value(config, 'mac') @@ -287,10 +276,6 @@ val.append(['credit', self.credit]) if self.period: val.append(['period', self.period]) - if self.evtchn: - val.append(['evtchn', - self.evtchn['port1'], - self.evtchn['port2']]) return val def get_vifname(self): @@ -348,42 +333,11 @@ if recreate: pass else: - self.send_be_create() if self.credit and self.period: - self.send_be_creditlimit(self.credit, self.period) + #self.send_be_creditlimit(self.credit, self.period) + pass self.vifctl('up', vmname=self.getDomainName()) - def closeEvtchn(self): - if self.evtchn: - channel.eventChannelClose(self.evtchn) - self.evtchn = None - - def openEvtchn(self): - self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain) - - def getEventChannelBackend(self): - val = 0 - if self.evtchn: - val = self.evtchn['port1'] - return val - - def getEventChannelFrontend(self): - val = 0 - if self.evtchn: - val = self.evtchn['port2'] - return val - - def send_be_create(self): - msg = packMsg('netif_be_create_t', - { 'domid' : self.frontendDomain, - 'netif_handle' : self.vif, - 'be_mac' : 
self.be_mac or [0, 0, 0, 0, 0, 0], - 'mac' : self.mac, - #'vifname' : self.vifname - }) - msg = self.backendChannel.requestResponse(msg) - # todo: check return status - def destroy(self, change=False, reboot=False): """Destroy the device's resources and disconnect from the back-end device controller. If 'change' is true notify the front-end interface. @@ -393,47 +347,14 @@ self.destroyed = True self.status = NETIF_INTERFACE_STATUS_CLOSED log.debug("Destroying vif domain=%d vif=%d", self.frontendDomain, self.vif) - self.closeEvtchn() self.vifctl('down') - self.send_be_disconnect() - self.send_be_destroy() if change: self.reportStatus() - - def send_be_disconnect(self): - msg = packMsg('netif_be_disconnect_t', - { 'domid' : self.frontendDomain, - 'netif_handle' : self.vif }) - self.backendChannel.requestResponse(msg) - #todo: check return status - - def send_be_destroy(self, response=None): - msg = packMsg('netif_be_destroy_t', - { 'domid' : self.frontendDomain, - 'netif_handle' : self.vif }) - self.backendChannel.requestResponse(msg) - #todo: check return status - - def recv_fe_interface_connect(self, val): - self.openEvtchn() - msg = packMsg('netif_be_connect_t', - { 'domid' : self.frontendDomain, - 'netif_handle' : self.vif, - 'evtchn' : self.getEventChannelBackend(), - 'tx_shmem_frame' : val['tx_shmem_frame'], - 'tx_shmem_ref' : val['tx_shmem_ref'], - 'rx_shmem_frame' : val['rx_shmem_frame'], - 'rx_shmem_ref' : val['rx_shmem_ref'] }) - msg = self.backendChannel.requestResponse(msg) - #todo: check return status - self.status = NETIF_INTERFACE_STATUS_CONNECTED - self.reportStatus() def setCreditLimit(self, credit, period): #todo: these params should be in sxpr and vif config. 
self.credit = credit self.period = period - self.send_be_creditlimit(credit, period) def getCredit(self): return self.credit @@ -441,31 +362,10 @@ def getPeriod(self): return self.period - def send_be_creditlimit(self, credit, period): - msg = packMsg('netif_be_creditlimit_t', - { 'domid' : self.frontendDomain, - 'netif_handle' : self.vif, - 'credit_bytes' : credit, - 'period_usec' : period }) - msg = self.backendChannel.requestResponse(msg) - # todo: check return status - - def reportStatus(self, resp=False): - msg = packMsg('netif_fe_interface_status_t', - { 'handle' : self.vif, - 'status' : self.status, - 'evtchn' : self.getEventChannelFrontend(), - 'domid' : self.backendDomain, - 'mac' : self.mac }) - if resp: - self.frontendChannel.writeResponse(msg) - else: - self.frontendChannel.writeRequest(msg) - def interfaceChanged(self): """Notify the front-end that a device has been added or removed. """ - self.reportStatus() + pass class NetifController(DevController): """Network interface controller. Handles all network devices for a domain. @@ -473,25 +373,9 @@ def __init__(self, vm, recreate=False): DevController.__init__(self, vm, recreate=recreate) - self.channel = None - self.rcvr = None - self.channel = None def initController(self, recreate=False, reboot=False): self.destroyed = False - self.channel = self.getChannel() - # Register our handlers for incoming requests. 
- self.rcvr = CtrlMsgRcvr(self.channel) - self.rcvr.addHandler(CMSG_NETIF_FE, - CMSG_NETIF_FE_DRIVER_STATUS, - self.recv_fe_driver_status) - self.rcvr.addHandler(CMSG_NETIF_FE, - CMSG_NETIF_FE_INTERFACE_STATUS, - self.recv_fe_interface_status) - self.rcvr.addHandler(CMSG_NETIF_FE, - CMSG_NETIF_FE_INTERFACE_CONNECT, - self.recv_fe_interface_connect) - self.rcvr.registerChannel() if reboot: self.rebootDevices() @@ -501,8 +385,6 @@ self.destroyed = True log.debug("Destroying netif domain=%d", self.getDomain()) self.destroyDevices(reboot=reboot) - if self.rcvr: - self.rcvr.deregisterChannel() def sxpr(self): val = ['netif', ['dom', self.getDomain()]] @@ -524,57 +406,3 @@ dev = self.devices[vif] return dev.setCreditLimit(credit, period) - - def recv_fe_driver_status(self, msg): - msg = packMsg('netif_fe_driver_status_t', - { 'status' : NETIF_DRIVER_STATUS_UP, - ## FIXME: max_handle should be max active interface id - 'max_handle' : self.getDeviceCount() - #'max_handle' : self.getMaxDeviceId() - }) - # Two ways of doing it: - # 1) front-end requests driver status, we reply with the interface count, - # front-end polls the interfaces, - # front-end checks they are all up - # 2) front-end requests driver status, we reply (with anything), - # we notify the interfaces, - # we notify driver status up with the count - # front-end checks they are all up - # - # We really want to use 1), but at the moment the xenU kernel panics - # in that mode, so we're sticking to 2) for now. 
- resp = False - if resp: - self.channel.writeResponse(msg) - else: - for dev in self.devices.values(): - dev.reportStatus() - self.channel.writeRequest(msg) - return resp - - def recv_fe_interface_status(self, msg): - val = unpackMsg('netif_fe_interface_status_t', msg) - vif = val['handle'] - dev = self.findDevice(vif) - if dev: - dev.reportStatus(resp=True) - else: - log.error('Received netif_fe_interface_status for unknown vif: dom=%d vif=%d', - self.getDomain(), vif) - msg = packMsg('netif_fe_interface_status_t', - { 'handle' : -1, - 'status' : NETIF_INTERFACE_STATUS_CLOSED, - }); - self.channel.writeResponse(msg) - return True - - def recv_fe_interface_connect(self, msg): - val = unpackMsg('netif_fe_interface_connect_t', msg) - vif = val['handle'] - dev = self.getDevice(vif) - if dev: - dev.recv_fe_interface_connect(val) - else: - log.error('Received netif_fe_interface_connect for unknown vif: dom=%d vif=%d', - self.getDomain(), vif) - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/usbif.py --- a/tools/python/xen/xend/server/usbif.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/usbif.py Fri Sep 9 16:30:54 2005 @@ -9,9 +9,7 @@ from xen.xend.XendError import XendError from xen.xend.xenstore import DBVar -from xen.xend.server import channel from xen.xend.server.controller import Dev, DevController -from xen.xend.server.messages import * class UsbBackend: """Handler for the 'back-end' channel to a USB device driver domain @@ -25,39 +23,15 @@ self.connecting = False self.frontendDomain = self.controller.getDomain() self.backendDomain = dom - self.frontendChannel = None - self.backendChannel = None def init(self, recreate=False, reboot=False): - self.frontendChannel = self.controller.getChannel() - cf = channel.channelFactory() - self.backendChannel = cf.openChannel(self.backendDomain) - + pass + def __str__(self): return ('<UsbifBackend frontend=%d backend=%d id=%d>' % (self.frontendDomain, self.backendDomain, self.id)) - - def 
closeEvtchn(self): - if self.evtchn: - channel.eventChannelClose(self.evtchn) - self.evtchn = None - - def openEvtchn(self): - self.evtchn = channel.eventChannel(self.backendDomain, self.frontendDomain) - - def getEventChannelBackend(self): - val = 0 - if self.evtchn: - val = self.evtchn['port1'] - return val - - def getEventChannelFrontend(self): - val = 0 - if self.evtchn: - val = self.evtchn['port2'] - return val def connect(self, recreate=False): """Connect the controller to the usbif control interface. @@ -67,78 +41,14 @@ log.debug("Connecting usbif %s", str(self)) if recreate or self.connected or self.connecting: pass - else: - self.send_be_create() - def send_be_create(self): - msg = packMsg('usbif_be_create_t', - { 'domid' : self.frontendDomain }) - msg = self.backendChannel.requestResponse(msg) - val = unpackMsg('usbif_be_create_t', msg) - log.debug('>UsbifBackendController>respond_be_create> %s', str(val)) - self.connected = True - def destroy(self, reboot=False): """Disconnect from the usbif control interface and destroy it. 
""" self.destroyed = True - self.send_be_disconnect() - self.send_be_destroy() - self.closeEvtchn() - def send_be_disconnect(self): - log.debug('>UsbifBackendController>send_be_disconnect> %s', str(self)) - msg = packMsg('usbif_be_disconnect_t', - { 'domid' : self.frontendDomain }) - self.backendChannel.requestResponse(msg) - - def send_be_destroy(self, response=None): - log.debug('>UsbifBackendController>send_be_destroy> %s', str(self)) - msg = packMsg('usbif_be_destroy_t', - { 'domid' : self.frontendDomain }) - self.backendChannel.requestResponse(msg) - #todo: check return status - - - def connectInterface(self, val): - self.openEvtchn() - log.debug(">UsbifBackendController>connectInterface> connecting usbif to event channel %s ports=%d:%d", - str(self), - self.getEventChannelBackend(), - self.getEventChannelFrontend()) - msg = packMsg('usbif_be_connect_t', - { 'domid' : self.frontendDomain, - 'evtchn' : self.getEventChannelBackend(), - 'shmem_frame' : val['shmem_frame'], - 'bandwidth' : 500 # XXX fix bandwidth! - }) - msg = self.backendChannel.requestResponse(msg) - self.respond_be_connect(msg) - - def respond_be_connect(self, msg): - """Response handler for a be_connect message. 
- - @param msg: message - @type msg: xu message - """ - val = unpackMsg('usbif_be_connect_t', msg) - log.debug('>UsbifBackendController>respond_be_connect> %s, %s', str(self), str(val)) - self.send_fe_interface_status_changed() - log.debug(">UsbifBackendController> Successfully connected USB interface for domain %d" % self.frontendDomain) - self.controller.claim_ports() - - def send_fe_interface_status_changed(self): - msg = packMsg('usbif_fe_interface_status_changed_t', - { 'status' : USBIF_INTERFACE_STATUS_CONNECTED, - 'domid' : self.backendDomain, - 'evtchn' : self.getEventChannelFrontend(), - 'bandwidth' : 500, - 'num_ports' : len(self.controller.devices) - }) - self.frontendChannel.writeRequest(msg) - def interfaceChanged(self): - self.send_fe_interface_status_changed() + pass class UsbDev(Dev): @@ -153,17 +63,12 @@ self.port = id self.path = None self.frontendDomain = self.getDomain() - self.frontendChannel = None self.backendDomain = 0 - self.backendChannel = None self.configure(self.config, recreate=recreate) def init(self, recreate=False, reboot=False): self.destroyed = False self.frontendDomain = self.getDomain() - self.frontendChannel = self.getChannel() - backend = self.getBackend() - self.backendChannel = backend.backendChannel def configure(self, config, change=False, recreate=False): if change: @@ -204,7 +109,6 @@ """ self.destroyed = True log.debug("Destroying usb domain=%d id=%s", self.frontendDomain, self.id) - self.send_be_release_port() if change: self.interfaceChanged() @@ -220,27 +124,6 @@ """ self.getBackend().connect() - def send_be_claim_port(self): - log.debug(">UsbifBackendController>send_be_claim_port> about to claim port %s" % self.path) - msg = packMsg('usbif_be_claim_port_t', - { 'domid' : self.frontendDomain, - 'path' : self.path, - 'usbif_port' : self.port, - 'status' : 0}) - self.backendChannel.writeRequest(msg) - log.debug(">UsbifBackendController> Claim port completed") - # No need to add any callbacks, since the guest polls its 
virtual ports - # anyhow, somewhat like a UHCI controller ;-) - - def send_be_release_port(self): - msg = packMsg('usbif_be_release_port_t', - { 'domid' : self.frontendDomain, - 'path' : self.path }) - self.backendChannel.writeRequest(msg) - log.debug(">UsbifBackendController> Release port completed") - # No need to add any callbacks, since the guest polls its virtual ports - # anyhow, somewhat like a UHCI controller ;-) - class UsbifController(DevController): """USB device interface controller. Handles all USB devices for a domain. @@ -252,18 +135,9 @@ DevController.__init__(self, vm, recreate=recreate) self.backends = {} self.backendId = 0 - self.rcvr = None def init(self, recreate=False, reboot=False): self.destroyed = False - self.rcvr = CtrlMsgRcvr(self.getChannel()) - self.rcvr.addHandler(CMSG_USBIF_FE, - CMSG_USBIF_FE_DRIVER_STATUS_CHANGED, - self.recv_fe_driver_status_changed) - self.rcvr.addHandler(CMSG_USBIF_FE, - CMSG_USBIF_FE_INTERFACE_CONNECT, - self.recv_fe_interface_connect) - self.rcvr.registerChannel() if reboot: self.rebootBackends() self.rebootDevices() @@ -283,8 +157,6 @@ log.debug("Destroying blkif domain=%d", self.getDomain()) self.destroyDevices(reboot=reboot) self.destroyBackends(reboot=reboot) - if self.rcvr: - self.rcvr.deregisterChannel() def rebootBackends(self): for backend in self.backends.values(): @@ -311,40 +183,3 @@ def destroyBackends(self, reboot=False): for backend in self.backends.values(): backend.destroy(reboot=reboot) - - def recv_fe_driver_status_changed(self, msg): - val = unpackMsg('usbif_fe_driver_status_changed_t', msg) - log.debug('>UsbifController>recv_fe_driver_status_changed> %s', str(val)) - #todo: FIXME: For each backend? 
- msg = packMsg('usbif_fe_interface_status_changed_t', - { 'status' : USBIF_INTERFACE_STATUS_DISCONNECTED, - 'domid' : 0, #todo: FIXME: should be domid of backend - 'evtchn' : 0 }) - msg = self.getChannel().requestResponse(msg) - self.disconnected_resp(msg) - - def disconnected_resp(self, msg): - val = unpackMsg('usbif_fe_interface_status_changed_t', msg) - if val['status'] != USBIF_INTERFACE_STATUS_DISCONNECTED: - log.error(">UsbifController>disconnected_resp> unexpected status change") - else: - log.debug(">UsbifController>disconnected_resp> interface disconnected OK") - - def recv_fe_interface_connect(self, msg): - val = unpackMsg('usbif_fe_interface_status_changed_t', msg) - log.debug(">UsbifController>recv_fe_interface_connect> notifying backend") - #todo: FIXME: generalise to more than one backend. - id = 0 - backend = self.getBackendById(id) - if backend: - try: - backend.connectInterface(val) - except IOError, ex: - log.error("Exception connecting backend: %s", ex) - else: - log.error('interface connect on unknown interface: id=%d', id) - - def claim_ports(self): - for dev in self.devices.values(): - dev.send_be_claim_port() - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/xenstore/xsobj.py --- a/tools/python/xen/xend/xenstore/xsobj.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/xenstore/xsobj.py Fri Sep 9 16:30:54 2005 @@ -543,10 +543,10 @@ if not isinstance(node, DBMap): continue node.saveDB(sync=sync, save=save) # Remove db nodes not in children. - if sync: - for name in self.__db__.ls(): - if name not in self: - self.__db__.delete(name) + ###if sync: + ### for name in self.__db__.ls(): + ### if name not in self: + ### self.__db__.delete(name) def importFromDB(self, obj, fields): """Set fields in obj from db fields. 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xm/create.py Fri Sep 9 16:30:54 2005 @@ -103,12 +103,13 @@ fn=set_true, default=0, use="Connect to the console after the domain is created.") -gopts.var('vnc', val='no|yes', +gopts.var('vncviewer', val='no|yes', fn=set_bool, default=None, use="""Spawn a vncviewer listening for a vnc server in the domain. The address of the vncviewer is passed to the domain on the kernel command line using 'VNC_SERVER=<host>:<port>'. The port used by vnc is 5500 + DISPLAY. A display value with a free port is chosen if possible. + Only valid when vnc=1. """) gopts.var('name', val='NAME', @@ -174,6 +175,12 @@ gopts.var('netif', val='no|yes', fn=set_bool, default=0, use="Make the domain a network interface backend.") + +gopts.var('tpmif', val='frontend=DOM', + fn=append_value, default=[], + use="""Make the domain a TPM interface backend. If frontend is given, + the frontend in that domain is connected to this backend (not + completely implemented, yet)""") gopts.var('disk', val='phy:DEV,VDEV,MODE[,DOM]', fn=append_value, default=[], @@ -213,6 +220,12 @@ This option may be repeated to add more than one vif. Specifying vifs will increase the number of interfaces as needed.""") +gopts.var('vtpm', val="instance=INSTANCE,backend=DOM", + fn=append_value, default=[], + use="""Add a tpm interface. On the backend side us the the given + instance as virtual TPM instance. Use the backend in the given + domain.""") + gopts.var('nics', val="NUM", fn=set_int, default=1, use="""Set the number of network interfaces. 
@@ -308,6 +321,10 @@ gopts.var('nographic', val='no|yes', fn=set_bool, default=0, use="Should device models use graphics?") + +gopts.var('vnc', val='', + fn=set_value, default=None, + use="""Should the device model use VNC?""") gopts.var('sdl', val='', fn=set_value, default=None, @@ -368,6 +385,46 @@ for path in vals.usb: config_usb = ['usb', ['path', path]] config_devs.append(['device', config_usb]) + +def configure_vtpm(opts, config_devs, vals): + """Create the config for virtual TPM interfaces. + """ + vtpm = vals.vtpm + vtpm_n = 1 + for idx in range(0, vtpm_n): + if idx < len(vtpm): + d = vtpm[idx] + instance = d.get('instance') + if instance == "VTPMD": + instance = "0" + else: + try: + if int(instance) == 0: + opts.err('VM config error: vTPM instance must not be 0.') + except ValueError: + opts.err('Vm config error: could not parse instance number.') + backend = d.get('backend') + config_vtpm = ['vtpm'] + if instance: + config_vtpm.append(['instance', instance]) + if backend: + config_vtpm.append(['backend', backend]) + config_devs.append(['device', config_vtpm]) + +def configure_tpmif(opts, config_devs, vals): + """Create the config for virtual TPM interfaces. + """ + tpmif = vals.tpmif + tpmif_n = 1 + for idx in range(0, tpmif_n): + if idx < len(tpmif): + d = tpmif[idx] + frontend = d.get('frontend') + config_tpmif = ['tpmif'] + if frontend: + config_tpmif.append(['frontend', frontend]) + config_devs.append(['device', config_tpmif]) + def randomMAC(): """Generate a random MAC address. 
@@ -442,7 +499,7 @@ """ args = [ 'memmap', 'device_model', 'cdrom', 'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga', - 'isa', 'nographic', 'vnc', 'sdl', 'display'] + 'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display'] for a in args: if (vals.__dict__[a]): config_devs.append([a, vals.__dict__[a]]) @@ -479,6 +536,8 @@ config.append(['backend', ['blkif']]) if vals.netif: config.append(['backend', ['netif']]) + if vals.tpmif: + config.append(['backend', ['tpmif']]) if vals.restart: config.append(['restart', vals.restart]) @@ -491,6 +550,7 @@ configure_pci(opts, config_devs, vals) configure_vifs(opts, config_devs, vals) configure_usb(opts, config_devs, vals) + configure_vtpm(opts, config_devs, vals) configure_vmx(opts, config_devs, vals) config += config_devs @@ -539,6 +599,38 @@ vifs.append(d) vals.vif = vifs +def preprocess_vtpm(opts, vals): + if not vals.vtpm: return + vtpms = [] + for vtpm in vals.vtpm: + d = {} + a = vtpm.split(',') + for b in a: + (k, v) = b.strip().split('=', 1) + k = k.strip() + v = v.strip() + if k not in ['backend', 'instance']: + opts.err('Invalid vtpm specifier: ' + vtpm) + d[k] = v + vtpms.append(d) + vals.vtpm = vtpms + +def preprocess_tpmif(opts, vals): + if not vals.tpmif: return + tpmifs = [] + for tpmif in vals.tpmif: + d = {} + a = tpmif.split(',') + for b in a: + (k, v) = b.strip().split('=', 1) + k = k.strip() + v = v.strip() + if k not in ['frontend']: + opts.err('Invalid tpmif specifier: ' + vtpm) + d[k] = v + tpmifs.append(d) + vals.tpmif = tpmifs + def preprocess_ip(opts, vals): if vals.ip or vals.dhcp != 'off': dummy_nfs_server = '1.2.3.4' @@ -606,7 +698,7 @@ """If vnc was specified, spawn a vncviewer in listen mode and pass its address to the domain on the kernel command line. 
""" - if not vals.vnc or vals.dryrun: return + if not (vals.vnc and vals.vncviewer) or vals.dryrun: return vnc_display = choose_vnc_display() if not vnc_display: opts.warn("No free vnc display") @@ -627,6 +719,8 @@ preprocess_ip(opts, vals) preprocess_nfs(opts, vals) preprocess_vnc(opts, vals) + preprocess_vtpm(opts, vals) + preprocess_tpmif(opts, vals) def make_domain(opts, config): """Create, build and start a domain. diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xm/main.py Fri Sep 9 16:30:54 2005 @@ -715,9 +715,9 @@ err("Most commands need root access. Please try again as root") sys.exit(1) except XendError, ex: - if args[0] == "bogus": - args.remove("bogus") if len(args) > 0: + if args[0] == "bogus": + args.remove("bogus") handle_xend_error(argv[1], args[0], ex) else: print "Unexpected error:", sys.exc_info()[0] diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/Makefile --- a/tools/security/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/security/Makefile Fri Sep 9 16:30:54 2005 @@ -45,6 +45,7 @@ $(MAKE) secpol_xml2bin chmod 700 ./setlabel.sh chmod 700 ./updategrub.sh + chmod 700 ./getlabel.sh secpol_tool : secpol_tool.c secpol_compat.h $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ $< diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/secpol_tool.c --- a/tools/security/secpol_tool.c Thu Sep 8 15:18:40 2005 +++ b/tools/security/secpol_tool.c Fri Sep 9 16:30:54 2005 @@ -25,6 +25,7 @@ #include <stdio.h> #include <errno.h> #include <fcntl.h> +#include <getopt.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> @@ -40,6 +41,17 @@ #define PERROR(_m, _a...) 
\ fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a , \ errno, strerror(errno)) + +void usage(char *progname) +{ + printf("Use: %s \n" + "\t getpolicy\n" + "\t dumpstats\n" + "\t loadpolicy <binary policy file>\n" + "\t getssid -d <domainid> [-f]\n" + "\t getssid -s <ssidref> [-f]\n", progname); + exit(-1); +} static inline int do_policycmd(int xc_handle, unsigned int cmd, unsigned long data) @@ -320,7 +332,7 @@ if (ret) printf - ("ERROR setting policy. Use 'xm dmesg' to see details.\n"); + ("ERROR setting policy. Try 'xm dmesg' to see details.\n"); else printf("Successfully changed policy.\n"); @@ -370,7 +382,7 @@ if (ret < 0) { - printf("ERROR dumping policy stats. Use 'xm dmesg' to see details.\n"); + printf("ERROR dumping policy stats. Try 'xm dmesg' to see details.\n"); return ret; } stats = (struct acm_stats_buffer *) stats_buffer; @@ -421,17 +433,121 @@ } return ret; } +/************************ get ssidref & types ******************************/ +/* + * the ssid (types) can be looked up either by domain id or by ssidref + */ +int acm_domain_getssid(int xc_handle, int argc, char * const argv[]) +{ + /* this includes header and a set of types */ + #define MAX_SSIDBUFFER 2000 + int ret, i; + acm_op_t op; + struct acm_ssid_buffer *hdr; + unsigned char *buf; + int nice_print = 1; + + op.cmd = ACM_GETSSID; + op.interface_version = ACM_INTERFACE_VERSION; + op.u.getssid.get_ssid_by = UNSET; + /* arguments + -d ... domain id to look up + -s ... ssidref number to look up + -f ... 
formatted print (scripts depend on this format) + */ + while (1) + { + int c = getopt(argc, argv, "d:s:f"); + if (c == -1) + break; + if (c == 'd') + { + if (op.u.getssid.get_ssid_by != UNSET) + usage(argv[0]); + op.u.getssid.get_ssid_by = DOMAINID; + op.u.getssid.id.domainid = strtoul(optarg, NULL, 0); + } + else if (c== 's') + { + if (op.u.getssid.get_ssid_by != UNSET) + usage(argv[0]); + op.u.getssid.get_ssid_by = SSIDREF; + op.u.getssid.id.ssidref = strtoul(optarg, NULL, 0); + } + else if (c== 'f') + { + nice_print = 0; + } + else + usage(argv[0]); + } + if (op.u.getssid.get_ssid_by == UNSET) + usage(argv[0]); + + buf = malloc(MAX_SSIDBUFFER); + if (!buf) + return -ENOMEM; + + /* dump it and then push it down into xen/acm */ + op.u.getssid.ssidbuf = buf; /* out */ + op.u.getssid.ssidbuf_size = MAX_SSIDBUFFER; + ret = do_acm_op(xc_handle, &op); + + if (ret) + { + printf("ERROR getting ssidref. Try 'xm dmesg' to see details.\n"); + goto out; + } + hdr = (struct acm_ssid_buffer *)buf; + if (hdr->len > MAX_SSIDBUFFER) + { + printf("ERROR: Buffer length inconsistent (ret=%d, hdr->len=%d)!\n", + ret, hdr->len); + return -EIO; + } + if (nice_print) + { + printf("SSID: ssidref = 0x%08x \n", hdr->ssidref); + printf(" P: %s, max_types = %d\n", + ACM_POLICY_NAME(hdr->primary_policy_code), hdr->primary_max_types); + printf(" Types: "); + for (i=0; i< hdr->primary_max_types; i++) + if (buf[hdr->primary_types_offset + i]) + printf("%02x ", i); + else + printf("-- "); + printf("\n"); + + printf(" S: %s, max_types = %d\n", + ACM_POLICY_NAME(hdr->secondary_policy_code), hdr->secondary_max_types); + printf(" Types: "); + for (i=0; i< hdr->secondary_max_types; i++) + if (buf[hdr->secondary_types_offset + i]) + printf("%02x ", i); + else + printf("-- "); + printf("\n"); + } + else + { + /* formatted print for use with scripts (.sh) + * update scripts when updating here (usually + * used in combination with -d to determine a + * running domain's label + */ + printf("SSID: ssidref = 
0x%08x \n", hdr->ssidref); + } + + /* return ste ssidref */ + if (hdr->primary_policy_code == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY) + ret = (hdr->ssidref) & 0xffff; + else if (hdr->secondary_policy_code == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY) + ret = (hdr->ssidref) >> 16; + out: + return ret; +} /***************************** main **************************************/ - -void usage(char *progname) -{ - printf("Use: %s \n" - "\t getpolicy\n" - "\t dumpstats\n" - "\t loadpolicy <binary policy file>\n", progname); - exit(-1); -} int main(int argc, char **argv) { @@ -459,6 +575,8 @@ if (argc != 2) usage(argv[0]); ret = acm_domain_dumpstats(acm_cmd_fd); + } else if (!strcmp(argv[1], "getssid")) { + ret = acm_domain_getssid(acm_cmd_fd, argc, argv); } else usage(argv[0]); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/setlabel.sh --- a/tools/security/setlabel.sh Thu Sep 8 15:18:40 2005 +++ b/tools/security/setlabel.sh Fri Sep 9 16:30:54 2005 @@ -34,275 +34,27 @@ exec sh -c "bash $0 $*" fi +export PATH=$PATH:. +source labelfuncs.sh usage () { - echo "Usage: $0 [Option] <vmfile> <label> <policy name> " - echo " or $0 -l <policy name>" + echo "Usage: $0 [Option] <vmfile> <label> [<policy name>]" + echo " or $0 -l [<policy name>]" echo "" - echo "Valid Options are:" + echo "Valid options are:" echo "-r : to relabel a file without being prompted" echo "" echo "vmfile : XEN vm configuration file" - echo "label : the label to map" + echo "label : the label to map to an ssidref" echo "policy name : the name of the policy, i.e. 'chwall'" + echo " If the policy name is omitted, it is attempted" + echo " to find the current policy's name in grub.conf." echo "" - echo "-l <policy name> is used to show valid labels in the map file" + echo "-l [<policy name>] is used to show valid labels in the map file of" + echo " the given or current policy." 
echo "" } - - -findMapFile () -{ - mapfile="./$1.map" - if [ -r "$mapfile" ]; then - return 1 - fi - - mapfile="./policies/$1/$1.map" - if [ -r "$mapfile" ]; then - return 1 - fi - - return 0 -} - -showLabels () -{ - mapfile=$1 - if [ ! -r "$mapfile" -o "$mapfile" == "" ]; then - echo "Cannot read from vm configuration file $vmfile." - return -1 - fi - - getPrimaryPolicy $mapfile - getSecondaryPolicy $mapfile - - echo "The following labels are available:" - let line=1 - while [ 1 ]; do - ITEM=`cat $mapfile | \ - awk -vline=$line \ - -vprimary=$primary \ - '{ \ - if ($1 == "LABEL->SSID" && \ - $2 == "VM" && \ - $3 == primary ) { \ - ctr++; \ - if (ctr == line) { \ - print $4; \ - } \ - } \ - } END { \ - }'` - - if [ "$ITEM" == "" ]; then - break - fi - if [ "$secondary" != "NULL" ]; then - LABEL=`cat $mapfile | \ - awk -vitem=$ITEM \ - '{ - if ($1 == "LABEL->SSID" && \ - $2 == "VM" && \ - $3 == "CHWALL" && \ - $4 == item ) { \ - result = item; \ - } \ - } END { \ - print result \ - }'` - else - LABEL=$ITEM - fi - - if [ "$LABEL" != "" ]; then - echo "$LABEL" - found=1 - fi - let line=line+1 - done - if [ "$found" != "1" ]; then - echo "No labels found." - fi -} - -getPrimaryPolicy () -{ - mapfile=$1 - primary=`cat $mapfile | \ - awk ' \ - { \ - if ( $1 == "PRIMARY" ) { \ - res=$2; \ - } \ - } END { \ - print res; \ - } '` -} - -getSecondaryPolicy () -{ - mapfile=$1 - secondary=`cat $mapfile | \ - awk ' \ - { \ - if ( $1 == "SECONDARY" ) { \ - res=$2; \ - } \ - } END { \ - print res; \ - } '` -} - - -getDefaultSsid () -{ - mapfile=$1 - pol=$2 - RES=`cat $mapfile \ - awk -vpol=$pol \ - { \ - if ($1 == "LABEL->SSID" && \ - $2 == "ANY" && \ - $3 == pol && \ - $4 == "DEFAULT" ) {\ - res=$5; \ - } \ - } END { \ - printf "%04x", strtonum(res) \ - }'` - echo "default NULL mapping is $RES" - defaultssid=$RES -} - -relabel () -{ - vmfile=$1 - label=$2 - mapfile=$3 - mode=$4 - - if [ ! -r "$vmfile" ]; then - echo "Cannot read from vm configuration file $vmfile." 
- return -1 - fi - - if [ ! -w "$vmfile" ]; then - echo "Cannot write to vm configuration file $vmfile." - return -1 - fi - - if [ ! -r "$mapfile" ] ; then - echo "Cannot read mapping file $mapfile." - return -1 - fi - - # Determine which policy is primary, which sec. - getPrimaryPolicy $mapfile - getSecondaryPolicy $mapfile - - # Calculate the primary policy's SSIDREF - if [ "$primary" == "NULL" ]; then - SSIDLO="0000" - else - SSIDLO=`cat $mapfile | \ - awk -vlabel=$label \ - -vprimary=$primary \ - '{ \ - if ( $1 == "LABEL->SSID" && \ - $2 == "VM" && \ - $3 == primary && \ - $4 == label ) { \ - result=$5 \ - } \ - } END { \ - if (result != "" ) \ - {printf "%04x", strtonum(result)}\ - }'` - fi - - # Calculate the secondary policy's SSIDREF - if [ "$secondary" == "NULL" ]; then - SSIDHI="0000" - else - SSIDHI=`cat $mapfile | \ - awk -vlabel=$label \ - -vsecondary=$secondary \ - '{ \ - if ( $1 == "LABEL->SSID" && \ - $2 == "VM" && \ - $3 == secondary && \ - $4 == label ) { \ - result=$5 \ - } \ - } END { \ - if (result != "" ) \ - {printf "%04x", strtonum(result)}\ - }'` - fi - - if [ "$SSIDLO" == "" -o \ - "$SSIDHI" == "" ]; then - echo "Could not map the given label '$label'." - return -1 - fi - - ACM_POLICY=`cat $mapfile | \ - awk ' { if ( $1 == "POLICY" ) { \ - result=$2 \ - } \ - } \ - END { \ - if (result != "") { \ - printf result \ - } \ - }'` - - if [ "$ACM_POLICY" == "" ]; then - echo "Could not find 'POLICY' entry in map file." - return -1 - fi - - SSIDREF="0x$SSIDHI$SSIDLO" - - if [ "$mode" != "relabel" ]; then - RES=`cat $vmfile | \ - awk '{ \ - if ( substr($1,0,7) == "ssidref" ) {\ - print $0; \ - } \ - }'` - if [ "$RES" != "" ]; then - echo "Do you want to overwrite the existing mapping ($RES)? (y/N)" - read user - if [ "$user" != "y" -a "$user" != "Y" ]; then - echo "Aborted." - return 0 - fi - fi - fi - - #Write the output - vmtmp1="/tmp/__setlabel.tmp1" - vmtmp2="/tmp/__setlabel.tmp2" - touch $vmtmp1 - touch $vmtmp2 - if [ ! -w "$vmtmp1" -o ! 
-w "$vmtmp2" ]; then - echo "Cannot create temporary files. Aborting." - return -1 - fi - RES=`sed -e '/^#ACM_POLICY/d' $vmfile > $vmtmp1` - RES=`sed -e '/^#ACM_LABEL/d' $vmtmp1 > $vmtmp2` - RES=`sed -e '/^ssidref/d' $vmtmp2 > $vmtmp1` - echo "#ACM_POLICY=$ACM_POLICY" >> $vmtmp1 - echo "#ACM_LABEL=$label" >> $vmtmp1 - echo "ssidref = $SSIDREF" >> $vmtmp1 - mv -f $vmtmp1 $vmfile - rm -rf $vmtmp1 $vmtmp2 - echo "Mapped label '$label' to ssidref '$SSIDREF'." -} - if [ "$1" == "-r" ]; then @@ -317,10 +69,25 @@ if [ "$mode" == "show" ]; then if [ "$1" == "" ]; then - usage - exit -1; + findGrubConf + ret=$? + if [ $ret -eq 0 ]; then + echo "Could not find grub.conf" + exit -1; + fi + findPolicyInGrub $grubconf + if [ "$policy" != "" ]; then + echo "Assuming policy to be '$policy'."; + else + echo "Could not find policy." + exit -1; + fi + else + policy=$3; fi - findMapFile $1 + + + findMapFile $policy res=$? if [ "$res" != "0" ]; then showLabels $mapfile @@ -330,11 +97,29 @@ elif [ "$mode" == "usage" ]; then usage else + if [ "$2" == "" ]; then + usage + exit -1 + fi if [ "$3" == "" ]; then - usage - exit -1; + findGrubConf + ret=$? + if [ $ret -eq 0 ]; then + echo "Could not find grub.conf" + exit -1; + fi + findPolicyInGrub $grubconf + if [ "$policy" != "" ]; then + echo "Assuming policy to be '$policy'."; + else + echo "Could not find policy." + exit -1; + fi + + else + policy=$3; fi - findMapFile $3 + findMapFile $policy res=$? 
if [ "$res" != "0" ]; then relabel $1 $2 $mapfile $mode diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcutils/Makefile --- a/tools/xcutils/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/xcutils/Makefile Fri Sep 9 16:30:54 2005 @@ -18,8 +18,6 @@ PROGRAMS_INSTALL_DIR = /usr/libexec/xen INCLUDES += -I $(XEN_LIBXC) - -CC := gcc CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing CFLAGS += $(INCLUDES) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcutils/xc_restore.c --- a/tools/xcutils/xc_restore.c Thu Sep 8 15:18:40 2005 +++ b/tools/xcutils/xc_restore.c Fri Sep 9 16:30:54 2005 @@ -17,22 +17,27 @@ int main(int argc, char **argv) { - unsigned int xc_fd, io_fd, domid, nr_pfns, evtchn; + unsigned int xc_fd, io_fd, domid, nr_pfns, store_evtchn, console_evtchn; int ret; - unsigned long mfn; + unsigned long store_mfn, console_mfn; - if (argc != 6) - errx(1, "usage: %s xcfd iofd domid nr_pfns evtchn", argv[0]); + if (argc != 7) + errx(1, + "usage: %s xcfd iofd domid nr_pfns store_evtchn console_evtchn", + argv[0]); xc_fd = atoi(argv[1]); io_fd = atoi(argv[2]); domid = atoi(argv[3]); nr_pfns = atoi(argv[4]); - evtchn = atoi(argv[5]); + store_evtchn = atoi(argv[5]); + console_evtchn = atoi(argv[6]); - ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, evtchn, &mfn); + ret = xc_linux_restore(xc_fd, io_fd, domid, nr_pfns, store_evtchn, + &store_mfn, console_evtchn, &console_mfn); if (ret == 0) { - printf("store-mfn %li\n", mfn); + printf("store-mfn %li\n", store_mfn); + printf("console-mfn %li\n", console_mfn); fflush(stdout); } return ret; diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcutils/xc_save.c --- a/tools/xcutils/xc_save.c Thu Sep 8 15:18:40 2005 +++ b/tools/xcutils/xc_save.c Fri Sep 9 16:30:54 2005 @@ -17,14 +17,17 @@ int main(int argc, char **argv) { - unsigned int xc_fd, io_fd, domid; + unsigned int xc_fd, io_fd, domid, maxit, max_f, flags; - if (argc != 4) - errx(1, "usage: %s xcfd iofd domid", argv[0]); + if (argc != 7) + errx(1, "usage: %s xcfd iofd domid maxit maxf flags", 
argv[0]); xc_fd = atoi(argv[1]); io_fd = atoi(argv[2]); domid = atoi(argv[3]); + maxit = atoi(argv[4]); + max_f = atoi(argv[5]); + flags = atoi(argv[6]); - return xc_linux_save(xc_fd, io_fd, domid); + return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags); } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstat/Makefile --- a/tools/xenstat/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/xenstat/Makefile Fri Sep 9 16:30:54 2005 @@ -3,7 +3,11 @@ SUBDIRS := SUBDIRS += libxenstat + +# This doesn't cross-compile (cross-compile environments rarely have curses) +ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) SUBDIRS += xentop +endif .PHONY: all install clean diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/Makefile Fri Sep 9 16:30:54 2005 @@ -8,15 +8,16 @@ INSTALL_DIR = $(INSTALL) -d -m0755 PROFILE=#-pg -BASECFLAGS=-Wall -W -g +BASECFLAGS=-Wall -W -g -Werror # Make gcc generate dependencies. BASECFLAGS += -Wp,-MD,.$(@F).d PROG_DEP = .*.d -#BASECFLAGS+= -O3 $(PROFILE) +BASECFLAGS+= -O3 $(PROFILE) #BASECFLAGS+= -I$(XEN_ROOT)/tools BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc BASECFLAGS+= -I$(XEN_ROOT)/xen/include/public BASECFLAGS+= -I. 
+BASECFLAGS+= -I$(XEN_ROOT)/linux-2.6-xen-sparse/include/asm-xen/linux-public CFLAGS += $(BASECFLAGS) LDFLAGS += $(PROFILE) -L$(XEN_LIBXC) @@ -24,15 +25,22 @@ TESTFLAGS= -DTESTING TESTENV = XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR) -all: xen xenstored libxenstore.so +CLIENTS := xenstore-read xenstore-rm xenstore-write +CLIENTS_OBJS := $(patsubst xenstore-%,xenstore_%.o,$(CLIENTS)) -testcode: xen xs_test xenstored_test xs_random xs_dom0_test +all: libxenstore.so xenstored $(CLIENTS) -xen: - ln -sf $(XEN_ROOT)/xen/include/public $@ +testcode: xs_test xenstored_test xs_random xs_dom0_test xenstored: xenstored_core.o xenstored_watch.o xenstored_domain.o xenstored_transaction.o xs_lib.o talloc.o utils.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -o $@ + +$(CLIENTS): libxenstore.so +$(CLIENTS): xenstore-%: xenstore_%.o + $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl -L. -lxenstore -o $@ + +$(CLIENTS_OBJS): xenstore_%.o: xenstore_client.c + $(COMPILE.c) -DCLIENT_$(*F) -o $@ $< xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o $(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@ @@ -58,7 +66,7 @@ clean: testsuite-clean rm -f *.o *.opic *.so - rm -f xen xenstored xs_random xs_stress xs_crashme + rm -f xenstored xs_random xs_stress xs_crashme rm -f xs_test xenstored_test xs_dom0_test $(RM) $(PROG_DEP) @@ -72,10 +80,10 @@ fullcheck: testsuite-run randomcheck stresstest -testsuite-run: xen xenstored_test xs_test +testsuite-run: xenstored_test xs_test $(TESTENV) testsuite/test.sh && echo -testsuite-fast: xen xenstored_test xs_test +testsuite-fast: xenstored_test xs_test @$(TESTENV) testsuite/test.sh --fast testsuite-clean: @@ -111,12 +119,14 @@ tarball: clean cd .. 
&& tar -c -j -v -h -f xenstore.tar.bz2 xenstore/ -install: xenstored libxenstore.so +install: libxenstore.so xenstored $(CLIENTS) $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored + $(INSTALL_DIR) -p $(DESTDIR)/usr/bin $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin $(INSTALL_DIR) -p $(DESTDIR)/usr/include $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin + $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) $(INSTALL_DATA) libxenstore.so $(DESTDIR)/usr/$(LIBDIR) $(INSTALL_DATA) xs.h $(DESTDIR)/usr/include diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored.h --- a/tools/xenstore/xenstored.h Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstored.h Fri Sep 9 16:30:54 2005 @@ -42,7 +42,7 @@ XS_OP_READ_ONLY = XS_TRANSACTION_END, XS_INTRODUCE, XS_RELEASE, - XS_GETDOMAINPATH, + XS_GET_DOMAIN_PATH, XS_WRITE, XS_MKDIR, XS_RM, diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstored_core.c Fri Sep 9 16:30:54 2005 @@ -49,6 +49,8 @@ #include "xenstored_watch.h" #include "xenstored_transaction.h" #include "xenstored_domain.h" +#include "xenctrl.h" +#include "xen/io/domain_controller.h" static bool verbose; LIST_HEAD(connections); @@ -140,7 +142,7 @@ case XS_TRANSACTION_END: return "TRANSACTION_END"; case XS_INTRODUCE: return "INTRODUCE"; case XS_RELEASE: return "RELEASE"; - case XS_GETDOMAINPATH: return "GETDOMAINPATH"; + case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH"; case XS_WRITE: return "WRITE"; case XS_MKDIR: return "MKDIR"; case XS_RM: return "RM"; @@ -719,7 +721,7 @@ char *slash = strrchr(node + 1, '/'); if (!slash) return talloc_strdup(node, "/"); - return talloc_asprintf(node, "%.*s", slash - node, node); + return talloc_asprintf(node, "%.*s", (int)(slash - node), node); } static enum xs_perm_type perm_for_id(domid_t id, @@ -828,6 +830,15 @@ return false; } +bool check_event_node(const 
char *node) +{ + if (!node || !strstarts(node, "@")) { + errno = EINVAL; + return false; + } + return true; +} + static void send_directory(struct connection *conn, const char *node) { char *path, *reply; @@ -901,7 +912,7 @@ if (slash) *slash = '\0'; - dest = talloc_asprintf(dir, "%.*s", dot - dir, dir); + dest = talloc_asprintf(dir, "%.*s", (int)(dot - dir), dir); return rename(dir, dest) == 0; } @@ -1277,7 +1288,7 @@ do_release(conn, onearg(in)); break; - case XS_GETDOMAINPATH: + case XS_GET_DOMAIN_PATH: do_get_domain_path(conn, onearg(in)); break; @@ -1295,8 +1306,12 @@ static void consider_message(struct connection *conn) { - struct buffered_data *in = NULL; - enum xsd_sockmsg_type type = conn->in->hdr.msg.type; + /* + * 'volatile' qualifier prevents register allocation which fixes: + * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork' + */ + struct buffered_data *volatile in = NULL; + enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type; jmp_buf talloc_fail; assert(conn->state == OK); @@ -1434,7 +1449,11 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) { - struct connection *new; + /* + * 'volatile' qualifier prevents register allocation which fixes: + * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork' + */ + struct connection *volatile new; jmp_buf talloc_fail; new = talloc(talloc_autofree_context(), struct connection); @@ -1628,12 +1647,13 @@ } -static struct option options[] = { { "no-fork", 0, NULL, 'N' }, - { "verbose", 0, NULL, 'V' }, - { "output-pid", 0, NULL, 'P' }, - { "trace-file", 1, NULL, 'T' }, - { "pid-file", 1, NULL, 'F' }, - { NULL, 0, NULL, 0 } }; +static struct option options[] = { + { "pid-file", 1, NULL, 'F' }, + { "no-fork", 0, NULL, 'N' }, + { "output-pid", 0, NULL, 'P' }, + { "trace-file", 1, NULL, 'T' }, + { "verbose", 0, NULL, 'V' }, + { NULL, 0, NULL, 0 } }; int main(int argc, char *argv[]) { @@ -1644,13 +1664,14 @@ bool outputpid = false; const char *pidfile = 
NULL; - while ((opt = getopt_long(argc, argv, "DVT:", options, NULL)) != -1) { + while ((opt = getopt_long(argc, argv, "F:NPT:V", options, + NULL)) != -1) { switch (opt) { + case 'F': + pidfile = optarg; + break; case 'N': dofork = false; - break; - case 'V': - verbose = true; break; case 'P': outputpid = true; @@ -1662,8 +1683,9 @@ optarg); write(tracefd, "\n***\n", strlen("\n***\n")); break; - case 'F': - pidfile = optarg; + case 'V': + verbose = true; + break; } } if (optind != argc) @@ -1812,6 +1834,7 @@ /* If transactions ended, we might be able to do more work. */ unblock_connections(); - max = initialize_set(&inset, &outset, *sock,*ro_sock,event_fd); - } -} + max = initialize_set(&inset, &outset, *sock, *ro_sock, + event_fd); + } +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_core.h --- a/tools/xenstore/xenstored_core.h Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstored_core.h Fri Sep 9 16:30:54 2005 @@ -133,6 +133,9 @@ bool check_node_perms(struct connection *conn, const char *node, enum xs_perm_type perm); +/* Check if node is an event node. */ +bool check_event_node(const char *node); + /* Path to this node outside transaction. */ char *node_dir_outside_transaction(const char *node); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstored_domain.c Fri Sep 9 16:30:54 2005 @@ -1,4 +1,4 @@ -/* +/* Domain communications for Xen Store Daemon. 
Copyright (C) 2005 Rusty Russell IBM Corporation @@ -33,10 +33,12 @@ #include "talloc.h" #include "xenstored_core.h" #include "xenstored_domain.h" +#include "xenstored_watch.h" #include "xenstored_test.h" static int *xc_handle; static int eventchn_fd; +static int virq_port; static unsigned int ringbuf_datasize; struct domain @@ -216,17 +218,6 @@ return 0; } -static struct domain *find_domain(u16 port) -{ - struct domain *i; - - list_for_each_entry(i, &domains, list) { - if (i->port == port) - return i; - } - return NULL; -} - /* We scan all domains rather than use the information given here. */ void handle_event(int event_fd) { @@ -234,6 +225,10 @@ if (read(event_fd, &port, sizeof(port)) != sizeof(port)) barf_perror("Failed to read from event fd"); + + if (port == virq_port) + domain_cleanup(); + #ifndef TESTING if (write(event_fd, &port, sizeof(port)) != sizeof(port)) barf_perror("Failed to write to event fd"); @@ -319,6 +314,9 @@ /* Now domain belongs to its connection. */ talloc_steal(domain->conn, domain); + + fire_watches(conn, "@introduceDomain", false); + send_ack(conn, XS_INTRODUCE); } @@ -367,7 +365,30 @@ } talloc_free(domain->conn); + + fire_watches(NULL, "@releaseDomain", false); + send_ack(conn, XS_RELEASE); +} + +void domain_cleanup(void) +{ + xc_dominfo_t dominfo; + struct domain *domain, *tmp; + int released = 0; + + list_for_each_entry_safe(domain, tmp, &domains, list) { + if (xc_domain_getinfo(*xc_handle, domain->domid, 1, + &dominfo) == 1 && + dominfo.domid == domain->domid && + !dominfo.dying && !dominfo.crashed && !dominfo.shutdown) + continue; + talloc_free(domain->conn); + released++; + } + + if (released) + fire_watches(NULL, "@releaseDomain", false); } void do_get_domain_path(struct connection *conn, const char *domid_str) @@ -386,10 +407,10 @@ else domain = find_domain_by_domid(domid); - if (!domain) + if (!domain) send_error(conn, ENOENT); else - send_reply(conn, XS_GETDOMAINPATH, domain->path, + send_reply(conn, XS_GET_DOMAIN_PATH, 
domain->path, strlen(domain->path) + 1); } @@ -412,26 +433,55 @@ { } +#define EVTCHN_DEV_NAME "/dev/xen/evtchn" +#define EVTCHN_DEV_MAJOR 10 +#define EVTCHN_DEV_MINOR 201 + /* Returns the event channel handle. */ int domain_init(void) { + struct stat st; + /* The size of the ringbuffer: half a page minus head structure. */ ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head); xc_handle = talloc(talloc_autofree_context(), int); if (!xc_handle) barf_perror("Failed to allocate domain handle"); + *xc_handle = xc_interface_open(); if (*xc_handle < 0) barf_perror("Failed to open connection to hypervisor"); + talloc_set_destructor(xc_handle, close_xc_handle); #ifdef TESTING eventchn_fd = fake_open_eventchn(); #else - eventchn_fd = open("/dev/xen/evtchn", O_RDWR); + /* Make sure any existing device file links to correct device. */ + if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) || + (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR))) + (void)unlink(EVTCHN_DEV_NAME); + + reopen: + eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR); + if (eventchn_fd == -1) { + if ((errno == ENOENT) && + ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && + (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, + makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0)) + goto reopen; + return -errno; + } #endif if (eventchn_fd < 0) - barf_perror("Failed to open connection to hypervisor"); + barf_perror("Failed to open evtchn device"); + + if (xc_evtchn_bind_virq(*xc_handle, VIRQ_DOM_EXC, &virq_port)) + barf_perror("Failed to bind to domain exception virq"); + + if (ioctl(eventchn_fd, EVENTCHN_BIND, virq_port) != 0) + barf_perror("Failed to bind to domain exception virq port"); + return eventchn_fd; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_domain.h --- a/tools/xenstore/xenstored_domain.h Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstored_domain.h Fri Sep 9 16:30:54 2005 @@ -28,6 +28,10 @@ /* domid */ void do_release(struct connection 
*conn, const char *domid_str); +/* Enumerate domains and release connections for non-existant or dying + * domains. */ +void domain_cleanup(void); + /* domid */ void do_get_domain_path(struct connection *conn, const char *domid_str); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstored_watch.c --- a/tools/xenstore/xenstored_watch.c Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstored_watch.c Fri Sep 9 16:30:54 2005 @@ -103,7 +103,8 @@ /* Check read permission: no permission, no watch event. * If it doesn't exist, we need permission to read parent. */ - if (!check_node_perms(conn, node, XS_PERM_READ|XS_PERM_ENOENT_OK)) { + if (!check_node_perms(conn, node, XS_PERM_READ|XS_PERM_ENOENT_OK) && + !check_event_node(node)) { fprintf(stderr, "No permission for %s\n", node); return; } @@ -131,7 +132,7 @@ struct watch *watch; /* During transactions, don't fire watches. */ - if (conn->transaction) + if (conn && conn->transaction) return; /* Create an event for each watch. Don't send to self. */ @@ -213,11 +214,16 @@ return; } - relative = !strstarts(vec[0], "/"); - vec[0] = canonicalize(conn, vec[0]); - if (!is_valid_nodename(vec[0])) { - send_error(conn, errno); - return; + if (strstarts(vec[0], "@")) { + relative = false; + /* check if valid event */ + } else { + relative = !strstarts(vec[0], "/"); + vec[0] = canonicalize(conn, vec[0]); + if (!is_valid_nodename(vec[0])) { + send_error(conn, errno); + return; + } } watch = talloc(conn, struct watch); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs.c --- a/tools/xenstore/xs.c Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xs.c Fri Sep 9 16:30:54 2005 @@ -31,14 +31,17 @@ #include <signal.h> #include <stdint.h> #include <errno.h> +#include <sys/ioctl.h> #include "xs.h" #include "xenstored.h" #include "xs_lib.h" #include "utils.h" +#include "xenbus_dev.h" struct xs_handle { int fd; + enum { SOCK, DEV } type; }; /* Get the socket from the store daemon handle. 
@@ -65,17 +68,39 @@ h = malloc(sizeof(*h)); if (h) { h->fd = sock; + h->type = SOCK; return h; } } saved_errno = errno; close(sock); - free(h); errno = saved_errno; return NULL; } +static struct xs_handle *get_dev(const char *connect_to) +{ + int fd, saved_errno; + struct xs_handle *h = NULL; + + fd = open(connect_to, O_RDONLY); + if (fd < 0) + return NULL; + + h = malloc(sizeof(*h)); + if (h) { + h->fd = fd; + h->type = DEV; + return h; + } + + saved_errno = errno; + close(fd); + errno = saved_errno; + return NULL; +} + struct xs_handle *xs_daemon_open(void) { return get_socket(xs_daemon_socket()); @@ -84,6 +109,11 @@ struct xs_handle *xs_daemon_open_readonly(void) { return get_socket(xs_daemon_socket_ro()); +} + +struct xs_handle *xs_domain_open(void) +{ + return get_dev(xs_domain_dev()); } void xs_daemon_close(struct xs_handle *h) @@ -160,9 +190,9 @@ } /* Send message to xs, get malloc'ed reply. NULL and set errno on error. */ -static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type, - const struct iovec *iovec, unsigned int num_vecs, - unsigned int *len) +static void *xs_talkv_sock(struct xs_handle *h, enum xsd_sockmsg_type type, + const struct iovec *iovec, unsigned int num_vecs, + unsigned int *len) { struct xsd_sockmsg msg; void *ret = NULL; @@ -220,6 +250,54 @@ close(h->fd); h->fd = -1; errno = saved_errno; + return NULL; +} + +/* Send message to xs, get malloc'ed reply. NULL and set errno on error. 
*/ +static void *xs_talkv_dev(struct xs_handle *h, enum xsd_sockmsg_type type, + const struct iovec *iovec, unsigned int num_vecs, + unsigned int *len) +{ + struct xenbus_dev_talkv dt; + char *buf; + int err, buflen = 1024; + + again: + buf = malloc(buflen); + if (buf == NULL) { + errno = ENOMEM; + return NULL; + } + dt.type = type; + dt.iovec = (struct kvec *)iovec; + dt.num_vecs = num_vecs; + dt.buf = buf; + dt.len = buflen; + err = ioctl(h->fd, IOCTL_XENBUS_DEV_TALKV, &dt); + if (err < 0) { + free(buf); + errno = err; + return NULL; + } + if (err > buflen) { + free(buf); + buflen = err; + goto again; + } + if (len) + *len = err; + return buf; +} + +/* Send message to xs, get malloc'ed reply. NULL and set errno on error. */ +static void *xs_talkv(struct xs_handle *h, enum xsd_sockmsg_type type, + const struct iovec *iovec, unsigned int num_vecs, + unsigned int *len) +{ + if (h->type == SOCK) + return xs_talkv_sock(h, type, iovec, num_vecs, len); + if (h->type == DEV) + return xs_talkv_dev(h, type, iovec, num_vecs, len); return NULL; } @@ -535,13 +613,23 @@ return xs_bool(xs_single(h, XS_RELEASE, domid_str, NULL)); } +char *xs_get_domain_path(struct xs_handle *h, domid_t domid) +{ + char domid_str[MAX_STRLEN(domid)]; + + sprintf(domid_str, "%u", domid); + + return xs_single(h, XS_GET_DOMAIN_PATH, domid_str, NULL); +} + bool xs_shutdown(struct xs_handle *h) { bool ret = xs_bool(xs_single(h, XS_SHUTDOWN, "", NULL)); if (ret) { char c; /* Wait for it to actually shutdown. */ - read(h->fd, &c, 1); + while ((read(h->fd, &c, 1) < 0) && (errno == EINTR)) + continue; } return ret; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs.h --- a/tools/xenstore/xs.h Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xs.h Fri Sep 9 16:30:54 2005 @@ -30,6 +30,7 @@ * Returns a handle or NULL. */ struct xs_handle *xs_daemon_open(void); +struct xs_handle *xs_domain_open(void); /* Connect to the xs daemon (readonly for non-root clients). * Returns a handle or NULL. 
@@ -133,6 +134,10 @@ */ bool xs_release_domain(struct xs_handle *h, domid_t domid); +/* Query the home path of a domain. + */ +char *xs_get_domain_path(struct xs_handle *h, domid_t domid); + /* Only useful for DEBUG versions */ char *xs_debug_command(struct xs_handle *h, const char *cmd, void *data, unsigned int len); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs_lib.c --- a/tools/xenstore/xs_lib.c Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xs_lib.c Fri Sep 9 16:30:54 2005 @@ -64,6 +64,12 @@ static char buf[PATH_MAX]; sprintf(buf, "%s/transactions", xs_daemon_rootdir()); return buf; +} + +const char *xs_domain_dev(void) +{ + char *s = getenv("XENSTORED_DOMAIN_DEV"); + return (s ? s : "/proc/xen/xenbus"); } /* Simple routines for writing to sockets, etc. */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xs_lib.h --- a/tools/xenstore/xs_lib.h Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xs_lib.h Fri Sep 9 16:30:54 2005 @@ -48,6 +48,7 @@ const char *xs_daemon_socket_ro(void); const char *xs_daemon_store(void); const char *xs_daemon_transactions(void); +const char *xs_domain_dev(void); /* Simple write function: loops for you. 
*/ bool xs_write_all(int fd, const void *data, unsigned int len); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xentrace/formats --- a/tools/xentrace/formats Thu Sep 8 15:18:40 2005 +++ b/tools/xentrace/formats Fri Sep 9 16:30:54 2005 @@ -15,3 +15,7 @@ 0x00080001 CPU%(cpu)d %(tsc)d VMX_VMEXIT [ domid = 0x%(1)08x, eip = 0x%(2)08x, reason = 0x%(3)08x ] 0x00080002 CPU%(cpu)d %(tsc)d VMX_VECTOR [ domid = 0x%(1)08x, eip = 0x%(2)08x, vector = 0x%(3)08x ] 0x00080003 CPU%(cpu)d %(tsc)d VMX_INT [ domid = 0x%(1)08x, trap = 0x%(2)08x, va = 0x%(3)08x ] + +0x00090001 CPU%(cpu)d %(tsc)d VMENTRY 0x%(1)08x 0x%(2)08x 0x%(3)08x 0x%(4)08x 0x%(5)08x +0x00090002 CPU%(cpu)d %(tsc)d VMEXIT 0x%(1)08x 0x%(2)08x 0x%(3)08x + diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xentrace/xentrace.c --- a/tools/xentrace/xentrace.c Thu Sep 8 15:18:40 2005 +++ b/tools/xentrace/xentrace.c Fri Sep 9 16:30:54 2005 @@ -525,7 +525,7 @@ } if (opts.cpu_mask != 0) { - set_mask(opts.evt_mask, 1); + set_mask(opts.cpu_mask, 1); } if ( opts.outfile ) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/Rules.mk --- a/xen/Rules.mk Thu Sep 8 15:18:40 2005 +++ b/xen/Rules.mk Fri Sep 9 16:30:54 2005 @@ -7,7 +7,6 @@ perfc ?= n perfc_arrays?= n trace ?= n -optimize ?= y domu_debug ?= n crash_debug ?= n diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_chinesewall_hooks.c --- a/xen/acm/acm_chinesewall_hooks.c Thu Sep 8 15:18:40 2005 +++ b/xen/acm/acm_chinesewall_hooks.c Fri Sep 9 16:30:54 2005 @@ -310,6 +310,28 @@ return 0; } +static int +chwall_dump_ssid_types(ssidref_t ssidref, u8 *buf, u16 len) +{ + int i; + + /* fill in buffer */ + if (chwall_bin_pol.max_types > len) + return -EFAULT; + + if (ssidref >= chwall_bin_pol.max_ssidrefs) + return -EFAULT; + + /* read types for chwall ssidref */ + for(i=0; i< chwall_bin_pol.max_types; i++) { + if (chwall_bin_pol.ssidrefs[ssidref * chwall_bin_pol.max_types + i]) + buf[i] = 1; + else + buf[i] = 0; + } + return chwall_bin_pol.max_types; +} + /*************************** * Authorization functions 
***************************/ @@ -492,6 +514,7 @@ .dump_binary_policy = chwall_dump_policy, .set_binary_policy = chwall_set_policy, .dump_statistics = chwall_dump_stats, + .dump_ssid_types = chwall_dump_ssid_types, /* domain management control hooks */ .pre_domain_create = chwall_pre_domain_create, .post_domain_create = chwall_post_domain_create, diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_core.c --- a/xen/acm/acm_core.c Thu Sep 8 15:18:40 2005 +++ b/xen/acm/acm_core.c Fri Sep 9 16:30:54 2005 @@ -64,16 +64,17 @@ void acm_set_endian(void) { u32 test = 1; - if (*((u8 *)&test) == 1) { + if (*((u8 *)&test) == 1) + { printk("ACM module running in LITTLE ENDIAN.\n"); - little_endian = 1; - } else { - printk("ACM module running in BIG ENDIAN.\n"); - little_endian = 0; - } -} - -#if (ACM_USE_SECURITY_POLICY != ACM_NULL_POLICY) + little_endian = 1; + } + else + { + printk("ACM module running in BIG ENDIAN.\n"); + little_endian = 0; + } +} /* initialize global security policy for Xen; policy write-locked already */ static void @@ -101,7 +102,8 @@ * Try all modules and see whichever could be the binary policy. * Adjust the initrdidx if module[1] is the binary policy. */ - for (i = mbi->mods_count-1; i >= 1; i--) { + for (i = mbi->mods_count-1; i >= 1; i--) + { struct acm_policy_buffer *pol; char *_policy_start; unsigned long _policy_len; @@ -117,23 +119,32 @@ continue; /* not a policy */ pol = (struct acm_policy_buffer *)_policy_start; - if (ntohl(pol->magic) == ACM_MAGIC) { + if (ntohl(pol->magic) == ACM_MAGIC) + { rc = acm_set_policy((void *)_policy_start, (u16)_policy_len, 0); - if (rc == ACM_OK) { + if (rc == ACM_OK) + { printf("Policy len 0x%lx, start at %p.\n",_policy_len,_policy_start); - if (i == 1) { - if (mbi->mods_count > 2) { + if (i == 1) + { + if (mbi->mods_count > 2) + { *initrdidx = 2; - } else { + } + else { *initrdidx = 0; } - } else { + } + else + { *initrdidx = 1; } break; - } else { + } + else + { printk("Invalid policy. 
%d.th module line.\n", i+1); } } /* end if a binary policy definition, i.e., (ntohl(pol->magic) == ACM_MAGIC ) */ @@ -147,56 +158,84 @@ const multiboot_info_t *mbi, unsigned long initial_images_start) { - int ret = -EINVAL; - - acm_set_endian(); + int ret = ACM_OK; + + acm_set_endian(); write_lock(&acm_bin_pol_rwlock); - - if (ACM_USE_SECURITY_POLICY == ACM_CHINESE_WALL_POLICY) { - acm_init_binary_policy(NULL, NULL); - acm_init_chwall_policy(); + acm_init_binary_policy(NULL, NULL); + + /* set primary policy component */ + switch ((ACM_USE_SECURITY_POLICY) & 0x0f) + { + + case ACM_CHINESE_WALL_POLICY: + acm_init_chwall_policy(); acm_bin_pol.primary_policy_code = ACM_CHINESE_WALL_POLICY; acm_primary_ops = &acm_chinesewall_ops; + break; + + case ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY: + acm_init_ste_policy(); + acm_bin_pol.primary_policy_code = ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY; + acm_primary_ops = &acm_simple_type_enforcement_ops; + break; + + default: + /* NULL or Unknown policy not allowed primary; + * NULL/NULL will not compile this code */ + ret = -EINVAL; + goto out; + } + + /* secondary policy component part */ + switch ((ACM_USE_SECURITY_POLICY) >> 4) { + case ACM_NULL_POLICY: acm_bin_pol.secondary_policy_code = ACM_NULL_POLICY; acm_secondary_ops = &acm_null_ops; - ret = ACM_OK; - } else if (ACM_USE_SECURITY_POLICY == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY) { - acm_init_binary_policy(NULL, NULL); + break; + + case ACM_CHINESE_WALL_POLICY: + if (acm_bin_pol.primary_policy_code == ACM_CHINESE_WALL_POLICY) + { /* not a valid combination */ + ret = -EINVAL; + goto out; + } + acm_init_chwall_policy(); + acm_bin_pol.secondary_policy_code = ACM_CHINESE_WALL_POLICY; + acm_secondary_ops = &acm_chinesewall_ops; + break; + + case ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY: + if (acm_bin_pol.primary_policy_code == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY) + { /* not a valid combination */ + ret = -EINVAL; + goto out; + } acm_init_ste_policy(); - acm_bin_pol.primary_policy_code = 
ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY; - acm_primary_ops = &acm_simple_type_enforcement_ops; - acm_bin_pol.secondary_policy_code = ACM_NULL_POLICY; - acm_secondary_ops = &acm_null_ops; - ret = ACM_OK; - } else if (ACM_USE_SECURITY_POLICY == ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY) { - acm_init_binary_policy(NULL, NULL); - acm_init_chwall_policy(); - acm_init_ste_policy(); - acm_bin_pol.primary_policy_code = ACM_CHINESE_WALL_POLICY; - acm_primary_ops = &acm_chinesewall_ops; acm_bin_pol.secondary_policy_code = ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY; acm_secondary_ops = &acm_simple_type_enforcement_ops; - ret = ACM_OK; - } else if (ACM_USE_SECURITY_POLICY == ACM_NULL_POLICY) { - acm_init_binary_policy(NULL, NULL); - acm_bin_pol.primary_policy_code = ACM_NULL_POLICY; - acm_primary_ops = &acm_null_ops; - acm_bin_pol.secondary_policy_code = ACM_NULL_POLICY; - acm_secondary_ops = &acm_null_ops; - ret = ACM_OK; + break; + + default: + ret = -EINVAL; + goto out; + } + + out: + write_unlock(&acm_bin_pol_rwlock); + + if (ret != ACM_OK) + { + printk("%s: Error setting policies.\n", __func__); + /* here one could imagine a clean panic */ + return -EINVAL; } - write_unlock(&acm_bin_pol_rwlock); - - if (ret != ACM_OK) - return -EINVAL; acm_setup(initrdidx, mbi, initial_images_start); printk("%s: Enforcing Primary %s, Secondary %s.\n", __func__, - ACM_POLICY_NAME(acm_bin_pol.primary_policy_code), ACM_POLICY_NAME(acm_bin_pol.secondary_policy_code)); + ACM_POLICY_NAME(acm_bin_pol.primary_policy_code), + ACM_POLICY_NAME(acm_bin_pol.secondary_policy_code)); return ret; } - - -#endif int acm_init_domain_ssid(domid_t id, ssidref_t ssidref) @@ -205,7 +244,8 @@ struct domain *subj = find_domain_by_id(id); int ret1, ret2; - if (subj == NULL) { + if (subj == NULL) + { printk("%s: ACM_NULL_POINTER ERROR (id=%x).\n", __func__, id); return ACM_NULL_POINTER_ERROR; } @@ -235,14 +275,16 @@ else ret2 = ACM_OK; - if ((ret1 != ACM_OK) || (ret2 != ACM_OK)) { + if ((ret1 != ACM_OK) || (ret2 != 
ACM_OK)) + { printk("%s: ERROR instantiating individual ssids for domain 0x%02x.\n", __func__, subj->domain_id); acm_free_domain_ssid(ssid); put_domain(subj); return ACM_INIT_SSID_ERROR; } - printk("%s: assigned domain %x the ssidref=%x.\n", __func__, id, ssid->ssidref); + printk("%s: assigned domain %x the ssidref=%x.\n", + __func__, id, ssid->ssidref); put_domain(subj); return ACM_OK; } @@ -254,11 +296,12 @@ domid_t id; /* domain is already gone, just ssid is left */ - if (ssid == NULL) { + if (ssid == NULL) + { printk("%s: ACM_NULL_POINTER ERROR.\n", __func__); return ACM_NULL_POINTER_ERROR; } - id = ssid->domainid; + id = ssid->domainid; ssid->subject = NULL; if (acm_primary_ops->free_domain_ssid != NULL) /* null policy */ @@ -268,6 +311,7 @@ acm_secondary_ops->free_domain_ssid(ssid->secondary_ssid); ssid->secondary_ssid = NULL; xfree(ssid); - printkd("%s: Freed individual domain ssid (domain=%02x).\n",__func__, id); + printkd("%s: Freed individual domain ssid (domain=%02x).\n", + __func__, id); return ACM_OK; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_null_hooks.c --- a/xen/acm/acm_null_hooks.c Thu Sep 8 15:18:40 2005 +++ b/xen/acm/acm_null_hooks.c Fri Sep 9 16:30:54 2005 @@ -14,13 +14,13 @@ #include <acm/acm_hooks.h> static int -null_init_domain_ssid(void **chwall_ssid, ssidref_t ssidref) +null_init_domain_ssid(void **ssid, ssidref_t ssidref) { return ACM_OK; } static void -null_free_domain_ssid(void *chwall_ssid) +null_free_domain_ssid(void *ssid) { return; } @@ -44,6 +44,14 @@ return 0; } +static int +null_dump_ssid_types(ssidref_t ssidref, u8 *buffer, u16 buf_size) +{ + /* no types */ + return 0; +} + + /* now define the hook structure similarly to LSM */ struct acm_operations acm_null_ops = { .init_domain_ssid = null_init_domain_ssid, @@ -51,6 +59,7 @@ .dump_binary_policy = null_dump_binary_policy, .set_binary_policy = null_set_binary_policy, .dump_statistics = null_dump_stats, + .dump_ssid_types = null_dump_ssid_types, /* domain management 
control hooks */ .pre_domain_create = NULL, .post_domain_create = NULL, diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/acm/acm_policy.c --- a/xen/acm/acm_policy.c Thu Sep 8 15:18:40 2005 +++ b/xen/acm/acm_policy.c Fri Sep 9 16:30:54 2005 @@ -26,8 +26,8 @@ #include <xen/lib.h> #include <xen/delay.h> #include <xen/sched.h> +#include <acm/acm_core.h> #include <public/acm_ops.h> -#include <acm/acm_core.h> #include <acm/acm_hooks.h> #include <acm/acm_endian.h> @@ -37,14 +37,16 @@ u8 *policy_buffer = NULL; struct acm_policy_buffer *pol; - if (buf_size < sizeof(struct acm_policy_buffer)) + if (buf_size < sizeof(struct acm_policy_buffer)) return -EFAULT; /* 1. copy buffer from domain */ if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL) - goto error_free; + return -ENOMEM; + if (isuserbuffer) { - if (copy_from_user(policy_buffer, buf, buf_size)) { + if (copy_from_user(policy_buffer, buf, buf_size)) + { printk("%s: Error copying!\n",__func__); goto error_free; } @@ -57,11 +59,13 @@ if ((ntohl(pol->magic) != ACM_MAGIC) || (ntohl(pol->policy_version) != ACM_POLICY_VERSION) || (ntohl(pol->primary_policy_code) != acm_bin_pol.primary_policy_code) || - (ntohl(pol->secondary_policy_code) != acm_bin_pol.secondary_policy_code)) { + (ntohl(pol->secondary_policy_code) != acm_bin_pol.secondary_policy_code)) + { printkd("%s: Wrong policy magics or versions!\n", __func__); goto error_free; } - if (buf_size != ntohl(pol->len)) { + if (buf_size != ntohl(pol->len)) + { printk("%s: ERROR in buf size.\n", __func__); goto error_free; } @@ -72,27 +76,25 @@ /* 3. set primary policy data */ if (acm_primary_ops->set_binary_policy(buf + ntohl(pol->primary_buffer_offset), ntohl(pol->secondary_buffer_offset) - - ntohl(pol->primary_buffer_offset))) { + ntohl(pol->primary_buffer_offset))) goto error_lock_free; - } + /* 4. 
set secondary policy data */ if (acm_secondary_ops->set_binary_policy(buf + ntohl(pol->secondary_buffer_offset), ntohl(pol->len) - - ntohl(pol->secondary_buffer_offset))) { + ntohl(pol->secondary_buffer_offset))) goto error_lock_free; - } + write_unlock(&acm_bin_pol_rwlock); - if (policy_buffer != NULL) - xfree(policy_buffer); + xfree(policy_buffer); return ACM_OK; error_lock_free: write_unlock(&acm_bin_pol_rwlock); error_free: printk("%s: Error setting policy.\n", __func__); - if (policy_buffer != NULL) - xfree(policy_buffer); - return -ENOMEM; + xfree(policy_buffer); + return -EFAULT; } int @@ -102,11 +104,14 @@ int ret; struct acm_policy_buffer *bin_pol; + if (buf_size < sizeof(struct acm_policy_buffer)) + return -EFAULT; + if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL) return -ENOMEM; read_lock(&acm_bin_pol_rwlock); - /* future: read policy from file and set it */ + bin_pol = (struct acm_policy_buffer *)policy_buffer; bin_pol->magic = htonl(ACM_MAGIC); bin_pol->primary_policy_code = htonl(acm_bin_pol.primary_policy_code); @@ -118,27 +123,30 @@ ret = acm_primary_ops->dump_binary_policy (policy_buffer + ntohl(bin_pol->primary_buffer_offset), buf_size - ntohl(bin_pol->primary_buffer_offset)); - if (ret < 0) { - printk("%s: ERROR creating chwallpolicy buffer.\n", __func__); - read_unlock(&acm_bin_pol_rwlock); - return -1; - } + if (ret < 0) + goto error_free_unlock; + bin_pol->len = htonl(ntohl(bin_pol->len) + ret); bin_pol->secondary_buffer_offset = htonl(ntohl(bin_pol->len)); ret = acm_secondary_ops->dump_binary_policy(policy_buffer + ntohl(bin_pol->secondary_buffer_offset), buf_size - ntohl(bin_pol->secondary_buffer_offset)); - if (ret < 0) { - printk("%s: ERROR creating chwallpolicy buffer.\n", __func__); - read_unlock(&acm_bin_pol_rwlock); - return -1; - } + if (ret < 0) + goto error_free_unlock; + bin_pol->len = htonl(ntohl(bin_pol->len) + ret); - read_unlock(&acm_bin_pol_rwlock); if (copy_to_user(buf, policy_buffer, ntohl(bin_pol->len))) - return 
-EFAULT; + goto error_free_unlock; + + read_unlock(&acm_bin_pol_rwlock); xfree(policy_buffer); return ACM_OK; + + error_free_unlock: + read_unlock(&acm_bin_pol_rwlock); + printk("%s: Error getting policy.\n", __func__); + xfree(policy_buffer); + return -EFAULT; } int @@ -185,4 +193,62 @@ return -EFAULT; } + +int +acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size) +{ + /* send stats to user space */ + u8 *ssid_buffer; + int ret; + struct acm_ssid_buffer *acm_ssid; + if (buf_size < sizeof(struct acm_ssid_buffer)) + return -EFAULT; + + if ((ssid_buffer = xmalloc_array(u8, buf_size)) == NULL) + return -ENOMEM; + + read_lock(&acm_bin_pol_rwlock); + + acm_ssid = (struct acm_ssid_buffer *)ssid_buffer; + acm_ssid->len = sizeof(struct acm_ssid_buffer); + acm_ssid->ssidref = ssidref; + acm_ssid->primary_policy_code = acm_bin_pol.primary_policy_code; + acm_ssid->secondary_policy_code = acm_bin_pol.secondary_policy_code; + acm_ssid->primary_types_offset = acm_ssid->len; + + /* ret >= 0 --> ret == max_types */ + ret = acm_primary_ops->dump_ssid_types(ACM_PRIMARY(ssidref), + ssid_buffer + acm_ssid->primary_types_offset, + buf_size - acm_ssid->primary_types_offset); + if (ret < 0) + goto error_free_unlock; + + acm_ssid->len += ret; + acm_ssid->primary_max_types = ret; + + acm_ssid->secondary_types_offset = acm_ssid->len; + + ret = acm_secondary_ops->dump_ssid_types(ACM_SECONDARY(ssidref), + ssid_buffer + acm_ssid->secondary_types_offset, + buf_size - acm_ssid->secondary_types_offset); + if (ret < 0) + goto error_free_unlock; + + acm_ssid->len += ret; + acm_ssid->secondary_max_types = ret; + + if (copy_to_user(buf, ssid_buffer, acm_ssid->len)) + goto error_free_unlock; + + read_unlock(&acm_bin_pol_rwlock); + xfree(ssid_buffer); + return ACM_OK; + + error_free_unlock: + read_unlock(&acm_bin_pol_rwlock); + printk("%s: Error getting ssid.\n", __func__); + xfree(ssid_buffer); + return -ENOMEM; +} + /*eof*/ diff -r 10b1d30d3f66 -r b2f4823b6ff0 
xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Thu Sep 8 15:18:40 2005 +++ b/xen/acm/acm_simple_type_enforcement_hooks.c Fri Sep 9 16:30:54 2005 @@ -383,6 +383,27 @@ return sizeof(struct acm_ste_stats_buffer); } +static int +ste_dump_ssid_types(ssidref_t ssidref, u8 *buf, u16 len) +{ + int i; + + /* fill in buffer */ + if (ste_bin_pol.max_types > len) + return -EFAULT; + + if (ssidref >= ste_bin_pol.max_ssidrefs) + return -EFAULT; + + /* read types for chwall ssidref */ + for(i=0; i< ste_bin_pol.max_types; i++) { + if (ste_bin_pol.ssidrefs[ssidref * ste_bin_pol.max_types + i]) + buf[i] = 1; + else + buf[i] = 0; + } + return ste_bin_pol.max_types; +} /* we need to go through this before calling the hooks, * returns 1 == cache hit */ @@ -625,22 +646,23 @@ /* policy management services */ .init_domain_ssid = ste_init_domain_ssid, .free_domain_ssid = ste_free_domain_ssid, - .dump_binary_policy = ste_dump_policy, - .set_binary_policy = ste_set_policy, + .dump_binary_policy = ste_dump_policy, + .set_binary_policy = ste_set_policy, .dump_statistics = ste_dump_stats, + .dump_ssid_types = ste_dump_ssid_types, /* domain management control hooks */ .pre_domain_create = ste_pre_domain_create, - .post_domain_create = NULL, - .fail_domain_create = NULL, - .post_domain_destroy = ste_post_domain_destroy, + .post_domain_create = NULL, + .fail_domain_create = NULL, + .post_domain_destroy = ste_post_domain_destroy, /* event channel control hooks */ - .pre_eventchannel_unbound = ste_pre_eventchannel_unbound, + .pre_eventchannel_unbound = ste_pre_eventchannel_unbound, .fail_eventchannel_unbound = NULL, .pre_eventchannel_interdomain = ste_pre_eventchannel_interdomain, .fail_eventchannel_interdomain = NULL, /* grant table control hooks */ - .pre_grant_map_ref = ste_pre_grant_map_ref, - .fail_grant_map_ref = NULL, - .pre_grant_setup = ste_pre_grant_setup, - .fail_grant_setup = NULL, + .pre_grant_map_ref = ste_pre_grant_map_ref, + 
.fail_grant_map_ref = NULL, + .pre_grant_setup = ste_pre_grant_setup, + .fail_grant_setup = NULL, }; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/Makefile Fri Sep 9 16:30:54 2005 @@ -17,7 +17,7 @@ OBJS := $(patsubst shadow%.o,,$(OBJS)) # drop all ifeq ($(TARGET_SUBARCH),x86_64) - OBJS += shadow.o shadow_public.o # x86_64: new code + OBJS += shadow.o shadow_public.o shadow_guest32.o # x86_64: new code endif ifeq ($(TARGET_SUBARCH),x86_32) ifneq ($(pae),n) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/Rules.mk Fri Sep 9 16:30:54 2005 @@ -13,10 +13,8 @@ CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default -ifeq ($(optimize),y) +ifneq ($(debug),y) CFLAGS += -O3 -fomit-frame-pointer -else -x86_32/usercopy.o: CFLAGS += -O1 endif # Prevent floating-point variables from creeping into Xen. diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/boot/x86_32.S --- a/xen/arch/x86/boot/x86_32.S Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/boot/x86_32.S Fri Sep 9 16:30:54 2005 @@ -9,6 +9,8 @@ .text ENTRY(start) +ENTRY(stext) +ENTRY(_stext) jmp __start .align 4 @@ -260,6 +262,3 @@ .org 0x2000 + STACK_SIZE + PAGE_SIZE #endif /* CONFIG_X86_PAE */ - -ENTRY(stext) -ENTRY(_stext) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/boot/x86_64.S --- a/xen/arch/x86/boot/x86_64.S Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/boot/x86_64.S Fri Sep 9 16:30:54 2005 @@ -10,6 +10,8 @@ .code32 ENTRY(start) +ENTRY(stext) +ENTRY(_stext) jmp __start .org 0x004 @@ -267,5 +269,3 @@ .org 0x4000 + STACK_SIZE + PAGE_SIZE .code64 -ENTRY(stext) -ENTRY(_stext) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/cdb.c --- a/xen/arch/x86/cdb.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/cdb.c Fri Sep 9 16:30:54 2005 @@ -21,7 +21,7 @@ debugger. so avoid it. */ #define dbg_printk(...) 
-static unsigned char opt_cdb[30] = "none"; +static char opt_cdb[30] = "none"; string_param("cdb", opt_cdb); struct xendbg_context { diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/dom0_ops.c Fri Sep 9 16:30:54 2005 @@ -19,6 +19,7 @@ #include <xen/console.h> #include <asm/shadow.h> #include <asm/irq.h> +#include <asm/processor.h> #include <public/sched_ctl.h> #include <asm/mtrr.h> @@ -188,9 +189,11 @@ pi->total_pages = max_page; pi->free_pages = avail_domheap_pages(); pi->cpu_khz = cpu_khz; - - copy_to_user(u_dom0_op, op, sizeof(*op)); + memset( pi->hw_cap, 0, sizeof(pi->hw_cap) ); + memcpy( pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4 ); ret = 0; + if( copy_to_user(u_dom0_op, op, sizeof(*op)) ) + ret = -EINVAL; } break; @@ -389,9 +392,31 @@ } break; + case DOM0_PHYSICAL_MEMORY_MAP: + { + struct dom0_memory_map_entry entry; + int i; + + for ( i = 0; i < e820.nr_map; i++ ) + { + if ( i >= op->u.physical_memory_map.max_map_entries ) + break; + entry.start = e820.map[i].addr; + entry.end = e820.map[i].addr + e820.map[i].size; + entry.is_ram = (e820.map[i].type == E820_RAM); + (void)copy_to_user( + &op->u.physical_memory_map.memory_map[i], + &entry, sizeof(entry)); + } + + op->u.physical_memory_map.nr_map_entries = i; + (void)copy_to_user(u_dom0_op, op, sizeof(*op)); + } + break; + default: ret = -ENOSYS; - + break; } return ret; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/domain.c Fri Sep 9 16:30:54 2005 @@ -255,13 +255,13 @@ v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id]; v->cpumap = CPUMAP_RUNANYWHERE; SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); - machine_to_phys_mapping[virt_to_phys(d->shared_info) >> - PAGE_SHIFT] = INVALID_M2P_ENTRY; + set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, + INVALID_M2P_ENTRY); d->arch.mm_perdomain_pt = alloc_xenheap_page(); 
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE); - machine_to_phys_mapping[virt_to_phys(d->arch.mm_perdomain_pt) >> - PAGE_SHIFT] = INVALID_M2P_ENTRY; + set_pfn_from_mfn(virt_to_phys(d->arch.mm_perdomain_pt) >> PAGE_SHIFT, + INVALID_M2P_ENTRY); v->arch.perdomain_ptes = d->arch.mm_perdomain_pt; v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR); @@ -381,11 +381,13 @@ out: free_vmcs(vmcs); if(v->arch.arch_vmx.io_bitmap_a != 0) { - free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000)); + free_xenheap_pages( + v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000)); v->arch.arch_vmx.io_bitmap_a = 0; } if(v->arch.arch_vmx.io_bitmap_b != 0) { - free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000)); + free_xenheap_pages( + v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000)); v->arch.arch_vmx.io_bitmap_b = 0; } v->arch.arch_vmx.vmcs = 0; @@ -885,8 +887,13 @@ return switch_required; } -void sync_lazy_execstate_cpu(unsigned int cpu) -{ +void sync_vcpu_execstate(struct vcpu *v) +{ + unsigned int cpu = v->processor; + + if ( !cpu_isset(cpu, v->domain->cpumask) ) + return; + if ( cpu == smp_processor_id() ) { (void)__sync_lazy_execstate(); @@ -967,11 +974,13 @@ BUG_ON(v->arch.arch_vmx.vmcs == NULL); free_vmcs(v->arch.arch_vmx.vmcs); if(v->arch.arch_vmx.io_bitmap_a != 0) { - free_xenheap_pages(v->arch.arch_vmx.io_bitmap_a, get_order(0x1000)); + free_xenheap_pages( + v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000)); v->arch.arch_vmx.io_bitmap_a = 0; } if(v->arch.arch_vmx.io_bitmap_b != 0) { - free_xenheap_pages(v->arch.arch_vmx.io_bitmap_b, get_order(0x1000)); + free_xenheap_pages( + v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000)); v->arch.arch_vmx.io_bitmap_b = 0; } v->arch.arch_vmx.vmcs = 0; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/domain_build.c Fri Sep 9 16:30:54 
2005 @@ -20,6 +20,7 @@ #include <asm/processor.h> #include <asm/desc.h> #include <asm/i387.h> +#include <asm/physdev.h> #include <asm/shadow.h> static long dom0_nrpages; @@ -74,15 +75,12 @@ struct pfn_info *page; unsigned int order; /* - * Allocate up to 2MB at a time: - * 1. This prevents overflow of get_order() when allocating more than - * 4GB to domain 0 on a PAE machine. - * 2. It prevents allocating very large chunks from DMA pools before - * the >4GB pool is fully depleted. + * Allocate up to 2MB at a time: It prevents allocating very large chunks + * from DMA pools before the >4GB pool is fully depleted. */ if ( max_pages > (2UL << (20 - PAGE_SHIFT)) ) max_pages = 2UL << (20 - PAGE_SHIFT); - order = get_order(max_pages << PAGE_SHIFT); + order = get_order_from_pages(max_pages); if ( (max_pages & (max_pages-1)) != 0 ) order--; while ( (page = alloc_domheap_pages(d, order, 0)) == NULL ) @@ -217,14 +215,14 @@ vinitrd_start = round_pgup(dsi.v_end); vinitrd_end = vinitrd_start + initrd_len; vphysmap_start = round_pgup(vinitrd_end); - vphysmap_end = vphysmap_start + (nr_pages * sizeof(u32)); - vpt_start = round_pgup(vphysmap_end); + vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); + vstartinfo_start = round_pgup(vphysmap_end); + vstartinfo_end = vstartinfo_start + PAGE_SIZE; + vpt_start = vstartinfo_end; for ( nr_pt_pages = 2; ; nr_pt_pages++ ) { vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE); - vstartinfo_start = vpt_end; - vstartinfo_end = vstartinfo_start + PAGE_SIZE; - vstack_start = vstartinfo_end; + vstack_start = vpt_end; vstack_end = vstack_start + PAGE_SIZE; v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1); if ( (v_end - vstack_end) < (512UL << 10) ) @@ -251,7 +249,7 @@ #endif } - order = get_order(v_end - dsi.v_start); + order = get_order_from_bytes(v_end - dsi.v_start); if ( (1UL << order) > nr_pages ) panic("Domain 0 allocation is too small for kernel image.\n"); @@ -271,15 +269,15 @@ " Loaded kernel: %p->%p\n" " Init. 
ramdisk: %p->%p\n" " Phys-Mach map: %p->%p\n" + " Start info: %p->%p\n" " Page tables: %p->%p\n" - " Start info: %p->%p\n" " Boot stack: %p->%p\n" " TOTAL: %p->%p\n", _p(dsi.v_kernstart), _p(dsi.v_kernend), _p(vinitrd_start), _p(vinitrd_end), _p(vphysmap_start), _p(vphysmap_end), + _p(vstartinfo_start), _p(vstartinfo_end), _p(vpt_start), _p(vpt_end), - _p(vstartinfo_start), _p(vstartinfo_end), _p(vstack_start), _p(vstack_end), _p(dsi.v_start), _p(v_end)); printk(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); @@ -592,8 +590,7 @@ if ( opt_dom0_translate ) { si->shared_info = d->next_io_page << PAGE_SHIFT; - set_machinetophys(virt_to_phys(d->shared_info) >> PAGE_SHIFT, - d->next_io_page); + set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, d->next_io_page); d->next_io_page++; } else @@ -613,8 +610,8 @@ if ( !opt_dom0_translate && (pfn > REVERSE_START) ) mfn = alloc_epfn - (pfn - REVERSE_START); #endif - ((u32 *)vphysmap_start)[pfn] = mfn; - machine_to_phys_mapping[mfn] = pfn; + ((unsigned long *)vphysmap_start)[pfn] = mfn; + set_pfn_from_mfn(mfn, pfn); } while ( pfn < nr_pages ) { @@ -626,8 +623,8 @@ #ifndef NDEBUG #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) #endif - ((u32 *)vphysmap_start)[pfn] = mfn; - machine_to_phys_mapping[mfn] = pfn; + ((unsigned long *)vphysmap_start)[pfn] = mfn; + set_pfn_from_mfn(mfn, pfn); #undef pfn page++; pfn++; } @@ -708,6 +705,18 @@ printk("dom0: shadow setup done\n"); } + /* + * Modify I/O port access permissions. + */ + /* Master Interrupt Controller (PIC). */ + physdev_modify_ioport_access_range(dom0, 0, 0x20, 2); + /* Slave Interrupt Controller (PIC). */ + physdev_modify_ioport_access_range(dom0, 0, 0xA0, 2); + /* Interval Timer (PIT). */ + physdev_modify_ioport_access_range(dom0, 0, 0x40, 4); + /* PIT Channel 2 / PC Speaker Control. 
*/ + physdev_modify_ioport_access_range(dom0, 0, 0x61, 1); + return 0; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/mm.c Fri Sep 9 16:30:54 2005 @@ -1450,9 +1450,9 @@ ((type & PGT_type_mask) != PGT_l1_page_table) ) MEM_LOG("Bad type (saw %" PRtype_info "!= exp %" PRtype_info ") " - "for mfn %lx (pfn %x)", + "for mfn %lx (pfn %lx)", x, type, page_to_pfn(page), - machine_to_phys_mapping[page_to_pfn(page)]); + get_pfn_from_mfn(page_to_pfn(page))); return 0; } else if ( (x & PGT_va_mask) == PGT_va_mutable ) @@ -2206,7 +2206,7 @@ printk("privileged guest dom%d requests pfn=%lx to " "map mfn=%lx for dom%d\n", d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id); - set_machinetophys(mfn, gpfn); + set_pfn_from_mfn(mfn, gpfn); set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache); okay = 1; shadow_unlock(FOREIGNDOM); @@ -2225,7 +2225,7 @@ break; } - set_machinetophys(mfn, gpfn); + set_pfn_from_mfn(mfn, gpfn); okay = 1; /* @@ -3185,7 +3185,7 @@ struct pfn_info *page; l1_pgentry_t pte; l2_pgentry_t *pl2e, l2e; - int which; + int which, flags; unsigned long l2_idx; if ( unlikely(shadow_mode_enabled(d)) ) @@ -3206,8 +3206,24 @@ pfn = l1e_get_pfn(pte); page = &frame_table[pfn]; +#ifdef CONFIG_X86_64 +#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT | _PAGE_USER) +#else +#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT) +#endif + + /* + * Check the required flags for a valid wrpt mapping. If the page is + * already writable then we can return straight to the guest (SMP race). + * We decide whether or not to propagate the fault by testing for write + * permissions in page directories by writing back to the linear mapping. + */ + if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS ) + return !__put_user( + pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1); + /* We are looking only for read-only mappings of p.t. pages. 
*/ - if ( ((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) != _PAGE_PRESENT) || + if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) || ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) || ((page->u.inuse.type_info & PGT_count_mask) == 0) || (page_get_owner(page) != d) ) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/setup.c Fri Sep 9 16:30:54 2005 @@ -12,6 +12,8 @@ #include <xen/trace.h> #include <xen/multiboot.h> #include <xen/domain_page.h> +#include <xen/compile.h> +#include <public/version.h> #include <asm/bitops.h> #include <asm/smp.h> #include <asm/processor.h> @@ -90,6 +92,8 @@ unsigned long mmu_cr4_features = X86_CR4_PSE; #endif EXPORT_SYMBOL(mmu_cr4_features); + +int hvm_enabled = 0; /* can we run unmodified guests */ struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu }; @@ -529,6 +533,45 @@ startup_cpu_idle_loop(); } +void arch_get_xen_caps(xen_capabilities_info_t *info) +{ + char *p=info->caps; + + *p=0; + +#ifdef CONFIG_X86_32 + +#ifndef CONFIG_X86_PAE + p+=sprintf(p,"xen_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION); + if(hvm_enabled) + { + p+=sprintf(p,"hvm_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION); + } +#else + p+=sprintf(p,"xen_%d.%d_x86_32p ",XEN_VERSION,XEN_SUBVERSION); + if(hvm_enabled) + { + //p+=sprintf(p,"hvm_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION); + //p+=sprintf(p,"hvm_%d.%d_x86_32p ",XEN_VERSION,XEN_SUBVERSION); + } + +#endif + +#else /* !CONFIG_X86_32 */ + p+=sprintf(p,"xen_%d.%d_x86_64 ",XEN_VERSION,XEN_SUBVERSION); + if(hvm_enabled) + { + //p+=sprintf(p,"hvm_%d.%d_x86_32 ",XEN_VERSION,XEN_SUBVERSION); + //p+=sprintf(p,"hvm_%d.%d_x86_32p ",XEN_VERSION,XEN_SUBVERSION); + p+=sprintf(p,"hvm_%d.%d_x86_64 ",XEN_VERSION,XEN_SUBVERSION); + } +#endif + + BUG_ON((p-info->caps)>sizeof(*info)); + + if(p>info->caps) *(p-1) = 0; +} + /* * Local variables: * mode: C diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Thu Sep 8 
15:18:40 2005 +++ b/xen/arch/x86/shadow.c Fri Sep 9 16:30:54 2005 @@ -53,6 +53,9 @@ struct domain *d, unsigned long gpfn, unsigned long gmfn); static void shadow_map_into_current(struct vcpu *v, unsigned long va, unsigned int from, unsigned int to); +static inline void validate_bl2e_change( struct domain *d, + guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index); + #endif /******** @@ -217,10 +220,38 @@ } else { - page = alloc_domheap_page(NULL); - void *l1 = map_domain_page(page_to_pfn(page)); - memset(l1, 0, PAGE_SIZE); - unmap_domain_page(l1); + if (d->arch.ops->guest_paging_levels == PAGING_L2) + { +#if CONFIG_PAGING_LEVELS >= 4 + /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1 + * So need allocate 2 continues shadow L1 each time. + */ + page = alloc_domheap_pages(NULL, SL1_ORDER, 0); + if (!page) + domain_crash_synchronous(); + + void *l1_0 = map_domain_page(page_to_pfn(page)); + memset(l1_0,0,PAGE_SIZE); + unmap_domain_page(l1_0); + void *l1_1 = map_domain_page(page_to_pfn(page+1)); + memset(l1_1,0,PAGE_SIZE); + unmap_domain_page(l1_1); +#else + page = alloc_domheap_page(NULL); + if (!page) + domain_crash_synchronous(); + void *l1 = map_domain_page(page_to_pfn(page)); + memset(l1, 0, PAGE_SIZE); + unmap_domain_page(l1); +#endif + } + else + { + page = alloc_domheap_page(NULL); + void *l1 = map_domain_page(page_to_pfn(page)); + memset(l1, 0, PAGE_SIZE); + unmap_domain_page(l1); + } } } else { @@ -331,7 +362,21 @@ fail: FSH_LOG("promotion of pfn=%lx mfn=%lx failed! 
external gnttab refs?", gpfn, gmfn); - free_domheap_page(page); + if (psh_type == PGT_l1_shadow) + { + if (d->arch.ops->guest_paging_levels == PAGING_L2) + { +#if CONFIG_PAGING_LEVELS >=4 + free_domheap_pages(page, SL1_ORDER); +#else + free_domheap_page(page); +#endif + } + else + free_domheap_page(page); + } + else + free_domheap_page(page); return 0; } @@ -478,13 +523,15 @@ { struct vcpu *v = current; struct domain *d = v->domain; - l1_pgentry_t *gpl1e, *spl1e; - l2_pgentry_t gl2e, sl2e; + l1_pgentry_t *spl1e; + l2_pgentry_t sl2e; + guest_l1_pgentry_t *gpl1e; + guest_l2_pgentry_t gl2e; unsigned long gl1pfn, gl1mfn, sl1mfn; int i, init_table = 0; __guest_get_l2e(v, va, &gl2e); - ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT); + ASSERT(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT); gl1pfn = l2e_get_pfn(gl2e); if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) ) @@ -523,28 +570,49 @@ ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) ); #endif - if ( !get_shadow_ref(sl1mfn) ) - BUG(); - l2pde_general(d, &gl2e, &sl2e, sl1mfn); - __guest_set_l2e(v, va, &gl2e); - __shadow_set_l2e(v, va, &sl2e); +#if CONFIG_PAGING_LEVELS >=4 + if (d->arch.ops->guest_paging_levels == PAGING_L2) + { + /* for 32-bit VMX guest on 64-bit host, + * need update two L2 entries each time + */ + if ( !get_shadow_ref(sl1mfn)) + BUG(); + l2pde_general(d, &gl2e, &sl2e, sl1mfn); + __guest_set_l2e(v, va, &gl2e); + __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e); + if ( !get_shadow_ref(sl1mfn+1)) + BUG(); + sl2e = l2e_empty(); + l2pde_general(d, &gl2e, &sl2e, sl1mfn+1); + __shadow_set_l2e(v,((va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1)) + (1 << L2_PAGETABLE_SHIFT)) , &sl2e); + } else +#endif + { + if ( !get_shadow_ref(sl1mfn) ) + BUG(); + l2pde_general(d, &gl2e, &sl2e, sl1mfn); + __guest_set_l2e(v, va, &gl2e); + __shadow_set_l2e(v, va , &sl2e); + } if ( init_table ) { l1_pgentry_t sl1e; - int index = l1_table_offset(va); + int index = guest_l1_table_offset(va); int min = 1, max = 0; 
unsigned long entries, pt_va; l1_pgentry_t tmp_sl1e; - l1_pgentry_t tmp_gl1e;//Prepare for double compile - - - entries = PAGE_SIZE / sizeof(l1_pgentry_t); + guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile + + + entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT; - gpl1e = (l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e); - - entries = PAGE_SIZE / sizeof(l1_pgentry_t); + gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e); + + /* If the PGT_l1_shadow has two continual pages */ + entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); //1024 entry!!! pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT; spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e); @@ -555,7 +623,7 @@ spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) & ~(L1_PAGETABLE_ENTRIES-1)]);*/ - for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ ) { l1pte_propagate_from_guest(d, gpl1e[i], &sl1e); if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) && @@ -584,7 +652,7 @@ } } -static void +static void shadow_set_l1e(unsigned long va, l1_pgentry_t new_spte, int create_l1_shadow) { struct vcpu *v = current; @@ -616,7 +684,7 @@ perfc_incrc(shadow_set_l1e_unlinked); if ( !get_shadow_ref(sl1mfn) ) BUG(); - l2pde_general(d, &gpde, &sl2e, sl1mfn); + l2pde_general(d, (guest_l2_pgentry_t *)&gpde, &sl2e, sl1mfn); __guest_set_l2e(v, va, &gpde); __shadow_set_l2e(v, va, &sl2e); } @@ -651,6 +719,7 @@ shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va)); } +#if CONFIG_PAGING_LEVELS <= 3 static void shadow_invlpg_32(struct vcpu *v, unsigned long va) { struct domain *d = v->domain; @@ -671,6 +740,7 @@ sizeof(gpte))) {*/ if (unlikely(!__guest_get_l1e(v, va, &gpte))) { perfc_incrc(shadow_invlpg_faults); + shadow_unlock(d); return; } l1pte_propagate_from_guest(d, gpte, &spte); @@ -678,6 +748,7 @@ shadow_unlock(d); } +#endif static struct 
out_of_sync_entry * shadow_alloc_oos_entry(struct domain *d) @@ -758,8 +829,8 @@ length = max - min + 1; perfc_incr_histo(snapshot_copies, length, PT_UPDATES); - min *= sizeof(l1_pgentry_t); - length *= sizeof(l1_pgentry_t); + min *= sizeof(guest_l1_pgentry_t); + length *= sizeof(guest_l1_pgentry_t); original = map_domain_page(gmfn); snapshot = map_domain_page(smfn); @@ -840,7 +911,7 @@ __shadow_get_l4e(v, va, &sl4e); if ( !(l4e_get_flags(sl4e) & _PAGE_PRESENT)) { - shadow_map_into_current(v, va, L3, L4); + shadow_map_into_current(v, va, PAGING_L3, PAGING_L4); } if (!__shadow_get_l3e(v, va, &sl3e)) { @@ -848,7 +919,7 @@ } if ( !(l3e_get_flags(sl3e) & _PAGE_PRESENT)) { - shadow_map_into_current(v, va, L2, L3); + shadow_map_into_current(v, va, PAGING_L2, PAGING_L3); } } #endif @@ -886,11 +957,11 @@ * Returns 0 otherwise. */ static int snapshot_entry_matches( - struct domain *d, l1_pgentry_t *guest_pt, + struct domain *d, guest_l1_pgentry_t *guest_pt, unsigned long gpfn, unsigned index) { unsigned long smfn = __shadow_status(d, gpfn, PGT_snapshot); - l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ... + guest_l1_pgentry_t *snapshot, gpte; // could be L1s or L2s or ... int entries_match; perfc_incrc(snapshot_entry_matches_calls); @@ -907,7 +978,7 @@ // This could probably be smarter, but this is sufficent for // our current needs. 
// - entries_match = !l1e_has_changed(gpte, snapshot[index], + entries_match = !guest_l1e_has_changed(gpte, snapshot[index], PAGE_FLAG_MASK); unmap_domain_page(snapshot); @@ -935,10 +1006,10 @@ unsigned long l2mfn = pagetable_get_pfn(v->arch.guest_table); #endif unsigned long l2pfn = __mfn_to_gpfn(d, l2mfn); - l2_pgentry_t l2e; + guest_l2_pgentry_t l2e; unsigned long l1pfn, l1mfn; - l1_pgentry_t *guest_pt; - l1_pgentry_t tmp_gle; + guest_l1_pgentry_t *guest_pt; + guest_l1_pgentry_t tmp_gle; unsigned long pt_va; ASSERT(shadow_lock_is_acquired(d)); @@ -947,7 +1018,7 @@ perfc_incrc(shadow_out_of_sync_calls); #if CONFIG_PAGING_LEVELS >= 4 - if (d->arch.ops->guest_paging_levels == L4) { /* Mode F */ + if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */ pgentry_64_t le; unsigned long gmfn; unsigned long gpfn; @@ -955,9 +1026,9 @@ gmfn = l2mfn; gpfn = l2pfn; - guest_pt = (l1_pgentry_t *)v->arch.guest_vtable; - - for (i = L4; i >= L3; i--) { + guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable; + + for (i = PAGING_L4; i >= PAGING_L3; i--) { if ( page_out_of_sync(&frame_table[gmfn]) && !snapshot_entry_matches( d, guest_pt, gpfn, table_offset_64(va, i)) ) @@ -971,7 +1042,7 @@ if ( !VALID_MFN(gmfn) ) return 0; /* Todo: check!*/ - guest_pt = (l1_pgentry_t *)map_domain_page(gmfn); + guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn); } @@ -985,13 +1056,13 @@ #endif if ( page_out_of_sync(&frame_table[l2mfn]) && - !snapshot_entry_matches(d, (l1_pgentry_t *)v->arch.guest_vtable, - l2pfn, l2_table_offset(va)) ) + !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable, + l2pfn, guest_l2_table_offset(va)) ) return 1; __guest_get_l2e(v, va, &l2e); - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || - (l2e_get_flags(l2e) & _PAGE_PSE)) + if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) || + (guest_l2e_get_flags(l2e) & _PAGE_PSE)) return 0; l1pfn = l2e_get_pfn(l2e); @@ -1000,20 +1071,20 @@ // If the l1 pfn is invalid, it can't be out of sync... 
if ( !VALID_MFN(l1mfn) ) return 0; - - pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(L1_PAGETABLE_ENTRIES - 1)) + + pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1)) << L1_PAGETABLE_SHIFT; - guest_pt = (l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle); + guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle); if ( page_out_of_sync(&frame_table[l1mfn]) && !snapshot_entry_matches( - d, guest_pt, l1pfn, l1_table_offset(va)) ) + d, guest_pt, l1pfn, guest_l1_table_offset(va)) ) return 1; return 0; } -#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t))) +#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(guest_l1_pgentry_t))) static inline unsigned long predict_writable_pte_page(struct domain *d, unsigned long gpfn) { @@ -1107,7 +1178,7 @@ return (found == max_refs_to_find); } - i = readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1); + i = readonly_gpfn & (GUEST_L1_PAGETABLE_ENTRIES - 1); if ( !l1e_has_changed(pt[i], match, flags) && fix_entry(i) ) { perfc_incrc(remove_write_fast_exit); @@ -1116,7 +1187,7 @@ return found; } - for (i = 0; i < L1_PAGETABLE_ENTRIES; i++) + for (i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++) { if ( unlikely(!l1e_has_changed(pt[i], match, flags)) && fix_entry(i) ) break; @@ -1281,15 +1352,15 @@ switch ( stype ) { case PGT_l1_shadow: { - l1_pgentry_t *guest1 = guest; + guest_l1_pgentry_t *guest1 = guest; l1_pgentry_t *shadow1 = shadow; - l1_pgentry_t *snapshot1 = snapshot; + guest_l1_pgentry_t *snapshot1 = snapshot; ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) || shadow_mode_write_all(d)); if ( !shadow_mode_refcounts(d) ) - revalidate_l1(d, guest1, snapshot1); + revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1); if ( !smfn ) break; @@ -1300,7 +1371,7 @@ for ( i = min_shadow; i <= max_shadow; i++ ) { if ( (i < min_snapshot) || (i > max_snapshot) || - l1e_has_changed(guest1[i], snapshot1[i], PAGE_FLAG_MASK) ) + guest_l1e_has_changed(guest1[i], snapshot1[i], 
PAGE_FLAG_MASK) ) { need_flush |= validate_pte_change(d, guest1[i], &shadow1[i]); @@ -1430,32 +1501,36 @@ { int max = -1; - l4_pgentry_t *guest4 = guest; + guest_root_pgentry_t *guest_root = guest; l4_pgentry_t *shadow4 = shadow; - l4_pgentry_t *snapshot4 = snapshot; + guest_root_pgentry_t *snapshot_root = snapshot; changed = 0; - for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) + for ( i = 0; i < GUEST_ROOT_PAGETABLE_ENTRIES; i++ ) { if ( !is_guest_l4_slot(i) && !external ) continue; - l4_pgentry_t new_l4e = guest4[i]; - if ( l4e_has_changed(new_l4e, snapshot4[i], PAGE_FLAG_MASK)) + guest_root_pgentry_t new_root_e = guest_root[i]; + if ( root_entry_has_changed( + new_root_e, snapshot_root[i], PAGE_FLAG_MASK)) { - need_flush |= validate_entry_change( - d, (pgentry_64_t *)&new_l4e, - (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype)); - + if (d->arch.ops->guest_paging_levels == PAGING_L4) { + need_flush |= validate_entry_change( + d, (pgentry_64_t *)&new_root_e, + (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype)); + } else { + validate_bl2e_change(d, &new_root_e, shadow, i); + } changed++; ESH_LOG("%d: shadow4 mfn: %lx, shadow root: %lx\n", i, smfn, pagetable_get_paddr(current->arch.shadow_table)); } - if ( l4e_get_intpte(new_l4e) != 0 ) /* FIXME: check flags? */ + if ( guest_root_get_intpte(new_root_e) != 0 ) /* FIXME: check flags? */ max = i; // Need a better solution in the long term. 
- if ( !(l4e_get_flags(new_l4e) & _PAGE_PRESENT) && - unlikely(l4e_get_intpte(new_l4e) != 0) && + if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) && + unlikely(guest_root_get_intpte(new_root_e) != 0) && !unshadow && (frame_table[smfn].u.inuse.type_info & PGT_pinned) ) unshadow = 1; @@ -1554,8 +1629,14 @@ if ( shadow_mode_translate(d) ) need_flush |= resync_all(d, PGT_hl2_shadow); #endif - need_flush |= resync_all(d, PGT_l2_shadow); - need_flush |= resync_all(d, PGT_l3_shadow); + + /* + * Fixme: for i386 host + */ + if (d->arch.ops->guest_paging_levels == PAGING_L4) { + need_flush |= resync_all(d, PGT_l2_shadow); + need_flush |= resync_all(d, PGT_l3_shadow); + } need_flush |= resync_all(d, PGT_l4_shadow); if ( need_flush && !unlikely(shadow_mode_external(d)) ) @@ -1565,11 +1646,11 @@ } static inline int l1pte_write_fault( - struct vcpu *v, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p, + struct vcpu *v, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p, unsigned long va) { struct domain *d = v->domain; - l1_pgentry_t gpte = *gpte_p; + guest_l1_pgentry_t gpte = *gpte_p; l1_pgentry_t spte; unsigned long gpfn = l1e_get_pfn(gpte); unsigned long gmfn = __gpfn_to_mfn(d, gpfn); @@ -1583,9 +1664,9 @@ return 0; } - ASSERT(l1e_get_flags(gpte) & _PAGE_RW); - l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED); - spte = l1e_from_pfn(gmfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL); + ASSERT(guest_l1e_get_flags(gpte) & _PAGE_RW); + guest_l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED); + spte = l1e_from_pfn(gmfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL); SH_VVLOG("l1pte_write_fault: updating spte=0x%" PRIpte " gpte=0x%" PRIpte, l1e_get_intpte(spte), l1e_get_intpte(gpte)); @@ -1603,9 +1684,9 @@ } static inline int l1pte_read_fault( - struct domain *d, l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p) + struct domain *d, guest_l1_pgentry_t *gpte_p, l1_pgentry_t *spte_p) { - l1_pgentry_t gpte = *gpte_p; + guest_l1_pgentry_t gpte = *gpte_p; l1_pgentry_t spte = *spte_p; unsigned long pfn 
= l1e_get_pfn(gpte); unsigned long mfn = __gpfn_to_mfn(d, pfn); @@ -1617,10 +1698,10 @@ return 0; } - l1e_add_flags(gpte, _PAGE_ACCESSED); - spte = l1e_from_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL); - - if ( shadow_mode_log_dirty(d) || !(l1e_get_flags(gpte) & _PAGE_DIRTY) || + guest_l1e_add_flags(gpte, _PAGE_ACCESSED); + spte = l1e_from_pfn(mfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL); + + if ( shadow_mode_log_dirty(d) || !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) || mfn_is_page_table(mfn) ) { l1e_remove_flags(spte, _PAGE_RW); @@ -1633,7 +1714,7 @@ return 1; } - +#if CONFIG_PAGING_LEVELS <= 3 static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs) { l1_pgentry_t gpte, spte, orig_gpte; @@ -1767,6 +1848,7 @@ shadow_unlock(d); return 0; } +#endif static int do_update_va_mapping(unsigned long va, l1_pgentry_t val, @@ -1786,7 +1868,7 @@ // __shadow_sync_va(v, va); - l1pte_propagate_from_guest(d, val, &spte); + l1pte_propagate_from_guest(d, *(guest_l1_pgentry_t *)&val, &spte); shadow_set_l1e(va, spte, 0); /* @@ -1847,7 +1929,7 @@ #if CONFIG_PAGING_LEVELS == 2 unsigned long hl2mfn; #endif - + int max_mode = ( shadow_mode_external(d) ? SHM_external : shadow_mode_translate(d) ? SHM_translate : shadow_mode_enabled(d) ? 
SHM_enable @@ -1953,17 +2035,6 @@ #endif } -struct shadow_ops MODE_A_HANDLER = { - .guest_paging_levels = 2, - .invlpg = shadow_invlpg_32, - .fault = shadow_fault_32, - .update_pagetables = shadow_update_pagetables, - .sync_all = sync_all, - .remove_all_write_access = remove_all_write_access, - .do_update_va_mapping = do_update_va_mapping, - .mark_mfn_out_of_sync = mark_mfn_out_of_sync, - .is_out_of_sync = is_out_of_sync, -}; /************************************************************************/ /************************************************************************/ @@ -2444,12 +2515,90 @@ BUG(); /* not implemenated yet */ return 42; } +static unsigned long gva_to_gpa_pae(unsigned long gva) +{ + BUG(); + return 43; +} #endif #if CONFIG_PAGING_LEVELS >= 4 /****************************************************************************/ /* 64-bit shadow-mode code testing */ /****************************************************************************/ +/* + * validate_bl2e_change() + * The code is for 32-bit VMX gues on 64-bit host. + * To sync guest L2. 
+ */ + +static inline void +validate_bl2e_change( + struct domain *d, + guest_root_pgentry_t *new_gle_p, + pgentry_64_t *shadow_l3, + int index) +{ + int sl3_idx, sl2_idx; + unsigned long sl2mfn, sl1mfn; + pgentry_64_t *sl2_p; + + /* Using guest l2 pte index to get shadow l3&l2 index + * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512 + */ + sl3_idx = index / (PAGETABLE_ENTRIES / 2); + sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2; + + sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]); + sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn); + + validate_pde_change( + d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]); + + /* Mapping the second l1 shadow page */ + if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) { + sl1mfn = entry_get_pfn(sl2_p[sl2_idx]); + sl2_p[sl2_idx + 1] = + entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx])); + } + unmap_domain_page(sl2_p); + +} + +/* + * init_bl2() is for 32-bit VMX guest on 64-bit host + * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2 + */ +static inline unsigned long init_bl2(l4_pgentry_t *spl4e, unsigned long smfn) +{ + unsigned int count; + unsigned long sl2mfn; + struct pfn_info *page; + + memset(spl4e, 0, PAGE_SIZE); + + /* Map the self entry, L4&L3 share the same page */ + spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR); + + /* Allocate 4 shadow L2s */ + page = alloc_domheap_pages(NULL, SL2_ORDER, 0); + if (!page) + domain_crash_synchronous(); + + for (count = 0; count < PDP_ENTRIES; count++) + { + sl2mfn = page_to_pfn(page+count); + void *l2 = map_domain_page(sl2mfn); + memset(l2, 0, PAGE_SIZE); + unmap_domain_page(l2); + spl4e[count] = l4e_from_pfn(sl2mfn, _PAGE_PRESENT); + } + + unmap_domain_page(spl4e); + return smfn; + + +} static unsigned long shadow_l4_table( struct domain *d, unsigned long gpfn, unsigned long gmfn) @@ -2463,11 +2612,16 @@ if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) ) { - printk("Couldn't alloc an L2 shadow for pfn=%lx 
mfn=%lx\n", gpfn, gmfn); + printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn); BUG(); /* XXX Deal gracefully with failure. */ } spl4e = (l4_pgentry_t *)map_domain_page(smfn); + + if (d->arch.ops->guest_paging_levels == PAGING_L2) { + return init_bl2(spl4e, smfn); + } + /* Install hypervisor and 4x linear p.t. mapings. */ if ( (PGT_base_page_table == PGT_l4_page_table) && !shadow_mode_external(d) ) @@ -2575,7 +2729,7 @@ pgentry_64_t gle, sle; unsigned long gpfn, smfn; - if (from == L1 && to == L2) { + if (from == PAGING_L1 && to == PAGING_L2) { shadow_map_l1_into_current_l2(va); return; } @@ -2607,7 +2761,7 @@ if (!(l4e_get_flags(sl4e) & _PAGE_PRESENT)) { if (create_l2_shadow) { perfc_incrc(shadow_set_l3e_force_map); - shadow_map_into_current(v, va, L3, L4); + shadow_map_into_current(v, va, PAGING_L3, PAGING_L4); __shadow_get_l4e(v, va, &sl4e); } else { printk("For non VMX shadow, create_l1_shadow:%d\n", create_l2_shadow); @@ -2618,7 +2772,7 @@ if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) { if (create_l2_shadow) { perfc_incrc(shadow_set_l2e_force_map); - shadow_map_into_current(v, va, L2, L3); + shadow_map_into_current(v, va, PAGING_L2, PAGING_L3); __shadow_get_l3e(v, va, &sl3e); } else { printk("For non VMX shadow, create_l1_shadow:%d\n", create_l2_shadow); @@ -2654,8 +2808,15 @@ l1_pgentry_t old_spte; l1_pgentry_t sl1e = *(l1_pgentry_t *)sl1e_p; int i; - - for (i = L4; i >= L2; i--) { + unsigned long orig_va = 0; + + if (d->arch.ops->guest_paging_levels == PAGING_L2) { + /* This is for 32-bit VMX guest on 64-bit host */ + orig_va = va; + va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1)); + } + + for (i = PAGING_L4; i >= PAGING_L2; i--) { if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) { printk("<%s> i = %d\n", __func__, i); BUG(); @@ -2671,9 +2832,13 @@ #endif } } - if(i < L4) + if(i < PAGING_L4) shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i)); sle_up = sle; + } + + if (d->arch.ops->guest_paging_levels == PAGING_L2) { 
+ va = orig_va; } if ( shadow_mode_refcounts(d) ) @@ -2691,9 +2856,13 @@ } __shadow_set_l1e(v, va, &sl1e); - shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, L1)); -} - + + shadow_update_min_max(entry_get_pfn(sle_up), guest_l1_table_offset(va)); +} + +/* As 32-bit guest don't support 4M page yet, + * we don't concern double compile for this function + */ static inline int l2e_rw_fault( struct vcpu *v, l2_pgentry_t *gl2e_p, unsigned long va, int rw) { @@ -2824,12 +2993,120 @@ } +/* + * Check P, R/W, U/S bits in the guest page table. + * If the fault belongs to guest return 1, + * else return 0. + */ +#if defined( GUEST_PGENTRY_32 ) +static inline int guest_page_fault(struct vcpu *v, + unsigned long va, unsigned int error_code, + guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e) +{ + /* The following check for 32-bit guest on 64-bit host */ + + __guest_get_l2e(v, va, gpl2e); + + /* Check the guest L2 page-table entry first*/ + if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT))) + return 1; + + if (error_code & ERROR_W) { + if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW))) + return 1; + } + if (error_code & ERROR_U) { + if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER))) + return 1; + } + + if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE) + return 0; + + __guest_get_l1e(v, va, gpl1e); + + /* Then check the guest L1 page-table entry */ + if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT))) + return 1; + + if (error_code & ERROR_W) { + if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW))) + return 1; + } + if (error_code & ERROR_U) { + if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER))) + return 1; + } + + return 0; +} +#else +static inline int guest_page_fault(struct vcpu *v, + unsigned long va, unsigned int error_code, + guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e) +{ + struct domain *d = v->domain; + pgentry_64_t gle, *lva; + unsigned long mfn; + int i; + + __rw_entry(v, va, &gle, GUEST_ENTRY | 
GET_ENTRY | PAGING_L4); + if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT))) + return 1; + + if (error_code & ERROR_W) { + if (unlikely(!(entry_get_flags(gle) & _PAGE_RW))) + return 1; + } + if (error_code & ERROR_U) { + if (unlikely(!(entry_get_flags(gle) & _PAGE_USER))) + return 1; + } + for (i = PAGING_L3; i >= PAGING_L1; i--) { + /* + * If it's not external mode, then mfn should be machine physical. + */ + mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT)); + + lva = (pgentry_64_t *) phys_to_virt( + mfn << PAGE_SHIFT); + gle = lva[table_offset_64(va, i)]; + + if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT))) + return 1; + + if (error_code & ERROR_W) { + if (unlikely(!(entry_get_flags(gle) & _PAGE_RW))) + return 1; + } + if (error_code & ERROR_U) { + if (unlikely(!(entry_get_flags(gle) & _PAGE_USER))) + return 1; + } + + if (i == PAGING_L2) { + if (gpl2e) + gpl2e->l2 = gle.lo; + + if (likely(entry_get_flags(gle) & _PAGE_PSE)) + return 0; + + } + + if (i == PAGING_L1) + if (gpl1e) + gpl1e->l1 = gle.lo; + } + return 0; +} +#endif static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs) { struct vcpu *v = current; struct domain *d = v->domain; - l2_pgentry_t gl2e; - l1_pgentry_t sl1e, gl1e; + guest_l2_pgentry_t gl2e; + guest_l1_pgentry_t gl1e; + l1_pgentry_t sl1e; perfc_incrc(shadow_fault_calls); @@ -2852,12 +3129,11 @@ * STEP 2. Check if the fault belongs to guest */ if ( guest_page_fault( - v, va, regs->error_code, - (pgentry_64_t *)&gl2e, (pgentry_64_t *)&gl1e) ) { + v, va, regs->error_code, &gl2e, &gl1e) ) { goto fail; } - if ( unlikely(!(l2e_get_flags(gl2e) & _PAGE_PSE)) ) { + if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) { /* * Handle 4K pages here */ @@ -2891,11 +3167,11 @@ */ /* Write fault? 
*/ if ( regs->error_code & 2 ) { - if ( !l2e_rw_fault(v, &gl2e, va, WRITE_FAULT) ) { + if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) { goto fail; } } else { - l2e_rw_fault(v, &gl2e, va, READ_FAULT); + l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT); } /* @@ -2943,7 +3219,27 @@ shadow_unlock(d); } -#ifndef PGENTRY_32 +static unsigned long gva_to_gpa_64(unsigned long gva) +{ + struct vcpu *v = current; + guest_l1_pgentry_t gl1e = {0}; + guest_l2_pgentry_t gl2e = {0}; + unsigned long gpa; + + if (guest_page_fault(v, gva, 0, &gl2e, &gl1e)) + return 0; + + if (guest_l2e_get_flags(gl2e) & _PAGE_PSE) + gpa = guest_l2e_get_paddr(gl2e) + (gva & ((1 << GUEST_L2_PAGETABLE_SHIFT) - 1)); + else + gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK); + + return gpa; + +} + +#ifndef GUEST_PGENTRY_32 + struct shadow_ops MODE_F_HANDLER = { .guest_paging_levels = 4, .invlpg = shadow_invlpg_64, @@ -2954,10 +3250,42 @@ .do_update_va_mapping = do_update_va_mapping, .mark_mfn_out_of_sync = mark_mfn_out_of_sync, .is_out_of_sync = is_out_of_sync, + .gva_to_gpa = gva_to_gpa_64, }; #endif #endif + +#if CONFIG_PAGING_LEVELS == 2 +struct shadow_ops MODE_A_HANDLER = { + .guest_paging_levels = 2, + .invlpg = shadow_invlpg_32, + .fault = shadow_fault_32, + .update_pagetables = shadow_update_pagetables, + .sync_all = sync_all, + .remove_all_write_access = remove_all_write_access, + .do_update_va_mapping = do_update_va_mapping, + .mark_mfn_out_of_sync = mark_mfn_out_of_sync, + .is_out_of_sync = is_out_of_sync, + .gva_to_gpa = gva_to_gpa_64, +}; + +#elif CONFIG_PAGING_LEVELS == 3 +struct shadow_ops MODE_B_HANDLER = { + .guest_paging_levels = 3, + .invlpg = shadow_invlpg_32, + .fault = shadow_fault_32, + .update_pagetables = shadow_update_pagetables, + .sync_all = sync_all, + .remove_all_write_access = remove_all_write_access, + .do_update_va_mapping = do_update_va_mapping, + .mark_mfn_out_of_sync = mark_mfn_out_of_sync, + .is_out_of_sync = is_out_of_sync, + .gva_to_gpa = 
gva_to_gpa_pae, +}; + +#endif + /* * Local variables: diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/shadow32.c Fri Sep 9 16:30:54 2005 @@ -827,7 +827,7 @@ { page = list_entry(list_ent, struct pfn_info, list); mfn = page_to_pfn(page); - pfn = machine_to_phys_mapping[mfn]; + pfn = get_pfn_from_mfn(mfn); ASSERT(pfn != INVALID_M2P_ENTRY); ASSERT(pfn < (1u<<20)); @@ -841,7 +841,7 @@ { page = list_entry(list_ent, struct pfn_info, list); mfn = page_to_pfn(page); - pfn = machine_to_phys_mapping[mfn]; + pfn = get_pfn_from_mfn(mfn); if ( (pfn != INVALID_M2P_ENTRY) && (pfn < (1u<<20)) ) { @@ -1685,6 +1685,7 @@ if (__copy_from_user(&gpte, &linear_pg_table[va >> PAGE_SHIFT], sizeof(gpte))) { perfc_incrc(shadow_invlpg_faults); + shadow_unlock(d); return; } l1pte_propagate_from_guest(d, gpte, &spte); @@ -1917,8 +1918,10 @@ snapshot = map_domain_page(smfn); if (__copy_from_user(&gpte, &guest_pt[index], - sizeof(gpte))) + sizeof(gpte))) { + unmap_domain_page(snapshot); return 0; + } // This could probably be smarter, but this is sufficent for // our current needs. 
diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/shadow_public.c Fri Sep 9 16:30:54 2005 @@ -33,11 +33,15 @@ #if CONFIG_PAGING_LEVELS >= 3 #include <asm/shadow_64.h> +#endif +#if CONFIG_PAGING_LEVELS == 4 extern struct shadow_ops MODE_F_HANDLER; +extern struct shadow_ops MODE_D_HANDLER; #endif extern struct shadow_ops MODE_A_HANDLER; +#define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16)) /****************************************************************************/ /************* export interface functions ***********************************/ /****************************************************************************/ @@ -48,7 +52,7 @@ shadow_lock(d); switch(levels) { -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 4 case 4: if ( d->arch.ops != &MODE_F_HANDLER ) d->arch.ops = &MODE_F_HANDLER; @@ -56,9 +60,14 @@ return 1; #endif case 3: - case 2: + case 2: +#if CONFIG_PAGING_LEVELS == 2 if ( d->arch.ops != &MODE_A_HANDLER ) d->arch.ops = &MODE_A_HANDLER; +#elif CONFIG_PAGING_LEVELS == 4 + if ( d->arch.ops != &MODE_D_HANDLER ) + d->arch.ops = &MODE_D_HANDLER; +#endif shadow_unlock(d); return 1; default: @@ -122,13 +131,17 @@ return d->arch.ops->is_out_of_sync(v, va); } +unsigned long gva_to_gpa(unsigned long gva) +{ + struct domain *d = current->domain; + return d->arch.ops->gva_to_gpa(gva); +} /****************************************************************************/ /****************************************************************************/ #if CONFIG_PAGING_LEVELS >= 4 /* * Convert PAE 3-level page-table to 4-level page-table */ -#define PDP_ENTRIES 4 static pagetable_t page_table_convert(struct domain *d) { struct pfn_info *l4page, *l3page; @@ -203,19 +216,41 @@ /* * Free l2, l3, l4 shadow tables */ + +void free_fake_shadow_l2(struct domain *d,unsigned long smfn); + static void inline free_shadow_tables(struct 
domain *d, unsigned long smfn, u32 level) { pgentry_64_t *ple = map_domain_page(smfn); int i, external = shadow_mode_external(d); - - for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) - if ( external || is_guest_l4_slot(i) ) - if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) - put_shadow_ref(entry_get_pfn(ple[i])); - - unmap_domain_page(ple); -} + struct pfn_info *page = &frame_table[smfn]; + + if (d->arch.ops->guest_paging_levels == PAGING_L2) + { +#if CONFIG_PAGING_LEVELS >=4 + for ( i = 0; i < PDP_ENTRIES; i++ ) + { + if (entry_get_flags(ple[i]) & _PAGE_PRESENT ) + free_fake_shadow_l2(d,entry_get_pfn(ple[i])); + } + + page = &frame_table[entry_get_pfn(ple[0])]; + free_domheap_pages(page, SL2_ORDER); + unmap_domain_page(ple); +#endif + } + else + { + for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) + if ( external || is_guest_l4_slot(i) ) + if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(ple[i])); + + unmap_domain_page(ple); + } +} + void free_monitor_pagetable(struct vcpu *v) { @@ -453,7 +488,12 @@ struct pfn_info *spage = pfn_to_page(smfn); u32 min_max = spage->tlbflush_timestamp; int min = SHADOW_MIN(min_max); - int max = SHADOW_MAX(min_max); + int max; + + if (d->arch.ops->guest_paging_levels == PAGING_L2) + max = SHADOW_MAX_GUEST32(min_max); + else + max = SHADOW_MAX(min_max); for ( i = min; i <= max; i++ ) { @@ -512,9 +552,24 @@ unmap_domain_page(pl2e); } +void free_fake_shadow_l2(struct domain *d, unsigned long smfn) +{ + pgentry_64_t *ple = map_domain_page(smfn); + int i; + + for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 ) + { + if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(ple[i])); + } + + unmap_domain_page(ple); +} + void free_shadow_page(unsigned long smfn) { struct pfn_info *page = &frame_table[smfn]; + unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask; struct domain *d = page_get_owner(pfn_to_page(gmfn)); unsigned long gpfn = __mfn_to_gpfn(d, gmfn); @@ -531,6 +586,7 @@ gpfn |= (1UL << 63); } 
#endif + delete_shadow_status(d, gpfn, gmfn, type); switch ( type ) @@ -687,7 +743,7 @@ int i; struct shadow_status *x; struct vcpu *v; - + /* * WARNING! The shadow page table must not currently be in use! * e.g., You are expected to have paused the domain and synchronized CR3. @@ -794,7 +850,16 @@ perfc_decr(free_l1_pages); struct pfn_info *page = list_entry(list_ent, struct pfn_info, list); - free_domheap_page(page); + if (d->arch.ops->guest_paging_levels == PAGING_L2) + { +#if CONFIG_PAGING_LEVELS >=4 + free_domheap_pages(page, SL1_ORDER); +#else + free_domheap_page(page); +#endif + } + else + free_domheap_page(page); } shadow_audit(d, 0); @@ -1191,7 +1256,7 @@ { DPRINTK("Don't try to do a shadow op on yourself!\n"); return -EINVAL; - } + } domain_pause(d); @@ -1311,7 +1376,7 @@ { page = list_entry(list_ent, struct pfn_info, list); mfn = page_to_pfn(page); - pfn = machine_to_phys_mapping[mfn]; + pfn = get_pfn_from_mfn(mfn); ASSERT(pfn != INVALID_M2P_ENTRY); ASSERT(pfn < (1u<<20)); @@ -1325,7 +1390,7 @@ { page = list_entry(list_ent, struct pfn_info, list); mfn = page_to_pfn(page); - pfn = machine_to_phys_mapping[mfn]; + pfn = get_pfn_from_mfn(mfn); if ( (pfn != INVALID_M2P_ENTRY) && (pfn < (1u<<20)) ) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/time.c Fri Sep 9 16:30:54 2005 @@ -792,6 +792,13 @@ tsc_elapsed64 = curr_tsc - prev_tsc; /* + * Weirdness can happen if we lose sync with the platform timer. + * We could be smarter here: resync platform timer with local timer? + */ + if ( ((s64)stime_elapsed64 < (EPOCH / 2)) ) + goto out; + + /* * Calculate error-correction factor. This only slows down a fast local * clock (slow clocks are warped forwards). The scale factor is clamped * to >= 0.5. 
@@ -854,6 +861,7 @@ cpu_time[cpu].stime_local_stamp = curr_local_stime; cpu_time[cpu].stime_master_stamp = curr_master_stime; + out: set_ac_timer(&cpu_time[cpu].calibration_timer, NOW() + EPOCH); if ( cpu == 0 ) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/traps.c Fri Sep 9 16:30:54 2005 @@ -101,6 +101,14 @@ static int debug_stack_lines = 20; integer_param("debug_stack_lines", debug_stack_lines); +#ifdef CONFIG_X86_32 +#define stack_words_per_line 8 +#define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)®s->esp) +#else +#define stack_words_per_line 4 +#define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->esp) +#endif + int is_kernel_text(unsigned long addr) { extern char _stext, _etext; @@ -117,17 +125,16 @@ return (unsigned long) &_etext; } -void show_guest_stack(void) +static void show_guest_stack(struct cpu_user_regs *regs) { int i; - struct cpu_user_regs *regs = guest_cpu_user_regs(); unsigned long *stack = (unsigned long *)regs->esp, addr; printk("Guest stack trace from "__OP"sp=%p:\n ", stack); - for ( i = 0; i < (debug_stack_lines*8); i++ ) - { - if ( ((long)stack & (STACK_SIZE-1)) == 0 ) + for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ ) + { + if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 ) break; if ( get_user(addr, stack) ) { @@ -137,7 +144,7 @@ i = 1; break; } - if ( (i != 0) && ((i % 8) == 0) ) + if ( (i != 0) && ((i % stack_words_per_line) == 0) ) printk("\n "); printk("%p ", _p(addr)); stack++; @@ -147,40 +154,100 @@ printk("\n"); } -void show_trace(unsigned long *esp) -{ - unsigned long *stack = esp, addr; - int i = 0; - - printk("Xen call trace from "__OP"sp=%p:\n ", stack); - - while ( ((long) stack & (STACK_SIZE-1)) != 0 ) +#ifdef NDEBUG + +static void show_trace(struct cpu_user_regs *regs) +{ + unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr; + + printk("Xen call trace:\n "); + + printk("[<%p>]", _p(regs->eip)); + print_symbol(" 
%s\n ", regs->eip); + + while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 ) { addr = *stack++; if ( is_kernel_text(addr) ) { printk("[<%p>]", _p(addr)); print_symbol(" %s\n ", addr); - i++; - } - } - if ( i == 0 ) - printk("Trace empty."); + } + } + printk("\n"); } -void show_stack(unsigned long *esp) -{ - unsigned long *stack = esp, addr; +#else + +static void show_trace(struct cpu_user_regs *regs) +{ + unsigned long *frame, next, addr, low, high; + + printk("Xen call trace:\n "); + + printk("[<%p>]", _p(regs->eip)); + print_symbol(" %s\n ", regs->eip); + + /* Bounds for range of valid frame pointer. */ + low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2); + high = (low & ~(STACK_SIZE - 1)) + (STACK_SIZE - sizeof(struct cpu_info)); + + /* The initial frame pointer. */ + next = regs->ebp; + + for ( ; ; ) + { + /* Valid frame pointer? */ + if ( (next < low) || (next > high) ) + { + /* + * Exception stack frames have a different layout, denoted by an + * inverted frame pointer. + */ + next = ~next; + if ( (next < low) || (next > high) ) + break; + frame = (unsigned long *)next; + next = frame[0]; + addr = frame[(offsetof(struct cpu_user_regs, eip) - + offsetof(struct cpu_user_regs, ebp)) + / BYTES_PER_LONG]; + } + else + { + /* Ordinary stack frame. 
*/ + frame = (unsigned long *)next; + next = frame[0]; + addr = frame[1]; + } + + printk("[<%p>]", _p(addr)); + print_symbol(" %s\n ", addr); + + low = (unsigned long)&frame[2]; + } + + printk("\n"); +} + +#endif + +void show_stack(struct cpu_user_regs *regs) +{ + unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr; int i; + if ( GUEST_MODE(regs) ) + return show_guest_stack(regs); + printk("Xen stack trace from "__OP"sp=%p:\n ", stack); - for ( i = 0; i < (debug_stack_lines*8); i++ ) - { - if ( ((long)stack & (STACK_SIZE-1)) == 0 ) - break; - if ( (i != 0) && ((i % 8) == 0) ) + for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ ) + { + if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 ) + break; + if ( (i != 0) && ((i % stack_words_per_line) == 0) ) printk("\n "); addr = *stack++; printk("%p ", _p(addr)); @@ -189,7 +256,7 @@ printk("Stack empty."); printk("\n"); - show_trace(esp); + show_trace(regs); } /* @@ -403,20 +470,32 @@ return EXCRET_fault_fixed; } -asmlinkage int do_page_fault(struct cpu_user_regs *regs) -{ - unsigned long addr, fixup; - struct vcpu *v = current; +#ifdef HYPERVISOR_VIRT_END +#define IN_HYPERVISOR_RANGE(va) \ + (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END)) +#else +#define IN_HYPERVISOR_RANGE(va) \ + (((va) >= HYPERVISOR_VIRT_START)) +#endif + +static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs) +{ + struct vcpu *v = current; struct domain *d = v->domain; - __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : ); - - DEBUGGER_trap_entry(TRAP_page_fault, regs); - - perfc_incrc(page_faults); - - if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && - !shadow_mode_enabled(d)) ) + if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) + { + if ( shadow_mode_external(d) && GUEST_CONTEXT(v, regs) ) + return shadow_fault(addr, regs); + if ( (addr >= PERDOMAIN_VIRT_START) && (addr < PERDOMAIN_VIRT_END) ) + return handle_perdomain_mapping_fault( + addr - PERDOMAIN_VIRT_START, regs); + } + else 
if ( unlikely(shadow_mode_enabled(d)) ) + { + return shadow_fault(addr, regs); + } + else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) { LOCK_BIGLOCK(d); if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) && @@ -428,14 +507,9 @@ return EXCRET_fault_fixed; } - if ( ((addr < HYPERVISOR_VIRT_START) -#if defined(__x86_64__) - || (addr >= HYPERVISOR_VIRT_END) -#endif - ) - && - KERNEL_MODE(v, regs) && - ((regs->error_code & 3) == 3) && /* write-protection fault */ + if ( KERNEL_MODE(v, regs) && + /* Protection violation on write? No reserved-bit violation? */ + ((regs->error_code & 0xb) == 0x3) && ptwr_do_page_fault(d, addr, regs) ) { UNLOCK_BIGLOCK(d); @@ -444,43 +518,51 @@ UNLOCK_BIGLOCK(d); } - if ( unlikely(shadow_mode_enabled(d)) && - ((addr < HYPERVISOR_VIRT_START) || -#if defined(__x86_64__) - (addr >= HYPERVISOR_VIRT_END) || -#endif - (shadow_mode_external(d) && GUEST_CONTEXT(v, regs))) && - shadow_fault(addr, regs) ) - return EXCRET_fault_fixed; - - if ( unlikely(addr >= PERDOMAIN_VIRT_START) && - unlikely(addr < PERDOMAIN_VIRT_END) && - handle_perdomain_mapping_fault(addr - PERDOMAIN_VIRT_START, regs) ) - return EXCRET_fault_fixed; - - if ( !GUEST_MODE(regs) ) - goto xen_fault; + return 0; +} + +/* + * #PF error code: + * Bit 0: Protection violation (=1) ; Page not present (=0) + * Bit 1: Write access + * Bit 2: Supervisor mode + * Bit 3: Reserved bit violation + * Bit 4: Instruction fetch + */ +asmlinkage int do_page_fault(struct cpu_user_regs *regs) +{ + unsigned long addr, fixup; + int rc; + + __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : ); + + DEBUGGER_trap_entry(TRAP_page_fault, regs); + + perfc_incrc(page_faults); + + if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) ) + return rc; + + if ( unlikely(!GUEST_MODE(regs)) ) + { + if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) + { + perfc_incrc(copy_user_faults); + regs->eip = fixup; + return 0; + } + + DEBUGGER_trap_fatal(TRAP_page_fault, regs); + + 
show_registers(regs); + show_page_walk(addr); + panic("CPU%d FATAL PAGE FAULT\n" + "[error_code=%04x]\n" + "Faulting linear address: %p\n", + smp_processor_id(), regs->error_code, addr); + } propagate_page_fault(addr, regs->error_code); - return 0; - - xen_fault: - - if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) - { - perfc_incrc(copy_user_faults); - regs->eip = fixup; - return 0; - } - - DEBUGGER_trap_fatal(TRAP_page_fault, regs); - - show_registers(regs); - show_page_walk(addr); - panic("CPU%d FATAL PAGE FAULT\n" - "[error_code=%04x]\n" - "Faulting linear address: %p\n", - smp_processor_id(), regs->error_code, addr); return 0; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/vmx.c Fri Sep 9 16:30:54 2005 @@ -49,6 +49,15 @@ int vmcs_size; unsigned int opt_vmx_debug_level = 0; integer_param("vmx_debug", opt_vmx_debug_level); + +extern int hvm_enabled; + +#ifdef TRACE_BUFFER +static unsigned long trace_values[NR_CPUS][4]; +#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value +#else +#define TRACE_VMEXIT(index,value) ((void)0) +#endif #ifdef __x86_64__ static struct msr_state percpu_msr[NR_CPUS]; @@ -338,6 +347,8 @@ vmx_save_init_msrs(); + hvm_enabled = 1; + return 1; } @@ -351,7 +362,7 @@ * Not all cases receive valid value in the VM-exit instruction length field. 
*/ #define __get_instruction_length(len) \ - __vmread(INSTRUCTION_LEN, &(len)); \ + __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \ if ((len) < 1 || (len) > 15) \ __vmx_bug(®s); @@ -381,6 +392,7 @@ if (!vmx_paging_enabled(current)){ handle_mmio(va, va); + TRACE_VMEXIT (2,2); return 1; } gpa = gva_to_gpa(va); @@ -389,21 +401,22 @@ if ( mmio_space(gpa) ){ if (gpa >= 0xFEE00000) { /* workaround for local APIC */ u32 inst_len; - __vmread(INSTRUCTION_LEN, &(inst_len)); + __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len)); __update_guest_eip(inst_len); return 1; } + TRACE_VMEXIT (2,2); handle_mmio(va, gpa); return 1; } result = shadow_fault(va, regs); - + TRACE_VMEXIT (2,result); #if 0 if ( !result ) { __vmread(GUEST_RIP, &eip); - printk("vmx pgfault to guest va=%p eip=%p\n", va, eip); + printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip); } #endif @@ -447,7 +460,16 @@ clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PAE, &edx); clear_bit(X86_FEATURE_PSE36, &edx); +#else + struct vcpu *d = current; + if (d->domain->arch.ops->guest_paging_levels == PAGING_L2) + { + clear_bit(X86_FEATURE_PSE, &edx); + clear_bit(X86_FEATURE_PAE, &edx); + clear_bit(X86_FEATURE_PSE36, &edx); + } #endif + } regs->eax = (unsigned long) eax; @@ -542,7 +564,7 @@ int i, inst_len; int inst_copy_from_guest(unsigned char *, unsigned long, int); - __vmread(INSTRUCTION_LEN, &inst_len); + __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); memset(inst, 0, MAX_INST_LEN); if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) { printf("check_for_null_selector: get guest instruction failed\n"); @@ -584,15 +606,66 @@ return 0; } +void send_pio_req(struct cpu_user_regs *regs, unsigned long port, + unsigned long count, int size, long value, int dir, int pvalid) +{ + struct vcpu *v = current; + vcpu_iodata_t *vio; + ioreq_t *p; + + vio = get_vio(v->domain, v->vcpu_id); + if (vio == NULL) { + printk("bad shared page: %lx\n", (unsigned long) vio); + domain_crash_synchronous(); + } + + if 
(test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { + printf("VMX I/O has not yet completed\n"); + domain_crash_synchronous(); + } + set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); + + p = &vio->vp_ioreq; + p->dir = dir; + p->pdata_valid = pvalid; + + p->type = IOREQ_TYPE_PIO; + p->size = size; + p->addr = port; + p->count = count; + p->df = regs->eflags & EF_DF ? 1 : 0; + + if (pvalid) { + if (vmx_paging_enabled(current)) + p->u.pdata = (void *) gva_to_gpa(value); + else + p->u.pdata = (void *) value; /* guest VA == guest PA */ + } else + p->u.data = value; + + p->state = STATE_IOREQ_READY; + + if (vmx_portio_intercept(p)) { + /* no blocking & no evtchn notification */ + clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); + return; + } + + evtchn_send(iopacket_port(v->domain)); + vmx_wait_io(); +} + static void vmx_io_instruction(struct cpu_user_regs *regs, unsigned long exit_qualification, unsigned long inst_len) { - struct vcpu *d = current; - vcpu_iodata_t *vio; - ioreq_t *p; - unsigned long addr; + struct mi_per_cpu_info *mpcip; unsigned long eip, cs, eflags; + unsigned long port, size, dir; int vm86; + + mpcip = ¤t->domain->arch.vmx_platform.mpci; + mpcip->instr = INSTR_PIO; + mpcip->flags = 0; __vmread(GUEST_RIP, &eip); __vmread(GUEST_CS_SELECTOR, &cs); @@ -605,104 +678,93 @@ vm86, cs, eip, exit_qualification); if (test_bit(6, &exit_qualification)) - addr = (exit_qualification >> 16) & (0xffff); + port = (exit_qualification >> 16) & 0xFFFF; else - addr = regs->edx & 0xffff; - - vio = get_vio(d->domain, d->vcpu_id); - if (vio == 0) { - printk("bad shared page: %lx", (unsigned long) vio); - domain_crash_synchronous(); - } - p = &vio->vp_ioreq; - p->dir = test_bit(3, &exit_qualification); /* direction */ - - p->pdata_valid = 0; - p->count = 1; - p->size = (exit_qualification & 7) + 1; + port = regs->edx & 0xffff; + TRACE_VMEXIT(2, port); + size = (exit_qualification & 7) + 1; + dir = test_bit(3, &exit_qualification); /* direction */ if (test_bit(4, 
&exit_qualification)) { /* string instruction */ - unsigned long laddr; - - __vmread(GUEST_LINEAR_ADDRESS, &laddr); + unsigned long addr, count = 1; + int sign = regs->eflags & EF_DF ? -1 : 1; + + __vmread(GUEST_LINEAR_ADDRESS, &addr); + /* * In protected mode, guest linear address is invalid if the * selector is null. */ - if (!vm86 && check_for_null_selector(eip)) { - laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi; - } - p->pdata_valid = 1; - - p->u.data = laddr; - if (vmx_paging_enabled(d)) - p->u.pdata = (void *) gva_to_gpa(p->u.data); - p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0; - - if (test_bit(5, &exit_qualification)) /* "rep" prefix */ - p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx; - - /* - * Split up string I/O operations that cross page boundaries. Don't - * advance %eip so that "rep insb" will restart at the next page. - */ - if ((p->u.data & PAGE_MASK) != - ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) { - VMX_DBG_LOG(DBG_LEVEL_2, - "String I/O crosses page boundary (cs:eip=0x%lx:0x%lx)\n", - cs, eip); - if (p->u.data & (p->size - 1)) { - printf("Unaligned string I/O operation (cs:eip=0x%lx:0x%lx)\n", - cs, eip); - domain_crash_synchronous(); - } - p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size; - } else { - __update_guest_eip(inst_len); - } - } else if (p->dir == IOREQ_WRITE) { - p->u.data = regs->eax; + if (!vm86 && check_for_null_selector(eip)) + addr = dir == IOREQ_WRITE ? regs->esi : regs->edi; + + if (test_bit(5, &exit_qualification)) { /* "rep" prefix */ + mpcip->flags |= REPZ; + count = vm86 ? regs->ecx & 0xFFFF : regs->ecx; + } + + /* + * Handle string pio instructions that cross pages or that + * are unaligned. 
See the comments in vmx_platform.c/handle_mmio() + */ + if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) { + unsigned long value = 0; + + mpcip->flags |= OVERLAP; + if (dir == IOREQ_WRITE) + vmx_copy(&value, addr, size, VMX_COPY_IN); + send_pio_req(regs, port, 1, size, value, dir, 0); + } else { + if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) { + if (sign > 0) + count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; + else + count = (addr & ~PAGE_MASK) / size; + } else + __update_guest_eip(inst_len); + + send_pio_req(regs, port, count, size, addr, dir, 1); + } + } else { __update_guest_eip(inst_len); - } else - __update_guest_eip(inst_len); - - p->addr = addr; - p->port_mm = 0; - - /* Check if the packet needs to be intercepted */ - if (vmx_portio_intercept(p)) - /* no blocking & no evtchn notification */ - return; - - set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags); - p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(d->domain)); - vmx_wait_io(); -} - -enum { COPY_IN = 0, COPY_OUT }; - -static inline int + send_pio_req(regs, port, 1, size, regs->eax, dir, 0); + } +} + +int vmx_copy(void *buf, unsigned long laddr, int size, int dir) { + unsigned long gpa, mfn; char *addr; - unsigned long mfn; - - if ( (size + (laddr & (PAGE_SIZE - 1))) >= PAGE_SIZE ) - { - printf("vmx_copy exceeds page boundary\n"); - return 0; - } - - mfn = phys_to_machine_mapping(laddr >> PAGE_SHIFT); - addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK); - - if (dir == COPY_IN) - memcpy(buf, addr, size); - else - memcpy(addr, buf, size); - - unmap_domain_page(addr); + int count; + + while (size > 0) { + count = PAGE_SIZE - (laddr & ~PAGE_MASK); + if (count > size) + count = size; + + if (vmx_paging_enabled(current)) { + gpa = gva_to_gpa(laddr); + mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT); + } else + mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT); + if (mfn == INVALID_MFN) + return 0; + + addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK); + + if 
(dir == VMX_COPY_IN) + memcpy(buf, addr, count); + else + memcpy(addr, buf, count); + + unmap_domain_page(addr); + + laddr += count; + buf += count; + size -= count; + } + return 1; } @@ -712,7 +774,7 @@ unsigned long inst_len; int error = 0; - error |= __vmread(INSTRUCTION_LEN, &inst_len); + error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); error |= __vmread(GUEST_RIP, &c->eip); c->eip += inst_len; /* skip transition instruction */ error |= __vmread(GUEST_RSP, &c->esp); @@ -795,7 +857,7 @@ * removed some translation or changed page attributes. * We simply invalidate the shadow. */ - mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT); + mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(d->arch.guest_table)) { printk("Invalid CR3 value=%x", c->cr3); domain_crash_synchronous(); @@ -813,7 +875,7 @@ domain_crash_synchronous(); return 0; } - mfn = phys_to_machine_mapping(c->cr3 >> PAGE_SHIFT); + mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT); d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); update_pagetables(d); /* @@ -889,7 +951,7 @@ u32 cp; /* make sure vmxassist exists (this is not an error) */ - if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN)) + if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN)) return 0; if (magic != VMXASSIST_MAGIC) return 0; @@ -903,20 +965,20 @@ */ case VMX_ASSIST_INVOKE: /* save the old context */ - if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN)) + if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN)) goto error; if (cp != 0) { if (!vmx_world_save(d, &c)) goto error; - if (!vmx_copy(&c, cp, sizeof(c), COPY_OUT)) + if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT)) goto error; } /* restore the new context, this should activate vmxassist */ - if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), COPY_IN)) + if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN)) goto error; if (cp != 0) { - if (!vmx_copy(&c, cp, sizeof(c), 
COPY_IN)) + if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN)) goto error; if (!vmx_world_restore(d, &c)) goto error; @@ -930,10 +992,10 @@ */ case VMX_ASSIST_RESTORE: /* save the old context */ - if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN)) + if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN)) goto error; if (cp != 0) { - if (!vmx_copy(&c, cp, sizeof(c), COPY_IN)) + if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN)) goto error; if (!vmx_world_restore(d, &c)) goto error; @@ -968,7 +1030,7 @@ /* * The guest CR3 must be pointing to the guest physical. */ - if ( !VALID_MFN(mfn = phys_to_machine_mapping( + if ( !VALID_MFN(mfn = get_mfn_from_pfn( d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) || !get_page(pfn_to_page(mfn), d->domain) ) { @@ -996,6 +1058,15 @@ #if CONFIG_PAGING_LEVELS >= 4 if(!shadow_set_guest_paging_levels(d->domain, 4)) { + printk("Unsupported guest paging levels\n"); + domain_crash_synchronous(); /* need to take a clean path */ + } +#endif + } + else + { +#if CONFIG_PAGING_LEVELS >= 4 + if(!shadow_set_guest_paging_levels(d->domain, 2)) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ } @@ -1164,7 +1235,7 @@ * removed some translation or changed page attributes. * We simply invalidate the shadow. 
*/ - mfn = phys_to_machine_mapping(value >> PAGE_SHIFT); + mfn = get_mfn_from_pfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(d->arch.guest_table)) __vmx_bug(regs); shadow_sync_all(d->domain); @@ -1175,7 +1246,7 @@ */ VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); if ( ((value >> PAGE_SHIFT) > d->domain->max_pages ) || - !VALID_MFN(mfn = phys_to_machine_mapping(value >> PAGE_SHIFT)) || + !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) || !get_page(pfn_to_page(mfn), d->domain) ) { printk("Invalid CR3 value=%lx", value); @@ -1282,13 +1353,20 @@ case TYPE_MOV_TO_CR: gp = exit_qualification & CONTROL_REG_ACCESS_REG; cr = exit_qualification & CONTROL_REG_ACCESS_NUM; + TRACE_VMEXIT(1,TYPE_MOV_TO_CR); + TRACE_VMEXIT(2,cr); + TRACE_VMEXIT(3,gp); return mov_to_cr(gp, cr, regs); case TYPE_MOV_FROM_CR: gp = exit_qualification & CONTROL_REG_ACCESS_REG; cr = exit_qualification & CONTROL_REG_ACCESS_NUM; + TRACE_VMEXIT(1,TYPE_MOV_FROM_CR); + TRACE_VMEXIT(2,cr); + TRACE_VMEXIT(3,gp); mov_from_cr(cr, gp, regs); break; case TYPE_CLTS: + TRACE_VMEXIT(1,TYPE_CLTS); clts(); setup_fpu(current); @@ -1301,6 +1379,7 @@ __vmwrite(CR0_READ_SHADOW, value); break; case TYPE_LMSW: + TRACE_VMEXIT(1,TYPE_LMSW); __vmread(CR0_READ_SHADOW, &value); value = (value & ~0xF) | (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF); @@ -1518,15 +1597,18 @@ __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field); if (idtv_info_field & INTR_INFO_VALID_MASK) { - if ((idtv_info_field & 0x0700) != 0x400) { /* exclude soft ints */ - __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); - - if (idtv_info_field & 0x800) { /* valid error code */ - unsigned long error_code; - __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code); - __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); - } - } + __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); + + __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); + if (inst_len >= 1 && inst_len <= 15) + __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len); + + if 
(idtv_info_field & 0x800) { /* valid error code */ + unsigned long error_code; + __vmread(IDT_VECTORING_ERROR_CODE, &error_code); + __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); + } + VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field); } @@ -1544,6 +1626,7 @@ __vmread(GUEST_RIP, &eip); TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason); + TRACE_VMEXIT(0,exit_reason); switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: @@ -1562,6 +1645,7 @@ __vmx_bug(®s); vector &= 0xff; + TRACE_VMEXIT(1,vector); perfc_incra(cause_vector, vector); TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector); @@ -1606,6 +1690,10 @@ { __vmread(EXIT_QUALIFICATION, &va); __vmread(VM_EXIT_INTR_ERROR_CODE, ®s.error_code); + + TRACE_VMEXIT(3,regs.error_code); + TRACE_VMEXIT(4,va); + VMX_DBG_LOG(DBG_LEVEL_VMMU, "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", (unsigned long)regs.eax, (unsigned long)regs.ebx, @@ -1680,6 +1768,8 @@ eip, inst_len, exit_qualification); if (vmx_cr_access(exit_qualification, ®s)) __update_guest_eip(inst_len); + TRACE_VMEXIT(3,regs.error_code); + TRACE_VMEXIT(4,exit_qualification); break; } case EXIT_REASON_DR_ACCESS: @@ -1692,6 +1782,7 @@ __vmread(EXIT_QUALIFICATION, &exit_qualification); __get_instruction_length(inst_len); vmx_io_instruction(®s, exit_qualification, inst_len); + TRACE_VMEXIT(4,exit_qualification); break; case EXIT_REASON_MSR_READ: __get_instruction_length(inst_len); @@ -1726,6 +1817,25 @@ #endif } +#ifdef TRACE_BUFFER +asmlinkage void trace_vmentry (void) +{ + TRACE_5D(TRC_VMENTRY,trace_values[current->processor][0], + trace_values[current->processor][1],trace_values[current->processor][2], + trace_values[current->processor][3],trace_values[current->processor][4]); + TRACE_VMEXIT(0,9); + TRACE_VMEXIT(1,9); + TRACE_VMEXIT(2,9); + TRACE_VMEXIT(3,9); + TRACE_VMEXIT(4,9); + return; +} +asmlinkage void trace_vmexit (void) +{ + TRACE_3D(TRC_VMEXIT,0,0,0); + return; +} +#endif #endif /* CONFIG_VMX */ /* diff 
-r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_intercept.c --- a/xen/arch/x86/vmx_intercept.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/vmx_intercept.c Fri Sep 9 16:30:54 2005 @@ -172,7 +172,7 @@ if (p->size != 1 || p->pdata_valid || - p->port_mm) + p->type != IOREQ_TYPE_PIO) return 0; if (p->addr == PIT_MODE && @@ -284,7 +284,5 @@ if (!reinit) register_portio_handler(0x40, 4, intercept_pit_io); } - -} - +} #endif /* CONFIG_VMX */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/vmx_io.c Fri Sep 9 16:30:54 2005 @@ -33,6 +33,7 @@ #include <asm/vmx_platform.h> #include <asm/vmx_virpit.h> #include <asm/apic.h> +#include <asm/shadow.h> #include <public/io/ioreq.h> #include <public/io/vmx_vlapic.h> @@ -123,7 +124,6 @@ regs->esp &= 0xFFFF0000; regs->esp |= (value & 0xFFFF); break; - case 5: regs->ebp &= 0xFFFF0000; regs->ebp |= (value & 0xFFFF); @@ -207,7 +207,6 @@ *reg &= ~0xFFFF; *reg |= (value & 0xFFFF); break; - case LONG: *reg &= ~0xFFFFFFFF; *reg |= (value & 0xFFFFFFFF); @@ -322,13 +321,319 @@ } #endif +extern long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs); + +static inline void set_eflags_CF(int size, unsigned long v1, + unsigned long v2, struct cpu_user_regs *regs) +{ + unsigned long mask = (1 << (8 * size)) - 1; + + if ((v1 & mask) > (v2 & mask)) + regs->eflags |= X86_EFLAGS_CF; + else + regs->eflags &= ~X86_EFLAGS_CF; +} + +static inline void set_eflags_OF(int size, unsigned long v1, + unsigned long v2, unsigned long v3, struct cpu_user_regs *regs) +{ + if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1))) + regs->eflags |= X86_EFLAGS_OF; +} + +static inline void set_eflags_AF(int size, unsigned long v1, + unsigned long v2, unsigned long v3, struct cpu_user_regs *regs) +{ + if ((v1 ^ v2 ^ v3) & 0x10) + regs->eflags |= X86_EFLAGS_AF; +} + +static inline void set_eflags_ZF(int size, unsigned long v1, + struct cpu_user_regs *regs) +{ + unsigned long 
mask = (1 << (8 * size)) - 1; + + if ((v1 & mask) == 0) + regs->eflags |= X86_EFLAGS_ZF; +} + +static inline void set_eflags_SF(int size, unsigned long v1, + struct cpu_user_regs *regs) +{ + if (v1 & (1 << ((8 * size) - 1))) + regs->eflags |= X86_EFLAGS_SF; +} + +static char parity_table[256] = { + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1 +}; + +static inline void set_eflags_PF(int size, unsigned long v1, + struct cpu_user_regs *regs) +{ + if (parity_table[v1 & 0xFF]) + regs->eflags |= X86_EFLAGS_PF; +} + +static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p, + struct mi_per_cpu_info *mpcip) +{ + unsigned long old_eax; + int sign = p->df ? 
-1 : 1; + + if (p->dir == IOREQ_WRITE) { + if (p->pdata_valid) { + regs->esi += sign * p->count * p->size; + if (mpcip->flags & REPZ) + regs->ecx -= p->count; + } + } else { + if (mpcip->flags & OVERLAP) { + unsigned long addr; + + regs->edi += sign * p->count * p->size; + if (mpcip->flags & REPZ) + regs->ecx -= p->count; + + addr = regs->edi; + if (sign > 0) + addr -= p->size; + vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT); + } else if (p->pdata_valid) { + regs->edi += sign * p->count * p->size; + if (mpcip->flags & REPZ) + regs->ecx -= p->count; + } else { + old_eax = regs->eax; + switch (p->size) { + case 1: + regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff); + break; + case 2: + regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff); + break; + case 4: + regs->eax = (p->u.data & 0xffffffff); + break; + default: + printk("Error: %s unknown port size\n", __FUNCTION__); + domain_crash_synchronous(); + } + } + } +} + +static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p, + struct mi_per_cpu_info *mpcip) +{ + int sign = p->df ? -1 : 1; + int size = -1, index = -1; + unsigned long value = 0, diff = 0; + unsigned long src, dst; + + src = mpcip->operand[0]; + dst = mpcip->operand[1]; + size = operand_size(src); + + switch (mpcip->instr) { + case INSTR_MOV: + if (dst & REGISTER) { + index = operand_index(dst); + set_reg_value(size, index, 0, regs, p->u.data); + } + break; + + case INSTR_MOVZ: + if (dst & REGISTER) { + index = operand_index(dst); + switch (size) { + case BYTE: p->u.data = p->u.data & 0xFFULL; break; + case WORD: p->u.data = p->u.data & 0xFFFFULL; break; + case LONG: p->u.data = p->u.data & 0xFFFFFFFFULL; break; + } + set_reg_value(operand_size(dst), index, 0, regs, p->u.data); + } + break; + + case INSTR_MOVS: + sign = p->df ? 
-1 : 1; + regs->esi += sign * p->count * p->size; + regs->edi += sign * p->count * p->size; + + if ((mpcip->flags & OVERLAP) && p->dir == IOREQ_READ) { + unsigned long addr = regs->edi; + + if (sign > 0) + addr -= p->size; + vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT); + } + + if (mpcip->flags & REPZ) + regs->ecx -= p->count; + break; + + case INSTR_STOS: + sign = p->df ? -1 : 1; + regs->edi += sign * p->count * p->size; + if (mpcip->flags & REPZ) + regs->ecx -= p->count; + break; + + case INSTR_AND: + if (src & REGISTER) { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data & value; + } else if (src & IMMEDIATE) { + value = mpcip->immediate; + diff = (unsigned long) p->u.data & value; + } else if (src & MEMORY) { + index = operand_index(dst); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data & value; + set_reg_value(size, index, 0, regs, diff); + } + + /* + * The OF and CF flags are cleared; the SF, ZF, and PF + * flags are set according to the result. The state of + * the AF flag is undefined. + */ + regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); + set_eflags_ZF(size, diff, regs); + set_eflags_SF(size, diff, regs); + set_eflags_PF(size, diff, regs); + break; + + case INSTR_OR: + if (src & REGISTER) { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data | value; + } else if (src & IMMEDIATE) { + value = mpcip->immediate; + diff = (unsigned long) p->u.data | value; + } else if (src & MEMORY) { + index = operand_index(dst); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data | value; + set_reg_value(size, index, 0, regs, diff); + } + + /* + * The OF and CF flags are cleared; the SF, ZF, and PF + * flags are set according to the result. The state of + * the AF flag is undefined. 
+ */ + regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); + set_eflags_ZF(size, diff, regs); + set_eflags_SF(size, diff, regs); + set_eflags_PF(size, diff, regs); + break; + + case INSTR_XOR: + if (src & REGISTER) { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data ^ value; + } else if (src & IMMEDIATE) { + value = mpcip->immediate; + diff = (unsigned long) p->u.data ^ value; + } else if (src & MEMORY) { + index = operand_index(dst); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data ^ value; + set_reg_value(size, index, 0, regs, diff); + } + + /* + * The OF and CF flags are cleared; the SF, ZF, and PF + * flags are set according to the result. The state of + * the AF flag is undefined. + */ + regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); + set_eflags_ZF(size, diff, regs); + set_eflags_SF(size, diff, regs); + set_eflags_PF(size, diff, regs); + break; + + case INSTR_CMP: + if (src & REGISTER) { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + diff = (unsigned long) p->u.data - value; + } else if (src & IMMEDIATE) { + value = mpcip->immediate; + diff = (unsigned long) p->u.data - value; + } else if (src & MEMORY) { + index = operand_index(dst); + value = get_reg_value(size, index, 0, regs); + diff = value - (unsigned long) p->u.data; + } + + /* + * The CF, OF, SF, ZF, AF, and PF flags are set according + * to the result + */ + regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); + set_eflags_CF(size, value, (unsigned long) p->u.data, regs); + set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs); + set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs); + set_eflags_ZF(size, diff, regs); + set_eflags_SF(size, diff, regs); + set_eflags_PF(size, diff, regs); + break; + + case 
INSTR_TEST: + if (src & REGISTER) { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + } else if (src & IMMEDIATE) { + value = mpcip->immediate; + } else if (src & MEMORY) { + index = operand_index(dst); + value = get_reg_value(size, index, 0, regs); + } + diff = (unsigned long) p->u.data & value; + + /* + * Sets the SF, ZF, and PF status flags. CF and OF are set to 0 + */ + regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF); + set_eflags_ZF(size, diff, regs); + set_eflags_SF(size, diff, regs); + set_eflags_PF(size, diff, regs); + break; + } + + load_cpu_user_regs(regs); +} + void vmx_io_assist(struct vcpu *v) { vcpu_iodata_t *vio; ioreq_t *p; struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned long old_eax; - int sign; struct mi_per_cpu_info *mpci_p; struct cpu_user_regs *inst_decoder_regs; @@ -340,80 +645,26 @@ if (vio == 0) { VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx", (unsigned long) vio); + printf("bad shared page: %lx\n", (unsigned long) vio); domain_crash_synchronous(); } + p = &vio->vp_ioreq; - - if (p->state == STATE_IORESP_HOOK){ + if (p->state == STATE_IORESP_HOOK) vmx_hooks_assist(v); - } /* clear IO wait VMX flag */ if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { - if (p->state != STATE_IORESP_READY) { - /* An interrupt send event raced us */ - return; - } else { - p->state = STATE_INVALID; - } - clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); - } else { - return; - } - - sign = (p->df) ? 
-1 : 1; - if (p->port_mm) { - if (p->pdata_valid) { - regs->esi += sign * p->count * p->size; - regs->edi += sign * p->count * p->size; - } else { - if (p->dir == IOREQ_WRITE) { - return; - } - int size = -1, index = -1; - - size = operand_size(v->domain->arch.vmx_platform.mpci.mmio_target); - index = operand_index(v->domain->arch.vmx_platform.mpci.mmio_target); - - if (v->domain->arch.vmx_platform.mpci.mmio_target & WZEROEXTEND) { - p->u.data = p->u.data & 0xffff; - } - set_reg_value(size, index, 0, regs, p->u.data); - - } - load_cpu_user_regs(regs); - return; - } - - if (p->dir == IOREQ_WRITE) { - if (p->pdata_valid) { - regs->esi += sign * p->count * p->size; - regs->ecx -= p->count; - } - return; - } else { - if (p->pdata_valid) { - regs->edi += sign * p->count * p->size; - regs->ecx -= p->count; - return; - } - } - - old_eax = regs->eax; - - switch(p->size) { - case 1: - regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff); - break; - case 2: - regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff); - break; - case 4: - regs->eax = (p->u.data & 0xffffffff); - break; - default: - printk("Error: %s unknwon port size\n", __FUNCTION__); - domain_crash_synchronous(); + if (p->state == STATE_IORESP_READY) { + p->state = STATE_INVALID; + clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); + + if (p->type == IOREQ_TYPE_PIO) + vmx_pio_assist(regs, p, mpci_p); + else + vmx_mmio_assist(regs, p, mpci_p); + } + /* else an interrupt send event raced us */ } } @@ -456,8 +707,9 @@ int port = iopacket_port(current->domain); do { - if(!test_bit(port, ¤t->domain->shared_info->evtchn_pending[0])) + if (!test_bit(port, ¤t->domain->shared_info->evtchn_pending[0])) do_block(); + vmx_check_events(current); if (!test_bit(ARCH_VMX_IO_WAIT, ¤t->arch.arch_vmx.flags)) break; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_platform.c --- a/xen/arch/x86/vmx_platform.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/vmx_platform.c Fri Sep 9 16:30:54 2005 @@ -64,37 +64,37 @@ case 
QUAD: return (long)(reg); default: - printk("Error: <__get_reg_value>Invalid reg size\n"); + printf("Error: (__get_reg_value) Invalid reg size\n"); domain_crash_synchronous(); } } -static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) +long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) { if (size == BYTE) { switch (index) { - case 0: //%al + case 0: /* %al */ return (char)(regs->rax & 0xFF); - case 1: //%cl + case 1: /* %cl */ return (char)(regs->rcx & 0xFF); - case 2: //%dl + case 2: /* %dl */ return (char)(regs->rdx & 0xFF); - case 3: //%bl + case 3: /* %bl */ return (char)(regs->rbx & 0xFF); - case 4: //%ah + case 4: /* %ah */ return (char)((regs->rax & 0xFF00) >> 8); - case 5: //%ch + case 5: /* %ch */ return (char)((regs->rcx & 0xFF00) >> 8); - case 6: //%dh + case 6: /* %dh */ return (char)((regs->rdx & 0xFF00) >> 8); - case 7: //%bh + case 7: /* %bh */ return (char)((regs->rbx & 0xFF00) >> 8); default: - printk("Error: (get_reg_value)Invalid index value\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } - - } + } + switch (index) { case 0: return __get_reg_value(regs->rax, size); case 1: return __get_reg_value(regs->rcx, size); @@ -113,7 +113,7 @@ case 14: return __get_reg_value(regs->r14, size); case 15: return __get_reg_value(regs->r15, size); default: - printk("Error: (get_reg_value)Invalid index value\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } @@ -129,117 +129,91 @@ __vmread(GUEST_RIP, ®s->eip); } -static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) +static inline long __get_reg_value(unsigned long reg, int size) { - /* - * Reference the db_reg[] table - */ - switch (size) { - case BYTE: + switch(size) { + case WORD: + return (short)(reg & 0xFFFF); + case LONG: + return (int)(reg & 0xFFFFFFFF); + default: + printf("Error: (__get_reg_value) Invalid reg size\n"); + 
domain_crash_synchronous(); + } +} + +long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs) +{ + if (size == BYTE) { switch (index) { - case 0: //%al + case 0: /* %al */ return (char)(regs->eax & 0xFF); - case 1: //%cl + case 1: /* %cl */ return (char)(regs->ecx & 0xFF); - case 2: //%dl + case 2: /* %dl */ return (char)(regs->edx & 0xFF); - case 3: //%bl + case 3: /* %bl */ return (char)(regs->ebx & 0xFF); - case 4: //%ah + case 4: /* %ah */ return (char)((regs->eax & 0xFF00) >> 8); - case 5: //%ch + case 5: /* %ch */ return (char)((regs->ecx & 0xFF00) >> 8); - case 6: //%dh + case 6: /* %dh */ return (char)((regs->edx & 0xFF00) >> 8); - case 7: //%bh + case 7: /* %bh */ return (char)((regs->ebx & 0xFF00) >> 8); default: - printk("Error: (get_reg_value)size case 0 error\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } - case WORD: + } + switch (index) { - case 0: //%ax - return (short)(regs->eax & 0xFFFF); - case 1: //%cx - return (short)(regs->ecx & 0xFFFF); - case 2: //%dx - return (short)(regs->edx & 0xFFFF); - case 3: //%bx - return (short)(regs->ebx & 0xFFFF); - case 4: //%sp - return (short)(regs->esp & 0xFFFF); - break; - case 5: //%bp - return (short)(regs->ebp & 0xFFFF); - case 6: //%si - return (short)(regs->esi & 0xFFFF); - case 7: //%di - return (short)(regs->edi & 0xFFFF); - default: - printk("Error: (get_reg_value)size case 1 error\n"); - domain_crash_synchronous(); - } - case LONG: - switch (index) { - case 0: //%eax - return regs->eax; - case 1: //%ecx - return regs->ecx; - case 2: //%edx - return regs->edx; - - case 3: //%ebx - return regs->ebx; - case 4: //%esp - return regs->esp; - case 5: //%ebp - return regs->ebp; - case 6: //%esi - return regs->esi; - case 7: //%edi - return regs->edi; - default: - printk("Error: (get_reg_value)size case 2 error\n"); - domain_crash_synchronous(); - } + case 0: return __get_reg_value(regs->eax, size); + case 1: return __get_reg_value(regs->ecx, 
size); + case 2: return __get_reg_value(regs->edx, size); + case 3: return __get_reg_value(regs->ebx, size); + case 4: return __get_reg_value(regs->esp, size); + case 5: return __get_reg_value(regs->ebp, size); + case 6: return __get_reg_value(regs->esi, size); + case 7: return __get_reg_value(regs->edi, size); default: - printk("Error: (get_reg_value)size case error\n"); + printf("Error: (get_reg_value) Invalid index value\n"); domain_crash_synchronous(); } } #endif -static inline const unsigned char *check_prefix(const unsigned char *inst, struct instruction *thread_inst, unsigned char *rex_p) +static inline unsigned char *check_prefix(unsigned char *inst, + struct instruction *thread_inst, unsigned char *rex_p) { while (1) { switch (*inst) { - /* rex prefix for em64t instructions*/ + /* rex prefix for em64t instructions */ case 0x40 ... 0x4e: *rex_p = *inst; break; - - case 0xf3: //REPZ + case 0xf3: /* REPZ */ thread_inst->flags = REPZ; - break; - case 0xf2: //REPNZ + break; + case 0xf2: /* REPNZ */ thread_inst->flags = REPNZ; - break; - case 0xf0: //LOCK + break; + case 0xf0: /* LOCK */ break; - case 0x2e: //CS - case 0x36: //SS - case 0x3e: //DS - case 0x26: //ES - case 0x64: //FS - case 0x65: //GS - thread_inst->seg_sel = *inst; + case 0x2e: /* CS */ + case 0x36: /* SS */ + case 0x3e: /* DS */ + case 0x26: /* ES */ + case 0x64: /* FS */ + case 0x65: /* GS */ + thread_inst->seg_sel = *inst; break; - case 0x66: //32bit->16bit + case 0x66: /* 32bit->16bit */ thread_inst->op_size = WORD; break; case 0x67: - printf("Error: Not handling 0x67 (yet)\n"); + printf("Error: Not handling 0x67 (yet)\n"); domain_crash_synchronous(); break; default: @@ -249,7 +223,7 @@ } } -static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size) +static inline unsigned long get_immediate(int op16,const unsigned char *inst, int op_size) { int mod, reg, rm; unsigned long val = 0; @@ -317,275 +291,328 @@ static void init_instruction(struct instruction 
*mmio_inst) { - memset(mmio_inst->i_name, '0', I_NAME_LEN); - mmio_inst->op_size = 0; - mmio_inst->offset = 0; + mmio_inst->instr = 0; + mmio_inst->op_size = 0; mmio_inst->immediate = 0; mmio_inst->seg_sel = 0; - mmio_inst->op_num = 0; mmio_inst->operand[0] = 0; mmio_inst->operand[1] = 0; - mmio_inst->operand[2] = 0; mmio_inst->flags = 0; } #define GET_OP_SIZE_FOR_BYTE(op_size) \ - do {if (rex) op_size = BYTE_64;else op_size = BYTE;} while(0) + do { \ + if (rex) \ + op_size = BYTE_64; \ + else \ + op_size = BYTE; \ + } while(0) #define GET_OP_SIZE_FOR_NONEBYTE(op_size) \ - do {if (rex & 0x8) op_size = QUAD; else if (op_size != WORD) op_size = LONG;} while(0) - -static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst) + do { \ + if (rex & 0x8) \ + op_size = QUAD; \ + else if (op_size != WORD) \ + op_size = LONG; \ + } while(0) + + +/* + * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax) + */ +static int mem_acc(unsigned char size, struct instruction *instr) +{ + instr->operand[0] = mk_operand(size, 0, 0, MEMORY); + instr->operand[1] = mk_operand(size, 0, 0, REGISTER); + return DECODE_success; +} + +/* + * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32) + */ +static int acc_mem(unsigned char size, struct instruction *instr) +{ + instr->operand[0] = mk_operand(size, 0, 0, REGISTER); + instr->operand[1] = mk_operand(size, 0, 0, MEMORY); + return DECODE_success; +} + +/* + * Decode mem,reg operands (as in <opcode> r32/16, m32/16) + */ +static int mem_reg(unsigned char size, unsigned char *opcode, + struct instruction *instr, unsigned char rex) +{ + int index = get_index(opcode + 1, rex); + + instr->operand[0] = mk_operand(size, 0, 0, MEMORY); + instr->operand[1] = mk_operand(size, index, 0, REGISTER); + return DECODE_success; +} + +/* + * Decode reg,mem operands (as in <opcode> m32/16, r32/16) + */ +static int reg_mem(unsigned char size, unsigned char *opcode, + struct instruction *instr, unsigned 
char rex) +{ + int index = get_index(opcode + 1, rex); + + instr->operand[0] = mk_operand(size, index, 0, REGISTER); + instr->operand[1] = mk_operand(size, 0, 0, MEMORY); + return DECODE_success; +} + +static int vmx_decode(unsigned char *opcode, struct instruction *instr) { unsigned long eflags; int index, vm86 = 0; unsigned char rex = 0; unsigned char tmp_size = 0; - - init_instruction(thread_inst); - - inst = check_prefix(inst, thread_inst, &rex); + init_instruction(instr); + + opcode = check_prefix(opcode, instr, &rex); __vmread(GUEST_RFLAGS, &eflags); if (eflags & X86_EFLAGS_VM) vm86 = 1; if (vm86) { /* meaning is reversed */ - if (thread_inst->op_size == WORD) - thread_inst->op_size = LONG; - else if (thread_inst->op_size == LONG) - thread_inst->op_size = WORD; - else if (thread_inst->op_size == 0) - thread_inst->op_size = WORD; - } - - switch(*inst) { - case 0x81: - /* This is only a workaround for cmpl instruction*/ - strcpy((char *)thread_inst->i_name, "cmp"); + if (instr->op_size == WORD) + instr->op_size = LONG; + else if (instr->op_size == LONG) + instr->op_size = WORD; + else if (instr->op_size == 0) + instr->op_size = WORD; + } + + switch (*opcode) { + case 0x0B: /* or m32/16, r32/16 */ + instr->instr = INSTR_OR; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return mem_reg(instr->op_size, opcode, instr, rex); + + case 0x20: /* and r8, m8 */ + instr->instr = INSTR_AND; + GET_OP_SIZE_FOR_BYTE(instr->op_size); + return reg_mem(instr->op_size, opcode, instr, rex); + + case 0x21: /* and r32/16, m32/16 */ + instr->instr = INSTR_AND; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return reg_mem(instr->op_size, opcode, instr, rex); + + case 0x23: /* and m32/16, r32/16 */ + instr->instr = INSTR_AND; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return mem_reg(instr->op_size, opcode, instr, rex); + + case 0x30: /* xor r8, m8 */ + instr->instr = INSTR_XOR; + GET_OP_SIZE_FOR_BYTE(instr->op_size); + return reg_mem(instr->op_size, opcode, instr, rex); + + case 0x31: 
/* xor r32/16, m32/16 */ + instr->instr = INSTR_XOR; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return reg_mem(instr->op_size, opcode, instr, rex); + + case 0x39: /* cmp r32/16, m32/16 */ + instr->instr = INSTR_CMP; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return reg_mem(instr->op_size, opcode, instr, rex); + + case 0x81: + if (((opcode[1] >> 3) & 7) == 7) { /* cmp $imm, m32/16 */ + instr->instr = INSTR_CMP; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); + instr->immediate = get_immediate(vm86, opcode+1, BYTE); + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + return DECODE_success; - - case 0x88: - /* mov r8 to m8 */ - thread_inst->op_size = BYTE; - index = get_index((inst + 1), rex); - GET_OP_SIZE_FOR_BYTE(tmp_size); - thread_inst->operand[0] = mk_operand(tmp_size, index, 0, REGISTER); - - break; - case 0x89: - /* mov r32/16 to m32/16 */ - index = get_index((inst + 1), rex); - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - thread_inst->operand[0] = mk_operand(thread_inst->op_size, index, 0, REGISTER); - - break; - case 0x8a: - /* mov m8 to r8 */ - thread_inst->op_size = BYTE; - index = get_index((inst + 1), rex); - GET_OP_SIZE_FOR_BYTE(tmp_size); - thread_inst->operand[1] = mk_operand(tmp_size, index, 0, REGISTER); - break; - case 0x8b: - /* mov r32/16 to m32/16 */ - index = get_index((inst + 1), rex); - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER); - break; - case 0x8c: - case 0x8e: - printk("%x, This opcode hasn't been handled yet!", *inst); - return DECODE_failure; - /* Not handle it yet. 
*/ - case 0xa0: - /* mov byte to al */ - thread_inst->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(tmp_size); - thread_inst->operand[1] = mk_operand(tmp_size, 0, 0, REGISTER); - break; - case 0xa1: - /* mov word/doubleword to ax/eax */ - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - thread_inst->operand[1] = mk_operand(thread_inst->op_size, 0, 0, REGISTER); - - break; - case 0xa2: - /* mov al to (seg:offset) */ - thread_inst->op_size = BYTE; - GET_OP_SIZE_FOR_BYTE(tmp_size); - thread_inst->operand[0] = mk_operand(tmp_size, 0, 0, REGISTER); - break; - case 0xa3: - /* mov ax/eax to (seg:offset) */ - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, REGISTER); - break; - case 0xa4: - /* movsb */ - thread_inst->op_size = BYTE; - strcpy((char *)thread_inst->i_name, "movs"); - return DECODE_success; - case 0xa5: - /* movsw/movsl */ - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - strcpy((char *)thread_inst->i_name, "movs"); - return DECODE_success; - case 0xaa: - /* stosb */ - thread_inst->op_size = BYTE; - strcpy((char *)thread_inst->i_name, "stosb"); - return DECODE_success; - case 0xab: - /* stosw/stosl */ - if (thread_inst->op_size == WORD) { - strcpy((char *)thread_inst->i_name, "stosw"); - } else { - thread_inst->op_size = LONG; - strcpy((char *)thread_inst->i_name, "stosl"); - } - return DECODE_success; - case 0xc6: - /* mov imm8 to m8 */ - thread_inst->op_size = BYTE; - thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); - thread_inst->immediate = get_immediate(vm86, - (inst+1), thread_inst->op_size); - break; - case 0xc7: - /* mov imm16/32 to m16/32 */ - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, IMMEDIATE); - thread_inst->immediate = get_immediate(vm86, (inst+1), thread_inst->op_size); + } else + return DECODE_failure; + + case 0x84: /* test m8, r8 */ + instr->instr = INSTR_TEST; + instr->op_size = BYTE; + 
GET_OP_SIZE_FOR_BYTE(tmp_size); + return mem_reg(tmp_size, opcode, instr, rex); + + case 0x88: /* mov r8, m8 */ + instr->instr = INSTR_MOV; + instr->op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(tmp_size); + return reg_mem(tmp_size, opcode, instr, rex); + + case 0x89: /* mov r32/16, m32/16 */ + instr->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return reg_mem(instr->op_size, opcode, instr, rex); + + case 0x8A: /* mov m8, r8 */ + instr->instr = INSTR_MOV; + instr->op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(tmp_size); + return mem_reg(tmp_size, opcode, instr, rex); + + case 0x8B: /* mov m32/16, r32/16 */ + instr->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return mem_reg(instr->op_size, opcode, instr, rex); + + case 0xA0: /* mov <addr>, al */ + instr->instr = INSTR_MOV; + instr->op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(tmp_size); + return mem_acc(tmp_size, instr); + + case 0xA1: /* mov <addr>, ax/eax */ + instr->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return mem_acc(instr->op_size, instr); + + case 0xA2: /* mov al, <addr> */ + instr->instr = INSTR_MOV; + instr->op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(tmp_size); + return acc_mem(tmp_size, instr); + + case 0xA3: /* mov ax/eax, <addr> */ + instr->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return acc_mem(instr->op_size, instr); + + case 0xA4: /* movsb */ + instr->instr = INSTR_MOVS; + instr->op_size = BYTE; + return DECODE_success; - break; - case 0x0f: - break; - default: - printk("%x, This opcode hasn't been handled yet!", *inst); - return DECODE_failure; - } + case 0xA5: /* movsw/movsl */ + instr->instr = INSTR_MOVS; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return DECODE_success; - strcpy((char *)thread_inst->i_name, "mov"); - if (*inst != 0x0f) { + case 0xAA: /* stosb */ + instr->instr = INSTR_STOS; + instr->op_size = BYTE; return DECODE_success; - } - - inst++; - switch (*inst) { + + case 0xAB: /* stosw/stosl */ + instr->instr = 
INSTR_STOS; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + return DECODE_success; - /* movz */ - case 0xb6: - index = get_index((inst + 1), rex); - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); - thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER); - thread_inst->op_size = BYTE; - strcpy((char *)thread_inst->i_name, "movzb"); + case 0xC6: + if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */ + instr->instr = INSTR_MOV; + instr->op_size = BYTE; + + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); + instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); return DECODE_success; - case 0xb7: - index = get_index((inst + 1), rex); - if (rex & 0x8) { - thread_inst->op_size = LONG; - thread_inst->operand[1] = mk_operand(QUAD, index, 0, REGISTER); - } else { - thread_inst->op_size = WORD; - thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER); - } + } else + return DECODE_failure; - strcpy((char *)thread_inst->i_name, "movzw"); + case 0xC7: + if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */ + instr->instr = INSTR_MOV; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); + instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); return DECODE_success; - default: - printk("0f %x, This opcode hasn't been handled yet!", *inst); - return DECODE_failure; - } - - /* will never reach here */ - return DECODE_failure; + } else + return DECODE_failure; + + case 0xF6: + if (((opcode[1] >> 3) & 7) == 0) { /* testb $imm8, m8 */ + instr->instr = INSTR_TEST; + instr->op_size = BYTE; + + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); + instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + + return DECODE_success; + } else + 
return DECODE_failure; + + case 0x0F: + break; + + default: + printf("%x, This opcode isn't handled yet!\n", *opcode); + return DECODE_failure; + } + + switch (*++opcode) { + case 0xB6: /* movz m8, r16/r32 */ + instr->instr = INSTR_MOVZ; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + index = get_index(opcode + 1, rex); + instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); + instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER); + return DECODE_success; + + case 0xB7: /* movz m16, r32 */ + instr->instr = INSTR_MOVZ; + index = get_index(opcode + 1, rex); + if (rex & 0x8) { + instr->op_size = LONG; + instr->operand[1] = mk_operand(QUAD, index, 0, REGISTER); + } else { + instr->op_size = WORD; + instr->operand[1] = mk_operand(LONG, index, 0, REGISTER); + } + instr->operand[0] = mk_operand(instr->op_size, 0, 0, MEMORY); + return DECODE_success; + + default: + printf("0f %x, This opcode isn't handled yet\n", *opcode); + return DECODE_failure; + } } int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len) { - unsigned long gpa; - unsigned long mfn; - unsigned char *inst_start; - int remaining = 0; - - if ( (inst_len > MAX_INST_LEN) || (inst_len <= 0) ) + if (inst_len > MAX_INST_LEN || inst_len <= 0) return 0; - - if ( vmx_paging_enabled(current) ) - { - gpa = gva_to_gpa(guest_eip); - mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT); - - /* Does this cross a page boundary ? 
*/ - if ( (guest_eip & PAGE_MASK) != ((guest_eip + inst_len) & PAGE_MASK) ) - { - remaining = (guest_eip + inst_len) & ~PAGE_MASK; - inst_len -= remaining; - } - } - else - { - mfn = phys_to_machine_mapping(guest_eip >> PAGE_SHIFT); - } - - inst_start = map_domain_page(mfn); - memcpy((char *)buf, inst_start + (guest_eip & ~PAGE_MASK), inst_len); - unmap_domain_page(inst_start); - - if ( remaining ) - { - gpa = gva_to_gpa(guest_eip+inst_len+remaining); - mfn = phys_to_machine_mapping(gpa >> PAGE_SHIFT); - - inst_start = map_domain_page(mfn); - memcpy((char *)buf+inst_len, inst_start, remaining); - unmap_domain_page(inst_start); - } - - return inst_len+remaining; -} - -static int read_from_mmio(struct instruction *inst_p) -{ - // Only for mov instruction now!!! - if (inst_p->operand[1] & REGISTER) - return 1; - - return 0; -} - -// dir: 1 read from mmio -// 0 write to mmio -static void send_mmio_req(unsigned long gpa, - struct instruction *inst_p, long value, int dir, int pvalid) + if (!vmx_copy(buf, guest_eip, inst_len, VMX_COPY_IN)) + return 0; + return inst_len; +} + +void send_mmio_req(unsigned char type, unsigned long gpa, + unsigned long count, int size, long value, int dir, int pvalid) { struct vcpu *d = current; vcpu_iodata_t *vio; ioreq_t *p; int vm86; - struct mi_per_cpu_info *mpci_p; - struct cpu_user_regs *inst_decoder_regs; + struct cpu_user_regs *regs; extern long evtchn_send(int lport); - mpci_p = ¤t->domain->arch.vmx_platform.mpci; - inst_decoder_regs = mpci_p->inst_decoder_regs; + regs = current->domain->arch.vmx_platform.mpci.inst_decoder_regs; vio = get_vio(d->domain, d->vcpu_id); - if (vio == NULL) { - printk("bad shared page\n"); + printf("bad shared page\n"); domain_crash_synchronous(); } + p = &vio->vp_ioreq; - vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM; + vm86 = regs->eflags & X86_EFLAGS_VM; if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) { printf("VMX I/O has not yet completed\n"); @@ -596,24 +623,21 @@ p->dir = dir; 
p->pdata_valid = pvalid; - p->port_mm = 1; - p->size = inst_p->op_size; + p->type = type; + p->size = size; p->addr = gpa; - p->u.data = value; + p->count = count; + p->df = regs->eflags & EF_DF ? 1 : 0; + + if (pvalid) { + if (vmx_paging_enabled(current)) + p->u.pdata = (void *) gva_to_gpa(value); + else + p->u.pdata = (void *) value; /* guest VA == guest PA */ + } else + p->u.data = value; p->state = STATE_IOREQ_READY; - - if (inst_p->flags & REPZ) { - if (vm86) - p->count = inst_decoder_regs->ecx & 0xFFFF; - else - p->count = inst_decoder_regs->ecx; - p->df = (inst_decoder_regs->eflags & EF_DF) ? 1 : 0; - } else - p->count = 1; - - if ((pvalid) && vmx_paging_enabled(current)) - p->u.pdata = (void *) gva_to_gpa(p->u.data); if (vmx_mmio_intercept(p)){ p->state = STATE_IORESP_READY; @@ -625,21 +649,53 @@ vmx_wait_io(); } +static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, + struct mi_per_cpu_info *mpcip, struct cpu_user_regs *regs) +{ + unsigned long value = 0; + int index, size; + + size = operand_size(inst->operand[0]); + + mpcip->flags = inst->flags; + mpcip->instr = inst->instr; + mpcip->operand[0] = inst->operand[0]; /* source */ + mpcip->operand[1] = inst->operand[1]; /* destination */ + + if (inst->operand[0] & REGISTER) { /* dest is memory */ + index = operand_index(inst->operand[0]); + value = get_reg_value(size, index, 0, regs); + send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0); + } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */ + value = inst->immediate; + send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0); + } else if (inst->operand[0] & MEMORY) { /* dest is register */ + /* send the request and wait for the value */ + send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0); + } else { + printf("mmio_operands: invalid operand\n"); + domain_crash_synchronous(); + } +} + +#define GET_REPEAT_COUNT() \ + (mmio_inst.flags & REPZ ? (vm86 ? 
regs->ecx & 0xFFFF : regs->ecx) : 1) + void handle_mmio(unsigned long va, unsigned long gpa) { unsigned long eip, eflags, cs; unsigned long inst_len, inst_addr; - struct mi_per_cpu_info *mpci_p; - struct cpu_user_regs *inst_decoder_regs; + struct mi_per_cpu_info *mpcip; + struct cpu_user_regs *regs; struct instruction mmio_inst; unsigned char inst[MAX_INST_LEN]; - int vm86, ret; + int i, vm86, ret; - mpci_p = ¤t->domain->arch.vmx_platform.mpci; - inst_decoder_regs = mpci_p->inst_decoder_regs; + mpcip = ¤t->domain->arch.vmx_platform.mpci; + regs = mpcip->inst_decoder_regs; __vmread(GUEST_RIP, &eip); - __vmread(INSTRUCTION_LEN, &inst_len); + __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); __vmread(GUEST_RFLAGS, &eflags); vm86 = eflags & X86_EFLAGS_VM; @@ -647,108 +703,142 @@ __vmread(GUEST_CS_SELECTOR, &cs); inst_addr = (cs << 4) + eip; } else - inst_addr = eip; /* XXX should really look at GDT[cs].base too */ - - memset(inst, '0', MAX_INST_LEN); + inst_addr = eip; + + memset(inst, 0, MAX_INST_LEN); ret = inst_copy_from_guest(inst, inst_addr, inst_len); if (ret != inst_len) { - printk("handle_mmio - EXIT: get guest instruction fault\n"); + printf("handle_mmio - EXIT: get guest instruction fault\n"); domain_crash_synchronous(); } - init_instruction(&mmio_inst); if (vmx_decode(inst, &mmio_inst) == DECODE_failure) { - printk("vmx decode failure: eip=%lx, va=%lx\n %x %x %x %x\n", eip, va, - inst[0], inst[1], inst[2], inst[3]); + printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:", + va, gpa, inst_len); + for (i = 0; i < inst_len; i++) + printf(" %02x", inst[i] & 0xFF); + printf("\n"); domain_crash_synchronous(); } - __vmwrite(GUEST_RIP, eip + inst_len); - store_cpu_user_regs(inst_decoder_regs); - - // Only handle "mov" and "movs" instructions! - if (!strncmp((char *)mmio_inst.i_name, "movz", 4)) { - if (read_from_mmio(&mmio_inst)) { - // Send the request and waiting for return value. 
- mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND; - send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0); - return ; - } else { - printk("handle_mmio - EXIT: movz error!\n"); - domain_crash_synchronous(); - } - } - - if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) { + store_cpu_user_regs(regs); + regs->eip += inst_len; /* advance %eip */ + + switch (mmio_inst.instr) { + case INSTR_MOV: + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); + break; + + case INSTR_MOVS: + { + unsigned long count = GET_REPEAT_COUNT(); + unsigned long size = mmio_inst.op_size; + int sign = regs->eflags & EF_DF ? -1 : 1; unsigned long addr = 0; int dir; + /* determine non-MMIO address */ if (vm86) { unsigned long seg; __vmread(GUEST_ES_SELECTOR, &seg); - if (((seg << 4) + (inst_decoder_regs->edi & 0xFFFF)) == va) { + if (((seg << 4) + (regs->edi & 0xFFFF)) == va) { dir = IOREQ_WRITE; __vmread(GUEST_DS_SELECTOR, &seg); - addr = (seg << 4) + (inst_decoder_regs->esi & 0xFFFF); + addr = (seg << 4) + (regs->esi & 0xFFFF); } else { dir = IOREQ_READ; - addr = (seg << 4) + (inst_decoder_regs->edi & 0xFFFF); + addr = (seg << 4) + (regs->edi & 0xFFFF); } - } else { /* XXX should really look at GDT[ds/es].base too */ - if (va == inst_decoder_regs->edi) { + } else { + if (va == regs->edi) { dir = IOREQ_WRITE; - addr = inst_decoder_regs->esi; + addr = regs->esi; } else { dir = IOREQ_READ; - addr = inst_decoder_regs->edi; + addr = regs->edi; } } - send_mmio_req(gpa, &mmio_inst, addr, dir, 1); - return; - } - - if (!strncmp((char *)mmio_inst.i_name, "mov", 3)) { - long value = 0; - int size, index; - - if (read_from_mmio(&mmio_inst)) { - // Send the request and waiting for return value. 
- mpci_p->mmio_target = mmio_inst.operand[1]; - send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0); - return; - } else { - // Write to MMIO - if (mmio_inst.operand[0] & IMMEDIATE) { - value = mmio_inst.immediate; - } else if (mmio_inst.operand[0] & REGISTER) { - size = operand_size(mmio_inst.operand[0]); - index = operand_index(mmio_inst.operand[0]); - value = get_reg_value(size, index, 0, inst_decoder_regs); - } else { - domain_crash_synchronous(); - } - send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0); - return; - } - } - - if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) { - send_mmio_req(gpa, &mmio_inst, - inst_decoder_regs->eax, IOREQ_WRITE, 0); - return; - } - /* Workaround for cmp instruction */ - if (!strncmp((char *)mmio_inst.i_name, "cmp", 3)) { - inst_decoder_regs->eflags &= ~X86_EFLAGS_ZF; - __vmwrite(GUEST_RFLAGS, inst_decoder_regs->eflags); - return; - } - - domain_crash_synchronous(); + mpcip->flags = mmio_inst.flags; + mpcip->instr = mmio_inst.instr; + + /* + * In case of a movs spanning multiple pages, we break the accesses + * up into multiple pages (the device model works with non-continguous + * physical guest pages). To copy just one page, we adjust %ecx and + * do not advance %eip so that the next "rep movs" copies the next page. + * Unaligned accesses, for example movsl starting at PGSZ-2, are + * turned into a single copy where we handle the overlapping memory + * copy ourself. After this copy succeeds, "rep movs" is executed + * again. 
+ */ + if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) { + unsigned long value = 0; + + mpcip->flags |= OVERLAP; + + regs->eip -= inst_len; /* do not advance %eip */ + + if (dir == IOREQ_WRITE) + vmx_copy(&value, addr, size, VMX_COPY_IN); + send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0); + } else { + if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) { + regs->eip -= inst_len; /* do not advance %eip */ + + if (sign > 0) + count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; + else + count = (addr & ~PAGE_MASK) / size; + } + + send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1); + } + break; + } + + case INSTR_MOVZ: + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); + break; + + case INSTR_STOS: + /* + * Since the destination is always in (contiguous) mmio space we don't + * need to break it up into pages. + */ + mpcip->flags = mmio_inst.flags; + mpcip->instr = mmio_inst.instr; + send_mmio_req(IOREQ_TYPE_COPY, gpa, + GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0); + break; + + case INSTR_OR: + mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mpcip, regs); + break; + + case INSTR_AND: + mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mpcip, regs); + break; + + case INSTR_XOR: + mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mpcip, regs); + break; + + case INSTR_CMP: + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); + break; + + case INSTR_TEST: + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); + break; + + default: + printf("Unhandled MMIO instruction\n"); + domain_crash_synchronous(); + } } #endif /* CONFIG_VMX */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/vmx_vmcs.c --- a/xen/arch/x86/vmx_vmcs.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/vmx_vmcs.c Fri Sep 9 16:30:54 2005 @@ -44,7 +44,7 @@ rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high); vmcs_size = vmx_msr_high & 0x1fff; - vmcs = alloc_xenheap_pages(get_order(vmcs_size)); + vmcs = 
alloc_xenheap_pages(get_order_from_bytes(vmcs_size)); memset((char *)vmcs, 0, vmcs_size); /* don't remove this */ vmcs->vmcs_revision_id = vmx_msr_low; @@ -55,7 +55,7 @@ { int order; - order = get_order(vmcs_size); + order = get_order_from_bytes(vmcs_size); free_xenheap_pages(vmcs, order); } @@ -76,8 +76,8 @@ error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS); /* need to use 0x1000 instead of PAGE_SIZE */ - io_bitmap_a = (void*) alloc_xenheap_pages(get_order(0x1000)); - io_bitmap_b = (void*) alloc_xenheap_pages(get_order(0x1000)); + io_bitmap_a = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000)); + io_bitmap_b = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000)); memset(io_bitmap_a, 0xff, 0x1000); /* don't bother debug port access */ clear_bit(PC_DEBUG_PORT, io_bitmap_a); @@ -148,7 +148,7 @@ offset = (addr & ~PAGE_MASK); addr = round_pgdown(addr); - mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT); + mpfn = get_mfn_from_pfn(addr >> PAGE_SHIFT); p = map_domain_page(mpfn); e820p = (struct e820entry *) ((unsigned long) p + offset); @@ -175,7 +175,7 @@ unmap_domain_page(p); /* Initialise shared page */ - mpfn = phys_to_machine_mapping(gpfn); + mpfn = get_mfn_from_pfn(gpfn); p = map_domain_page(mpfn); d->domain->arch.vmx_platform.shared_page_va = (unsigned long)p; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/asm-offsets.c --- a/xen/arch/x86/x86_32/asm-offsets.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_32/asm-offsets.c Fri Sep 9 16:30:54 2005 @@ -71,6 +71,9 @@ OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask); BLANK(); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); + BLANK(); + OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code); OFFSET(TRAPBOUNCE_cr2, struct trap_bounce, cr2); OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_32/entry.S Fri Sep 9 
16:30:54 2005 @@ -61,6 +61,11 @@ #include <asm/page.h> #include <public/xen.h> +#define GET_GUEST_REGS(reg) \ + movl $~(STACK_SIZE-1),reg; \ + andl %esp,reg; \ + orl $(STACK_SIZE-CPUINFO_sizeof),reg; + #define GET_CURRENT(reg) \ movl $STACK_SIZE-4, reg; \ orl %esp, reg; \ @@ -121,6 +126,9 @@ ENTRY(vmx_asm_vmexit_handler) /* selectors are restored/saved by VMX */ VMX_SAVE_ALL_NOSEGREGS +#ifdef TRACE_BUFFER + call trace_vmexit +#endif call vmx_vmexit_handler jmp vmx_asm_do_resume @@ -142,6 +150,9 @@ /* vmx_restore_all_guest */ call vmx_intr_assist call load_cr2 +#ifdef TRACE_BUFFER + call trace_vmentry +#endif .endif VMX_RESTORE_ALL_NOSEGREGS /* @@ -273,7 +284,41 @@ GET_CURRENT(%ebx) andl $(NR_hypercalls-1),%eax PERFC_INCR(PERFC_hypercalls, %eax) +#ifndef NDEBUG + /* Deliberately corrupt parameter regs not used by this hypercall. */ + pushl %eax + pushl UREGS_eip+4(%esp) + pushl 28(%esp) # EBP + pushl 28(%esp) # EDI + pushl 28(%esp) # ESI + pushl 28(%esp) # EDX + pushl 28(%esp) # ECX + pushl 28(%esp) # EBX + movzb hypercall_args_table(,%eax,1),%ecx + leal (%esp,%ecx,4),%edi + subl $6,%ecx + negl %ecx + movl %eax,%esi + movl $0xDEADBEEF,%eax + rep stosl + movl %esi,%eax +#endif call *hypercall_table(,%eax,4) +#ifndef NDEBUG + /* Deliberately corrupt parameter regs used by this hypercall. 
*/ + addl $24,%esp # Shadow parameters + popl %ecx # Shadow EIP + cmpl %ecx,UREGS_eip(%esp) + popl %ecx # Shadow hypercall index + jne skip_clobber # If EIP has changed then don't clobber + movzb hypercall_args_table(,%ecx,1),%ecx + movl %esp,%edi + movl %eax,%esi + movl $0xDEADBEEF,%eax + rep stosl + movl %esi,%eax +skip_clobber: +#endif movl %eax,UREGS_eax(%esp) # save the return value test_all_events: @@ -674,12 +719,14 @@ do_arch_sched_op: # Ensure we return success even if we return via schedule_tail() xorl %eax,%eax - movl %eax,UREGS_eax+4(%esp) + GET_GUEST_REGS(%ecx) + movl %eax,UREGS_eax(%ecx) jmp do_sched_op do_switch_vm86: - # Discard the return address - addl $4,%esp + # Reset the stack pointer + GET_GUEST_REGS(%ecx) + movl %ecx,%esp # GS:ESI == Ring-1 stack activation movl UREGS_esp(%esp),%esi @@ -749,7 +796,7 @@ .long do_get_debugreg .long do_update_descriptor /* 10 */ .long do_ni_hypercall - .long do_dom_mem_op + .long do_memory_op .long do_multicall .long do_update_va_mapping .long do_set_timer_op /* 15 */ @@ -768,3 +815,36 @@ .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr + +ENTRY(hypercall_args_table) + .byte 1 /* do_set_trap_table */ /* 0 */ + .byte 4 /* do_mmu_update */ + .byte 2 /* do_set_gdt */ + .byte 2 /* do_stack_switch */ + .byte 4 /* do_set_callbacks */ + .byte 1 /* do_fpu_taskswitch */ /* 5 */ + .byte 2 /* do_arch_sched_op */ + .byte 1 /* do_dom0_op */ + .byte 2 /* do_set_debugreg */ + .byte 1 /* do_get_debugreg */ + .byte 4 /* do_update_descriptor */ /* 10 */ + .byte 0 /* do_ni_hypercall */ + .byte 2 /* do_memory_op */ + .byte 2 /* do_multicall */ + .byte 4 /* do_update_va_mapping */ + .byte 2 /* do_set_timer_op */ /* 15 */ + .byte 1 /* do_event_channel_op */ + .byte 1 /* do_xen_version */ + .byte 3 /* do_console_io */ + .byte 1 /* do_physdev_op */ + .byte 3 /* do_grant_table_op */ /* 20 */ + .byte 2 /* do_vm_assist */ + .byte 5 /* do_update_va_mapping_otherdomain */ + .byte 0 /* do_switch_vm86 */ + .byte 2 /* 
do_boot_vcpu */ + .byte 0 /* do_ni_hypercall */ /* 25 */ + .byte 4 /* do_mmuext_op */ + .byte 1 /* do_acm_op */ + .rept NR_hypercalls-(.-hypercall_args_table) + .byte 0 /* do_ni_hypercall */ + .endr diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_32/mm.c Fri Sep 9 16:30:54 2005 @@ -95,7 +95,7 @@ * Allocate and map the machine-to-phys table and create read-only mapping * of MPT for guest-OS use. */ - mpt_size = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL; + mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { @@ -118,7 +118,8 @@ } /* Set up mapping cache for domain pages. */ - mapcache_order = get_order(MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER)); + mapcache_order = get_order_from_bytes( + MAPCACHE_MBYTES << (20 - PAGETABLE_ORDER)); mapcache = alloc_xenheap_pages(mapcache_order); memset(mapcache, 0, PAGE_SIZE << mapcache_order); for ( i = 0; i < (MAPCACHE_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_32/traps.c Fri Sep 9 16:30:54 2005 @@ -79,11 +79,8 @@ "ss: %04lx cs: %04lx\n", ds, es, fs, gs, ss, cs); - if ( GUEST_MODE(regs) ) - show_guest_stack(); - else - show_stack((unsigned long *)®s->esp); -} + show_stack(regs); +} void show_page_walk(unsigned long addr) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/asm-offsets.c --- a/xen/arch/x86/x86_64/asm-offsets.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_64/asm-offsets.c Fri Sep 9 16:30:54 2005 @@ -71,6 +71,9 @@ OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask); BLANK(); + DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info)); + BLANK(); + OFFSET(TRAPBOUNCE_error_code, struct trap_bounce, error_code); OFFSET(TRAPBOUNCE_cr2, struct 
trap_bounce, cr2); OFFSET(TRAPBOUNCE_flags, struct trap_bounce, flags); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_64/entry.S Fri Sep 9 16:30:54 2005 @@ -11,6 +11,11 @@ #include <asm/apicdef.h> #include <asm/page.h> #include <public/xen.h> + +#define GET_GUEST_REGS(reg) \ + movq $~(STACK_SIZE-1),reg; \ + andq %rsp,reg; \ + orq $(STACK_SIZE-CPUINFO_sizeof),reg; #define GET_CURRENT(reg) \ movq $STACK_SIZE-8, reg; \ @@ -120,10 +125,42 @@ /*hypercall:*/ movq %r10,%rcx andq $(NR_hypercalls-1),%rax +#ifndef NDEBUG + /* Deliberately corrupt parameter regs not used by this hypercall. */ + pushq %rdi; pushq %rsi; pushq %rdx; pushq %rcx; pushq %r8 ; pushq %r9 + leaq hypercall_args_table(%rip),%r10 + movq $6,%rcx + sub (%r10,%rax,1),%cl + movq %rsp,%rdi + movl $0xDEADBEEF,%eax + rep stosq + popq %r9 ; popq %r8 ; popq %rcx; popq %rdx; popq %rsi; popq %rdi + movq UREGS_rax(%rsp),%rax + andq $(NR_hypercalls-1),%rax + pushq %rax + pushq UREGS_rip+8(%rsp) +#endif leaq hypercall_table(%rip),%r10 PERFC_INCR(PERFC_hypercalls, %rax) callq *(%r10,%rax,8) - movq %rax,UREGS_rax(%rsp) # save the return value +#ifndef NDEBUG + /* Deliberately corrupt parameter regs used by this hypercall. */ + popq %r10 # Shadow RIP + cmpq %r10,UREGS_rip(%rsp) + popq %rcx # Shadow hypercall index + jne skip_clobber /* If RIP has changed then don't clobber. 
*/ + leaq hypercall_args_table(%rip),%r10 + movb (%r10,%rcx,1),%cl + movl $0xDEADBEEF,%r10d + cmpb $1,%cl; jb skip_clobber; movq %r10,UREGS_rdi(%rsp) + cmpb $2,%cl; jb skip_clobber; movq %r10,UREGS_rsi(%rsp) + cmpb $3,%cl; jb skip_clobber; movq %r10,UREGS_rdx(%rsp) + cmpb $4,%cl; jb skip_clobber; movq %r10,UREGS_r10(%rsp) + cmpb $5,%cl; jb skip_clobber; movq %r10,UREGS_r8(%rsp) + cmpb $6,%cl; jb skip_clobber; movq %r10,UREGS_r9(%rsp) +skip_clobber: +#endif + movq %rax,UREGS_rax(%rsp) # save the return value /* %rbx: struct vcpu */ test_all_events: @@ -302,7 +339,8 @@ 1: /* In kernel context already: push new frame at existing %rsp. */ movq UREGS_rsp+8(%rsp),%rsi andb $0xfc,UREGS_cs+8(%rsp) # Indicate kernel context to guest. -2: movq $HYPERVISOR_VIRT_START,%rax +2: andq $~0xf,%rsi # Stack frames are 16-byte aligned. + movq $HYPERVISOR_VIRT_START,%rax cmpq %rax,%rsi jb 1f # In +ve address space? Then okay. movq $HYPERVISOR_VIRT_END+60,%rax @@ -538,7 +576,8 @@ do_arch_sched_op: # Ensure we return success even if we return via schedule_tail() xorl %eax,%eax - movq %rax,UREGS_rax+8(%rsp) + GET_GUEST_REGS(%r10) + movq %rax,UREGS_rax(%r10) jmp do_sched_op .data @@ -578,7 +617,7 @@ .quad do_get_debugreg .quad do_update_descriptor /* 10 */ .quad do_ni_hypercall - .quad do_dom_mem_op + .quad do_memory_op .quad do_multicall .quad do_update_va_mapping .quad do_set_timer_op /* 15 */ @@ -597,3 +636,36 @@ .rept NR_hypercalls-((.-hypercall_table)/4) .quad do_ni_hypercall .endr + +ENTRY(hypercall_args_table) + .byte 1 /* do_set_trap_table */ /* 0 */ + .byte 4 /* do_mmu_update */ + .byte 2 /* do_set_gdt */ + .byte 2 /* do_stack_switch */ + .byte 3 /* do_set_callbacks */ + .byte 1 /* do_fpu_taskswitch */ /* 5 */ + .byte 2 /* do_arch_sched_op */ + .byte 1 /* do_dom0_op */ + .byte 2 /* do_set_debugreg */ + .byte 1 /* do_get_debugreg */ + .byte 2 /* do_update_descriptor */ /* 10 */ + .byte 0 /* do_ni_hypercall */ + .byte 2 /* do_memory_op */ + .byte 2 /* do_multicall */ + .byte 3 /* 
do_update_va_mapping */ + .byte 1 /* do_set_timer_op */ /* 15 */ + .byte 1 /* do_event_channel_op */ + .byte 1 /* do_xen_version */ + .byte 3 /* do_console_io */ + .byte 1 /* do_physdev_op */ + .byte 3 /* do_grant_table_op */ /* 20 */ + .byte 2 /* do_vm_assist */ + .byte 4 /* do_update_va_mapping_otherdomain */ + .byte 0 /* do_switch_to_user */ + .byte 2 /* do_boot_vcpu */ + .byte 2 /* do_set_segment_base */ /* 25 */ + .byte 4 /* do_mmuext_op */ + .byte 1 /* do_acm_op */ + .rept NR_hypercalls-(.-hypercall_args_table) + .byte 0 /* do_ni_hypercall */ + .endr diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_64/mm.c Fri Sep 9 16:30:54 2005 @@ -98,7 +98,7 @@ * Allocate and map the machine-to-phys table. * This also ensures L3 is present for fixmaps. */ - mpt_size = (max_page * 4) + (1UL << L2_PAGETABLE_SHIFT) - 1UL; + mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/x86_64/traps.c Fri Sep 9 16:30:54 2005 @@ -15,24 +15,24 @@ void show_registers(struct cpu_user_regs *regs) { - printk("CPU: %d\nEIP: %04x:[<%016lx>]", + printk("CPU: %d\nRIP: %04x:[<%016lx>]", smp_processor_id(), 0xffff & regs->cs, regs->rip); if ( !GUEST_MODE(regs) ) print_symbol(" %s", regs->rip); - printk("\nEFLAGS: %016lx\n", regs->eflags); - printk("rax: %016lx rbx: %016lx rcx: %016lx rdx: %016lx\n", - regs->rax, regs->rbx, regs->rcx, regs->rdx); - printk("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->rsi, regs->rdi, regs->rbp, regs->rsp); - printk("r8: %016lx r9: %016lx r10: %016lx r11: %016lx\n", - regs->r8, regs->r9, regs->r10, regs->r11); - printk("r12: %016lx r13: %016lx r14: %016lx r15: %016lx\n", - regs->r12, regs->r13, 
regs->r14, regs->r15); + printk("\nRFLAGS: %016lx\n", regs->eflags); + printk("rax: %016lx rbx: %016lx rcx: %016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("rdx: %016lx rsi: %016lx rdi: %016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("rbp: %016lx rsp: %016lx r8: %016lx\n", + regs->rbp, regs->rsp, regs->r8); + printk("r9: %016lx r10: %016lx r11: %016lx\n", + regs->r9, regs->r10, regs->r11); + printk("r12: %016lx r13: %016lx r14: %016lx\n", + regs->r12, regs->r13, regs->r14); + printk("r15: %016lx\n", regs->r15); - if ( GUEST_MODE(regs) ) - show_guest_stack(); - else - show_stack((unsigned long *)regs->rsp); + show_stack(regs); } void show_page_walk(unsigned long addr) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/Makefile --- a/xen/common/Makefile Thu Sep 8 15:18:40 2005 +++ b/xen/common/Makefile Fri Sep 9 16:30:54 2005 @@ -2,7 +2,6 @@ include $(BASEDIR)/Rules.mk ifeq ($(TARGET_ARCH),ia64) -#OBJS := $(subst dom_mem_ops.o,,$(OBJS)) OBJS := $(subst grant_table.o,,$(OBJS)) endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/acm_ops.c --- a/xen/common/acm_ops.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/acm_ops.c Fri Sep 9 16:30:54 2005 @@ -19,6 +19,7 @@ #include <xen/types.h> #include <xen/lib.h> #include <xen/mm.h> +#include <public/acm.h> #include <public/acm_ops.h> #include <xen/sched.h> #include <xen/event.h> @@ -41,7 +42,8 @@ POLICY, /* access to policy interface (early drop) */ GETPOLICY, /* dump policy cache */ SETPOLICY, /* set policy cache (controls security) */ - DUMPSTATS /* dump policy statistics */ + DUMPSTATS, /* dump policy statistics */ + GETSSID /* retrieve ssidref for domain id */ } acm_operation_t; int acm_authorize_acm_ops(struct domain *d, acm_operation_t pops) @@ -117,6 +119,35 @@ } break; + case ACM_GETSSID: + { + ssidref_t ssidref; + + if (acm_authorize_acm_ops(current->domain, GETSSID)) + return -EACCES; + + if (op->u.getssid.get_ssid_by == SSIDREF) + ssidref = op->u.getssid.id.ssidref; + else if (op->u.getssid.get_ssid_by == 
DOMAINID) { + struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid); + if (!subj) + return -ESRCH; /* domain not found */ + + ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; + put_domain(subj); + } else + return -ESRCH; + + ret = acm_get_ssid(ssidref, + op->u.getssid.ssidbuf, + op->u.getssid.ssidbuf_size); + if (ret == ACM_OK) + ret = 0; + else + ret = -ESRCH; + } + break; + default: ret = -ESRCH; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/domain.c --- a/xen/common/domain.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/domain.c Fri Sep 9 16:30:54 2005 @@ -114,6 +114,8 @@ sched_rem_domain(v); domain_relinquish_resources(d); put_domain(d); + + send_guest_virq(dom0->vcpu[0], VIRQ_DOM_EXC); } } @@ -174,7 +176,7 @@ void domain_shutdown(u8 reason) { struct domain *d = current->domain; - struct vcpu *v; + struct vcpu *v; if ( d->domain_id == 0 ) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/event_channel.c --- a/xen/common/event_channel.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/event_channel.c Fri Sep 9 16:30:54 2005 @@ -250,6 +250,9 @@ if ( virq >= ARRAY_SIZE(v->virq_to_evtchn) ) return -EINVAL; + + if ( d->domain_id == 0 && virq >= VIRQ_CONSOLE ) + v = d->vcpu[0]; spin_lock(&d->evtchn_lock); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/grant_table.c --- a/xen/common/grant_table.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/grant_table.c Fri Sep 9 16:30:54 2005 @@ -399,7 +399,7 @@ { int i; grant_mapping_t *new_mt; - grant_table_t *lgt = ld->grant_table; + grant_table_t *lgt = ld->grant_table; if ( (lgt->maptrack_limit << 1) > MAPTRACK_MAX_ENTRIES ) { @@ -437,9 +437,8 @@ ref, dom, dev_hst_ro_flags); #endif - if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref, - dev_hst_ro_flags, - addr, &frame))) + if ( (rc = __gnttab_activate_grant_ref(ld, led, rd, ref, dev_hst_ro_flags, + addr, &frame)) >= 0 ) { /* * Only make the maptrack live _after_ writing the pte, in case we @@ -807,7 +806,8 @@ int i; int result = GNTST_okay; - for (i = 0; 
i < count; i++) { + for ( i = 0; i < count; i++ ) + { gnttab_donate_t *gop = &uop[i]; #if GRANT_DEBUG printk("gnttab_donate: i=%d mfn=%lx domid=%d gref=%08x\n", @@ -815,19 +815,24 @@ #endif page = &frame_table[gop->mfn]; - if (unlikely(IS_XEN_HEAP_FRAME(page))) { + if ( unlikely(IS_XEN_HEAP_FRAME(page))) + { printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long) gop->mfn); gop->status = GNTST_bad_virt_addr; continue; } - if (unlikely(!pfn_valid(page_to_pfn(page)))) { + + if ( unlikely(!pfn_valid(page_to_pfn(page))) ) + { printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long) gop->mfn); gop->status = GNTST_bad_virt_addr; continue; } - if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) { + + if ( unlikely((e = find_domain_by_id(gop->domid)) == NULL) ) + { printk("gnttab_donate: can't find domain %d\n", gop->domid); gop->status = GNTST_bad_domain; continue; @@ -881,47 +886,23 @@ * headroom. Also, a domain mustn't have PGC_allocated * pages when it is dying. */ -#ifdef GRANT_DEBUG - if (unlikely(e->tot_pages >= e->max_pages)) { - printk("gnttab_dontate: no headroom tot_pages=%d max_pages=%d\n", - e->tot_pages, e->max_pages); + if ( unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) || + unlikely(e->tot_pages >= e->max_pages) || + unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle)) ) + { + DPRINTK("gnttab_donate: Transferee has no reservation headroom " + "(%d,%d) or provided a bad grant ref (%08x) or " + "is dying (%lx)\n", + e->tot_pages, e->max_pages, gop->handle, e->domain_flags); spin_unlock(&e->page_alloc_lock); put_domain(e); - result = GNTST_general_error; + gop->status = result = GNTST_general_error; break; } - if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) { - printk("gnttab_donate: target domain is dying\n"); - spin_unlock(&e->page_alloc_lock); - put_domain(e); - result = GNTST_general_error; - break; - } - if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) { - printk("gnttab_donate: 
gnttab_prepare_for_transfer fails\n"); - spin_unlock(&e->page_alloc_lock); - put_domain(e); - result = GNTST_general_error; - break; - } -#else - ASSERT(e->tot_pages <= e->max_pages); - if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) || - unlikely(e->tot_pages == e->max_pages) || - unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) { - printk("gnttab_donate: Transferee has no reservation headroom (%d," - "%d) or provided a bad grant ref (%08x) or is dying (%p)\n", - e->tot_pages, e->max_pages, gop->handle, e->d_flags); - spin_unlock(&e->page_alloc_lock); - put_domain(e); - result = GNTST_general_error; - break; - } -#endif + /* Okay, add the page to 'e'. */ - if (unlikely(e->tot_pages++ == 0)) { + if ( unlikely(e->tot_pages++ == 0) ) get_knownalive_domain(e); - } list_add_tail(&page->list, &e->page_list); page_set_owner(page, e); @@ -937,6 +918,7 @@ gop->status = GNTST_okay; } + return result; } @@ -956,38 +938,38 @@ rc = -EFAULT; switch ( cmd ) - { - case GNTTABOP_map_grant_ref: - if ( unlikely(!array_access_ok( - uop, count, sizeof(gnttab_map_grant_ref_t))) ) - goto out; - rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); - break; - case GNTTABOP_unmap_grant_ref: - if ( unlikely(!array_access_ok( - uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) - goto out; - rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, - count); - break; - case GNTTABOP_setup_table: - rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count); - break; + { + case GNTTABOP_map_grant_ref: + if ( unlikely(!array_access_ok( + uop, count, sizeof(gnttab_map_grant_ref_t))) ) + goto out; + rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); + break; + case GNTTABOP_unmap_grant_ref: + if ( unlikely(!array_access_ok( + uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) + goto out; + rc = gnttab_unmap_grant_ref( + (gnttab_unmap_grant_ref_t *)uop, count); + break; + case GNTTABOP_setup_table: + rc = gnttab_setup_table((gnttab_setup_table_t *)uop, 
count); + break; #if GRANT_DEBUG - case GNTTABOP_dump_table: - rc = gnttab_dump_table((gnttab_dump_table_t *)uop); - break; + case GNTTABOP_dump_table: + rc = gnttab_dump_table((gnttab_dump_table_t *)uop); + break; #endif - case GNTTABOP_donate: - if (unlikely(!array_access_ok(uop, count, - sizeof(gnttab_donate_t)))) - goto out; - rc = gnttab_donate(uop, count); - break; - default: - rc = -ENOSYS; - break; - } + case GNTTABOP_donate: + if (unlikely(!array_access_ok( + uop, count, sizeof(gnttab_donate_t)))) + goto out; + rc = gnttab_donate(uop, count); + break; + default: + rc = -ENOSYS; + break; + } out: UNLOCK_BIGLOCK(d); @@ -1020,17 +1002,17 @@ lgt = ld->grant_table; #if GRANT_DEBUG_VERBOSE - if ( ld->domain_ id != 0 ) { - DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n", - rd->domain_id, ld->domain_id, frame, readonly); - } + if ( ld->domain_id != 0 ) + DPRINTK("Foreign unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n", + rd->domain_id, ld->domain_id, frame, readonly); #endif /* Fast exit if we're not mapping anything using grant tables */ if ( lgt->map_count == 0 ) return 0; - if ( get_domain(rd) == 0 ) { + if ( get_domain(rd) == 0 ) + { DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", rd->domain_id); return 0; @@ -1211,13 +1193,13 @@ DPRINTK("Bad pfn (%lx)\n", pfn); else { - machine_to_phys_mapping[frame] = pfn; + set_pfn_from_mfn(frame, pfn); if ( unlikely(shadow_mode_log_dirty(ld))) mark_dirty(ld, frame); if (shadow_mode_translate(ld)) - __phys_to_machine_mapping[pfn] = frame; + set_mfn_from_pfn(pfn, frame); } sha->frame = __mfn_to_gpfn(rd, frame); sha->domid = rd->domain_id; @@ -1267,9 +1249,11 @@ for ( i = 0; i < NR_GRANT_FRAMES; i++ ) { SHARE_PFN_WITH_DOMAIN( - virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d); - machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] = - INVALID_M2P_ENTRY; + virt_to_page((char *)t->shared + (i * PAGE_SIZE)), + d); + set_pfn_from_mfn( + (virt_to_phys(t->shared) >> PAGE_SHIFT) + i, + 
INVALID_M2P_ENTRY); } /* Okay, install the structure. */ @@ -1306,57 +1290,53 @@ { map = >->maptrack[handle]; - if ( map->ref_and_flags & GNTMAP_device_map ) - { - dom = map->domid; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; - - DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", - handle, ref, - map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom); - - if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || - unlikely(ld == rd) ) + if ( !(map->ref_and_flags & GNTMAP_device_map) ) + continue; + + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + + DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", + handle, ref, map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom); + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + printk(KERN_WARNING "Grant release: No dom%d\n", dom); + continue; + } + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) ) + { + frame = act->frame; + + if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) && + ( (act->pin & GNTPIN_devw_mask) > 0 ) ) { - if ( rd != NULL ) - put_domain(rd); - - printk(KERN_WARNING "Grant release: No dom%d\n", dom); - continue; + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); } - act = &rd->grant_table->active[ref]; - sha = &rd->grant_table->shared[ref]; - - spin_lock(&rd->grant_table->lock); - - if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) ) + map->ref_and_flags &= ~GNTMAP_device_map; + act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask); + if ( act->pin == 0 ) { - frame = act->frame; - - if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) && - ( (act->pin & GNTPIN_devw_mask) > 0 ) ) - { - clear_bit(_GTF_writing, &sha->flags); - put_page_type(&frame_table[frame]); - } - - act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask); - - if ( act->pin == 0 ) - { - 
clear_bit(_GTF_reading, &sha->flags); - map->ref_and_flags = 0; - put_page(&frame_table[frame]); - } - else - map->ref_and_flags &= ~GNTMAP_device_map; + clear_bit(_GTF_reading, &sha->flags); + map->ref_and_flags = 0; + put_page(&frame_table[frame]); } - - spin_unlock(&rd->grant_table->lock); - - put_domain(rd); - } + } + + spin_unlock(&rd->grant_table->lock); + + put_domain(rd); } } diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/kernel.c --- a/xen/common/kernel.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/kernel.c Fri Sep 9 16:30:54 2005 @@ -46,7 +46,7 @@ if ( optval != NULL ) *optval++ = '\0'; - for ( param = &__setup_start; param != &__setup_end; param++ ) + for ( param = &__setup_start; param <= &__setup_end; param++ ) { if ( strcmp(param->name, opt ) != 0 ) continue; @@ -110,6 +110,38 @@ return -EFAULT; return 0; } + + case XENVER_capabilities: + { + xen_capabilities_info_t info; + extern void arch_get_xen_caps(xen_capabilities_info_t * info); + + memset(&info, 0, sizeof(info)); + arch_get_xen_caps(&info); + + if ( copy_to_user(arg, &info, sizeof(info)) ) + return -EFAULT; + return 0; + } + + case XENVER_parameters: + { + xen_parameters_info_t info = { .virt_start = HYPERVISOR_VIRT_START }; + + if ( copy_to_user(arg, &info, sizeof(info)) ) + return -EFAULT; + return 0; + + } + + case XENVER_changeset: + { + xen_changeset_info_t chgset; + safe_strcpy(chgset, XEN_CHANGESET); + if ( copy_to_user(arg, chgset, sizeof(chgset)) ) + return -EFAULT; + return 0; + } } return -ENOSYS; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/multicall.c --- a/xen/common/multicall.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/multicall.c Fri Sep 9 16:30:54 2005 @@ -45,6 +45,18 @@ do_multicall_call(&mcs->call); +#ifndef NDEBUG + { + /* + * Deliberately corrupt the contents of the multicall structure. + * The caller must depend only on the 'result' field on return. 
+ */ + multicall_entry_t corrupt; + memset(&corrupt, 0xAA, sizeof(corrupt)); + (void)__copy_to_user(&call_list[i], &corrupt, sizeof(corrupt)); + } +#endif + if ( unlikely(__put_user(mcs->call.result, &call_list[i].result)) ) { DPRINTK("Error writing result back to multicall block.\n"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/page_alloc.c Fri Sep 9 16:30:54 2005 @@ -216,7 +216,7 @@ #define NR_ZONES 3 -#define MAX_DMADOM_PFN 0xFFFFF +#define MAX_DMADOM_PFN 0x7FFFFUL /* 31 addressable bits */ #define pfn_dom_zone_type(_pfn) \ (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM) @@ -485,43 +485,40 @@ void init_domheap_pages(physaddr_t ps, physaddr_t pe) { + unsigned long s_tot, e_tot, s_dma, e_dma, s_nrm, e_nrm; + ASSERT(!in_irq()); - ps = round_pgup(ps) >> PAGE_SHIFT; - pe = round_pgdown(pe) >> PAGE_SHIFT; - if ( pe <= ps ) - return; - - if ( (ps < MAX_DMADOM_PFN) && (pe > MAX_DMADOM_PFN) ) - { - init_heap_pages( - MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps); - init_heap_pages( - MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN), pe - MAX_DMADOM_PFN); - } - else - { - init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps); - } + s_tot = round_pgup(ps) >> PAGE_SHIFT; + e_tot = round_pgdown(pe) >> PAGE_SHIFT; + + s_dma = min(s_tot, MAX_DMADOM_PFN + 1); + e_dma = min(e_tot, MAX_DMADOM_PFN + 1); + if ( s_dma < e_dma ) + init_heap_pages(MEMZONE_DMADOM, pfn_to_page(s_dma), e_dma - s_dma); + + s_nrm = max(s_tot, MAX_DMADOM_PFN + 1); + e_nrm = max(e_tot, MAX_DMADOM_PFN + 1); + if ( s_nrm < e_nrm ) + init_heap_pages(MEMZONE_DOM, pfn_to_page(s_nrm), e_nrm - s_nrm); } struct pfn_info *alloc_domheap_pages( struct domain *d, unsigned int order, unsigned int flags) { - struct pfn_info *pg; + struct pfn_info *pg = NULL; cpumask_t mask; int i; ASSERT(!in_irq()); - pg = NULL; - if (! 
(flags & ALLOC_DOM_DMA)) + if ( !(flags & ALLOC_DOM_DMA) ) pg = alloc_heap_pages(MEMZONE_DOM, order); - if (pg == NULL) { - if ( unlikely((pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL) ) + + if ( pg == NULL ) + if ( (pg = alloc_heap_pages(MEMZONE_DMADOM, order)) == NULL ) return NULL; - } mask = pg->u.free.cpumask; tlbflush_filter(mask, pg->tlbflush_timestamp); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/schedule.c --- a/xen/common/schedule.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/schedule.c Fri Sep 9 16:30:54 2005 @@ -218,9 +218,7 @@ && spin_is_locked(&schedule_data[v->processor].schedule_lock) ) cpu_relax(); - /* Counteract lazy context switching. */ - if ( cpu_isset(v->processor, v->domain->cpumask) ) - sync_lazy_execstate_cpu(v->processor); + sync_vcpu_execstate(v); } void vcpu_wake(struct vcpu *v) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/trace.c --- a/xen/common/trace.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/trace.c Fri Sep 9 16:30:54 2005 @@ -66,7 +66,7 @@ } nr_pages = num_online_cpus() * opt_tbuf_size; - order = get_order(nr_pages * PAGE_SIZE); + order = get_order_from_pages(nr_pages); if ( (rawbuf = alloc_xenheap_pages(order)) == NULL ) { diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/xmalloc.c --- a/xen/common/xmalloc.c Thu Sep 8 15:18:40 2005 +++ b/xen/common/xmalloc.c Fri Sep 9 16:30:54 2005 @@ -86,7 +86,7 @@ static void *xmalloc_whole_pages(size_t size) { struct xmalloc_hdr *hdr; - unsigned int pageorder = get_order(size); + unsigned int pageorder = get_order_from_bytes(size); hdr = alloc_xenheap_pages(pageorder); if ( hdr == NULL ) @@ -159,7 +159,7 @@ /* Big allocs free directly. 
*/ if ( hdr->size >= PAGE_SIZE ) { - free_xenheap_pages(hdr, get_order(hdr->size)); + free_xenheap_pages(hdr, get_order_from_bytes(hdr->size)); return; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Thu Sep 8 15:18:40 2005 +++ b/xen/drivers/char/console.c Fri Sep 9 16:30:54 2005 @@ -627,7 +627,7 @@ if ( bytes == 0 ) return 0; - order = get_order(bytes); + order = get_order_from_bytes(bytes); debugtrace_buf = alloc_xenheap_pages(order); ASSERT(debugtrace_buf != NULL); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/drivers/char/serial.c --- a/xen/drivers/char/serial.c Thu Sep 8 15:18:40 2005 +++ b/xen/drivers/char/serial.c Fri Sep 9 16:30:54 2005 @@ -366,8 +366,9 @@ void serial_async_transmit(struct serial_port *port) { BUG_ON(!port->driver->tx_empty); - if ( !port->txbuf ) - port->txbuf = alloc_xenheap_pages(get_order(SERIAL_TXBUFSZ)); + if ( port->txbuf == NULL ) + port->txbuf = alloc_xenheap_pages( + get_order_from_bytes(SERIAL_TXBUFSZ)); } /* diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/acm/acm_core.h --- a/xen/include/acm/acm_core.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/acm/acm_core.h Fri Sep 9 16:30:54 2005 @@ -101,9 +101,15 @@ * primary ssidref = lower 16 bit * secondary ssidref = higher 16 bit */ +#define ACM_PRIMARY(ssidref) \ + ((ssidref) & 0xffff) + +#define ACM_SECONDARY(ssidref) \ + ((ssidref) >> 16) + #define GET_SSIDREF(POLICY, ssidref) \ ((POLICY) == acm_bin_pol.primary_policy_code) ? 
\ - ((ssidref) & 0xffff) : ((ssidref) >> 16) + ACM_PRIMARY(ssidref) : ACM_SECONDARY(ssidref) /* macros to access ssid pointer for primary / secondary policy */ #define GET_SSIDP(POLICY, ssid) \ @@ -116,6 +122,7 @@ int acm_set_policy(void *buf, u16 buf_size, int isuserbuffer); int acm_get_policy(void *buf, u16 buf_size); int acm_dump_statistics(void *buf, u16 buf_size); +int acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size); #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/acm/acm_hooks.h --- a/xen/include/acm/acm_hooks.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/acm/acm_hooks.h Fri Sep 9 16:30:54 2005 @@ -92,6 +92,7 @@ int (*dump_binary_policy) (u8 *buffer, u16 buf_size); int (*set_binary_policy) (u8 *buffer, u16 buf_size); int (*dump_statistics) (u8 *buffer, u16 buf_size); + int (*dump_ssid_types) (ssidref_t ssidref, u8 *buffer, u16 buf_size); /* domain management control hooks (can be NULL) */ int (*pre_domain_create) (void *subject_ssid, ssidref_t ssidref); void (*post_domain_create) (domid_t domid, ssidref_t ssidref); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/asm_defns.h --- a/xen/include/asm-x86/asm_defns.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/asm_defns.h Fri Sep 9 16:30:54 2005 @@ -6,11 +6,6 @@ #include <asm/asm-offsets.h> #include <asm/processor.h> -#ifndef STR -#define __STR(x) #x -#define STR(x) __STR(x) -#endif - #ifdef __x86_64__ #include <asm/x86_64/asm_defns.h> #else diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/bitops.h --- a/xen/include/asm-x86/bitops.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/bitops.h Fri Sep 9 16:30:54 2005 @@ -6,11 +6,6 @@ */ #include <xen/config.h> - -#ifndef STR -#define __STR(x) #x -#define STR(x) __STR(x) -#endif /* * These have to be done with inline assembly: that way the bit-setting diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/mm.h Fri Sep 9 16:30:54 2005 @@ -255,10 
+255,13 @@ * contiguous (or near contiguous) physical memory. */ #undef machine_to_phys_mapping -#define machine_to_phys_mapping ((u32 *)RDWR_MPT_VIRT_START) -#define INVALID_M2P_ENTRY (~0U) -#define VALID_M2P(_e) (!((_e) & (1U<<31))) +#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START) +#define INVALID_M2P_ENTRY (~0UL) +#define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1)))) #define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e)) + +#define set_pfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) +#define get_pfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) /* * The phys_to_machine_mapping is the reversed mapping of MPT for full @@ -266,17 +269,17 @@ * guests, so we steal the address space that would have normally * been used by the read-only MPT map. */ -#define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START) -#define INVALID_MFN (~0UL) -#define VALID_MFN(_mfn) (!((_mfn) & (1U<<31))) - -/* Returns the machine physical */ -static inline unsigned long phys_to_machine_mapping(unsigned long pfn) +#define phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START) +#define INVALID_MFN (~0UL) +#define VALID_MFN(_mfn) (!((_mfn) & (1U<<31))) + +#define set_mfn_from_pfn(pfn, mfn) (phys_to_machine_mapping[(pfn)] = (mfn)) +static inline unsigned long get_mfn_from_pfn(unsigned long pfn) { unsigned long mfn; l1_pgentry_t pte; - if ( (__copy_from_user(&pte, &__phys_to_machine_mapping[pfn], + if ( (__copy_from_user(&pte, &phys_to_machine_mapping[pfn], sizeof(pte)) == 0) && (l1e_get_flags(pte) & _PAGE_PRESENT) ) mfn = l1e_get_pfn(pte); @@ -285,7 +288,6 @@ return mfn; } -#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn) #ifdef MEMORY_GUARD void memguard_init(void); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/page-guest32.h --- a/xen/include/asm-x86/page-guest32.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/page-guest32.h Fri Sep 9 16:30:54 2005 @@ -32,6 +32,11 @@ /* Get pte access flags 
(unsigned int). */ #define l1e_get_flags_32(x) (get_pte_flags_32((x).l1)) #define l2e_get_flags_32(x) (get_pte_flags_32((x).l2)) + +#define l1e_get_paddr_32(x) \ + ((physaddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) +#define l2e_get_paddr_32(x) \ + ((physaddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK)))) /* Construct an empty pte. */ #define l1e_empty_32() ((l1_pgentry_32_t) { 0 }) diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/page.h Fri Sep 9 16:30:54 2005 @@ -280,12 +280,21 @@ #ifndef __ASSEMBLY__ -static __inline__ int get_order(unsigned long size) +static inline int get_order_from_bytes(physaddr_t size) { int order; size = (size-1) >> PAGE_SHIFT; for ( order = 0; size; order++ ) size >>= 1; + return order; +} + +static inline int get_order_from_pages(unsigned long nr_pages) +{ + int order; + nr_pages--; + for ( order = 0; nr_pages; order++ ) + nr_pages >>= 1; return order; } diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/processor.h Fri Sep 9 16:30:54 2005 @@ -496,9 +496,7 @@ #endif -void show_guest_stack(); -void show_trace(unsigned long *esp); -void show_stack(unsigned long *esp); +void show_stack(struct cpu_user_regs *regs); void show_registers(struct cpu_user_regs *regs); void show_page_walk(unsigned long addr); asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/shadow.h Fri Sep 9 16:30:54 2005 @@ -34,6 +34,8 @@ #include <asm/vmx.h> #include <public/dom0_ops.h> #include <asm/shadow_public.h> +#include <asm/page-guest32.h> +#include <asm/shadow_ops.h> /* Shadow PT operation mode : shadow-mode variable in arch_domain. 
*/ @@ -104,9 +106,9 @@ } while (0) #endif -#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min)) +#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min)) #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1)) -#define SHADOW_MAX(_encoded) ((L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16)) +#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16)) extern void shadow_mode_init(void); extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc); @@ -132,6 +134,7 @@ struct domain_mmap_cache *cache); #if CONFIG_PAGING_LEVELS >= 3 #include <asm/page-guest32.h> +extern unsigned long gva_to_gpa(unsigned long gva); extern void shadow_l3_normal_pt_update(struct domain *d, unsigned long pa, l3_pgentry_t l3e, struct domain_mmap_cache *cache); @@ -269,14 +272,14 @@ #define __mfn_to_gpfn(_d, mfn) \ ( (shadow_mode_translate(_d)) \ - ? machine_to_phys_mapping[(mfn)] \ + ? get_pfn_from_mfn(mfn) \ : (mfn) ) #define __gpfn_to_mfn(_d, gpfn) \ ({ \ ASSERT(current->domain == (_d)); \ (shadow_mode_translate(_d)) \ - ? phys_to_machine_mapping(gpfn) \ + ? get_mfn_from_pfn(gpfn) \ : (gpfn); \ }) @@ -461,7 +464,7 @@ // This wants the nice compact set of PFNs from 0..domain's max, // which __mfn_to_gpfn() only returns for translated domains. 
// - pfn = machine_to_phys_mapping[mfn]; + pfn = get_pfn_from_mfn(mfn); /* * Values with the MSB set denote MFNs that aren't really part of the @@ -562,7 +565,7 @@ old_hl2e = v->arch.hl2_vtable[index]; if ( (l2e_get_flags(gl2e) & _PAGE_PRESENT) && - VALID_MFN(mfn = phys_to_machine_mapping(l2e_get_pfn(gl2e))) ) + VALID_MFN(mfn = get_mfn_from_pfn(l2e_get_pfn(gl2e))) ) new_hl2e = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); else new_hl2e = l1e_empty(); @@ -794,22 +797,22 @@ #endif static inline void l1pte_propagate_from_guest( - struct domain *d, l1_pgentry_t gpte, l1_pgentry_t *spte_p) + struct domain *d, guest_l1_pgentry_t gpte, l1_pgentry_t *spte_p) { unsigned long mfn; l1_pgentry_t spte; spte = l1e_empty(); - if ( ((l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == + if ( ((guest_l1e_get_flags(gpte) & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == (_PAGE_PRESENT|_PAGE_ACCESSED)) && VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) ) { spte = l1e_from_pfn( - mfn, l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL)); + mfn, guest_l1e_get_flags(gpte) & ~(_PAGE_GLOBAL | _PAGE_AVAIL)); if ( shadow_mode_log_dirty(d) || - !(l1e_get_flags(gpte) & _PAGE_DIRTY) || + !(guest_l1e_get_flags(gpte) & _PAGE_DIRTY) || mfn_is_page_table(mfn) ) { l1e_remove_flags(spte, _PAGE_RW); @@ -859,22 +862,22 @@ static inline void l2pde_general( struct domain *d, - l2_pgentry_t *gpde_p, + guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p, unsigned long sl1mfn) { - l2_pgentry_t gpde = *gpde_p; + guest_l2_pgentry_t gpde = *gpde_p; l2_pgentry_t spde; spde = l2e_empty(); - if ( (l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) ) + if ( (guest_l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) ) { spde = l2e_from_pfn( - sl1mfn, - (l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL); + sl1mfn, + (guest_l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL); /* N.B. PDEs do not have a dirty bit. 
*/ - l2e_add_flags(gpde, _PAGE_ACCESSED); + guest_l2e_add_flags(gpde, _PAGE_ACCESSED); *gpde_p = gpde; } @@ -887,12 +890,12 @@ } static inline void l2pde_propagate_from_guest( - struct domain *d, l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p) -{ - l2_pgentry_t gpde = *gpde_p; + struct domain *d, guest_l2_pgentry_t *gpde_p, l2_pgentry_t *spde_p) +{ + guest_l2_pgentry_t gpde = *gpde_p; unsigned long sl1mfn = 0; - if ( l2e_get_flags(gpde) & _PAGE_PRESENT ) + if ( guest_l2e_get_flags(gpde) & _PAGE_PRESENT ) sl1mfn = __shadow_status(d, l2e_get_pfn(gpde), PGT_l1_shadow); l2pde_general(d, gpde_p, spde_p, sl1mfn); } @@ -904,7 +907,7 @@ static int inline validate_pte_change( struct domain *d, - l1_pgentry_t new_pte, + guest_l1_pgentry_t new_pte, l1_pgentry_t *shadow_pte_p) { l1_pgentry_t old_spte, new_spte; @@ -1004,7 +1007,7 @@ static int inline validate_pde_change( struct domain *d, - l2_pgentry_t new_gpde, + guest_l2_pgentry_t new_gpde, l2_pgentry_t *shadow_pde_p) { l2_pgentry_t old_spde, new_spde; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/shadow_64.h Fri Sep 9 16:30:54 2005 @@ -27,6 +27,7 @@ #ifndef _XEN_SHADOW_64_H #define _XEN_SHADOW_64_H #include <asm/shadow.h> +#include <asm/shadow_ops.h> #define READ_FAULT 0 #define WRITE_FAULT 1 @@ -42,14 +43,14 @@ #define ESH_LOG(_f, _a...) 
((void)0) #endif -#define L4 4UL -#define L3 3UL -#define L2 2UL -#define L1 1UL +#define PAGING_L4 4UL +#define PAGING_L3 3UL +#define PAGING_L2 2UL +#define PAGING_L1 1UL #define L_MASK 0xff -#define ROOT_LEVEL_64 L4 -#define ROOT_LEVEL_32 L2 +#define ROOT_LEVEL_64 PAGING_L4 +#define ROOT_LEVEL_32 PAGING_L2 #define SHADOW_ENTRY (2UL << 16) #define GUEST_ENTRY (1UL << 16) @@ -58,6 +59,10 @@ #define SET_ENTRY (1UL << 8) #define PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) + +/* For 32-bit VMX guest to allocate shadow L1 & L2*/ +#define SL1_ORDER 1 +#define SL2_ORDER 2 typedef struct { intpte_t lo; } pgentry_64_t; #define shadow_level_to_type(l) (l << 29) @@ -76,6 +81,10 @@ #define entry_remove_flags(x, flags) ((x).lo &= ~put_pte_flags(flags)) #define entry_has_changed(x,y,flags) \ ( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) + +#define PAE_SHADOW_SELF_ENTRY 259 +#define PDP_ENTRIES 4 + static inline int table_offset_64(unsigned long va, int level) { switch(level) { @@ -86,8 +95,13 @@ case 3: return (((va) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)); #if CONFIG_PAGING_LEVELS >= 4 +#ifndef GUEST_PGENTRY_32 case 4: return (((va) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)); +#else + case 4: + return PAE_SHADOW_SELF_ENTRY; +#endif #endif default: //printk("<table_offset_64> level %d is too big\n", level); @@ -138,7 +152,7 @@ return NULL; mfn = entry_get_value(*le_e) >> PAGE_SHIFT; if ((flag & GUEST_ENTRY) && shadow_mode_translate(d)) - mfn = phys_to_machine_mapping(mfn); + mfn = get_mfn_from_pfn(mfn); le_p = (pgentry_64_t *)phys_to_virt(mfn << PAGE_SHIFT); index = table_offset_64(va, (level + i - 1)); le_e = &le_p[index]; @@ -165,30 +179,30 @@ return le_e; } #define __shadow_set_l4e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L4) + __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4) #define __shadow_get_l4e(v, va, sl4e) \ - __rw_entry(v, va, sl4e, SHADOW_ENTRY | GET_ENTRY | L4) + __rw_entry(v, 
va, sl4e, SHADOW_ENTRY | GET_ENTRY | PAGING_L4) #define __shadow_set_l3e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L3) + __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L3) #define __shadow_get_l3e(v, va, sl3e) \ - __rw_entry(v, va, sl3e, SHADOW_ENTRY | GET_ENTRY | L3) + __rw_entry(v, va, sl3e, SHADOW_ENTRY | GET_ENTRY | PAGING_L3) #define __shadow_set_l2e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L2) + __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L2) #define __shadow_get_l2e(v, va, sl2e) \ - __rw_entry(v, va, sl2e, SHADOW_ENTRY | GET_ENTRY | L2) + __rw_entry(v, va, sl2e, SHADOW_ENTRY | GET_ENTRY | PAGING_L2) #define __shadow_set_l1e(v, va, value) \ - __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | L1) + __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L1) #define __shadow_get_l1e(v, va, sl1e) \ - __rw_entry(v, va, sl1e, SHADOW_ENTRY | GET_ENTRY | L1) + __rw_entry(v, va, sl1e, SHADOW_ENTRY | GET_ENTRY | PAGING_L1) #define __guest_set_l4e(v, va, value) \ - __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L4) + __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L4) #define __guest_get_l4e(v, va, gl4e) \ - __rw_entry(v, va, gl4e, GUEST_ENTRY | GET_ENTRY | L4) + __rw_entry(v, va, gl4e, GUEST_ENTRY | GET_ENTRY | PAGING_L4) #define __guest_set_l3e(v, va, value) \ - __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L3) + __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L3) #define __guest_get_l3e(v, va, sl3e) \ - __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | L3) + __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3) static inline void * __guest_set_l2e( struct vcpu *v, u64 va, void *value, int size) @@ -205,7 +219,7 @@ return &l2va[l2_table_offset_32(va)]; } case 8: - return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L2); + return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2); default: BUG(); return NULL; @@ -230,7 +244,7 @@ 
return &l2va[l2_table_offset_32(va)]; } case 8: - return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | L2); + return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2); default: BUG(); return NULL; @@ -257,7 +271,7 @@ if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT))) return NULL; - l1mfn = phys_to_machine_mapping( + l1mfn = get_mfn_from_pfn( l2e_get_pfn(gl2e)); l1va = (l1_pgentry_32_t *) @@ -269,7 +283,7 @@ } case 8: - return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | L1); + return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1); default: BUG(); return NULL; @@ -299,7 +313,7 @@ return NULL; - l1mfn = phys_to_machine_mapping( + l1mfn = get_mfn_from_pfn( l2e_get_pfn(gl2e)); l1va = (l1_pgentry_32_t *) phys_to_virt( l1mfn << L1_PAGETABLE_SHIFT); @@ -310,7 +324,7 @@ } case 8: // 64-bit guest - return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | L1); + return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1); default: BUG(); return NULL; @@ -334,7 +348,7 @@ sle = entry_empty(); if ( (entry_get_flags(gle) & _PAGE_PRESENT) && (smfn != 0) ) { - if ((entry_get_flags(gle) & _PAGE_PSE) && level == L2) { + if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) { sle = entry_from_pfn(smfn, entry_get_flags(gle)); entry_remove_flags(sle, _PAGE_PSE); @@ -376,7 +390,7 @@ unsigned long smfn = 0; if ( entry_get_flags(gle) & _PAGE_PRESENT ) { - if ((entry_get_flags(gle) & _PAGE_PSE) && level == L2) { + if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) { smfn = __shadow_status(d, entry_get_value(gle) >> PAGE_SHIFT, PGT_fl1_shadow); } else { smfn = __shadow_status(d, entry_get_pfn(gle), @@ -421,86 +435,6 @@ return 1; } -/* - * Check P, R/W, U/S bits in the guest page table. - * If the fault belongs to guest return 1, - * else return 0. 
- */ -static inline int guest_page_fault(struct vcpu *v, - unsigned long va, unsigned int error_code, pgentry_64_t *gpl2e, pgentry_64_t *gpl1e) -{ - struct domain *d = v->domain; - pgentry_64_t gle, *lva; - unsigned long mfn; - int i; - - __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | L4); - if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT))) - return 1; - - if (error_code & ERROR_W) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_RW))) - return 1; - } - if (error_code & ERROR_U) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_USER))) - return 1; - } - for (i = L3; i >= L1; i--) { - /* - * If it's not external mode, then mfn should be machine physical. - */ - mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT)); - - lva = (pgentry_64_t *) phys_to_virt( - mfn << PAGE_SHIFT); - gle = lva[table_offset_64(va, i)]; - - if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT))) - return 1; - - if (error_code & ERROR_W) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_RW))) - return 1; - } - if (error_code & ERROR_U) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_USER))) - return 1; - } - - if (i == L2) { - if (gpl2e) - *gpl2e = gle; - - if (likely(entry_get_flags(gle) & _PAGE_PSE)) - return 0; - - } - - if (i == L1) - if (gpl1e) - *gpl1e = gle; - } - return 0; -} - -static inline unsigned long gva_to_gpa(unsigned long gva) -{ - struct vcpu *v = current; - pgentry_64_t gl1e = {0}; - pgentry_64_t gl2e = {0}; - unsigned long gpa; - - if (guest_page_fault(v, gva, 0, &gl2e, &gl1e)) - return 0; - if (entry_get_flags(gl2e) & _PAGE_PSE) - gpa = entry_get_paddr(gl2e) + (gva & ((1 << L2_PAGETABLE_SHIFT) - 1)); - else - gpa = entry_get_paddr(gl1e) + (gva & ~PAGE_MASK); - - return gpa; - -} #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow_public.h --- a/xen/include/asm-x86/shadow_public.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/shadow_public.h Fri Sep 9 16:30:54 2005 @@ -49,6 +49,7 @@ (*mark_mfn_out_of_sync)(struct vcpu *v, unsigned long 
gpfn, unsigned long mfn); int (*is_out_of_sync)(struct vcpu *v, unsigned long va); + unsigned long (*gva_to_gpa)(unsigned long gva); }; #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/vmx.h --- a/xen/include/asm-x86/vmx.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/vmx.h Fri Sep 9 16:30:54 2005 @@ -275,7 +275,9 @@ return 0; } -static inline int __vmread (unsigned long field, void *value) +#define __vmread(x, ptr) ___vmread((x), (ptr), sizeof(*(ptr))) + +static always_inline int ___vmread (const unsigned long field, void *ptr, const int size) { unsigned long eflags; unsigned long ecx = 0; @@ -286,7 +288,23 @@ : "a" (field) : "memory"); - *((long *) value) = ecx; + switch (size) { + case 1: + *((u8 *) (ptr)) = ecx; + break; + case 2: + *((u16 *) (ptr)) = ecx; + break; + case 4: + *((u32 *) (ptr)) = ecx; + break; + case 8: + *((u64 *) (ptr)) = ecx; + break; + default: + domain_crash_synchronous(); + break; + } __save_flags(eflags); if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF) @@ -453,4 +471,7 @@ void load_cpu_user_regs(struct cpu_user_regs *regs); void store_cpu_user_regs(struct cpu_user_regs *regs); +enum { VMX_COPY_IN = 0, VMX_COPY_OUT }; +int vmx_copy(void *buf, unsigned long laddr, int size, int dir); + #endif /* __ASM_X86_VMX_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/vmx_platform.h --- a/xen/include/asm-x86/vmx_platform.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/vmx_platform.h Fri Sep 9 16:30:54 2005 @@ -24,8 +24,7 @@ #include <asm/vmx_virpit.h> #include <asm/vmx_intercept.h> -#define MAX_OPERAND_NUM 3 -#define I_NAME_LEN 16 +#define MAX_OPERAND_NUM 2 #define mk_operand(size, index, seg, flag) \ (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag)) @@ -35,54 +34,60 @@ #define operand_index(operand) \ ((operand >> 16) & 0xFF) - //For instruction.operand[].size + +/* for instruction.operand[].size */ #define BYTE 1 #define WORD 2 #define LONG 4 #define QUAD 8 #define BYTE_64 16 - //For 
instruction.operand[].flag +/* for instruction.operand[].flag */ #define REGISTER 0x1 #define MEMORY 0x2 #define IMMEDIATE 0x4 -#define WZEROEXTEND 0x8 - //For instruction.flags +/* for instruction.flags */ #define REPZ 0x1 #define REPNZ 0x2 +#define OVERLAP 0x4 + +#define INSTR_PIO 1 +#define INSTR_OR 2 +#define INSTR_AND 3 +#define INSTR_XOR 4 +#define INSTR_CMP 5 +#define INSTR_MOV 6 +#define INSTR_MOVS 7 +#define INSTR_MOVZ 8 +#define INSTR_STOS 9 +#define INSTR_TEST 10 struct instruction { - __s8 i_name[I_NAME_LEN]; //Instruction's name - __s16 op_size; //The operand's bit size, e.g. 16-bit or 32-bit. - - __u64 offset; //The effective address - //offset = Base + (Index * Scale) + Displacement - + __s8 instr; /* instruction type */ + __s16 op_size; /* the operand's bit size, e.g. 16-bit or 32-bit */ __u64 immediate; - - __u16 seg_sel; //Segmentation selector - - __u32 operand[MAX_OPERAND_NUM]; //The order of operand is from AT&T Assembly - __s16 op_num; //The operand numbers - - __u32 flags; // + __u16 seg_sel; /* segmentation selector */ + __u32 operand[MAX_OPERAND_NUM]; /* order is AT&T assembly */ + __u32 flags; }; #define MAX_INST_LEN 32 -struct mi_per_cpu_info -{ - unsigned long mmio_target; - struct cpu_user_regs *inst_decoder_regs; +struct mi_per_cpu_info { + int flags; + int instr; /* instruction */ + unsigned long operand[2]; /* operands */ + unsigned long immediate; /* immediate portion */ + struct cpu_user_regs *inst_decoder_regs; /* current context */ }; struct virtual_platform_def { - unsigned long *real_mode_data; /* E820, etc. */ + unsigned long *real_mode_data; /* E820, etc. 
*/ unsigned long shared_page_va; struct vmx_virpit_t vmx_pit; struct vmx_handler_t vmx_handler; - struct mi_per_cpu_info mpci; /* MMIO */ + struct mi_per_cpu_info mpci; /* MMIO */ }; extern void handle_mmio(unsigned long, unsigned long); @@ -91,6 +96,6 @@ extern void vmx_io_assist(struct vcpu *v); // XXX - think about this -- maybe use bit 30 of the mfn to signify an MMIO frame. -#define mmio_space(gpa) (!VALID_MFN(phys_to_machine_mapping((gpa) >> PAGE_SHIFT))) +#define mmio_space(gpa) (!VALID_MFN(get_mfn_from_pfn((gpa) >> PAGE_SHIFT))) #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/vmx_vmcs.h --- a/xen/include/asm-x86/vmx_vmcs.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/vmx_vmcs.h Fri Sep 9 16:30:54 2005 @@ -183,7 +183,7 @@ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, VM_ENTRY_INTR_INFO_FIELD = 0x00004016, VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, - VM_ENTRY_INSTRUCTION_LENGTH = 0x0000401a, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, TPR_THRESHOLD = 0x0000401c, SECONDARY_VM_EXEC_CONTROL = 0x0000401e, VM_INSTRUCTION_ERROR = 0x00004400, @@ -192,7 +192,7 @@ VM_EXIT_INTR_ERROR_CODE = 0x00004406, IDT_VECTORING_INFO_FIELD = 0x00004408, IDT_VECTORING_ERROR_CODE = 0x0000440a, - INSTRUCTION_LEN = 0x0000440c, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, VMX_INSTRUCTION_INFO = 0x0000440e, GUEST_ES_LIMIT = 0x00004800, GUEST_CS_LIMIT = 0x00004802, diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/x86_32/asm_defns.h --- a/xen/include/asm-x86/x86_32/asm_defns.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/x86_32/asm_defns.h Fri Sep 9 16:30:54 2005 @@ -1,56 +1,26 @@ #ifndef __X86_32_ASM_DEFNS_H__ #define __X86_32_ASM_DEFNS_H__ -/* Maybe auto-generate the following two cases (quoted vs. unquoted). 
*/ -#ifndef __ASSEMBLY__ - -#define __SAVE_ALL_PRE \ - "cld;" \ - "pushl %eax;" \ - "pushl %ebp;" \ - "pushl %edi;" \ - "pushl %esi;" \ - "pushl %edx;" \ - "pushl %ecx;" \ - "pushl %ebx;" \ - "testl $"STR(X86_EFLAGS_VM)","STR(UREGS_eflags)"(%esp);" \ - "jz 2f;" \ - "call setup_vm86_frame;" \ - "jmp 3f;" \ - "2:testb $3,"STR(UREGS_cs)"(%esp);" \ - "jz 1f;" \ - "mov %ds,"STR(UREGS_ds)"(%esp);" \ - "mov %es,"STR(UREGS_es)"(%esp);" \ - "mov %fs,"STR(UREGS_fs)"(%esp);" \ - "mov %gs,"STR(UREGS_gs)"(%esp);" \ - "3:" - -#define SAVE_ALL_NOSEGREGS(_reg) \ - __SAVE_ALL_PRE \ - "1:" - -#define SET_XEN_SEGMENTS(_reg) \ - "movl $("STR(__HYPERVISOR_DS)"),%e"STR(_reg)"x;" \ - "mov %e"STR(_reg)"x,%ds;" \ - "mov %e"STR(_reg)"x,%es;" - -#define SAVE_ALL(_reg) \ - __SAVE_ALL_PRE \ - SET_XEN_SEGMENTS(_reg) \ - "1:" - +#ifndef NDEBUG +/* Indicate special exception stack frame by inverting the frame pointer. */ +#define SETUP_EXCEPTION_FRAME_POINTER \ + movl %esp,%ebp; \ + notl %ebp #else +#define SETUP_EXCEPTION_FRAME_POINTER +#endif #define __SAVE_ALL_PRE \ cld; \ pushl %eax; \ pushl %ebp; \ + SETUP_EXCEPTION_FRAME_POINTER; \ pushl %edi; \ pushl %esi; \ pushl %edx; \ pushl %ecx; \ pushl %ebx; \ - testl $X86_EFLAGS_VM,UREGS_eflags(%esp); \ + testl $(X86_EFLAGS_VM),UREGS_eflags(%esp); \ jz 2f; \ call setup_vm86_frame; \ jmp 3f; \ @@ -83,8 +53,6 @@ #define PERFC_INCR(_name,_idx) #endif -#endif - #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) #define XBUILD_SMP_INTERRUPT(x,v) \ asmlinkage void x(void); \ @@ -92,7 +60,7 @@ "\n"__ALIGN_STR"\n" \ STR(x) ":\n\t" \ "pushl $"#v"<<16\n\t" \ - SAVE_ALL(a) \ + STR(SAVE_ALL(a)) \ "call "STR(smp_##x)"\n\t" \ "jmp ret_from_intr\n"); @@ -103,7 +71,7 @@ "\n"__ALIGN_STR"\n" \ STR(x) ":\n\t" \ "pushl $"#v"<<16\n\t" \ - SAVE_ALL(a) \ + STR(SAVE_ALL(a)) \ "movl %esp,%eax\n\t" \ "pushl %eax\n\t" \ "call "STR(smp_##x)"\n\t" \ @@ -114,7 +82,7 @@ __asm__( \ "\n" __ALIGN_STR"\n" \ "common_interrupt:\n\t" \ - SAVE_ALL(a) \ + STR(SAVE_ALL(a)) \ "movl 
%esp,%eax\n\t" \ "pushl %eax\n\t" \ "call " STR(do_IRQ) "\n\t" \ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/x86_64/asm_defns.h --- a/xen/include/asm-x86/x86_64/asm_defns.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/x86_64/asm_defns.h Fri Sep 9 16:30:54 2005 @@ -1,49 +1,14 @@ #ifndef __X86_64_ASM_DEFNS_H__ #define __X86_64_ASM_DEFNS_H__ -/* Maybe auto-generate the following two cases (quoted vs. unquoted). */ -#ifndef __ASSEMBLY__ - -#define SAVE_ALL \ - "cld;" \ - "pushq %rdi;" \ - "pushq %rsi;" \ - "pushq %rdx;" \ - "pushq %rcx;" \ - "pushq %rax;" \ - "pushq %r8;" \ - "pushq %r9;" \ - "pushq %r10;" \ - "pushq %r11;" \ - "pushq %rbx;" \ - "pushq %rbp;" \ - "pushq %r12;" \ - "pushq %r13;" \ - "pushq %r14;" \ - "pushq %r15;" - -#define RESTORE_ALL \ - "popq %r15;" \ - "popq %r14;" \ - "popq %r13;" \ - "popq %r12;" \ - "popq %rbp;" \ - "popq %rbx;" \ - "popq %r11;" \ - "popq %r10;" \ - "popq %r9;" \ - "popq %r8;" \ - "popq %rax;" \ - "popq %rcx;" \ - "popq %rdx;" \ - "popq %rsi;" \ - "popq %rdi;" - -/* Work around AMD erratum #88 */ -#define safe_swapgs \ - "mfence; swapgs;" - +#ifndef NDEBUG +/* Indicate special exception stack frame by inverting the frame pointer. 
*/ +#define SETUP_EXCEPTION_FRAME_POINTER \ + movq %rsp,%rbp; \ + notq %rbp #else +#define SETUP_EXCEPTION_FRAME_POINTER +#endif #define SAVE_ALL \ cld; \ @@ -58,6 +23,7 @@ pushq %r11; \ pushq %rbx; \ pushq %rbp; \ + SETUP_EXCEPTION_FRAME_POINTER; \ pushq %r12; \ pushq %r13; \ pushq %r14; \ @@ -90,7 +56,9 @@ #define PERFC_INCR(_name,_idx) #endif -#endif +/* Work around AMD erratum #88 */ +#define safe_swapgs \ + "mfence; swapgs;" #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) #define XBUILD_SMP_INTERRUPT(x,v) \ @@ -100,7 +68,7 @@ STR(x) ":\n\t" \ "pushq $0\n\t" \ "movl $"#v",4(%rsp)\n\t" \ - SAVE_ALL \ + STR(SAVE_ALL) \ "callq "STR(smp_##x)"\n\t" \ "jmp ret_from_intr\n"); @@ -112,7 +80,7 @@ STR(x) ":\n\t" \ "pushq $0\n\t" \ "movl $"#v",4(%rsp)\n\t" \ - SAVE_ALL \ + STR(SAVE_ALL) \ "movq %rsp,%rdi\n\t" \ "callq "STR(smp_##x)"\n\t" \ "jmp ret_from_intr\n"); @@ -121,7 +89,7 @@ __asm__( \ "\n" __ALIGN_STR"\n" \ "common_interrupt:\n\t" \ - SAVE_ALL \ + STR(SAVE_ALL) \ "movq %rsp,%rdi\n\t" \ "callq " STR(do_IRQ) "\n\t" \ "jmp ret_from_intr\n"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/acm.h --- a/xen/include/public/acm.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/acm.h Fri Sep 9 16:30:54 2005 @@ -56,20 +56,22 @@ #define ACM_ACCESS_DENIED -111 #define ACM_NULL_POINTER_ERROR -200 -#define ACM_MAX_POLICY 3 - +/* primary policy in lower 4 bits */ #define ACM_NULL_POLICY 0 #define ACM_CHINESE_WALL_POLICY 1 #define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2 -#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY 3 + +/* combinations have secondary policy component in higher 4bit */ +#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \ + ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY) /* policy: */ #define ACM_POLICY_NAME(X) \ - (X == ACM_NULL_POLICY) ? "NULL policy" : \ - (X == ACM_CHINESE_WALL_POLICY) ? "CHINESE WALL policy" : \ - (X == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY) ? 
"SIMPLE TYPE ENFORCEMENT policy" : \ - (X == ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT policy" : \ - "UNDEFINED policy" + ((X) == (ACM_NULL_POLICY)) ? "NULL policy" : \ + ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL policy" : \ + ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT policy" : \ + ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT policy" : \ + "UNDEFINED policy" /* the following policy versions must be increased * whenever the interpretation of the related @@ -122,7 +124,7 @@ */ struct acm_policy_buffer { u32 policy_version; /* ACM_POLICY_VERSION */ - u32 magic; + u32 magic; u32 len; u32 primary_policy_code; u32 primary_buffer_offset; @@ -151,7 +153,7 @@ }; struct acm_stats_buffer { - u32 magic; + u32 magic; u32 len; u32 primary_policy_code; u32 primary_stats_offset; @@ -168,5 +170,15 @@ u32 gt_cachehit_count; }; +struct acm_ssid_buffer { + u32 len; + ssidref_t ssidref; + u32 primary_policy_code; + u32 primary_max_types; + u32 primary_types_offset; + u32 secondary_policy_code; + u32 secondary_max_types; + u32 secondary_types_offset; +}; #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/acm_ops.h --- a/xen/include/public/acm_ops.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/acm_ops.h Fri Sep 9 16:30:54 2005 @@ -1,3 +1,4 @@ + /****************************************************************************** * acm_ops.h * @@ -27,7 +28,7 @@ * This makes sure that old versions of acm tools will stop working in a * well-defined way (rather than crashing the machine, for instance). 
*/ -#define ACM_INTERFACE_VERSION 0xAAAA0003 +#define ACM_INTERFACE_VERSION 0xAAAA0004 /************************************************************************/ @@ -46,12 +47,25 @@ u16 pullcache_size; } acm_getpolicy_t; + #define ACM_DUMPSTATS 6 typedef struct acm_dumpstats { void *pullcache; u16 pullcache_size; } acm_dumpstats_t; + +#define ACM_GETSSID 7 +enum get_type {UNSET, SSIDREF, DOMAINID}; +typedef struct acm_getssid { + enum get_type get_ssid_by; + union { + domaintype_t domainid; + ssidref_t ssidref; + } id; + void *ssidbuf; + u16 ssidbuf_size; +} acm_getssid_t; typedef struct acm_op { u32 cmd; @@ -60,6 +74,7 @@ acm_setpolicy_t setpolicy; acm_getpolicy_t getpolicy; acm_dumpstats_t dumpstats; + acm_getssid_t getssid; } u; } acm_op_t; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/arch-x86_32.h Fri Sep 9 16:30:54 2005 @@ -55,7 +55,7 @@ # define HYPERVISOR_VIRT_START (0xFC000000UL) #endif #ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START) +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) #endif /* Maximum number of virtual CPUs in multi-processor guests. */ @@ -128,8 +128,11 @@ } vcpu_guest_context_t; typedef struct arch_shared_info { - /* MFN of a table of MFNs that make up p2m table */ - u64 pfn_to_mfn_frame_list; + unsigned long max_pfn; /* max pfn that appears in table */ + unsigned long pfn_to_mfn_frame_list_list; + /* frame containing list of mfns + containing list of mfns + containing the p2m table. */ } arch_shared_info_t; #endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/arch-x86_64.h Fri Sep 9 16:30:54 2005 @@ -71,7 +71,7 @@ /* The machine->physical mapping table starts at this address, read-only. 
*/ #ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START) +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) #endif /* @@ -186,8 +186,11 @@ } vcpu_guest_context_t; typedef struct arch_shared_info { - /* MFN of a table of MFNs that make up p2m table */ - u64 pfn_to_mfn_frame_list; + unsigned long max_pfn; /* max pfn that appears in table */ + unsigned long pfn_to_mfn_frame_list_list; + /* frame containing list of mfns + containing list of mfns + containing the p2m table. */ } arch_shared_info_t; #endif /* !__ASSEMBLY__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/dom0_ops.h Fri Sep 9 16:30:54 2005 @@ -213,6 +213,7 @@ u32 cpu_khz; unsigned long total_pages; unsigned long free_pages; + u32 hw_cap[8]; } dom0_physinfo_t; /* @@ -373,6 +374,18 @@ /* IN variables. */ int quirk_id; } dom0_platform_quirk_t; + +#define DOM0_PHYSICAL_MEMORY_MAP 40 +typedef struct { + /* IN variables. */ + int max_map_entries; + /* OUT variables. 
*/ + int nr_map_entries; + struct dom0_memory_map_entry { + u64 start, end; + int is_ram; + } *memory_map; +} dom0_physical_memory_map_t; typedef struct { u32 cmd; @@ -408,6 +421,7 @@ dom0_getvcpucontext_t getvcpucontext; dom0_getdomaininfolist_t getdomaininfolist; dom0_platform_quirk_t platform_quirk; + dom0_physical_memory_map_t physical_memory_map; } u; } dom0_op_t; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/io/ioreq.h --- a/xen/include/public/io/ioreq.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/io/ioreq.h Fri Sep 9 16:30:54 2005 @@ -29,9 +29,17 @@ #define STATE_IORESP_READY 3 #define STATE_IORESP_HOOK 4 -/* VMExit dispatcher should cooperate with instruction decoder to - prepare this structure and notify service OS and DM by sending - virq */ +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_AND 2 +#define IOREQ_TYPE_OR 3 +#define IOREQ_TYPE_XOR 4 + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq + */ typedef struct { u64 addr; /* physical address */ u64 size; /* size in bytes */ @@ -43,8 +51,8 @@ u8 state:4; u8 pdata_valid:1; /* if 1, use pdata above */ u8 dir:1; /* 1=read, 0=write */ - u8 port_mm:1; /* 0=portio, 1=mmio */ u8 df:1; + u8 type; /* I/O type */ } ioreq_t; #define MAX_VECTOR 256 diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/io/netif.h Fri Sep 9 16:30:54 2005 @@ -23,13 +23,13 @@ typedef struct { u16 id; /* Echoed in response message. 
*/ -#ifdef CONFIG_XEN_NETDEV_GRANT_RX +#ifdef CONFIG_XEN_NETDEV_GRANT grant_ref_t gref; /* 2: Reference to incoming granted frame */ #endif } netif_rx_request_t; typedef struct { -#ifdef CONFIG_XEN_NETDEV_GRANT_TX +#ifdef CONFIG_XEN_NETDEV_GRANT u32 addr; /* 0: Offset in page of start of received packet */ #else unsigned long addr; /* Machine address of packet. */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/trace.h --- a/xen/include/public/trace.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/trace.h Fri Sep 9 16:30:54 2005 @@ -23,7 +23,7 @@ #define TRC_VMXTIMER 0x00082000 /* VMX timer trace */ #define TRC_VMXINT 0x00084000 /* VMX interrupt trace */ #define TRC_VMXIO 0x00088000 /* VMX io emulation trace */ - +#define TRC_VMEXIT_HANDLER 0x00090000 /* VMX handler trace */ /* Trace events per class */ @@ -49,6 +49,10 @@ #define TRC_VMX_INT (TRC_VMXINT + 1) +#define TRC_VMEXIT (TRC_VMEXIT_HANDLER + 1) +#define TRC_VMENTRY (TRC_VMEXIT_HANDLER + 2) + + /* This structure represents a single trace buffer record. 
*/ struct t_rec { u64 cycles; /* cycle counter timestamp */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/version.h --- a/xen/include/public/version.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/version.h Fri Sep 9 16:30:54 2005 @@ -28,4 +28,17 @@ char compile_date[32]; } xen_compile_info_t; +#define XENVER_capabilities 3 +typedef struct xen_capabilities_info { + char caps[1024]; +} xen_capabilities_info_t; + +#define XENVER_changeset 4 +typedef char xen_changeset_info_t[64]; + +#define XENVER_parameters 5 +typedef struct xen_paramaters_info { +unsigned long virt_start; +} xen_parameters_info_t; + #endif /* __XEN_PUBLIC_VERSION_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/xen.h --- a/xen/include/public/xen.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/xen.h Fri Sep 9 16:30:54 2005 @@ -42,7 +42,7 @@ #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 #define __HYPERVISOR_update_descriptor 10 -#define __HYPERVISOR_dom_mem_op 12 +#define __HYPERVISOR_memory_op 12 #define __HYPERVISOR_multicall 13 #define __HYPERVISOR_update_va_mapping 14 #define __HYPERVISOR_set_timer_op 15 @@ -223,12 +223,6 @@ */ #define CONSOLEIO_write 0 #define CONSOLEIO_read 1 - -/* - * Commands to HYPERVISOR_dom_mem_op(). - */ -#define MEMOP_increase_reservation 0 -#define MEMOP_decrease_reservation 1 /* * Commands to HYPERVISOR_vm_assist(). @@ -438,19 +432,20 @@ #define MAX_GUEST_CMDLINE 1024 typedef struct start_info { /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - unsigned long nr_pages; /* Total pages allocated to this domain. */ - unsigned long shared_info;/* MACHINE address of shared info struct. */ - u32 flags; /* SIF_xxx flags. */ - u16 domain_controller_evtchn; + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + u32 flags; /* SIF_xxx flags. */ + unsigned long store_mfn; /* MACHINE page number of shared page. 
*/ + u16 store_evtchn; /* Event channel for store communication. */ + unsigned long console_mfn; /* MACHINE address of console page. */ + u16 console_evtchn; /* Event channel for console messages. */ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - unsigned long pt_base; /* VIRTUAL address of page directory. */ - unsigned long nr_pt_frames;/* Number of bootstrap p.t. frames. */ - unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ - unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ s8 cmd_line[MAX_GUEST_CMDLINE]; - unsigned long store_mfn; /* MACHINE page number of shared page. */ - u16 store_evtchn; /* Event channel for store communication. */ } start_info_t; /* These flags are passed in the 'flags' field of start_info_t. */ @@ -459,6 +454,7 @@ #define SIF_BLK_BE_DOMAIN (1<<4) /* Is this a block backend domain? */ #define SIF_NET_BE_DOMAIN (1<<5) /* Is this a net backend domain? */ #define SIF_USB_BE_DOMAIN (1<<6) /* Is this a usb backend domain? */ +#define SIF_TPM_BE_DOMAIN (1<<7) /* Is this a TPM backend domain? */ /* For use in guest OSes. */ extern shared_info_t *HYPERVISOR_shared_info; diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/xen/config.h --- a/xen/include/xen/config.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/xen/config.h Fri Sep 9 16:30:54 2005 @@ -40,4 +40,7 @@ #include <xen/compiler.h> #endif +#define __STR(...) #__VA_ARGS__ +#define STR(...) 
__STR(__VA_ARGS__) + #endif /* __XEN_CONFIG_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/xen/perfc.h --- a/xen/include/xen/perfc.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/xen/perfc.h Fri Sep 9 16:30:54 2005 @@ -4,6 +4,7 @@ #ifdef PERF_COUNTERS +#include <xen/lib.h> #include <asm/atomic.h> /* @@ -87,7 +88,7 @@ * Histogram: special treatment for 0 and 1 count. After that equally spaced * with last bucket taking the rest. */ -#ifdef PERFC_ARRAYS +#ifdef PERF_ARRAYS #define perfc_incr_histo(_x,_v,_n) \ do { \ if ( (_v) == 0 ) \ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Thu Sep 8 15:18:40 2005 +++ b/xen/include/xen/sched.h Fri Sep 9 16:30:54 2005 @@ -250,10 +250,11 @@ void vcpu_sleep_sync(struct vcpu *d); /* - * Force loading of currently-executing domain state on the specified CPU. - * This is used to counteract lazy state switching where required. - */ -extern void sync_lazy_execstate_cpu(unsigned int cpu); + * Force synchronisation of given VCPU's state. If it is currently descheduled, + * this call will ensure that all its state is committed to memory and that + * no CPU is using critical state (e.g., page tables) belonging to the VCPU. + */ +extern void sync_vcpu_execstate(struct vcpu *v); /* * Called by the scheduler to switch to another VCPU. On entry, although @@ -265,7 +266,7 @@ * The callee must ensure that the local CPU is no longer running in @prev's * context, and that the context is saved to memory, before returning. * Alternatively, if implementing lazy context switching, it suffices to ensure - * that invoking sync_lazy_execstate() will switch and commit @prev's state. + * that invoking sync_vcpu_execstate() will switch and commit @prev's state. 
*/ extern void context_switch( struct vcpu *prev, diff -r 10b1d30d3f66 -r b2f4823b6ff0 docs/misc/vtpm.txt --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/docs/misc/vtpm.txt Fri Sep 9 16:30:54 2005 @@ -0,0 +1,122 @@ +Copyright: IBM Corporation (C), Intel Corporation +17 August 2005 +Authors: Stefan Berger <stefanb@xxxxxxxxxx> (IBM), + Employees of Intel Corp + +This document gives a short introduction to the virtual TPM support +in XEN and goes as far as connecting a user domain to a virtual TPM +instance and doing a short test to verify success. It is assumed +that the user is fairly familiar with compiling and installing XEN +and Linux on a machine. + +Production Prerequisites: An x86-based machine with an ATMEL or +National Semiconductor (NSC) TPM on the motherboard. +Development Prerequisites: An emulator for TESTING ONLY is provided + + +Compiling XEN tree: +------------------- + +Compile the XEN tree as usual. + +make uninstall; make mrproper; make install + +After compiling the tree, verify that in the linux-2.6.XX-xen0/.config +file at least the following entries are set as below (they should be set +by default): + +CONFIG_XEN_TPMDEV_BACKEND=y +CONFIG_XEN_TPMDEV_GRANT=y + +CONFIG_TCG_TPM=m +CONFIG_TCG_NSC=m +CONFIG_TCG_ATMEL=m + + +Verify that in the linux-2.6.XX-xenU/.config file at least the +following entries are set as below (they should be set by default): + +CONFIG_XEN_TPMDEV_FRONTEND=y +CONFIG_XEN_TPMDEV_GRANT=y + +CONFIG_TCG_TPM=y +CONFIG_TCG_XEN=y + + +Reboot the machine with the created XEN-0 kernel. + +Note: If you do not want any TPM-related code compiled into your +kernel or built as module then comment all the above lines like +this example: +# CONFIG_TCG_TPM is not set + + +Modifying VM Configuration files: +--------------------------------- + +VM configuration files need to be adapted to make a TPM instance +available to a user domain.
The following VM configuration file is +an example of how a user domain can be configured to have a TPM +available. It works similar to making a network interface +available to a domain. + +kernel = "/boot/vmlinuz-2.6.12-xenU" +ramdisk = "/xen/initrd_domU/U1_ramdisk.img" +memory = 32 +name = "TPMUserDomain0" +vtpm = ['instance=1,backend=0'] +root = "/dev/ram0 console=tty ro" +vif = ['backend=0'] + +In the above configuration file the line 'vtpm = ...' provides +information about the domain where the virtual TPM is running and +where the TPM backend has been compiled into - this has to be +domain 0 at the moment - and which TPM instance the user domain +is supposed to talk to. Note that each running VM must use a +different instance and that using instance 0 is NOT allowed. + +Note: If you do not want TPM functionality for your user domain simply +leave out the 'vtpm' line in the configuration file. + + +Running the TPM: +---------------- + +To run the vTPM, the device /dev/vtpm must be available. +Verify that 'ls -l /dev/vtpm' shows the following output: + +crw------- 1 root root 10, 225 Aug 11 06:58 /dev/vtpm + +If it is not available, run the following command as 'root'. +mknod /dev/vtpm c 10 225 + +Make sure that the vTPM is running in domain 0. To do this run the +following + +/usr/bin/vtpm_managerd + +Start a user domain using the 'xm create' command.
Once you are in the +shell of the user domain, you should be able to do the following: + +> cd /sys/devices/vtpm +> ls +cancel caps pcrs pubek +> cat pcrs +PCR-00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-01: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-02: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-03: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-04: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-05: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-06: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-07: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +PCR-08: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[...] + +At this point the user domain has been successfully connected to its +virtual TPM instance. + +For further information please read the documentation in +tools/vtpm_manager/README and tools/vtpm/README + +Stefan Berger and Employees of the Intel Corp diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/domain_config --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/domain_config Fri Sep 9 16:30:54 2005 @@ -0,0 +1,17 @@ +# -*- mode: python; -*- +#============================================================================ +# Python configuration setup for 'xm create'. +# This script sets the parameters used when a domain is created using 'xm create'. +# You use a separate script for each domain you want to create, or +# you can set the parameters for the domain on the xm command line. +#============================================================================ + +#---------------------------------------------------------------------------- +# Kernel image file. +kernel = "mini-os.elf" + +# Initial memory allocation (in megabytes) for the new domain. +memory = 32 + +# A name for your domain. All domains must have different names.
+name = "Mini-OS" diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/ctype.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/ctype.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,79 @@ +#ifndef _CTYPE_H +#define _CTYPE_H + +/* + * NOTE! This ctype does not handle EOF like the standard C + * library is required to. + */ + +#define _U 0x01 /* upper */ +#define _L 0x02 /* lower */ +#define _D 0x04 /* digit */ +#define _C 0x08 /* cntrl */ +#define _P 0x10 /* punct */ +#define _S 0x20 /* white space (space/lf/tab) */ +#define _X 0x40 /* hex digit */ +#define _SP 0x80 /* hard space (0x20) */ + + +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + +#define __ismask(x) (_ctype[(int)(unsigned char)(x)]) + +#define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) +#define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) +#define iscntrl(c) ((__ismask(c)&(_C)) != 0) 
+#define isdigit(c) ((__ismask(c)&(_D)) != 0) +#define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) +#define islower(c) ((__ismask(c)&(_L)) != 0) +#define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) +#define ispunct(c) ((__ismask(c)&(_P)) != 0) +#define isspace(c) ((__ismask(c)&(_S)) != 0) +#define isupper(c) ((__ismask(c)&(_U)) != 0) +#define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0) + +#define isascii(c) (((unsigned char)(c))<=0x7f) +#define toascii(c) (((unsigned char)(c))&0x7f) + +static inline unsigned char __tolower(unsigned char c) +{ + if (isupper(c)) + c -= 'A'-'a'; + return c; +} + +static inline unsigned char __toupper(unsigned char c) +{ + if (islower(c)) + c -= 'a'-'A'; + return c; +} + +#define tolower(c) __tolower(c) +#define toupper(c) __toupper(c) + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/err.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/err.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,31 @@ +#ifndef _ERR_H +#define _ERR_H + +#include <errno.h> + +/* + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a dentry + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. 
+ */ +#define IS_ERR_VALUE(x) ((x) > (unsigned long)-1000L) + +static inline void *ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +static inline long IS_ERR(const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +#endif /* _LINUX_ERR_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/errno-base.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/errno-base.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,39 @@ +#ifndef _ERRNO_BASE_H +#define _ERRNO_BASE_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 
34 /* Math result not representable */ + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/errno.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/errno.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,109 @@ +#ifndef _ERRNO_H +#define _ERRNO_H + +#include <errno-base.h> + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* 
Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* 
Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/fcntl.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/fcntl.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,89 @@ +#ifndef _I386_FCNTL_H +#define _I386_FCNTL_H + +/* open/fcntl - O_SYNC is only implemented on blocks devices and on files + located on an ext2 file system */ +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 
+#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECT 040000 /* direct disk access hint */ +#define O_LARGEFILE 0100000 +#define O_DIRECTORY 0200000 /* must be a directory */ +#define O_NOFOLLOW 0400000 /* don't follow links */ +#define O_NOATIME 01000000 + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 + +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ + +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... 
Which allows concurrent read & write ops */ + +/* +struct flock { + short l_type; + short l_whence; + off_t l_start; + off_t l_len; + pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; +}; + +#define F_LINUX_SPECIFIC_BASE 1024 +*/ +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/list.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/list.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,184 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline__ void __list_add(struct list_head * new, + struct list_head * prev, + struct list_head * next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. 
+ */ +static __inline__ void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static __inline__ void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline__ void __list_del(struct list_head * prev, + struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is in an undefined state. + */ +static __inline__ void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static __inline__ void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static __inline__ int list_empty(struct list_head *head) +{ + return head->next == head; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. 
+ */ +static __inline__ void list_splice(struct list_head *list, struct list_head *head) +{ + struct list_head *first = list->next; + + if (first != list) { + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. 
+ */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +#endif /* _LINUX_LIST_H */ + diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/sched.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/sched.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,38 @@ +#ifndef __SCHED_H__ +#define __SCHED_H__ + +#include <list.h> + +struct thread +{ + char *name; + char *stack; + unsigned long eps; + unsigned long eip; + struct list_head thread_list; + u32 flags; +}; + + + +void init_sched(void); +void run_idle_thread(void); +struct thread* create_thread(char *name, void (*function)(void *), void *data); +void schedule(void); + +static inline struct thread* get_current(void) +{ + struct thread **current; +#ifdef __i386__ + __asm__("andl %%esp,%0; ":"=r" (current) : "r" (~8191UL)); +#endif + return *current; +} + +#define current get_current() + + +void wake(struct thread *thread); +void block(struct thread *thread); + +#endif /* __SCHED_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/semaphore.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/semaphore.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,46 @@ +#ifndef _SEMAPHORE_H_ +#define _SEMAPHORE_H_ + +#include <wait.h> + +/* + * Implementation of semaphore in Mini-os is simple, because + * there are no preemptive threads, the atomicity is guaranteed. 
+ */ + +struct semaphore +{ + int count; + struct wait_queue_head wait; +}; + + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .count = n, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} + +#define __MUTEX_INITIALIZER(name) \ + __SEMAPHORE_INITIALIZER(name,1) + +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) + +#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) + +#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) + +static void inline down(struct semaphore *sem) +{ + wait_event(sem->wait, sem->count > 0); + sem->count--; +} + +static void inline up(struct semaphore *sem) +{ + sem->count++; + wake_up(&sem->wait); +} + +#endif /* _SEMAPHORE_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/wait.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/wait.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,91 @@ +#ifndef __WAIT_H__ +#define __WAIT_H__ + +#include <sched.h> +#include <list.h> +#include <lib.h> +#include <os.h> + +struct wait_queue +{ + struct thread *thread; + struct list_head thread_list; +}; + +struct wait_queue_head +{ + /* TODO - lock required? 
*/ + struct list_head thread_list; +}; + +#define DECLARE_WAIT_QUEUE_HEAD(name) \ + struct wait_queue_head name = \ + { .thread_list = { &(name).thread_list, &(name).thread_list} } + +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ + .thread_list = { &(name).thread_list, &(name).thread_list } } + + +#define DEFINE_WAIT(name) \ +struct wait_queue name = { \ + .thread = current, \ + .thread_list = LIST_HEAD_INIT((name).thread_list), \ +} + + + +static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread) +{ + q->thread = thread; +} + + +static inline void add_wait_queue(struct wait_queue_head *h, struct wait_queue *q) +{ + if (list_empty(&q->thread_list)) + list_add(&q->thread_list, &h->thread_list); +} + +static inline void remove_wait_queue(struct wait_queue *q) +{ + list_del(&q->thread_list); +} + +static inline void wake_up(struct wait_queue_head *head) +{ + struct list_head *tmp, *next; + list_for_each_safe(tmp, next, &head->thread_list) + { + struct wait_queue *curr; + curr = list_entry(tmp, struct wait_queue, thread_list); + wake(curr->thread); + } +} + +#define wait_event(wq, condition) do{ \ + unsigned long flags; \ + if(condition) \ + break; \ + DEFINE_WAIT(__wait); \ + for(;;) \ + { \ + /* protect the list */ \ + local_irq_save(flags); \ + add_wait_queue(&wq, &__wait); \ + block(current); \ + local_irq_restore(flags); \ + if(condition) \ + break; \ + schedule(); \ + } \ + local_irq_save(flags); \ + /* need to wake up */ \ + wake(current); \ + remove_wait_queue(&__wait); \ + local_irq_restore(flags); \ +} while(0) + + + + +#endif /* __WAIT_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/include/xenbus.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/include/xenbus.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,89 @@ +/****************************************************************************** + * xenbus.h + * + * Talks to Xen Store to figure out what devices we have. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_XEN_XENBUS_H +#define _ASM_XEN_XENBUS_H + + +/* Caller must hold this lock to call these functions: it's also held + * across watch callbacks. 
*/ +// TODO +//extern struct semaphore xenbus_lock; + +char **xenbus_directory(const char *dir, const char *node, unsigned int *num); +void *xenbus_read(const char *dir, const char *node, unsigned int *len); +int xenbus_write(const char *dir, const char *node, + const char *string, int createflags); +int xenbus_mkdir(const char *dir, const char *node); +int xenbus_exists(const char *dir, const char *node); +int xenbus_rm(const char *dir, const char *node); +int xenbus_transaction_start(const char *subtree); +int xenbus_transaction_end(int abort); + +/* Single read and scanf: returns -errno or num scanned if > 0. */ +int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(scanf, 3, 4))); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +/* Generic read function: NULL-terminated triples of name, + * sprintf-style type string, and pointer. Returns 0 or errno.*/ +int xenbus_gather(const char *dir, ...); + +/* Register callback to watch this node. */ +struct xenbus_watch +{ + struct list_head list; + char *node; + void (*callback)(struct xenbus_watch *, const char *node); +}; + +int register_xenbus_watch(struct xenbus_watch *watch); +void unregister_xenbus_watch(struct xenbus_watch *watch); +void reregister_xenbus_watches(void); + +/* Called from xen core code. 
*/ +void xenbus_suspend(void); +void xenbus_resume(void); + +#define XENBUS_IS_ERR_READ(str) ({ \ + if (!IS_ERR(str) && strlen(str) == 0) { \ + kfree(str); \ + str = ERR_PTR(-ERANGE); \ + } \ + IS_ERR(str); \ +}) + +#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) + +int xs_init(void); + +#endif /* _ASM_XEN_XENBUS_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/sched.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/sched.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,300 @@ +/* + **************************************************************************** + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge + **************************************************************************** + * + * File: sched.c + * Author: Grzegorz Milos + * Changes: + * + * Date: Aug 2005 + * + * Environment: Xen Minimal OS + * Description: simple scheduler for Mini-Os + * + * The scheduler is non-preemptive (cooperative), and schedules according + * to Round Robin algorithm. + * + **************************************************************************** + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <os.h> +#include <hypervisor.h> +#include <time.h> +#include <mm.h> +#include <types.h> +#include <lib.h> +#include <xmalloc.h> +#include <list.h> +#include <sched.h> +#include <semaphore.h> + +#ifdef SCHED_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=sched.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) ((void)0) +#endif + + +#define RUNNABLE_FLAG 0x00000001 + +#define is_runnable(_thread) (_thread->flags & RUNNABLE_FLAG) +#define set_runnable(_thread) (_thread->flags |= RUNNABLE_FLAG) +#define clear_runnable(_thread) (_thread->flags &= ~RUNNABLE_FLAG) + + +struct thread *idle_thread; +LIST_HEAD(exited_threads); + +void dump_stack(struct thread *thread) +{ + unsigned long *bottom = (unsigned long *)thread->stack + 2048; + unsigned long *pointer = (unsigned long *)thread->eps; + int count; + printk("The stack for \"%s\"\n", thread->name); + for(count = 0; count < 15 && pointer < bottom; count ++) + { + printk("[0x%lx] 0x%lx\n", pointer, *pointer); + pointer++; + } + + if(pointer < bottom) printk("Not the whole stack printed\n"); +} + +#ifdef __i386__ +#define switch_threads(prev, next) do { \ + unsigned long esi,edi; \ + __asm__ __volatile__("pushfl\n\t" \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %4,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %5\n\t" /* restore EIP */ \ + "ret\n\t" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popfl" \ + :"=m" (prev->eps),"=m" (prev->eip), \ + "=S" (esi),"=D" (edi) \ + :"m" (next->eps),"m" (next->eip), \ + "2" (prev), "d" (next)); \ +} while (0) +#elif __x86_64__ +/* FIXME */ +#endif + +void inline print_runqueue(void) +{ + struct list_head *it; + struct 
thread *th; + list_for_each(it, &idle_thread->thread_list) + { + th = list_entry(it, struct thread, thread_list); + printk(" Thread \"%s\", runnable=%d\n", th->name, is_runnable(th)); + } + printk("\n"); +} + + +void schedule(void) +{ + struct thread *prev, *next, *thread; + struct list_head *iterator; + unsigned long flags; + prev = current; + local_irq_save(flags); + list_for_each(iterator, &exited_threads) + { + thread = list_entry(iterator, struct thread, thread_list); + if(thread != prev) + { + list_del(&thread->thread_list); + free_pages(thread->stack, 1); + xfree(thread); + } + } + next = idle_thread; + /* Thread list needs to be protected */ + list_for_each(iterator, &idle_thread->thread_list) + { + thread = list_entry(iterator, struct thread, thread_list); + if(is_runnable(thread)) + { + next = thread; + /* Put this thread on the end of the list */ + list_del(&thread->thread_list); + list_add_tail(&thread->thread_list, &idle_thread->thread_list); + break; + } + } + local_irq_restore(flags); + /* Interrupting the switch is equivalent to having the next thread + inturrupted at the return instruction. And therefore at safe point. 
*/ +/* The thread switching only works for i386 at the moment */ +#ifdef __i386__ + if(prev != next) switch_threads(prev, next); +#endif +} + + + +void exit_thread(struct thread *thread) +{ + unsigned long flags; + printk("Thread \"%s\" exited.\n", thread->name); + local_irq_save(flags); + /* Remove from the thread list */ + list_del(&thread->thread_list); + clear_runnable(thread); + /* Put onto exited list */ + list_add(&thread->thread_list, &exited_threads); + local_irq_restore(flags); + /* Schedule will free the resources */ + schedule(); +} + + +struct thread* create_thread(char *name, void (*function)(void *), void *data) +{ + struct thread *thread; + unsigned long flags; + + thread = xmalloc(struct thread); + /* Allocate 2 pages for stack, stack will be 2pages aligned */ + thread->stack = (char *)alloc_pages(1); + thread->name = name; + printk("Thread \"%s\": pointer: 0x%lx, stack: 0x%lx\n", name, thread, + thread->stack); + + thread->eps = (unsigned long)thread->stack + 4096 * 2 - 4; + /* Save pointer to the thread on the stack, used by current macro */ + *((unsigned long *)thread->stack) = (unsigned long)thread; + *((unsigned long *)thread->eps) = (unsigned long)thread; + thread->eps -= 4; + *((unsigned long *)thread->eps) = (unsigned long)data; + + /* No return address */ + thread->eps -= 4; + *((unsigned long *)thread->eps) = (unsigned long)exit_thread; + + thread->eip = (unsigned long)function; + + /* Not runable, not exited */ + thread->flags = 0; + set_runnable(thread); + + local_irq_save(flags); + if(idle_thread != NULL) + list_add_tail(&thread->thread_list, &idle_thread->thread_list); + local_irq_restore(flags); + + return thread; +} + + +void block(struct thread *thread) +{ + clear_runnable(thread); +} + +void wake(struct thread *thread) +{ + set_runnable(thread); +} + +void idle_thread_fn(void *unused) +{ + for(;;) + { + schedule(); + printk("Blocking the domain\n"); + block_domain(10000); + } +} + +void run_idle_thread(void) +{ + /* Switch stacks 
and run the thread */ + __asm__ __volatile__("mov %0,%%esp\n\t" + "push %1\n\t" + "ret" + :"=m" (idle_thread->eps) + :"m" (idle_thread->eip)); +} + + + +DECLARE_MUTEX(mutex); + +void th_f1(void *data) +{ + struct timeval tv1, tv2; + + for(;;) + { + down(&mutex); + printk("Thread \"%s\" got semaphore, runnable %d\n", current->name, is_runnable(current)); + schedule(); + printk("Thread \"%s\" releases the semaphore\n", current->name); + up(&mutex); + + + gettimeofday(&tv1); + for(;;) + { + gettimeofday(&tv2); + if(tv2.tv_sec - tv1.tv_sec > 2) break; + } + + + schedule(); + } +} + +void th_f2(void *data) +{ + for(;;) + { + printk("Thread OTHER executing, data 0x%lx\n", data); + schedule(); + } +} + + + +void init_sched(void) +{ + printk("Initialising scheduler\n"); + + idle_thread = create_thread("Idle", idle_thread_fn, NULL); + INIT_LIST_HEAD(&idle_thread->thread_list); + + +/* create_thread("1", th_f1, (void *)0x1234); + create_thread("2", th_f1, (void *)0x1234); + create_thread("3", th_f1, (void *)0x1234); + create_thread("4", th_f1, (void *)0x1234); + create_thread("5", th_f1, (void *)0x1234); + create_thread("6", th_f1, (void *)0x1234); + create_thread("second", th_f2, NULL); +*/ +} + diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/xenbus/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/xenbus/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,9 @@ +all: xenstore.h xenbus_comms.o xenbus_xs.o xenbus_probe.o + +xenstore.h: + [ -e xenstored.h ] || ln -sf ../../../tools/xenstore/xenstored.h xenstored.h + +clean: + #Taken care of by main Makefile + #rm xenstored.h + #rm *.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/xenbus/xenbus_comms.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/xenbus/xenbus_comms.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,231 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talks to Xen Store: ringbuffer and event channel. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include <types.h> +#include <wait.h> +#include <mm.h> +#include <hypervisor.h> +#include <events.h> +#include <os.h> +#include <lib.h> + + +#ifdef XENBUS_COMMS_DEBUG +#define DEBUG(_f, _a...) \ + printk("MINI_OS(file=xenbus_comms.c, line=%d) " _f "\n", __LINE__, ## _a) +#else +#define DEBUG(_f, _a...) 
((void)0) +#endif + + +#define RINGBUF_DATASIZE ((PAGE_SIZE / 2) - sizeof(struct ringbuf_head)) +struct ringbuf_head +{ + u32 write; /* Next place to write to */ + u32 read; /* Next place to read from */ + u8 flags; + char buf[0]; +} __attribute__((packed)); + +DECLARE_WAIT_QUEUE_HEAD(xb_waitq); + +static inline struct ringbuf_head *outbuf(void) +{ + return mfn_to_virt(start_info.store_mfn); +} + +static inline struct ringbuf_head *inbuf(void) +{ + return (struct ringbuf_head *)((char *)mfn_to_virt(start_info.store_mfn) + PAGE_SIZE/2); +} + +static void wake_waiting(int port, struct pt_regs *regs) +{ + wake_up(&xb_waitq); +} + +static int check_buffer(const struct ringbuf_head *h) +{ + return (h->write < RINGBUF_DATASIZE && h->read < RINGBUF_DATASIZE); +} + +/* We can't fill last byte: would look like empty buffer. */ +static void *get_output_chunk(const struct ringbuf_head *h, + void *buf, u32 *len) +{ + u32 read_mark; + + if (h->read == 0) + read_mark = RINGBUF_DATASIZE - 1; + else + read_mark = h->read - 1; + + /* Here to the end of buffer, unless they haven't read some out. */ + *len = RINGBUF_DATASIZE - h->write; + if (read_mark >= h->write) + *len = read_mark - h->write; + return (void *)((char *)buf + h->write); +} + +static const void *get_input_chunk(const struct ringbuf_head *h, + const void *buf, u32 *len) +{ + /* Here to the end of buffer, unless they haven't written some. 
*/ + *len = RINGBUF_DATASIZE - h->read; + if (h->write >= h->read) + *len = h->write - h->read; + return (void *)((char *)buf + h->read); +} + +static void update_output_chunk(struct ringbuf_head *h, u32 len) +{ + h->write += len; + if (h->write == RINGBUF_DATASIZE) + h->write = 0; +} + +static void update_input_chunk(struct ringbuf_head *h, u32 len) +{ + h->read += len; + if (h->read == RINGBUF_DATASIZE) + h->read = 0; +} + +static int output_avail(struct ringbuf_head *out) +{ + unsigned int avail; + + get_output_chunk(out, out->buf, &avail); + return avail != 0; +} + +int xb_write(const void *data, unsigned len) +{ + struct ringbuf_head h; + struct ringbuf_head *out = outbuf(); + + do { + void *dst; + unsigned int avail; + + wait_event(xb_waitq, output_avail(out)); + + /* Read, then check: not that we don't trust store. + * Hell, some of my best friends are daemons. But, + * in this post-911 world... */ + h = *out; + mb(); + if (!check_buffer(&h)) { + return -1; /* ETERRORIST! */ + } + + dst = get_output_chunk(&h, out->buf, &avail); + if (avail > len) + avail = len; + memcpy(dst, data, avail); + data = (void *)((char *)data + avail); + len -= avail; + update_output_chunk(out, avail); + notify_via_evtchn(start_info.store_evtchn); + } while (len != 0); + + return 0; +} + +int xs_input_avail(void) +{ + unsigned int avail; + struct ringbuf_head *in = inbuf(); + + get_input_chunk(in, in->buf, &avail); + return avail != 0; +} + +int xb_read(void *data, unsigned len) +{ + struct ringbuf_head h; + struct ringbuf_head *in = inbuf(); + int was_full; + + while (len != 0) { + unsigned int avail; + const char *src; + + wait_event(xb_waitq, xs_input_avail()); + h = *in; + mb(); + if (!check_buffer(&h)) { + return -1; + } + + src = get_input_chunk(&h, in->buf, &avail); + if (avail > len) + avail = len; + was_full = !output_avail(&h); + + memcpy(data, src, avail); + data = (void *)((char *)data + avail); + len -= avail; + update_input_chunk(in, avail); + DEBUG("Finished read of 
%i bytes (%i to go)\n", avail, len); + /* If it was full, tell them we've taken some. */ + if (was_full) + notify_via_evtchn(start_info.store_evtchn); + } + + /* If we left something, wake watch thread to deal with it. */ + if (xs_input_avail()) + wake_up(&xb_waitq); + + return 0; +} + +/* Set up interrupt handler off store event channel. */ +int xb_init_comms(void) +{ + printk("Init xenbus comms, store event channel %d\n", start_info.store_evtchn); + if (!start_info.store_evtchn) + return 0; + printk("Binding virq\n"); + bind_evtchn(start_info.store_evtchn, &wake_waiting); + + /* FIXME zero out page -- domain builder should probably do this*/ + memset(mfn_to_virt(start_info.store_mfn), 0, PAGE_SIZE); + notify_via_evtchn(start_info.store_evtchn); + return 0; +} + +void xb_suspend_comms(void) +{ + + if (!start_info.store_evtchn) + return; + + // TODO + //unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/xenbus/xenbus_comms.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/xenbus/xenbus_comms.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,40 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +int xb_init_comms(void); +void xb_suspend_comms(void); + +/* Low level routines. */ +int xb_write(const void *data, unsigned len); +int xb_read(void *data, unsigned len); +int xs_input_avail(void); +extern struct wait_queue_head xb_waitq; + +#endif /* _XENBUS_COMMS_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 extras/mini-os/xenbus/xenbus_xs.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/extras/mini-os/xenbus/xenbus_xs.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,554 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include <errno.h> +#include <types.h> +#include <list.h> +#include <lib.h> +#include <err.h> +#include <os.h> +#include <xmalloc.h> +#include <fcntl.h> +#include <xenbus.h> +#include <wait.h> +#include <sched.h> +#include <semaphore.h> +#include "xenstored.h" +#include "xenbus_comms.h" + +#define streq(a, b) (strcmp((a), (b)) == 0) + +static char printf_buffer[4096]; +static LIST_HEAD(watches); +//TODO +DECLARE_MUTEX(xenbus_lock); + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; !streq(errorstring, xsd_errors[i].errstring); i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + printk("XENBUS xen store gave: unknown error %s", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret; + int err; + + err = xb_read(&msg, sizeof(msg)); + if (err) + return ERR_PTR(err); + + ret = xmalloc_array(char, msg.len + 1); + if (!ret) + return ERR_PTR(-ENOMEM); + + err = xb_read(ret, msg.len); + if (err) { + xfree(ret); + return ERR_PTR(err); + } + ((char*)ret)[msg.len] = '\0'; + + *type = msg.type; + if (len) + *len = msg.len; + return ret; +} + +/* Emergency write. */ +void xenbus_debug_write(const char *str, unsigned int count) +{ + struct xsd_sockmsg msg; + + msg.type = XS_DEBUG; + msg.len = sizeof("print") + count + 1; + + xb_write(&msg, sizeof(msg)); + xb_write("print", sizeof("print")); + xb_write(str, count); + xb_write("", 1); +} + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. 
*/ +static void *xs_talkv(enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int err; + + //WARN_ON(down_trylock(&xenbus_lock) == 0); + + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + err = xb_write(&msg, sizeof(msg)); + if (err) + return ERR_PTR(err); + + for (i = 0; i < num_vecs; i++) { + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; + if (err) + return ERR_PTR(err); + } + + /* Watches can have fired before reply comes: daemon detects + * and re-transmits, so we can ignore this. */ + do { + xfree(ret); + ret = read_reply(&msg.type, len); + if (IS_ERR(ret)) + return ret; + } while (msg.type == XS_WATCH_EVENT); + + if (msg.type == XS_ERROR) { + err = get_error(ret); + xfree(ret); + return ERR_PTR(-err); + } + + //BUG_ON(msg.type != type); + return ret; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(enum xsd_sockmsg_type type, + const char *string, unsigned int *len) +{ + struct kvec iovec; + + iovec.iov_base = (void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(type, &iovec, 1, len); +} + +/* Many commands only need an ack, don't care what it says. */ +static int xs_error(char *reply) +{ + if (IS_ERR(reply)) + return PTR_ERR(reply); + xfree(reply); + return 0; +} + +static unsigned int count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. 
*/ +static char *join(const char *dir, const char *name) +{ + static char buffer[4096]; + + //BUG_ON(down_trylock(&xenbus_lock) == 0); + /* XXX FIXME: might not be correct if name == "" */ + //BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer)); + + strcpy(buffer, dir); + if (!streq(name, "")) { + strcat(buffer, "/"); + strcat(buffer, name); + } + return buffer; +} + +char **xenbus_directory(const char *dir, const char *node, unsigned int *num) +{ + char *strings, *p, **ret; + unsigned int len; + + strings = xs_single(XS_DIRECTORY, join(dir, node), &len); + if (IS_ERR(strings)) + return (char **)strings; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = (char **)xmalloc_array(char, *num * sizeof(char *) + len); + if (!ret) { + xfree(strings); + return ERR_PTR(-ENOMEM); + } + memcpy(&ret[*num], strings, len); + xfree(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + return ret; +} + +/* Check if a path exists. Return 1 if it does. */ +int xenbus_exists(const char *dir, const char *node) +{ + char **d; + int dir_n; + + d = xenbus_directory(dir, node, &dir_n); + if (IS_ERR(d)) + return 0; + xfree(d); + return 1; +} + +/* Get the value of a single file. + * Returns a kmalloced value: call free() on it after use. + * len indicates length in bytes. + */ +void *xenbus_read(const char *dir, const char *node, unsigned int *len) +{ + return xs_single(XS_READ, join(dir, node), len); +} + +/* Write the value of a single file. + * Returns -err on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL. + */ +int xenbus_write(const char *dir, const char *node, + const char *string, int createflags) +{ + const char *flags, *path; + struct kvec iovec[3]; + + path = join(dir, node); + /* Format: Flags (as string), path, data. 
*/ + if (createflags == 0) + flags = XS_WRITE_NONE; + else if (createflags == O_CREAT) + flags = XS_WRITE_CREATE; + else if (createflags == (O_CREAT|O_EXCL)) + flags = XS_WRITE_CREATE_EXCL; + else + return -EINVAL; + + iovec[0].iov_base = (void *)path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)flags; + iovec[1].iov_len = strlen(flags) + 1; + iovec[2].iov_base = (void *)string; + iovec[2].iov_len = strlen(string); + + return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); +} + +/* Create a new directory. */ +int xenbus_mkdir(const char *dir, const char *node) +{ + return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL)); +} + +/* Destroy a file or directory (directories must be empty). */ +int xenbus_rm(const char *dir, const char *node) +{ + return xs_error(xs_single(XS_RM, join(dir, node), NULL)); +} + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + * Transaction only applies to the given subtree. + * You can only have one transaction at any time. + */ +int xenbus_transaction_start(const char *subtree) +{ + return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL)); +} + +/* End a transaction. + * If abandon is true, transaction is discarded instead of committed. + */ +int xenbus_transaction_end(int abort) +{ + char abortstr[2]; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL)); +} + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + xfree(val); + /* Distinctive errno. 
*/ + if (ret == 0) + return -ERANGE; + return ret; +} + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + + //BUG_ON(down_trylock(&xenbus_lock) == 0); + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap); + va_end(ap); + + //BUG_ON(ret > sizeof(printf_buffer)-1); + return xenbus_write(dir, node, printf_buffer, O_CREAT); +} + + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + xfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL)); +} + +static char *xs_read_watch(char **token) +{ + enum xsd_sockmsg_type type; + char *ret; + + ret = read_reply(&type, NULL); + if (IS_ERR(ret)) + return ret; + + //BUG_ON(type != XS_WATCH_EVENT); + *token = ret + strlen(ret) + 1; + return ret; +} + +static int xs_acknowledge_watch(const char *token) +{ + return xs_error(xs_single(XS_WATCH_ACK, token, NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (char *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (char *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XS_UNWATCH, iov, 
ARRAY_SIZE(iov), NULL)); +} + +/* A little paranoia: we don't just trust token. */ +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + list_for_each_entry(i, &watches, list) + if (i == cmp) + return i; + return NULL; +} + +/* Register callback to watch this node. */ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + //BUG_ON(find_watch(token)); +printk("Registered watch for: %s\n", token); + err = xs_watch(watch->node, token); + if (!err) + list_add(&watch->list, &watches); + return err; +} + +void unregister_xenbus_watch(struct xenbus_watch *watch) +{ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + //BUG_ON(!find_watch(token)); + + err = xs_unwatch(watch->node, token); + list_del(&watch->list); + + if (err) + printk("XENBUS Failed to release watch %s: %i\n", + watch->node, err); +} + +/* Re-register callbacks to all watches. */ +void reregister_xenbus_watches(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } +} + +void watch_thread(void *unused) +{ + for (;;) { + char *token; + char *node = NULL; + + wait_event(xb_waitq, xs_input_avail()); + + /* If this is a spurious wakeup caused by someone + * doing an op, they'll hold the lock and the buffer + * will be empty by the time we get there. 
+ */ + down(&xenbus_lock); + if (xs_input_avail()) + node = xs_read_watch(&token); + + if (node && !IS_ERR(node)) { + struct xenbus_watch *w; + int err; + + err = xs_acknowledge_watch(token); + if (err) + printk("XENBUS ack %s fail %i\n", node, err); + w = find_watch(token); + //BUG_ON(!w); + w->callback(w, node); + xfree(node); + } else + printk("XENBUS xs_read_watch: %li\n", PTR_ERR(node)); + up(&xenbus_lock); + } +} + + +static void ballon_changed(struct xenbus_watch *watch, const char *node) +{ + unsigned long new_target; + int err; + err = xenbus_scanf("memory", "target", "%lu", &new_target); + + if(err != 1) + { + printk("Unable to read memory/target\n"); + return; + } + + printk("Memory target changed to: %ld bytes, ignoring.\n", new_target); +} + + +static struct xenbus_watch ballon_watch = { + .node = "memory/target", + .callback = ballon_changed, +}; + + + +int xs_init(void) +{ + int err; + struct thread *watcher; + printk("xb_init_comms\n"); + err = xb_init_comms(); + if (err) + return err; + + watcher = create_thread("kxwatch", watch_thread, NULL); + down(&xenbus_lock); + register_xenbus_watch(&ballon_watch); + up(&xenbus_lock); + return 0; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/Kconfig.domU --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/Kconfig.domU Fri Sep 9 16:30:54 2005 @@ -0,0 +1,30 @@ +# +# TPM device configuration +# + +menu "TPM devices" + +config TCG_TPM + tristate "TPM Support for XEN" + depends on ARCH_XEN && !XEN_PHYSDEV_ACCESS + ---help--- + If you want to make TPM security available in your system, + say Yes and it will be accessible from within a user domain. For + more information see <http://www.trustedcomputinggroup.org>. + An implementation of the Trusted Software Stack (TSS), the + userspace enablement piece of the specification, can be + obtained at: <http://sourceforge.net/projects/trousers>. 
To + compile this driver as a module, choose M here; the module + will be called tpm. If unsure, say N. + +config TCG_XEN + tristate "XEN TPM Interface" + depends on TCG_TPM && ARCH_XEN && XEN_TPMDEV_FRONTEND + ---help--- + If you want to make TPM support available to a Xen + user domain, say Yes and it will + be accessible from within Linux. To compile this driver + as a module, choose M here; the module will be called + tpm_xen. + +endmenu diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,12 @@ +# +# Makefile for the kernel tpm device drivers. +# +ifeq ($(CONFIG_XEN_PHYSDEV_ACCESS),y) +obj-$(CONFIG_TCG_TPM) += tpm.o +obj-$(CONFIG_TCG_NSC) += tpm_nsc.o +obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o +obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o +else +obj-$(CONFIG_TCG_TPM) += tpm_nopci.o +obj-$(CONFIG_TCG_XEN) += tpm_xen.o +endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,627 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * Note, the TPM chip is not interrupt driven (only polling) + * and can have very long timeouts (minutes!). Hence the unusual + * calls to schedule_timeout. 
+ * + */ + +#include <linux/sched.h> +#include <linux/poll.h> +#include <linux/spinlock.h> +#include "tpm.h" + +#define TPM_MINOR 224 /* officially assigned */ + +#define TPM_BUFSIZE 2048 + +static LIST_HEAD(tpm_chip_list); +static DEFINE_SPINLOCK(driver_lock); +static int dev_mask[32]; + +static void user_reader_timeout(unsigned long ptr) +{ + struct tpm_chip *chip = (struct tpm_chip *) ptr; + + down(&chip->buffer_mutex); + atomic_set(&chip->data_pending, 0); + memset(chip->data_buffer, 0, TPM_BUFSIZE); + up(&chip->buffer_mutex); +} + +void tpm_time_expired(unsigned long ptr) +{ + int *exp = (int *) ptr; + *exp = 1; +} + +EXPORT_SYMBOL_GPL(tpm_time_expired); + +/* + * Internal kernel interface to transmit TPM commands + */ +static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, + size_t bufsiz) +{ + ssize_t len; + u32 count; + __be32 *native_size; + + native_size = (__force __be32 *) (buf + 2); + count = be32_to_cpu(*native_size); + + if (count == 0) + return -ENODATA; + if (count > bufsiz) { + dev_err(&chip->pci_dev->dev, + "invalid count value %x %zx \n", count, bufsiz); + return -E2BIG; + } + + down(&chip->tpm_mutex); + + if ((len = chip->vendor->send(chip, (u8 *) buf, count)) < 0) { + dev_err(&chip->pci_dev->dev, + "tpm_transmit: tpm_send: error %zd\n", len); + return len; + } + + down(&chip->timer_manipulation_mutex); + chip->time_expired = 0; + init_timer(&chip->device_timer); + chip->device_timer.function = tpm_time_expired; + chip->device_timer.expires = jiffies + 2 * 60 * HZ; + chip->device_timer.data = (unsigned long) &chip->time_expired; + add_timer(&chip->device_timer); + up(&chip->timer_manipulation_mutex); + + do { + u8 status = inb(chip->vendor->base + 1); + if ((status & chip->vendor->req_complete_mask) == + chip->vendor->req_complete_val) { + down(&chip->timer_manipulation_mutex); + del_singleshot_timer_sync(&chip->device_timer); + up(&chip->timer_manipulation_mutex); + goto out_recv; + } + set_current_state(TASK_UNINTERRUPTIBLE); + 
schedule_timeout(TPM_TIMEOUT); + rmb(); + } while (!chip->time_expired); + + + chip->vendor->cancel(chip); + dev_err(&chip->pci_dev->dev, "Time expired\n"); + up(&chip->tpm_mutex); + return -EIO; + +out_recv: + len = chip->vendor->recv(chip, (u8 *) buf, bufsiz); + if (len < 0) + dev_err(&chip->pci_dev->dev, + "tpm_transmit: tpm_recv: error %zd\n", len); + up(&chip->tpm_mutex); + return len; +} + +#define TPM_DIGEST_SIZE 20 +#define CAP_PCR_RESULT_SIZE 18 +static u8 cap_pcr[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 22, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 5, + 0, 0, 0, 4, + 0, 0, 1, 1 +}; + +#define READ_PCR_RESULT_SIZE 30 +static u8 pcrread[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 14, /* length */ + 0, 0, 0, 21, /* TPM_ORD_PcrRead */ + 0, 0, 0, 0 /* PCR index */ +}; + +static ssize_t show_pcrs(struct device *dev, char *buf) +{ + u8 data[READ_PCR_RESULT_SIZE]; + ssize_t len; + int i, j, index, num_pcrs; + char *str = buf; + + struct tpm_chip *chip = + pci_get_drvdata(container_of(dev, struct pci_dev, dev)); + if (chip == NULL) + return -ENODEV; + + memcpy(data, cap_pcr, sizeof(cap_pcr)); + if ((len = tpm_transmit(chip, data, sizeof(data))) + < CAP_PCR_RESULT_SIZE) + return len; + + num_pcrs = be32_to_cpu(*((__force __be32 *) (data + 14))); + + for (i = 0; i < num_pcrs; i++) { + memcpy(data, pcrread, sizeof(pcrread)); + index = cpu_to_be32(i); + memcpy(data + 10, &index, 4); + if ((len = tpm_transmit(chip, data, sizeof(data))) + < READ_PCR_RESULT_SIZE) + return len; + str += sprintf(str, "PCR-%02d: ", i); + for (j = 0; j < TPM_DIGEST_SIZE; j++) + str += sprintf(str, "%02X ", *(data + 10 + j)); + str += sprintf(str, "\n"); + } + return str - buf; +} + +static DEVICE_ATTR(pcrs, S_IRUGO, show_pcrs, NULL); + +#define READ_PUBEK_RESULT_SIZE 314 +static u8 readpubek[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 30, /* length */ + 0, 0, 0, 124, /* TPM_ORD_ReadPubek */ +}; + +static ssize_t show_pubek(struct device 
*dev, char *buf) +{ + u8 data[READ_PUBEK_RESULT_SIZE]; + ssize_t len; + __be32 *native_val; + int i; + char *str = buf; + + struct tpm_chip *chip = + pci_get_drvdata(container_of(dev, struct pci_dev, dev)); + if (chip == NULL) + return -ENODEV; + + memcpy(data, readpubek, sizeof(readpubek)); + memset(data + sizeof(readpubek), 0, 20); /* zero nonce */ + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + READ_PUBEK_RESULT_SIZE) + return len; + + /* + ignore header 10 bytes + algorithm 32 bits (1 == RSA ) + encscheme 16 bits + sigscheme 16 bits + parameters (RSA 12->bytes: keybit, #primes, expbit) + keylenbytes 32 bits + 256 byte modulus + ignore checksum 20 bytes + */ + + native_val = (__force __be32 *) (data + 34); + + str += + sprintf(str, + "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n" + "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X" + " %02X %02X %02X %02X %02X %02X %02X %02X\n" + "Modulus length: %d\nModulus: \n", + data[10], data[11], data[12], data[13], data[14], + data[15], data[16], data[17], data[22], data[23], + data[24], data[25], data[26], data[27], data[28], + data[29], data[30], data[31], data[32], data[33], + be32_to_cpu(*native_val) + ); + + for (i = 0; i < 256; i++) { + str += sprintf(str, "%02X ", data[i + 39]); + if ((i + 1) % 16 == 0) + str += sprintf(str, "\n"); + } + return str - buf; +} + +static DEVICE_ATTR(pubek, S_IRUGO, show_pubek, NULL); + +#define CAP_VER_RESULT_SIZE 18 +static u8 cap_version[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 18, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 6, + 0, 0, 0, 0 +}; + +#define CAP_MANUFACTURER_RESULT_SIZE 18 +static u8 cap_manufacturer[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 22, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 5, + 0, 0, 0, 4, + 0, 0, 1, 3 +}; + +static ssize_t show_caps(struct device *dev, char *buf) +{ + u8 data[READ_PUBEK_RESULT_SIZE]; + ssize_t len; + char *str = buf; + + struct tpm_chip 
*chip = + pci_get_drvdata(container_of(dev, struct pci_dev, dev)); + if (chip == NULL) + return -ENODEV; + + memcpy(data, cap_manufacturer, sizeof(cap_manufacturer)); + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + CAP_MANUFACTURER_RESULT_SIZE) + return len; + + str += sprintf(str, "Manufacturer: 0x%x\n", + be32_to_cpu(*(data + 14))); + + memcpy(data, cap_version, sizeof(cap_version)); + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + CAP_VER_RESULT_SIZE) + return len; + + str += + sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n", + (int) data[14], (int) data[15], (int) data[16], + (int) data[17]); + + return str - buf; +} + +static DEVICE_ATTR(caps, S_IRUGO, show_caps, NULL); + +/* + * Device file system interface to the TPM + */ +int tpm_open(struct inode *inode, struct file *file) +{ + int rc = 0, minor = iminor(inode); + struct tpm_chip *chip = NULL, *pos; + + spin_lock(&driver_lock); + + list_for_each_entry(pos, &tpm_chip_list, list) { + if (pos->vendor->miscdev.minor == minor) { + chip = pos; + break; + } + } + + if (chip == NULL) { + rc = -ENODEV; + goto err_out; + } + + if (chip->num_opens) { + dev_dbg(&chip->pci_dev->dev, + "Another process owns this TPM\n"); + rc = -EBUSY; + goto err_out; + } + + chip->num_opens++; + pci_dev_get(chip->pci_dev); + + spin_unlock(&driver_lock); + + chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL); + if (chip->data_buffer == NULL) { + chip->num_opens--; + pci_dev_put(chip->pci_dev); + return -ENOMEM; + } + + atomic_set(&chip->data_pending, 0); + + file->private_data = chip; + return 0; + +err_out: + spin_unlock(&driver_lock); + return rc; +} + +EXPORT_SYMBOL_GPL(tpm_open); + +int tpm_release(struct inode *inode, struct file *file) +{ + struct tpm_chip *chip = file->private_data; + + file->private_data = NULL; + + spin_lock(&driver_lock); + chip->num_opens--; + spin_unlock(&driver_lock); + + down(&chip->timer_manipulation_mutex); + if (timer_pending(&chip->user_read_timer)) + 
del_singleshot_timer_sync(&chip->user_read_timer); + else if (timer_pending(&chip->device_timer)) + del_singleshot_timer_sync(&chip->device_timer); + up(&chip->timer_manipulation_mutex); + + kfree(chip->data_buffer); + atomic_set(&chip->data_pending, 0); + + pci_dev_put(chip->pci_dev); + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_release); + +ssize_t tpm_write(struct file * file, const char __user * buf, + size_t size, loff_t * off) +{ + struct tpm_chip *chip = file->private_data; + int in_size = size, out_size; + + /* cannot perform a write until the read has cleared + either via tpm_read or a user_read_timer timeout */ + while (atomic_read(&chip->data_pending) != 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(TPM_TIMEOUT); + } + + down(&chip->buffer_mutex); + + if (in_size > TPM_BUFSIZE) + in_size = TPM_BUFSIZE; + + if (copy_from_user + (chip->data_buffer, (void __user *) buf, in_size)) { + up(&chip->buffer_mutex); + return -EFAULT; + } + + /* atomic tpm command send and result receive */ + out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE); + + atomic_set(&chip->data_pending, out_size); + atomic_set(&chip->data_position, 0); + up(&chip->buffer_mutex); + + /* Set a timeout by which the reader must come claim the result */ + down(&chip->timer_manipulation_mutex); + init_timer(&chip->user_read_timer); + chip->user_read_timer.function = user_reader_timeout; + chip->user_read_timer.data = (unsigned long) chip; + chip->user_read_timer.expires = jiffies + (60 * HZ); + add_timer(&chip->user_read_timer); + up(&chip->timer_manipulation_mutex); + + return in_size; +} + +EXPORT_SYMBOL_GPL(tpm_write); + +ssize_t tpm_read(struct file * file, char __user * buf, + size_t size, loff_t * off) +{ + struct tpm_chip *chip = file->private_data; + int ret_size = -ENODATA; + int pos, pending = 0; + + down(&chip->buffer_mutex); + ret_size = atomic_read(&chip->data_pending); + if ( ret_size > 0 ) { /* Result available */ + if (size < ret_size) + ret_size = size; 
+ + pos = atomic_read(&chip->data_position); + + if (copy_to_user((void __user *) buf, + &chip->data_buffer[pos], ret_size)) { + ret_size = -EFAULT; + } else { + pending = atomic_read(&chip->data_pending) - ret_size; + if ( pending ) { + atomic_set( &chip->data_pending, pending ); + atomic_set( &chip->data_position, pos+ret_size ); + } + } + } + up(&chip->buffer_mutex); + + if ( ret_size <= 0 || pending == 0 ) { + atomic_set( &chip->data_pending, 0 ); + down(&chip->timer_manipulation_mutex); + del_singleshot_timer_sync(&chip->user_read_timer); + up(&chip->timer_manipulation_mutex); + } + + return ret_size; +} + +EXPORT_SYMBOL_GPL(tpm_read); + +void __devexit tpm_remove(struct pci_dev *pci_dev) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + + if (chip == NULL) { + dev_err(&pci_dev->dev, "No device data found\n"); + return; + } + + spin_lock(&driver_lock); + + list_del(&chip->list); + + spin_unlock(&driver_lock); + + pci_set_drvdata(pci_dev, NULL); + misc_deregister(&chip->vendor->miscdev); + + device_remove_file(&pci_dev->dev, &dev_attr_pubek); + device_remove_file(&pci_dev->dev, &dev_attr_pcrs); + device_remove_file(&pci_dev->dev, &dev_attr_caps); + + pci_disable_device(pci_dev); + + dev_mask[chip->dev_num / 32] &= !(1 << (chip->dev_num % 32)); + + kfree(chip); + + pci_dev_put(pci_dev); +} + +EXPORT_SYMBOL_GPL(tpm_remove); + +static u8 savestate[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 10, /* blob length (in bytes) */ + 0, 0, 0, 152 /* TPM_ORD_SaveState */ +}; + +/* + * We are about to suspend. Save the TPM state + * so that it can be restored. + */ +int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + if (chip == NULL) + return -ENODEV; + + tpm_transmit(chip, savestate, sizeof(savestate)); + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_pm_suspend); + +/* + * Resume from a power safe. The BIOS already restored + * the TPM state. 
+ */ +int tpm_pm_resume(struct pci_dev *pci_dev) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + + if (chip == NULL) + return -ENODEV; + + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_pm_resume); + +/* + * Called from tpm_<specific>.c probe function only for devices + * the driver has determined it should claim. Prior to calling + * this function the specific probe function has called pci_enable_device + * upon errant exit from this function specific probe function should call + * pci_disable_device + */ +int tpm_register_hardware(struct pci_dev *pci_dev, + struct tpm_vendor_specific *entry) +{ + char devname[7]; + struct tpm_chip *chip; + int i, j; + + /* Driver specific per-device data */ + chip = kmalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + memset(chip, 0, sizeof(struct tpm_chip)); + + init_MUTEX(&chip->buffer_mutex); + init_MUTEX(&chip->tpm_mutex); + init_MUTEX(&chip->timer_manipulation_mutex); + INIT_LIST_HEAD(&chip->list); + + chip->vendor = entry; + + chip->dev_num = -1; + + for (i = 0; i < 32; i++) + for (j = 0; j < 8; j++) + if ((dev_mask[i] & (1 << j)) == 0) { + chip->dev_num = i * 32 + j; + dev_mask[i] |= 1 << j; + goto dev_num_search_complete; + } + +dev_num_search_complete: + if (chip->dev_num < 0) { + dev_err(&pci_dev->dev, + "No available tpm device numbers\n"); + kfree(chip); + return -ENODEV; + } else if (chip->dev_num == 0) + chip->vendor->miscdev.minor = TPM_MINOR; + else + chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR; + + snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num); + chip->vendor->miscdev.name = devname; + + chip->vendor->miscdev.dev = &(pci_dev->dev); + chip->pci_dev = pci_dev_get(pci_dev); + + if (misc_register(&chip->vendor->miscdev)) { + dev_err(&chip->pci_dev->dev, + "unable to misc_register %s, minor %d\n", + chip->vendor->miscdev.name, + chip->vendor->miscdev.minor); + pci_dev_put(pci_dev); + kfree(chip); + dev_mask[i] &= !(1 << j); + return -ENODEV; + } + + 
pci_set_drvdata(pci_dev, chip); + + list_add(&chip->list, &tpm_chip_list); + + device_create_file(&pci_dev->dev, &dev_attr_pubek); + device_create_file(&pci_dev->dev, &dev_attr_pcrs); + device_create_file(&pci_dev->dev, &dev_attr_caps); + + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_register_hardware); + +static int __init init_tpm(void) +{ + return 0; +} + +static void __exit cleanup_tpm(void) +{ + +} + +module_init(init_tpm); +module_exit(cleanup_tpm); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ * + */ +#include <linux/module.h> +#include <linux/version.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> + +#define TPM_TIMEOUT msecs_to_jiffies(5) + +/* TPM addresses */ +#define TPM_ADDR 0x4E +#define TPM_DATA 0x4F + +struct tpm_chip; + +struct tpm_vendor_specific { + u8 req_complete_mask; + u8 req_complete_val; + u16 base; /* TPM base address */ + + int (*recv) (struct tpm_chip *, u8 *, size_t); + int (*send) (struct tpm_chip *, u8 *, size_t); + void (*cancel) (struct tpm_chip *); + struct miscdevice miscdev; +}; + +struct tpm_chip { + struct pci_dev *pci_dev; /* PCI device stuff */ + + int dev_num; /* /dev/tpm# */ + int num_opens; /* only one allowed */ + int time_expired; + + /* Data passed to and from the tpm via the read/write calls */ + u8 *data_buffer; + atomic_t data_pending; + atomic_t data_position; + struct semaphore buffer_mutex; + + struct timer_list user_read_timer; /* user needs to claim result */ + struct semaphore tpm_mutex; /* tpm is processing */ + struct timer_list device_timer; /* tpm is processing */ + struct semaphore timer_manipulation_mutex; + + struct tpm_vendor_specific *vendor; + + struct list_head list; +}; + +static inline int tpm_read_index(int index) +{ + outb(index, TPM_ADDR); + return inb(TPM_DATA) & 0xFF; +} + +static inline void tpm_write_index(int index, int value) +{ + outb(index, TPM_ADDR); + outb(value & 0xFF, TPM_DATA); +} + +extern void tpm_time_expired(unsigned long); +extern int tpm_register_hardware(struct pci_dev *, + struct tpm_vendor_specific *); +extern int tpm_open(struct inode *, struct file *); +extern int tpm_release(struct inode *, struct file *); +extern ssize_t tpm_write(struct file *, const char __user *, size_t, + loff_t *); +extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *); +extern void __devexit tpm_remove(struct pci_dev *); +extern int tpm_pm_suspend(struct pci_dev *, pm_message_t); +extern int tpm_pm_resume(struct 
pci_dev *); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_atmel.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + */ + +#include "tpm.h" + +/* Atmel definitions */ +enum tpm_atmel_addr { + TPM_ATMEL_BASE_ADDR_LO = 0x08, + TPM_ATMEL_BASE_ADDR_HI = 0x09 +}; + +/* write status bits */ +#define ATML_STATUS_ABORT 0x01 +#define ATML_STATUS_LASTBYTE 0x04 + +/* read status bits */ +#define ATML_STATUS_BUSY 0x01 +#define ATML_STATUS_DATA_AVAIL 0x02 +#define ATML_STATUS_REWRITE 0x04 + + +static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count) +{ + u8 status, *hdr = buf; + u32 size; + int i; + __be32 *native_size; + + /* start reading header */ + if (count < 6) + return -EIO; + + for (i = 0; i < 6; i++) { + status = inb(chip->vendor->base + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(&chip->pci_dev->dev, + "error reading header\n"); + return -EIO; + } + *buf++ = inb(chip->vendor->base); + } + + /* size of the data received */ + native_size = (__force __be32 *) (hdr + 2); + size = be32_to_cpu(*native_size); + + if (count < size) { + dev_err(&chip->pci_dev->dev, + "Recv size(%d) less than available space\n", size); + for (; i < size; i++) { /* clear the waiting data anyway */ + status = inb(chip->vendor->base + 
1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(&chip->pci_dev->dev, + "error reading data\n"); + return -EIO; + } + } + return -EIO; + } + + /* read all the data available */ + for (; i < size; i++) { + status = inb(chip->vendor->base + 1); + if ((status & ATML_STATUS_DATA_AVAIL) == 0) { + dev_err(&chip->pci_dev->dev, + "error reading data\n"); + return -EIO; + } + *buf++ = inb(chip->vendor->base); + } + + /* make sure data available is gone */ + status = inb(chip->vendor->base + 1); + if (status & ATML_STATUS_DATA_AVAIL) { + dev_err(&chip->pci_dev->dev, "data available is stuck\n"); + return -EIO; + } + + return size; +} + +static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count) +{ + int i; + + dev_dbg(&chip->pci_dev->dev, "tpm_atml_send: "); + for (i = 0; i < count; i++) { + dev_dbg(&chip->pci_dev->dev, "0x%x(%d) ", buf[i], buf[i]); + outb(buf[i], chip->vendor->base); + } + + return count; +} + +static void tpm_atml_cancel(struct tpm_chip *chip) +{ + outb(ATML_STATUS_ABORT, chip->vendor->base + 1); +} + +static struct file_operations atmel_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static struct tpm_vendor_specific tpm_atmel = { + .recv = tpm_atml_recv, + .send = tpm_atml_send, + .cancel = tpm_atml_cancel, + .req_complete_mask = ATML_STATUS_BUSY | ATML_STATUS_DATA_AVAIL, + .req_complete_val = ATML_STATUS_DATA_AVAIL, + .miscdev = { .fops = &atmel_ops, }, +}; + +static int __devinit tpm_atml_init(struct pci_dev *pci_dev, + const struct pci_device_id *pci_id) +{ + u8 version[4]; + int rc = 0; + int lo, hi; + + if (pci_enable_device(pci_dev)) + return -EIO; + + lo = tpm_read_index( TPM_ATMEL_BASE_ADDR_LO ); + hi = tpm_read_index( TPM_ATMEL_BASE_ADDR_HI ); + + tpm_atmel.base = (hi<<8)|lo; + dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base); + + /* verify that it is an Atmel part */ + if (tpm_read_index(4) != 'A' 
|| tpm_read_index(5) != 'T' + || tpm_read_index(6) != 'M' || tpm_read_index(7) != 'L') { + rc = -ENODEV; + goto out_err; + } + + /* query chip for its version number */ + if ((version[0] = tpm_read_index(0x00)) != 0xFF) { + version[1] = tpm_read_index(0x01); + version[2] = tpm_read_index(0x02); + version[3] = tpm_read_index(0x03); + } else { + dev_info(&pci_dev->dev, "version query failed\n"); + rc = -ENODEV; + goto out_err; + } + + if ((rc = tpm_register_hardware(pci_dev, &tpm_atmel)) < 0) + goto out_err; + + dev_info(&pci_dev->dev, + "Atmel TPM version %d.%d.%d.%d\n", version[0], version[1], + version[2], version[3]); + + return 0; +out_err: + pci_disable_device(pci_dev); + return rc; +} + +static struct pci_device_id tpm_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)}, + {0,} +}; + +MODULE_DEVICE_TABLE(pci, tpm_pci_tbl); + +static struct pci_driver atmel_pci_driver = { + .name = "tpm_atmel", + .id_table = tpm_pci_tbl, + .probe = tpm_atml_init, + .remove = __devexit_p(tpm_remove), + .suspend = tpm_pm_suspend, + .resume = tpm_pm_resume, +}; + +static int __init init_atmel(void) +{ + return pci_register_driver(&atmel_pci_driver); +} + +static void __exit cleanup_atmel(void) +{ + pci_unregister_driver(&atmel_pci_driver); +} + +module_init(init_atmel); +module_exit(cleanup_atmel); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ 
b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,741 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * Note, the TPM chip is not interrupt driven (only polling) + * and can have very long timeouts (minutes!). Hence the unusual + * calls to schedule_timeout. + * + */ + +#include <linux/sched.h> +#include <linux/poll.h> +#include <linux/spinlock.h> +#include "tpm_nopci.h" + +enum { + TPM_MINOR = 224, /* officially assigned */ + TPM_BUFSIZE = 2048, + TPM_NUM_DEVICES = 256, + TPM_NUM_MASK_ENTRIES = TPM_NUM_DEVICES / (8 * sizeof(int)) +}; + + /* PCI configuration addresses */ +enum { + PCI_GEN_PMCON_1 = 0xA0, + PCI_GEN1_DEC = 0xE4, + PCI_LPC_EN = 0xE6, + PCI_GEN2_DEC = 0xEC +}; + +enum { + TPM_LOCK_REG = 0x0D, + TPM_INTERUPT_REG = 0x0A, + TPM_BASE_ADDR_LO = 0x08, + TPM_BASE_ADDR_HI = 0x09, + TPM_UNLOCK_VALUE = 0x55, + TPM_LOCK_VALUE = 0xAA, + TPM_DISABLE_INTERUPT_VALUE = 0x00 +}; + +static LIST_HEAD(tpm_chip_list); +static spinlock_t driver_lock = SPIN_LOCK_UNLOCKED; +static int dev_mask[32]; + +static void user_reader_timeout(unsigned long ptr) +{ + struct tpm_chip *chip = (struct tpm_chip *) ptr; + + down(&chip->buffer_mutex); + atomic_set(&chip->data_pending, 0); + memset(chip->data_buffer, 0, TPM_BUFSIZE); + up(&chip->buffer_mutex); +} + +void tpm_time_expired(unsigned long ptr) +{ + int *exp = (int *) ptr; + *exp = 1; +} + 
+EXPORT_SYMBOL_GPL(tpm_time_expired); + + +/* + * This function should be used by other kernel subsystems attempting to use the tpm through the tpm_transmit interface. + * A call to this function will return the chip structure corresponding to the TPM you are looking for that can then be sent with your command to tpm_transmit. + * Passing 0 as the argument corresponds to /dev/tpm0 and thus the first and probably primary TPM on the system. Passing 1 corresponds to /dev/tpm1 and the next TPM discovered. If a TPM with the given chip_num does not exist NULL will be returned. + */ +struct tpm_chip* tpm_chip_lookup(int chip_num) +{ + + struct tpm_chip *pos; + list_for_each_entry(pos, &tpm_chip_list, list) + if (pos->dev_num == chip_num || + chip_num == TPM_ANY_NUM) + return pos; + + return NULL; + +} + +/* + * Internal kernel interface to transmit TPM commands + */ +ssize_t tpm_transmit(struct tpm_chip * chip, const char *buf, + size_t bufsiz) +{ + ssize_t rc; + u32 count; + unsigned long stop; + + count = be32_to_cpu(*((__be32 *) (buf + 2))); + + if (count == 0) + return -ENODATA; + if (count > bufsiz) { + dev_err(chip->dev, + "invalid count value %x %x \n", count, bufsiz); + return -E2BIG; + } + + dev_dbg(chip->dev, "TPM Ordinal: %d\n", + be32_to_cpu(*((__be32 *) (buf + 6)))); + dev_dbg(chip->dev, "Chip Status: %x\n", + inb(chip->vendor->base + 1)); + + down(&chip->tpm_mutex); + + if ((rc = chip->vendor->send(chip, (u8 *) buf, count)) < 0) { + dev_err(chip->dev, + "tpm_transmit: tpm_send: error %d\n", rc); + goto out; + } + + stop = jiffies + 2 * 60 * HZ; + do { + u8 status = chip->vendor->status(chip); + if ((status & chip->vendor->req_complete_mask) == + chip->vendor->req_complete_val) { + goto out_recv; + } + + if ((status == chip->vendor->req_canceled)) { + dev_err(chip->dev, "Operation Canceled\n"); + rc = -ECANCELED; + goto out; + } + + msleep(TPM_TIMEOUT); /* CHECK */ + rmb(); + } + while (time_before(jiffies, stop)); + + + chip->vendor->cancel(chip); + 
dev_err(chip->dev, "Operation Timed out\n"); + rc = -ETIME; + goto out; + +out_recv: + rc = chip->vendor->recv(chip, (u8 *) buf, bufsiz); + if (rc < 0) + dev_err(chip->dev, + "tpm_transmit: tpm_recv: error %d\n", rc); + atomic_set(&chip->data_position, 0); + +out: + up(&chip->tpm_mutex); + return rc; +} + +EXPORT_SYMBOL_GPL(tpm_transmit); + +#define TPM_DIGEST_SIZE 20 +#define CAP_PCR_RESULT_SIZE 18 +static const u8 cap_pcr[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 22, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 5, + 0, 0, 0, 4, + 0, 0, 1, 1 +}; + +#define READ_PCR_RESULT_SIZE 30 +static const u8 pcrread[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 14, /* length */ + 0, 0, 0, 21, /* TPM_ORD_PcrRead */ + 0, 0, 0, 0 /* PCR index */ +}; + +ssize_t tpm_show_pcrs(struct device *dev, char *buf) +{ + u8 data[READ_PCR_RESULT_SIZE]; + ssize_t len; + int i, j, num_pcrs; + __be32 index; + char *str = buf; + + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return -ENODEV; + + memcpy(data, cap_pcr, sizeof(cap_pcr)); + if ((len = tpm_transmit(chip, data, sizeof(data))) + < CAP_PCR_RESULT_SIZE) + return len; + + num_pcrs = be32_to_cpu(*((__be32 *) (data + 14))); + + for (i = 0; i < num_pcrs; i++) { + memcpy(data, pcrread, sizeof(pcrread)); + index = cpu_to_be32(i); + memcpy(data + 10, &index, 4); + if ((len = tpm_transmit(chip, data, sizeof(data))) + < READ_PCR_RESULT_SIZE) + return len; + str += sprintf(str, "PCR-%02d: ", i); + for (j = 0; j < TPM_DIGEST_SIZE; j++) + str += sprintf(str, "%02X ", *(data + 10 + j)); + str += sprintf(str, "\n"); + } + return str - buf; +} + +EXPORT_SYMBOL_GPL(tpm_show_pcrs); + +/* + * Return 0 on success. On error pass along error code. 
+ * chip_id Upper 2 bytes equal ANY, HW_ONLY or SW_ONLY + * Lower 2 bytes equal tpm idx # or ANY + * res_buf must fit a TPM_PCR (20 bytes) or NULL if you don't care + */ +int tpm_pcr_read( u32 chip_id, int pcr_idx, u8* res_buf, int res_buf_size ) +{ + u8 data[READ_PCR_RESULT_SIZE]; + int rc; + __be32 index; + int chip_num = chip_id & TPM_CHIP_NUM_MASK; + struct tpm_chip* chip; + + if ( res_buf && res_buf_size < TPM_DIGEST_SIZE ) + return -ENOSPC; + if ( (chip = tpm_chip_lookup( chip_num /*, + chip_id >> TPM_CHIP_TYPE_SHIFT*/ ) ) == NULL ) { + printk("chip %d not found.\n",chip_num); + return -ENODEV; + } + memcpy(data, pcrread, sizeof(pcrread)); + index = cpu_to_be32(pcr_idx); + memcpy(data + 10, &index, 4); + if ((rc = tpm_transmit(chip, data, sizeof(data))) > 0 ) + rc = be32_to_cpu(*((u32*)(data+6))); + + if ( rc == 0 && res_buf ) + memcpy(res_buf, data+10, TPM_DIGEST_SIZE); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_pcr_read); + +#define EXTEND_PCR_SIZE 34 +static const u8 pcrextend[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 34, /* length */ + 0, 0, 0, 20, /* TPM_ORD_Extend */ + 0, 0, 0, 0 /* PCR index */ +}; + +/* + * Return 0 on success. On error pass along error code.
+ * chip_id Upper 2 bytes equal ANY, HW_ONLY or SW_ONLY + * Lower 2 bytes equal tpm idx # or ANY + */ +int tpm_pcr_extend(u32 chip_id, int pcr_idx, const u8* hash) +{ + u8 data[EXTEND_PCR_SIZE]; + int rc; + __be32 index; + int chip_num = chip_id & TPM_CHIP_NUM_MASK; + struct tpm_chip* chip; + + if ( (chip = tpm_chip_lookup( chip_num /*, + chip_id >> TPM_CHIP_TYPE_SHIFT */)) == NULL ) + return -ENODEV; + + memcpy(data, pcrextend, sizeof(pcrextend)); + index = cpu_to_be32(pcr_idx); + memcpy(data + 10, &index, 4); + memcpy( data + 14, hash, TPM_DIGEST_SIZE ); + if ((rc = tpm_transmit(chip, data, sizeof(data))) > 0 ) + rc = be32_to_cpu(*((u32*)(data+6))); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_pcr_extend); + + + +#define READ_PUBEK_RESULT_SIZE 314 +static const u8 readpubek[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 30, /* length */ + 0, 0, 0, 124, /* TPM_ORD_ReadPubek */ +}; + +ssize_t tpm_show_pubek(struct device *dev, char *buf) +{ + u8 *data; + ssize_t len; + int i, rc; + char *str = buf; + + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return -ENODEV; + + data = kmalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; + + memcpy(data, readpubek, sizeof(readpubek)); + memset(data + sizeof(readpubek), 0, 20); /* zero nonce */ + + if ((len = tpm_transmit(chip, data, READ_PUBEK_RESULT_SIZE)) < + READ_PUBEK_RESULT_SIZE) { + rc = len; + goto out; + } + + /* + ignore header 10 bytes + algorithm 32 bits (1 == RSA ) + encscheme 16 bits + sigscheme 16 bits + parameters (RSA 12->bytes: keybit, #primes, expbit) + keylenbytes 32 bits + 256 byte modulus + ignore checksum 20 bytes + */ + + str += + sprintf(str, + "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n" + "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X" + " %02X %02X %02X %02X %02X %02X %02X %02X\n" + "Modulus length: %d\nModulus: \n", + data[10], data[11], data[12], data[13], data[14], + data[15], data[16], data[17], data[22], data[23], + data[24], 
data[25], data[26], data[27], data[28], + data[29], data[30], data[31], data[32], data[33], + be32_to_cpu(*((__be32 *) (data + 32)))); + + for (i = 0; i < 256; i++) { + str += sprintf(str, "%02X ", data[i + 39]); + if ((i + 1) % 16 == 0) + str += sprintf(str, "\n"); + } + rc = str - buf; +out: + kfree(data); + return rc; +} + +EXPORT_SYMBOL_GPL(tpm_show_pubek); + +#define CAP_VER_RESULT_SIZE 18 +static const u8 cap_version[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 18, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 6, + 0, 0, 0, 0 +}; + +#define CAP_MANUFACTURER_RESULT_SIZE 18 +static const u8 cap_manufacturer[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 22, /* length */ + 0, 0, 0, 101, /* TPM_ORD_GetCapability */ + 0, 0, 0, 5, + 0, 0, 0, 4, + 0, 0, 1, 3 +}; + +ssize_t tpm_show_caps(struct device *dev, char *buf) +{ + u8 data[sizeof(cap_manufacturer)]; + ssize_t len; + char *str = buf; + + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return -ENODEV; + + memcpy(data, cap_manufacturer, sizeof(cap_manufacturer)); + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + CAP_MANUFACTURER_RESULT_SIZE) + return len; + + str += sprintf(str, "Manufacturer: 0x%x\n", + be32_to_cpu(*((__be32 *)(data + 14)))); + + memcpy(data, cap_version, sizeof(cap_version)); + + if ((len = tpm_transmit(chip, data, sizeof(data))) < + CAP_VER_RESULT_SIZE) + return len; + + str += + sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n", + (int) data[14], (int) data[15], (int) data[16], + (int) data[17]); + + return str - buf; +} + +EXPORT_SYMBOL_GPL(tpm_show_caps); + +ssize_t tpm_store_cancel(struct device * dev, const char *buf, + size_t count) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + if (chip == NULL) + return 0; + + chip->vendor->cancel(chip); + return count; +} + +EXPORT_SYMBOL_GPL(tpm_store_cancel); + +/* + * Device file system interface to the TPM + */ +int tpm_open(struct inode *inode, struct file *file) 
+{ + int rc = 0, minor = iminor(inode); + struct tpm_chip *chip = NULL, *pos; + + spin_lock(&driver_lock); + + list_for_each_entry(pos, &tpm_chip_list, list) { + if (pos->vendor->miscdev.minor == minor) { + chip = pos; + break; + } + } + + if (chip == NULL) { + rc = -ENODEV; + goto err_out; + } + + if (chip->num_opens) { + dev_dbg(chip->dev, "Another process owns this TPM\n"); + rc = -EBUSY; + goto err_out; + } + + chip->num_opens++; + get_device(chip->dev); + + spin_unlock(&driver_lock); + + chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL); + if (chip->data_buffer == NULL) { + chip->num_opens--; + put_device(chip->dev); + return -ENOMEM; + } + + atomic_set(&chip->data_pending, 0); + + file->private_data = chip; + return 0; + +err_out: + spin_unlock(&driver_lock); + return rc; +} + +EXPORT_SYMBOL_GPL(tpm_open); + +int tpm_release(struct inode *inode, struct file *file) +{ + struct tpm_chip *chip = file->private_data; + + spin_lock(&driver_lock); + file->private_data = NULL; + chip->num_opens--; + del_singleshot_timer_sync(&chip->user_read_timer); + atomic_set(&chip->data_pending, 0); + put_device(chip->dev); + kfree(chip->data_buffer); + spin_unlock(&driver_lock); + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_release); + +ssize_t tpm_write(struct file * file, const char __user * buf, + size_t size, loff_t * off) +{ + struct tpm_chip *chip = file->private_data; + int in_size = size, out_size; + + /* cannot perform a write until the read has cleared + either via tpm_read or a user_read_timer timeout */ + while (atomic_read(&chip->data_pending) != 0) + msleep(TPM_TIMEOUT); + + down(&chip->buffer_mutex); + + if (in_size > TPM_BUFSIZE) + in_size = TPM_BUFSIZE; + + if (copy_from_user + (chip->data_buffer, (void __user *) buf, in_size)) { + up(&chip->buffer_mutex); + return -EFAULT; + } + + /* atomic tpm command send and result receive */ + out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE); + + atomic_set(&chip->data_pending, out_size); + 
up(&chip->buffer_mutex); + + /* Set a timeout by which the reader must come claim the result */ + mod_timer(&chip->user_read_timer, jiffies + (60 * HZ)); + + return in_size; +} + +EXPORT_SYMBOL_GPL(tpm_write); + +ssize_t tpm_read(struct file * file, char __user * buf, + size_t size, loff_t * off) +{ + struct tpm_chip *chip = file->private_data; + int ret_size; + + del_singleshot_timer_sync(&chip->user_read_timer); + ret_size = atomic_read(&chip->data_pending); + + if (ret_size > 0) { /* relay data */ + int position = atomic_read(&chip->data_position); + + if (size < ret_size) + ret_size = size; + + down(&chip->buffer_mutex); + + if (copy_to_user((void __user *) buf, + &chip->data_buffer[position], + ret_size)) { + ret_size = -EFAULT; + } else { + int pending = atomic_read(&chip->data_pending) - ret_size; + atomic_set(&chip->data_pending, + pending); + atomic_set(&chip->data_position, + position + ret_size); + } + up(&chip->buffer_mutex); + } + + return ret_size; +} + +EXPORT_SYMBOL_GPL(tpm_read); + +void tpm_remove_hardware(struct device *dev) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + int i; + + if (chip == NULL) { + dev_err(dev, "No device data found\n"); + return; + } + + spin_lock(&driver_lock); + + list_del(&chip->list); + + spin_unlock(&driver_lock); + + dev_set_drvdata(dev, NULL); + misc_deregister(&chip->vendor->miscdev); + + for (i = 0; i < TPM_NUM_ATTR; i++) + device_remove_file(dev, &chip->vendor->attr[i]); + + dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES] &= + !(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES)); + + kfree(chip); + + put_device(dev); +} + +EXPORT_SYMBOL_GPL(tpm_remove_hardware); + +static const u8 savestate[] = { + 0, 193, /* TPM_TAG_RQU_COMMAND */ + 0, 0, 0, 10, /* blob length (in bytes) */ + 0, 0, 0, 152 /* TPM_ORD_SaveState */ +}; + +/* + * We are about to suspend. Save the TPM state + * so that it can be restored. 
+ */ +int tpm_pm_suspend(struct pci_dev *pci_dev, u32 pm_state) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + if (chip == NULL) + return -ENODEV; + + tpm_transmit(chip, savestate, sizeof(savestate)); + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_pm_suspend); + +/* + * Resume from a power save. The BIOS already restored + * the TPM state. + */ +int tpm_pm_resume(struct pci_dev *pci_dev) +{ + struct tpm_chip *chip = pci_get_drvdata(pci_dev); + + if (chip == NULL) + return -ENODEV; + + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_pm_resume); + +/* + * Called from tpm_<specific>.c probe function only for devices + * the driver has determined it should claim. Prior to calling + * this function the specific probe function has called pci_enable_device; + * upon errant exit from this function specific probe function should call + * pci_disable_device + */ +int tpm_register_hardware_nopci(struct device *dev, + struct tpm_vendor_specific *entry) +{ + char devname[7]; + struct tpm_chip *chip; + int i, j; + + /* Driver specific per-device data */ + chip = kmalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + memset(chip, 0, sizeof(struct tpm_chip)); + + init_MUTEX(&chip->buffer_mutex); + init_MUTEX(&chip->tpm_mutex); + INIT_LIST_HEAD(&chip->list); + + init_timer(&chip->user_read_timer); + chip->user_read_timer.function = user_reader_timeout; + chip->user_read_timer.data = (unsigned long) chip; + + chip->vendor = entry; + + chip->dev_num = -1; + + for (i = 0; i < TPM_NUM_MASK_ENTRIES; i++) + for (j = 0; j < 8 * sizeof(int); j++) + if ((dev_mask[i] & (1 << j)) == 0) { + chip->dev_num = + i * TPM_NUM_MASK_ENTRIES + j; + dev_mask[i] |= 1 << j; + goto dev_num_search_complete; + } + +dev_num_search_complete: + if (chip->dev_num < 0) { + dev_err(dev, "No available tpm device numbers\n"); + kfree(chip); + return -ENODEV; + } else if (chip->dev_num == 0) + chip->vendor->miscdev.minor = TPM_MINOR; + else + chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR; + + 
snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num); + chip->vendor->miscdev.name = devname; + + chip->vendor->miscdev.dev = dev; + chip->dev = get_device(dev); + + + if (misc_register(&chip->vendor->miscdev)) { + dev_err(chip->dev, + "unable to misc_register %s, minor %d\n", + chip->vendor->miscdev.name, + chip->vendor->miscdev.minor); + put_device(dev); + kfree(chip); + dev_mask[i] &= !(1 << j); + return -ENODEV; + } + + spin_lock(&driver_lock); + + dev_set_drvdata(dev, chip); + + list_add(&chip->list, &tpm_chip_list); + + spin_unlock(&driver_lock); + + for (i = 0; i < TPM_NUM_ATTR; i++) + device_create_file(dev, &chip->vendor->attr[i]); + + return 0; +} + +EXPORT_SYMBOL_GPL(tpm_register_hardware_nopci); + +static int __init init_tpm(void) +{ + return 0; +} + +static void __exit cleanup_tpm(void) +{ + +} + +module_init(init_tpm); +module_exit(cleanup_tpm); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ * + */ +#include <linux/module.h> +#include <linux/version.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/miscdevice.h> + +enum { + TPM_TIMEOUT = 5, /* msecs */ + TPM_NUM_ATTR = 4 +}; + +/* TPM addresses */ +enum { + TPM_ADDR = 0x4E, + TPM_DATA = 0x4F +}; + +/* + * Chip num is this value or a valid tpm idx in lower two bytes of chip_id + */ +enum tpm_chip_num { + TPM_ANY_NUM = 0xFFFF, +}; + +#define TPM_CHIP_NUM_MASK 0x0000ffff + +extern ssize_t tpm_show_pubek(struct device *, char *); +extern ssize_t tpm_show_pcrs(struct device *, char *); +extern ssize_t tpm_show_caps(struct device *, char *); +extern ssize_t tpm_store_cancel(struct device *, const char *, size_t); + +#define TPM_DEVICE_ATTRS { \ + __ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL), \ + __ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL), \ + __ATTR(caps, S_IRUGO, tpm_show_caps, NULL), \ + __ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel) } + +struct tpm_chip; + +struct tpm_vendor_specific { + u8 req_complete_mask; + u8 req_complete_val; + u8 req_canceled; + u16 base; /* TPM base address */ + + int (*recv) (struct tpm_chip *, u8 *, size_t); + int (*send) (struct tpm_chip *, u8 *, size_t); + void (*cancel) (struct tpm_chip *); + u8(*status) (struct tpm_chip *); + struct miscdevice miscdev; + struct device_attribute attr[TPM_NUM_ATTR]; +}; + +struct tpm_chip { + struct device *dev; /* PCI device stuff */ + + int dev_num; /* /dev/tpm# */ + int num_opens; /* only one allowed */ + int time_expired; + + /* Data passed to and from the tpm via the read/write calls */ + u8 *data_buffer; + atomic_t data_pending; + atomic_t data_position; + struct semaphore buffer_mutex; + + struct timer_list user_read_timer; /* user needs to claim result */ + struct semaphore tpm_mutex; /* tpm is processing */ + + struct tpm_vendor_specific *vendor; + + struct list_head list; +}; + +static inline int tpm_read_index(int index) +{ + outb(index, TPM_ADDR); + return inb(TPM_DATA) & 0xFF; +} + +static inline 
void tpm_write_index(int index, int value) +{ + outb(index, TPM_ADDR); + outb(value & 0xFF, TPM_DATA); +} + +extern void tpm_time_expired(unsigned long); +extern int tpm_lpc_bus_init(struct pci_dev *, u16); + +extern int tpm_register_hardware_nopci(struct device *, + struct tpm_vendor_specific *); +extern void tpm_remove_hardware(struct device *); +extern int tpm_open(struct inode *, struct file *); +extern int tpm_release(struct inode *, struct file *); +extern ssize_t tpm_write(struct file *, const char __user *, size_t, + loff_t *); +extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *); +extern int tpm_pcr_extend(u32 chip_id, int pcr_idx, const u8* hash); +extern int tpm_pcr_read( u32 chip_id, int pcr_idx, u8* res_buf, int res_buf_size ); + +extern int tpm_pm_suspend(struct pci_dev *, u32); +extern int tpm_pm_resume(struct pci_dev *); + +/* internal kernel interface */ +extern ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, + size_t bufsiz); +extern struct tpm_chip *tpm_chip_lookup(int chip_num); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_nsc.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford <safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ * + */ + +#include "tpm.h" + +/* National definitions */ +#define TPM_NSC_BASE 0x360 +#define TPM_NSC_IRQ 0x07 +#define TPM_NSC_BASE0_HI 0x60 +#define TPM_NSC_BASE0_LO 0x61 +#define TPM_NSC_BASE1_HI 0x62 +#define TPM_NSC_BASE1_LO 0x63 + +#define NSC_LDN_INDEX 0x07 +#define NSC_SID_INDEX 0x20 +#define NSC_LDC_INDEX 0x30 +#define NSC_DIO_INDEX 0x60 +#define NSC_CIO_INDEX 0x62 +#define NSC_IRQ_INDEX 0x70 +#define NSC_ITS_INDEX 0x71 + +#define NSC_STATUS 0x01 +#define NSC_COMMAND 0x01 +#define NSC_DATA 0x00 + +/* status bits */ +#define NSC_STATUS_OBF 0x01 /* output buffer full */ +#define NSC_STATUS_IBF 0x02 /* input buffer full */ +#define NSC_STATUS_F0 0x04 /* F0 */ +#define NSC_STATUS_A2 0x08 /* A2 */ +#define NSC_STATUS_RDY 0x10 /* ready to receive command */ +#define NSC_STATUS_IBR 0x20 /* ready to receive data */ + +/* command bits */ +#define NSC_COMMAND_NORMAL 0x01 /* normal mode */ +#define NSC_COMMAND_EOC 0x03 +#define NSC_COMMAND_CANCEL 0x22 + +/* + * Wait for a certain status to appear + */ +static int wait_for_stat(struct tpm_chip *chip, u8 mask, u8 val, u8 * data) +{ + int expired = 0; + struct timer_list status_timer = + TIMER_INITIALIZER(tpm_time_expired, jiffies + 10 * HZ, + (unsigned long) &expired); + + /* status immediately available check */ + *data = inb(chip->vendor->base + NSC_STATUS); + if ((*data & mask) == val) + return 0; + + /* wait for status */ + add_timer(&status_timer); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(TPM_TIMEOUT); + *data = inb(chip->vendor->base + 1); + if ((*data & mask) == val) { + del_singleshot_timer_sync(&status_timer); + return 0; + } + } + while (!expired); + + return -EBUSY; +} + +static int nsc_wait_for_ready(struct tpm_chip *chip) +{ + int status; + int expired = 0; + struct timer_list status_timer = + TIMER_INITIALIZER(tpm_time_expired, jiffies + 100, + (unsigned long) &expired); + + /* status immediately available check */ + status = inb(chip->vendor->base + NSC_STATUS); + if (status 
& NSC_STATUS_OBF) + status = inb(chip->vendor->base + NSC_DATA); + if (status & NSC_STATUS_RDY) + return 0; + + /* wait for status */ + add_timer(&status_timer); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(TPM_TIMEOUT); + status = inb(chip->vendor->base + NSC_STATUS); + if (status & NSC_STATUS_OBF) + status = inb(chip->vendor->base + NSC_DATA); + if (status & NSC_STATUS_RDY) { + del_singleshot_timer_sync(&status_timer); + return 0; + } + } + while (!expired); + + dev_info(&chip->pci_dev->dev, "wait for ready failed\n"); + return -EBUSY; +} + + +static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) +{ + u8 *buffer = buf; + u8 data, *p; + u32 size; + __be32 *native_size; + + if (count < 6) + return -EIO; + + if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) { + dev_err(&chip->pci_dev->dev, "F0 timeout\n"); + return -EIO; + } + if ((data = + inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_NORMAL) { + dev_err(&chip->pci_dev->dev, "not in normal mode (0x%x)\n", + data); + return -EIO; + } + + /* read the whole packet */ + for (p = buffer; p < &buffer[count]; p++) { + if (wait_for_stat + (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) { + dev_err(&chip->pci_dev->dev, + "OBF timeout (while reading data)\n"); + return -EIO; + } + if (data & NSC_STATUS_F0) + break; + *p = inb(chip->vendor->base + NSC_DATA); + } + + if ((data & NSC_STATUS_F0) == 0) { + dev_err(&chip->pci_dev->dev, "F0 not set\n"); + return -EIO; + } + if ((data = inb(chip->vendor->base + NSC_DATA)) != NSC_COMMAND_EOC) { + dev_err(&chip->pci_dev->dev, + "expected end of command(0x%x)\n", data); + return -EIO; + } + + native_size = (__force __be32 *) (buf + 2); + size = be32_to_cpu(*native_size); + + if (count < size) + return -EIO; + + return size; +} + +static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) +{ + u8 data; + int i; + + /* + * If we hit the chip with back to back commands it locks up + * and never set IBF. 
Hitting it with this "hammer" seems to + * fix it. Not sure why this is needed, we followed the flow + * chart in the manual to the letter. + */ + outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND); + + if (nsc_wait_for_ready(chip) != 0) + return -EIO; + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(&chip->pci_dev->dev, "IBF timeout\n"); + return -EIO; + } + + outb(NSC_COMMAND_NORMAL, chip->vendor->base + NSC_COMMAND); + if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { + dev_err(&chip->pci_dev->dev, "IBR timeout\n"); + return -EIO; + } + + for (i = 0; i < count; i++) { + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(&chip->pci_dev->dev, + "IBF timeout (while writing data)\n"); + return -EIO; + } + outb(buf[i], chip->vendor->base + NSC_DATA); + } + + if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { + dev_err(&chip->pci_dev->dev, "IBF timeout\n"); + return -EIO; + } + outb(NSC_COMMAND_EOC, chip->vendor->base + NSC_COMMAND); + + return count; +} + +static void tpm_nsc_cancel(struct tpm_chip *chip) +{ + outb(NSC_COMMAND_CANCEL, chip->vendor->base + NSC_COMMAND); +} + +static struct file_operations nsc_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static struct tpm_vendor_specific tpm_nsc = { + .recv = tpm_nsc_recv, + .send = tpm_nsc_send, + .cancel = tpm_nsc_cancel, + .req_complete_mask = NSC_STATUS_OBF, + .req_complete_val = NSC_STATUS_OBF, + .miscdev = { .fops = &nsc_ops, }, + +}; + +static int __devinit tpm_nsc_init(struct pci_dev *pci_dev, + const struct pci_device_id *pci_id) +{ + int rc = 0; + int lo, hi; + + hi = tpm_read_index(TPM_NSC_BASE0_HI); + lo = tpm_read_index(TPM_NSC_BASE0_LO); + + tpm_nsc.base = (hi<<8) | lo; + + if (pci_enable_device(pci_dev)) + return -EIO; + + /* verify that it is a National part (SID) */ + if (tpm_read_index(NSC_SID_INDEX) != 0xEF) { + rc = -ENODEV; 
+ goto out_err; + } + + dev_dbg(&pci_dev->dev, "NSC TPM detected\n"); + dev_dbg(&pci_dev->dev, + "NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n", + tpm_read_index(0x07), tpm_read_index(0x20), + tpm_read_index(0x27)); + dev_dbg(&pci_dev->dev, + "NSC SIOCF1 0x%x SIOCF5 0x%x SIOCF6 0x%x SIOCF8 0x%x\n", + tpm_read_index(0x21), tpm_read_index(0x25), + tpm_read_index(0x26), tpm_read_index(0x28)); + dev_dbg(&pci_dev->dev, "NSC IO Base0 0x%x\n", + (tpm_read_index(0x60) << 8) | tpm_read_index(0x61)); + dev_dbg(&pci_dev->dev, "NSC IO Base1 0x%x\n", + (tpm_read_index(0x62) << 8) | tpm_read_index(0x63)); + dev_dbg(&pci_dev->dev, "NSC Interrupt number and wakeup 0x%x\n", + tpm_read_index(0x70)); + dev_dbg(&pci_dev->dev, "NSC IRQ type select 0x%x\n", + tpm_read_index(0x71)); + dev_dbg(&pci_dev->dev, + "NSC DMA channel select0 0x%x, select1 0x%x\n", + tpm_read_index(0x74), tpm_read_index(0x75)); + dev_dbg(&pci_dev->dev, + "NSC Config " + "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + tpm_read_index(0xF0), tpm_read_index(0xF1), + tpm_read_index(0xF2), tpm_read_index(0xF3), + tpm_read_index(0xF4), tpm_read_index(0xF5), + tpm_read_index(0xF6), tpm_read_index(0xF7), + tpm_read_index(0xF8), tpm_read_index(0xF9)); + + dev_info(&pci_dev->dev, + "NSC PC21100 TPM revision %d\n", + tpm_read_index(0x27) & 0x1F); + + if (tpm_read_index(NSC_LDC_INDEX) == 0) + dev_info(&pci_dev->dev, ": NSC TPM not active\n"); + + /* select PM channel 1 */ + tpm_write_index(NSC_LDN_INDEX, 0x12); + tpm_read_index(NSC_LDN_INDEX); + + /* disable the DPM module */ + tpm_write_index(NSC_LDC_INDEX, 0); + tpm_read_index(NSC_LDC_INDEX); + + /* set the data register base addresses */ + tpm_write_index(NSC_DIO_INDEX, TPM_NSC_BASE >> 8); + tpm_write_index(NSC_DIO_INDEX + 1, TPM_NSC_BASE); + tpm_read_index(NSC_DIO_INDEX); + tpm_read_index(NSC_DIO_INDEX + 1); + + /* set the command register base addresses */ + tpm_write_index(NSC_CIO_INDEX, (TPM_NSC_BASE + 1) >> 8); + tpm_write_index(NSC_CIO_INDEX + 1, (TPM_NSC_BASE + 1)); 
+ tpm_read_index(NSC_DIO_INDEX); + tpm_read_index(NSC_DIO_INDEX + 1); + + /* set the interrupt number to be used for the host interface */ + tpm_write_index(NSC_IRQ_INDEX, TPM_NSC_IRQ); + tpm_write_index(NSC_ITS_INDEX, 0x00); + tpm_read_index(NSC_IRQ_INDEX); + + /* enable the DPM module */ + tpm_write_index(NSC_LDC_INDEX, 0x01); + tpm_read_index(NSC_LDC_INDEX); + + if ((rc = tpm_register_hardware(pci_dev, &tpm_nsc)) < 0) + goto out_err; + + return 0; + +out_err: + pci_disable_device(pci_dev); + return rc; +} + +static struct pci_device_id tpm_pci_tbl[] __devinitdata = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0)}, + {PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_LPC)}, + {0,} +}; + +MODULE_DEVICE_TABLE(pci, tpm_pci_tbl); + +static struct pci_driver nsc_pci_driver = { + .name = "tpm_nsc", + .id_table = tpm_pci_tbl, + .probe = tpm_nsc_init, + .remove = __devexit_p(tpm_remove), + .suspend = tpm_pm_suspend, + .resume = tpm_pm_resume, +}; + +static int __init init_nsc(void) +{ + return pci_register_driver(&nsc_pci_driver); +} + +static void __exit cleanup_nsc(void) +{ + pci_unregister_driver(&nsc_pci_driver); +} + +module_init(init_nsc); +module_exit(cleanup_nsc); + +MODULE_AUTHOR("Leendert van Doorn (leendert@xxxxxxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,513 @@ +/* + * Copyright (C) 2004 IBM Corporation + * + * Authors: + * Leendert van Doorn <leendert@xxxxxxxxxxxxxx> + * Dave Safford 
<safford@xxxxxxxxxxxxxx> + * Reiner Sailer <sailer@xxxxxxxxxxxxxx> + * Kylene Hall <kjhall@xxxxxxxxxx> + * Stefan Berger <stefanb@xxxxxxxxxx> + * + * Maintained by: <tpmdd_devel@xxxxxxxxxxxxxxxxxxxxx> + * + * Device driver for TCG/TCPA TPM (trusted platform module) for XEN. + * Specifications at www.trustedcomputinggroup.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + */ + +#include <asm/uaccess.h> +#include <linux/list.h> +#include <linux/tpmfe.h> +#include <linux/device.h> +#include <linux/interrupt.h> +#include "tpm_nopci.h" + +/* read status bits */ +enum { + STATUS_BUSY = 0x01, + STATUS_DATA_AVAIL = 0x02, + STATUS_READY = 0x04 +}; + +#define MIN(x,y) ((x) < (y)) ? (x) : (y) + +struct transmission { + struct list_head next; + unsigned char *request; + unsigned int request_len; + unsigned char *rcv_buffer; + unsigned int buffersize; + struct tpm_chip *chip; + unsigned int flags; +}; + +enum { + TRANSMISSION_FLAG_WAS_QUEUED = 0x1 +}; + +struct data_exchange { + struct transmission *current_request; + spinlock_t req_list_lock; + wait_queue_head_t req_wait_queue; + + struct list_head queued_requests; + + struct transmission *current_response; + spinlock_t resp_list_lock; + wait_queue_head_t resp_wait_queue; // processes waiting for responses + + struct transmission *req_cancelled; // if a cancellation was encounterd + + unsigned int fe_status; + unsigned int flags; +}; + +enum { + DATAEX_FLAG_QUEUED_ONLY = 0x1 +}; + +static struct data_exchange dataex; + +static unsigned long disconnect_time; + +/* local function prototypes */ +static void __exit cleanup_xen(void); + + +/* ============================================================= + * Some utility functions + * ============================================================= + */ +static inline struct transmission * +transmission_alloc(void) 
+{ + struct transmission *t = kmalloc(sizeof(*t), GFP_KERNEL); + if (t) { + memset(t, 0x0, sizeof(*t)); + } + return t; +} + +static inline unsigned char * +transmission_set_buffer(struct transmission *t, + unsigned char *buffer, unsigned int len) +{ + if (NULL != t->request) { + kfree(t->request); + } + t->request = kmalloc(len, GFP_KERNEL); + if (t->request) { + memcpy(t->request, + buffer, + len); + t->request_len = len; + } + return t->request; +} + +static inline void +transmission_free(struct transmission *t) +{ + if (t->request) { + kfree(t->request); + } + if (t->rcv_buffer) { + kfree(t->rcv_buffer); + } + kfree(t); +} + +/* ============================================================= + * Interface with the TPM shared memory driver for XEN + * ============================================================= + */ +static int tpm_recv(const u8 *buffer, size_t count, const void *ptr) +{ + int ret_size = 0; + struct transmission *t, *temp; + + /* + * The list with requests must contain one request + * only and the element there must be the one that + * was passed to me from the front-end. + */ + if (dataex.current_request != ptr) { + printk("WARNING: The request pointer is different than the pointer " + "the shared memory driver returned to me. 
%p != %p\n", + dataex.current_request, ptr); + } + + /* + * If the request has been cancelled, just quit here + */ + if (dataex.req_cancelled == (struct transmission *)ptr) { + if (dataex.current_request == dataex.req_cancelled) { + dataex.current_request = NULL; + } + transmission_free(dataex.req_cancelled); + dataex.req_cancelled = NULL; + return 0; + } + + if (NULL != (temp = dataex.current_request)) { + transmission_free(temp); + dataex.current_request = NULL; + } + + t = transmission_alloc(); + if (NULL != t) { + unsigned long flags; + t->rcv_buffer = kmalloc(count, GFP_KERNEL); + if (NULL == t->rcv_buffer) { + transmission_free(t); + return -ENOMEM; + } + t->buffersize = count; + memcpy(t->rcv_buffer, buffer, count); + ret_size = count; + + spin_lock_irqsave(&dataex.resp_list_lock ,flags); + dataex.current_response = t; + spin_unlock_irqrestore(&dataex.resp_list_lock, flags); + wake_up_interruptible(&dataex.resp_wait_queue); + } + return ret_size; +} + + +static void tpm_fe_status(unsigned int flags) +{ + dataex.fe_status = flags; + if ((dataex.fe_status & TPMFE_STATUS_CONNECTED) == 0) { + disconnect_time = jiffies; + } +} + +/* ============================================================= + * Interface with the generic TPM driver + * ============================================================= + */ +static int tpm_xen_recv(struct tpm_chip *chip, u8 * buf, size_t count) +{ + unsigned long flags; + int rc = 0; + + spin_lock_irqsave(&dataex.resp_list_lock, flags); + /* + * Check if the previous operation only queued the command + * In this case there won't be a response, so I just + * return from here and reset that flag. In any other + * case I should receive a response from the back-end. + */ + if ((dataex.flags & DATAEX_FLAG_QUEUED_ONLY) != 0) { + dataex.flags &= ~DATAEX_FLAG_QUEUED_ONLY; + spin_unlock_irqrestore(&dataex.resp_list_lock, flags); + /* + * a little hack here. 
The first few measurements + * are queued since there's no way to talk to the + * TPM yet (due to slowness of the control channel) + * So we just make IMA happy by giving it 30 NULL + * bytes back where the most important part is + * that the result code is '0'. + */ + + count = MIN(count, 30); + memset(buf, 0x0, count); + return count; + } + /* + * Check whether something is in the responselist and if + * there's nothing in the list wait for something to appear. + */ + + if (NULL == dataex.current_response) { + spin_unlock_irqrestore(&dataex.resp_list_lock, flags); + interruptible_sleep_on_timeout(&dataex.resp_wait_queue, + 1000); + spin_lock_irqsave(&dataex.resp_list_lock ,flags); + } + + if (NULL != dataex.current_response) { + struct transmission *t = dataex.current_response; + dataex.current_response = NULL; + rc = MIN(count, t->buffersize); + memcpy(buf, t->rcv_buffer, rc); + transmission_free(t); + } + + spin_unlock_irqrestore(&dataex.resp_list_lock, flags); + return rc; +} + +static int tpm_xen_send(struct tpm_chip *chip, u8 * buf, size_t count) +{ + /* + * We simply pass the packet onto the XEN shared + * memory driver. + */ + unsigned long flags; + int rc; + struct transmission *t = transmission_alloc(); + + spin_lock_irqsave(&dataex.req_list_lock, flags); + /* + * If there's a current request, it must be the + * previous request that has timed out. + */ + if (dataex.current_request != NULL) { + printk("WARNING: Sending although there is a request outstanding.\n" + " Previous request must have timed out.\n"); + transmission_free(dataex.current_request); + dataex.current_request = NULL; + } + + if (t != NULL) { + unsigned int error = 0; + t->rcv_buffer = NULL; + t->buffersize = 0; + t->chip = chip; + + /* + * Queue the packet if the driver below is not + * ready, yet, or there is any packet already + * in the queue. + * If the driver below is ready, unqueue all + * packets first before sending our current + * packet. 
+ * For each unqueued packet, except for the + * last (=current) packet, call the function + * tpm_xen_recv to wait for the response to come + * back. + */ + if ((dataex.fe_status & TPMFE_STATUS_CONNECTED) == 0) { + if (time_after(jiffies, disconnect_time + HZ * 10)) { + rc = -ENOENT; + } else { + /* + * copy the request into the buffer + */ + if (transmission_set_buffer(t, buf, count) + == NULL) { + transmission_free(t); + rc = -ENOMEM; + goto exit; + } + dataex.flags |= DATAEX_FLAG_QUEUED_ONLY; + list_add_tail(&t->next, &dataex.queued_requests); + rc = 0; + } + } else { + /* + * Check whether there are any packets in the queue + */ + while (!list_empty(&dataex.queued_requests)) { + /* + * Need to dequeue them. + * Read the result into a dummy buffer. + */ + unsigned char buffer[1]; + struct transmission *qt = (struct transmission *) dataex.queued_requests.next; + list_del(&qt->next); + dataex.current_request = qt; + spin_unlock_irqrestore(&dataex.req_list_lock, flags); + + rc = tpm_fe_send(qt->request, + qt->request_len, + qt); + + if (rc < 0) { + spin_lock_irqsave(&dataex.req_list_lock, flags); + if ((qt = dataex.current_request) != NULL) { + /* + * requeue it at the beginning + * of the list + */ + list_add(&qt->next, + &dataex.queued_requests); + } + dataex.current_request = NULL; + error = 1; + break; + } + /* + * After this point qt is not valid anymore! + * It is freed when the front-end is delivering the data + * by calling tpm_recv + */ + + /* + * Try to receive the response now into the provided dummy + * buffer (I don't really care about this response since + * there is no receiver anymore for this response) + */ + rc = tpm_xen_recv(chip, buffer, sizeof(buffer)); + + spin_lock_irqsave(&dataex.req_list_lock, flags); + } + + if (error == 0) { + /* + * Finally, send the current request. 
+ */ + dataex.current_request = t; + /* + * Call the shared memory driver + * Pass to it the buffer with the request, the + * amount of bytes in the request and + * a void * pointer (here: transmission structure) + */ + rc = tpm_fe_send(buf, count, t); + /* + * The generic TPM driver will call + * the function to receive the response. + */ + if (rc < 0) { + dataex.current_request = NULL; + goto queue_it; + } + } else { +queue_it: + if (transmission_set_buffer(t, buf, count) == NULL) { + transmission_free(t); + rc = -ENOMEM; + goto exit; + } + /* + * An error occurred. Don't event try + * to send the current request. Just + * queue it. + */ + dataex.flags |= DATAEX_FLAG_QUEUED_ONLY; + list_add_tail(&t->next, &dataex.queued_requests); + rc = 0; + } + } + } else { + rc = -ENOMEM; + } + +exit: + spin_unlock_irqrestore(&dataex.req_list_lock, flags); + return rc; +} + +static void tpm_xen_cancel(struct tpm_chip *chip) +{ + unsigned long flags; + spin_lock_irqsave(&dataex.resp_list_lock,flags); + + dataex.req_cancelled = dataex.current_request; + + spin_unlock_irqrestore(&dataex.resp_list_lock,flags); +} + +static u8 tpm_xen_status(struct tpm_chip *chip) +{ + unsigned long flags; + u8 rc = 0; + spin_lock_irqsave(&dataex.resp_list_lock, flags); + /* + * Data are available if: + * - there's a current response + * - the last packet was queued only (this is fake, but necessary to + * get the generic TPM layer to call the receive function.) 
+ */ + if (NULL != dataex.current_response || + 0 != (dataex.flags & DATAEX_FLAG_QUEUED_ONLY)) { + rc = STATUS_DATA_AVAIL; + } + spin_unlock_irqrestore(&dataex.resp_list_lock, flags); + return rc; +} + +static struct file_operations tpm_xen_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = tpm_open, + .read = tpm_read, + .write = tpm_write, + .release = tpm_release, +}; + +static struct tpm_vendor_specific tpm_xen = { + .recv = tpm_xen_recv, + .send = tpm_xen_send, + .cancel = tpm_xen_cancel, + .status = tpm_xen_status, + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL, + .req_complete_val = STATUS_DATA_AVAIL, + .req_canceled = STATUS_READY, + .base = 0, + .attr = TPM_DEVICE_ATTRS, + .miscdev.fops = &tpm_xen_ops, +}; + +static struct device tpm_device = { + .bus_id = "vtpm", +}; + +static struct tpmfe_device tpmfe = { + .receive = tpm_recv, + .status = tpm_fe_status, +}; + + +static int __init init_xen(void) +{ + int rc; + + /* + * Register device with the low lever front-end + * driver + */ + if ((rc = tpm_fe_register_receiver(&tpmfe)) < 0) { + return rc; + } + + /* + * Register our device with the system. 
+ */ + if ((rc = device_register(&tpm_device)) < 0) { + tpm_fe_unregister_receiver(); + return rc; + } + + if ((rc = tpm_register_hardware_nopci(&tpm_device, &tpm_xen)) < 0) { + device_unregister(&tpm_device); + tpm_fe_unregister_receiver(); + return rc; + } + + dataex.current_request = NULL; + spin_lock_init(&dataex.req_list_lock); + init_waitqueue_head(&dataex.req_wait_queue); + INIT_LIST_HEAD(&dataex.queued_requests); + + dataex.current_response = NULL; + spin_lock_init(&dataex.resp_list_lock); + init_waitqueue_head(&dataex.resp_wait_queue); + + disconnect_time = jiffies; + + return 0; +} + +static void __exit cleanup_xen(void) +{ + tpm_remove_hardware(&tpm_device); + device_unregister(&tpm_device); + tpm_fe_unregister_receiver(); +} + +fs_initcall(init_xen); +module_exit(cleanup_xen); + +MODULE_AUTHOR("Stefan Berger (stefanb@xxxxxxxxxx)"); +MODULE_DESCRIPTION("TPM Driver for XEN (shared memory)"); +MODULE_VERSION("1.0"); +MODULE_LICENSE("GPL"); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/common.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,112 @@ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include <linux/config.h> +#include <linux/version.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/vmalloc.h> +#include <asm/io.h> +#include <asm/setup.h> +#include <asm/pgalloc.h> +#include <asm-xen/evtchn.h> +#include <asm-xen/hypervisor.h> +#include <asm-xen/xen-public/io/blkif.h> +#include <asm-xen/xen-public/io/ring.h> +#include <asm-xen/gnttab.h> + +#if 0 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#define DPRINTK(_f, _a...) 
printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define ASSERT(_p) ((void)0) +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) + +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + blkif_pdev_t pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned long shmem_frame; + unsigned int evtchn; + unsigned int remote_evtchn; + /* Comms information. */ + blkif_back_ring_t blk_ring; + /* VBDs attached to this interface. */ + struct vbd vbd; + /* Private fields. */ + enum { DISCONNECTED, CONNECTED } status; +#ifdef CONFIG_XEN_BLKDEV_TAP_BE + /* Is this a blktap frontend */ + unsigned int is_blktap; +#endif + struct list_head blkdev_list; + spinlock_t blk_ring_lock; + atomic_t refcnt; + + struct work_struct free_work; + u16 shmem_handle; + unsigned long shmem_vaddr; + grant_ref_t shmem_ref; +} blkif_t; + +void blkif_create(blkif_be_create_t *create); +void blkif_destroy(blkif_be_destroy_t *destroy); +void blkif_connect(blkif_be_connect_t *connect); +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); +void blkif_disconnect_complete(blkif_t *blkif); +blkif_t *alloc_blkif(domid_t domid); +void free_blkif_callback(blkif_t *blkif); +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + free_blkif_callback(_b); \ + } while (0) + +/* Create a vbd. 
*/ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, + int readonly); +void vbd_free(struct vbd *vbd); + +unsigned long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); + +void blkif_interface_init(void); + +void blkif_deschedule(blkif_t *blkif); + +void blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,141 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. 
+ * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" +#include <asm-xen/evtchn.h> + +static kmem_cache_t *blkif_cachep; + +blkif_t *alloc_blkif(domid_t domid) +{ + blkif_t *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + blkif->status = DISCONNECTED; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + + return blkif; +} + +static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, + unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + op.host_addr = localaddr; + op.flags = GNTMAP_host_map; + op.ref = shared_page; + op.dom = blkif->domid; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + + if (op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + return op.handle; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + blkif->shmem_vaddr = localaddr; + return 0; +} + +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = blkif->shmem_vaddr; + op.handle = blkif->shmem_handle; + op.dev_bus_addr = 0; + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); +} + +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +{ + struct vm_struct *vma; + blkif_sring_t *sring; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + BUG_ON(blkif->remote_evtchn); + + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page); + if (err) { + vfree(vma->addr); + return err; + } + + op.u.bind_interdomain.dom1 = DOMID_SELF; + op.u.bind_interdomain.dom2 = blkif->domid; + op.u.bind_interdomain.port1 = 0; + op.u.bind_interdomain.port2 = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_frontend_page(blkif); + vfree(vma->addr); + return 
err; + } + + blkif->evtchn = op.u.bind_interdomain.port1; + blkif->remote_evtchn = evtchn; + + sring = (blkif_sring_t *)vma->addr; + SHARED_RING_INIT(sring); + BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); + + bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend", + blkif); + blkif->status = CONNECTED; + blkif->shmem_frame = shared_page; + + return 0; +} + +static void free_blkif(void *arg) +{ + evtchn_op_t op = { .cmd = EVTCHNOP_close }; + blkif_t *blkif = (blkif_t *)arg; + + op.u.close.port = blkif->evtchn; + op.u.close.dom = DOMID_SELF; + HYPERVISOR_event_channel_op(&op); + op.u.close.port = blkif->remote_evtchn; + op.u.close.dom = blkif->domid; + HYPERVISOR_event_channel_op(&op); + + if (blkif->evtchn) + unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); + + if (blkif->blk_ring.sring) { + unmap_frontend_page(blkif); + vfree(blkif->blk_ring.sring); + blkif->blk_ring.sring = NULL; + } + + kmem_cache_free(blkif_cachep, blkif); +} + +void free_blkif_callback(blkif_t *blkif) +{ + INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif); + schedule_work(&blkif->free_work); +} + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, NULL); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,225 @@ +/* Xenbus code for blkif tap + + A Warfield. + + Hastily modified from the oroginal backend code: + + Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <stdarg.h> +#include <linux/module.h> +#include <asm-xen/xenbus.h> +#include "common.h" + +struct backend_info +{ + struct xenbus_device *dev; + + /* our communications channel */ + blkif_t *blkif; + + long int frontend_id; + + /* watch back end for changes */ + struct xenbus_watch backend_watch; + + /* watch front end for changes */ + struct xenbus_watch watch; + char *frontpath; +}; + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev->data; + + if (be->watch.node) + unregister_xenbus_watch(&be->watch); + unregister_xenbus_watch(&be->backend_watch); + if (be->blkif) + blkif_put(be->blkif); + if (be->frontpath) + kfree(be->frontpath); + kfree(be); + return 0; +} + +/* Front end tells us frame. */ +static void frontend_changed(struct xenbus_watch *watch, const char *node) +{ + unsigned long ring_ref; + unsigned int evtchn; + int err; + struct backend_info *be + = container_of(watch, struct backend_info, watch); + + /* If other end is gone, delete ourself. */ + if (node && !xenbus_exists(be->frontpath, "")) { + xenbus_rm(be->dev->nodename, ""); + device_unregister(&be->dev->dev); + return; + } + if (be->blkif == NULL || be->blkif->status == CONNECTED) + return; + + err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_error(be->dev, err, + "reading %s/ring-ref and event-channel", + be->frontpath); + return; + } + + /* Map the shared frame, irq etc. 
*/ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + goto abort; + } + + xenbus_dev_ok(be->dev); + + return; + +abort: + xenbus_transaction_end(1); +} + +/* + Setup supplies physical device. + We provide event channel and device details to front end. + Frontend supplies shared frame and event channel. + */ +static void backend_changed(struct xenbus_watch *watch, const char *node) +{ + int err; + char *p; + long int handle; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + + if (be->blkif == NULL) { + /* Front end dir is a number, which is used as the handle. */ + p = strrchr(be->frontpath, '/') + 1; + handle = simple_strtoul(p, NULL, 0); + + be->blkif = alloc_blkif(be->frontend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_error(dev, err, "creating block interface"); + return; + } + + /* Pass in NULL node to skip exist test. */ + frontend_changed(&be->watch, NULL); + } +} + +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct backend_info *be; + char *frontend; + int err; + + be = kmalloc(sizeof(*be), GFP_KERNEL); + if (!be) { + xenbus_dev_error(dev, -ENOMEM, "allocating backend structure"); + return -ENOMEM; + } + memset(be, 0, sizeof(*be)); + + frontend = NULL; + err = xenbus_gather(dev->nodename, + "frontend-id", "%li", &be->frontend_id, + "frontend", NULL, &frontend, + NULL); + if (XENBUS_EXIST_ERR(err)) + goto free_be; + if (err < 0) { + xenbus_dev_error(dev, err, + "reading %s/frontend or frontend-id", + dev->nodename); + goto free_be; + } + if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) { + /* If we can't get a frontend path and a frontend-id, + * then our bus-id is no longer valid and we need to + * destroy the backend device. 
+ */ + err = -ENOENT; + goto free_be; + } + + be->dev = dev; + be->backend_watch.node = dev->nodename; + be->backend_watch.callback = backend_changed; + err = register_xenbus_watch(&be->backend_watch); + if (err) { + be->backend_watch.node = NULL; + xenbus_dev_error(dev, err, "adding backend watch on %s", + dev->nodename); + goto free_be; + } + + be->frontpath = frontend; + be->watch.node = be->frontpath; + be->watch.callback = frontend_changed; + err = register_xenbus_watch(&be->watch); + if (err) { + be->watch.node = NULL; + xenbus_dev_error(dev, err, + "adding frontend watch on %s", + be->frontpath); + goto free_be; + } + + dev->data = be; + + backend_changed(&be->backend_watch, dev->nodename); + return 0; + + free_be: + if (be->backend_watch.node) + unregister_xenbus_watch(&be->backend_watch); + if (frontend) + kfree(frontend); + kfree(be); + return err; +} + +static struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + +static struct xenbus_driver blkback = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, +}; + +void blkif_xenbus_init(void) +{ + xenbus_register_backend(&blkback); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,128 @@ +#include <linux/version.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/serial.h> +#include <linux/major.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm-xen/hypervisor.h> +#include <asm-xen/evtchn.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/err.h> +#include "xencons_ring.h" + + 
+struct ring_head +{ + u32 cons; + u32 prod; + char buf[0]; +} __attribute__((packed)); + + +#define XENCONS_RING_SIZE (PAGE_SIZE/2 - sizeof (struct ring_head)) +#define XENCONS_IDX(cnt) ((cnt) % XENCONS_RING_SIZE) +#define XENCONS_FULL(ring) (((ring)->prod - (ring)->cons) == XENCONS_RING_SIZE) + +static inline struct ring_head *outring(void) +{ + return machine_to_virt(xen_start_info->console_mfn << PAGE_SHIFT); +} + +static inline struct ring_head *inring(void) +{ + return machine_to_virt(xen_start_info->console_mfn << PAGE_SHIFT) + + PAGE_SIZE/2; +} + + +/* don't block - write as much as possible and return */ +static int __xencons_ring_send(struct ring_head *ring, const char *data, unsigned len) +{ + int copied = 0; + + mb(); + while (copied < len && !XENCONS_FULL(ring)) { + ring->buf[XENCONS_IDX(ring->prod)] = data[copied]; + ring->prod++; + copied++; + } + mb(); + + return copied; +} + +int xencons_ring_send(const char *data, unsigned len) +{ + struct ring_head *out = outring(); + int sent = 0; + + sent = __xencons_ring_send(out, data, len); + notify_via_evtchn(xen_start_info->console_evtchn); + return sent; + +} + + +static xencons_receiver_func *xencons_receiver; + +static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs) +{ + struct ring_head *ring = inring(); + while (ring->cons < ring->prod) { + if (xencons_receiver != NULL) { + xencons_receiver(ring->buf + XENCONS_IDX(ring->cons), + 1, regs); + } + ring->cons++; + } + return IRQ_HANDLED; +} + +void xencons_ring_register_receiver(xencons_receiver_func *f) +{ + xencons_receiver = f; +} + +int xencons_ring_init(void) +{ + int err; + + if (!xen_start_info->console_evtchn) + return 0; + + err = bind_evtchn_to_irqhandler(xen_start_info->console_evtchn, + handle_input, 0, "xencons", inring()); + if (err) { + xprintk("XEN console request irq failed %i\n", err); + return err; + } + + return 0; +} + +void xencons_suspend(void) +{ + + if (!xen_start_info->console_evtchn) + return; + + 
unbind_evtchn_from_irqhandler(xen_start_info->console_evtchn, + inring()); +} + +void xencons_resume(void) +{ + + (void)xencons_ring_init(); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,14 @@ +#ifndef _XENCONS_RING_H +#define _XENCONS_RING_H + +asmlinkage int xprintk(const char *fmt, ...); + + +int xencons_ring_init(void); +int xencons_ring_send(const char *data, unsigned len); + +typedef void (xencons_receiver_func)(char *buf, unsigned len, + struct pt_regs *regs); +void xencons_ring_register_receiver(xencons_receiver_func *f); + +#endif /* _XENCONS_RING_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmback/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,4 @@ + +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmbk.o + +tpmbk-y += tpmback.o interface.o xenbus.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmback/common.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,88 @@ +/****************************************************************************** + * drivers/xen/tpmback/common.h + */ + +#ifndef __NETIF__BACKEND__COMMON_H__ +#define __NETIF__BACKEND__COMMON_H__ + +#include <linux/config.h> +#include <linux/version.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <asm-xen/evtchn.h> +#include <asm-xen/xen-public/io/tpmif.h> +#include <asm/io.h> +#include <asm/pgalloc.h> +#include <asm-xen/xen-public/io/domain_controller.h> + +#if 0 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#define DPRINTK(_f, _a...) 
printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define ASSERT(_p) ((void)0) +#define DPRINTK(_f, _a...) ((void)0) +#endif + +typedef struct tpmif_st { + struct list_head tpmif_list; + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + + /* Physical parameters of the comms window. */ + unsigned long tx_shmem_frame; + unsigned int evtchn; + unsigned int remote_evtchn; + + /* The shared rings and indexes. */ + tpmif_tx_interface_t *tx; + + /* Miscellaneous private stuff. */ + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + int active; + + struct tpmif_st *hash_next; + struct list_head list; /* scheduling list */ + atomic_t refcnt; + + long int tpm_instance; + unsigned long mmap_vstart; + + struct work_struct work; + + u16 shmem_handle; + unsigned long shmem_vaddr; + grant_ref_t shmem_ref; + +} tpmif_t; + +void tpmif_disconnect_complete(tpmif_t * tpmif); +tpmif_t *tpmif_find(domid_t domid, long int instance); +void tpmif_interface_init(void); +void tpmif_schedule_work(tpmif_t * tpmif); +void tpmif_deschedule_work(tpmif_t * tpmif); +void tpmif_xenbus_init(void); +int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); +irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs); +int tpmif_vtpm_open(tpmif_t *tpmif, domid_t domain, u32 instance); +int tpmif_vtpm_close(u32 instance); + +int vtpm_release_packets(tpmif_t * tpmif, int send_msgs); + +#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define tpmif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + tpmif_disconnect_complete(_b); \ + } while (0) + + +extern int num_frontends; + +#define MMAP_VADDR(t,_req) ((t)->mmap_vstart + ((_req) * PAGE_SIZE)) + +#endif /* __TPMIF__BACKEND__COMMON_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Fri 
Sep 9 16:30:54 2005 @@ -0,0 +1,200 @@ +/****************************************************************************** + * drivers/xen/tpmback/interface.c + * + * Vritual TPM interface management. + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@xxxxxxxxxx + * + * This code has been derived from drivers/xen/netback/interface.c + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" +#include <asm-xen/balloon.h> + +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) + +#define TPMIF_HASHSZ (2 << 5) +#define TPMIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(TPMIF_HASHSZ-1)) + +static kmem_cache_t *tpmif_cachep; +int num_frontends = 0; +LIST_HEAD(tpmif_list); + + +tpmif_t *alloc_tpmif(domid_t domid, long int instance) +{ + struct page *page; + tpmif_t *tpmif; + + tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL); + if (!tpmif) + return ERR_PTR(-ENOMEM); + + memset(tpmif, 0, sizeof(*tpmif)); + tpmif->domid = domid; + tpmif->status = DISCONNECTED; + tpmif->tpm_instance = instance; + atomic_set(&tpmif->refcnt, 1); + + page = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE); + BUG_ON(page == NULL); + tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + + list_add(&tpmif->tpmif_list, &tpmif_list); + num_frontends++; + + return tpmif; +} + + +void free_tpmif(tpmif_t *tpmif) +{ + num_frontends--; + list_del(&tpmif->tpmif_list); + kmem_cache_free(tpmif_cachep, tpmif); +} + + +tpmif_t *tpmif_find(domid_t domid, long int instance) +{ + tpmif_t *tpmif; + + list_for_each_entry(tpmif, &tpmif_list, tpmif_list) { + if (tpmif->tpm_instance == instance) { + if (tpmif->domid == domid) { + tpmif_get(tpmif); + return tpmif; + } else { + return NULL; + } + } + } + + return alloc_tpmif(domid, instance); +} + + +static int map_frontend_page(tpmif_t *tpmif, unsigned long localaddr, + unsigned long shared_page) +{ + struct gnttab_map_grant_ref op = { + .host_addr = localaddr, + .flags = GNTMAP_host_map, + .ref = shared_page, + .dom = 
tpmif->domid, + }; + + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); + + if (op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + return op.handle; + } + + tpmif->shmem_ref = shared_page; + tpmif->shmem_handle = op.handle; + tpmif->shmem_vaddr = localaddr; + return 0; +} + + +static void unmap_frontend_page(tpmif_t *tpmif) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = tpmif->shmem_vaddr; + op.handle = tpmif->shmem_handle; + op.dev_bus_addr = 0; + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); +} + + +int tpmif_map(tpmif_t *tpmif, + unsigned long shared_page, unsigned int evtchn) +{ + struct vm_struct *vma; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + BUG_ON(tpmif->remote_evtchn); + + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(tpmif, + VMALLOC_VMADDR(vma->addr), + shared_page); + if (err) { + vfree(vma->addr); + return err; + } + + op.u.bind_interdomain.dom1 = DOMID_SELF; + op.u.bind_interdomain.dom2 = tpmif->domid; + op.u.bind_interdomain.port1 = 0; + op.u.bind_interdomain.port2 = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_frontend_page(tpmif); + vfree(vma->addr); + return err; + } + + tpmif->evtchn = op.u.bind_interdomain.port1; + tpmif->remote_evtchn = evtchn; + + tpmif->tx = (tpmif_tx_interface_t *) vma->addr; + + bind_evtchn_to_irqhandler(tpmif->evtchn, + tpmif_be_int, + 0, + "tpmif-backend", + tpmif); + tpmif->status = CONNECTED; + tpmif->shmem_ref = shared_page; + tpmif->active = 1; + + return 0; +} + + +static void __tpmif_disconnect_complete(void *arg) +{ + evtchn_op_t op = { .cmd = EVTCHNOP_close }; + tpmif_t *tpmif = (tpmif_t *) arg; + + op.u.close.port = tpmif->evtchn; + op.u.close.dom = DOMID_SELF; + HYPERVISOR_event_channel_op(&op); + op.u.close.port = tpmif->remote_evtchn; + op.u.close.dom = tpmif->domid; + HYPERVISOR_event_channel_op(&op); + + if (tpmif->evtchn) + 
unbind_evtchn_from_irqhandler(tpmif->evtchn, tpmif); + + if (tpmif->tx) { + unmap_frontend_page(tpmif); + vfree(tpmif->tx); + } + + free_tpmif(tpmif); +} + + +void tpmif_disconnect_complete(tpmif_t * tpmif) +{ + INIT_WORK(&tpmif->work, __tpmif_disconnect_complete, (void *)tpmif); + schedule_work(&tpmif->work); +} + + +void __init tpmif_interface_init(void) +{ + tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof(tpmif_t), + 0, 0, NULL, NULL); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,1077 @@ +/****************************************************************************** + * drivers/xen/tpmback/tpmback.c + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@xxxxxxxxxx + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from drivers/xen/netback/netback.c + * Copyright (c) 2002-2004, K A Fraser + * + */ + +#include "common.h" +#include <asm-xen/evtchn.h> + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm-xen/xenbus.h> +#include <asm-xen/xen-public/grant_table.h> + + +struct data_exchange { + struct list_head pending_pak; + struct list_head current_pak; + unsigned int copied_so_far; + u8 has_opener; + rwlock_t pak_lock; // protects all of the previous fields + wait_queue_head_t wait_queue; +}; + +struct packet { + struct list_head next; + unsigned int data_len; + u8 *data_buffer; + tpmif_t *tpmif; + u32 tpm_instance; + u8 req_tag; + u32 last_read; + u8 flags; + struct timer_list processing_timer; +}; + +enum { + PACKET_FLAG_DISCARD_RESPONSE = 1, + PACKET_FLAG_SEND_CONTROLMESSAGE = 2, +}; + +static struct data_exchange dataex; + +/* local function prototypes */ +static int vtpm_queue_packet(struct packet *pak); +static int _packet_write(struct packet *pak, 
+ const char *data, size_t size, + int userbuffer); +static void processing_timeout(unsigned long ptr); +static int packet_read_shmem(struct packet *pak, + tpmif_t *tpmif, + u32 offset, + char *buffer, + int isuserbuffer, + u32 left); + + +#define MAX_PENDING_REQS TPMIF_TX_RING_SIZE + +static multicall_entry_t tx_mcl[MAX_PENDING_REQS]; + +#define MIN(x,y) (x) < (y) ? (x) : (y) + +/*************************************************************** + Packet-related functions +***************************************************************/ + +static struct packet * +packet_find_instance(struct list_head *head, u32 tpm_instance) +{ + struct packet *pak; + struct list_head *p; + /* + * traverse the list of packets and return the first + * one with the given instance number + */ + list_for_each(p, head) { + pak = list_entry(p, struct packet, next); + if (pak->tpm_instance == tpm_instance) { + return pak; + } + } + return NULL; +} + +static struct packet * +packet_find_packet(struct list_head *head, void *packet) +{ + struct packet *pak; + struct list_head *p; + /* + * traverse the list of packets and return the first + * one with the given instance number + */ + list_for_each(p, head) { + pak = list_entry(p, struct packet, next); + if (pak == packet) { + return pak; + } + } + return NULL; +} + +static struct packet * +packet_alloc(tpmif_t *tpmif, u32 size, u8 req_tag, u8 flags) +{ + struct packet *pak = NULL; + pak = kmalloc(sizeof(struct packet), + GFP_KERNEL); + if (NULL != pak) { + memset(pak, 0x0, sizeof(*pak)); + if (tpmif) { + pak->tpmif = tpmif; + pak->tpm_instance = tpmif->tpm_instance; + } + pak->data_len = size; + pak->req_tag = req_tag; + pak->last_read = 0; + pak->flags = flags; + + /* + * cannot do tpmif_get(tpmif); bad things happen + * on the last tpmif_put() + */ + init_timer(&pak->processing_timer); + pak->processing_timer.function = processing_timeout; + pak->processing_timer.data = (unsigned long)pak; + } + return pak; +} + +static void inline 
+packet_reset(struct packet *pak) +{ + pak->last_read = 0; +} + +static void inline +packet_free(struct packet *pak) +{ + del_singleshot_timer_sync(&pak->processing_timer); + if (pak->data_buffer) { + kfree(pak->data_buffer); + } + /* + * cannot do tpmif_put(pak->tpmif); bad things happen + * on the last tpmif_put() + */ + kfree(pak); +} + +static int +packet_set(struct packet *pak, + const unsigned char *buffer, u32 size) +{ + int rc = 0; + unsigned char *buf = kmalloc(size, GFP_KERNEL); + if (NULL != buf) { + pak->data_buffer = buf; + memcpy(buf, buffer, size); + pak->data_len = size; + } else { + rc = -ENOMEM; + } + return rc; +} + + +/* + * Write data to the shared memory and send it to the FE. + */ +static int +packet_write(struct packet *pak, + const char *data, size_t size, + int userbuffer) +{ + int rc = 0; + + DPRINTK("Supposed to send %d bytes to front-end!\n", + size); + + if (0 != (pak->flags & PACKET_FLAG_SEND_CONTROLMESSAGE)) { +#ifdef CONFIG_XEN_TPMDEV_CLOSE_IF_VTPM_FAILS + u32 res; + memcpy(&res, &data[2+4], sizeof(res)); + if (res != 0) { + /* + * Will close down this device and have the + * FE notified about closure. + */ + } +#endif + } + + if (0 != (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)) { + /* Don't send a respone to this packet. Just acknowledge it. */ + rc = size; + } else { + rc = _packet_write(pak, data, size, userbuffer); + } + + return rc; +} + + +static int +_packet_write(struct packet *pak, + const char *data, size_t size, + int userbuffer) +{ + /* + * Write into the shared memory pages directly + * and send it to the front end. 
+ */ + tpmif_t *tpmif = pak->tpmif; + u16 handle; + int rc = 0; + unsigned int i = 0; + unsigned int offset = 0; + multicall_entry_t *mcl; + + if (tpmif == NULL) + return -EFAULT; + + if (tpmif->status != CONNECTED) { + return size; + } + + mcl = tx_mcl; + while (offset < size && i < TPMIF_TX_RING_SIZE) { + unsigned int tocopy; + struct gnttab_map_grant_ref map_op; + struct gnttab_unmap_grant_ref unmap_op; + tpmif_tx_request_t *tx; + + tx = &tpmif->tx->ring[i].req; + + if (0 == tx->addr) { + DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i); + return 0; + } + + map_op.host_addr = MMAP_VADDR(tpmif, i); + map_op.flags = GNTMAP_host_map; + map_op.ref = tx->ref; + map_op.dom = tpmif->domid; + + if(unlikely( + HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, + &map_op, + 1))) { + BUG(); + } + + handle = map_op.handle; + + if (map_op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + return 0; + } + phys_to_machine_mapping[__pa(MMAP_VADDR(tpmif,i)) >> + PAGE_SHIFT] = + FOREIGN_FRAME(map_op.dev_bus_addr >> PAGE_SHIFT); + + tocopy = size - offset; + if (tocopy > PAGE_SIZE) { + tocopy = PAGE_SIZE; + } + if (userbuffer) { + if (copy_from_user((void *)(MMAP_VADDR(tpmif,i) | + (tx->addr & ~PAGE_MASK)), + (void __user *)&data[offset], + tocopy)) { + tpmif_put(tpmif); + return -EFAULT; + } + } else { + memcpy((void *)(MMAP_VADDR(tpmif,i) | + (tx->addr & ~PAGE_MASK)), + &data[offset], tocopy); + } + tx->size = tocopy; + + unmap_op.host_addr = MMAP_VADDR(tpmif, i); + unmap_op.handle = handle; + unmap_op.dev_bus_addr = 0; + + if(unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + &unmap_op, + 1))) { + BUG(); + } + + offset += tocopy; + i++; + } + + rc = offset; + DPRINTK("Notifying frontend via event channel %d\n", + tpmif->evtchn); + notify_via_evtchn(tpmif->evtchn); + + return rc; +} + +/* + * Read data from the shared memory and copy it directly into the + * provided buffer. 
Advance the read_last indicator which tells + * how many bytes have already been read. + */ +static int +packet_read(struct packet *pak, size_t numbytes, + char *buffer, size_t buffersize, + int userbuffer) +{ + tpmif_t *tpmif = pak->tpmif; + /* + * I am supposed to read 'numbytes' of data from the + * buffer. + * The first 4 bytes that are read are the instance number in + * network byte order, after that comes the data from the + * shared memory buffer. + */ + u32 to_copy; + u32 offset = 0; + u32 room_left = buffersize; + /* + * Ensure that we see the request when we copy it. + */ + mb(); + + if (pak->last_read < 4) { + /* + * copy the instance number into the buffer + */ + u32 instance_no = htonl(pak->tpm_instance); + u32 last_read = pak->last_read; + to_copy = MIN(4 - last_read, numbytes); + + if (userbuffer) { + if (copy_to_user(&buffer[0], + &(((u8 *)&instance_no)[last_read]), + to_copy)) { + return -EFAULT; + } + } else { + memcpy(&buffer[0], + &(((u8 *)&instance_no)[last_read]), + to_copy); + } + + pak->last_read += to_copy; + offset += to_copy; + room_left -= to_copy; + } + + /* + * If the packet has a data buffer appended, read from it... 
+ */ + + if (room_left > 0) { + if (pak->data_buffer) { + u32 to_copy = MIN(pak->data_len - offset, room_left); + u32 last_read = pak->last_read - 4; + if (userbuffer) { + if (copy_to_user(&buffer[offset], + &pak->data_buffer[last_read], + to_copy)) { + return -EFAULT; + } + } else { + memcpy(&buffer[offset], + &pak->data_buffer[last_read], + to_copy); + } + pak->last_read += to_copy; + offset += to_copy; + } else { + offset = packet_read_shmem(pak, + tpmif, + offset, + buffer, + userbuffer, + room_left); + } + } + return offset; +} + + +static int +packet_read_shmem(struct packet *pak, + tpmif_t *tpmif, + u32 offset, + char *buffer, + int isuserbuffer, + u32 room_left) { + u32 last_read = pak->last_read - 4; + u32 i = (last_read / PAGE_SIZE); + u32 pg_offset = last_read & (PAGE_SIZE - 1); + u32 to_copy; + u16 handle; + + tpmif_tx_request_t *tx; + tx = &tpmif->tx->ring[0].req; + /* + * Start copying data at the page with index 'index' + * and within that page at offset 'offset'. + * Copy a maximum of 'room_left' bytes. + */ + to_copy = MIN(PAGE_SIZE - pg_offset, room_left); + while (to_copy > 0) { + void *src; + struct gnttab_map_grant_ref map_op; + struct gnttab_unmap_grant_ref unmap_op; + + tx = &tpmif->tx->ring[i].req; + + map_op.host_addr = MMAP_VADDR(tpmif, i); + map_op.flags = GNTMAP_host_map; + map_op.ref = tx->ref; + map_op.dom = tpmif->domid; + + if(unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + &map_op, + 1))) { + BUG(); + } + + if (map_op.handle < 0) { + DPRINTK(" Grant table operation failure !\n"); + return -EFAULT; + } + + handle = map_op.handle; + + if (to_copy > tx->size) { + /* + * This is the case when the user wants to read more + * than what we have. So we just give him what we + * have. 
+ */ + to_copy = MIN(tx->size, to_copy); + } + + DPRINTK("Copying from mapped memory at %08lx\n", + (unsigned long)(MMAP_VADDR(tpmif,i) | + (tx->addr & ~PAGE_MASK))); + + src = (void *)(MMAP_VADDR(tpmif,i) | ((tx->addr & ~PAGE_MASK) + pg_offset)); + if (isuserbuffer) { + if (copy_to_user(&buffer[offset], + src, + to_copy)) { + return -EFAULT; + } + } else { + memcpy(&buffer[offset], + src, + to_copy); + } + + + DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n", + tpmif->domid, buffer[offset], buffer[offset+1],buffer[offset+2],buffer[offset+3]); + + unmap_op.host_addr = MMAP_VADDR(tpmif, i); + unmap_op.handle = handle; + unmap_op.dev_bus_addr = 0; + + if(unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + &unmap_op, + 1))) { + BUG(); + } + + offset += to_copy; + pg_offset = 0; + last_read += to_copy; + room_left -= to_copy; + + to_copy = MIN(PAGE_SIZE, room_left); + i++; + } /* while (to_copy > 0) */ + /* + * Adjust the last_read pointer + */ + pak->last_read = last_read + 4; + return offset; +} + + +/* ============================================================ + * The file layer for reading data from this device + * ============================================================ + */ +static int +vtpm_op_open(struct inode *inode, struct file *f) +{ + int rc = 0; + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + if (dataex.has_opener == 0) { + dataex.has_opener = 1; + } else { + rc = -EPERM; + } + write_unlock_irqrestore(&dataex.pak_lock, flags); + return rc; +} + +static ssize_t +vtpm_op_read(struct file *file, + char __user * data, size_t size, loff_t * offset) +{ + int ret_size = -ENODATA; + struct packet *pak = NULL; + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + + if (list_empty(&dataex.pending_pak)) { + write_unlock_irqrestore(&dataex.pak_lock, flags); + wait_event_interruptible(dataex.wait_queue, + !list_empty(&dataex.pending_pak)); + write_lock_irqsave(&dataex.pak_lock, flags); + } + + if 
(!list_empty(&dataex.pending_pak)) { + unsigned int left; + pak = list_entry(dataex.pending_pak.next, struct packet, next); + + left = pak->data_len - dataex.copied_so_far; + + DPRINTK("size given by app: %d, available: %d\n", size, left); + + ret_size = MIN(size,left); + + ret_size = packet_read(pak, ret_size, data, size, 1); + if (ret_size < 0) { + ret_size = -EFAULT; + } else { + DPRINTK("Copied %d bytes to user buffer\n", ret_size); + + dataex.copied_so_far += ret_size; + if (dataex.copied_so_far >= pak->data_len + 4) { + DPRINTK("All data from this packet given to app.\n"); + /* All data given to app */ + + del_singleshot_timer_sync(&pak->processing_timer); + list_del(&pak->next); + list_add_tail(&pak->next, &dataex.current_pak); + /* + * The more fontends that are handled at the same time, + * the more time we give the TPM to process the request. + */ + mod_timer(&pak->processing_timer, + jiffies + (num_frontends * 10 * HZ)); + dataex.copied_so_far = 0; + } + } + } + write_unlock_irqrestore(&dataex.pak_lock, flags); + + DPRINTK("Returning result from read to app: %d\n", ret_size); + + return ret_size; +} + +/* + * Write operation - only works after a previous read operation! 
+ */ +static ssize_t +vtpm_op_write(struct file *file, const char __user * data, size_t size, + loff_t * offset) +{ + struct packet *pak; + int rc = 0; + unsigned int off = 4; + unsigned long flags; + u32 instance_no = 0; + u32 len_no = 0; + + /* + * Minimum required packet size is: + * 4 bytes for instance number + * 2 bytes for tag + * 4 bytes for paramSize + * 4 bytes for the ordinal + * sum: 14 bytes + */ + if ( size < off + 10 ) { + return -EFAULT; + } + + if (copy_from_user(&instance_no, + (void __user *)&data[0], + 4)) { + return -EFAULT; + } + + if (copy_from_user(&len_no, + (void __user *)&data[off+2], + 4) || + (off + ntohl(len_no) != size)) { + return -EFAULT; + } + + write_lock_irqsave(&dataex.pak_lock, flags); + pak = packet_find_instance(&dataex.current_pak, ntohl(instance_no)); + + if (pak == NULL) { + write_unlock_irqrestore(&dataex.pak_lock, flags); + printk(KERN_ALERT "No associated packet!\n"); + return -EFAULT; + } else { + del_singleshot_timer_sync(&pak->processing_timer); + list_del(&pak->next); + } + + write_unlock_irqrestore(&dataex.pak_lock, flags); + + /* + * The first 'offset' bytes must be the instance number. + * I will just pull that from the packet. 
+ */ + size -= off; + data = &data[off]; + + rc = packet_write(pak, data, size, 1); + + if (rc > 0) { + /* I neglected the first 4 bytes */ + rc += off; + } + packet_free(pak); + return rc; +} + +static int +vtpm_op_release(struct inode *inode, struct file *file) +{ + unsigned long flags; + vtpm_release_packets(NULL, 1); + write_lock_irqsave(&dataex.pak_lock, flags); + dataex.has_opener = 0; + write_unlock_irqrestore(&dataex.pak_lock, flags); + return 0; +} + +static unsigned int +vtpm_op_poll(struct file *file, struct poll_table_struct *pst) +{ + return 0; +} + +static struct file_operations vtpm_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = vtpm_op_open, + .read = vtpm_op_read, + .write = vtpm_op_write, + .release = vtpm_op_release, + .poll = vtpm_op_poll, +}; + +static struct miscdevice ibmvtpms_miscdevice = { + .minor = 225, + .name = "vtpm", + .fops = &vtpm_ops, +}; + + +/*************************************************************** + Virtual TPM functions and data stuctures +***************************************************************/ + +static u8 create_cmd[] = { + 1,193, /* 0: TPM_TAG_RQU_COMMAMD */ + 0,0,0,19, /* 2: length */ + 0,0,0,0x1, /* 6: VTPM_ORD_OPEN */ + 0, /* 10: VTPM type */ + 0,0,0,0, /* 11: domain id */ + 0,0,0,0 /* 15: instance id */ +}; + +static u8 destroy_cmd[] = { + 1,193, /* 0: TPM_TAG_RQU_COMMAMD */ + 0,0,0,14, /* 2: length */ + 0,0,0,0x2, /* 6: VTPM_ORD_CLOSE */ + 0,0,0,0 /* 10: instance id */ +}; + +int tpmif_vtpm_open(tpmif_t *tpmif, domid_t domid, u32 instance) +{ + int rc = 0; + struct packet *pak = packet_alloc(tpmif, sizeof(create_cmd), create_cmd[0], + PACKET_FLAG_DISCARD_RESPONSE| + PACKET_FLAG_SEND_CONTROLMESSAGE); + if (pak) { + u8 buf[sizeof(create_cmd)]; + u32 domid_no = htonl((u32)domid); + u32 instance_no = htonl(instance); + memcpy(buf, create_cmd, sizeof(create_cmd)); + + memcpy(&buf[11], &domid_no, sizeof(u32)); + memcpy(&buf[15], &instance_no, sizeof(u32)); + + /* copy the buffer into the 
packet */ + rc = packet_set(pak, buf, sizeof(buf)); + + if (rc == 0) { + pak->tpm_instance = 0; + rc = vtpm_queue_packet(pak); + } + if (rc < 0) { + /* could not be queued or built */ + packet_free(pak); + } + } else { + rc = -ENOMEM; + } + return rc; +} + +int tpmif_vtpm_close(u32 instid) +{ + int rc = 0; + struct packet *pak; + + pak = packet_alloc(NULL, + sizeof(create_cmd), + create_cmd[0], + PACKET_FLAG_DISCARD_RESPONSE| + PACKET_FLAG_SEND_CONTROLMESSAGE); + if (pak) { + u8 buf[sizeof(destroy_cmd)]; + u32 instid_no = htonl(instid); + memcpy(buf, destroy_cmd, sizeof(destroy_cmd)); + memcpy(&buf[10], &instid_no, sizeof(u32)); + + /* copy the buffer into the packet */ + rc = packet_set(pak, buf, sizeof(buf)); + + if (rc == 0) { + pak->tpm_instance = 0; + rc = vtpm_queue_packet(pak); + } + if (rc < 0) { + /* could not be queued or built */ + packet_free(pak); + } + } else { + rc = -ENOMEM; + } + return rc; +} + + +/*************************************************************** + Utility functions +***************************************************************/ + +static int +tpm_send_fail_message(struct packet *pak, u8 req_tag) +{ + int rc; + static const unsigned char tpm_error_message_fail[] = { + 0x00, 0x00, + 0x00, 0x00, 0x00, 0x0a, + 0x00, 0x00, 0x00, 0x09 /* TPM_FAIL */ + }; + unsigned char buffer[sizeof(tpm_error_message_fail)]; + + memcpy(buffer, tpm_error_message_fail, sizeof(tpm_error_message_fail)); + /* + * Insert the right response tag depending on the given tag + * All response tags are '+3' to the request tag. 
+ */ + buffer[1] = req_tag + 3; + + /* + * Write the data to shared memory and notify the front-end + */ + rc = packet_write(pak, buffer, sizeof(buffer), 0); + + return rc; +} + + +static void +_vtpm_release_packets(struct list_head *head, tpmif_t *tpmif, + int send_msgs) +{ + struct packet *pak; + struct list_head *pos, *tmp; + + list_for_each_safe(pos, tmp, head) { + pak = list_entry(pos, struct packet, next); + if (tpmif == NULL || pak->tpmif == tpmif) { + int can_send = 0; + del_singleshot_timer_sync(&pak->processing_timer); + list_del(&pak->next); + + if (pak->tpmif && pak->tpmif->status == CONNECTED) { + can_send = 1; + } + + if (send_msgs && can_send) { + tpm_send_fail_message(pak, pak->req_tag); + } + packet_free(pak); + } + } +} + + +int +vtpm_release_packets(tpmif_t *tpmif, int send_msgs) +{ + unsigned long flags; + + write_lock_irqsave(&dataex.pak_lock, flags); + + _vtpm_release_packets(&dataex.pending_pak, tpmif, send_msgs); + _vtpm_release_packets(&dataex.current_pak, tpmif, send_msgs); + + write_unlock_irqrestore(&dataex.pak_lock, + flags); + return 0; +} + + +static int vtpm_queue_packet(struct packet *pak) +{ + int rc = 0; + if (dataex.has_opener) { + unsigned long flags; + write_lock_irqsave(&dataex.pak_lock, flags); + list_add_tail(&pak->next, &dataex.pending_pak); + /* give the TPM some time to pick up the request */ + mod_timer(&pak->processing_timer, jiffies + (10 * HZ)); + write_unlock_irqrestore(&dataex.pak_lock, + flags); + + wake_up_interruptible(&dataex.wait_queue); + } else { + rc = -EFAULT; + } + return rc; +} + + +static int vtpm_receive(tpmif_t *tpmif, u32 size) +{ + int rc = 0; + unsigned char buffer[10]; + __be32 *native_size; + + struct packet *pak = packet_alloc(tpmif, size, buffer[4], 0); + if (NULL == pak) { + return -ENOMEM; + } + /* + * Read 10 bytes from the received buffer to test its + * content for validity. 
+ */ + if (sizeof(buffer) != packet_read(pak, + sizeof(buffer), buffer, + sizeof(buffer), 0)) { + goto failexit; + } + /* + * Reset the packet read pointer so we can read all its + * contents again. + */ + packet_reset(pak); + + native_size = (__force __be32 *)(&buffer[4+2]); + /* + * Verify that the size of the packet is correct + * as indicated and that there's actually someone reading packets. + * The minimum size of the packet is '10' for tag, size indicator + * and ordinal. + */ + if (size < 10 || + be32_to_cpu(*native_size) != size || + 0 == dataex.has_opener) { + rc = -EINVAL; + goto failexit; + } else { + if ((rc = vtpm_queue_packet(pak)) < 0) { + goto failexit; + } + } + return 0; + +failexit: + if (pak) { + tpm_send_fail_message(pak, buffer[4+1]); + packet_free(pak); + } + return rc; +} + + +/* + * Timeout function that gets invoked when a packet has not been processed + * during the timeout period. + * The packet must be on a list when this function is invoked. This + * also means that once its taken off a list, the timer must be + * destroyed as well. + */ +static void processing_timeout(unsigned long ptr) +{ + struct packet *pak = (struct packet *)ptr; + unsigned long flags; + write_lock_irqsave(&dataex.pak_lock, flags); + /* + * The packet needs to be searched whether it + * is still on the list. 
+ */ + if (pak == packet_find_packet(&dataex.pending_pak, pak) || + pak == packet_find_packet(&dataex.current_pak, pak) ) { + list_del(&pak->next); + tpm_send_fail_message(pak, pak->req_tag); + packet_free(pak); + } + + write_unlock_irqrestore(&dataex.pak_lock, flags); +} + + + +static void tpm_tx_action(unsigned long unused); +static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0); + +#define MAX_PENDING_REQS TPMIF_TX_RING_SIZE + +static struct list_head tpm_schedule_list; +static spinlock_t tpm_schedule_list_lock; + +static inline void +maybe_schedule_tx_action(void) +{ + smp_mb(); + tasklet_schedule(&tpm_tx_tasklet); +} + +static inline int +__on_tpm_schedule_list(tpmif_t * tpmif) +{ + return tpmif->list.next != NULL; +} + +static void +remove_from_tpm_schedule_list(tpmif_t * tpmif) +{ + spin_lock_irq(&tpm_schedule_list_lock); + if (likely(__on_tpm_schedule_list(tpmif))) { + list_del(&tpmif->list); + tpmif->list.next = NULL; + tpmif_put(tpmif); + } + spin_unlock_irq(&tpm_schedule_list_lock); +} + +static void +add_to_tpm_schedule_list_tail(tpmif_t * tpmif) +{ + if (__on_tpm_schedule_list(tpmif)) + return; + + spin_lock_irq(&tpm_schedule_list_lock); + if (!__on_tpm_schedule_list(tpmif) && tpmif->active) { + list_add_tail(&tpmif->list, &tpm_schedule_list); + tpmif_get(tpmif); + } + spin_unlock_irq(&tpm_schedule_list_lock); +} + +void +tpmif_schedule_work(tpmif_t * tpmif) +{ + add_to_tpm_schedule_list_tail(tpmif); + maybe_schedule_tx_action(); +} + +void +tpmif_deschedule_work(tpmif_t * tpmif) +{ + remove_from_tpm_schedule_list(tpmif); +} + + +static void +tpm_tx_action(unsigned long unused) +{ + struct list_head *ent; + tpmif_t *tpmif; + tpmif_tx_request_t *tx; + + DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__); + + while (!list_empty(&tpm_schedule_list)) { + /* Get a tpmif from the list with work to do. 
*/ + ent = tpm_schedule_list.next; + tpmif = list_entry(ent, tpmif_t, list); + tpmif_get(tpmif); + remove_from_tpm_schedule_list(tpmif); + /* + * Ensure that we see the request when we read from it. + */ + mb(); + + tx = &tpmif->tx->ring[0].req; + + /* pass it up */ + vtpm_receive(tpmif, tx->size); + + tpmif_put(tpmif); + } +} + +irqreturn_t +tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + tpmif_t *tpmif = dev_id; + add_to_tpm_schedule_list_tail(tpmif); + maybe_schedule_tx_action(); + return IRQ_HANDLED; +} + +static int __init +tpmback_init(void) +{ + int rc; + if (!(xen_start_info->flags & SIF_TPM_BE_DOMAIN) && + !(xen_start_info->flags & SIF_INITDOMAIN)) { + printk(KERN_ALERT "Neither TPM-BE Domain nor INIT domain!\n"); + return 0; + } + + if ((rc = misc_register(&ibmvtpms_miscdevice)) != 0) { + printk(KERN_ALERT "Could not register misc device for TPM BE.\n"); + return rc; + } + + INIT_LIST_HEAD(&dataex.pending_pak); + INIT_LIST_HEAD(&dataex.current_pak); + dataex.has_opener = 0; + rwlock_init(&dataex.pak_lock); + init_waitqueue_head(&dataex.wait_queue); + + spin_lock_init(&tpm_schedule_list_lock); + INIT_LIST_HEAD(&tpm_schedule_list); + + tpmif_interface_init(); + tpmif_xenbus_init(); + + printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); + + return 0; +} + +__initcall(tpmback_init); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,271 @@ +/* Xenbus code for tpmif backend + Copyright (C) 2005 Rusty Russell <rusty@xxxxxxxxxxxxxxx> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <stdarg.h> +#include <linux/module.h> +#include <asm-xen/xenbus.h> +#include "common.h" + +struct backend_info +{ + struct xenbus_device *dev; + + /* our communications channel */ + tpmif_t *tpmif; + + long int frontend_id; + long int instance; // instance of TPM + + /* watch front end for changes */ + struct xenbus_watch backend_watch; + + struct xenbus_watch watch; + char * frontpath; +}; + +static int tpmback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev->data; + + if (be->watch.node) { + unregister_xenbus_watch(&be->watch); + } + unregister_xenbus_watch(&be->backend_watch); + + tpmif_vtpm_close(be->instance); + + if (be->tpmif) { + tpmif_put(be->tpmif); + } + + if (be->frontpath) + kfree(be->frontpath); + kfree(be); + return 0; +} + + +static void frontend_changed(struct xenbus_watch *watch, const char *node) +{ + unsigned long ringref; + unsigned int evtchn; + unsigned long ready = 1; + int err; + struct backend_info *be + = container_of(watch, struct backend_info, watch); + + /* If other end is gone, delete ourself. 
*/ + if (node && !xenbus_exists(be->frontpath, "")) { + xenbus_rm(be->dev->nodename, ""); + device_unregister(&be->dev->dev); + return; + } + + if (be->tpmif == NULL || be->tpmif->status == CONNECTED) + return; + + err = xenbus_gather(be->frontpath, + "ring-ref", "%lu", &ringref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_error(be->dev, err, + "reading %s/ring-ref and event-channel", + be->frontpath); + return; + } + + + /* + * Tell the front-end that we are ready to go - + * unless something bad happens + */ + err = xenbus_transaction_start(be->dev->nodename); + if (err) { + xenbus_dev_error(be->dev, err, "starting transaction"); + return; + } + + err = xenbus_printf(be->dev->nodename, + "ready", "%lu", ready); + if (err) { + xenbus_dev_error(be->dev, err, "writing 'ready'"); + goto abort; + } + + err = tpmif_map(be->tpmif, ringref, evtchn); + if (err) { + xenbus_dev_error(be->dev, err, + "mapping shared-frame %lu port %u", + ringref, evtchn); + goto abort; + } + + err = tpmif_vtpm_open(be->tpmif, + be->frontend_id, + be->instance); + if (err) { + xenbus_dev_error(be->dev, err, + "queueing vtpm open packet"); + /* + * Should close down this device and notify FE + * about closure. 
+ */ + goto abort; + } + + xenbus_transaction_end(0); + xenbus_dev_ok(be->dev); + return; +abort: + xenbus_transaction_end(1); +} + + +static void backend_changed(struct xenbus_watch *watch, const char *node) +{ + int err; + long int instance; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + + err = xenbus_scanf(dev->nodename, "instance", "%li", &instance); + if (XENBUS_EXIST_ERR(err)) + return; + if (err < 0) { + xenbus_dev_error(dev, err, "reading 'instance' variable"); + return; + } + + if (be->instance != -1 && be->instance != instance) { + printk(KERN_WARNING + "cannot change the instance\n"); + return; + } + be->instance = instance; + + if (be->tpmif == NULL) { + be->tpmif = tpmif_find(be->frontend_id, + instance); + if (IS_ERR(be->tpmif)) { + err = PTR_ERR(be->tpmif); + be->tpmif = NULL; + xenbus_dev_error(dev, err, "creating interface"); + return; + } + + /* Pass in NULL node to skip exist test. */ + frontend_changed(&be->watch, be->frontpath); + } +} + + +static int tpmback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct backend_info *be; + char *frontend; + int err; + + be = kmalloc(sizeof(*be), GFP_KERNEL); + if (!be) { + xenbus_dev_error(dev, -ENOMEM, "allocating backend structure"); + err = -ENOMEM; + } + + memset(be, 0, sizeof(*be)); + + frontend = NULL; + err = xenbus_gather(dev->nodename, + "frontend-id", "%li", &be->frontend_id, + "frontend", NULL, &frontend, + NULL); + if (XENBUS_EXIST_ERR(err)) + goto free_be; + if (err < 0) { + xenbus_dev_error(dev, err, + "reading %s/frontend or frontend-id", + dev->nodename); + goto free_be; + } + if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) { + /* If we can't get a frontend path and a frontend-id, + * then our bus-id is no longer valid and we need to + * destroy the backend device. 
+ */ + err = -ENOENT; + goto free_be; + } + + be->dev = dev; + be->backend_watch.node = dev->nodename; + be->backend_watch.callback = backend_changed; + be->instance = -1; + err = register_xenbus_watch(&be->backend_watch); + if (err) { + be->backend_watch.node = NULL; + xenbus_dev_error(dev, err, "adding backend watch on %s", + dev->nodename); + goto free_be; + } + + be->frontpath = frontend; + be->watch.node = be->frontpath; + be->watch.callback = frontend_changed; + err = register_xenbus_watch(&be->watch); + if (err) { + be->watch.node = NULL; + xenbus_dev_error(dev, err, + "adding frontend watch on %s", + be->frontpath); + goto free_be; + } + + dev->data = be; + + backend_changed(&be->backend_watch, dev->nodename); + return err; + +free_be: + if (be->backend_watch.node) + unregister_xenbus_watch(&be->backend_watch); + if (frontend) + kfree(frontend); + kfree(be); + return err; +} + + +static struct xenbus_device_id tpmback_ids[] = { + { "vtpm" }, + { "" } +}; + + +static struct xenbus_driver tpmback = { + .name = "vtpm", + .owner = THIS_MODULE, + .ids = tpmback_ids, + .probe = tpmback_probe, + .remove = tpmback_remove, +}; + + +void tpmif_xenbus_init(void) +{ + xenbus_register_backend(&tpmback); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmfront/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,2 @@ + +obj-$(CONFIG_XEN_TPMDEV_FRONTEND) += tpmfront.o diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,738 @@ +/* + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@xxxxxxxxxx + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from drivers/xen/netfront/netfront.c + * + * Copyright (c) 2002-2004, K A Fraser + * + * This 
file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/tpmfe.h> + +#include <asm/semaphore.h> +#include <asm/io.h> +#include <asm-xen/evtchn.h> +#include <asm-xen/xen-public/io/tpmif.h> +#include <asm/uaccess.h> +#include <asm-xen/xenbus.h> +#include <asm-xen/xen-public/io/domain_controller.h> +#include <asm-xen/xen-public/grant_table.h> + +#include "tpmfront.h" + +#undef DEBUG + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) +#endif + +/* locally visible variables */ +static grant_ref_t gref_head; +static struct tpm_private my_private; + +/* local function prototypes */ +static irqreturn_t tpmif_int(int irq, + void *tpm_priv, + struct pt_regs *ptregs); +static void tpmif_rx_action(unsigned long unused); +static void tpmif_connect(u16 evtchn, domid_t domid); +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0); +static int tpm_allocate_buffers(struct tpm_private *tp); +static void tpmif_set_connected_state(struct tpm_private *tp, int newstate); +static int tpm_xmit(struct tpm_private *tp, + const u8 * buf, size_t count, int userbuffer, + void *remember); + +#if DEBUG +#define DPRINTK(fmt, args...) \ + printk(KERN_ALERT "xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) +#define WPRINTK(fmt, args...) 
\ + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) + + +static inline int +tx_buffer_copy(struct tx_buffer *txb, const u8 * src, int len, + int isuserbuffer) +{ + int copied = len; + + if (len > txb->size) { + copied = txb->size; + } + if (isuserbuffer) { + if (copy_from_user(txb->data, + src, + copied)) { + return -EFAULT; + } + } else { + memcpy(txb->data, src, copied); + } + txb->len = len; + return copied; +} + +static inline struct tx_buffer *tx_buffer_alloc(void) +{ + struct tx_buffer *txb = kmalloc(sizeof (struct tx_buffer), + GFP_KERNEL); + + if (txb) { + txb->len = 0; + txb->size = PAGE_SIZE; + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); + if (txb->data == NULL) { + kfree(txb); + txb = NULL; + } + } + return txb; +} + + +/************************************************************** + + The interface to let the tpm plugin register its callback + function and send data to another partition using this module + +**************************************************************/ + +static DECLARE_MUTEX(upperlayer_lock); +static DECLARE_MUTEX(suspend_lock); +static struct tpmfe_device *upperlayer_tpmfe; + +/* + * Send data via this module by calling this function + */ +int tpm_fe_send(const u8 * buf, size_t count, void *ptr) +{ + int sent = 0; + struct tpm_private *tp = &my_private; + + down(&suspend_lock); + sent = tpm_xmit(tp, buf, count, 0, ptr); + up(&suspend_lock); + + return sent; +} +EXPORT_SYMBOL(tpm_fe_send); + +/* + * Register a callback for receiving data from this module + */ +int tpm_fe_register_receiver(struct tpmfe_device *tpmfe_dev) +{ + int rc = 0; + + down(&upperlayer_lock); + if (NULL == upperlayer_tpmfe) { + upperlayer_tpmfe = tpmfe_dev; + tpmfe_dev->max_tx_size = TPMIF_TX_RING_SIZE * PAGE_SIZE; + } else { + rc = -EBUSY; + } + up(&upperlayer_lock); + return rc; +} +EXPORT_SYMBOL(tpm_fe_register_receiver); + +/* + * Unregister the callback for receiving data from this module + */ +void tpm_fe_unregister_receiver(void) +{ + 
down(&upperlayer_lock); + upperlayer_tpmfe = NULL; + up(&upperlayer_lock); +} +EXPORT_SYMBOL(tpm_fe_unregister_receiver); + +/* + * Call this function to send data to the upper layer's + * registered receiver function. + */ +static int tpm_fe_send_upperlayer(const u8 * buf, size_t count, + const void *ptr) +{ + int rc; + + down(&upperlayer_lock); + + if (upperlayer_tpmfe && upperlayer_tpmfe->receive) { + rc = upperlayer_tpmfe->receive(buf, count, ptr); + } else { + rc = 0; + } + + up(&upperlayer_lock); + return rc; +} + +/************************************************************** + XENBUS support code +**************************************************************/ + +static void watch_for_status(struct xenbus_watch *watch, const char *node) +{ + struct tpmfront_info *info; + int err; + unsigned long ready; + struct tpm_private *tp = &my_private; + + info = container_of(watch, struct tpmfront_info, watch); + node += strlen(watch->node); + + if (tp->connected) + return; + + err = xenbus_gather(watch->node, + "ready", "%lu", &ready, + NULL); + if (err) { + xenbus_dev_error(info->dev, err, "reading 'ready' field"); + return; + } + + tpmif_set_connected_state(tp, 1); + + xenbus_dev_ok(info->dev); +} + + +static int setup_tpmring(struct xenbus_device *dev, + struct tpmfront_info * info, + domid_t backend_id) +{ + tpmif_tx_interface_t *sring; + struct tpm_private *tp = &my_private; + + evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound }; + int err; + + sring = (void *)__get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_error(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + tp->tx = sring; + + tpm_allocate_buffers(tp); + + info->ring_ref = gnttab_claim_grant_reference(&gref_head); + ASSERT(info->ring_ref != -ENOSPC); + gnttab_grant_foreign_access_ref(info->ring_ref, + backend_id, + (virt_to_machine(tp->tx) >> PAGE_SHIFT), + 0); + + op.u.alloc_unbound.dom = backend_id; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + free_page((unsigned 
long)sring); + tp->tx = 0; + xenbus_dev_error(dev, err, "allocating event channel"); + return err; + } + tpmif_connect(op.u.alloc_unbound.port, backend_id); + return 0; +} + + +static void destroy_tpmring(struct tpmfront_info *info, struct tpm_private *tp) +{ + tpmif_set_connected_state(tp,0); + + if ( tp->tx != NULL ) { + free_page((unsigned long)tp->tx); + tp->tx = NULL; + } + unbind_evtchn_from_irqhandler(tp->evtchn, NULL); + tp->evtchn = 0; +} + + +static int talk_to_backend(struct xenbus_device *dev, + struct tpmfront_info *info) +{ + char *backend; + const char *message; + int err; + int backend_id; + + backend = NULL; + err = xenbus_gather(dev->nodename, + "backend-id", "%i", &backend_id, + "backend", NULL, &backend, + NULL); + if (XENBUS_EXIST_ERR(err)) + goto out; + if (backend && strlen(backend) == 0) { + err = -ENOENT; + goto out; + } + if (err < 0) { + xenbus_dev_error(dev, err, "reading %s/backend or backend-id", + dev->nodename); + goto out; + } + + info->backend_id = backend_id; + my_private.backend_id = backend_id; + + err = setup_tpmring(dev, info, backend_id); + if (err) { + xenbus_dev_error(dev, err, "setting up ring"); + goto out; + } + + err = xenbus_transaction_start(dev->nodename); + if (err) { + xenbus_dev_error(dev, err, "starting transaction"); + goto destroy_tpmring; + } + + err = xenbus_printf(dev->nodename, + "ring-ref","%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + + err = xenbus_printf(dev->nodename, + "event-channel", "%u", my_private.evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + info->backend = backend; + backend = NULL; + + info->watch.node = info->backend; + info->watch.callback = watch_for_status; + err = register_xenbus_watch(&info->watch); + if (err) { + message = "registering watch on backend"; + goto abort_transaction; + } + + err = xenbus_transaction_end(0); + if (err) { + xenbus_dev_error(dev, err, "completing transaction"); 
+ goto destroy_tpmring; + } + +out: + if (backend) + kfree(backend); + return err; + +abort_transaction: + xenbus_transaction_end(1); + /* Have to do this *outside* transaction. */ + xenbus_dev_error(dev, err, "%s", message); +destroy_tpmring: + destroy_tpmring(info, &my_private); + goto out; +} + + +static int tpmfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct tpmfront_info *info; + int handle; + + err = xenbus_scanf(dev->nodename, + "handle", "%i", &handle); + if (XENBUS_EXIST_ERR(err)) + return err; + + if (err < 0) { + xenbus_dev_error(dev,err,"reading virtual-device"); + return err; + } + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_error(dev,err,"allocating info structure"); + return err; + } + memset(info, 0x0, sizeof(*info)); + + info->dev = dev; + info->handle = handle; + dev->data = info; + + err = talk_to_backend(dev, info); + if (err) { + kfree(info); + dev->data = NULL; + return err; + } + + watch_for_status(&info->watch, info->watch.node); + return 0; +} + +static int tpmfront_remove(struct xenbus_device *dev) +{ + struct tpmfront_info *info = dev->data; + if (info->backend) + unregister_xenbus_watch(&info->watch); + + destroy_tpmring(info, &my_private); + + kfree(info->backend); + kfree(info); + + return 0; +} + +static int tpmfront_suspend(struct xenbus_device *dev) +{ + struct tpmfront_info *info = dev->data; + struct tpm_private *tp = &my_private; + + /* lock so no app can send */ + down(&suspend_lock); + + while (atomic_read(&tp->tx_busy)) { + printk("---- TPMIF: Outstanding request.\n"); +#if 0 + /* + * Would like to wait until the outstanding request + * has come back, but this does not work properly, yet. 
+ */ + interruptible_sleep_on_timeout(&tp->wait_q, + 100); +#else + break; +#endif + } + + unregister_xenbus_watch(&info->watch); + + kfree(info->backend); + info->backend = NULL; + + destroy_tpmring(info, tp); + + return 0; +} + +static int tpmif_recover(void) +{ + return 0; +} + +static int tpmfront_resume(struct xenbus_device *dev) +{ + struct tpmfront_info *info = dev->data; + int err; + + err = talk_to_backend(dev, info); + if (!err) { + tpmif_recover(); + } + + /* unlock so apps can resume */ + up(&suspend_lock); + + return err; +} + +static void tpmif_connect(u16 evtchn, domid_t domid) +{ + int err = 0; + struct tpm_private *tp = &my_private; + + tp->evtchn = evtchn; + tp->backend_id = domid; + + err = bind_evtchn_to_irqhandler( + tp->evtchn, + tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); + if ( err != 0 ) { + WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); + return; + } +} + +static struct xenbus_device_id tpmfront_ids[] = { + { "vtpm" }, + { "" } +}; + +static struct xenbus_driver tpmfront = { + .name = "vtpm", + .owner = THIS_MODULE, + .ids = tpmfront_ids, + .probe = tpmfront_probe, + .remove = tpmfront_remove, + .resume = tpmfront_resume, + .suspend = tpmfront_suspend, +}; + +static void __init init_tpm_xenbus(void) +{ + xenbus_register_device(&tpmfront); +} + + +static int +tpm_allocate_buffers(struct tpm_private *tp) +{ + unsigned int i; + + i = 0; + while (i < TPMIF_TX_RING_SIZE) { + tp->tx_buffers[i] = tx_buffer_alloc(); + i++; + } + + return 1; +} + +static void +tpmif_rx_action(unsigned long unused) +{ + struct tpm_private *tp = &my_private; + + int i = 0; + unsigned int received; + unsigned int offset = 0; + u8 *buffer; + tpmif_tx_request_t *tx; + tx = &tp->tx->ring[i].req; + + received = tx->size; + + buffer = kmalloc(received, GFP_KERNEL); + if (NULL == buffer) { + goto exit; + } + + i = 0; + while (i < TPMIF_TX_RING_SIZE && + offset < received) { + struct tx_buffer *txb = tp->tx_buffers[i]; + tpmif_tx_request_t *tx; + unsigned int 
tocopy; + + tx = &tp->tx->ring[i].req; + tocopy = tx->size; + if (tocopy > PAGE_SIZE) { + tocopy = PAGE_SIZE; + } + + memcpy(&buffer[offset], txb->data, tocopy); + + gnttab_release_grant_reference(&gref_head, tx->ref); + + offset += tocopy; + i++; + } + + tpm_fe_send_upperlayer(buffer, received, tp->tx_remember); + kfree(buffer); + +exit: + atomic_set(&tp->tx_busy, 0); + wake_up_interruptible(&tp->wait_q); +} + + +static irqreturn_t +tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) +{ + struct tpm_private *tp = tpm_priv; + unsigned long flags; + + spin_lock_irqsave(&tp->tx_lock, flags); + tasklet_schedule(&tpmif_rx_tasklet); + spin_unlock_irqrestore(&tp->tx_lock, flags); + + return IRQ_HANDLED; +} + + +static int +tpm_xmit(struct tpm_private *tp, + const u8 * buf, size_t count, int isuserbuffer, + void *remember) +{ + tpmif_tx_request_t *tx; + TPMIF_RING_IDX i; + unsigned int offset = 0; + + spin_lock_irq(&tp->tx_lock); + + if (unlikely(atomic_read(&tp->tx_busy))) { + printk("There's an outstanding request/response on the way!\n"); + spin_unlock_irq(&tp->tx_lock); + return -EBUSY; + } + + if (tp->connected != 1) { + spin_unlock_irq(&tp->tx_lock); + return -EIO; + } + + i = 0; + while (count > 0 && i < TPMIF_TX_RING_SIZE) { + struct tx_buffer *txb = tp->tx_buffers[i]; + int copied; + + if (NULL == txb) { + DPRINTK("txb (i=%d) is NULL. 
buffers initilized?\n", i); + DPRINTK("Not transmittin anything!\n"); + spin_unlock_irq(&tp->tx_lock); + return -EFAULT; + } + copied = tx_buffer_copy(txb, &buf[offset], count, + isuserbuffer); + if (copied < 0) { + /* An error occurred */ + return copied; + } + count -= copied; + offset += copied; + + tx = &tp->tx->ring[i].req; + + tx->id = i; + tx->addr = virt_to_machine(txb->data); + tx->size = txb->len; + + DPRINTK("First 4 characters sent by TPM-FE are 0x%02x 0x%02x 0x%02x 0x%02x\n", + txb->data[0],txb->data[1],txb->data[2],txb->data[3]); + + /* get the granttable reference for this page */ + tx->ref = gnttab_claim_grant_reference( &gref_head ); + + if(-ENOSPC == tx->ref ) { + DPRINTK(" Grant table claim reference failed in func:%s line:%d file:%s\n", __FUNCTION__, __LINE__, __FILE__); + return -ENOSPC; + } + gnttab_grant_foreign_access_ref( tx->ref, + tp->backend_id, + (tx->addr >> PAGE_SHIFT), + 0 /*RW*/); + i++; + wmb(); + } + + atomic_set(&tp->tx_busy, 1); + tp->tx_remember = remember; + mb(); + + DPRINTK("Notifying backend via event channel %d\n", + tp->evtchn); + + notify_via_evtchn(tp->evtchn); + + spin_unlock_irq(&tp->tx_lock); + return offset; +} + + +static void tpmif_notify_upperlayer(struct tpm_private *tp) +{ + /* + * Notify upper layer about the state of the connection + * to the BE. + */ + down(&upperlayer_lock); + + if (upperlayer_tpmfe != NULL) { + switch (tp->connected) { + case 1: + upperlayer_tpmfe->status(TPMFE_STATUS_CONNECTED); + break; + + default: + upperlayer_tpmfe->status(0); + break; + } + } + up(&upperlayer_lock); +} + + +static void tpmif_set_connected_state(struct tpm_private *tp, int newstate) +{ + if (newstate != tp->connected) { + tp->connected = newstate; + tpmif_notify_upperlayer(tp); + } +} + + +/* ================================================================= + * Initialization function. 
+ * ================================================================= + */ + +static int __init +tpmif_init(void) +{ + IPRINTK("Initialising the vTPM driver.\n"); + if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE, + &gref_head ) < 0) { + return -EFAULT; + } + /* + * Only don't send the driver status when we are in the + * INIT domain. + */ + spin_lock_init(&my_private.tx_lock); + init_waitqueue_head(&my_private.wait_q); + + init_tpm_xenbus(); + + return 0; +} + +__initcall(tpmif_init); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,38 @@ +#ifndef TPM_FRONT_H +#define TPM_FRONT_H + + +struct tpm_private { + tpmif_tx_interface_t *tx; + unsigned int evtchn; + int connected; + + spinlock_t tx_lock; + + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE]; + + atomic_t tx_busy; + void *tx_remember; + domid_t backend_id; + wait_queue_head_t wait_q; +}; + + +struct tpmfront_info +{ + struct xenbus_watch watch; + int handle; + struct xenbus_device *dev; + char *backend; + int ring_ref; + domid_t backend_id; +}; + + +struct tx_buffer { + unsigned int size; // available space in data + unsigned int len; // used space in data + unsigned char *data; // pointer to a page +}; + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,186 @@ +/* + * xenbus_dev.c + * + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. 
+ * + * Copyright (c) 2005, Christian Limpach + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/notifier.h> +#include <linux/wait.h> +#include <linux/fs.h> + +#include "xenstored.h" +#include "xenbus_comms.h" + +#include <asm/uaccess.h> +#include <asm-xen/xenbus.h> +#include <asm-xen/linux-public/xenbus_dev.h> +#include <asm-xen/xen_proc.h> + +struct xenbus_dev_data { + int in_transaction; +}; + +static struct proc_dir_entry *xenbus_dev_intf; + +void *xs_talkv(enum xsd_sockmsg_type type, const struct kvec *iovec, + unsigned int num_vecs, unsigned int *len); + +static int xenbus_dev_talkv(struct xenbus_dev_data *u, unsigned long data) +{ + struct xenbus_dev_talkv xt; + unsigned int len; + void *resp, *base; + struct kvec *iovec; + int ret = -EFAULT, v = 0; + + if (copy_from_user(&xt, (void *)data, sizeof(xt))) + return -EFAULT; + + iovec = kmalloc(xt.num_vecs * sizeof(struct kvec), GFP_KERNEL); + if (iovec == NULL) + return -ENOMEM; + + if (copy_from_user(iovec, xt.iovec, + xt.num_vecs * sizeof(struct kvec))) + goto out; + + for (v = 0; v < xt.num_vecs; v++) { + base = iovec[v].iov_base; + iovec[v].iov_base = kmalloc(iovec[v].iov_len, GFP_KERNEL); + if (iovec[v].iov_base == NULL || + copy_from_user(iovec[v].iov_base, base, iovec[v].iov_len)) + { + if (iovec[v].iov_base) + kfree(iovec[v].iov_base); + else + ret = -ENOMEM; + v--; + goto out; + } + } + + resp = xs_talkv(xt.type, iovec, xt.num_vecs, &len); + if (IS_ERR(resp)) { + ret = PTR_ERR(resp); + goto out; + } + + switch (xt.type) { + case XS_TRANSACTION_START: + u->in_transaction = 1; + break; + case XS_TRANSACTION_END: + u->in_transaction = 0; + break; + default: + break; + } + + ret = len; + if (len > xt.len) + len = xt.len; + + if (copy_to_user(xt.buf, resp, len)) + ret = -EFAULT; + + kfree(resp); + out: + while (v-- > 0) + kfree(iovec[v].iov_base); + kfree(iovec); + return ret; +} + +static int xenbus_dev_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, 
unsigned long data) +{ + struct xenbus_dev_data *u = filp->private_data; + int ret = -ENOSYS; + + switch (cmd) { + case IOCTL_XENBUS_DEV_TALKV: + ret = xenbus_dev_talkv(u, data); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int xenbus_dev_open(struct inode *inode, struct file *filp) +{ + struct xenbus_dev_data *u; + + u = kmalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + memset(u, 0, sizeof(*u)); + + filp->private_data = u; + + down(&xenbus_lock); + + return 0; +} + +static int xenbus_dev_release(struct inode *inode, struct file *filp) +{ + struct xenbus_dev_data *u = filp->private_data; + + if (u->in_transaction) + xenbus_transaction_end(1); + + up(&xenbus_lock); + + kfree(u); + + return 0; +} + +static struct file_operations xenbus_dev_file_ops = { + ioctl: xenbus_dev_ioctl, + open: xenbus_dev_open, + release: xenbus_dev_release +}; + +static int __init +xenbus_dev_init(void) +{ + xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400); + if (xenbus_dev_intf) + xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops; + + return 0; +} + +__initcall(xenbus_dev_init); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/linux-public/xenbus_dev.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/xenbus_dev.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,47 @@ +/* + * xenbus_dev.h + * + * Copyright (c) 2005, Christian Limpach + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject 
to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_DEV_H_ +#define _XENBUS_DEV_H_ + +struct xenbus_dev_talkv { + enum xsd_sockmsg_type type; + const struct kvec *iovec; + unsigned int num_vecs; + char *buf; + unsigned int len; +}; + +/* + * @cmd: IOCTL_XENBUS_DEV_TALKV + * @arg: struct xenbus_dev_talkv + * Return: 0 on success, error code on failure. + */ +#define IOCTL_XENBUS_DEV_TALKV \ + _IOC(_IOC_NONE, 'X', 0, sizeof(struct xenbus_dev_talkv)) + +#endif /* _XENBUS_DEV_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/linux/tpmfe.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/linux-2.6-xen-sparse/include/linux/tpmfe.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,33 @@ +#ifndef TPM_FE_H +#define TPM_FE_H + +struct tpmfe_device { + /* + * Let upper layer receive data from front-end + */ + int (*receive)(const u8 *buffer, size_t count, const void *ptr); + /* + * Indicate the status of the front-end to the upper + * layer. + */ + void (*status)(unsigned int flags); + + /* + * This field indicates the maximum size the driver can + * transfer in one chunk. It is filled out by the front-end + * driver and should be propagated to the generic tpm driver + * for allocation of buffers. 
+ */ + unsigned int max_tx_size; +}; + +enum { + TPMFE_STATUS_DISCONNECTED = 0x0, + TPMFE_STATUS_CONNECTED = 0x1 +}; + +int tpm_fe_send(const u8 * buf, size_t count, void *ptr); +int tpm_fe_register_receiver(struct tpmfe_device *); +void tpm_fe_unregister_receiver(void); + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/README.sept05 --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/README.sept05 Fri Sep 9 16:30:54 2005 @@ -0,0 +1,33 @@ +The blktap has been rewritten substantially based on the current +blkback driver. I've removed passthrough support, as this is broken +by the move to grant tables and the lack of transitive grants. A +blktap VM is now only capable of terminating block requests in +userspace. + +ublkback/ contains a _very_ initial cut at a user-level version of the block +backend driver. It gives a working example of how the current tap +interfaces are used, in particular w.r.t. the vbd directories in +xenstore. + +parallax/ contains fairly recent parallax code. This does not run on +the changed blktap interface, but should only be a couple of hours +work to get going again. + +All of the tricky bits are done, but there is plenty of cleaning to +do, and the top-level functionality is not here yet. At the moment, +the daemon ignores the pdev requested by the tools and opens the file +or device specified by TMP_IMAGE_FILE_NAME in ublkback.c. + +TODO: +1. Fix to allow pdev in the store to specify the device to open. +2. Add support (to tools as well) to mount arbitrary files... + just write the filename to mount into the store, instead of pdev. +3. Reeximine blkif refcounting, it is almost certainly broken at the moment. + - creating a blkif should take a reference. + - each inflight request should take a reference on dequeue in blktaplib + - sending responses should drop refs. + - blkif should be implicitly freed when refcounts fall to 0. +4. Modify the parallax req/rsp code as per ublkback to use the new tap + interfaces. +5. 
Write a front end that allows parallax and normal mounts to coexist +6. Allow blkback and blktap to run at the same time. diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/blkif.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/blkif.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,213 @@ +/* + * blkif.c + * + * The blkif interface for blktap. A blkif describes an in-use virtual disk. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <err.h> + +#include "blktaplib.h" + +#if 1 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +#define BLKIF_HASHSZ 1024 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) + +static blkif_t *blkif_hash[BLKIF_HASHSZ]; + +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) +{ + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif != NULL) && + ((blkif->domid != domid) || (blkif->handle != handle)) ) + blkif = blkif->hash_next; + return blkif; +} + +blkif_t *alloc_blkif(domid_t domid) +{ + blkif_t *blkif; + + blkif = (blkif_t *)malloc(sizeof(blkif_t)); + if (!blkif) + return NULL; + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + + return blkif; +} + +static int (*new_blkif_hook)(blkif_t *blkif) = NULL; +void register_new_blkif_hook(int (*fn)(blkif_t *blkif)) +{ + new_blkif_hook = fn; +} + +int blkif_init(blkif_t *blkif, long int handle, long int pdev, + long int readonly) +{ + domid_t domid; + blkif_t **pblkif; + + if (blkif == NULL) + return -EINVAL; + + domid = blkif->domid; + blkif->handle = handle; + blkif->pdev = pdev; + blkif->readonly = readonly; + + /* + * Call out to the new_blkif_hook. The tap application should define this, + * and it should return having set blkif->ops + * + */ + if (new_blkif_hook == NULL) + { + warn("Probe detected a new blkif, but no new_blkif_hook!"); + return -1; + } + new_blkif_hook(blkif); + + /* Now wire it in. 
*/ + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( *pblkif != NULL ) + { + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) + { + DPRINTF("Could not create blkif: already exists\n"); + return -1; + } + pblkif = &(*pblkif)->hash_next; + } + blkif->hash_next = NULL; + *pblkif = blkif; + + return 0; +} + +void free_blkif(blkif_t *blkif) +{ + blkif_t **pblkif, *curs; + + pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)]; + while ( (curs = *pblkif) != NULL ) + { + if ( blkif == curs ) + { + *pblkif = curs->hash_next; + } + pblkif = &curs->hash_next; + } + if (blkif != NULL) + free(blkif); +} + +void blkif_register_request_hook(blkif_t *blkif, char *name, + int (*rh)(blkif_t *, blkif_request_t *, int)) +{ + request_hook_t *rh_ent, **c; + + rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); + if (!rh_ent) + { + warn("couldn't allocate a new hook"); + return; + } + + rh_ent->func = rh; + rh_ent->next = NULL; + if (asprintf(&rh_ent->name, "%s", name) == -1) + { + free(rh_ent); + warn("couldn't allocate a new hook name"); + return; + } + + c = &blkif->request_hook_chain; + while (*c != NULL) { + c = &(*c)->next; + } + *c = rh_ent; +} + +void blkif_register_response_hook(blkif_t *blkif, char *name, + int (*rh)(blkif_t *, blkif_response_t *, int)) +{ + response_hook_t *rh_ent, **c; + + rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); + if (!rh_ent) + { + warn("couldn't allocate a new hook"); + return; + } + + rh_ent->func = rh; + rh_ent->next = NULL; + if (asprintf(&rh_ent->name, "%s", name) == -1) + { + free(rh_ent); + warn("couldn't allocate a new hook name"); + return; + } + + c = &blkif->response_hook_chain; + while (*c != NULL) { + c = &(*c)->next; + } + *c = rh_ent; +} + +void blkif_print_hooks(blkif_t *blkif) +{ + request_hook_t *req_hook; + response_hook_t *rsp_hook; + + DPRINTF("Request Hooks:\n"); + req_hook = blkif->request_hook_chain; + while (req_hook != NULL) + { + DPRINTF(" [0x%p] %s\n", 
req_hook->func, req_hook->name); + req_hook = req_hook->next; + } + + DPRINTF("Response Hooks:\n"); + rsp_hook = blkif->response_hook_chain; + while (rsp_hook != NULL) + { + DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); + rsp_hook = rsp_hook->next; + } +} + + +long int vbd_size(blkif_t *blkif) +{ + return 1000000000; +} + +long int vbd_secsize(blkif_t *blkif) +{ + return 512; +} + +unsigned vbd_info(blkif_t *blkif) +{ + return 0; +} + + +void __init_blkif(void) +{ + memset(blkif_hash, 0, sizeof(blkif_hash)); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/list.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/list.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,55 @@ +/* + * list.h + * + * This is a subset of linux's list.h intended to be used in user-space. + * + */ + +#ifndef __LIST_H__ +#define __LIST_H__ + +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = 
list_entry(pos->member.next, typeof(*pos), member)) + +#endif /* __LIST_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/ublkback/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/ublkback/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,42 @@ + +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Rules.mk + +INCLUDES += -I.. + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +IBIN = ublkback +INSTALL_DIR = /usr/sbin + +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wno-unused +#CFLAGS += -O3 +CFLAGS += -g3 +CFLAGS += -fno-strict-aliasing +CFLAGS += -I $(XEN_LIBXC) +CFLAGS += $(INCLUDES) -I. +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE +# Get gcc to generate the dependencies for us. +CFLAGS += -Wp,-MD,.$(@F).d +DEPS = .*.d + +OBJS = $(patsubst %.c,%.o,$(SRCS)) + +all: $(IBIN) + +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) + +install: + $(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR) +clean: + rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN) + +ublkback: + $(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L.. \ + -lblktap -laio ublkback.c ublkbacklib.c -pg + +.PHONY: clean install + +-include $(DEPS) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/ublkback/ublkback.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/ublkback/ublkback.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,18 @@ +/* ublkback.c + * + * libaio-based userlevel backend. + */ + +#include "blktaplib.h" +#include "ublkbacklib.h" + + +int main(int argc, char *argv[]) +{ + ublkback_init(); + + register_new_blkif_hook(ublkback_new_blkif); + blktap_listen(); + + return 0; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/ublkback/ublkbacklib.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/ublkback/ublkbacklib.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,477 @@ +/* ublkbacklib.c + * + * file/device image-backed block device -- using linux libaio. + * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. 
+ * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. + * + * NOTE: This doesn't work. Grrr. + */ + +#define _GNU_SOURCE +#define __USE_LARGEFILE64 + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <db.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/poll.h> +#include <unistd.h> +#include <errno.h> +#include <libaio.h> +#include <pthread.h> +#include <time.h> +#include <err.h> +#include "blktaplib.h" + +/* XXXX: */ +/* Current code just mounts this file/device to any requests that come in. */ +//#define TMP_IMAGE_FILE_NAME "/dev/sda1" +#define TMP_IMAGE_FILE_NAME "fc3.image" + +#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ +#define MAX_SEGMENTS_PER_REQ 11 +#define SECTOR_SHIFT 9 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) + +#if 0 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + +/* Note on pending_reqs: I assume all reqs are queued before they start to + * get filled. so count of 0 is an unused record. + */ +typedef struct { + blkif_request_t req; + blkif_t *blkif; + int count; +} pending_req_t; + +static pending_req_t pending_list[MAX_REQUESTS]; +static io_context_t ctx; +static struct iocb *iocb_free[MAX_AIO_REQS]; +static int iocb_free_count; + +/* ---[ Notification mecahnism ]--------------------------------------- */ + +enum { + READ = 0, + WRITE = 1 +}; + +static int aio_notify[2]; +static volatile int aio_listening = 0; +static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER; + +static struct io_event aio_events[MAX_AIO_REQS]; +static int aio_event_count = 0; + +/* this is commented out in libaio.h for some reason. 
*/ +extern int io_queue_wait(io_context_t ctx, struct timespec *timeout); + +static void *notifier_thread(void *arg) +{ + int ret; + int msg = 0x00feeb00; + + DPRINTF("Notifier thread started.\n"); + for (;;) { + pthread_mutex_lock(¬ifier_sem); + if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) { + aio_event_count = ret; + write(aio_notify[WRITE], &msg, sizeof(msg)); + } else { + printf("[io_queue_wait error! %d]\n", errno); + pthread_mutex_unlock(¬ifier_sem); + } + } +} + +/* --- Talking to xenstore: ------------------------------------------- */ + +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done); +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done); + +typedef struct image { + /* These need to turn into an array/rbtree for multi-disk support. */ + int fd; + u64 fsid; + blkif_vdev_t vdevice; + long int size; + long int secsize; + long int info; +} image_t; + +long int ublkback_get_size(blkif_t *blkif) +{ + image_t *img = (image_t *)blkif->prv; + return img->size; +} + +long int ublkback_get_secsize(blkif_t *blkif) +{ + image_t *img = (image_t *)blkif->prv; + return img->secsize; +} + +unsigned ublkback_get_info(blkif_t *blkif) +{ + image_t *img = (image_t *)blkif->prv; + return img->info; +} + +static struct blkif_ops ublkback_ops = { + get_size: ublkback_get_size, + get_secsize: ublkback_get_secsize, + get_info: ublkback_get_info, +}; + +int ublkback_new_blkif(blkif_t *blkif) +{ + image_t *image; + struct stat stat; + int ret; + + image = (image_t *)malloc(sizeof(image_t)); + if (image == NULL) { + printf("error allocating image record.\n"); + return -ENOMEM; + } + + /* Open it. */ + image->fd = open(TMP_IMAGE_FILE_NAME, + O_RDWR | O_DIRECT | O_LARGEFILE); + + if ((image->fd < 0) && (errno == EINVAL)) { + /* Maybe O_DIRECT isn't supported. 
*/ + warn("open() failed on '%s', trying again without O_DIRECT", + TMP_IMAGE_FILE_NAME); + image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE); + } + + if (image->fd < 0) { + warn("Couldn't open image file!"); + free(image); + return -EINVAL; + } + + /* Size it. */ + ret = fstat(image->fd, &stat); + if (ret != 0) { + printf("Couldn't stat image in PROBE!"); + return -EINVAL; + } + + image->size = (stat.st_size >> SECTOR_SHIFT); + + /* TODO: IOCTL to get size of raw device. */ +/* + ret = ioctl(img->fd, BLKGETSIZE, &blksize); + if (ret != 0) { + printf("Couldn't ioctl image in PROBE!\n"); + goto err; + } +*/ + if (image->size == 0) + image->size =((u64) 16836057); + image->secsize = 512; + image->info = 0; + + /* Register the hooks */ + blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request); + blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response); + + + printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", + blkif->pdev, TMP_IMAGE_FILE_NAME); + + blkif->ops = &ublkback_ops; + blkif->prv = (void *)image; + + return 0; +} + + +/* --- Moving the bits: ----------------------------------------------- */ + +static int batch_count = 0; +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done) +{ + int fd; + u64 sector; + char *spage, *dpage; + int ret, i, idx; + blkif_response_t *rsp; + domid_t dom = ID_TO_DOM(req->id); + static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; + static int io_idx = 0; + struct iocb *io; + image_t *img; + + img = (image_t *)blkif->prv; + fd = img->fd; + + switch (req->operation) + { + case BLKIF_OP_WRITE: + { + unsigned long size; + + + batch_count++; + + idx = ID_TO_IDX(req->id); + ASSERT(pending_list[idx].count == 0); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + pending_list[idx].count = req->nr_segments; + pending_list[idx].blkif = blkif; + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect 
(req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + if (blkif_first_sect(req->frame_and_sects[i]) != 0) + DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT)); + + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + /*convert size and sector to byte offsets */ + size <<= SECTOR_SHIFT; + sector <<= SECTOR_SHIFT; + + io = iocb_free[--iocb_free_count]; + io_prep_pwrite(io, fd, spage, size, sector); + io->data = (void *)idx; + //ioq[i] = io; + ioq[io_idx++] = io; + } + + if (batch_done) { + ret = io_submit(ctx, io_idx, ioq); + batch_count = 0; + if (ret < 0) + printf("BADNESS: io_submit error! (%d)\n", errno); + io_idx = 0; + } + + return BLKTAP_STOLEN; + + } + case BLKIF_OP_READ: + { + unsigned long size; + + batch_count++; + idx = ID_TO_IDX(req->id); + ASSERT(pending_list[idx].count == 0); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + pending_list[idx].count = req->nr_segments; + pending_list[idx].blkif = blkif; + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + if (blkif_first_sect(req->frame_and_sects[i]) != 0) + DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) " + "pos: %15lu dpage: %p\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT), dpage); + + /*convert size and sector to byte offsets */ + size <<= SECTOR_SHIFT; + sector <<= SECTOR_SHIFT; + + + /* + * NB: Looks like AIO now has non-page aligned support, this path 
+ * can probably be removed... Only really used for hunting + * superblocks anyway... ;) + */ + if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) { + /* AIO to raw devices must be page aligned, so do this read + * synchronously. The OS is probably just looking for + * a superblock or something, so this won't hurt performance. + */ + int ret; + + printf("Slow path block read.\n"); + /* Question: do in-progress aio ops modify the file cursor? */ + ret = lseek(fd, sector, SEEK_SET); + if (ret == (off_t)-1) + printf("lseek failed!\n"); + ret = read(fd, dpage, size); + if (ret < 0) + printf("read problem (%d)\n", ret); + printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret); + + /* not an async request any more... */ + pending_list[idx].count--; + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_READ; + rsp->status = BLKIF_RSP_OKAY; + return BLKTAP_RESPOND; + /* Doh -- need to flush aio if this is end-of-batch */ + } + + io = iocb_free[--iocb_free_count]; + + io_prep_pread(io, fd, dpage, size, sector); + io->data = (void *)idx; + + ioq[io_idx++] = io; + //ioq[i] = io; + } + + if (batch_done) { + ret = io_submit(ctx, io_idx, ioq); + batch_count = 0; + if (ret < 0) + printf("BADNESS: io_submit error! (%d)\n", errno); + io_idx = 0; + } + + return BLKTAP_STOLEN; + + } + } + + printf("Unknown block operation!\n"); +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + + +int ublkback_pollhook(int fd) +{ + struct io_event *ep; + int n, ret, idx; + blkif_request_t *req; + blkif_response_t *rsp; + int responses_queued = 0; + int pages=0; + + for (ep = aio_events; aio_event_count-- > 0; ep++) { + struct iocb *io = ep->obj; + idx = (int) ep->data; + + if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ + printf("invalid index returned(%u)!\n", idx); + break; + } + + if ((int)ep->res < 0) + printf("***\n***aio request error! 
(%d,%d)\n***\n", + (int)ep->res, (int)ep->res2); + + pending_list[idx].count--; + iocb_free[iocb_free_count++] = io; + pages++; + + if (pending_list[idx].count == 0) { + blkif_request_t tmp = pending_list[idx].req; + rsp = (blkif_response_t *)&pending_list[idx].req; + rsp->id = tmp.id; + rsp->operation = tmp.operation; + rsp->status = BLKIF_RSP_OKAY; + blkif_inject_response(pending_list[idx].blkif, rsp); + responses_queued++; + } + } + + if (responses_queued) { + blktap_kick_responses(); + } + + read(aio_notify[READ], &idx, sizeof(idx)); + aio_listening = 1; + pthread_mutex_unlock(¬ifier_sem); + + return 0; +} + +/* the image library terminates the request stream. _resp is a noop. */ +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done) +{ + return BLKTAP_PASS; +} + +void ublkback_init(void) +{ + int i, rc; + pthread_t p; + + for (i = 0; i < MAX_REQUESTS; i++) + pending_list[i].count = 0; + + memset(&ctx, 0, sizeof(ctx)); + rc = io_queue_init(MAX_AIO_REQS, &ctx); + if (rc != 0) { + printf("queue_init failed! (%d)\n", rc); + exit(0); + } + + for (i=0; i<MAX_AIO_REQS; i++) { + if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) { + printf("error allocating iocb array\n"); + exit(0); + } + iocb_free_count = i; + } + + rc = pipe(aio_notify); + if (rc != 0) { + printf("pipe failed! (%d)\n", errno); + exit(0); + } + + rc = pthread_create(&p, NULL, notifier_thread, NULL); + if (rc != 0) { + printf("pthread_create failed! (%d)\n", errno); + exit(0); + } + + aio_listening = 1; + + blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook); +} + diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/ublkback/ublkbacklib.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/ublkback/ublkbacklib.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,16 @@ +/* blkaiolib.h + * + * aio image-backed block device. + * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. 
+ * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. + */ + +int ublkback_request(blkif_request_t *req, int batch_done); +int ublkback_response(blkif_response_t *rsp); /* noop */ +int ublkback_new_blkif(blkif_t *blkif); +void ublkback_init(void); diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/blktap/xenbus.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/blktap/xenbus.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,578 @@ +/* + * xenbus.c + * + * xenbus interface to the blocktap. + * + * this handles the top-half of integration with block devices through the + * store -- the tap driver negotiates the device channel etc, while the + * userland tap clinet needs to sort out the disk parameters etc. + * + * A. Warfield 2005 Based primarily on the blkback and xenbus driver code. + * Comments there apply here... + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <err.h> +#include <stdarg.h> +#include <errno.h> +#include <xs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <poll.h> +#include "blktaplib.h" +#include "list.h" + +#if 0 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +/* --- Xenstore / Xenbus helpers ---------------------------------------- */ +/* + * These should all be pulled out into the xenstore API. I'm faulting commands + * in from the xenbus interface as i need them. + */ + + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xs_gather(struct xs_handle *xs, const char *dir, ...) 
+{ + va_list ap; + const char *name; + char *path; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + if (asprintf(&path, "%s/%s", dir, name) == -1) + { + warn("allocation error in xs_gather!\n"); + ret = ENOMEM; + break; + } + p = xs_read(xs, path, NULL); + free(path); + if (p == NULL) { + ret = ENOENT; + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = EINVAL; + free(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} + +/* Single printf and write: returns -errno or 0. */ +int xs_printf(struct xs_handle *h, const char *dir, const char *node, + const char *fmt, ...) +{ + char *buf, *path; + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vasprintf(&buf, fmt, ap); + va_end(ap); + + asprintf(&path, "%s/%s", dir, node); + + if ((path == NULL) || (buf == NULL)) + return 0; + + ret = xs_write(h, path, buf, strlen(buf)+1, O_CREAT); + + free(buf); + free(path); + + return ret; +} + + +int xs_exists(struct xs_handle *h, const char *path) +{ + char **d; + int num; + + d = xs_directory(h, path, &num); + if (d == NULL) + return 0; + free(d); + return 1; +} + + + +/* This assumes that the domain name we are looking for is unique! */ +char *get_dom_uuid(struct xs_handle *h, const char *name) +{ + char **e, *val, *uuid = NULL; + int num, i, len; + char *path; + + e = xs_directory(h, "/domain", &num); + + i=0; + while (i < num) { + asprintf(&path, "/domain/%s/name", e[i]); + val = xs_read(h, path, &len); + free(path); + if (val == NULL) + continue; + if (strcmp(val, name) == 0) { + /* match! 
*/ + asprintf(&path, "/domain/%s/uuid", e[i]); + uuid = xs_read(h, path, &len); + free(val); + free(path); + break; + } + free(val); + i++; + } + + free(e); + return uuid; +} + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? i : -ERANGE; +} + + +/* xenbus watches: */ +/* Register callback to watch this node. */ +struct xenbus_watch +{ + struct list_head list; + char *node; + void (*callback)(struct xs_handle *h, + struct xenbus_watch *, + const char *node); +}; + +static LIST_HEAD(watches); + +/* A little paranoia: we don't just trust token. */ +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)strtoul(token, NULL, 16); + + list_for_each_entry(i, &watches, list) + if (i == cmp) + return i; + return NULL; +} + +/* Register callback to watch this node. like xs_watch, return 0 on failure */ +int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int er; + + sprintf(token, "%lX", (long)watch); + if (find_watch(token)) + { + warn("watch collision!"); + return -EINVAL; + } + + er = xs_watch(h, watch->node, token); + if (er != 0) { + list_add(&watch->list, &watches); + } + + return er; +} + +int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch) +{ + char token[sizeof(watch) * 2 + 1]; + int er; + + sprintf(token, "%lX", (long)watch); + if (!find_watch(token)) + { + warn("no such watch!"); + return -EINVAL; + } + + + er = xs_unwatch(h, watch->node, token); + list_del(&watch->list); + + if (er == 0) + warn("XENBUS Failed to release watch %s: %i", + watch->node, er); + return 0; +} + +/* Re-register callbacks to all watches. 
*/ +void reregister_xenbus_watches(struct xs_handle *h) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(h, watch->node, token); + } +} + +/* based on watch_thread() */ +int xs_fire_next_watch(struct xs_handle *h) +{ + char **res; + char *token; + char *node = NULL; + struct xenbus_watch *w; + int er; + + res = xs_read_watch(h); + if (res == NULL) + return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */ + + node = res[0]; + token = res[1]; + + er = xs_acknowledge_watch(h, token); + if (er == 0) + warn("Couldn't acknowledge watch (%s)", token); + + w = find_watch(token); + if (!w) + { + warn("unregistered watch fired"); + goto done; + } + w->callback(h, w, node); + + done: + free(res); + return 1; +} + + + + +/* ---------------------------------------------------------------------- */ + +struct backend_info +{ + /* our communications channel */ + blkif_t *blkif; + + long int frontend_id; + long int pdev; + long int readonly; + + /* watch back end for changes */ + struct xenbus_watch backend_watch; + char *backpath; + + /* watch front end for changes */ + struct xenbus_watch watch; + char *frontpath; + + struct list_head list; +}; + +static LIST_HEAD(belist); + +static struct backend_info *be_lookup_be(const char *bepath) +{ + struct backend_info *be; + + list_for_each_entry(be, &belist, list) + if (strcmp(bepath, be->backpath) == 0) + return be; + return (struct backend_info *)NULL; +} + +static int be_exists_be(const char *bepath) +{ + return ( be_lookup_be(bepath) != NULL ); +} + +static struct backend_info *be_lookup_fe(const char *fepath) +{ + struct backend_info *be; + + list_for_each_entry(be, &belist, list) + if (strcmp(fepath, be->frontpath) == 0) + return be; + return (struct backend_info *)NULL; +} + +static int backend_remove(struct xs_handle *h, struct backend_info *be) +{ + /* Turn off watches. 
*/ + if (be->watch.node) + unregister_xenbus_watch(h, &be->watch); + if (be->backend_watch.node) + unregister_xenbus_watch(h, &be->backend_watch); + + /* Unhook from be list. */ + list_del(&be->list); + + /* Free everything else. */ + if (be->blkif) + free_blkif(be->blkif); + if (be->frontpath) + free(be->frontpath); + if (be->backpath) + free(be->backpath); + free(be); + return 0; +} + +static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, + const char *fepath_im) +{ + struct backend_info *be; + char *fepath = NULL; + int er; + + be = be_lookup_fe(w->node); + if (be == NULL) + { + warn("frontend changed called for nonexistent backend! (%s)", fepath); + goto fail; + } + + /* If other end is gone, delete ourself. */ + if (w->node && !xs_exists(h, be->frontpath)) { + DPRINTF("DELETING BE: %s\n", be->backpath); + backend_remove(h, be); + return; + } + + if (be->blkif == NULL || (be->blkif->state == CONNECTED)) + return; + + /* Supply the information about the device the frontend needs */ + er = xs_transaction_start(h, be->backpath); + if (er == 0) { + warn("starting transaction"); + goto fail; + } + + er = xs_printf(h, be->backpath, "sectors", "%lu", + be->blkif->ops->get_size(be->blkif)); + if (er == 0) { + warn("writing sectors"); + goto fail; + } + + er = xs_printf(h, be->backpath, "info", "%u", + be->blkif->ops->get_info(be->blkif)); + if (er == 0) { + warn("writing info"); + goto fail; + } + + er = xs_printf(h, be->backpath, "sector-size", "%lu", + be->blkif->ops->get_secsize(be->blkif)); + if (er == 0) { + warn("writing sector-size"); + goto fail; + } + + be->blkif->state = CONNECTED; + + xs_transaction_end(h, 0); + + return; + + fail: + if (fepath) + free(fepath); +} + + +static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, + const char *bepath_im) +{ + struct backend_info *be; + char *path = NULL, *p; + int len, er; + long int pdev = 0, handle; + + be = be_lookup_be(w->node); + if (be == NULL) + { + warn("backend changed 
called for nonexistent backend! (%s)", w->node); + goto fail; + } + + er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL); + if (er != 0) + goto fail; + + if (be->pdev && be->pdev != pdev) { + warn("changing physical-device not supported"); + goto fail; + } + be->pdev = pdev; + + asprintf(&path, "%s/%s", w->node, "read-only"); + if (xs_exists(h, path)) + be->readonly = 1; + + if (be->blkif == NULL) { + /* Front end dir is a number, which is used as the handle. */ + p = strrchr(be->frontpath, '/') + 1; + handle = strtoul(p, NULL, 0); + + be->blkif = alloc_blkif(be->frontend_id); + if (be->blkif == NULL) + goto fail; + + er = blkif_init(be->blkif, handle, be->pdev, be->readonly); + if (er) + goto fail; + + DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node); + + /* Pass in NULL node to skip exist test. */ + frontend_changed(h, &be->watch, NULL); + } + + fail: + if (path) + free(path); + +} + +static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, + const char *bepath_im) +{ + struct backend_info *be = NULL; + char *frontend = NULL, *bepath = NULL; + int er, len; + + bepath = strdup(bepath_im); + if (!bepath) + return; + len = strsep_len(bepath, '/', 6); + if (len < 0) + goto free_be; + + bepath[len] = '\0'; /*truncate the passed-in string with predjudice. */ + + be = malloc(sizeof(*be)); + if (!be) { + warn("allocating backend structure"); + goto free_be; + } + memset(be, 0, sizeof(*be)); + + frontend = NULL; + er = xs_gather(h, bepath, + "frontend-id", "%li", &be->frontend_id, + "frontend", NULL, &frontend, + NULL); + if (er) + goto free_be; + + if (strlen(frontend) == 0 || !xs_exists(h, frontend)) { + /* If we can't get a frontend path and a frontend-id, + * then our bus-id is no longer valid and we need to + * destroy the backend device. + */ + DPRINTF("No frontend (%s)\n", frontend); + goto free_be; + } + + /* Are we already tracking this device? 
*/ + if (be_exists_be(bepath)) + goto free_be; + + be->backpath = bepath; + be->backend_watch.node = be->backpath; + be->backend_watch.callback = backend_changed; + er = register_xenbus_watch(h, &be->backend_watch); + if (er == 0) { + be->backend_watch.node = NULL; + warn("error adding backend watch on %s", bepath); + goto free_be; + } + + be->frontpath = frontend; + be->watch.node = be->frontpath; + be->watch.callback = frontend_changed; + er = register_xenbus_watch(h, &be->watch); + if (er == 0) { + be->watch.node = NULL; + warn("adding frontend watch on %s", be->frontpath); + goto free_be; + } + + list_add(&be->list, &belist); + + DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im); + + backend_changed(h, &be->backend_watch, bepath); + return; + + free_be: + if ((be) && (be->backend_watch.node)) + unregister_xenbus_watch(h, &be->backend_watch); + if (frontend) + free(frontend); + if (bepath) + free(bepath); + free(be); + return; +} + + +int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname) +{ + char *uuid, *path; + struct xenbus_watch *vbd_watch; + int er; + + uuid = get_dom_uuid(h, domname); + + DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]"); + + asprintf(&path, "/domain/%s/backend/vbd", uuid); + if (path == NULL) + return -ENOMEM; + + vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch)); + vbd_watch->node = path; + vbd_watch->callback = blkback_probe; + er = register_xenbus_watch(h, vbd_watch); + if (er == 0) { + warn("Error adding vbd probe watch %s", path); + return -EINVAL; + } + + return 0; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/xen-backend.agent --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/examples/xen-backend.agent Fri Sep 9 16:30:54 2005 @@ -0,0 +1,25 @@ +#! 
/bin/sh + +#ACTION=add +#DEVPATH=/devices/xen-backend/vif-1-0 +#PHYSDEVDRIVER=vif +#XENBUS_TYPE=vif + +PATH=/etc/xen/scripts:$PATH + +case "$ACTION" in + add) + ;; + remove) + ;; + online) + case "$PHYSDEVDRIVER" in + vif) + [ -n "$script" ] && $script up + ;; + esac + ;; + offline) + ;; +esac + diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/hw/pcnet.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/hw/pcnet.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,1209 @@ +/* + * QEMU AMD PC-Net II (Am79C970A) emulation + * + * Copyright (c) 2004 Antony T Curtis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +/* This software was written to be compatible with the specification: + * AMD Am79C970A PCnet-PCI II Ethernet Controller Data-Sheet + * AMD Publication# 19436 Rev:E Amendment/0 Issue Date: June 2000 + */ + +#include "vl.h" +#include <sys/times.h> +#include <arpa/inet.h> +#include <net/ethernet.h> + +//#define PCNET_DEBUG +//#define PCNET_DEBUG_IO +//#define PCNET_DEBUG_BCR +//#define PCNET_DEBUG_CSR +//#define PCNET_DEBUG_RMD +//#define PCNET_DEBUG_TMD +//#define PCNET_DEBUG_MATCH + + +#define PCNET_IOPORT_SIZE 0x20 +#define PCNET_PNPMMIO_SIZE 0x20 + + +typedef struct PCNetState_st PCNetState; + +struct PCNetState_st { + PCIDevice dev; + NetDriverState *nd; + QEMUTimer *poll_timer; + int mmio_io_addr, rap, isr, lnkst; + target_phys_addr_t rdra, tdra; + uint8_t prom[16]; + uint16_t csr[128]; + uint16_t bcr[32]; + uint64_t timer; + int xmit_pos, recv_pos; + uint8_t buffer[4096]; +}; + +#include "pcnet.h" + +static void pcnet_poll(PCNetState *s); +static void pcnet_poll_timer(void *opaque); + +static uint32_t pcnet_csr_readw(PCNetState *s, uint32_t rap); +static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value); +static void pcnet_bcr_writew(PCNetState *s, uint32_t rap, uint32_t val); +static uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap); + +static void pcnet_s_reset(PCNetState *s) +{ +#ifdef PCNET_DEBUG + printf("pcnet_s_reset\n"); +#endif + + s->lnkst = 0x40; + s->rdra = 0; + s->tdra = 0; + s->rap = 0; + + s->bcr[BCR_BSBC] &= ~0x0080; + + s->csr[0] = 0x0004; + s->csr[3] = 0x0000; + s->csr[4] = 0x0115; + s->csr[5] = 0x0000; + s->csr[6] = 0x0000; + s->csr[8] = 0; + s->csr[9] = 0; + s->csr[10] = 0; + s->csr[11] = 0; + s->csr[12] = le16_to_cpu(((uint16_t *)&s->prom[0])[0]); + s->csr[13] = le16_to_cpu(((uint16_t *)&s->prom[0])[1]); + s->csr[14] = le16_to_cpu(((uint16_t *)&s->prom[0])[2]); + s->csr[15] &= 0x21c4; + s->csr[72] = 1; + s->csr[74] = 1; + s->csr[76] = 1; + s->csr[78] = 1; + s->csr[80] = 0x1410; + s->csr[88] = 0x1003; + 
s->csr[89] = 0x0262; + s->csr[94] = 0x0000; + s->csr[100] = 0x0200; + s->csr[103] = 0x0105; + s->csr[103] = 0x0105; + s->csr[112] = 0x0000; + s->csr[114] = 0x0000; + s->csr[122] = 0x0000; + s->csr[124] = 0x0000; +} + +static void pcnet_update_irq(PCNetState *s) +{ + int isr = 0; + s->csr[0] &= ~0x0080; + +#if 1 + if (((s->csr[0] & ~s->csr[3]) & 0x5f00) || + (((s->csr[4]>>1) & ~s->csr[4]) & 0x0115) || + (((s->csr[5]>>1) & s->csr[5]) & 0x0048)) +#else + if ((!(s->csr[3] & 0x4000) && !!(s->csr[0] & 0x4000)) /* BABL */ || + (!(s->csr[3] & 0x1000) && !!(s->csr[0] & 0x1000)) /* MISS */ || + (!(s->csr[3] & 0x0100) && !!(s->csr[0] & 0x0100)) /* IDON */ || + (!(s->csr[3] & 0x0200) && !!(s->csr[0] & 0x0200)) /* TINT */ || + (!(s->csr[3] & 0x0400) && !!(s->csr[0] & 0x0400)) /* RINT */ || + (!(s->csr[3] & 0x0800) && !!(s->csr[0] & 0x0800)) /* MERR */ || + (!(s->csr[4] & 0x0001) && !!(s->csr[4] & 0x0002)) /* JAB */ || + (!(s->csr[4] & 0x0004) && !!(s->csr[4] & 0x0008)) /* TXSTRT */ || + (!(s->csr[4] & 0x0010) && !!(s->csr[4] & 0x0020)) /* RCVO */ || + (!(s->csr[4] & 0x0100) && !!(s->csr[4] & 0x0200)) /* MFCO */ || + (!!(s->csr[5] & 0x0040) && !!(s->csr[5] & 0x0080)) /* EXDINT */ || + (!!(s->csr[5] & 0x0008) && !!(s->csr[5] & 0x0010)) /* MPINT */) +#endif + { + + isr = CSR_INEA(s); + s->csr[0] |= 0x0080; + } + + if (!!(s->csr[4] & 0x0080) && CSR_INEA(s)) { /* UINT */ + s->csr[4] &= ~0x0080; + s->csr[4] |= 0x0040; + s->csr[0] |= 0x0080; + isr = 1; +#ifdef PCNET_DEBUG + printf("pcnet user int\n"); +#endif + } + +#if 1 + if (((s->csr[5]>>1) & s->csr[5]) & 0x0500) +#else + if ((!!(s->csr[5] & 0x0400) && !!(s->csr[5] & 0x0800)) /* SINT */ || + (!!(s->csr[5] & 0x0100) && !!(s->csr[5] & 0x0200)) /* SLPINT */ ) +#endif + { + isr = 1; + s->csr[0] |= 0x0080; + } + + if (isr != s->isr) { +#ifdef PCNET_DEBUG + printf("pcnet: INTA=%d\n", isr); +#endif + } + pci_set_irq(&s->dev, 0, isr); + s->isr = isr; +} + +static void pcnet_init(PCNetState *s) +{ +#ifdef PCNET_DEBUG + printf("pcnet_init 
init_addr=0x%08x\n", PHYSADDR(s,CSR_IADR(s))); +#endif + +#define PCNET_INIT() do { \ + cpu_physical_memory_read(PHYSADDR(s,CSR_IADR(s)), \ + (uint8_t *)&initblk, sizeof(initblk)); \ + s->csr[15] = le16_to_cpu(initblk.mode); \ + CSR_RCVRL(s) = (initblk.rlen < 9) ? (1 << initblk.rlen) : 512; \ + CSR_XMTRL(s) = (initblk.tlen < 9) ? (1 << initblk.tlen) : 512; \ + s->csr[ 6] = (initblk.tlen << 12) | (initblk.rlen << 8); \ + s->csr[ 8] = le16_to_cpu(initblk.ladrf1); \ + s->csr[ 9] = le16_to_cpu(initblk.ladrf2); \ + s->csr[10] = le16_to_cpu(initblk.ladrf3); \ + s->csr[11] = le16_to_cpu(initblk.ladrf4); \ + s->csr[12] = le16_to_cpu(initblk.padr1); \ + s->csr[13] = le16_to_cpu(initblk.padr2); \ + s->csr[14] = le16_to_cpu(initblk.padr3); \ + s->rdra = PHYSADDR(s,initblk.rdra); \ + s->tdra = PHYSADDR(s,initblk.tdra); \ +} while (0) + + if (BCR_SSIZE32(s)) { + struct pcnet_initblk32 initblk; + PCNET_INIT(); +#ifdef PCNET_DEBUG + printf("initblk.rlen=0x%02x, initblk.tlen=0x%02x\n", + initblk.rlen, initblk.tlen); +#endif + } else { + struct pcnet_initblk16 initblk; + PCNET_INIT(); +#ifdef PCNET_DEBUG + printf("initblk.rlen=0x%02x, initblk.tlen=0x%02x\n", + initblk.rlen, initblk.tlen); +#endif + } + +#undef PCNET_INIT + + CSR_RCVRC(s) = CSR_RCVRL(s); + CSR_XMTRC(s) = CSR_XMTRL(s); + +#ifdef PCNET_DEBUG + printf("pcnet ss32=%d rdra=0x%08x[%d] tdra=0x%08x[%d]\n", + BCR_SSIZE32(s), + s->rdra, CSR_RCVRL(s), s->tdra, CSR_XMTRL(s)); +#endif + + s->csr[0] |= 0x0101; + s->csr[0] &= ~0x0004; /* clear STOP bit */ +} + +static void pcnet_start(PCNetState *s) +{ +#ifdef PCNET_DEBUG + printf("pcnet_start\n"); +#endif + + if (!CSR_DTX(s)) + s->csr[0] |= 0x0010; /* set TXON */ + + if (!CSR_DRX(s)) + s->csr[0] |= 0x0020; /* set RXON */ + + s->csr[0] &= ~0x0004; /* clear STOP bit */ + s->csr[0] |= 0x0002; +} + +static void pcnet_stop(PCNetState *s) +{ +#ifdef PCNET_DEBUG + printf("pcnet_stop\n"); +#endif + s->csr[0] &= ~0x7feb; + s->csr[0] |= 0x0014; + s->csr[4] &= ~0x02c2; + s->csr[5] &= 
~0x0011; + pcnet_poll_timer(s); +} + +static void pcnet_rdte_poll(PCNetState *s) +{ + s->csr[28] = s->csr[29] = 0; + if (s->rdra) { + int bad = 0; +#if 1 + target_phys_addr_t crda = pcnet_rdra_addr(s, CSR_RCVRC(s)); + target_phys_addr_t nrda = pcnet_rdra_addr(s, -1 + CSR_RCVRC(s)); + target_phys_addr_t nnrd = pcnet_rdra_addr(s, -2 + CSR_RCVRC(s)); +#else + target_phys_addr_t crda = s->rdra + + (CSR_RCVRL(s) - CSR_RCVRC(s)) * + (BCR_SWSTYLE(s) ? 16 : 8 ); + int nrdc = CSR_RCVRC(s)<=1 ? CSR_RCVRL(s) : CSR_RCVRC(s)-1; + target_phys_addr_t nrda = s->rdra + + (CSR_RCVRL(s) - nrdc) * + (BCR_SWSTYLE(s) ? 16 : 8 ); + int nnrc = nrdc<=1 ? CSR_RCVRL(s) : nrdc-1; + target_phys_addr_t nnrd = s->rdra + + (CSR_RCVRL(s) - nnrc) * + (BCR_SWSTYLE(s) ? 16 : 8 ); +#endif + + CHECK_RMD(PHYSADDR(s,crda), bad); + if (!bad) { + CHECK_RMD(PHYSADDR(s,nrda), bad); + if (bad || (nrda == crda)) nrda = 0; + CHECK_RMD(PHYSADDR(s,nnrd), bad); + if (bad || (nnrd == crda)) nnrd = 0; + + s->csr[28] = crda & 0xffff; + s->csr[29] = crda >> 16; + s->csr[26] = nrda & 0xffff; + s->csr[27] = nrda >> 16; + s->csr[36] = nnrd & 0xffff; + s->csr[37] = nnrd >> 16; +#ifdef PCNET_DEBUG + if (bad) { + printf("pcnet: BAD RMD RECORDS AFTER 0x%08x\n", + PHYSADDR(s,crda)); + } + } else { + printf("pcnet: BAD RMD RDA=0x%08x\n", PHYSADDR(s,crda)); +#endif + } + } + + if (CSR_CRDA(s)) { + struct pcnet_RMD rmd; + RMDLOAD(&rmd, PHYSADDR(s,CSR_CRDA(s))); + CSR_CRBC(s) = rmd.rmd1.bcnt; + CSR_CRST(s) = ((uint32_t *)&rmd)[1] >> 16; +#ifdef PCNET_DEBUG_RMD_X + printf("CRDA=0x%08x CRST=0x%04x RCVRC=%d RMD1=0x%08x RMD2=0x%08x\n", + PHYSADDR(s,CSR_CRDA(s)), CSR_CRST(s), CSR_RCVRC(s), + ((uint32_t *)&rmd)[1], ((uint32_t *)&rmd)[2]); + PRINT_RMD(&rmd); +#endif + } else { + CSR_CRBC(s) = CSR_CRST(s) = 0; + } + + if (CSR_NRDA(s)) { + struct pcnet_RMD rmd; + RMDLOAD(&rmd, PHYSADDR(s,CSR_NRDA(s))); + CSR_NRBC(s) = rmd.rmd1.bcnt; + CSR_NRST(s) = ((uint32_t *)&rmd)[1] >> 16; + } else { + CSR_NRBC(s) = CSR_NRST(s) = 0; + } + +} + +static 
int pcnet_tdte_poll(PCNetState *s) +{ + s->csr[34] = s->csr[35] = 0; + if (s->tdra) { + target_phys_addr_t cxda = s->tdra + + (CSR_XMTRL(s) - CSR_XMTRC(s)) * + (BCR_SWSTYLE(s) ? 16 : 8 ); + int bad = 0; + CHECK_TMD(PHYSADDR(s, cxda),bad); + if (!bad) { + if (CSR_CXDA(s) != cxda) { + s->csr[60] = s->csr[34]; + s->csr[61] = s->csr[35]; + s->csr[62] = CSR_CXBC(s); + s->csr[63] = CSR_CXST(s); + } + s->csr[34] = cxda & 0xffff; + s->csr[35] = cxda >> 16; +#ifdef PCNET_DEBUG + } else { + printf("pcnet: BAD TMD XDA=0x%08x\n", PHYSADDR(s,cxda)); +#endif + } + } + + if (CSR_CXDA(s)) { + struct pcnet_TMD tmd; + + TMDLOAD(&tmd, PHYSADDR(s,CSR_CXDA(s))); + + CSR_CXBC(s) = tmd.tmd1.bcnt; + CSR_CXST(s) = ((uint32_t *)&tmd)[1] >> 16; + } else { + CSR_CXBC(s) = CSR_CXST(s) = 0; + } + + return !!(CSR_CXST(s) & 0x8000); +} + +static int pcnet_can_receive(void *opaque) +{ + PCNetState *s = opaque; + if (CSR_STOP(s) || CSR_SPND(s)) + return 0; + + if (s->recv_pos > 0) + return 0; + + return sizeof(s->buffer)-16; +} + +static void pcnet_receive(void *opaque, const uint8_t *buf, int size) +{ + PCNetState *s = opaque; + int is_padr = 0, is_bcast = 0, is_ladr = 0; + + if (CSR_DRX(s) || CSR_STOP(s) || CSR_SPND(s) || !size) + return; + +#ifdef PCNET_DEBUG + printf("pcnet_receive size=%d\n", size); +#endif + + if (CSR_PROM(s) + || (is_padr=padr_match(s, buf, size)) + || (is_bcast=padr_bcast(s, buf, size)) + || (is_ladr=ladr_match(s, buf, size))) { + + pcnet_rdte_poll(s); + + if (!(CSR_CRST(s) & 0x8000) && s->rdra) { + struct pcnet_RMD rmd; + int rcvrc = CSR_RCVRC(s)-1,i; + target_phys_addr_t nrda; + for (i = CSR_RCVRL(s)-1; i > 0; i--, rcvrc--) { + if (rcvrc <= 1) + rcvrc = CSR_RCVRL(s); + nrda = s->rdra + + (CSR_RCVRL(s) - rcvrc) * + (BCR_SWSTYLE(s) ? 
16 : 8 ); + RMDLOAD(&rmd, PHYSADDR(s,nrda)); + if (rmd.rmd1.own) { +#ifdef PCNET_DEBUG_RMD + printf("pcnet - scan buffer: RCVRC=%d PREV_RCVRC=%d\n", + rcvrc, CSR_RCVRC(s)); +#endif + CSR_RCVRC(s) = rcvrc; + pcnet_rdte_poll(s); + break; + } + } + } + + if (!(CSR_CRST(s) & 0x8000)) { +#ifdef PCNET_DEBUG_RMD + printf("pcnet - no buffer: RCVRC=%d\n", CSR_RCVRC(s)); +#endif + s->csr[0] |= 0x1000; /* Set MISS flag */ + CSR_MISSC(s)++; + } else { + uint8_t *src = &s->buffer[8]; + target_phys_addr_t crda = CSR_CRDA(s); + struct pcnet_RMD rmd; + int pktcount = 0; + + memcpy(src, buf, size); + + if (!CSR_ASTRP_RCV(s)) { + uint32_t fcs = ~0; +#if 0 + uint8_t *p = s->buffer; + + ((uint32_t *)p)[0] = ((uint32_t *)p)[1] = 0xaaaaaaaa; + p[7] = 0xab; +#else + uint8_t *p = src; +#endif + + while (size < 46) { + src[size++] = 0; + } + + while (p != &src[size]) { + CRC(fcs, *p++); + } + ((uint32_t *)&src[size])[0] = htonl(fcs); + size += 4; /* FCS at end of packet */ + } else size += 4; + +#ifdef PCNET_DEBUG_MATCH + PRINT_PKTHDR(buf); +#endif + + RMDLOAD(&rmd, PHYSADDR(s,crda)); + /*if (!CSR_LAPPEN(s))*/ + rmd.rmd1.stp = 1; + +#define PCNET_RECV_STORE() do { \ + int count = MIN(4096 - rmd.rmd1.bcnt,size); \ + target_phys_addr_t rbadr = PHYSADDR(s, rmd.rmd0.rbadr); \ + cpu_physical_memory_write(rbadr, src, count); \ + cpu_physical_memory_set_dirty(rbadr); \ + cpu_physical_memory_set_dirty(rbadr+count); \ + src += count; size -= count; \ + rmd.rmd2.mcnt = count; rmd.rmd1.own = 0; \ + RMDSTORE(&rmd, PHYSADDR(s,crda)); \ + pktcount++; \ +} while (0) + + PCNET_RECV_STORE(); + if ((size > 0) && CSR_NRDA(s)) { + target_phys_addr_t nrda = CSR_NRDA(s); + RMDLOAD(&rmd, PHYSADDR(s,nrda)); + if (rmd.rmd1.own) { + crda = nrda; + PCNET_RECV_STORE(); + if ((size > 0) && (nrda=CSR_NNRD(s))) { + RMDLOAD(&rmd, PHYSADDR(s,nrda)); + if (rmd.rmd1.own) { + crda = nrda; + PCNET_RECV_STORE(); + } + } + } + } + +#undef PCNET_RECV_STORE + + RMDLOAD(&rmd, PHYSADDR(s,crda)); + if (size == 0) { + rmd.rmd1.enp = 
1; + rmd.rmd1.pam = !CSR_PROM(s) && is_padr; + rmd.rmd1.lafm = !CSR_PROM(s) && is_ladr; + rmd.rmd1.bam = !CSR_PROM(s) && is_bcast; + } else { + rmd.rmd1.oflo = 1; + rmd.rmd1.buff = 1; + rmd.rmd1.err = 1; + } + RMDSTORE(&rmd, PHYSADDR(s,crda)); + s->csr[0] |= 0x0400; + +#ifdef PCNET_DEBUG + printf("RCVRC=%d CRDA=0x%08x BLKS=%d\n", + CSR_RCVRC(s), PHYSADDR(s,CSR_CRDA(s)), pktcount); +#endif +#ifdef PCNET_DEBUG_RMD + PRINT_RMD(&rmd); +#endif + + while (pktcount--) { + if (CSR_RCVRC(s) <= 1) + CSR_RCVRC(s) = CSR_RCVRL(s); + else + CSR_RCVRC(s)--; + } + + pcnet_rdte_poll(s); + + } + } + + pcnet_poll(s); + pcnet_update_irq(s); +} + +static void pcnet_transmit(PCNetState *s) +{ + target_phys_addr_t xmit_cxda = 0; + int count = CSR_XMTRL(s)-1; + s->xmit_pos = -1; + + if (!CSR_TXON(s)) { + s->csr[0] &= ~0x0008; + return; + } + + txagain: + if (pcnet_tdte_poll(s)) { + struct pcnet_TMD tmd; + + TMDLOAD(&tmd, PHYSADDR(s,CSR_CXDA(s))); + +#ifdef PCNET_DEBUG_TMD + printf(" TMDLOAD 0x%08x\n", PHYSADDR(s,CSR_CXDA(s))); + PRINT_TMD(&tmd); +#endif + if (tmd.tmd1.stp) { + s->xmit_pos = 0; + if (!tmd.tmd1.enp) { + cpu_physical_memory_read(PHYSADDR(s, tmd.tmd0.tbadr), + s->buffer, 4096 - tmd.tmd1.bcnt); + s->xmit_pos += 4096 - tmd.tmd1.bcnt; + } + xmit_cxda = PHYSADDR(s,CSR_CXDA(s)); + } + if (tmd.tmd1.enp && (s->xmit_pos >= 0)) { + cpu_physical_memory_read(PHYSADDR(s, tmd.tmd0.tbadr), + s->buffer + s->xmit_pos, 4096 - tmd.tmd1.bcnt); + s->xmit_pos += 4096 - tmd.tmd1.bcnt; + + tmd.tmd1.own = 0; + TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s))); + +#ifdef PCNET_DEBUG + printf("pcnet_transmit size=%d\n", s->xmit_pos); +#endif + if (CSR_LOOP(s)) + pcnet_receive(s, s->buffer, s->xmit_pos); + else + qemu_send_packet(s->nd, s->buffer, s->xmit_pos); + + s->csr[0] &= ~0x0008; /* clear TDMD */ + s->csr[4] |= 0x0004; /* set TXSTRT */ + s->xmit_pos = -1; + } else { + tmd.tmd1.own = 0; + TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s))); + } + if (!CSR_TOKINTD(s) || (CSR_LTINTEN(s) && tmd.tmd1.ltint)) + s->csr[0] 
|= 0x0200; /* set TINT */ + + if (CSR_XMTRC(s)<=1) + CSR_XMTRC(s) = CSR_XMTRL(s); + else + CSR_XMTRC(s)--; + if (count--) + goto txagain; + + } else + if (s->xmit_pos >= 0) { + struct pcnet_TMD tmd; + TMDLOAD(&tmd, PHYSADDR(s,xmit_cxda)); + tmd.tmd2.buff = tmd.tmd2.uflo = tmd.tmd1.err = 1; + tmd.tmd1.own = 0; + TMDSTORE(&tmd, PHYSADDR(s,xmit_cxda)); + s->csr[0] |= 0x0200; /* set TINT */ + if (!CSR_DXSUFLO(s)) { + s->csr[0] &= ~0x0010; + } else + if (count--) + goto txagain; + } +} + +static void pcnet_poll(PCNetState *s) +{ + if (CSR_RXON(s)) { + pcnet_rdte_poll(s); + } + + if (CSR_TDMD(s) || + (CSR_TXON(s) && !CSR_DPOLL(s) && pcnet_tdte_poll(s))) + pcnet_transmit(s); +} + +static void pcnet_poll_timer(void *opaque) +{ + PCNetState *s = opaque; + + qemu_del_timer(s->poll_timer); + + if (CSR_TDMD(s)) { + pcnet_transmit(s); + } + + pcnet_update_irq(s); + + if (!CSR_STOP(s) && !CSR_SPND(s) && !CSR_DPOLL(s)) { + uint64_t now = qemu_get_clock(vm_clock) * 33; + if (!s->timer || !now) + s->timer = now; + else { + uint64_t t = now - s->timer + CSR_POLL(s); + if (t > 0xffffLL) { + pcnet_poll(s); + CSR_POLL(s) = CSR_PINT(s); + } else + CSR_POLL(s) = t; + } + qemu_mod_timer(s->poll_timer, + pcnet_get_next_poll_time(s,qemu_get_clock(vm_clock))); + } +} + + +static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value) +{ + uint16_t val = new_value; +#ifdef PCNET_DEBUG_CSR + printf("pcnet_csr_writew rap=%d val=0x%04x\n", rap, val); +#endif + switch (rap) { + case 0: + s->csr[0] &= ~(val & 0x7f00); /* Clear any interrupt flags */ + + s->csr[0] = (s->csr[0] & ~0x0040) | (val & 0x0048); + + val = (val & 0x007f) | (s->csr[0] & 0x7f00); + + /* IFF STOP, STRT and INIT are set, clear STRT and INIT */ + if ((val&7) == 7) + val &= ~3; + + if (!CSR_STOP(s) && (val & 4)) + pcnet_stop(s); + + if (!CSR_INIT(s) && (val & 1)) + pcnet_init(s); + + if (!CSR_STRT(s) && (val & 2)) + pcnet_start(s); + + if (CSR_TDMD(s)) + pcnet_transmit(s); + + return; + case 1: + case 2: + case 8: 
+ case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 18: /* CRBAL */ + case 19: /* CRBAU */ + case 20: /* CXBAL */ + case 21: /* CXBAU */ + case 22: /* NRBAU */ + case 23: /* NRBAU */ + case 24: + case 25: + case 26: + case 27: + case 28: + case 29: + case 30: + case 31: + case 32: + case 33: + case 34: + case 35: + case 36: + case 37: + case 38: + case 39: + case 40: /* CRBC */ + case 41: + case 42: /* CXBC */ + case 43: + case 44: + case 45: + case 46: /* POLL */ + case 47: /* POLLINT */ + case 72: + case 74: + case 76: /* RCVRL */ + case 78: /* XMTRL */ + case 112: + if (CSR_STOP(s) || CSR_SPND(s)) + break; + return; + case 3: + break; + case 4: + s->csr[4] &= ~(val & 0x026a); + val &= ~0x026a; val |= s->csr[4] & 0x026a; + break; + case 5: + s->csr[5] &= ~(val & 0x0a90); + val &= ~0x0a90; val |= s->csr[5] & 0x0a90; + break; + case 16: + pcnet_csr_writew(s,1,val); + return; + case 17: + pcnet_csr_writew(s,2,val); + return; + case 58: + pcnet_bcr_writew(s,BCR_SWS,val); + break; + default: + return; + } + s->csr[rap] = val; +} + +static uint32_t pcnet_csr_readw(PCNetState *s, uint32_t rap) +{ + uint32_t val; + switch (rap) { + case 0: + pcnet_update_irq(s); + val = s->csr[0]; + val |= (val & 0x7800) ? 
0x8000 : 0; + break; + case 16: + return pcnet_csr_readw(s,1); + case 17: + return pcnet_csr_readw(s,2); + case 58: + return pcnet_bcr_readw(s,BCR_SWS); + case 88: + val = s->csr[89]; + val <<= 16; + val |= s->csr[88]; + break; + default: + val = s->csr[rap]; + } +#ifdef PCNET_DEBUG_CSR + printf("pcnet_csr_readw rap=%d val=0x%04x\n", rap, val); +#endif + return val; +} + +static void pcnet_bcr_writew(PCNetState *s, uint32_t rap, uint32_t val) +{ + rap &= 127; +#ifdef PCNET_DEBUG_BCR + printf("pcnet_bcr_writew rap=%d val=0x%04x\n", rap, val); +#endif + switch (rap) { + case BCR_SWS: + if (!(CSR_STOP(s) || CSR_SPND(s))) + return; + val &= ~0x0300; + switch (val & 0x00ff) { + case 0: + val |= 0x0200; + break; + case 1: + val |= 0x0100; + break; + case 2: + case 3: + val |= 0x0300; + break; + default: + printf("Bad SWSTYLE=0x%02x\n", val & 0xff); + val = 0x0200; + break; + } +#ifdef PCNET_DEBUG + printf("BCR_SWS=0x%04x\n", val); +#endif + case BCR_LNKST: + case BCR_LED1: + case BCR_LED2: + case BCR_LED3: + case BCR_MC: + case BCR_FDC: + case BCR_BSBC: + case BCR_EECAS: + case BCR_PLAT: + s->bcr[rap] = val; + break; + default: + break; + } +} + +static uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap) +{ + uint32_t val; + rap &= 127; + switch (rap) { + case BCR_LNKST: + case BCR_LED1: + case BCR_LED2: + case BCR_LED3: + val = s->bcr[rap] & ~0x8000; + val |= (val & 0x017f & s->lnkst) ? 0x8000 : 0; + break; + default: + val = rap < 32 ? 
s->bcr[rap] : 0; + break; + } +#ifdef PCNET_DEBUG_BCR + printf("pcnet_bcr_readw rap=%d val=0x%04x\n", rap, val); +#endif + return val; +} + +static void pcnet_h_reset(PCNetState *s) +{ + int i; + uint16_t checksum; + + /* Initialize the PROM */ + + memcpy(s->prom, s->nd->macaddr, 6); + s->prom[12] = s->prom[13] = 0x00; + s->prom[14] = s->prom[15] = 0x57; + + for (i = 0,checksum = 0; i < 16; i++) + checksum += s->prom[i]; + *(uint16_t *)&s->prom[12] = cpu_to_le16(checksum); + + + s->bcr[BCR_MSRDA] = 0x0005; + s->bcr[BCR_MSWRA] = 0x0005; + s->bcr[BCR_MC ] = 0x0002; + s->bcr[BCR_LNKST] = 0x00c0; + s->bcr[BCR_LED1 ] = 0x0084; + s->bcr[BCR_LED2 ] = 0x0088; + s->bcr[BCR_LED3 ] = 0x0090; + s->bcr[BCR_FDC ] = 0x0000; + s->bcr[BCR_BSBC ] = 0x9001; + s->bcr[BCR_EECAS] = 0x0002; + s->bcr[BCR_SWS ] = 0x0200; + s->bcr[BCR_PLAT ] = 0xff06; + + pcnet_s_reset(s); +} + +static void pcnet_aprom_writeb(void *opaque, uint32_t addr, uint32_t val) +{ + PCNetState *s = opaque; +#ifdef PCNET_DEBUG + printf("pcnet_aprom_writeb addr=0x%08x val=0x%02x\n", addr, val); +#endif + /* Check APROMWE bit to enable write access */ + if (pcnet_bcr_readw(s,2) & 0x80) + s->prom[addr & 15] = val; +} + +static uint32_t pcnet_aprom_readb(void *opaque, uint32_t addr) +{ + PCNetState *s = opaque; + uint32_t val = s->prom[addr &= 15]; +#ifdef PCNET_DEBUG + printf("pcnet_aprom_readb addr=0x%08x val=0x%02x\n", addr, val); +#endif + return val; +} + +static void pcnet_ioport_writew(void *opaque, uint32_t addr, uint32_t val) +{ + PCNetState *s = opaque; + pcnet_poll_timer(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_writew addr=0x%08x val=0x%04x\n", addr, val); +#endif + if (!BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + pcnet_csr_writew(s, s->rap, val); + break; + case 0x02: + s->rap = val & 0x7f; + break; + case 0x06: + pcnet_bcr_writew(s, s->rap, val); + break; + } + } + pcnet_update_irq(s); +} + +static uint32_t pcnet_ioport_readw(void *opaque, uint32_t addr) +{ + PCNetState *s = 
opaque; + uint32_t val = -1; + pcnet_poll_timer(s); + if (!BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + val = pcnet_csr_readw(s, s->rap); + break; + case 0x02: + val = s->rap; + break; + case 0x04: + pcnet_s_reset(s); + val = 0; + break; + case 0x06: + val = pcnet_bcr_readw(s, s->rap); + break; + } + } + pcnet_update_irq(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_readw addr=0x%08x val=0x%04x\n", addr, val & 0xffff); +#endif + return val; +} + +static void pcnet_ioport_writel(void *opaque, uint32_t addr, uint32_t val) +{ + PCNetState *s = opaque; + pcnet_poll_timer(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_writel addr=0x%08x val=0x%08x\n", addr, val); +#endif + if (BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + pcnet_csr_writew(s, s->rap, val & 0xffff); + break; + case 0x04: + s->rap = val & 0x7f; + break; + case 0x0c: + pcnet_bcr_writew(s, s->rap, val & 0xffff); + break; + } + } else + if ((addr & 0x0f) == 0) { + /* switch device to dword i/o mode */ + pcnet_bcr_writew(s, BCR_BSBC, pcnet_bcr_readw(s, BCR_BSBC) | 0x0080); +#ifdef PCNET_DEBUG_IO + printf("device switched into dword i/o mode\n"); +#endif + } + pcnet_update_irq(s); +} + +static uint32_t pcnet_ioport_readl(void *opaque, uint32_t addr) +{ + PCNetState *s = opaque; + uint32_t val = -1; + pcnet_poll_timer(s); + if (BCR_DWIO(s)) { + switch (addr & 0x0f) { + case 0x00: /* RDP */ + val = pcnet_csr_readw(s, s->rap); + break; + case 0x04: + val = s->rap; + break; + case 0x08: + pcnet_s_reset(s); + val = 0; + break; + case 0x0c: + val = pcnet_bcr_readw(s, s->rap); + break; + } + } + pcnet_update_irq(s); +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_readl addr=0x%08x val=0x%08x\n", addr, val); +#endif + return val; +} + +static void pcnet_ioport_map(PCIDevice *pci_dev, int region_num, + uint32_t addr, uint32_t size, int type) +{ + PCNetState *d = (PCNetState *)pci_dev; + +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_map addr=0x%04x size=0x%04x\n", addr, size); 
+#endif + + register_ioport_write(addr, 16, 1, pcnet_aprom_writeb, d); + register_ioport_read(addr, 16, 1, pcnet_aprom_readb, d); + + register_ioport_write(addr + 0x10, 0x10, 2, pcnet_ioport_writew, d); + register_ioport_read(addr + 0x10, 0x10, 2, pcnet_ioport_readw, d); + register_ioport_write(addr + 0x10, 0x10, 4, pcnet_ioport_writel, d); + register_ioport_read(addr + 0x10, 0x10, 4, pcnet_ioport_readl, d); +} + +static void pcnet_mmio_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + PCNetState *d = opaque; +#ifdef PCNET_DEBUG_IO + printf("pcnet_mmio_writeb addr=0x%08x val=0x%02x\n", addr, val); +#endif + if (!(addr & 0x10)) + pcnet_aprom_writeb(d, addr & 0x0f, val); +} + +static uint32_t pcnet_mmio_readb(void *opaque, target_phys_addr_t addr) +{ + PCNetState *d = opaque; + uint32_t val = -1; + if (!(addr & 0x10)) + val = pcnet_aprom_readb(d, addr & 0x0f); +#ifdef PCNET_DEBUG_IO + printf("pcnet_mmio_readb addr=0x%08x val=0x%02x\n", addr, val & 0xff); +#endif + return val; +} + +static void pcnet_mmio_writew(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + PCNetState *d = opaque; +#ifdef PCNET_DEBUG_IO + printf("pcnet_mmio_writew addr=0x%08x val=0x%04x\n", addr, val); +#endif + if (addr & 0x10) + pcnet_ioport_writew(d, addr & 0x0f, val); + else { + addr &= 0x0f; + pcnet_aprom_writeb(d, addr, val & 0xff); + pcnet_aprom_writeb(d, addr+1, (val & 0xff00) >> 8); + } +} + +static uint32_t pcnet_mmio_readw(void *opaque, target_phys_addr_t addr) +{ + PCNetState *d = opaque; + uint32_t val = -1; + if (addr & 0x10) + val = pcnet_ioport_readw(d, addr & 0x0f); + else { + addr &= 0x0f; + val = pcnet_aprom_readb(d, addr+1); + val <<= 8; + val |= pcnet_aprom_readb(d, addr); + } +#ifdef PCNET_DEBUG_IO + printf("pcnet_mmio_readw addr=0x%08x val = 0x%04x\n", addr, val & 0xffff); +#endif + return val; +} + +static void pcnet_mmio_writel(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + PCNetState *d = opaque; +#ifdef PCNET_DEBUG_IO + 
printf("pcnet_mmio_writel addr=0x%08x val=0x%08x\n", addr, val); +#endif + if (addr & 0x10) + pcnet_ioport_writel(d, addr & 0x0f, val); + else { + addr &= 0x0f; + pcnet_aprom_writeb(d, addr, val & 0xff); + pcnet_aprom_writeb(d, addr+1, (val & 0xff00) >> 8); + pcnet_aprom_writeb(d, addr+2, (val & 0xff0000) >> 16); + pcnet_aprom_writeb(d, addr+3, (val & 0xff000000) >> 24); + } +} + +static uint32_t pcnet_mmio_readl(void *opaque, target_phys_addr_t addr) +{ + PCNetState *d = opaque; + uint32_t val; + if (addr & 0x10) + val = pcnet_ioport_readl(d, addr & 0x0f); + else { + addr &= 0x0f; + val = pcnet_aprom_readb(d, addr+3); + val <<= 8; + val |= pcnet_aprom_readb(d, addr+2); + val <<= 8; + val |= pcnet_aprom_readb(d, addr+1); + val <<= 8; + val |= pcnet_aprom_readb(d, addr); + } +#ifdef PCNET_DEBUG_IO + printf("pcnet_mmio_readl addr=0x%08x val=0x%08x\n", addr, val); +#endif + return val; +} + + +static CPUWriteMemoryFunc *pcnet_mmio_write[] = { + (CPUWriteMemoryFunc *)&pcnet_mmio_writeb, + (CPUWriteMemoryFunc *)&pcnet_mmio_writew, + (CPUWriteMemoryFunc *)&pcnet_mmio_writel +}; + +static CPUReadMemoryFunc *pcnet_mmio_read[] = { + (CPUReadMemoryFunc *)&pcnet_mmio_readb, + (CPUReadMemoryFunc *)&pcnet_mmio_readw, + (CPUReadMemoryFunc *)&pcnet_mmio_readl +}; + +static void pcnet_mmio_map(PCIDevice *pci_dev, int region_num, + uint32_t addr, uint32_t size, int type) +{ + PCNetState *d = (PCNetState *)pci_dev; + +#ifdef PCNET_DEBUG_IO + printf("pcnet_ioport_map addr=0x%08x 0x%08x\n", addr, size); +#endif + + cpu_register_physical_memory(addr, PCNET_PNPMMIO_SIZE, d->mmio_io_addr); +} + +void pci_pcnet_init(PCIBus *bus, NetDriverState *nd) +{ + PCNetState *d; + uint8_t *pci_conf; + +#if 0 + printf("sizeof(RMD)=%d, sizeof(TMD)=%d\n", + sizeof(struct pcnet_RMD), sizeof(struct pcnet_TMD)); +#endif + + d = (PCNetState *)pci_register_device(bus, "PCNet", sizeof(PCNetState), + -1, NULL, NULL); + + pci_conf = d->dev.config; + + *(uint16_t *)&pci_conf[0x00] = cpu_to_le16(0x1022); + 
*(uint16_t *)&pci_conf[0x02] = cpu_to_le16(0x2000); + *(uint16_t *)&pci_conf[0x04] = cpu_to_le16(0x0007); + *(uint16_t *)&pci_conf[0x06] = cpu_to_le16(0x0280); + pci_conf[0x08] = 0x10; + pci_conf[0x09] = 0x00; + pci_conf[0x0a] = 0x00; // ethernet network controller + pci_conf[0x0b] = 0x02; + pci_conf[0x0e] = 0x00; // header_type + + *(uint32_t *)&pci_conf[0x10] = cpu_to_le32(0x00000001); + *(uint32_t *)&pci_conf[0x14] = cpu_to_le32(0x00000000); + + pci_conf[0x3d] = 1; // interrupt pin 0 + pci_conf[0x3e] = 0x06; + pci_conf[0x3f] = 0xff; + + /* Handler for memory-mapped I/O */ + d->mmio_io_addr = + cpu_register_io_memory(0, pcnet_mmio_read, pcnet_mmio_write, d); + + pci_register_io_region((PCIDevice *)d, 0, PCNET_IOPORT_SIZE, + PCI_ADDRESS_SPACE_IO, pcnet_ioport_map); + + pci_register_io_region((PCIDevice *)d, 1, PCNET_PNPMMIO_SIZE, + PCI_ADDRESS_SPACE_MEM, pcnet_mmio_map); + + d->poll_timer = qemu_new_timer(vm_clock, pcnet_poll_timer, d); + + d->nd = nd; + + pcnet_h_reset(d); + + qemu_add_read_packet(nd, pcnet_can_receive, pcnet_receive, d); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/ioemu/hw/pcnet.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/ioemu/hw/pcnet.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,583 @@ +/* + * QEMU AMD PC-Net II (Am79C970A) emulation + * + * Copyright (c) 2004 Antony T Curtis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This software was written to be compatible with the specification: + * AMD Am79C970A PCnet-PCI II Ethernet Controller Data-Sheet + * AMD Publication# 19436 Rev:E Amendment/0 Issue Date: June 2000 + */ + +#ifdef __GNUC__ +#define PACKED(A) A __attribute__ ((packed)) +#else +#error FixMe +#endif + +/* BUS CONFIGURATION REGISTERS */ +#define BCR_MSRDA 0 +#define BCR_MSWRA 1 +#define BCR_MC 2 +#define BCR_LNKST 4 +#define BCR_LED1 5 +#define BCR_LED2 6 +#define BCR_LED3 7 +#define BCR_FDC 9 +#define BCR_BSBC 18 +#define BCR_EECAS 19 +#define BCR_SWS 20 +#define BCR_PLAT 22 + +#define BCR_DWIO(S) !!((S)->bcr[BCR_BSBC] & 0x0080) +#define BCR_SSIZE32(S) !!((S)->bcr[BCR_SWS ] & 0x0100) +#define BCR_SWSTYLE(S) ((S)->bcr[BCR_SWS ] & 0x00FF) + +#define CSR_INIT(S) !!(((S)->csr[0])&0x0001) +#define CSR_STRT(S) !!(((S)->csr[0])&0x0002) +#define CSR_STOP(S) !!(((S)->csr[0])&0x0004) +#define CSR_TDMD(S) !!(((S)->csr[0])&0x0008) +#define CSR_TXON(S) !!(((S)->csr[0])&0x0010) +#define CSR_RXON(S) !!(((S)->csr[0])&0x0020) +#define CSR_INEA(S) !!(((S)->csr[0])&0x0040) +#define CSR_LAPPEN(S) !!(((S)->csr[3])&0x0020) +#define CSR_DXSUFLO(S) !!(((S)->csr[3])&0x0040) +#define CSR_ASTRP_RCV(S) !!(((S)->csr[4])&0x0800) +#define CSR_DPOLL(S) !!(((S)->csr[4])&0x1000) +#define CSR_SPND(S) !!(((S)->csr[5])&0x0001) +#define CSR_LTINTEN(S) !!(((S)->csr[5])&0x4000) +#define CSR_TOKINTD(S) !!(((S)->csr[5])&0x8000) +#define CSR_DRX(S) !!(((S)->csr[15])&0x0001) +#define CSR_DTX(S) !!(((S)->csr[15])&0x0002) 
+#define CSR_LOOP(S) !!(((S)->csr[15])&0x0004) +#define CSR_DRCVPA(S) !!(((S)->csr[15])&0x2000) +#define CSR_DRCVBC(S) !!(((S)->csr[15])&0x4000) +#define CSR_PROM(S) !!(((S)->csr[15])&0x8000) + +#define CSR_CRBC(S) ((S)->csr[40]) +#define CSR_CRST(S) ((S)->csr[41]) +#define CSR_CXBC(S) ((S)->csr[42]) +#define CSR_CXST(S) ((S)->csr[43]) +#define CSR_NRBC(S) ((S)->csr[44]) +#define CSR_NRST(S) ((S)->csr[45]) +#define CSR_POLL(S) ((S)->csr[46]) +#define CSR_PINT(S) ((S)->csr[47]) +#define CSR_RCVRC(S) ((S)->csr[72]) +#define CSR_XMTRC(S) ((S)->csr[74]) +#define CSR_RCVRL(S) ((S)->csr[76]) +#define CSR_XMTRL(S) ((S)->csr[78]) +#define CSR_MISSC(S) ((S)->csr[112]) + +#define CSR_IADR(S) ((S)->csr[ 1] | ((S)->csr[ 2] << 16)) +#define CSR_CRBA(S) ((S)->csr[18] | ((S)->csr[19] << 16)) +#define CSR_CXBA(S) ((S)->csr[20] | ((S)->csr[21] << 16)) +#define CSR_NRBA(S) ((S)->csr[22] | ((S)->csr[23] << 16)) +#define CSR_BADR(S) ((S)->csr[24] | ((S)->csr[25] << 16)) +#define CSR_NRDA(S) ((S)->csr[26] | ((S)->csr[27] << 16)) +#define CSR_CRDA(S) ((S)->csr[28] | ((S)->csr[29] << 16)) +#define CSR_BADX(S) ((S)->csr[30] | ((S)->csr[31] << 16)) +#define CSR_NXDA(S) ((S)->csr[32] | ((S)->csr[33] << 16)) +#define CSR_CXDA(S) ((S)->csr[34] | ((S)->csr[35] << 16)) +#define CSR_NNRD(S) ((S)->csr[36] | ((S)->csr[37] << 16)) +#define CSR_NNXD(S) ((S)->csr[38] | ((S)->csr[39] << 16)) +#define CSR_PXDA(S) ((S)->csr[60] | ((S)->csr[61] << 16)) +#define CSR_NXBA(S) ((S)->csr[64] | ((S)->csr[65] << 16)) + +#define PHYSADDR(S,A) \ + (BCR_SSIZE32(S) ? 
(A) : (A) | ((0xff00 & (uint32_t)(s)->csr[2])<<16)) + +struct pcnet_initblk16 { + uint16_t mode; + uint16_t padr1; + uint16_t padr2; + uint16_t padr3; + uint16_t ladrf1; + uint16_t ladrf2; + uint16_t ladrf3; + uint16_t ladrf4; + unsigned PACKED(rdra:24); + unsigned PACKED(res1:5); + unsigned PACKED(rlen:3); + unsigned PACKED(tdra:24); + unsigned PACKED(res2:5); + unsigned PACKED(tlen:3); +}; + +struct pcnet_initblk32 { + uint16_t mode; + unsigned PACKED(res1:4); + unsigned PACKED(rlen:4); + unsigned PACKED(res2:4); + unsigned PACKED(tlen:4); + uint16_t padr1; + uint16_t padr2; + uint16_t padr3; + uint16_t _res; + uint16_t ladrf1; + uint16_t ladrf2; + uint16_t ladrf3; + uint16_t ladrf4; + uint32_t rdra; + uint32_t tdra; +}; + +struct pcnet_TMD { + struct { + unsigned tbadr:32; + } tmd0; + struct { + unsigned PACKED(bcnt:12), PACKED(ones:4), PACKED(res:7), PACKED(bpe:1); + unsigned PACKED(enp:1), PACKED(stp:1), PACKED(def:1), PACKED(one:1); + unsigned PACKED(ltint:1), PACKED(nofcs:1), PACKED(err:1), PACKED(own:1); + } tmd1; + struct { + unsigned PACKED(trc:4), PACKED(res:12); + unsigned PACKED(tdr:10), PACKED(rtry:1), PACKED(lcar:1); + unsigned PACKED(lcol:1), PACKED(exdef:1), PACKED(uflo:1), PACKED(buff:1); + } tmd2; + struct { + unsigned res:32; + } tmd3; +}; + +struct pcnet_RMD { + struct { + unsigned rbadr:32; + } rmd0; + struct { + unsigned PACKED(bcnt:12), PACKED(ones:4), PACKED(res:4); + unsigned PACKED(bam:1), PACKED(lafm:1), PACKED(pam:1), PACKED(bpe:1); + unsigned PACKED(enp:1), PACKED(stp:1), PACKED(buff:1), PACKED(crc:1); + unsigned PACKED(oflo:1), PACKED(fram:1), PACKED(err:1), PACKED(own:1); + } rmd1; + struct { + unsigned PACKED(mcnt:12), PACKED(zeros:4); + unsigned PACKED(rpc:8), PACKED(rcc:8); + } rmd2; + struct { + unsigned res:32; + } rmd3; +}; + + +#define PRINT_TMD(T) printf( \ + "TMD0 : TBADR=0x%08x\n" \ + "TMD1 : OWN=%d, ERR=%d, FCS=%d, LTI=%d, " \ + "ONE=%d, DEF=%d, STP=%d, ENP=%d,\n" \ + " BPE=%d, BCNT=%d\n" \ + "TMD2 : BUF=%d, UFL=%d, 
EXD=%d, LCO=%d, " \ + "LCA=%d, RTR=%d,\n" \ + " TDR=%d, TRC=%d\n", \ + (T)->tmd0.tbadr, \ + (T)->tmd1.own, (T)->tmd1.err, (T)->tmd1.nofcs, \ + (T)->tmd1.ltint, (T)->tmd1.one, (T)->tmd1.def, \ + (T)->tmd1.stp, (T)->tmd1.enp, (T)->tmd1.bpe, \ + 4096-(T)->tmd1.bcnt, \ + (T)->tmd2.buff, (T)->tmd2.uflo, (T)->tmd2.exdef,\ + (T)->tmd2.lcol, (T)->tmd2.lcar, (T)->tmd2.rtry, \ + (T)->tmd2.tdr, (T)->tmd2.trc) + +#define PRINT_RMD(R) printf( \ + "RMD0 : RBADR=0x%08x\n" \ + "RMD1 : OWN=%d, ERR=%d, FRAM=%d, OFLO=%d, " \ + "CRC=%d, BUFF=%d, STP=%d, ENP=%d,\n " \ + "BPE=%d, PAM=%d, LAFM=%d, BAM=%d, ONES=%d, BCNT=%d\n" \ + "RMD2 : RCC=%d, RPC=%d, MCNT=%d, ZEROS=%d\n", \ + (R)->rmd0.rbadr, \ + (R)->rmd1.own, (R)->rmd1.err, (R)->rmd1.fram, \ + (R)->rmd1.oflo, (R)->rmd1.crc, (R)->rmd1.buff, \ + (R)->rmd1.stp, (R)->rmd1.enp, (R)->rmd1.bpe, \ + (R)->rmd1.pam, (R)->rmd1.lafm, (R)->rmd1.bam, \ + (R)->rmd1.ones, 4096-(R)->rmd1.bcnt, \ + (R)->rmd2.rcc, (R)->rmd2.rpc, (R)->rmd2.mcnt, \ + (R)->rmd2.zeros) + +static inline void pcnet_tmd_load(PCNetState *s, struct pcnet_TMD *tmd, target_phys_addr_t addr) +{ + if (!BCR_SWSTYLE(s)) { + uint16_t xda[4]; + cpu_physical_memory_read(addr, + (void *)&xda[0], sizeof(xda)); + ((uint32_t *)tmd)[0] = (xda[0]&0xffff) | + ((xda[1]&0x00ff) << 16); + ((uint32_t *)tmd)[1] = (xda[2]&0xffff)| + ((xda[1] & 0xff00) << 16); + ((uint32_t *)tmd)[2] = + (xda[3] & 0xffff) << 16; + ((uint32_t *)tmd)[3] = 0; + } + else + if (BCR_SWSTYLE(s) != 3) + cpu_physical_memory_read(addr, (void *)tmd, 16); + else { + uint32_t xda[4]; + cpu_physical_memory_read(addr, + (void *)&xda[0], sizeof(xda)); + ((uint32_t *)tmd)[0] = xda[2]; + ((uint32_t *)tmd)[1] = xda[1]; + ((uint32_t *)tmd)[2] = xda[0]; + ((uint32_t *)tmd)[3] = xda[3]; + } +} + +static inline void pcnet_tmd_store(PCNetState *s, struct pcnet_TMD *tmd, target_phys_addr_t addr) +{ + cpu_physical_memory_set_dirty(addr); + if (!BCR_SWSTYLE(s)) { + uint16_t xda[4]; + xda[0] = ((uint32_t *)tmd)[0] & 0xffff; + xda[1] = 
((((uint32_t *)tmd)[0]>>16)&0x00ff) | + ((((uint32_t *)tmd)[1]>>16)&0xff00); + xda[2] = ((uint32_t *)tmd)[1] & 0xffff; + xda[3] = ((uint32_t *)tmd)[2] >> 16; + cpu_physical_memory_write(addr, + (void *)&xda[0], sizeof(xda)); + cpu_physical_memory_set_dirty(addr+7); + } + else { + if (BCR_SWSTYLE(s) != 3) + cpu_physical_memory_write(addr, (void *)tmd, 16); + else { + uint32_t xda[4]; + xda[0] = ((uint32_t *)tmd)[2]; + xda[1] = ((uint32_t *)tmd)[1]; + xda[2] = ((uint32_t *)tmd)[0]; + xda[3] = ((uint32_t *)tmd)[3]; + cpu_physical_memory_write(addr, + (void *)&xda[0], sizeof(xda)); + } + cpu_physical_memory_set_dirty(addr+15); + } +} + +static inline void pcnet_rmd_load(PCNetState *s, struct pcnet_RMD *rmd, target_phys_addr_t addr) +{ + if (!BCR_SWSTYLE(s)) { + uint16_t rda[4]; + cpu_physical_memory_read(addr, + (void *)&rda[0], sizeof(rda)); + ((uint32_t *)rmd)[0] = (rda[0]&0xffff)| + ((rda[1] & 0x00ff) << 16); + ((uint32_t *)rmd)[1] = (rda[2]&0xffff)| + ((rda[1] & 0xff00) << 16); + ((uint32_t *)rmd)[2] = rda[3] & 0xffff; + ((uint32_t *)rmd)[3] = 0; + } + else + if (BCR_SWSTYLE(s) != 3) + cpu_physical_memory_read(addr, (void *)rmd, 16); + else { + uint32_t rda[4]; + cpu_physical_memory_read(addr, + (void *)&rda[0], sizeof(rda)); + ((uint32_t *)rmd)[0] = rda[2]; + ((uint32_t *)rmd)[1] = rda[1]; + ((uint32_t *)rmd)[2] = rda[0]; + ((uint32_t *)rmd)[3] = rda[3]; + } +} + +static inline void pcnet_rmd_store(PCNetState *s, struct pcnet_RMD *rmd, target_phys_addr_t addr) +{ + cpu_physical_memory_set_dirty(addr); + if (!BCR_SWSTYLE(s)) { + uint16_t rda[4]; \ + rda[0] = ((uint32_t *)rmd)[0] & 0xffff; \ + rda[1] = ((((uint32_t *)rmd)[0]>>16)&0xff)|\ + ((((uint32_t *)rmd)[1]>>16)&0xff00);\ + rda[2] = ((uint32_t *)rmd)[1] & 0xffff; \ + rda[3] = ((uint32_t *)rmd)[2] & 0xffff; \ + cpu_physical_memory_write(addr, \ + (void *)&rda[0], sizeof(rda)); \ + cpu_physical_memory_set_dirty(addr+7); + } + else { + if (BCR_SWSTYLE(s) != 3) + cpu_physical_memory_write(addr, (void *)rmd, 16); + 
else { + uint32_t rda[4]; + rda[0] = ((uint32_t *)rmd)[2]; + rda[1] = ((uint32_t *)rmd)[1]; + rda[2] = ((uint32_t *)rmd)[0]; + rda[3] = ((uint32_t *)rmd)[3]; + cpu_physical_memory_write(addr, + (void *)&rda[0], sizeof(rda)); + } + cpu_physical_memory_set_dirty(addr+15); + } +} + + +#define TMDLOAD(TMD,ADDR) pcnet_tmd_load(s,TMD,ADDR) + +#define TMDSTORE(TMD,ADDR) pcnet_tmd_store(s,TMD,ADDR) + +#define RMDLOAD(RMD,ADDR) pcnet_rmd_load(s,RMD,ADDR) + +#define RMDSTORE(RMD,ADDR) pcnet_rmd_store(s,RMD,ADDR) + +#if 1 + +#define CHECK_RMD(ADDR,RES) do { \ + struct pcnet_RMD rmd; \ + RMDLOAD(&rmd,(ADDR)); \ + (RES) |= (rmd.rmd1.ones != 15) \ + || (rmd.rmd2.zeros != 0); \ +} while (0) + +#define CHECK_TMD(ADDR,RES) do { \ + struct pcnet_TMD tmd; \ + TMDLOAD(&tmd,(ADDR)); \ + (RES) |= (tmd.tmd1.ones != 15); \ +} while (0) + +#else + +#define CHECK_RMD(ADDR,RES) do { \ + switch (BCR_SWSTYLE(s)) { \ + case 0x00: \ + do { \ + uint16_t rda[4]; \ + cpu_physical_memory_read((ADDR), \ + (void *)&rda[0], sizeof(rda)); \ + (RES) |= (rda[2] & 0xf000)!=0xf000; \ + (RES) |= (rda[3] & 0xf000)!=0x0000; \ + } while (0); \ + break; \ + case 0x01: \ + case 0x02: \ + do { \ + uint32_t rda[4]; \ + cpu_physical_memory_read((ADDR), \ + (void *)&rda[0], sizeof(rda)); \ + (RES) |= (rda[1] & 0x0000f000L)!=0x0000f000L; \ + (RES) |= (rda[2] & 0x0000f000L)!=0x00000000L; \ + } while (0); \ + break; \ + case 0x03: \ + do { \ + uint32_t rda[4]; \ + cpu_physical_memory_read((ADDR), \ + (void *)&rda[0], sizeof(rda)); \ + (RES) |= (rda[0] & 0x0000f000L)!=0x00000000L; \ + (RES) |= (rda[1] & 0x0000f000L)!=0x0000f000L; \ + } while (0); \ + break; \ + } \ +} while (0) + +#define CHECK_TMD(ADDR,RES) do { \ + switch (BCR_SWSTYLE(s)) { \ + case 0x00: \ + do { \ + uint16_t xda[4]; \ + cpu_physical_memory_read((ADDR), \ + (void *)&xda[0], sizeof(xda)); \ + (RES) |= (xda[2] & 0xf000)!=0xf000;\ + } while (0); \ + break; \ + case 0x01: \ + case 0x02: \ + case 0x03: \ + do { \ + uint32_t xda[4]; \ + 
cpu_physical_memory_read((ADDR), \ + (void *)&xda[0], sizeof(xda)); \ + (RES) |= (xda[1] & 0x0000f000L)!=0x0000f000L; \ + } while (0); \ + break; \ + } \ +} while (0) + +#endif + +#define PRINT_PKTHDR(BUF) do { \ + struct ether_header *hdr = (void *)(BUF); \ + printf("packet dhost=%02x:%02x:%02x:%02x:%02x:%02x, " \ + "shost=%02x:%02x:%02x:%02x:%02x:%02x, " \ + "type=0x%04x (bcast=%d)\n", \ + hdr->ether_dhost[0],hdr->ether_dhost[1],hdr->ether_dhost[2], \ + hdr->ether_dhost[3],hdr->ether_dhost[4],hdr->ether_dhost[5], \ + hdr->ether_shost[0],hdr->ether_shost[1],hdr->ether_shost[2], \ + hdr->ether_shost[3],hdr->ether_shost[4],hdr->ether_shost[5], \ + htons(hdr->ether_type), \ + !!ETHER_IS_MULTICAST(hdr->ether_dhost)); \ +} while (0) + +#define MULTICAST_FILTER_LEN 8 + +static inline uint32_t lnc_mchash(const uint8_t *ether_addr) +{ +#define LNC_POLYNOMIAL 0xEDB88320UL + uint32_t crc = 0xFFFFFFFF; + int idx, bit; + uint8_t data; + + for (idx = 0; idx < ETHER_ADDR_LEN; idx++) { + for (data = *ether_addr++, bit = 0; bit < MULTICAST_FILTER_LEN; bit++) { + crc = (crc >> 1) ^ (((crc ^ data) & 1) ? LNC_POLYNOMIAL : 0); + data >>= 1; + } + } + return crc; +#undef LNC_POLYNOMIAL +} + +#define MIN(X,Y) ((X>Y) ? 
(Y) : (X)) + +#define CRC(crc, ch) (crc = (crc >> 8) ^ crctab[(crc ^ (ch)) & 0xff]) + +/* generated using the AUTODIN II polynomial + * x^32 + x^26 + x^23 + x^22 + x^16 + + * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1 + */ +static const uint32_t crctab[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 
0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, +}; + +static inline int padr_match(PCNetState *s, const uint8_t *buf, int size) +{ + struct ether_header *hdr = (void *)buf; + uint8_t padr[6] = { + s->csr[12] & 0xff, s->csr[12] >> 8, + s->csr[13] & 0xff, s->csr[13] >> 8, + s->csr[14] & 0xff, s->csr[14] >> 8 + }; + int result = (!CSR_DRCVPA(s)) && !bcmp(hdr->ether_dhost, padr, 6); +#ifdef PCNET_DEBUG_MATCH + printf("packet dhost=%02x:%02x:%02x:%02x:%02x:%02x, " + "padr=%02x:%02x:%02x:%02x:%02x:%02x\n", + 
hdr->ether_dhost[0],hdr->ether_dhost[1],hdr->ether_dhost[2], + hdr->ether_dhost[3],hdr->ether_dhost[4],hdr->ether_dhost[5], + padr[0],padr[1],padr[2],padr[3],padr[4],padr[5]); + printf("padr_match result=%d\n", result); +#endif + return result; +} + +static inline int padr_bcast(PCNetState *s, const uint8_t *buf, int size) +{ + static uint8_t BCAST[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct ether_header *hdr = (void *)buf; + int result = !CSR_DRCVBC(s) && !bcmp(hdr->ether_dhost, BCAST, 6); +#ifdef PCNET_DEBUG_MATCH + printf("padr_bcast result=%d\n", result); +#endif + return result; +} + +static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size) +{ + struct ether_header *hdr = (void *)buf; + if ((*(hdr->ether_dhost)&0x01) && + ((uint64_t *)&s->csr[8])[0] != 0LL) { + uint8_t ladr[8] = { + s->csr[8] & 0xff, s->csr[8] >> 8, + s->csr[9] & 0xff, s->csr[9] >> 8, + s->csr[10] & 0xff, s->csr[10] >> 8, + s->csr[11] & 0xff, s->csr[11] >> 8 + }; + int index = lnc_mchash(hdr->ether_dhost) >> 26; + return !!(ladr[index >> 3] & (1 << (index & 7))); + } + return 0; +} + +static inline target_phys_addr_t pcnet_rdra_addr(PCNetState *s, int idx) +{ + while (idx < 1) idx += CSR_RCVRL(s); + return s->rdra + ((CSR_RCVRL(s) - idx) * (BCR_SWSTYLE(s) ? 16 : 8)); +} + +static inline int64_t pcnet_get_next_poll_time(PCNetState *s, int64_t current_time) +{ + int64_t next_time = current_time + + muldiv64(65536 - (CSR_SPND(s) ? 0 : CSR_POLL(s)), + ticks_per_sec, 33000000L); + if (next_time <= current_time) + next_time = current_time + 1; + return next_time; +} + + diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/tpmif.py --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/server/tpmif.py Fri Sep 9 16:30:54 2005 @@ -0,0 +1,45 @@ +# Copyright (C) 2005 IBM Corporation +# Authort: Stefan Berger, stefanb@xxxxxxxxxx +# Derived from netif.py: +# Copyright (C) 2004 Mike Wray <mike.wray@xxxxxx> +"""Support for virtual TPM interfaces. 
+""" + +import random + +from xen.xend import sxp +from xen.xend.XendError import XendError, VmError +from xen.xend.XendLogging import log +from xen.xend.XendRoot import get_component +from xen.xend.xenstore import DBVar + +from xen.xend.server.controller import Dev, DevController + +class TPMifController(DevController): + """TPM interface controller. Handles all TPM devices for a domain. + """ + + def __init__(self, vm, recreate=False): + DevController.__init__(self, vm, recreate=recreate) + + def initController(self, recreate=False, reboot=False): + self.destroyed = False + + def destroyController(self, reboot=False): + """Destroy the controller and all devices. + """ + self.destroyed = True + self.destroyDevices(reboot=reboot) + + def sxpr(self): + val = ['tpmif', ['dom', self.getDomain()]] + return val + + def newDevice(self, id, config, recreate=False): + """Create a TPM device. + + @param id: interface id + @param config: device configuration + @param recreate: recreate flag (true after xend restart) + """ + return None diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/xenstore/xstransact.py --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/xenstore/xstransact.py Fri Sep 9 16:30:54 2005 @@ -0,0 +1,113 @@ +# Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> + +# This file is subject to the terms and conditions of the GNU General +# Public License. See the file "COPYING" in the main directory of +# this archive for more details. 
+ +import threading +from xen.lowlevel import xs + +handles = {} + +# XXX need to g/c handles from dead threads +def xshandle(): + if not handles.has_key(threading.currentThread()): + handles[threading.currentThread()] = xs.open() + return handles[threading.currentThread()] + +class xstransact: + + def __init__(self, path): + self.path = path.rstrip("/") + xshandle().transaction_start(path) + self.in_transaction = True + + def __del__(self): + if self.in_transaction: + xshandle().transaction_end(True) + + def commit(self): + if not self.in_transaction: + raise RuntimeError + self.in_transaction = False + return xshandle().transaction_end(False) + + def abort(self): + if not self.in_transaction: + raise RuntimeError + self.in_transaction = False + return xshandle().transaction_end(True) + + def _read(self, key): + path = "%s/%s" % (self.path, key) + return xshandle().read(path) + + def read(self, *args): + if len(args) == 0: + raise TypeError + if len(args) == 1: + return self._read(args[0]) + ret = [] + for key in args: + ret.append(self._read(key)) + return ret + + def _write(self, key, data, create=True, excl=False): + path = "%s/%s" % (self.path, key) + xshandle().write(path, data, create=create, excl=excl) + + def write(self, *args, **opts): + create = opts.get('create') or True + excl = opts.get('excl') or False + if len(args) == 0: + raise TypeError + if isinstance(args[0], dict): + for d in args: + if not isinstance(d, dict): + raise TypeError + for key in d.keys(): + self._write(key, d[key], create, excl) + elif isinstance(args[0], list): + for l in args: + if not len(l) == 2: + raise TypeError + self._write(l[0], l[1], create, excl) + elif len(args) % 2 == 0: + for i in range(len(args) / 2): + self._write(args[i * 2], args[i * 2 + 1], create, excl) + else: + raise TypeError + + def Read(cls, path, *args): + t = cls(path) + v = t.read(*args) + t.commit() + return v + + Read = classmethod(Read) + + def Write(cls, path, *args, **opts): + t = cls(path) + 
t.write(*args, **opts) + t.commit() + + Write = classmethod(Write) + + def SafeRead(cls, path, *args): + while True: + try: + return cls.Read(path, *args) + except RuntimeError, ex: + pass + + SafeRead = classmethod(SafeRead) + + def SafeWrite(cls, path, *args, **opts): + while True: + try: + cls.Write(path, *args, **opts) + return + except RuntimeError, ex: + pass + + SafeWrite = classmethod(SafeWrite) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/xenstore/xswatch.py --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/python/xen/xend/xenstore/xswatch.py Fri Sep 9 16:30:54 2005 @@ -0,0 +1,65 @@ +# Copyright (C) 2005 Christian Limpach <Christian.Limpach@xxxxxxxxxxxx> + +# This file is subject to the terms and conditions of the GNU General +# Public License. See the file "COPYING" in the main directory of +# this archive for more details. + +import select +import threading +from xen.lowlevel import xs + +class xswatch: + + watchThread = None + threadcond = threading.Condition() + xs = None + xslock = threading.Lock() + + def __init__(self, path, fn, args=(), kwargs={}): + self.fn = fn + self.args = args + self.kwargs = kwargs + xswatch.watchStart() + xswatch.xslock.acquire() + xswatch.xs.watch(path, self) + xswatch.xslock.release() + + def watchStart(cls): + cls.threadcond.acquire() + if cls.watchThread: + cls.threadcond.release() + return + cls.watchThread = threading.Thread(name="Watcher", + target=cls.watchMain) + cls.watchThread.setDaemon(True) + cls.watchThread.start() + while cls.xs == None: + cls.threadcond.wait() + cls.threadcond.release() + + watchStart = classmethod(watchStart) + + def watchMain(cls): + cls.threadcond.acquire() + cls.xs = xs.open() + cls.threadcond.notifyAll() + cls.threadcond.release() + while True: + try: + (ord, owr, oer) = select.select([ cls.xs ], [], []) + cls.xslock.acquire() + # reconfirm ready to read with lock + (ord, owr, oer) = select.select([ cls.xs ], [], [], 0.001) + if not cls.xs in ord: + cls.xslock.release() + 
continue + we = cls.xs.read_watch() + watch = we[1] + cls.xs.acknowledge_watch(watch) + cls.xslock.release() + except RuntimeError, ex: + print ex + raise + watch.fn(*watch.args, **watch.kwargs) + + watchMain = classmethod(watchMain) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/getlabel.sh --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/security/getlabel.sh Fri Sep 9 16:30:54 2005 @@ -0,0 +1,130 @@ +#!/bin/sh +# * +# * getlabel +# * +# * Copyright (C) 2005 IBM Corporation +# * +# * Authors: +# * Stefan Berger <stefanb@xxxxxxxxxx> +# * +# * This program is free software; you can redistribute it and/or +# * modify it under the terms of the GNU General Public License as +# * published by the Free Software Foundation, version 2 of the +# * License. +# * +# * 'getlabel' tries to find the labels corresponding to the ssidref +# * +# * 'getlabel -?' shows the usage of the program +# * +# * 'getlabel -sid <ssidref> [<policy name>]' lists the label corresponding +# * to the given ssidref. +# * +# * 'getlabel -dom <domain id> [<policy name>]' lists the label of the +# * domain with given id +# * +# + +if [ -z "$runbash" ]; then + runbash="1" + export runbash + exec sh -c "bash $0 $*" +fi + + +export PATH=$PATH:. +source labelfuncs.sh + +usage () +{ + echo "Usage: $0 -sid <ssidref> [<policy name>] or" + echo " $0 -dom <domid> [<policy name>] " + echo "" + echo "policy name : the name of the policy, i.e. 'chwall'" + echo " If the policy name is omitted, the grub.conf" + echo " entry of the running system is tried to be read" + echo " and the policy name determined from there." + echo "ssidref : an ssidref in hex or decimal format, i.e., '0x00010002'" + echo " or '65538'" + echo "domid : id of the domain, i.e., '1'; Use numbers from the 2nd" + echo " column shown when invoking 'xm list'" + echo "" +} + + + +if [ "$1" == "-?" 
]; then + mode="usage" +elif [ "$1" == "-dom" ]; then + mode="domid" + shift +elif [ "$1" == "-sid" ]; then + mode="sid" + shift +elif [ "$1" == "" ]; then + usage + exit -1 +fi + + +if [ "$mode" == "usage" ]; then + usage +elif [ "$mode" == "domid" ]; then + if [ "$2" == "" ]; then + findGrubConf + ret=$? + if [ $ret -eq 0 ]; then + echo "Could not find grub.conf" + exit -1; + fi + findPolicyInGrub $grubconf + if [ "$policy" != "" ]; then + echo "Assuming policy to be '$policy'."; + else + echo "Could not find policy." + exit -1; + fi + else + policy=$2 + fi + findMapFile $policy + res=$? + if [ "$res" != "0" ]; then + getSSIDUsingSecpolTool $1 + res=$? + if [ "$res" != "0" ]; then + translateSSIDREF $ssid $mapfile + else + echo "Could not determine the SSID of the domain." + fi + else + echo "Could not find map file for policy '$policy'." + fi +elif [ "$mode" == "sid" ]; then + if [ "$2" == "" ]; then + findGrubConf + ret=$? + if [ $ret -eq 0 ]; then + echo "Could not find grub.conf" + exit -1; + fi + findPolicyInGrub $grubconf + if [ "$policy" != "" ]; then + echo "Assuming policy to be '$policy'."; + else + echo "Could not find policy." + exit -1; + fi + else + policy=$2 + fi + findMapFile $policy + res=$? + if [ "$res" != "0" ]; then + translateSSIDREF $1 $mapfile + else + echo "Could not find map file for policy '$policy'." + fi + +else + usage +fi diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/security/labelfuncs.sh --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/security/labelfuncs.sh Fri Sep 9 16:30:54 2005 @@ -0,0 +1,675 @@ +# * +# * labelfuncs.sh +# * +# * Copyright (C) 2005 IBM Corporation +# * +# * Authors: +# * Stefan Berger <stefanb@xxxxxxxxxx> +# * +# * This program is free software; you can redistribute it and/or +# * modify it under the terms of the GNU General Public License as +# * published by the Free Software Foundation, version 2 of the +# * License. +# * +# * +# * A collection of functions to handle polcies, mapfiles, +# * and ssidrefs. 
+# + + +# Find the mapfile given a policy nmame +# Parameters: +# 1st : the name of the policy whose map file is to be found, i.e., +# chwall +# Results: +# The variable mapfile will hold the realtive path to the mapfile +# for the given policy. +# In case the mapfile could be found, the functions returns a '1', +# a '0' otherwise. +findMapFile () +{ + mapfile="./$1.map" + if [ -r "$mapfile" ]; then + return 1 + fi + + mapfile="./policies/$1/$1.map" + if [ -r "$mapfile" ]; then + return 1 + fi + + return 0 +} + + +# Determine the name of the primary policy +# Parameters +# 1st : the path to the mapfile; the path may be relative +# to the current directory +# Results +# The variable primary will hold the name of the primary policy +getPrimaryPolicy () +{ + mapfile=$1 + primary=`cat $mapfile | \ + awk ' \ + { \ + if ( $1 == "PRIMARY" ) { \ + res=$2; \ + } \ + } END { \ + print res; \ + } '` +} + + +# Determine the name of the secondary policy +# Parameters +# 1st : the path to the mapfile; the path may be relative +# to the current directory +# Results +# The variable secondary will hold the name of the secondary policy +getSecondaryPolicy () +{ + mapfile=$1 + secondary=`cat $mapfile | \ + awk ' \ + { \ + if ( $1 == "SECONDARY" ) { \ + res=$2; \ + } \ + } END { \ + print res; \ + } '` +} + + +#Return where the grub.conf file is. +#I only know of one place it can be. +findGrubConf() +{ + grubconf="/boot/grub/grub.conf" + if [ -w $grubconf ]; then + return 1 + fi + if [ -r $grubconf ]; then + return 2 + fi + return 0 +} + + +# This function sets the global variable 'linux' +# to the name and version of the Linux kernel that was compiled +# for domain 0. 
+# If this variable could not be found, the variable 'linux' +# will hold a pattern +# Parameters: +# 1st: the path to reach the root directory of the XEN build tree +# where linux-*-xen0 is located at +# Results: +# The variable linux holds then name and version of the compiled +# kernel, i.e., 'vmlinuz-2.6.12-xen0' +getLinuxVersion () +{ + path=$1 + linux="" + for f in $path/linux-*-xen0 ; do + versionfile=$f/include/linux/version.h + if [ -r $versionfile ]; then + lnx=`cat $versionfile | \ + grep UTS_RELEASE | \ + awk '{ \ + len=length($3); \ + print substr($3,2,len-2) }'` + fi + if [ "$lnx" != "" ]; then + linux="[./0-9a-zA-z]*$lnx" + return; + fi + done + + #Last resort. + linux="vmlinuz-2.[45678].[0-9]*[.0-9]*-xen0$" +} + + +# Find out with which policy the hypervisor was booted with. +# Parameters +# 1st : The complete path to grub.conf, i.e., /boot/grub/grub.conf +# +findPolicyInGrub () +{ + grubconf=$1 + linux=`uname -r` + policy=`cat $grubconf | \ + awk -vlinux=$linux '{ \ + if ( $1 == "title" ) { \ + kernelfound = 0; \ + policymaycome = 0; \ + } \ + else if ( $1 == "kernel" ) { \ + if ( match($2,"xen.gz$") ) { \ + pathlen=RSTART; \ + kernelfound = 1; \ + } \ + } \ + else if ( $1 == "module" && \ + kernelfound == 1 && \ + match($2,linux) ) { \ + policymaycome = 1; \ + } \ + else if ( $1 == "module" && \ + kernelfound == 1 && \ + policymaycome == 1 && \ + match($2,"[0-9a-zA-Z_]*.bin$") ) { \ + policymaycome = 0; \ + kernelfound = 0; \ + polname = substr($2,pathlen); \ + len=length(polname); \ + polname = substr(polname,0,len-4); \ + } \ + } END { \ + print polname \ + }'` +} + + +# Get the SSID of a domain +# Parameters: +# 1st : domain ID, i.e. '1' +# Results +# If the ssid could be found, the variable 'ssid' will hold +# the currently used ssid in the hex format, i.e., '0x00010001'. +# The funtion returns '1' on success, '0' on failure +getSSIDUsingSecpolTool () +{ + domid=$1 + export PATH=$PATH:. 
+ ssid=`secpol_tool getssid -d $domid -f | \ + grep -E "SSID:" | \ + awk '{ print $4 }'` + + if [ "$ssid" != "" ]; then + return 1 + fi + return 0 +} + + +# Break the ssid identifier into its high and low values, +# which are equal to the secondary and primary policy references. +# Parameters: +# 1st: ssid to break into high and low value, i.e., '0x00010002' +# Results: +# The variable ssidlo_int and ssidhi_int will hold the low and +# high ssid values as integers. +getSSIDLOHI () +{ + ssid=$1 + ssidlo_int=`echo $ssid | awk \ + '{ \ + len=length($0); \ + beg=substr($0,1,2); \ + if ( beg == "0x" ) { \ + dig = len - 2; \ + if (dig <= 0) { \ + exit; \ + } \ + if (dig > 4) { \ + dig=4; \ + } \ + lo=sprintf("0x%s",substr($0,len-dig+1,dig)); \ + print strtonum(lo);\ + } else { \ + lo=strtonum($0); \ + if (lo < 65536) { \ + print lo; \ + } else { \ + hi=lo; \ + hi2= (hi / 65536);\ + hi2_str=sprintf("%d",hi2); \ + hi2=strtonum(hi2_str);\ + lo=hi-(hi2*65536); \ + printf("%d",lo); \ + } \ + } \ + }'` + ssidhi_int=`echo $ssid | awk \ + '{ \ + len=length($0); \ + beg=substr($0,1,2); \ + if ( beg == "0x" ) { \ + dig = len - 2; \ + if (dig <= 0 || \ + dig > 8) { \ + exit; \ + } \ + if (dig < 4) { \ + print 0; \ + exit; \ + } \ + dig -= 4; \ + hi=sprintf("0x%s",substr($0,len-4-dig+1,dig)); \ + print strtonum(hi);\ + } else { \ + hi=strtonum($0); \ + if (hi >= 65536) { \ + hi = hi / 65536; \ + printf ("%d",hi);\ + } else { \ + printf ("0"); \ + } \ + } \ + }'` + if [ "$ssidhi_int" == "" -o \ + "$ssidlo_int" == "" ]; then + return 0; + fi + return 1 +} + + +#Update the grub configuration file. +#Search for existing entries and replace the current +#policy entry with the policy passed to this script +# +#Arguments passed to this function +# 1st : the grub configuration file with full path +# 2nd : the binary policy file name, i.e. 
chwall.bin +# 3rd : the name or pattern of the linux kernel name to match +# (this determines where the module entry will be made) +# +# The algorithm here is based on pattern matching +# and is working correctly if +# - under a title a line beginning with 'kernel' is found +# whose following item ends with "xen.gz" +# Example: kernel /xen.gz dom0_mem=.... +# - a module line matching the 3rd parameter is found +# +updateGrub () +{ + grubconf=$1 + policyfile=$2 + linux=$3 + + tmpfile="/tmp/new_grub.conf" + + cat $grubconf | \ + awk -vpolicy=$policyfile \ + -vlinux=$linux '{ \ + if ( $1 == "title" ) { \ + kernelfound = 0; \ + if ( policymaycome == 1 ){ \ + printf ("\tmodule %s%s\n", path, policy); \ + } \ + policymaycome = 0; \ + } \ + else if ( $1 == "kernel" ) { \ + if ( match($2,"xen.gz$") ) { \ + path=substr($2,1,RSTART-1); \ + kernelfound = 1; \ + } \ + } \ + else if ( $1 == "module" && \ + kernelfound == 1 && \ + match($2,linux) ) { \ + policymaycome = 1; \ + } \ + else if ( $1 == "module" && \ + kernelfound == 1 && \ + policymaycome == 1 && \ + match($2,"[0-9a-zA-Z]*.bin$") ) { \ + printf ("\tmodule %s%s\n", path, policy); \ + policymaycome = 0; \ + kernelfound = 0; \ + dontprint = 1; \ + } \ + else if ( $1 == "" && \ + kernelfound == 1 && \ + policymaycome == 1) { \ + dontprint = 1; \ + } \ + if (dontprint == 0) { \ + printf ("%s\n", $0); \ + } \ + dontprint = 0; \ + } END { \ + if ( policymaycome == 1 ) { \ + printf ("\tmodule %s%s\n", path, policy); \ + } \ + }' > $tmpfile + if [ ! -r $tmpfile ]; then + echo "Could not create temporary file! Aborting." + exit -1 + fi + mv -f $tmpfile $grubconf +} + + +# Display all the labels in a given mapfile +# Parameters +# 1st: Full or relative path to the policy's mapfile +showLabels () +{ + mapfile=$1 + if [ ! -r "$mapfile" -o "$mapfile" == "" ]; then + echo "Cannot read from vm configuration file $vmfile." 
+ return -1 + fi + + getPrimaryPolicy $mapfile + getSecondaryPolicy $mapfile + + echo "The following labels are available:" + let line=1 + while [ 1 ]; do + ITEM=`cat $mapfile | \ + awk -vline=$line \ + -vprimary=$primary \ + '{ \ + if ($1 == "LABEL->SSID" && \ + $2 == "VM" && \ + $3 == primary ) { \ + ctr++; \ + if (ctr == line) { \ + print $4; \ + } \ + } \ + } END { \ + }'` + + if [ "$ITEM" == "" ]; then + break + fi + if [ "$secondary" != "NULL" ]; then + LABEL=`cat $mapfile | \ + awk -vitem=$ITEM \ + '{ + if ($1 == "LABEL->SSID" && \ + $2 == "VM" && \ + $3 == "CHWALL" && \ + $4 == item ) { \ + result = item; \ + } \ + } END { \ + print result \ + }'` + else + LABEL=$ITEM + fi + + if [ "$LABEL" != "" ]; then + echo "$LABEL" + found=1 + fi + let line=line+1 + done + if [ "$found" != "1" ]; then + echo "No labels found." + fi +} + + +# Get the default SSID given a mapfile and the policy name +# Parameters +# 1st: Full or relative path to the policy's mapfile +# 2nd: the name of the policy +getDefaultSsid () +{ + mapfile=$1 + pol=$2 + RES=`cat $mapfile \ + awk -vpol=$pol \ + { \ + if ($1 == "LABEL->SSID" && \ + $2 == "ANY" && \ + $3 == pol && \ + $4 == "DEFAULT" ) {\ + res=$5; \ + } \ + } END { \ + printf "%04x", strtonum(res) \ + }'` + echo "default NULL mapping is $RES" + defaultssid=$RES +} + + +#Relabel a VM configuration file +# Parameters +# 1st: Full or relative path to the VM configuration file +# 2nd: The label to translate into an ssidref +# 3rd: Full or relative path to the policy's map file +# 4th: The mode this function is supposed to operate in: +# 'relabel' : Relabels the file without querying the user +# other : Prompts the user whether to proceed +relabel () +{ + vmfile=$1 + label=$2 + mapfile=$3 + mode=$4 + + if [ ! -r "$vmfile" ]; then + echo "Cannot read from vm configuration file $vmfile." + return -1 + fi + + if [ ! -w "$vmfile" ]; then + echo "Cannot write to vm configuration file $vmfile." + return -1 + fi + + if [ ! 
-r "$mapfile" ] ; then + echo "Cannot read mapping file $mapfile." + return -1 + fi + + # Determine which policy is primary, which sec. + getPrimaryPolicy $mapfile + getSecondaryPolicy $mapfile + + # Calculate the primary policy's SSIDREF + if [ "$primary" == "NULL" ]; then + SSIDLO="0001" + else + SSIDLO=`cat $mapfile | \ + awk -vlabel=$label \ + -vprimary=$primary \ + '{ \ + if ( $1 == "LABEL->SSID" && \ + $2 == "VM" && \ + $3 == primary && \ + $4 == label ) { \ + result=$5 \ + } \ + } END { \ + if (result != "" ) \ + {printf "%04x", strtonum(result)}\ + }'` + fi + + # Calculate the secondary policy's SSIDREF + if [ "$secondary" == "NULL" ]; then + if [ "$primary" == "NULL" ]; then + SSIDHI="0001" + else + SSIDHI="0000" + fi + else + SSIDHI=`cat $mapfile | \ + awk -vlabel=$label \ + -vsecondary=$secondary \ + '{ \ + if ( $1 == "LABEL->SSID" && \ + $2 == "VM" && \ + $3 == secondary && \ + $4 == label ) { \ + result=$5 \ + } \ + } END { \ + if (result != "" ) \ + {printf "%04x", strtonum(result)}\ + }'` + fi + + if [ "$SSIDLO" == "" -o \ + "$SSIDHI" == "" ]; then + echo "Could not map the given label '$label'." + return -1 + fi + + ACM_POLICY=`cat $mapfile | \ + awk ' { if ( $1 == "POLICY" ) { \ + result=$2 \ + } \ + } \ + END { \ + if (result != "") { \ + printf result \ + } \ + }'` + + if [ "$ACM_POLICY" == "" ]; then + echo "Could not find 'POLICY' entry in map file." + return -1 + fi + + SSIDREF="0x$SSIDHI$SSIDLO" + + if [ "$mode" != "relabel" ]; then + RES=`cat $vmfile | \ + awk '{ \ + if ( substr($1,0,7) == "ssidref" ) {\ + print $0; \ + } \ + }'` + if [ "$RES" != "" ]; then + echo "Do you want to overwrite the existing mapping ($RES)? (y/N)" + read user + if [ "$user" != "y" -a "$user" != "Y" ]; then + echo "Aborted." + return 0 + fi + fi + fi + + #Write the output + vmtmp1="/tmp/__setlabel.tmp1" + vmtmp2="/tmp/__setlabel.tmp2" + touch $vmtmp1 + touch $vmtmp2 + if [ ! -w "$vmtmp1" -o ! -w "$vmtmp2" ]; then + echo "Cannot create temporary files. Aborting." 
+ return -1 + fi + RES=`sed -e '/^#ACM_POLICY/d' $vmfile > $vmtmp1` + RES=`sed -e '/^#ACM_LABEL/d' $vmtmp1 > $vmtmp2` + RES=`sed -e '/^ssidref/d' $vmtmp2 > $vmtmp1` + echo "#ACM_POLICY=$ACM_POLICY" >> $vmtmp1 + echo "#ACM_LABEL=$label" >> $vmtmp1 + echo "ssidref = $SSIDREF" >> $vmtmp1 + mv -f $vmtmp1 $vmfile + rm -rf $vmtmp1 $vmtmp2 + echo "Mapped label '$label' to ssidref '$SSIDREF'." +} + + +# Translate an ssidref into its label. This does the reverse lookup +# to the relabel function above. +# This function displays the results. +# Parameters: +# 1st: The ssidref to translate; must be in the form '0x00010002' +# 2nd: Full or relative path to the policy's mapfile +translateSSIDREF () +{ + ssidref=$1 + mapfile=$2 + + if [ ! -r "$mapfile" -o "$mapfile" == "" ]; then + echo "Cannot read from vm configuration file $vmfile." + return -1 + fi + + getPrimaryPolicy $mapfile + getSecondaryPolicy $mapfile + + if [ "$primary" == "NULL" -a "$secondary" == "NULL" ]; then + echo "There are no labels for the NULL policy." + return + fi + + getSSIDLOHI $ssidref + ret=$? + if [ $ret -ne 1 ]; then + echo "Error while parsing the ssid ref number '$ssidref'." + fi; + + let line1=0 + let line2=0 + while [ 1 ]; do + ITEM1=`cat $mapfile | \ + awk -vprimary=$primary \ + -vssidlo=$ssidlo_int \ + -vline=$line1 \ + '{ \ + if ( $1 == "LABEL->SSID" && \ + $3 == primary && \ + int($5) == ssidlo ) { \ + if (l == line) { \ + print $4; \ + exit; \ + } \ + l++; \ + } \ + }'` + + ITEM2=`cat $mapfile | \ + awk -vsecondary=$secondary \ + -vssidhi=$ssidhi_int \ + -vline=$line2 \ + '{ \ + if ( $1 == "LABEL->SSID" && \ + $3 == secondary && \ + int($5) == ssidhi ) { \ + if (l == line) { \ + print $4; \ + exit; \ + } \ + l++; \ + } \ + }'` + + if [ "$secondary" != "NULL" ]; then + if [ "$ITEM1" == "" ]; then + let line1=0 + let line2=line2+1 + else + let line1=line1+1 + fi + + if [ "$ITEM1" == "" -a \ + "$ITEM2" == "" ]; then + echo "Could not determine the referenced label." 
+ break + fi + + if [ "$ITEM1" == "$ITEM2" ]; then + echo "Label: $ITEM1" + break + fi + else + if [ "$ITEM1" != "" ]; then + echo "Label: $ITEM1" + else + if [ "$found" == "0" ]; then + found=1 + else + break + fi + fi + let line1=line1+1 + fi + done +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,63 @@ +XEN_ROOT = ../.. + +# Base definitions and rules +include $(XEN_ROOT)/tools/vtpm/Rules.mk + +# Dir name for emulator (as dom0 tpm driver) +TPM_EMULATOR_DIR = tpm_emulator-0.2 +# Dir name for vtpm instance +VTPM_DIR = vtpm + +# Emulator tarball name +TPM_EMULATOR_TARFILE = tpm_emulator-0.2b.tar.gz + +all: build + +build: $(TPM_EMULATOR_TARFILE) extract patch build_sub + +install: build + $(MAKE) -C $(TPM_EMULATOR_DIR) $@ + $(MAKE) -C $(VTPM_DIR) $@ + +clean: + if [ -d $(TPM_EMULATOR_DIR) ]; \ + then $(MAKE) -C $(TPM_EMULATOR_DIR) clean; \ + fi + if [ -d $(VTPM_DIR) ]; \ + then $(MAKE) -C $(VTPM_DIR) clean; \ + fi + rm -rf $(TPM_EMULATOR_DIR) + rm -rf $(VTPM_DIR) + +mrproper: clean + rm -f $(TPM_EMULATOR_TARFILE) + +# Download Swiss emulator +$(TPM_EMULATOR_TARFILE): + wget http://download.berlios.de/tpm-emulator/$(TPM_EMULATOR_TARFILE) + +# Create vtpm and TPM emulator dirs +extract: $(TPM_EMULATOR_DIR)/README $(VTPM_DIR)/README + +$(TPM_EMULATOR_DIR)/README: + -rm -rf $(TPM_EMULATOR_DIR) + tar -xzf $(TPM_EMULATOR_TARFILE) + +$(VTPM_DIR)/README: + -rm -rf $(VTPM_DIR) + cp -r --preserve $(TPM_EMULATOR_DIR) $(VTPM_DIR) + +# apply patches for 1) used as dom0 tpm driver 2) used as vtpm device instance +patch: $(TPM_EMULATOR_DIR)/Makefile $(VTPM_DIR)/Makefile + +$(TPM_EMULATOR_DIR)/Makefile: tpm_emulator.patch + -cd $(TPM_EMULATOR_DIR); \ + patch -p1 <../tpm_emulator.patch + +$(VTPM_DIR)/Makefile: vtpm.patch + -cd $(VTPM_DIR); \ + patch -p1 <../vtpm.patch + +build_sub: + $(MAKE) -C $(TPM_EMULATOR_DIR) + $(MAKE) -C $(VTPM_DIR) diff -r 10b1d30d3f66 -r 
b2f4823b6ff0 tools/vtpm/README --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm/README Fri Sep 9 16:30:54 2005 @@ -0,0 +1,44 @@ + +Directory Structure +=================== +tools/vtpm/tpm_emulator-0.2b.tar.gz -> TPM Emulator downloaded at build time that will + be patched and used for our vtpms +tools/vtpm/vtpm.patch -> patch applied to tpm_emulator to make vtpm +tools/vtpm/vtpm/ -> (created on build) tpm_emulator moved to ring 3, + listens on a pair of fifos for TPM commands, + persistent state is sent via named fifo to vtpm + manager, which encrypts it and protects it. +tools/vtpm/tpm_emulator.patch -> To allow for debugging and testing on non-TPM + platforms, this patches the emulator to allow + it to be inserted into the dom0 kernel +tools/vtpm/tpm_emulator-0.2 -> (created on build) directory containing patched emulator + +Compile Flags +=================== +VTPM_MULTI_VM -> Defined (not finished): VTPMs run in their own VMs + Not Defined (default): VTPMs are processes + +Requirements +============ +- xen-unstable +- IBM frontend/backend vtpm driver patch +- vtpm_managerd + +vtpmd Flow (for vtpm_manager. vtpmd never run by default) +============================ +- Launch the VTPM manager (vtpm_managerd) which which begins listening to the BE with one thread + and listens to a named fifo that is shared by the vtpms to commuincate with the manager. +- VTPM Manager listens to TPM BE. +- When xend launches a tpm frontend equipped VM it contacts the manager over the vtpm backend. +- When the manager receives the open message from the BE, it launches a vtpm +- Xend allows the VM to continue booting. +- When a TPM request is issued to the front end, the front end transmits the TPM request to the backend. +- The manager receives the TPM requests and uses a named fifo to forward the request to the vtpm. +- The fifo listener begins listening for the reply from vtpm for the request. +- Vtpm processes request and replies to manager over shared named fifo. 
+- If needed, the vtpm may send a request to the vtpm_manager at any time to save it's secrets to disk. +- Manager receives response from vtpm and passes it back to backend for forwarding to guest. + +tpm_emulator flow +================== +Read documentation in tpm_emulator-0.2 directory diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm/Rules.mk --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm/Rules.mk Fri Sep 9 16:30:54 2005 @@ -0,0 +1,37 @@ +# Base definitions and rules (XEN_ROOT must be defined in including Makefile) +include $(XEN_ROOT)/tools/Rules.mk + +# +# Tool definitions +# + +# Installation program and options +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +INSTALL_DIR = $(INSTALL) -d -m0755 + +# Xen tools installation directory +TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin + +# General compiler flags +CFLAGS = -Wall -Werror -g3 -I. + +# For generating dependencies +CFLAGS += -Wp,-MD,.$(@F).d + +DEP_FILES = .*.d + +# Generic project files +HDRS = $(wildcard *.h) +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,%.o,$(SRCS)) + +# Generic (non-header) dependencies +$(SRCS): Makefile $(XEN_ROOT)/tools/Rules.mk $(XEN_ROOT)/tools/vtpm/Rules.mk + +$(OBJS): $(SRCS) + +-include $(DEP_FILES) + +# Make sure these are just rules +.PHONY : all build install clean diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm/tpm_emulator.patch --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm/tpm_emulator.patch Fri Sep 9 16:30:54 2005 @@ -0,0 +1,151 @@ +diff -uprN orig/tpm_emulator-0.2/AUTHORS tpm_emulator-0.2/AUTHORS +--- orig/tpm_emulator-0.2/AUTHORS 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/AUTHORS 2005-08-17 10:55:52.000000000 -0700 +@@ -1 +1,2 @@ + Mario Strasser <mast@xxxxxxx> ++INTEL Corp <> +diff -uprN orig/tpm_emulator-0.2/ChangeLog tpm_emulator-0.2/ChangeLog +--- orig/tpm_emulator-0.2/ChangeLog 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/ChangeLog 2005-08-17 10:55:52.000000000 -0700 +@@ -1,3 +1,7 @@ ++2005-08-16: INTEL Corp ++ * Set 
default permissions to PCRs ++ * Changed device to /dev/tpm0 ++ + 2005-08-15 Mario Strasser <mast@xxxxxxx> + * all: some typos corrected + * tpm_integrity.c: bug in TPM_Extend fixed +diff -uprN orig/tpm_emulator-0.2/Makefile tpm_emulator-0.2/Makefile +--- orig/tpm_emulator-0.2/Makefile 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/Makefile 2005-08-17 10:55:52.000000000 -0700 +@@ -1,15 +1,19 @@ + # Software-Based Trusted Platform Module (TPM) Emulator for Linux + # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> ++# Copyright (C) 2005 INTEL Corp. + # + # $Id: Makefile 10 2005-04-26 20:59:50Z mast $ + ++XEN_ROOT := ../../.. ++EUID := $(shell id -u) ++ + # kernel settings + KERNEL_RELEASE := $(shell uname -r) +-KERNEL_BUILD := /lib/modules/$(KERNEL_RELEASE)/build ++KERNEL_BUILD := $(XEN_ROOT)/linux-2.6.12-xen0 + MOD_SUBDIR := misc + + # module settings +-MODULE_NAME := tpm_emulator ++BIN := tpm_emulator + VERSION_MAJOR := 0 + VERSION_MINOR := 2 + VERSION_BUILD := $(shell date +"%s") +@@ -27,11 +30,9 @@ DIRS := . 
crypto tpm + SRCS := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c)) + OBJS := $(patsubst %.c, %.o, $(SRCS)) + SRCS += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h)) +-DISTSRC := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS) +-DISTDIR := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR) + +-obj-m := $(MODULE_NAME).o +-$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a ++obj-m := $(BIN).o ++$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a + + EXTRA_CFLAGS += -I$(src) -I$(src)/crypto -I$(src)/tpm + +@@ -42,23 +43,17 @@ all: $(src)/crypto/gmp.h $(src)/crypto/l + @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules + + install: +- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install +- test -d /var/tpm || mkdir /var/tpm +- test -c /dev/tpm || mknod /dev/tpm c 10 224 +- chmod 666 /dev/tpm +- depmod -a ++ @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) INSTALL_MOD_PATH=$(DESTDIR) modules_install ++ test -d $(DESTDIR)/var/tpm || mkdir $(DESTDIR)/var/tpm ++ test -d $(DESTDIR)/dev || mkdir $(DESTDIR)/dev ++ test -c $(DESTDIR)/dev/tpm0 || [ $(EUID) -ne 0 ] || mknod $(DESTDIR)/dev/tpm0 c 10 224 ++ [ $(EUID) -ne 0 ] || chmod 666 $(DESTDIR)/dev/tpm0 + + clean: + @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean + rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a + +-dist: $(DISTSRC) +- rm -rf $(DISTDIR) +- mkdir $(DISTDIR) +- cp --parents $(DISTSRC) $(DISTDIR)/ +- rm -f $(DISTDIR)/crypto/gmp.h +- tar -chzf $(DISTDIR).tar.gz $(DISTDIR) +- rm -rf $(DISTDIR) ++mrproper: clean + + $(src)/crypto/libgmp.a: + test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) $(src)/crypto/libgmp.a +diff -uprN orig/tpm_emulator-0.2/README tpm_emulator-0.2/README +--- orig/tpm_emulator-0.2/README 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/README 2005-08-17 10:55:52.000000000 -0700 +@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli + Copyright + -------------------------------------------------------------------------- + Copyright (C) 
2004 Mario Strasser <mast@xxxxxxx> and Swiss Federal +-Institute of Technology (ETH) Zurich. ++ Institute of Technology (ETH) Zurich. ++Copyright (C) 2005 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by +diff -uprN orig/tpm_emulator-0.2/linux_module.h tpm_emulator-0.2/linux_module.h +--- orig/tpm_emulator-0.2/linux_module.h 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/linux_module.h 2005-08-17 10:55:52.000000000 -0700 +@@ -1,5 +1,6 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, ++ * Copyright (C) 2005 INTEL Corp. + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -33,7 +34,7 @@ + #include "tpm_version.h" + + #define TPM_DEVICE_MINOR 224 +-#define TPM_DEVICE_NAME "tpm" ++#define TPM_DEVICE_NAME "tpm0" + #define TPM_MODULE_NAME "tpm_emulator" + + /* debug and log output functions */ +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c tpm_emulator-0.2/tpm/tpm_data.c +--- orig/tpm_emulator-0.2/tpm/tpm_data.c 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/tpm/tpm_data.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -85,6 +86,11 @@ void tpm_init_data(void) + tpmData.permanent.data.version.revMinor = VERSION_MINOR; + /* setup PCR attributes */ + for (i = 0; i < TPM_NUM_PCR; i++) { ++ int j; ++ for (j=0; j < TPM_NUM_LOCALITY; j++) { ++ tpmData.permanent.data.pcrAttrib[i].pcrExtendLocal[j] = TRUE; ++ } ++ + 
tpmData.permanent.data.pcrAttrib[i].pcrReset = TRUE; + } + /* set tick type */ +diff -uprN orig/tpm_emulator-0.2/tpm_version.h tpm_emulator-0.2/tpm_version.h +--- orig/tpm_emulator-0.2/tpm_version.h 2005-08-17 10:58:36.000000000 -0700 ++++ tpm_emulator-0.2/tpm_version.h 2005-08-17 10:55:53.000000000 -0700 +@@ -2,5 +2,5 @@ + #define _TPM_VERSION_H_ + #define VERSION_MAJOR 0 + #define VERSION_MINOR 2 +-#define VERSION_BUILD 1123950310 ++#define VERSION_BUILD 1124301353 + #endif /* _TPM_VERSION_H_ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm/vtpm.patch --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm/vtpm.patch Fri Sep 9 16:30:54 2005 @@ -0,0 +1,1645 @@ +diff -uprN orig/tpm_emulator-0.2/AUTHORS vtpm/AUTHORS +--- orig/tpm_emulator-0.2/AUTHORS 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/AUTHORS 2005-08-17 10:55:52.000000000 -0700 +@@ -1 +1,2 @@ + Mario Strasser <mast@xxxxxxx> ++INTEL Corp <> +diff -uprN orig/tpm_emulator-0.2/ChangeLog vtpm/ChangeLog +--- orig/tpm_emulator-0.2/ChangeLog 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/ChangeLog 2005-08-17 10:55:52.000000000 -0700 +@@ -1,3 +1,7 @@ ++2005-08-16 Intel Corp ++ Moved module out of kernel to run as a ring 3 app ++ Modified save_to_file and load_from_file to call a xen backend driver to call a VTPM manager ++ + 2005-08-15 Mario Strasser <mast@xxxxxxx> + * all: some typos corrected + * tpm_integrity.c: bug in TPM_Extend fixed +diff -uprN orig/tpm_emulator-0.2/Makefile vtpm/Makefile +--- orig/tpm_emulator-0.2/Makefile 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/Makefile 2005-08-17 10:55:52.000000000 -0700 +@@ -1,21 +1,29 @@ + # Software-Based Trusted Platform Module (TPM) Emulator for Linux + # Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> ++# Copyright (C) 2005 INTEL Corp. 
+ # + # $Id: Makefile 10 2005-04-26 20:59:50Z mast $ + +-# kernel settings +-KERNEL_RELEASE := $(shell uname -r) +-KERNEL_BUILD := /lib/modules/$(KERNEL_RELEASE)/build +-MOD_SUBDIR := misc +- + # module settings +-MODULE_NAME := tpm_emulator ++BIN := vtpmd + VERSION_MAJOR := 0 + VERSION_MINOR := 2 + VERSION_BUILD := $(shell date +"%s") + +-# enable/disable DEBUG messages +-EXTRA_CFLAGS += -DDEBUG -g ++# Installation program and options ++INSTALL = install ++INSTALL_PROG = $(INSTALL) -m0755 ++INSTALL_DIR = $(INSTALL) -d -m0755 ++ ++# Xen tools installation directory ++TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin ++ ++CC := gcc ++CFLAGS += -g -Wall $(INCLUDE) -DDEBUG ++CFLAGS += -I. -Itpm ++ ++# Is the simulator running in it's own vm? ++#CFLAGS += -DVTPM_MULTI_VM + + # GNU MP configuration + GMP_LIB := /usr/lib/libgmp.a +@@ -27,38 +35,31 @@ DIRS := . crypto tpm + SRCS := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c)) + OBJS := $(patsubst %.c, %.o, $(SRCS)) + SRCS += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h)) +-DISTSRC := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS) +-DISTDIR := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR) + +-obj-m := $(MODULE_NAME).o +-$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a ++obj-m := $(BIN) ++$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a + + EXTRA_CFLAGS += -I$(src) -I$(src)/crypto -I$(src)/tpm + + # do not print "Entering directory ..." 
+ MAKEFLAGS += --no-print-directory + +-all: $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version +- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules ++all: $(BIN) ++ ++$(BIN): $(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS) $(OBJS) ++ $(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN) ++ ++%.o: %.c ++ $(CC) $(CFLAGS) -c $< -o $@ + + install: +- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install +- test -d /var/tpm || mkdir /var/tpm +- test -c /dev/tpm || mknod /dev/tpm c 10 224 +- chmod 666 /dev/tpm +- depmod -a ++ $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR) + + clean: +- @$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean +- rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a ++ rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS) + +-dist: $(DISTSRC) +- rm -rf $(DISTDIR) +- mkdir $(DISTDIR) +- cp --parents $(DISTSRC) $(DISTDIR)/ +- rm -f $(DISTDIR)/crypto/gmp.h +- tar -chzf $(DISTDIR).tar.gz $(DISTDIR) +- rm -rf $(DISTDIR) ++mrproper: clean ++ rm -f $(BIN) + + $(src)/crypto/libgmp.a: + test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) $(src)/crypto/libgmp.a +diff -uprN orig/tpm_emulator-0.2/README vtpm/README +--- orig/tpm_emulator-0.2/README 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/README 2005-08-17 10:55:52.000000000 -0700 +@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli + Copyright + -------------------------------------------------------------------------- + Copyright (C) 2004 Mario Strasser <mast@xxxxxxx> and Swiss Federal +-Institute of Technology (ETH) Zurich. ++ Institute of Technology (ETH) Zurich. 
++Copyright (C) 2005 INTEL Corp + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by +diff -uprN orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c vtpm/crypto/gmp_kernel_wrapper.c +--- orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/crypto/gmp_kernel_wrapper.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,5 +1,6 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -24,15 +25,10 @@ int __gmp_junk; + void __attribute__ ((regparm(0))) __gmp_assert_fail(const char *filename, + int linenum, const char *expr) + { +- panic(KERN_CRIT TPM_MODULE_NAME "%s:%d: GNU MP assertion failed: %s\n", ++ error("%s:%d: GNU MP assertion failed: %s\n", + filename, linenum, expr); + } + +-void __attribute__ ((regparm(0))) abort(void) +-{ +- panic(KERN_CRIT TPM_MODULE_NAME "GNU MP abort() was called\n"); +-} +- + /* overwrite GNU MP random functions (used by mpz/millerrabin.c) */ + + void __attribute__ ((regparm(0))) gmp_randinit(gmp_randstate_t rstate, +@@ -77,20 +73,19 @@ void __attribute__ ((regparm(0))) mpz_ur + + void __attribute__ ((regparm(0))) *kernel_allocate(size_t size) + { +- void *ret = (void*)kmalloc(size, GFP_KERNEL); +- if (!ret) panic(KERN_CRIT TPM_MODULE_NAME +- "GMP: cannot allocate memory (size=%u)\n", size); ++ void *ret = (void*)malloc(size); ++ if (!ret) error("GMP: cannot allocate memory (size=%u)\n", size); + return ret; + } + + void __attribute__ ((regparm(0))) *kernel_reallocate(void *oldptr, + size_t old_size, size_t new_size) + { +- void *ret = (void*)kmalloc(new_size, GFP_KERNEL); +- if (!ret) panic(KERN_CRIT TPM_MODULE_NAME "GMP: Cannot reallocate memory " ++ void *ret = 
(void*)malloc(new_size); ++ if (!ret) error("GMP: Cannot reallocate memory " + "(old_size=%u new_size=%u)\n", old_size, new_size); + memcpy(ret, oldptr, old_size); +- kfree(oldptr); ++ free(oldptr); + return ret; + } + +@@ -99,7 +94,7 @@ void __attribute__ ((regparm(0))) kernel + /* overwrite used memory */ + if (blk_ptr != NULL) { + memset(blk_ptr, 0, blk_size); +- kfree(blk_ptr); ++ free(blk_ptr); + } + } + +diff -uprN orig/tpm_emulator-0.2/crypto/rsa.c vtpm/crypto/rsa.c +--- orig/tpm_emulator-0.2/crypto/rsa.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/crypto/rsa.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,5 +1,6 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -363,7 +364,7 @@ static int encode_message(int type, uint + msg[0] = 0x00; + get_random_bytes(&msg[1], SHA1_DIGEST_LENGTH); + sha1_init(&ctx); +- sha1_update(&ctx, "TCPA", 4); ++ sha1_update(&ctx, (uint8_t *) "TCPA", 4); + sha1_final(&ctx, &msg[1 + SHA1_DIGEST_LENGTH]); + memset(&msg[1 + 2 * SHA1_DIGEST_LENGTH], 0x00, + msg_len - data_len - 2 * SHA1_DIGEST_LENGTH - 2); +@@ -411,7 +412,7 @@ static int decode_message(int type, uint + mask_generation(&msg[1], SHA1_DIGEST_LENGTH, + &msg[1 + SHA1_DIGEST_LENGTH], msg_len - SHA1_DIGEST_LENGTH - 1); + sha1_init(&ctx); +- sha1_update(&ctx, "TCPA", 4); ++ sha1_update(&ctx, (uint8_t *) "TCPA", 4); + sha1_final(&ctx, &msg[1]); + if (memcmp(&msg[1], &msg[1 + SHA1_DIGEST_LENGTH], + SHA1_DIGEST_LENGTH) != 0) return -1; +diff -uprN orig/tpm_emulator-0.2/linux_module.c vtpm/linux_module.c +--- orig/tpm_emulator-0.2/linux_module.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/linux_module.c 1969-12-31 16:00:00.000000000 -0800 +@@ -1,163 +0,0 @@ +-/* Software-Based Trusted Platform Module (TPM) Emulator for Linux 
+- * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, +- * +- * This module is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published +- * by the Free Software Foundation; either version 2 of the License, +- * or (at your option) any later version. +- * +- * This module is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * $Id: linux_module.c 19 2005-05-18 08:29:37Z mast $ +- */ +- +-#include <linux/module.h> +-#include <linux/kernel.h> +-#include <linux/init.h> +-#include <linux/miscdevice.h> +-#include <linux/poll.h> +-#include "linux_module.h" +-#include "tpm/tpm_emulator.h" +- +-MODULE_LICENSE("GPL"); +-MODULE_AUTHOR("Mario Strasser <mast@xxxxxxx>"); +-MODULE_DESCRIPTION("Trusted Platform Module (TPM) Emulator"); +-MODULE_SUPPORTED_DEVICE(TPM_DEVICE_NAME); +- +-/* module startup parameters */ +-char *startup = "save"; +-MODULE_PARM(startup, "s"); +-MODULE_PARM_DESC(startup, " Sets the startup mode of the TPM. 
" +- "Possible values are 'clear', 'save' (default) and 'deactivated."); +-char *storage_file = "/var/tpm/tpm_emulator-1.2.0.1"; +-MODULE_PARM(storage_file, "s"); +-MODULE_PARM_DESC(storage_file, " Sets the persistent-data storage " +- "file of the TPM."); +- +-/* TPM lock */ +-static struct semaphore tpm_mutex; +- +-/* TPM command response */ +-static struct { +- uint8_t *data; +- uint32_t size; +-} tpm_response; +- +-/* module state */ +-#define STATE_IS_OPEN 0 +-static uint32_t module_state; +- +-static int tpm_open(struct inode *inode, struct file *file) +-{ +- debug("%s()", __FUNCTION__); +- if (test_and_set_bit(STATE_IS_OPEN, (void*)&module_state)) return -EBUSY; +- return 0; +-} +- +-static int tpm_release(struct inode *inode, struct file *file) +-{ +- debug("%s()", __FUNCTION__); +- clear_bit(STATE_IS_OPEN, (void*)&module_state); +- return 0; +-} +- +-static ssize_t tpm_read(struct file *file, char *buf, size_t count, loff_t *ppos) +-{ +- debug("%s(%d)", __FUNCTION__, count); +- down(&tpm_mutex); +- if (tpm_response.data != NULL) { +- count = min(count, (size_t)tpm_response.size - (size_t)*ppos); +- count -= copy_to_user(buf, &tpm_response.data[*ppos], count); +- *ppos += count; +- } else { +- count = 0; +- } +- up(&tpm_mutex); +- return count; +-} +- +-static ssize_t tpm_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +-{ +- debug("%s(%d)", __FUNCTION__, count); +- down(&tpm_mutex); +- *ppos = 0; +- if (tpm_response.data != NULL) kfree(tpm_response.data); +- if (tpm_handle_command(buf, count, &tpm_response.data, +- &tpm_response.size) != 0) { +- count = -EILSEQ; +- tpm_response.data = NULL; +- } +- up(&tpm_mutex); +- return count; +-} +- +-static int tpm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +-{ +- debug("%s(%d, %ld)", __FUNCTION__, cmd, arg); +- return -1; +-} +- +-struct file_operations fops = { +- .owner = THIS_MODULE, +- .open = tpm_open, +- .release = tpm_release, +- .read = tpm_read, 
+- .write = tpm_write, +- .ioctl = tpm_ioctl, +-}; +- +-static struct miscdevice tpm_dev = { +- .minor = TPM_DEVICE_MINOR, +- .name = TPM_DEVICE_NAME, +- .fops = &fops, +-}; +- +-int __init init_tpm_module(void) +-{ +- int res = misc_register(&tpm_dev); +- if (res != 0) { +- error("misc_register() failed for minor %d\n", TPM_DEVICE_MINOR); +- return res; +- } +- /* initialize variables */ +- sema_init(&tpm_mutex, 1); +- module_state = 0; +- tpm_response.data = NULL; +- /* initialize TPM emulator */ +- if (!strcmp(startup, "clear")) { +- tpm_emulator_init(1); +- } else if (!strcmp(startup, "save")) { +- tpm_emulator_init(2); +- } else if (!strcmp(startup, "deactivated")) { +- tpm_emulator_init(3); +- } else { +- error("invalid startup mode '%s'; must be 'clear', " +- "'save' (default) or 'deactivated", startup); +- misc_deregister(&tpm_dev); +- return -EINVAL; +- } +- return 0; +-} +- +-void __exit cleanup_tpm_module(void) +-{ +- tpm_emulator_shutdown(); +- misc_deregister(&tpm_dev); +-} +- +-module_init(init_tpm_module); +-module_exit(cleanup_tpm_module); +- +-uint64_t tpm_get_ticks(void) +-{ +- static struct timespec old_time = {0, 0}; +- struct timespec new_time = current_kernel_time(); +- uint64_t ticks = (uint64_t)(old_time.tv_sec - new_time.tv_sec) * 1000000 +- + (old_time.tv_nsec - new_time.tv_nsec) / 1000; +- old_time = new_time; +- return (ticks > 0) ? 
ticks : 1; +-} +- +diff -uprN orig/tpm_emulator-0.2/linux_module.h vtpm/linux_module.h +--- orig/tpm_emulator-0.2/linux_module.h 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/linux_module.h 2005-08-17 10:55:52.000000000 -0700 +@@ -1,5 +1,6 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -17,17 +18,22 @@ + #ifndef _LINUX_MODULE_H_ + #define _LINUX_MODULE_H_ + +-#include <linux/version.h> +-#include <linux/kernel.h> +-#include <linux/slab.h> ++#include <malloc.h> ++#include <stdint.h> ++#include <stdio.h> ++#include <string.h> + #include <linux/types.h> +-#include <linux/string.h> +-#include <linux/random.h> +-#include <linux/time.h> +-#include <asm/byteorder.h> + +-/* module settings */ ++#include <endian.h> ++#define __BYTEORDER_HAS_U64__ ++#ifdef LITTLE_ENDIAN ++ #include <linux/byteorder/little_endian.h> ++#else ++ #include <linux/byteorder/big_endian.h> ++#endif + ++/* module settings */ ++#define min(A,B) ((A)<(B)?(A):(B)) + #define STR(s) __STR__(s) + #define __STR__(s) #s + #include "tpm_version.h" +@@ -39,32 +45,35 @@ + /* debug and log output functions */ + + #ifdef DEBUG +-#define debug(fmt, ...) printk(KERN_DEBUG "%s %s:%d: Debug: " fmt "\n", \ +- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__) ++#define debug(fmt, ...) printf("%s:%d: Debug: " fmt "\n", \ ++ __FILE__, __LINE__, ## __VA_ARGS__) + #else + #define debug(fmt, ...) + #endif +-#define info(fmt, ...) printk(KERN_INFO "%s %s:%d: Info: " fmt "\n", \ +- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__) +-#define error(fmt, ...) printk(KERN_ERR "%s %s:%d: Error: " fmt "\n", \ +- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__) +-#define alert(fmt, ...) 
printk(KERN_ALERT "%s %s:%d: Alert: " fmt "\n", \ +- TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__) ++#define info(fmt, ...) printf("%s:%d: Info: " fmt "\n", \ ++ __FILE__, __LINE__, ## __VA_ARGS__) ++#define error(fmt, ...) printf("%s:%d: Error: " fmt "\n", \ ++ __FILE__, __LINE__, ## __VA_ARGS__) ++#define alert(fmt, ...) printf("%s:%d: Alert: " fmt "\n", \ ++ __FILE__, __LINE__, ## __VA_ARGS__) + + /* memory allocation */ + + static inline void *tpm_malloc(size_t size) + { +- return kmalloc(size, GFP_KERNEL); ++ return malloc(size); + } + + static inline void tpm_free(const void *ptr) + { +- if (ptr != NULL) kfree(ptr); ++ if (ptr != NULL) free( (void *) ptr); + } + + /* random numbers */ + ++//FIXME; ++void get_random_bytes(void *buf, int nbytes); ++ + static inline void tpm_get_random_bytes(void *buf, int nbytes) + { + get_random_bytes(buf, nbytes); +@@ -84,9 +93,9 @@ uint64_t tpm_get_ticks(void); + #define CPU_TO_LE16(x) __cpu_to_le16(x) + + #define BE64_TO_CPU(x) __be64_to_cpu(x) +-#define LE64_TO_CPU(x) __be64_to_cpu(x) ++#define LE64_TO_CPU(x) __le64_to_cpu(x) + #define BE32_TO_CPU(x) __be32_to_cpu(x) +-#define LE32_TO_CPU(x) __be32_to_cpu(x) ++#define LE32_TO_CPU(x) __le32_to_cpu(x) + #define BE16_TO_CPU(x) __be16_to_cpu(x) + #define LE16_TO_CPU(x) __le16_to_cpu(x) + +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c +--- orig/tpm_emulator-0.2/tpm/tpm_audit.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_audit.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -45,14 +46,14 @@ void tpm_audit_request(TPM_COMMAND_CODE + 
tpmData.permanent.data.auditMonotonicCounter++; + } + /* update audit digest */ +- *((UINT16*)&buf[0]) = cpu_to_be16(TPM_TAG_AUDIT_EVENT_IN); +- *((UINT32*)&buf[2]) = cpu_to_be32(ordinal); ++ *((UINT16*)&buf[0]) = CPU_TO_BE16(TPM_TAG_AUDIT_EVENT_IN); ++ *((UINT32*)&buf[2]) = CPU_TO_BE32(ordinal); + sha1_init(&sha1_ctx); + sha1_update(&sha1_ctx, req->param, req->paramSize); + sha1_final(&sha1_ctx, &buf[6]); +- *((UINT16*)&buf[26]) = cpu_to_be16(TPM_TAG_COUNTER_VALUE); ++ *((UINT16*)&buf[26]) = CPU_TO_BE16(TPM_TAG_COUNTER_VALUE); + memset(&buf[30], 0, 4); +- *((UINT32*)&buf[34]) = cpu_to_be32(tpmData.permanent.data.auditMonotonicCounter); ++ *((UINT32*)&buf[34]) = CPU_TO_BE32(tpmData.permanent.data.auditMonotonicCounter); + sha1_init(&sha1_ctx); + sha1_update(&sha1_ctx, tpmData.stany.data.auditDigest.digest, + sizeof(TPM_DIGEST)); +@@ -70,15 +71,15 @@ void tpm_audit_response(TPM_COMMAND_CODE + && (AUDIT_STATUS[ord / 8] & (1 << (ord & 0x07)))) { + info("tpm_audit_response()"); + /* update audit digest */ +- *((UINT16*)&buf[0]) = cpu_to_be16(TPM_TAG_AUDIT_EVENT_OUT); +- *((UINT32*)&buf[2]) = cpu_to_be32(ordinal); ++ *((UINT16*)&buf[0]) = CPU_TO_BE16(TPM_TAG_AUDIT_EVENT_OUT); ++ *((UINT32*)&buf[2]) = CPU_TO_BE32(ordinal); + sha1_init(&sha1_ctx); + sha1_update(&sha1_ctx, rsp->param, rsp->paramSize); + sha1_final(&sha1_ctx, &buf[6]); +- *((UINT16*)&buf[26]) = cpu_to_be16(TPM_TAG_COUNTER_VALUE); ++ *((UINT16*)&buf[26]) = CPU_TO_BE16(TPM_TAG_COUNTER_VALUE); + memset(&buf[30], 0, 4); +- *((UINT32*)&buf[34]) = cpu_to_be32(tpmData.permanent.data.auditMonotonicCounter); +- *((UINT32*)&buf[34]) = cpu_to_be32(rsp->result); ++ *((UINT32*)&buf[34]) = CPU_TO_BE32(tpmData.permanent.data.auditMonotonicCounter); ++ *((UINT32*)&buf[34]) = CPU_TO_BE32(rsp->result); + sha1_init(&sha1_ctx); + sha1_update(&sha1_ctx, tpmData.stany.data.auditDigest.digest, + sizeof(TPM_DIGEST)); +@@ -158,7 +159,7 @@ TPM_RESULT TPM_GetAuditDigestSigned(TPM_ + } + memcpy(&buf[0], "\x05\x00ADIG", 6); + 
memcpy(&buf[6], antiReplay->nonce, 20); +- *(UINT32*)&buf[26] = cpu_to_be32(buf_size - 30); ++ *(UINT32*)&buf[26] = CPU_TO_BE32(buf_size - 30); + memcpy(&buf[30], auditDigest->digest, 20); + ptr = &buf[50]; + len = buf_size - 50; +@@ -198,4 +199,3 @@ TPM_RESULT TPM_SetOrdinalAuditStatus(TPM + } + return TPM_SUCCESS; + } +- +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_authorization.c vtpm/tpm/tpm_authorization.c +--- orig/tpm_emulator-0.2/tpm/tpm_authorization.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_authorization.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -268,7 +269,7 @@ TPM_RESULT tpm_verify_auth(TPM_AUTH *aut + { + hmac_ctx_t ctx; + TPM_SESSION_DATA *session; +- UINT32 auth_handle = cpu_to_be32(auth->authHandle); ++ UINT32 auth_handle = CPU_TO_BE32(auth->authHandle); + + info("tpm_verify_auth(%08x)", auth->authHandle); + /* get dedicated authorization session */ +@@ -316,5 +317,3 @@ void tpm_decrypt_auth_secret(TPM_ENCAUTH + for (i = 0; i < sizeof(TPM_SECRET); i++) + plainAuth[i] ^= encAuth[i]; + } +- +- +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_capability.c vtpm/tpm/tpm_capability.c +--- orig/tpm_emulator-0.2/tpm/tpm_capability.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_capability.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published +@@ -398,7 +399,7 @@ TPM_RESULT TPM_GetCapability(TPM_CAPABIL + + case TPM_CAP_KEY_HANDLE: + debug("[TPM_CAP_KEY_HANDLE]"); +- subCapSize = cpu_to_be32(TPM_RT_KEY); ++ subCapSize = CPU_TO_BE32(TPM_RT_KEY); + return cap_handle(4, (BYTE*)&subCapSize, respSize, resp); + + case TPM_CAP_CHECK_LOADED: +@@ -472,4 +473,3 @@ TPM_RESULT TPM_GetCapability(TPM_CAPABIL + return TPM_BAD_MODE; + } + } +- +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c vtpm/tpm/tpm_cmd_handler.c +--- orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_cmd_handler.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -26,7 +27,7 @@ static void tpm_compute_in_param_digest( + { + sha1_ctx_t sha1; + UINT32 offset; +- UINT32 ord = cpu_to_be32(req->ordinal); ++ UINT32 ord = CPU_TO_BE32(req->ordinal); + + /* skip all key-handles at the beginning */ + switch (req->ordinal) { +@@ -82,8 +83,8 @@ static void tpm_compute_in_param_digest( + static void tpm_compute_out_param_digest(TPM_COMMAND_CODE ordinal, TPM_RESPONSE *rsp) + { + sha1_ctx_t sha1; +- UINT32 res = cpu_to_be32(rsp->result); +- UINT32 ord = cpu_to_be32(ordinal); ++ UINT32 res = CPU_TO_BE32(rsp->result); ++ UINT32 ord = CPU_TO_BE32(ordinal); + + /* compute SHA1 hash */ + sha1_init(&sha1); +@@ -3081,7 +3082,7 @@ static void tpm_setup_rsp_auth(TPM_COMMA + hmac_update(&hmac, rsp->auth2->digest, sizeof(rsp->auth2->digest)); + #if 0 + if (tpm_get_auth(rsp->auth2->authHandle)->type == TPM_ST_OIAP) { +- UINT32 handle = cpu_to_be32(rsp->auth2->authHandle); ++ UINT32 handle = CPU_TO_BE32(rsp->auth2->authHandle); + 
hmac_update(&hmac, (BYTE*)&handle, 4); + } + #endif +@@ -3096,7 +3097,7 @@ static void tpm_setup_rsp_auth(TPM_COMMA + hmac_update(&hmac, rsp->auth1->digest, sizeof(rsp->auth1->digest)); + #if 0 + if (tpm_get_auth(rsp->auth1->authHandle)->type == TPM_ST_OIAP) { +- UINT32 handle = cpu_to_be32(rsp->auth1->authHandle); ++ UINT32 handle = CPU_TO_BE32(rsp->auth1->authHandle); + hmac_update(&hmac, (BYTE*)&handle, 4); + } + #endif +@@ -3179,7 +3180,9 @@ extern const char *tpm_error_to_string(T + static void tpm_execute_command(TPM_REQUEST *req, TPM_RESPONSE *rsp) + { + TPM_RESULT res; +- ++ ++ req->tag = (BYTE) req->tag; // VIN HACK!!! ++ + /* setup authorisation as well as response tag and size */ + memset(rsp, 0, sizeof(*rsp)); + switch (req->tag) { +@@ -3878,4 +3881,3 @@ int tpm_handle_command(const uint8_t *in + tpm_free(rsp.param); + return 0; + } +- +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c +--- orig/tpm_emulator-0.2/tpm/tpm_crypto.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_crypto.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -106,7 +107,7 @@ TPM_RESULT tpm_sign(TPM_KEY_DATA *key, T + /* setup TPM_SIGN_INFO structure */ + memcpy(&buf[0], "\x05\x00SIGN", 6); + memcpy(&buf[6], auth->nonceOdd.nonce, 20); +- *(UINT32*)&buf[26] = cpu_to_be32(areaToSignSize); ++ *(UINT32*)&buf[26] = CPU_TO_BE32(areaToSignSize); + memcpy(&buf[30], areaToSign, areaToSignSize); + if (rsa_sign(&key->key, RSA_SSA_PKCS1_SHA1, + buf, areaToSignSize + 30, *sig)) { +@@ -379,4 +380,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL + } + return TPM_SUCCESS; + } +- +diff -uprN 
orig/tpm_emulator-0.2/tpm/tpm_data.c vtpm/tpm/tpm_data.c +--- orig/tpm_emulator-0.2/tpm/tpm_data.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_data.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -15,9 +16,15 @@ + * $Id: tpm_data.c 9 2005-04-26 18:15:31Z mast $ + */ + ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++#include <unistd.h> ++ + #include "tpm_emulator.h" + #include "tpm_structures.h" + #include "tpm_marshalling.h" ++#include "vtpm_manager.h" + + TPM_DATA tpmData; + +@@ -28,6 +35,7 @@ BOOL tpm_get_physical_presence(void) + + void tpm_init_data(void) + { ++#ifndef TPM_GENERATE_EK + /* endorsement key */ + uint8_t ek_n[] = "\xa8\xdb\xa9\x42\xa8\xf3\xb8\x06\x85\x90\x76\x93\xad\xf7" + "\x74\xec\x3f\xd3\x3d\x9d\xe8\x2e\xff\x15\xed\x0e\xce\x5f\x93" +@@ -66,6 +74,8 @@ void tpm_init_data(void) + "\xd1\xc0\x8b\x5b\xa2\x2e\xa7\x15\xca\x50\x75\x10\x48\x9c\x2b" + "\x18\xb9\x67\x8f\x5d\x64\xc3\x28\x9f\x2f\x16\x2f\x08\xda\x47" + "\xec\x86\x43\x0c\x80\x99\x07\x34\x0f"; ++#endif ++ + int i; + /* reset all data to NULL, FALSE or 0 */ + memset(&tpmData, 0, sizeof(tpmData)); +@@ -85,6 +95,10 @@ void tpm_init_data(void) + tpmData.permanent.data.version.revMinor = VERSION_MINOR; + /* setup PCR attributes */ + for (i = 0; i < TPM_NUM_PCR; i++) { ++ int j; ++ for (j=0; j < TPM_NUM_LOCALITY; j++) { ++ tpmData.permanent.data.pcrAttrib[i].pcrExtendLocal[j] = TRUE; ++ } + tpmData.permanent.data.pcrAttrib[i].pcrReset = TRUE; + } + /* set tick type */ +@@ -115,49 +129,235 @@ void tpm_release_data(void) + + #ifdef TPM_STORE_TO_FILE + +-#include <linux/fs.h> +-#include <linux/unistd.h> 
+-#include <asm/uaccess.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++ ++ static int vtpm_tx_fh=-1, vtpm_rx_fh=-1; + +-#define TPM_STORAGE_FILE "/var/tpm/tpm_emulator-1.2." STR(VERSION_MAJOR) "." STR(VERSION_MINOR) ++#ifdef VTPM_MUTLI_VM ++ #define DEV_FE "/dev/tpm" ++#else ++ #define VTPM_RX_FIFO_D "/var/vtpm/fifos/vtpm-to-%d.fifo" ++ #define VTPM_TX_FIFO "/var/vtpm/fifos/vtpm-from-all.fifo" ++ ++ extern int dmi_id; ++ static char *vtpm_rx_name=NULL; ++#endif + + static int write_to_file(uint8_t *data, size_t data_length) + { +- int res; +- struct file *fp; +- mm_segment_t old_fs = get_fs(); +- fp = filp_open(TPM_STORAGE_FILE, O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR); +- if (IS_ERR(fp)) return -1; +- set_fs(get_ds()); +- res = fp->f_op->write(fp, data, data_length, &fp->f_pos); +- set_fs(old_fs); +- filp_close(fp, NULL); +- return (res == data_length) ? 0 : -1; ++ int res, out_data_size, in_header_size; ++ BYTE *ptr, *out_data, *in_header; ++ UINT32 result, len, in_rsp_size; ++ UINT16 tag = VTPM_TAG_REQ; ++ ++ printf("Saving NVM\n"); ++ if (vtpm_tx_fh < 0) { ++#ifdef VTPM_MUTLI_VM ++ vtpm_tx_fh = open(DEV_FE, O_RDWR); ++#else ++ vtpm_tx_fh = open(VTPM_TX_FIFO, O_WRONLY); ++#endif ++ } ++ ++ if (vtpm_tx_fh < 0) { ++ return -1; ++ } ++ ++ // Send request to VTPM Manager to encrypt data ++#ifdef VTPM_MUTLI_VM ++ out_data_size = len = VTPM_COMMAND_HEADER_SIZE_CLT + data_length; ++#else ++ out_data_size = len = VTPM_COMMAND_HEADER_SIZE_SRV + data_length; ++#endif ++ ++ out_data = ptr = (BYTE *) malloc(len); ++ ++ if (ptr == NULL ++#ifndef VTPM_MUTLI_VM ++ || tpm_marshal_UINT32(&ptr, &len, dmi_id) ++#endif ++ || tpm_marshal_UINT16(&ptr, &len, tag) ++#ifdef VTPM_MUTLI_VM ++ || tpm_marshal_UINT32(&ptr, &len, out_data_size) ++#else ++ || tpm_marshal_UINT32(&ptr, &len, out_data_size - sizeof(uint32_t)) ++#endif ++ || tpm_marshal_UINT32(&ptr, &len, VTPM_ORD_SAVENVM) ++ || tpm_marshal_BYTE_ARRAY(&ptr, &len, data, data_length)) { ++ 
free(out_data); ++ return -1; ++ } ++ ++ printf("\tSending SaveNVM Command.\n"); ++ res = write(vtpm_tx_fh, out_data, out_data_size); ++ free(out_data); ++ if (res != out_data_size) return -1; ++ ++ if (vtpm_rx_fh < 0) { ++#ifdef VTPM_MUTLI_VM ++ vtpm_rx_fh = vtpm_tx_fh ++#else ++ if (vtpm_rx_name == NULL) { ++ vtpm_rx_name = malloc(10 + strlen(VTPM_RX_FIFO_D)); ++ sprintf(vtpm_rx_name, VTPM_RX_FIFO_D, (uint32_t) dmi_id); ++ } ++ vtpm_rx_fh = open(vtpm_rx_name, O_RDONLY); ++#endif ++ } ++ ++ if (vtpm_rx_fh < 0) { ++ return -1; ++ } ++ ++ // Read Header of response so we can get the size & status ++#ifdef VTPM_MUTLI_VM ++ in_header_size = len = VTPM_COMMAND_HEADER_SIZE_CLT; ++#else ++ in_header_size = len = VTPM_COMMAND_HEADER_SIZE_SRV; ++#endif ++ in_header = ptr = malloc(in_header_size); ++ ++ printf("\tReading SaveNVM header.\n"); ++ res = read(vtpm_rx_fh, in_header, in_header_size); ++ ++ if ( (res != in_header_size) ++#ifndef VTPM_MUTLI_VM ++ || tpm_unmarshal_UINT32(&ptr, &len, (UINT32*)&dmi_id) ++#endif ++ || tpm_unmarshal_UINT16(&ptr, &len, &tag) ++ || tpm_unmarshal_UINT32(&ptr, &len, &in_rsp_size) ++ || tpm_unmarshal_UINT32(&ptr, &len, &result) ) { ++ free(in_header); ++ return -1; ++ } ++ free(in_header); ++ ++ if (result != VTPM_SUCCESS) { ++ return -1; ++ } ++ ++#ifdef VTPM_MUTLI_VM ++ close(vtpm_tx_fh); close(vtpm_rx_fh); ++#endif ++ ++ printf("\tFinishing up SaveNVM\n"); ++ return (0); + } + + static int read_from_file(uint8_t **data, size_t *data_length) + { +- int res; +- struct file *fp; +- mm_segment_t old_fs = get_fs(); +- fp = filp_open(TPM_STORAGE_FILE, O_RDONLY, 0); +- if (IS_ERR(fp)) return -1; +- *data_length = (size_t)fp->f_dentry->d_inode->i_size; +- /* *data_length = i_size_read(fp->f_dentry->d_inode); */ +- *data = tpm_malloc(*data_length); +- if (*data == NULL) { +- filp_close(fp, NULL); ++ int res, out_data_size, in_header_size; ++ uint8_t *ptr, *out_data, *in_header; ++ UINT16 tag = VTPM_TAG_REQ; ++ UINT32 len, in_rsp_size, result; 
++#ifdef VTPM_MUTLI_VM ++ int vtpm_rx_fh, vtpm_tx_fh; ++#endif ++ ++ printf("Loading NVM.\n"); ++ if (vtpm_tx_fh < 0) { ++#ifdef VTPM_MUTLI_VM ++ vtpm_tx_fh = open(DEV_FE, O_RDWR); ++#else ++ vtpm_tx_fh = open(VTPM_TX_FIFO, O_WRONLY); ++#endif ++ } ++ ++ if (vtpm_tx_fh < 0) { ++ return -1; ++ } ++ ++ // Send request to VTPM Manager to encrypt data ++#ifdef VTPM_MUTLI_VM ++ out_data_size = len = VTPM_COMMAND_HEADER_SIZE_CLT; ++#else ++ out_data_size = len = VTPM_COMMAND_HEADER_SIZE_SRV; ++#endif ++ out_data = ptr = (BYTE *) malloc(len); ++ ++ if (ptr == NULL ++#ifndef VTPM_MUTLI_VM ++ || tpm_marshal_UINT32(&ptr, &len, dmi_id) ++#endif ++ || tpm_marshal_UINT16(&ptr, &len, tag) ++#ifdef VTPM_MUTLI_VM ++ || tpm_marshal_UINT32(&ptr, &len, out_data_size) ++#else ++ || tpm_marshal_UINT32(&ptr, &len, out_data_size - sizeof(uint32_t)) ++#endif ++ || tpm_marshal_UINT32(&ptr, &len, VTPM_ORD_LOADNVM)) { ++ free(out_data); + return -1; + } +- set_fs(get_ds()); +- res = fp->f_op->read(fp, *data, *data_length, &fp->f_pos); +- set_fs(old_fs); +- filp_close(fp, NULL); ++ ++ printf("\tSending LoadNVM command\n"); ++ res = write(vtpm_tx_fh, out_data, out_data_size); ++ free(out_data); ++ if (res != out_data_size) return -1; ++ ++ if (vtpm_rx_fh < 0) { ++#ifdef VTPM_MUTLI_VM ++ vtpm_rx_fh = vtpm_tx_fh; ++#else ++ if (vtpm_rx_name == NULL) { ++ vtpm_rx_name = malloc(10 + strlen(VTPM_RX_FIFO_D)); ++ sprintf(vtpm_rx_name, VTPM_RX_FIFO_D, (uint32_t) dmi_id); ++ } ++ vtpm_rx_fh = open(vtpm_rx_name, O_RDONLY); ++#endif ++ } ++ ++ if (vtpm_rx_fh < 0) { ++ return -1; ++ } ++ ++ // Read Header of response so we can get the size & status ++#ifdef VTPM_MUTLI_VM ++ in_header_size = len = VTPM_COMMAND_HEADER_SIZE_CLT; ++#else ++ in_header_size = len = VTPM_COMMAND_HEADER_SIZE_SRV; ++#endif ++ in_header = ptr = malloc(in_header_size); ++ ++ printf("\tReading LoadNVM header\n"); ++ res = read(vtpm_rx_fh, in_header, in_header_size); ++ ++ if ( (res != in_header_size) ++#ifndef VTPM_MUTLI_VM ++ || 
tpm_unmarshal_UINT32(&ptr, &len, (UINT32*)&dmi_id) ++#endif ++ || tpm_unmarshal_UINT16(&ptr, &len, &tag) ++ || tpm_unmarshal_UINT32(&ptr, &len, &in_rsp_size) ++ || tpm_unmarshal_UINT32(&ptr, &len, &result) ) { ++ free(in_header); ++ return -1; ++ } ++ free(in_header); ++ ++ if (result != VTPM_SUCCESS) { ++ return -1; ++ } ++ ++ // Read Encrypted data from VTPM Manager ++ *data_length = in_rsp_size - VTPM_COMMAND_HEADER_SIZE_CLT; ++ *data = (uint8_t *) malloc(*data_length); ++ ++ printf("\tReading clear data from LoadNVM.\n"); ++ res = read(vtpm_rx_fh, *data, *data_length); ++#ifdef VTPM_MUTLI_VM ++ close(vtpm_rx_fh);close(vtpm_tx_fh); ++#endif ++ ++ printf("\tReturing from loading NVM\n"); + if (res != *data_length) { +- tpm_free(*data); +- return -1; ++ free(*data); ++ return -1; ++ } else { ++ return 0; + } +- return 0; ++ + } + + #else +@@ -231,7 +431,6 @@ int tpm_restore_permanent_data(void) + + int tpm_erase_permanent_data(void) + { +- int res = write_to_file("", 0); ++ int res = write_to_file((uint8_t*)"", 0); + return res; + } +- +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_deprecated.c vtpm/tpm/tpm_deprecated.c +--- orig/tpm_emulator-0.2/tpm/tpm_deprecated.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_deprecated.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -50,7 +51,7 @@ TPM_RESULT TPM_SaveKeyContext(TPM_KEY_HA + BYTE *ptr; + UINT32 len; + info("TPM_SaveKeyContext()"); +- res = TPM_SaveContext(keyHandle, TPM_RT_KEY, "SaveKeyContext..", ++ res = TPM_SaveContext(keyHandle, TPM_RT_KEY, (BYTE*)"SaveKeyContext..", + keyContextSize, &contextBlob); + if (res != TPM_SUCCESS) return res; + len 
= *keyContextSize; +@@ -82,7 +83,7 @@ TPM_RESULT TPM_SaveAuthContext(TPM_AUTHH + BYTE *ptr; + UINT32 len; + info("TPM_SaveAuthContext()"); +- res = TPM_SaveContext(authHandle, TPM_RT_KEY, "SaveAuthContext.", ++ res = TPM_SaveContext(authHandle, TPM_RT_KEY, (BYTE*)"SaveAuthContext.", + authContextSize, &contextBlob); + if (res != TPM_SUCCESS) return res; + len = *authContextSize; +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_emulator.h vtpm/tpm/tpm_emulator.h +--- orig/tpm_emulator-0.2/tpm/tpm_emulator.h 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_emulator.h 2005-08-17 10:55:52.000000000 -0700 +@@ -1,5 +1,6 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -22,7 +23,8 @@ + /* TPM configuration */ + #define TPM_STORE_TO_FILE 1 + #undef TPM_STRONG_PERSISTENCE +-#undef TPM_GENERATE_EK ++//#undef TPM_GENERATE_EK ++#define TPM_GENERATE_EK + + /** + * tpm_emulator_init - initialises and starts the TPM emulator +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_integrity.c vtpm/tpm/tpm_integrity.c +--- orig/tpm_emulator-0.2/tpm/tpm_integrity.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_integrity.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -194,4 +195,3 @@ TPM_RESULT tpm_verify_pcr(TPM_KEY_DATA * + } + return TPM_SUCCESS; + } +- +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_structures.h vtpm/tpm/tpm_structures.h +--- 
orig/tpm_emulator-0.2/tpm/tpm_structures.h 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_structures.h 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -18,7 +19,7 @@ + #ifndef _TPM_STRUCTURES_H_ + #define _TPM_STRUCTURES_H_ + +-#include <linux/types.h> ++//#include <linux/types.h> + #include "crypto/rsa.h" + + /* +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_testing.c vtpm/tpm/tpm_testing.c +--- orig/tpm_emulator-0.2/tpm/tpm_testing.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_testing.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -95,24 +96,24 @@ static int tpm_test_sha1(void) + struct { + uint8_t *data; uint32_t repetitions; uint8_t *digest; + } test_cases[] = {{ +- "abc", 1, +- "\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D" ++ (uint8_t*)"abc", 1, ++ (uint8_t*)"\xA9\x99\x3E\x36\x47\x06\x81\x6A\xBA\x3E\x25\x71\x78\x50\xC2\x6C\x9C\xD0\xD8\x9D" + }, { +- "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 1, +- "\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1" ++ (uint8_t*)"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 1, ++ (uint8_t*)"\x84\x98\x3E\x44\x1C\x3B\xD2\x6E\xBA\xAE\x4A\xA1\xF9\x51\x29\xE5\xE5\x46\x70\xF1" + }, { +- "a", 1000000, 
+- "\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F" ++ (uint8_t*)"a", 1000000, ++ (uint8_t*)"\x34\xAA\x97\x3C\xD4\xC4\xDA\xA4\xF6\x1E\xEB\x2B\xDB\xAD\x27\x31\x65\x34\x01\x6F" + }, { +- "0123456701234567012345670123456701234567012345670123456701234567", 10, +- "\xDE\xA3\x56\xA2\xCD\xDD\x90\xC7\xA7\xEC\xED\xC5\xEB\xB5\x63\x93\x4F\x46\x04\x52" ++ (uint8_t*)"0123456701234567012345670123456701234567012345670123456701234567", 10, ++ (uint8_t*)"\xDE\xA3\x56\xA2\xCD\xDD\x90\xC7\xA7\xEC\xED\xC5\xEB\xB5\x63\x93\x4F\x46\x04\x52" + }}; + + debug("tpm_test_sha1()"); + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + sha1_init(&ctx); + for (j = 0; j < test_cases[i].repetitions; j++) +- sha1_update(&ctx, test_cases[i].data, strlen(test_cases[i].data)); ++ sha1_update(&ctx, test_cases[i].data, strlen((char*)test_cases[i].data)); + sha1_final(&ctx, digest); + if (memcmp(digest, test_cases[i].digest, SHA1_DIGEST_LENGTH) != 0) return -1; + } +@@ -128,41 +129,41 @@ static int tpm_test_hmac(void) + struct { + uint8_t *key, key_len, *data, data_len, *digest; + } test_cases[] = {{ +- "\x0b", 20, "Hi There", 8, +- "\xb6\x17\x31\x86\x55\x05\x72\x64\xe2\x8b\xc0\xb6\xfb\x37\x8c\x8e\xf1\x46\xbe\x00" ++ (uint8_t*)"\x0b", 20, (uint8_t*)"Hi There", 8, ++ (uint8_t*)"\xb6\x17\x31\x86\x55\x05\x72\x64\xe2\x8b\xc0\xb6\xfb\x37\x8c\x8e\xf1\x46\xbe\x00" + }, { +- "Jefe", 4, "what do ya want for nothing?", 28, +- "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79" ++ (uint8_t*)"Jefe", 4, (uint8_t*)"what do ya want for nothing?", 28, ++ (uint8_t*)"\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79" + }, { +- "\xaa", 20, "\xdd", 50, +- "\x12\x5d\x73\x42\xb9\xac\x11\xcd\x91\xa3\x9a\xf4\x8a\xa1\x7b\x4f\x63\xf1\x75\xd3" ++ (uint8_t*)"\xaa", 20, (uint8_t*)"\xdd", 50, ++ (uint8_t*)"\x12\x5d\x73\x42\xb9\xac\x11\xcd\x91\xa3\x9a\xf4\x8a\xa1\x7b\x4f\x63\xf1\x75\xd3" + }, { +- 
"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14" +- "\x15\x16\x17\x18\x19", 25, "\xcd", 50, +- "\x4c\x90\x07\xf4\x02\x62\x50\xc6\xbc\x84\x14\xf9\xbf\x50\xc8\x6c\x2d\x72\x35\xda" ++ (uint8_t*)"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14" ++ "\x15\x16\x17\x18\x19", 25, (uint8_t*)"\xcd", 50, ++ (uint8_t*)"\x4c\x90\x07\xf4\x02\x62\x50\xc6\xbc\x84\x14\xf9\xbf\x50\xc8\x6c\x2d\x72\x35\xda" + }, { +- "\x0c", 20, "Test With Truncation", 20, +- "\x4c\x1a\x03\x42\x4b\x55\xe0\x7f\xe7\xf2\x7b\xe1\xd5\x8b\xb9\x32\x4a\x9a\x5a\x04" ++ (uint8_t*)"\x0c", 20, (uint8_t*)"Test With Truncation", 20, ++ (uint8_t*)"\x4c\x1a\x03\x42\x4b\x55\xe0\x7f\xe7\xf2\x7b\xe1\xd5\x8b\xb9\x32\x4a\x9a\x5a\x04" + }, { +- "\xaa", 80, "Test Using Larger Than Block-Size Key - Hash Key First", 54, +- "\xaa\x4a\xe5\xe1\x52\x72\xd0\x0e\x95\x70\x56\x37\xce\x8a\x3b\x55\xed\x40\x21\x12" ++ (uint8_t*)"\xaa", 80, (uint8_t*)"Test Using Larger Than Block-Size Key - Hash Key First", 54, ++ (uint8_t*)"\xaa\x4a\xe5\xe1\x52\x72\xd0\x0e\x95\x70\x56\x37\xce\x8a\x3b\x55\xed\x40\x21\x12" + }, { +- "\xaa", 80, +- "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data", 73, +- "\xe8\xe9\x9d\x0f\x45\x23\x7d\x78\x6d\x6b\xba\xa7\x96\x5c\x78\x08\xbb\xff\x1a\x91" ++ (uint8_t*)"\xaa", 80, ++ (uint8_t*)"Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data", 73, ++ (uint8_t*)"\xe8\xe9\x9d\x0f\x45\x23\x7d\x78\x6d\x6b\xba\xa7\x96\x5c\x78\x08\xbb\xff\x1a\x91" + }}; + + debug("tpm_test_hmac()"); + for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { +- if (strlen(test_cases[i].key) < test_cases[i].key_len) { ++ if (strlen((char*)test_cases[i].key) < test_cases[i].key_len) { + uint8_t key[test_cases[i].key_len]; + memset(key, test_cases[i].key[0], test_cases[i].key_len); + hmac_init(&ctx, key, test_cases[i].key_len); + } else { + hmac_init(&ctx, test_cases[i].key, test_cases[i].key_len); + } +- for (j = 0; j < 
test_cases[i].data_len; j += strlen(test_cases[i].data)) { +- hmac_update(&ctx, test_cases[i].data, strlen(test_cases[i].data)); ++ for (j = 0; j < test_cases[i].data_len; j += strlen((char*)test_cases[i].data)) { ++ hmac_update(&ctx, test_cases[i].data, strlen((char*)test_cases[i].data)); + } + hmac_final(&ctx, digest); + if (memcmp(digest, test_cases[i].digest, SHA1_DIGEST_LENGTH) != 0) return -1; +@@ -173,9 +174,9 @@ static int tpm_test_hmac(void) + static int tpm_test_rsa_EK(void) + { + int res = 0; +- char *data = "RSA PKCS #1 v1.5 Test-String"; ++ uint8_t *data = (uint8_t*)"RSA PKCS #1 v1.5 Test-String"; + uint8_t buf[256]; +- size_t buf_len, data_len = strlen(data); ++ size_t buf_len, data_len = strlen((char*)data); + rsa_private_key_t priv_key; + rsa_public_key_t pub_key; + +diff -uprN orig/tpm_emulator-0.2/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c +--- orig/tpm_emulator-0.2/tpm/tpm_ticks.c 2005-08-17 10:58:36.000000000 -0700 ++++ vtpm/tpm/tpm_ticks.c 2005-08-17 10:55:52.000000000 -0700 +@@ -1,6 +1,7 @@ + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux + * Copyright (C) 2004 Mario Strasser <mast@xxxxxxx>, + * Swiss Federal Institute of Technology (ETH) Zurich ++ * Copyright (C) 2005 INTEL Corp + * + * This module is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published +@@ -37,9 +38,7 @@ TPM_RESULT TPM_SetTickType(TPM_TICKTYPE + TPM_RESULT TPM_GetTicks(TPM_CURRENT_TICKS *currentTime) + { + info("TPM_GetTicks()"); +- memcpy(currentTime, &tpmData.stany.data.currentTicks, +- sizeof(TPM_CURRENT_TICKS)); +- return TPM_SUCCESS; ++ return TPM_DISABLED_CMD; + } + + TPM_RESULT TPM_TickStampBlob(TPM_KEY_HANDLE keyHandle, TPM_NONCE *antiReplay, +@@ -47,61 +46,12 @@ TPM_RESULT TPM_TickStampBlob(TPM_KEY_HAN + TPM_CURRENT_TICKS *currentTicks, + UINT32 *sigSize, BYTE **sig) + { +- TPM_RESULT res; +- TPM_KEY_DATA *key; +- BYTE *info, *p; +- UINT32 info_length, length; + 
info("TPM_TickStampBlob()"); +- /* get key */ +- key = tpm_get_key(keyHandle); +- if (key == NULL) return TPM_INVALID_KEYHANDLE; +- /* verify authorization */ +- res = tpm_verify_auth(auth1, key->usageAuth, keyHandle); +- if (res != TPM_SUCCESS) return res; +- if (key->keyUsage != TPM_KEY_SIGNING && key->keyUsage != TPM_KEY_LEGACY +- && key->keyUsage != TPM_KEY_IDENTITY) return TPM_INVALID_KEYUSAGE; +- /* get current ticks */ +- TPM_GetTicks(currentTicks); +- /* sign data using signature scheme PKCS1_SHA1 and TPM_SIGN_INFO container */ +- *sigSize = key->key.size >> 3; +- *sig = tpm_malloc(*sigSize); +- if (*sig == NULL) return TPM_FAIL; +- /* setup TPM_SIGN_INFO structure */ +- info_length = 30 + sizeof(TPM_DIGEST) + sizeof_TPM_CURRENT_TICKS(currentTicks); +- info = tpm_malloc(info_length); +- if (info == NULL) { +- tpm_free(*sig); +- return TPM_FAIL; +- } +- memcpy(&info[0], "\x05\x00TSTP", 6); +- memcpy(&info[6], antiReplay->nonce, 20); +- *(UINT32*)&info[26] = cpu_to_be32(20 +- + sizeof_TPM_CURRENT_TICKS(currentTicks)); +- memcpy(&info[30], digestToStamp->digest, sizeof(TPM_DIGEST)); +- p = &info[30 + sizeof(TPM_DIGEST)]; +- length = sizeof_TPM_CURRENT_TICKS(currentTicks); +- if (tpm_marshal_TPM_CURRENT_TICKS(&p, &length, currentTicks) +- || rsa_sign(&key->key, RSA_SSA_PKCS1_SHA1, info, info_length, *sig)) { +- tpm_free(*sig); +- tpm_free(info); +- return TPM_FAIL; +- } +- return TPM_SUCCESS; ++ return TPM_DISABLED_CMD; + } + + void tpm_update_ticks(void) + { +- if (tpmData.stany.data.currentTicks.tag == 0) { +- tpmData.stany.data.currentTicks.tag = TPM_TAG_CURRENT_TICKS; +- tpmData.stany.data.currentTicks.currentTicks += tpm_get_ticks(); +- tpmData.stany.data.currentTicks.tickType = tpmData.permanent.data.tickType; +- tpm_get_random_bytes(tpmData.stany.data.currentTicks.tickNonce.nonce, +- sizeof(TPM_NONCE)); +- tpmData.stany.data.currentTicks.tickRate = 1; +- tpmData.stany.data.currentTicks.tickSecurity = TICK_SEC_NO_CHECK; +- } else { +- 
tpmData.stany.data.currentTicks.currentTicks += tpm_get_ticks(); +- } + } + + +diff -uprN orig/tpm_emulator-0.2/tpm/vtpm_manager.h vtpm/tpm/vtpm_manager.h +--- orig/tpm_emulator-0.2/tpm/vtpm_manager.h 1969-12-31 16:00:00.000000000 -0800 ++++ vtpm/tpm/vtpm_manager.h 2005-08-17 10:55:52.000000000 -0700 +@@ -0,0 +1,126 @@ ++// =================================================================== ++// ++// Copyright (c) 2005, Intel Corp. ++// All rights reserved. ++// ++// Redistribution and use in source and binary forms, with or without ++// modification, are permitted provided that the following conditions ++// are met: ++// ++// * Redistributions of source code must retain the above copyright ++// notice, this list of conditions and the following disclaimer. ++// * Redistributions in binary form must reproduce the above ++// copyright notice, this list of conditions and the following ++// disclaimer in the documentation and/or other materials provided ++// with the distribution. ++// * Neither the name of Intel Corporation nor the names of its ++// contributors may be used to endorse or promote products derived ++// from this software without specific prior written permission. ++// ++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++// OF THE POSSIBILITY OF SUCH DAMAGE. ++// =================================================================== ++// ++// vtpm_manager.h ++// ++// Public Interface header for VTPM Manager ++// ++// ================================================================== ++ ++#ifndef __VTPM_MANAGER_H__ ++#define __VTPM_MANAGER_H__ ++ ++#define VTPM_TAG_REQ 0x01c1 ++#define VTPM_TAG_RSP 0x01c4 ++#define COMMAND_BUFFER_SIZE 4096 ++ ++// Header sizes. Note Header MAY include the DMI ++#define VTPM_COMMAND_HEADER_SIZE_SRV ( sizeof(UINT32) + sizeof(TPM_TAG) + sizeof(UINT32) + sizeof(TPM_COMMAND_CODE)) ++#define VTPM_COMMAND_HEADER_SIZE_CLT ( sizeof(TPM_TAG) + sizeof(UINT32) + sizeof(TPM_COMMAND_CODE)) ++ ++//************************ Command Codes **************************** ++#define VTPM_ORD_OPEN 1 // ULM Creates New DMI ++#define VTPM_ORD_CLOSE 2 // ULM Closes a DMI ++#define VTPM_ORD_DELETE 3 // ULM Permemently Deletes DMI ++#define VTPM_ORD_SAVENVM 4 // DMI requests Secrets Unseal ++#define VTPM_ORD_LOADNVM 5 // DMI requests Secrets Saved ++#define VTPM_ORD_TPMCOMMAND 6 // DMI issues HW TPM Command ++ ++//************************ Return Codes **************************** ++#define VTPM_SUCCESS 0 ++#define VTPM_FAIL 1 ++#define VTPM_UNSUPPORTED 2 ++#define VTPM_FORBIDDEN 3 ++#define VTPM_RESTORE_CONTEXT_FAILED 4 ++#define VTPM_INVALID_REQUEST 5 ++ ++/******************* Command Parameter API ************************* ++ ++VTPM Command Format ++ dmi: 4 bytes // Source 
of message. ++ // WARNING: This is prepended by the channel. ++ // Thus it is received by VTPM Manager, ++ // but not sent by DMI ++ tpm tag: 2 bytes ++ command size: 4 bytes // Size of command including header but not DMI ++ ord: 4 bytes // Command ordinal above ++ parameters: size - 10 bytes // Command Parameter ++ ++VTPM Response Format ++ tpm tag: 2 bytes ++ response_size: 4 bytes ++ status: 4 bytes ++ parameters: size - 10 bytes ++ ++ ++VTPM_Open: ++ Input Parameters: ++ Domain_type: 1 byte ++ domain_id: 4 bytes ++ instance_id: 4 bytes ++ Output Parameters: ++ None ++ ++VTPM_Close ++ Input Parameters: ++ instance_id: 4 bytes ++ Output Parameters: ++ None ++ ++VTPM_Delete ++ Input Parameters: ++ instance_id: 4 bytes ++ Output Parameters: ++ None ++ ++VTPM_SaveNVM ++ Input Parameters: ++ data: n bytes (Header indicates size of data) ++ Output Parameters: ++ None ++ ++VTPM_LoadNVM ++ Input Parameters: ++ None ++ Output Parameters: ++ data: n bytes (Header indicates size of data) ++ ++VTPM_TPMCommand ++ Input Parameters: ++ TPM Command Byte Stream: n bytes ++ Output Parameters: ++ TPM Reponse Byte Stream: n bytes ++ ++*********************************************************************/ ++ ++#endif //_VTPM_MANAGER_H_ +diff -uprN orig/tpm_emulator-0.2/tpmd.c vtpm/tpmd.c +--- orig/tpm_emulator-0.2/tpmd.c 1969-12-31 16:00:00.000000000 -0800 ++++ vtpm/tpmd.c 2005-08-17 10:55:52.000000000 -0700 +@@ -0,0 +1,207 @@ ++/* Software-Based Trusted Platform Module (TPM) Emulator for Linux ++ * Copyright (C) 2005 INTEL Corp ++ * ++ * This module is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published ++ * by the Free Software Foundation; either version 2 of the License, ++ * or (at your option) any later version. 
++ * ++ * This module is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <unistd.h> ++#include <string.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <fcntl.h> ++#include <sys/time.h> ++ ++#include "tpm_emulator.h" ++#include "vtpm_manager.h" ++ ++#ifdef VTPM_MULTI_VM ++ #define DEV_BE "/dev/vtpm" ++#else ++ #define GUEST_RX_FIFO_D "/var/vtpm/fifos/guest-to-%d.fifo" ++ #define GUEST_TX_FIFO "/var/vtpm/fifos/guest-from-all.fifo" ++ ++ int dmi_id; ++#endif ++ ++#define BUFFER_SIZE 2048 ++ ++static uint8_t ctrl_msg[] = { 0, 0, 0, 0, // destination ++ 1, 193, // VTPM_TAG ++ 0, 0, 0, 10, // Size ++ 0, 0, 0, 0}; // TPM_SUCCESS ++ ++ ++static int devurandom=0; ++ ++ ++void get_random_bytes(void *buf, int nbytes) { ++ ++ if (devurandom == 0) { ++ devurandom = open("/dev/urandom", O_RDONLY); ++ } ++ ++ if (read(devurandom, buf, nbytes) != nbytes) { ++ printf("Can't get random number.\n"); ++ exit(-1); ++ } ++} ++ ++uint64_t tpm_get_ticks(void) ++{ ++ //struct timeval tv; ++ //int gettimeofday(&tv, struct timezone *tz); ++ return 0; ++} ++ ++int main(int argc, char **argv) ++{ ++ uint8_t in[BUFFER_SIZE], *out, *addressed_out; ++ uint32_t out_size; ++ int in_size, written ; ++ int i, guest_id=-1; ++ ++ int vtpm_tx_fh=-1, vtpm_rx_fh=-1; ++#ifdef VTPM_MULTI_VM ++ if (argc < 2) { ++ printf("Usage: tpmd clear|save|deactivated\n" ); ++#else ++ if (argc < 3) { ++ printf("Usage: tpmd clear|save|deactivated vtpmid\n" ); ++#endif ++ return -1; ++ } ++ ++#ifndef VTPM_MULTI_VM ++ dmi_id = atoi(argv[2]); ++#endif ++ ++ /* initialize TPM emulator */ ++ if (!strcmp(argv[1], "clear")) { ++ printf("Initializing tpm: %s\n", argv[1]); ++ tpm_emulator_init(1); ++ } else if (!strcmp(argv[1], "save")) { ++ printf("Initializing tpm: 
%s\n", argv[1]); ++ tpm_emulator_init(2); ++ } else if (!strcmp(argv[1], "deactivated")) { ++ printf("Initializing tpm: %s\n", argv[1]); ++ tpm_emulator_init(3); ++ } else { ++ printf("invalid startup mode '%s'; must be 'clear', " ++ "'save' (default) or 'deactivated", argv[1]); ++ return -1; ++ } ++ ++ char *guest_rx_file = malloc(10 + strlen(GUEST_RX_FIFO_D)); ++ sprintf(guest_rx_file, GUEST_RX_FIFO_D, (uint32_t) dmi_id); ++ ++ while (1) { ++abort_command: ++ if (vtpm_rx_fh < 0) { ++#ifdef VTPM_MUTLI_VM ++ vtpm_rx_fh = open(DEV_BE, O_RDWR); ++#else ++ vtpm_rx_fh = open(guest_rx_file, O_RDONLY); ++#endif ++ } ++ ++ if (vtpm_rx_fh < 0) { ++ printf("ERROR: failed to open devices to listen to guest.\n"); ++ return -1; ++ } ++ ++ in_size = read(vtpm_rx_fh, in, BUFFER_SIZE); ++ if (in_size < 6) { // Magic size of minium TPM command ++ printf("Recv[%d] to small: 0x", in_size); ++ if (in_size <= 0) { ++ close(vtpm_rx_fh); ++ vtpm_rx_fh = -1; ++ goto abort_command; ++ } ++ } else { ++ printf("Recv[%d]: 0x", in_size); ++ for (i=0; i< in_size; i++) ++ printf("%x ", in[i]); ++ printf("\n"); ++ } ++ ++ if (guest_id == -1) { ++ guest_id = *((uint32_t *) in); ++ *((uint32_t *) ctrl_msg) = *((uint32_t *) in); ++ } else { ++ if (guest_id != *((uint32_t *) in) ) { ++ printf("WARNING: More than one guest attached\n"); ++ } ++ } ++ ++ if (vtpm_tx_fh < 0) { ++#ifdef VTPM_MUTLI_VM ++ vtpm_tx_fh = open(DEV_BE, O_RDWR); ++ vtpm_rx_fh = vtpm_tx_fh; ++#else ++ vtpm_tx_fh = open(GUEST_TX_FIFO, O_WRONLY); ++#endif ++ } ++ ++ if (vtpm_tx_fh < 0) { ++ printf("ERROR: failed to open devices to respond to guest.\n"); ++ return -1; ++ } ++ ++ // Handle command, but we need to skip the identifier ++ if ( BE16_TO_CPU( ((uint16_t *) in)[2] ) == VTPM_TAG_REQ ) { // Control message from xend ++ // This DM doesn't really care about ctrl messages. 
Just ACK the message ++ written = write(vtpm_tx_fh, ctrl_msg, sizeof(ctrl_msg)); ++ ++ if (written != sizeof(ctrl_msg)) { ++ printf("ERROR: Part of response not written %d/%d.\n", written, sizeof(ctrl_msg)); ++ } else { ++ printf("Send Ctrl Message confermation\n"); ++ } ++ } else { // Message from Guest ++ if (tpm_handle_command(in + sizeof(uint32_t), in_size - sizeof(uint32_t), &out, &out_size) != 0) { ++ printf("ERROR: Handler Failed.\n"); ++ } ++ ++ addressed_out = (uint8_t *) tpm_malloc(sizeof(uint32_t) + out_size); ++ *(uint32_t *) addressed_out = *(uint32_t *) in; ++ memcpy(addressed_out + sizeof(uint32_t), out, out_size); ++ ++ written = write(vtpm_tx_fh, addressed_out, out_size + sizeof(uint32_t)); ++ ++ if (written != out_size + sizeof(uint32_t)) { ++ printf("ERROR: Part of response not written %d/%d.\n", written, out_size); ++ for (i=0; i< out_size+ sizeof(uint32_t); i++) ++ printf("%x ", addressed_out[i]); ++ printf("\n"); ++ } else { ++ printf("Sent[%d]: ", out_size + sizeof(uint32_t)); ++ for (i=0; i< out_size+ sizeof(uint32_t); i++) ++ printf("%x ", addressed_out[i]); ++ printf("\n"); ++ } ++ tpm_free(out); ++ tpm_free(addressed_out); ++ } ++ ++ } // loop ++ ++ tpm_emulator_shutdown(); ++ ++ close(vtpm_tx_fh); ++#ifndef VTPM_MUTLI_VM ++ close(vtpm_rx_fh); ++ free (guest_rx_file); ++#endif ++ ++} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/COPYING --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/COPYING Fri Sep 9 16:30:54 2005 @@ -0,0 +1,32 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,31 @@ +XEN_ROOT = ../.. 
+ +# Base definitions and rules +include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk + +SUBDIRS = crypto tcs util manager + +all: build + +build: + @set -e; for subdir in $(SUBDIRS); do \ + $(MAKE) -C $$subdir $@; \ + done + +install: build + @set -e; for subdir in $(SUBDIRS); do \ + $(MAKE) -C $$subdir $@; \ + done + +clean: + @set -e; for subdir in $(SUBDIRS); do \ + $(MAKE) -C $$subdir $@; \ + done + + +mrproper: + @set -e; for subdir in $(SUBDIRS); do \ + $(MAKE) -C $$subdir $@; \ + done + + diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/README --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/README Fri Sep 9 16:30:54 2005 @@ -0,0 +1,89 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== + +Directory Structure +=================== +tools/vtpm_manager/crypto -> crypto files +tools/vtpm_manager/TCS -> TCS implementation +tools/vtpm_manager/util -> Utility Library. Include disk-io and buffers. +tools/vtpm_manager/manager -> VTPM Manager + +Compile Flags +=================== +LOGGING_MODULES -> How extensive logging happens + see util/log.h for more info + +VTPM_MULTI_VM -> Defined: VTPMs run in their own VMs + Not Defined (default): VTPMs are processes + +# Debugging flags that may disappear without notice in the future + +DUMMY_BACKEND -> vtpm_manager listens on /tmp/in.fifo and + /tmp/out.fifo rather than backend + +MANUAL_DM_LAUNCH -> User must manually launch & kill VTPMs + +USE_FIXED_SRK_AUTH -> Do not randomly generate a random SRK & Owner auth + +Requirements +============ +- xen-unstable +- IBM frontend/backend vtpm driver patch + +Single-VM Flow +============================ +- Launch the VTPM manager (vtpm_managerd) which which begins listening to the BE with one thread + and listens to a named fifo that is shared by the vtpms to commuincate with the manager. +- VTPM Manager listens to TPM BE. +- When xend launches a tpm frontend equipped VM it contacts the manager over the vtpm backend. +- When the manager receives the open message from the BE, it launches a vtpm +- Xend allows the VM to continue booting. 
+- When a TPM request is issued to the front end, the front end transmits the TPM request to the backend. +- The manager receives the TPM requests and uses a named fifo to forward the request to the vtpm. +- The fifo listener begins listening for the reply from vtpm for the request. +- Vtpm processes request and replies to manager over shared named fifo. +- If needed, the vtpm may send a request to the vtpm_manager at any time to save it's secrets to disk. +- Manager receives response from vtpm and passes it back to backend for forwarding to guest. + +NOTES: +* SaveService SHOULD seal it's table before saving it to disk. However, + the current Xen infrastructure does not provide a mechanism for this to be + unsealed later. Specifically, the auth and wrapped key must be available ONLY + to the service, or it's not even worth encrypting + + In the future the vtpm manager will be protected by an early boot mechanism + that will allow for better protection of it's data. + +TODO: +- Timeout on crashed vtpms +- create lock for shared fifo for talking to vtpms. diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/Rules.mk --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/Rules.mk Fri Sep 9 16:30:54 2005 @@ -0,0 +1,68 @@ +# Base definitions and rules (XEN_ROOT must be defined in including Makefile) +include $(XEN_ROOT)/tools/Rules.mk + +# +# Tool definitions +# + +# Installation program and options +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +INSTALL_DIR = $(INSTALL) -d -m0755 + +# Xen tools installation directory +TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin + +# General compiler flags +CFLAGS = -Wall -Werror -g3 -I. 
+ +# For generating dependencies +CFLAGS += -Wp,-MD,.$(@F).d + +DEP_FILES = .*.d + +# Generic project files +HDRS = $(wildcard *.h) +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,%.o,$(SRCS)) + +# Generic (non-header) dependencies +$(SRCS): Makefile $(XEN_ROOT)/tools/Rules.mk $(XEN_ROOT)/tools/vtpm_manager/Rules.mk + +$(OBJS): $(SRCS) + +-include $(DEP_FILES) + +# Make sure these are just rules +.PHONY : all build install clean + +# +# Project-specific definitions +# + +# Logging Level. See utils/tools.h for usage +CFLAGS += -DLOGGING_MODULES="(BITMASK(VTPM_LOG_TCS)|BITMASK(VTPM_LOG_VTSP)|BITMASK(VTPM_LOG_VTPM)|BITMASK(VTPM_LOG_VTPM_DEEP))" + +# Silent Mode +#CFLAGS += -DLOGGING_MODULES=0x0 +#CFLAGS += -DLOGGING_MODULES=0xff + +# Use frontend/backend pairs between manager & DMs? +#CFLAGS += -DVTPM_MULTI_VM + +# vtpm_manager listens on /tmp/in.fifo and /tmp/out.fifo rather than backend +#CFLAGS += -DDUMMY_BACKEND + +# Do not have manager launch DMs. +#CFLAGS += -DMANUAL_DM_LAUNCH + +# Fixed SRK +CFLAGS += -DUSE_FIXED_SRK_AUTH + +# TPM Hardware Device or TPM Simulator +#CFLAGS += -DTPM_HWDEV + +# Include +CFLAGS += -I$(XEN_ROOT)/tools/vtpm_manager/crypto +CFLAGS += -I$(XEN_ROOT)/tools/vtpm_manager/util +CFLAGS += -I$(XEN_ROOT)/tools/vtpm_manager/tcs diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,18 @@ +XEN_ROOT = ../../.. 
+include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk + +BIN = libtcpaCrypto.a + +all: build + +build: $(BIN) + +install: build + +clean: + rm -f *.a *.so *.o *.rpm $(DEP_FILES) + +mrproper: clean + +$(BIN): $(OBJS) + $(AR) rcs $(BIN) $(OBJS) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/crypto.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/crypto.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,88 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// crypto.c +// +// This file will handle all the TPM Crypto functionality +// +// ================================================================== + +#include <string.h> +#include <openssl/crypto.h> +#include <openssl/err.h> +#include <openssl/evp.h> +#include <openssl/rand.h> +#include "crypto.h" +#include "log.h" + +/** + * Initialize cryptography library + * @rand: random seed + * @size: size of @rand + */ +void Crypto_Init(const BYTE* rand, int size) { + ERR_load_crypto_strings(); + CRYPTO_malloc_init(); + OpenSSL_add_all_algorithms(); + SYM_CIPHER = EVP_aes_128_cbc(); + RAND_poll(); + if (rand == NULL) + return; + + RAND_add(rand, size, size); +} + +/** + * Shutdown cryptography library + */ +void Crypto_Exit() { + ERR_free_strings(); + ERR_remove_state(0); + EVP_cleanup(); +} + + +/** + * Get random data + * @data: (OUT) Random data + * @size: Size of @data + */ +void Crypto_GetRandom(void* data, int size) { + int result; + + result = RAND_pseudo_bytes((BYTE*) data, size); + + if (result <= 0) + vtpmlogerror (VTPM_LOG_CRYPTO, "RAND_pseudo_bytes failed: %s\n", + ERR_error_string (ERR_get_error(), NULL)); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/crypto.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/crypto.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,175 @@ +// 
=================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. 
+// =================================================================== +// +// crypto.h +// +// This file defines the TPM Crypto API +// +// ================================================================== + +#ifndef __CRYPTO_H__ +#define __CRYPTO_H__ + +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> + +#include "tcg.h" +#include "sym_crypto.h" + +#define CRYPTO_MAX_SIG_SIZE (2048 / 8) +#define CRYPTO_MAX_RSA_KEY_SIZE (4096 / 8) //in bytes + +#define OAEP_P "TCPA" +#define OAEP_P_SIZE 4 + +// Algorithms supported by crypto. Stored in CRYPTO_INFO.algorithmID +#define CRYPTO_ALGORITH_RSA 0x01 + +// Supported Encryption Schemes CRYPTO_INFO.encScheme +#define CRYPTO_ES_NONE 0x0001 +#define CRYPTO_ES_RSAESPKCSv15 0x0002 +#define CRYPTO_ES_RSAESOAEP_SHA1_MGF1 0x0003 + +// Supported Signature schemes CRYPTO_INFO.sigScheme +#define CRYPTO_SS_NONE 0x0001 +#define CRYPTO_SS_RSASSAPKCS1v15_SHA1 0x0002 +#define CRYPTO_SS_RSASSAPKCS1v15_DER 0x0003 + +typedef struct CRYPTO_INFO { + void *keyInfo; + UINT32 algorithmID; + UINT32 encScheme; + UINT32 sigScheme; +} CRYPTO_INFO; + + +void Crypto_Init(const BYTE* rand, int size); + +void Crypto_Exit(); + +void Crypto_GetRandom(void* data, int size); + +void Crypto_HMAC( const BYTE* text, + int text_len, + const BYTE* key, + int key_len, + BYTE* digest); + +TPM_RESULT Crypto_HMAC_buf (const buffer_t * text, + const buffer_t * key, + BYTE * o_digest); /* presumably of 20 bytes */ + +void Crypto_SHA1Full( const BYTE* text, + UINT32 size, + BYTE* hash); //Complete 3part SHA1 + +// o_hash needs to be large enough to hold the digest, ie 20 bytes +TPM_RESULT Crypto_SHA1Full_buf (const buffer_t * buf, + BYTE * o_hash); + +void Crypto_SHA1Start(UINT32* maxNumBytes); +void Crypto_SHA1Update(int numBytes, const BYTE* hashData); +void Crypto_SHA1Complete( int hashDataSize, + const BYTE* hashData, + BYTE* hashValue); + +void Crypto_RSACreateKey( /*in*/ UINT32 keySize, + /*in*/ UINT32 pubExpSize, + /*in*/ BYTE *pubExp, + /*out*/ 
UINT32 *privExpSize, + /*out*/ BYTE *privExp, + /*out*/ UINT32 *modulusSize, + /*out*/ BYTE *modulus, + /*out*/ CRYPTO_INFO *keys); + +void Crypto_RSABuildCryptoInfo( /*[IN]*/ UINT32 pubExpSize, + /*[IN]*/ BYTE *pubExp, + /*[IN]*/ UINT32 privExpSize, + /*[IN]*/ BYTE *privExp, + /*[IN]*/ UINT32 modulusSize, + /*[IN]*/ BYTE *modulus, + /*[OUT]*/ CRYPTO_INFO* cryptoInfo); + +void Crypto_RSABuildCryptoInfoPublic( /*[IN]*/ UINT32 pubExpSize, + /*[IN]*/ BYTE *pubExp, + /*[IN]*/ UINT32 modulusSize, + /*[IN]*/ BYTE *modulus, + CRYPTO_INFO* cryptoInfo); + +// +// symmetric pack and unpack operations +// +TPM_RESULT Crypto_RSAPackCryptoInfo (const CRYPTO_INFO* cryptoInfo, + BYTE ** io_buf, UINT32 * io_buflen); + +TPM_RESULT Crypto_RSAUnpackCryptoInfo (CRYPTO_INFO * ci, + BYTE * in, UINT32 len, + UINT32 * o_lenread); + + +// return 0 on success, -1 on error +int Crypto_RSAEnc( CRYPTO_INFO *keys, + UINT32 inDataSize, + BYTE *inData, + /*out*/ UINT32 *outDataSize, + /*out*/ BYTE *outData); + +// return 0 on success, -1 on error +int Crypto_RSADec( CRYPTO_INFO *keys, + UINT32 inDataSize, + BYTE *inData, + /*out*/ UINT32 *outDataSize, + /*out*/ BYTE *outData); + +// return 0 on success, -1 on error +int Crypto_RSASign( CRYPTO_INFO *keys, + UINT32 inDataSize, + BYTE *inData, + /*out*/ UINT32 *sigSize, + /*out*/ BYTE *sig); + +bool Crypto_RSAVerify( CRYPTO_INFO *keys, + UINT32 inDataSize, + BYTE *inData, + UINT32 sigSize, + BYTE *sig); + +//private: +int RSA_verify_DER(int dtype, unsigned char *m, unsigned int m_len, + unsigned char *sigbuf, unsigned int siglen, CRYPTO_INFO *key); + +int RSA_sign_DER(int type, unsigned char *m, unsigned int m_len, + unsigned char *sigret, unsigned int *siglen, CRYPTO_INFO *key); + +#endif // __CRYPTO_H__ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/hash.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/hash.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,153 @@ +// 
=================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. 
+// =================================================================== +// +// hash.c +// +// This file will handle all the TPM Hash functionality +// +// ================================================================== + +#include <string.h> +#include <openssl/crypto.h> +#include <openssl/err.h> +#include <openssl/evp.h> +#include <openssl/rand.h> +#include <openssl/hmac.h> +#include <openssl/sha.h> +#include <openssl/bn.h> +#include <openssl/rsa.h> + +#include "tcg.h" // for TPM_SUCCESS +#include "crypto.h" + +static SHA_CTX g_shaContext; + +void Crypto_HMAC( const BYTE* text, + int text_len, + const BYTE* key, + int key_len, + BYTE* digest) { + if (text == NULL || key == NULL || text_len == 0 || key_len == 0) + return; + + HMAC(EVP_sha1(), key, key_len, text, text_len, digest, NULL); +} + +TPM_RESULT Crypto_HMAC_buf (const buffer_t * text, + const buffer_t * key, + BYTE * o_digest) { /* presumably of 20 bytes */ + + Crypto_HMAC (text->bytes, text->size, + key->bytes, key->size, + o_digest); + + return TPM_SUCCESS; +} + + +/* + * SHA1 + * (OUT) Create a SHA1 hash of text. Calls all three SHA1 steps internally + */ +void Crypto_SHA1Full( const BYTE* text, + uint32_t size, + BYTE* hash) { + + if (text == NULL || size == 0) + return; + + // Run SHA1Start + SHAUpdate (if necessary) + SHAComplete + uint32_t maxBytes; // Not used for anything + Crypto_SHA1Start(&maxBytes); + + while (size > 64){ + Crypto_SHA1Update(64, text); + size -= 64; + text += 64; + } + + Crypto_SHA1Complete(size, text, hash); +} + +// same thing using buffer_t +TPM_RESULT Crypto_SHA1Full_buf (const buffer_t * buf, + BYTE * o_digest) { + + if (buf->bytes == NULL || buf->size == 0) + return TPM_BAD_PARAMETER; + + Crypto_SHA1Full (buf->bytes, buf->size, o_digest); + + return TPM_SUCCESS; +} + + +/* + * Initialize SHA1 + * (OUT) Maximum number of bytes that can be sent to SHA1Update. + * Must be a multiple of 64 bytes. 
+ */ +void Crypto_SHA1Start(uint32_t* maxNumBytes) { + int max = SHA_CBLOCK; + // Initialize the crypto library + SHA1_Init(&g_shaContext); + *maxNumBytes = max; +} + +/* + * Process SHA1 + * @numBytes: (IN) The number of bytes in hashData. + * Must be a multiple of 64 bytes. + * @hashData: (IN) Bytes to be hashed. + */ +void Crypto_SHA1Update(int numBytes, const BYTE* hashData) { + + if (hashData == NULL || numBytes == 0 || numBytes%64 != 0) + return; + + SHA1_Update(&g_shaContext, hashData, numBytes); +} + +/* + * Complete the SHA1 process + * @hashDataSize: (IN) Number of bytes in hashData. + * Must be a multiple of 64 bytes. + * @hashData: (IN) Final bytes to be hashed. + * @hashValue: (OUT) The output of the SHA-1 hash. + */ +void Crypto_SHA1Complete(int hashDataSize, + const BYTE* hashData, + BYTE* hashValue) { + SHA1_Update(&g_shaContext, hashData, hashDataSize); + SHA1_Final(hashValue, &g_shaContext); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/rsa.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/rsa.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,434 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// rsa.c +// +// This file will handle all the TPM RSA crypto functionality +// +// ================================================================== + +#include <string.h> +#include <openssl/crypto.h> +#include <openssl/evp.h> +#include <openssl/bn.h> +#include <openssl/rsa.h> +#include <openssl/rand.h> +#include <openssl/x509.h> + +#include <openssl/err.h> +#include <stdio.h> + +#include "tcg.h" +#include "buffer.h" +#include "crypto.h" +#include "log.h" + +void Crypto_RSACreateKey( /*in*/ UINT32 keySize, + /*in*/ UINT32 pubExpSize, + /*in*/ BYTE *pubExp, + /*out*/ UINT32 *privExpSize, + /*out*/ BYTE *privExp, + /*out*/ UINT32 *modulusSize, + /*out*/ BYTE *modulus, + /*out*/ CRYPTO_INFO *keys) { + unsigned long e_value; + + if (pubExpSize == 0) // Default e = 2^16+1 + e_value = (0x01 << 16) + 1; + else { + // This is not supported, but the following line MIGHT work + // under then assumption that the format is BigNum compatable + // Though it's not in the spec, so who knows what it is. + // Forcing the default. 
+ //BN_bin2bn(pubExp, pubExpSize, NULL); + e_value = (0x01 << 16) + 1; + } + + RSA *rsa = RSA_generate_key(keySize, e_value, NULL, NULL); + + if (keys) { + keys->keyInfo = rsa; + keys->algorithmID = CRYPTO_ALGORITH_RSA; + } + + if (modulus) *modulusSize = BN_bn2bin(rsa->n, modulus); + if (privExp) *privExpSize = BN_bn2bin(rsa->d, privExp); +} + +// Create a CRYPTO_INFO struct from the BYTE * key parts. +// If pubExp info is NULL, use TCG default. +// If there is a remainder while calculating the privExp, return FALSE. + +void Crypto_RSABuildCryptoInfo( /*[IN]*/ UINT32 pubExpSize, + /*[IN]*/ BYTE *pubExp, + /*[IN]*/ UINT32 privExpSize, + /*[IN]*/ BYTE *privExp, + /*[IN]*/ UINT32 modulusSize, + /*[IN]*/ BYTE *modulus, + CRYPTO_INFO* cryptoInfo) { + cryptoInfo->keyInfo = RSA_new(); + RSA *rsa = (RSA *) cryptoInfo->keyInfo; + + rsa->e = BN_new(); + + if (pubExpSize == 0) { // Default e = 2^16+1 + BN_set_bit(rsa->e, 16); + BN_set_bit(rsa->e, 0); + } else { + // This is not supported, but the following line MIGHT work + // under then assumption that the format is BigNum compatable + // Though it's not in the spec, so who knows what it is. + // Forcing the default. + //BN_bin2bn(pubExp, pubExpSize, NULL); + BN_set_bit(rsa->e, 16); + BN_set_bit(rsa->e, 0); + } + + rsa->n = BN_bin2bn(modulus, modulusSize, NULL); + rsa->d = BN_bin2bn(privExp, privExpSize, NULL); +} + +// Create a CRYPTO_INFO struct from the BYTE * key parts. +// If pubExp info is NULL, use TCG default. +// If there is a remainder while calculating the privExp, return FALSE. 
+ +void Crypto_RSABuildCryptoInfoPublic( /*[IN]*/ UINT32 pubExpSize, + /*[IN]*/ BYTE *pubExp, + /*[IN]*/ UINT32 modulusSize, + /*[IN]*/ BYTE *modulus, + CRYPTO_INFO* cryptoInfo) { + cryptoInfo->keyInfo = RSA_new(); + RSA *rsa = (RSA *) cryptoInfo->keyInfo; + + rsa->e = BN_new(); + + if (pubExpSize == 0) { // Default e = 2^16+1 + BN_set_bit(rsa->e, 16); + BN_set_bit(rsa->e, 0); + } else { + // This is not supported, but the following line MIGHT work + // under then assumption that the format is BigNum compatable + // Though it's not in the spec, so who knows what it is. + // Forcing the default. + //BN_bin2bn(pubExp, pubExpSize, NULL); + BN_set_bit(rsa->e, 16); + BN_set_bit(rsa->e, 0); + } + + rsa->n = BN_bin2bn(modulus, modulusSize, NULL); + +} + +int Crypto_RSAEnc( CRYPTO_INFO *key, + UINT32 inDataSize, + BYTE *inData, + /*out*/ UINT32 *outDataSize, + /*out*/ BYTE *outData) { + RSA *rsa = (RSA *) key->keyInfo; + UINT32 paddedDataSize = RSA_size (rsa); + BYTE *paddedData = (BYTE *)malloc(sizeof(BYTE) * paddedDataSize); + int rc; + + if (paddedData == NULL) + return -1; + + *outDataSize = 0; + + switch (key->encScheme) { + case CRYPTO_ES_RSAESPKCSv15: + if (RSA_padding_add_PKCS1_type_2(paddedData, paddedDataSize, inData, inDataSize) <= 0) { + rc = -1; + goto abort_egress; + } + break; + case CRYPTO_ES_RSAESOAEP_SHA1_MGF1: + if (RSA_padding_add_PKCS1_OAEP(paddedData,paddedDataSize,inData,inDataSize, (BYTE *) OAEP_P,OAEP_P_SIZE) <= 0 ) { + rc = -1; + goto abort_egress; + } + break; + default: + rc = -1; + goto abort_egress; + } + + rc = RSA_public_encrypt(paddedDataSize, paddedData, outData, rsa, RSA_NO_PADDING); + if (rc == -1) + goto abort_egress; + + *outDataSize = rc; + + if (rc > 0) rc = 0; + + goto egress; + + abort_egress: + egress: + + if (paddedData) + free (paddedData); + return rc; + +} + +int Crypto_RSADec( CRYPTO_INFO *key, + UINT32 inDataSize, + BYTE *inData, + /*out*/ UINT32 *outDataSize, + /*out*/ BYTE *outData) { + + RSA *rsa = (RSA *) key->keyInfo; + 
UINT32 paddedDataSize = RSA_size (rsa); + BYTE *paddedData = (BYTE *)malloc(sizeof(BYTE) * paddedDataSize); + int rc; + + if (paddedData == NULL) + goto abort_egress; + + rc = RSA_private_decrypt(inDataSize, inData, paddedData, rsa, RSA_NO_PADDING); + if (rc == -1) { + vtpmlogerror(VTPM_LOG_CRYPTO, "RSA_private_decrypt: %s\n", ERR_error_string(ERR_get_error(), NULL)); + goto abort_egress; + } + + paddedDataSize = rc; + + switch (key->encScheme) { + case CRYPTO_ES_RSAESPKCSv15: + rc = RSA_padding_check_PKCS1_type_2 (outData, paddedDataSize, + paddedData + 1, paddedDataSize - 1, + RSA_size(rsa)); + if (rc == -1) { + vtpmlogerror(VTPM_LOG_CRYPTO, "RSA_padding_check_PKCS1_type_2: %s\n", + ERR_error_string(ERR_get_error(), NULL)); + goto abort_egress; + } + *outDataSize = rc; + break; + case CRYPTO_ES_RSAESOAEP_SHA1_MGF1: + rc = RSA_padding_check_PKCS1_OAEP(outData, paddedDataSize, + paddedData + 1, paddedDataSize - 1, + RSA_size(rsa), + (BYTE *) OAEP_P, OAEP_P_SIZE); + if (rc == -1) { + vtpmlogerror(VTPM_LOG_CRYPTO, "RSA_padding_check_PKCS1_OAEP: %s\n", + ERR_error_string(ERR_get_error(), NULL)); + goto abort_egress; + } + *outDataSize = rc; + break; + default: + *outDataSize = 0; + } + + free(paddedData); paddedData = NULL; + goto egress; + + abort_egress: + + if (paddedData) + free (paddedData); + return -1; + + egress: + return 0; +} + +// Signs either a SHA1 digest of a message or a DER encoding of a message +// Textual messages MUST be encoded or Hashed before sending into this function +// It will NOT SHA the message. 
+int Crypto_RSASign( CRYPTO_INFO *key, + UINT32 inDataSize, + BYTE *inData, + /*out*/ UINT32 *sigSize, + /*out*/ BYTE *sig) { + int status; + unsigned int intSigSize; + + switch(key->sigScheme) { + case CRYPTO_SS_RSASSAPKCS1v15_SHA1: + status = RSA_sign(NID_sha1, inData, inDataSize, sig, &intSigSize, (RSA *) key->keyInfo); + break; + case CRYPTO_SS_RSASSAPKCS1v15_DER: + // status = Crypto_RSA_sign_DER(NID_md5_sha1, inData, inDataSize, sig, &intSigSize, key); + vtpmlogerror(VTPM_LOG_CRYPTO, "Crypto: Unimplemented sign type (%d)\n", key->sigScheme); + status = 0; + break; + default: + status = 0; + } + + if (status == 0) { + *sigSize = 0; + vtpmlogerror(VTPM_LOG_CRYPTO, "%s\n", ERR_error_string(ERR_get_error(), NULL)); + return -1; + } + + *sigSize = (UINT32) intSigSize; + return 0; +} + +bool Crypto_RSAVerify( CRYPTO_INFO *key, + UINT32 inDataSize, + BYTE *inData, + UINT32 sigSize, + BYTE *sig) { + int status; + + switch(key->sigScheme){ + case CRYPTO_SS_RSASSAPKCS1v15_SHA1: + status = RSA_verify(NID_sha1, inData, inDataSize, sig, sigSize, (RSA *) key->keyInfo); + break; + case CRYPTO_SS_RSASSAPKCS1v15_DER: + //status = Crypto_RSA_verify_DER(NID_md5_sha1, inData, inDataSize, sig, sigSize, key); + vtpmlogerror(VTPM_LOG_CRYPTO, "Crypto: Unimplemented sign type (%d)\n", key->sigScheme); + status = 0; + break; + default: + status = 0; + } + + if (status) + return(1); + else { + vtpmlogerror(VTPM_LOG_CRYPTO, "RSA verify: %s\n", ERR_error_string(ERR_get_error(), NULL)); + return(0); + } + +} + +// helper which packs everything into a BIO! + +// packs the parameters first, then the private key, then the public key +// if *io_buf is NULL, allocate it here as needed. 
otherwise its size is in +// *io_buflen +TPM_RESULT Crypto_RSAPackCryptoInfo (const CRYPTO_INFO* cryptoInfo, + BYTE ** io_buf, UINT32 * io_buflen) { + TPM_RESULT status = TPM_SUCCESS; + BYTE * buf; + long len, outlen = *io_buflen; + + const long PARAMSLEN = 3*sizeof(UINT32); + + RSA *rsa = (RSA *) cryptoInfo->keyInfo; + + BIO *mem = BIO_new(BIO_s_mem()); + + + // write the openssl keys to the BIO + if ( i2d_RSAPrivateKey_bio (mem, rsa) == 0 ) { + ERR_print_errors_fp (stderr); + ERRORDIE (TPM_SIZE); + } + if ( i2d_RSAPublicKey_bio (mem, rsa) == 0 ) { + ERR_print_errors_fp (stderr); + ERRORDIE (TPM_SIZE); + } + + // get the buffer out + len = BIO_get_mem_data (mem, &buf); + + // see if we need to allocate a return buffer + if (*io_buf == NULL) { + *io_buf = (BYTE*) malloc (PARAMSLEN + len); + if (*io_buf == NULL) + ERRORDIE (TPM_SIZE); + } else { // *io_buf is already allocated + if (outlen < len + PARAMSLEN) + ERRORDIE (TPM_SIZE); // but not large enough! + } + + // copy over the parameters (three UINT32's starting at algorithmID) + memcpy (*io_buf, &cryptoInfo->algorithmID, PARAMSLEN); + + // copy over the DER keys + memcpy (*io_buf + PARAMSLEN, buf, len); + + *io_buflen = len + PARAMSLEN; + + goto egress; + + + abort_egress: + egress: + + BIO_free (mem); + + return status; +} + + + +// sets up ci, and returns the number of bytes read in o_lenread +TPM_RESULT Crypto_RSAUnpackCryptoInfo (CRYPTO_INFO * ci, + BYTE * in, UINT32 len, + UINT32 * o_lenread) { + + TPM_RESULT status = TPM_SUCCESS; + long l; + BIO *mem; + RSA *rsa; + + // first load up the params + l = 3 * sizeof(UINT32); + memcpy (&ci->algorithmID, in, l); + len -= l; + in += l; + + // and now the openssl keys, private first + mem = BIO_new_mem_buf (in, len); + + if ( (rsa = d2i_RSAPrivateKey_bio (mem, NULL)) == NULL ) { + ERR_print_errors_fp (stderr); + ERRORDIE (TPM_BAD_PARAMETER); + } + // now use the same RSA object and fill in the private key + if ( d2i_RSAPublicKey_bio (mem, &rsa) == NULL ) { + 
ERR_print_errors_fp (stderr); + ERRORDIE (TPM_BAD_PARAMETER); + } + + ci->keyInfo = rsa; // needs to be freed somehow later + + // FIXME: havent figured out yet how to tell how many bytes were read in the + // above oprations! so o_lenread is not set + + goto egress; + + abort_egress: + egress: + + BIO_free (mem); + + return status; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/sym_crypto.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/sym_crypto.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,242 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// sym_crypto.c +// +// Symmetric crypto portion of crypto +// +// ================================================================== + +#include <openssl/evp.h> +#include <openssl/rand.h> + +#include "tcg.h" +#include "sym_crypto.h" + +typedef enum crypt_op_type_t { + CRYPT_ENCRYPT, + CRYPT_DECRYPT +} crypt_op_type_t; + +TPM_RESULT ossl_symcrypto_op (symkey_t* key, + const buffer_t* in, + const buffer_t* iv, + buffer_t * out, + crypt_op_type_t optype); + + +// this is initialized in Crypto_Init() +const EVP_CIPHER * SYM_CIPHER = NULL; + +const BYTE ZERO_IV[EVP_MAX_IV_LENGTH] = {0}; + + +TPM_RESULT Crypto_symcrypto_initkey (symkey_t * key, const buffer_t* keybits) { + TPM_RESULT status = TPM_SUCCESS; + + EVP_CIPHER_CTX_init (&key->context); + + key->cipher = SYM_CIPHER; + + status = buffer_init_copy (&key->key, keybits); + STATUSCHECK(status); + + goto egress; + + abort_egress: + EVP_CIPHER_CTX_cleanup (&key->context); + + egress: + + return status; +} + + + +TPM_RESULT Crypto_symcrypto_genkey (symkey_t * key) { + int res; + TPM_RESULT status = TPM_SUCCESS; + + // hmm, EVP_CIPHER_CTX_init does not return a value + EVP_CIPHER_CTX_init (&key->context); + + key->cipher = SYM_CIPHER; + + status = buffer_init (&key->key, EVP_CIPHER_key_length(key->cipher), NULL); + STATUSCHECK (status); + + // and generate the key material + res = RAND_pseudo_bytes 
(key->key.bytes, key->key.size); + if (res < 0) + ERRORDIE (TPM_SHORTRANDOM); + + + goto egress; + + abort_egress: + EVP_CIPHER_CTX_cleanup (&key->context); + buffer_free (&key->key); + + egress: + return status; +} + + +TPM_RESULT Crypto_symcrypto_encrypt (symkey_t* key, + const buffer_t* clear, + buffer_t* o_cipher) { + TPM_RESULT status = TPM_SUCCESS; + + buffer_t iv, cipher_alias; + + buffer_init_const (&iv, EVP_MAX_IV_LENGTH, ZERO_IV); + + buffer_init (o_cipher, + clear->size + + EVP_CIPHER_iv_length(key->cipher) + + EVP_CIPHER_block_size (key->cipher), + 0); + + // copy the IV into the front + buffer_copy (o_cipher, &iv); + + // make an alias into which we'll put the ciphertext + buffer_init_alias (&cipher_alias, o_cipher, EVP_CIPHER_iv_length(key->cipher), 0); + + status = ossl_symcrypto_op (key, clear, &iv, &cipher_alias, CRYPT_ENCRYPT); + STATUSCHECK (status); + + // set the output size correctly + o_cipher->size += cipher_alias.size; + + goto egress; + + abort_egress: + + egress: + + return status; + +} + + + +TPM_RESULT Crypto_symcrypto_decrypt (symkey_t* key, + const buffer_t* cipher, + buffer_t* o_clear) { + TPM_RESULT status = TPM_SUCCESS; + + buffer_t iv, cipher_alias; + + // alias for the IV + buffer_init_alias (&iv, cipher, 0, EVP_CIPHER_iv_length(key->cipher)); + + // make an alias to where the ciphertext is, after the IV + buffer_init_alias (&cipher_alias, cipher, EVP_CIPHER_iv_length(key->cipher), 0); + + // prepare the output buffer + status = buffer_init (o_clear, + cipher->size + - EVP_CIPHER_iv_length(key->cipher) + + EVP_CIPHER_block_size(key->cipher), + 0); + STATUSCHECK(status); + + // and decrypt + status = ossl_symcrypto_op (key, &cipher_alias, &iv, o_clear, CRYPT_DECRYPT); + STATUSCHECK (status); + + goto egress; + + abort_egress: + buffer_free (o_clear); + + egress: + + return status; +} + + + +TPM_RESULT Crypto_symcrypto_freekey (symkey_t * key) { + buffer_memset (&key->key, 0); + buffer_free (&key->key); + + EVP_CIPHER_CTX_cleanup 
(&key->context); + + return TPM_SUCCESS; +} + + +TPM_RESULT ossl_symcrypto_op (symkey_t* key, + const buffer_t* in, + const buffer_t* iv, + buffer_t * out, + crypt_op_type_t optype) { + TPM_RESULT status = TPM_SUCCESS; + + int inlen, outlen; + tpm_size_t running; + + if ( ! EVP_CipherInit_ex (&key->context, + key->cipher, NULL, key->key.bytes, iv->bytes, + optype == CRYPT_ENCRYPT ? 1 : 0) ) + ERRORDIE (TPM_FAIL); + + + + inlen = in->size; + + outlen = 0; + running = 0; + + + if ( ! EVP_CipherUpdate (&key->context, out->bytes, &outlen, in->bytes, inlen) ) + ERRORDIE (TPM_FAIL); + + running += outlen; + + if ( ! EVP_CipherFinal_ex (&key->context, out->bytes + running, &outlen) ) + ERRORDIE (TPM_FAIL); + + running += outlen; + + out->size = running; + + goto egress; + + abort_egress: + egress: + + return status; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/crypto/sym_crypto.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/crypto/sym_crypto.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,72 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// sym_crypto.h +// +// Symmetric Crypto +// +// ================================================================== + +#ifndef _SYM_CRYPTO_H +#define _SYM_CRYPTO_H + +#include <openssl/evp.h> +#include "buffer.h" + +typedef struct symkey_t { + buffer_t key; + + EVP_CIPHER_CTX context; + const EVP_CIPHER * cipher; +} symkey_t; + +extern const EVP_CIPHER * SYM_CIPHER; + +TPM_RESULT Crypto_symcrypto_genkey (symkey_t * key); + +TPM_RESULT Crypto_symcrypto_initkey (symkey_t * key, const buffer_t* keybits); + + +// these functions will allocate their output buffers +TPM_RESULT Crypto_symcrypto_encrypt (symkey_t* key, + const buffer_t* clear, + buffer_t* o_cipher); + +TPM_RESULT Crypto_symcrypto_decrypt (symkey_t* key, + const buffer_t* cipher, + buffer_t* o_clear); + +// only free the internal parts, not the 'key' ptr +TPM_RESULT Crypto_symcrypto_freekey (symkey_t * key); + +#endif /* _SYM_CRYPTO_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,27 @@ +XEN_ROOT = 
../../.. +include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk + +BIN = vtpm_managerd + +all: build + +build: $(BIN) + +install: build + if [ ! -d "$(DESTDIR)/var/vtpm/fifos" ]; \ + then mkdir -p $(DESTDIR)/var/vtpm/fifos; \ + fi + $(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR) + +clean: + rm -f *.a *.so *.o *.rpm $(DEP_FILES) + +mrproper: clean + rm -f $(BIN) + +$(BIN): $(OBJS) + $(CC) $(LDFLAGS) $^ $(LIBS) -o $@ + +# libraries +LIBS += ../tcs/libTCS.a ../util/libTCGUtils.a ../crypto/libtcpaCrypto.a +LIBS += -lcrypto -lpthread -lrt -lm diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/dmictl.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/dmictl.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,339 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// dmictl.c +// +// Functions for creating and destroying DMIs +// +// ================================================================== + +#include <stdio.h> +#include <unistd.h> +#include <string.h> + +#ifndef VTPM_MUTLI_VM + #include <sys/types.h> + #include <sys/stat.h> + #include <fcntl.h> + #include <signal.h> + #include <wait.h> +#endif + +#include "vtpmpriv.h" +#include "bsg.h" +#include "buffer.h" +#include "log.h" +#include "hashtable.h" +#include "hashtable_itr.h" + +#define TPM_EMULATOR_PATH "/usr/bin/vtpmd" + +TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) { + TPM_RESULT status = TPM_FAIL; + + if (dmi_res == NULL) + return TPM_SUCCESS; + + status = TCS_CloseContext(dmi_res->TCSContext); + free ( dmi_res->NVMLocation ); + dmi_res->connected = FALSE; + +#ifndef VTPM_MULTI_VM + free(dmi_res->guest_tx_fname); + free(dmi_res->vtpm_tx_fname); + + close(dmi_res->guest_tx_fh); dmi_res->guest_tx_fh = -1; + close(dmi_res->vtpm_tx_fh); dmi_res->vtpm_tx_fh = -1; + + + #ifndef MANUAL_DM_LAUNCH + if (dmi_res->dmi_id != VTPM_CTL_DM) { + if (dmi_res->dmi_pid != 0) { + vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n", dmi_res->dmi_pid); + if ((kill(dmi_res->dmi_pid, SIGKILL) !=0) || + (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid)){ + vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi on pid %d.\n", dmi_res->dmi_pid); + status = TPM_FAIL; + } 
+ } else + vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi because it's pid was 0.\n"); + } + #endif +#endif + + return status; +} + +TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) { + + VTPM_DMI_RESOURCE *new_dmi=NULL; + TPM_RESULT status=TPM_FAIL; + BYTE type; + UINT32 dmi_id, domain_id, *dmi_id_key; + int fh; + +#ifndef VTPM_MUTLI_VM + char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL + struct stat file_info; +#endif + + if (param_buf == NULL) { // Assume creation of Dom 0 control + type = 0; + domain_id = VTPM_CTL_DM; + dmi_id = VTPM_CTL_DM; + } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) { + vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n", buffer_len(param_buf)); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } else { + BSG_UnpackList( param_buf->bytes, 3, + BSG_TYPE_BYTE, &type, + BSG_TYPE_UINT32, &domain_id, + BSG_TYPE_UINT32, &dmi_id); + } + + new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi_id); + if (new_dmi == NULL) { + vtpmloginfo(VTPM_LOG_VTPM, "Creating new DMI instance %d attached on domain %d.\n", dmi_id, domain_id); + // Brand New DMI. Initialize the persistent pieces + if ((new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE))) == NULL) { + status = TPM_RESOURCES; + goto abort_egress; + } + memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE)); + new_dmi->dmi_id = dmi_id; + new_dmi->connected = FALSE; + + if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) { + status = TPM_RESOURCES; + goto abort_egress; + } + *dmi_id_key = new_dmi->dmi_id; + + // install into map + if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){ + free(new_dmi); + free(dmi_id_key); + status = TPM_FAIL; + goto egress; + } + + } else + vtpmloginfo(VTPM_LOG_VTPM, "Re-attaching DMI instance %d on domain %d .\n", dmi_id, domain_id); + + if (new_dmi->connected) { + vtpmlogerror(VTPM_LOG_VTPM, "Attempt to re-attach, currently attached instance %d. 
Ignoring\n", dmi_id); + status = TPM_BAD_PARAMETER; + goto egress; + } + + // Initialize the Non-persistent pieces + new_dmi->dmi_domain_id = domain_id; + new_dmi->NVMLocation = NULL; + + new_dmi->TCSContext = 0; + TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) ); + + new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE)); + sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id); + + // Measure DMI + // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value + /* + fh = open(TPM_EMULATOR_PATH, O_RDONLY); + stat_ret = fstat(fh, &file_stat); + if (stat_ret == 0) + dmi_size = file_stat.st_size; + else { + vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n"); + status = TPM_IOERROR; + goto abort_egress; + } + dmi_buffer + */ + memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST)); + +#ifndef VTPM_MULTI_VM + if (dmi_id != VTPM_CTL_DM) { + // Create a pair of fifo pipes + if( (new_dmi->guest_tx_fname = (char *) malloc(11 + strlen(GUEST_TX_FIFO))) == NULL){ + status = TPM_RESOURCES; + goto abort_egress; + } + sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t) dmi_id); + + if ((new_dmi->vtpm_tx_fname = (char *) malloc(11 + strlen(VTPM_TX_FIFO))) == NULL) { + status = TPM_RESOURCES; + goto abort_egress; + } + sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t) dmi_id); + + new_dmi->guest_tx_fh = -1; + new_dmi->vtpm_tx_fh= -1; + + if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) { + if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){ + status = TPM_FAIL; + goto abort_egress; + } + } + + if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) { + if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) { + status = TPM_FAIL; + goto abort_egress; + } + } + + // Launch DMI + sprintf(dmi_id_str, "%d", (int) dmi_id); +#ifdef MANUAL_DM_LAUNCH + vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n", dmi_id_str); + new_dmi->dmi_pid = 0; +#else + pid_t pid = fork(); + + if (pid 
== -1) { + vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch vtpm\n"); + status = TPM_RESOURCES; + goto abort_egress; + } else if (pid == 0) { + if ( stat(new_dmi->NVMLocation, &file_info) == -1) + execl (TPM_EMULATOR_PATH, "vtmpd", "clear", dmi_id_str, NULL); + else + execl (TPM_EMULATOR_PATH, "vtpmd", "save", dmi_id_str, NULL); + + // Returning from these at all is an error. + vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch vtpm\n"); + } else { + new_dmi->dmi_pid = pid; + vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid); + } +#endif // MANUAL_DM_LAUNCH + } +#else // VTPM_MUTLI_VM + // FIXME: Measure DMI through call to Measurement agent in platform. +#endif + + vtpm_globals->DMI_table_dirty = TRUE; + new_dmi->connected = TRUE; + status=TPM_SUCCESS; + goto egress; + + abort_egress: + close_dmi( new_dmi ); + + egress: + return status; +} + +TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) { + + TPM_RESULT status=TPM_FAIL; + VTPM_DMI_RESOURCE *dmi_res=NULL; + UINT32 dmi_id; + + if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) { + vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size."); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + BSG_UnpackList( param_buf->bytes, 1, + BSG_TYPE_UINT32, &dmi_id); + + vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id); + + dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi_id); + if (dmi_res == NULL ) { + vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n"); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + if (!dmi_res->connected) { + vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n"); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + // Close Dmi + TPMTRYRETURN(close_dmi( dmi_res )); + + status=TPM_SUCCESS; + goto egress; + + abort_egress: + egress: + + return status; +} + +TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) { + + TPM_RESULT status=TPM_FAIL; + VTPM_DMI_RESOURCE 
*dmi_res=NULL; + UINT32 dmi_id; + + if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) { + vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n"); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + BSG_UnpackList( param_buf->bytes, 1, + BSG_TYPE_UINT32, &dmi_id); + + vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id); + + dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map, &dmi_id); + if (dmi_res == NULL) { + vtpmlogerror(VTPM_LOG_VTPM, "Closing non-existent DMI.\n"); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + //TODO: Automatically delete file dmi_res->NVMLocation + + // Close DMI first + TPMTRYRETURN(close_dmi( dmi_res )); + free ( dmi_res ); + + status=TPM_SUCCESS; + goto egress; + + abort_egress: + egress: + + return status; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/securestorage.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/securestorage.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,401 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// securestorage.c +// +// Functions regarding securely storing DMI secrets. +// +// ================================================================== + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> + +#include "tcg.h" +#include "vtpm_manager.h" +#include "vtpmpriv.h" +#include "vtsp.h" +#include "bsg.h" +#include "crypto.h" +#include "hashtable.h" +#include "hashtable_itr.h" +#include "buffer.h" +#include "log.h" + +TPM_RESULT VTPM_Handle_Save_NVM(VTPM_DMI_RESOURCE *myDMI, + const buffer_t *inbuf, + buffer_t *outbuf) { + + TPM_RESULT status = TPM_SUCCESS; + symkey_t symkey; + buffer_t state_cipher = NULL_BUF, + symkey_cipher = NULL_BUF; + int fh; + long bytes_written; + BYTE *sealed_NVM=NULL; + UINT32 sealed_NVM_size, i; + struct pack_constbuf_t symkey_cipher32, state_cipher32; + + vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Save_NVMing[%d]: 0x", buffer_len(inbuf)); + for (i=0; i< buffer_len(inbuf); i++) + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]); + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + + // Generate a sym key and 
encrypt state with it + TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_genkey (&symkey) ); + TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_encrypt (&symkey, inbuf, &state_cipher) ); + + // Encrypt symmetric key + TPMTRYRETURN( VTSP_Bind( &vtpm_globals->storageKey, + &symkey.key, + &symkey_cipher) ); + + // Create output blob: symkey_size + symkey_cipher + state_cipher_size + state_cipher + + symkey_cipher32.size = buffer_len(&symkey_cipher); + symkey_cipher32.data = symkey_cipher.bytes; + + state_cipher32.size = buffer_len(&state_cipher); + state_cipher32.data = state_cipher.bytes; + + sealed_NVM = (BYTE *) malloc( 2 * sizeof(UINT32) + symkey_cipher32.size + state_cipher32.size); + + sealed_NVM_size = BSG_PackList(sealed_NVM, 2, + BSG_TPM_SIZE32_DATA, &symkey_cipher32, + BSG_TPM_SIZE32_DATA, &state_cipher32); + + // Mark DMI Table so new save state info will get pushed to disk on return. + vtpm_globals->DMI_table_dirty = TRUE; + + // Write sealed blob off disk from NVMLocation + // TODO: How to properly return from these. Do we care if we return failure + // after writing the file? We can't get the old one back. + // TODO: Backup old file and try and recover that way. + fh = open(myDMI->NVMLocation, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE); + if ( (bytes_written = write(fh, sealed_NVM, sealed_NVM_size) ) != (long) sealed_NVM_size) { + vtpmlogerror(VTPM_LOG_VTPM, "We just overwrote a DMI_NVM and failed to finish. 
%ld/%ld bytes.\n", bytes_written, (long)sealed_NVM_size); + status = TPM_IOERROR; + goto abort_egress; + } + close(fh); + + Crypto_SHA1Full (sealed_NVM, sealed_NVM_size, (BYTE *) &myDMI->NVM_measurement); + + vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of E(NVM)\n", buffer_len(&symkey_cipher), buffer_len(&state_cipher)); + goto egress; + + abort_egress: + vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n."); + + egress: + + buffer_free ( &state_cipher); + buffer_free ( &symkey_cipher); + free(sealed_NVM); + Crypto_symcrypto_freekey (&symkey); + + return status; +} + + +/* inbuf = null outbuf = sealed blob size, sealed blob.*/ +TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI, + const buffer_t *inbuf, + buffer_t *outbuf) { + + TPM_RESULT status = TPM_SUCCESS; + symkey_t symkey; + buffer_t state_cipher = NULL_BUF, + symkey_clear = NULL_BUF, + symkey_cipher = NULL_BUF; + struct pack_buf_t symkey_cipher32, state_cipher32; + + UINT32 sealed_NVM_size; + BYTE *sealed_NVM = NULL; + long fh_size; + int fh, stat_ret, i; + struct stat file_stat; + TPM_DIGEST sealedNVMHash; + + memset(&symkey, 0, sizeof(symkey_t)); + + if (myDMI->NVMLocation == NULL) { + vtpmlogerror(VTPM_LOG_VTPM, "Unable to load NVM because the file name NULL.\n"); + status = TPM_AUTHFAIL; + goto abort_egress; + } + + //Read sealed blob off disk from NVMLocation + fh = open(myDMI->NVMLocation, O_RDONLY); + stat_ret = fstat(fh, &file_stat); + if (stat_ret == 0) + fh_size = file_stat.st_size; + else { + status = TPM_IOERROR; + goto abort_egress; + } + + sealed_NVM = (BYTE *) malloc(fh_size); + if (read(fh, sealed_NVM, fh_size) != fh_size) { + status = TPM_IOERROR; + goto abort_egress; + } + close(fh); + + vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Load_NVMing[%ld]: 0x", fh_size); + for (i=0; i< fh_size; i++) + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_NVM[i]); + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + + sealed_NVM_size = BSG_UnpackList(sealed_NVM, 2, + BSG_TPM_SIZE32_DATA, 
&symkey_cipher32, + BSG_TPM_SIZE32_DATA, &state_cipher32); + + TPMTRYRETURN( buffer_init_convert (&symkey_cipher, + symkey_cipher32.size, + symkey_cipher32.data) ); + + TPMTRYRETURN( buffer_init_convert (&state_cipher, + state_cipher32.size, + state_cipher32.data) ); + + Crypto_SHA1Full(sealed_NVM, sealed_NVM_size, (BYTE *) &sealedNVMHash); + + // Verify measurement of sealed blob. + if (memcmp(&sealedNVMHash, &myDMI->NVM_measurement, sizeof(TPM_DIGEST)) ) { + vtpmlogerror(VTPM_LOG_VTPM, "VTPM LoadNVM NVM measurement check failed.\n"); + vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Correct hash: "); + for (i=0; i< sizeof(TPM_DIGEST); i++) + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&myDMI->NVM_measurement)[i]); + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + + vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Measured hash: "); + for (i=0; i< sizeof(TPM_DIGEST); i++) + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&sealedNVMHash)[i]); + vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + + status = TPM_AUTHFAIL; + goto abort_egress; + } + + // Decrypt Symmetric Key + TPMTRYRETURN( VTSP_Unbind( myDMI->TCSContext, + vtpm_globals->storageKeyHandle, + &symkey_cipher, + (const TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth, + &symkey_clear, + &(vtpm_globals->keyAuth) ) ); + + // create symmetric key using saved bits + Crypto_symcrypto_initkey (&symkey, &symkey_clear); + + // Decrypt State + TPMTRY(TPM_DECRYPT_ERROR, Crypto_symcrypto_decrypt (&symkey, &state_cipher, outbuf) ); + + goto egress; + + abort_egress: + vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n."); + + egress: + + buffer_free ( &state_cipher); + buffer_free ( &symkey_clear); + buffer_free ( &symkey_cipher); + free( sealed_NVM ); + Crypto_symcrypto_freekey (&symkey); + + return status; +} + +TPM_RESULT VTPM_SaveService(void) { + TPM_RESULT status=TPM_SUCCESS; + int fh, dmis=-1; + + BYTE *flat_global; + int flat_global_size, bytes_written; + UINT32 storageKeySize = buffer_len(&vtpm_globals->storageKeyWrap); + struct 
pack_buf_t storage_key_pack = {storageKeySize, vtpm_globals->storageKeyWrap.bytes}; + + struct hashtable_itr *dmi_itr; + VTPM_DMI_RESOURCE *dmi_res; + + UINT32 flat_global_full_size; + + // Global Values needing to be saved + flat_global_full_size = 3*sizeof(TPM_DIGEST) + // Auths + sizeof(UINT32) + // storagekeysize + storageKeySize + // storage key + hashtable_count(vtpm_globals->dmi_map) * // num DMIS + (sizeof(UINT32) + 2*sizeof(TPM_DIGEST)); // Per DMI info + + + flat_global = (BYTE *) malloc( flat_global_full_size); + + flat_global_size = BSG_PackList(flat_global, 4, + BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth, + BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth, + BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth, + BSG_TPM_SIZE32_DATA, &storage_key_pack); + + // Per DMI values to be saved + if (hashtable_count(vtpm_globals->dmi_map) > 0) { + + dmi_itr = hashtable_iterator(vtpm_globals->dmi_map); + do { + dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr); + dmis++; + + // No need to save dmi0. + if (dmi_res->dmi_id == 0) + continue; + + + flat_global_size += BSG_PackList( flat_global + flat_global_size, 3, + BSG_TYPE_UINT32, &dmi_res->dmi_id, + BSG_TPM_DIGEST, &dmi_res->NVM_measurement, + BSG_TPM_DIGEST, &dmi_res->DMI_measurement); + + } while (hashtable_iterator_advance(dmi_itr)); + } + + //FIXME: Once we have a way to protect a TPM key, we should use it to + // encrypt this blob. BUT, unless there is a way to ensure the key is + // not used by other apps, this encryption is useless. + fh = open(STATE_FILE, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE); + if (fh == -1) { + vtpmlogerror(VTPM_LOG_VTPM, "Unable to open %s file for write.\n", STATE_FILE); + status = TPM_IOERROR; + goto abort_egress; + } + + if ( (bytes_written = write(fh, flat_global, flat_global_size)) != flat_global_size ) { + vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data. 
%d/%d bytes written.\n", bytes_written, flat_global_size); + status = TPM_IOERROR; + goto abort_egress; + } + vtpm_globals->DMI_table_dirty = FALSE; + + goto egress; + + abort_egress: + egress: + + free(flat_global); + close(fh); + + vtpmloginfo(VTPM_LOG_VTPM, "Saved VTPM Service state (status = %d, dmis = %d)\n", (int) status, dmis); + return status; +} + +TPM_RESULT VTPM_LoadService(void) { + + TPM_RESULT status=TPM_SUCCESS; + int fh, stat_ret, dmis=0; + long fh_size = 0, step_size; + BYTE *flat_global=NULL; + struct pack_buf_t storage_key_pack; + UINT32 *dmi_id_key; + + VTPM_DMI_RESOURCE *dmi_res; + struct stat file_stat; + + fh = open(STATE_FILE, O_RDONLY ); + stat_ret = fstat(fh, &file_stat); + if (stat_ret == 0) + fh_size = file_stat.st_size; + else { + status = TPM_IOERROR; + goto abort_egress; + } + + flat_global = (BYTE *) malloc(fh_size); + + if ((long) read(fh, flat_global, fh_size) != fh_size ) { + status = TPM_IOERROR; + goto abort_egress; + } + + // Global Values needing to be saved + step_size = BSG_UnpackList( flat_global, 4, + BSG_TPM_AUTHDATA, &vtpm_globals->owner_usage_auth, + BSG_TPM_AUTHDATA, &vtpm_globals->srk_usage_auth, + BSG_TPM_SECRET, &vtpm_globals->storage_key_usage_auth, + BSG_TPM_SIZE32_DATA, &storage_key_pack); + + TPMTRYRETURN(buffer_init(&vtpm_globals->storageKeyWrap, 0, 0) ); + TPMTRYRETURN(buffer_append_raw(&vtpm_globals->storageKeyWrap, storage_key_pack.size, storage_key_pack.data) ); + + // Per DMI values to be saved + while ( step_size < fh_size ){ + if (fh_size - step_size < (long) (sizeof(UINT32) + 2*sizeof(TPM_DIGEST))) { + vtpmlogerror(VTPM_LOG_VTPM, "Encountered %ld extra bytes at end of manager state.\n", fh_size-step_size); + step_size = fh_size; + } else { + dmi_res = (VTPM_DMI_RESOURCE *) malloc(sizeof(VTPM_DMI_RESOURCE)); + dmis++; + + dmi_res->connected = FALSE; + + step_size += BSG_UnpackList(flat_global + step_size, 3, + BSG_TYPE_UINT32, &dmi_res->dmi_id, + BSG_TPM_DIGEST, &dmi_res->NVM_measurement, + 
BSG_TPM_DIGEST, &dmi_res->DMI_measurement); + + // install into map + dmi_id_key = (UINT32 *) malloc (sizeof(UINT32)); + *dmi_id_key = dmi_res->dmi_id; + if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, dmi_res)) { + status = TPM_FAIL; + goto abort_egress; + } + + } + + } + + goto egress; + + abort_egress: + vtpmlogerror(VTPM_LOG_VTPM, "Failed to save service data\n"); + egress: + + if (flat_global) + free(flat_global); + close(fh); + + vtpmloginfo(VTPM_LOG_VTPM, "Previously saved state reloaded (status = %d, dmis = %d).\n", (int) status, dmis); + return status; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/tpmpassthrough.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/tpmpassthrough.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,110 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// tpmpassthrough.c +// +// Functions regarding passing DMI requests to HWTPM +// +// ================================================================== + +#include "tcg.h" +#include "vtpm_manager.h" +#include "vtpmpriv.h" +#include "vtsp.h" +#include "log.h" + +TPM_RESULT VTPM_Handle_TPM_Command( VTPM_DMI_RESOURCE *dmi, + buffer_t *inbuf, + buffer_t *outbuf) { + + TPM_RESULT status = TPM_SUCCESS; + TPM_COMMAND_CODE *ord; + + ord = (TPM_COMMAND_CODE *) (inbuf->bytes + sizeof(TPM_TAG) + sizeof(UINT32)); + + switch (*ord) { + + // Forbidden for DMI use + case TPM_ORD_TakeOwnership: + case TPM_ORD_ChangeAuthOwner: + case TPM_ORD_DirWriteAuth: + case TPM_ORD_DirRead: + case TPM_ORD_AuthorizeMigrationKey: + case TPM_ORD_CreateMaintenanceArchive: + case TPM_ORD_LoadMaintenanceArchive: + case TPM_ORD_KillMaintenanceFeature: + case TPM_ORD_LoadManuMaintPub: + case TPM_ORD_ReadManuMaintPub: + case TPM_ORD_SelfTestFull: + case TPM_ORD_SelfTestStartup: + case TPM_ORD_CertifySelfTest: + case TPM_ORD_ContinueSelfTest: + case TPM_ORD_GetTestResult: + case TPM_ORD_Reset: + case TPM_ORD_OwnerClear: + case TPM_ORD_DisableOwnerClear: + case TPM_ORD_ForceClear: + case TPM_ORD_DisableForceClear: + case TPM_ORD_GetCapabilityOwner: + case TPM_ORD_OwnerSetDisable: + case TPM_ORD_PhysicalEnable: + case TPM_ORD_PhysicalDisable: + case TPM_ORD_SetOwnerInstall: + case 
TPM_ORD_PhysicalSetDeactivated: + case TPM_ORD_SetTempDeactivated: + case TPM_ORD_CreateEndorsementKeyPair: + case TPM_ORD_GetAuditEvent: + case TPM_ORD_GetAuditEventSigned: + case TPM_ORD_GetOrdinalAuditStatus: + case TPM_ORD_SetOrdinalAuditStatus: + case TPM_ORD_SetRedirection: + case TPM_ORD_FieldUpgrade: + case TSC_ORD_PhysicalPresence: + status = TPM_DISABLED_CMD; + goto abort_egress; + break; + + } // End ORD Switch + + // Call TCS with command + + TPMTRY(TPM_IOERROR, VTSP_RawTransmit( dmi->TCSContext,inbuf, outbuf) ); + + goto egress; + + abort_egress: + vtpmloginfo(VTPM_LOG_VTPM, "TPM Command Failed in tpmpassthrough.\n"); + egress: + + return status; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/vtpm_manager.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/vtpm_manager.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,735 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// vtpm_manager.c +// +// This file will house the main logic of the VTPM Manager +// +// ================================================================== + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> + +#ifndef VTPM_MULTI_VM +#include <pthread.h> +#include <errno.h> +#include <aio.h> +#include <time.h> +#endif + +#include "vtpm_manager.h" +#include "vtpmpriv.h" +#include "vtsp.h" +#include "bsg.h" +#include "hashtable.h" +#include "hashtable_itr.h" + +#include "log.h" +#include "buffer.h" + +VTPM_GLOBALS *vtpm_globals=NULL; + +#ifdef VTPM_MULTI_VM + #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, fmt, ##args ); + #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module, fmt, ##args ); + #define vtpmhandlerlogerror(module,fmt,args...) vtpmlogerror (module, fmt, ##args ); +#else + #define vtpmhandlerloginfo(module,fmt,args...) vtpmloginfo (module, "[%d]: " fmt, threadType, ##args ); + #define vtpmhandlerloginfomore(module,fmt,args...) vtpmloginfomore (module, fmt, ##args ); + #define vtpmhandlerlogerror(module,fmt,args...) 
vtpmlogerror (module, "[%d]: " fmt, threadType, ##args ); +#endif + +// --------------------------- Static Auths -------------------------- +#ifdef USE_FIXED_SRK_AUTH + +static BYTE FIXED_SRK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +static BYTE FIXED_EK_AUTH[20] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + +#endif + +// -------------------------- Hash table functions -------------------- + +static unsigned int hashfunc32(void *ky) { + return (* (UINT32 *) ky); +} + +static int equals32(void *k1, void *k2) { + return (*(UINT32 *) k1 == *(UINT32 *) k2); +} + +// --------------------------- Functions ------------------------------ + +TPM_RESULT VTPM_Create_Service(){ + + TPM_RESULT status = TPM_SUCCESS; + + // Generate Auth's for SRK & Owner +#ifdef USE_FIXED_SRK_AUTH + memcpy(vtpm_globals->owner_usage_auth, FIXED_SRK_AUTH, sizeof(TPM_AUTHDATA)); + memcpy(vtpm_globals->srk_usage_auth, FIXED_EK_AUTH, sizeof(TPM_AUTHDATA)); +#else + Crypto_GetRandom(vtpm_globals->owner_usage_auth, sizeof(TPM_AUTHDATA) ); + Crypto_GetRandom(vtpm_globals->srk_usage_auth, sizeof(TPM_AUTHDATA) ); +#endif + + // Take Owership of TPM + CRYPTO_INFO ek_cryptoInfo; + + vtpmloginfo(VTPM_LOG_VTPM, "Attempting Pubek Read. NOTE: Failure is ok.\n"); + status = VTSP_ReadPubek(vtpm_globals->manager_tcs_handle, &ek_cryptoInfo); + + // If we can read PubEK then there is no owner and we should take it. 
+ if (status == TPM_SUCCESS) { + TPMTRYRETURN(VTSP_TakeOwnership(vtpm_globals->manager_tcs_handle, + (const TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth, + (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth, + &ek_cryptoInfo, + &vtpm_globals->keyAuth)); + + TPMTRYRETURN(VTSP_DisablePubekRead(vtpm_globals->manager_tcs_handle, + (const TPM_AUTHDATA*)&vtpm_globals->owner_usage_auth, + &vtpm_globals->keyAuth)); + } + + // Generate storage key's auth + Crypto_GetRandom( &vtpm_globals->storage_key_usage_auth, + sizeof(TPM_AUTHDATA) ); + + TCS_AUTH osap; + TPM_AUTHDATA sharedsecret; + + TPMTRYRETURN( VTSP_OSAP(vtpm_globals->manager_tcs_handle, + TPM_ET_SRK, + 0, + (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth, + &sharedsecret, + &osap) ); + + TPMTRYRETURN( VTSP_CreateWrapKey( vtpm_globals->manager_tcs_handle, + TPM_KEY_BIND, + (const TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth, + TPM_SRK_KEYHANDLE, + (const TPM_AUTHDATA*)&sharedsecret, + &vtpm_globals->storageKeyWrap, + &osap) ); + + vtpm_globals->keyAuth.fContinueAuthSession = TRUE; + + goto egress; + + abort_egress: + exit(1); + + egress: + vtpmloginfo(VTPM_LOG_VTPM, "New VTPM Service initialized (Status = %d).\n", status); + return status; + +} + + +////////////////////////////////////////////////////////////////////////////// +#ifdef VTPM_MULTI_VM +int VTPM_Service_Handler(){ +#else +void *VTPM_Service_Handler(void *threadTypePtr){ +#endif + TPM_RESULT status = TPM_FAIL; // Should never return + UINT32 dmi, in_param_size, cmd_size, out_param_size, out_message_size, out_message_size_full, dmi_cmd_size; + BYTE *cmd_header, *in_param, *out_message, *dmi_cmd; + buffer_t *command_buf=NULL, *result_buf=NULL; + TPM_TAG tag; + TPM_COMMAND_CODE ord; + VTPM_DMI_RESOURCE *dmi_res; + int size_read, size_write, i; + +#ifndef VTPM_MULTI_VM + int threadType = *(int *) threadTypePtr; + + // async io structures + struct aiocb dmi_aio; + struct aiocb *dmi_aio_a[1]; + dmi_aio_a[0] = &dmi_aio; +#endif + +#ifdef 
DUMMY_BACKEND + int dummy_rx; +#endif + + // TODO: Reinsert ifdefs to enable support for MULTI-VM + + cmd_header = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV); + command_buf = (buffer_t *) malloc(sizeof(buffer_t)); + result_buf = (buffer_t *) malloc(sizeof(buffer_t)); + +#ifndef VTPM_MULTI_VM + TPM_RESULT *ret_value = (TPM_RESULT *) malloc(sizeof(TPM_RESULT)); +#endif + + int *tx_fh, *rx_fh; + +#ifdef VTPM_MULTI_VM + rx_fh = &vtpm_globals->be_fh; +#else + if (threadType == BE_LISTENER_THREAD) { +#ifdef DUMMY_BACKEND + dummy_rx = -1; + rx_fh = &dummy_rx; +#else + rx_fh = &vtpm_globals->be_fh; +#endif + } else { // DMI_LISTENER_THREAD + rx_fh = &vtpm_globals->vtpm_rx_fh; + } +#endif + +#ifndef VTPM_MULTI_VM + int fh; + if (threadType == BE_LISTENER_THREAD) { + tx_fh = &vtpm_globals->be_fh; + if ( (fh = open(GUEST_RX_FIFO, O_RDWR)) == -1) { + if ( mkfifo(GUEST_RX_FIFO, S_IWUSR | S_IRUSR ) ){ + *ret_value = TPM_FAIL; + pthread_exit(ret_value); + } + } else + close(fh); + + } else { // else DMI_LISTENER_THREAD + // tx_fh will be set once the DMI is identified + // But we need to make sure the read pip is created. 
+ if ( (fh = open(VTPM_RX_FIFO, O_RDWR)) == -1) { + if ( mkfifo(VTPM_RX_FIFO, S_IWUSR | S_IRUSR ) ){ + *ret_value = TPM_FAIL; + pthread_exit(ret_value); + } + } else + close(fh); + + } +#endif + + while(1) { + + if (threadType == BE_LISTENER_THREAD) { + vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for Guest requests & ctrl messages.\n"); + } else + vtpmhandlerloginfo(VTPM_LOG_VTPM, "Waiting for DMI messages.\n"); + + + if (*rx_fh < 0) { + if (threadType == BE_LISTENER_THREAD) +#ifdef DUMMY_BACKEND + *rx_fh = open("/tmp/in.fifo", O_RDWR); +#else + *rx_fh = open(VTPM_BE_DEV, O_RDWR); +#endif + else // DMI Listener + *rx_fh = open(VTPM_RX_FIFO, O_RDWR); + + } + + if (*rx_fh < 0) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh.\n"); +#ifdef VTPM_MULTI_VM + return TPM_IOERROR; +#else + *ret_value = TPM_IOERROR; + pthread_exit(ret_value); +#endif + } + + size_read = read(*rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV); + if (size_read > 0) { + vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV[%d}: 0x", size_read); + for (i=0; i<size_read; i++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]); + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't read from BE. Aborting... \n"); + close(*rx_fh); + *rx_fh = -1; + goto abort_command; + } + + if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) { + vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "\n"); + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command shorter than normal header (%d bytes). 
Aborting...\n", size_read); + goto abort_command; + } + + BSG_UnpackList(cmd_header, 4, + BSG_TYPE_UINT32, &dmi, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, &in_param_size, + BSG_TPM_COMMAND_CODE, &ord ); + + // Note that in_param_size is in the client's context + cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT; + if (cmd_size > 0) { + in_param = (BYTE *) malloc(cmd_size); + size_read = read( *rx_fh, in_param, cmd_size); + if (size_read > 0) { + for (i=0; i<size_read; i++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]); + + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE. Aborting... \n"); + close(*rx_fh); + *rx_fh = -1; + goto abort_command; + } + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + + if (size_read < (int) cmd_size) { + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) is shorter than header indicates(%d). Aborting...\n", size_read, cmd_size); + goto abort_command; + } + } else { + in_param = NULL; + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + } + + if ((threadType != BE_LISTENER_THREAD) && (dmi == 0)) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to access dom0 commands from DMI interface. Aborting...\n"); + goto abort_command; + } + + dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi); + if (dmi_res == NULL) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to non-existent DMI in domain: %d. Aborting...\n", dmi); + goto abort_command; + } + if (!dmi_res->connected) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempted access to disconnected DMI in domain: %d. Aborting...\n", dmi); + goto abort_command; + } + + if (threadType != BE_LISTENER_THREAD) + tx_fh = &dmi_res->vtpm_tx_fh; + // else we set this before the while loop since it doesn't change. 
+ + if ( (buffer_init_convert(command_buf, cmd_size, in_param) != TPM_SUCCESS) || + (buffer_init(result_buf, 0, 0) != TPM_SUCCESS) ) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers. Aborting...\n"); + goto abort_command; + } + + // Dispatch it as either control or user request. + if (tag == VTPM_TAG_REQ) { + if (dmi_res->dmi_id == VTPM_CTL_DM){ + switch (ord) { + case VTPM_ORD_OPEN: + status = VTPM_Handle_New_DMI(command_buf); + break; + + case VTPM_ORD_CLOSE: + status = VTPM_Handle_Close_DMI(command_buf); + break; + + case VTPM_ORD_DELETE: + status = VTPM_Handle_Delete_DMI(command_buf); + break; + default: + status = TPM_BAD_ORDINAL; + } // switch + } else { + + switch (ord) { + case VTPM_ORD_SAVENVM: + status= VTPM_Handle_Save_NVM(dmi_res, + command_buf, + result_buf); + break; + case VTPM_ORD_LOADNVM: + status= VTPM_Handle_Load_NVM(dmi_res, + command_buf, + result_buf); + break; + + case VTPM_ORD_TPMCOMMAND: + status= VTPM_Handle_TPM_Command(dmi_res, + command_buf, + result_buf); + break; + + default: + status = TPM_BAD_ORDINAL; + } // switch + } + } else { // This is not a VTPM Command at all + + if (threadType == BE_LISTENER_THREAD) { + if (dmi == 0) { + // This usually indicates a FE/BE driver. 
+ vtpmhandlerlogerror(VTPM_LOG_VTPM, "Illegal use of TPM command from dom0\n"); + status = TPM_FAIL; + } else { + vtpmhandlerloginfo(VTPM_LOG_VTPM, "Forwarding command to DMI.\n"); + + if (dmi_res->guest_tx_fh < 0) + dmi_res->guest_tx_fh = open(dmi_res->guest_tx_fname, O_WRONLY | O_NONBLOCK); + + if (dmi_res->guest_tx_fh < 0){ + vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound fh to dmi.\n"); + status = TPM_IOERROR; + goto abort_with_error; + } + + //Note: Send message + dmi_id + if (cmd_size) { + dmi_cmd = (BYTE *) malloc(VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size); + dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size; + memcpy(dmi_cmd, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV); + memcpy(dmi_cmd + VTPM_COMMAND_HEADER_SIZE_SRV, in_param, cmd_size); + size_write = write(dmi_res->guest_tx_fh, dmi_cmd, dmi_cmd_size); + + if (size_write > 0) { + vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT (DMI): 0x"); + for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV + cmd_size; i++) { + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", dmi_cmd[i]); + } + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI. Aborting... \n"); + close(dmi_res->guest_tx_fh); + dmi_res->guest_tx_fh = -1; + status = TPM_IOERROR; + goto abort_with_error; + } + free(dmi_cmd); + } else { + dmi_cmd_size = VTPM_COMMAND_HEADER_SIZE_SRV; + size_write = write(dmi_res->guest_tx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV ); + if (size_write > 0) { + for (i=0; i<VTPM_COMMAND_HEADER_SIZE_SRV; i++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]); + + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to DMI. Aborting... 
\n"); + close(dmi_res->guest_tx_fh); + dmi_res->guest_tx_fh = -1; + status = TPM_IOERROR; + goto abort_with_error; + } + } + + if (size_write != (int) dmi_cmd_size) + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Could not write entire command to DMI (%d/%d)\n", size_write, dmi_cmd_size); + buffer_free(command_buf); + + if (vtpm_globals->guest_rx_fh < 0) + vtpm_globals->guest_rx_fh = open(GUEST_RX_FIFO, O_RDONLY); + + if (vtpm_globals->guest_rx_fh < 0){ + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Can't open inbound fh to dmi.\n"); + status = TPM_IOERROR; + goto abort_with_error; + } + + size_read = read( vtpm_globals->guest_rx_fh, cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV); + if (size_read > 0) { + vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "RECV (DMI): 0x"); + for (i=0; i<size_read; i++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", cmd_header[i]); + + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from DMI. Aborting... \n"); + close(vtpm_globals->guest_rx_fh); + vtpm_globals->guest_rx_fh = -1; + status = TPM_IOERROR; + goto abort_with_error; + } + + if (size_read < (int) VTPM_COMMAND_HEADER_SIZE_SRV) { + //vtpmdeepsublog("\n"); + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command from DMI shorter than normal header. Aborting...\n"); + status = TPM_IOERROR; + goto abort_with_error; + } + + BSG_UnpackList(cmd_header, 4, + BSG_TYPE_UINT32, &dmi, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, &in_param_size, + BSG_TPM_COMMAND_CODE, &status ); + + // Note that in_param_size is in the client's context + cmd_size = in_param_size - VTPM_COMMAND_HEADER_SIZE_CLT; + if (cmd_size > 0) { + in_param = (BYTE *) malloc(cmd_size); + size_read = read( vtpm_globals->guest_rx_fh, in_param, cmd_size); + if (size_read > 0) { + for (i=0; i<size_read; i++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[i]); + + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error reading from BE. Aborting... 
\n"); + close(vtpm_globals->guest_rx_fh); + vtpm_globals->guest_rx_fh = -1; + status = TPM_IOERROR; + goto abort_with_error; + } + vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n"); + + if (size_read < (int)cmd_size) { + vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n"); + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Command read(%d) from DMI is shorter than header indicates(%d). Aborting...\n", size_read, cmd_size); + status = TPM_IOERROR; + goto abort_with_error; + } + } else { + in_param = NULL; + vtpmhandlerloginfomore(VTPM_LOG_VTPM, "\n"); + } + + if (buffer_init_convert(result_buf, cmd_size, in_param) != TPM_SUCCESS) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Failed to setup buffers. Aborting...\n"); + status = TPM_FAIL; + goto abort_with_error; + } + + vtpmhandlerloginfo(VTPM_LOG_VTPM, "Sending DMI's response to guest.\n"); + } // end else for if (dmi==0) + + } else { // This is a DMI lister thread. Thus this is from a DMI +#ifdef VTPM_MULTI_VM + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Attempt to use unsupported direct access to TPM.\n"); + vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "Bad Command. 
dmi:%d, tag:%d, size:%d, ord:%d, Params: ", dmi, tag, in_param_size, ord); + for (UINT32 q=0; q<cmd_size; q++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", in_param[q]); + + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + + status = TPM_FAIL; +#else + +#endif + } // end else for if BE Listener + } // end else for is VTPM Command + + // Send response to Backend + if (*tx_fh < 0) { + if (threadType == BE_LISTENER_THREAD) +#ifdef DUMMY_BACKEND + *tx_fh = open("/tmp/out.fifo", O_RDWR); +#else + *tx_fh = open(VTPM_BE_DEV, O_RDWR); +#endif + else // DMI Listener + *tx_fh = open(dmi_res->vtpm_tx_fname, O_WRONLY); + } + + if (*tx_fh < 0) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "VTPM ERROR: Can't open outbound fh.\n"); +#ifdef VTPM_MULTI_VM + return TPM_IOERROR; +#else + *ret_value = TPM_IOERROR; + pthread_exit(ret_value); +#endif + } + + abort_with_error: + // Prepend VTPM header with destination DM stamped + out_param_size = buffer_len(result_buf); + out_message_size = VTPM_COMMAND_HEADER_SIZE_CLT + out_param_size; + out_message_size_full = VTPM_COMMAND_HEADER_SIZE_SRV + out_param_size; + out_message = (BYTE *) malloc (out_message_size_full); + + BSG_PackList(out_message, 4, + BSG_TYPE_UINT32, (BYTE *) &dmi, + BSG_TPM_TAG, (BYTE *) &tag, + BSG_TYPE_UINT32, (BYTE *) &out_message_size, + BSG_TPM_RESULT, (BYTE *) &status); + + if (buffer_len(result_buf) > 0) + memcpy(out_message + VTPM_COMMAND_HEADER_SIZE_SRV, result_buf->bytes, out_param_size); + + + //Note: Send message + dmi_id + size_write = write(*tx_fh, out_message, out_message_size_full ); + if (size_write > 0) { + vtpmhandlerloginfo(VTPM_LOG_VTPM_DEEP, "SENT: 0x"); + for (i=0; i < out_message_size_full; i++) + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", out_message[i]); + + vtpmhandlerloginfomore(VTPM_LOG_VTPM_DEEP, "\n"); + } else { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Error writing to BE. Aborting... 
\n"); + close(*tx_fh); + *tx_fh = -1; + goto abort_command; + } + free(out_message); + + if (size_write < (int)out_message_size_full) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "Unable to write full command to BE (%d/%d)\n", size_write, out_message_size_full); + goto abort_command; + } + + abort_command: + //free buffers + bzero(cmd_header, VTPM_COMMAND_HEADER_SIZE_SRV); + //free(in_param); // This was converted to command_buf. No need to free + if (command_buf != result_buf) + buffer_free(result_buf); + + buffer_free(command_buf); + +#ifndef VTPM_MULTI_VM + if (threadType != BE_LISTENER_THREAD) { +#endif + if ( (vtpm_globals->DMI_table_dirty) && + (VTPM_SaveService() != TPM_SUCCESS) ) { + vtpmhandlerlogerror(VTPM_LOG_VTPM, "ERROR: Unable to save manager data.\n"); + } +#ifndef VTPM_MULTI_VM + } +#endif + + } // End while(1) + +} + + +/////////////////////////////////////////////////////////////////////////////// +TPM_RESULT VTPM_Init_Service() { + TPM_RESULT status = TPM_FAIL; + BYTE *randomsead; + UINT32 randomsize; + + if ((vtpm_globals = (VTPM_GLOBALS *) malloc(sizeof(VTPM_GLOBALS))) == NULL){ + status = TPM_FAIL; + goto abort_egress; + } + memset(vtpm_globals, 0, sizeof(VTPM_GLOBALS)); + vtpm_globals->be_fh = -1; + +#ifndef VTPM_MULTI_VM + vtpm_globals->vtpm_rx_fh = -1; + vtpm_globals->guest_rx_fh = -1; +#endif + if ((vtpm_globals->dmi_map = create_hashtable(10, hashfunc32, equals32)) == NULL){ + status = TPM_FAIL; + goto abort_egress; + } + + vtpm_globals->DMI_table_dirty = FALSE; + + // Create new TCS Object + vtpm_globals->manager_tcs_handle = 0; + + TPMTRYRETURN(TCS_create()); + + // Create TCS Context for service + TPMTRYRETURN( TCS_OpenContext(&vtpm_globals->manager_tcs_handle ) ); + + TPMTRYRETURN( TCSP_GetRandom(vtpm_globals->manager_tcs_handle, + &randomsize, + &randomsead)); + + Crypto_Init(randomsead, randomsize); + TPMTRYRETURN( TCS_FreeMemory (vtpm_globals->manager_tcs_handle, randomsead)); + + // Create OIAP session for service's authorized commands 
+ TPMTRYRETURN( VTSP_OIAP( vtpm_globals->manager_tcs_handle, + &vtpm_globals->keyAuth) ); + vtpm_globals->keyAuth.fContinueAuthSession = TRUE; + + // If failed, create new Service. + if (VTPM_LoadService() != TPM_SUCCESS) + TPMTRYRETURN( VTPM_Create_Service() ); + + + //Load Storage Key + TPMTRYRETURN( VTSP_LoadKey( vtpm_globals->manager_tcs_handle, + TPM_SRK_KEYHANDLE, + &vtpm_globals->storageKeyWrap, + (const TPM_AUTHDATA*)&vtpm_globals->srk_usage_auth, + &vtpm_globals->storageKeyHandle, + &vtpm_globals->keyAuth, + &vtpm_globals->storageKey) ); + + // Create entry for Dom0 for control messages + TPMTRYRETURN( VTPM_Handle_New_DMI(NULL) ); + + // --------------------- Command handlers --------------------------- + + goto egress; + + abort_egress: + egress: + + return(status); +} + +void VTPM_Stop_Service() { + VTPM_DMI_RESOURCE *dmi_res; + struct hashtable_itr *dmi_itr; + + // Close all the TCS contexts. TCS should evict keys based on this + if (hashtable_count(vtpm_globals->dmi_map) > 0) { + dmi_itr = hashtable_iterator(vtpm_globals->dmi_map); + do { + dmi_res = (VTPM_DMI_RESOURCE *) hashtable_iterator_value(dmi_itr); + if (dmi_res->connected) + if (close_dmi( dmi_res ) != TPM_SUCCESS) + vtpmlogerror(VTPM_LOG_VTPM, "Failed to close dmi %d properly.\n", dmi_res->dmi_id); + + } while (hashtable_iterator_advance(dmi_itr)); + free (dmi_itr); + } + + + TCS_CloseContext(vtpm_globals->manager_tcs_handle); + + if ( (vtpm_globals->DMI_table_dirty) && + (VTPM_SaveService() != TPM_SUCCESS) ) + vtpmlogerror(VTPM_LOG_VTPM, "Unable to save manager data.\n"); + + hashtable_destroy(vtpm_globals->dmi_map, 1); + free(vtpm_globals); + + close(vtpm_globals->be_fh); + Crypto_Exit(); + + vtpmloginfo(VTPM_LOG_VTPM, "VTPM Manager stopped.\n"); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/vtpm_manager.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/vtpm_manager.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,137 @@ +// 
=================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. 
// ===================================================================
//
// vtpm_manager.h
//
// Public Interface header for VTPM Manager
//
// ==================================================================

#ifndef __VTPM_MANAGER_H__
#define __VTPM_MANAGER_H__

#include "tcg.h"

// Wire-protocol tags for manager requests/responses.
#define VTPM_TAG_REQ 0x01c1
#define VTPM_TAG_RSP 0x01c4
#define COMMAND_BUFFER_SIZE 4096

// Header sizes. Note Header MAY include the DMI
// SRV = manager-side header (includes 4-byte DMI id prepended by the channel);
// CLT = client-side header (no DMI id).
#define VTPM_COMMAND_HEADER_SIZE_SRV ( sizeof(UINT32) + sizeof(TPM_TAG) + sizeof(UINT32) + sizeof(TPM_COMMAND_CODE))
#define VTPM_COMMAND_HEADER_SIZE_CLT (                  sizeof(TPM_TAG) + sizeof(UINT32) + sizeof(TPM_COMMAND_CODE))

// ********************** Public Functions *************************
TPM_RESULT VTPM_Init_Service();   // Start VTPM Service
void       VTPM_Stop_Service();   // Stop VTPM Service
#ifdef VTPM_MULTI_VM
int VTPM_Service_Handler();
#else
void *VTPM_Service_Handler(void *threadTypePtr);
#endif

//************************ Command Codes ****************************
#define VTPM_ORD_OPEN       1 // ULM Creates New DMI
#define VTPM_ORD_CLOSE      2 // ULM Closes a DMI
#define VTPM_ORD_DELETE     3 // ULM Permanently Deletes DMI
#define VTPM_ORD_SAVENVM    4 // DMI requests its secrets be sealed/saved (see API below)
#define VTPM_ORD_LOADNVM    5 // DMI requests its saved secrets be unsealed/returned
#define VTPM_ORD_TPMCOMMAND 6 // DMI issues HW TPM Command

//************************ Return Codes ****************************
#define VTPM_SUCCESS               0
#define VTPM_FAIL                  1
#define VTPM_UNSUPPORTED           2
#define VTPM_FORBIDDEN             3
#define VTPM_RESTORE_CONTEXT_FAILED 4
#define VTPM_INVALID_REQUEST       5

/******************* Command Parameter API *************************

VTPM Command Format
  dmi: 4 bytes           // Source of message.
                         // WARNING: This is prepended by the channel.
                         // Thus it is received by VTPM Manager,
                         // but not sent by DMI
  tpm tag: 2 bytes
  command size: 4 bytes  // Size of command including header but not DMI
  ord: 4 bytes           // Command ordinal above
  parameters: size - 10 bytes // Command Parameter

VTPM Response Format
  tpm tag: 2 bytes
  response_size: 4 bytes
  status: 4 bytes
  parameters: size - 10 bytes


VTPM_Open:
  Input Parameters:
    Domain_type: 1 byte
    domain_id: 4 bytes
    instance_id: 4 bytes
  Output Parameters:
    None

VTPM_Close
  Input Parameters:
    instance_id: 4 bytes
  Output Parameters:
    None

VTPM_Delete
  Input Parameters:
    instance_id: 4 bytes
  Output Parameters:
    None

VTPM_SaveNVM
  Input Parameters:
    data: n bytes (Header indicates size of data)
  Output Parameters:
    None

VTPM_LoadNVM
  Input Parameters:
    None
  Output Parameters:
    data: n bytes (Header indicates size of data)

VTPM_TPMCommand
  Input Parameters:
    TPM Command Byte Stream: n bytes
  Output Parameters:
    TPM Reponse Byte Stream: n bytes

*********************************************************************/

#endif // __VTPM_MANAGER_H__
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/vtpmd.c
--- /dev/null	Thu Sep  8 15:18:40 2005
+++ b/tools/vtpm_manager/manager/vtpmd.c	Fri Sep  9 16:30:54 2005
@@ -0,0 +1,134 @@
// ===================================================================
//
// Copyright (c) 2005, Intel Corp.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//   * Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above
//     copyright notice, this list of conditions and the following
//     disclaimer in the documentation and/or other materials provided
//     with the distribution.
+// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. 
// ===================================================================
//
// vtpmd.c
//
// Application
//
// ===================================================================

#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>
#include "vtpm_manager.h"
#include "vtpmpriv.h"
#include "tcg.h"
#include "log.h"

#ifndef VTPM_MULTI_VM
 #include <pthread.h>
#endif

// Shutdown handler, installed for SIGINT and SIGHUP.
// NOTE(review): this handler calls the vtpm logging functions, which are
// not async-signal-safe -- confirm acceptable for this daemon.
void signal_handler(int reason) {
#ifndef VTPM_MULTI_VM

  // Only the master thread performs the shutdown logging; worker
  // threads just exit themselves.
  if (pthread_equal(pthread_self(), vtpm_globals->master_pid)) {
    if (reason >= 0) { // Reason is a signal
      vtpmloginfo(VTPM_LOG_VTPM, "VTPM Manager shutting down for signal %d.\n", reason);
    } else {// Reason is a TPM_RESULT * -1
      vtpmloginfo(VTPM_LOG_VTPM,"VTPM Manager shuting down for: %s\n", tpm_get_error_name(-1 * reason) );
    }

    return;
  } else {
    vtpmloginfo(VTPM_LOG_VTPM, "Child shutting down\n");
    pthread_exit(NULL);
  }
#else
  VTPM_Stop_Service();
  exit(-1);
#endif
}

struct sigaction ctl_c_handler;

// Entry point: initialize the service, install signal handlers, then either
// run the single handler loop (VTPM_MULTI_VM) or spawn the BE and DMI
// listener threads and wait for them before stopping the service.
int main(int argc, char **argv) {

  vtpmloginfo(VTPM_LOG_VTPM, "Starting VTPM.\n");

  if (VTPM_Init_Service() != TPM_SUCCESS) {
    vtpmlogerror(VTPM_LOG_VTPM, "Closing vtpmd due to error during startup.\n");
    return -1;
  }

  ctl_c_handler.sa_handler = signal_handler;
  sigemptyset(&ctl_c_handler.sa_mask);
  ctl_c_handler.sa_flags = 0;

  if (sigaction(SIGINT, &ctl_c_handler, NULL) == -1)
    vtpmlogerror(VTPM_LOG_VTPM, "Could not install SIGINT handler. Ctl+break will not stop service gently.\n");

  // For easier debugging with gdb
  if (sigaction(SIGHUP, &ctl_c_handler, NULL) == -1)
    vtpmlogerror(VTPM_LOG_VTPM, "Could not install SIGHUP handler. Ctl+break will not stop service gently.\n");

#ifdef VTPM_MULTI_VM
  TPM_RESULT status = VTPM_Service_Handler();

  if (status != TPM_SUCCESS)
    vtpmlogerror(VTPM_LOG_VTPM, "VTPM Manager exited with status %s. It never should exit.\n", tpm_get_error_name(status));

  return -1;
#else
  sigset_t sig_mask;

  // Block SIGPIPE so a dead FIFO peer surfaces as a write() error, not a kill.
  sigemptyset(&sig_mask);
  sigaddset(&sig_mask, SIGPIPE);
  sigprocmask(SIG_BLOCK, &sig_mask, NULL);
  //pthread_mutex_init(&vtpm_globals->dmi_mutex, NULL);
  pthread_t be_thread, dmi_thread;
  int betype_be, dmitype_dmi;

  vtpm_globals->master_pid = pthread_self();

  betype_be = BE_LISTENER_THREAD;
  if (pthread_create(&be_thread, NULL, VTPM_Service_Handler, &betype_be) != 0) {
    vtpmlogerror(VTPM_LOG_VTPM, "Failed to launch BE Thread.\n");
    exit(-1);
  }

  dmitype_dmi = DMI_LISTENER_THREAD;
  if (pthread_create(&dmi_thread, NULL, VTPM_Service_Handler, &dmitype_dmi) != 0) {
    vtpmlogerror(VTPM_LOG_VTPM, "Failed to launch DMI Thread.\n");
    exit(-1);
  }

  //Join the other threads until exit time.
  pthread_join(be_thread, NULL);
  pthread_join(dmi_thread, NULL);

  VTPM_Stop_Service();
  return 0;
#endif
}
diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/vtpmpriv.h
--- /dev/null	Thu Sep  8 15:18:40 2005
+++ b/tools/vtpm_manager/manager/vtpmpriv.h	Fri Sep  9 16:30:54 2005
@@ -0,0 +1,151 @@
// ===================================================================
//
// Copyright (c) 2005, Intel Corp.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//   * Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above
//     copyright notice, this list of conditions and the following
//     disclaimer in the documentation and/or other materials provided
//     with the distribution.
//   * Neither the name of Intel Corporation nor the names of its
//     contributors may be used to endorse or promote products derived
//     from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// vtpmpriv.h +// +// Structures and functions private to the manager +// +// ================================================================== + +#ifndef __VTPMPRIV_H__ +#define __VTPMPRIV_H__ + +#include "tcg.h" +#include "tcs.h" +#include "buffer.h" +#include "crypto.h" + +#define STATE_FILE "/var/vtpm/VTPM" +#define DMI_NVM_FILE "/var/vtpm/vtpm_dm_%d.data" +#define VTPM_BE_DEV "/dev/vtpm" +#define VTPM_CTL_DM 0 + +#ifndef VTPM_MUTLI_VM + #include <sys/types.h> + #define GUEST_TX_FIFO "/var/vtpm/fifos/guest-to-%d.fifo" + #define GUEST_RX_FIFO "/var/vtpm/fifos/guest-from-all.fifo" + + #define VTPM_TX_FIFO "/var/vtpm/fifos/vtpm-to-%d.fifo" + #define VTPM_RX_FIFO "/var/vtpm/fifos/vtpm-from-all.fifo" + + #define BE_LISTENER_THREAD 1 + #define DMI_LISTENER_THREAD 2 + + // Seconds until DMI timeout. Timeouts result in DMI being out + // of sync, which may require a reboot of DMI and guest to recover + // from. Don't set this to low. Also note that DMI may issue a TPM + // call so we should expect time to process at DMI + TPM processing. 
+ #define DMI_TIMEOUT 90 +#endif + + +// ------------------------ Private Structures ----------------------- +typedef struct VTPM_DMI_RESOURCE_T { + // I/O info for Manager to talk to DMI's over FIFOs +#ifndef VTPM_MUTLI_VM + int guest_tx_fh; // open GUEST_TX_FIFO + int vtpm_tx_fh; // open VTPM_TX_FIFO + char *guest_tx_fname; // open GUEST_TX_FIFO + char *vtpm_tx_fname; // open VTPM_TX_FIFO + + pid_t dmi_pid; +#endif + // Non-persistent Information + bool connected; + UINT32 dmi_domain_id; + TCS_CONTEXT_HANDLE TCSContext; // TCS Handle + char *NVMLocation; // NULL term string indicating location + // of NVM. + // Persistent Information about DMI + UINT32 dmi_id; + TPM_DIGEST NVM_measurement; // Equal to the SHA1 of the blob + TPM_DIGEST DMI_measurement; // Correct measurement of the owning DMI +} VTPM_DMI_RESOURCE; + +typedef struct tdVTPM_GLOBALS { + // Non-persistent data + int be_fh; // File handle to ipc used to communicate with backend +#ifndef VTPM_MULTI_VM + int vtpm_rx_fh; + int guest_rx_fh; + + pid_t master_pid; +#endif + struct hashtable *dmi_map; // Table of all DMI's known indexed by persistent instance # +#ifndef VTPM_MULTI_VM + pthread_mutex_t dmi_map_mutex; // +#endif + TCS_CONTEXT_HANDLE manager_tcs_handle; // TCS Handle used by manager + TPM_HANDLE storageKeyHandle; // Key used by persistent store + CRYPTO_INFO storageKey; // For software encryption + TCS_AUTH keyAuth; // OIAP session for storageKey + BOOL DMI_table_dirty; // Indicates that a command + // has updated the DMI table + + + // Persistent Data + TPM_AUTHDATA owner_usage_auth; // OwnerAuth of real TPM + TPM_AUTHDATA srk_usage_auth; // SRK Auth of real TPM + buffer_t storageKeyWrap; // Wrapped copy of storageKey + + TPM_AUTHDATA storage_key_usage_auth; + +}VTPM_GLOBALS; + +//Global dmi map +extern VTPM_GLOBALS *vtpm_globals; + +// ********************** Command Handler Prototypes *********************** +TPM_RESULT VTPM_Handle_Load_NVM( VTPM_DMI_RESOURCE *myDMI, + const buffer_t *inbuf, + 
buffer_t *outbuf); + +TPM_RESULT VTPM_Handle_Save_NVM( VTPM_DMI_RESOURCE *myDMI, + const buffer_t *inbuf, + buffer_t *outbuf); + +TPM_RESULT VTPM_Handle_TPM_Command( VTPM_DMI_RESOURCE *dmi, + buffer_t *inbuf, + buffer_t *outbuf); + +TPM_RESULT VTPM_Handle_New_DMI(const buffer_t *param_buf); + +TPM_RESULT VTPM_Handle_Close_DMI(const buffer_t *param_buf); + +TPM_RESULT VTPM_Handle_Delete_DMI(const buffer_t *param_buf); + +TPM_RESULT VTPM_SaveService(void); +TPM_RESULT VTPM_LoadService(void); + +TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res); +#endif // __VTPMPRIV_H__ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/vtsp.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/vtsp.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,810 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
// ===================================================================
//
// vtsp.c
//
// Higher level interface to TCS for use in service.
//
// ==================================================================

#include <string.h>
#include "tcg.h"
#include "tcs.h"
#include "bsg.h"
#include "log.h"
#include "crypto.h"
#include "vtsp.h"
#include "buffer.h"

#define RSA_KEY_SIZE 0x0800

/***********************************************************************************
 * GenerateAuth: Generate authorization info to be sent back to application
 *
 * Parameters: inParamDigestText     The concatenation of output parameters to be SHA1ed
 *             inParamDigestTextSize Size of inParamDigestText
 *             HMACkey               Key to be used for HMACing
 *                                   For OIAP use key.authUsage or PersistStore.ownerAuth
 *                                   For OSAP use shared secret
 *             auth                  Authorization information from the application
 *
 * Return:     TPM_SUCCESS           Authorization data created
 *             TPM_AUTHFAIL          Invalid (NULL) HMACkey presented for OSAP
 *************************************************************************************/
TPM_RESULT GenerateAuth( /*[IN]*/ const BYTE *inParamDigestText,
                         /*[IN]*/ UINT32 inParamDigestTextSize,
                         /*[IN]*/ const TPM_SECRET *HMACkey,
                         /*[IN,OUT]*/ TCS_AUTH *auth) {

  if (inParamDigestText == NULL || auth == NULL)
    return (TPM_AUTHFAIL);
  else {

    //Generate new OddNonce
    Crypto_GetRandom(auth->NonceOdd.nonce, sizeof(TPM_NONCE));

    // Create SHA1 inParamDigest
    TPM_DIGEST inParamDigest;
    Crypto_SHA1Full(inParamDigestText, inParamDigestTextSize, (BYTE *) &inParamDigest);

    // Create HMAC text. (Concat inParamsDigest with inAuthSetupParams).
    BYTE hmacText[sizeof(TPM_DIGEST) + (2 * sizeof(TPM_NONCE)) + sizeof(BOOL)];

    BSG_PackList( hmacText, 4,
                  BSG_TPM_DIGEST, &inParamDigest,
                  BSG_TPM_NONCE, &(auth->NonceEven),
                  BSG_TPM_NONCE, &(auth->NonceOdd),
                  BSG_TYPE_BOOL, &(auth->fContinueAuthSession) );

    Crypto_HMAC((BYTE *) hmacText, sizeof(hmacText), (BYTE *) HMACkey, sizeof(TPM_DIGEST), (BYTE *) &(auth->HMAC));

    return(TPM_SUCCESS);

  }
}

/***********************************************************************************
 * VerifyAuth: Verify the authdata for a command requiring authorization
 *
 * Parameters: outParamDigestText     The concatenation of parameters to be SHA1ed
 *             outParamDigestTextSize Size of outParamDigestText
 *             HMACkey                Key to be used for HMACing
 *                                    For OIAP use key.authUsage or PersistStore.ownerAuth
 *                                    For OSAP use NULL (It will be aquired from the Auth Session)
 *                                    If unknown (default), assume OIAP
 *             auth                   A TCS_AUTH info for the session
 *             hContext               If specified, on failed Auth, VerifyAuth will
 *                                    generate a new OIAP session in place of the
 *                                    destroyed session.
 *
 * Return:     TPM_SUCCESS            Authorization Verified
 *             TPM_AUTHFAIL           Authorization Failed
 *             TPM_FAIL               Failure during SHA1 routines
 *************************************************************************************/
TPM_RESULT VerifyAuth( /*[IN]*/ const BYTE *outParamDigestText,
                       /*[IN]*/ UINT32 outParamDigestTextSize,
                       /*[IN]*/ const TPM_SECRET *HMACkey,
                       /*[IN,OUT]*/ TCS_AUTH *auth,
                       /*[IN]*/ TCS_CONTEXT_HANDLE hContext) {
  if (outParamDigestText == NULL || auth == NULL)
    return (TPM_AUTHFAIL);


  // Create SHA1 inParamDigest
  TPM_DIGEST outParamDigest;
  Crypto_SHA1Full(outParamDigestText, outParamDigestTextSize, (BYTE *) &outParamDigest);

  // Create HMAC text. (Concat inParamsDigest with inAuthSetupParams).
  TPM_DIGEST hm;
  BYTE hmacText[sizeof(TPM_DIGEST) + (2 * sizeof(TPM_NONCE)) + sizeof(BOOL)];

  BSG_PackList( hmacText, 4,
                BSG_TPM_DIGEST, &outParamDigest,
                BSG_TPM_NONCE, &(auth->NonceEven),
                BSG_TPM_NONCE, &(auth->NonceOdd),
                BSG_TYPE_BOOL, &(auth->fContinueAuthSession) );

  Crypto_HMAC((BYTE *) hmacText, sizeof(hmacText),
              (BYTE *) HMACkey, sizeof(TPM_DIGEST), (BYTE *) &hm);

  // Compare correct HMAC with provided one.
  if (memcmp (&hm, &(auth->HMAC), sizeof(TPM_DIGEST)) == 0) // 0 indicates equality
    return (TPM_SUCCESS);
  else {
    // Failed auth destroys the session on the TPM; open a fresh OIAP
    // session so the caller's auth handle remains usable.
    VTSP_OIAP( hContext, auth);
    return (TPM_AUTHFAIL);
  }
}

// Open a new OIAP authorization session, filling auth's handle and NonceEven.
TPM_RESULT VTSP_OIAP(const TCS_CONTEXT_HANDLE hContext,
                     TCS_AUTH *auth) {

  vtpmloginfo(VTPM_LOG_VTSP, "OIAP.\n");
  TPM_RESULT status = TPM_SUCCESS;
  TPMTRYRETURN( TCSP_OIAP(hContext,
                          &auth->AuthHandle,
                          &auth->NonceEven) );
  goto egress;

 abort_egress:

 egress:

  return status;
}

// Open a new OSAP session and derive the shared secret from the OSAP nonces
// and the entity's usage auth.
// NOTE(review): the entityType/entityValue parameters are currently ignored;
// TPM_ET_SRK and 0 are hardcoded in the TCSP_OSAP call below -- confirm intent.
TPM_RESULT VTSP_OSAP(const TCS_CONTEXT_HANDLE hContext,
                     const TPM_ENTITY_TYPE entityType,
                     const UINT32 entityValue,
                     const TPM_AUTHDATA *usageAuth,
                     TPM_SECRET *sharedSecret,
                     TCS_AUTH *auth) {

  vtpmloginfo(VTPM_LOG_VTSP, "OSAP.\n");
  TPM_RESULT status = TPM_SUCCESS;
  TPM_NONCE nonceEvenOSAP, nonceOddOSAP;

  Crypto_GetRandom((BYTE *) &nonceOddOSAP, sizeof(TPM_NONCE) );

  TPMTRYRETURN( TCSP_OSAP( hContext,
                           TPM_ET_SRK,
                           0,
                           nonceOddOSAP,
                           &auth->AuthHandle,
                           &auth->NonceEven,
                           &nonceEvenOSAP) );

  // Calculating Session Secret
  BYTE sharedSecretText[TPM_DIGEST_SIZE * 2];

  BSG_PackList( sharedSecretText, 2,
                BSG_TPM_NONCE, &nonceEvenOSAP,
                BSG_TPM_NONCE, &nonceOddOSAP);

  Crypto_HMAC(sharedSecretText, sizeof(sharedSecretText), (BYTE *) usageAuth, TPM_DIGEST_SIZE, (BYTE *) sharedSecret);

  goto egress;

 abort_egress:

 egress:

  return status;
}



// Read the TPM's public endorsement key and build a CRYPTO_INFO usable for
// software RSA encryption against it.
TPM_RESULT VTSP_ReadPubek(  const TCS_CONTEXT_HANDLE hContext,
                            CRYPTO_INFO *crypto_info) {

  TPM_RESULT status;
  TPM_NONCE antiReplay;
  TPM_DIGEST checksum;
  BYTE *pubEKtext;
  UINT32 pubEKtextsize;

  vtpmloginfo(VTPM_LOG_VTSP, "Reading Public EK.\n");

  // GenerateAuth new nonceOdd
  Crypto_GetRandom(&antiReplay, sizeof(TPM_NONCE) );


  TPMTRYRETURN( TCSP_ReadPubek(  hContext,
                                 antiReplay,
                                 &pubEKtextsize,
                                 &pubEKtext,
                                 &checksum) );


  // Extract the remaining output parameters
  TPM_PUBKEY pubEK;

  BSG_Unpack(BSG_TPM_PUBKEY, pubEKtext, (BYTE *) &pubEK);

  // Build CryptoInfo
for the bindingKey + TPM_RSA_KEY_PARMS rsaKeyParms; + + BSG_Unpack(BSG_TPM_RSA_KEY_PARMS, + pubEK.algorithmParms.parms, + &rsaKeyParms); + + Crypto_RSABuildCryptoInfoPublic(rsaKeyParms.exponentSize, + rsaKeyParms.exponent, + pubEK.pubKey.keyLength, + pubEK.pubKey.key, + crypto_info); + + // Destroy rsaKeyParms + BSG_Destroy(BSG_TPM_RSA_KEY_PARMS, &rsaKeyParms); + + // Set encryption scheme + crypto_info->encScheme = CRYPTO_ES_RSAESOAEP_SHA1_MGF1; + //crypto_info->encScheme = pubEK.algorithmParms.encScheme; + crypto_info->algorithmID = pubEK.algorithmParms.algorithmID; + + goto egress; + + abort_egress: + + egress: + + return status; +} + +TPM_RESULT VTSP_TakeOwnership( const TCS_CONTEXT_HANDLE hContext, + const TPM_AUTHDATA *ownerAuth, + const TPM_AUTHDATA *srkAuth, + CRYPTO_INFO *ek_cryptoInfo, + TCS_AUTH *auth) { + + vtpmloginfo(VTPM_LOG_VTSP, "Taking Ownership of TPM.\n"); + + TPM_RESULT status = TPM_SUCCESS; + TPM_COMMAND_CODE command = TPM_ORD_TakeOwnership; + TPM_PROTOCOL_ID proto_id = TPM_PID_OWNER; + BYTE *new_srk; + + BYTE *paramText; // Digest to make Auth. + UINT32 paramTextSize; + + // vars for srkpubkey parameter + TPM_KEY srkPub; + TPM_KEY_PARMS srkKeyInfo = {TPM_ALG_RSA, TPM_ES_RSAESOAEP_SHA1_MGF1, TPM_SS_NONE, 12, 0}; + BYTE srkRSAkeyInfo[12] = { 0x00, 0x00, (RSA_KEY_SIZE >> 8), 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00}; + srkKeyInfo.parms = (BYTE *) &srkRSAkeyInfo; + + struct pack_buf_t srkText; + + // GenerateAuth new nonceOdd + Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) ); + + //These values are accurate for an enc(AuthData). 
+ struct pack_buf_t encOwnerAuth, encSrkAuth; + + encOwnerAuth.data = (BYTE *)malloc(sizeof(BYTE) * 256); + encSrkAuth.data = (BYTE *)malloc(sizeof(BYTE) * 256); + + if (encOwnerAuth.data == NULL || encSrkAuth.data == NULL) { + vtpmloginfo(VTPM_LOG_VTSP, "Could not malloc encrypted auths.\n"); + status = TPM_RESOURCES; + goto abort_egress; + } + + Crypto_RSAEnc(ek_cryptoInfo, sizeof(TPM_SECRET), (BYTE *) ownerAuth, &encOwnerAuth.size, encOwnerAuth.data); + Crypto_RSAEnc(ek_cryptoInfo, sizeof(TPM_SECRET), (BYTE *) srkAuth, &encSrkAuth.size, encSrkAuth.data); + + + // Build srk public key struct + srkPub.ver = TPM_STRUCT_VER_1_1; + srkPub.keyUsage = TPM_KEY_STORAGE; + srkPub.keyFlags = 0x00; + srkPub.authDataUsage = TPM_AUTH_ALWAYS; + memcpy(&srkPub.algorithmParms, &srkKeyInfo, sizeof(TPM_KEY_PARMS)); + srkPub.PCRInfoSize = 0; + srkPub.PCRInfo = 0; + srkPub.pubKey.keyLength= 0; + srkPub.encDataSize = 0; + + srkText.data = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + srkText.size = BSG_Pack(BSG_TPM_KEY, (BYTE *) &srkPub, srkText.data); + + paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + + paramTextSize = BSG_PackList(paramText, 5, + BSG_TPM_COMMAND_CODE,&command, + BSG_TPM_PROTOCOL_ID, &proto_id, + BSG_TPM_SIZE32_DATA, &encOwnerAuth, + BSG_TPM_SIZE32_DATA, &encSrkAuth, + BSG_TPM_KEY, &srkPub); + + TPMTRYRETURN( GenerateAuth( paramText, paramTextSize, ownerAuth, auth) ); + + new_srk = srkText.data; + TPMTRYRETURN( TCSP_TakeOwnership ( hContext, + proto_id, + encOwnerAuth.size, + encOwnerAuth.data, + encSrkAuth.size, + encSrkAuth.data, + &srkText.size, + &new_srk, + auth ) ); + + + paramTextSize = BSG_PackList(paramText, 2, + BSG_TPM_RESULT, &status, + BSG_TPM_COMMAND_CODE, &command); + memcpy(paramText + paramTextSize, new_srk, srkText.size); + paramTextSize += srkText.size; + + + TPMTRYRETURN( VerifyAuth( paramText, paramTextSize, + ownerAuth, auth, + hContext) ); + + goto egress; + + abort_egress: + + egress: + + free(srkText.data); 
+ free(encSrkAuth.data); + free(encOwnerAuth.data); + free(paramText); + + TCS_FreeMemory(hContext, new_srk); + + return status; +} + +TPM_RESULT VTSP_DisablePubekRead( const TCS_CONTEXT_HANDLE hContext, + const TPM_AUTHDATA *ownerAuth, + TCS_AUTH *auth) { + + vtpmloginfo(VTPM_LOG_VTSP, "Disabling Pubek Read.\n"); + + TPM_RESULT status = TPM_SUCCESS; + TPM_COMMAND_CODE command = TPM_ORD_DisablePubekRead; + + BYTE *paramText; // Digest to make Auth. + UINT32 paramTextSize; + + // Generate HMAC + Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) ); + + paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + + paramTextSize = BSG_PackList(paramText, 1, + BSG_TPM_COMMAND_CODE, &command); + + TPMTRYRETURN( GenerateAuth( paramText, paramTextSize, + ownerAuth, auth) ); + + // Call TCS + TPMTRYRETURN( TCSP_DisablePubekRead ( hContext, // in + auth) ); + + // Verify Auth + paramTextSize = BSG_PackList(paramText, 2, + BSG_TPM_RESULT, &status, + BSG_TPM_COMMAND_CODE, &command); + + TPMTRYRETURN( VerifyAuth( paramText, paramTextSize, + ownerAuth, auth, + hContext) ); + goto egress; + + abort_egress: + egress: + free(paramText); + return status; +} + +TPM_RESULT VTSP_CreateWrapKey( const TCS_CONTEXT_HANDLE hContext, + const TPM_KEY_USAGE usage, + const TPM_AUTHDATA *newKeyAuth, + const TCS_KEY_HANDLE parentHandle, + const TPM_AUTHDATA *osapSharedSecret, + buffer_t *pubKeyBuf, + TCS_AUTH *auth) { + + int i; + TPM_RESULT status = TPM_SUCCESS; + TPM_COMMAND_CODE command = TPM_ORD_CreateWrapKey; + + vtpmloginfo(VTPM_LOG_VTSP, "Creating new key of type %d.\n", usage); + + // vars for Calculate encUsageAuth + BYTE *paramText; + UINT32 paramTextSize; + + // vars for Calculate encUsageAuth + BYTE XORbuffer[sizeof(TPM_SECRET) + sizeof(TPM_NONCE)]; + TPM_DIGEST XORKey1; + UINT32 XORbufferSize; + TPM_SECRET encUsageAuth, encMigrationAuth; + + // vars for Flatten newKey prototype + BYTE *flatKey = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + UINT32 flatKeySize 
= TCPA_MAX_BUFFER_LENGTH; + struct pack_buf_t newKeyText; + + // Fill in newKey + TPM_KEY newKey; + + BYTE RSAkeyInfo[12] = { 0x00, 0x00, (RSA_KEY_SIZE >> 8), 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00}; + newKey.algorithmParms.algorithmID = TPM_ALG_RSA; + newKey.algorithmParms.parms = (BYTE *) &RSAkeyInfo; + newKey.algorithmParms.parmSize = 12; + + switch (usage) { + case TPM_KEY_SIGNING: + vtpmloginfo(VTPM_LOG_VTSP, "Creating Signing Key...\n"); + newKey.keyUsage = TPM_KEY_SIGNING; + newKey.algorithmParms.encScheme = TPM_ES_NONE; + newKey.algorithmParms.sigScheme = TPM_SS_RSASSAPKCS1v15_SHA1; + break; + case TPM_KEY_STORAGE: + vtpmloginfo(VTPM_LOG_VTSP, "Creating Storage Key...\n"); + newKey.keyUsage = TPM_KEY_STORAGE; + newKey.algorithmParms.encScheme = TPM_ES_RSAESOAEP_SHA1_MGF1; + newKey.algorithmParms.sigScheme = TPM_SS_NONE; + break; + case TPM_KEY_BIND: + vtpmloginfo(VTPM_LOG_VTSP, "Creating Binding Key...\n"); + newKey.keyUsage = TPM_KEY_BIND; + newKey.algorithmParms.encScheme = TPM_ES_RSAESOAEP_SHA1_MGF1; + newKey.algorithmParms.sigScheme = TPM_SS_NONE; + break; + default: + vtpmloginfo(VTPM_LOG_VTSP, "Cannot create key. Invalid Key Type.\n"); + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + + newKey.ver = TPM_STRUCT_VER_1_1; + + newKey.keyFlags = 0; + newKey.authDataUsage = TPM_AUTH_ALWAYS; + newKey.pubKey.keyLength= 0; + newKey.encDataSize = 0; + newKey.encData = NULL; + + // FIXME: Support PCR bindings + newKey.PCRInfoSize = 0; + newKey.PCRInfo = NULL; + + // Calculate encUsageAuth + XORbufferSize = BSG_PackList( XORbuffer, 2, + BSG_TPM_SECRET, osapSharedSecret, + BSG_TPM_NONCE, &auth->NonceEven); + Crypto_SHA1Full(XORbuffer, XORbufferSize, (BYTE *) &XORKey1); + + // FIXME: No support for migratable keys. 
+ for (i=0; i < TPM_DIGEST_SIZE; i++) + ((BYTE *) &encUsageAuth)[i] = ((BYTE *) &XORKey1)[i] ^ ((BYTE *) newKeyAuth)[i]; + + // Flatten newKey prototype + flatKeySize = BSG_Pack(BSG_TPM_KEY, (BYTE *) &newKey, flatKey); + newKeyText.data = flatKey; + newKeyText.size = flatKeySize; + + // GenerateAuth new nonceOdd + Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) ); + + // Generate HMAC + paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + + paramTextSize = BSG_PackList(paramText, 3, + BSG_TPM_COMMAND_CODE, &command, + BSG_TPM_AUTHDATA, &encUsageAuth, + BSG_TPM_AUTHDATA, &encMigrationAuth); + memcpy(paramText + paramTextSize, newKeyText.data, newKeyText.size); + paramTextSize += newKeyText.size; + + + TPMTRYRETURN( GenerateAuth( paramText, paramTextSize, + osapSharedSecret, auth) ); + + // Call TCS + TPMTRYRETURN( TCSP_CreateWrapKey( hContext, + parentHandle, + encUsageAuth, + encMigrationAuth, + &newKeyText.size, + &newKeyText.data, + auth) ); + + // Verify Auth + paramTextSize = BSG_PackList(paramText, 2, + BSG_TPM_RESULT, &status, + BSG_TPM_COMMAND_CODE, &command); + memcpy(paramText + paramTextSize, newKeyText.data, newKeyText.size); + paramTextSize += newKeyText.size; + + TPMTRYRETURN( VerifyAuth( paramText, paramTextSize, + osapSharedSecret, auth, 0) ); + + // Unpack/return key structure + TPMTRYRETURN(buffer_init(pubKeyBuf, 0, 0) ); + TPMTRYRETURN(buffer_append_raw(pubKeyBuf, newKeyText.size, newKeyText.data) ); + + goto egress; + + abort_egress: + + egress: + + free(flatKey); + free(paramText); + TCS_FreeMemory(hContext, newKeyText.data); + + return status; +} + +TPM_RESULT VTSP_LoadKey(const TCS_CONTEXT_HANDLE hContext, + const TCS_KEY_HANDLE hUnwrappingKey, + const buffer_t *rgbWrappedKeyBlob, + const TPM_AUTHDATA *parentAuth, + TPM_HANDLE *newKeyHandle, + TCS_AUTH *auth, + CRYPTO_INFO *cryptoinfo /*= NULL*/) { + + + vtpmloginfo(VTPM_LOG_VTSP, "Loading Key.\n%s",""); + + TPM_RESULT status = TPM_SUCCESS; + TPM_COMMAND_CODE command = 
TPM_ORD_LoadKey; + + BYTE *paramText; // Digest to make Auth. + UINT32 paramTextSize; + + if ((rgbWrappedKeyBlob == NULL) || (parentAuth == NULL) || + (newKeyHandle==NULL) || (auth==NULL)) { + status = TPM_BAD_PARAMETER; + goto abort_egress; + } + + // Generate Extra TCS Parameters + TPM_HANDLE phKeyHMAC; + + // Generate HMAC + Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) ); + + paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + + paramTextSize = BSG_PackList(paramText, 1, + BSG_TPM_COMMAND_CODE, &command); + + memcpy(paramText + paramTextSize, rgbWrappedKeyBlob->bytes, buffer_len(rgbWrappedKeyBlob)); + paramTextSize += buffer_len(rgbWrappedKeyBlob); + + TPMTRYRETURN( GenerateAuth( paramText, paramTextSize, + parentAuth, auth) ); + + // Call TCS + TPMTRYRETURN( TCSP_LoadKeyByBlob( hContext, + hUnwrappingKey, + buffer_len(rgbWrappedKeyBlob), + rgbWrappedKeyBlob->bytes, + auth, + newKeyHandle, + &phKeyHMAC) ); + + // Verify Auth + paramTextSize = BSG_PackList(paramText, 3, + BSG_TPM_RESULT, &status, + BSG_TPM_COMMAND_CODE, &command, + BSG_TPM_HANDLE, newKeyHandle); + + TPMTRYRETURN( VerifyAuth( paramText, paramTextSize, + parentAuth, auth, + hContext) ); + + // Unpack/return key structure + if (cryptoinfo != NULL) { + TPM_KEY newKey; + + BSG_Unpack(BSG_TPM_KEY, rgbWrappedKeyBlob->bytes , &newKey); + TPM_RSA_KEY_PARMS rsaKeyParms; + + BSG_Unpack(BSG_TPM_RSA_KEY_PARMS, + newKey.algorithmParms.parms, + &rsaKeyParms); + + Crypto_RSABuildCryptoInfoPublic(rsaKeyParms.exponentSize, + rsaKeyParms.exponent, + newKey.pubKey.keyLength, + newKey.pubKey.key, + cryptoinfo); + + // Destroy rsaKeyParms + BSG_Destroy(BSG_TPM_RSA_KEY_PARMS, &rsaKeyParms); + + // Set encryption scheme + cryptoinfo->encScheme = CRYPTO_ES_RSAESOAEP_SHA1_MGF1; + } + + goto egress; + + abort_egress: + + egress: + + free(paramText); + return status; +} + +TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE hContext, + const TPM_KEY_HANDLE key_handle, + const buffer_t *bound_data, + 
const TPM_AUTHDATA *usage_auth, + buffer_t *clear_data, + TCS_AUTH *auth) { + + vtpmloginfo(VTPM_LOG_VTSP, "Unbinding %d bytes of data.\n", buffer_len(bound_data)); + + TPM_RESULT status = TPM_SUCCESS; + TPM_COMMAND_CODE command = TPM_ORD_UnBind; + + BYTE *paramText; // Digest to make Auth. + UINT32 paramTextSize; + + // Generate Extra TCS Parameters + struct pack_buf_t clear_data32; + BYTE *clear_data_text; + UINT32 clear_data_size; + + // Generate HMAC + Crypto_GetRandom(&auth->NonceOdd, sizeof(TPM_NONCE) ); + + struct pack_buf_t bound_data32 = {bound_data->size, bound_data->bytes}; + + paramText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + + paramTextSize = BSG_PackList(paramText, 2, + BSG_TPM_COMMAND_CODE, &command, + BSG_TPM_SIZE32_DATA, &bound_data32); + + TPMTRYRETURN( GenerateAuth( paramText, paramTextSize, + usage_auth, auth) ); + + // Call TCS + TPMTRYRETURN( TCSP_UnBind( hContext, + key_handle, + buffer_len(bound_data), + bound_data->bytes, + auth, + &clear_data_size, + &clear_data_text) ); + + + // Verify Auth + clear_data32.size = clear_data_size; + clear_data32.data = clear_data_text; + paramTextSize = BSG_PackList(paramText, 3, + BSG_TPM_RESULT, &status, + BSG_TPM_COMMAND_CODE, &command, + BSG_TPM_SIZE32_DATA, &clear_data32); + + TPMTRYRETURN( VerifyAuth( paramText, paramTextSize, + usage_auth, auth, + hContext) ); + + // Unpack/return key structure + TPMTRYRETURN(buffer_init(clear_data, 0, 0)); + TPMTRYRETURN(buffer_append_raw (clear_data, clear_data_size, clear_data_text) ); + + goto egress; + + abort_egress: + + egress: + + free(paramText); + TCS_FreeMemory(hContext, clear_data_text); + + return status; +} + +TPM_RESULT VTSP_Bind( CRYPTO_INFO *cryptoInfo, + const buffer_t *inData, + buffer_t *outData) +{ + vtpmloginfo(VTPM_LOG_VTSP, "Binding %d bytes of data.\n", buffer_len(inData)); + TPM_BOUND_DATA boundData; + UINT32 i; + + // Fill boundData's accessory information + boundData.ver = TPM_STRUCT_VER_1_1; + boundData.payload = 
TPM_PT_BIND; + boundData.payloadData = inData->bytes; + + // Pack boundData before encryption + BYTE* flatBoundData = (BYTE *)malloc(sizeof(BYTE) * + (sizeof(TPM_VERSION) + + sizeof(TPM_PAYLOAD_TYPE) + + buffer_len(inData))); + if (flatBoundData == NULL) { + return TPM_NOSPACE; + } + UINT32 flatBoundDataSize = 0; + flatBoundDataSize = BSG_PackList( flatBoundData, 2, + BSG_TPM_VERSION, &boundData.ver, + BSG_TYPE_BYTE, &boundData.payload); + + memcpy(flatBoundData+flatBoundDataSize, inData->bytes, buffer_len(inData)); + flatBoundDataSize += buffer_len(inData); + + BYTE out_tmp[RSA_KEY_SIZE/8]; // RSAEnc does not do blocking, So this is what will come out. + UINT32 out_tmp_size; + + // Encrypt flatBoundData + Crypto_RSAEnc( cryptoInfo, + flatBoundDataSize, + flatBoundData, + &out_tmp_size, + out_tmp); + + if (out_tmp_size > RSA_KEY_SIZE/8) { + // The result of RSAEnc should be a fixed size based on key size. + vtpmlogerror(VTPM_LOG_VTSP, "Enc buffer just overflowed.\n"); + } + + buffer_init(outData, 0, NULL); + buffer_append_raw(outData, out_tmp_size, out_tmp); + + vtpmloginfo(VTPM_LOG_TXDATA, "Bind Generated[%d] = 0x", out_tmp_size); + for(i = 0 ; i < out_tmp_size ; i++) { + vtpmloginfomore(VTPM_LOG_TXDATA, "%2.2x ", out_tmp[i]); + } + vtpmloginfomore(VTPM_LOG_TXDATA, "\n"); + + // Free flatBoundData + free(flatBoundData); + + return TPM_SUCCESS; +} + +// Function Reaches into unsupported TCS command, beware. 
+TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE hContext, + const buffer_t *inbuf, + buffer_t *outbuf ) { + + vtpmloginfo(VTPM_LOG_VTSP, "Passthrough in use.\n"); + TPM_RESULT status = TPM_SUCCESS; + + // Generate Extra TCS Parameters + BYTE *resultText = (BYTE *) malloc(sizeof(BYTE) * TCPA_MAX_BUFFER_LENGTH); + UINT32 resultTextSize = TCPA_MAX_BUFFER_LENGTH; + + // Call TCS + TPMTRYRETURN( TCSP_RawTransmitData(buffer_len(inbuf), inbuf->bytes, + &resultTextSize, resultText) ); + + // Unpack/return key structure + TPMTRYRETURN(buffer_init (outbuf, resultTextSize, resultText) ); + goto egress; + + abort_egress: + + egress: + TCS_FreeMemory(hContext, resultText); + free(resultText); + return status; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/manager/vtsp.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/manager/vtsp.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,102 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// vtsp.h +// +// Higher level interface to TCS. +// +// ================================================================== + +#ifndef __VTSP_H__ +#define __VTSP_H__ + +#include "tcg.h" +#include "tcs.h" + +#define KEY_BUFFER_SIZE 2048 + +TPM_RESULT VTSP_RawTransmit(const TCS_CONTEXT_HANDLE hContext, + const buffer_t *inbuf, + buffer_t *outbuf ); + +TPM_RESULT VTSP_OIAP( const TCS_CONTEXT_HANDLE hContext, + TCS_AUTH *auth); + +TPM_RESULT VTSP_OSAP( const TCS_CONTEXT_HANDLE hContext, + const TPM_ENTITY_TYPE entityType, + const UINT32 entityValue, + const TPM_AUTHDATA *usageAuth, + TPM_SECRET *sharedsecret, + TCS_AUTH *auth); + +TPM_RESULT VTSP_ReadPubek( const TCS_CONTEXT_HANDLE hContext, + CRYPTO_INFO *cypto_info); + +TPM_RESULT VTSP_TakeOwnership( const TCS_CONTEXT_HANDLE hContext, + const TPM_AUTHDATA *ownerAuth, + const TPM_AUTHDATA *srkAuth, + CRYPTO_INFO *ek_cryptoInfo, + TCS_AUTH *auth); + +TPM_RESULT VTSP_DisablePubekRead( const TCS_CONTEXT_HANDLE hContext, + const TPM_AUTHDATA *ownerAuth, + TCS_AUTH *auth); + +TPM_RESULT VTSP_CreateWrapKey( const TCS_CONTEXT_HANDLE hContext, + const 
TPM_KEY_USAGE usage, + const TPM_AUTHDATA *newKeyAuth, + const TCS_KEY_HANDLE parentHandle, + const TPM_AUTHDATA *osapSharedSecret, + buffer_t *pubKeyBuf, + TCS_AUTH *auth); + +TPM_RESULT VTSP_LoadKey(const TCS_CONTEXT_HANDLE hContext, + const TCS_KEY_HANDLE hUnwrappingKey, + const buffer_t *rgbWrappedKeyBlob, + const TPM_AUTHDATA *parentAuth, + TPM_HANDLE *newKeyHandle, + TCS_AUTH *pAuth, + CRYPTO_INFO *cryptoinfo); + +TPM_RESULT VTSP_Unbind( const TCS_CONTEXT_HANDLE hContext, + const TPM_KEY_HANDLE key_handle, + const buffer_t *bound_data, + const TPM_AUTHDATA *usage_auth, + buffer_t *clear_data, + TCS_AUTH *auth); + +TPM_RESULT VTSP_Bind( CRYPTO_INFO *cryptoInfo, + const buffer_t *inData, + buffer_t *outData); + +#endif //_VTSP_H_ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,18 @@ +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk + +BIN = libTCS.a + +all: build + +build: $(BIN) + +install: build + +clean: + rm -f *.a *.so *.o *.rpm $(DEP_FILES) + +mrproper: clean + +$(BIN): $(OBJS) + $(AR) rcs $(BIN) $(OBJS) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/contextmgr.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/contextmgr.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,219 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// contextmgr.c +// +// This file contains the context management functions for TCS. 
+// +// ================================================================== + +#include <stdio.h> +#include <string.h> +#include <malloc.h> +#include "tcs.h" +#include "contextmgr.h" +#include "log.h" + +BYTE* AddMemBlock(CONTEXT_HANDLE* pContextHandle, // in + int BlockSize) { // in + + BLOCK* pCurrentBlock = NULL; + BLOCK* pBlock = NULL; + + // check incoming params + if (pContextHandle == NULL || BlockSize == 0) + return NULL; + + // Create New Block + pBlock = (BLOCK *)malloc(sizeof(BLOCK)); + if (pBlock == NULL) + return (0); + + pBlock->aMemory = (BYTE *)malloc(sizeof(BYTE) * BlockSize); + if (pBlock->aMemory == NULL) + return (0); + + memset(pBlock->aMemory, 0, BlockSize); + pBlock->nBlockSize = BlockSize; + pBlock->pNextBlock = NULL; + + // search for the last block created where to add the + // newly created block + if(pContextHandle->pTopBlock != NULL) { + pCurrentBlock = pContextHandle->pTopBlock; + while(pCurrentBlock->pNextBlock != NULL) + pCurrentBlock = pCurrentBlock->pNextBlock; + + + pCurrentBlock->pNextBlock= pBlock; + } else + pContextHandle->pTopBlock = pBlock; + + + pContextHandle->nBlockCount++; + + return pBlock->aMemory; +} + + +BOOL DeleteMemBlock(CONTEXT_HANDLE* pContextHandle, // in + BYTE* pTCPA_BYTEs) { // in + BLOCK* pCurrentBlock = NULL; + BLOCK* pParentBlock = NULL; + BOOL bFound = FALSE; + + if (pContextHandle == NULL) + return FALSE; + + + // Search for the Block in the context by aMemory pointer + pParentBlock = NULL; + pCurrentBlock = pContextHandle->pTopBlock; + + while(pCurrentBlock != NULL) { + // If aMemory block is found, delete it + if(pCurrentBlock->aMemory == pTCPA_BYTEs || pTCPA_BYTEs == NULL) { + // if it is the top Block, remove it from the top, + // otherwise remove it from the ParentBlock and stitch + // the NextBlock to the ParentBlock + if(pParentBlock == NULL) + pContextHandle->pTopBlock = pContextHandle->pTopBlock->pNextBlock; + else + pParentBlock->pNextBlock = pCurrentBlock->pNextBlock; + + // delete memory 
Block associated with pointer pTCPA_BYTEs + free(pCurrentBlock->aMemory); + pCurrentBlock->aMemory = NULL; + + free(pCurrentBlock); + pCurrentBlock = pParentBlock; + + pContextHandle->nBlockCount--; + bFound = TRUE; + } + + if(pCurrentBlock != NULL) { + pParentBlock = pCurrentBlock; + pCurrentBlock = pCurrentBlock->pNextBlock; + } + } + + return bFound; +} + +BOOL AddHandleToList(CONTEXT_HANDLE* pContextHandle, // in + TPM_RESOURCE_TYPE type, // in + TPM_HANDLE handle) { // in + HANDLE_LIST* pNewHandle = NULL; + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Adding Handle to list\n"); + if (pContextHandle == NULL) + return 0; + + pNewHandle = (HANDLE_LIST *)malloc(sizeof(HANDLE_LIST)); + + if (pNewHandle == NULL) + return (0); + + pNewHandle->handle = handle; + pNewHandle->type = type; + pNewHandle->pNextHandle = pContextHandle->pHandleList; + + pContextHandle->pHandleList = pNewHandle; + + return 1; +} + +BOOL DeleteHandleFromList( CONTEXT_HANDLE* pContextHandle, // in + TPM_HANDLE handle) { // in + + HANDLE_LIST *pCurrentHandle = pContextHandle->pHandleList, + *pLastHandle = pCurrentHandle; + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Deleting Handle from list\n"); + + if (pContextHandle == NULL) + return 0; + + while (1) { + + if (pCurrentHandle->handle == handle) { // Found element + if (pCurrentHandle == pLastHandle) { // First element in list + pContextHandle->pHandleList = pCurrentHandle->pNextHandle; + free(pCurrentHandle); + } else { // Ordinary element + pLastHandle->pNextHandle = pCurrentHandle->pNextHandle; + free(pCurrentHandle); + } + + return 1; + + } else { // Not found yet; + pLastHandle = pCurrentHandle; + pCurrentHandle = pCurrentHandle->pNextHandle; + if (pCurrentHandle == NULL) // Found end of list + return 0; + } + + } +} + +BOOL FreeHandleList( CONTEXT_HANDLE* pContextHandle) { // in + HANDLE_LIST* pCurrentHandle; + BOOL returncode = TRUE; + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Freeing all handles for context\n"); + + if (pContextHandle == NULL) + return 1; + + 
pCurrentHandle = pContextHandle->pHandleList; + while (pCurrentHandle != NULL) { + + switch (pCurrentHandle->type) { + case TPM_RT_KEY: + returncode = returncode && !TCSP_EvictKey((TCS_CONTEXT_HANDLE) pContextHandle, pCurrentHandle->handle); + break; + case TPM_RT_AUTH: + returncode = returncode && !TCSP_TerminateHandle((TCS_CONTEXT_HANDLE) pContextHandle, pCurrentHandle->handle); + break; + default: + returncode = FALSE; + } + + pCurrentHandle = pCurrentHandle->pNextHandle; + + } + + return 1; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/contextmgr.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/contextmgr.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,81 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// contextmgr.c +// +// This file contains the context management functions for TCS. +// +// ================================================================== + +#ifndef __CONTEXTMGR_H__ +#define __CONTEXTMGR_H__ + +#include "tcg.h" + +#define BLOCK_SIZE 300 + +typedef struct block { + int nBlockSize; + BYTE* aMemory; + struct block* pNextBlock; +} BLOCK; + +typedef struct handle_List { + TPM_HANDLE handle; + TPM_RESOURCE_TYPE type; + struct handle_List* pNextHandle; +} HANDLE_LIST; + +typedef struct context_handle { + int nBlockCount; + BLOCK* pTopBlock; + HANDLE_LIST* pHandleList; +} CONTEXT_HANDLE; + +BYTE* AddMemBlock( CONTEXT_HANDLE* pContextHandle, // in + int BlockSize); // in + +BOOL DeleteMemBlock(CONTEXT_HANDLE* pContextHandle, // in + BYTE* pTCPA_BYTEs); // in + + +BOOL AddHandleToList( CONTEXT_HANDLE* pContextHandle, // in + TPM_RESOURCE_TYPE type, // in + TPM_HANDLE handle); // in + +BOOL DeleteHandleFromList( CONTEXT_HANDLE* pContextHandle, // in + TPM_HANDLE handle); // in + +BOOL FreeHandleList( CONTEXT_HANDLE* pContextHandle); // in + +#endif //_CONTEXTMGR_H_ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/tcs.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/tcs.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,1102 @@ +// =================================================================== +// +// Copyright (c) 2005, 
Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// tcs.c +// +// This file contains the functions that implement a TCS. 
+// +// ================================================================== + +#include <stdio.h> +#include <string.h> +#include <malloc.h> + +#include "tcg.h" +#include "bsg.h" +#include "tcs.h" +#include "contextmgr.h" +#include "tpmddl.h" +#include "log.h" + +// Static Global Vars for the TCS +static BOOL TCS_m_bConnected; +static int TCS_m_nCount = 0; + +#define TCPA_MAX_BUFFER_LENGTH 0x2000 + +static BYTE InBuf [TCPA_MAX_BUFFER_LENGTH]; +static BYTE OutBuf[TCPA_MAX_BUFFER_LENGTH]; + + +// --------------------------------------------------------------------------------- +// Initialization/Uninitialization SubComponent API +// --------------------------------------------------------------------------------- +TPM_RESULT TCS_create() { + TDDL_RESULT hRes = TDDL_E_FAIL; + TPM_RESULT result = TPM_FAIL; + TCS_m_bConnected = FALSE; + + if (TCS_m_nCount == 0) { + vtpmloginfo(VTPM_LOG_TCS, "Constructing new TCS:\n"); + hRes = TDDL_Open(); + + if (hRes == TDDL_SUCCESS) { + TCS_m_bConnected = TRUE; + result = TPM_SUCCESS; + } + } else + TCS_m_bConnected = TRUE; + + TCS_m_nCount++; + + return(result); +} + + +void TCS_destroy() +{ + // FIXME: Should iterate through all open contexts and close them. 
+ TCS_m_nCount--; + + if (TCS_m_bConnected == TRUE && TCS_m_nCount == 0) { + vtpmloginfo(VTPM_LOG_TCS, "Destructing TCS:\n"); + TDDL_Close(); + TCS_m_bConnected = FALSE; + } + +} + +TPM_RESULT TCS_Malloc( TCS_CONTEXT_HANDLE hContext, // in + UINT32 MemSize, // in + BYTE** ppMemPtr) {// out + + TPM_RESULT returnCode = TPM_FAIL; + CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext; + + if (pContextHandle != NULL && ppMemPtr != NULL) { + *ppMemPtr = (BYTE *)AddMemBlock(pContextHandle, MemSize); + returnCode = TPM_SUCCESS; + } + + return returnCode; +} + +TPM_RESULT TCS_FreeMemory( TCS_CONTEXT_HANDLE hContext, // in + BYTE* pMemory) { // in + TPM_RESULT returnCode = TPM_FAIL; + CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext; + + if ( (pContextHandle != NULL && pMemory != NULL) && + (DeleteMemBlock(pContextHandle, pMemory) == TRUE) ) + returnCode = TPM_SUCCESS; + + + return returnCode; +} + +TPM_RESULT TCS_OpenContext(TCS_CONTEXT_HANDLE* hContext) { // out + TPM_RESULT returnCode = TPM_FAIL; + + vtpmloginfo(VTPM_LOG_TCS, "Calling TCS_OpenContext:\n"); + + // hContext must point to a null memory context handle + if(*hContext == HANDLE_NULL) { + CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE *)malloc(sizeof(CONTEXT_HANDLE)); + if (pContextHandle == NULL) + return TPM_SIZE; + + + // initialize to 0 + pContextHandle->nBlockCount = 0; + pContextHandle->pTopBlock = NULL; + pContextHandle->pHandleList = NULL; + + // Create New Block + AddMemBlock(pContextHandle, BLOCK_SIZE); + + *hContext = (TCS_CONTEXT_HANDLE)pContextHandle; + returnCode = TPM_SUCCESS; + } + + return(returnCode); +} + +TPM_RESULT TCS_CloseContext(TCS_CONTEXT_HANDLE hContext) {// in + //FIXME: TCS SHOULD Track track failed auths and make sure + //we don't try and re-free them here. 
+ TPM_RESULT returnCode = TPM_FAIL; + + CONTEXT_HANDLE* pContextHandle = (CONTEXT_HANDLE*)hContext; + + if(pContextHandle != NULL) { + // Print test info + vtpmloginfo(VTPM_LOG_TCS, "Calling TCS_CloseContext.\n"); + + // free memory for all the blocks + DeleteMemBlock(pContextHandle, NULL ); + pContextHandle->pTopBlock = NULL; + + FreeHandleList(pContextHandle); + if (pContextHandle->pHandleList != NULL) + vtpmlogerror(VTPM_LOG_TCS, "Not all handles evicted from TPM.\n"); + + // Release the TPM's resources + free(pContextHandle); + returnCode = TPM_SUCCESS; + } + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Finished closing context\n"); + return(returnCode); +} + +// ------------------------------------------------------------------ +// Internal Functions +// ------------------------------------------------------------------ +int packAuth(BYTE* dst, TCS_AUTH* auth) { + // CHECK: according to the command specs, the outgoing auth params are: + // nonceEven + // nonceOdd + // continueAuthSession + // auth digest for return params + // + // this is a bit different than this code... 
+ + return BSG_PackList(dst, 4, + BSG_TYPE_UINT32, &(auth->AuthHandle), + BSG_TPM_NONCE, &(auth->NonceOdd), + BSG_TYPE_BOOL, &(auth->fContinueAuthSession), + BSG_TPM_AUTHDATA, &(auth->HMAC)); +} + +int unpackAuth(TCS_AUTH* auth, BYTE* src) { + return BSG_UnpackList(src, 3, + BSG_TPM_NONCE, &(auth->NonceEven), + BSG_TYPE_BOOL, &(auth->fContinueAuthSession), + BSG_TPM_AUTHDATA, &(auth->HMAC)); +} + +// ------------------------------------------------------------------ +// Authorization Commands +// ------------------------------------------------------------------ + +TPM_RESULT TCSP_OIAP(TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTHHANDLE* authHandle, // out + TPM_NONCE* nonce0) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + TPM_COMMAND_CODE ordinal = TPM_ORD_OIAP; + UINT32 paramSize = 0; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (authHandle == NULL || nonce0 == NULL) + return TPM_BAD_PARAMETER; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal); + + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS) { + + // unpack to get the tag, paramSize, & returnCode + int i = BSG_UnpackList( OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND) { + // Extract the remaining output parameters + BSG_UnpackList(OutBuf+i, 2, + BSG_TYPE_UINT32, 
authHandle, + BSG_TPM_NONCE, nonce0); + + if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_AUTH, *authHandle)) + vtpmlogerror(VTPM_LOG_TCS, "New AuthHandle not recorded\n"); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "Failed with return code %s\n", tpm_get_error_name(returnCode)); + + } + + return(returnCode); +} + +TPM_RESULT TCSP_OSAP(TCS_CONTEXT_HANDLE hContext, // in + TPM_ENTITY_TYPE entityType, // in + UINT32 entityValue, // in + TPM_NONCE nonceOddOSAP, // in + TCS_AUTHHANDLE* authHandle, // out + TPM_NONCE* nonceEven, // out + TPM_NONCE* nonceEvenOSAP) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_OSAP; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (authHandle == NULL || nonceEven == NULL || nonceEvenOSAP == NULL) + return TPM_BAD_PARAMETER; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 6, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT16, &entityType, + BSG_TYPE_UINT32, &entityValue, + BSG_TPM_NONCE, &nonceOddOSAP); + + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS) { + + // unpack to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND) { + // Extract the 
remaining output parameters + BSG_UnpackList(OutBuf+i, 3, + BSG_TYPE_UINT32, authHandle, + BSG_TPM_NONCE, nonceEven, + BSG_TPM_NONCE, nonceEvenOSAP); + + if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_AUTH, *authHandle)) { + vtpmlogerror(VTPM_LOG_TCS, "New AuthHandle not recorded\n"); + } + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "Failed with return code %s\n", tpm_get_error_name(returnCode)); + + } + + return(returnCode); +} + +TPM_RESULT TCSP_TakeOwnership(TCS_CONTEXT_HANDLE hContext, // in + UINT16 protocolID, // in + UINT32 encOwnerAuthSize, // in + BYTE* encOwnerAuth, // in + UINT32 encSrkAuthSize, // in + BYTE* encSrkAuth, // in + UINT32* SrkSize, // in, out + BYTE** Srk, // in, out + TCS_AUTH* ownerAuth) // in, out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH1_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_TakeOwnership; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (encOwnerAuth == NULL || encSrkAuth == NULL || SrkSize == NULL || *Srk == NULL) + return TPM_BAD_PARAMETER; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 5, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT16, &protocolID, + BSG_TYPE_UINT32, &encOwnerAuthSize); + + memcpy(InBuf+InLength, encOwnerAuth, encOwnerAuthSize); + InLength += encOwnerAuthSize; + InLength += BSG_Pack( BSG_TYPE_UINT32, + &encSrkAuthSize, + InBuf+InLength); + memcpy(InBuf+InLength, encSrkAuth, encSrkAuthSize); + InLength += encSrkAuthSize; + memcpy(InBuf+InLength, *Srk, *SrkSize); + InLength += *SrkSize; + InLength += packAuth(InBuf+InLength, ownerAuth); + // fill paramSize again as we 
now have the correct size + BSG_Pack(BSG_TYPE_UINT32, + &InLength, + InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS){ + + // unpack to get the tag, paramSize, & returnCode + int i = BSG_UnpackList( OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH1_COMMAND) { + // Extract the remaining output parameters + TPM_KEY srkPub; + i += BSG_Unpack(BSG_TPM_KEY, OutBuf+i, &srkPub); + unpackAuth(ownerAuth, OutBuf+i); + + // fill output params + BYTE tempBuf[1024]; + *SrkSize = BSG_Pack(BSG_TPM_KEY, &srkPub, tempBuf); + if (TCS_Malloc(hContext, *SrkSize, Srk) == TPM_FAIL) { + return(TPM_SIZE); + } + memcpy(*Srk, tempBuf, *SrkSize); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_TakeOwnership Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + + +TPM_RESULT TCSP_DisablePubekRead ( TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTH* ownerAuth) { // in, out + + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH1_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_DisablePubekRead; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal); + + InLength += packAuth(InBuf+InLength, ownerAuth); + + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, 
"Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS){ + + // unpack to get the tag, paramSize, & returnCode + int i = BSG_UnpackList( OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH1_COMMAND) { + // Extract the remaining output parameters + unpackAuth(ownerAuth, OutBuf+i); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_DisablePubekRead Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + + +TPM_RESULT TCSP_TerminateHandle(TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTHHANDLE handle) // in +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_Terminate_Handle; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 4, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &handle); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS) { + + // unpack to get the tag, paramSize, & returnCode + BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (!DeleteHandleFromList((CONTEXT_HANDLE *)hContext, handle)) + vtpmlogerror(VTPM_LOG_TCS, "KeyHandle not removed from list\n"); + + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND) { + // Print debug info + + vtpmloginfo(VTPM_LOG_TCS_DEEP, 
"Received paramSize : %d", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_TerminateHandle Failed with return code %s\n", tpm_get_error_name(returnCode)); + + } + + return(returnCode); +} + +// TPM Mandatory +TPM_RESULT TCSP_Extend( TCS_CONTEXT_HANDLE hContext, // in + TPM_PCRINDEX pcrNum, // in + TPM_DIGEST inDigest, // in + TPM_PCRVALUE* outDigest) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_Extend; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 5, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &pcrNum, + BSG_TPM_DIGEST, &inDigest); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS) { + + // unpack to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND){ + // Extract the remaining output parameters + BSG_Unpack(BSG_TPM_PCRVALUE, OutBuf+i, outDigest); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_Extend Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + +TPM_RESULT TCSP_Seal( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE keyHandle, // in + TPM_ENCAUTH encAuth, // in + UINT32 pcrInfoSize, // in + 
BYTE* PcrInfo, // in + UINT32 inDataSize, // in + BYTE* inData, // in + TCS_AUTH* pubAuth, // in, out + UINT32* SealedDataSize, // out + BYTE** SealedData) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH1_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_Seal; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (inData == NULL || pubAuth == NULL || SealedDataSize == NULL || *SealedData == NULL) + return TPM_BAD_PARAMETER; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 6, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &keyHandle, + BSG_TPM_ENCAUTH, encAuth, + BSG_TYPE_UINT32, &pcrInfoSize); + memcpy(InBuf+InLength, PcrInfo, pcrInfoSize); + InLength += pcrInfoSize; + InLength += BSG_Pack(BSG_TYPE_UINT32, &inDataSize, InBuf+InLength); + memcpy(InBuf+InLength, inData, inDataSize); + InLength += inDataSize; + InLength += packAuth(InBuf+InLength, pubAuth); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) + == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH1_COMMAND) { + // Extract the remaining output parameters + TPM_STORED_DATA sealedData; + + i += BSG_Unpack(BSG_TPM_STORED_DATA, OutBuf+i, &sealedData); + unpackAuth(pubAuth, OutBuf+i); + + // fill SealedData + BYTE tempBuf[1024]; + *SealedDataSize = BSG_Pack(BSG_TPM_STORED_DATA, &sealedData, tempBuf); 
+ if (TCS_Malloc(hContext, *SealedDataSize, SealedData) == TPM_FAIL) { + return TPM_SIZE; + } + memcpy(*SealedData, tempBuf, *SealedDataSize); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_Seal Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + +TPM_RESULT TCSP_Unseal(TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE parentHandle, // in + UINT32 SealedDataSize, // in + BYTE* SealedData, // in + TCS_AUTH* parentAuth, // in, out + TCS_AUTH* dataAuth, // in, out + UINT32* DataSize, // out + BYTE** Data) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH2_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_Unseal; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (SealedData == NULL || parentAuth == NULL || dataAuth == NULL || + DataSize == NULL || Data == NULL) + return TPM_BAD_PARAMETER; + + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 4, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &parentHandle); + memcpy(InBuf+InLength, SealedData, SealedDataSize); + InLength += SealedDataSize; + InLength += packAuth(InBuf+InLength, parentAuth); + InLength += packAuth(InBuf+InLength, dataAuth); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList( OutBuf, 3, + BSG_TPM_TAG, &tag, + 
BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH2_COMMAND) { + // Extract the remaining output parameters + i += BSG_Unpack(BSG_TYPE_UINT32, OutBuf+i, DataSize); + if (TCS_Malloc(hContext, *DataSize, Data) == TPM_FAIL) { + return TPM_SIZE; + } + memcpy(*Data, OutBuf+i, *DataSize); + i += *DataSize; + i += unpackAuth(parentAuth, OutBuf+i); + unpackAuth(dataAuth, OutBuf+i); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_Unseal Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + +TPM_RESULT TCSP_UnBind(TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE keyHandle, // in + UINT32 inDataSize, // in + BYTE* inData, // in + TCS_AUTH* privAuth, // in, out + UINT32* outDataSize, // out + BYTE** outData) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH1_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_UnBind; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (inData == NULL || privAuth == NULL || outDataSize == NULL || *outData == NULL) + return TPM_BAD_PARAMETER; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 5, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &keyHandle, + BSG_TYPE_UINT32, &inDataSize); + memcpy(InBuf+InLength, inData, inDataSize); + InLength += inDataSize; + InLength += packAuth(InBuf+InLength, privAuth); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "\n\tSending paramSize = %d", InLength); + + // call the TPM 
driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH1_COMMAND) { + // Extract the remaining output parameters + i += BSG_Unpack(BSG_TYPE_UINT32, OutBuf+i, outDataSize); + if (TCS_Malloc(hContext, *outDataSize, outData) == TPM_FAIL) + return TPM_SIZE; + + memcpy(*outData, OutBuf+i, *outDataSize); + i += *outDataSize; + unpackAuth(privAuth, OutBuf+i); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_UnBind Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + +TPM_RESULT TCSP_CreateWrapKey(TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hWrappingKey, // in + TPM_ENCAUTH KeyUsageAuth, // in + TPM_ENCAUTH KeyMigrationAuth, // in + UINT32* pcKeySize, // in, out + BYTE** prgbKey, // in, out + TCS_AUTH* pAuth) // in, out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH1_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_CreateWrapKey; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (pcKeySize == NULL || *prgbKey == NULL || pAuth == NULL) + return TPM_BAD_PARAMETER; + + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 6, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &hWrappingKey, + BSG_TPM_ENCAUTH, KeyUsageAuth, + BSG_TPM_ENCAUTH, KeyMigrationAuth); + memcpy(InBuf+InLength, *prgbKey, *pcKeySize); + InLength += *pcKeySize; + 
InLength += packAuth(InBuf+InLength, pAuth); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_RESULT, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH1_COMMAND) { + // Extract the remaining output parameters + TPM_KEY wrappedKey; + + i += BSG_Unpack(BSG_TPM_KEY, OutBuf+i, &wrappedKey); + unpackAuth(pAuth, OutBuf+i); + + // Fill prgbKey + BYTE tempBuf[1024]; + *pcKeySize = BSG_Pack(BSG_TPM_KEY, &wrappedKey, tempBuf); + if (TCS_Malloc(hContext, *pcKeySize, prgbKey) == TPM_FAIL) + return TPM_SIZE; + + memcpy(*prgbKey, tempBuf, *pcKeySize); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_CreateWrapKey Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + +TPM_RESULT TCSP_LoadKeyByBlob(TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hUnwrappingKey, // in + UINT32 cWrappedKeyBlobSize, // in + BYTE* rgbWrappedKeyBlob, // in + TCS_AUTH* pAuth, // in, out + TCS_KEY_HANDLE* phKeyTCSI, // out + TCS_KEY_HANDLE* phKeyHMAC) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_AUTH1_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_LoadKey; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (rgbWrappedKeyBlob == NULL || pAuth == NULL || phKeyTCSI == NULL || phKeyHMAC == NULL) + return 
TPM_BAD_PARAMETER; + + *phKeyHMAC = hUnwrappingKey; // the parent key is the one that the TPM use to make the HMAC calc + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 4, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &hUnwrappingKey); + memcpy(InBuf+InLength, rgbWrappedKeyBlob, cWrappedKeyBlobSize); + InLength += cWrappedKeyBlobSize; + InLength += packAuth(InBuf+InLength, pAuth); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_AUTH1_COMMAND) { + // Extract the remaining output parameters + i += BSG_Unpack(BSG_TYPE_UINT32, + OutBuf+i, + phKeyTCSI); + unpackAuth(pAuth, OutBuf+i); + + if (!AddHandleToList((CONTEXT_HANDLE *)hContext, TPM_RT_KEY, *phKeyTCSI)) { + vtpmlogerror(VTPM_LOG_TCS, "New KeyHandle not recorded\n"); + } + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else + vtpmlogerror(VTPM_LOG_TCS, "TCSP_LoadKeyByBlob Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + + return(returnCode); +} + +TPM_RESULT TCSP_EvictKey(TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hKey) // in +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_EvictKey; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = 
TCPA_MAX_BUFFER_LENGTH; + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 4, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, &hKey); + // fill paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (!DeleteHandleFromList((CONTEXT_HANDLE *)hContext, hKey)) { + vtpmlogerror(VTPM_LOG_TCS, "KeyHandle not removed from list\n"); + } + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND) { + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else { + vtpmlogerror(VTPM_LOG_TCS, "TCSP_EvictKey Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + } + + return(returnCode); +} + +TPM_RESULT TCSP_GetRandom(TCS_CONTEXT_HANDLE hContext, // in + UINT32* bytesRequested, // in, out + BYTE** randomBytes) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_GetRandom; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (bytesRequested == NULL || *randomBytes == NULL){ + return TPM_BAD_PARAMETER; + } + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 4, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TYPE_UINT32, bytesRequested); + // fill 
paramSize again as we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND) { + // Extract the remaining output parameters + BSG_Unpack(BSG_TYPE_UINT32, OutBuf+i, bytesRequested); + if (TCS_Malloc(hContext, *bytesRequested, randomBytes) == TPM_FAIL) { + return TPM_SIZE; + } + memcpy(*randomBytes, OutBuf+i+sizeof(UINT32), *bytesRequested); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else { + vtpmlogerror(VTPM_LOG_TCS, "TCSP_GetRandom Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + } + + return(returnCode); +} + + +TPM_RESULT TCSP_ReadPubek(TCS_CONTEXT_HANDLE hContext, // in + TPM_NONCE antiReplay, // in + UINT32* pubEndorsementKeySize, // out + BYTE** pubEndorsementKey, // out + TPM_DIGEST* checksum) // out +{ + // setup input/output parameters block + TPM_TAG tag = TPM_TAG_RQU_COMMAND; + UINT32 paramSize = 0; + TPM_COMMAND_CODE ordinal = TPM_ORD_ReadPubek; + TPM_RESULT returnCode = TPM_SUCCESS; + + // setup the TPM driver input and output buffers + TDDL_RESULT hRes = TDDL_E_FAIL; + TDDL_UINT32 InLength = TCPA_MAX_BUFFER_LENGTH; + TDDL_UINT32 OutLength = TCPA_MAX_BUFFER_LENGTH; + + // check input params + if (pubEndorsementKeySize == NULL || pubEndorsementKey == NULL || checksum == NULL) { + return TPM_BAD_PARAMETER; + } + + // Convert Byte Input parameter in the input byte stream InBuf + InLength = BSG_PackList(InBuf, 4, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &ordinal, + BSG_TPM_NONCE, &antiReplay); + // fill paramSize again as 
we now have the correct size + BSG_Pack(BSG_TYPE_UINT32, &InLength, InBuf+2); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Sending paramSize = %d\n", InLength); + + // call the TPM driver + if ((hRes = TDDL_TransmitData(InBuf, InLength, OutBuf, &OutLength)) == TDDL_SUCCESS) { + // unpack OutBuf to get the tag, paramSize, & returnCode + int i = BSG_UnpackList(OutBuf, 3, + BSG_TPM_TAG, &tag, + BSG_TYPE_UINT32, ¶mSize, + BSG_TPM_COMMAND_CODE, &returnCode); + + if (returnCode == TPM_SUCCESS && tag == TPM_TAG_RSP_COMMAND) { + // Extract the remaining output parameters + TPM_PUBKEY pubEK; + i += BSG_UnpackList(OutBuf+i, 2, + BSG_TPM_PUBKEY, &pubEK, + BSG_TPM_DIGEST, checksum); + + // fill EndorsementKey + BYTE tempBuf[1024]; + *pubEndorsementKeySize = BSG_Pack(BSG_TPM_PUBKEY, &pubEK, tempBuf); + if (TCS_Malloc(hContext, *pubEndorsementKeySize, pubEndorsementKey) == TPM_FAIL) { + return TPM_SIZE; + } + memcpy(*pubEndorsementKey, tempBuf, *pubEndorsementKeySize); + + vtpmloginfo(VTPM_LOG_TCS_DEEP, "Received paramSize : %d\n", paramSize); + } else { + vtpmlogerror(VTPM_LOG_TCS, "TCSP_ReadPubek Failed with return code %s\n", tpm_get_error_name(returnCode)); + } + } + + return(returnCode); +} + +TPM_RESULT TCSP_RawTransmitData( UINT32 inDataSize, // in + BYTE *inData, // in + UINT32 *outDataSize,// in/out + BYTE *outData) { // out + + TDDL_RESULT hRes; + + vtpmloginfo(VTPM_LOG_TCS, "Calling TransmitData directly.\n"); + //FIXME: Add Context Management + hRes = TDDL_TransmitData( inData, + inDataSize, + outData, + outDataSize); + + if (hRes == TDDL_SUCCESS) { + return TPM_SUCCESS; + } else { + vtpmlogerror(VTPM_LOG_TCS, "TCSP_RawTransmitData Failed with return code %s\n", tpm_get_error_name(TPM_IOERROR)); + return TPM_IOERROR; + } + +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/tcs.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/tcs.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,238 @@ +// =================================================================== 
+// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. 
+// =================================================================== +// +// tcs.h +// +// This file declares the TCS API +// +// ================================================================== + +#ifndef __TCS_H__ +#define __TCS_H__ + +#include "tcg.h" +#include "buffer.h" + +#define HANDLE_NULL 0 + +// ------------------------------------------------------------------ +// Exposed API +// ------------------------------------------------------------------ + +TPM_RESULT TCS_create(); +void TCS_destroy(); + +TPM_RESULT TCS_OpenContext( /* OUT */ TCS_CONTEXT_HANDLE* hContext ); + +TPM_RESULT TCS_CloseContext ( /* IN */ TCS_CONTEXT_HANDLE hContext ); + +TPM_RESULT TCS_Malloc ( TCS_CONTEXT_HANDLE hContext, // in + UINT32 MemSize, // in + BYTE** ppMemPtr ); //out + +TPM_RESULT TCS_FreeMemory ( TCS_CONTEXT_HANDLE hContext, // in + BYTE* pMemory); // in + +// ------------------------------------------------------------------ +// Exposed API +// ------------------------------------------------------------------ + +// TPM v1.1B Command Set + +// Authorzation +TPM_RESULT TCSP_OIAP( TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTHHANDLE* authHandle, // out + TPM_NONCE* nonce0 // out + ); + +TPM_RESULT TCSP_OSAP ( TCS_CONTEXT_HANDLE hContext, // in + TPM_ENTITY_TYPE entityType, // in + UINT32 entityValue, // in + TPM_NONCE nonceOddOSAP, // in + TCS_AUTHHANDLE* authHandle, // out + TPM_NONCE* nonceEven, // out + TPM_NONCE* nonceEvenOSAP // out + ); + +TPM_RESULT TCSP_TakeOwnership ( TCS_CONTEXT_HANDLE hContext, // in + UINT16 protocolID, // in + UINT32 encOwnerAuthSize, // in + BYTE* encOwnerAuth, // in + UINT32 encSrkAuthSize, // in + BYTE* encSrkAuth, // in + UINT32* SrkSize, // in, out + BYTE** Srk, // in, out + TCS_AUTH* ownerAuth // in, out + ); + +TPM_RESULT TCSP_DisablePubekRead ( TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTH* ownerAuth // in, out + ); + +TPM_RESULT TCSP_TerminateHandle ( TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTHHANDLE handle // in + ); + 
+TPM_RESULT TCSP_FlushSpecific ( TCS_CONTEXT_HANDLE hContext, // in + TCS_AUTHHANDLE handle, // in + TPM_RESOURCE_TYPE resourceType //in + ); + +// TPM Mandatory +TPM_RESULT TCSP_Extend ( TCS_CONTEXT_HANDLE hContext, // in + TPM_PCRINDEX pcrNum, // in + TPM_DIGEST inDigest, // in + TPM_PCRVALUE* outDigest // out + ); + +TPM_RESULT TCSP_PcrRead ( TCS_CONTEXT_HANDLE hContext, // in + TPM_PCRINDEX pcrNum, // in + TPM_PCRVALUE* outDigest // out + ); + +TPM_RESULT TCSP_Quote ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE keyHandle, // in + TPM_NONCE antiReplay, // in + UINT32* PcrDataSize, // in, out + BYTE** PcrData, // in, out + TCS_AUTH* privAuth, // in, out + UINT32* sigSize, // out + BYTE** sig // out + ); + +TPM_RESULT TCSP_Seal ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE keyHandle, // in + TPM_ENCAUTH encAuth, // in + UINT32 pcrInfoSize, // in + BYTE* PcrInfo, // in + UINT32 inDataSize, // in + BYTE* inData, // in + TCS_AUTH* pubAuth, // in, out + UINT32* SealedDataSize, // out + BYTE** SealedData // out + ); + +TPM_RESULT TCSP_Unseal ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE parentHandle, // in + UINT32 SealedDataSize, // in + BYTE* SealedData, // in + TCS_AUTH* parentAuth, // in, out + TCS_AUTH* dataAuth, // in, out + UINT32* DataSize, // out + BYTE** Data // out + ); + +TPM_RESULT TCSP_DirWriteAuth ( TCS_CONTEXT_HANDLE hContext, // in + TPM_DIRINDEX dirIndex, // in + TPM_DIRVALUE newContents, // in + TCS_AUTH* ownerAuth // in, out + ); + +TPM_RESULT TCSP_DirRead ( TCS_CONTEXT_HANDLE hContext, // in + TPM_DIRINDEX dirIndex, // in + TPM_DIRVALUE* dirValue // out + ); + +TPM_RESULT TCSP_UnBind ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE keyHandle, // in + UINT32 inDataSize, // in + BYTE* inData, // in + TCS_AUTH* privAuth, // in, out + UINT32* outDataSize, // out + BYTE** outData // out + ); + +TPM_RESULT TCSP_CreateWrapKey ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hWrappingKey, // in + TPM_ENCAUTH KeyUsageAuth, 
// in + TPM_ENCAUTH KeyMigrationAuth, // in + UINT32* pcKeySize, // in, out + BYTE** prgbKey, // in, out + TCS_AUTH* pAuth // in, out + ); + +TPM_RESULT TCSP_LoadKeyByBlob ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hUnwrappingKey, // in + UINT32 cWrappedKeyBlobSize, // in + BYTE* rgbWrappedKeyBlob, // in + TCS_AUTH* pAuth, // in, out + TCS_KEY_HANDLE* phKeyTCSI, // out + TCS_KEY_HANDLE* phKeyHMAC // out + ); + +TPM_RESULT TCSP_GetPubKey ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hKey, // in + TCS_AUTH* pAuth, // in, out + UINT32* pcPubKeySize, // out + BYTE** prgbPubKey // out + ); + +TPM_RESULT TCSP_EvictKey ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE hKey // in + ); + +TPM_RESULT TCSP_Sign ( TCS_CONTEXT_HANDLE hContext, // in + TCS_KEY_HANDLE keyHandle, // in + UINT32 areaToSignSize, // in + BYTE* areaToSign, // in + TCS_AUTH* privAuth, // in, out + UINT32* sigSize, // out + BYTE** sig // out + ); + +TPM_RESULT TCSP_GetRandom ( TCS_CONTEXT_HANDLE hContext, // in + UINT32* bytesRequested, // in, out + BYTE** randomBytes // out + ); + +TPM_RESULT TCSP_StirRandom ( TCS_CONTEXT_HANDLE hContext, // in + UINT32 inDataSize, // in + BYTE* inData // in + ); + +TPM_RESULT TCSP_ReadPubek ( TCS_CONTEXT_HANDLE hContext, // in + TPM_NONCE antiReplay, // in + UINT32* pubEndorsementKeySize, // out + BYTE** pubEndorsementKey, // out + TPM_DIGEST* checksum // out + ); + + +// Non-Standard TCSP call to give direct access to TransmitData. +// Key and Auth Management is done before transfering command to TDDL. 
+TPM_RESULT TCSP_RawTransmitData(UINT32 inDataSize, // in + BYTE *inData, // in + UINT32 *outDataSize,// in/out + BYTE *outData); // out + +#endif //TCS_H diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/tpmddl.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/tpmddl.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,69 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// tpmddl.c +// +// This file defines the TDDLI API +// +// ================================================================== + +#ifndef __TPMDDL_H__ +#define __TPMDDL_H__ + +#define TDDL_CAP_PROP_MANUFACTURER 0x0001 + +#define TDDL_E_FAIL 1 +#define TDDL_E_SUCCESS 0 +#define TDDL_SUCCESS 0 + +typedef unsigned int TDDL_UINT32; +typedef TDDL_UINT32 TDDL_RESULT; +typedef unsigned char TDDL_BYTE; + +TDDL_RESULT TDDL_Open(); +void TDDL_Close(); +TDDL_RESULT TDDL_TransmitData( TDDL_BYTE* in, + TDDL_UINT32 insize, + TDDL_BYTE* out, + TDDL_UINT32* outsize); +TDDL_RESULT TDDL_GetStatus(); +TDDL_RESULT TDDL_GetCapability( TDDL_UINT32 cap, + TDDL_UINT32 sub, + TDDL_BYTE* buffer, + TDDL_UINT32* size); +TDDL_RESULT TDDL_SetCapability( TDDL_UINT32 cap, + TDDL_UINT32 sub, + TDDL_BYTE* buffer, + TDDL_UINT32* size); + +#endif // __TPMDDL_H__ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/tcs/transmit.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/tcs/transmit.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,131 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. 
+// =================================================================== + +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <fcntl.h> + +#include "tcg.h" +#include "buffer.h" +#include "log.h" +#include "tpmddl.h" + +// flag to track whether TDDL has been opened +static int g_TDDL_open = 0; +static int g_fd = -1; // the fd to the TPM + +TPM_RESULT +TDDL_TransmitData( TDDL_BYTE* in, + TDDL_UINT32 insize, + TDDL_BYTE* out, + TDDL_UINT32* outsize) { + TPM_RESULT status = TPM_SUCCESS; + TDDL_UINT32 i; + + vtpmloginfo(VTPM_LOG_TXDATA, "Sending buffer = 0x"); + for(i = 0 ; i < insize ; i++) + vtpmloginfomore(VTPM_LOG_TXDATA, "%2.2x ", in[i]); + + vtpmloginfomore(VTPM_LOG_TXDATA, "\n"); + + ssize_t size = 0; + int fd = g_fd; + + // send the request + size = write (fd, in, insize); + if (size < 0) { + vtpmlogerror(VTPM_LOG_TXDATA, "write() failed"); + ERRORDIE (TPM_IOERROR); + } + else if ((TDDL_UINT32) size < insize) { + vtpmlogerror(VTPM_LOG_TXDATA, "Wrote %d instead of %d bytes!\n", size, insize); + // ... ? + } + + // read the response + size = read (fd, out, TCPA_MAX_BUFFER_LENGTH); + if (size < 0) { + vtpmlogerror(VTPM_LOG_TXDATA, "read() failed"); + ERRORDIE (TPM_IOERROR); + } + + vtpmloginfo(VTPM_LOG_TXDATA, "Receiving buffer = 0x"); + for(i = 0 ; i < size ; i++) + vtpmloginfomore(VTPM_LOG_TXDATA, "%2.2x ", out[i]); + + vtpmloginfomore(VTPM_LOG_TXDATA, "\n"); + + *outsize = size; + // close connection + goto egress; + + abort_egress: + egress: + return status; +} + +TPM_RESULT TDDL_Open() { + + TDDL_RESULT status = TDDL_SUCCESS; + int fd = -1; + + if (g_TDDL_open) + return TPM_FAIL; + + fd = open ("/dev/tpm0", O_RDWR); + if (fd < 0) { + vtpmlogerror(VTPM_LOG_TXDATA, "TPM open failed"); + return TPM_IOERROR; + } + + g_fd = fd; + g_TDDL_open = 1; + + return status; +} + +void TDDL_Close() { + if (! 
g_TDDL_open) + return; + + if (g_fd>= 0) { + if (close(g_fd) < 0) + vtpmlogerror(VTPM_LOG_TXDATA, "closeing tpm failed"); + + g_fd = -1; + } + + g_TDDL_open = 0; + +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/Makefile --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/Makefile Fri Sep 9 16:30:54 2005 @@ -0,0 +1,18 @@ +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/vtpm_manager/Rules.mk + +BIN = libTCGUtils.a + +all: build + +build: $(BIN) + +install: build + +clean: + rm -f *.a *.so *.o *.rpm $(DEP_FILES) + +mrproper: clean + +$(BIN): $(OBJS) + $(AR) rcs $(BIN) $(OBJS) diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/bsg.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/bsg.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,830 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// bsg.cpp +// +// This file will handle all the TPM Byte Stream functions +// +// ================================================================== + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <malloc.h> +#include "tcg.h" +#include "crypto.h" +#include "bsg.h" +#include "log.h" + +static int g_log_recursion_level = 0; + +// a largest buffer size. if we get a buf size bigger than this when unpacking, +// will complain! +#define BSG_MAX_BUF_SIZE (1<<18) + +#define bsglog(fmt, ...) do { \ + int __i; \ + for (__i=0; __i < g_log_recursion_level; __i++) { \ + vtpmloginfomore (VTPM_LOG_BSG, "%s", " "); \ + } \ + vtpmloginfomore (VTPM_LOG_BSG, fmt, __VA_ARGS__); \ + } while (0) + + +// FIXME: trigger the selfcheck--need to use glibc hook to do this +//BOOL dummy1 = BSG_static_selfcheck(); + + +// Interpretting Types +// ------------------- +// +// Incoming Types are composed of two parts {format, info} squished into a +// BSG_UINT32. The first 4 bits is a format spec indicating what type of +// data it is. If the first 4 bits are zero the info corresponds to a value in +// BSG_s_fmt[]. This is a structure whose composition is described in +// BSG_s_fmt[]. If the value is non-zero, info corresponds to the size of the +// data (in bytes) being passed in. 
For example a UINT32 being passed in would +// have a format of (__FMT_CONST | 4). If both, the format and info are zero, +// this is interpretted as the end of the structure, and the result is returned. + +// these flags are mutually exclusive, so I'll just make them +// format values which indicate the semantics of the 'info' part and the source +// data. The above description has been accordingly adjusted. + +// format values for determining what type of data the incoming type is +// it's a 4 bit value, occupying the high 4 bits +#define __FMT_CONST (1UL << 28) // Constant sized value +#define __FMT_DATA (2UL << 28) // Believed to be raw data NOT {size,data} +#define __FMT_SIZE (3UL << 28) // A size. Used in FMT_SIZE??_DATA. +#define __FMT_HSIZE (4UL << 28) // A number of handles +#define __FMT_PACKED (5UL << 28) // 'info' is unused; the source data consists + // of {size32, data} but we're to pack only the + // data as that is already packed, and so + // can/must be unpacked without + // explicitly reading it size + +#define __FMT_MASK 0x0FFFFFFFUL // this masks out the 4-bit format +#define __FMT_MASK_SIZE(type) ((type) & __FMT_MASK) +#define __FMT_MASK_FORMAT(type) ((type) & (~__FMT_MASK)) + +// constant (8/16/32-bits) +#define FMT_U8 (__FMT_CONST | 1UL) +#define FMT_U16 (__FMT_CONST | 2UL) +#define FMT_U32 (__FMT_CONST | 4UL) + +// const with a compiler-computed size +#define FMT_SIZEOF(type) (__FMT_CONST | sizeof(type)) + +// other data (size bytes) +// Used primarily for DIGESTS -> FMT_DATA(20) +#define FMT_DATA(size) (__FMT_DATA | ((BSG_UINT32) (size) & __FMT_MASK)) + +// 16/32-bit size followed by N bytes of data +#define FMT_SIZE16_DATA (__FMT_SIZE | 2UL) +#define FMT_SIZE32_DATA (__FMT_SIZE | 4UL) + +// 16-bit size followed by N key handles +#define FMT_SIZE16_HANDLES (__FMT_HSIZE | 2UL) + +#define DIGEST_SIZE 20 +typedef BSG_UINT32 BSG_HANDLE; + +// TCPA_AUTH has 11 fields! 
+#define MAX_FIELDS 11 +typedef struct BSG_Format +{ + BSG_Type type; + const char* name; + BSG_UINT32 fields[MAX_FIELDS + 1]; +} BSG_Format; + +/* + * TCPA structure data formats + */ +// this has to be manually kept in sync with the +// Type enum!! the static_selfcheck() function should be used regularly! +static BSG_Format s_fmt[] = +{ + {BSG_TYPE_UINT32, "BSG_TYPE_UINT32", {FMT_U32, 0}}, + {BSG_TYPE_UINT16, "BSG_TYPE_UINT16", {FMT_U16, 0}}, + {BSG_TYPE_BYTE, "BSG_TYPE_BYTE", {FMT_U8, 0}}, + {BSG_TYPE_BOOL, "BSG_TYPE_BOOL", {FMT_U8, 0}}, + {BSG_TPM_SIZE32_DATA, "BSG_TPM_SIZE32_DATA", {FMT_SIZE32_DATA, 0}}, + {BSG_TPM_TAG, "BSG_TPM_TAG", {FMT_SIZEOF(TPM_TAG), 0}}, + {BSG_TPM_HANDLE, "BSG_TPM_HANDLE", {FMT_SIZEOF(TPM_HANDLE), 0}}, + {BSG_TPM_RESULT, "BSG_TPM_RESULT", {FMT_SIZEOF(TPM_RESULT), 0}}, + {BSG_TPM_RESOURCE_TYPE, "BSG_TPM_RESOURCE_TYPE", {FMT_SIZEOF(TPM_RESOURCE_TYPE), 0}}, + {BSG_TPM_COMMAND_CODE, "BSG_TPM_COMMAND_CODE", {FMT_U32, 0}}, + {BSG_TPM_AUTH_DATA_USAGE, "BSG_TPM_AUTH_DATA_USAGE", {FMT_U8, 0}}, + {BSG_TPM_ALGORITHM_ID, "BSG_TPM_ALGORITHM_ID", {FMT_U32, 0}}, + {BSG_TPM_PROTOCOL_ID, "BSG_TPM_PROTOCOL_ID", {FMT_SIZEOF(TPM_PROTOCOL_ID), 0}}, + {BSG_TPM_KEY_USAGE, "BSG_TPM_KEY_USAGE", {FMT_U16, 0}}, + {BSG_TPM_ENC_SCHEME, "BSG_TPM_ENC_SCHEME", {FMT_U16, 0}}, + {BSG_TPM_SIG_SCHEME, "BSG_TPM_SIG_SCHEME", {FMT_U16, 0}}, + {BSG_TPM_MIGRATE_SCHEME, "BSG_TPM_MIGRATE_SCHEME", {FMT_U16, 0}}, + {BSG_TPM_KEY_FLAGS, "BSG_TPM_KEY_FLAGS", {FMT_U32, 0}}, + + {BSG_TPM_AUTHDATA, "BSG_TPM_AUTHDATA", {FMT_DATA(DIGEST_SIZE), 0}}, + {BSG_TPM_SECRET, "BSG_TPM_SECRET", {BSG_TPM_AUTHDATA, 0}}, + {BSG_TPM_ENCAUTH, "BSG_TPM_ENCAUTH", {BSG_TPM_AUTHDATA, 0}}, + {BSG_TPM_PAYLOAD_TYPE, "BSG_TPM_PAYLOAD_TYPE", {FMT_SIZEOF(TPM_PAYLOAD_TYPE), 0}}, + + {BSG_TPM_VERSION, "BSG_TPM_VERSION", {FMT_DATA(4), 0}}, // vers 1.2 + {BSG_TPM_DIGEST, "BSG_TPM_DIGEST", {FMT_DATA(DIGEST_SIZE), 0}}, + {BSG_TPM_COMPOSITE_HASH, "BSG_TPM_COMPOSITE_HASH", {BSG_TPM_DIGEST, 0}}, + {BSG_TPM_CHOSENID_HASH, 
"BSG_TPM_CHOSENID_HASH", {BSG_TPM_DIGEST, 0}}, + + {BSG_TPM_NONCE, "BSG_TPM_NONCE", {FMT_DATA(DIGEST_SIZE), 0}}, + {BSG_TPM_KEY_HANDLE, "BSG_TPM_KEY_HANDLE", {FMT_SIZEOF(TPM_KEY_HANDLE), 0}}, + {BSG_TPM_KEY_HANDLE_LIST, "BSG_TPM_KEY_HANDLE_LIST", + {FMT_SIZE16_HANDLES, 0}}, + + {BSG_TPM_KEY_PARMS, "BSG_TPM_KEY_PARMS", { + BSG_TPM_ALGORITHM_ID, + BSG_TPM_ENC_SCHEME, + BSG_TPM_SIG_SCHEME, + FMT_SIZE32_DATA, + 0}}, + {BSG_TPM_RSA_KEY_PARMS, "BSG_TPM_RSA_KEY_PARMS", { + FMT_U32, FMT_U32, FMT_SIZE32_DATA, 0}}, + {BSG_TPM_STORE_PUBKEY, "BSG_TPM_STORE_PUBKEY", {FMT_SIZE32_DATA, 0}}, + {BSG_TPM_PUBKEY, "BSG_TPM_PUBKEY", {BSG_TPM_KEY_PARMS, BSG_TPM_STORE_PUBKEY, 0}}, + {BSG_TPM_KEY, "BSG_TPM_KEY", { + BSG_TPM_VERSION, + BSG_TPM_KEY_USAGE, + BSG_TPM_KEY_FLAGS, + BSG_TPM_AUTH_DATA_USAGE, + BSG_TPM_KEY_PARMS, + FMT_SIZE32_DATA, // the PCR_INFO + BSG_TPM_STORE_PUBKEY, + FMT_SIZE32_DATA, // the encrypted part + 0}}, + + {BSG_TPM_MIGRATIONKEYAUTH, "BSG_TPM_MIGRATIONKEYAUTH", { + BSG_TPM_PUBKEY, + BSG_TPM_MIGRATE_SCHEME, + BSG_TPM_DIGEST, 0}}, + + {BSG_TCPA_AUDIT_EVENT, "TCPA_AUDIT_EVENT", { + BSG_TPM_COMMAND_CODE, + BSG_TPM_RESULT, 0 }}, + + {BSG_TCPA_EVENT_CERT, "TCPA_EVENT_CERT", { + BSG_TPM_DIGEST, + BSG_TPM_DIGEST, + FMT_DATA(2), + FMT_SIZE32_DATA, 0}}, + + {BSG_TPM_PCR_SELECTION, "BSG_TPM_PCR_SELECTION", {FMT_SIZE16_DATA, 0} }, + {BSG_TPM_PCR_COMPOSITE, "BSG_TPM_PCR_COMPOSITE", { BSG_TPM_PCR_SELECTION, + FMT_SIZE32_DATA, + 0} }, + + {BSG_TPM_PCR_INFO, "BSG_TPM_PCR_INFO", { BSG_TPM_PCR_SELECTION, + BSG_TPM_COMPOSITE_HASH, + BSG_TPM_COMPOSITE_HASH, + 0} }, + + + {BSG_TPM_STORED_DATA, "BSG_TPM_STORED_DATA", { + BSG_TPM_VERSION, + FMT_SIZE32_DATA, + FMT_SIZE32_DATA, + 0}}, + {BSG_TPM_SYMMETRIC_KEY, "BSG_TPM_SYMMETRIC_KEY", { + BSG_TPM_ALGORITHM_ID, + BSG_TPM_ENC_SCHEME, + FMT_SIZE16_DATA, + 0}}, + {BSG_TPM_STORE_PRIVKEY, "BSG_TPM_STORE_PRIVKEY", {FMT_SIZE32_DATA, 0}}, + {BSG_TPM_STORE_ASYMKEY, "BSG_TPM_STORE_ASYMKEY", { + BSG_TPM_PAYLOAD_TYPE, + BSG_TPM_SECRET, + BSG_TPM_SECRET, 
+ BSG_TPM_DIGEST, + BSG_TPM_STORE_PRIVKEY, + 0}}, + {BSG_TPM_MIGRATE_ASYMKEY, "BSG_TPM_MIGRATE_ASYMKEY", { + BSG_TPM_PAYLOAD_TYPE, + BSG_TPM_SECRET, + BSG_TPM_DIGEST, + FMT_U32, + BSG_TPM_STORE_PRIVKEY, + 0}}, + + {BSG_TPM_QUOTE_INFO, "BSG_TPM_QUOTE_INFO", { + BSG_TPM_VERSION, + FMT_DATA(4), + BSG_TPM_COMPOSITE_HASH, + BSG_TPM_NONCE, + 0}}, + + {BSG_TPM_IDENTITY_CONTENTS, "BSG_TPM_IDENTITY_CONTENTS", { + BSG_TPM_VERSION, + FMT_U32, + BSG_TPM_CHOSENID_HASH, + BSG_TPM_PUBKEY, + 0}}, + + {BSG_TPM_PCRVALUE, "BSG_TPM_PCRVALUE", {FMT_DATA(DIGEST_SIZE), 0}}, + + {BSG_TCPA_PCR_FLAGS, "TCPA_PCR_FLAGS", { + FMT_U8, + FMT_U8, + 0}}, + + {BSG_TCS_AUTH, "TCS_AUTH", { + BSG_TYPE_UINT32, + BSG_TPM_NONCE, + BSG_TPM_NONCE, + BSG_TYPE_BOOL, + BSG_TPM_AUTHDATA, + 0}}, + + {BSG_TPM_KEY_NONSENSITIVE, "BSG_TPM_KEY_NONSENSITIVE", { + BSG_TPM_VERSION, + BSG_TPM_KEY_USAGE, + BSG_TPM_KEY_FLAGS, + BSG_TPM_AUTH_DATA_USAGE, + BSG_TPM_KEY_PARMS, + FMT_SIZE32_DATA, + BSG_TPM_STORE_PUBKEY, + 0}}, + + {BSG_PACKED, "BSG_PACKED", { + __FMT_PACKED, + 0 }}, + + {BSG_TYPE_MAX, "", {0}}, +}; + + +static const BSG_Format* find_format (BSG_Type t) { + BSG_Format * f = s_fmt; + + if (t >= BSG_TYPE_MAX) { + return NULL; + } + + // WARNING: this depends on the enum and s_fmt[] array being in sync! make + // sure to run the static_selfcheck() to make sure + f = s_fmt + (t - BSG_TYPE_FIRST); + + return f; +} + +// +// a consistency-checking routine which can be run at compile time +// (ie. immediately after compilation) +// +// tasks: +// - verify that s_fmt has one entry per Type t, and that entry is at s_fmt[t] +// +// conditions: +// - need that s_fmt[0] is the first type listed in the Type enum! ie the first +// Type has value 0, not 1 +// +// FIXME: should have a function be passed in here which is called if the test +// fails. 
Then the caller can decide what to do: abort, notify, whatever +// +BOOL BSG_static_selfcheck () +{ + int i; + + for (i=BSG_TYPE_FIRST; i <= BSG_TYPE_MAX; i++) { + if (s_fmt[i - BSG_TYPE_FIRST].type != i) { + bsglog ("%s\n", "BSG: static_selfcheck failed!\n"); + bsglog ("failure at %i, allegedly %s\n", + i, s_fmt[i - BSG_TYPE_FIRST].name); + abort(); + return FALSE; + } + } + + bsglog ("%s\n", "BSG: static_selfcheck success!"); + return TRUE; +} + + +/** + * Flatten a TCPA structure into a buffer in big-endian format + * @type: TCPA structure type + * @src: (IN) TCPA structure (OUT) end of TCPA structure + * @dst: (OUT) flattened data + * Returns: Flattened size or -1 for unknown types + */ +// make it so that it can just run through the whole process and return +// the packed size, without packing anything. this will be done if dst is NULL. +static int BSG_Pack_private(BSG_Type type, const BSG_BYTE** src, BSG_BYTE* dst) +{ + // check incoming parameters + if (*src == NULL) + return 0; + + const BSG_BYTE* s = *src; + BSG_BYTE* d = dst; + + BSG_UINT32 size = __FMT_MASK_SIZE(type); + BSG_UINT32 format = __FMT_MASK_FORMAT(type); + + if (format == __FMT_CONST) // We are dealing with a fixed length value eg. UINT32 + { + BSG_UINT32 val = 0; + switch (size) { + case 1: val = * (BYTE*) s; break; + case 2: val = * (unsigned short*) s; break; + case 4: val = * (BSG_UINT32*) s; break; + } + if (dst) + BSG_PackConst(val, size, d); + + s += size; + d += size; + } else if (format == __FMT_DATA) { // We are dealing with raw data. Not sure when + // this is used. 
+ + if (dst) { + bsglog ("BSG: __FMT_DATA size %d, src %p, dst %p\n", size, s, d); + memcpy(d, s, size); + } + + s += size; + d += size; + } else if (format == __FMT_SIZE || format == __FMT_HSIZE) { // It's a size, followed by that much data or handles + + BSG_UINT32 psize = 0; + switch (size) { + case 1: psize = * (BYTE*) s; break; + case 2: psize = * (unsigned short*) s; break; + case 4: psize = * (BSG_UINT32*) s; break; + } + + if (dst) + BSG_PackConst(psize, size, d); + + s += size; + d += size; + + // now 's' points to an address, so cast it to BSG_BYTE** + const BSG_BYTE* pdata = * ((BSG_BYTE**) s); + s += sizeof(BSG_BYTE*); + + if (format == __FMT_HSIZE) {// This is a list of psize Handles + if (dst) { + BSG_HANDLE* d2 = (BSG_HANDLE*) d; + BSG_HANDLE* p2 = (BSG_HANDLE*) pdata; + BSG_UINT32 i; + for (i = 0; i < psize; i++) + d2[i] = BSG_UnpackConst((BSG_BYTE*)(p2 + i), 4); + + } + d += psize * sizeof(BSG_HANDLE); + } else {// If it's not psize handles, it's psize data. + if (psize > 0) { + if (dst) { + bsglog ("BSG: __FMT_SIZE, size=%d, src=%p, dst=%p\n", + psize, pdata, d); + memcpy(d, pdata, psize); + } + } + d += psize; + } + } else if (format == __FMT_PACKED) { + // the source buffer is a pack_constbuf_t, which has a size and a + // pointer. just copy the buffer value, the size is not included in the + // output stream. + pack_constbuf_t * buf = (pack_constbuf_t*) s; + + if (dst) { + bsglog ("BSG: __FMT_PACKED, size=%d, src=%p, dst=%p\n", + buf->size, buf->data, d); + memcpy(d, buf->data, buf->size); + } + + s += buf->size; + d += buf->size; + } else if (format == 0) {// No flags are set. 
This is a structure & it should + // be looked up in the bsg_s_fmt[] + + const BSG_Format* x = find_format (type); + if (x == NULL) { + vtpmloginfo(VTPM_LOG_BSG, "BSG_Pack: cannot find type %d\n", type); + return -1; + } + + if (dst) + bsglog ("BSG_Pack type %s\n", x->name); + + + // iterate through the fields + const BSG_UINT32* f = x->fields; + for (; *f; f++) { + int fsize; + + g_log_recursion_level++; + fsize = BSG_Pack_private((BSG_Type) *f, &s, dst ? d : NULL); + g_log_recursion_level--; + + if (fsize <= 0) + return fsize; + + d += fsize; + } + } else { + vtpmlogerror(VTPM_LOG_BSG, "BSG_Pack(): Unknown format %d\n", format); + return -1; + } + + *src = s; + return (d - dst); +} + +/** + * Unflatten a TCPA structure from a buffer in big-endian format + * @type: TCPA structure type + * @src: flattened data + * @dst: (IN) TCPA structure (OUT) end of TCPA structure + * Returns: Flattened size + * Note: Returns flattened size NOT the unpacked structure size + */ +static int BSG_Unpack_private(BSG_Type type, const BSG_BYTE* src, BSG_BYTE** dst) { + // check incoming parameters + if (src == NULL) + return 0; + + + const BSG_BYTE* s = src; + BSG_BYTE* d = dst ? *dst:NULL; + if (dst && !d) + dst = NULL; + + BSG_UINT32 size = __FMT_MASK_SIZE(type); + BSG_UINT32 format = __FMT_MASK_FORMAT(type); + + if (format == __FMT_CONST) {// We are dealing with a fixed length value ie. UINT32 + + BSG_UINT32 val = BSG_UnpackConst(s, size); + + if (dst) { + switch (size) { + case 1: *(BYTE *) d = (BSG_BYTE) val; break; + case 2: *(unsigned short*) d = (unsigned short) val; break; + case 4: *(BSG_UINT32*) d = (BSG_UINT32) val; break; + } + } + + s += size; + d += size; + } else if (format == __FMT_DATA) {// We are dealing with raw data. Not sure when this is used. 
+ if (dst) + memcpy(d, s, size); + + d += size; + s += size; + } else if (format == __FMT_SIZE || format == __FMT_HSIZE) {// It's a size, followed by that much data or handles + + BSG_UINT32 psize = BSG_UnpackConst(s, size); + + if (psize > BSG_MAX_BUF_SIZE) { + vtpmlogerror(VTPM_LOG_BSG, "BSG_Unpack runs into var-sized data bigger than %u bytes!!\n", + BSG_MAX_BUF_SIZE); + return -1; + } + + if (dst) { + switch (size) { + case 1: *(BYTE *) d = (BSG_BYTE) psize; break; + case 2: *(unsigned short*) d = (unsigned short) psize; break; + case 4: *(BSG_UINT32*) d = (BSG_UINT32) psize; break; + } + } + + s += size; + d += size; + + BSG_BYTE* pdata = NULL; + + if (psize) { + if (format == __FMT_HSIZE) { // This is a list of psize Handles + if (dst) { + BSG_HANDLE* s2 = (BSG_HANDLE*) s; + pdata = (BSG_BYTE *)malloc(psize * sizeof(BSG_HANDLE)); + if (!pdata) + return -1; + + BSG_HANDLE* p2 = (BSG_HANDLE*) pdata; + BSG_UINT32 i; + for (i = 0; i < psize; i++) { + BSG_PackConst(s2[i], 4, (BSG_BYTE*)(p2 + i)); + } + } + s += psize * sizeof(BSG_HANDLE); + } else { // If it's not psize handles, it's psize data. + if (dst) { + pdata = (BSG_BYTE *)malloc(sizeof(BSG_BYTE) * psize); + if (!pdata) + return -1; + memcpy(pdata, s, psize); + } + s += psize; + } + } + if (dst) + *(void**) d = pdata; + + d += sizeof(void*); + } else if (format == __FMT_PACKED) { + + // this doesn't make sense for unpacking! + vtpmlogerror(VTPM_LOG_BSG, "BSG_Unpack() called with format __FMT_PACKED. " + "This does not make sense\n"); + + return -1; + } else if (format == 0) {// No flags are set. This is a structure & it should + // be looked up in the bsg_s_fmt[] + + const BSG_Format* x = find_format (type); + if (x == NULL) { + vtpmlogerror(VTPM_LOG_BSG, "BSG_Unpack: cannot find type %d\n", type); + return -1; + } + + const BSG_UINT32* f = x->fields; + for (; *f; f++) { + int fsize = BSG_Unpack_private((BSG_Type) *f, s, dst ? 
&d:NULL); + if (fsize <= 0) + return fsize; + s += fsize; + } + } + + if (dst) + *dst = d; + return (s - src); +} + +/** + * Free memory associated with unpacked TCPA structure + * @type: TCPA structure type + * @src: (IN) TCPA structure (OUT) end of TCPA structure + * Note: Destroy should be called on all structures created with Unpack + * to ensure that any allocated memory is freed + */ +static void BSG_Destroy_private(BSG_Type type, BSG_BYTE** src) { + BSG_BYTE* s = *src; + + BSG_UINT32 size = __FMT_MASK_SIZE(type); + BSG_UINT32 format = __FMT_MASK_FORMAT(type); + + if ((src == NULL) || (*src == NULL)) { + vtpmlogerror(VTPM_LOG_BSG, "BSG_Destroy() called with NULL src\n"); + return; + } + + if (format == __FMT_CONST || format == __FMT_DATA) + s += size; + else if (format == __FMT_SIZE || format == __FMT_HSIZE) { + s += size; + BSG_BYTE* ptr = *(BSG_BYTE**) s; + if (ptr) + free(ptr); + s += sizeof(void*); + } else if (format == __FMT_PACKED) { + + // this doesn't make sense for unpacking, hence also for Destroy() + vtpmlogerror(VTPM_LOG_BSG, "BSG_Destroy() called with format __FMT_PACKED. " + "This does not make sense\n"); + + return; + } else if (format == 0) { + const BSG_Format* x = find_format (type); + if (x == NULL) { + vtpmlogerror(VTPM_LOG_BSG, "BSG_Destroy: cannot find type %d\n", type); + return; + } + + const BSG_UINT32* f = x->fields; + for (; *f; f++) + BSG_Destroy_private((BSG_Type) *f, &s); + } + + *src = s; +} + +int BSG_Pack(BSG_Type type, const void* src, BSG_BYTE* dst) +{ + const BSG_BYTE* src2 = (const BSG_BYTE*) src; + return BSG_Pack_private(type, &src2, dst); +} + +int BSG_Unpack(BSG_Type type, const BSG_BYTE* src, void* dst) +{ + BSG_BYTE* dst2 = (BSG_BYTE*) dst; + return BSG_Unpack_private(type, src, dst ? 
&dst2:NULL); +} + +void BSG_Destroy(BSG_Type type, void* src) +{ + BSG_BYTE* src2 = (BSG_BYTE*) src; + BSG_Destroy_private(type, &src2); +} + +/** + * Pack a 8/16/32-bit constant into a buffer in big-endian format + * @val: constant value + * @size: constant size in bytes (1, 2, or 4) + * @dst: (OUT) buffer + */ +void BSG_PackConst(BSG_UINT32 val, int size, BSG_BYTE* dst) { + bsglog ("BSG: PackConst on %d of size %i into address %p\n", val, size, dst); + + switch (size) { + case 4: + dst[0] = (BSG_BYTE)((val >> 24) & 0xff); + dst[1] = (BSG_BYTE)((val >> 16) & 0xff); + dst[2] = (BSG_BYTE)((val >> 8) & 0xff); + dst[3] = (BSG_BYTE)(val & 0xff); + break; + case 2: + dst[0] = (BSG_BYTE)((val >> 8) & 0xff); + dst[1] = (BSG_BYTE)(val & 0xff); + break; + case 1: + dst[0] = (BSG_BYTE)(val & 0xff); + break; + } +} + +/** + * Unpack a 8/16/32-bit constant from a buffer in big-endian format + * @src: buffer + * @size: constant size in bytes (1, 2, or 4) + */ +BSG_UINT32 BSG_UnpackConst(const BSG_BYTE* src, int size) { + BSG_UINT32 val = 0; + + if (src == NULL) + return 0; + + switch (size) { + case 4: + val = (((BSG_UINT32) src[0]) << 24 + | ((BSG_UINT32) src[1]) << 16 + | ((BSG_UINT32) src[2]) << 8 + | (BSG_UINT32) src[3]); + break; + case 2: + val = (((BSG_UINT32) src[0]) << 8 | (BSG_UINT32) src[1]); + break; + case 1: + val = (BSG_UINT32) src[0]; + break; + } + return val; +} + +// Pack a list of parameters. Beware not to send values, but rather you must +// send a pointer to your values Instead. This includes UINT32's. +int BSG_PackList( BSG_BYTE* dst, int ParamCount, ... ) { + int ParamNumber; + BSG_Type format; + BSG_BYTE* val = NULL; + int size=0; + + va_list paramList; + va_start( paramList, ParamCount ); + + for( ParamNumber = 1; ParamNumber <= ParamCount; ParamNumber++) { + //Strangeness with int is because gcc wanted an int rather than a enum of ints. 
+ format = (BSG_Type) va_arg( paramList, int ); + val = va_arg( paramList, BSG_BYTE* ); + size += BSG_Pack(format, val, dst == NULL ? NULL : dst + size); + } + + va_end (paramList); + + return size; +} + +// Unpack a list of parameters. +int BSG_UnpackList( const BSG_BYTE* src, int ParamCount, ... ) { + int ParamNumber = 0; + BSG_Type format; + BSG_BYTE* val = NULL; + int size = 0; + + va_list paramList; + va_start( paramList, ParamCount ); + + for( ParamNumber = 1; ParamNumber <= ParamCount; ParamNumber++) { + format = (BSG_Type) va_arg( paramList, int ); + val = va_arg( paramList, BSG_BYTE* ); + + size += BSG_Unpack(format, src + size, val); + } + + va_end( paramList ); + + return size; +} + +// Destroy any memory allocated by calls to unpack +void BSG_DestroyList(int ParamCount, ... ) { + int ParamNumber = 0; + BSG_Type argType; + BSG_BYTE* paramValue = NULL; + + va_list paramList; + va_start( paramList, ParamCount ); + + for( ParamNumber = 1; ParamNumber <= ParamCount; ParamNumber++) { + argType = (BSG_Type) va_arg( paramList, int ); + paramValue = va_arg( paramList, BSG_BYTE* ); + + BSG_Destroy(argType, paramValue); + } + + va_end( paramList ); + + return; +} + + +// and a tuple version +TPM_RESULT BSG_DestroyTuple (int numParams, pack_tuple_t params[]) { + int i; + + for (i = 0; i < numParams; i++) + BSG_Destroy (params[i].type, params[i].addr); + + return TPM_SUCCESS; +} + + +// +// wrappers of Pack and PackList which malloc the ouput buffer. to be freed +// by the caller later +// + +int BSG_PackMalloc (BSG_Type type, const void* src, BSG_BYTE** o_dst) { + int size = BSG_Pack (type, src, NULL); + BSG_BYTE * dest = (BSG_BYTE*) malloc (size); + if (dest == NULL) + return -1; + + size = BSG_Pack(type, src, dest); + *o_dst = dest; + return size; +} + + + +int BSG_PackListMalloc(BSG_BYTE** outBuffer, int ParamCount, ... 
) { + va_list args; + int size; + + va_start (args, ParamCount); + size = BSG_PackList (NULL, ParamCount, args); + va_end (args); + + BSG_BYTE * dest = (BSG_BYTE*) malloc (size); + if (dest == NULL) + return -1; + + va_start (args, ParamCount); + size = BSG_PackList (dest, ParamCount, args); + va_end (args); + + *outBuffer = dest; + return size; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/bsg.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/bsg.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,166 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// bsg.h +// +// This file contains API's for the TPM Byte Stream functions +// +// ================================================================== + +#ifndef __BSG_H__ +#define __BSG_H__ + +#include <stdarg.h> +#include "buffer.h" + +typedef unsigned int BSG_UINT32; +typedef unsigned char BSG_BYTE; + +// forward decl +struct pack_const_tuple_t; + +struct pack_tuple_t; + + +/** + * Byte stream generator + */ +// this has to be manually kept in sync with the +// s_fmt array!! +// but now we have a self-check function which can make sure things are well +// (if used!) +typedef enum BSG_Type +{ + BSG_TYPE_FIRST = 1, + BSG_TYPE_UINT32 = 1, // start at 1 so that Type 0 only serves as an + // unused/special value + BSG_TYPE_UINT16, + BSG_TYPE_BYTE, + BSG_TYPE_BOOL, + BSG_TPM_SIZE32_DATA, // a 32 bit unsigned size, followed by + // a pointer to that much data. 
can pass a + // struct pack_buf_t as the param + BSG_TPM_TAG, + BSG_TPM_HANDLE, + BSG_TPM_RESULT, + BSG_TPM_RESOURCE_TYPE, + BSG_TPM_COMMAND_CODE, + BSG_TPM_AUTH_DATA_USAGE, + BSG_TPM_ALGORITHM_ID, + BSG_TPM_PROTOCOL_ID, + BSG_TPM_KEY_USAGE, + BSG_TPM_ENC_SCHEME, + BSG_TPM_SIG_SCHEME, + BSG_TPM_MIGRATE_SCHEME, + BSG_TPM_KEY_FLAGS, + BSG_TPM_AUTHDATA, + BSG_TPM_SECRET, + BSG_TPM_ENCAUTH, + BSG_TPM_PAYLOAD_TYPE, + + BSG_TPM_VERSION, + BSG_TPM_DIGEST, + BSG_TPM_COMPOSITE_HASH, + BSG_TPM_CHOSENID_HASH, + BSG_TPM_NONCE, + BSG_TPM_KEY_HANDLE, + BSG_TPM_KEY_HANDLE_LIST, + BSG_TPM_KEY_PARMS, + BSG_TPM_RSA_KEY_PARMS, + BSG_TPM_STORE_PUBKEY, + BSG_TPM_PUBKEY, + BSG_TPM_KEY, + + BSG_TPM_MIGRATIONKEYAUTH, + BSG_TCPA_AUDIT_EVENT, + BSG_TCPA_EVENT_CERT, + BSG_TPM_PCR_SELECTION, + BSG_TPM_PCR_COMPOSITE, + BSG_TPM_PCR_INFO, + BSG_TPM_STORED_DATA, + BSG_TPM_SYMMETRIC_KEY, + BSG_TPM_STORE_PRIVKEY, + BSG_TPM_STORE_ASYMKEY, + BSG_TPM_MIGRATE_ASYMKEY, + BSG_TPM_QUOTE_INFO, + BSG_TPM_IDENTITY_CONTENTS, + BSG_TPM_PCRVALUE, + BSG_TCPA_PCR_FLAGS, + BSG_TCS_AUTH, + + // this is the BSG_TPM_KEY struct without the encData field + BSG_TPM_KEY_NONSENSITIVE, + + BSG_PACKED, + + BSG_TYPE_MAX +} BSG_Type; + +struct pack_const_tuple_t { + BSG_Type type; + const void * addr; +}; + + +typedef struct pack_tuple_t { + BSG_Type type; + void * addr; +} pack_tuple_t; + +int BSG_Pack(BSG_Type type, const void* src, BSG_BYTE* dst); +int BSG_Unpack(BSG_Type type, const BSG_BYTE* src, void* dst); +void BSG_Destroy(BSG_Type type, void* src); + +// wrappers of Pack and PackList which malloc the ouput buffer. to be freed +// by the caller later. returns size of allocated buffer, or -1 in case +// allocation failed +int BSG_PackMalloc (BSG_Type type, const void* src, BSG_BYTE** o_dst); +int BSG_PackListMalloc (BSG_BYTE** outBuffer, int ParamCount, ... ); + +// a va_list version of PackList +int BSG_PackList(BSG_BYTE* outBuffer, int ParamCount, ... 
); +int BSG_UnpackList(const BSG_BYTE* inBuffer, int ParamCount, ... ); +void BSG_DestroyList(int ParamCount, ... ); + +// wrapper of PackList which uses a buffer_t +TPM_RESULT BSG_PackListBuf (buffer_t * o_buf, int ParamCount, ...); + +// and a tuple version +TPM_RESULT BSG_DestroyTuple (int numParams, pack_tuple_t params[]); + +void BSG_PackConst(BSG_UINT32 val, int size, BSG_BYTE* dst); +BSG_UINT32 BSG_UnpackConst(const BSG_BYTE* src, int size); + +BOOL BSG_static_selfcheck (); + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/buffer.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/buffer.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,213 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== + + +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/param.h> + +#include "tcg.h" +#include "bsg.h" +#include "buffer.h" + +static TPM_RESULT buffer_priv_realloc (buffer_t * buf, tpm_size_t newsize); + +// +// buffer functions! +// + +TPM_RESULT buffer_init (buffer_t * buf, tpm_size_t initsize, const BYTE* initval) { + if (initsize == 0) { + memset(buf, 0, sizeof(*buf)); + return TPM_SUCCESS; + } + + + buf->bytes = (BYTE*) malloc (initsize); + if (buf->bytes == NULL) + return TPM_RESOURCES; + + buf->size = initsize; + buf->alloc_size = initsize; + + if (initval) + memcpy (buf->bytes, initval, initsize); + + buf->is_owner = TRUE; + + return TPM_SUCCESS; +} + +TPM_RESULT buffer_init_convert (buffer_t * buf, tpm_size_t initsize, BYTE* initval) { + + buf->size = initsize; + buf->alloc_size = initsize; + buf->bytes = initval; + + buf->is_owner = TRUE; + + return TPM_SUCCESS; +} + +TPM_RESULT buffer_init_copy (buffer_t * buf, const buffer_t * src) { + TPM_RESULT status = buffer_init (buf, src->size, src->bytes); + buf->is_owner = TRUE; + + return status; +} + + + +// make an alias to a constant array +TPM_RESULT buffer_init_const (buffer_t * buf, tpm_size_t size, const BYTE* val) { + // TODO: try to enforce the const things somehow! 
+ buf->bytes = (BYTE*) val; + buf->size = size; + buf->alloc_size = 0; // this field is now unneeded + + buf->is_owner = FALSE; + + return TPM_SUCCESS; +} + +// make an alias into buf, with given offset and length +// if len = 0, make the alias go to the end of buf +TPM_RESULT buffer_init_alias (buffer_t * buf, const buffer_t * b, + tpm_size_t offset, tpm_size_t len) { + if (offset + len > b->size) { + return TPM_NOSPACE; + } + + buf->bytes = b->bytes + offset; + buf->size = len > 0 ? len : b->size - offset; + + //VS/ buf->alloc_size = 0; + if (len ==0) + buf->alloc_size = b->alloc_size - offset; + else + buf->alloc_size = MIN(b->alloc_size - offset, len); + + + buf->is_owner = FALSE; + + return TPM_SUCCESS; +} + + +// copy into the start of dest +TPM_RESULT buffer_copy (buffer_t * dest, const buffer_t* src) +{ + TPM_RESULT status = TPM_SUCCESS; + + if (dest->alloc_size < src->size) { + status = buffer_priv_realloc (dest, src->size); + STATUSCHECK (status); + } + + memcpy (dest->bytes, src->bytes, src->size); + dest->size = src->size; + + //VS/ dest->is_owner = TRUE; + + abort_egress: + + return status; +} + + + +BOOL buffer_eq (const buffer_t * a, const buffer_t * b) { + return (a->size == b->size && memcmp (a->bytes, b->bytes, a->size) == 0); +} + + +void buffer_memset (buffer_t * buf, BYTE b) { + memset (buf->bytes, b, buf->size); +} + + +TPM_RESULT buffer_append_raw (buffer_t * buf, tpm_size_t len, const BYTE* bytes) { + TPM_RESULT status = TPM_SUCCESS; + + if (buf->alloc_size < buf->size + len) { + status = buffer_priv_realloc (buf, buf->size + len); + STATUSCHECK (status); + } + + memcpy (buf->bytes + buf->size, bytes, len); + + buf->size += len; + + goto egress; + + abort_egress: + + egress: + + return status; +} + +tpm_size_t buffer_len (const buffer_t* buf) { + return buf->size; +} + +TPM_RESULT buffer_free (buffer_t * buf) { + if (buf && buf->is_owner && buf->bytes != NULL) { + free (buf->bytes); + buf->bytes = NULL; + } + + return TPM_SUCCESS; +} + 
+TPM_RESULT buffer_priv_realloc (buffer_t * buf, tpm_size_t newsize) { + + // we want to realloc to twice the size, or the new size, whichever + // bigger + + BYTE * tmpbuf = NULL; + + newsize = MAX (buf->alloc_size * 2, newsize); + + tmpbuf = (BYTE*) realloc (buf->bytes, newsize); + if (tmpbuf == NULL) + return TPM_SIZE; + + + buf->bytes = tmpbuf; + buf->alloc_size = newsize; + + return TPM_SUCCESS; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/buffer.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/buffer.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,103 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== + +#ifndef __VTPM_BUFFER_H__ +#define __VTPM_BUFFER_H__ + +#include <stddef.h> // for pointer NULL +#include "tcg.h" + +// structure to enable use of FMT_SIZE32_DATA in BSG_Unpack +typedef struct pack_buf_t { + UINT32 size; + BYTE * data; +} pack_buf_t; + +// and a const version for Pack +typedef struct pack_constbuf_t { + UINT32 size; + const BYTE* data; +} pack_constbuf_t; + +typedef UINT32 tpm_size_t; + +// first version, probably will be expanded... + +#define NULL_BUF {0,0,0,0} + +typedef struct { + // private!! + tpm_size_t size, alloc_size; + BYTE * bytes; + + BOOL is_owner; // do we own this buffer, and need to free it? +} buffer_t; + +// allocate the buffer if initsize > 0, copying over initval if provided +TPM_RESULT buffer_init (buffer_t * buf, + tpm_size_t initsize, + const BYTE* initval); + +// Create a new buffer from a BYTE *. 
Use buffer_free to destroy original BYTE * +TPM_RESULT buffer_init_convert (buffer_t * buf, + tpm_size_t initsize, + BYTE* initval); + +// make an alias to a constant array, no copying +TPM_RESULT buffer_init_const (buffer_t * buf, tpm_size_t size, const BYTE* val); + +// make an alias into buf, with given offset and length +// if len = 0, make the alias go to the end of buf +TPM_RESULT buffer_init_alias (buffer_t * buf, const buffer_t * b, + tpm_size_t offset, tpm_size_t); + +// "copy constructor" +TPM_RESULT buffer_init_copy (buffer_t * buf, const buffer_t * src); + + +// copy into the start of a +TPM_RESULT buffer_copy (buffer_t * dest, const buffer_t* src); + +// are they equal? +BOOL buffer_eq (const buffer_t * a, const buffer_t * b); + +// set the buffer to a constant byte +void buffer_memset (buffer_t * buf, BYTE b); + +tpm_size_t buffer_len (const buffer_t* buf); + +TPM_RESULT buffer_free (buffer_t * buf); + +TPM_RESULT buffer_append_raw (buffer_t * buf, tpm_size_t len, const BYTE* bytes); + +#endif // _TOOLS_H_ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/depend --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/depend Fri Sep 9 16:30:54 2005 @@ -0,0 +1,7 @@ +hashtable.o: hashtable.c hashtable.h hashtable_private.h +hashtable_itr.o: hashtable_itr.c hashtable.h hashtable_private.h \ + hashtable_itr.h +bsg.o: bsg.c tcg.h ../crypto/crypto.h ../crypto/sym_crypto.h buffer.h \ + bsg.h log.h +log.o: log.c buffer.h tcg.h +buffer.o: buffer.c tcg.h bsg.h buffer.h diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/hashtable.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/hashtable.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2005, Intel Corp + * Copyright (c) 2002, Christopher Clark <firstname.lastname@xxxxxxxxxxxx> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "hashtable.h" +#include "hashtable_private.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> + +/* +Credit for primes table: Aaron Krowne + http://br.endernet.org/~akrowne/ + http://planetmath.org/encyclopedia/GoodHashTablePrimes.html +*/ +static const unsigned int primes[] = { +53, 97, 193, 389, +769, 1543, 3079, 6151, +12289, 24593, 49157, 98317, +196613, 393241, 786433, 1572869, +3145739, 6291469, 12582917, 25165843, +50331653, 100663319, 201326611, 402653189, +805306457, 1610612741 +}; +const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]); +const float max_load_factor = 0.65; + +/*****************************************************************************/ +struct hashtable * +create_hashtable(unsigned int minsize, + unsigned int (*hashf) (void*), + int (*eqf) (void*,void*)) +{ + struct hashtable *h; + unsigned int pindex, size = primes[0]; + /* Check requested hashtable isn't too large */ + if (minsize > (1u << 30)) return NULL; + /* Enforce size as prime */ + for (pindex=0; pindex < prime_table_length; pindex++) { + if (primes[pindex] > minsize) { size = primes[pindex]; break; } + } + h = (struct hashtable *)malloc(sizeof(struct hashtable)); + if (NULL == h) return NULL; /*oom*/ + h->table = (struct entry **)malloc(sizeof(struct entry*) * size); + if (NULL == h->table) { free(h); return NULL; } /*oom*/ + memset(h->table, 0, size * sizeof(struct entry *)); + h->tablelength = size; + h->primeindex = pindex; + h->entrycount = 0; + h->hashfn = hashf; + h->eqfn = eqf; + h->loadlimit = (unsigned int) ceil(size * max_load_factor); +#ifdef HASHTABLE_THREADED + pthread_mutex_init(&h->mutex, NULL); +#endif + return h; +} + +/*****************************************************************************/ +unsigned int +hash(struct hashtable *h, void *k) +{ + unsigned int i = h->hashfn(k); + i += ~(i << 9); + i ^= ((i >> 14) | (i << 18)); /* >>> */ + i += (i << 4); + i ^= ((i >> 10) | (i << 22)); /* 
>>> */ + return i; +} + +/*****************************************************************************/ +static int +hashtable_expand(struct hashtable *h) +{ + /* Double the size of the table to accomodate more entries */ + struct entry **newtable; + struct entry *e; + struct entry **pE; + unsigned int newsize, i, index; + /* Check we're not hitting max capacity */ + if (h->primeindex == (prime_table_length - 1)) return 0; + newsize = primes[++(h->primeindex)]; + + newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize); + if (NULL != newtable) + { + memset(newtable, 0, newsize * sizeof(struct entry *)); + /* This algorithm is not 'stable'. ie. it reverses the list + * when it transfers entries between the tables */ + for (i = 0; i < h->tablelength; i++) { + while (NULL != (e = h->table[i])) { + h->table[i] = e->next; + index = indexFor(newsize,e->h); + e->next = newtable[index]; + newtable[index] = e; + } + } + free(h->table); + h->table = newtable; + } + /* Plan B: realloc instead */ + else + { + newtable = (struct entry **) + realloc(h->table, newsize * sizeof(struct entry *)); + if (NULL == newtable) { (h->primeindex)--; return 0; } + h->table = newtable; + memset(newtable[h->tablelength], 0, newsize - h->tablelength); + for (i = 0; i < h->tablelength; i++) { + for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) { + index = indexFor(newsize,e->h); + if (index == i) + { + pE = &(e->next); + } + else + { + *pE = e->next; + e->next = newtable[index]; + newtable[index] = e; + } + } + } + } + h->tablelength = newsize; + h->loadlimit = (unsigned int) ceil(newsize * max_load_factor); + return -1; +} + +/*****************************************************************************/ +unsigned int +hashtable_count(struct hashtable *h) +{ + unsigned int count; +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + count = h->entrycount; +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return count; +} + 
+/*****************************************************************************/ +int +hashtable_insert(struct hashtable *h, void *k, void *v) +{ + /* This method allows duplicate keys - but they shouldn't be used */ + unsigned int index; + struct entry *e; +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + if (++(h->entrycount) > h->loadlimit) + { + /* Ignore the return value. If expand fails, we should + * still try cramming just this value into the existing table + * -- we may not have memory for a larger table, but one more + * element may be ok. Next time we insert, we'll try expanding again.*/ + hashtable_expand(h); + } + e = (struct entry *)malloc(sizeof(struct entry)); + if (NULL == e) { --(h->entrycount); return 0; } /*oom*/ + e->h = hash(h,k); + index = indexFor(h->tablelength,e->h); + e->k = k; + e->v = v; + e->next = h->table[index]; + h->table[index] = e; +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return -1; +} + +/*****************************************************************************/ +void * /* returns value associated with key */ +hashtable_search(struct hashtable *h, void *k) +{ +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + struct entry *e; + unsigned int hashvalue, index; + hashvalue = hash(h,k); + index = indexFor(h->tablelength,hashvalue); + e = h->table[index]; + while (NULL != e) + { + /* Check hash value to short circuit heavier comparison */ + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) { +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return e->v; + } + e = e->next; + } +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return NULL; +} + +/*****************************************************************************/ +void * /* returns value associated with key */ +hashtable_remove(struct hashtable *h, void *k) +{ + /* TODO: consider compacting the table when the load factor drops enough, + * or provide a 'compact' 
method. */ +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + struct entry *e; + struct entry **pE; + void *v; + unsigned int hashvalue, index; + + hashvalue = hash(h,k); + index = indexFor(h->tablelength,hash(h,k)); + pE = &(h->table[index]); + e = *pE; + while (NULL != e) + { + /* Check hash value to short circuit heavier comparison */ + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) + { + *pE = e->next; + h->entrycount--; + v = e->v; + freekey(e->k); + free(e); + return v; + } + pE = &(e->next); + e = e->next; + } +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return NULL; +} + +/*****************************************************************************/ +/* destroy */ +void +hashtable_destroy(struct hashtable *h, int free_values) +{ +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + unsigned int i; + struct entry *e, *f; + struct entry **table = h->table; + if (free_values) + { + for (i = 0; i < h->tablelength; i++) + { + e = table[i]; + while (NULL != e) + { f = e; e = e->next; freekey(f->k); free(f->v); free(f); } + } + } + else + { + for (i = 0; i < h->tablelength; i++) + { + e = table[i]; + while (NULL != e) + { f = e; e = e->next; freekey(f->k); free(f); } + } + } + free(h->table); +#ifdef HASHTABLE_THREADED + pthread_mutex_destroy(&h->mutex); +#endif + free(h); +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/hashtable.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/hashtable.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2005, Intel Corp + * Copyright (c) 2002, Christopher Clark <firstname.lastname@xxxxxxxxxxxx> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef __HASHTABLE_CWC22_H__ +#define __HASHTABLE_CWC22_H__ + +struct hashtable; + +/* Example of use: + * + * struct hashtable *h; + * struct some_key *k; + * struct some_value *v; + * + * static unsigned int hash_from_key_fn( void *k ); + * static int keys_equal_fn ( void *key1, void *key2 ); + * + * h = create_hashtable(16, hash_from_key_fn, keys_equal_fn); + * k = (struct some_key *) malloc(sizeof(struct some_key)); + * v = (struct some_value *) malloc(sizeof(struct some_value)); + * + * (initialise k and v to suitable values) + * + * if (! 
hashtable_insert(h,k,v) ) + * { exit(-1); } + * + * if (NULL == (found = hashtable_search(h,k) )) + * { printf("not found!"); } + * + * if (NULL == (found = hashtable_remove(h,k) )) + * { printf("Not found\n"); } + * + */ + +/* Macros may be used to define type-safe(r) hashtable access functions, with + * methods specialized to take known key and value types as parameters. + * + * Example: + * + * Insert this at the start of your file: + * + * DEFINE_HASHTABLE_INSERT(insert_some, struct some_key, struct some_value); + * DEFINE_HASHTABLE_SEARCH(search_some, struct some_key, struct some_value); + * DEFINE_HASHTABLE_REMOVE(remove_some, struct some_key, struct some_value); + * + * This defines the functions 'insert_some', 'search_some' and 'remove_some'. + * These operate just like hashtable_insert etc., with the same parameters, + * but their function signatures have 'struct some_key *' rather than + * 'void *', and hence can generate compile time errors if your program is + * supplying incorrect data as a key (and similarly for value). + * + * Note that the hash and key equality functions passed to create_hashtable + * still take 'void *' parameters instead of 'some key *'. This shouldn't be + * a difficult issue as they're only defined and passed once, and the other + * functions will ensure that only valid keys are supplied to them. + * + * The cost for this checking is increased code size and runtime overhead + * - if performance is important, it may be worth switching back to the + * unsafe methods once your program has been debugged with the safe methods. 
+ * This just requires switching to some simple alternative defines - eg: + * #define insert_some hashtable_insert + * + */ + +/***************************************************************************** + * create_hashtable + + * @name create_hashtable + * @param minsize minimum initial size of hashtable + * @param hashfunction function for hashing keys + * @param key_eq_fn function for determining key equality + * @return newly created hashtable or NULL on failure + */ + +struct hashtable * +create_hashtable(unsigned int minsize, + unsigned int (*hashfunction) (void*), + int (*key_eq_fn) (void*,void*)); + +/***************************************************************************** + * hashtable_insert + + * @name hashtable_insert + * @param h the hashtable to insert into + * @param k the key - hashtable claims ownership and will free on removal + * @param v the value - does not claim ownership + * @return non-zero for successful insertion + * + * This function will cause the table to expand if the insertion would take + * the ratio of entries to table size over the maximum load factor. + * + * This function does not check for repeated insertions with a duplicate key. + * The value returned when using a duplicate key is undefined -- when + * the hashtable changes size, the order of retrieval of duplicate key + * entries is reversed. + * If in doubt, remove before insert. 
+ */ + +int +hashtable_insert(struct hashtable *h, void *k, void *v); + +#define DEFINE_HASHTABLE_INSERT(fnname, keytype, valuetype) \ +int fnname (struct hashtable *h, keytype *k, valuetype *v) \ +{ \ + return hashtable_insert(h,k,v); \ +} + +/***************************************************************************** + * hashtable_search + + * @name hashtable_search + * @param h the hashtable to search + * @param k the key to search for - does not claim ownership + * @return the value associated with the key, or NULL if none found + */ + +void * +hashtable_search(struct hashtable *h, void *k); + +#define DEFINE_HASHTABLE_SEARCH(fnname, keytype, valuetype) \ +valuetype * fnname (struct hashtable *h, keytype *k) \ +{ \ + return (valuetype *) (hashtable_search(h,k)); \ +} + +/***************************************************************************** + * hashtable_remove + + * @name hashtable_remove + * @param h the hashtable to remove the item from + * @param k the key to search for - does not claim ownership + * @return the value associated with the key, or NULL if none found + */ + +void * /* returns value */ +hashtable_remove(struct hashtable *h, void *k); + +#define DEFINE_HASHTABLE_REMOVE(fnname, keytype, valuetype) \ +valuetype * fnname (struct hashtable *h, keytype *k) \ +{ \ + return (valuetype *) (hashtable_remove(h,k)); \ +} + + +/***************************************************************************** + * hashtable_count + + * @name hashtable_count + * @param h the hashtable + * @return the number of items stored in the hashtable + */ +unsigned int +hashtable_count(struct hashtable *h); + + +/***************************************************************************** + * hashtable_destroy + + * @name hashtable_destroy + * @param h the hashtable + * @param free_values whether to call 'free' on the remaining values + */ + +void +hashtable_destroy(struct hashtable *h, int free_values); + +#endif /* __HASHTABLE_CWC22_H__ */ diff -r 10b1d30d3f66 -r 
b2f4823b6ff0 tools/vtpm_manager/util/hashtable_itr.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/hashtable_itr.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2005, Intel Corp + * Copyright (c) 2002, Christopher Clark <firstname.lastname@xxxxxxxxxxxx> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "hashtable.h" +#include "hashtable_private.h" +#include "hashtable_itr.h" +#include <stdlib.h> /* defines NULL */ + +/*****************************************************************************/ +/* hashtable_iterator - iterator constructor */ + +struct hashtable_itr * +hashtable_iterator(struct hashtable *h) +{ + unsigned int i, tablelength; + struct hashtable_itr *itr = (struct hashtable_itr *) + malloc(sizeof(struct hashtable_itr)); + if (NULL == itr) return NULL; +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + itr->h = h; + itr->e = NULL; + itr->parent = NULL; + tablelength = h->tablelength; + itr->index = tablelength; + if (0 == h->entrycount) { +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return itr; + } + + for (i = 0; i < tablelength; i++) + { + if (NULL != h->table[i]) + { + itr->e = h->table[i]; + itr->index = i; + break; + } + } +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif + return itr; +} + +/*****************************************************************************/ +/* key - return the key of the (key,value) pair at the current position */ +/* value - return the value of the (key,value) pair at the current position */ + +void * +hashtable_iterator_key(struct hashtable_itr *i) +{ return i->e->k; } + +void * +hashtable_iterator_value(struct hashtable_itr *i) +{ return i->e->v; } + +/*****************************************************************************/ +/* advance - advance the iterator to the next element + * returns zero if advanced to end of table */ + +int +hashtable_iterator_advance(struct hashtable_itr *itr) +{ +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&itr->h->mutex); +#endif + unsigned int j,tablelength; + struct entry **table; + struct entry *next; + int ret; + if (NULL == itr->e) { /* stupidity check */ + ret = 0; + goto egress; + } + + next = itr->e->next; + if (NULL != next) + { + itr->parent = itr->e; + itr->e = next; + ret = -1; + 
goto egress; + } + + tablelength = itr->h->tablelength; + itr->parent = NULL; + if (tablelength <= (j = ++(itr->index))) + { + itr->e = NULL; + ret = 0; + goto egress; + } + table = itr->h->table; + while (NULL == (next = table[j])) + { + if (++j >= tablelength) + { + itr->index = tablelength; + itr->e = NULL; + ret = 0; + goto egress; + } + } + itr->index = j; + itr->e = next; + ret = -1; + + egress: +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&itr->h->mutex); +#endif + return ret; +} + +/*****************************************************************************/ +/* remove - remove the entry at the current iterator position + * and advance the iterator, if there is a successive + * element. + * If you want the value, read it before you remove: + * beware memory leaks if you don't. + * Returns zero if end of iteration. */ + +int +hashtable_iterator_remove(struct hashtable_itr *itr) +{ +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&itr->h->mutex); +#endif + struct entry *remember_e, *remember_parent; + int ret; + + /* Do the removal */ + if (NULL == (itr->parent)) + { + /* element is head of a chain */ + itr->h->table[itr->index] = itr->e->next; + } else { + /* element is mid-chain */ + itr->parent->next = itr->e->next; + } + /* itr->e is now outside the hashtable */ + remember_e = itr->e; + itr->h->entrycount--; + freekey(remember_e->k); + + /* Advance the iterator, correcting the parent */ + remember_parent = itr->parent; + ret = hashtable_iterator_advance(itr); + if (itr->parent == remember_e) { itr->parent = remember_parent; } + free(remember_e); +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&itr->h->mutex); +#endif + return ret; +} + +/*****************************************************************************/ +int /* returns zero if not found */ +hashtable_iterator_search(struct hashtable_itr *itr, + struct hashtable *h, void *k) +{ +#ifdef HASHTABLE_THREADED + pthread_mutex_lock(&h->mutex); +#endif + struct entry *e, *parent; + unsigned int 
egress: +#ifdef HASHTABLE_THREADED + pthread_mutex_unlock(&h->mutex); +#endif
/* hashtable_iterator_key + * - return the key of the (key,value) pair at the current position */
remove current element and advance the iterator to the next element + * NB: if you need the value to free it, read it before + * removing. ie: beware memory leaks! + * returns zero if advanced to end of table */ + +int +hashtable_iterator_remove(struct hashtable_itr *itr); + +/*****************************************************************************/ +/* search - overwrite the supplied iterator, to point to the entry + * matching the supplied key. + h points to the hashtable to be searched. + * returns zero if not found. */ +int +hashtable_iterator_search(struct hashtable_itr *itr, + struct hashtable *h, void *k); + +#define DEFINE_HASHTABLE_ITERATOR_SEARCH(fnname, keytype) \ +int fnname (struct hashtable_itr *i, struct hashtable *h, keytype *k) \ +{ \ + return (hashtable_iterator_search(i,h,k)); \ +} + + + +#endif /* __HASHTABLE_ITR_CWC22__*/ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/hashtable_private.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/hashtable_private.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2005, Intel Corp + * Copyright (c) 2002, Christopher Clark <firstname.lastname@xxxxxxxxxxxx> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of the original author; nor the names of any contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef __HASHTABLE_PRIVATE_CWC22_H__ +#define __HASHTABLE_PRIVATE_CWC22_H__ + +#include "hashtable.h" +#ifdef HASHTABLE_THREADED +#include <pthread.h> +#endif + +/*****************************************************************************/ +struct entry +{ + void *k, *v; + unsigned int h; + struct entry *next; +}; + +struct hashtable { + unsigned int tablelength; + struct entry **table; + unsigned int entrycount; + unsigned int loadlimit; + unsigned int primeindex; + unsigned int (*hashfn) (void *k); + int (*eqfn) (void *k1, void *k2); +#ifdef HASHTABLE_THREADED + pthread_mutex_t mutex; +#endif +}; + +/*****************************************************************************/ +unsigned int +hash(struct hashtable *h, void *k); + +/*****************************************************************************/ +/* indexFor */ +static inline unsigned int +indexFor(unsigned int tablelength, unsigned int hashvalue) { + return (hashvalue % tablelength); +}; + +/* Only works if tablelength == 2^N */ +/*static inline unsigned int +indexFor(unsigned int tablelength, unsigned int hashvalue) +{ + return (hashvalue & (tablelength - 1u)); +} +*/ + 
+/*****************************************************************************/ +#define freekey(X) free(X) +/*define freekey(X) ; */ + + +/*****************************************************************************/ + +#endif /* __HASHTABLE_PRIVATE_CWC22_H__*/ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/log.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/log.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,142 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "buffer.h" +#include "tcg.h" + +// Helper code for the consts, eg. to produce messages for error codes. + +typedef struct error_code_entry_t { + TPM_RESULT code; + char * code_name; + char * msg; +} error_code_entry_t; + +static const error_code_entry_t error_msgs [] = { + { TPM_SUCCESS, "TPM_SUCCESS", "Successful completion of the operation" }, + { TPM_AUTHFAIL, "TPM_AUTHFAIL", "Authentication failed" }, + { TPM_BADINDEX, "TPM_BADINDEX", "The index to a PCR, DIR or other register is incorrect" }, + { TPM_BAD_PARAMETER, "TPM_BAD_PARAMETER", "One or more parameter is bad" }, + { TPM_AUDITFAILURE, "TPM_AUDITFAILURE", "An operation completed successfully but the auditing of that operation failed." 
}, + { TPM_CLEAR_DISABLED, "TPM_CLEAR_DISABLED", "The clear disable flag is set and all clear operations now require physical access" }, + { TPM_DEACTIVATED, "TPM_DEACTIVATED", "The TPM is deactivated" }, + { TPM_DISABLED, "TPM_DISABLED", "The TPM is disabled" }, + { TPM_DISABLED_CMD, "TPM_DISABLED_CMD", "The target command has been disabled" }, + { TPM_FAIL, "TPM_FAIL", "The operation failed" }, + { TPM_BAD_ORDINAL, "TPM_BAD_ORDINAL", "The ordinal was unknown or inconsistent" }, + { TPM_INSTALL_DISABLED, "TPM_INSTALL_DISABLED", "The ability to install an owner is disabled" }, + { TPM_INVALID_KEYHANDLE, "TPM_INVALID_KEYHANDLE", "The key handle presented was invalid" }, + { TPM_KEYNOTFOUND, "TPM_KEYNOTFOUND", "The target key was not found" }, + { TPM_INAPPROPRIATE_ENC, "TPM_INAPPROPRIATE_ENC", "Unacceptable encryption scheme" }, + { TPM_MIGRATEFAIL, "TPM_MIGRATEFAIL", "Migration authorization failed" }, + { TPM_INVALID_PCR_INFO, "TPM_INVALID_PCR_INFO", "PCR information could not be interpreted" }, + { TPM_NOSPACE, "TPM_NOSPACE", "No room to load key." }, + { TPM_NOSRK, "TPM_NOSRK", "There is no SRK set" }, + { TPM_NOTSEALED_BLOB, "TPM_NOTSEALED_BLOB", "An encrypted blob is invalid or was not created by this TPM" }, + { TPM_OWNER_SET, "TPM_OWNER_SET", "There is already an Owner" }, + { TPM_RESOURCES, "TPM_RESOURCES", "The TPM has insufficient internal resources to perform the requested action." }, + { TPM_SHORTRANDOM, "TPM_SHORTRANDOM", "A random string was too short" }, + { TPM_SIZE, "TPM_SIZE", "The TPM does not have the space to perform the operation." }, + { TPM_WRONGPCRVAL, "TPM_WRONGPCRVAL", "The named PCR value does not match the current PCR value." }, + { TPM_BAD_PARAM_SIZE, "TPM_BAD_PARAM_SIZE", "The paramSize argument to the command has the incorrect value" }, + { TPM_SHA_THREAD, "TPM_SHA_THREAD", "There is no existing SHA-1 thread." 
}, + { TPM_SHA_ERROR, "TPM_SHA_ERROR", "The calculation is unable to proceed because the existing SHA-1 thread has already encountered an error." }, + { TPM_FAILEDSELFTEST, "TPM_FAILEDSELFTEST", "Self-test has failed and the TPM has shutdown." }, + { TPM_AUTH2FAIL, "TPM_AUTH2FAIL", "The authorization for the second key in a 2 key function failed authorization" }, + { TPM_BADTAG, "TPM_BADTAG", "The tag value sent to for a command is invalid" }, + { TPM_IOERROR, "TPM_IOERROR", "An IO error occurred transmitting information to the TPM" }, + { TPM_ENCRYPT_ERROR, "TPM_ENCRYPT_ERROR", "The encryption process had a problem." }, + { TPM_DECRYPT_ERROR, "TPM_DECRYPT_ERROR", "The decryption process did not complete." }, + { TPM_INVALID_AUTHHANDLE, "TPM_INVALID_AUTHHANDLE", "An invalid handle was used." }, + { TPM_NO_ENDORSEMENT, "TPM_NO_ENDORSEMENT", "The TPM does not a EK installed" }, + { TPM_INVALID_KEYUSAGE, "TPM_INVALID_KEYUSAGE", "The usage of a key is not allowed" }, + { TPM_WRONG_ENTITYTYPE, "TPM_WRONG_ENTITYTYPE", "The submitted entity type is not allowed" }, + { TPM_INVALID_POSTINIT, "TPM_INVALID_POSTINIT", "The command was received in the wrong sequence relative to TPM_Init and a subsequent TPM_Startup" }, + { TPM_INAPPROPRIATE_SIG, "TPM_INAPPROPRIATE_SIG", "Signed data cannot include additional DER information" }, + { TPM_BAD_KEY_PROPERTY, "TPM_BAD_KEY_PROPERTY", "The key properties in TPM_KEY_PARMs are not supported by this TPM" }, + + { TPM_BAD_MIGRATION, "TPM_BAD_MIGRATION", "The migration properties of this key are incorrect." }, + { TPM_BAD_SCHEME, "TPM_BAD_SCHEME", "The signature or encryption scheme for this key is incorrect or not permitted in this situation." 
}, + { TPM_BAD_DATASIZE, "TPM_BAD_DATASIZE", "The size of the data (or blob) parameter is bad or inconsistent with the referenced key" }, + { TPM_BAD_MODE, "TPM_BAD_MODE", "A mode parameter is bad, such as capArea or subCapArea for TPM_GetCapability, phsicalPresence parameter for TPM_PhysicalPresence, or migrationType for TPM_CreateMigrationBlob." }, + { TPM_BAD_PRESENCE, "TPM_BAD_PRESENCE", "Either the physicalPresence or physicalPresenceLock bits have the wrong value" }, + { TPM_BAD_VERSION, "TPM_BAD_VERSION", "The TPM cannot perform this version of the capability" }, + { TPM_NO_WRAP_TRANSPORT, "TPM_NO_WRAP_TRANSPORT", "The TPM does not allow for wrapped transport sessions" }, + { TPM_AUDITFAIL_UNSUCCESSFUL, "TPM_AUDITFAIL_UNSUCCESSFUL", "TPM audit construction failed and the underlying command was returning a failure code also" }, + { TPM_AUDITFAIL_SUCCESSFUL, "TPM_AUDITFAIL_SUCCESSFUL", "TPM audit construction failed and the underlying command was returning success" }, + { TPM_NOTRESETABLE, "TPM_NOTRESETABLE", "Attempt to reset a PCR register that does not have the resettable attribute" }, + { TPM_NOTLOCAL, "TPM_NOTLOCAL", "Attempt to reset a PCR register that requires locality and locality modifier not part of command transport" }, + { TPM_BAD_TYPE, "TPM_BAD_TYPE", "Make identity blob not properly typed" }, + { TPM_INVALID_RESOURCE, "TPM_INVALID_RESOURCE", "When saving context identified resource type does not match actual resource" }, + { TPM_NOTFIPS, "TPM_NOTFIPS", "The TPM is attempting to execute a command only available when in FIPS mode" }, + { TPM_INVALID_FAMILY, "TPM_INVALID_FAMILY", "The command is attempting to use an invalid family ID" }, + { TPM_NO_NV_PERMISSION, "TPM_NO_NV_PERMISSION", "The permission to manipulate the NV storage is not available" }, + { TPM_REQUIRES_SIGN, "TPM_REQUIRES_SIGN", "The operation requires a signed command" }, + { TPM_KEY_NOTSUPPORTED, "TPM_KEY_NOTSUPPORTED", "Wrong operation to load an NV key" }, + { TPM_AUTH_CONFLICT, 
"TPM_AUTH_CONFLICT", "NV_LoadKey blob requires both owner and blob authorization" }, + { TPM_AREA_LOCKED, "TPM_AREA_LOCKED", "The NV area is locked and not writtable" }, + { TPM_BAD_LOCALITY, "TPM_BAD_LOCALITY", "The locality is incorrect for the attempted operation" }, + { TPM_READ_ONLY, "TPM_READ_ONLY", "The NV area is read only and can't be written to" }, + { TPM_PER_NOWRITE, "TPM_PER_NOWRITE", "There is no protection on the write to the NV area" }, + { TPM_FAMILYCOUNT, "TPM_FAMILYCOUNT", "The family count value does not match" }, + { TPM_WRITE_LOCKED, "TPM_WRITE_LOCKED", "The NV area has already been written to" }, + { TPM_BAD_ATTRIBUTES, "TPM_BAD_ATTRIBUTES", "The NV area attributes conflict" }, + { TPM_INVALID_STRUCTURE, "TPM_INVALID_STRUCTURE", "The structure tag and version are invalid or inconsistent" }, + { TPM_KEY_OWNER_CONTROL, "TPM_KEY_OWNER_CONTROL", "The key is under control of the TPM Owner and can only be evicted by the TPM Owner." }, + { TPM_BAD_COUNTER, "TPM_BAD_COUNTER", "The counter handle is incorrect" }, + { TPM_NOT_FULLWRITE, "TPM_NOT_FULLWRITE", "The write is not a complete write of the area" }, + { TPM_CONTEXT_GAP, "TPM_CONTEXT_GAP", "The gap between saved context counts is too large" }, + { TPM_MAXNVWRITES, "TPM_MAXNVWRITES", "The maximum number of NV writes without an owner has been exceeded" }, + { TPM_NOOPERATOR, "TPM_NOOPERATOR", "No operator authorization value is set" }, + { TPM_RESOURCEMISSING, "TPM_RESOURCEMISSING", "The resource pointed to by context is not loaded" }, + { TPM_DELEGATE_LOCK, "TPM_DELEGATE_LOCK", "The delegate administration is locked" }, + { TPM_DELEGATE_FAMILY, "TPM_DELEGATE_FAMILY", "Attempt to manage a family other then the delegated family" }, + { TPM_DELEGATE_ADMIN, "TPM_DELEGATE_ADMIN", "Delegation table management not enabled" }, + { TPM_TRANSPORT_EXCLUSIVE, "TPM_TRANSPORT_EXCLUSIVE", "There was a command executed outside of an exclusive transport session" }, +}; + + +// helper function for the error codes: 
+const char* tpm_get_error_name (TPM_RESULT code) { + // just do a linear scan for now + unsigned i; + for (i = 0; i < sizeof(error_msgs)/sizeof(error_msgs[0]); i++) + if (code == error_msgs[i].code) + return error_msgs[i].code_name; + + return "Failed to find code name for given code"; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/vtpm_manager/util/log.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/vtpm_manager/util/log.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,92 @@ +// =================================================================== +// +// Copyright (c) 2005, Intel Corp. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== + +#ifndef __VTPM_LOG_H__ +#define __VTPM_LOG_H__ + +#include <stdint.h> // for uint32_t +#include <stddef.h> // for pointer NULL + +// =========================== LOGGING ============================== + +// the logging module numbers +#define VTPM_LOG_CRYPTO 1 +#define VTPM_LOG_BSG 2 +#define VTPM_LOG_TXDATA 3 +#define VTPM_LOG_TCS 4 +#define VTPM_LOG_TCS_DEEP 5 +#define VTPM_LOG_VTSP 6 +#define VTPM_LOG_VTPM 7 +#define VTPM_LOG_VTPM_DEEP 8 + +static char *module_names[] = { "", + "CRYPTO", + "BSG", + "TXDATA", + "TCS", + "TCS", + "VTSP", + "VTPM", + "VTPM" + }; + +// Default to standard logging +#ifndef LOGGING_MODULES +#define LOGGING_MODULES (BITMASK(VTPM_LOG_VTPM)) +#endif + +// bit-access macros +#define BITMASK(idx) ( 1U << (idx) ) +#define GETBIT(num,idx) ( ((num) & BITMASK(idx)) >> idx ) +#define SETBIT(num,idx) (num) |= BITMASK(idx) +#define CLEARBIT(num,idx) (num) &= ( ~ BITMASK(idx) ) + +#define vtpmloginfo(module, fmt, args...) \ + if (GETBIT (LOGGING_MODULES, module) == 1) { \ + fprintf (stdout, "INFO[%s]: " fmt, module_names[module], ##args); \ + } + +#define vtpmloginfomore(module, fmt, args...) \ + if (GETBIT (LOGGING_MODULES, module) == 1) { \ + fprintf (stdout, fmt,##args); \ + } + +#define vtpmlogerror(module, fmt, args...) 
#endif // __VTPM_LOG_H__
IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +// OF THE POSSIBILITY OF SUCH DAMAGE. +// =================================================================== +// +// tcg.h +// +// This file contains all the structure and type definitions +// +// ================================================================== + +#ifndef __TCG_H__ +#define __TCG_H__ + +// This pragma is used to disallow structure padding +#pragma pack(push, 1) + +// *************************** TYPEDEFS ********************************* +typedef unsigned char BYTE; +typedef unsigned char BOOL; +typedef unsigned short UINT16; +typedef unsigned int UINT32; +typedef unsigned long long UINT64; + +typedef UINT32 TPM_RESULT; +typedef UINT32 TPM_PCRINDEX; +typedef UINT32 TPM_DIRINDEX; +typedef UINT32 TPM_HANDLE; +typedef TPM_HANDLE TPM_AUTHHANDLE; +typedef TPM_HANDLE TCPA_HASHHANDLE; +typedef TPM_HANDLE TCPA_HMACHANDLE; +typedef TPM_HANDLE TCPA_ENCHANDLE; +typedef TPM_HANDLE TPM_KEY_HANDLE; +typedef TPM_HANDLE TCPA_ENTITYHANDLE; +typedef UINT32 TPM_RESOURCE_TYPE; +typedef UINT32 TPM_COMMAND_CODE; +typedef UINT16 TPM_PROTOCOL_ID; +typedef BYTE TPM_AUTH_DATA_USAGE; +typedef UINT16 TPM_ENTITY_TYPE; +typedef UINT32 TPM_ALGORITHM_ID; +typedef UINT16 TPM_KEY_USAGE; +typedef UINT16 TPM_STARTUP_TYPE; +typedef UINT32 TPM_CAPABILITY_AREA; +typedef UINT16 TPM_ENC_SCHEME; +typedef UINT16 TPM_SIG_SCHEME; +typedef UINT16 TPM_MIGRATE_SCHEME; +typedef UINT16 TPM_PHYSICAL_PRESENCE; +typedef UINT32 TPM_KEY_FLAGS; + +#define TPM_DIGEST_SIZE 20 // Don't change this +typedef 
BYTE TPM_AUTHDATA[TPM_DIGEST_SIZE]; +typedef TPM_AUTHDATA TPM_SECRET; +typedef TPM_AUTHDATA TPM_ENCAUTH; +typedef BYTE TPM_PAYLOAD_TYPE; +typedef UINT16 TPM_TAG; + +// Data Types of the TCS +typedef UINT32 TCS_AUTHHANDLE; // Handle addressing a authorization session +typedef UINT32 TCS_CONTEXT_HANDLE; // Basic context handle +typedef UINT32 TCS_KEY_HANDLE; // Basic key handle + +// ************************* STRUCTURES ********************************** + +typedef struct TPM_VERSION { + BYTE major; + BYTE minor; + BYTE revMajor; + BYTE revMinor; +} TPM_VERSION; + +static const TPM_VERSION TPM_STRUCT_VER_1_1 = { 1,1,0,0 }; + +typedef struct TPM_DIGEST { + BYTE digest[TPM_DIGEST_SIZE]; +} TPM_DIGEST; + +typedef TPM_DIGEST TPM_PCRVALUE; +typedef TPM_DIGEST TPM_COMPOSITE_HASH; +typedef TPM_DIGEST TPM_DIRVALUE; +typedef TPM_DIGEST TPM_HMAC; +typedef TPM_DIGEST TPM_CHOSENID_HASH; + +typedef struct TPM_NONCE { + BYTE nonce[TPM_DIGEST_SIZE]; +} TPM_NONCE; + +typedef struct TPM_KEY_PARMS { + TPM_ALGORITHM_ID algorithmID; + TPM_ENC_SCHEME encScheme; + TPM_SIG_SCHEME sigScheme; + UINT32 parmSize; + BYTE* parms; +} TPM_KEY_PARMS; + +typedef struct TPM_RSA_KEY_PARMS { + UINT32 keyLength; + UINT32 numPrimes; + UINT32 exponentSize; + BYTE* exponent; +} TPM_RSA_KEY_PARMS; + +typedef struct TPM_STORE_PUBKEY { + UINT32 keyLength; + BYTE* key; +} TPM_STORE_PUBKEY; + +typedef struct TPM_PUBKEY { + TPM_KEY_PARMS algorithmParms; + TPM_STORE_PUBKEY pubKey; +} TPM_PUBKEY; + +typedef struct TPM_KEY { + TPM_VERSION ver; + TPM_KEY_USAGE keyUsage; + TPM_KEY_FLAGS keyFlags; + TPM_AUTH_DATA_USAGE authDataUsage; + TPM_KEY_PARMS algorithmParms; + UINT32 PCRInfoSize; + BYTE* PCRInfo; // this should be a TPM_PCR_INFO, or NULL + TPM_STORE_PUBKEY pubKey; + UINT32 encDataSize; + BYTE* encData; +} TPM_KEY; + +typedef struct TPM_PCR_SELECTION { + UINT16 sizeOfSelect; /// in bytes + BYTE* pcrSelect; +} TPM_PCR_SELECTION; + +typedef struct TPM_PCR_COMPOSITE { + TPM_PCR_SELECTION select; + UINT32 valueSize; 
+ TPM_PCRVALUE* pcrValue; +} TPM_PCR_COMPOSITE; + + +typedef struct TPM_PCR_INFO { + TPM_PCR_SELECTION pcrSelection; + TPM_COMPOSITE_HASH digestAtRelease; + TPM_COMPOSITE_HASH digestAtCreation; +} TPM_PCR_INFO; + + +typedef struct TPM_BOUND_DATA { + TPM_VERSION ver; + TPM_PAYLOAD_TYPE payload; + BYTE* payloadData; +} TPM_BOUND_DATA; + +typedef struct TPM_STORED_DATA { + TPM_VERSION ver; + UINT32 sealInfoSize; + BYTE* sealInfo; + UINT32 encDataSize; + BYTE* encData; +} TPM_STORED_DATA; + +typedef struct TCS_AUTH { + TCS_AUTHHANDLE AuthHandle; + TPM_NONCE NonceOdd; // system + TPM_NONCE NonceEven; // TPM + BOOL fContinueAuthSession; + TPM_AUTHDATA HMAC; +} TCS_AUTH; + +// **************************** CONSTANTS ********************************* + +// BOOL values +#define TRUE 0x01 +#define FALSE 0x00 + +#define TCPA_MAX_BUFFER_LENGTH 0x2000 + +// +// TPM_COMMAND_CODE values +#define TPM_PROTECTED_ORDINAL 0x00000000UL +#define TPM_UNPROTECTED_ORDINAL 0x80000000UL +#define TPM_CONNECTION_ORDINAL 0x40000000UL +#define TPM_VENDOR_ORDINAL 0x20000000UL + +#define TPM_ORD_OIAP (10UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_OSAP (11UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ChangeAuth (12UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_TakeOwnership (13UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ChangeAuthAsymStart (14UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ChangeAuthAsymFinish (15UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ChangeAuthOwner (16UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Extend (20UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_PcrRead (21UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Quote (22UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Seal (23UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Unseal (24UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_DirWriteAuth (25UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_DirRead (26UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_UnBind (30UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_CreateWrapKey (31UL + TPM_PROTECTED_ORDINAL) 
+#define TPM_ORD_LoadKey (32UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetPubKey (33UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_EvictKey (34UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_CreateMigrationBlob (40UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ReWrapKey (41UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ConvertMigrationBlob (42UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_AuthorizeMigrationKey (43UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_CreateMaintenanceArchive (44UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_LoadMaintenanceArchive (45UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_KillMaintenanceFeature (46UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_LoadManuMaintPub (47UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ReadManuMaintPub (48UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_CertifyKey (50UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Sign (60UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetRandom (70UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_StirRandom (71UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SelfTestFull (80UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SelfTestStartup (81UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_CertifySelfTest (82UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ContinueSelfTest (83UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetTestResult (84UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Reset (90UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_OwnerClear (91UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_DisableOwnerClear (92UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ForceClear (93UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_DisableForceClear (94UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetCapabilitySigned (100UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetCapability (101UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetCapabilityOwner (102UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_OwnerSetDisable (110UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_PhysicalEnable (111UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_PhysicalDisable (112UL 
+ TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SetOwnerInstall (113UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_PhysicalSetDeactivated (114UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SetTempDeactivated (115UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_CreateEndorsementKeyPair (120UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_MakeIdentity (121UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ActivateIdentity (122UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_ReadPubek (124UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_OwnerReadPubek (125UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_DisablePubekRead (126UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetAuditEvent (130UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetAuditEventSigned (131UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_GetOrdinalAuditStatus (140UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SetOrdinalAuditStatus (141UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Terminate_Handle (150UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Init (151UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SaveState (152UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_Startup (153UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SetRedirection (154UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SHA1Start (160UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SHA1Update (161UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SHA1Complete (162UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SHA1CompleteExtend (163UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_FieldUpgrade (170UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SaveKeyContext (180UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_LoadKeyContext (181UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_SaveAuthContext (182UL + TPM_PROTECTED_ORDINAL) +#define TPM_ORD_LoadAuthContext (183UL + TPM_PROTECTED_ORDINAL) +#define TSC_ORD_PhysicalPresence (10UL + TPM_CONNECTION_ORDINAL) + + + +// +// TPM_RESULT values +// +// just put in the whole table from spec 1.2 + +#define TPM_BASE 0x0 // The start of TPM return codes +#define TPM_VENDOR_ERROR 
0x00000400 // Mask to indicate that the error code is vendor specific for vendor specific commands +#define TPM_NON_FATAL 0x00000800 // Mask to indicate that the error code is a non-fatal failure. + +#define TPM_SUCCESS TPM_BASE // Successful completion of the operation +#define TPM_AUTHFAIL TPM_BASE + 1 // Authentication failed +#define TPM_BADINDEX TPM_BASE + 2 // The index to a PCR, DIR or other register is incorrect +#define TPM_BAD_PARAMETER TPM_BASE + 3 // One or more parameter is bad +#define TPM_AUDITFAILURE TPM_BASE + 4 // An operation completed successfully but the auditing of that operation failed. +#define TPM_CLEAR_DISABLED TPM_BASE + 5 // The clear disable flag is set and all clear operations now require physical access +#define TPM_DEACTIVATED TPM_BASE + 6 // The TPM is deactivated +#define TPM_DISABLED TPM_BASE + 7 // The TPM is disabled +#define TPM_DISABLED_CMD TPM_BASE + 8 // The target command has been disabled +#define TPM_FAIL TPM_BASE + 9 // The operation failed +#define TPM_BAD_ORDINAL TPM_BASE + 10 // The ordinal was unknown or inconsistent +#define TPM_INSTALL_DISABLED TPM_BASE + 11 // The ability to install an owner is disabled +#define TPM_INVALID_KEYHANDLE TPM_BASE + 12 // The key handle presented was invalid +#define TPM_KEYNOTFOUND TPM_BASE + 13 // The target key was not found +#define TPM_INAPPROPRIATE_ENC TPM_BASE + 14 // Unacceptable encryption scheme +#define TPM_MIGRATEFAIL TPM_BASE + 15 // Migration authorization failed +#define TPM_INVALID_PCR_INFO TPM_BASE + 16 // PCR information could not be interpreted +#define TPM_NOSPACE TPM_BASE + 17 // No room to load key. +#define TPM_NOSRK TPM_BASE + 18 // There is no SRK set +#define TPM_NOTSEALED_BLOB TPM_BASE + 19 // An encrypted blob is invalid or was not created by this TPM +#define TPM_OWNER_SET TPM_BASE + 20 // There is already an Owner +#define TPM_RESOURCES TPM_BASE + 21 // The TPM has insufficient internal resources to perform the requested action. 
+#define TPM_SHORTRANDOM TPM_BASE + 22 // A random string was too short +#define TPM_SIZE TPM_BASE + 23 // The TPM does not have the space to perform the operation. +#define TPM_WRONGPCRVAL TPM_BASE + 24 // The named PCR value does not match the current PCR value. +#define TPM_BAD_PARAM_SIZE TPM_BASE + 25 // The paramSize argument to the command has the incorrect value +#define TPM_SHA_THREAD TPM_BASE + 26 // There is no existing SHA-1 thread. +#define TPM_SHA_ERROR TPM_BASE + 27 // The calculation is unable to proceed because the existing SHA-1 thread has already encountered an error. +#define TPM_FAILEDSELFTEST TPM_BASE + 28 // Self-test has failed and the TPM has shutdown. +#define TPM_AUTH2FAIL TPM_BASE + 29 // The authorization for the second key in a 2 key function failed authorization +#define TPM_BADTAG TPM_BASE + 30 // The tag value sent to for a command is invalid +#define TPM_IOERROR TPM_BASE + 31 // An IO error occurred transmitting information to the TPM +#define TPM_ENCRYPT_ERROR TPM_BASE + 32 // The encryption process had a problem. +#define TPM_DECRYPT_ERROR TPM_BASE + 33 // The decryption process did not complete. +#define TPM_INVALID_AUTHHANDLE TPM_BASE + 34 // An invalid handle was used. +#define TPM_NO_ENDORSEMENT TPM_BASE + 35 // The TPM does not a EK installed +#define TPM_INVALID_KEYUSAGE TPM_BASE + 36 // The usage of a key is not allowed +#define TPM_WRONG_ENTITYTYPE TPM_BASE + 37 // The submitted entity type is not allowed +#define TPM_INVALID_POSTINIT TPM_BASE + 38 // The command was received in the wrong sequence relative to TPM_Init and a subsequent TPM_Startup +#define TPM_INAPPROPRIATE_SIG TPM_BASE + 39 // Signed data cannot include additional DER information +#define TPM_BAD_KEY_PROPERTY TPM_BASE + 40 // The key properties in TPM_KEY_PARMs are not supported by this TPM + +#define TPM_BAD_MIGRATION TPM_BASE + 41 // The migration properties of this key are incorrect. 
+#define TPM_BAD_SCHEME TPM_BASE + 42 // The signature or encryption scheme for this key is incorrect or not permitted in this situation. +#define TPM_BAD_DATASIZE TPM_BASE + 43 // The size of the data (or blob) parameter is bad or inconsistent with the referenced key +#define TPM_BAD_MODE TPM_BASE + 44 // A mode parameter is bad, such as capArea or subCapArea for TPM_GetCapability, phsicalPresence parameter for TPM_PhysicalPresence, or migrationType for TPM_CreateMigrationBlob. +#define TPM_BAD_PRESENCE TPM_BASE + 45 // Either the physicalPresence or physicalPresenceLock bits have the wrong value +#define TPM_BAD_VERSION TPM_BASE + 46 // The TPM cannot perform this version of the capability +#define TPM_NO_WRAP_TRANSPORT TPM_BASE + 47 // The TPM does not allow for wrapped transport sessions +#define TPM_AUDITFAIL_UNSUCCESSFUL TPM_BASE + 48 // TPM audit construction failed and the underlying command was returning a failure code also +#define TPM_AUDITFAIL_SUCCESSFUL TPM_BASE + 49 // TPM audit construction failed and the underlying command was returning success +#define TPM_NOTRESETABLE TPM_BASE + 50 // Attempt to reset a PCR register that does not have the resettable attribute +#define TPM_NOTLOCAL TPM_BASE + 51 // Attempt to reset a PCR register that requires locality and locality modifier not part of command transport +#define TPM_BAD_TYPE TPM_BASE + 52 // Make identity blob not properly typed +#define TPM_INVALID_RESOURCE TPM_BASE + 53 // When saving context identified resource type does not match actual resource +#define TPM_NOTFIPS TPM_BASE + 54 // The TPM is attempting to execute a command only available when in FIPS mode +#define TPM_INVALID_FAMILY TPM_BASE + 55 // The command is attempting to use an invalid family ID +#define TPM_NO_NV_PERMISSION TPM_BASE + 56 // The permission to manipulate the NV storage is not available +#define TPM_REQUIRES_SIGN TPM_BASE + 57 // The operation requires a signed command +#define TPM_KEY_NOTSUPPORTED TPM_BASE + 58 // Wrong 
operation to load an NV key +#define TPM_AUTH_CONFLICT TPM_BASE + 59 // NV_LoadKey blob requires both owner and blob authorization +#define TPM_AREA_LOCKED TPM_BASE + 60 // The NV area is locked and not writtable +#define TPM_BAD_LOCALITY TPM_BASE + 61 // The locality is incorrect for the attempted operation +#define TPM_READ_ONLY TPM_BASE + 62 // The NV area is read only and can't be written to +#define TPM_PER_NOWRITE TPM_BASE + 63 // There is no protection on the write to the NV area +#define TPM_FAMILYCOUNT TPM_BASE + 64 // The family count value does not match +#define TPM_WRITE_LOCKED TPM_BASE + 65 // The NV area has already been written to +#define TPM_BAD_ATTRIBUTES TPM_BASE + 66 // The NV area attributes conflict +#define TPM_INVALID_STRUCTURE TPM_BASE + 67 // The structure tag and version are invalid or inconsistent +#define TPM_KEY_OWNER_CONTROL TPM_BASE + 68 // The key is under control of the TPM Owner and can only be evicted by the TPM Owner. +#define TPM_BAD_COUNTER TPM_BASE + 69 // The counter handle is incorrect +#define TPM_NOT_FULLWRITE TPM_BASE + 70 // The write is not a complete write of the area +#define TPM_CONTEXT_GAP TPM_BASE + 71 // The gap between saved context counts is too large +#define TPM_MAXNVWRITES TPM_BASE + 72 // The maximum number of NV writes without an owner has been exceeded +#define TPM_NOOPERATOR TPM_BASE + 73 // No operator authorization value is set +#define TPM_RESOURCEMISSING TPM_BASE + 74 // The resource pointed to by context is not loaded +#define TPM_DELEGATE_LOCK TPM_BASE + 75 // The delegate administration is locked +#define TPM_DELEGATE_FAMILY TPM_BASE + 76 // Attempt to manage a family other then the delegated family +#define TPM_DELEGATE_ADMIN TPM_BASE + 77 // Delegation table management not enabled +#define TPM_TRANSPORT_EXCLUSIVE TPM_BASE + 78 // There was a command executed outside of an exclusive transport session + +// TPM_TAG values +#define TPM_TAG_RQU_COMMAND 0x00c1 +#define TPM_TAG_RQU_AUTH1_COMMAND 
0x00c2 +#define TPM_TAG_RQU_AUTH2_COMMAND 0x00c3 +#define TPM_TAG_RSP_COMMAND 0x00c4 +#define TPM_TAG_RSP_AUTH1_COMMAND 0x00c5 +#define TPM_TAG_RSP_AUTH2_COMMAND 0x00c6 + +// TPM_PAYLOAD_TYPE values +#define TPM_PT_ASYM 0x01 +#define TPM_PT_BIND 0x02 +#define TPM_PT_MIGRATE 0x03 +#define TPM_PT_MAINT 0x04 +#define TPM_PT_SEAL 0x05 + +// TPM_ENTITY_TYPE values +#define TPM_ET_KEYHANDLE 0x0001 +#define TPM_ET_OWNER 0x0002 +#define TPM_ET_DATA 0x0003 +#define TPM_ET_SRK 0x0004 +#define TPM_ET_KEY 0x0005 + +/// TPM_ResourceTypes +#define TPM_RT_KEY 0x00000001 +#define TPM_RT_AUTH 0x00000002 +#define TPM_RT_TRANS 0x00000004 +#define TPM_RT_CONTEXT 0x00000005 + +// TPM_PROTOCOL_ID values +#define TPM_PID_OIAP 0x0001 +#define TPM_PID_OSAP 0x0002 +#define TPM_PID_ADIP 0x0003 +#define TPM_PID_ADCP 0x0004 +#define TPM_PID_OWNER 0x0005 + +// TPM_ALGORITHM_ID values +#define TPM_ALG_RSA 0x00000001 +#define TPM_ALG_DES 0x00000002 +#define TPM_ALG_3DES 0X00000003 +#define TPM_ALG_SHA 0x00000004 +#define TPM_ALG_HMAC 0x00000005 +#define TCPA_ALG_AES 0x00000006 + +// TPM_ENC_SCHEME values +#define TPM_ES_NONE 0x0001 +#define TPM_ES_RSAESPKCSv15 0x0002 +#define TPM_ES_RSAESOAEP_SHA1_MGF1 0x0003 + +// TPM_SIG_SCHEME values +#define TPM_SS_NONE 0x0001 +#define TPM_SS_RSASSAPKCS1v15_SHA1 0x0002 +#define TPM_SS_RSASSAPKCS1v15_DER 0x0003 + +// TPM_KEY_USAGE values +#define TPM_KEY_EK 0x0000 +#define TPM_KEY_SIGNING 0x0010 +#define TPM_KEY_STORAGE 0x0011 +#define TPM_KEY_IDENTITY 0x0012 +#define TPM_KEY_AUTHCHANGE 0X0013 +#define TPM_KEY_BIND 0x0014 +#define TPM_KEY_LEGACY 0x0015 + +// TPM_AUTH_DATA_USAGE values +#define TPM_AUTH_NEVER 0x00 +#define TPM_AUTH_ALWAYS 0x01 + +// Key Handle of owner and srk +#define TPM_OWNER_KEYHANDLE 0x40000001 +#define TPM_SRK_KEYHANDLE 0x40000000 + +// ---------------------- Functions for checking TPM_RESULTs ----------------- + +// FIXME: Review use of these and delete unneeded ones. 
+ +// these are really badly dependent on local structure: +// DEPENDS: local var 'status' of type TPM_RESULT +// DEPENDS: label 'abort_egress' which cleans up and returns the status +#define ERRORDIE(s) do { status = s; \ + fprintf (stderr, "*** ERRORDIE in %s, line %i\n", __func__, __LINE__); \ + goto abort_egress; } \ + while (0) + +// ASSUME: the return value used after the abort_egress label has been set +// already (eg. the 'status' local var) +#define STATUSCHECK(s) if (s != TPM_SUCCESS) { \ + fprintf (stderr, "*** ERR in %s, line %i\n", __func__, __LINE__); \ + goto abort_egress; \ + } + +// DEPENDS: local var 'status' of type TPM_RESULT +// DEPENDS: label 'abort_egress' which cleans up and returns the status +// Try command c. If it fails, set status to s and goto shame. +#define TPMTRY(s,c) if (c != TPM_SUCCESS) { \ + status = s; \ + goto abort_egress; \ + } + +// Try command c. If it fails, print error message, set status to actual return code. Goto shame +#define TPMTRYRETURN(c) do { status = c; \ + if (status != TPM_SUCCESS) { \ + printf("ERROR in %s:%i code: %s.\n", __func__, __LINE__, tpm_get_error_name(status)); \ + goto abort_egress; \ + } \ + } while(0) + + +#pragma pack(pop) + +#endif //__TCPA_H__ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xenstore/xenstore_client.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/tools/xenstore/xenstore_client.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,130 @@ +/* + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of + * this archive for more details. 
+ * + * Copyright (C) 2005 by Christian Limpach + * + */ + +#include <err.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <xs.h> + +static void +usage(const char *progname) +{ +#if defined(CLIENT_read) + errx(1, "Usage: %s [-h] [-p] key [...]", progname); +#elif defined(CLIENT_write) + errx(1, "Usage: %s [-h] key value [...]", progname); +#elif defined(CLIENT_rm) + errx(1, "Usage: %s [-h] key [...]", progname); +#endif +} + +int +main(int argc, char **argv) +{ + struct xs_handle *xsh; + bool success; + int ret = 0; +#if defined(CLIENT_read) + char *val; + int prefix = 0; +#endif + + xsh = xs_domain_open(); + if (xsh == NULL) + err(1, "xs_domain_open"); + + while (1) { + int c, index = 0; + static struct option long_options[] = { + {"help", 0, 0, 'h'}, +#if defined(CLIENT_read) + {"prefix", 0, 0, 'p'}, +#endif + {0, 0, 0, 0} + }; + + c = getopt_long(argc, argv, "h" +#if defined(CLIENT_read) + "p" +#endif + , long_options, &index); + if (c == -1) + break; + + switch (c) { + case 'h': + usage(argv[0]); + /* NOTREACHED */ +#if defined(CLIENT_read) + case 'p': + prefix = 1; + break; +#endif + } + } + + if (optind == argc) { + usage(argv[0]); + /* NOTREACHED */ + } +#if defined(CLIENT_write) + if ((argc - optind) % 1) { + usage(argv[0]); + /* NOTREACHED */ + } +#endif + + /* XXX maybe find longest common prefix */ + success = xs_transaction_start(xsh, "/"); + if (!success) + errx(1, "couldn't start transaction"); + + while (optind < argc) { +#if defined(CLIENT_read) + val = xs_read(xsh, argv[optind], NULL); + if (val == NULL) { + warnx("couldn't read path %s", argv[optind]); + ret = 1; + goto out; + } + if (prefix) + printf("%s: ", argv[optind]); + printf("%s\n", val); + free(val); + optind++; +#elif defined(CLIENT_write) + success = xs_write(xsh, argv[optind], argv[optind + 1], + strlen(argv[optind + 1]), O_CREAT); + if (!success) { + warnx("could not write path %s", argv[optind]); + ret = 1; + goto 
out; + } + optind += 2; +#elif defined(CLIENT_rm) + success = xs_rm(xsh, argv[optind]); + if (!success) { + warnx("could not remove path %s", argv[optind]); + ret = 1; + goto out; + } + optind++; +#endif + } + + out: + success = xs_transaction_end(xsh, ret ? true : false); + if (!success) + errx(1, "couldn't end transaction"); + + return ret; +} diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/arch/x86/shadow_guest32.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/xen/arch/x86/shadow_guest32.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,18 @@ +#define GUEST_PGENTRY_32 +#if defined (__x86_64__) + +#include "shadow.c" +struct shadow_ops MODE_D_HANDLER = { + .guest_paging_levels = 2, + .invlpg = shadow_invlpg_64, + .fault = shadow_fault_64, + .update_pagetables = shadow_update_pagetables, + .sync_all = sync_all, + .remove_all_write_access = remove_all_write_access, + .do_update_va_mapping = do_update_va_mapping, + .mark_mfn_out_of_sync = mark_mfn_out_of_sync, + .is_out_of_sync = is_out_of_sync, + .gva_to_gpa = gva_to_gpa_64, +}; + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/memory.c --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/xen/common/memory.c Fri Sep 9 16:30:54 2005 @@ -0,0 +1,214 @@ +/****************************************************************************** + * memory.c + * + * Code to handle memory-related requests. 
+ * + * Copyright (c) 2003-2004, B Dragovic + * Copyright (c) 2003-2005, K A Fraser + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/lib.h> +#include <xen/mm.h> +#include <xen/perfc.h> +#include <xen/sched.h> +#include <xen/event.h> +#include <xen/shadow.h> +#include <asm/current.h> +#include <asm/hardirq.h> +#include <public/memory.h> + +static long +increase_reservation( + struct domain *d, + unsigned long *extent_list, + unsigned int nr_extents, + unsigned int extent_order, + unsigned int flags, + int *preempted) +{ + struct pfn_info *page; + unsigned long i; + + if ( (extent_list != NULL) && + !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) ) + return 0; + + if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) ) + { + DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n"); + return 0; + } + + for ( i = 0; i < nr_extents; i++ ) + { + if ( hypercall_preempt_check() ) + { + *preempted = 1; + return i; + } + + if ( unlikely((page = alloc_domheap_pages( + d, extent_order, flags)) == NULL) ) + { + DPRINTK("Could not allocate order=%d extent: id=%d flags=%x\n", + extent_order, d->domain_id, flags); + return i; + } + + /* Inform the domain of the new page's machine address. 
*/ + if ( (extent_list != NULL) && + (__put_user(page_to_pfn(page), &extent_list[i]) != 0) ) + return i; + } + + return nr_extents; +} + +static long +decrease_reservation( + struct domain *d, + unsigned long *extent_list, + unsigned int nr_extents, + unsigned int extent_order, + unsigned int flags, + int *preempted) +{ + struct pfn_info *page; + unsigned long i, j, mpfn; + + if ( !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) ) + return 0; + + for ( i = 0; i < nr_extents; i++ ) + { + if ( hypercall_preempt_check() ) + { + *preempted = 1; + return i; + } + + if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) ) + return i; + + for ( j = 0; j < (1 << extent_order); j++ ) + { + if ( unlikely((mpfn + j) >= max_page) ) + { + DPRINTK("Domain %u page number out of range (%lx >= %lx)\n", + d->domain_id, mpfn + j, max_page); + return i; + } + + page = &frame_table[mpfn + j]; + if ( unlikely(!get_page(page, d)) ) + { + DPRINTK("Bad page free for domain %u\n", d->domain_id); + return i; + } + + if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) + put_page_and_type(page); + + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) + put_page(page); + + shadow_sync_and_drop_references(d, page); + + put_page(page); + } + } + + return nr_extents; +} + +/* + * To allow safe resume of do_memory_op() after preemption, we need to know + * at what point in the page list to resume. For this purpose I steal the + * high-order bits of the @cmd parameter, which are otherwise unused and zero. 
+ */ +#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */ + +long do_memory_op(int cmd, void *arg) +{ + struct domain *d; + int rc, start_extent, op, flags = 0, preempted = 0; + struct xen_memory_reservation reservation; + + op = cmd & ((1 << START_EXTENT_SHIFT) - 1); + + switch ( op ) + { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + if ( copy_from_user(&reservation, arg, sizeof(reservation)) ) + return -EFAULT; + + start_extent = cmd >> START_EXTENT_SHIFT; + if ( unlikely(start_extent > reservation.nr_extents) ) + return -EINVAL; + + if ( reservation.extent_start != NULL ) + reservation.extent_start += start_extent; + reservation.nr_extents -= start_extent; + + if ( (reservation.address_bits != 0) && + (reservation.address_bits < + (get_order_from_pages(max_page) + PAGE_SHIFT)) ) + { + if ( reservation.address_bits < 31 ) + return -ENOMEM; + flags = ALLOC_DOM_DMA; + } + + if ( likely(reservation.domid == DOMID_SELF) ) + d = current->domain; + else if ( !IS_PRIV(current->domain) ) + return -EPERM; + else if ( (d = find_domain_by_id(reservation.domid)) == NULL ) + return -ESRCH; + + rc = ((op == XENMEM_increase_reservation) ? 
+ increase_reservation : decrease_reservation)( + d, + reservation.extent_start, + reservation.nr_extents, + reservation.extent_order, + flags, + &preempted); + + if ( unlikely(reservation.domid != DOMID_SELF) ) + put_domain(d); + + rc += start_extent; + + if ( preempted ) + return hypercall2_create_continuation( + __HYPERVISOR_memory_op, op | (rc << START_EXTENT_SHIFT), arg); + + break; + + case XENMEM_maximum_ram_page: + if ( put_user(max_page, (unsigned long *)arg) ) + return -EFAULT; + rc = 0; + break; + + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/asm-x86/shadow_ops.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/xen/include/asm-x86/shadow_ops.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,130 @@ +/****************************************************************************** + * include/asm-x86/shadow_ops.h + * + * Copyright (c) 2005 Michael A Fetterman + * Based on an earlier implementation by Ian Pratt et al + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XEN_SHADOW_OPS_H +#define _XEN_SHADOW_OPS_H + +#if defined( GUEST_PGENTRY_32 ) + +#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES_32 +#define GUEST_L2_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES_32 +#define GUEST_ROOT_PAGETABLE_ENTRIES ROOT_PAGETABLE_ENTRIES_32 +#define GUEST_L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT_32 + +#define guest_l1_pgentry_t l1_pgentry_32_t +#define guest_l2_pgentry_t l2_pgentry_32_t +#define guest_root_pgentry_t l2_pgentry_32_t + +#define guest_l1e_get_paddr l1e_get_paddr_32 +#define guest_l2e_get_paddr l2e_get_paddr_32 + +#define guest_get_pte_flags get_pte_flags_32 +#define guest_put_pte_flags put_pte_flags_32 + +#define guest_l1e_get_flags l1e_get_flags_32 +#define guest_l2e_get_flags l2e_get_flags_32 +#define guest_root_get_flags l2e_get_flags_32 +#define guest_root_get_intpte l2e_get_intpte + +#define guest_l1e_empty l1e_empty_32 +#define guest_l2e_empty l2e_empty_32 + +#define guest_l1e_from_pfn l1e_from_pfn_32 +#define guest_l2e_from_pfn l2e_from_pfn_32 + +#define guest_l1e_from_paddr l1e_from_paddr_32 +#define guest_l2e_from_paddr l2e_from_paddr_32 + +#define guest_l1e_from_page l1e_from_page_32 +#define guest_l2e_from_page l2e_from_page_32 + +#define guest_l1e_add_flags l1e_add_flags_32 +#define guest_l2e_add_flags l2e_add_flags_32 + +#define guest_l1e_remove_flag l1e_remove_flags_32 +#define guest_l2e_remove_flag l2e_remove_flags_32 + +#define guest_l1e_has_changed l1e_has_changed_32 +#define guest_l2e_has_changed l2e_has_changed_32 +#define root_entry_has_changed l2e_has_changed_32 + +#define guest_l1_table_offset l1_table_offset_32 +#define guest_l2_table_offset l2_table_offset_32 + +#define guest_linear_l1_table linear_pg_table_32 +#define guest_linear_l2_table linear_l2_table_32 + +#define 
guest_va_to_l1mfn va_to_l1mfn_32 + +#else + +#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES +#define GUEST_L2_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES +#define GUEST_ROOT_PAGETABLE_ENTRIES ROOT_PAGETABLE_ENTRIES +#define GUEST_L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT + +#define guest_l1_pgentry_t l1_pgentry_t +#define guest_l2_pgentry_t l2_pgentry_t +#define guest_root_pgentry_t l4_pgentry_t + +#define guest_l1e_get_paddr l1e_get_paddr +#define guest_l2e_get_paddr l2e_get_paddr + +#define guest_get_pte_flags get_pte_flags +#define guest_put_pte_flags put_pte_flags + +#define guest_l1e_get_flags l1e_get_flags +#define guest_l2e_get_flags l2e_get_flags +#define guest_root_get_flags l4e_get_flags +#define guest_root_get_intpte l4e_get_intpte + +#define guest_l1e_empty l1e_empty +#define guest_l2e_empty l2e_empty + +#define guest_l1e_from_pfn l1e_from_pfn +#define guest_l2e_from_pfn l2e_from_pfn + +#define guest_l1e_from_paddr l1e_from_paddr +#define guest_l2e_from_paddr l2e_from_paddr + +#define guest_l1e_from_page l1e_from_page +#define guest_l2e_from_page l2e_from_page + +#define guest_l1e_add_flags l1e_add_flags +#define guest_l2e_add_flags l2e_add_flags + +#define guest_l1e_remove_flag l1e_remove_flags +#define guest_l2e_remove_flag l2e_remove_flags + +#define guest_l1e_has_changed l1e_has_changed +#define guest_l2e_has_changed l2e_has_changed +#define root_entry_has_changed l4e_has_changed + +#define guest_l1_table_offset l1_table_offset +#define guest_l2_table_offset l2_table_offset + +#define guest_linear_l1_table linear_pg_table +#define guest_linear_l2_table linear_l2_table + +#define guest_va_to_l1mfn va_to_l1mfn +#endif + +#endif /* _XEN_SHADOW_OPS_H */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/io/tpmif.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/io/tpmif.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,42 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for 
Xen guest OSes. + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@xxxxxxxxxx + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from tools/libxc/xen/io/netif.h + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_TPMIF_H__ +#define __XEN_PUBLIC_IO_TPMIF_H__ + +typedef struct { + unsigned long addr; /* Machine address of packet. */ + int ref; /* grant table access reference */ + u16 id; /* Echoed in response message. */ + u16 size:15; /* Packet size in bytes. */ + u16 mapped:1; +} tpmif_tx_request_t; + +/* + * The TPMIF_TX_RING_SIZE defines the number of pages the + * front-end and backend can exchange (= size of array). + */ +typedef u32 TPMIF_RING_IDX; + +#define TPMIF_TX_RING_SIZE 16 + +/* This structure must fit in a memory page. */ +typedef struct { + union { + tpmif_tx_request_t req; + } ring[TPMIF_TX_RING_SIZE]; +} tpmif_tx_interface_t; + +#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/include/public/memory.h --- /dev/null Thu Sep 8 15:18:40 2005 +++ b/xen/include/public/memory.h Fri Sep 9 16:30:54 2005 @@ -0,0 +1,50 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Copyright (c) 2005, Keir Fraser <keir@xxxxxxxxxxxxx> + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +/* arg == addr of struct xen_memory_reservation. */ +#define XENMEM_increase_reservation 0 + +/* arg == addr of struct xen_memory_reservation. */ +#define XENMEM_decrease_reservation 1 + +/* arg == addr of unsigned long. */ +#define XENMEM_maximum_ram_page 2 + +typedef struct xen_memory_reservation { + + /* + * MFN bases of extents to free (XENMEM_decrease_reservation). + * MFN bases of extents that were allocated (XENMEM_increase_reservation). + */ + unsigned long *extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). 
*/ + unsigned long nr_extents; + unsigned int extent_order; + + /* + * XENMEM_increase_reservation: maximum # bits addressable by the user + * of the allocated region (e.g., I/O devices often have a 32-bit + * limitation even in 64-bit systems). If zero then the user has no + * addressing restriction. + * XENMEM_decrease_reservation: unused. + */ + unsigned int address_bits; + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. + */ + domid_t domid; + +} xen_memory_reservation_t; + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/arch/xen/kernel/ctrl_if.c --- a/linux-2.6-xen-sparse/arch/xen/kernel/ctrl_if.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,569 +0,0 @@ -/****************************************************************************** - * ctrl_if.c - * - * Management functions for special interface to the domain controller. - * - * Copyright (c) 2004, K A Fraser - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <asm-xen/ctrl_if.h> -#include <asm-xen/evtchn.h> - -#if 0 -#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) -#else -#define DPRINTK(_f, _a...) ((void)0) -#endif - -/* - * Extra ring macros to sync a consumer index up to the public producer index. - * Generally UNSAFE, but we use it for recovery and shutdown in some cases. - */ -#define RING_DROP_PENDING_REQUESTS(_r) \ - do { \ - (_r)->req_cons = (_r)->sring->req_prod; \ - } while (0) -#define RING_DROP_PENDING_RESPONSES(_r) \ - do { \ - (_r)->rsp_cons = (_r)->sring->rsp_prod; \ - } while (0) - -/* - * Only used by initial domain which must create its own control-interface - * event channel. This value is picked up by the user-space domain controller - * via an ioctl. - */ -int initdom_ctrlif_domcontroller_port = -1; - -static int ctrl_if_evtchn; -static int ctrl_if_irq; -static spinlock_t ctrl_if_lock; - -static struct irqaction ctrl_if_irq_action; - -static ctrl_front_ring_t ctrl_if_tx_ring; -static ctrl_back_ring_t ctrl_if_rx_ring; - -/* Incoming message requests. */ - /* Primary message type -> message handler. */ -static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256]; - /* Primary message type -> callback in process context? */ -static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)]; - /* Is it late enough during bootstrap to use schedule_task()? 
*/ -static int safe_to_schedule_task; - /* Queue up messages to be handled in process context. */ -static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE]; -static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod; -static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons; - -/* Incoming message responses: message identifier -> message handler/id. */ -static struct { - ctrl_msg_handler_t fn; - unsigned long id; -} ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE]; - -/* For received messages that must be deferred to process context. */ -static void __ctrl_if_rxmsg_deferred(void *unused); -static DECLARE_WORK(ctrl_if_rxmsg_deferred_work, - __ctrl_if_rxmsg_deferred, - NULL); - -/* Deferred callbacks for people waiting for space in the transmit ring. */ -static DECLARE_TASK_QUEUE(ctrl_if_tx_tq); - -static DECLARE_WAIT_QUEUE_HEAD(ctrl_if_tx_wait); -static void __ctrl_if_tx_tasklet(unsigned long data); -static DECLARE_TASKLET(ctrl_if_tx_tasklet, __ctrl_if_tx_tasklet, 0); - -static void __ctrl_if_rx_tasklet(unsigned long data); -static DECLARE_TASKLET(ctrl_if_rx_tasklet, __ctrl_if_rx_tasklet, 0); - -#define get_ctrl_if() ((control_if_t *)((char *)HYPERVISOR_shared_info + 2048)) - -static void ctrl_if_notify_controller(void) -{ - notify_via_evtchn(ctrl_if_evtchn); -} - -static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id) -{ - msg->length = 0; - ctrl_if_send_response(msg); -} - -static void __ctrl_if_tx_tasklet(unsigned long data) -{ - ctrl_msg_t *msg; - int was_full = RING_FULL(&ctrl_if_tx_ring); - RING_IDX i, rp; - - i = ctrl_if_tx_ring.rsp_cons; - rp = ctrl_if_tx_ring.sring->rsp_prod; - rmb(); /* Ensure we see all requests up to 'rp'. */ - - for ( ; i != rp; i++ ) - { - msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i); - - DPRINTK("Rx-Rsp %u/%u :: %d/%d\n", i-1, - ctrl_if_tx_ring.sring->rsp_prod, - msg->type, msg->subtype); - - /* Execute the callback handler, if one was specified. 
*/ - if ( msg->id != 0xFF ) - { - (*ctrl_if_txmsg_id_mapping[msg->id].fn)( - msg, ctrl_if_txmsg_id_mapping[msg->id].id); - smp_mb(); /* Execute, /then/ free. */ - ctrl_if_txmsg_id_mapping[msg->id].fn = NULL; - } - } - - /* - * Step over messages in the ring /after/ finishing reading them. As soon - * as the index is updated then the message may get blown away. - */ - smp_mb(); - ctrl_if_tx_ring.rsp_cons = i; - - if ( was_full && !RING_FULL(&ctrl_if_tx_ring) ) - { - wake_up(&ctrl_if_tx_wait); - run_task_queue(&ctrl_if_tx_tq); - } -} - -static void __ctrl_if_rxmsg_deferred(void *unused) -{ - ctrl_msg_t *msg; - CONTROL_RING_IDX dp; - - dp = ctrl_if_rxmsg_deferred_prod; - rmb(); /* Ensure we see all deferred requests up to 'dp'. */ - - while ( ctrl_if_rxmsg_deferred_cons != dp ) - { - msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX( - ctrl_if_rxmsg_deferred_cons++)]; - (*ctrl_if_rxmsg_handler[msg->type])(msg, 0); - } -} - -static void __ctrl_if_rx_tasklet(unsigned long data) -{ - ctrl_msg_t msg, *pmsg; - CONTROL_RING_IDX dp; - RING_IDX rp, i; - - i = ctrl_if_rx_ring.req_cons; - rp = ctrl_if_rx_ring.sring->req_prod; - dp = ctrl_if_rxmsg_deferred_prod; - rmb(); /* Ensure we see all requests up to 'rp'. 
*/ - - for ( ; i != rp; i++) - { - pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i); - memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg)); - - DPRINTK("Rx-Req %u/%u :: %d/%d\n", i-1, - ctrl_if_rx_ring.sring->req_prod, - msg.type, msg.subtype); - - if ( msg.length > sizeof(msg.msg) ) - msg.length = sizeof(msg.msg); - - if ( msg.length != 0 ) - memcpy(msg.msg, pmsg->msg, msg.length); - - if ( test_bit(msg.type, - (unsigned long *)&ctrl_if_rxmsg_blocking_context) ) - memcpy(&ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(dp++)], - &msg, offsetof(ctrl_msg_t, msg) + msg.length); - else - (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0); - } - - ctrl_if_rx_ring.req_cons = i; - - if ( dp != ctrl_if_rxmsg_deferred_prod ) - { - wmb(); - ctrl_if_rxmsg_deferred_prod = dp; - schedule_work(&ctrl_if_rxmsg_deferred_work); - } -} - -static irqreturn_t ctrl_if_interrupt(int irq, void *dev_id, - struct pt_regs *regs) -{ - if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) ) - tasklet_schedule(&ctrl_if_tx_tasklet); - - if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) ) - tasklet_schedule(&ctrl_if_rx_tasklet); - - return IRQ_HANDLED; -} - -int -ctrl_if_send_message_noblock( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id) -{ - unsigned long flags; - ctrl_msg_t *dmsg; - int i; - - spin_lock_irqsave(&ctrl_if_lock, flags); - - if ( RING_FULL(&ctrl_if_tx_ring) ) - { - spin_unlock_irqrestore(&ctrl_if_lock, flags); - return -EAGAIN; - } - - msg->id = 0xFF; - if ( hnd != NULL ) - { - for ( i = 0; ctrl_if_txmsg_id_mapping[i].fn != NULL; i++ ) - continue; - ctrl_if_txmsg_id_mapping[i].fn = hnd; - ctrl_if_txmsg_id_mapping[i].id = id; - msg->id = i; - } - - DPRINTK("Tx-Req %u/%u :: %d/%d\n", - ctrl_if_tx_ring.req_prod_pvt, - ctrl_if_tx_ring.rsp_cons, - msg->type, msg->subtype); - - dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring, - ctrl_if_tx_ring.req_prod_pvt); - memcpy(dmsg, msg, sizeof(*msg)); - ctrl_if_tx_ring.req_prod_pvt++; - RING_PUSH_REQUESTS(&ctrl_if_tx_ring); - - 
spin_unlock_irqrestore(&ctrl_if_lock, flags); - - ctrl_if_notify_controller(); - - return 0; -} - -int -ctrl_if_send_message_block( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id, - long wait_state) -{ - DECLARE_WAITQUEUE(wait, current); - int rc; - - /* Fast path. */ - if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != -EAGAIN ) - return rc; - - add_wait_queue(&ctrl_if_tx_wait, &wait); - - for ( ; ; ) - { - set_current_state(wait_state); - - if ( (rc = ctrl_if_send_message_noblock(msg, hnd, id)) != -EAGAIN ) - break; - - rc = -ERESTARTSYS; - if ( signal_pending(current) && (wait_state == TASK_INTERRUPTIBLE) ) - break; - - schedule(); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&ctrl_if_tx_wait, &wait); - - return rc; -} - -/* Allow a reponse-callback handler to find context of a blocked requester. */ -struct rsp_wait { - ctrl_msg_t *msg; /* Buffer for the response message. */ - struct task_struct *task; /* The task that is blocked on the response. */ - int done; /* Indicate to 'task' that response is rcv'ed. */ -}; - -static void __ctrl_if_get_response(ctrl_msg_t *msg, unsigned long id) -{ - struct rsp_wait *wait = (struct rsp_wait *)id; - struct task_struct *task = wait->task; - - memcpy(wait->msg, msg, sizeof(*msg)); - wmb(); - wait->done = 1; - - wake_up_process(task); -} - -int -ctrl_if_send_message_and_get_response( - ctrl_msg_t *msg, - ctrl_msg_t *rmsg, - long wait_state) -{ - struct rsp_wait wait; - int rc; - - wait.msg = rmsg; - wait.done = 0; - wait.task = current; - - if ( (rc = ctrl_if_send_message_block(msg, __ctrl_if_get_response, - (unsigned long)&wait, - wait_state)) != 0 ) - return rc; - - for ( ; ; ) - { - /* NB. Can't easily support TASK_INTERRUPTIBLE here. */ - set_current_state(TASK_UNINTERRUPTIBLE); - if ( wait.done ) - break; - schedule(); - } - - set_current_state(TASK_RUNNING); - return 0; -} - -int -ctrl_if_enqueue_space_callback( - struct tq_struct *task) -{ - /* Fast path. 
*/ - if ( !RING_FULL(&ctrl_if_tx_ring) ) - return 0; - - (void)queue_task(task, &ctrl_if_tx_tq); - - /* - * We may race execution of the task queue, so return re-checked status. If - * the task is not executed despite the ring being non-full then we will - * certainly return 'not full'. - */ - smp_mb(); - return RING_FULL(&ctrl_if_tx_ring); -} - -void -ctrl_if_send_response( - ctrl_msg_t *msg) -{ - unsigned long flags; - ctrl_msg_t *dmsg; - - /* - * NB. The response may the original request message, modified in-place. - * In this situation we may have src==dst, so no copying is required. - */ - spin_lock_irqsave(&ctrl_if_lock, flags); - - DPRINTK("Tx-Rsp %u :: %d/%d\n", - ctrl_if_rx_ring.rsp_prod_pvt, - msg->type, msg->subtype); - - dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring, - ctrl_if_rx_ring.rsp_prod_pvt); - if ( dmsg != msg ) - memcpy(dmsg, msg, sizeof(*msg)); - - ctrl_if_rx_ring.rsp_prod_pvt++; - RING_PUSH_RESPONSES(&ctrl_if_rx_ring); - - spin_unlock_irqrestore(&ctrl_if_lock, flags); - - ctrl_if_notify_controller(); -} - -int -ctrl_if_register_receiver( - u8 type, - ctrl_msg_handler_t hnd, - unsigned int flags) -{ - unsigned long _flags; - int inuse; - - spin_lock_irqsave(&ctrl_if_lock, _flags); - - inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler); - - if ( inuse ) - { - printk(KERN_INFO "Receiver %p already established for control " - "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type); - } - else - { - ctrl_if_rxmsg_handler[type] = hnd; - clear_bit(type, (unsigned long *)&ctrl_if_rxmsg_blocking_context); - if ( flags == CALLBACK_IN_BLOCKING_CONTEXT ) - { - set_bit(type, (unsigned long *)&ctrl_if_rxmsg_blocking_context); - if ( !safe_to_schedule_task ) - BUG(); - } - } - - spin_unlock_irqrestore(&ctrl_if_lock, _flags); - - return !inuse; -} - -void -ctrl_if_unregister_receiver( - u8 type, - ctrl_msg_handler_t hnd) -{ - unsigned long flags; - - spin_lock_irqsave(&ctrl_if_lock, flags); - - if ( ctrl_if_rxmsg_handler[type] != hnd ) - 
printk(KERN_INFO "Receiver %p is not registered for control " - "messages of type %d.\n", hnd, type); - else - ctrl_if_rxmsg_handler[type] = ctrl_if_rxmsg_default_handler; - - spin_unlock_irqrestore(&ctrl_if_lock, flags); - - /* Ensure that @hnd will not be executed after this function returns. */ - tasklet_unlock_wait(&ctrl_if_rx_tasklet); -} - -void ctrl_if_suspend(void) -{ - teardown_irq(ctrl_if_irq, &ctrl_if_irq_action); - unbind_evtchn_from_irq(ctrl_if_evtchn); -} - -void ctrl_if_resume(void) -{ - control_if_t *ctrl_if = get_ctrl_if(); - - if ( xen_start_info.flags & SIF_INITDOMAIN ) - { - /* - * The initial domain must create its own domain-controller link. - * The controller is probably not running at this point, but will - * pick up its end of the event channel from - */ - evtchn_op_t op; - extern void bind_evtchn_to_cpu(unsigned port, unsigned cpu); - - op.cmd = EVTCHNOP_bind_interdomain; - op.u.bind_interdomain.dom1 = DOMID_SELF; - op.u.bind_interdomain.dom2 = DOMID_SELF; - op.u.bind_interdomain.port1 = 0; - op.u.bind_interdomain.port2 = 0; - if ( HYPERVISOR_event_channel_op(&op) != 0 ) - BUG(); - xen_start_info.domain_controller_evtchn = op.u.bind_interdomain.port1; - initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2; - bind_evtchn_to_cpu(op.u.bind_interdomain.port1, 0); - } - - /* Sync up with shared indexes. 
*/ - FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM); - BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM); - - ctrl_if_evtchn = xen_start_info.domain_controller_evtchn; - ctrl_if_irq = bind_evtchn_to_irq(ctrl_if_evtchn); - - memset(&ctrl_if_irq_action, 0, sizeof(ctrl_if_irq_action)); - ctrl_if_irq_action.handler = ctrl_if_interrupt; - ctrl_if_irq_action.name = "ctrl-if"; - (void)setup_irq(ctrl_if_irq, &ctrl_if_irq_action); -} - -void __init ctrl_if_init(void) -{ - control_if_t *ctrl_if = get_ctrl_if(); - int i; - - for ( i = 0; i < 256; i++ ) - ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler; - - FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring, CONTROL_RING_MEM); - BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring, CONTROL_RING_MEM); - - spin_lock_init(&ctrl_if_lock); - - ctrl_if_resume(); -} - - -/* This is called after it is safe to call schedule_task(). */ -static int __init ctrl_if_late_setup(void) -{ - safe_to_schedule_task = 1; - return 0; -} -__initcall(ctrl_if_late_setup); - - -/* - * !! The following are DANGEROUS FUNCTIONS !! - * Use with care [for example, see xencons_force_flush()]. 
- */ - -int ctrl_if_transmitter_empty(void) -{ - return (ctrl_if_tx_ring.sring->req_prod == ctrl_if_tx_ring.rsp_cons); - -} - -void ctrl_if_discard_responses(void) -{ - RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring); -} - -EXPORT_SYMBOL(ctrl_if_send_message_noblock); -EXPORT_SYMBOL(ctrl_if_send_message_block); -EXPORT_SYMBOL(ctrl_if_send_message_and_get_response); -EXPORT_SYMBOL(ctrl_if_enqueue_space_callback); -EXPORT_SYMBOL(ctrl_if_send_response); -EXPORT_SYMBOL(ctrl_if_register_receiver); -EXPORT_SYMBOL(ctrl_if_unregister_receiver); diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,573 +0,0 @@ -/****************************************************************************** - * blktap_controlmsg.c - * - * XenLinux virtual block-device tap. - * Control interfaces to the frontend and backend drivers. - * - * Copyright (c) 2004, Andrew Warfield - * - */ - -#include "blktap.h" -#include <asm-xen/evtchn.h> - -static char *blkif_state_name[] = { - [BLKIF_STATE_CLOSED] = "closed", - [BLKIF_STATE_DISCONNECTED] = "disconnected", - [BLKIF_STATE_CONNECTED] = "connected", -}; - -static char *blkif_status_name[] = { - [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", - [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", - [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", - [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", -}; - -unsigned int blktap_be_state = BLKIF_STATE_CLOSED; -unsigned int blktap_be_evtchn; - -/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ - -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) - -static kmem_cache_t *blkif_cachep; -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( 
(blkif != NULL) && - ((blkif->domid != domid) || (blkif->handle != handle)) ) - blkif = blkif->hash_next; - return blkif; -} - -static void __blkif_disconnect_complete(void *arg) -{ - blkif_t *blkif = (blkif_t *)arg; - ctrl_msg_t cmsg; - blkif_be_disconnect_t disc; -#ifdef CONFIG_XEN_BLKDEV_GRANT - struct gnttab_unmap_grant_ref op; -#endif - - /* - * These can't be done in blkif_disconnect() because at that point there - * may be outstanding requests at the disc whose asynchronous responses - * must still be notified to the remote driver. - */ -#ifdef CONFIG_XEN_BLKDEV_GRANT - op.host_addr = blkif->shmem_vaddr; - op.handle = blkif->shmem_handle; - op.dev_bus_addr = 0; - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); -#endif - vfree(blkif->blk_ring.sring); - - /* Construct the deferred response message. */ - cmsg.type = CMSG_BLKIF_BE; - cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; - cmsg.id = blkif->disconnect_rspid; - cmsg.length = sizeof(blkif_be_disconnect_t); - disc.domid = blkif->domid; - disc.blkif_handle = blkif->handle; - disc.status = BLKIF_BE_STATUS_OKAY; - memcpy(cmsg.msg, &disc, sizeof(disc)); - - /* - * Make sure message is constructed /before/ status change, because - * after the status change the 'blkif' structure could be deallocated at - * any time. Also make sure we send the response /after/ status change, - * as otherwise a subsequent CONNECT request could spuriously fail if - * another CPU doesn't see the status change yet. - */ - mb(); - if ( blkif->status != DISCONNECTING ) - BUG(); - blkif->status = DISCONNECTED; - mb(); - - /* Send the successful response. 
*/ - ctrl_if_send_response(&cmsg); -} - -void blkif_disconnect_complete(blkif_t *blkif) -{ - INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); - schedule_work(&blkif->work); -} - -void blkif_ptfe_create(blkif_be_create_t *create) -{ - blkif_t *blkif, **pblkif; - domid_t domid = create->domid; - unsigned int handle = create->blkif_handle; - - - /* May want to store info on the connecting domain here. */ - - DPRINTK("PT got BE_CREATE\n"); - - if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) - { - WPRINTK("Could not create blkif: out of memory\n"); - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - /* blkif struct init code from blkback.c */ - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->status = DISCONNECTED; - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 0); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif != NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - { - WPRINTK("Could not create blkif: already exists\n"); - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; - kmem_cache_free(blkif_cachep, blkif); - return; - } - pblkif = &(*pblkif)->hash_next; - } - - blkif->hash_next = *pblkif; - *pblkif = blkif; - - create->status = BLKIF_BE_STATUS_OKAY; -} - - -void blkif_ptfe_destroy(blkif_be_destroy_t *destroy) -{ - /* Clear anything that we initialized above. 
*/ - - domid_t domid = destroy->domid; - unsigned int handle = destroy->blkif_handle; - blkif_t **pblkif, *blkif; - - DPRINTK("PT got BE_DESTROY\n"); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) != NULL ) - { - if ( (blkif->domid == domid) && (blkif->handle == handle) ) - { - if ( blkif->status != DISCONNECTED ) - goto still_connected; - goto destroy; - } - pblkif = &blkif->hash_next; - } - - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - - still_connected: - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - return; - - destroy: - *pblkif = blkif->hash_next; - kmem_cache_free(blkif_cachep, blkif); - destroy->status = BLKIF_BE_STATUS_OKAY; -} - -void blkif_ptfe_connect(blkif_be_connect_t *connect) -{ - domid_t domid = connect->domid; - unsigned int handle = connect->blkif_handle; - unsigned int evtchn = connect->evtchn; - unsigned long shmem_frame = connect->shmem_frame; - struct vm_struct *vma; -#ifdef CONFIG_XEN_BLKDEV_GRANT - int ref = connect->shmem_ref; -#else - pgprot_t prot; - int error; -#endif - blkif_t *blkif; - blkif_sring_t *sring; - - DPRINTK("PT got BE_CONNECT\n"); - - blkif = blkif_find_by_handle(domid, handle); - if ( unlikely(blkif == NULL) ) - { - WPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", - connect->domid, connect->blkif_handle); - connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - - if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) - { - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - -#ifndef CONFIG_XEN_BLKDEV_GRANT - prot = __pgprot(_KERNPG_TABLE); - error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), - shmem_frame<<PAGE_SHIFT, PAGE_SIZE, - prot, domid); - if ( error != 0 ) - { - if ( error == -ENOMEM ) - connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - else if ( error == -EFAULT ) - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - else - connect->status = BLKIF_BE_STATUS_ERROR; - 
vfree(vma->addr); - return; - } -#else - { /* Map: Use the Grant table reference */ - struct gnttab_map_grant_ref op; - op.host_addr = VMALLOC_VMADDR(vma->addr); - op.flags = GNTMAP_host_map; - op.ref = ref; - op.dom = domid; - - BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); - - handle = op.handle; - - if (op.handle < 0) { - DPRINTK(" Grant table operation failure !\n"); - connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; - vfree(vma->addr); - return; - } - - blkif->shmem_ref = ref; - blkif->shmem_handle = handle; - blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr); - } -#endif - - if ( blkif->status != DISCONNECTED ) - { - connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - vfree(vma->addr); - return; - } - - sring = (blkif_sring_t *)vma->addr; - SHARED_RING_INIT(sring); - BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE); - - blkif->evtchn = evtchn; - blkif->shmem_frame = shmem_frame; - blkif->status = CONNECTED; - blkif_get(blkif); - - bind_evtchn_to_irqhandler( - evtchn, blkif_ptfe_int, 0, "blkif-pt-backend", blkif); - - connect->status = BLKIF_BE_STATUS_OKAY; -} - -int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) -{ - domid_t domid = disconnect->domid; - unsigned int handle = disconnect->blkif_handle; - blkif_t *blkif; - - DPRINTK("PT got BE_DISCONNECT\n"); - - blkif = blkif_find_by_handle(domid, handle); - if ( unlikely(blkif == NULL) ) - { - WPRINTK("blkif_disconnect attempted for non-existent blkif" - " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); - disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return 1; /* Caller will send response error message. */ - } - - if ( blkif->status == CONNECTED ) - { - blkif->status = DISCONNECTING; - blkif->disconnect_rspid = rsp_id; - wmb(); /* Let other CPUs see the status change. */ - unbind_evtchn_from_irqhandler(blkif->evtchn, blkif); - blkif_deschedule(blkif); - blkif_put(blkif); - return 0; /* Caller should not send response message. 
*/ - } - - disconnect->status = BLKIF_BE_STATUS_OKAY; - return 1; -} - -/*-----[ Control Messages to/from Backend VM ]----------------------------*/ - -/* Tell the controller to bring up the interface. */ -static void blkif_ptbe_send_interface_connect(void) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, - .length = sizeof(blkif_fe_interface_connect_t), - }; - blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; - msg->handle = 0; - msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} - -static void blkif_ptbe_close(void) -{ -} - -/* Move from CLOSED to DISCONNECTED state. */ -static void blkif_ptbe_disconnect(void) -{ - blkif_sring_t *sring; - - sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blktap_be_ring, sring, PAGE_SIZE); - blktap_be_state = BLKIF_STATE_DISCONNECTED; - DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); - blkif_ptbe_send_interface_connect(); -} - -static void blkif_ptbe_connect(blkif_fe_interface_status_t *status) -{ - int err = 0; - - blktap_be_evtchn = status->evtchn; - - err = bind_evtchn_to_irqhandler( - blktap_be_evtchn, blkif_ptbe_int, SA_SAMPLE_RANDOM, "blkif", NULL); - if ( err ) { - WPRINTK("blkfront bind_evtchn_to_irqhandler failed (%d)\n", err); - return; - } else { - /* transtion to connected in case we need to do a - a partion probe on a whole disk */ - blktap_be_state = BLKIF_STATE_CONNECTED; - } -} - -static void unexpected(blkif_fe_interface_status_t *status) -{ - WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", - blkif_status_name[status->status], - blkif_state_name[blktap_be_state]); -} - -static void blkif_ptbe_status( - blkif_fe_interface_status_t *status) -{ - if ( status->handle != 0 ) - { - DPRINTK("Status change on unsupported blkif %d\n", - status->handle); - return; - } - - DPRINTK("ptbe_status: got %s\n", 
blkif_status_name[status->status]); - - switch ( status->status ) - { - case BLKIF_INTERFACE_STATUS_CLOSED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_ptbe_close(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - blkif_ptbe_disconnect(); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n"); - unexpected(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CONNECTED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - blkif_ptbe_disconnect(); - blkif_ptbe_connect(status); - break; - case BLKIF_STATE_DISCONNECTED: - blkif_ptbe_connect(status); - break; - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_ptbe_connect(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CHANGED: - switch ( blktap_be_state ) - { - case BLKIF_STATE_CLOSED: - case BLKIF_STATE_DISCONNECTED: - unexpected(status); - break; - case BLKIF_STATE_CONNECTED: - /* vbd_update(); */ - /* tap doesn't really get state changes... 
*/ - unexpected(status); - break; - } - break; - - default: - DPRINTK("Status change to unknown value %d\n", status->status); - break; - } -} - -/*-----[ All control messages enter here: ]-------------------------------*/ - -void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->type ) - { - case CMSG_BLKIF_FE: - - switch ( msg->subtype ) - { - case CMSG_BLKIF_FE_INTERFACE_STATUS: - blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]); - break; - - default: - goto parse_error; - } - - break; - - case CMSG_BLKIF_BE: - - /* send a copy of the message to user if wanted */ - - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { - - blktap_write_ctrl_ring(msg); - blktap_kick_user(); - } - - switch ( msg->subtype ) - { - case CMSG_BLKIF_BE_CREATE: - blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DESTROY: - blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_CONNECT: - blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]); - break; - case CMSG_BLKIF_BE_DISCONNECT: - if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0], - msg->id) ) - return; - break; - - /* We just ignore anything to do with vbds for now. 
*/ - - case CMSG_BLKIF_BE_VBD_CREATE: - DPRINTK("PT got VBD_CREATE\n"); - ((blkif_be_vbd_create_t *)&msg->msg[0])->status - = BLKIF_BE_STATUS_OKAY; - break; - case CMSG_BLKIF_BE_VBD_DESTROY: - DPRINTK("PT got VBD_DESTROY\n"); - ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status - = BLKIF_BE_STATUS_OKAY; - break; - default: - goto parse_error; - } - - break; - } - - ctrl_if_send_response(msg); - return; - - parse_error: - msg->length = 0; - ctrl_if_send_response(msg); -} - -/*-----[ Initialization ]-------------------------------------------------*/ - -void __init blkif_interface_init(void) -{ - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); - memset(blkif_hash, 0, sizeof(blkif_hash)); - - blktap_be_ring.sring = NULL; -} - - - -/* Debug : print the current ring indices. */ - -void print_fe_ring_idxs(void) -{ - int i; - blkif_t *blkif; - - WPRINTK("FE Rings: \n---------\n"); - for ( i = 0; i < BLKIF_HASHSZ; i++) { - blkif = blkif_hash[i]; - while (blkif != NULL) { - if (blkif->status == DISCONNECTED) { - WPRINTK("(%2d,%2d) DISCONNECTED\n", - blkif->domid, blkif->handle); - } else if (blkif->status == DISCONNECTING) { - WPRINTK("(%2d,%2d) DISCONNECTING\n", - blkif->domid, blkif->handle); - } else if (blkif->blk_ring.sring == NULL) { - WPRINTK("(%2d,%2d) CONNECTED, but null sring!\n", - blkif->domid, blkif->handle); - } else { - blkif_get(blkif); - WPRINTK("(%2d,%2d): req_cons: %2d, rsp_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blkif->domid, blkif->handle, - blkif->blk_ring.req_cons, - blkif->blk_ring.rsp_prod_pvt, - blkif->blk_ring.sring->req_prod, - blkif->blk_ring.sring->rsp_prod); - blkif_put(blkif); - } - blkif = blkif->hash_next; - } - } -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,449 +0,0 @@ 
-/****************************************************************************** - * blktap_datapath.c - * - * XenLinux virtual block-device tap. - * Block request routing data path. - * - * Copyright (c) 2004, Andrew Warfield - * -- see full header in blktap.c - */ - -#include "blktap.h" -#include <asm-xen/evtchn.h> - -/*-----[ The data paths ]-------------------------------------------------*/ - -/* Connection to a single backend domain. */ -blkif_front_ring_t blktap_be_ring; - -/*-----[ Tracking active requests ]---------------------------------------*/ - -/* this must be the same as MAX_PENDING_REQS in blkback.c */ -#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U) - -active_req_t active_reqs[MAX_ACTIVE_REQS]; -ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS]; -spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; -ACTIVE_RING_IDX active_prod, active_cons; -#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1)) -#define ACTIVE_IDX(_ar) (_ar - active_reqs) -#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons) - -inline active_req_t *get_active_req(void) -{ - ACTIVE_RING_IDX idx; - active_req_t *ar; - unsigned long flags; - - ASSERT(active_cons != active_prod); - - spin_lock_irqsave(&active_req_lock, flags); - idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)]; - ar = &active_reqs[idx]; - spin_unlock_irqrestore(&active_req_lock, flags); - - return ar; -} - -inline void free_active_req(active_req_t *ar) -{ - unsigned long flags; - - spin_lock_irqsave(&active_req_lock, flags); - active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); - spin_unlock_irqrestore(&active_req_lock, flags); -} - -active_req_t *lookup_active_req(ACTIVE_RING_IDX idx) -{ - return &active_reqs[idx]; -} - -void active_reqs_init(void) -{ - ACTIVE_RING_IDX i; - - active_cons = 0; - active_prod = MAX_ACTIVE_REQS; - memset(active_reqs, 0, sizeof(active_reqs)); - for ( i = 0; i < MAX_ACTIVE_REQS; i++ ) - active_req_ring[i] = i; -} - -/* Requests passing through the tap to the 
backend hijack the id field - * in the request message. In it we put the AR index _AND_ the fe domid. - * the domid is used by the backend to map the pages properly. - */ - -static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) -{ - return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) ); -} - -/*-----[ Ring helpers ]---------------------------------------------------*/ - -static void maybe_trigger_blktap_schedule(void); - -inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) -{ - blkif_response_t *resp_d; - active_req_t *ar; - - ar = &active_reqs[ID_TO_IDX(rsp->id)]; - rsp->id = ar->id; - - resp_d = RING_GET_RESPONSE(&blkif->blk_ring, - blkif->blk_ring.rsp_prod_pvt); - memcpy(resp_d, rsp, sizeof(blkif_response_t)); - wmb(); - blkif->blk_ring.rsp_prod_pvt++; - - blkif_put(ar->blkif); - free_active_req(ar); - - return 0; -} - -inline int write_req_to_be_ring(blkif_request_t *req) -{ - blkif_request_t *req_d; - - if ( blktap_be_state != BLKIF_STATE_CONNECTED ) { - WPRINTK("Tap trying to access an unconnected backend!\n"); - return 0; - } - - req_d = RING_GET_REQUEST(&blktap_be_ring, - blktap_be_ring.req_prod_pvt); - memcpy(req_d, req, sizeof(blkif_request_t)); - wmb(); - blktap_be_ring.req_prod_pvt++; - - return 0; -} - -void kick_fe_domain(blkif_t *blkif) -{ - RING_PUSH_RESPONSES(&blkif->blk_ring); - notify_via_evtchn(blkif->evtchn); - DPRINTK("notified FE(dom %u)\n", blkif->domid); - - /* We just feed up a batch of request slots... */ - maybe_trigger_blktap_schedule(); - -} - -void kick_be_domain(void) -{ - if ( blktap_be_state != BLKIF_STATE_CONNECTED ) - return; - - wmb(); /* Ensure that the frontend can see the requests. 
*/ - RING_PUSH_REQUESTS(&blktap_be_ring); - notify_via_evtchn(blktap_be_evtchn); - DPRINTK("notified BE\n"); -} - -/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/ - -/*-----[ Scheduler list maint -from blkback ]--- */ - -static struct list_head blkio_schedule_list; -static spinlock_t blkio_schedule_list_lock; - -static int __on_blkdev_list(blkif_t *blkif) -{ - return blkif->blkdev_list.next != NULL; -} - -static void remove_from_blkdev_list(blkif_t *blkif) -{ - unsigned long flags; - if ( !__on_blkdev_list(blkif) ) return; - spin_lock_irqsave(&blkio_schedule_list_lock, flags); - if ( __on_blkdev_list(blkif) ) - { - list_del(&blkif->blkdev_list); - blkif->blkdev_list.next = NULL; - blkif_put(blkif); - } - spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); -} - -static void add_to_blkdev_list_tail(blkif_t *blkif) -{ - unsigned long flags; - if ( __on_blkdev_list(blkif) ) return; - spin_lock_irqsave(&blkio_schedule_list_lock, flags); - if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) - { - list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); - blkif_get(blkif); - } - spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); -} - - -/*-----[ Scheduler functions - from blkback ]--- */ - -static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); - -static int do_block_io_op(blkif_t *blkif, int max_to_do); - -static int blkio_schedule(void *arg) -{ - DECLARE_WAITQUEUE(wq, current); - - blkif_t *blkif; - struct list_head *ent; - - daemonize( - "xentapd" - ); - - for ( ; ; ) - { - /* Wait for work to do. */ - add_wait_queue(&blkio_schedule_wait, &wq); - set_current_state(TASK_INTERRUPTIBLE); - if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || - list_empty(&blkio_schedule_list) ) - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&blkio_schedule_wait, &wq); - - /* Queue up a batch of requests. 
*/ - while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) && - !list_empty(&blkio_schedule_list) ) - { - ent = blkio_schedule_list.next; - blkif = list_entry(ent, blkif_t, blkdev_list); - blkif_get(blkif); - remove_from_blkdev_list(blkif); - if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) - add_to_blkdev_list_tail(blkif); - blkif_put(blkif); - } - } -} - -static void maybe_trigger_blktap_schedule(void) -{ - /* - * Needed so that two processes, who together make the following predicate - * true, don't both read stale values and evaluate the predicate - * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... - */ - smp_mb(); - - if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS/2)) && - !list_empty(&blkio_schedule_list) ) - wake_up(&blkio_schedule_wait); -} - -void blkif_deschedule(blkif_t *blkif) -{ - remove_from_blkdev_list(blkif); -} - -void __init blkdev_schedule_init(void) -{ - spin_lock_init(&blkio_schedule_list_lock); - INIT_LIST_HEAD(&blkio_schedule_list); - - if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) - BUG(); -} - -/*-----[ Interrupt entry from a frontend ]------ */ - -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) -{ - blkif_t *blkif = dev_id; - - add_to_blkdev_list_tail(blkif); - maybe_trigger_blktap_schedule(); - return IRQ_HANDLED; -} - -/*-----[ Other Frontend Ring functions ]-------- */ - -/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/ -static int do_block_io_op(blkif_t *blkif, int max_to_do) -{ - /* we have pending messages from the real frontend. 
*/ - - blkif_request_t *req_s; - RING_IDX i, rp; - unsigned long flags; - active_req_t *ar; - int more_to_do = 0; - int notify_be = 0, notify_user = 0; - - /* lock both rings */ - spin_lock_irqsave(&blkif_io_lock, flags); - - rp = blkif->blk_ring.sring->req_prod; - rmb(); - - for ( i = blkif->blk_ring.req_cons; - (i != rp) && - !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i); - i++ ) - { - - if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) - { - more_to_do = 1; - break; - } - - req_s = RING_GET_REQUEST(&blkif->blk_ring, i); - /* This is a new request: - * Assign an active request record, and remap the id. - */ - ar = get_active_req(); - ar->id = req_s->id; - ar->nr_pages = req_s->nr_segments; - blkif_get(blkif); - ar->blkif = blkif; - req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar)); - /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */ - - /* FE -> BE interposition point is here. */ - - /* ------------------------------------------------------------- */ - /* BLKIF_OP_PROBE_HACK: */ - /* Signal to the backend that we are a tap domain. 
*/ - - if (req_s->operation == BLKIF_OP_PROBE) { - DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n"); - req_s->frame_and_sects[1] = BLKTAP_COOKIE; - } - - /* ------------------------------------------------------------- */ - - /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */ - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { - - /* Copy the response message to UFERing */ - /* In MODE_INTERCEPT_FE, map attached pages into the app vma */ - /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */ - - DPRINTK("req->UFERing\n"); - blktap_write_fe_ring(req_s); - notify_user = 1; - } - - /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */ - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { - - /* be included to prevent noise from the fe when its off */ - /* copy the request message to the BERing */ - - DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", - (unsigned)i & (RING_SIZE(&blktap_be_ring)-1), - (unsigned)blktap_be_ring.req_prod_pvt & - (RING_SIZE((&blktap_be_ring)-1))); - - write_req_to_be_ring(req_s); - notify_be = 1; - } - } - - blkif->blk_ring.req_cons = i; - - /* unlock rings */ - spin_unlock_irqrestore(&blkif_io_lock, flags); - - if (notify_user) - blktap_kick_user(); - if (notify_be) - kick_be_domain(); - - return more_to_do; -} - -/*-----[ Data to/from Backend (server) VM ]------------------------------*/ - - -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, - struct pt_regs *ptregs) -{ - blkif_response_t *resp_s; - blkif_t *blkif; - RING_IDX rp, i; - unsigned long flags; - - DPRINTK("PT got BE interrupt.\n"); - - /* lock both rings */ - spin_lock_irqsave(&blkif_io_lock, flags); - - rp = blktap_be_ring.sring->rsp_prod; - rmb(); - - for ( i = blktap_be_ring.rsp_cons; i != rp; i++) - { - resp_s = RING_GET_RESPONSE(&blktap_be_ring, i); - - /* BE -> FE interposition point is here. 
*/ - - blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif; - - /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */ - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || - (blktap_mode & BLKTAP_MODE_COPY_BE) ) { - - /* Copy the response message to UBERing */ - /* In MODE_INTERCEPT_BE, map attached pages into the app vma */ - /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */ - - DPRINTK("rsp->UBERing\n"); - blktap_write_be_ring(resp_s); - blktap_kick_user(); - - } - - /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */ - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || - (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { - - /* (fe included to prevent random interference from the BE) */ - /* Copy the response message to FERing */ - - DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", - (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1), - (unsigned)blkif->blk_ring.rsp_prod_pvt & - (RING_SIZE((&blkif->blk_ring)-1))); - - write_resp_to_fe_ring(blkif, resp_s); - kick_fe_domain(blkif); - - } - } - - blktap_be_ring.rsp_cons = i; - - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - return IRQ_HANDLED; -} - -/* Debug : print the current ring indices. */ - -void print_be_ring_idxs(void) -{ - if (blktap_be_ring.sring != NULL) { - WPRINTK("BE Ring: \n--------\n"); - WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_be_ring.rsp_cons, - blktap_be_ring.req_prod_pvt, - blktap_be_ring.sring->req_prod, - blktap_be_ring.sring->rsp_prod); - } -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,801 +0,0 @@ -/****************************************************************************** - * blktap_userdev.c - * - * XenLinux virtual block-device tap. - * Control interface between the driver and a character device. 
- * - * Copyright (c) 2004, Andrew Warfield - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/miscdevice.h> -#include <linux/errno.h> -#include <linux/major.h> -#include <linux/gfp.h> -#include <linux/poll.h> -#include <asm/pgalloc.h> -#include <asm/tlbflush.h> -#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */ -#ifdef CONFIG_XEN_BLKDEV_GRANT -#include <asm-xen/xen-public/grant_table.h> -#endif - -#include "blktap.h" - - -unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH; - -/* Only one process may open /dev/xen/blktap at any time. */ -static unsigned long blktap_dev_inuse; -unsigned long blktap_ring_ok; /* make this ring->state */ - -/* for poll: */ -static wait_queue_head_t blktap_wait; - -/* Rings up to user space. */ -static blkif_front_ring_t blktap_ufe_ring; -static blkif_back_ring_t blktap_ube_ring; -static ctrl_front_ring_t blktap_uctrl_ring; - -/* local prototypes */ -static int blktap_read_fe_ring(void); -static int blktap_read_be_ring(void); - - -/* -------[ mmap region ]--------------------------------------------- */ -/* - * We use a big chunk of address space to map in-flight requests into, - * and export this region up to user-space. See the comments in blkback - * about this -- the two must be kept in sync if the tap is used as a - * passthrough. - */ - -#define MAX_PENDING_REQS 64 - -/* immediately before the mmap area, we have a bunch of pages reserved - * for shared memory rings. - */ -#define RING_PAGES 3 /* Ctrl, Front, and Back */ - -/* Where things are inside the device mapping. */ -struct vm_area_struct *blktap_vma = NULL; -unsigned long mmap_vstart; /* Kernel pages for mapping in data. 
*/ -unsigned long rings_vstart; /* start of mmaped vma */ -unsigned long user_vstart; /* start of user mappings */ - -#define MMAP_PAGES_PER_REQUEST \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) -#define MMAP_PAGES \ - (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) -#define MMAP_VADDR(_start, _req,_seg) \ - ( _start + \ - ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ - ((_seg) * PAGE_SIZE)) - -/* -------[ grant handles ]------------------------------------------- */ - -#ifdef CONFIG_XEN_BLKDEV_GRANT -/* When using grant tables to map a frame for device access then the - * handle returned must be used to unmap the frame. This is needed to - * drop the ref count on the frame. - */ -struct grant_handle_pair -{ - u16 kernel; - u16 user; -}; -static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; -#define pending_handle(_idx, _i) \ - (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) -#define BLKTAP_INVALID_HANDLE(_g) \ - (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) -#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ - (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ - } while(0) - -#endif - - -/* -------[ blktap vm ops ]------------------------------------------- */ - -static struct page *blktap_nopage(struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - /* - * if the page has not been mapped in by the driver then generate - * a SIGBUS to the domain. - */ - - force_sig(SIGBUS, current); - - return 0; -} - -struct vm_operations_struct blktap_vm_ops = { - nopage: blktap_nopage, -}; - -/* -------[ blktap file ops ]----------------------------------------- */ - -static int blktap_open(struct inode *inode, struct file *filp) -{ - blkif_sring_t *sring; - ctrl_sring_t *csring; - - if ( test_and_set_bit(0, &blktap_dev_inuse) ) - return -EBUSY; - - /* Allocate the ctrl ring. 
*/ - csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL); - if (csring == NULL) - goto fail_nomem; - - SetPageReserved(virt_to_page(csring)); - - SHARED_RING_INIT(csring); - FRONT_RING_INIT(&blktap_uctrl_ring, csring, PAGE_SIZE); - - /* Allocate the fe ring. */ - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); - if (sring == NULL) - goto fail_free_ctrl; - - SetPageReserved(virt_to_page(sring)); - - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE); - - /* Allocate the be ring. */ - sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); - if (sring == NULL) - goto fail_free_fe; - - SetPageReserved(virt_to_page(sring)); - - SHARED_RING_INIT(sring); - BACK_RING_INIT(&blktap_ube_ring, sring, PAGE_SIZE); - - DPRINTK(KERN_ALERT "blktap open.\n"); - - return 0; - - fail_free_ctrl: - free_page( (unsigned long) blktap_uctrl_ring.sring); - - fail_free_fe: - free_page( (unsigned long) blktap_ufe_ring.sring); - - fail_nomem: - return -ENOMEM; -} - -static int blktap_release(struct inode *inode, struct file *filp) -{ - blktap_dev_inuse = 0; - blktap_ring_ok = 0; - - DPRINTK(KERN_ALERT "blktap closed.\n"); - - /* Free the ring page. */ - ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring)); - free_page((unsigned long) blktap_uctrl_ring.sring); - - ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); - free_page((unsigned long) blktap_ufe_ring.sring); - - ClearPageReserved(virt_to_page(blktap_ube_ring.sring)); - free_page((unsigned long) blktap_ube_ring.sring); - - /* Clear any active mappings and free foreign map table */ - if (blktap_vma != NULL) { - zap_page_range(blktap_vma, blktap_vma->vm_start, - blktap_vma->vm_end - blktap_vma->vm_start, NULL); - blktap_vma = NULL; - } - - return 0; -} - -/* Note on mmap: - * We need to map pages to user space in a way that will allow the block - * subsystem set up direct IO to them. 
This couldn't be done before, because - * there isn't really a sane way to make a user virtual address down to a - * physical address when the page belongs to another domain. - * - * My first approach was to map the page in to kernel memory, add an entry - * for it in the physical frame list (using alloc_lomem_region as in blkback) - * and then attempt to map that page up to user space. This is disallowed - * by xen though, which realizes that we don't really own the machine frame - * underlying the physical page. - * - * The new approach is to provide explicit support for this in xen linux. - * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages - * mapped from other vms. vma->vm_private_data is set up as a mapping - * from pages to actual page structs. There is a new clause in get_user_pages - * that does the right thing for this sort of mapping. - * - * blktap_mmap sets up this mapping. Most of the real work is done in - * blktap_write_fe_ring below. - */ -static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) -{ - int size; - struct page **map; - int i; - - DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n", - vma->vm_start, vma->vm_end); - - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &blktap_vm_ops; - - size = vma->vm_end - vma->vm_start; - if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) { - printk(KERN_INFO - "blktap: you _must_ map exactly %d pages!\n", - MMAP_PAGES + RING_PAGES); - return -EAGAIN; - } - - size >>= PAGE_SHIFT; - DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1); - - rings_vstart = vma->vm_start; - user_vstart = rings_vstart + (RING_PAGES << PAGE_SHIFT); - - /* Map the ring pages to the start of the region and reserve it. */ - - /* not sure if I really need to do this... 
*/ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring)); - if (remap_pfn_range(vma, vma->vm_start, - __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - goto fail; - - - DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); - if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, - __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - goto fail; - - DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); - if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), - __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot)) - goto fail; - - /* Mark this VM as containing foreign pages, and set up mappings. */ - map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - * sizeof(struct page_struct*), - GFP_KERNEL); - if (map == NULL) goto fail; - - for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++) - map[i] = NULL; - - vma->vm_private_data = map; - vma->vm_flags |= VM_FOREIGN; - - blktap_vma = vma; - blktap_ring_ok = 1; - - return 0; - fail: - /* Clear any active mappings. */ - zap_page_range(vma, vma->vm_start, - vma->vm_end - vma->vm_start, NULL); - - return -ENOMEM; -} - -static int blktap_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - switch(cmd) { - case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */ - return blktap_read_fe_ring(); - - case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */ - return blktap_read_be_ring(); - - case BLKTAP_IOCTL_SETMODE: - if (BLKTAP_MODE_VALID(arg)) { - blktap_mode = arg; - /* XXX: may need to flush rings here. 
*/ - printk(KERN_INFO "blktap: set mode to %lx\n", arg); - return 0; - } - case BLKTAP_IOCTL_PRINT_IDXS: - { - print_be_ring_idxs(); - print_fe_ring_idxs(); - WPRINTK("User Rings: \n-----------\n"); - WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_ufe_ring.rsp_cons, - blktap_ufe_ring.req_prod_pvt, - blktap_ufe_ring.sring->req_prod, - blktap_ufe_ring.sring->rsp_prod); - WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d " - "| req_prod: %2d, rsp_prod: %2d\n", - blktap_ube_ring.req_cons, - blktap_ube_ring.rsp_prod_pvt, - blktap_ube_ring.sring->req_prod, - blktap_ube_ring.sring->rsp_prod); - - } - } - return -ENOIOCTLCMD; -} - -static unsigned int blktap_poll(struct file *file, poll_table *wait) -{ - poll_wait(file, &blktap_wait, wait); - - if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) || - RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) || - RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) { - - flush_tlb_all(); - - RING_PUSH_REQUESTS(&blktap_uctrl_ring); - RING_PUSH_REQUESTS(&blktap_ufe_ring); - RING_PUSH_RESPONSES(&blktap_ube_ring); - return POLLIN | POLLRDNORM; - } - - return 0; -} - -void blktap_kick_user(void) -{ - /* blktap_ring->req_prod = blktap_req_prod; */ - wake_up_interruptible(&blktap_wait); -} - -static struct file_operations blktap_fops = { - owner: THIS_MODULE, - poll: blktap_poll, - ioctl: blktap_ioctl, - open: blktap_open, - release: blktap_release, - mmap: blktap_mmap, -}; - -/*-----[ Data to/from user space ]----------------------------------------*/ - -static void fast_flush_area(int idx, int nr_pages) -{ -#ifdef CONFIG_XEN_BLKDEV_GRANT - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; - unsigned int i, op = 0; - struct grant_handle_pair *handle; - unsigned long ptep; - - for (i=0; i<nr_pages; i++) - { - handle = &pending_handle(idx, i); - if (!BLKTAP_INVALID_HANDLE(handle)) - { - - unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); - unmap[op].dev_bus_addr = 0; - 
unmap[op].handle = handle->kernel; - op++; - - if (create_lookup_pte_addr(blktap_vma->vm_mm, - MMAP_VADDR(user_vstart, idx, i), - &ptep) !=0) { - DPRINTK("Couldn't get a pte addr!\n"); - return; - } - unmap[op].host_addr = ptep; - unmap[op].dev_bus_addr = 0; - unmap[op].handle = handle->user; - op++; - - BLKTAP_INVALIDATE_HANDLE(handle); - } - } - if ( unlikely(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, op))) - BUG(); -#else - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i; - - for ( i = 0; i < nr_pages; i++ ) - { - MULTI_update_va_mapping(mcl+i, MMAP_VADDR(mmap_vstart, idx, i), - __pte(0), 0); - } - - mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) - BUG(); -#endif -} - - -int blktap_write_fe_ring(blkif_request_t *req) -{ - blkif_request_t *target; - int i, ret = 0; -#ifdef CONFIG_XEN_BLKDEV_GRANT - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; - int op; -#else - unsigned long remap_prot; - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1]; - mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -#endif - - /* - * This is called to pass a request from the real frontend domain's - * blkif ring to the character device. - */ - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: ufe_ring not ready for a request!\n"); - return 0; - } - - if ( RING_FULL(&blktap_ufe_ring) ) { - PRINTK("blktap: fe_ring is full, can't add.\n"); - return 0; - } - - flush_cache_all(); /* a noop on intel... 
*/ - - target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); - memcpy(target, req, sizeof(*req)); - - /* Map the foreign pages directly in to the application */ -#ifdef CONFIG_XEN_BLKDEV_GRANT - op = 0; - for (i=0; i<target->nr_segments; i++) { - - unsigned long uvaddr; - unsigned long kvaddr; - unsigned long ptep; - - uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); - - /* Map the remote page to kernel. */ - map[op].host_addr = kvaddr; - map[op].dom = ID_TO_DOM(req->id); - map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); - map[op].flags = GNTMAP_host_map; - /* This needs a bit more thought in terms of interposition: - * If we want to be able to modify pages during write using - * grant table mappings, the guest will either need to allow - * it, or we'll need to incur a copy. */ - if (req->operation == BLKIF_OP_WRITE) - map[op].flags |= GNTMAP_readonly; - op++; - - /* Now map it to user. */ - ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); - if (ret) - { - DPRINTK("Couldn't get a pte addr!\n"); - goto fail; - } - - map[op].host_addr = ptep; - map[op].dom = ID_TO_DOM(req->id); - map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); - map[op].flags = GNTMAP_host_map | GNTMAP_application_map - | GNTMAP_contains_pte; - /* Above interposition comment applies here as well. 
*/ - if (req->operation == BLKIF_OP_WRITE) - map[op].flags |= GNTMAP_readonly; - op++; - } - - if ( unlikely(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, map, op))) - BUG(); - - op = 0; - for (i=0; i<(target->nr_segments*2); i+=2) { - unsigned long uvaddr; - unsigned long kvaddr; - unsigned long offset; - int cancel = 0; - - uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2); - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2); - - if ( unlikely(map[i].handle < 0) ) { - DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); - ret = map[i].handle; - cancel = 1; - } - - if ( unlikely(map[i+1].handle < 0) ) { - DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); - ret = map[i+1].handle; - cancel = 1; - } - - if (cancel) - goto fail; - - /* Set the necessary mappings in p2m and in the VM_FOREIGN - * vm_area_struct to allow user vaddr -> struct page lookups - * to work. This is needed for direct IO to foreign pages. */ - phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] = - FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); - - offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; - ((struct page **)blktap_vma->vm_private_data)[offset] = - pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - - /* Save handles for unmapping later. 
*/ - pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle; - pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle; - } - -#else - - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; - - for (i=0; i<target->nr_segments; i++) { - unsigned long buf; - unsigned long uvaddr; - unsigned long kvaddr; - unsigned long offset; - unsigned long ptep; - - buf = target->frame_and_sects[i] & PAGE_MASK; - uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); - - MULTI_update_va_mapping_otherdomain( - mcl+i, - kvaddr, - pfn_pte_ma(buf >> PAGE_SHIFT, __pgprot(remap_prot)), - 0, - ID_TO_DOM(req->id)); - - phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = - FOREIGN_FRAME(buf >> PAGE_SHIFT); - - ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); - if (ret) - { - DPRINTK("error getting pte\n"); - goto fail; - } - - mmu[i].ptr = ptep; - mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK) - | pgprot_val(blktap_vma->vm_page_prot); - - offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; - ((struct page **)blktap_vma->vm_private_data)[offset] = - pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); - } - - /* Add the mmu_update call. */ - mcl[i].op = __HYPERVISOR_mmu_update; - mcl[i].args[0] = (unsigned long)mmu; - mcl[i].args[1] = target->nr_segments; - mcl[i].args[2] = 0; - mcl[i].args[3] = ID_TO_DOM(req->id); - - BUG_ON(HYPERVISOR_multicall(mcl, target->nr_segments+1) != 0); - - /* Make sure it all worked. 
*/ - for ( i = 0; i < target->nr_segments; i++ ) - { - if ( unlikely(mcl[i].result != 0) ) - { - DPRINTK("invalid buffer -- could not remap it\n"); - ret = mcl[i].result; - goto fail; - } - } - if ( unlikely(mcl[i].result != 0) ) - { - DPRINTK("direct remapping of pages to /dev/blktap failed.\n"); - ret = mcl[i].result; - goto fail; - } -#endif /* CONFIG_XEN_BLKDEV_GRANT */ - - /* Mark mapped pages as reserved: */ - for ( i = 0; i < target->nr_segments; i++ ) - { - unsigned long kvaddr; - - kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); - SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT)); - } - - - blktap_ufe_ring.req_prod_pvt++; - - return 0; - - fail: - fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); - return ret; -} - -int blktap_write_be_ring(blkif_response_t *rsp) -{ - blkif_response_t *target; - - /* - * This is called to pass a request from the real backend domain's - * blkif ring to the character device. - */ - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: be_ring not ready for a request!\n"); - return 0; - } - - /* No test for fullness in the response direction. */ - - target = RING_GET_RESPONSE(&blktap_ube_ring, - blktap_ube_ring.rsp_prod_pvt); - memcpy(target, rsp, sizeof(*rsp)); - - /* no mapping -- pages were mapped in blktap_write_fe_ring() */ - - blktap_ube_ring.rsp_prod_pvt++; - - return 0; -} - -static int blktap_read_fe_ring(void) -{ - /* This is called to read responses from the UFE ring. 
*/ - - RING_IDX i, j, rp; - blkif_response_t *resp_s; - blkif_t *blkif; - active_req_t *ar; - - DPRINTK("blktap_read_fe_ring()\n"); - - /* if we are forwarding from UFERring to FERing */ - if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { - - /* for each outstanding message on the UFEring */ - rp = blktap_ufe_ring.sring->rsp_prod; - rmb(); - - for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) - { - resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i); - - DPRINTK("resp->fe_ring\n"); - ar = lookup_active_req(ID_TO_IDX(resp_s->id)); - blkif = ar->blkif; - for (j = 0; j < ar->nr_pages; j++) { - unsigned long vaddr; - struct page **map = blktap_vma->vm_private_data; - int offset; - - vaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), j); - offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT; - - ClearPageReserved(virt_to_page(vaddr)); - map[offset] = NULL; - } - - fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); - zap_page_range(blktap_vma, - MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), - ar->nr_pages << PAGE_SHIFT, NULL); - write_resp_to_fe_ring(blkif, resp_s); - blktap_ufe_ring.rsp_cons = i + 1; - kick_fe_domain(blkif); - } - } - return 0; -} - -static int blktap_read_be_ring(void) -{ - /* This is called to read requests from the UBE ring. */ - - RING_IDX i, rp; - blkif_request_t *req_s; - - DPRINTK("blktap_read_be_ring()\n"); - - /* if we are forwarding from UFERring to FERing */ - if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { - - /* for each outstanding message on the UFEring */ - rp = blktap_ube_ring.sring->req_prod; - rmb(); - for ( i = blktap_ube_ring.req_cons; i != rp; i++ ) - { - req_s = RING_GET_REQUEST(&blktap_ube_ring, i); - - DPRINTK("req->be_ring\n"); - write_req_to_be_ring(req_s); - kick_be_domain(); - } - - blktap_ube_ring.req_cons = i; - } - - return 0; -} - -int blktap_write_ctrl_ring(ctrl_msg_t *msg) -{ - ctrl_msg_t *target; - - if ( ! 
blktap_ring_ok ) { - DPRINTK("blktap: be_ring not ready for a request!\n"); - return 0; - } - - /* No test for fullness in the response direction. */ - - target = RING_GET_REQUEST(&blktap_uctrl_ring, - blktap_uctrl_ring.req_prod_pvt); - memcpy(target, msg, sizeof(*msg)); - - blktap_uctrl_ring.req_prod_pvt++; - - /* currently treat the ring as unidirectional. */ - blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod; - - return 0; - -} - -/* -------[ blktap module setup ]------------------------------------- */ - -static struct miscdevice blktap_miscdev = { - .minor = BLKTAP_MINOR, - .name = "blktap", - .fops = &blktap_fops, - .devfs_name = "misc/blktap", -}; - -int blktap_init(void) -{ - int err, i, j; - struct page *page; - - page = balloon_alloc_empty_page_range(MMAP_PAGES); - BUG_ON(page == NULL); - mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); - -#ifdef CONFIG_XEN_BLKDEV_GRANT - for (i=0; i<MAX_PENDING_REQS ; i++) - for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) - BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); -#endif - - err = misc_register(&blktap_miscdev); - if ( err != 0 ) - { - printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err); - return err; - } - - init_waitqueue_head(&blktap_wait); - - - return 0; -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/ctrl_if.h --- a/linux-2.6-xen-sparse/include/asm-xen/ctrl_if.h Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,160 +0,0 @@ -/****************************************************************************** - * ctrl_if.h - * - * Management functions for special interface to the domain controller. 
- * - * Copyright (c) 2004, K A Fraser - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef __ASM_XEN__CTRL_IF_H__ -#define __ASM_XEN__CTRL_IF_H__ - -#include <asm-xen/hypervisor.h> -#include <asm-xen/queues.h> - -typedef control_msg_t ctrl_msg_t; - -/* - * Callback function type. Called for asynchronous processing of received - * request messages, and responses to previously-transmitted request messages. - * The parameters are (@msg, @id). - * @msg: Original request/response message (not a copy). The message can be - * modified in-place by the handler (e.g., a response callback can - * turn a request message into a response message in place). The message - * is no longer accessible after the callback handler returns -- if the - * message is required to persist for longer then it must be copied. 
- * @id: (Response callbacks only) The 'id' that was specified when the - * original request message was queued for transmission. - */ -typedef void (*ctrl_msg_handler_t)(ctrl_msg_t *, unsigned long); - -/* - * Send @msg to the domain controller. Execute @hnd when a response is - * received, passing the response message and the specified @id. This - * operation will not block: it will return -EAGAIN if there is no space. - * Notes: - * 1. The @msg is copied if it is transmitted and so can be freed after this - * function returns. - * 2. If @hnd is NULL then no callback is executed. - */ -int -ctrl_if_send_message_noblock( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id); - -/* - * Send @msg to the domain controller. Execute @hnd when a response is - * received, passing the response message and the specified @id. This - * operation will block until the message is sent, or a signal is received - * for the calling process (unless @wait_state is TASK_UNINTERRUPTIBLE). - * Notes: - * 1. The @msg is copied if it is transmitted and so can be freed after this - * function returns. - * 2. If @hnd is NULL then no callback is executed. - */ -int -ctrl_if_send_message_block( - ctrl_msg_t *msg, - ctrl_msg_handler_t hnd, - unsigned long id, - long wait_state); - -/* - * Send @msg to the domain controller. Block until the response is received, - * and then copy it into the provided buffer, @rmsg. - */ -int -ctrl_if_send_message_and_get_response( - ctrl_msg_t *msg, - ctrl_msg_t *rmsg, - long wait_state); - -/* - * Request a callback when there is /possibly/ space to immediately send a - * message to the domain controller. This function returns 0 if there is - * already space to trasnmit a message --- in this case the callback task /may/ - * still be executed. If this function returns 1 then the callback /will/ be - * executed when space becomes available. 
- */ -int -ctrl_if_enqueue_space_callback( - struct tq_struct *task); - -/* - * Send a response (@msg) to a message from the domain controller. This will - * never block. - * Notes: - * 1. The @msg is copied and so can be freed after this function returns. - * 2. The @msg may be the original request message, modified in-place. - */ -void -ctrl_if_send_response( - ctrl_msg_t *msg); - -/* - * Register a receiver for typed messages from the domain controller. The - * handler (@hnd) is called for every received message of specified @type. - * Returns TRUE (non-zero) if the handler was successfully registered. - * If CALLBACK_IN_BLOCKING CONTEXT is specified in @flags then callbacks will - * occur in a context in which it is safe to yield (i.e., process context). - */ -#define CALLBACK_IN_BLOCKING_CONTEXT 1 -int ctrl_if_register_receiver( - u8 type, - ctrl_msg_handler_t hnd, - unsigned int flags); - -/* - * Unregister a receiver for typed messages from the domain controller. The - * handler (@hnd) will not be executed after this function returns. - */ -void -ctrl_if_unregister_receiver( - u8 type, ctrl_msg_handler_t hnd); - -/* Suspend/resume notifications. */ -void ctrl_if_suspend(void); -void ctrl_if_resume(void); - -/* Start-of-day setup. */ -void ctrl_if_init(void); - -/* - * Returns TRUE if there are no outstanding message requests at the domain - * controller. This can be used to ensure that messages have really flushed - * through when it is not possible to use the response-callback interface. - * WARNING: If other subsystems are using the control interface then this - * function might never return TRUE! - */ -int ctrl_if_transmitter_empty(void); /* !! DANGEROUS FUNCTION !! */ - -/* - * Manually discard response messages from the domain controller. - * WARNING: This is usually done automatically -- this function should only - * be called when normal interrupt mechanisms are disabled! - */ -void ctrl_if_discard_responses(void); /* !! DANGEROUS FUNCTION !! 
*/ - -#endif /* __ASM_XEN__CONTROL_IF_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 linux-2.6-xen-sparse/include/asm-xen/linux-public/suspend.h --- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/suspend.h Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,43 +0,0 @@ -/****************************************************************************** - * suspend.h - * - * Copyright (c) 2003-2004, K A Fraser - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef __ASM_XEN_SUSPEND_H__ -#define __ASM_XEN_SUSPEND_H__ - -typedef struct suspend_record_st { - /* To be filled in before resume. */ - start_info_t resume_info; - /* - * The number of a machine frame containing, in sequence, the number of - * each machine frame that contains PFN -> MFN translation table data. 
- */ - unsigned long pfn_to_mfn_frame_list; - /* Number of entries in the PFN -> MFN translation table. */ - unsigned long nr_pfns; -} suspend_record_t; - -#endif /* __ASM_XEN_SUSPEND_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_curl_devel --- a/tools/check/check_curl_devel Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,11 +0,0 @@ -#!/bin/bash -# CHECK-BUILD - -function error { - echo 'Check for libcurl includes failed.' - exit 1 -} - -set -e -[ -e /usr/include/curl ] || error -[ -e /usr/include/curl/curl.h ] || error \ No newline at end of file diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/check/check_curl_lib --- a/tools/check/check_curl_lib Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,10 +0,0 @@ -#!/bin/bash -# CHECK-BUILD CHECK-INSTALL - -function error { - echo 'Check for CURL library failed.' - exit 1 -} - -set -e -ldconfig -p | grep libcurl.so || error \ No newline at end of file diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/examples/backend.hotplug --- a/tools/examples/backend.hotplug Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,21 +0,0 @@ -#! 
/bin/sh - -#DEVPATH=/devices/xen-backend/vif-1-0 -#ACTION=add - -PATH=/etc/xen/scripts:$PATH - -DEV=$(basename "$DEVPATH") -case "$ACTION" in - add) - case "$DEV" in - vif-*) - vif=$(echo "$DEV" | sed 's/-\([0-9]*\)-\([0-9]*\)/\1.\2/') - vif-bridge up domain=unknown vif="$vif" mac=fe:ff:ff:ff:ff:ff bridge=xen-br0 >/dev/null 2>&1 - ;; - esac - ;; - remove) - ;; -esac - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/firmware/vmxassist/TODO --- a/tools/firmware/vmxassist/TODO Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,8 +0,0 @@ - -- Use the VME extensions (interrupt handling) - -- Use E820 map in vmxassist instead of cmos hack - -- Add ACPI support (Nitin's patch) - - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/lowlevel/xu/xu.c --- a/tools/python/xen/lowlevel/xu/xu.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,1781 +0,0 @@ -/****************************************************************************** - * utils.c - * - * Copyright (c) 2004, K A Fraser - */ - -#include <Python.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <sys/mman.h> -#include <sys/poll.h> -#include <sys/sysmacros.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <signal.h> -#include <xenctrl.h> - -#include <xen/xen.h> -#include <xen/io/domain_controller.h> -#include <xen/linux/privcmd.h> - -#define XENPKG "xen.lowlevel.xu" - -/* Needed for Python versions earlier than 2.3. */ -#ifndef PyMODINIT_FUNC -#define PyMODINIT_FUNC DL_EXPORT(void) -#endif - -/* NB. The following should be kept in sync with the kernel's evtchn driver. */ -#define EVTCHN_DEV_NAME "/dev/xen/evtchn" -#define EVTCHN_DEV_MAJOR 10 -#define EVTCHN_DEV_MINOR 201 -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. 
*/ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to teh specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) - -/* Set the close-on-exec flag on a file descriptor. Doesn't currently bother - * to check for errors. */ -/* -static void set_cloexec(int fd) -{ - int flags = fcntl(fd, F_GETFD, 0); - - if ( flags < 0 ) - return; - - flags |= FD_CLOEXEC; - fcntl(fd, F_SETFD, flags); -} -*/ -/* - * *********************** XCS INTERFACE *********************** - */ - -#include <arpa/inet.h> -#include <xcs_proto.h> - -static int xcs_ctrl_fd = -1; /* control connection to the xcs server. */ -static int xcs_data_fd = -1; /* data connection to the xcs server. */ -static unsigned long xcs_session_id = 0; - -static int xcs_ctrl_send(xcs_msg_t *msg); -static int xcs_ctrl_read(xcs_msg_t *msg); -static int xcs_data_send(xcs_msg_t *msg); -static int xcs_data_read(xcs_msg_t *msg); - -static int xcs_connect(char *path) -{ - struct sockaddr_un addr; - int ret, len, flags; - xcs_msg_t msg; - - if (xcs_data_fd != -1) /* already connected */ - return 0; - - xcs_ctrl_fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (xcs_ctrl_fd < 0) - { - printf("error creating xcs socket!\n"); - goto fail; - } - - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, path); - len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1; - - ret = connect(xcs_ctrl_fd, (struct sockaddr *)&addr, len); - if (ret < 0) - { - printf("error connecting to xcs(ctrl)! (%d)\n", errno); - goto ctrl_fd_fail; - } - - /*set_cloexec(xcs_ctrl_fd);*/ - - msg.type = XCS_CONNECT_CTRL; - msg.u.connect.session_id = xcs_session_id; - xcs_ctrl_send(&msg); - xcs_ctrl_read(&msg); /* TODO: timeout + error! */ - - if (msg.result != XCS_RSLT_OK) - { - printf("error connecting xcs control channel!\n"); - goto ctrl_fd_fail; - } - xcs_session_id = msg.u.connect.session_id; - - /* now the data connection. 
*/ - xcs_data_fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (xcs_data_fd < 0) - { - printf("error creating xcs data socket!\n"); - goto ctrl_fd_fail; - } - - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, path); - len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1; - - ret = connect(xcs_data_fd, (struct sockaddr *)&addr, len); - if (ret < 0) - { - printf("error connecting to xcs(data)! (%d)\n", errno); - goto data_fd_fail; - } - - //set_cloexec(xcs_data_fd); - msg.type = XCS_CONNECT_DATA; - msg.u.connect.session_id = xcs_session_id; - xcs_data_send(&msg); - xcs_data_read(&msg); /* TODO: timeout + error! */ - - if (msg.result != XCS_RSLT_OK) - { - printf("error connecting xcs control channel!\n"); - goto ctrl_fd_fail; - } - - if ( ((flags = fcntl(xcs_data_fd, F_GETFL, 0)) < 0) || - (fcntl(xcs_data_fd, F_SETFL, flags | O_NONBLOCK) < 0) ) - { - printf("Unable to set non-blocking status on data socket."); - goto data_fd_fail; - } - - return 0; - -data_fd_fail: - close(xcs_data_fd); - xcs_data_fd = -1; - -ctrl_fd_fail: - close(xcs_ctrl_fd); - xcs_ctrl_fd = -1; - -fail: - return -1; - -} - -static void xcs_disconnect(void) -{ - close(xcs_data_fd); - xcs_data_fd = -1; - close(xcs_ctrl_fd); - xcs_ctrl_fd = -1; -} - -static int xcs_ctrl_read(xcs_msg_t *msg) -{ - int ret; - - ret = read(xcs_ctrl_fd, msg, sizeof(xcs_msg_t)); - return ret; -} - -static int xcs_ctrl_send(xcs_msg_t *msg) -{ - int ret; - - ret = send(xcs_ctrl_fd, msg, sizeof(xcs_msg_t), 0); - return ret; -} - -static int xcs_data_read(xcs_msg_t *msg) -{ - int ret; - - ret = read(xcs_data_fd, msg, sizeof(xcs_msg_t)); - return ret; -} - -static int xcs_data_send(xcs_msg_t *msg) -{ - int ret; - - ret = send(xcs_data_fd, msg, sizeof(xcs_msg_t), 0); - return ret; -} - - -typedef struct kme_st { - xcs_msg_t msg; - struct kme_st *next; -} xcs_msg_ent_t; - - -#define XCS_RING_SIZE 64 -static xcs_msg_ent_t *req_ring[64]; -static unsigned req_prod = 0; -static unsigned req_cons = 0; - -static xcs_msg_ent_t 
*rsp_ring[64]; -static unsigned rsp_prod = 0; -static unsigned rsp_cons = 0; - -#define REQ_RING_ENT(_idx) (req_ring[(_idx) % XCS_RING_SIZE]) -#define RSP_RING_ENT(_idx) (rsp_ring[(_idx) % XCS_RING_SIZE]) -#define REQ_RING_FULL ( req_prod - req_cons == XCS_RING_SIZE ) -#define RSP_RING_FULL ( rsp_prod - rsp_cons == XCS_RING_SIZE ) -#define REQ_RING_EMPTY ( req_prod == req_cons ) -#define RSP_RING_EMPTY ( rsp_prod == rsp_cons ) -/* - * *********************** NOTIFIER *********************** - */ - -typedef struct { - PyObject_HEAD; - int evtchn_fd; -} xu_notifier_object; - -static PyObject *xu_notifier_read(PyObject *self, PyObject *args) -{ - xcs_msg_ent_t *ent; - int ret; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - while ((!REQ_RING_FULL) && (!RSP_RING_FULL)) - { - ent = (xcs_msg_ent_t *)malloc(sizeof(xcs_msg_ent_t)); - ret = xcs_data_read(&ent->msg); - - if (ret == -1) - { - free(ent); - if ( errno == EINTR ) - continue; - if ( errno == EAGAIN ) - break; - return PyErr_SetFromErrno(PyExc_IOError); - } - - switch (ent->msg.type) - { - case XCS_REQUEST: - REQ_RING_ENT(req_prod) = ent; - req_prod++; - continue; - - case XCS_RESPONSE: - RSP_RING_ENT(rsp_prod) = ent; - rsp_prod++; - continue; - - case XCS_VIRQ: - ret = ent->msg.u.control.local_port; - free(ent); - return PyInt_FromLong(ret); - - default: - /*printf("Throwing away xcs msg type: %u\n", ent->msg.type);*/ - free(ent); - } - } - - if (!REQ_RING_EMPTY) - { - return PyInt_FromLong(REQ_RING_ENT(req_cons)->msg.u.control.local_port); - } - - if (!RSP_RING_EMPTY) - { - return PyInt_FromLong(RSP_RING_ENT(rsp_cons)->msg.u.control.local_port); - } - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject *xu_notifier_bind_virq(PyObject *self, - PyObject *args, PyObject *kwds) -{ - int virq; - xcs_msg_t kmsg; - - static char *kwd_list[] = { "virq", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &virq) ) - return NULL; - - kmsg.type = XCS_VIRQ_BIND; - kmsg.u.virq.virq = 
virq; - xcs_ctrl_send(&kmsg); - xcs_ctrl_read(&kmsg); - - if ( kmsg.result != XCS_RSLT_OK ) - { - Py_INCREF(Py_None); - return Py_None; - } - - return PyInt_FromLong(kmsg.u.virq.port); -} - -static PyObject *xu_notifier_virq_send(PyObject *self, - PyObject *args, PyObject *kwds) -{ - int port; - xcs_msg_t kmsg; - - static char *kwd_list[] = { "port", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &port) ) - return NULL; - - kmsg.type = XCS_VIRQ; - kmsg.u.control.local_port = port; - xcs_ctrl_send(&kmsg); - xcs_ctrl_read(&kmsg); - - if ( kmsg.result != XCS_RSLT_OK ) - { - Py_INCREF(Py_None); - return Py_None; - } - - return PyInt_FromLong(kmsg.u.virq.port); -} - -static PyObject *xu_notifier_fileno(PyObject *self, PyObject *args) -{ - return PyInt_FromLong(xcs_data_fd); -} - -static PyMethodDef xu_notifier_methods[] = { - { "read", - (PyCFunction)xu_notifier_read, - METH_VARARGS, - "Read a @port with pending notifications.\n" }, - - { "bind_virq", - (PyCFunction)xu_notifier_bind_virq, - METH_VARARGS | METH_KEYWORDS, - "Get notifications for a virq.\n" - " virq [int]: VIRQ to bind.\n\n" }, - - { "virq_send", - (PyCFunction)xu_notifier_virq_send, - METH_VARARGS | METH_KEYWORDS, - "Fire a virq notification.\n" - " port [int]: port that VIRQ is bound to.\n\n" }, - - { "fileno", - (PyCFunction)xu_notifier_fileno, - METH_VARARGS, - "Return the file descriptor for the notification channel.\n" }, - - { NULL, NULL, 0, NULL } -}; - -staticforward PyTypeObject xu_notifier_type; - -/* connect to xcs if we aren't already, and return a dummy object. 
*/ -static PyObject *xu_notifier_new(PyObject *self, PyObject *args) -{ - xu_notifier_object *xun; - int i; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - xun = PyObject_New(xu_notifier_object, &xu_notifier_type); - - for (i = 0; i < XCS_RING_SIZE; i++) - REQ_RING_ENT(i) = RSP_RING_ENT(i) = NULL; - - (void)xcs_connect(XCS_SUN_PATH); - - - return (PyObject *)xun; -} - -static PyObject *xu_notifier_getattr(PyObject *obj, char *name) -{ - return Py_FindMethod(xu_notifier_methods, obj, name); -} - -static void xu_notifier_dealloc(PyObject *self) -{ - xcs_disconnect(); - PyObject_Del(self); -} - -static PyTypeObject xu_notifier_type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, - "notifier", - sizeof(xu_notifier_object), - 0, - xu_notifier_dealloc, /* tp_dealloc */ - NULL, /* tp_print */ - xu_notifier_getattr, /* tp_getattr */ - NULL, /* tp_setattr */ - NULL, /* tp_compare */ - NULL, /* tp_repr */ - NULL, /* tp_as_number */ - NULL, /* tp_as_sequence */ - NULL, /* tp_as_mapping */ - NULL /* tp_hash */ -}; - - - -/* - * *********************** MESSAGE *********************** - */ - -#define TYPE(_x,_y) (((_x)<<8)|(_y)) - -#define P2C(_struct, _field, _ctype) \ - do { \ - PyObject *obj; \ - if ( (obj = PyDict_GetItemString(payload, #_field)) != NULL ) \ - { \ - if ( PyInt_Check(obj) ) \ - { \ - ((_struct *)&xum->msg.msg[0])->_field = \ - (_ctype)PyInt_AsLong(obj); \ - dict_items_parsed++; \ - } \ - else if ( PyLong_Check(obj) ) \ - { \ - ((_struct *)&xum->msg.msg[0])->_field = \ - (_ctype)PyLong_AsUnsignedLongLong(obj); \ - dict_items_parsed++; \ - } \ - } \ - xum->msg.length = sizeof(_struct); \ - } while ( 0 ) - -/** Set a char[] field in a struct from a Python string. - * Can't do this in P2C because of the typing. 
- */ -#define P2CSTRING(_struct, _field) \ - do { \ - PyObject *obj; \ - if ( (obj = PyDict_GetItemString(payload, #_field)) != NULL ) \ - { \ - if ( PyString_Check(obj) ) \ - { \ - _struct * _cobj = (_struct *)&xum->msg.msg[0]; \ - int _field_n = sizeof(_cobj->_field); \ - memset(_cobj->_field, 0, _field_n); \ - strncpy(_cobj->_field, \ - PyString_AsString(obj), \ - _field_n - 1); \ - dict_items_parsed++; \ - } \ - } \ - xum->msg.length = sizeof(_struct); \ - } while ( 0 ) - -#define C2P(_struct, _field, _pytype, _ctype) \ - do { \ - PyObject *obj = Py ## _pytype ## _From ## _ctype \ - (((_struct *)&xum->msg.msg[0])->_field); \ - if ( dict == NULL ) dict = PyDict_New(); \ - PyDict_SetItemString(dict, #_field, obj); \ - } while ( 0 ) - -#define PSTR2CHAR(_struct, _field) \ - do { \ - PyObject *obj; \ - if ( (obj = PyDict_GetItemString(payload, #_field)) != NULL ) \ - { \ - if ( PyString_Check(obj) ) \ - { \ - char *buffer = PyString_AsString(obj); \ - \ - strcpy(((_struct *)&xum->msg.msg[0])->_field, \ - buffer); \ - /* Should complain about length - think later */ \ - dict_items_parsed++; \ - } \ - } \ - xum->msg.length = sizeof(_struct); \ - } while ( 0 ) - -typedef struct { - PyObject_HEAD; - control_msg_t msg; -} xu_message_object; - -static PyObject *xu_message_append_payload(PyObject *self, PyObject *args) -{ - xu_message_object *xum = (xu_message_object *)self; - char *str; - int len; - - if ( !PyArg_ParseTuple(args, "s#", &str, &len) ) - return NULL; - - if ( (len + xum->msg.length) > sizeof(xum->msg.msg) ) - { - PyErr_SetString(PyExc_RuntimeError, "out of space in control message"); - return NULL; - } - - memcpy(&xum->msg.msg[xum->msg.length], str, len); - xum->msg.length += len; - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject *xu_message_set_response_fields(PyObject *self, PyObject *args) -{ - xu_message_object *xum = (xu_message_object *)self; - PyObject *payload; - int dict_items_parsed = 0; - - if ( !PyArg_ParseTuple(args, "O", 
&payload) ) - return NULL; - - if ( !PyDict_Check(payload) ) - { - PyErr_SetString(PyExc_TypeError, "payload is not a dictionary"); - return NULL; - } - - switch ( TYPE(xum->msg.type, xum->msg.subtype) ) - { - case TYPE(CMSG_BLKIF_FE, CMSG_BLKIF_FE_DRIVER_STATUS): - P2C(blkif_fe_driver_status_t, max_handle, u32); - break; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_DRIVER_STATUS): - P2C(netif_fe_driver_status_t, max_handle, u32); - break; - } - - if ( dict_items_parsed != PyDict_Size(payload) ) - { - PyErr_SetString(PyExc_TypeError, "payload contains bad items"); - return NULL; - } - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject *xu_message_get_payload(PyObject *self, PyObject *args) -{ - xu_message_object *xum = (xu_message_object *)self; - PyObject *dict = NULL; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - switch ( TYPE(xum->msg.type, xum->msg.subtype) ) - { - case TYPE(CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_STATUS): - C2P(blkif_fe_interface_status_t, handle, Int, Long); - C2P(blkif_fe_interface_status_t, status, Int, Long); - C2P(blkif_fe_interface_status_t, evtchn, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_FE, CMSG_BLKIF_FE_DRIVER_STATUS): - C2P(blkif_fe_driver_status_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_CONNECT): - C2P(blkif_fe_interface_connect_t, handle, Int, Long); - C2P(blkif_fe_interface_connect_t, shmem_frame, Int, Long); - C2P(blkif_fe_interface_connect_t, shmem_ref , Int, Long); - return dict; - case TYPE(CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_DISCONNECT): - C2P(blkif_fe_interface_disconnect_t, handle, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE): - C2P(blkif_be_create_t, domid, Int, Long); - C2P(blkif_be_create_t, blkif_handle, Int, Long); - C2P(blkif_be_create_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY): - C2P(blkif_be_destroy_t, domid, Int, Long); - C2P(blkif_be_destroy_t, blkif_handle, Int, 
Long); - C2P(blkif_be_destroy_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CONNECT): - C2P(blkif_be_connect_t, domid, Int, Long); - C2P(blkif_be_connect_t, blkif_handle, Int, Long); - C2P(blkif_be_connect_t, shmem_frame, Int, Long); - C2P(blkif_be_connect_t, shmem_ref, Int, Long); - C2P(blkif_be_connect_t, evtchn, Int, Long); - C2P(blkif_be_connect_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DISCONNECT): - C2P(blkif_be_disconnect_t, domid, Int, Long); - C2P(blkif_be_disconnect_t, blkif_handle, Int, Long); - C2P(blkif_be_disconnect_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE): - C2P(blkif_be_vbd_create_t, domid, Int, Long); - C2P(blkif_be_vbd_create_t, blkif_handle, Int, Long); - C2P(blkif_be_vbd_create_t, pdevice, Int, Long); - C2P(blkif_be_vbd_create_t, dev_handle, Int, Long); - C2P(blkif_be_vbd_create_t, vdevice, Int, Long); - C2P(blkif_be_vbd_create_t, readonly, Int, Long); - C2P(blkif_be_vbd_create_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_DESTROY): - C2P(blkif_be_vbd_destroy_t, domid, Int, Long); - C2P(blkif_be_vbd_destroy_t, blkif_handle, Int, Long); - C2P(blkif_be_vbd_destroy_t, vdevice, Int, Long); - C2P(blkif_be_vbd_destroy_t, status, Int, Long); - return dict; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DRIVER_STATUS): - C2P(blkif_be_driver_status_t, status, Int, Long); - return dict; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_STATUS): - C2P(netif_fe_interface_status_t, handle, Int, Long); - C2P(netif_fe_interface_status_t, status, Int, Long); - C2P(netif_fe_interface_status_t, evtchn, Int, Long); - C2P(netif_fe_interface_status_t, mac[0], Int, Long); - C2P(netif_fe_interface_status_t, mac[1], Int, Long); - C2P(netif_fe_interface_status_t, mac[2], Int, Long); - C2P(netif_fe_interface_status_t, mac[3], Int, Long); - C2P(netif_fe_interface_status_t, mac[4], Int, Long); - 
C2P(netif_fe_interface_status_t, mac[5], Int, Long); - return dict; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_DRIVER_STATUS): - C2P(netif_fe_driver_status_t, status, Int, Long); - C2P(netif_fe_driver_status_t, max_handle, Int, Long); - return dict; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_CONNECT): - C2P(netif_fe_interface_connect_t, handle, Int, Long); - C2P(netif_fe_interface_connect_t, tx_shmem_frame, Int, Long); - C2P(netif_fe_interface_connect_t, tx_shmem_ref, Int, Long); - C2P(netif_fe_interface_connect_t, rx_shmem_frame, Int, Long); - C2P(netif_fe_interface_connect_t, rx_shmem_ref, Int, Long); - return dict; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_DISCONNECT): - C2P(netif_fe_interface_disconnect_t, handle, Int, Long); - return dict; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CREATE): - C2P(netif_be_create_t, domid, Int, Long); - C2P(netif_be_create_t, netif_handle, Int, Long); - C2P(netif_be_create_t, status, Int, Long); - return dict; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DESTROY): - C2P(netif_be_destroy_t, domid, Int, Long); - C2P(netif_be_destroy_t, netif_handle, Int, Long); - C2P(netif_be_destroy_t, status, Int, Long); - return dict; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CREDITLIMIT): - C2P(netif_be_creditlimit_t, domid, Int, Long); - C2P(netif_be_creditlimit_t, netif_handle, Int, Long); - C2P(netif_be_creditlimit_t, credit_bytes, Int, Long); - C2P(netif_be_creditlimit_t, period_usec, Int, Long); - C2P(netif_be_creditlimit_t, status, Int, Long); - return dict; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT): - C2P(netif_be_connect_t, domid, Int, Long); - C2P(netif_be_connect_t, netif_handle, Int, Long); - C2P(netif_be_connect_t, tx_shmem_frame, Int, Long); - C2P(netif_be_connect_t, tx_shmem_ref, Int, Long); - C2P(netif_be_connect_t, rx_shmem_frame, Int, Long); - C2P(netif_be_connect_t, rx_shmem_ref, Int, Long); - C2P(netif_be_connect_t, evtchn, Int, Long); - C2P(netif_be_connect_t, status, Int, Long); - return dict; - case 
TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DISCONNECT): - C2P(netif_be_disconnect_t, domid, Int, Long); - C2P(netif_be_disconnect_t, netif_handle, Int, Long); - C2P(netif_be_disconnect_t, status, Int, Long); - return dict; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DRIVER_STATUS): - C2P(netif_be_driver_status_t, status, Int, Long); - return dict; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED): - C2P(usbif_fe_interface_status_changed_t, status, Int, Long); - C2P(usbif_fe_interface_status_changed_t, evtchn, Int, Long); - C2P(usbif_fe_interface_status_changed_t, domid, Int, Long); - C2P(usbif_fe_interface_status_changed_t, bandwidth, Int, Long); - C2P(usbif_fe_interface_status_changed_t, num_ports, Int, Long); - return dict; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_DRIVER_STATUS_CHANGED): - C2P(usbif_fe_driver_status_changed_t, status, Int, Long); - return dict; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT): - C2P(usbif_fe_interface_connect_t, shmem_frame, Int, Long); - return dict; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT): - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CREATE): - C2P(usbif_be_create_t, domid, Int, Long); - C2P(usbif_be_create_t, status, Int, Long); - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DESTROY): - C2P(usbif_be_destroy_t, domid, Int, Long); - C2P(usbif_be_destroy_t, status, Int, Long); - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT): - C2P(usbif_be_connect_t, domid, Int, Long); - C2P(usbif_be_connect_t, shmem_frame, Int, Long); - C2P(usbif_be_connect_t, evtchn, Int, Long); - C2P(usbif_be_connect_t, bandwidth, Int, Long); - C2P(usbif_be_connect_t, status, Int, Long); - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DISCONNECT): - C2P(usbif_be_disconnect_t, domid, Int, Long); - C2P(usbif_be_disconnect_t, status, Int, Long); - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DRIVER_STATUS_CHANGED): - C2P(usbif_be_driver_status_changed_t, status, Int, 
Long); - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CLAIM_PORT): - C2P(usbif_be_claim_port_t, domid, Int, Long); - C2P(usbif_be_claim_port_t, usbif_port, Int, Long); - C2P(usbif_be_claim_port_t, status, Int, Long); - C2P(usbif_be_claim_port_t, path, String, String); - return dict; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_RELEASE_PORT): - C2P(usbif_be_release_port_t, path, String, String); - return dict; - case TYPE(CMSG_MEM_REQUEST, CMSG_MEM_REQUEST_SET): - C2P(mem_request_t, target, Int, Long); - C2P(mem_request_t, status, Int, Long); - return dict; - case TYPE(CMSG_VCPU_HOTPLUG, CMSG_VCPU_HOTPLUG_OFF): - C2P(vcpu_hotplug_t, vcpu, Int, Long); - C2P(vcpu_hotplug_t, status, Int, Long); - return dict; - case TYPE(CMSG_VCPU_HOTPLUG, CMSG_VCPU_HOTPLUG_ON): - C2P(vcpu_hotplug_t, vcpu, Int, Long); - C2P(vcpu_hotplug_t, status, Int, Long); - return dict; - } - - return PyString_FromStringAndSize((char *)xum->msg.msg, xum->msg.length); -} - -static PyObject *xu_message_get_header(PyObject *self, PyObject *args) -{ - xu_message_object *xum = (xu_message_object *)self; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - return Py_BuildValue("{s:i,s:i,s:i}", - "type", xum->msg.type, - "subtype", xum->msg.subtype, - "id", xum->msg.id); -} - -static PyMethodDef xu_message_methods[] = { - { "append_payload", - (PyCFunction)xu_message_append_payload, - METH_VARARGS, - "Append @str to the message payload.\n" }, - - { "set_response_fields", - (PyCFunction)xu_message_set_response_fields, - METH_VARARGS, - "Fill in the response fields in a message that was passed to us.\n" }, - - { "get_payload", - (PyCFunction)xu_message_get_payload, - METH_VARARGS, - "Return the message payload in string form.\n" }, - - { "get_header", - (PyCFunction)xu_message_get_header, - METH_VARARGS, - "Returns a dictionary of values for @type, @subtype, and @id.\n" }, - - { NULL, NULL, 0, NULL } -}; - -staticforward PyTypeObject xu_message_type; - -static PyObject *xu_message_new(PyObject 
*self, PyObject *args) -{ - xu_message_object *xum; - int type, subtype, id, dict_items_parsed = 0; - PyObject *payload = NULL; - - if ( !PyArg_ParseTuple(args, "iii|O", &type, &subtype, &id, &payload) ) - return NULL; - - xum = PyObject_New(xu_message_object, &xu_message_type); - - xum->msg.type = type; - xum->msg.subtype = subtype; - xum->msg.id = id; - xum->msg.length = 0; - - if ( payload == NULL ) - return (PyObject *)xum; - - if ( !PyDict_Check(payload) ) - { - PyErr_SetString(PyExc_TypeError, "payload is not a dictionary"); - PyObject_Del((PyObject *)xum); - return NULL; - } - - switch ( TYPE(type, subtype) ) - { - case TYPE(CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_STATUS): - P2C(blkif_fe_interface_status_t, handle, u32); - P2C(blkif_fe_interface_status_t, status, u32); - P2C(blkif_fe_interface_status_t, evtchn, u16); - P2C(blkif_fe_interface_status_t, domid, u16); - break; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE): - P2C(blkif_be_create_t, domid, u32); - P2C(blkif_be_create_t, blkif_handle, u32); - break; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY): - P2C(blkif_be_destroy_t, domid, u32); - P2C(blkif_be_destroy_t, blkif_handle, u32); - break; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CONNECT): - P2C(blkif_be_connect_t, domid, u32); - P2C(blkif_be_connect_t, blkif_handle, u32); - P2C(blkif_be_connect_t, shmem_frame, unsigned long); - P2C(blkif_be_connect_t, shmem_ref, u32); - P2C(blkif_be_connect_t, evtchn, u16); - break; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DISCONNECT): - P2C(blkif_be_disconnect_t, domid, u32); - P2C(blkif_be_disconnect_t, blkif_handle, u32); - break; - case TYPE(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE): - P2C(blkif_be_vbd_create_t, domid, u32); - P2C(blkif_be_vbd_create_t, blkif_handle, u32); - P2C(blkif_be_vbd_create_t, pdevice, blkif_pdev_t); - P2C(blkif_be_vbd_create_t, dev_handle, u32); - P2C(blkif_be_vbd_create_t, vdevice, blkif_vdev_t); - P2C(blkif_be_vbd_create_t, readonly, u16); - break; - case TYPE(CMSG_BLKIF_BE, 
CMSG_BLKIF_BE_VBD_DESTROY): - P2C(blkif_be_vbd_destroy_t, domid, u32); - P2C(blkif_be_vbd_destroy_t, blkif_handle, u32); - P2C(blkif_be_vbd_destroy_t, vdevice, blkif_vdev_t); - break; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_STATUS): - P2C(netif_fe_interface_status_t, handle, u32); - P2C(netif_fe_interface_status_t, status, u32); - P2C(netif_fe_interface_status_t, evtchn, u16); - P2C(netif_fe_interface_status_t, domid, u16); - P2C(netif_fe_interface_status_t, mac[0], u8); - P2C(netif_fe_interface_status_t, mac[1], u8); - P2C(netif_fe_interface_status_t, mac[2], u8); - P2C(netif_fe_interface_status_t, mac[3], u8); - P2C(netif_fe_interface_status_t, mac[4], u8); - P2C(netif_fe_interface_status_t, mac[5], u8); - break; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CREATE): - P2C(netif_be_create_t, domid, u32); - P2C(netif_be_create_t, netif_handle, u32); - P2C(netif_be_create_t, mac[0], u8); - P2C(netif_be_create_t, mac[1], u8); - P2C(netif_be_create_t, mac[2], u8); - P2C(netif_be_create_t, mac[3], u8); - P2C(netif_be_create_t, mac[4], u8); - P2C(netif_be_create_t, mac[5], u8); - P2C(netif_be_create_t, be_mac[0], u8); - P2C(netif_be_create_t, be_mac[1], u8); - P2C(netif_be_create_t, be_mac[2], u8); - P2C(netif_be_create_t, be_mac[3], u8); - P2C(netif_be_create_t, be_mac[4], u8); - P2C(netif_be_create_t, be_mac[5], u8); - break; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DESTROY): - P2C(netif_be_destroy_t, domid, u32); - P2C(netif_be_destroy_t, netif_handle, u32); - break; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CREDITLIMIT): - P2C(netif_be_creditlimit_t, domid, u32); - P2C(netif_be_creditlimit_t, netif_handle, u32); - P2C(netif_be_creditlimit_t, credit_bytes, u32); - P2C(netif_be_creditlimit_t, period_usec, u32); - break; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT): - P2C(netif_be_connect_t, domid, u32); - P2C(netif_be_connect_t, netif_handle, u32); - P2C(netif_be_connect_t, tx_shmem_frame, unsigned long); - P2C(netif_be_connect_t, tx_shmem_ref, u32); - 
P2C(netif_be_connect_t, rx_shmem_frame, unsigned long); - P2C(netif_be_connect_t, rx_shmem_ref, u32); - P2C(netif_be_connect_t, evtchn, u16); - break; - case TYPE(CMSG_NETIF_BE, CMSG_NETIF_BE_DISCONNECT): - P2C(netif_be_disconnect_t, domid, u32); - P2C(netif_be_disconnect_t, netif_handle, u32); - break; - case TYPE(CMSG_NETIF_FE, CMSG_NETIF_FE_DRIVER_STATUS): - P2C(netif_fe_driver_status_t, status, u32); - P2C(netif_fe_driver_status_t, max_handle, u32); - break; - case TYPE(CMSG_MEM_REQUEST, CMSG_MEM_REQUEST_SET): - P2C(mem_request_t, target, u32); - break; - case TYPE(CMSG_VCPU_HOTPLUG, CMSG_VCPU_HOTPLUG_OFF): - P2C(vcpu_hotplug_t, vcpu, u32); - P2C(vcpu_hotplug_t, status, u32); - break; - case TYPE(CMSG_VCPU_HOTPLUG, CMSG_VCPU_HOTPLUG_ON): - P2C(vcpu_hotplug_t, vcpu, u32); - P2C(vcpu_hotplug_t, status, u32); - break; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED): - P2C(usbif_fe_interface_status_changed_t, status, u32); - P2C(usbif_fe_interface_status_changed_t, evtchn, u16); - P2C(usbif_fe_interface_status_changed_t, domid, domid_t); - P2C(usbif_fe_interface_status_changed_t, bandwidth, u32); - P2C(usbif_fe_interface_status_changed_t, num_ports, u32); - break; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_DRIVER_STATUS_CHANGED): - P2C(usbif_fe_driver_status_changed_t, status, u32); - break; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT): - P2C(usbif_fe_interface_connect_t, shmem_frame, unsigned long); - break; - case TYPE(CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT): - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CREATE): - P2C(usbif_be_create_t, domid, domid_t); - P2C(usbif_be_create_t, status, u32); - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DESTROY): - P2C(usbif_be_destroy_t, domid, domid_t); - P2C(usbif_be_destroy_t, status, u32); - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT): - P2C(usbif_be_connect_t, domid, domid_t); - P2C(usbif_be_connect_t, shmem_frame, unsigned long); - P2C(usbif_be_connect_t, 
evtchn, u32); - P2C(usbif_be_connect_t, bandwidth, u32); - P2C(usbif_be_connect_t, status, u32); - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DISCONNECT): - P2C(usbif_be_disconnect_t, domid, domid_t); - P2C(usbif_be_disconnect_t, status, u32); - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_DRIVER_STATUS_CHANGED): - P2C(usbif_be_driver_status_changed_t, status, u32); - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_CLAIM_PORT): - P2C(usbif_be_claim_port_t, domid, domid_t); - P2C(usbif_be_claim_port_t, usbif_port, u32); - P2C(usbif_be_claim_port_t, status, u32); - PSTR2CHAR(usbif_be_claim_port_t, path); - printf("dict items parsed = %d", dict_items_parsed); - break; - case TYPE(CMSG_USBIF_BE, CMSG_USBIF_BE_RELEASE_PORT): - PSTR2CHAR(usbif_be_release_port_t, path); - break; - case TYPE(CMSG_SHUTDOWN, CMSG_SHUTDOWN_SYSRQ): - P2C(shutdown_sysrq_t, key, char); - break; - } - - if ( dict_items_parsed != PyDict_Size(payload) ) - { - PyErr_SetString(PyExc_TypeError, "payload contains bad items"); - PyObject_Del((PyObject *)xum); - return NULL; - } - - return (PyObject *)xum; -} - -static PyObject *xu_message_getattr(PyObject *obj, char *name) -{ - xu_message_object *xum; - if ( strcmp(name, "MAX_PAYLOAD") == 0 ) - return PyInt_FromLong(sizeof(xum->msg.msg)); - return Py_FindMethod(xu_message_methods, obj, name); -} - -static void xu_message_dealloc(PyObject *self) -{ - PyObject_Del(self); -} - -static PyTypeObject xu_message_type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, - "message", - sizeof(xu_message_object), - 0, - xu_message_dealloc, /* tp_dealloc */ - NULL, /* tp_print */ - xu_message_getattr, /* tp_getattr */ - NULL, /* tp_setattr */ - NULL, /* tp_compare */ - NULL, /* tp_repr */ - NULL, /* tp_as_number */ - NULL, /* tp_as_sequence */ - NULL, /* tp_as_mapping */ - NULL /* tp_hash */ -}; - - - -/* - * *********************** PORT *********************** - */ - -typedef struct xu_port_object { - PyObject_HEAD; - int xc_handle; - int connected; - u32 
remote_dom; - int local_port, remote_port; - struct xu_port_object *fix_next; -} xu_port_object; - -static PyObject *port_error; - -static PyObject *xu_port_read_request(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - xu_message_object *xum; - control_msg_t *cmsg; - unsigned i; - xcs_msg_ent_t *ent = NULL; - - for ( i = req_cons; (i != req_prod); i++ ) { - ent = REQ_RING_ENT(i); - if (ent == NULL) - continue; - if (ent->msg.u.control.remote_dom == xup->remote_dom) - break; - } - - if ((ent == NULL) || - (ent->msg.u.control.remote_dom != xup->remote_dom)) - goto none; - - cmsg = &ent->msg.u.control.msg; - xum = PyObject_New(xu_message_object, &xu_message_type); - memcpy(&xum->msg, cmsg, sizeof(*cmsg)); - if ( xum->msg.length > sizeof(xum->msg.msg) ) - xum->msg.length = sizeof(xum->msg.msg); - free(ent); - - /* remove the entry from the ring and advance the consumer if possible */ - REQ_RING_ENT(i) = NULL; - while ( (REQ_RING_ENT(req_cons) == NULL) && (!REQ_RING_EMPTY) ) - req_cons++; - - return (PyObject *)xum; - -none: - Py_INCREF(Py_None); - return Py_None; - -} - -static PyObject *xu_port_write_request(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - xu_message_object *xum; - xcs_msg_t kmsg; - - if ( !PyArg_ParseTuple(args, "O", (PyObject **)&xum) ) - return NULL; - - if ( !PyObject_TypeCheck((PyObject *)xum, &xu_message_type) ) - { - PyErr_SetString(PyExc_TypeError, "expected a " XENPKG ".message"); - return NULL; - } - - kmsg.type = XCS_REQUEST; - kmsg.u.control.remote_dom = xup->remote_dom; - memcpy(&kmsg.u.control.msg, &xum->msg, sizeof(control_msg_t)); - xcs_data_send(&kmsg); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject *xu_port_read_response(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - xu_message_object *xum; - control_msg_t *cmsg; - unsigned i; - xcs_msg_ent_t *ent = NULL; - - for ( i = rsp_cons; (i != rsp_prod); i++ ) { - ent 
= RSP_RING_ENT(i); - if (ent == NULL) - continue; - if (ent->msg.u.control.remote_dom == xup->remote_dom) - break; - } - - if ((ent == NULL) || - (ent->msg.u.control.remote_dom != xup->remote_dom)) - goto none; - - cmsg = &ent->msg.u.control.msg; - xum = PyObject_New(xu_message_object, &xu_message_type); - memcpy(&xum->msg, cmsg, sizeof(*cmsg)); - if ( xum->msg.length > sizeof(xum->msg.msg) ) - xum->msg.length = sizeof(xum->msg.msg); - free(ent); - - /* remove the entry from the ring and advance the consumer if possible */ - RSP_RING_ENT(i) = NULL; - while ( (RSP_RING_ENT(rsp_cons) == NULL) && (!RSP_RING_EMPTY) ) - rsp_cons++; - - return (PyObject *)xum; - -none: - Py_INCREF(Py_None); - return Py_None; - -} - -static PyObject *xu_port_write_response(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - xu_message_object *xum; - xcs_msg_t kmsg; - - if ( !PyArg_ParseTuple(args, "O", (PyObject **)&xum) ) - return NULL; - - if ( !PyObject_TypeCheck((PyObject *)xum, &xu_message_type) ) - { - PyErr_SetString(PyExc_TypeError, "expected a " XENPKG ".message"); - return NULL; - } - - kmsg.type = XCS_RESPONSE; - kmsg.u.control.remote_dom = xup->remote_dom; - memcpy(&kmsg.u.control.msg, &xum->msg, sizeof(control_msg_t)); - xcs_data_send(&kmsg); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject *xu_port_request_to_read(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - xcs_msg_ent_t *ent; - int found = 0; - unsigned i; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - for ( i = req_cons; (i != req_prod); i++ ) { - ent = REQ_RING_ENT(i); - if (ent == NULL) - continue; - if (ent->msg.u.control.remote_dom == xup->remote_dom) { - found = 1; - break; - } - } - - return PyInt_FromLong(found); -} - -static PyObject *xu_port_response_to_read(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - xcs_msg_ent_t *ent; - int found = 0; - unsigned i; - - if ( 
!PyArg_ParseTuple(args, "") ) - return NULL; - - for ( i = rsp_cons; (i != rsp_prod); i++ ) { - ent = RSP_RING_ENT(i); - if (ent == NULL) - continue; - if (ent->msg.u.control.remote_dom == xup->remote_dom) { - found = 1; - break; - } - } - - return PyInt_FromLong(found); -} - -static void _xu_port_close(xu_port_object *xup ) -{ - if ( xup->connected && xup->remote_dom != 0 ) - { - xcs_msg_t kmsg; - kmsg.type = XCS_CIF_FREE_CC; - kmsg.u.interface.dom = xup->remote_dom; - kmsg.u.interface.local_port = xup->local_port; - kmsg.u.interface.remote_port = xup->remote_port; - xcs_ctrl_send(&kmsg); - xcs_ctrl_read(&kmsg); - xup->connected = 0; - } -} - -static PyObject *xu_port_close(PyObject *self, PyObject *args) -{ - xu_port_object *xup = (xu_port_object *)self; - - _xu_port_close(xup); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject *xu_port_register(PyObject *self, PyObject *args, - PyObject *kwds) -{ - int type; - xcs_msg_t msg; - xu_port_object *xup = (xu_port_object *)self; - static char *kwd_list[] = { "type", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, - &type) ) - return NULL; - - if (!xup->connected) - { - return PyInt_FromLong(0); - } - - msg.type = XCS_MSG_BIND; - msg.u.bind.port = xup->local_port; - msg.u.bind.type = type; - xcs_ctrl_send(&msg); - xcs_ctrl_read(&msg); - - if (msg.result != XCS_RSLT_OK) - { - return PyInt_FromLong(0); - } - - return PyInt_FromLong(1); -} - -static PyObject *xu_port_deregister(PyObject *self, PyObject *args, - PyObject *kwds) -{ - int type; - xcs_msg_t msg; - xu_port_object *xup = (xu_port_object *)self; - static char *kwd_list[] = { "type", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, - &type) ) - return NULL; - - if (!xup->connected) - { - return PyInt_FromLong(0); - } - - msg.type = XCS_MSG_UNBIND; - msg.u.bind.port = xup->local_port; - msg.u.bind.type = type; - xcs_ctrl_send(&msg); - xcs_ctrl_read(&msg); - - if (msg.result != XCS_RSLT_OK) - { - return 
PyInt_FromLong(0); - } - - return PyInt_FromLong(1); -} - -static PyMethodDef xu_port_methods[] = { - - { "read_request", - (PyCFunction)xu_port_read_request, - METH_VARARGS, - "Read a request message from the control interface.\n" }, - - { "write_request", - (PyCFunction)xu_port_write_request, - METH_VARARGS, - "Write a request message to the control interface.\n" }, - - { "read_response", - (PyCFunction)xu_port_read_response, - METH_VARARGS, - "Read a response message from the control interface.\n" }, - - { "write_response", - (PyCFunction)xu_port_write_response, - METH_VARARGS, - "Write a response message to the control interface.\n" }, - - { "request_to_read", - (PyCFunction)xu_port_request_to_read, - METH_VARARGS, - "Returns TRUE if there is a request message to read.\n" }, - - - { "response_to_read", - (PyCFunction)xu_port_response_to_read, - METH_VARARGS, - "Returns TRUE if there is a response message to read.\n" }, - - { "register", - (PyCFunction)xu_port_register, - METH_VARARGS | METH_KEYWORDS, - "Register to receive a type of message on this channel.\n" }, - - { "deregister", - (PyCFunction)xu_port_deregister, - METH_VARARGS | METH_KEYWORDS, - "Stop receiving a type of message on this port.\n" }, - - { "close", - (PyCFunction)xu_port_close, - METH_VARARGS, - "Close the port.\n" }, - - { NULL, NULL, 0, NULL } -}; - -staticforward PyTypeObject xu_port_type; - -static PyObject *xu_port_new(PyObject *self, PyObject *args, PyObject *kwds) -{ - xu_port_object *xup; - u32 dom; - int port1 = 0, port2 = 0; - xcs_msg_t kmsg; - - static char *kwd_list[] = { "dom", "local_port", "remote_port", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|ii", kwd_list, - &dom, &port1, &port2) ) - return NULL; - - xup = PyObject_New(xu_port_object, &xu_port_type); - - xup->connected = 0; - xup->remote_dom = dom; - - kmsg.type = XCS_CIF_NEW_CC; - kmsg.u.interface.dom = xup->remote_dom; - kmsg.u.interface.local_port = port1; - kmsg.u.interface.remote_port = port2; - 
xcs_ctrl_send(&kmsg); - xcs_ctrl_read(&kmsg); - - if ( kmsg.result != XCS_RSLT_OK ) - goto fail1; - - xup->local_port = kmsg.u.interface.local_port; - xup->remote_port = kmsg.u.interface.remote_port; - xup->connected = 1; - - return (PyObject *)xup; - - fail1: - PyObject_Del((PyObject *)xup); - PyErr_SetString(PyExc_ValueError, "cannot create port"); - return NULL; -} - -static PyObject *xu_port_getattr(PyObject *obj, char *name) -{ - xu_port_object *xup = (xu_port_object *)obj; - - if ( strcmp(name, "local_port") == 0 ) - { - return PyInt_FromLong(xup->connected ? xup->local_port : -1); - } - if ( strcmp(name, "remote_port") == 0 ) - { - return PyInt_FromLong(xup->connected ? xup->remote_port : -1); - } - if ( strcmp(name, "remote_dom") == 0 ) - { - return PyInt_FromLong(xup->remote_dom); - } - if ( strcmp(name, "connected") == 0 ) - { - return PyInt_FromLong(xup->connected); - } - return Py_FindMethod(xu_port_methods, obj, name); -} - -static void xu_port_dealloc(PyObject *self) -{ - xu_port_object *xup = (xu_port_object *)self; - - _xu_port_close(xup); - - PyObject_Del(self); -} - -static PyTypeObject xu_port_type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, - "port", - sizeof(xu_port_object), - 0, - xu_port_dealloc, /* tp_dealloc */ - NULL, /* tp_print */ - xu_port_getattr, /* tp_getattr */ - NULL, /* tp_setattr */ - NULL, /* tp_compare */ - NULL, /* tp_repr */ - NULL, /* tp_as_number */ - NULL, /* tp_as_sequence */ - NULL, /* tp_as_mapping */ - NULL /* tp_hash */ -}; - - - -/* - * *********************** BUFFER *********************** - */ - -#define BUFSZ 65536 -#define MASK_BUF_IDX(_i) ((_i)&(BUFSZ-1)) -typedef unsigned int BUF_IDX; - -typedef struct { - PyObject_HEAD; - char *buf; - unsigned int prod, cons; -} xu_buffer_object; - -static PyObject *__xu_buffer_peek(xu_buffer_object *xub, int max) -{ - PyObject *str1, *str2; - int len1, len2, c = MASK_BUF_IDX(xub->cons); - - len1 = xub->prod - xub->cons; - if ( len1 > (BUFSZ - c) ) /* clip to ring wrap */ - 
len1 = BUFSZ - c; - if ( len1 > max ) /* clip to specified maximum */ - len1 = max; - if ( len1 < 0 ) /* sanity */ - len1 = 0; - - if ( (str1 = PyString_FromStringAndSize(&xub->buf[c], len1)) == NULL ) - return NULL; - - if ( (len1 < (xub->prod - xub->cons)) && (len1 < max) ) - { - len2 = max - len1; - if ( len2 > MASK_BUF_IDX(xub->prod) ) - len2 = MASK_BUF_IDX(xub->prod); - if ( len2 > 0 ) - { - str2 = PyString_FromStringAndSize(&xub->buf[0], len2); - if ( str2 == NULL ) - return NULL; - PyString_ConcatAndDel(&str1, str2); - if ( str1 == NULL ) - return NULL; - } - } - - return str1; -} - -static PyObject *xu_buffer_peek(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - int max = 1024; - - if ( !PyArg_ParseTuple(args, "|i", &max) ) - return NULL; - - return __xu_buffer_peek(xub, max); -} - -static PyObject *xu_buffer_read(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - PyObject *str; - int max = 1024; - - if ( !PyArg_ParseTuple(args, "|i", &max) ) - return NULL; - - if ( (str = __xu_buffer_peek(xub, max)) != NULL ) - xub->cons += PyString_Size(str); - - return str; -} - -static PyObject *xu_buffer_discard(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - int max, len; - - if ( !PyArg_ParseTuple(args, "i", &max) ) - return NULL; - - len = xub->prod - xub->cons; - if ( len > max ) - len = max; - if ( len < 0 ) - len = 0; - - xub->cons += len; - - return PyInt_FromLong(len); -} - -static PyObject *xu_buffer_write(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - char *str; - int len, len1, len2; - - if ( !PyArg_ParseTuple(args, "s#", &str, &len) ) - return NULL; - - len1 = len; - if ( len1 > (BUFSZ - MASK_BUF_IDX(xub->prod)) ) - len1 = BUFSZ - MASK_BUF_IDX(xub->prod); - if ( len1 > (BUFSZ - (xub->prod - xub->cons)) ) - len1 = BUFSZ - (xub->prod - xub->cons); - - if ( len1 == 0 ) - return PyInt_FromLong(0); - - 
memcpy(&xub->buf[MASK_BUF_IDX(xub->prod)], &str[0], len1); - xub->prod += len1; - - if ( len1 < len ) - { - len2 = len - len1; - if ( len2 > (BUFSZ - MASK_BUF_IDX(xub->prod)) ) - len2 = BUFSZ - MASK_BUF_IDX(xub->prod); - if ( len2 > (BUFSZ - (xub->prod - xub->cons)) ) - len2 = BUFSZ - (xub->prod - xub->cons); - if ( len2 != 0 ) - { - memcpy(&xub->buf[MASK_BUF_IDX(xub->prod)], &str[len1], len2); - xub->prod += len2; - return PyInt_FromLong(len1 + len2); - } - } - - return PyInt_FromLong(len1); -} - -static PyObject *xu_buffer_empty(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - if ( xub->cons == xub->prod ) - return PyInt_FromLong(1); - - return PyInt_FromLong(0); -} - -static PyObject *xu_buffer_full(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - if ( (xub->prod - xub->cons) == BUFSZ ) - return PyInt_FromLong(1); - - return PyInt_FromLong(0); -} - -static PyObject *xu_buffer_size(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - return PyInt_FromLong(xub->prod - xub->cons); -} - -static PyObject *xu_buffer_space(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - return PyInt_FromLong(BUFSZ - (xub->prod - xub->cons)); -} - -static PyMethodDef xu_buffer_methods[] = { - { "peek", - (PyCFunction)xu_buffer_peek, - METH_VARARGS, - "Peek up to @max bytes from the buffer. Returns a string.\n" }, - - { "read", - (PyCFunction)xu_buffer_read, - METH_VARARGS, - "Read up to @max bytes from the buffer. Returns a string.\n" }, - - { "discard", - (PyCFunction)xu_buffer_discard, - METH_VARARGS, - "Discard up to @max bytes from the buffer. 
Returns number of bytes.\n" }, - - { "write", - (PyCFunction)xu_buffer_write, - METH_VARARGS, - "Write @string into buffer. Return number of bytes written.\n" }, - - { "empty", - (PyCFunction)xu_buffer_empty, - METH_VARARGS, - "Return TRUE if the buffer is empty.\n" }, - - { "full", - (PyCFunction)xu_buffer_full, - METH_VARARGS, - "Return TRUE if the buffer is full.\n" }, - - { "size", - (PyCFunction)xu_buffer_size, - METH_VARARGS, - "Return number of bytes in the buffer.\n" }, - - { "space", - (PyCFunction)xu_buffer_space, - METH_VARARGS, - "Return space left in the buffer.\n" }, - - { NULL, NULL, 0, NULL } -}; - -staticforward PyTypeObject xu_buffer_type; - -static PyObject *xu_buffer_new(PyObject *self, PyObject *args) -{ - xu_buffer_object *xub; - - if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - xub = PyObject_New(xu_buffer_object, &xu_buffer_type); - - if ( (xub->buf = malloc(BUFSZ)) == NULL ) - { - PyObject_Del((PyObject *)xub); - return NULL; - } - - xub->prod = xub->cons = 0; - - return (PyObject *)xub; -} - -static PyObject *xu_buffer_getattr(PyObject *obj, char *name) -{ - return Py_FindMethod(xu_buffer_methods, obj, name); -} - -static void xu_buffer_dealloc(PyObject *self) -{ - xu_buffer_object *xub = (xu_buffer_object *)self; - free(xub->buf); - PyObject_Del(self); -} - -static PyTypeObject xu_buffer_type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, - "buffer", - sizeof(xu_buffer_object), - 0, - xu_buffer_dealloc, /* tp_dealloc */ - NULL, /* tp_print */ - xu_buffer_getattr, /* tp_getattr */ - NULL, /* tp_setattr */ - NULL, /* tp_compare */ - NULL, /* tp_repr */ - NULL, /* tp_as_number */ - NULL, /* tp_as_sequence */ - NULL, /* tp_as_mapping */ - NULL /* tp_hash */ -}; - - - -/* - * *********************** MODULE WRAPPER *********************** - */ - -static void handle_child_death(int dummy) -{ - while ( waitpid(-1, NULL, WNOHANG) > 0 ) - continue; -} - -static PyObject *xu_autoreap(PyObject *self, PyObject *args) -{ - struct sigaction sa; - - 
if ( !PyArg_ParseTuple(args, "") ) - return NULL; - - memset(&sa, 0, sizeof(sa)); - sa.sa_handler = handle_child_death; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_NOCLDSTOP | SA_RESTART; - (void)sigaction(SIGCHLD, &sa, NULL); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMethodDef xu_methods[] = { - { "notifier", xu_notifier_new, METH_VARARGS, - "Create a new notifier." }, - { "message", xu_message_new, METH_VARARGS, - "Create a new communications message." }, - { "port", (PyCFunction)xu_port_new, METH_VARARGS | METH_KEYWORDS, - "Create a new communications port." }, - { "buffer", xu_buffer_new, METH_VARARGS, - "Create a new ring buffer." }, - { "autoreap", xu_autoreap, METH_VARARGS, - "Ensure that zombie children are automatically reaped by the OS." }, - { NULL, NULL, 0, NULL } -}; - -PyMODINIT_FUNC initxu(void) -{ - PyObject *m, *d; - - m = Py_InitModule(XENPKG, xu_methods); - - d = PyModule_GetDict(m); - port_error = PyErr_NewException(XENPKG ".PortError", NULL, NULL); - PyDict_SetItemString(d, "PortError", port_error); - - /* KAF: This ensures that we get debug output in a timely manner. */ - setbuf(stdout, NULL); - setbuf(stderr, NULL); -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/python/xen/xend/server/messages.py --- a/tools/python/xen/xend/server/messages.py Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,462 +0,0 @@ -#============================================================================ -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -#============================================================================ -# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> -#============================================================================ - -import sys -import struct -import types - -from xen.lowlevel import xu - -DEBUG = False - -#PORT_WILDCARD = 0xefffffff - -"""Wildcard for the control message types.""" -TYPE_WILDCARD = 0xffff - -""" All message formats. -Added to incrementally for the various message types. -See below. -""" -msg_formats = {} - -#============================================================================ -# Block interface message types. -#============================================================================ - -CMSG_BLKIF_BE = 1 -CMSG_BLKIF_FE = 2 - -CMSG_BLKIF_FE_INTERFACE_STATUS = 0 -CMSG_BLKIF_FE_DRIVER_STATUS = 32 -CMSG_BLKIF_FE_INTERFACE_CONNECT = 33 -CMSG_BLKIF_FE_INTERFACE_DISCONNECT = 34 -CMSG_BLKIF_FE_INTERFACE_QUERY = 35 - -CMSG_BLKIF_BE_CREATE = 0 -CMSG_BLKIF_BE_DESTROY = 1 -CMSG_BLKIF_BE_CONNECT = 2 -CMSG_BLKIF_BE_DISCONNECT = 3 -CMSG_BLKIF_BE_VBD_CREATE = 4 -CMSG_BLKIF_BE_VBD_DESTROY = 5 -CMSG_BLKIF_BE_DRIVER_STATUS = 32 - -BLKIF_DRIVER_STATUS_DOWN = 0 -BLKIF_DRIVER_STATUS_UP = 1 - -BLKIF_INTERFACE_STATUS_CLOSED = 0 #/* Interface doesn't exist. */ -BLKIF_INTERFACE_STATUS_DISCONNECTED = 1 #/* Exists but is disconnected. */ -BLKIF_INTERFACE_STATUS_CONNECTED = 2 #/* Exists and is connected. */ -BLKIF_INTERFACE_STATUS_CHANGED = 3 #/* A device has been added or removed. 
*/ - -BLKIF_BE_STATUS_OKAY = 0 -BLKIF_BE_STATUS_ERROR = 1 -BLKIF_BE_STATUS_INTERFACE_EXISTS = 2 -BLKIF_BE_STATUS_INTERFACE_NOT_FOUND = 3 -BLKIF_BE_STATUS_INTERFACE_CONNECTED = 4 -BLKIF_BE_STATUS_VBD_EXISTS = 5 -BLKIF_BE_STATUS_VBD_NOT_FOUND = 6 -BLKIF_BE_STATUS_OUT_OF_MEMORY = 7 -BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND = 8 -BLKIF_BE_STATUS_MAPPING_ERROR = 9 - -blkif_formats = { - 'blkif_be_connect_t': - (CMSG_BLKIF_BE, CMSG_BLKIF_BE_CONNECT), - # Connect be to fe (in response to blkif_fe_interface_connect_t). - - 'blkif_be_create_t': - (CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE), - # Create be. - - 'blkif_be_disconnect_t': - (CMSG_BLKIF_BE, CMSG_BLKIF_BE_DISCONNECT), - # Disconnect be from fe. - - 'blkif_be_destroy_t': - (CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY), - # Destroy be (after disconnect). - # Make be do this even if no disconnect (and destroy all vbd too). - - 'blkif_be_vbd_create_t': - (CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE), - # Create a vbd device. - - 'blkif_be_vbd_destroy_t': - (CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_DESTROY), - # Destroy a vbd. - - # Add message to query be for state and vbds. - - 'blkif_fe_interface_status_t': - (CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_STATUS), - # Notify device status to fe. - # Also used to notify 'any' device change with status BLKIF_INTERFACE_STATUS_CHANGED. - - 'blkif_fe_driver_status_t': - (CMSG_BLKIF_FE, CMSG_BLKIF_FE_DRIVER_STATUS), - # Comes from fe, treated as notifying that fe has come up/changed. - # Xend sets be(s) to BLKIF_INTERFACE_STATUS_DISCONNECTED, - # sends blkif_fe_interface_status_t to fe (from each be). - # - # Reply with i/f count. - # The i/f sends probes (using -ve trick), we reply with the info. - - 'blkif_fe_interface_connect_t': - (CMSG_BLKIF_FE, CMSG_BLKIF_FE_INTERFACE_CONNECT), - # Comes from fe, passing shmem frame to use for be. - # fe sends when gets blkif_fe_interface_status_t with state NETIF_INTERFACE_STATUS_DISCONNECTED. - # Xend creates event channel and notifies be. 
- # Then notifies fe of event channel with blkif_fe_interface_status_t. - - # Add message to kick fe to probe for devices. - # Just report new devices to fe? - - # - # Add message for fe to probe a device. - # And probing with id -1 should return first. - # And probing with id -n should return first device with id > n. - - # Add message to query fe for state and vbds. -} - -msg_formats.update(blkif_formats) - -#============================================================================ -# Network interface message types. -#============================================================================ - -CMSG_NETIF_BE = 3 -CMSG_NETIF_FE = 4 - -CMSG_NETIF_FE_INTERFACE_STATUS = 0 -CMSG_NETIF_FE_DRIVER_STATUS = 32 -CMSG_NETIF_FE_INTERFACE_CONNECT = 33 -CMSG_NETIF_FE_INTERFACE_DISCONNECT = 34 -CMSG_NETIF_FE_INTERFACE_QUERY = 35 - -CMSG_NETIF_BE_CREATE = 0 -CMSG_NETIF_BE_DESTROY = 1 -CMSG_NETIF_BE_CONNECT = 2 -CMSG_NETIF_BE_DISCONNECT = 3 -CMSG_NETIF_BE_CREDITLIMIT = 4 -CMSG_NETIF_BE_DRIVER_STATUS = 32 - -NETIF_INTERFACE_STATUS_CLOSED = 0 #/* Interface doesn't exist. */ -NETIF_INTERFACE_STATUS_DISCONNECTED = 1 #/* Exists but is disconnected. */ -NETIF_INTERFACE_STATUS_CONNECTED = 2 #/* Exists and is connected. */ -NETIF_INTERFACE_STATUS_CHANGED = 3 #/* A device has been added or removed. 
*/ - -NETIF_DRIVER_STATUS_DOWN = 0 -NETIF_DRIVER_STATUS_UP = 1 - -netif_formats = { - 'netif_be_connect_t': - (CMSG_NETIF_BE, CMSG_NETIF_BE_CONNECT), - - 'netif_be_create_t': - (CMSG_NETIF_BE, CMSG_NETIF_BE_CREATE), - - 'netif_be_disconnect_t': - (CMSG_NETIF_BE, CMSG_NETIF_BE_DISCONNECT), - - 'netif_be_destroy_t': - (CMSG_NETIF_BE, CMSG_NETIF_BE_DESTROY), - - 'netif_be_creditlimit_t': - (CMSG_NETIF_BE, CMSG_NETIF_BE_CREDITLIMIT), - - 'netif_be_driver_status_t': - (CMSG_NETIF_BE, CMSG_NETIF_BE_DRIVER_STATUS), - - 'netif_fe_driver_status_t': - (CMSG_NETIF_FE, CMSG_NETIF_FE_DRIVER_STATUS), - - 'netif_fe_interface_connect_t': - (CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_CONNECT), - - 'netif_fe_interface_status_t': - (CMSG_NETIF_FE, CMSG_NETIF_FE_INTERFACE_STATUS), - } - -msg_formats.update(netif_formats) - -#============================================================================ -# USB interface message types. -#============================================================================ - -CMSG_USBIF_BE = 8 -CMSG_USBIF_FE = 9 - -CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED = 0 - -CMSG_USBIF_FE_DRIVER_STATUS_CHANGED = 32 -CMSG_USBIF_FE_INTERFACE_CONNECT = 33 -CMSG_USBIF_FE_INTERFACE_DISCONNECT = 34 - -USBIF_DRIVER_STATUS_DOWN = 0 -USBIF_DRIVER_STATUS_UP = 1 - -USBIF_INTERFACE_STATUS_DESTROYED = 0 #/* Interface doesn't exist. */ -USBIF_INTERFACE_STATUS_DISCONNECTED = 1 #/* Exists but is disconnected. */ -USBIF_INTERFACE_STATUS_CONNECTED = 2 #/* Exists and is connected. 
*/ - -CMSG_USBIF_BE_CREATE = 0 -CMSG_USBIF_BE_DESTROY = 1 -CMSG_USBIF_BE_CONNECT = 2 - -CMSG_USBIF_BE_DISCONNECT = 3 -CMSG_USBIF_BE_CLAIM_PORT = 4 -CMSG_USBIF_BE_RELEASE_PORT = 5 - -CMSG_USBIF_BE_DRIVER_STATUS_CHANGED = 32 - -USBIF_BE_STATUS_OKAY = 0 -USBIF_BE_STATUS_ERROR = 1 - -USBIF_BE_STATUS_INTERFACE_EXISTS = 2 -USBIF_BE_STATUS_INTERFACE_NOT_FOUND = 3 -USBIF_BE_STATUS_INTERFACE_CONNECTED = 4 -USBIF_BE_STATUS_OUT_OF_MEMORY = 7 -USBIF_BE_STATUS_MAPPING_ERROR = 9 - -usbif_formats = { - 'usbif_be_create_t': - (CMSG_USBIF_BE, CMSG_USBIF_BE_CREATE), - - 'usbif_be_destroy_t': - (CMSG_USBIF_BE, CMSG_USBIF_BE_DESTROY), - - 'usbif_be_connect_t': - (CMSG_USBIF_BE, CMSG_USBIF_BE_CONNECT), - - 'usbif_be_disconnect_t': - (CMSG_USBIF_BE, CMSG_USBIF_BE_DISCONNECT), - - 'usbif_be_claim_port_t': - (CMSG_USBIF_BE, CMSG_USBIF_BE_CLAIM_PORT), - - 'usbif_be_release_port_t': - (CMSG_USBIF_BE, CMSG_USBIF_BE_RELEASE_PORT), - - 'usbif_fe_interface_status_changed_t': - (CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED), - - 'usbif_fe_driver_status_changed_t': - (CMSG_USBIF_FE, CMSG_USBIF_FE_DRIVER_STATUS_CHANGED), - - 'usbif_fe_interface_connect_t': - (CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_CONNECT), - - 'usbif_fe_interface_disconnect_t': - (CMSG_USBIF_FE, CMSG_USBIF_FE_INTERFACE_DISCONNECT), - - } - -msg_formats.update(usbif_formats) - -#============================================================================ -# Domain shutdown message types. 
-#============================================================================ - -CMSG_SHUTDOWN = 6 - -CMSG_SHUTDOWN_POWEROFF = 0 -CMSG_SHUTDOWN_REBOOT = 1 -CMSG_SHUTDOWN_SUSPEND = 2 -CMSG_SHUTDOWN_SYSRQ = 3 - -STOPCODE_shutdown = 0 -STOPCODE_reboot = 1 -STOPCODE_suspend = 2 -STOPCODE_sysrq = 3 - -shutdown_formats = { - 'shutdown_poweroff_t': - (CMSG_SHUTDOWN, CMSG_SHUTDOWN_POWEROFF), - - 'shutdown_reboot_t': - (CMSG_SHUTDOWN, CMSG_SHUTDOWN_REBOOT), - - 'shutdown_suspend_t': - (CMSG_SHUTDOWN, CMSG_SHUTDOWN_SUSPEND), - - 'shutdown_sysrq_t': - (CMSG_SHUTDOWN, CMSG_SHUTDOWN_SYSRQ) - } - -msg_formats.update(shutdown_formats) - -#============================================================================ -# Domain memory reservation message. -#============================================================================ - -CMSG_MEM_REQUEST = 7 -CMSG_MEM_REQUEST_SET = 0 - -mem_request_formats = { - 'mem_request_t': - (CMSG_MEM_REQUEST, CMSG_MEM_REQUEST_SET) - } - -msg_formats.update(mem_request_formats) - -#============================================================================ -# Domain vcpu hotplug message. -#============================================================================ - -CMSG_VCPU_HOTPLUG = 10 -CMSG_VCPU_HOTPLUG_OFF = 0 -CMSG_VCPU_HOTPLUG_ON = 1 - -vcpu_hotplug_formats = { - 'vcpu_hotplug_off_t': - (CMSG_VCPU_HOTPLUG, CMSG_VCPU_HOTPLUG_OFF), - - 'vcpu_hotplug_on_t': - (CMSG_VCPU_HOTPLUG, CMSG_VCPU_HOTPLUG_ON) - } - -msg_formats.update(vcpu_hotplug_formats) - -#============================================================================ -class Msg: - pass - -_next_msgid = 0 - -def nextid(): - """Generate the next message id. - - @return: message id - @rtype: int - """ - global _next_msgid - _next_msgid += 1 - return _next_msgid - -def packMsg(ty, params): - """Pack a message. - Any I{mac} parameter is passed in as an int[6] array and converted. 
- - @param ty: message type name - @type ty: string - @param params: message parameters - @type params: dicy - @return: message - @rtype: xu message - """ - msgid = nextid() - if DEBUG: print '>packMsg', msgid, ty, params - (major, minor) = msg_formats[ty] - args = {} - for (k, v) in params.items(): - if k in ['mac', 'be_mac']: - for i in range(0, 6): - args['%s[%d]' % (k, i)] = v[i] - else: - args[k] = v - msg = xu.message(major, minor, msgid, args) - if DEBUG: print '<packMsg', msg.get_header()['id'], ty, args - return msg - -def unpackMsg(ty, msg): - """Unpack a message. - Any mac addresses in the message are converted to int[6] array - in the return dict. - - @param ty: message type - @type ty: string - @param msg: message - @type msg: xu message - @return: parameters - @rtype: dict - """ - args = msg.get_payload() - if DEBUG: print '>unpackMsg', args - if isinstance(args, types.StringType): - args = {'value': args} - else: - mac = [0, 0, 0, 0, 0, 0] - macs = [] - for (k, v) in args.items(): - if k.startswith('mac['): - macs.append(k) - i = int(k[4:5]) - mac[i] = v - else: - pass - if macs: - args['mac'] = mac - #print 'macs=', macs - #print 'args=', args - for k in macs: - del args[k] - if DEBUG: - msgid = msg.get_header()['id'] - print '<unpackMsg', msgid, ty, args - return args - -def msgTypeName(ty, subty): - """Convert a message type, subtype pair to a message type name. - - @param ty: message type - @type ty: int - @param subty: message subtype - @type ty: int - @return: message type name (or None) - @rtype: string or None - """ - for (name, info) in msg_formats.items(): - if info[0] == ty and info[1] == subty: - return name - return None - -def printMsg(msg, out=sys.stdout, all=False): - """Print a message. 
- - @param msg: message - @type msg: xu message - @param out: where to print to - @type out: stream - @param all: print payload if true - @type all: bool - """ - hdr = msg.get_header() - major = hdr['type'] - minor = hdr['subtype'] - msgid = hdr['id'] - ty = msgTypeName(major, minor) - print >>out, 'message:', 'type=', ty, '%d:%d' % (major, minor), 'id=%d' % msgid - if all: - print >>out, 'payload=', msg.get_payload() - - -def getMessageType(msg): - """Get a 2-tuple of the message type and subtype. - - @param msg: message - @type msg: xu message - @return: type info - @rtype: (int, int) - """ - hdr = msg.get_header() - return (hdr['type'], hdr.get('subtype')) - -def getMessageId(msg): - hdr = msg.get_header() - return hdr['id'] diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/Makefile --- a/tools/xcs/Makefile Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,49 +0,0 @@ -# Makefile for XCS -# Andrew Warfield, 2004 - -XEN_ROOT=../.. -include $(XEN_ROOT)/tools/Rules.mk - -XCS_INSTALL_DIR = /usr/sbin - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 - -CC = gcc -CFLAGS = -Wall -Werror -g3 -D _XOPEN_SOURCE=600 - -CFLAGS += -I $(XEN_XC) -CFLAGS += -I $(XEN_LIBXC) - -SRCS := -SRCS += ctrl_interface.c -SRCS += bindings.c -SRCS += connection.c -SRCS += evtchn.c -SRCS += xcs.c - -HDRS = $(wildcard *.h) -OBJS = $(patsubst %.c,%.o,$(SRCS)) -BIN = xcs - -all: $(BIN) xcsdump - -clean: - $(RM) *.a *.so *.o *.rpm $(BIN) xcsdump - -xcsdump: xcsdump.c dump.c - $(CC) $(CFLAGS) -o xcsdump xcsdump.c -L$(XEN_LIBXC) \ - ctrl_interface.c evtchn.c dump.c -lxenctrl - -$(BIN): $(OBJS) - $(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -lxenctrl - -$(OBJS): $(HDRS) - -install: xcs xcsdump - $(INSTALL_DIR) -p $(DESTDIR)/$(XCS_INSTALL_DIR) - $(INSTALL_DIR) -p $(DESTDIR)/usr/include - $(INSTALL_PROG) xcs $(DESTDIR)/$(XCS_INSTALL_DIR) - $(INSTALL_PROG) xcsdump $(DESTDIR)/$(XCS_INSTALL_DIR) - $(INSTALL_PROG) xcs_proto.h $(DESTDIR)/usr/include diff -r 
10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/bindings.c --- a/tools/xcs/bindings.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,179 +0,0 @@ -/* bindings.c - * - * Manage subscriptions for the control interface switch. - * - * (c) 2004, Andrew Warfield - * - */ - -/* Interfaces: - * - * xcs_bind (port, type, connection) - * - Register connection to receive messages of this type. - * xcs_unbind (port, type, connection) - * - Remove an existing registration. (Must be an exact match) - * xcs_lookup (port, type) - * - Return a list of connections matching a registration. - * - * - All connections have a connection.bindings list of current bindings. - * - (port, type) pairs may be wildcarded with -1. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#include "xcs.h" - - -typedef struct binding_ent_st { - connection_t *con; - struct binding_ent_st *next; -} binding_ent_t; - -#define BINDING_TABLE_SIZE 1024 - -static binding_ent_t *binding_table[BINDING_TABLE_SIZE]; - -#define PORT_WILD(_ent) ((_ent)->port == PORT_WILDCARD) -#define TYPE_WILD(_ent) ((_ent)->type == TYPE_WILDCARD) -#define FULLY_WILD(_ent) (PORT_WILD(_ent) && TYPE_WILD(_ent)) - -#define BINDING_HASH(_key) \ - ((((_key)->port * 11) ^ (_key)->type) % BINDING_TABLE_SIZE) - - -void init_bindings(void) -{ - memset(binding_table, 0, sizeof(binding_table)); -} - -static int table_add(binding_ent_t *table[], - connection_t *con, - binding_key_t *key) -{ - binding_ent_t **curs, *ent; - - curs = &table[BINDING_HASH(key)]; - - while (*curs != NULL) { - if ((*curs)->con == con) { - DPRINTF("Tried to add an ent that already existed.\n"); - goto done; - } - curs = &(*curs)->next; - } - - if (connection_add_binding(con, key) != 0) - { - DPRINTF("couldn't add binding on connection (%lu)\n", con->id); - goto fail; - } - ent = (binding_ent_t *)malloc(sizeof(binding_ent_t)); - if (ent == 0) { - DPRINTF("couldn't alloc binding ent!\n"); - goto fail; - } - ent->con = con; 
- ent->next = NULL; - *curs = ent; - -done: - return 0; - -fail: - return -1; -} - - -static inline int binding_has_colliding_hashes(connection_t *con, - binding_key_t *key) -{ - int hash, count = 0; - binding_key_ent_t *ent; - - ent = con->bindings; - hash = BINDING_HASH(key); - - while (ent != NULL) { - if (BINDING_HASH(&ent->key) == hash) count ++; - ent = ent->next; - } - - return (count > 1); -} -static int table_remove(binding_ent_t *table[], - connection_t *con, - binding_key_t *key) -{ - binding_ent_t **curs, *ent; - - if (!binding_has_colliding_hashes(con, key)) - { - - curs = &table[BINDING_HASH(key)]; - - while ((*curs != NULL) && ((*curs)->con != con)) - curs = &(*curs)->next; - - if (*curs != NULL) { - ent = *curs; - *curs = (*curs)->next; - free(ent); - } - } - - connection_remove_binding(con, key); - - return 0; -} - -int xcs_bind(connection_t *con, int port, u16 type) -{ - binding_key_t key; - - key.port = port; - key.type = type; - - return table_add(binding_table, con, &key); -} - -int xcs_unbind(connection_t *con, int port, u16 type) -{ - binding_key_t key; - - key.port = port; - key.type = type; - - return table_remove(binding_table, con, &key); -} - - -static void for_each_binding(binding_ent_t *list, binding_key_t *key, - void (*f)(connection_t *, void *), void *arg) -{ - while (list != NULL) - { - if (connection_has_binding(list->con, key)) - f(list->con, arg); - list = list->next; - } -} - -void xcs_lookup(int port, u16 type, void (*f)(connection_t *, void *), - void *arg) -{ - binding_key_t key; - - key.port = port; key.type = type; - for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg); - - key.port = port; key.type = TYPE_WILDCARD; - for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg); - - key.port = PORT_WILDCARD; key.type = type; - for_each_binding(binding_table[BINDING_HASH(&key)], &key, f, arg); - - key.port = PORT_WILDCARD; key.type = TYPE_WILDCARD; - for_each_binding(binding_table[BINDING_HASH(&key)], 
&key, f, arg); -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/connection.c --- a/tools/xcs/connection.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,157 +0,0 @@ -/* - * connection.c - * - * State associated with a client connection to xcs. - * - * Copyright (c) 2004, Andrew Warfield - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include "xcs.h" - -connection_t *connection_list = NULL; - -#define CONNECTED(_c) (((_c)->ctrl_fd != -1) || ((_c)->data_fd != -1)) - -connection_t *get_con_by_session(unsigned long session_id) -{ - connection_t **c, *ent = NULL; - - c = &connection_list; - - DPRINTF("looking for id: %lu : %lu\n", session_id, (*c)->id); - - while (*c != NULL) - { - if ((*c)->id == session_id) - return (*c); - c = &(*c)->next; - } - - return ent; -} - -connection_t *connection_new() -{ - connection_t *con; - - con = (connection_t *)malloc(sizeof(connection_t)); - if (con == NULL) - { - DPRINTF("couldn't allocate a new connection\n"); - return NULL; - } - - con->bindings = NULL; - con->data_fd = -1; - con->ctrl_fd = -1; - - /* connections need a unique session id. - * - this approach probably gets fixed later, but for the moment - * is unique, and clearly identifies a connection. - */ - con->id = (unsigned long)con; - - /* add it to the connection list */ - con->next = connection_list; - connection_list = con; - - return (con); -} - -void connection_free(connection_t *con) -{ - /* first free all subscribed bindings: */ - - while (con->bindings != NULL) - xcs_unbind(con, con->bindings->key.port, con->bindings->key.type); - - /* now free the connection. 
*/ - free(con); -} - -int connection_add_binding(connection_t *con, binding_key_t *key) -{ - binding_key_ent_t *key_ent; - - key_ent = (binding_key_ent_t *)malloc(sizeof(binding_key_ent_t)); - if (key_ent == NULL) - { - DPRINTF("couldn't alloc key in connection_add_binding\n"); - return -1; - } - - key_ent->key = *key; - key_ent->next = con->bindings; - con->bindings = key_ent; - - return 0; -} - -int connection_remove_binding(connection_t *con, binding_key_t *key) -{ - binding_key_ent_t *key_ent; - binding_key_ent_t **curs = &con->bindings; - - while ((*curs != NULL) && (!BINDING_KEYS_EQUAL(&(*curs)->key, key))) - curs = &(*curs)->next; - - if (*curs != NULL) { - key_ent = *curs; - *curs = (*curs)->next; - free(key_ent); - } - - return 0; -} - - -int connection_has_binding(connection_t *con, binding_key_t *key) -{ - binding_key_ent_t *ent; - int ret = 0; - - ent = con->bindings; - - while (ent != NULL) - { - if (BINDING_KEYS_EQUAL(key, &ent->key)) - { - ret = 1; - break; - } - ent = ent->next; - } - - return ret; -} - - -void gc_connection_list(void) -{ - connection_t **c, *ent = NULL; - struct timeval now, delta; - - c = &connection_list; - gettimeofday(&now, NULL); - - while ( *c != NULL ) - { - if ( !CONNECTED(*c) ) - { - timersub(&now, &(*c)->disconnect_time, &delta); - if ( delta.tv_sec >= XCS_SESSION_TIMEOUT ) - { - DPRINTF(" : Freeing connection %lu after %lds\n", - (*c)->id, delta.tv_sec); - ent = *c; - *c = (*c)->next; - connection_free(ent); - continue; - } - } - c = &(*c)->next; - } -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/ctrl_interface.c --- a/tools/xcs/ctrl_interface.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,269 +0,0 @@ -/* control_interface.c - * - * Interfaces to control message rings to VMs. - * - * Most of this is directly based on the original xu interface to python - * written by Keir Fraser. 
- * - * (c) 2004, Andrew Warfield - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <errno.h> -#include "xcs.h" - -static int xc_handle = -1; - -/* Called at start-of-day when using the control channel interface. */ -int ctrl_chan_init(void) -{ - if ( (xc_handle = xc_interface_open()) == -1 ) - { - DPRINTF("Could not open Xen control interface"); - return -1; - } - - return 0; -} - -static control_if_t *map_control_interface(int fd, unsigned long pfn, - u32 dom) -{ - char *vaddr = xc_map_foreign_range( fd, dom, PAGE_SIZE, - PROT_READ|PROT_WRITE, pfn ); - if ( vaddr == NULL ) - return NULL; - return (control_if_t *)(vaddr + 2048); -} - -static void unmap_control_interface(int fd, control_if_t *c) -{ - char *vaddr = (char *)c - 2048; - (void)munmap(vaddr, PAGE_SIZE); -} - -int ctrl_chan_notify(control_channel_t *cc) -{ - return xc_evtchn_send(xc_handle, cc->local_port); -} - -int ctrl_chan_read_request(control_channel_t *cc, xcs_control_msg_t *dmsg) -{ - control_msg_t *smsg; - RING_IDX c = cc->tx_ring.req_cons; - - if ( !RING_HAS_UNCONSUMED_REQUESTS(&cc->tx_ring) ) - { - DPRINTF("no request to read\n"); - return -1; - } - - rmb(); /* make sure we see the data associated with the request */ - smsg = RING_GET_REQUEST(&cc->tx_ring, c); - memcpy(&dmsg->msg, smsg, sizeof(*smsg)); - if ( dmsg->msg.length > sizeof(dmsg->msg.msg) ) - dmsg->msg.length = sizeof(dmsg->msg.msg); - cc->tx_ring.req_cons++; - return 0; -} - -int ctrl_chan_write_request(control_channel_t *cc, - xcs_control_msg_t *smsg) -{ - control_msg_t *dmsg; - RING_IDX p = cc->rx_ring.req_prod_pvt; - - if ( RING_FULL(&cc->rx_ring) ) - { - DPRINTF("no space to write request"); - return -ENOSPC; - } - - dmsg = RING_GET_REQUEST(&cc->rx_ring, p); - memcpy(dmsg, &smsg->msg, sizeof(*dmsg)); - - wmb(); - cc->rx_ring.req_prod_pvt++; - RING_PUSH_REQUESTS(&cc->rx_ring); - - return 0; -} - -int ctrl_chan_read_response(control_channel_t *cc, 
xcs_control_msg_t *dmsg) -{ - control_msg_t *smsg; - RING_IDX c = cc->rx_ring.rsp_cons; - - if ( !RING_HAS_UNCONSUMED_RESPONSES(&cc->rx_ring) ) - { - DPRINTF("no response to read"); - return -1; - } - - rmb(); /* make sure we see the data associated with the request */ - smsg = RING_GET_RESPONSE(&cc->rx_ring, c); - memcpy(&dmsg->msg, smsg, sizeof(*smsg)); - if ( dmsg->msg.length > sizeof(dmsg->msg.msg) ) - dmsg->msg.length = sizeof(dmsg->msg.msg); - cc->rx_ring.rsp_cons++; - return 0; -} - -int ctrl_chan_write_response(control_channel_t *cc, - xcs_control_msg_t *smsg) -{ - control_msg_t *dmsg; - RING_IDX p = cc->tx_ring.rsp_prod_pvt; - - /* akw: if the ring is synchronous, you should never need this test! */ - /* (but it was in the original code... ) */ - if ( cc->tx_ring.req_cons == cc->tx_ring.rsp_prod_pvt ) - { - DPRINTF("no space to write response"); - return -ENOSPC; - } - - dmsg = RING_GET_RESPONSE(&cc->tx_ring, p); - memcpy(dmsg, &smsg->msg, sizeof(*dmsg)); - - wmb(); - cc->tx_ring.rsp_prod_pvt++; - RING_PUSH_RESPONSES(&cc->tx_ring); - - return 0; -} - -int ctrl_chan_request_to_read(control_channel_t *cc) -{ - return (RING_HAS_UNCONSUMED_REQUESTS(&cc->tx_ring)); -} - -int ctrl_chan_space_to_write_request(control_channel_t *cc) -{ - return (!(RING_FULL(&cc->rx_ring))); -} - -int ctrl_chan_response_to_read(control_channel_t *cc) -{ - return (RING_HAS_UNCONSUMED_RESPONSES(&cc->rx_ring)); -} - -int ctrl_chan_space_to_write_response(control_channel_t *cc) -{ - /* again, there is something fishy here. 
*/ - return ( cc->tx_ring.req_cons != cc->tx_ring.rsp_prod_pvt ); -} - -int ctrl_chan_connect(control_channel_t *cc) -{ - xc_dominfo_t info; - - if ( cc->connected ) - { - return 0; - } - - if ( (xc_domain_getinfo(xc_handle, cc->remote_dom, 1, &info) != 1) || - (info.domid != cc->remote_dom) ) - { - DPRINTF("Failed to obtain domain status"); - return -1; - } - - cc->interface = - map_control_interface(xc_handle, info.shared_info_frame, - cc->remote_dom); - - if ( cc->interface == NULL ) - { - DPRINTF("Failed to map domain control interface"); - return -1; - } - - /* Synchronise ring indexes. */ - BACK_RING_ATTACH(&cc->tx_ring, &cc->interface->tx_ring, CONTROL_RING_MEM); - FRONT_RING_ATTACH(&cc->rx_ring, &cc->interface->rx_ring, CONTROL_RING_MEM); - - cc->connected = 1; - - return 0; -} - -void ctrl_chan_disconnect(control_channel_t *cc) -{ - if ( cc->connected ) - unmap_control_interface(xc_handle, cc->interface); - cc->connected = 0; -} - - -control_channel_t *ctrl_chan_new(u32 dom, int local_port, int remote_port) -{ - control_channel_t *cc; - - cc = (control_channel_t *)malloc(sizeof(control_channel_t)); - if ( cc == NULL ) return NULL; - - cc->connected = 0; - cc->remote_dom = dom; - - if ( dom == 0 ) - { - /* - * The control-interface event channel for DOM0 is already set up. - * We use an ioctl to discover the port at our end of the channel. - */ - local_port = ioctl(xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, - NULL); - remote_port = -1; /* We don't need the remote end of the DOM0 link. 
*/ - if ( local_port < 0 ) - { - DPRINTF("Could not open channel to DOM0"); - goto fail; - } - } - else if ( xc_evtchn_bind_interdomain(xc_handle, - DOMID_SELF, dom, - &local_port, &remote_port) != 0 ) - { - DPRINTF("Could not open channel to domain"); - goto fail; - } - - cc->local_port = local_port; - cc->remote_port = remote_port; - - if ( ctrl_chan_connect(cc) != 0 ) - goto fail; - - return cc; - - fail: - if ( dom != 0 ) - (void)xc_evtchn_close(xc_handle, DOMID_SELF, local_port); - - free(cc); - - return NULL; -} - -void ctrl_chan_free(control_channel_t *cc) -{ - ctrl_chan_disconnect(cc); - if ( cc->remote_dom != 0 ) - (void)xc_evtchn_close(xc_handle, DOMID_SELF, cc->local_port); - free(cc); -} - - -/* other libxc commands: */ - -int ctrl_chan_bind_virq(int virq, int *port) -{ - return xc_evtchn_bind_virq(xc_handle, virq, port); -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/dump.c --- a/tools/xcs/dump.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,506 +0,0 @@ -/*\ - * Copyright (C) International Business Machines Corp., 2005 - * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; under version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -\*/ - -#include <stdio.h> -#include <stdarg.h> - -#include "dump.h" - -#define str(a) # a -#define error(a, ...) 
do { \ - _error("%s:%s():L%d: " a, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__);\ - exit(1); \ -} while (0) -#define warn(a, ...) do { \ - _error("%s:%s():L%d: " a, __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__);\ -} while (0) -#define debug(a, ...) do { \ - _error(a, ## __VA_ARGS__);\ -} while (0) - -void _error(const char *fmt, ...); - -#define debug_begin(a, b) debug("CMSG_" a "_" b " {") -#define debug_end(a, b) debug("}") -#define debug_field(a, b, c) debug("\t." str(b) " = " c, a->b) -#define debug_field_mac(a, b) \ - debug("\t." str(b) " = %.2x:%.2x:%.2x:%.2x:%.2x:%.2x", \ - a->b[0], a->b[1], a->b[2], a->b[3], a->b[4], a->b[5]) - -#define debug_dump(a, b, c) debug_hex("\t." str(b) " = ", a->b, a->c) - -#include <stdint.h> -#include <string.h> -#include <stdio.h> -#include <ctype.h> - -static int strcount(const char *str, char ch) -{ - int i; - int count = 0; - - for (i = 0; str[i]; i++) { - if (str[i] == ch) { - count++; - } - } - - return count; -} - -void debug_hex(const char *info, const uint8_t *data, size_t length) -{ - int indent = strlen(info) + (strcount(info, '\t') * 8 - 1); - int words_per_row = (2 * (80 - indent - 2) / 7) & ~1; - size_t i; - - for (i = 0; i < length; i += words_per_row) { - size_t ind; - - if (i == 0) { - fprintf(stderr, "%s", info); - } else { - int j; - for (j = 0; j < indent; j++) { - fprintf(stderr, " "); - } - } - - for (ind = 0; ind < words_per_row; ind++) { - if (ind % 2 == 0) { - fprintf(stderr, " "); - } - - if (i + ind < length) { - fprintf(stderr, "%.2X", data[i + ind]); - } else { - fprintf(stderr, " "); - } - } - - fprintf(stderr, " "); - - for (ind = 0; ind < words_per_row; ind++) { - if (i + ind < length) { - if (isprint(data[i + ind])) { - fprintf(stderr, "%c", data[i + ind]); - } else { - fprintf(stderr, "."); - } - } else { - fprintf(stderr, " "); - } - } - fprintf(stderr, "\n"); - } -} - -void dump_msg(const control_msg_t *msg, uint64_t flags) -{ - if ((flags & (1 << msg->type)) == 0) { - return; - } - - 
switch (msg->type) { - case CMSG_CONSOLE: - if (msg->subtype == CMSG_CONSOLE_DATA) { - debug_begin("CONSOLE", "DATA"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("CONSOLE", "DATA"); - } else { - debug_begin("CONSOLE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("CONSOLE", "UNKNOWN"); - } - break; - case CMSG_BLKIF_BE: - if (msg->subtype == CMSG_BLKIF_BE_CREATE) { - blkif_be_create_t *load; - load = (blkif_be_create_t *)msg->msg; - debug_begin("BLKIF_BE", "CREATE"); - debug_field(load, domid, "%u"); - debug_field(load, blkif_handle, "%u"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "CREATE"); - } else if (msg->subtype == CMSG_BLKIF_BE_DESTROY) { - blkif_be_destroy_t *load; - load = (blkif_be_destroy_t *)msg->msg; - debug_begin("BLKIF_BE", "DESTROY"); - debug_field(load, domid, "%u"); - debug_field(load, blkif_handle, "%u"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "DESTROY"); - } else if (msg->subtype == CMSG_BLKIF_BE_CONNECT) { - blkif_be_connect_t *load; - load = (blkif_be_connect_t *)msg->msg; - debug_begin("BLKIF_BE", "CONNECT"); - debug_field(load, domid, "%u"); - debug_field(load, blkif_handle, "%u"); - debug_field(load, shmem_frame, "%lu"); - debug_field(load, evtchn, "%u"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "CONNECT"); - } else if (msg->subtype == CMSG_BLKIF_BE_DISCONNECT) { - blkif_be_disconnect_t *load; - load = (blkif_be_disconnect_t *)msg->msg; - debug_begin("BLKIF_BE", "DISCONNECT"); - debug_field(load, domid, "%u"); - debug_field(load, blkif_handle, "%u"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "DISCONNECT"); - } else if (msg->subtype == CMSG_BLKIF_BE_VBD_CREATE) { - blkif_be_vbd_create_t *load; - load = (blkif_be_vbd_create_t *)msg->msg; - debug_begin("BLKIF_BE", "VBD_CREATE"); - debug_field(load, domid, "%u"); - debug_field(load, blkif_handle, "%u"); - 
debug_field(load, pdevice, "%u"); - debug_field(load, vdevice, "%u"); - debug_field(load, readonly, "%u"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "VBD_CREATE"); - } else if (msg->subtype == CMSG_BLKIF_BE_VBD_DESTROY) { - blkif_be_vbd_destroy_t *load; - load = (blkif_be_vbd_destroy_t *)msg->msg; - debug_begin("BLKIF_BE", "VBD_DESTROY"); - debug_field(load, domid, "%u"); - debug_field(load, blkif_handle, "%u"); - debug_field(load, vdevice, "%u"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "VBD_DESTROY"); - } else if (msg->subtype == CMSG_BLKIF_BE_DRIVER_STATUS) { - blkif_be_driver_status_t *load; - load = (blkif_be_driver_status_t *)msg->msg; - debug_begin("BLKIF_BE", "DRIVER_STATUS"); - debug_field(load, status, "%u"); - debug_end("BLKIF_BE", "DRIVER_STATUS"); - } else { - debug_begin("BLKIF_BE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("BLKIF_BE", "UNKNOWN"); - } - break; - case CMSG_BLKIF_FE: - if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_STATUS) { - blkif_fe_interface_status_t *load; - load = (blkif_fe_interface_status_t *)msg->msg; - debug_begin("BLKIF_FE", "INTERFACE_STATUS"); - debug_field(load, handle, "%u"); - debug_field(load, status, "%u"); - debug_field(load, evtchn, "%u"); - debug_field(load, domid, "%u"); - debug_end("BLKIF_FE", "INTERFACE_STATUS"); - } else if (msg->subtype == CMSG_BLKIF_FE_DRIVER_STATUS) { - blkif_fe_driver_status_t *load; - load = (blkif_fe_driver_status_t *)msg->msg; - debug_begin("BLKIF_FE", "DRIVER_STATUS"); - debug_field(load, status, "%u"); - debug_field(load, max_handle, "%u"); - debug_end("BLKIF_FE", "DRIVER_STATUS"); - } else if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT) { - blkif_fe_interface_connect_t *load; - load = (blkif_fe_interface_connect_t *)msg->msg; - debug_begin("BLKIF_FE", "INTERFACE_CONNECT"); - debug_field(load, handle, "%u"); - debug_field(load, shmem_frame, "%lu"); - 
debug_end("BLKIF_FE", "INTERFACE_CONNECT"); - } else if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_DISCONNECT) { - blkif_fe_interface_disconnect_t *load; - load = (blkif_fe_interface_disconnect_t *)msg->msg; - debug_begin("BLKIF_FE", "INTERFACE_DISCONNECT"); - debug_field(load, handle, "%u"); - debug_end("BLKIF_FE", "INTERFACE_DISCONNECT"); - } else if (msg->subtype == CMSG_BLKIF_FE_INTERFACE_QUERY) { - blkif_fe_interface_query_t *load; - load = (blkif_fe_interface_query_t *)msg->msg; - debug_begin("BLKIF_FE", "INTERFACE_QUERY"); - debug_field(load, handle, "%u"); - debug_field(load, status, "%u"); - debug_field(load, evtchn, "%u"); - debug_field(load, domid, "%u"); - debug_end("BLKIF_FE", "INTERFACE_QUERY"); - } else { - debug_begin("BLKIF_FE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("BLKIF_FE", "UNKNOWN"); - } - break; - case CMSG_NETIF_BE: - if (msg->subtype == CMSG_NETIF_BE_CREATE) { - netif_be_create_t *load; - load = (netif_be_create_t *)msg->msg; - debug_begin("NETIF_BE", "CREATE"); - debug_field(load, domid, "%u"); - debug_field(load, netif_handle, "%u"); - debug_field_mac(load, mac); - debug_field_mac(load, be_mac); - debug_field(load, status, "%u"); - debug_end("NETIF_BE", "CREATE"); - } else if (msg->subtype == CMSG_NETIF_BE_DESTROY) { - netif_be_destroy_t *load; - load = (netif_be_destroy_t *)msg->msg; - debug_begin("NETIF_BE", "DESTROY"); - debug_field(load, domid, "%u"); - debug_field(load, netif_handle, "%u"); - debug_field(load, status, "%u"); - debug_end("NETIF_BE", "DESTROY"); - } else if (msg->subtype == CMSG_NETIF_BE_CONNECT) { - netif_be_connect_t *load; - load = (netif_be_connect_t *)msg->msg; - debug_begin("NETIF_BE", "CONNECT"); - debug_field(load, domid, "%u"); - debug_field(load, netif_handle, "%u"); - debug_field(load, tx_shmem_frame, "%lu"); - debug_field(load, rx_shmem_frame, "%lu"); - debug_field(load, evtchn, "%u"); - debug_field(load, status, "%u"); - 
debug_end("NETIF_BE", "CONNECT"); - } else if (msg->subtype == CMSG_NETIF_BE_DISCONNECT) { - netif_be_disconnect_t *load; - load = (netif_be_disconnect_t *)msg->msg; - debug_begin("NETIF_BE", "DISCONNECT"); - debug_field(load, domid, "%u"); - debug_field(load, netif_handle, "%u"); - debug_field(load, status, "%u"); - debug_end("NETIF_BE", "DISCONNECT"); - } else if (msg->subtype == CMSG_NETIF_BE_DRIVER_STATUS) { - netif_be_driver_status_t *load; - load = (netif_be_driver_status_t *)msg->msg; - debug_begin("NETIF_BE", "DRIVER_STATUS"); - debug_field(load, status, "%u"); - debug_end("NETIF_BE", "DRIVER_STATUS"); - } else { - debug_begin("NETIF_BE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("NETIF_BE", "UNKNOWN"); - } - break; - case CMSG_NETIF_FE: - if (msg->subtype == CMSG_NETIF_FE_INTERFACE_STATUS) { - netif_fe_interface_status_t *load; - load = (netif_fe_interface_status_t *)msg->msg; - debug_begin("NETIF_FE", "INTERFACE_STATUS"); - debug_field(load, handle, "%u"); - debug_field(load, status, "%u"); - debug_field(load, evtchn, "%u"); - debug_field_mac(load, mac); - debug_field(load, domid, "%u"); - debug_end("NETIF_FE", "INTERFACE_STATUS"); - } else if (msg->subtype == CMSG_NETIF_FE_DRIVER_STATUS) { - netif_fe_driver_status_t *load; - load = (netif_fe_driver_status_t *)msg->msg; - debug_begin("NETIF_FE", "DRIVER_STATUS"); - debug_field(load, status, "%u"); - debug_field(load, max_handle, "%u"); - debug_end("NETIF_FE", "DRIVER_STATUS"); - } else if (msg->subtype == CMSG_NETIF_FE_INTERFACE_CONNECT) { - netif_fe_interface_connect_t *load; - load = (netif_fe_interface_connect_t *)msg->msg; - debug_begin("NETIF_FE", "INTERFACE_CONNECT"); - debug_field(load, handle, "%u"); - debug_field(load, tx_shmem_frame, "%lu"); - debug_field(load, rx_shmem_frame, "%lu"); - debug_end("NETIF_FE", "INTERFACE_CONNECT"); - } else if (msg->subtype == CMSG_NETIF_FE_INTERFACE_DISCONNECT) { - 
netif_fe_interface_disconnect_t *load; - load = (netif_fe_interface_disconnect_t *)msg->msg; - debug_begin("NETIF_FE", "INTERFACE_DISCONNECT"); - debug_field(load, handle, "%u"); - debug_end("NETIF_FE", "INTERFACE_DISCONNECT"); - } else if (msg->subtype == CMSG_NETIF_FE_INTERFACE_QUERY) { - netif_fe_interface_query_t *load; - load = (netif_fe_interface_query_t *)msg->msg; - debug_begin("NETIF_FE", "INTERFACE_QUERY"); - debug_field(load, handle, "%u"); - debug_field(load, status, "%u"); - debug_field(load, evtchn, "%u"); - debug_field_mac(load, mac); - debug_field(load, domid, "%u"); - debug_end("NETIF_FE", "INTERFACE_QUERY"); - } else { - debug_begin("NETIF_FE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("NETIF_FE", "UNKNOWN"); - } - break; - case CMSG_SHUTDOWN: - if (msg->subtype == CMSG_SHUTDOWN_POWEROFF) { - debug_begin("SHUTDOWN", "POWEROFF"); - debug_end("SHUTDOWN", "POWEROFF"); - } else if (msg->subtype == CMSG_SHUTDOWN_REBOOT) { - debug_begin("SHUTDOWN", "REBOOT"); - debug_end("SHUTDOWN", "REBOOT"); - } else if (msg->subtype == CMSG_SHUTDOWN_SUSPEND) { - debug_begin("SHUTDOWN", "SUSPEND"); - debug_end("SHUTDOWN", "SUSPEND"); - } else if (msg->subtype == CMSG_SHUTDOWN_SYSRQ) { - debug_begin("SHUTDOWN", "SYSRQ"); - debug_end("SHUTDOWN", "SYSRQ"); - } else { - debug_begin("SHUTDOWN", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("SHUTDOWN", "UNKNOWN"); - } - break; - case CMSG_MEM_REQUEST: - if (msg->subtype == CMSG_MEM_REQUEST_SET) { - mem_request_t *load; - load = (mem_request_t *)msg->msg; - debug_begin("MEM_REQUEST", "SET"); - debug_field(load, target, "%u"); - debug_field(load, status, "%u"); - debug_end("MEM_REQUEST", "SET"); - } else { - debug_begin("MEM_REQUEST", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - 
debug_end("MEM_REQUEST", "UNKNOWN"); - } - break; - case CMSG_USBIF_BE: - if (msg->subtype == CMSG_USBIF_BE_CREATE) { - usbif_be_create_t *load; - load = (usbif_be_create_t *)msg->msg; - debug_begin("USBIF_BE", "CREATE"); - debug_field(load, domid, "%u"); - debug_field(load, status, "%u"); - debug_end("USBIF_BE", "CREATE"); - } else if (msg->subtype == CMSG_USBIF_BE_DESTROY) { - usbif_be_destroy_t *load; - load = (usbif_be_destroy_t *)msg->msg; - debug_begin("USBIF_BE", "DESTROY"); - debug_field(load, domid, "%u"); - debug_field(load, status, "%u"); - debug_end("USBIF_BE", "DESTROY"); - } else if (msg->subtype == CMSG_USBIF_BE_CONNECT) { - usbif_be_connect_t *load; - load = (usbif_be_connect_t *)msg->msg; - debug_begin("USBIF_BE", "CONNECT"); - debug_field(load, domid, "%u"); - debug_field(load, shmem_frame, "%lu"); - debug_field(load, evtchn, "%u"); - debug_field(load, bandwidth, "%u"); - debug_field(load, status, "%u"); - debug_end("USBIF_BE", "CONNECT"); - } else if (msg->subtype == CMSG_USBIF_BE_DISCONNECT) { - usbif_be_disconnect_t *load; - load = (usbif_be_disconnect_t *)msg->msg; - debug_begin("USBIF_BE", "DISCONNECT"); - debug_field(load, domid, "%u"); - debug_field(load, status, "%u"); - debug_end("USBIF_BE", "DISCONNECT"); - } else if (msg->subtype == CMSG_USBIF_BE_CLAIM_PORT) { - usbif_be_claim_port_t *load; - load = (usbif_be_claim_port_t *)msg->msg; - debug_begin("USBIF_BE", "CLAIM_PORT"); - debug_field(load, domid, "%u"); - debug_field(load, usbif_port, "%u"); - debug_field(load, status, "%u"); - debug_field(load, path, "%s"); - debug_end("USBIF_BE", "CLAIM_PORT"); - } else if (msg->subtype == CMSG_USBIF_BE_RELEASE_PORT) { - usbif_be_release_port_t *load; - load = (usbif_be_release_port_t *)msg->msg; - debug_begin("USBIF_BE", "RELEASE_PORT"); - debug_field(load, path, "%s"); - debug_end("USBIF_BE", "RELEASE_PORT"); - } else if (msg->subtype == CMSG_USBIF_BE_DRIVER_STATUS_CHANGED) { - usbif_be_driver_status_changed_t *load; - load = 
(usbif_be_driver_status_changed_t *)msg->msg; - debug_begin("USBIF_BE", "DRIVER_STATUS_CHANGED"); - debug_field(load, status, "%u"); - debug_end("USBIF_BE", "DRIVER_STATUS_CHANGED"); - } else { - debug_begin("USBIF_BE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("USBIF_BE", "UNKNOWN"); - } - break; - case CMSG_USBIF_FE: - if (msg->subtype == CMSG_USBIF_FE_INTERFACE_STATUS_CHANGED) { - usbif_fe_interface_status_changed_t *load; - load = (usbif_fe_interface_status_changed_t *)msg->msg; - debug_begin("USBIF_FE", "INTERFACE_STATUS_CHANGED"); - debug_field(load, status, "%u"); - debug_field(load, evtchn, "%u"); - debug_field(load, domid, "%u"); - debug_field(load, bandwidth, "%u"); - debug_field(load, num_ports, "%u"); - debug_end("USBIF_FE", "INTERFACE_STATUS_CHANGED"); - } else if (msg->subtype == CMSG_USBIF_FE_DRIVER_STATUS_CHANGED) { - usbif_fe_driver_status_changed_t *load; - load = (usbif_fe_driver_status_changed_t *)msg->msg; - debug_begin("USBIF_FE", "DRIVER_STATUS_CHANGED"); - debug_field(load, status, "%u"); - debug_end("USBIF_FE", "DRIVER_STATUS_CHANGED"); - } else if (msg->subtype == CMSG_USBIF_FE_INTERFACE_CONNECT) { - usbif_fe_interface_connect_t *load; - load = (usbif_fe_interface_connect_t *)msg->msg; - debug_begin("USBIF_FE", "INTERFACE_CONNECT"); - debug_field(load, shmem_frame, "%lu"); - debug_end("USBIF_FE", "INTERFACE_CONNECT"); - } else if (msg->subtype == CMSG_USBIF_FE_INTERFACE_DISCONNECT) { - debug_begin("USBIF_FE", "INTERFACE_DISCONNECT"); - debug_end("USBIF_FE", "INTERFACE_DISCONNECT"); - } else { - debug_begin("USBIF_FE", "UNKNOWN"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - debug_end("USBIF_FE", "UNKNOWN"); - } - break; - default: - debug_begin("UNKNOWN", "UNKNOWN"); - debug_field(msg, type, "%u"); - debug_field(msg, subtype, "%u"); - debug_field(msg, length, "%u"); - debug_dump(msg, msg, length); - 
debug_end("UNKNOWN", "UNKNOWN"); - break; - } -} - -void _error(const char *fmt, ...) -{ - va_list ap; - char buffer[4096]; - - va_start(ap, fmt); - vsnprintf(buffer, sizeof(buffer), fmt, ap); - va_end(ap); - - fprintf(stderr, "%s\n", buffer); -} - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/dump.h --- a/tools/xcs/dump.h Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,28 +0,0 @@ -/*\ - * Copyright (C) International Business Machines Corp., 2005 - * Author(s): Anthony Liguori <aliguori@xxxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; under version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -\*/ - -#ifndef XENCTLD_ERROR_H -#define XENCTLD_ERROR_H - -#include <stdint.h> -#include <xenctrl.h> -#include <xen/io/domain_controller.h> - -void dump_msg(const control_msg_t *msg, uint64_t flags); - -#endif diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/evtchn.c --- a/tools/xcs/evtchn.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,106 +0,0 @@ -/* evtchn.c - * - * Interfaces to event channel driver. - * - * Most of this is directly based on the original xu interface to python - * written by Keir Fraser. - * - * (c) 2004, Andrew Warfield - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/sysmacros.h> /* XOPEN drops makedev, this gets it back. 
*/ -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include "xcs.h" - -static int evtchn_fd = -1; - -/* NB. The following should be kept in sync with the kernel's evtchn driver. */ -#define EVTCHN_DEV_NAME "/dev/xen/evtchn" -#define EVTCHN_DEV_MAJOR 10 -#define EVTCHN_DEV_MINOR 201 -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to teh specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) - -int evtchn_read() -{ - u16 v; - int bytes; - - while ( (bytes = read(evtchn_fd, &v, sizeof(v))) == -1 ) - { - if ( errno == EINTR ) - continue; - /* EAGAIN was cased to return 'None' in the python version... */ - return -errno; - } - - if ( bytes == sizeof(v) ) - return v; - - /* bad return */ - return -1; -} - -void evtchn_unmask(u16 idx) -{ - (void)write(evtchn_fd, &idx, sizeof(idx)); -} - -int evtchn_bind(int idx) -{ - if ( ioctl(evtchn_fd, EVTCHN_BIND, idx) != 0 ) - return -errno; - - return 0; -} - -int evtchn_unbind(int idx) -{ - if ( ioctl(evtchn_fd, EVTCHN_UNBIND, idx) != 0 ) - return -errno; - - return 0; -} - -int evtchn_open(void) -{ - struct stat st; - - /* Make sure any existing device file links to correct device. 
*/ - if ( (lstat(EVTCHN_DEV_NAME, &st) != 0) || - !S_ISCHR(st.st_mode) || - (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) ) - (void)unlink(EVTCHN_DEV_NAME); - - reopen: - evtchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR); - if ( evtchn_fd == -1 ) - { - if ( (errno == ENOENT) && - ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && - (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, - makedev(EVTCHN_DEV_MAJOR,EVTCHN_DEV_MINOR)) == 0) ) - goto reopen; - return -errno; - } - return evtchn_fd; -} - -void evtchn_close() -{ - (void)close(evtchn_fd); - evtchn_fd = -1; -} - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/xcs.c --- a/tools/xcs/xcs.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,973 +0,0 @@ -/* xcs.c - * - * xcs - Xen Control Switch - * - * Copyright (c) 2004, Andrew Warfield - */ - -/* - - Things we need to select on in xcs: - - 1. Events arriving on /dev/evtchn - - These will kick a function to read everything off the fd, and scan the - associated control message rings, resulting in notifications sent on - data channels to connected clients. - - 2. New TCP connections on XCS_PORT. - - These will either be control (intially) or associated data connections. - - Control connections will instantiate or rebind to an existing connnection - struct. The control channel is used to configure what events will be - received on an associated data channel. These two channels are split - out because the control channel is synchronous, all messages will return - a result from XCS. The data channel is effectively asynchronous, events - may arrive in the middle of a control message exchange. Additionally, - Having two TCP connections allows the client side to have a blocking - listen loop for data messages, while independently interacting on the - control channel at other places in the code. - - Data connections attach to an existing control struct, using a session - id that is passed during the control connect. 
There is currently a - one-to-one relationship between data and control channels, but there - could just as easily be many data channels, if there were a set of - clients with identical interests, or if you wanted to trace an existing - client's data traffic. - - 3. Messages arriving on open TCP connections. - There are three types of open connections: - - 3a. Messages arriving on open control channel file descriptors. - - [description of the control protocol here] - - 3b. Messages arriving on open data channel file descriptors. - - [description of the data protocol here] - - 3c. Messages arriving on (new) unbound connections. - - A connection must issue a XCS_CONNECT message to specify what - it is, after which the connection is moved into one of the above - two groups. - - Additionally, we need a periodic timer to do housekeeping. - - 4. Every XCS_GC_INTERVAL seconds, we need to clean up outstanding state. - Specifically, we garbage collect any sessions (connection_t structs) - that have been unconnected for a period of time (XCS_SESSION_TIMEOUT), - and close any connections that have been openned, but not connected - as a control or data connection (XCS_UFD_TIMEOUT). - -*/ - -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/time.h> -#include <sys/types.h> -#include <string.h> -#include <signal.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <errno.h> -#include <malloc.h> -#include <fcntl.h> -#include <ctype.h> -#include "xcs.h" - -#undef fd_max -#define fd_max(x,y) ((x) > (y) ? 
(x) : (y)) - -/* ------[ Control channel interfaces ]------------------------------------*/ - -static control_channel_t *cc_list[NR_EVENT_CHANNELS]; -static int *dom_port_map = 0; -static int dom_port_map_size = 0; - -static void map_dom_to_port(u32 dom, int port) -{ - if (dom >= dom_port_map_size) { - dom_port_map = (int *)realloc(dom_port_map, - (dom + 256) * sizeof(dom_port_map[0])); - - if (dom_port_map == NULL) { - perror("realloc(dom_port_map)"); - exit(1); - } - - for (; dom_port_map_size < dom + 256; dom_port_map_size++) { - dom_port_map[dom_port_map_size] = -1; - } - } - - dom_port_map[dom] = port; -} - -static int dom_to_port(u32 dom) -{ - if (dom >= dom_port_map_size) return -1; - - return dom_port_map[dom]; -} - -static void init_interfaces(void) -{ - memset(cc_list, 0, sizeof cc_list); -} - -static control_channel_t *add_interface(u32 dom, int local_port, - int remote_port) -{ - control_channel_t *cc=NULL, *oldcc; - int ret; - - if ((dom_to_port(dom) >= 0) && (cc_list[dom_to_port(dom)] != NULL)) - { - return(cc_list[dom_to_port(dom)]); - } - - if (cc_list[local_port] == NULL) - { - cc = ctrl_chan_new(dom, local_port, remote_port); - } - - if (cc == NULL) - return NULL; - - DPRINTF("added a new interface: dom: %u (l:%d,r:%d): %p\n", - dom, local_port, remote_port, cc); - DPRINTF("added a new interface: dom: %u (l:%d,r:%d): %p\n", - dom, cc->local_port, cc->remote_port, cc); - - if ((ret = evtchn_bind(cc->local_port)) != 0) - { - DPRINTF("Got control interface, but couldn't bind evtchan!(%d)\n", ret); - ctrl_chan_free(cc); - return NULL; - } - - if ( cc_list[cc->local_port] != NULL ) - { - oldcc = cc_list[cc->local_port]; - - if ((oldcc->remote_dom != cc->remote_dom) || - (oldcc->remote_port != cc->remote_port)) - { - DPRINTF("CC conflict! 
(port: %d, old dom: %u, new dom: %u, " - "old ref_count: %d)\n", - cc->local_port, oldcc->remote_dom, cc->remote_dom, - oldcc->ref_count); - map_dom_to_port(oldcc->remote_dom, -1); - ctrl_chan_free(cc_list[cc->local_port]); - cc_list[cc->local_port] = NULL; - } - } - - cc_list[cc->local_port] = cc; - map_dom_to_port(cc->remote_dom, cc->local_port); - cc->type = CC_TYPE_INTERDOMAIN; - cc->ref_count = 0; - return cc; -} - -control_channel_t *add_virq(int virq) -{ - control_channel_t *cc; - int virq_port; - - if (ctrl_chan_bind_virq(virq, &virq_port) == -1) - return NULL; - - if ((cc_list[virq_port] != NULL) && - (cc_list[virq_port]->type != CC_TYPE_VIRQ)) - return NULL; - - if ((cc_list[virq_port] != NULL) && - (cc_list[virq_port]->type == CC_TYPE_VIRQ)) - return cc_list[virq_port]; - - cc = (control_channel_t *)malloc(sizeof(control_channel_t)); - if ( cc == NULL ) return NULL; - - memset(cc, 0, sizeof(control_channel_t)); - cc->type = CC_TYPE_VIRQ; - cc->local_port = virq_port; - cc->virq = virq; - cc->ref_count = 1; - - if (evtchn_bind(cc->local_port) != 0) - { - DPRINTF("Got control interface, but couldn't bind evtchan!\n"); - free(cc); - return NULL; - } - - cc_list[cc->local_port] = cc; - - return cc; -} - -void get_interface(control_channel_t *cc) -{ - if (cc != NULL) - cc->ref_count++; -} - -void put_interface(control_channel_t *cc) -{ - if (cc != NULL) - { - cc->ref_count--; - if (cc->ref_count <= 0) - { - DPRINTF("Freeing cc on port %d.\n", cc->local_port); - (void)evtchn_unbind(cc->local_port); - cc_list[cc->local_port] = NULL; - map_dom_to_port(cc->remote_dom, -1); - ctrl_chan_free(cc); - } - } -} - -/* ------[ Simple helpers ]------------------------------------------------*/ - -/* listen_socket() is straight from paul sheer's useful select_tut manpage. 
*/ -static int listen_socket (char *listen_path) -{ - struct sockaddr_un a; - int s; - int yes; - - if ((s = socket (AF_UNIX, SOCK_STREAM, 0)) < 0) - { - perror ("socket"); - return -1; - } - - yes = 1; - - memset (&a, 0, sizeof (a)); - a.sun_family = AF_UNIX; - strcpy(a.sun_path, listen_path); - - /* remove an old socket if it exists. */ - unlink(listen_path); - - if (bind(s, (struct sockaddr *) &a, sizeof (a)) < 0) - { - fprintf (stderr, "bind('%s'): %s\n", listen_path, strerror(errno)); - close (s); - return -1; - } - DPRINTF ("accepting connections on path %s\n", listen_path); - listen (s, 10); - return s; -} - -/* ------[ Message handlers ]----------------------------------------------*/ - -#define NO_CHANGE 0 -#define CONNECTED 1 -#define DISCONNECTED 2 -int handle_connect_msg( xcs_msg_t *msg, int fd ) -{ - xcs_connect_msg_t *cmsg = &msg->u.connect; - connection_t *con; - int ret = NO_CHANGE; - - switch (msg->type) - { - case XCS_CONNECT_CTRL: - { - if ( cmsg->session_id == 0 ) - { - con = connection_new(); - if ( con == NULL) - { - msg->result = XCS_RSLT_FAILED; - break; - } - msg->result = XCS_RSLT_OK; - cmsg->session_id = con->id; - con->ctrl_fd = fd; - ret = CONNECTED; - DPRINTF("New control connection\n"); - break; - } - - con = get_con_by_session(cmsg->session_id); - if ( con == NULL ) - { - msg->result = XCS_RSLT_BADSESSION; - break; - } - if ( con->ctrl_fd != -1 ) - { - msg->result = XCS_RSLT_CONINUSE; - break; - } - con->ctrl_fd = fd; - msg->result = XCS_RSLT_OK; - ret = CONNECTED; - DPRINTF("Rebound to control connection\n"); - break; - } - case XCS_CONNECT_DATA: - { - con = get_con_by_session(cmsg->session_id); - if ( con == NULL ) - { - msg->result = XCS_RSLT_BADSESSION; - break; - } - if ( con->data_fd != -1 ) - { - msg->result = XCS_RSLT_CONINUSE; - break; - } - con->data_fd = fd; - msg->result = XCS_RSLT_OK; - ret = CONNECTED; - DPRINTF("Attached data connection\n"); - break; - - } - case XCS_CONNECT_BYE: - { - close ( fd ); - ret = 
DISCONNECTED; - break; - } - } - - return ret; -} - -int handle_control_message( connection_t *con, xcs_msg_t *msg ) -{ - int ret; - int reply_needed = 1; - - DPRINTF("Got message, type %u.\n", msg->type); - - switch (msg->type) - { - case XCS_MSG_BIND: - { - xcs_bind_msg_t *bmsg = &msg->u.bind; - - if ( ! BIND_MSG_VALID(bmsg) ) - { - msg->result = XCS_RSLT_BADREQUEST; - break; - } - - ret = xcs_bind(con, bmsg->port, bmsg->type); - if (ret == 0) { - msg->result = XCS_RSLT_OK; - } else { - msg->result = XCS_RSLT_FAILED; - } - break; - } - case XCS_MSG_UNBIND: - { - xcs_bind_msg_t *bmsg = &msg->u.bind; - - if ( ! BIND_MSG_VALID(bmsg) ) - { - msg->result = XCS_RSLT_BADREQUEST; - break; - } - - ret = xcs_unbind(con, bmsg->port, bmsg->type); - if (ret == 0) { - msg->result = XCS_RSLT_OK; - } else { - msg->result = XCS_RSLT_FAILED; - } - break; - } - case XCS_VIRQ_BIND: - { - control_channel_t *cc; - xcs_virq_msg_t *vmsg = &msg->u.virq; - if ( ! VIRQ_MSG_VALID(vmsg) ) - { - msg->result = XCS_RSLT_BADREQUEST; - break; - } - - cc = add_virq(vmsg->virq); - if (cc == NULL) - { - msg->result = XCS_RSLT_FAILED; - break; - } - ret = xcs_bind(con, cc->local_port, TYPE_VIRQ); - if (ret == 0) { - vmsg->port = cc->local_port; - msg->result = XCS_RSLT_OK; - } else { - msg->result = XCS_RSLT_FAILED; - } - break; - } - - case XCS_CIF_NEW_CC: - { - control_channel_t *cc; - xcs_interface_msg_t *imsg = &msg->u.interface; - - if ( ! INTERFACE_MSG_VALID(imsg) ) - { - msg->result = XCS_RSLT_BADREQUEST; - break; - } - - cc = add_interface(imsg->dom, imsg->local_port, imsg->remote_port); - if (cc != NULL) { - get_interface(cc); - msg->result = XCS_RSLT_OK; - imsg->local_port = cc->local_port; - imsg->remote_port = cc->remote_port; - } else { - msg->result = XCS_RSLT_FAILED; - } - break; - } - - case XCS_CIF_FREE_CC: - { - control_channel_t *cc; - xcs_interface_msg_t *imsg = &msg->u.interface; - - if ( ! 
INTERFACE_MSG_VALID(imsg) ) - { - msg->result = XCS_RSLT_BADREQUEST; - break; - } - - cc = add_interface(imsg->dom, imsg->local_port, imsg->remote_port); - if (cc != NULL) { - put_interface(cc); - } - msg->result = XCS_RSLT_OK; - break; - } - } - return reply_needed; -} - -void handle_data_message( connection_t *con, xcs_msg_t *msg ) -{ - control_channel_t *cc; - xcs_control_msg_t *cmsg = &msg->u.control; - int port; - - switch (msg->type) - { - case XCS_REQUEST: - if ( cmsg->remote_dom > MAX_DOMS ) - break; - - port = dom_to_port(cmsg->remote_dom); - if (port == -1) break; - cc = cc_list[port]; - if ((cc != NULL) && ( cc->type == CC_TYPE_INTERDOMAIN )) - { - DPRINTF("DN:REQ: dom:%d port: %d type: %d\n", - cc->remote_dom, cc->local_port, - cmsg->msg.type); - ctrl_chan_write_request(cc, cmsg); - ctrl_chan_notify(cc); - } else { - DPRINTF("tried to send a REQ to a null cc\n."); - } - break; - - case XCS_RESPONSE: - if ( cmsg->remote_dom > MAX_DOMS ) - break; - - port = dom_to_port(cmsg->remote_dom); - if (port == -1) break; - cc = cc_list[port]; - if ((cc != NULL) && ( cc->type == CC_TYPE_INTERDOMAIN )) - { - DPRINTF("DN:RSP: dom:%d port: %d type: %d\n", - cc->remote_dom, cc->local_port, - cmsg->msg.type); - ctrl_chan_write_response(cc, cmsg); - ctrl_chan_notify(cc); - } - break; - - case XCS_VIRQ: - if ( !(PORT_VALID(cmsg->local_port)) ) - break; - - cc = cc_list[cmsg->local_port]; - - if ((cc != NULL) && ( cc->type == CC_TYPE_VIRQ )) - { - DPRINTF("DN:VIRQ: virq: %d port: %d\n", - cc->virq, cc->local_port); - ctrl_chan_notify(cc); - } - break; - } -} - -/* ------[ Control interface handler ]-------------------------------------*/ - -/* passed as a function pointer to the lookup. 
*/ -void send_kmsg(connection_t *c, void *arg) -{ - xcs_msg_t *msg = (xcs_msg_t *)arg; - - DPRINTF(" -> CONNECTION %d\n", c->data_fd); - if (c->data_fd > 0) - { - send(c->data_fd, msg, sizeof(xcs_msg_t), 0); - } -} - -int handle_ctrl_if(void) -{ - control_channel_t *cc; - control_msg_t *msg; - xcs_msg_t kmsg; - int chan, ret; - - DPRINTF("Event thread kicked!\n"); -again: - while ((chan = evtchn_read()) > 0) - { - evtchn_unmask(chan); - cc = cc_list[chan]; - if (cc_list[chan] == NULL) { - DPRINTF("event from unknown channel (%d)\n", chan); - continue; - } - - if ( cc_list[chan]->type == CC_TYPE_VIRQ ) - { - DPRINTF("UP:VIRQ: virq:%d port: %d\n", - cc->virq, cc->local_port); - kmsg.type = XCS_VIRQ; - kmsg.u.control.local_port = cc->local_port; - xcs_lookup(cc->local_port, TYPE_VIRQ, send_kmsg, &kmsg); - continue; - } - - while (ctrl_chan_request_to_read(cc)) - { - msg = &kmsg.u.control.msg; - kmsg.type = XCS_REQUEST; - kmsg.u.control.remote_dom = cc->remote_dom; - kmsg.u.control.local_port = cc->local_port; - ret = ctrl_chan_read_request(cc, &kmsg.u.control); - DPRINTF("UP:REQ: dom:%d port: %d type: %d len: %d\n", - cc->remote_dom, cc->local_port, - msg->type, msg->length); - if (ret == 0) - xcs_lookup(cc->local_port, msg->type, send_kmsg, &kmsg); - } - - while (ctrl_chan_response_to_read(cc)) - { - msg = &kmsg.u.control.msg; - kmsg.type = XCS_RESPONSE; - kmsg.u.control.remote_dom = cc->remote_dom; - kmsg.u.control.local_port = cc->local_port; - ret = ctrl_chan_read_response(cc, &kmsg.u.control); - DPRINTF("UP:RSP: dom:%d port: %d type: %d len: %d\n", - cc->remote_dom, cc->local_port, - msg->type, msg->length); - if (ret == 0) - xcs_lookup(cc->local_port, msg->type, send_kmsg, &kmsg); - } - } - - if (chan == -EINTR) - goto again; - - return chan; -} - - -/* ------[ Main xcs code / big select loop ]-------------------------------*/ - - -typedef struct unbound_fd_st { - int fd; - struct timeval born; - struct unbound_fd_st *next; -} unbound_fd_t; - -/* This makes ufd 
point to the next entry in the list, so need to * - * break/continue if called while iterating. */ -void delete_ufd(unbound_fd_t **ufd) -{ - unbound_fd_t *del_ufd; - - del_ufd = *ufd; - *ufd = (*ufd)->next; - free( del_ufd ); -} - -void gc_ufd_list( unbound_fd_t **ufd ) -{ - struct timeval now, delta; - - gettimeofday(&now, NULL); - - while ( *ufd != NULL ) - { - timersub(&now, &(*ufd)->born, &delta); - if (delta.tv_sec > XCS_UFD_TIMEOUT) - { - DPRINTF("GC-UFD: closing fd: %d\n", (*ufd)->fd); - close((*ufd)->fd); - delete_ufd(ufd); - continue; - } - ufd = &(*ufd)->next; - } -} - -void daemonize_xcs(void) -{ - - /* detach from our controlling tty so that a shell does hang waiting for - stopped jobs. */ - - pid_t pid = fork(); - int fd; - - if (pid == -1) { - perror("fork()"); - } else if (pid) { - exit(0); - } - - fd = open("/var/log/xcs.log", O_WRONLY | O_APPEND | O_CREAT); - if ( fd == -1 ) { - fprintf(stderr, "xcs couldn't open logfile. Directing all output to " - "/dev/null instead.\n"); - fd = open("/dev/null", O_WRONLY); - } - - setsid(); - close(2); - close(1); - close(0); - dup(fd); - dup(fd); -} - - -static char *pidfilename = NULL; -void cleanup(int sig) -{ - /* throw away our pidfile if we created one. */ - if ( pidfilename != NULL ) - unlink(pidfilename); - exit(0); -} - -int main (int argc, char *argv[]) -{ - int listen_fd, evtchn_fd; - unbound_fd_t *unbound_fd_list = NULL, **ufd; - struct timeval timeout = { XCS_GC_INTERVAL, 0 }; - connection_t **con; - int c, daemonize; - FILE *pidfile; - struct stat s; - - daemonize = 1; - pidfile = NULL; - - signal(SIGHUP, cleanup); - signal(SIGTERM, cleanup); - signal(SIGINT, cleanup); - - /* Do a bunch of stuff before potentially daemonizing so we can - * print error messages sanely before redirecting output. */ - - /* Initialize xc and event connections. 
*/ - if (ctrl_chan_init() != 0) - { - printf("Couldn't open conneciton to libxc.\n"); - exit(-1); - } - - if ((evtchn_fd = evtchn_open()) < 0) - { - printf("Couldn't open event channel driver interface.\n"); - exit(-1); - } - - /* Bind listen_fd to the client socket. */ - listen_fd = listen_socket(XCS_SUN_PATH); - - while ((c = getopt (argc, argv, "ip:")) != -1) - { - switch (c) - { - case 'i': /* interactive */ - daemonize = 0; - break; - case 'p': /* pid file */ - pidfilename = optarg; - break; - case '?': - if (isprint (optopt)) - fprintf (stderr, "Unknown option `-%c'.\n", optopt); - else - fprintf (stderr, - "Bad option character `\\x%x'.\n", optopt); - break; - } - } - - if ( pidfilename != NULL ) - { - if ( stat(pidfilename, &s) == 0 ) - { - fprintf(stderr, "Thre specified pid file (%s) already exists.\n" - "Is another instance of xcs running?\n", pidfilename); - exit(-1); - } - - pidfile = fopen(pidfilename, "w"); - if (pidfile == NULL) - { - fprintf(stderr, "Error openning pidfile (%s).\n", pidfilename); - exit(-1); - } - } - - if (daemonize == 1) - daemonize_xcs(); - - if (pidfile != NULL) - { - fprintf(pidfile, "%d", getpid()); - fclose(pidfile); - } - - - /* Initialize control interfaces, bindings. 
*/ - init_interfaces(); - init_bindings(); - - - for (;;) - { - int n = 0, ret; - fd_set rd, wr, er; - FD_ZERO ( &rd ); - FD_ZERO ( &wr ); - FD_ZERO ( &er ); - - /* TCP listen fd: */ - FD_SET ( listen_fd, &rd ); - n = fd_max ( n, listen_fd ); - - /* Evtchn fd: */ - FD_SET ( evtchn_fd, &rd ); - n = fd_max ( n, evtchn_fd ); - - /* unbound connection fds: */ - ufd = &unbound_fd_list; - while ((*ufd) != NULL) - { - FD_SET ( (*ufd)->fd, &rd ); - n = fd_max ( n, (*ufd)->fd ); - ufd = &(*ufd)->next; - } - - /* control and data fds: */ - con = &connection_list; - while ((*con) != NULL) - { - if ((*con)->ctrl_fd > 0) - { - FD_SET ( (*con)->ctrl_fd, &rd ); - n = fd_max ( n, (*con)->ctrl_fd ); - } - if ((*con)->data_fd > 0) - { - FD_SET ( (*con)->data_fd, &rd ); - n = fd_max ( n, (*con)->data_fd ); - } - con = &(*con)->next; - } - - ret = select ( n + 1, &rd, &wr, &er, &timeout ); - - if ( (timeout.tv_sec == 0) && (timeout.tv_usec == 0) ) - { - gc_ufd_list(&unbound_fd_list); - gc_connection_list(); - timeout.tv_sec = XCS_GC_INTERVAL; - } - - if ( (ret == -1) && (errno == EINTR) ) - continue; - if ( ret < 0 ) - { - perror ("select()"); - exit(-1); - } - - /* CASE 1: Events arriving on /dev/evtchn. */ - - if ( FD_ISSET (evtchn_fd, &rd )) - handle_ctrl_if(); - - /* CASE 2: New connection on the listen port. */ - if ( FD_ISSET ( listen_fd, &rd )) - { - struct sockaddr_un remote_addr; - int size; - memset (&remote_addr, 0, sizeof (remote_addr)); - size = sizeof remote_addr; - ret = accept(listen_fd, (struct sockaddr *)&remote_addr, (socklen_t *)&size); - if ( ret < 0 ) - { - perror("accept()"); - } else { - unbound_fd_t *new_ufd; - - new_ufd = (unbound_fd_t *)malloc(sizeof(*new_ufd)); - - if (new_ufd != NULL) - { - gettimeofday(&new_ufd->born, NULL); - new_ufd->fd = ret; - new_ufd->next = unbound_fd_list; - unbound_fd_list = new_ufd; - } else { - perror("malloc unbound connection"); - close(ret); - } - } - } - - /* CASE 3a: Handle messages on control connections. 
*/ - - con = &connection_list; - while ( *con != NULL ) - { - if ( ((*con)->ctrl_fd > 0) && (FD_ISSET((*con)->ctrl_fd, &rd)) ) - { - xcs_msg_t msg; - memset (&msg, 0, sizeof(msg)); - ret = read( (*con)->ctrl_fd, &msg, sizeof(msg) ); - - if ( ret < 0 ) - { - perror("reading ctrl fd."); - } else if ( ret == 0 ) - { - DPRINTF("Control connection dropped.\n"); - close ( (*con)->ctrl_fd ); - (*con)->ctrl_fd = -1; - gettimeofday(&(*con)->disconnect_time, NULL); - } else - { - if ( ret != sizeof(msg) ) - { - DPRINTF("Unexpected frame size!\n"); - continue; - } - - ret = handle_control_message( *con, &msg ); - - if ( ret == 1 ) - send( (*con)->ctrl_fd, &msg, sizeof(msg), 0 ); - } - } - con = &(*con)->next; - } - - /* CASE 3b: Handle messages on data connections. */ - - con = &connection_list; - while ( *con != NULL ) - { - if ( ((*con)->data_fd > 0) && (FD_ISSET((*con)->data_fd, &rd)) ) - { - xcs_msg_t msg; - memset (&msg, 0, sizeof(msg)); - ret = read( (*con)->data_fd, &msg, sizeof(msg) ); - - if ( ret < 0 ) - { - perror("reading data fd."); - } else if ( ret == 0 ) - { - DPRINTF("Data connection dropped.\n"); - close ( (*con)->data_fd ); - (*con)->data_fd = -1; - gettimeofday(&(*con)->disconnect_time, NULL); - } else - { - if ( ret != sizeof(msg) ) - { - DPRINTF("Unexpected frame size!\n"); - continue; - } - - handle_data_message( *con, &msg ); - } - } - con = &(*con)->next; - } - - /* CASE 3c: Handle messages arriving on unbound connections. 
*/ - ufd = &unbound_fd_list; - while ((*ufd) != NULL) - { - if ( FD_ISSET( (*ufd)->fd, &rd ) ) - { - xcs_msg_t msg; - memset (&msg, 0, sizeof(msg)); - ret = read( (*ufd)->fd, &msg, sizeof(msg) ); - - if ( ret == 0 ) - { - close ( (*ufd)->fd ); - delete_ufd(ufd); - continue; /* we just advanced ufd */ - } else { - if ( ret != sizeof(msg) ) - { - DPRINTF("Unexpected frame size!\n"); - continue; - } - - ret = handle_connect_msg( &msg, (*ufd)->fd ); - - if ( (ret == CONNECTED) || (ret == NO_CHANGE) ) - send( (*ufd)->fd, &msg, sizeof(msg), 0 ); - - if ( (ret = CONNECTED) || (ret = DISCONNECTED) ) - { - delete_ufd( ufd ); - continue; - } - } - } - ufd = &(*ufd)->next; - } - } -} - diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/xcs.h --- a/tools/xcs/xcs.h Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,148 +0,0 @@ -/* xcs.h - * - * public interfaces for the control interface switch (xcs). - * - * (c) 2004, Andrew Warfield - * - */ - - -#ifndef __XCS_H__ -#define __XCS_H__ - -#include <pthread.h> -#include <xenctrl.h> -#include <xen/xen.h> -#include <xen/io/domain_controller.h> -#include <xen/linux/privcmd.h> -#include <sys/time.h> -#include "xcs_proto.h" - -/* ------[ Debug macros ]--------------------------------------------------*/ - -#if 0 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) -#else -#define DPRINTF(_f, _a...) ((void)0) -#endif - -/* ------[ XCS-specific defines and types ]--------------------------------*/ - -#define MAX_DOMS 1024 -#define XCS_SESSION_TIMEOUT 10 /* (secs) disconnected session gc timeout */ -#define XCS_UFD_TIMEOUT 5 /* how long can connections be unbound? */ -#define XCS_GC_INTERVAL 5 /* How often to run gc handlers. */ - - -/* ------[ Other required defines ]----------------------------------------*/ - -/* Size of a machine page frame. */ -#define PAGE_SIZE XC_PAGE_SIZE - -#ifndef timersub /* XOPEN and __BSD don't cooperate well... 
*/ -#define timersub(a, b, result) \ - do { \ - (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((result)->tv_usec < 0) { \ - --(result)->tv_sec; \ - (result)->tv_usec += 1000000; \ - } \ - } while (0) -#endif /*timersub*/ - -/* ------[ Bindings Interface ]--------------------------------------------*/ - -/*forward declare connection_t */ -typedef struct connection_st connection_t; - -typedef struct { - int port; - u16 type; -} binding_key_t; - -typedef struct binding_key_ent_st { - binding_key_t key; - struct binding_key_ent_st *next; -} binding_key_ent_t; - -#define BINDING_KEYS_EQUAL(_k1, _k2) \ - (((_k1)->port == (_k2)->port) && ((_k1)->type == (_k2)->type)) - -int xcs_bind(connection_t *con, int port, u16 type); -int xcs_unbind(connection_t *con, int port, u16 type); -void xcs_lookup(int port, u16 type, void (*f)(connection_t *, void *), - void *arg); -void init_bindings(void); - -/* ------[ Connection Interface ]------------------------------------------*/ - -struct connection_st { - unsigned long id; /* Unique session id */ - int ctrl_fd; /* TCP descriptors */ - int data_fd; /* */ - binding_key_ent_t *bindings; /* List of bindings */ - connection_t *next; /* Linked list of connections */ - struct timeval disconnect_time; /* " " */ -}; /* previously typedefed as connection_t */ - - -extern connection_t *connection_list; - -connection_t *get_con_by_session(unsigned long session_id); -connection_t *connection_new(); -void connection_free(connection_t *con); -int connection_add_binding(connection_t *con, binding_key_t *key); -int connection_remove_binding(connection_t *con, binding_key_t *key); -int connection_has_binding(connection_t *con, binding_key_t *key); -void gc_connection_list(void); - -/* ------[ Control Channel Interfaces ]------------------------------------*/ - -typedef struct { - int connected; - int ref_count; - int type; - u32 remote_dom; - int local_port; - int remote_port; - control_if_t 
*interface; - ctrl_back_ring_t tx_ring; - ctrl_front_ring_t rx_ring; - int virq; -} control_channel_t; - -/* cc types that we care about */ -#define CC_TYPE_INTERDOMAIN 0 -#define CC_TYPE_VIRQ 1 - -control_channel_t - *ctrl_chan_new(u32 dom, int local_port, int remote_port); -void ctrl_chan_free(control_channel_t *cc); -int ctrl_chan_init(void); -int ctrl_chan_notify(control_channel_t *cc); -int ctrl_chan_read_request(control_channel_t *cc, xcs_control_msg_t *); -int ctrl_chan_write_request(control_channel_t *cc, - xcs_control_msg_t *smsg); -int ctrl_chan_read_response(control_channel_t *cc, xcs_control_msg_t *); -int ctrl_chan_write_response(control_channel_t *cc, - xcs_control_msg_t *smsg); -int ctrl_chan_request_to_read(control_channel_t *cc); -int ctrl_chan_space_to_write_request(control_channel_t *cc); -int ctrl_chan_response_to_read(control_channel_t *cc); -int ctrl_chan_space_to_write_response(control_channel_t *cc); -int ctrl_chan_connect(control_channel_t *cc); -void ctrl_chan_disconnect(control_channel_t *cc); -int ctrl_chan_bind_virq(int virq, int *port); - -/* ------[ Event notification interfaces ]---------------------------------*/ - - -int evtchn_open(void); -void evtchn_close(); -int evtchn_bind(int idx); -int evtchn_unbind(int idx); -void evtchn_unmask(u16 idx); -int evtchn_read(); - -#endif /* __XCS_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/xcs_proto.h --- a/tools/xcs/xcs_proto.h Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,101 +0,0 @@ -/* xcs_proto.h - * - * protocol interfaces for the control interface switch (xcs). - * - * (c) 2004, Andrew Warfield - * - */ - -#ifndef __XCS_PROTO_H__ -#define __XCS_PROTO_H__ - -#define XCS_SUN_PATH "/var/lib/xen/xcs_socket" - -/* xcs message types: */ -#define XCS_CONNECT_CTRL 0 /* This is a control connection. */ -#define XCS_CONNECT_DATA 1 /* This is a data connection. */ -#define XCS_CONNECT_BYE 2 /* Terminate a session. 
*/ -#define XCS_MSG_BIND 3 /* Register for a message type. */ -#define XCS_MSG_UNBIND 4 /* Unregister for a message type. */ -#define XCS_VIRQ_BIND 5 /* Register for a virq. */ -#define XCS_MSG_WRITELOCK 6 /* Writelock a (dom,type) pair. */ -#define XCS_CIF_NEW_CC 7 /* Create a new control channel. */ -#define XCS_CIF_FREE_CC 8 /* Create a new control channel. */ -#define XCS_REQUEST 9 /* This is a request message. */ -#define XCS_RESPONSE 10 /* this is a response Message. */ -#define XCS_VIRQ 11 /* this is a virq notification. */ - -/* xcs result values: */ -#define XCS_RSLT_OK 0 -#define XCS_RSLT_FAILED 1 /* something bad happened. */ -#define XCS_RSLT_ARECONNECTED 2 /* attempt to over connect. */ -#define XCS_RSLT_BADSESSION 3 /* request for unknown session id. */ -#define XCS_RSLT_NOSESSION 4 /* tried to do something before NEW. */ -#define XCS_RSLT_CONINUSE 5 /* Requested connection is taken. */ -#define XCS_RSLT_BADREQUEST 6 /* Request message didn't validate. */ - -/* Binding wildcards */ -#define PORT_WILDCARD 0xefffffff -#define TYPE_WILDCARD 0xffff -#define TYPE_VIRQ 0xfffe - -typedef struct { - unsigned long session_id; -} xcs_connect_msg_t; - -typedef struct { - int port; - u16 type; -} xcs_bind_msg_t; - -typedef struct { - int port; - u16 virq; -} xcs_virq_msg_t; - -typedef struct { - u32 dom; - int local_port; - int remote_port; -} xcs_interface_msg_t; - -typedef struct { - u32 remote_dom; - int local_port; - control_msg_t msg; -} xcs_control_msg_t; - -typedef struct { - u32 type; - u32 result; - union { - xcs_connect_msg_t connect; /* These are xcs ctrl message types */ - xcs_bind_msg_t bind; - xcs_virq_msg_t virq; - xcs_interface_msg_t interface; - - xcs_control_msg_t control; /* These are xcs data message types */ - } u; -} xcs_msg_t; - -/* message validation macros. 
*/ -#define PORT_VALID(_p) \ - ( (((_p) >= 0) && ((_p) < NR_EVENT_CHANNELS)) \ - || ((_p) == PORT_WILDCARD) ) - -#define TYPE_VALID(_t) \ - ( ((_t) < 256) \ - || ((_t) == TYPE_VIRQ) \ - || ((_t) == TYPE_WILDCARD) ) - -#define BIND_MSG_VALID(_b) \ - ( PORT_VALID((_b)->port) && TYPE_VALID((_b)->type) ) - -/* Port is overwritten, and we don't currently validate the requested virq. */ -#define VIRQ_MSG_VALID(_v) ( 1 ) - -/* Interfaces may return with ports of -1, but may not be requested as such */ -#define INTERFACE_MSG_VALID(_i) \ - ( PORT_VALID((_i)->local_port) && PORT_VALID((_i)->remote_port) ) - -#endif /* __XCS_PROTO_H__ */ diff -r 10b1d30d3f66 -r b2f4823b6ff0 tools/xcs/xcsdump.c --- a/tools/xcs/xcsdump.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,206 +0,0 @@ -/* xcsdump.c - * - * little tool to sniff control messages. - * - * Copyright (c) 2004, Andrew Warfield - * - * Modifications by Anthony Liguori <aliguori@xxxxxxxxxx> are: - * Copyright (C) 2005, International Business Machines, Corp. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/un.h> -#include <ctype.h> -#include <xenctrl.h> -#include <xen/xen.h> -#include <xen/io/domain_controller.h> -#include <getopt.h> -#include "xcs_proto.h" -#include "xcs.h" - -#include "dump.h" - -static int xcs_ctrl_fd = -1; /* connection to the xcs server. */ -static int xcs_data_fd = -1; /* connection to the xcs server. 
*/ - -int sock_connect(char *path) -{ - struct sockaddr_un addr; - int ret, len, fd; - - fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (fd < 0) - { - printf("error creating xcs socket!\n"); - return -1; - } - - addr.sun_family = AF_UNIX; - strcpy(addr.sun_path, path); - len = sizeof(addr.sun_family) + strlen(addr.sun_path) + 1; - - ret = connect(fd, (struct sockaddr *)&addr, len); - if (ret < 0) - { - printf("error connecting to xcs!\n"); - return -1; - } - - return fd; -} - -void sock_disconnect(int *fd) -{ - close(*fd); - *fd = -1; -} - -void xcs_read(int fd, xcs_msg_t *msg) -{ - int ret; - - ret = read(fd, msg, sizeof(xcs_msg_t)); - if (ret != sizeof(xcs_msg_t)) { - printf("read error\n"); - exit(-1); - } -} - -void xcs_send(int fd, xcs_msg_t *msg) -{ - int ret; - - ret = send(fd, msg, sizeof(xcs_msg_t), 0); - if (ret != sizeof(xcs_msg_t) ) - { - printf("send error\n"); - exit(-1); - } -} - - -int main(int argc, char* argv[]) -{ - int ret; - xcs_msg_t msg; - control_msg_t *cmsg; - int verbose = 0; - int ch; - - while ((ch = getopt(argc, argv, "hv:")) != -1) - { - switch (ch) - { - case 'v': - verbose = atoi(optarg); - break; - case 'h': - printf("Usage: %s [-v FLAGS]\n" -"Displays XCS control message traffic.\n" -"\n" -"FLAGS is a bitmask where each bit (numbering starts from LSB) represents\n" -"whether to display a particular message type.\n" -"\n" -"For example, -v 1022 will display all messages except for console messages.\n" - , argv[0]); - exit(0); - break; - } - } - - ret = sock_connect(XCS_SUN_PATH); - if (ret < 0) - { - printf("connect failed!\n"); - exit(-1); - } - xcs_ctrl_fd = ret; - - memset(&msg, 0, sizeof(msg)); - msg.type = XCS_CONNECT_CTRL; - xcs_send(xcs_ctrl_fd, &msg); - xcs_read(xcs_ctrl_fd, &msg); - if (msg.result != XCS_RSLT_OK) - { - printf("Error connecting control channel\n"); - exit(-1); - } - - ret = sock_connect(XCS_SUN_PATH); - if (ret < 0) - { - printf("connect failed!\n"); - exit(-1); - } - xcs_data_fd = ret; - - msg.type = 
XCS_CONNECT_DATA; - /* session id is set from before... */ - xcs_send(xcs_data_fd, &msg); - xcs_read(xcs_data_fd, &msg); - if (msg.result != XCS_RSLT_OK) - { - printf("Error connecting data channel\n"); - exit(-1); - } - - msg.type = XCS_MSG_BIND; - msg.u.bind.port = PORT_WILDCARD; - msg.u.bind.type = TYPE_WILDCARD; - xcs_send(xcs_ctrl_fd, &msg); - xcs_read(xcs_ctrl_fd, &msg); - if (msg.result != XCS_RSLT_OK) - { - printf("Error binding.\n"); - exit(-1); - } - - - while (1) - { - xcs_read(xcs_data_fd, &msg); - cmsg = &msg.u.control.msg; - - switch (msg.type) - { - case XCS_REQUEST: - if (!verbose || verbose & (1 << msg.u.control.msg.type)) - { - printf("[REQUEST ] : (dom:%u port:%d) (type:(%d,%d) len %d)\n", - msg.u.control.remote_dom, - msg.u.control.local_port, - msg.u.control.msg.type, - msg.u.control.msg.subtype, - msg.u.control.msg.length); - - dump_msg(cmsg, verbose); - } - break; - case XCS_RESPONSE: - if (!verbose || verbose & (1 << msg.u.control.msg.type)) - { - printf("[RESPONSE] : (dom:%u port:%d) (type:(%d,%d) len %d)\n", - msg.u.control.remote_dom, - msg.u.control.local_port, - msg.u.control.msg.type, - msg.u.control.msg.subtype, - msg.u.control.msg.length); - - dump_msg(cmsg, verbose); - } - break; - case XCS_VIRQ: - printf("[VIRQ ] : %d\n", msg.u.control.local_port); - break; - default: - printf("[UNKNOWN ] : %d\n", msg.type); - } - } - - return(0); -} diff -r 10b1d30d3f66 -r b2f4823b6ff0 xen/common/dom_mem_ops.c --- a/xen/common/dom_mem_ops.c Thu Sep 8 15:18:40 2005 +++ /dev/null Fri Sep 9 16:30:54 2005 @@ -1,186 +0,0 @@ -/****************************************************************************** - * dom_mem_ops.c - * - * Code to handle memory related requests from domains eg. balloon driver. - * - * Copyright (c) 2003-2004, B Dragovic & K A Fraser. 
- */ - -#include <xen/config.h> -#include <xen/types.h> -#include <xen/lib.h> -#include <xen/mm.h> -#include <xen/perfc.h> -#include <xen/sched.h> -#include <xen/event.h> -#include <xen/shadow.h> -#include <asm/current.h> -#include <asm/hardirq.h> - -/* - * To allow safe resume of do_dom_mem_op() after preemption, we need to know - * at what point in the page list to resume. For this purpose I steal the - * high-order bits of the @op parameter, which are otherwise unused and zero. - */ -#define START_EXTENT_SHIFT 4 /* op[:4] == start_extent */ - -#define PREEMPT_CHECK(_op) \ - if ( hypercall_preempt_check() ) \ - return hypercall5_create_continuation( \ - __HYPERVISOR_dom_mem_op, \ - (_op) | (i << START_EXTENT_SHIFT), \ - extent_list, nr_extents, extent_order, \ - (d == current->domain) ? DOMID_SELF : d->domain_id); - -static long -alloc_dom_mem(struct domain *d, - unsigned long *extent_list, - unsigned long start_extent, - unsigned int nr_extents, - unsigned int extent_order, - unsigned int flags) -{ - struct pfn_info *page; - unsigned long i; - - if ( (extent_list != NULL) && - !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) ) - return start_extent; - - if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current->domain) ) - { - DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n"); - return start_extent; - } - - for ( i = start_extent; i < nr_extents; i++ ) - { - PREEMPT_CHECK(MEMOP_increase_reservation); - - if ( unlikely((page = alloc_domheap_pages(d, extent_order, - flags)) == NULL) ) - { - DPRINTK("Could not allocate a frame\n"); - return i; - } - - /* Inform the domain of the new page's machine address. 
*/ - if ( (extent_list != NULL) && - (__put_user(page_to_pfn(page), &extent_list[i]) != 0) ) - return i; - } - - return i; -} - -static long -free_dom_mem(struct domain *d, - unsigned long *extent_list, - unsigned long start_extent, - unsigned int nr_extents, - unsigned int extent_order) -{ - struct pfn_info *page; - unsigned long i, j, mpfn; - - if ( !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) ) - return start_extent; - - for ( i = start_extent; i < nr_extents; i++ ) - { - PREEMPT_CHECK(MEMOP_decrease_reservation); - - if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) ) - return i; - - for ( j = 0; j < (1 << extent_order); j++ ) - { - if ( unlikely((mpfn + j) >= max_page) ) - { - DPRINTK("Domain %u page number out of range (%lx >= %lx)\n", - d->domain_id, mpfn + j, max_page); - return i; - } - - page = &frame_table[mpfn + j]; - if ( unlikely(!get_page(page, d)) ) - { - DPRINTK("Bad page free for domain %u\n", d->domain_id); - return i; - } - - if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) - put_page_and_type(page); - - if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) - put_page(page); - - shadow_sync_and_drop_references(d, page); - - put_page(page); - } - } - - return i; -} - -long -do_dom_mem_op(unsigned long op, - unsigned long *extent_list, - unsigned int nr_extents, - unsigned int extent_order, - domid_t domid) -{ - struct domain *d; - unsigned long rc, start_extent; - unsigned int address_bits_order; - - /* Extract @start_extent from @op. 
*/ - start_extent = op >> START_EXTENT_SHIFT; - op &= (1 << START_EXTENT_SHIFT) - 1; - - /* seperate extent_order and address_bits_order */ - address_bits_order = (extent_order >> 8) & 0xff; - extent_order &= 0xff; - - if ( unlikely(start_extent > nr_extents) ) - return -EINVAL; - - if ( likely(domid == DOMID_SELF) ) - d = current->domain; - else if ( unlikely(!IS_PRIV(current->domain)) ) - return -EPERM; - else if ( unlikely((d = find_domain_by_id(domid)) == NULL) ) - return -ESRCH; - - switch ( op ) - { - case MEMOP_increase_reservation: - rc = alloc_dom_mem( - d, extent_list, start_extent, nr_extents, extent_order, - (address_bits_order <= 32) ? ALLOC_DOM_DMA : 0); - break; - case MEMOP_decrease_reservation: - rc = free_dom_mem( - d, extent_list, start_extent, nr_extents, extent_order); - break; - default: - rc = -ENOSYS; - break; - } - - if ( unlikely(domid != DOMID_SELF) ) - put_domain(d); - - return rc; -} - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |