[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] Merged.
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID c9772105fead52abf64213aa1eda6419871acf94 # Parent 76bff6c996b0250739229181e40bc9c349f80c15 # Parent a08aef9f1c8e6f0350e89ee34ad6d3ba54027958 Merged. diff -r 76bff6c996b0 -r c9772105fead buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Thu Dec 8 15:04:31 2005 +++ b/buildconfigs/Rules.mk Thu Dec 8 15:04:41 2005 @@ -21,6 +21,7 @@ # Expand Linux series to Linux version LINUX_SERIES ?= 2.6 +LINUX_VER ?= $(shell grep "^LINUX_VER" buildconfigs/mk.linux-2.6-xen | sed -e 's/.*=[ ]*//') # Setup Linux search path LINUX_SRC_PATH ?= .:.. @@ -109,6 +110,13 @@ %-config: $(MAKE) -f buildconfigs/mk.$* config +linux-2.6-xen.patch: ref-linux-$(LINUX_VER)/.valid-ref + rm -rf tmp-$@ + cp -al $(<D) tmp-$@ + ( cd linux-2.6-xen-sparse && ./mkbuildtree ../tmp-$@ ) + diff -Nurp $(<D) tmp-$@ > $@ || true + rm -rf tmp-$@ + %-xen.patch: ref-%/.valid-ref rm -rf tmp-$@ cp -al $(<D) tmp-$@ diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/events.c --- a/extras/mini-os/events.c Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/events.c Thu Dec 8 15:04:41 2005 @@ -56,7 +56,7 @@ } -void bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) ) +int bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) ) { if(ev_actions[port].handler) printk("WARN: Handler for port %d already registered, replacing\n", @@ -67,6 +67,16 @@ /* Finally unmask the port */ unmask_evtchn(port); + + return port; +} + +void unbind_evtchn( u32 port ) +{ + if (!ev_actions[port].handler) + printk("WARN: No handler for port %d when unbinding\n", port); + ev_actions[port].handler = NULL; + ev_actions[port].status |= EVS_DISABLED; } int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) ) @@ -90,6 +100,10 @@ return ret; } +void unbind_virq( u32 port ) +{ + unbind_evtchn(port); +} /* diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/events.h --- a/extras/mini-os/include/events.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/events.h Thu Dec 8 15:04:41 2005 @@ -40,10 +40,12 @@ /* prototypes */ int do_event(u32 port, struct pt_regs *regs); int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) ); -void bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) ); +int bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) ); +void unbind_evtchn( u32 port ); void init_events(void); +void unbind_virq( u32 port ); -static inline int notify_via_evtchn(int port) +static inline int notify_remote_via_evtchn(int port) { evtchn_op_t op; op.cmd = EVTCHNOP_send; diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/os.h --- a/extras/mini-os/include/os.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/os.h Thu Dec 8 15:04:41 2005 @@ -131,9 +131,11 @@ #if defined(__i386__) #define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") #define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define wmb() __asm__ __volatile__ ("": : :"memory") #elif defined(__x86_64__) #define mb() __asm__ __volatile__ ("mfence":::"memory") #define rmb() __asm__ __volatile__ ("lfence":::"memory") +#define wmb() __asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */ #endif diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/semaphore.h --- a/extras/mini-os/include/semaphore.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/semaphore.h Thu Dec 8 15:04:41 2005 @@ -2,6 +2,7 @@ #define _SEMAPHORE_H_ #include <wait.h> +#include <spinlock.h> /* * Implementation of semaphore in Mini-os is simple, because @@ -14,6 +15,15 @@ struct wait_queue_head wait; }; +/* + * the semaphore definition + */ +struct rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; + int debug; +}; #define __SEMAPHORE_INITIALIZER(name, n) \ { \ @@ -31,6 +41,12 @@ #define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0) +static inline void init_MUTEX(struct semaphore *sem) +{ + sem->count = 1; + init_waitqueue_head(&sem->wait); +} + static void inline down(struct semaphore *sem) { wait_event(sem->wait, sem->count > 0); @@ -43,4 +59,27 @@ wake_up(&sem->wait); } +/* FIXME! Thre read/write semaphores are unimplemented! */ +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = 1; +} + +static inline void down_read(struct rw_semaphore *sem) +{ +} + + +static inline void up_read(struct rw_semaphore *sem) +{ +} + +static inline void up_write(struct rw_semaphore *sem) +{ +} + +static inline void down_write(struct rw_semaphore *sem) +{ +} + #endif /* _SEMAPHORE_H */ diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/wait.h --- a/extras/mini-os/include/wait.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/wait.h Thu Dec 8 15:04:41 2005 @@ -33,6 +33,10 @@ } +static inline void init_waitqueue_head(struct wait_queue_head *h) +{ + INIT_LIST_HEAD(&h->thread_list); +} static inline void init_waitqueue_entry(struct wait_queue *q, struct thread *thread) { diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/xenbus.h --- a/extras/mini-os/include/xenbus.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/xenbus.h Thu Dec 8 15:04:41 2005 @@ -4,6 +4,7 @@ * Talks to Xen Store to figure out what devices we have. * * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -30,45 +31,98 @@ #ifndef _ASM_XEN_XENBUS_H #define _ASM_XEN_XENBUS_H - -/* Caller must hold this lock to call these functions: it's also held - * across watch callbacks. */ -// TODO -//extern struct semaphore xenbus_lock; - -char **xenbus_directory(const char *dir, const char *node, unsigned int *num); -void *xenbus_read(const char *dir, const char *node, unsigned int *len); -int xenbus_write(const char *dir, const char *node, - const char *string, int createflags); -int xenbus_mkdir(const char *dir, const char *node); -int xenbus_exists(const char *dir, const char *node); -int xenbus_rm(const char *dir, const char *node); -int xenbus_transaction_start(const char *subtree); -int xenbus_transaction_end(int abort); - -/* Single read and scanf: returns -errno or num scanned if > 0. */ -int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(scanf, 3, 4))); - -/* Single printf and write: returns -errno or 0. */ -int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(printf, 3, 4))); - -/* Generic read function: NULL-terminated triples of name, - * sprintf-style type string, and pointer. Returns 0 or errno.*/ -int xenbus_gather(const char *dir, ...); +#include <errno.h> +#include <xen/io/xenbus.h> +#include <xen/io/xs_wire.h> /* Register callback to watch this node. */ struct xenbus_watch { struct list_head list; - char *node; - void (*callback)(struct xenbus_watch *, const char *node); -}; + + /* Path being watched. */ + const char *node; + + /* Callback (executed in a process context with no locks held). */ + void (*callback)(struct xenbus_watch *, + const char **vec, unsigned int len); +}; + + +/* A xenbus device. */ +struct xenbus_device { + const char *devicetype; + const char *nodename; + const char *otherend; + int otherend_id; + struct xenbus_watch otherend_watch; + int has_error; + void *data; +}; + +struct xenbus_device_id +{ + /* .../device/<device_type>/<identifier> */ + char devicetype[32]; /* General class of device. */ +}; + +/* A xenbus driver. */ +struct xenbus_driver { + char *name; + struct module *owner; + const struct xenbus_device_id *ids; + int (*probe)(struct xenbus_device *dev, + const struct xenbus_device_id *id); + void (*otherend_changed)(struct xenbus_device *dev, + XenbusState backend_state); + int (*remove)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev); + int (*resume)(struct xenbus_device *dev); + int (*hotplug)(struct xenbus_device *, char **, int, char *, int); + int (*read_otherend_details)(struct xenbus_device *dev); +}; + +int xenbus_register_frontend(struct xenbus_driver *drv); +int xenbus_register_backend(struct xenbus_driver *drv); +void xenbus_unregister_driver(struct xenbus_driver *drv); + +struct xenbus_transaction; + +char **xenbus_directory(struct xenbus_transaction *t, + const char *dir, const char *node, unsigned int *num); +void *xenbus_read(struct xenbus_transaction *t, + const char *dir, const char *node, unsigned int *len); +int xenbus_write(struct xenbus_transaction *t, + const char *dir, const char *node, const char *string); +int xenbus_mkdir(struct xenbus_transaction *t, + const char *dir, const char *node); +int xenbus_exists(struct xenbus_transaction *t, + const char *dir, const char *node); +int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node); +struct xenbus_transaction *xenbus_transaction_start(void); +int xenbus_transaction_end(struct xenbus_transaction *t, int abort); + +/* Single read and scanf: returns -errno or num scanned if > 0. */ +int xenbus_scanf(struct xenbus_transaction *t, + const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(scanf, 4, 5))); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction *t, + const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(printf, 4, 5))); + +/* Generic read function: NULL-terminated triples of name, + * sprintf-style type string, and pointer. Returns 0 or errno.*/ +int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...); int register_xenbus_watch(struct xenbus_watch *watch); void unregister_xenbus_watch(struct xenbus_watch *watch); -void reregister_xenbus_watches(void); +void xs_suspend(void); +void xs_resume(void); + +/* Used by xenbus_dev to borrow kernel's store connection. */ +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); /* Called from xen core code. */ void xenbus_suspend(void); @@ -84,6 +138,87 @@ #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) -int xs_init(void); + +/** + * Register a watch on the given path, using the given xenbus_watch structure + * for storage, and the given callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given path will be saved as + * watch->node, and remains the caller's to free. On error, watch->node will + * be NULL, the device will switch to XenbusStateClosing, and the error will + * be saved in the store. + */ +int xenbus_watch_path(struct xenbus_device *dev, const char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); + + +/** + * Register a watch on the given path/path2, using the given xenbus_watch + * structure for storage, and the given callback function as the callback. + * Return 0 on success, or -errno on error. On success, the watched path + * (path/path2) will be saved as watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); + + +/** + * Advertise in the store a change of the given driver to the given new_state. + * Perform the change inside the given transaction xbt. xbt may be NULL, in + * which case this is performed inside its own transaction. Return 0 on + * success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, + struct xenbus_transaction *xbt, + XenbusState new_state); + + +/** + * Grant access to the given ring_mfn to the peer of the given device. Return + * 0 on success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); + + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); + + +/** + * Return the state of the driver rooted at the given store path, or + * XenbusStateClosed if no state can be read. + */ +XenbusState xenbus_read_driver_state(const char *path); + + +/*** + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...); + + +/*** + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...); + #endif /* _ASM_XEN_XENBUS_H */ diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/xmalloc.h --- a/extras/mini-os/include/xmalloc.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/xmalloc.h Thu Dec 8 15:04:41 2005 @@ -6,6 +6,9 @@ /* Allocate space for array of typed objects. */ #define xmalloc_array(_type, _num) ((_type *)_xmalloc_array(sizeof(_type), __alignof__(_type), _num)) + +#define malloc(size) _xmalloc(size, 4) +#define free(ptr) xfree(ptr) /* Free any of the above. */ extern void xfree(const void *); diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/kernel.c Thu Dec 8 15:04:41 2005 @@ -35,6 +35,7 @@ #include <lib.h> #include <sched.h> #include <xenbus.h> +#include "xenbus/xenbus_comms.h" /* * Shared page for communicating with the hypervisor. diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_comms.c --- a/extras/mini-os/xenbus/xenbus_comms.c Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/xenbus/xenbus_comms.c Thu Dec 8 15:04:41 2005 @@ -33,35 +33,19 @@ #include <events.h> #include <os.h> #include <lib.h> +#include <xenbus.h> +#include "xenbus_comms.h" +static int xenbus_irq; -#ifdef XENBUS_COMMS_DEBUG -#define DEBUG(_f, _a...) \ - printk("MINI_OS(file=xenbus_comms.c, line=%d) " _f "\n", __LINE__, ## _a) -#else -#define DEBUG(_f, _a...) ((void)0) -#endif - - -#define RINGBUF_DATASIZE ((PAGE_SIZE / 2) - sizeof(struct ringbuf_head)) -struct ringbuf_head -{ - u32 write; /* Next place to write to */ - u32 read; /* Next place to read from */ - u8 flags; - char buf[0]; -} __attribute__((packed)); +extern void xenbus_probe(void *); +extern int xenstored_ready; DECLARE_WAIT_QUEUE_HEAD(xb_waitq); -static inline struct ringbuf_head *outbuf(void) +static inline struct xenstore_domain_interface *xenstore_domain_interface(void) { return mfn_to_virt(start_info.store_mfn); -} - -static inline struct ringbuf_head *inbuf(void) -{ - return (struct ringbuf_head *)((char *)mfn_to_virt(start_info.store_mfn) + PAGE_SIZE/2); } static void wake_waiting(int port, struct pt_regs *regs) @@ -69,138 +53,112 @@ wake_up(&xb_waitq); } -static int check_buffer(const struct ringbuf_head *h) +static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) { - return (h->write < RINGBUF_DATASIZE && h->read < RINGBUF_DATASIZE); + return ((prod - cons) <= XENSTORE_RING_SIZE); } -/* We can't fill last byte: would look like empty buffer. */ -static void *get_output_chunk(const struct ringbuf_head *h, - void *buf, u32 *len) +static void *get_output_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) { - u32 read_mark; - - if (h->read == 0) - read_mark = RINGBUF_DATASIZE - 1; - else - read_mark = h->read - 1; - - /* Here to the end of buffer, unless they haven't read some out. */ - *len = RINGBUF_DATASIZE - h->write; - if (read_mark >= h->write) - *len = read_mark - h->write; - return (void *)((char *)buf + h->write); + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return buf + MASK_XENSTORE_IDX(prod); } -static const void *get_input_chunk(const struct ringbuf_head *h, - const void *buf, u32 *len) +static const void *get_input_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) { - /* Here to the end of buffer, unless they haven't written some. */ - *len = RINGBUF_DATASIZE - h->read; - if (h->write >= h->read) - *len = h->write - h->read; - return (void *)((char *)buf + h->read); -} - -static void update_output_chunk(struct ringbuf_head *h, u32 len) -{ - h->write += len; - if (h->write == RINGBUF_DATASIZE) - h->write = 0; -} - -static void update_input_chunk(struct ringbuf_head *h, u32 len) -{ - h->read += len; - if (h->read == RINGBUF_DATASIZE) - h->read = 0; -} - -static int output_avail(struct ringbuf_head *out) -{ - unsigned int avail; - - get_output_chunk(out, out->buf, &avail); - return avail != 0; + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return buf + MASK_XENSTORE_IDX(cons); } int xb_write(const void *data, unsigned len) { - struct ringbuf_head h; - struct ringbuf_head *out = outbuf(); + struct xenstore_domain_interface *intf = xenstore_domain_interface(); + XENSTORE_RING_IDX cons, prod; - do { + while (len != 0) { void *dst; unsigned int avail; - wait_event(xb_waitq, output_avail(out)); + wait_event(xb_waitq, (intf->req_prod - intf->req_cons) != + XENSTORE_RING_SIZE); - /* Read, then check: not that we don't trust store. - * Hell, some of my best friends are daemons. But, - * in this post-911 world... */ - h = *out; + /* Read indexes, then verify. */ + cons = intf->req_cons; + prod = intf->req_prod; mb(); - if (!check_buffer(&h)) { - return -1; /* ETERRORIST! */ - } + if (!check_indexes(cons, prod)) + return -EIO; - dst = get_output_chunk(&h, out->buf, &avail); + dst = get_output_chunk(cons, prod, intf->req, &avail); + if (avail == 0) + continue; if (avail > len) avail = len; + memcpy(dst, data, avail); - data = (void *)((char *)data + avail); + data = (void*) ( (unsigned long)data + avail ); len -= avail; - update_output_chunk(out, avail); - notify_via_evtchn(start_info.store_evtchn); - } while (len != 0); + + /* Other side must not see new header until data is there. */ + wmb(); + intf->req_prod += avail; + + /* This implies mb() before other side sees interrupt. */ + notify_remote_via_evtchn(start_info.store_evtchn); + } return 0; } -int xs_input_avail(void) -{ - unsigned int avail; - struct ringbuf_head *in = inbuf(); - - get_input_chunk(in, in->buf, &avail); - return avail != 0; -} - int xb_read(void *data, unsigned len) { - struct ringbuf_head h; - struct ringbuf_head *in = inbuf(); - int was_full; + struct xenstore_domain_interface *intf = xenstore_domain_interface(); + XENSTORE_RING_IDX cons, prod; while (len != 0) { unsigned int avail; const char *src; - wait_event(xb_waitq, xs_input_avail()); - h = *in; + wait_event(xb_waitq, + intf->rsp_cons != intf->rsp_prod); + + /* Read indexes, then verify. */ + cons = intf->rsp_cons; + prod = intf->rsp_prod; mb(); - if (!check_buffer(&h)) { - return -1; - } + if (!check_indexes(cons, prod)) + return -EIO; - src = get_input_chunk(&h, in->buf, &avail); + src = get_input_chunk(cons, prod, intf->rsp, &avail); + if (avail == 0) + continue; if (avail > len) avail = len; - was_full = !output_avail(&h); + + /* We must read header before we read data. */ + rmb(); memcpy(data, src, avail); - data = (void *)((char *)data + avail); + data = (void*) ( (unsigned long)data + avail ); len -= avail; - update_input_chunk(in, avail); - DEBUG("Finished read of %i bytes (%i to go)\n", avail, len); - /* If it was full, tell them we've taken some. */ - if (was_full) - notify_via_evtchn(start_info.store_evtchn); + + /* Other side must not see free space until we've copied out */ + mb(); + intf->rsp_cons += avail; + + printk("Finished read of %i bytes (%i to go)\n", avail, len); + + /* Implies mb(): they will see new header. */ + notify_remote_via_evtchn(start_info.store_evtchn); } - - /* If we left something, wake watch thread to deal with it. */ - if (xs_input_avail()) - wake_up(&xb_waitq); return 0; } @@ -208,24 +166,19 @@ /* Set up interrupt handler off store event channel. */ int xb_init_comms(void) { - printk("Init xenbus comms, store event channel %d\n", start_info.store_evtchn); - if (!start_info.store_evtchn) - return 0; - printk("Binding virq\n"); - bind_evtchn(start_info.store_evtchn, &wake_waiting); + int err; - /* FIXME zero out page -- domain builder should probably do this*/ - memset(mfn_to_virt(start_info.store_mfn), 0, PAGE_SIZE); - notify_via_evtchn(start_info.store_evtchn); + if (xenbus_irq) + unbind_evtchn(xenbus_irq); + + err = bind_evtchn( + start_info.store_evtchn, wake_waiting); + if (err <= 0) { + printk("XENBUS request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + return 0; } - -void xb_suspend_comms(void) -{ - - if (!start_info.store_evtchn) - return; - - // TODO - //unbind_evtchn_from_irqhandler(xen_start_info.store_evtchn, &xb_waitq); -} diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_comms.h --- a/extras/mini-os/xenbus/xenbus_comms.h Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/xenbus/xenbus_comms.h Thu Dec 8 15:04:41 2005 @@ -28,8 +28,8 @@ #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H +int xs_init(void); int xb_init_comms(void); -void xb_suspend_comms(void); /* Low level routines. */ int xb_write(const void *data, unsigned len); diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/xenbus/xenbus_xs.c --- a/extras/mini-os/xenbus/xenbus_xs.c Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/xenbus/xenbus_xs.c Thu Dec 8 15:04:41 2005 @@ -39,15 +39,63 @@ #include <wait.h> #include <sched.h> #include <semaphore.h> +#include <spinlock.h> #include <xen/io/xs_wire.h> #include "xenbus_comms.h" #define streq(a, b) (strcmp((a), (b)) == 0) -static char printf_buffer[4096]; +struct xs_stored_msg { + struct list_head list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + struct list_head reply_list; + spinlock_t reply_lock; + struct wait_queue_head reply_waitq; + + /* One request at a time. */ + struct semaphore request_mutex; + + /* Protect transactions against save/restore. */ + struct rw_semaphore suspend_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ static LIST_HEAD(watches); -//TODO -DECLARE_MUTEX(xenbus_lock); +static DEFINE_SPINLOCK(watches_lock); + +/* List of pending watch callback events, and a lock to protect it. */ +static LIST_HEAD(watch_events); +static DEFINE_SPINLOCK(watch_events_lock); + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +/* static */ DECLARE_MUTEX(xenwatch_mutex); +static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); static int get_error(const char *errorstring) { @@ -65,47 +113,82 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) { - struct xsd_sockmsg msg; - void *ret; - int err; - - err = xb_read(&msg, sizeof(msg)); - if (err) - return ERR_PTR(err); - - ret = xmalloc_array(char, msg.len + 1); - if (!ret) - return ERR_PTR(-ENOMEM); - - err = xb_read(ret, msg.len); - if (err) { - xfree(ret); - return ERR_PTR(err); - } - ((char*)ret)[msg.len] = '\0'; - - *type = msg.type; + struct xs_stored_msg *msg; + char *body; + + spin_lock(&xs_state.reply_lock); + + while (list_empty(&xs_state.reply_list)) { + spin_unlock(&xs_state.reply_lock); + wait_event(xs_state.reply_waitq, + !list_empty(&xs_state.reply_list)); + spin_lock(&xs_state.reply_lock); + } + + msg = list_entry(xs_state.reply_list.next, + struct xs_stored_msg, list); + list_del(&msg->list); + + spin_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; if (len) - *len = msg.len; - return ret; + *len = msg->hdr.len; + body = msg->u.reply.body; + + free(msg); + + return body; } /* Emergency write. */ void xenbus_debug_write(const char *str, unsigned int count) { - struct xsd_sockmsg msg; + struct xsd_sockmsg msg = { 0 }; msg.type = XS_DEBUG; msg.len = sizeof("print") + count + 1; + down(&xs_state.request_mutex); xb_write(&msg, sizeof(msg)); xb_write("print", sizeof("print")); xb_write(str, count); xb_write("", 1); + up(&xs_state.request_mutex); +} + +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) +{ + void *ret; + struct xsd_sockmsg req_msg = *msg; + int err; + + if (req_msg.type == XS_TRANSACTION_START) + down_read(&xs_state.suspend_mutex); + + down(&xs_state.request_mutex); + + err = xb_write(msg, sizeof(*msg) + msg->len); + if (err) { + msg->type = XS_ERROR; + ret = ERR_PTR(err); + } else { + ret = read_reply(&msg->type, &msg->len); + } + + up(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + up_read(&xs_state.suspend_mutex); + + return ret; } /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ -static void *xs_talkv(enum xsd_sockmsg_type type, +static void *xs_talkv(struct xenbus_transaction *t, + enum xsd_sockmsg_type type, const struct kvec *iovec, unsigned int num_vecs, unsigned int *len) @@ -115,51 +198,57 @@ unsigned int i; int err; - //WARN_ON(down_trylock(&xenbus_lock) == 0); - + msg.tx_id = (u32)(unsigned long)t; + msg.req_id = 0; msg.type = type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; + down(&xs_state.request_mutex); + err = xb_write(&msg, sizeof(msg)); - if (err) + if (err) { + up(&xs_state.request_mutex); return ERR_PTR(err); + } for (i = 0; i < num_vecs; i++) { - err = xb_write(iovec[i].iov_base, iovec[i].iov_len); - if (err) + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; + if (err) { + up(&xs_state.request_mutex); return ERR_PTR(err); - } - - /* Watches can have fired before reply comes: daemon detects - * and re-transmits, so we can ignore this. */ - do { - xfree(ret); - ret = read_reply(&msg.type, len); - if (IS_ERR(ret)) - return ret; - } while (msg.type == XS_WATCH_EVENT); + } + } + + ret = read_reply(&msg.type, len); + + up(&xs_state.request_mutex); + + if (IS_ERR(ret)) + return ret; if (msg.type == XS_ERROR) { err = get_error(ret); - xfree(ret); + free(ret); return ERR_PTR(-err); } - //BUG_ON(msg.type != type); + // BUG_ON(msg.type != type); return ret; } /* Simplified version of xs_talkv: single message. */ -static void *xs_single(enum xsd_sockmsg_type type, - const char *string, unsigned int *len) +static void *xs_single(struct xenbus_transaction *t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) { struct kvec iovec; iovec.iov_base = (void *)string; iovec.iov_len = strlen(string) + 1; - return xs_talkv(type, &iovec, 1, len); + return xs_talkv(t, type, &iovec, 1, len); } /* Many commands only need an ack, don't care what it says. */ @@ -167,7 +256,7 @@ { if (IS_ERR(reply)) return PTR_ERR(reply); - xfree(reply); + free(reply); return 0; } @@ -182,60 +271,76 @@ return num; } -/* Return the path to dir with /name appended. */ +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ static char *join(const char *dir, const char *name) { - static char buffer[4096]; - - //BUG_ON(down_trylock(&xenbus_lock) == 0); - /* XXX FIXME: might not be correct if name == "" */ - //BUG_ON(strlen(dir) + strlen("/") + strlen(name) + 1 > sizeof(buffer)); + char *buffer; + + buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1); + if (buffer == NULL) + return ERR_PTR(-ENOMEM); strcpy(buffer, dir); if (!streq(name, "")) { strcat(buffer, "/"); strcat(buffer, name); } + return buffer; } -char **xenbus_directory(const char *dir, const char *node, unsigned int *num) -{ - char *strings, *p, **ret; - unsigned int len; - - strings = xs_single(XS_DIRECTORY, join(dir, node), &len); - if (IS_ERR(strings)) - return (char **)strings; +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; /* Count the strings. */ *num = count_strings(strings, len); /* Transfer to one big alloc for easy freeing. */ - ret = (char **)xmalloc_array(char, *num * sizeof(char *) + len); + ret = malloc(*num * sizeof(char *) + len); if (!ret) { - xfree(strings); + free(strings); return ERR_PTR(-ENOMEM); } memcpy(&ret[*num], strings, len); - xfree(strings); + free(strings); strings = (char *)&ret[*num]; for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) ret[(*num)++] = p; - return ret; + + return ret; +} + +char **xenbus_directory(struct xenbus_transaction *t, + const char *dir, const char *node, unsigned int *num) +{ + char *strings, *path; + unsigned int len; + + path = join(dir, node); + if (IS_ERR(path)) + return (char **)path; + + strings = xs_single(t, XS_DIRECTORY, path, &len); + free(path); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); } /* Check if a path exists. Return 1 if it does. */ -int xenbus_exists(const char *dir, const char *node) +int xenbus_exists(struct xenbus_transaction *t, + const char *dir, const char *node) { char **d; int dir_n; - d = xenbus_directory(dir, node, &dir_n); + d = xenbus_directory(t, dir, node, &dir_n); if (IS_ERR(d)) return 0; - xfree(d); + free(d); return 1; } @@ -243,92 +348,134 @@ * Returns a kmalloced value: call free() on it after use. * len indicates length in bytes. */ -void *xenbus_read(const char *dir, const char *node, unsigned int *len) -{ - return xs_single(XS_READ, join(dir, node), len); +void *xenbus_read(struct xenbus_transaction *t, + const char *dir, const char *node, unsigned int *len) +{ + char *path; + void *ret; + + path = join(dir, node); + if (IS_ERR(path)) + return (void *)path; + + ret = xs_single(t, XS_READ, path, len); + free(path); + return ret; } /* Write the value of a single file. - * Returns -err on failure. createflags can be 0, O_CREAT, or O_CREAT|O_EXCL. + * Returns -err on failure. */ -int xenbus_write(const char *dir, const char *node, - const char *string, int createflags) -{ - const char *flags, *path; - struct kvec iovec[3]; +int xenbus_write(struct xenbus_transaction *t, + const char *dir, const char *node, const char *string) +{ + const char *path; + struct kvec iovec[2]; + int ret; path = join(dir, node); - /* Format: Flags (as string), path, data. */ - if (createflags == 0) - flags = XS_WRITE_NONE; - else if (createflags == O_CREAT) - flags = XS_WRITE_CREATE; - else if (createflags == (O_CREAT|O_EXCL)) - flags = XS_WRITE_CREATE_EXCL; - else - return -EINVAL; + if (IS_ERR(path)) + return PTR_ERR(path); iovec[0].iov_base = (void *)path; iovec[0].iov_len = strlen(path) + 1; - iovec[1].iov_base = (void *)flags; - iovec[1].iov_len = strlen(flags) + 1; - iovec[2].iov_base = (void *)string; - iovec[2].iov_len = strlen(string); - - return xs_error(xs_talkv(XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); + iovec[1].iov_base = (void *)string; + iovec[1].iov_len = strlen(string); + + ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); + free(path); + return ret; } /* Create a new directory. */ -int xenbus_mkdir(const char *dir, const char *node) -{ - return xs_error(xs_single(XS_MKDIR, join(dir, node), NULL)); +int xenbus_mkdir(struct xenbus_transaction *t, + const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); + free(path); + return ret; } /* Destroy a file or directory (directories must be empty). */ -int xenbus_rm(const char *dir, const char *node) -{ - return xs_error(xs_single(XS_RM, join(dir, node), NULL)); +int xenbus_rm(struct xenbus_transaction *t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_RM, path, NULL)); + free(path); + return ret; } /* Start a transaction: changes by others will not be seen during this * transaction, and changes will not be visible to others until end. - * Transaction only applies to the given subtree. - * You can only have one transaction at any time. */ -int xenbus_transaction_start(const char *subtree) -{ - return xs_error(xs_single(XS_TRANSACTION_START, subtree, NULL)); +struct xenbus_transaction *xenbus_transaction_start(void) +{ + char *id_str; + unsigned long id; + + down_read(&xs_state.suspend_mutex); + + id_str = xs_single(NULL, XS_TRANSACTION_START, "", NULL); + if (IS_ERR(id_str)) { + up_read(&xs_state.suspend_mutex); + return (struct xenbus_transaction *)id_str; + } + + id = simple_strtoul(id_str, NULL, 0); + free(id_str); + + return (struct xenbus_transaction *)id; } /* End a transaction. * If abandon is true, transaction is discarded instead of committed. */ -int xenbus_transaction_end(int abort) +int xenbus_transaction_end(struct xenbus_transaction *t, int abort) { char abortstr[2]; + int err; if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); - return xs_error(xs_single(XS_TRANSACTION_END, abortstr, NULL)); + + err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); + + up_read(&xs_state.suspend_mutex); + + return err; } /* Single read and scanf: returns -errno or num scanned. */ -int xenbus_scanf(const char *dir, const char *node, const char *fmt, ...) +int xenbus_scanf(struct xenbus_transaction *t, + const char *dir, const char *node, const char *fmt, ...) { va_list ap; int ret; char *val; - val = xenbus_read(dir, node, NULL); + val = xenbus_read(t, dir, node, NULL); if (IS_ERR(val)) return PTR_ERR(val); va_start(ap, fmt); ret = vsscanf(val, fmt, ap); va_end(ap); - xfree(val); + free(val); /* Distinctive errno. */ if (ret == 0) return -ERANGE; @@ -336,23 +483,32 @@ } /* Single printf and write: returns -errno or 0. */ -int xenbus_printf(const char *dir, const char *node, const char *fmt, ...) +int xenbus_printf(struct xenbus_transaction *t, + const char *dir, const char *node, const char *fmt, ...) { va_list ap; int ret; - - //BUG_ON(down_trylock(&xenbus_lock) == 0); +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = malloc(PRINTF_BUFFER_SIZE); + if (printf_buffer == NULL) + return -ENOMEM; + va_start(ap, fmt); - ret = vsnprintf(printf_buffer, sizeof(printf_buffer), fmt, ap); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); va_end(ap); - //BUG_ON(ret > sizeof(printf_buffer)-1); - return xenbus_write(dir, node, printf_buffer, O_CREAT); -} - - + // BUG_ON(ret > PRINTF_BUFFER_SIZE-1); + ret = xenbus_write(t, dir, node, printf_buffer); + + free(printf_buffer); + + return ret; +} + /* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ -int xenbus_gather(const char *dir, ...) +int xenbus_gather(struct xenbus_transaction *t, const char *dir, ...) { va_list ap; const char *name; @@ -364,7 +520,7 @@ void *result = va_arg(ap, void *); char *p; - p = xenbus_read(dir, name, NULL); + p = xenbus_read(t, dir, name, NULL); if (IS_ERR(p)) { ret = PTR_ERR(p); break; @@ -372,7 +528,7 @@ if (fmt) { if (sscanf(p, fmt, result) == 0) ret = -EINVAL; - xfree(p); + free(p); } else *(char **)result = p; } @@ -389,31 +545,8 @@ iov[1].iov_base = (void *)token; iov[1].iov_len = strlen(token) + 1; - return xs_error(xs_talkv(XS_WATCH, iov, ARRAY_SIZE(iov), NULL)); -} - -static char *xs_read_watch(char **token) -{ - enum xsd_sockmsg_type type; - char *ret; - - ret = read_reply(&type, NULL); - if (IS_ERR(ret)) - return ret; - - //BUG_ON(type != XS_WATCH_EVENT); - *token = ret + strlen(ret) + 1; - return ret; -} - -static int xs_acknowledge_watch(const char *token) -{ -#if 0 - return xs_error(xs_single(XS_WATCH_ACK, token, NULL)); -#else - /* XS_WATCH_ACK is no longer available */ - return 0; -#endif + return xs_error(xs_talkv(NULL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); } static int xs_unwatch(const char *path, const char *token) @@ -425,10 +558,10 @@ iov[1].iov_base = (char *)token; iov[1].iov_len = strlen(token) + 1; - return xs_error(xs_talkv(XS_UNWATCH, iov, ARRAY_SIZE(iov), NULL)); -} - -/* A little paranoia: we don't just trust token. */ + return xs_error(xs_talkv(NULL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + static struct xenbus_watch *find_watch(const char *token) { struct xenbus_watch *i, *cmp; @@ -438,6 +571,7 @@ list_for_each_entry(i, &watches, list) if (i == cmp) return i; + return NULL; } @@ -449,111 +583,214 @@ int err; sprintf(token, "%lX", (long)watch); - //BUG_ON(find_watch(token)); -printk("Registered watch for: %s\n", token); + + down_read(&xs_state.suspend_mutex); + + spin_lock(&watches_lock); + // BUG_ON(find_watch(token)); + list_add(&watch->list, &watches); + spin_unlock(&watches_lock); + err = xs_watch(watch->node, token); - if (!err) - list_add(&watch->list, &watches); + + /* Ignore errors due to multiple registration. */ + if ((err != 0) && (err != -EEXIST)) { + spin_lock(&watches_lock); + list_del(&watch->list); + spin_unlock(&watches_lock); + } + + up_read(&xs_state.suspend_mutex); + return err; } void unregister_xenbus_watch(struct xenbus_watch *watch) { + struct xs_stored_msg *msg, *tmp; char token[sizeof(watch) * 2 + 1]; int err; sprintf(token, "%lX", (long)watch); - //BUG_ON(!find_watch(token)); + + down_read(&xs_state.suspend_mutex); + + spin_lock(&watches_lock); + // BUG_ON(!find_watch(token)); + list_del(&watch->list); + spin_unlock(&watches_lock); err = xs_unwatch(watch->node, token); - list_del(&watch->list); - if (err) printk("XENBUS Failed to release watch %s: %i\n", watch->node, err); -} - -/* Re-register callbacks to all watches. */ -void reregister_xenbus_watches(void) + + up_read(&xs_state.suspend_mutex); + + /* Cancel pending watch events. */ + spin_lock(&watch_events_lock); + list_for_each_entry_safe(msg, tmp, &watch_events, list) { + if (msg->u.watch.handle != watch) + continue; + list_del(&msg->list); + free(msg->u.watch.vec); + free(msg); + } + spin_unlock(&watch_events_lock); +} + +void xs_suspend(void) +{ + down_write(&xs_state.suspend_mutex); + down(&xs_state.request_mutex); +} + +void xs_resume(void) { struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; + up(&xs_state.request_mutex); + + /* No need for watches_lock: the suspend_mutex is sufficient. */ list_for_each_entry(watch, &watches, list) { sprintf(token, "%lX", (long)watch); xs_watch(watch->node, token); } -} - -void watch_thread(void *unused) -{ + + up_write(&xs_state.suspend_mutex); +} + +static void xenwatch_thread(void *unused) +{ + struct list_head *ent; + struct xs_stored_msg *msg; + for (;;) { - char *token; - char *node = NULL; - - wait_event(xb_waitq, xs_input_avail()); - - /* If this is a spurious wakeup caused by someone - * doing an op, they'll hold the lock and the buffer - * will be empty by the time we get there. - */ - down(&xenbus_lock); - if (xs_input_avail()) - node = xs_read_watch(&token); - - if (node && !IS_ERR(node)) { - struct xenbus_watch *w; - int err; - - err = xs_acknowledge_watch(token); - if (err) - printk("XENBUS ack %s fail %i\n", node, err); - w = find_watch(token); - //BUG_ON(!w); - w->callback(w, node); - xfree(node); - } else - printk("XENBUS xs_read_watch: %li\n", PTR_ERR(node)); - up(&xenbus_lock); - } -} - - -static void ballon_changed(struct xenbus_watch *watch, const char *node) -{ - unsigned long new_target; - int err; - err = xenbus_scanf("memory", "target", "%lu", &new_target); - - if(err != 1) - { - printk("Unable to read memory/target\n"); - return; - } - - printk("Memory target changed to: %ld bytes, ignoring.\n", new_target); -} - - -static struct xenbus_watch ballon_watch = { - .node = "memory/target", - .callback = ballon_changed, -}; - - + wait_event(watch_events_waitq, + !list_empty(&watch_events)); + + down(&xenwatch_mutex); + + spin_lock(&watch_events_lock); + ent = watch_events.next; + if (ent != &watch_events) + list_del(ent); + spin_unlock(&watch_events_lock); + + if (ent != &watch_events) { + msg = list_entry(ent, struct xs_stored_msg, list); + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + free(msg->u.watch.vec); + free(msg); + } + + up(&xenwatch_mutex); + } +} + +static int process_msg(void) +{ + struct xs_stored_msg *msg; + char *body; + int err; + + msg = malloc(sizeof(*msg)); + if (msg == NULL) + return -ENOMEM; + + err = xb_read(&msg->hdr, sizeof(msg->hdr)); + if (err) { + free(msg); + return err; + } + + body = malloc(msg->hdr.len + 1); + if (body == NULL) { + free(msg); + return -ENOMEM; + } + + err = xb_read(body, msg->hdr.len); + if (err) { + free(body); + free(msg); + return err; + } + body[msg->hdr.len] = '\0'; + + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + if (IS_ERR(msg->u.watch.vec)) { + free(msg); + return PTR_ERR(msg->u.watch.vec); + } + + spin_lock(&watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + spin_lock(&watch_events_lock); + list_add_tail(&msg->list, &watch_events); + wake_up(&watch_events_waitq); + spin_unlock(&watch_events_lock); + } else { + free(msg->u.watch.vec); + free(msg); + } + spin_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + spin_lock(&xs_state.reply_lock); + list_add_tail(&msg->list, &xs_state.reply_list); + spin_unlock(&xs_state.reply_lock); + wake_up(&xs_state.reply_waitq); + } + + return 0; +} + +static void xenbus_thread(void *unused) +{ + int err; + + for (;;) { + err = process_msg(); + if (err) + printk("XENBUS error %d while reading " + "message\n", err); + } +} int xs_init(void) { int err; - struct thread *watcher; - printk("xb_init_comms\n"); + struct thread *kxwatcher_thread; + struct thread *kxenbus_thread; + + INIT_LIST_HEAD(&xs_state.reply_list); + spin_lock_init(&xs_state.reply_lock); + init_waitqueue_head(&xs_state.reply_waitq); + + init_MUTEX(&xs_state.request_mutex); + init_rwsem(&xs_state.suspend_mutex); + + /* Initialize the shared memory rings to talk to xenstored */ err = xb_init_comms(); if (err) return err; - - watcher = create_thread("kxwatch", watch_thread, NULL); - down(&xenbus_lock); - register_xenbus_watch(&ballon_watch); - up(&xenbus_lock); + + kxwatcher_thread = create_thread("kxwatch", xenwatch_thread, NULL); + if (IS_ERR(kxwatcher_thread)) + return PTR_ERR(kxwatcher_thread); + + kxenbus_thread = create_thread("kxenbus", xenbus_thread, NULL); + if (IS_ERR(kxenbus_thread)) + return PTR_ERR(kxenbus_thread); + return 0; } diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Dec 8 15:04:31 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Dec 8 15:04:41 2005 @@ -192,8 +192,8 @@ page = balloon_retrieve(); BUG_ON(page == NULL); - pfn = page - mem_map; - BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); + pfn = page_to_pfn(page); + BUG_ON(phys_to_machine_mapping_valid(pfn)); /* Update P->M and M->P tables. */ set_phys_to_machine(pfn, mfn_list[i]); @@ -253,8 +253,8 @@ break; } - pfn = page - mem_map; - mfn_list[i] = phys_to_machine_mapping[pfn]; + pfn = page_to_pfn(page); + mfn_list[i] = pfn_to_mfn(pfn); if (!PageHighMem(page)) { v = phys_to_virt(pfn << PAGE_SHIFT); @@ -444,6 +444,9 @@ IPRINTK("Initialising balloon driver.\n"); + if (xen_init() < 0) + return -1; + current_pages = min(xen_start_info->nr_pages, max_pfn); target_pages = current_pages; balloon_low = 0; @@ -465,7 +468,7 @@ /* Initialise the balloon with excess memory space. */ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { - page = &mem_map[pfn]; + page = pfn_to_page(pfn); if (!PageReserved(page)) balloon_append(page); } diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Dec 8 15:04:31 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Dec 8 15:04:41 2005 @@ -65,6 +65,8 @@ extern unsigned long *phys_to_machine_mapping; #define pfn_to_mfn(pfn) \ (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL<<31)) +#define phys_to_machine_mapping_valid(pfn) \ + (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) static inline unsigned long mfn_to_pfn(unsigned long mfn) { unsigned long pfn; diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Thu Dec 8 15:04:31 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypercall.h Thu Dec 8 15:04:41 2005 @@ -355,34 +355,27 @@ #endif return 1; } +#endif static inline int HYPERVISOR_update_va_mapping( unsigned long va, pte_t new_val, unsigned long flags) { -#if 0 - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_update_va_mapping), - "1" (va), "2" ((new_val).pte_low), "3" (flags) - : "memory" ); - - if ( unlikely(ret < 0) ) - { - printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n", - va, (new_val).pte_low, flags); - BUG(); - } - - return ret; -#endif - return 1; -} -#endif + /* no-op */ + return 1; +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + int ret; + __asm__ __volatile__ ( ";; mov r14=%2 ; mov r15=%3 ; mov r2=%1 ; break 0x1000 ;; mov %0=r8 ;;" + : "=r" (ret) + : "i" (__HYPERVISOR_console_io), "r"(cmd), "r"(arg) + : "r14","r15","r2","r8","memory" ); + return ret; +} static inline int HYPERVISOR_event_channel_op( diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h Thu Dec 8 15:04:31 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-ia64/hypervisor.h Thu Dec 8 15:04:41 2005 @@ -52,4 +52,19 @@ #define mfn_to_pfn(x) (x) #define machine_to_phys_mapping 0 +// for drivers/xen/balloon/balloon.c +#ifdef CONFIG_XEN_SCRUB_PAGES +#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) +#else +#define scrub_pages(_p,_n) ((void)0) +#endif +#define pte_mfn(_x) pte_pfn(_x) +#define INVALID_P2M_ENTRY (~0UL) +#define __pte_ma(_x) ((pte_t) {(_x)}) +#define phys_to_machine_mapping_valid(_x) (1) +#define kmap_flush_unused() do {} while (0) +#define set_phys_to_machine(_x,_y) do {} while (0) +#define xen_machphys_update(_x,_y) do {} while (0) +#define pfn_pte_ma(_x,_y) __pte_ma(0) + #endif /* __HYPERVISOR_H__ */ diff -r 76bff6c996b0 -r c9772105fead linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Thu Dec 8 15:04:31 2005 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h Thu Dec 8 15:04:41 2005 @@ -67,6 +67,8 @@ extern unsigned long *phys_to_machine_mapping; #define pfn_to_mfn(pfn) \ (phys_to_machine_mapping[(unsigned int)(pfn)] & ~(1UL << 63)) +#define phys_to_machine_mapping_valid(pfn) \ + (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) static inline unsigned long mfn_to_pfn(unsigned long mfn) { unsigned long pfn; diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Thu Dec 8 15:04:31 2005 +++ b/xen/arch/x86/shadow.c Thu Dec 8 15:04:41 2005 @@ -1450,6 +1450,7 @@ int changed; u32 min_max_shadow, min_max_snapshot; int min_shadow, max_shadow, min_snapshot, max_snapshot; + struct vcpu *v; ASSERT(shadow_lock_is_acquired(d)); @@ -1739,6 +1740,9 @@ if ( unlikely(unshadow) ) { + for_each_vcpu(d, v) + if(smfn == pagetable_get_pfn(v->arch.shadow_table)) + return need_flush; perfc_incrc(unshadow_l2_count); shadow_unpin(smfn); #if CONFIG_PAGING_LEVELS == 2 diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Thu Dec 8 15:04:31 2005 +++ b/xen/arch/x86/shadow32.c Thu Dec 8 15:04:41 2005 @@ -2326,6 +2326,7 @@ int changed; u32 min_max_shadow, min_max_snapshot; int min_shadow, max_shadow, min_snapshot, max_snapshot; + struct vcpu *v; ASSERT(shadow_lock_is_acquired(d)); @@ -2527,6 +2528,9 @@ if ( unlikely(unshadow) ) { + for_each_vcpu(d, v) + if(smfn == pagetable_get_pfn(v->arch.shadow_table)) + return need_flush; perfc_incrc(unshadow_l2_count); shadow_unpin(smfn); if ( unlikely(shadow_mode_external(d)) ) diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Thu Dec 8 15:04:31 2005 +++ b/xen/arch/x86/vmx.c Thu Dec 8 15:04:41 2005 @@ -108,7 +108,7 @@ destroy_vmcs(&v->arch.arch_vmx); free_monitor_pagetable(v); vpit = &v->domain->arch.vmx_platform.vmx_pit; - if ( vpit->ticking && active_ac_timer(&(vpit->pit_timer)) ) + if ( active_ac_timer(&(vpit->pit_timer)) ) rem_ac_timer(&vpit->pit_timer); if ( active_ac_timer(&v->arch.arch_vmx.hlt_timer) ) { rem_ac_timer(&v->arch.arch_vmx.hlt_timer); @@ -905,7 +905,7 @@ int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c) { - unsigned long mfn, old_cr4; + unsigned long mfn, old_cr4, old_base_mfn; int error = 0; error |= __vmwrite(GUEST_RIP, c->eip); @@ -945,7 +945,12 @@ return 0; } mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT); + if(!get_page(pfn_to_page(mfn), v->domain)) + return 0; + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); + if (old_base_mfn) + put_page(pfn_to_page(old_base_mfn)); update_pagetables(v); /* * arch.shadow_table should now hold the next CR3 for shadow @@ -1174,9 +1179,11 @@ } if(!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled) - if(v->arch.arch_vmx.cpu_cr3) + if(v->arch.arch_vmx.cpu_cr3){ put_page(pfn_to_page(get_mfn_from_pfn( v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT))); + v->arch.guest_table = mk_pagetable(0); + } /* * VMX does not implement real-mode virtualization. We emulate diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_intercept.c --- a/xen/arch/x86/vmx_intercept.c Thu Dec 8 15:04:31 2005 +++ b/xen/arch/x86/vmx_intercept.c Thu Dec 8 15:04:41 2005 @@ -387,7 +387,6 @@ } else { init_ac_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor); - vpit->ticking = 1; } /* init count for this channel */ diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Thu Dec 8 15:04:31 2005 +++ b/xen/arch/x86/vmx_io.c Thu Dec 8 15:04:41 2005 @@ -748,7 +748,7 @@ { /* clear the event *before* checking for work. This should avoid the set-and-check races */ - if (vmx_clear_pending_io_event(current)) + if (vmx_clear_pending_io_event(v)) vmx_io_assist(v); } @@ -793,29 +793,39 @@ return __fls(pintr[0]); } +void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit) +{ + u64 drift; + + if ( vpit->first_injected ) + drift = vpit->period_cycles * vpit->pending_intr_nr; + else + drift = 0; + drift = v->arch.arch_vmx.tsc_offset - drift; + __vmwrite(TSC_OFFSET, drift); + +#if defined (__i386__) + __vmwrite(TSC_OFFSET_HIGH, (drift >> 32)); +#endif +} + #define BSP_CPU(v) (!(v->vcpu_id)) static inline void interrupt_post_injection(struct vcpu * v, int vector, int type) { struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit); - u64 drift; if ( is_pit_irq(v, vector, type) ) { if ( !vpit->first_injected ) { + vpit->pending_intr_nr = 0; + vpit->scheduled = NOW() + vpit->period; + set_ac_timer(&vpit->pit_timer, vpit->scheduled); vpit->first_injected = 1; - vpit->pending_intr_nr = 0; } else { vpit->pending_intr_nr--; } vpit->inject_point = NOW(); - drift = vpit->period_cycles * vpit->pending_intr_nr; - drift = v->arch.arch_vmx.tsc_offset - drift; - __vmwrite(TSC_OFFSET, drift); - -#if defined (__i386__) - __vmwrite(TSC_OFFSET_HIGH, (drift >> 32)); -#endif - + set_tsc_shift (v, vpit); } switch(type) @@ -982,8 +992,10 @@ vmx_wait_io(); } /* pick up the elapsed PIT ticks and re-enable pit_timer */ - if ( vpit->ticking ) + if ( vpit->first_injected ) { pickup_deactive_ticks(vpit); + } + set_tsc_shift(v,vpit); /* We can't resume the guest if we're waiting on I/O */ ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)); diff -r 76bff6c996b0 -r c9772105fead xen/arch/x86/vmx_vmcs.c --- a/xen/arch/x86/vmx_vmcs.c Thu Dec 8 15:04:31 2005 +++ b/xen/arch/x86/vmx_vmcs.c Thu Dec 8 15:04:41 2005 @@ -243,9 +243,6 @@ { struct vmx_platform *platform; - if (!(VMX_DOMAIN(current) && (current->vcpu_id == 0))) - return; - vmx_map_io_shared_page(d); vmx_set_vcpu_nr(d); @@ -290,6 +287,7 @@ /* Update CR3, GDT, LDT, TR */ unsigned int error = 0; unsigned long cr0, cr4; + u64 host_tsc; if (v->vcpu_id == 0) vmx_setup_platform(v->domain); @@ -337,6 +335,10 @@ __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom()); v->arch.schedule_tail = arch_vmx_do_resume; + /* init guest tsc to start from 0 */ + rdtscll(host_tsc); + v->arch.arch_vmx.tsc_offset = 0 - host_tsc; + set_tsc_shift (v, &v->domain->arch.vmx_platform.vmx_pit); } /* @@ -366,7 +368,6 @@ error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0); /* TSC */ - error |= __vmwrite(TSC_OFFSET, 0); error |= __vmwrite(CR3_TARGET_COUNT, 0); /* Guest Selectors */ diff -r 76bff6c996b0 -r c9772105fead xen/include/asm-x86/vmx_vpit.h --- a/xen/include/asm-x86/vmx_vpit.h Thu Dec 8 15:04:31 2005 +++ b/xen/include/asm-x86/vmx_vpit.h Thu Dec 8 15:04:41 2005 @@ -27,7 +27,6 @@ unsigned int pending_intr_nr; /* the couner for pending timer interrupts */ u32 period; /* pit frequency in ns */ int first_injected; /* flag to prevent shadow window */ - int ticking; /* indicating it is ticking */ /* virtual PIT state for handle related I/O */ int read_state; @@ -51,5 +50,6 @@ else return -1; } +extern void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit); #endif /* _VMX_VIRPIT_H_ */ diff -r 76bff6c996b0 -r c9772105fead extras/mini-os/include/spinlock.h --- /dev/null Thu Dec 8 15:04:31 2005 +++ b/extras/mini-os/include/spinlock.h Thu Dec 8 15:04:41 2005 @@ -0,0 +1,121 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include <lib.h> + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int slock; +} spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 } + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. + */ + +#define spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0) +#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "1:\n" \ + LOCK \ + "decb %0\n\t" \ + "jns 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + "3:\n\t" + +#define spin_lock_string_flags \ + "1:\n" \ + LOCK \ + "decb %0\n\t" \ + "jns 4f\n\t" \ + "2:\t" \ + "testl $0x200, %1\n\t" \ + "jz 3f\n\t" \ + "#sti\n\t" \ + "3:\t" \ + "rep;nop\n\t" \ + "cmpb $0, %0\n\t" \ + "jle 3b\n\t" \ + "#cli\n\t" \ + "jmp 1b\n" \ + "4:\n\t" + +/* + * This works. Despite all the confusion. + * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->slock) \ + :"0" (oldval) : "memory" + +static inline void _raw_spin_unlock(spinlock_t *lock) +{ + char oldval = 1; + __asm__ __volatile__( + spin_unlock_string + ); +} + +static inline int _raw_spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1\n" + :"=q" (oldval), "=m" (lock->slock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void _raw_spin_lock(spinlock_t *lock) +{ + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->slock) : : "memory"); +} + +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) +{ + __asm__ __volatile__( + spin_lock_string_flags + :"=m" (lock->slock) : "r" (flags) : "memory"); +} + +#define _spin_trylock(lock) ({_raw_spin_trylock(lock) ? \ + 1 : ({ 0;});}) + +#define _spin_lock(lock) \ +do { \ + _raw_spin_lock(lock); \ +} while(0) + +#define _spin_unlock(lock) \ +do { \ + _raw_spin_unlock(lock); \ +} while (0) + + +#define spin_lock(lock) _spin_lock(lock) +#define spin_unlock(lock) _spin_unlock(lock) + +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED + +#endif _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |