[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxxx # Node ID 4762d73ced42da37b957cd465b191b4f9c8ea3b7 # Parent ed56ef3e9716c407351918424e2c1054a249c4f9 # Parent 35c724302bdd1339e17dad43085c841917a5dd88 merge with xen-unstable.hg --- xen/arch/powerpc/boot/boot32.S | 75 -- xen/arch/powerpc/boot/start.S | 51 - xen/arch/powerpc/delay.c | 37 - xen/arch/powerpc/mambo.S | 64 -- xen/include/asm-powerpc/misc.h | 33 - xen/include/asm-powerpc/uaccess.h | 38 - .hgignore | 13 config/powerpc64.mk | 2 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c | 10 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c | 25 tools/blktap/drivers/blktapctrl.c | 49 + tools/examples/external-device-migrate | 4 tools/ioemu/target-i386-dm/exec-dm.c | 11 tools/ioemu/vl.c | 7 tools/libaio/src/syscall-ppc.h | 6 tools/libxc/powerpc64/Makefile | 4 tools/libxc/powerpc64/flatdevtree.c | 23 tools/libxc/powerpc64/flatdevtree.h | 2 tools/libxc/powerpc64/utils.c | 211 ++++++ tools/libxc/powerpc64/utils.h | 38 + tools/libxc/powerpc64/xc_linux_build.c | 292 ++------- tools/libxc/powerpc64/xc_prose_build.c | 323 ++++++++++ tools/libxc/xc_linux_build.c | 16 tools/libxc/xc_load_elf.c | 28 tools/libxc/xenctrl.h | 4 tools/libxc/xenguest.h | 15 tools/libxc/xg_private.h | 1 tools/libxen/include/xen_console.h | 4 tools/libxen/include/xen_host.h | 4 tools/libxen/include/xen_host_cpu.h | 4 tools/libxen/include/xen_network.h | 4 tools/libxen/include/xen_pif.h | 4 tools/libxen/include/xen_sr.h | 4 tools/libxen/include/xen_user.h | 4 tools/libxen/include/xen_vdi.h | 4 tools/libxen/include/xen_vif.h | 4 tools/libxen/include/xen_vm.h | 66 +- tools/libxen/include/xen_vtpm.h | 4 tools/libxen/src/xen_vm.c | 119 +++ tools/python/xen/lowlevel/xc/xc.c | 83 ++ tools/python/xen/xend/FlatDeviceTree.py | 94 ++ tools/python/xen/xend/XendDomain.py | 4 tools/python/xen/xend/XendDomainInfo.py | 30 tools/python/xen/xend/image.py | 68 ++ tools/python/xen/xend/server/DevController.py | 35 + tools/python/xen/xend/server/blkif.py | 6 
tools/python/xen/xm/main.py | 18 tools/xenstore/xenstored_domain.c | 2 xen/arch/powerpc/Makefile | 69 -- xen/arch/powerpc/backtrace.c | 34 - xen/arch/powerpc/bitops.c | 124 +-- xen/arch/powerpc/boot_of.c | 621 +++++++++++++------ xen/arch/powerpc/cmdline.c | 24 xen/arch/powerpc/crash.c | 1 xen/arch/powerpc/dart.c | 13 xen/arch/powerpc/dart_u4.c | 7 xen/arch/powerpc/domain.c | 33 - xen/arch/powerpc/domain_build.c | 3 xen/arch/powerpc/domctl.c | 6 xen/arch/powerpc/exceptions.c | 34 - xen/arch/powerpc/exceptions.h | 7 xen/arch/powerpc/external.c | 30 xen/arch/powerpc/gdbstub.c | 1 xen/arch/powerpc/iommu.c | 34 - xen/arch/powerpc/machine_kexec.c | 6 xen/arch/powerpc/memory.c | 104 ++- xen/arch/powerpc/mm.c | 235 ++++++- xen/arch/powerpc/mpic.c | 127 +--- xen/arch/powerpc/mpic_init.c | 54 + xen/arch/powerpc/numa.c | 1 xen/arch/powerpc/of-devtree.h | 40 - xen/arch/powerpc/of-devwalk.c | 14 xen/arch/powerpc/of_handler/console.c | 12 xen/arch/powerpc/ofd_fixup.c | 12 xen/arch/powerpc/ofd_fixup_memory.c | 18 xen/arch/powerpc/papr/xlate.c | 259 ++++---- xen/arch/powerpc/powerpc64/exceptions.S | 18 xen/arch/powerpc/powerpc64/io.S | 65 +- xen/arch/powerpc/powerpc64/ppc970.c | 71 +- xen/arch/powerpc/powerpc64/ppc970_machinecheck.c | 7 xen/arch/powerpc/powerpc64/ppc970_scom.c | 175 +++-- xen/arch/powerpc/powerpc64/scom.h | 39 + xen/arch/powerpc/powerpc64/traps.c | 4 xen/arch/powerpc/rtas.c | 84 ++ xen/arch/powerpc/rtas.h | 34 + xen/arch/powerpc/setup.c | 144 ++-- xen/arch/powerpc/shadow.c | 7 xen/arch/powerpc/smp.c | 192 +++++- xen/arch/powerpc/smpboot.c | 29 xen/arch/powerpc/start.S | 62 + xen/arch/powerpc/systemsim.S | 64 ++ xen/arch/powerpc/time.c | 3 xen/arch/powerpc/usercopy.c | 248 ------- xen/arch/powerpc/xen.lds.S | 8 xen/arch/x86/crash.c | 4 xen/arch/x86/domain_build.c | 8 xen/arch/x86/mm.c | 12 xen/arch/x86/mm/shadow/common.c | 4 xen/arch/x86/mm/shadow/multi.c | 3 xen/arch/x86/numa.c | 2 xen/common/Makefile | 2 xen/common/domain.c | 25 xen/common/elf.c | 27 
xen/common/gdbstub.c | 1 xen/common/kexec.c | 14 xen/common/sched_credit.c | 663 +++++++++------------ xen/common/xencomm.c | 316 ++++++++++ xen/include/asm-powerpc/acpi.h | 2 xen/include/asm-powerpc/cache.h | 1 xen/include/asm-powerpc/config.h | 4 xen/include/asm-powerpc/debugger.h | 70 +- xen/include/asm-powerpc/delay.h | 16 xen/include/asm-powerpc/domain.h | 5 xen/include/asm-powerpc/flushtlb.h | 1 xen/include/asm-powerpc/grant_table.h | 12 xen/include/asm-powerpc/guest_access.h | 78 -- xen/include/asm-powerpc/mach-default/irq_vectors.h | 22 xen/include/asm-powerpc/mm.h | 100 ++- xen/include/asm-powerpc/msr.h | 4 xen/include/asm-powerpc/numa.h | 2 xen/include/asm-powerpc/page.h | 5 xen/include/asm-powerpc/powerpc64/string.h | 3 xen/include/asm-powerpc/processor.h | 108 ++- xen/include/asm-powerpc/smp.h | 22 xen/include/asm-powerpc/spinlock.h | 33 - xen/include/asm-powerpc/xenoprof.h | 26 xen/include/asm-x86/numa.h | 2 xen/include/asm-x86/page.h | 36 - xen/include/asm-x86/shadow.h | 3 xen/include/asm-x86/x86_32/page-2level.h | 6 xen/include/asm-x86/x86_32/page-3level.h | 29 xen/include/asm-x86/x86_64/page.h | 6 xen/include/public/arch-powerpc.h | 2 xen/include/public/domctl.h | 8 xen/include/public/io/fbif.h | 88 +- xen/include/public/io/kbdif.h | 70 +- xen/include/public/io/pciif.h | 44 - xen/include/public/io/xenbus.h | 12 xen/include/public/memory.h | 2 xen/include/public/sysctl.h | 2 xen/include/public/trace.h | 2 xen/include/public/xenoprof.h | 2 xen/include/xen/elfcore.h | 4 xen/include/xen/sched.h | 2 xen/include/xen/xencomm.h | 115 +++ 145 files changed, 4717 insertions(+), 2437 deletions(-) diff -r ed56ef3e9716 -r 4762d73ced42 .hgignore --- a/.hgignore Thu Dec 14 08:54:54 2006 -0700 +++ b/.hgignore Thu Dec 14 08:57:36 2006 -0700 @@ -53,6 +53,8 @@ ^docs/user/labels\.pl$ ^docs/user/user\.css$ ^docs/user/user\.html$ +^docs/xen-api/vm_lifecycle.eps$ +^docs/xen-api/xenapi-datamodel-graph.eps$ ^extras/mini-os/h/hypervisor-ifs$ ^extras/mini-os/h/xen-public$ 
^extras/mini-os/mini-os\..*$ @@ -98,17 +100,15 @@ ^tools/firmware/.*\.bin$ ^tools/firmware/.*\.sym$ ^tools/firmware/.*bios/.*bios.*\.txt$ +^tools/firmware/hvmloader/acpi/acpigen$ ^tools/firmware/hvmloader/hvmloader$ ^tools/firmware/hvmloader/roms\.h$ ^tools/firmware/rombios/BIOS-bochs-[^/]*$ ^tools/firmware/rombios/_rombios[^/]*_\.c$ ^tools/firmware/rombios/rombios[^/]*\.s$ -^tools/firmware/vmxassist/acpi\.h$ ^tools/firmware/vmxassist/gen$ ^tools/firmware/vmxassist/offsets\.h$ -^tools/firmware/vmxassist/roms\.h$ ^tools/firmware/vmxassist/vmxassist$ -^tools/firmware/vmxassist/vmxloader$ ^tools/ioemu/\.pc/.*$ ^tools/ioemu/config-host\.h$ ^tools/ioemu/config-host\.mak$ @@ -220,10 +220,11 @@ ^xen/arch/powerpc/dom0\.bin$ ^xen/arch/powerpc/asm-offsets\.s$ ^xen/arch/powerpc/firmware$ -^xen/arch/powerpc/firmware_image$ +^xen/arch/powerpc/firmware_image.bin$ ^xen/arch/powerpc/xen\.lds$ -^xen/arch/powerpc/.xen-syms$ -^xen/arch/powerpc/xen-syms.S$ +^xen/arch/powerpc/\.xen-syms$ +^xen/arch/powerpc/xen-syms\.S$ +^xen/arch/powerpc/cmdline.dep$ ^unmodified_drivers/linux-2.6/\.tmp_versions ^unmodified_drivers/linux-2.6/.*\.cmd$ ^unmodified_drivers/linux-2.6/.*\.ko$ diff -r ed56ef3e9716 -r 4762d73ced42 config/powerpc64.mk --- a/config/powerpc64.mk Thu Dec 14 08:54:54 2006 -0700 +++ b/config/powerpc64.mk Thu Dec 14 08:57:36 2006 -0700 @@ -1,5 +1,7 @@ CONFIG_POWERPC := y CONFIG_POWERPC := y CONFIG_POWERPC_$(XEN_OS) := y +CONFIG_XENCOMM := y + CFLAGS += -DELFSIZE=64 LIBDIR := lib diff -r ed56ef3e9716 -r 4762d73ced42 linux-2.6-xen-sparse/arch/i386/kernel/fixup.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Thu Dec 14 08:54:54 2006 -0700 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c Thu Dec 14 08:57:36 2006 -0700 @@ -43,17 +43,17 @@ fastcall void do_fixup_4gb_segment(struc char info[100]; int i; - if (test_and_set_bit(0, &printed)) + /* Ignore statically-linked init. 
*/ + if (current->tgid == 1) return; - - if (current->tgid == 1) /* Ignore statically linked init */ - return; HYPERVISOR_vm_assist( VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify); + if (test_and_set_bit(0, &printed)) + return; + sprintf(info, "%s (pid=%d)", current->comm, current->tgid); - DP(""); DP("***************************************************************"); diff -r ed56ef3e9716 -r 4762d73ced42 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Thu Dec 14 08:54:54 2006 -0700 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Thu Dec 14 08:57:36 2006 -0700 @@ -110,6 +110,18 @@ set_cpu_sibling_map(int cpu) cpu_data[cpu].booted_cores = 1; } +static void +remove_siblinginfo(int cpu) +{ + phys_proc_id[cpu] = BAD_APICID; + cpu_core_id[cpu] = BAD_APICID; + + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + + cpu_data[cpu].booted_cores = 0; +} + static int xen_smp_intr_init(unsigned int cpu) { int rc; @@ -358,18 +370,6 @@ static int __init initialize_cpu_present } core_initcall(initialize_cpu_present_map); -static void -remove_siblinginfo(int cpu) -{ - phys_proc_id[cpu] = BAD_APICID; - cpu_core_id[cpu] = BAD_APICID; - - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - - cpu_data[cpu].booted_cores = 0; -} - int __cpu_disable(void) { cpumask_t map = cpu_online_map; @@ -432,7 +432,6 @@ int __devinit __cpu_up(unsigned int cpu) /* This must be done before setting cpu_online_map */ set_cpu_sibling_map(cpu); wmb(); - rc = xen_smp_intr_init(cpu); if (rc) { diff -r ed56ef3e9716 -r 4762d73ced42 tools/blktap/drivers/blktapctrl.c --- a/tools/blktap/drivers/blktapctrl.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/blktap/drivers/blktapctrl.c Thu Dec 14 08:57:36 2006 -0700 @@ -57,6 +57,8 @@ #include "blktapctrl.h" #include "tapdisk.h" +#define PIDFILE "/var/run/blktapctrl.pid" + #define NUM_POLL_FDS 2 #define MSG_SIZE 4096 #define MAX_TIMEOUT 10 @@ -622,6 +624,42 @@ static void 
print_drivers(void) DPRINTF("Found driver: [%s]\n",dtypes[i]->name); } +static void write_pidfile(long pid) +{ + char buf[100]; + int len; + int fd; + int flags; + + fd = open(PIDFILE, O_RDWR | O_CREAT, 0600); + if (fd == -1) { + DPRINTF("Opening pid file failed (%d)\n", errno); + exit(1); + } + + /* We exit silently if daemon already running. */ + if (lockf(fd, F_TLOCK, 0) == -1) + exit(0); + + /* Set FD_CLOEXEC, so that tapdisk doesn't get this file + descriptor. */ + if ((flags = fcntl(fd, F_GETFD)) == -1) { + DPRINTF("F_GETFD failed (%d)\n", errno); + exit(1); + } + flags |= FD_CLOEXEC; + if (fcntl(fd, F_SETFD, flags) == -1) { + DPRINTF("F_SETFD failed (%d)\n", errno); + exit(1); + } + + len = sprintf(buf, "%ld\n", pid); + if (write(fd, buf, len) != len) { + DPRINTF("Writing pid file failed (%d)\n", errno); + exit(1); + } +} + int main(int argc, char *argv[]) { char *devname; @@ -681,6 +719,7 @@ int main(int argc, char *argv[]) ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); process = getpid(); + write_pidfile(process); ret = ioctl(ctlfd, BLKTAP_IOCTL_SENDPID, process ); /*Static pollhooks*/ @@ -716,3 +755,13 @@ int main(int argc, char *argv[]) closelog(); return -1; } + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r ed56ef3e9716 -r 4762d73ced42 tools/examples/external-device-migrate --- a/tools/examples/external-device-migrate Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/examples/external-device-migrate Thu Dec 14 08:57:36 2006 -0700 @@ -60,8 +60,8 @@ function evaluate_params() -step) step=$2; shift 2;; -host) host=$2; shift 2;; -domname) domname=$2; shift 2;; - -type) type=$2; shift 2;; - -subtype) subtype=$2; shift 2;; + -type) typ=$2; shift 2;; + -subtype) stype=$2; shift 2;; -recover) recover=1; shift;; -help) ext_dev_migrate_usage; exit 0;; *) break;; diff -r ed56ef3e9716 -r 4762d73ced42 tools/ioemu/target-i386-dm/exec-dm.c --- 
a/tools/ioemu/target-i386-dm/exec-dm.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/ioemu/target-i386-dm/exec-dm.c Thu Dec 14 08:57:36 2006 -0700 @@ -439,7 +439,12 @@ void cpu_physical_memory_rw(target_phys_ int l, io_index; uint8_t *ptr; uint32_t val; - + +#if defined(__i386__) || defined(__x86_64__) + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + pthread_mutex_lock(&mutex); +#endif + while (len > 0) { /* How much can we copy before the next page boundary? */ l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); @@ -504,6 +509,10 @@ void cpu_physical_memory_rw(target_phys_ buf += l; addr += l; } + +#if defined(__i386__) || defined(__x86_64__) + pthread_mutex_unlock(&mutex); +#endif } #endif diff -r ed56ef3e9716 -r 4762d73ced42 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/ioemu/vl.c Thu Dec 14 08:57:36 2006 -0700 @@ -5820,8 +5820,8 @@ static int qemu_map_cache_init(unsigned if (nr_pages < max_pages) max_pages = nr_pages; - nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT; - + nr_buckets = max_pages + (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1; + nr_buckets >>= (MCACHE_BUCKET_SHIFT - PAGE_SHIFT); fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets); mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache)); @@ -5857,8 +5857,7 @@ uint8_t *qemu_map_cache(target_phys_addr entry = &mapcache_entry[address_index % nr_buckets]; - if (entry->vaddr_base == NULL || entry->paddr_index != address_index) - { + if (entry->vaddr_base == NULL || entry->paddr_index != address_index) { /* We need to remap a bucket. 
*/ uint8_t *vaddr_base; unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT]; diff -r ed56ef3e9716 -r 4762d73ced42 tools/libaio/src/syscall-ppc.h --- a/tools/libaio/src/syscall-ppc.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libaio/src/syscall-ppc.h Thu Dec 14 08:57:36 2006 -0700 @@ -1,3 +1,6 @@ +#include <asm/unistd.h> +#include <errno.h> + #define __NR_io_setup 227 #define __NR_io_destroy 228 #define __NR_io_getevents 229 @@ -9,7 +12,7 @@ * "sc; bnslr" sequence) and CR (where only CR0.SO is clobbered to signal * an error return status). */ - +#ifndef __syscall_nr #define __syscall_nr(nr, type, name, args...) \ unsigned long __sc_ret, __sc_err; \ { \ @@ -37,6 +40,7 @@ } \ if (__sc_err & 0x10000000) return -((int)__sc_ret); \ return (type) __sc_ret +#endif #define __sc_loadargs_0(name, dummy...) \ __sc_0 = __NR_##name diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/Makefile --- a/tools/libxc/powerpc64/Makefile Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/powerpc64/Makefile Thu Dec 14 08:57:36 2006 -0700 @@ -1,4 +1,6 @@ GUEST_SRCS-y += powerpc64/xc_linux_build +GUEST_SRCS-y += powerpc64/flatdevtree.c GUEST_SRCS-y += powerpc64/xc_linux_build.c -GUEST_SRCS-y += powerpc64/flatdevtree.c +GUEST_SRCS-y += powerpc64/xc_prose_build.c +GUEST_SRCS-y += powerpc64/utils.c CTRL_SRCS-y += powerpc64/xc_memory.c diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/flatdevtree.c --- a/tools/libxc/powerpc64/flatdevtree.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/powerpc64/flatdevtree.c Thu Dec 14 08:57:36 2006 -0700 @@ -220,6 +220,29 @@ void ft_add_rsvmap(struct ft_cxt *cxt, u cxt->p_anchor = cxt->pres + 16; /* over the terminator */ } +int ft_set_rsvmap(void *bphp, int m, u64 physaddr, u64 size) +{ + const struct boot_param_header *bph = bphp; + u64 *p_rsvmap = (u64 *) + ((char *)bph + be32_to_cpu(bph->off_mem_rsvmap)); + u32 i; + + for (i = 0;; i++) { + u64 addr, sz; + + addr = be64_to_cpu(p_rsvmap[i * 2]); + sz = be64_to_cpu(p_rsvmap[i * 2 + 1]); + if 
(addr == 0 && size == 0) + break; + if (m == i) { + p_rsvmap[i * 2] = cpu_to_be64(physaddr); + p_rsvmap[i * 2 + 1] = cpu_to_be64(size); + return 0; + } + } + return -1; +} + void ft_begin_tree(struct ft_cxt *cxt) { cxt->p_begin = cxt->p_anchor; diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/flatdevtree.h --- a/tools/libxc/powerpc64/flatdevtree.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/powerpc64/flatdevtree.h Thu Dec 14 08:57:36 2006 -0700 @@ -66,8 +66,10 @@ void ft_prop_int(struct ft_cxt *cxt, con void ft_prop_int(struct ft_cxt *cxt, const char *name, unsigned int val); void ft_begin(struct ft_cxt *cxt, void *blob, unsigned int max_size); void ft_add_rsvmap(struct ft_cxt *cxt, u64 physaddr, u64 size); +int ft_set_rsvmap(void *bphp, int m, u64 physaddr, u64 size); void ft_dump_blob(const void *bphp); +void ft_backtrack_node(struct ft_cxt *cxt); void ft_merge_blob(struct ft_cxt *cxt, void *blob); void *ft_find_node(const void *bphp, const char *srch_path); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/xc_linux_build.c --- a/tools/libxc/powerpc64/xc_linux_build.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/powerpc64/xc_linux_build.c Thu Dec 14 08:57:36 2006 -0700 @@ -35,60 +35,10 @@ #include "flatdevtree_env.h" #include "flatdevtree.h" +#include "utils.h" #define INITRD_ADDR (24UL << 20) #define DEVTREE_ADDR (16UL << 20) - -#define ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) - -#define max(x,y) ({ \ - const typeof(x) _x = (x); \ - const typeof(y) _y = (y); \ - (void) (&_x == &_y); \ - _x > _y ? 
_x : _y; }) - -static void *load_file(const char *path, unsigned long *filesize) -{ - void *img; - ssize_t size; - int fd; - - DPRINTF("load_file(%s)\n", path); - - fd = open(path, O_RDONLY); - if (fd < 0) { - perror(path); - return NULL; - } - - size = lseek(fd, 0, SEEK_END); - if (size < 0) { - perror(path); - close(fd); - return NULL; - } - lseek(fd, 0, SEEK_SET); - - img = malloc(size); - if (img == NULL) { - perror(path); - close(fd); - return NULL; - } - - size = read(fd, img, size); - if (size <= 0) { - perror(path); - close(fd); - free(img); - return NULL; - } - - if (filesize) - *filesize = size; - close(fd); - return img; -} static int init_boot_vcpu( int xc_handle, @@ -128,37 +78,6 @@ static int init_boot_vcpu( return rc; } -static int install_image( - int xc_handle, - int domid, - xen_pfn_t *page_array, - void *image, - unsigned long paddr, - unsigned long size) -{ - uint8_t *img = image; - int i; - int rc = 0; - - if (paddr & ~PAGE_MASK) { - printf("*** unaligned address\n"); - return -1; - } - - for (i = 0; i < size; i += PAGE_SIZE) { - void *page = img + i; - xen_pfn_t pfn = (paddr + i) >> PAGE_SHIFT; - xen_pfn_t mfn = page_array[pfn]; - - rc = xc_copy_to_domain_page(xc_handle, domid, mfn, page); - if (rc < 0) { - perror("xc_copy_to_domain_page"); - break; - } - } - return rc; -} - static int load_devtree( int xc_handle, int domid, @@ -167,10 +86,10 @@ static int load_devtree( unsigned long devtree_addr, uint64_t initrd_base, unsigned long initrd_len, - start_info_t *si, - unsigned long si_addr) -{ - uint32_t start_info[4] = {0, si_addr, 0, 0x1000}; + start_info_t *start_info __attribute__((unused)), + unsigned long start_info_addr) +{ + uint32_t si[4] = {0, start_info_addr, 0, 0x1000}; struct boot_param_header *header; void *chosen; void *xen; @@ -208,9 +127,14 @@ static int load_devtree( return rc; } + rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len); + if (rc < 0) { + DPRINTF("couldn't set initrd reservation\n"); + return ~0UL; + } + /* 
start-info (XXX being removed soon) */ - rc = ft_set_prop(&devtree, xen, "start-info", - start_info, sizeof(start_info)); + rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si)); if (rc < 0) { DPRINTF("couldn't set /xen/start-info\n"); return rc; @@ -218,91 +142,19 @@ static int load_devtree( header = devtree; devtree_size = header->totalsize; + { + static const char dtb[] = "/tmp/xc_domU.dtb"; + int dfd = creat(dtb, 0666); + if (dfd != -1) { + write(dfd, devtree, devtree_size); + close(dfd); + } else + DPRINTF("could not open(\"%s\")\n", dtb); + } DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, devtree_size); return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR, devtree_size); -} - -unsigned long spin_list[] = { -#if 0 - 0x100, - 0x200, - 0x300, - 0x380, - 0x400, - 0x480, - 0x500, - 0x700, - 0x900, - 0xc00, -#endif - 0 -}; - -/* XXX yes, this is a hack */ -static void hack_kernel_img(char *img) -{ - const off_t file_offset = 0x10000; - unsigned long *addr = spin_list; - - while (*addr) { - uint32_t *instruction = (uint32_t *)(img + *addr + file_offset); - printf("installing spin loop at %lx (%x)\n", *addr, *instruction); - *instruction = 0x48000000; - addr++; - } -} - -static int load_kernel( - int xc_handle, - int domid, - const char *kernel_path, - struct domain_setup_info *dsi, - xen_pfn_t *page_array) -{ - struct load_funcs load_funcs; - char *kernel_img; - unsigned long kernel_size; - int rc; - - /* load the kernel ELF file */ - kernel_img = load_file(kernel_path, &kernel_size); - if (kernel_img == NULL) { - rc = -1; - goto out; - } - - hack_kernel_img(kernel_img); - - DPRINTF("probe_elf\n"); - rc = probe_elf(kernel_img, kernel_size, &load_funcs); - if (rc < 0) { - rc = -1; - printf("%s is not an ELF file\n", kernel_path); - goto out; - } - - DPRINTF("parseimage\n"); - rc = (load_funcs.parseimage)(kernel_img, kernel_size, dsi); - if (rc < 0) { - rc = -1; - goto out; - } - - DPRINTF("loadimage\n"); - 
(load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid, - page_array, dsi); - - DPRINTF(" v_start %016"PRIx64"\n", dsi->v_start); - DPRINTF(" v_end %016"PRIx64"\n", dsi->v_end); - DPRINTF(" v_kernstart %016"PRIx64"\n", dsi->v_kernstart); - DPRINTF(" v_kernend %016"PRIx64"\n", dsi->v_kernend); - DPRINTF(" v_kernentry %016"PRIx64"\n", dsi->v_kernentry); - -out: - free(kernel_img); - return rc; } static int load_initrd( @@ -334,49 +186,38 @@ out: return rc; } -static unsigned long create_start_info(start_info_t *si, +static unsigned long create_start_info( + void *devtree, start_info_t *start_info, unsigned int console_evtchn, unsigned int store_evtchn, - unsigned long nr_pages) -{ - unsigned long si_addr; - - memset(si, 0, sizeof(*si)); - snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0); - - si->nr_pages = nr_pages; - si->shared_info = (nr_pages - 1) << PAGE_SHIFT; - si->store_mfn = si->nr_pages - 2; - si->store_evtchn = store_evtchn; - si->console.domU.mfn = si->nr_pages - 3; - si->console.domU.evtchn = console_evtchn; - si_addr = (si->nr_pages - 4) << PAGE_SHIFT; - - return si_addr; -} - -static int get_page_array(int xc_handle, int domid, xen_pfn_t **page_array, - unsigned long *nr_pages) -{ + unsigned long nr_pages, unsigned long rma_pages) +{ + unsigned long start_info_addr; + uint64_t rma_top; int rc; - DPRINTF("xc_get_tot_pages\n"); - *nr_pages = xc_get_tot_pages(xc_handle, domid); - DPRINTF(" 0x%lx\n", *nr_pages); - - *page_array = malloc(*nr_pages * sizeof(xen_pfn_t)); - if (*page_array == NULL) { - perror("malloc"); - return -1; - } - - DPRINTF("xc_get_pfn_list\n"); - rc = xc_get_pfn_list(xc_handle, domid, *page_array, *nr_pages); - if (rc != *nr_pages) { - perror("Could not get the page frame list"); - return -1; - } - - return 0; + memset(start_info, 0, sizeof(*start_info)); + snprintf(start_info->magic, sizeof(start_info->magic), + "xen-%d.%d-powerpc64HV", 3, 0); + + rma_top = rma_pages << PAGE_SHIFT; + DPRINTF("RMA top = 
0x%"PRIX64"\n", rma_top); + + start_info->nr_pages = nr_pages; + start_info->shared_info = rma_top - PAGE_SIZE; + start_info->store_mfn = (rma_top >> PAGE_SHIFT) - 2; + start_info->store_evtchn = store_evtchn; + start_info->console.domU.mfn = (rma_top >> PAGE_SHIFT) - 3; + start_info->console.domU.evtchn = console_evtchn; + start_info_addr = rma_top - 4*PAGE_SIZE; + + rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE); + if (rc < 0) { + DPRINTF("couldn't set start_info reservation\n"); + return ~0UL; + } + + + return start_info_addr; } static void free_page_array(xen_pfn_t *page_array) @@ -388,6 +229,7 @@ static void free_page_array(xen_pfn_t *p int xc_linux_build(int xc_handle, uint32_t domid, + unsigned int mem_mb, const char *image_name, const char *initrd_name, const char *cmdline, @@ -399,7 +241,7 @@ int xc_linux_build(int xc_handle, unsigned long *console_mfn, void *devtree) { - start_info_t si; + start_info_t start_info; struct domain_setup_info dsi; xen_pfn_t *page_array = NULL; unsigned long nr_pages; @@ -407,18 +249,28 @@ int xc_linux_build(int xc_handle, unsigned long kern_addr; unsigned long initrd_base = 0; unsigned long initrd_len = 0; - unsigned long si_addr; + unsigned long start_info_addr; + unsigned long rma_pages; int rc = 0; DPRINTF("%s\n", __func__); - if (get_page_array(xc_handle, domid, &page_array, &nr_pages)) { + nr_pages = mem_mb << (20 - PAGE_SHIFT); + DPRINTF("nr_pages 0x%lx\n", nr_pages); + + rma_pages = get_rma_pages(devtree); + if (rma_pages == 0) { + rc = -1; + goto out; + } + + if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) { rc = -1; goto out; } DPRINTF("loading image '%s'\n", image_name); - if (load_kernel(xc_handle, domid, image_name, &dsi, page_array)) { + if (load_elf_kernel(xc_handle, domid, image_name, &dsi, page_array)) { rc = -1; goto out; } @@ -434,11 +286,12 @@ int xc_linux_build(int xc_handle, } /* start_info stuff: about to be removed */ - si_addr = create_start_info(&si, console_evtchn, 
store_evtchn, nr_pages); - *console_mfn = page_array[si.console.domU.mfn]; - *store_mfn = page_array[si.store_mfn]; - if (install_image(xc_handle, domid, page_array, &si, si_addr, - sizeof(start_info_t))) { + start_info_addr = create_start_info(devtree, &start_info, console_evtchn, + store_evtchn, nr_pages, rma_pages); + *console_mfn = page_array[start_info.console.domU.mfn]; + *store_mfn = page_array[start_info.store_mfn]; + if (install_image(xc_handle, domid, page_array, &start_info, + start_info_addr, sizeof(start_info_t))) { rc = -1; goto out; } @@ -447,7 +300,8 @@ int xc_linux_build(int xc_handle, DPRINTF("loading flattened device tree\n"); devtree_addr = DEVTREE_ADDR; if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr, - initrd_base, initrd_len, &si, si_addr)) { + initrd_base, initrd_len, &start_info, + start_info_addr)) { DPRINTF("couldn't load flattened device tree.\n"); rc = -1; goto out; diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/xc_linux_build.c Thu Dec 14 08:57:36 2006 -0700 @@ -596,15 +596,21 @@ static int compat_check(int xc_handle, s } if (strstr(xen_caps, "xen-3.0-x86_32p")) { - if (dsi->pae_kernel == PAEKERN_no) { + if (dsi->pae_kernel == PAEKERN_bimodal) { + dsi->pae_kernel = PAEKERN_extended_cr3; + } else if (dsi->pae_kernel == PAEKERN_no) { xc_set_error(XC_INVALID_KERNEL, "Non PAE-kernel on PAE host."); return 0; } - } else if (dsi->pae_kernel != PAEKERN_no) { - xc_set_error(XC_INVALID_KERNEL, - "PAE-kernel on non-PAE host."); - return 0; + } else { + if (dsi->pae_kernel == PAEKERN_bimodal) { + dsi->pae_kernel = PAEKERN_no; + } else if (dsi->pae_kernel != PAEKERN_no) { + xc_set_error(XC_INVALID_KERNEL, + "PAE-kernel on non-PAE host."); + return 0; + } } return 1; diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/xc_load_elf.c Thu 
Dec 14 08:57:36 2006 -0700 @@ -325,17 +325,6 @@ static int parseelfimage(const char *ima return -EINVAL; } - /* Find the section-header strings table. */ - if ( ehdr->e_shstrndx == SHN_UNDEF ) - { - xc_set_error(XC_INVALID_KERNEL, - "ELF image has no section-header strings table (shstrtab)."); - return -EINVAL; - } - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + - (ehdr->e_shstrndx*ehdr->e_shentsize)); - shstrtab = image + shdr->sh_offset; - dsi->__elfnote_section = NULL; dsi->__xen_guest_string = NULL; @@ -354,6 +343,17 @@ static int parseelfimage(const char *ima /* Fall back to looking for the special '__xen_guest' section. */ if ( dsi->__elfnote_section == NULL ) { + /* Find the section-header strings table. */ + if ( ehdr->e_shstrndx == SHN_UNDEF ) + { + xc_set_error(XC_INVALID_KERNEL, + "ELF image has no section-header strings table."); + return -EINVAL; + } + shdr = (Elf_Shdr *)(image + ehdr->e_shoff + + (ehdr->e_shstrndx*ehdr->e_shentsize)); + shstrtab = image + shdr->sh_offset; + for ( h = 0; h < ehdr->e_shnum; h++ ) { shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize)); @@ -400,6 +400,8 @@ static int parseelfimage(const char *ima } /* + * A "bimodal" ELF note indicates the kernel will adjust to the + * current paging mode, including handling extended cr3 syntax. * If we have ELF notes then PAE=yes implies that we must support * the extended cr3 syntax. Otherwise we need to find the * [extended-cr3] syntax in the __xen_guest string. 
@@ -408,7 +410,9 @@ static int parseelfimage(const char *ima if ( dsi->__elfnote_section ) { p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE); - if ( p != NULL && strncmp(p, "yes", 3) == 0 ) + if ( p != NULL && strncmp(p, "bimodal", 7) == 0 ) + dsi->pae_kernel = PAEKERN_bimodal; + else if ( p != NULL && strncmp(p, "yes", 3) == 0 ) dsi->pae_kernel = PAEKERN_extended_cr3; } diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/xenctrl.h Thu Dec 14 08:57:36 2006 -0700 @@ -728,4 +728,8 @@ const char *xc_error_code_to_desc(int co */ xc_error_handler xc_set_error_handler(xc_error_handler handler); +/* PowerPC specific. */ +int xc_alloc_real_mode_area(int xc_handle, + uint32_t domid, + unsigned int log); #endif diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/xenguest.h Thu Dec 14 08:57:36 2006 -0700 @@ -122,4 +122,19 @@ int xc_get_hvm_param( int xc_get_hvm_param( int handle, domid_t dom, int param, unsigned long *value); +/* PowerPC specific. 
*/ +int xc_prose_build(int xc_handle, + uint32_t domid, + unsigned int mem_mb, + const char *image_name, + const char *ramdisk_name, + const char *cmdline, + const char *features, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn, + void *arch_args); + #endif /* XENGUEST_H */ diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxc/xg_private.h Thu Dec 14 08:57:36 2006 -0700 @@ -132,6 +132,7 @@ struct domain_setup_info #define PAEKERN_no 0 #define PAEKERN_yes 1 #define PAEKERN_extended_cr3 2 +#define PAEKERN_bimodal 3 unsigned int pae_kernel; unsigned int load_symtab; diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_console.h --- a/tools/libxen/include/xen_console.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_console.h Thu Dec 14 08:57:36 2006 -0700 @@ -149,14 +149,14 @@ xen_console_record_opt_set_free(xen_cons /** - * Get the current state of the given console. + * Get a record containing the current state of the given console. */ extern bool xen_console_get_record(xen_session *session, xen_console_record **result, xen_console console); /** - * Get a reference to the object with the specified UUID. + * Get a reference to the console instance with the specified UUID. */ extern bool xen_console_get_by_uuid(xen_session *session, xen_console *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_host.h --- a/tools/libxen/include/xen_host.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_host.h Thu Dec 14 08:57:36 2006 -0700 @@ -154,14 +154,14 @@ xen_host_record_opt_set_free(xen_host_re /** - * Get the current state of the given host. !!! + * Get a record containing the current state of the given host. 
*/ extern bool xen_host_get_record(xen_session *session, xen_host_record **result, xen_host host); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the host instance with the specified UUID. */ extern bool xen_host_get_by_uuid(xen_session *session, xen_host *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_host_cpu.h --- a/tools/libxen/include/xen_host_cpu.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_host_cpu.h Thu Dec 14 08:57:36 2006 -0700 @@ -153,14 +153,14 @@ xen_host_cpu_record_opt_set_free(xen_hos /** - * Get the current state of the given host_cpu. !!! + * Get a record containing the current state of the given host_cpu. */ extern bool xen_host_cpu_get_record(xen_session *session, xen_host_cpu_record **result, xen_host_cpu host_cpu); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the host_cpu instance with the specified UUID. */ extern bool xen_host_cpu_get_by_uuid(xen_session *session, xen_host_cpu *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_network.h --- a/tools/libxen/include/xen_network.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_network.h Thu Dec 14 08:57:36 2006 -0700 @@ -152,14 +152,14 @@ xen_network_record_opt_set_free(xen_netw /** - * Get the current state of the given network. !!! + * Get a record containing the current state of the given network. */ extern bool xen_network_get_record(xen_session *session, xen_network_record **result, xen_network network); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the network instance with the specified UUID. 
*/ extern bool xen_network_get_by_uuid(xen_session *session, xen_network *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_pif.h --- a/tools/libxen/include/xen_pif.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_pif.h Thu Dec 14 08:57:36 2006 -0700 @@ -155,14 +155,14 @@ xen_pif_record_opt_set_free(xen_pif_reco /** - * Get the current state of the given PIF. !!! + * Get a record containing the current state of the given PIF. */ extern bool xen_pif_get_record(xen_session *session, xen_pif_record **result, xen_pif pif); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the PIF instance with the specified UUID. */ extern bool xen_pif_get_by_uuid(xen_session *session, xen_pif *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_sr.h --- a/tools/libxen/include/xen_sr.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_sr.h Thu Dec 14 08:57:36 2006 -0700 @@ -153,14 +153,14 @@ xen_sr_record_opt_set_free(xen_sr_record /** - * Get the current state of the given SR. !!! + * Get a record containing the current state of the given SR. */ extern bool xen_sr_get_record(xen_session *session, xen_sr_record **result, xen_sr sr); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the SR instance with the specified UUID. */ extern bool xen_sr_get_by_uuid(xen_session *session, xen_sr *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_user.h --- a/tools/libxen/include/xen_user.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_user.h Thu Dec 14 08:57:36 2006 -0700 @@ -146,14 +146,14 @@ xen_user_record_opt_set_free(xen_user_re /** - * Get the current state of the given user. !!! + * Get a record containing the current state of the given user. 
*/ extern bool xen_user_get_record(xen_session *session, xen_user_record **result, xen_user user); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the user instance with the specified UUID. */ extern bool xen_user_get_by_uuid(xen_session *session, xen_user *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vdi.h --- a/tools/libxen/include/xen_vdi.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_vdi.h Thu Dec 14 08:57:36 2006 -0700 @@ -159,14 +159,14 @@ xen_vdi_record_opt_set_free(xen_vdi_reco /** - * Get the current state of the given VDI. !!! + * Get a record containing the current state of the given VDI. */ extern bool xen_vdi_get_record(xen_session *session, xen_vdi_record **result, xen_vdi vdi); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the VDI instance with the specified UUID. */ extern bool xen_vdi_get_by_uuid(xen_session *session, xen_vdi *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vif.h --- a/tools/libxen/include/xen_vif.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_vif.h Thu Dec 14 08:57:36 2006 -0700 @@ -156,14 +156,14 @@ xen_vif_record_opt_set_free(xen_vif_reco /** - * Get the current state of the given VIF. !!! + * Get a record containing the current state of the given VIF. */ extern bool xen_vif_get_record(xen_session *session, xen_vif_record **result, xen_vif vif); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the VIF instance with the specified UUID. 
*/ extern bool xen_vif_get_by_uuid(xen_session *session, xen_vif *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vm.h --- a/tools/libxen/include/xen_vm.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_vm.h Thu Dec 14 08:57:36 2006 -0700 @@ -79,6 +79,7 @@ typedef struct xen_vm_record char *name_description; int64_t user_version; bool is_a_template; + bool auto_power_on; struct xen_host_record_opt *resident_on; int64_t memory_static_max; int64_t memory_dynamic_max; @@ -198,14 +199,14 @@ xen_vm_record_opt_set_free(xen_vm_record /** - * Get the current state of the given VM. !!! + * Get a record containing the current state of the given VM. */ extern bool xen_vm_get_record(xen_session *session, xen_vm_record **result, xen_vm vm); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the VM instance with the specified UUID. */ extern bool xen_vm_get_by_uuid(xen_session *session, xen_vm *result, char *uuid); @@ -277,6 +278,13 @@ xen_vm_get_is_a_template(xen_session *se /** + * Get the auto_power_on field of the given VM. + */ +extern bool +xen_vm_get_auto_power_on(xen_session *session, bool *result, xen_vm vm); + + +/** * Get the resident_on field of the given VM. */ extern bool @@ -564,6 +572,13 @@ xen_vm_set_is_a_template(xen_session *se /** + * Set the auto_power_on field of the given VM. + */ +extern bool +xen_vm_set_auto_power_on(xen_session *session, xen_vm vm, bool auto_power_on); + + +/** * Set the memory/dynamic_max field of the given VM. */ extern bool @@ -592,6 +607,13 @@ xen_vm_set_vcpus_params(xen_session *ses /** + * Set the VCPUs/number field of the given VM. + */ +extern bool +xen_vm_set_vcpus_number(xen_session *session, xen_vm vm, int64_t number); + + +/** * Set the VCPUs/features/force_on field of the given VM. 
*/ extern bool @@ -599,10 +621,42 @@ xen_vm_set_vcpus_features_force_on(xen_s /** + * Add the given value to the VCPUs/features/force_on field of the + * given VM. If the value is already in that Set, then do nothing. + */ +extern bool +xen_vm_add_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value); + + +/** + * Remove the given value from the VCPUs/features/force_on field of the + * given VM. If the value is not in that Set, then do nothing. + */ +extern bool +xen_vm_remove_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value); + + +/** * Set the VCPUs/features/force_off field of the given VM. */ extern bool xen_vm_set_vcpus_features_force_off(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_off); + + +/** + * Add the given value to the VCPUs/features/force_off field of the + * given VM. If the value is already in that Set, then do nothing. + */ +extern bool +xen_vm_add_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value); + + +/** + * Remove the given value from the VCPUs/features/force_off field of + * the given VM. If the value is not in that Set, then do nothing. + */ +extern bool +xen_vm_remove_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value); /** @@ -817,12 +871,4 @@ xen_vm_get_all(xen_session *session, str xen_vm_get_all(xen_session *session, struct xen_vm_set **result); -/** - * Destroy the specified VM. The VM is completely removed from the system. - * This function can only be called when the VM is in the Halted State. 
- */ -extern bool -xen_vm_destroy(xen_session *session, xen_vm vm); - - #endif diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/include/xen_vtpm.h --- a/tools/libxen/include/xen_vtpm.h Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/include/xen_vtpm.h Thu Dec 14 08:57:36 2006 -0700 @@ -151,14 +151,14 @@ xen_vtpm_record_opt_set_free(xen_vtpm_re /** - * Get the current state of the given VTPM. !!! + * Get a record containing the current state of the given VTPM. */ extern bool xen_vtpm_get_record(xen_session *session, xen_vtpm_record **result, xen_vtpm vtpm); /** - * Get a reference to the object with the specified UUID. !!! + * Get a reference to the VTPM instance with the specified UUID. */ extern bool xen_vtpm_get_by_uuid(xen_session *session, xen_vtpm *result, char *uuid); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxen/src/xen_vm.c --- a/tools/libxen/src/xen_vm.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/libxen/src/xen_vm.c Thu Dec 14 08:57:36 2006 -0700 @@ -67,6 +67,9 @@ static const struct_member xen_vm_record { .key = "is_a_template", .type = &abstract_type_bool, .offset = offsetof(xen_vm_record, is_a_template) }, + { .key = "auto_power_on", + .type = &abstract_type_bool, + .offset = offsetof(xen_vm_record, auto_power_on) }, { .key = "resident_on", .type = &abstract_type_ref, .offset = offsetof(xen_vm_record, resident_on) }, @@ -399,6 +402,22 @@ xen_vm_get_is_a_template(xen_session *se bool +xen_vm_get_auto_power_on(xen_session *session, bool *result, xen_vm vm) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm } + }; + + abstract_type result_type = abstract_type_bool; + + XEN_CALL_("VM.get_auto_power_on"); + return session->ok; +} + + +bool xen_vm_get_resident_on(xen_session *session, xen_host *result, xen_vm vm) { abstract_value param_values[] = @@ -1082,6 +1101,22 @@ xen_vm_set_is_a_template(xen_session *se bool +xen_vm_set_auto_power_on(xen_session *session, xen_vm vm, bool auto_power_on) +{ + 
abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm }, + { .type = &abstract_type_bool, + .u.bool_val = auto_power_on } + }; + + xen_call_(session, "VM.set_auto_power_on", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool xen_vm_set_memory_dynamic_max(xen_session *session, xen_vm vm, int64_t dynamic_max) { abstract_value param_values[] = @@ -1146,6 +1181,22 @@ xen_vm_set_vcpus_params(xen_session *ses bool +xen_vm_set_vcpus_number(xen_session *session, xen_vm vm, int64_t number) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm }, + { .type = &abstract_type_int, + .u.int_val = number } + }; + + xen_call_(session, "VM.set_VCPUs_number", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool xen_vm_set_vcpus_features_force_on(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_on) { abstract_value param_values[] = @@ -1162,6 +1213,38 @@ xen_vm_set_vcpus_features_force_on(xen_s bool +xen_vm_add_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm }, + { .type = &xen_cpu_feature_abstract_type_, + .u.string_val = xen_cpu_feature_to_string(value) } + }; + + xen_call_(session, "VM.add_VCPUs_features_force_on", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool +xen_vm_remove_vcpus_features_force_on(xen_session *session, xen_vm vm, enum xen_cpu_feature value) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm }, + { .type = &xen_cpu_feature_abstract_type_, + .u.string_val = xen_cpu_feature_to_string(value) } + }; + + xen_call_(session, "VM.remove_VCPUs_features_force_on", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool xen_vm_set_vcpus_features_force_off(xen_session *session, xen_vm vm, struct xen_cpu_feature_set *force_off) { abstract_value 
param_values[] = @@ -1178,6 +1261,38 @@ xen_vm_set_vcpus_features_force_off(xen_ bool +xen_vm_add_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm }, + { .type = &xen_cpu_feature_abstract_type_, + .u.string_val = xen_cpu_feature_to_string(value) } + }; + + xen_call_(session, "VM.add_VCPUs_features_force_off", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool +xen_vm_remove_vcpus_features_force_off(xen_session *session, xen_vm vm, enum xen_cpu_feature value) +{ + abstract_value param_values[] = + { + { .type = &abstract_type_string, + .u.string_val = vm }, + { .type = &xen_cpu_feature_abstract_type_, + .u.string_val = xen_cpu_feature_to_string(value) } + }; + + xen_call_(session, "VM.remove_VCPUs_features_force_off", param_values, 2, NULL, NULL); + return session->ok; +} + + +bool xen_vm_set_actions_after_shutdown(xen_session *session, xen_vm vm, enum xen_on_normal_exit after_shutdown) { abstract_value param_values[] = @@ -1268,7 +1383,7 @@ xen_vm_set_platform_std_vga(xen_session .u.bool_val = std_vga } }; - xen_call_(session, "VM.set_platform_std_vga", param_values, 2, NULL, NULL); + xen_call_(session, "VM.set_platform_std_VGA", param_values, 2, NULL, NULL); return session->ok; } @@ -1444,7 +1559,7 @@ xen_vm_set_otherconfig(xen_session *sess .u.set_val = (arbitrary_set *)otherconfig } }; - xen_call_(session, "VM.set_otherconfig", param_values, 2, NULL, NULL); + xen_call_(session, "VM.set_otherConfig", param_values, 2, NULL, NULL); return session->ok; } diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/lowlevel/xc/xc.c Thu Dec 14 08:57:36 2006 -0700 @@ -919,6 +919,68 @@ static PyObject *dom_op(XcObject *self, return zero; } +#ifdef __powerpc__ +static PyObject *pyxc_alloc_real_mode_area(XcObject *self, + 
PyObject *args, + PyObject *kwds) +{ + uint32_t dom; + unsigned int log; + + static char *kwd_list[] = { "dom", "log", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list, + &dom, &log) ) + return NULL; + + if ( xc_alloc_real_mode_area(self->xc_handle, dom, log) ) + return PyErr_SetFromErrno(xc_error); + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_prose_build(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t dom; + char *image, *ramdisk = NULL, *cmdline = "", *features = NULL; + int flags = 0; + int store_evtchn, console_evtchn; + unsigned long store_mfn = 0; + unsigned long console_mfn = 0; + void *arch_args = NULL; + int unused; + + static char *kwd_list[] = { "dom", "store_evtchn", + "console_evtchn", "image", + /* optional */ + "ramdisk", "cmdline", "flags", + "features", "arch_args", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssiss#", kwd_list, + &dom, &store_evtchn, + &console_evtchn, &image, + /* optional */ + &ramdisk, &cmdline, &flags, + &features, &arch_args, &unused) ) + return NULL; + + if ( xc_prose_build(self->xc_handle, dom, image, + ramdisk, cmdline, features, flags, + store_evtchn, &store_mfn, + console_evtchn, &console_mfn, + arch_args) != 0 ) { + if (!errno) + errno = EINVAL; + return PyErr_SetFromErrno(xc_error); + } + return Py_BuildValue("{s:i,s:i}", + "store_mfn", store_mfn, + "console_mfn", console_mfn); +} +#endif /* powerpc */ static PyMethodDef pyxc_methods[] = { { "handle", @@ -1224,6 +1286,27 @@ static PyMethodDef pyxc_methods[] = { "Set a domain's time offset to Dom0's localtime\n" " dom [int]: Domain whose time offset is being set.\n" "Returns: [int] 0 on success; -1 on error.\n" }, + +#ifdef __powerpc__ + { "arch_alloc_real_mode_area", + (PyCFunction)pyxc_alloc_real_mode_area, + METH_VARARGS | METH_KEYWORDS, "\n" + "Allocate a domain's real mode area.\n" + " dom [int]: Identifier of domain.\n" + " log [int]: Specifies the area's size.\n" + "Returns: [int] 0 
on success; -1 on error.\n" }, + + { "arch_prose_build", + (PyCFunction)pyxc_prose_build, + METH_VARARGS | METH_KEYWORDS, "\n" + "Build a new Linux guest OS.\n" + " dom [int]: Identifier of domain to build into.\n" + " image [str]: Name of kernel image file. May be gzipped.\n" + " ramdisk [str, n/a]: Name of ramdisk file, if any.\n" + " cmdline [str, n/a]: Kernel parameters, if any.\n\n" + " vcpus [int, 1]: Number of Virtual CPUS in domain.\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, +#endif /* __powerpc */ { NULL, NULL, 0, NULL } }; diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/FlatDeviceTree.py --- a/tools/python/xen/xend/FlatDeviceTree.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xend/FlatDeviceTree.py Thu Dec 14 08:57:36 2006 -0700 @@ -22,6 +22,10 @@ import struct import struct import stat import re +import glob +import math + +_host_devtree_root = '/proc/device-tree' _OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning _OF_DT_BEGIN_NODE = 0x1 @@ -33,8 +37,10 @@ def _bincat(seq, separator=''): '''Concatenate the contents of seq into a bytestream.''' strs = [] for item in seq: - if type(item) == type(0): + if isinstance(item, int): strs.append(struct.pack(">I", item)) + elif isinstance(item, long): + strs.append(struct.pack(">Q", item)) else: try: strs.append(item.to_bin()) @@ -231,37 +237,50 @@ class Tree(_Node): header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8) return _pad(header.to_bin(), 8) + payload -_host_devtree_root = '/proc/device-tree' -def _getprop(propname): - '''Extract a property from the system's device tree.''' - f = file(os.path.join(_host_devtree_root, propname), 'r') +def _readfile(fullpath): + '''Return full contents of a file.''' + f = file(fullpath, 'r') data = f.read() f.close() return data +def _find_first_cpu(dirpath): + '''Find the first node of type 'cpu' in a directory tree.''' + cpulist = glob.glob(os.path.join(dirpath, 'cpus', '*')) + for node in cpulist: + 
try: + data = _readfile(os.path.join(node, 'device_type')) + except IOError: + continue + if 'cpu' in data: + return node + raise IOError("couldn't find any CPU nodes under " + dirpath) + def _copynode(node, dirpath, propfilter): - '''Extract all properties from a node in the system's device tree.''' + '''Copy all properties and children nodes from a directory tree.''' dirents = os.listdir(dirpath) for dirent in dirents: fullpath = os.path.join(dirpath, dirent) st = os.lstat(fullpath) if stat.S_ISDIR(st.st_mode): child = node.addnode(dirent) - _copytree(child, fullpath, propfilter) + _copynode(child, fullpath, propfilter) elif stat.S_ISREG(st.st_mode) and propfilter(fullpath): - node.addprop(dirent, _getprop(fullpath)) - -def _copytree(node, dirpath, propfilter): - path = os.path.join(_host_devtree_root, dirpath) - _copynode(node, path, propfilter) + node.addprop(dirent, _readfile(fullpath)) def build(imghandler): '''Construct a device tree by combining the domain's configuration and the host's device tree.''' root = Tree() - # 4 pages: start_info, console, store, shared_info + # 1st reseravtion entry used for start_info, console, store, shared_info root.reserve(0x3ffc000, 0x4000) + + # 2nd reservation enrty used for initrd, later on when we load the + # initrd we may fill this in with zeroes which signifies the end + # of the reservation map. So as to avoid adding a zero map now we + # put some bogus yet sensible numbers here. 
+ root.reserve(0x1000000, 0x1000) root.addprop('device_type', 'chrp-but-not-really\0') root.addprop('#size-cells', 2) @@ -270,35 +289,52 @@ def build(imghandler): root.addprop('compatible', 'Momentum,Maple\0') xen = root.addnode('xen') - xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000) + xen.addprop('start-info', long(0x3ffc000), long(0x1000)) xen.addprop('version', 'Xen-3.0-unstable\0') - xen.addprop('reg', 0, imghandler.vm.domid, 0, 0) + xen.addprop('reg', long(imghandler.vm.domid), long(0)) xen.addprop('domain-name', imghandler.vm.getName() + '\0') xencons = xen.addnode('console') xencons.addprop('interrupts', 1, 0) - # XXX split out RMA node - mem = root.addnode('memory@0') + # add memory nodes totalmem = imghandler.vm.getMemoryTarget() * 1024 - mem.addprop('reg', 0, 0, 0, totalmem) - mem.addprop('device_type', 'memory\0') - + rma_log = 26 ### imghandler.vm.info.get('powerpc_rma_log') + rma_bytes = 1 << rma_log + + # RMA node + rma = root.addnode('memory@0') + rma.addprop('reg', long(0), long(rma_bytes)) + rma.addprop('device_type', 'memory\0') + + # all the rest in a single node + remaining = totalmem - rma_bytes + if remaining > 0: + mem = root.addnode('memory@1') + mem.addprop('reg', long(rma_bytes), long(remaining)) + mem.addprop('device_type', 'memory\0') + + # add CPU nodes cpus = root.addnode('cpus') cpus.addprop('smp-enabled') cpus.addprop('#size-cells', 0) cpus.addprop('#address-cells', 1) # Copy all properties the system firmware gave us, except for 'linux,' - # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are - # identical... + # properties, from the first CPU node in the device tree. Do this once for + # every vcpu. Hopefully all cpus are identical... 
cpu0 = None + cpu0path = _find_first_cpu(_host_devtree_root) def _nolinuxprops(fullpath): return not os.path.basename(fullpath).startswith('linux,') for i in range(imghandler.vm.getVCpuCount()): - cpu = cpus.addnode('PowerPC,970@0') - _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops) - # and then overwrite what we need to - pft_size = imghandler.vm.info.get('pft-size', 0x14) + # create new node and copy all properties + cpu = cpus.addnode('PowerPC,970@%d' % i) + _copynode(cpu, cpu0path, _nolinuxprops) + + # overwrite what we need to + shadow_mb = imghandler.vm.info.get('shadow_memory', 1) + shadow_mb_log = int(math.log(shadow_mb, 2)) + pft_size = shadow_mb_log + 20 cpu.setprop('ibm,pft-size', 0, pft_size) # set default CPU @@ -307,13 +343,13 @@ def build(imghandler): chosen = root.addnode('chosen') chosen.addprop('cpu', cpu0.get_phandle()) - chosen.addprop('memory', mem.get_phandle()) + chosen.addprop('memory', rma.get_phandle()) chosen.addprop('linux,stdout-path', '/xen/console\0') chosen.addprop('interrupt-controller', xen.get_phandle()) chosen.addprop('bootargs', imghandler.cmdline + '\0') # xc_linux_load.c will overwrite these 64-bit properties later - chosen.addprop('linux,initrd-start', 0, 0) - chosen.addprop('linux,initrd-end', 0, 0) + chosen.addprop('linux,initrd-start', long(0)) + chosen.addprop('linux,initrd-end', long(0)) if 1: f = file('/tmp/domU.dtb', 'w') diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xend/XendDomain.py Thu Dec 14 08:57:36 2006 -0700 @@ -591,7 +591,9 @@ class XendDomain: try: self.domains_lock.acquire() result = [d.get_uuid() for d in self.domains.values()] - result += self.managed_domains.keys() + for d in self.managed_domains.keys(): + if d not in result: + result.append(d) return result finally: self.domains_lock.release() diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/XendDomainInfo.py --- 
a/tools/python/xen/xend/XendDomainInfo.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Dec 14 08:57:36 2006 -0700 @@ -167,7 +167,7 @@ def recreate(info, priv): @param xeninfo: Parsed configuration @type xeninfo: Dictionary - @param priv: TODO, unknown, something to do with memory + @param priv: Is a privileged domain (Dom 0) @type priv: bool @rtype: XendDomainInfo @@ -381,7 +381,7 @@ class XendDomainInfo: @type dompath: string @keyword augment: Augment given info with xenstored VM info @type augment: bool - @keyword priv: Is a privledged domain (Dom 0) (TODO: really?) + @keyword priv: Is a privileged domain (Dom 0) @type priv: bool @keyword resume: Is this domain being resumed? @type resume: bool @@ -563,7 +563,7 @@ class XendDomainInfo: for devclass in XendDevices.valid_devices(): self.getDeviceController(devclass).waitForDevices() - def destroyDevice(self, deviceClass, devid): + def destroyDevice(self, deviceClass, devid, force=None): try: devid = int(devid) except ValueError: @@ -578,7 +578,7 @@ class XendDomainInfo: devid = entry break - return self.getDeviceController(deviceClass).destroyDevice(devid) + return self.getDeviceController(deviceClass).destroyDevice(devid, force) @@ -647,6 +647,8 @@ class XendDomainInfo: if priv: augment_entries.remove('memory') augment_entries.remove('maxmem') + augment_entries.remove('vcpus') + augment_entries.remove('vcpu_avail') vm_config = self._readVMDetails([(k, XendConfig.LEGACY_CFG_TYPES[k]) for k in augment_entries]) @@ -663,6 +665,14 @@ class XendDomainInfo: self.info[xapiarg] = val else: self.info[arg] = val + + # For dom0, we ignore any stored value for the vcpus fields, and + # read the current value from Xen instead. This allows boot-time + # settings to take precedence over any entries in the store. 
+ if priv: + xeninfo = dom_get(self.domid) + self.info['vcpus_number'] = xeninfo['online_vcpus'] + self.info['vcpu_avail'] = (1 << xeninfo['online_vcpus']) - 1 # read image value image_sxp = self._readVm('image') @@ -895,6 +905,10 @@ class XendDomainInfo: def getMemoryTarget(self): """Get this domain's target memory size, in KB.""" return self.info['memory_static_min'] * 1024 + + def getMemoryMaximum(self): + """Get this domain's maximum memory size, in KB.""" + return self.info['memory_static_max'] * 1024 def getResume(self): return str(self._resume) @@ -1363,9 +1377,9 @@ class XendDomainInfo: # Use architecture- and image-specific calculations to determine # the various headrooms necessary, given the raw configured # values. maxmem, memory, and shadow are all in KiB. + memory = self.image.getRequiredAvailableMemory( + self.info['memory_static_min'] * 1024) maxmem = self.image.getRequiredAvailableMemory( - self.info['memory_static_min'] * 1024) - memory = self.image.getRequiredAvailableMemory( self.info['memory_static_max'] * 1024) shadow = self.image.getRequiredShadowMemory( self.info['shadow_memory'] * 1024, @@ -1727,7 +1741,7 @@ class XendDomainInfo: raise VmError("VM name '%s' already exists%s" % (name, dom.domid is not None and - ("as domain %s" % str(dom.domid)) or "")) + (" as domain %s" % str(dom.domid)) or "")) def update(self, info = None, refresh = True): @@ -2031,7 +2045,7 @@ class XendDomainInfo: if not dev_uuid: raise XendError('Failed to create device') - if self.state in (DOM_STATE_HALTED,): + if self.state in (XEN_API_VM_POWER_STATE_RUNNING,): sxpr = self.info.device_sxpr(dev_uuid) devid = self.getDeviceController('vif').createDevice(sxpr) raise XendError("Device creation failed") diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xend/image.py Thu Dec 14 08:57:36 2006 -0700 @@ -145,6 +145,14 @@ class ImageHandler: add headroom where 
necessary.""" return self.getRequiredAvailableMemory(self.vm.getMemoryTarget()) + def getRequiredMaximumReservation(self): + """@param mem_kb The maximum possible memory, in KiB. + @return The corresponding required amount of memory to be free, also + in KiB. This is normally the same as getRequiredAvailableMemory, but + architecture- or image-specific code may override this to + add headroom where necessary.""" + return self.getRequiredAvailableMemory(self.vm.getMemoryMaximum()) + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): """@param shadow_mem_kb The configured shadow memory, in KiB. @param maxmem_kb The configured maxmem, in KiB. @@ -234,6 +242,60 @@ class PPC_LinuxImageHandler(LinuxImageHa ramdisk = self.ramdisk, features = self.vm.getFeatures(), arch_args = devtree.to_bin()) + + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): + """@param shadow_mem_kb The configured shadow memory, in KiB. + @param maxmem_kb The configured maxmem, in KiB. + @return The corresponding required amount of shadow memory, also in + KiB. 
+ PowerPC currently uses "shadow memory" to refer to the hash table.""" + return max(maxmem_kb / 64, shadow_mem_kb) + + +class PPC_ProseImageHandler(LinuxImageHandler): + + ostype = "prose" + + def configure(self, imageConfig, deviceConfig): + LinuxImageHandler.configure(self, imageConfig, deviceConfig) + self.imageConfig = imageConfig + + def buildDomain(self): + store_evtchn = self.vm.getStorePort() + console_evtchn = self.vm.getConsolePort() + + mem_mb = self.getRequiredInitialReservation() / 1024 + + log.debug("dom = %d", self.vm.getDomid()) + log.debug("memsize = %d", mem_mb) + log.debug("image = %s", self.kernel) + log.debug("store_evtchn = %d", store_evtchn) + log.debug("console_evtchn = %d", console_evtchn) + log.debug("cmdline = %s", self.cmdline) + log.debug("ramdisk = %s", self.ramdisk) + log.debug("vcpus = %d", self.vm.getVCpuCount()) + log.debug("features = %s", self.vm.getFeatures()) + + devtree = FlatDeviceTree.build(self) + + return xc.arch_prose_build(dom = self.vm.getDomid(), + memsize = mem_mb, + image = self.kernel, + store_evtchn = store_evtchn, + console_evtchn = console_evtchn, + cmdline = self.cmdline, + ramdisk = self.ramdisk, + features = self.vm.getFeatures(), + arch_args = devtree.to_bin()) + + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): + """@param shadow_mem_kb The configured shadow memory, in KiB. + @param maxmem_kb The configured maxmem, in KiB. + @return The corresponding required amount of shadow memory, also in + KiB. 
+ PowerPC currently uses "shadow memory" to refer to the hash table.""" + return max(maxmem_kb / 64, shadow_mem_kb) + class HVMImageHandler(ImageHandler): @@ -539,6 +601,9 @@ class X86_HVM_ImageHandler(HVMImageHandl def getRequiredInitialReservation(self): return self.vm.getMemoryTarget() + def getRequiredMaximumReservation(self): + return self.vm.getMemoryMaximum() + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): # 256 pages (1MB) per vcpu, # plus 1 page per MiB of RAM for the P2M map, @@ -553,13 +618,14 @@ class X86_Linux_ImageHandler(LinuxImageH def buildDomain(self): # set physical mapping limit # add an 8MB slack to balance backend allocations. - mem_kb = self.getRequiredInitialReservation() + (8 * 1024) + mem_kb = self.getRequiredMaximumReservation() + (8 * 1024) xc.domain_set_memmap_limit(self.vm.getDomid(), mem_kb) return LinuxImageHandler.buildDomain(self) _handlers = { "powerpc": { "linux": PPC_LinuxImageHandler, + "prose": PPC_ProseImageHandler, }, "ia64": { "linux": LinuxImageHandler, diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xend/server/DevController.py Thu Dec 14 08:57:36 2006 -0700 @@ -19,12 +19,14 @@ from threading import Event from threading import Event import types -from xen.xend import sxp +from xen.xend import sxp, XendRoot from xen.xend.XendError import VmError from xen.xend.XendLogging import log from xen.xend.xenstore.xstransact import xstransact, complete from xen.xend.xenstore.xswatch import xswatch + +import os DEVICE_CREATE_TIMEOUT = 100 HOTPLUG_STATUS_NODE = "hotplug-status" @@ -47,6 +49,8 @@ xenbusState = { 'Closing' : 5, 'Closed' : 6, } + +xroot = XendRoot.instance() xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys()))) @@ -191,7 +195,7 @@ class DevController: raise VmError('%s devices may not be reconfigured' % self.deviceClass) - def destroyDevice(self, 
devid): + def destroyDevice(self, devid, force): """Destroy the specified device. @param devid The device ID, or something device-specific from which @@ -211,6 +215,13 @@ class DevController: # drivers, so this ordering avoids a race). self.writeBackend(devid, 'online', "0") self.writeBackend(devid, 'state', str(xenbusState['Closing'])) + + if force: + frontpath = self.frontendPath(devid) + backpath = xstransact.Read(frontpath, "backend") + if backpath: + xstransact.Remove(backpath) + xstransact.Remove(frontpath) def configurations(self): @@ -313,6 +324,16 @@ class DevController: Make sure that the migration has finished and only then return from the call. """ + tool = xroot.get_external_migration_tool() + if tool: + log.info("Calling external migration tool for step %d" % step) + fd = os.popen("%s -type %s -step %d -host %s -domname %s" % + (tool, self.deviceClass, step, dst, domName)) + for line in fd: + log.info(line.rstrip()) + rc = fd.close() + if rc: + raise VmError('Migration tool returned %d' % (rc >> 8)) return 0 @@ -320,6 +341,16 @@ class DevController: """ Recover from device migration. The given step was the last one that was successfully executed. 
""" + tool = xroot.get_external_migration_tool() + if tool: + log.info("Calling external migration tool") + fd = os.popen("%s -type %s -step %d -host %s -domname %s -recover" % + (tool, self.deviceClass, step, dst, domName)) + for line in fd: + log.info(line.rstrip()) + rc = fd.close() + if rc: + raise VmError('Migration tool returned %d' % (rc >> 8)) return 0 diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xend/server/blkif.py Thu Dec 14 08:57:36 2006 -0700 @@ -133,7 +133,7 @@ class BlkifController(DevController): return config - def destroyDevice(self, devid): + def destroyDevice(self, devid, force): """@see DevController.destroyDevice""" # If we are given a device name, then look up the device ID from it, @@ -142,13 +142,13 @@ class BlkifController(DevController): # superclass's method. try: - DevController.destroyDevice(self, int(devid)) + DevController.destroyDevice(self, int(devid), force) except ValueError: devid_end = type(devid) is str and devid.split('/')[-1] or None for i in self.deviceIDs(): d = self.readBackend(i, 'dev') if d == devid or (devid_end and d == devid_end): - DevController.destroyDevice(self, i) + DevController.destroyDevice(self, i, force) return raise VmError("Device %s not connected" % devid) diff -r ed56ef3e9716 -r 4762d73ced42 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/python/xen/xm/main.py Thu Dec 14 08:57:36 2006 -0700 @@ -142,14 +142,14 @@ SUBCOMMAND_HELP = { 'Create a new virtual block device.'), 'block-configure': ('<Domain> <BackDev> <FrontDev> <Mode> [BackDomain]', 'Change block device configuration'), - 'block-detach' : ('<Domain> <DevId>', + 'block-detach' : ('<Domain> <DevId> [-f|--force]', 'Destroy a domain\'s virtual block device.'), 'block-list' : ('<Domain> [--long]', 'List virtual block devices for a domain.'), 'network-attach': 
('<Domain> [--script=<script>] [--ip=<ip>] ' '[--mac=<mac>]', 'Create a new virtual network device.'), - 'network-detach': ('<Domain> <DevId>', + 'network-detach': ('<Domain> <DevId> [-f|--force]', 'Destroy a domain\'s virtual network device.'), 'network-list' : ('<Domain> [--long]', 'List virtual network interfaces for a domain.'), @@ -1493,16 +1493,24 @@ def xm_network_attach(args): def detach(args, command, deviceClass): - arg_check(args, command, 2) + arg_check(args, command, 2, 3) dom = args[0] dev = args[1] - - server.xend.domain.destroyDevice(dom, deviceClass, dev) + try: + force = args[2] + if (force != "--force") and (force != "-f"): + print "Ignoring option %s"%(force) + force = None + except IndexError: + force = None + + server.xend.domain.destroyDevice(dom, deviceClass, dev, force) def xm_block_detach(args): detach(args, 'block-detach', 'vbd') + detach(args, 'block-detach', 'tap') def xm_network_detach(args): diff -r ed56ef3e9716 -r 4762d73ced42 tools/xenstore/xenstored_domain.c --- a/tools/xenstore/xenstored_domain.c Thu Dec 14 08:54:54 2006 -0700 +++ b/tools/xenstore/xenstored_domain.c Thu Dec 14 08:57:36 2006 -0700 @@ -459,6 +459,8 @@ static int dom0_init(void) return -1; dom0 = new_domain(NULL, 0, port); + if (dom0 == NULL) + return -1; dom0->interface = xenbus_map(); if (dom0->interface == NULL) diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/Makefile --- a/xen/arch/powerpc/Makefile Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/Makefile Thu Dec 14 08:57:36 2006 -0700 @@ -9,10 +9,10 @@ obj-y += backtrace.o obj-y += backtrace.o obj-y += bitops.o obj-y += boot_of.o +obj-y += cmdline.o obj-y += dart.o obj-y += dart_u3.o obj-y += dart_u4.o -obj-y += delay.o obj-y += domctl.o obj-y += domain_build.o obj-y += domain.o @@ -22,11 +22,12 @@ obj-y += hcalls.o obj-y += hcalls.o obj-y += iommu.o obj-y += irq.o -obj-y += mambo.o +obj-y += systemsim.o obj-y += memory.o obj-y += mm.o obj-y += mpic.o obj-y += mpic_init.o +obj-y += numa.o obj-y += 
of-devtree.o obj-y += of-devwalk.o obj-y += ofd_fixup.o @@ -36,6 +37,7 @@ obj-y += setup.o obj-y += setup.o obj-y += shadow.o obj-y += smp.o +obj-y += smpboot.o obj-y += smp-tbsync.o obj-y += sysctl.o obj-y += time.o @@ -57,11 +59,6 @@ PPC_C_WARNINGS += -Wshadow PPC_C_WARNINGS += -Wshadow CFLAGS += $(PPC_C_WARNINGS) -LINK=0x400000 -boot32_link_base = $(LINK) -xen_link_offset = 100 -xen_link_base = $(patsubst %000,%$(xen_link_offset),$(LINK)) - # # The following flags are fed to gcc in order to link several # objects into a single ELF segment and to not link in any additional @@ -72,34 +69,39 @@ firmware: of_handler/built_in.o $(TARGET firmware: of_handler/built_in.o $(TARGET_SUBARCH)/memcpy.o of-devtree.o $(CC) $(CFLAGS) $(OMAGIC) -e __ofh_start -Wl,-Ttext,0x0 $^ -o $@ -firmware_image: firmware +firmware_image.bin: firmware $(CROSS_COMPILE)objcopy --output-target=binary $< $@ - -firmware_image.o: firmware_image - $(CROSS_COMPILE)objcopy --input-target=binary \ - --output-target=elf64-powerpc \ - --binary-architecture=powerpc \ - --redefine-sym _binary_$<_start=$(@:%.o=%)_start \ - --redefine-sym _binary_$<_end=$(@:%.o=%)_end \ - --redefine-sym _binary_$<_size=$(@:%.o=%)_size $< $@ # # Hacks for included C files # irq.o: ../x86/irq.c physdev.o: ../x86/physdev.c +numa.o: ../x86/numa.c HDRS += $(wildcard *.h) +ifneq ($(CMDLINE),) # The first token in the arguments will be silently dropped. 
-IMAGENAME = xen -CMDLINE = "" -boot_of.o: CFLAGS += -DCMDLINE="\"$(IMAGENAME) $(CMDLINE)\"" +FULL_CMDLINE := xen $(CMDLINE) +endif -start.o: boot/start.S - $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ +ifeq ($(wildcard cmdline.dep),) +cmdline.dep: + echo $(FULL_CMDLINE) > cmdline.dep +else +ifneq ($(FULL_CMDLINE),$(shell cat cmdline.dep)) +cmdline.dep:: + echo $(FULL_CMDLINE) > cmdline.dep +else +cmdline.dep: +endif +endif -TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds +cmdline.o: cmdline.dep +cmdline.o: CFLAGS += -DCMDLINE="\"$(FULL_CMDLINE)\"" + +TARGET_OPTS = $(OMAGIC) -Wl,-T,xen.lds TARGET_OPTS += start.o $(ALL_OBJS) .xen-syms: start.o $(ALL_OBJS) xen.lds @@ -122,22 +124,10 @@ xen-syms.o: xen-syms.S $(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@ -$(TARGET).bin: $(TARGET)-syms - $(CROSS_COMPILE)objcopy --output-target=binary $< $@ - -$(TARGET).bin.o: $(TARGET).bin - $(CROSS_COMPILE)objcopy --input-target=binary \ - --output-target=elf32-powerpc \ - --binary-architecture=powerpc $< $@ - -boot32.o: boot/boot32.S - $(CC) -m32 -Wa,-a32,-mppc64bridge \ - -D__ASSEMBLY__ -D__BRIDGE64__ $(CFLAGS) -c $< -o $@ - -$(TARGET): boot32.o $(TARGET).bin.o - $(CC) -m32 -N -Wl,-melf32ppclinux -static -nostdlib \ - -Wl,-Ttext,$(boot32_link_base) -Wl,-Tdata,$(xen_link_base) \ - $(CFLAGS) $^ -o $@ +# our firmware only loads 32-bit ELF files +OCPYFLAGS := --input-target=elf64-powerpc --output-target=elf32-powerpc +$(TARGET): $(TARGET)-syms + $(CROSS_COMPILE)objcopy $(OCPYFLAGS) $^ $@ asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS) $(CC) $(CFLAGS) -S -o $@ $< @@ -150,4 +140,5 @@ dom0.bin: $(DOM0_IMAGE) clean:: $(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean - rm -f firmware firmware_image dom0.bin .xen-syms + rm -f firmware firmware_image.bin dom0.bin .xen-syms xen-syms.S \ + xen.lds asm-offsets.s cmdline.dep diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/backtrace.c --- 
a/xen/arch/powerpc/backtrace.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/backtrace.c Thu Dec 14 08:57:36 2006 -0700 @@ -14,6 +14,7 @@ #include <xen/console.h> #include <xen/sched.h> #include <xen/symbols.h> +#include <asm/debugger.h> static char namebuf[KSYM_NAME_LEN+1]; @@ -192,6 +193,19 @@ void show_backtrace(ulong sp, ulong lr, console_end_sync(); } +void show_backtrace_regs(struct cpu_user_regs *regs) +{ + console_start_sync(); + + show_registers(regs); + printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr()); + printk("hid4 0x%016lx\n", regs->hid4); + printk("---[ backtrace ]---\n"); + show_backtrace(regs->gprs[1], regs->lr, regs->pc); + + console_end_sync(); +} + void __warn(char *file, int line) { ulong sp; @@ -202,9 +216,19 @@ void __warn(char *file, int line) sp = (ulong)__builtin_frame_address(0); lr = (ulong)__builtin_return_address(0); - backtrace(sp, lr, lr); - console_end_sync(); -} - - + + console_end_sync(); +} + +void dump_execution_state(void) +{ + struct vcpu *v = current; + struct cpu_user_regs *regs = &v->arch.ctxt; + + show_registers(regs); + if (regs->msr & MSR_HV) { + printk("In Xen:\n"); + show_backtrace(regs->gprs[1], regs->pc, regs->lr); + } +} diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/bitops.c --- a/xen/arch/powerpc/bitops.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/bitops.c Thu Dec 14 08:57:36 2006 -0700 @@ -12,42 +12,42 @@ * @size: The maximum size to search */ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) + unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < 
BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ found_middle: - return result + __ffs(tmp); + return result + __ffs(tmp); } /* @@ -55,40 +55,40 @@ found_middle: * Linus' asm-alpha/bitops.h. 
*/ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) + unsigned long offset) { - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (BITS_PER_LONG - offset); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ found_middle: - return result + ffz(tmp); + return result + ffz(tmp); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/boot_of.c --- a/xen/arch/powerpc/boot_of.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/boot_of.c Thu Dec 14 08:57:36 2006 -0700 @@ -16,6 +16,7 @@ * Copyright (C) IBM Corp. 
2005, 2006 * * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> + * Hollis Blanchard <hollisb@xxxxxxxxxx> */ #include <xen/config.h> @@ -32,6 +33,7 @@ #include "exceptions.h" #include "of-devtree.h" #include "oftree.h" +#include "rtas.h" /* Secondary processors use this for handshaking with main processor. */ volatile unsigned int __spin_ack; @@ -39,20 +41,27 @@ static ulong of_vec; static ulong of_vec; static ulong of_msr; static int of_out; -static char bootargs[256]; - -#define COMMAND_LINE_SIZE 512 -static char builtin_cmdline[COMMAND_LINE_SIZE] - __attribute__((section("__builtin_cmdline"))) = CMDLINE; - +static ulong eomem; + +#define MEM_AVAILABLE_PAGES ((32 << 20) >> PAGE_SHIFT) +static DECLARE_BITMAP(mem_available_pages, MEM_AVAILABLE_PAGES); + +extern char builtin_cmdline[]; extern struct ns16550_defaults ns16550; #undef OF_DEBUG +#undef OF_DEBUG_LOW #ifdef OF_DEBUG #define DBG(args...) of_printf(args) #else #define DBG(args...) +#endif + +#ifdef OF_DEBUG_LOW +#define DBG_LOW(args...) of_printf(args) +#else +#define DBG_LOW(args...) #endif #define of_panic(MSG...) \ @@ -68,7 +77,6 @@ static int bof_chosen; static int bof_chosen; static struct of_service s; -extern s32 prom_call(void *arg, ulong rtas_base, ulong func, ulong msr); static int __init of_call( const char *service, u32 nargs, u32 nrets, s32 rets[], ...) 
@@ -78,7 +86,6 @@ static int __init of_call( if (of_vec != 0) { va_list args; int i; - memset(&s, 0, sizeof (s)); s.ofs_service = (ulong)service; s.ofs_nargs = nargs; @@ -189,7 +196,7 @@ static int __init of_finddevice(const ch DBG("finddevice %s -> FAILURE %d\n",devspec,rets[0]); return OF_FAILURE; } - DBG("finddevice %s -> %d\n",devspec, rets[0]); + DBG_LOW("finddevice %s -> %d\n",devspec, rets[0]); return rets[0]; } @@ -200,11 +207,11 @@ static int __init of_getprop(int ph, con of_call("getprop", 4, 1, rets, ph, name, buf, buflen); if (rets[0] == OF_FAILURE) { - DBG("getprop 0x%x %s -> FAILURE\n", ph, name); + DBG_LOW("getprop 0x%x %s -> FAILURE\n", ph, name); return OF_FAILURE; } - DBG("getprop 0x%x %s -> 0x%x (%s)\n", ph, name, rets[0], (char *)buf); + DBG_LOW("getprop 0x%x %s -> 0x%x (%s)\n", ph, name, rets[0], (char *)buf); return rets[0]; } @@ -220,7 +227,7 @@ static int __init of_setprop( return OF_FAILURE; } - DBG("setprop 0x%x %s -> %s\n", ph, name, (char *)buf); + DBG_LOW("setprop 0x%x %s -> %s\n", ph, name, (char *)buf); return rets[0]; } @@ -232,7 +239,7 @@ static int __init of_getchild(int ph) int rets[1] = { OF_FAILURE }; of_call("child", 1, 1, rets, ph); - DBG("getchild 0x%x -> 0x%x\n", ph, rets[0]); + DBG_LOW("getchild 0x%x -> 0x%x\n", ph, rets[0]); return rets[0]; } @@ -245,7 +252,7 @@ static int __init of_getpeer(int ph) int rets[1] = { OF_FAILURE }; of_call("peer", 1, 1, rets, ph); - DBG("getpeer 0x%x -> 0x%x\n", ph, rets[0]); + DBG_LOW("getpeer 0x%x -> 0x%x\n", ph, rets[0]); return rets[0]; } @@ -259,7 +266,7 @@ static int __init of_getproplen(int ph, DBG("getproplen 0x%x %s -> FAILURE\n", ph, name); return OF_FAILURE; } - DBG("getproplen 0x%x %s -> 0x%x\n", ph, name, rets[0]); + DBG_LOW("getproplen 0x%x %s -> 0x%x\n", ph, name, rets[0]); return rets[0]; } @@ -272,7 +279,7 @@ static int __init of_package_to_path(int DBG("%s 0x%x -> FAILURE\n", __func__, ph); return OF_FAILURE; } - DBG("%s 0x%x %s -> 0x%x\n", __func__, ph, buffer, rets[0]); + 
DBG_LOW("%s 0x%x %s -> 0x%x\n", __func__, ph, buffer, rets[0]); if (rets[0] <= buflen) buffer[rets[0]] = '\0'; return rets[0]; @@ -289,7 +296,7 @@ static int __init of_nextprop(int ph, co return OF_FAILURE; } - DBG("nextprop 0x%x %s -> %s\n", ph, name, (char *)buf); + DBG_LOW("nextprop 0x%x %s -> %s\n", ph, name, (char *)buf); return rets[0]; } @@ -336,7 +343,7 @@ static int __init of_claim(u32 virt, u32 return OF_FAILURE; } - DBG("%s 0x%08x 0x%08x 0x%08x -> 0x%08x\n", __func__, virt, size, align, + DBG_LOW("%s 0x%08x 0x%08x 0x%08x -> 0x%08x\n", __func__, virt, size, align, rets[0]); return rets[0]; } @@ -358,29 +365,194 @@ static int __init of_getparent(int ph) of_call("parent", 1, 1, rets, ph); - DBG("getparent 0x%x -> 0x%x\n", ph, rets[0]); - return rets[0]; -} - -static void boot_of_probemem(multiboot_info_t *mbi) + DBG_LOW("getparent 0x%x -> 0x%x\n", ph, rets[0]); + return rets[0]; +} + +static int __init of_open(const char *devspec) +{ + int rets[1] = { OF_FAILURE }; + + of_call("open", 1, 1, rets, devspec); + return rets[0]; +} + +static void boot_of_alloc_init(int m, uint addr_cells, uint size_cells) +{ + int rc; + uint pg; + uint a[64]; + int tst; + u64 start; + u64 size; + + rc = of_getprop(m, "available", a, sizeof (a)); + if (rc > 0) { + int l = rc / sizeof(a[0]); + int r = 0; + +#ifdef OF_DEBUG + { + int i; + of_printf("avail:\n"); + for (i = 0; i < l; i += 4) + of_printf(" 0x%x%x, 0x%x%x\n", + a[i], a[i + 1], + a[i + 2] ,a[i + 3]); + } +#endif + + pg = 0; + while (pg < MEM_AVAILABLE_PAGES && r < l) { + ulong end; + + start = a[r++]; + if (addr_cells == 2 && (r < l) ) + start = (start << 32) | a[r++]; + + size = a[r++]; + if (size_cells == 2 && (r < l) ) + size = (size << 32) | a[r++]; + + end = ALIGN_DOWN(start + size, PAGE_SIZE); + + start = ALIGN_UP(start, PAGE_SIZE); + + DBG("%s: marking 0x%x - 0x%lx\n", __func__, + pg << PAGE_SHIFT, start); + + start >>= PAGE_SHIFT; + while (pg < MEM_AVAILABLE_PAGES && pg < start) { + set_bit(pg, 
mem_available_pages); + pg++; + } + + pg = end >> PAGE_SHIFT; + } + } + + /* Now make sure we mark our own memory */ + pg = (ulong)_start >> PAGE_SHIFT; + start = (ulong)_end >> PAGE_SHIFT; + + DBG("%s: marking 0x%x - 0x%lx\n", __func__, + pg << PAGE_SHIFT, start << PAGE_SHIFT); + + /* Lets try and detect if our image has stepped on something. It + * is possible that FW has already subtracted our image from + * available memory so we must make sure that the previous bits + * are the same for the whole image */ + tst = test_and_set_bit(pg, mem_available_pages); + ++pg; + while (pg <= start) { + if (test_and_set_bit(pg, mem_available_pages) != tst) + of_panic("%s: pg :0x%x of our image is different\n", + __func__, pg); + ++pg; + } + + DBG("%s: marking 0x%x - 0x%x\n", __func__, + 0 << PAGE_SHIFT, 3 << PAGE_SHIFT); + /* First for pages (where the vectors are) should be left alone as well */ + set_bit(0, mem_available_pages); + set_bit(1, mem_available_pages); + set_bit(2, mem_available_pages); + set_bit(3, mem_available_pages); +} + +#ifdef BOOT_OF_FREE +/* this is here in case we ever need a free call at a later date */ +static void boot_of_free(ulong addr, ulong size) +{ + ulong bits; + ulong pos; + ulong i; + + size = ALIGN_UP(size, PAGE_SIZE); + bits = size >> PAGE_SHIFT; + pos = addr >> PAGE_SHIFT; + + for (i = 0; i < bits; i++) { + if (!test_and_clear_bit(pos + i, mem_available_pages)) + of_panic("%s: pg :0x%lx was never allocated\n", + __func__, pos + i); + } +} +#endif + +static ulong boot_of_alloc(ulong size) +{ + ulong bits; + ulong pos; + + if (size == 0) + return 0; + + DBG("%s(0x%lx)\n", __func__, size); + + size = ALIGN_UP(size, PAGE_SIZE); + bits = size >> PAGE_SHIFT; + pos = 0; + for (;;) { + ulong i; + + pos = find_next_zero_bit(mem_available_pages, + MEM_AVAILABLE_PAGES, pos); + DBG("%s: found start bit at: 0x%lx\n", __func__, pos); + + /* found nothing */ + if ((pos + bits) > MEM_AVAILABLE_PAGES) { + of_printf("%s: allocation of size: 0x%lx 
failed\n", + __func__, size); + return 0; + } + + /* find a set that fits */ + DBG("%s: checking for 0x%lx bits: 0x%lx\n", __func__, bits, pos); + + i = find_next_bit(mem_available_pages, MEM_AVAILABLE_PAGES, pos); + if (i - pos >= bits) { + uint addr = pos << PAGE_SHIFT; + + /* make sure OF is happy with our choice */ + if (of_claim(addr, size, 0) != OF_FAILURE) { + for (i = 0; i < bits; i++) + set_bit(pos + i, mem_available_pages); + + DBG("%s: 0x%lx is good returning 0x%x\n", + __func__, pos, addr); + return addr; + } + /* if OF did not like the address then simply start from + * the next bit */ + i = 1; + } + + pos = pos + i; + } +} + +static ulong boot_of_mem_init(void) { int root; int p; - u32 addr_cells = 1; - u32 size_cells = 1; int rc; - int mcount = 0; - static memory_map_t mmap[16]; + uint addr_cells; + uint size_cells; root = of_finddevice("/"); p = of_getchild(root); /* code is writen to assume sizes of 1 */ - of_getprop(root, "#address-cells", &addr_cells, sizeof (addr_cells)); - of_getprop(root, "#size-cells", &size_cells, sizeof (size_cells)); + of_getprop(root, "#address-cells", &addr_cells, + sizeof (addr_cells)); + of_getprop(root, "#size-cells", &size_cells, + sizeof (size_cells)); DBG("%s: address_cells=%d size_cells=%d\n", __func__, addr_cells, size_cells); - + + /* We do ream memory discovery later, for now we only want to find + * the first LMB */ do { const char memory[] = "memory"; char type[32]; @@ -389,82 +561,69 @@ static void boot_of_probemem(multiboot_i of_getprop(p, "device_type", type, sizeof (type)); if (strncmp(type, memory, sizeof (memory)) == 0) { - u32 reg[48]; - u32 al, ah, ll, lh; + uint reg[48]; + u64 start; + u64 size; int r; + int l; rc = of_getprop(p, "reg", reg, sizeof (reg)); if (rc == OF_FAILURE) { of_panic("no reg property for memory node: 0x%x.\n", p); } - int l = rc/sizeof(u32); /* number reg element */ + + l = rc / sizeof(reg[0]); /* number reg element */ DBG("%s: number of bytes in property 'reg' %d\n", __func__, 
rc); r = 0; while (r < l) { - al = ah = ll = lh = 0; - if (addr_cells == 2) { - ah = reg[r++]; - if (r >= l) - break; /* partial line. Skip */ - al = reg[r++]; - if (r >= l) - break; /* partial line. Skip */ - } else { - al = reg[r++]; - if (r >= l) - break; /* partial line. Skip */ + start = reg[r++]; + if (addr_cells == 2 && (r < l) ) + start = (start << 32) | reg[r++]; + + if (r >= l) + break; /* partial line. Skip */ + + if (start > 0) { + /* this is not the first LMB so we skip it */ + break; } - if (size_cells == 2) { - lh = reg[r++]; - if (r >= l) - break; /* partial line. Skip */ - ll = reg[r++]; - } else { - ll = reg[r++]; - } - - if ((ll != 0) || (lh != 0)) { - mmap[mcount].size = 20; /* - size field */ - mmap[mcount].type = 1; /* Regular ram */ - mmap[mcount].length_high = lh; - mmap[mcount].length_low = ll; - mmap[mcount].base_addr_high = ah; - mmap[mcount].base_addr_low = al; - of_printf("%s: memory 0x%016lx[0x%08lx]\n", - __func__, - (u64)(((u64)mmap[mcount].base_addr_high << 32) - | mmap[mcount].base_addr_low), - (u64)(((u64)mmap[mcount].length_high << 32) - | mmap[mcount].length_low)); - ++mcount; - } + + size = reg[r++]; + if (size_cells == 2 && (r < l) ) + size = (size << 32) | reg[r++]; + + if (r > l) + break; /* partial line. 
Skip */ + + boot_of_alloc_init(p, addr_cells, size_cells); + + eomem = size; + return size; } } p = of_getpeer(p); } while (p != OF_FAILURE && p != 0); - if (mcount > 0) { - mbi->flags |= MBI_MEMMAP; - mbi->mmap_length = sizeof (mmap[0]) * mcount; - mbi->mmap_addr = (ulong)mmap; - } + return 0; } static void boot_of_bootargs(multiboot_info_t *mbi) { int rc; - rc = of_getprop(bof_chosen, "bootargs", &bootargs, sizeof (bootargs)); - if (rc == OF_FAILURE || bootargs[0] == '\0') { - strlcpy(bootargs, builtin_cmdline, sizeof(bootargs)); + if (builtin_cmdline[0] == '\0') { + rc = of_getprop(bof_chosen, "bootargs", builtin_cmdline, + CONFIG_CMDLINE_SIZE); + if (rc > CONFIG_CMDLINE_SIZE) + of_panic("bootargs[] not big enough for /chosen/bootargs\n"); } mbi->flags |= MBI_CMDLINE; - mbi->cmdline = (u32)bootargs; - - of_printf("bootargs = %s\n", bootargs); + mbi->cmdline = (ulong)builtin_cmdline; + + of_printf("bootargs = %s\n", builtin_cmdline); } static int save_props(void *m, ofdn_t n, int pkg) @@ -500,7 +659,8 @@ static int save_props(void *m, ofdn_t n, of_panic("obj array not big enough for 0x%x\n", sz); } actual = of_getprop(pkg, name, obj, sz); - if (actual > sz) of_panic("obj too small"); + if (actual > sz) + of_panic("obj too small"); } if (strncmp(name, name_str, sizeof(name_str)) == 0) { @@ -512,7 +672,8 @@ static int save_props(void *m, ofdn_t n, } pos = ofd_prop_add(m, n, name, obj, actual); - if (pos == 0) of_panic("prop_create"); + if (pos == 0) + of_panic("prop_create"); } result = of_nextprop(pkg, name, name); @@ -536,10 +697,12 @@ retry: if (pnext != 0) { sz = of_package_to_path(pnext, path, psz); - if (sz == OF_FAILURE) of_panic("bad path\n"); + if (sz == OF_FAILURE) + of_panic("bad path\n"); nnext = ofd_node_child_create(m, n, path, sz); - if (nnext == 0) of_panic("out of mem\n"); + if (nnext == 0) + of_panic("out of mem\n"); do_pkg(m, nnext, pnext, path, psz); } @@ -551,7 +714,8 @@ retry: sz = of_package_to_path(pnext, path, psz); nnext = 
ofd_node_peer_create(m, n, path, sz); - if (nnext <= 0) of_panic("out of space in OFD tree.\n"); + if (nnext <= 0) + of_panic("out of space in OFD tree.\n"); n = nnext; p = pnext; @@ -559,7 +723,7 @@ retry: } } -static int pkg_save(void *mem) +static long pkg_save(void *mem) { int root; char path[256]; @@ -570,11 +734,12 @@ static int pkg_save(void *mem) /* get root */ root = of_getpeer(0); - if (root == OF_FAILURE) of_panic("no root package\n"); + if (root == OF_FAILURE) + of_panic("no root package\n"); do_pkg(mem, OFD_ROOT, root, path, sizeof(path)); - r = (((ofdn_t *)mem)[1] + 1) * sizeof (u64); + r = ofd_size(mem); of_printf("%s: saved device tree in 0x%x bytes\n", __func__, r); @@ -604,7 +769,8 @@ static int boot_of_fixup_refs(void *mem) char ofpath[256]; path = ofd_node_path(mem, c); - if (path == NULL) of_panic("no path to found prop: %s\n", name); + if (path == NULL) + of_panic("no path to found prop: %s\n", name); rp = of_finddevice(path); if (rp == OF_FAILURE) @@ -629,13 +795,15 @@ static int boot_of_fixup_refs(void *mem) "ref 0x%x\n", name, path, rp, ref); dp = ofd_node_find(mem, ofpath); - if (dp <= 0) of_panic("no ofd node for OF node[0x%x]: %s\n", - ref, ofpath); + if (dp <= 0) + of_panic("no ofd node for OF node[0x%x]: %s\n", + ref, ofpath); ref = dp; upd = ofd_prop_add(mem, c, name, &ref, sizeof(ref)); - if (upd <= 0) of_panic("update failed: %s\n", name); + if (upd <= 0) + of_panic("update failed: %s\n", name); #ifdef DEBUG of_printf("%s: %s/%s -> %s\n", __func__, @@ -658,7 +826,8 @@ static int boot_of_fixup_chosen(void *me char ofpath[256]; ch = of_finddevice("/chosen"); - if (ch == OF_FAILURE) of_panic("/chosen not found\n"); + if (ch == OF_FAILURE) + of_panic("/chosen not found\n"); rc = of_getprop(ch, "cpu", &val, sizeof (val)); @@ -667,16 +836,19 @@ static int boot_of_fixup_chosen(void *me if (rc > 0) { dn = ofd_node_find(mem, ofpath); - if (dn <= 0) of_panic("no node for: %s\n", ofpath); + if (dn <= 0) + of_panic("no node for: %s\n", ofpath); 
ofd_boot_cpu = dn; val = dn; dn = ofd_node_find(mem, "/chosen"); - if (dn <= 0) of_panic("no /chosen node\n"); + if (dn <= 0) + of_panic("no /chosen node\n"); dc = ofd_prop_add(mem, dn, "cpu", &val, sizeof (val)); - if (dc <= 0) of_panic("could not fix /chosen/cpu\n"); + if (dc <= 0) + of_panic("could not fix /chosen/cpu\n"); rc = 1; } else { of_printf("*** can't find path to booting cpu, " @@ -685,56 +857,6 @@ static int boot_of_fixup_chosen(void *me } } return rc; -} - -static ulong space_base; - -/* - * The following function is necessary because we cannot depend on all - * FW to actually allocate us any space, so we look for it _hoping_ - * that at least is will fail if we try to claim something that - * belongs to FW. This hope does not seem to be true on some version - * of PIBS. - */ -static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi) -{ - memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr); - ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low; - ulong base; - - if (size == 0) - return 0; - - if (align == 0) - of_panic("cannot call %s() with align of 0\n", __func__); - -#ifdef BROKEN_CLAIM_WORKAROUND - { - static int broken_claim; - if (!broken_claim) { - /* just try and claim it to the FW chosen address */ - base = of_claim(0, size, align); - if (base != OF_FAILURE) - return base; - of_printf("%s: Firmware does not allocate memory for you\n", - __func__); - broken_claim = 1; - } - } -#endif - - of_printf("%s base=0x%016lx eomem=0x%016lx size=0x%08x align=0x%x\n", - __func__, space_base, eomem, size, align); - base = ALIGN_UP(space_base, PAGE_SIZE); - - while ((base + size) < rma_size(cpu_default_rma_order_pages())) { - if (of_claim(base, size, 0) != OF_FAILURE) { - space_base = base + size; - return base; - } - base += (PAGE_SIZE > align) ? 
PAGE_SIZE : align; - } - of_panic("Cannot find memory in the RMA\n"); } /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges @@ -798,8 +920,10 @@ static int __init boot_of_serial(void *o of_panic("package-to-path failed\n"); rc = of_getprop(p, "device_type", type, sizeof (type)); - if (rc == OF_FAILURE) - of_panic("fetching device type failed\n"); + if (rc == OF_FAILURE) { + of_printf("%s: fetching type of `%s' failed\n", __func__, buf); + continue; + } if (strcmp(type, "serial") != 0) continue; @@ -855,17 +979,104 @@ static int __init boot_of_serial(void *o return 1; } -static void boot_of_module(ulong r3, ulong r4, multiboot_info_t *mbi) -{ - static module_t mods[3]; +static int __init boot_of_rtas(module_t *mod, multiboot_info_t *mbi) +{ + int rtas_node; + int rtas_instance; + uint size = 0; + int res[2]; + int mem; + int ret; + + rtas_node = of_finddevice("/rtas"); + + if (rtas_node <= 0) { + of_printf("No RTAS, Xen has no power control\n"); + return 0; + } + of_getprop(rtas_node, "rtas-size", &size, sizeof (size)); + if (size == 0) { + of_printf("RTAS, has no size\n"); + return 0; + } + + rtas_instance = of_open("/rtas"); + if (rtas_instance == OF_FAILURE) { + of_printf("RTAS, could not open\n"); + return 0; + } + + size = ALIGN_UP(size, PAGE_SIZE); + + mem = boot_of_alloc(size); + if (mem == 0) + of_panic("Could not allocate RTAS tree\n"); + + of_printf("instantiating RTAS at: 0x%x\n", mem); + + ret = of_call("call-method", 3, 2, res, + "instantiate-rtas", rtas_instance, mem); + if (ret == OF_FAILURE) { + of_printf("RTAS, could not open\n"); + return 0; + } + + rtas_entry = res[1]; + rtas_base = mem; + rtas_end = mem + size; + rtas_msr = of_msr; + + mod->mod_start = rtas_base; + mod->mod_end = rtas_end; + return 1; +} + +static void * __init boot_of_devtree(module_t *mod, multiboot_info_t *mbi) +{ void *oft; ulong oft_sz = 48 * PAGE_SIZE; + + /* snapshot the tree */ + oft = (void *)boot_of_alloc(oft_sz); + if (oft == NULL) + of_panic("Could 
not allocate OFD tree\n"); + + of_printf("creating oftree at: 0x%p\n", oft); + of_test("package-to-path"); + oft = ofd_create(oft, oft_sz); + pkg_save(oft); + + if (ofd_size(oft) > oft_sz) + of_panic("Could not fit all of native devtree\n"); + + boot_of_fixup_refs(oft); + boot_of_fixup_chosen(oft); + + if (ofd_size(oft) > oft_sz) + of_panic("Could not fit all devtree fixups\n"); + + ofd_walk(oft, __func__, OFD_ROOT, /* add_hype_props */ NULL, 2); + + mod->mod_start = (ulong)oft; + mod->mod_end = mod->mod_start + oft_sz; + of_printf("%s: devtree mod @ 0x%016x - 0x%016x\n", __func__, + mod->mod_start, mod->mod_end); + + return oft; +} + +static void * __init boot_of_module(ulong r3, ulong r4, multiboot_info_t *mbi) +{ + static module_t mods[4]; ulong mod0_start; ulong mod0_size; - static const char sepr[] = " -- "; + static const char * sepr[] = {" -- ", " || "}; + int sepr_index; extern char dom0_start[] __attribute__ ((weak)); extern char dom0_size[] __attribute__ ((weak)); - const char *p; + const char *p = NULL; + int mod; + void *oft; if ((r3 > 0) && (r4 > 0)) { /* was it handed to us in registers ? 
*/ @@ -908,57 +1119,50 @@ static void boot_of_module(ulong r3, ulo of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]); } - space_base = (ulong)_end; - mods[0].mod_start = mod0_start; - mods[0].mod_end = mod0_start + mod0_size; - - of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__, - mods[0].mod_start, mods[0].mod_end); - p = strstr((char *)(ulong)mbi->cmdline, sepr); + mod = 0; + mods[mod].mod_start = mod0_start; + mods[mod].mod_end = mod0_start + mod0_size; + + of_printf("%s: dom0 mod @ 0x%016x[0x%x]\n", __func__, + mods[mod].mod_start, mods[mod].mod_end); + + /* look for delimiter: "--" or "||" */ + for (sepr_index = 0; sepr_index < ARRAY_SIZE(sepr); sepr_index++){ + p = strstr((char *)(ulong)mbi->cmdline, sepr[sepr_index]); + if (p != NULL) + break; + } + if (p != NULL) { - p += sizeof (sepr) - 1; - mods[0].string = (u32)(ulong)p; - of_printf("%s: mod[0].string: %s\n", __func__, p); - } - - /* snapshot the tree */ - oft = (void*)find_space(oft_sz, PAGE_SIZE, mbi); - if (oft == 0) - of_panic("Could not allocate OFD tree\n"); - - of_printf("creating oft\n"); - of_test("package-to-path"); - oft = ofd_create(oft, oft_sz); - pkg_save(oft); - - if (ofd_size(oft) > oft_sz) - of_panic("Could not fit all of native devtree\n"); - - boot_of_fixup_refs(oft); - boot_of_fixup_chosen(oft); - - if (ofd_size(oft) > oft_sz) - of_panic("Could not fit all devtree fixups\n"); - - ofd_walk(oft, OFD_ROOT, /* add_hype_props */ NULL, 2); - - mods[1].mod_start = (ulong)oft; - mods[1].mod_end = mods[1].mod_start + oft_sz; - of_printf("%s: mod[1] @ 0x%016x[0x%x]\n", __func__, - mods[1].mod_start, mods[1].mod_end); - + /* Xen proper should never know about the dom0 args. 
*/ + *(char *)p = '\0'; + p += strlen(sepr[sepr_index]); + mods[mod].string = (u32)(ulong)p; + of_printf("%s: dom0 mod string: %s\n", __func__, p); + } + + ++mod; + if (boot_of_rtas(&mods[mod], mbi)) + ++mod; + + oft = boot_of_devtree(&mods[mod], mbi); + if (oft == NULL) + of_panic("%s: boot_of_devtree failed\n", __func__); + + ++mod; mbi->flags |= MBI_MODULES; - mbi->mods_count = 2; + mbi->mods_count = mod; mbi->mods_addr = (u32)mods; - boot_of_serial(oft); + return oft; } static int __init boot_of_cpus(void) { - int cpus_node; - int cpu_node, bootcpu_node, logical; + int cpus_node, cpu_node; + int bootcpu_instance, bootcpu_node; + int logical; int result; s32 cpuid; u32 cpu_clock[2]; @@ -967,9 +1171,13 @@ static int __init boot_of_cpus(void) /* Look up which CPU we are running on right now and get all info * from there */ result = of_getprop(bof_chosen, "cpu", - &bootcpu_node, sizeof (bootcpu_node)); + &bootcpu_instance, sizeof (bootcpu_instance)); if (result == OF_FAILURE) - of_panic("Failed to look up boot cpu\n"); + of_panic("Failed to look up boot cpu instance\n"); + + bootcpu_node = of_instance_to_package(bootcpu_instance); + if (result == OF_FAILURE) + of_panic("Failed to look up boot cpu package\n"); cpu_node = bootcpu_node; @@ -1070,15 +1278,12 @@ static int __init boot_of_cpus(void) return 1; } -static int __init boot_of_rtas(void) -{ - return 1; -} - multiboot_info_t __init *boot_of_init( ulong r3, ulong r4, ulong vec, ulong r6, ulong r7, ulong orig_msr) { static multiboot_info_t mbi; + void *oft; + int r; of_vec = vec; of_msr = orig_msr; @@ -1098,18 +1303,20 @@ multiboot_info_t __init *boot_of_init( r3, r4, vec, r6, r7, orig_msr); if ((vec >= (ulong)_start) && (vec <= (ulong)_end)) { - of_printf("Hmm.. OF[0x%lx] seems to have stepped on our image " - "that ranges: %p .. %p.\n HANG!\n", + of_panic("Hmm.. OF[0x%lx] seems to have stepped on our image " + "that ranges: %p .. 
%p.\n", vec, _start, _end); } of_printf("%s: _start %p _end %p 0x%lx\n", __func__, _start, _end, r6); boot_of_fix_maple(); - boot_of_probemem(&mbi); + r = boot_of_mem_init(); + if (r == 0) + of_panic("failure to initialize memory allocator"); boot_of_bootargs(&mbi); - boot_of_module(r3, r4, &mbi); + oft = boot_of_module(r3, r4, &mbi); boot_of_cpus(); - boot_of_rtas(); + boot_of_serial(oft); /* end of OF */ of_printf("Quiescing Open Firmware ...\n"); diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/crash.c --- a/xen/arch/powerpc/crash.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/crash.c Thu Dec 14 08:57:36 2006 -0700 @@ -1,5 +1,6 @@ #include <xen/lib.h> /* for printk() used in stub */ #include <xen/types.h> +#include <xen/kexec.h> #include <public/kexec.h> void machine_crash_shutdown(void) diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/dart.c --- a/xen/arch/powerpc/dart.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/dart.c Thu Dec 14 08:57:36 2006 -0700 @@ -60,8 +60,8 @@ union dart_entry { u32 de_word; struct { u32 de_v:1; /* valid */ - u32 de_rp:1; /* read protected*/ - u32 de_wp:1; /* write protected*/ + u32 de_rp:1; /* read protected */ + u32 de_wp:1; /* write protected */ u32 _de_res:5; u32 de_ppn:24; /* 24 bit Physical Page Number * representing address [28:51] */ @@ -98,7 +98,6 @@ static u32 dart_encode(int perm, ulong r if (perm & DART_WRITE) { e.de_bits.de_wp = 0; } - return e.de_word; } @@ -190,10 +189,8 @@ static int find_dart(struct dart_info *d ofdn_t n; char compat[128]; - - if (on_mambo()) { - /* mambo has no dart */ - DBG("%s: Mambo does not support a dart\n", __func__); + if (on_systemsim()) { + DBG("%s: systemsim does not support a dart\n", __func__); return -1; } @@ -263,7 +260,7 @@ static int init_dart(void) /* Linux uses a dummy page, filling "empty" DART entries with a reference to this page to capture stray DMA's */ - dummy_page = (ulong)alloc_xenheap_pages(1); + dummy_page = (ulong)alloc_xenheap_pages(0); 
clear_page((void *)dummy_page); dummy_page >>= PAGE_SHIFT; diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/dart_u4.c --- a/xen/arch/powerpc/dart_u4.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/dart_u4.c Thu Dec 14 08:57:36 2006 -0700 @@ -19,6 +19,7 @@ */ #undef DEBUG +#define INVALIDATE_ALL #include <xen/config.h> #include <xen/types.h> @@ -123,8 +124,13 @@ static void u4_inv_all(void) static void u4_inv_entry(ulong pgn) { +#ifdef INVALIDATE_ALL + return u4_inv_all(); +#else union dart_ctl dc; ulong retries = 0; + + return u4_inv_all(); dc.dc_word = in_32(&dart->d_dartcntl.dc_word); dc.dc_bits.dc_ilpn = pgn; @@ -139,6 +145,7 @@ static void u4_inv_entry(ulong pgn) if (retries > 1000000) panic("WAY! too long\n"); } while (dc.dc_bits.dc_ione != 0); +#endif } static struct dart_ops u4_ops = { diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/domain.c Thu Dec 14 08:57:36 2006 -0700 @@ -33,6 +33,8 @@ #include <asm/htab.h> #include <asm/current.h> #include <asm/hcalls.h> +#include "rtas.h" +#include "exceptions.h" #define next_arg(fmt, args) ({ \ unsigned long __arg; \ @@ -46,7 +48,6 @@ } \ __arg; \ }) -extern void idle_loop(void); unsigned long hypercall_create_continuation(unsigned int op, const char *format, ...) 
@@ -87,26 +88,44 @@ int arch_domain_create(struct domain *d) INIT_LIST_HEAD(&d->arch.extent_list); + d->arch.foreign_mfn_count = 1024; + d->arch.foreign_mfns = xmalloc_array(uint, d->arch.foreign_mfn_count); + BUG_ON(d->arch.foreign_mfns == NULL); + + memset(d->arch.foreign_mfns, -1, d->arch.foreign_mfn_count * sizeof(uint)); + return 0; } void arch_domain_destroy(struct domain *d) { shadow_teardown(d); -} - + /* shared_info is part of the RMA so no need to release it */ +} + +static void machine_fail(const char *s) +{ + printk("%s failed, manual powercycle required!\n", s); + for (;;) + sleep(); +} void machine_halt(void) { printk("machine_halt called: spinning....\n"); console_start_sync(); - while(1); + printk("%s called\n", __func__); + rtas_halt(); + + machine_fail(__func__); } void machine_restart(char * __unused) { printk("machine_restart called: spinning....\n"); console_start_sync(); - while(1); + printk("%s called\n", __func__); + rtas_reboot(); + machine_fail(__func__); } struct vcpu *alloc_vcpu_struct(void) @@ -222,6 +241,7 @@ void context_switch(struct vcpu *prev, s mtsdr1(next->domain->arch.htab.sdr1); local_flush_tlb(); /* XXX maybe flush_tlb_mask? 
*/ + cpu_flush_icache(); if (is_idle_vcpu(next)) { reset_stack_and_jump(idle_loop); @@ -278,8 +298,10 @@ static void relinquish_memory(struct dom void domain_relinquish_resources(struct domain *d) { + relinquish_memory(d, &d->xenpage_list); relinquish_memory(d, &d->page_list); free_extents(d); + xfree(d->arch.foreign_mfns); return; } @@ -291,7 +313,6 @@ void arch_dump_vcpu_info(struct vcpu *v) { } -extern void sleep(void); static void safe_halt(void) { int cpu = smp_processor_id(); diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domain_build.c --- a/xen/arch/powerpc/domain_build.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/domain_build.c Thu Dec 14 08:57:36 2006 -0700 @@ -178,8 +178,7 @@ int construct_dom0(struct domain *d, shadow_set_allocation(d, opt_dom0_shadow, &preempt); } while (preempt); if (shadow_get_allocation(d) == 0) - panic("shadow allocation failed 0x%x < 0x%x\n", - shadow_get_allocation(d), opt_dom0_shadow); + panic("shadow allocation failed: %dMib\n", opt_dom0_shadow); ASSERT( image_len < rma_sz ); diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/domctl.c --- a/xen/arch/powerpc/domctl.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/domctl.c Thu Dec 14 08:57:36 2006 -0700 @@ -96,14 +96,14 @@ long arch_do_domctl(struct xen_domctl *d case XEN_DOMCTL_real_mode_area: { struct domain *d; - unsigned int log = domctl->u.real_mode_area.log; + unsigned int order = domctl->u.real_mode_area.log - PAGE_SHIFT; ret = -ESRCH; d = find_domain_by_id(domctl->domain); if (d != NULL) { ret = -EINVAL; - if (cpu_rma_valid(log)) - ret = allocate_rma(d, log - PAGE_SHIFT); + if (cpu_rma_valid(order)) + ret = allocate_rma(d, order); put_domain(d); } } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/exceptions.c --- a/xen/arch/powerpc/exceptions.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/exceptions.c Thu Dec 14 08:57:36 2006 -0700 @@ -25,8 +25,10 @@ #include <xen/serial.h> #include <xen/gdbstub.h> #include <xen/console.h> 
+#include <xen/shutdown.h> #include <asm/time.h> #include <asm/processor.h> +#include <asm/debugger.h> #undef DEBUG @@ -56,25 +58,19 @@ void do_dec(struct cpu_user_regs *regs) void program_exception(struct cpu_user_regs *regs, unsigned long cookie) { + if (cookie == 0x200) { + if (cpu_machinecheck(regs)) + return; + + printk("%s: machine check\n", __func__); + } else { #ifdef CRASH_DEBUG - __trap_to_gdb(regs, cookie); -#else /* CRASH_DEBUG */ - int recover = 0; + if (__trap_to_gdb(regs, cookie) == 0) + return; +#endif /* CRASH_DEBUG */ - console_start_sync(); - - show_registers(regs); - printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr()); - printk("hid4 0x%016lx\n", regs->hid4); - printk("---[ backtrace ]---\n"); - show_backtrace(regs->gprs[1], regs->lr, regs->pc); - - if (cookie == 0x200) - recover = cpu_machinecheck(regs); - - if (!recover) - panic("%s: 0x%lx\n", __func__, cookie); - - console_end_sync(); -#endif /* CRASH_DEBUG */ + printk("%s: type: 0x%lx\n", __func__, cookie); + show_backtrace_regs(regs); + } + machine_halt(); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/exceptions.h --- a/xen/arch/powerpc/exceptions.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/exceptions.h Thu Dec 14 08:57:36 2006 -0700 @@ -43,13 +43,14 @@ extern void program_exception( struct cpu_user_regs *regs, unsigned long cookie); extern long xen_hvcall_jump(struct cpu_user_regs *regs, ulong address); -extern void *mambo_memset(void *, int, ulong); -extern void *mambo_memcpy(void *, const void *, ulong); + +extern void sleep(void); +extern void idle_loop(void); extern ulong *__hypercall_table[]; extern char exception_vectors[]; extern char exception_vectors_end[]; extern int spin_start[]; -extern int secondary_cpu_init(int cpuid, unsigned long r4); +extern void secondary_cpu_init(int cpuid, unsigned long r4); #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/external.c --- a/xen/arch/powerpc/external.c Thu Dec 14 08:54:54 2006 -0700 +++ 
b/xen/arch/powerpc/external.c Thu Dec 14 08:57:36 2006 -0700 @@ -82,7 +82,14 @@ void do_external(struct cpu_user_regs *r vec = xen_mpic_get_irq(regs); - if (vec != -1) { + if (irq_desc[vec].status & IRQ_PER_CPU) { + /* x86 do_IRQ does not respect the per cpu flag. */ + irq_desc_t *desc = &irq_desc[vec]; + regs->entry_vector = vec; + desc->handler->ack(vec); + desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs); + desc->handler->end(vec); + } else if (vec != -1) { DBG("EE:0x%lx isrc: %d\n", regs->msr, vec); regs->entry_vector = vec; do_IRQ(regs); @@ -253,3 +260,24 @@ int ioapic_guest_write(unsigned long phy BUG_ON(val != val); return 0; } + +void send_IPI_mask(cpumask_t mask, int vector) +{ + unsigned int cpus; + int const bits = 8 * sizeof(cpus); + + switch(vector) { + case CALL_FUNCTION_VECTOR: + case EVENT_CHECK_VECTOR: + break; + default: + BUG(); + return; + } + + BUG_ON(NR_CPUS > bits); + BUG_ON(fls(mask.bits[0]) > bits); + + cpus = mask.bits[0]; + mpic_send_ipi(vector, cpus); +} diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/gdbstub.c --- a/xen/arch/powerpc/gdbstub.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/gdbstub.c Thu Dec 14 08:57:36 2006 -0700 @@ -25,6 +25,7 @@ #include <asm/msr.h> #include <asm/bitops.h> #include <asm/cache.h> +#include <asm/debugger.h> #include <asm/processor.h> asm(".globl trap_instruction\n" diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/iommu.c --- a/xen/arch/powerpc/iommu.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/iommu.c Thu Dec 14 08:57:36 2006 -0700 @@ -32,6 +32,12 @@ #include "tce.h" #include "iommu.h" +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) 
+#endif + struct iommu_funcs { int (*iommu_put)(ulong, union tce); }; @@ -46,17 +52,31 @@ int iommu_put(u32 buid, ulong ioba, unio struct domain *d = v->domain; if (buid < iommu_phbs_num && iommu_phbs[buid].iommu_put != NULL) { - ulong pfn; + ulong gmfn; ulong mfn; int mtype; - pfn = tce.tce_bits.tce_rpn; - mfn = pfn2mfn(d, pfn, &mtype); + gmfn = tce.tce_bits.tce_rpn; + + + mfn = pfn2mfn(d, gmfn, &mtype); if (mfn != INVALID_MFN) { -#ifdef DEBUG - printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__, - ioba, pfn, mfn); -#endif + switch (mtype) { + case PFN_TYPE_RMA: + case PFN_TYPE_LOGICAL: + break; + case PFN_TYPE_FOREIGN: + DBG("%s: assigning to Foriegn page: " + "gmfn: 0x%lx mfn: 0x%lx\n", __func__, gmfn, mfn); + break; + default: + printk("%s: unsupported type[%d]: gmfn: 0x%lx mfn: 0x%lx\n", + __func__, mtype, gmfn, mfn); + return -1; + break; + } + DBG("%s: ioba=0x%lx gmfn=0x%lx mfn=0x%lx\n", __func__, + ioba, gmfn, mfn); tce.tce_bits.tce_rpn = mfn; return iommu_phbs[buid].iommu_put(ioba, tce); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/machine_kexec.c --- a/xen/arch/powerpc/machine_kexec.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/machine_kexec.c Thu Dec 14 08:57:36 2006 -0700 @@ -1,5 +1,6 @@ #include <xen/lib.h> /* for printk() used in stubs */ #include <xen/types.h> +#include <xen/kexec.h> #include <public/kexec.h> int machine_kexec_load(int type, int slot, xen_kexec_image_t *image) @@ -9,11 +10,6 @@ int machine_kexec_load(int type, int slo } void machine_kexec_unload(int type, int slot, xen_kexec_image_t *image) -{ - printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); -} - -void machine_kexec(xen_kexec_image_t *image) { printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/memory.c --- a/xen/arch/powerpc/memory.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/memory.c Thu Dec 14 08:57:36 2006 -0700 @@ -20,10 +20,31 @@ */ #include 
<xen/sched.h> #include <xen/mm.h> +#include <xen/numa.h> #include "of-devtree.h" #include "oftree.h" +#include "rtas.h" + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the + * page_info table and allocation bitmap. + */ +static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB; +integer_param("xenheap_megabytes", opt_xenheap_megabytes); unsigned long xenheap_phys_end; +static uint nr_pages; +static ulong xenheap_size; +static ulong save_start; +static ulong save_end; + struct membuf { ulong start; ulong size; @@ -36,15 +57,20 @@ static ulong free_xenheap(ulong start, u start = ALIGN_UP(start, PAGE_SIZE); end = ALIGN_DOWN(end, PAGE_SIZE); - printk("%s: 0x%lx - 0x%lx\n", __func__, start, end); - - if (oftree <= end && oftree >= start) { - printk("%s: Go around the devtree: 0x%lx - 0x%lx\n", - __func__, oftree, oftree_end); - init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE)); - init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end); + DBG("%s: 0x%lx - 0x%lx\n", __func__, start, end); + + /* need to do this better */ + if (save_start <= end && save_start >= start) { + DBG("%s: Go around the saved area: 0x%lx - 0x%lx\n", + __func__, save_start, save_end); + init_xenheap_pages(start, ALIGN_DOWN(save_start, PAGE_SIZE)); + xenheap_size += ALIGN_DOWN(save_start, PAGE_SIZE) - start; + + init_xenheap_pages(ALIGN_UP(save_end, PAGE_SIZE), end); + xenheap_size += end - ALIGN_UP(save_end, PAGE_SIZE); } else { init_xenheap_pages(start, end); + xenheap_size += end - start; } return ALIGN_UP(end, PAGE_SIZE); @@ -57,8 +83,10 @@ static void set_max_page(struct membuf * for (i = 0; i < entries; i++) { ulong end_page; + printk(" %016lx: %016lx\n", mb[i].start, mb[i].size); + nr_pages += mb[i].size >> PAGE_SHIFT; + end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT; - if (end_page > max_page) max_page = end_page; } @@ -71,11 +99,11 @@ static void 
heap_init(struct membuf *mb, ulong start_blk; ulong end_blk = 0; - for (i = 0; i < entries; i++) { - start_blk = mb[i].start; - end_blk = start_blk + mb[i].size; - - if (start_blk < xenheap_phys_end) { + for (i = 0; i < entries; i++) { + start_blk = mb[i].start; + end_blk = start_blk + mb[i].size; + + if (start_blk < xenheap_phys_end) { if (xenheap_phys_end > end_blk) { panic("xenheap spans LMB\n"); } @@ -87,7 +115,7 @@ static void heap_init(struct membuf *mb, init_boot_pages(start_blk, end_blk); total_pages += (end_blk - start_blk) >> PAGE_SHIFT; - } + } } static void ofd_walk_mem(void *m, walk_mem_fn fn) @@ -123,7 +151,7 @@ static void setup_xenheap(module_t *mod, for (i = 0; i < mcount; i++) { u32 s; - if(mod[i].mod_end == mod[i].mod_start) + if (mod[i].mod_end == mod[i].mod_start) continue; s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE); @@ -149,19 +177,42 @@ void memory_init(module_t *mod, int mcou void memory_init(module_t *mod, int mcount) { ulong eomem; - ulong heap_start, heap_size; - - printk("Physical RAM map:\n"); + ulong heap_start; + ulong xh_pages; /* lets find out how much memory there is and set max_page */ max_page = 0; + printk("Physical RAM map:\n"); ofd_walk_mem((void *)oftree, set_max_page); eomem = max_page << PAGE_SHIFT; if (eomem == 0){ panic("ofd_walk_mem() failed\n"); } - printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10); + + /* find the portion of memory we need to keep safe */ + save_start = oftree; + save_end = oftree_end; + if (rtas_base) { + if (save_start > rtas_base) + save_start = rtas_base; + if (save_end < rtas_end) + save_end = rtas_end; + } + + /* minimum heap has to reach to the end of all Xen required memory */ + xh_pages = ALIGN_UP(save_end, PAGE_SIZE) >> PAGE_SHIFT; + xh_pages += opt_xenheap_megabytes << (20 - PAGE_SHIFT); + + /* While we are allocating HTABS from The Xen Heap we need it to + * be larger */ + xh_pages += nr_pages >> 5; + + xenheap_phys_end = xh_pages << PAGE_SHIFT; + printk("End of Xen Area: 
%luMiB (%luKiB)\n", + xenheap_phys_end >> 20, xenheap_phys_end >> 10); + + printk("End of RAM: %luMiB (%luKiB)\n", eomem >> 20, eomem >> 10); /* Architecturally the first 4 pages are exception hendlers, we * will also be copying down some code there */ @@ -185,22 +236,23 @@ void memory_init(module_t *mod, int mcou panic("total_pages > max_page: 0x%lx > 0x%lx\n", total_pages, max_page); - printk("total_pages: 0x%016lx\n", total_pages); + DBG("total_pages: 0x%016lx\n", total_pages); init_frametable(); + + numa_initmem_init(0, max_page); + end_boot_allocator(); /* Add memory between the beginning of the heap and the beginning - * of out text */ + * of our text */ free_xenheap(heap_start, (ulong)_start); - - heap_size = xenheap_phys_end - heap_start; - printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10); - setup_xenheap(mod, mcount); + printk("Xen Heap: %luMiB (%luKiB)\n", + xenheap_size >> 20, xenheap_size >> 10); eomem = avail_domheap_pages(); - printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem, + printk("Dom Heap: %luMiB (%luKiB)\n", (eomem << PAGE_SHIFT) >> 20, (eomem << PAGE_SHIFT) >> 10); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/mm.c Thu Dec 14 08:57:36 2006 -0700 @@ -25,9 +25,9 @@ #include <xen/kernel.h> #include <xen/sched.h> #include <xen/perfc.h> -#include <asm/misc.h> #include <asm/init.h> #include <asm/page.h> +#include <asm/string.h> #ifdef VERBOSE #define MEM_LOG(_f, _a...) 
\ @@ -42,18 +42,129 @@ unsigned long max_page; unsigned long max_page; unsigned long total_pages; +void __init init_frametable(void) +{ + unsigned long p; + unsigned long nr_pages; + int i; + + nr_pages = PFN_UP(max_page * sizeof(struct page_info)); + + p = alloc_boot_pages(nr_pages, 1); + if (p == 0) + panic("Not enough memory for frame table\n"); + + frame_table = (struct page_info *)(p << PAGE_SHIFT); + for (i = 0; i < nr_pages; i += 1) + clear_page((void *)((p + i) << PAGE_SHIFT)); +} + +void share_xen_page_with_guest( + struct page_info *page, struct domain *d, int readonly) +{ + if ( page_get_owner(page) == d ) + return; + + /* this causes us to leak pages in the Domain and reuslts in + * Zombie domains, I think we are missing a piece, until we find + * it we disable the following code */ + set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY); + + spin_lock(&d->page_alloc_lock); + + /* The incremented type count pins as writable or read-only. */ + page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); + page->u.inuse.type_info |= PGT_validated | 1; + + page_set_owner(page, d); + wmb(); /* install valid domain ptr before updating refcnt. 
*/ + ASSERT(page->count_info == 0); + page->count_info |= PGC_allocated | 1; + + if ( unlikely(d->xenheap_pages++ == 0) ) + get_knownalive_domain(d); + list_add_tail(&page->list, &d->xenpage_list); + + spin_unlock(&d->page_alloc_lock); +} + +void share_xen_page_with_privileged_guests( + struct page_info *page, int readonly) +{ + unimplemented(); +} + +static ulong foreign_to_mfn(struct domain *d, ulong pfn) +{ + + pfn -= 1UL << cpu_foreign_map_order(); + + BUG_ON(pfn >= d->arch.foreign_mfn_count); + + return d->arch.foreign_mfns[pfn]; +} + +static int set_foreign(struct domain *d, ulong pfn, ulong mfn) +{ + pfn -= 1UL << cpu_foreign_map_order(); + + BUG_ON(pfn >= d->arch.foreign_mfn_count); + d->arch.foreign_mfns[pfn] = mfn; + + return 0; +} + +static int create_grant_va_mapping( + unsigned long va, unsigned long frame, struct vcpu *v) +{ + if (v->domain->domain_id != 0) { + printk("only Dom0 can map a grant entry\n"); + BUG(); + return GNTST_permission_denied; + } + set_foreign(v->domain, va >> PAGE_SHIFT, frame); + return GNTST_okay; +} + +static int destroy_grant_va_mapping( + unsigned long addr, unsigned long frame, struct domain *d) +{ + if (d->domain_id != 0) { + printk("only Dom0 can map a grant entry\n"); + BUG(); + return GNTST_permission_denied; + } + set_foreign(d, addr >> PAGE_SHIFT, ~0UL); + return GNTST_okay; +} + int create_grant_host_mapping( unsigned long addr, unsigned long frame, unsigned int flags) { - panic("%s called\n", __func__); - return 1; + if (flags & GNTMAP_application_map) { + printk("%s: GNTMAP_application_map not supported\n", __func__); + BUG(); + return GNTST_general_error; + } + if (flags & GNTMAP_contains_pte) { + printk("%s: GNTMAP_contains_pte not supported\n", __func__); + BUG(); + return GNTST_general_error; + } + return create_grant_va_mapping(addr, frame, current); } int destroy_grant_host_mapping( unsigned long addr, unsigned long frame, unsigned int flags) { - panic("%s called\n", __func__); - return 1; + if (flags & 
GNTMAP_contains_pte) { + printk("%s: GNTMAP_contains_pte not supported\n", __func__); + BUG(); + return GNTST_general_error; + } + + /* may have force the remove here */ + return destroy_grant_va_mapping(addr, frame, current->domain); } int steal_page(struct domain *d, struct page_info *page, unsigned int memflags) @@ -139,7 +250,7 @@ int get_page_type(struct page_info *page { return 0; } - if ( unlikely(!(x & PGT_validated)) ) + else if ( unlikely(!(x & PGT_validated)) ) { /* Someone else is updating validation of this page. Wait... */ while ( (y = page->u.inuse.type_info) == x ) @@ -158,25 +269,6 @@ int get_page_type(struct page_info *page return 1; } -void __init init_frametable(void) -{ - unsigned long p; - unsigned long nr_pages; - int i; - - nr_pages = PFN_UP(max_page * sizeof(struct page_info)); - nr_pages = min(nr_pages, (4UL << (20 - PAGE_SHIFT))); - - - p = alloc_boot_pages(nr_pages, 1); - if (p == 0) - panic("Not enough memory for frame table\n"); - - frame_table = (struct page_info *)(p << PAGE_SHIFT); - for (i = 0; i < nr_pages; i += 1) - clear_page((void *)((p + i) << PAGE_SHIFT)); -} - long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) { printk("%s: no PPC specific memory ops\n", __func__); @@ -185,29 +277,28 @@ long arch_memory_op(int op, XEN_GUEST_HA extern void copy_page(void *dp, void *sp) { - if (on_mambo()) { - extern void *mambo_memcpy(void *,const void *,__kernel_size_t); - mambo_memcpy(dp, sp, PAGE_SIZE); + if (on_systemsim()) { + systemsim_memcpy(dp, sp, PAGE_SIZE); } else { memcpy(dp, sp, PAGE_SIZE); } } +/* XXX should probably replace with faster data structure */ static uint add_extent(struct domain *d, struct page_info *pg, uint order) { struct page_extents *pe; pe = xmalloc(struct page_extents); if (pe == NULL) - return 0; + return -ENOMEM; pe->pg = pg; pe->order = order; - pe->pfn = page_to_mfn(pg); list_add_tail(&pe->pe_list, &d->arch.extent_list); - return pe->pfn; + return 0; } void free_extents(struct domain *d) @@ -246,7 
+337,7 @@ uint allocate_extents(struct domain *d, if (pg == NULL) return total_nrpages; - if (add_extent(d, pg, ext_order) == 0) { + if (add_extent(d, pg, ext_order) < 0) { free_domheap_pages(pg, ext_order); return total_nrpages; } @@ -299,13 +390,13 @@ int allocate_rma(struct domain *d, unsig return 0; } + void free_rma_check(struct page_info *page) { if (test_bit(_PGC_page_RMA, &page->count_info) && !test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags)) panic("Attempt to free an RMA page: 0x%lx\n", page_to_mfn(page)); } - ulong pfn2mfn(struct domain *d, ulong pfn, int *type) { @@ -314,9 +405,17 @@ ulong pfn2mfn(struct domain *d, ulong pf struct page_extents *pe; ulong mfn = INVALID_MFN; int t = PFN_TYPE_NONE; + ulong foreign_map_pfn = 1UL << cpu_foreign_map_order(); /* quick tests first */ - if (d->is_privileged && cpu_io_mfn(pfn)) { + if (pfn & foreign_map_pfn) { + t = PFN_TYPE_FOREIGN; + mfn = foreign_to_mfn(d, pfn); + } else if (pfn >= max_page && pfn < (max_page + NR_GRANT_FRAMES)) { + /* Its a grant table access */ + t = PFN_TYPE_GNTTAB; + mfn = gnttab_shared_mfn(d, d->grant_table, (pfn - max_page)); + } else if (d->is_privileged && cpu_io_mfn(pfn)) { t = PFN_TYPE_IO; mfn = pfn; } else { @@ -324,17 +423,32 @@ ulong pfn2mfn(struct domain *d, ulong pf t = PFN_TYPE_RMA; mfn = pfn + rma_base_mfn; } else { + ulong cur_pfn = rma_size_mfn; + list_for_each_entry (pe, &d->arch.extent_list, pe_list) { - uint end_pfn = pe->pfn + (1 << pe->order); - - if (pfn >= pe->pfn && pfn < end_pfn) { + uint pe_pages = 1UL << pe->order; + uint end_pfn = cur_pfn + pe_pages; + + if (pfn >= cur_pfn && pfn < end_pfn) { t = PFN_TYPE_LOGICAL; - mfn = page_to_mfn(pe->pg) + (pfn - pe->pfn); + mfn = page_to_mfn(pe->pg) + (pfn - cur_pfn); break; } + cur_pfn += pe_pages; } } - BUG_ON(t != PFN_TYPE_NONE && page_get_owner(mfn_to_page(mfn)) != d); +#ifdef DEBUG + if (t != PFN_TYPE_NONE && + (d->domain_flags & DOMF_dying) && + page_get_owner(mfn_to_page(mfn)) != d) { + printk("%s: page 
type: %d owner Dom[%d]:%p expected Dom[%d]:%p\n", + __func__, t, + page_get_owner(mfn_to_page(mfn))->domain_id, + page_get_owner(mfn_to_page(mfn)), + d->domain_id, d); + BUG(); + } +#endif } if (t == PFN_TYPE_NONE) { @@ -368,6 +482,42 @@ ulong pfn2mfn(struct domain *d, ulong pf return mfn; } +unsigned long mfn_to_gmfn(struct domain *d, unsigned long mfn) +{ + struct page_extents *pe; + ulong cur_pfn; + ulong gnttab_mfn; + ulong rma_mfn; + + /* grant? */ + gnttab_mfn = gnttab_shared_mfn(d, d->grant_table, 0); + if (mfn >= gnttab_mfn && mfn < (gnttab_mfn + NR_GRANT_FRAMES)) + return max_page + (mfn - gnttab_mfn); + + /* IO? */ + if (d->is_privileged && cpu_io_mfn(mfn)) + return mfn; + + rma_mfn = page_to_mfn(d->arch.rma_page); + if (mfn >= rma_mfn && + mfn < (rma_mfn + (1 << d->arch.rma_order))) + return mfn - rma_mfn; + + /* Extent? */ + cur_pfn = 1UL << d->arch.rma_order; + list_for_each_entry (pe, &d->arch.extent_list, pe_list) { + uint pe_pages = 1UL << pe->order; + uint b_mfn = page_to_mfn(pe->pg); + uint e_mfn = b_mfn + pe_pages; + + if (mfn >= b_mfn && mfn < e_mfn) { + return cur_pfn + (mfn - b_mfn); + } + cur_pfn += pe_pages; + } + return INVALID_M2P_ENTRY; +} + void guest_physmap_add_page( struct domain *d, unsigned long gpfn, unsigned long mfn) { @@ -382,3 +532,10 @@ void shadow_drop_references( struct domain *d, struct page_info *page) { } + +int arch_domain_add_extent(struct domain *d, struct page_info *page, int order) +{ + if (add_extent(d, page, order) < 0) + return -ENOMEM; + return 0; +} diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mpic.c --- a/xen/arch/powerpc/mpic.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/mpic.c Thu Dec 14 08:57:36 2006 -0700 @@ -15,22 +15,18 @@ /* XXX Xen hacks ... 
*/ /* make this generic */ -#define le32_to_cpu(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ -}) +#define le32_to_cpu(x) \ + ({ \ + __u32 __x = (x); \ + ((__u32)( \ + (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ + (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ + (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ + (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ + }) #define alloc_bootmem(x) xmalloc_bytes(x) -#define request_irq(irq, handler, f, devname, dev_id) \ - panic("IPI requested: %d: %p: %s: %p\n", irq, handler, devname, dev_id) - -typedef int irqreturn_t; #define IRQ_NONE (0) #define IRQ_HANDLED (1) @@ -97,11 +93,6 @@ typedef int irqreturn_t; #include <asm/mpic.h> #include <asm/smp.h> -static inline void smp_message_recv(int msg, struct pt_regs *regs) -{ - return; -} - #ifdef DEBUG #define DBG(fmt...) 
printk(fmt) #else @@ -126,7 +117,7 @@ static DEFINE_SPINLOCK(mpic_lock); static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base, - unsigned int reg) + unsigned int reg) { if (be) return in_be32(base + (reg >> 2)); @@ -135,7 +126,7 @@ static inline u32 _mpic_read(unsigned in } static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base, - unsigned int reg, u32 value) + unsigned int reg, u32 value) { if (be) out_be32(base + (reg >> 2), value); @@ -186,17 +177,17 @@ static inline u32 _mpic_irq_read(struct unsigned int idx = src_no & mpic->isu_mask; return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], - reg + (idx * MPIC_IRQ_STRIDE)); + reg + (idx * MPIC_IRQ_STRIDE)); } static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no, - unsigned int reg, u32 value) + unsigned int reg, u32 value) { unsigned int isu = src_no >> mpic->isu_shift; unsigned int idx = src_no & mpic->isu_mask; _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], - reg + (idx * MPIC_IRQ_STRIDE), value); + reg + (idx * MPIC_IRQ_STRIDE), value); } #define mpic_read(b,r) _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r)) @@ -261,7 +252,7 @@ static inline void mpic_ht_end_irq(struc } static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source, - unsigned int irqflags) + unsigned int irqflags) { struct mpic_irq_fixup *fixup = &mpic->fixups[source]; unsigned long flags; @@ -284,7 +275,7 @@ static void mpic_startup_ht_interrupt(st } static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source, - unsigned int irqflags) + unsigned int irqflags) { struct mpic_irq_fixup *fixup = &mpic->fixups[source]; unsigned long flags; @@ -305,7 +296,7 @@ static void mpic_shutdown_ht_interrupt(s } static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase, - unsigned int devfn, u32 vdid) + unsigned int devfn, u32 vdid) { int i, irq, n; u8 __iomem *base; @@ -485,8 +476,8 @@ static void mpic_enable_irq(unsigned 
int DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, - mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & - ~MPIC_VECPRI_MASK); + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & + ~MPIC_VECPRI_MASK); /* make sure mask gets to controller before we return to user */ do { @@ -532,8 +523,8 @@ static void mpic_disable_irq(unsigned in DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, - mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | - MPIC_VECPRI_MASK); + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | + MPIC_VECPRI_MASK); /* make sure mask gets to controller before we return to user */ do { @@ -623,7 +614,7 @@ static void mpic_set_affinity(unsigned i cpus_and(tmp, cpumask, cpu_online_map); mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION, - mpic_physmask(cpus_addr(tmp)[0])); + mpic_physmask(cpus_addr(tmp)[0])); } @@ -633,14 +624,14 @@ static void mpic_set_affinity(unsigned i struct mpic * __init mpic_alloc(unsigned long phys_addr, - unsigned int flags, - unsigned int isu_size, - unsigned int irq_offset, - unsigned int irq_count, - unsigned int ipi_offset, - unsigned char *senses, - unsigned int senses_count, - const char *name) + unsigned int flags, + unsigned int isu_size, + unsigned int irq_offset, + unsigned int irq_count, + unsigned int ipi_offset, + unsigned char *senses, + unsigned int senses_count, + const char *name) { struct mpic *mpic; u32 reg; @@ -687,8 +678,8 @@ struct mpic * __init mpic_alloc(unsigned /* Reset */ if (flags & MPIC_WANTS_RESET) { mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, - mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) - | MPIC_GREG_GCONF_RESET); + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_RESET); while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) & MPIC_GREG_GCONF_RESET) mb(); @@ -700,15 +691,15 @@ struct mpic * __init mpic_alloc(unsigned */ reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0); mpic->num_cpus = 
((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK) - >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; + >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; if (isu_size == 0) mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK) - >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; + >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; /* Map the per-CPU registers */ for (i = 0; i < mpic->num_cpus; i++) { mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE + - i * MPIC_CPU_STRIDE, 0x1000); + i * MPIC_CPU_STRIDE, 0x1000); BUG_ON(mpic->cpuregs[i] == NULL); } @@ -716,7 +707,7 @@ struct mpic * __init mpic_alloc(unsigned if (mpic->isu_size == 0) { mpic->isu_size = mpic->num_sources; mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE, - MPIC_IRQ_STRIDE * mpic->isu_size); + MPIC_IRQ_STRIDE * mpic->isu_size); BUG_ON(mpic->isus[0] == NULL); } mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1); @@ -752,7 +743,7 @@ struct mpic * __init mpic_alloc(unsigned } void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, - unsigned long phys_addr) + unsigned long phys_addr) { unsigned int isu_first = isu_num * mpic->isu_size; @@ -764,7 +755,7 @@ void __init mpic_assign_isu(struct mpic } void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler, - void *data) + void *data) { struct mpic *mpic = mpic_find(irq, NULL); unsigned long flags; @@ -799,20 +790,20 @@ void __init mpic_init(struct mpic *mpic) /* Initialize timers: just disable them all */ for (i = 0; i < 4; i++) { mpic_write(mpic->tmregs, - i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0); + i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0); mpic_write(mpic->tmregs, - i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI, - MPIC_VECPRI_MASK | - (MPIC_VEC_TIMER_0 + i)); + i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI, + MPIC_VECPRI_MASK | + (MPIC_VEC_TIMER_0 + i)); } /* Initialize IPIs to our reserved vectors and mark them disabled for now */ mpic_test_broken_ipi(mpic); for (i = 0; i < 4; i++) { mpic_ipi_write(i, - MPIC_VECPRI_MASK | - (10 << 
MPIC_VECPRI_PRIORITY_SHIFT) | - (MPIC_VEC_IPI_0 + i)); + MPIC_VECPRI_MASK | + (10 << MPIC_VECPRI_PRIORITY_SHIFT) | + (MPIC_VEC_IPI_0 + i)); #ifdef CONFIG_SMP if (!(mpic->flags & MPIC_PRIMARY)) continue; @@ -859,7 +850,7 @@ void __init mpic_init(struct mpic *mpic) #ifdef CONFIG_MPIC_BROKEN_U3 if (mpic_is_ht_interrupt(mpic, i)) { vecpri &= ~(MPIC_VECPRI_SENSE_MASK | - MPIC_VECPRI_POLARITY_MASK); + MPIC_VECPRI_POLARITY_MASK); vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; } #else @@ -873,7 +864,7 @@ void __init mpic_init(struct mpic *mpic) /* init hw */ mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri); mpic_irq_write(i, MPIC_IRQ_DESTINATION, - 1 << hard_smp_processor_id()); + 1 << hard_smp_processor_id()); /* init linux descriptors */ if (i < mpic->irq_count) { @@ -887,8 +878,8 @@ void __init mpic_init(struct mpic *mpic) /* Disable 8259 passthrough */ mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, - mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) - | MPIC_GREG_GCONF_8259_PTHROU_DIS); + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_8259_PTHROU_DIS); /* Set current processor priority to 0 */ mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); @@ -908,12 +899,12 @@ void mpic_irq_set_priority(unsigned int reg = mpic_ipi_read(irq - mpic->ipi_offset) & ~MPIC_VECPRI_PRIORITY_MASK; mpic_ipi_write(irq - mpic->ipi_offset, - reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } else { reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_PRIORITY_MASK; mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, - reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); } spin_unlock_irqrestore(&mpic_lock, flags); } @@ -956,7 +947,7 @@ void mpic_setup_this_cpu(void) if (distribute_irqs) { for (i = 0; i < mpic->num_sources ; i++) mpic_irq_write(i, MPIC_IRQ_DESTINATION, - mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk); + mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk); } /* Set 
current processor priority to 0 */ @@ -1001,7 +992,7 @@ void mpic_teardown_this_cpu(int secondar /* let the mpic know we don't want intrs. */ for (i = 0; i < mpic->num_sources ; i++) mpic_irq_write(i, MPIC_IRQ_DESTINATION, - mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); + mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); /* Set current processor priority to max */ mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); @@ -1021,7 +1012,7 @@ void mpic_send_ipi(unsigned int ipi_no, #endif mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10, - mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); + mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); } int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) @@ -1049,7 +1040,7 @@ int mpic_get_one_irq(struct mpic *mpic, return irq + mpic->irq_offset; } #ifdef DEBUG_IPI - DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); + DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); #endif return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset; } @@ -1075,13 +1066,13 @@ void mpic_request_ipis(void) /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT, - "IPI0 (call function)", mpic); + "IPI0 (call function)", mpic); request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT, - "IPI1 (reschedule)", mpic); + "IPI1 (reschedule)", mpic); request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT, - "IPI2 (unused)", mpic); + "IPI2 (unused)", mpic); request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT, - "IPI3 (debugger break)", mpic); + "IPI3 (debugger break)", mpic); printk("IPIs requested... 
\n"); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/mpic_init.c --- a/xen/arch/powerpc/mpic_init.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/mpic_init.c Thu Dec 14 08:57:36 2006 -0700 @@ -22,6 +22,7 @@ #include <xen/init.h> #include <xen/lib.h> #include <asm/mpic.h> +#include <errno.h> #include "mpic_init.h" #include "oftree.h" #include "of-devtree.h" @@ -74,7 +75,7 @@ static unsigned long reg2(void *oft_p, o rc = ofd_getprop(oft_p, c, "reg", &isa_reg, sizeof(isa_reg)); DBG("%s: reg property address=0x%08x size=0x%08x\n", __func__, - isa_reg.address, isa_reg.size); + isa_reg.address, isa_reg.size); return isa_reg.address; } @@ -92,7 +93,7 @@ static unsigned long reg1(void *oft_p, o rc = ofd_getprop(oft_p, c, "reg", &reg, sizeof(reg)); DBG("%s: reg property address=0x%08x size=0x%08x\n", __func__, - reg.address, reg.size); + reg.address, reg.size); return reg.address; } @@ -173,15 +174,15 @@ static unsigned long find_ranges_addr_fr break; case 2: ranges_addr = (((u64)ranges[ranges_i]) << 32) | - ranges[ranges_i + 1]; + ranges[ranges_i + 1]; break; case 3: /* the G5 case, how to squeeze 96 bits into 64 */ ranges_addr = (((u64)ranges[ranges_i+1]) << 32) | - ranges[ranges_i + 2]; + ranges[ranges_i + 2]; break; case 4: ranges_addr = (((u64)ranges[ranges_i+2]) << 32) | - ranges[ranges_i + 4]; + ranges[ranges_i + 4]; break; default: PANIC("#address-cells out of range\n"); @@ -266,7 +267,7 @@ static int find_mpic_canonical_probe(voi * We select the one without an 'interrupt' property.
*/ c = ofd_node_find_by_prop(oft_p, OFD_ROOT, "device_type", mpic_type, - sizeof(mpic_type)); + sizeof(mpic_type)); while (c > 0) { int int_len; int good_mpic; @@ -357,6 +358,42 @@ static struct hw_interrupt_type *share_m #define share_mpic(M,X) (M) #endif + +static unsigned int mpic_startup_ipi(unsigned int irq) +{ + mpic->hc_ipi.enable(irq); + return 0; +} + +int request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct cpu_user_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ + int retval; + struct irqaction *action; + void (*func)(int, void *, struct cpu_user_regs *); + + action = xmalloc(struct irqaction); + if (!action) { + BUG(); + return -ENOMEM; + } + + /* Xen's handler prototype is slightly different than Linux's. */ + func = (void (*)(int, void *, struct cpu_user_regs *))handler; + + action->handler = func; + action->name = devname; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) { + BUG(); + xfree(action); + } + + return retval; +} struct hw_interrupt_type *xen_mpic_init(struct hw_interrupt_type *xen_irq) { @@ -397,6 +434,11 @@ struct hw_interrupt_type *xen_mpic_init( hit = share_mpic(&mpic->hc_irq, xen_irq); printk("%s: success\n", __func__); + + mpic->hc_ipi.ack = xen_irq->ack; + mpic->hc_ipi.startup = mpic_startup_ipi; + mpic_request_ipis(); + return hit; } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of-devtree.h --- a/xen/arch/powerpc/of-devtree.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/of-devtree.h Thu Dec 14 08:57:36 2006 -0700 @@ -33,15 +33,15 @@ union of_pci_hi { union of_pci_hi { u32 word; struct { - u32 opa_n: 1; /* relocatable */ - u32 opa_p: 1; /* prefetchable */ - u32 opa_t: 1; /* aliased */ + u32 opa_n: 1; /* relocatable */ + u32 opa_p: 1; /* prefetchable */ + u32 opa_t: 1; /* aliased */ u32 _opa_res: 3; - u32 opa: 2; /* space code */ + u32 opa: 2; /* space code */ u32 opa_b: 8; /* bus number */ - u32 opa_d: 5; /* device number */ - u32 opa_f: 
3; /* function number */ - u32 opa_r: 8; /* register number */ + u32 opa_d: 5; /* device number */ + u32 opa_f: 3; /* function number */ + u32 opa_r: 8; /* register number */ } bits; }; @@ -79,9 +79,9 @@ typedef s32 ofdn_t; typedef s32 ofdn_t; #define OFD_ROOT 1 -#define OFD_DUMP_NAMES 0x1 -#define OFD_DUMP_VALUES 0x2 -#define OFD_DUMP_ALL (OFD_DUMP_VALUES|OFD_DUMP_NAMES) +#define OFD_DUMP_NAMES 0x1 +#define OFD_DUMP_VALUES 0x2 +#define OFD_DUMP_ALL (OFD_DUMP_VALUES|OFD_DUMP_NAMES) extern void *ofd_create(void *mem, size_t sz); extern ofdn_t ofd_node_parent(void *mem, ofdn_t n); @@ -90,9 +90,9 @@ extern const char *ofd_node_path(void *m extern const char *ofd_node_path(void *mem, ofdn_t p); extern int ofd_node_to_path(void *mem, ofdn_t p, void *buf, size_t sz); extern ofdn_t ofd_node_child_create(void *mem, ofdn_t parent, - const char *path, size_t pathlen); + const char *path, size_t pathlen); extern ofdn_t ofd_node_peer_create(void *mem, ofdn_t sibling, - const char *path, size_t pathlen); + const char *path, size_t pathlen); extern ofdn_t ofd_node_find(void *mem, const char *devspec); extern ofdn_t ofd_node_add(void *m, ofdn_t n, const char *path, size_t sz); extern int ofd_node_prune(void *m, ofdn_t n); @@ -102,23 +102,23 @@ extern ofdn_t ofd_nextprop(void *mem, of extern ofdn_t ofd_nextprop(void *mem, ofdn_t n, const char *prev, char *name); extern ofdn_t ofd_prop_find(void *mem, ofdn_t n, const char *name); extern int ofd_getprop(void *mem, ofdn_t n, const char *name, - void *buf, size_t sz); + void *buf, size_t sz); extern int ofd_getproplen(void *mem, ofdn_t n, const char *name); extern int ofd_setprop(void *mem, ofdn_t n, const char *name, - const void *buf, size_t sz); + const void *buf, size_t sz); extern void ofd_prop_remove(void *mem, ofdn_t node, ofdn_t prop); extern ofdn_t ofd_prop_add(void *mem, ofdn_t n, const char *name, - const void *buf, size_t sz); + const void *buf, size_t sz); extern ofdn_t ofd_io_create(void *m, ofdn_t node, u64 open); 
extern u32 ofd_io_open(void *mem, ofdn_t n); extern void ofd_io_close(void *mem, ofdn_t n); -typedef void (*walk_fn)(void *m, ofdn_t p, int arg); -extern void ofd_dump_props(void *m, ofdn_t p, int dump); +typedef void (*walk_fn)(void *m, const char *pre, ofdn_t p, int arg); +extern void ofd_dump_props(void *m, const char *pre, ofdn_t p, int dump); -extern void ofd_walk(void *m, ofdn_t p, walk_fn fn, int arg); +extern void ofd_walk(void *m, const char *pre, ofdn_t p, walk_fn fn, int arg); /* Recursively look up #address_cells and #size_cells properties */ @@ -129,10 +129,10 @@ extern size_t ofd_space(void *mem); extern size_t ofd_space(void *mem); extern void ofd_prop_print(const char *head, const char *path, - const char *name, const char *prop, size_t sz); + const char *name, const char *prop, size_t sz); extern ofdn_t ofd_node_find_by_prop(void *mem, ofdn_t n, const char *name, - const void *val, size_t sz); + const void *val, size_t sz); extern ofdn_t ofd_node_find_next(void *mem, ofdn_t n); extern ofdn_t ofd_node_find_prev(void *mem, ofdn_t n); extern void ofd_init(int (*write)(const char *, size_t len)); diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of-devwalk.c --- a/xen/arch/powerpc/of-devwalk.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/of-devwalk.c Thu Dec 14 08:57:36 2006 -0700 @@ -80,7 +80,7 @@ void ofd_prop_print( #endif } -void ofd_dump_props(void *mem, ofdn_t n, int dump) +void ofd_dump_props(void *mem, const char *pre, ofdn_t n, int dump) { ofdn_t p; char name[128]; @@ -95,7 +95,7 @@ void ofd_dump_props(void *mem, ofdn_t n, } if (dump & OFD_DUMP_NAMES) { - printk("of_walk: %s: phandle 0x%x\n", path, n); + printk("%s: %s: phandle 0x%x\n", pre, path, n); } p = ofd_nextprop(mem, n, NULL, name); @@ -106,30 +106,30 @@ void ofd_dump_props(void *mem, ofdn_t n, } if ( dump & OFD_DUMP_VALUES ) { - ofd_prop_print("of_walk", path, name, prop, sz); + ofd_prop_print(pre, path, name, prop, sz); } p = ofd_nextprop(mem, n, name, name); } } -void 
ofd_walk(void *m, ofdn_t p, walk_fn fn, int arg) +void ofd_walk(void *m, const char *pre, ofdn_t p, walk_fn fn, int arg) { ofdn_t n; if ( fn != NULL ) { - (*fn)(m, p, arg); + (*fn)(m, pre, p, arg); } /* child */ n = ofd_node_child(m, p); if ( n != 0 ) { - ofd_walk(m, n, fn, arg); + ofd_walk(m, pre, n, fn, arg); } /* peer */ n = ofd_node_peer(m, p); if ( n != 0 ) { - ofd_walk(m, n, fn, arg); + ofd_walk(m, pre, n, fn, arg); } } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/of_handler/console.c --- a/xen/arch/powerpc/of_handler/console.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/of_handler/console.c Thu Dec 14 08:57:36 2006 -0700 @@ -113,7 +113,7 @@ static s32 ofh_xen_dom0_read(s32 chan, v return ret; rc = xen_hvcall(XEN_MARK(__HYPERVISOR_console_io), CONSOLEIO_read, - count, desc); + count, desc); if (rc <= 0) { return ret; } @@ -139,7 +139,7 @@ static s32 ofh_xen_dom0_write(s32 chan, return ret; rc = xen_hvcall(XEN_MARK(__HYPERVISOR_console_io), CONSOLEIO_write, - count, desc); + count, desc); if (rc <= 0) { return ret; } @@ -157,8 +157,8 @@ static s32 ofh_xen_domu_read(s32 chan, v static s32 ofh_xen_domu_read(s32 chan, void *buf, u32 count, s32 *actual, ulong b) { - struct xencons_interface *intf; - XENCONS_RING_IDX cons, prod; + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; s32 ret; intf = DRELA(ofh_ihp, b)->ofi_intf; @@ -180,8 +180,8 @@ static s32 ofh_xen_domu_write(s32 chan, static s32 ofh_xen_domu_write(s32 chan, const void *buf, u32 count, s32 *actual, ulong b) { - struct xencons_interface *intf; - XENCONS_RING_IDX cons, prod; + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; s32 ret; intf = DRELA(ofh_ihp, b)->ofi_intf; diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/ofd_fixup.c --- a/xen/arch/powerpc/ofd_fixup.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/ofd_fixup.c Thu Dec 14 08:57:36 2006 -0700 @@ -25,6 +25,7 @@ #include <public/xen.h> #include "of-devtree.h" #include "oftree.h" +#include
"rtas.h" #undef RTAS @@ -347,6 +348,15 @@ static ofdn_t ofd_xen_props(void *m, str val[0] = rma_size(d->arch.rma_order) - val[1]; ofd_prop_add(m, n, "reserved", val, sizeof (val)); + /* tell dom0 that Xen depends on it to have power control */ + if (!rtas_entry) + ofd_prop_add(m, n, "power-control", NULL, 0); + + /* tell dom0 where ranted pages go in the linear map */ + val[0] = cpu_foreign_map_order(); + val[1] = d->arch.foreign_mfn_count; + ofd_prop_add(m, n, "foreign-map", val, sizeof (val)); + n = ofd_node_add(m, n, console, sizeof (console)); if (n > 0) { val[0] = 0; @@ -417,7 +427,7 @@ int ofd_dom0_fixup(struct domain *d, ulo #ifdef DEBUG - ofd_walk(m, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL); + ofd_walk(m, __func__, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL); #endif return 1; } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/ofd_fixup_memory.c --- a/xen/arch/powerpc/ofd_fixup_memory.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/ofd_fixup_memory.c Thu Dec 14 08:57:36 2006 -0700 @@ -68,6 +68,8 @@ static ofdn_t ofd_memory_node_create( reg.sz = size; ofd_prop_add(m, n, "reg", &reg, sizeof (reg)); + printk("Dom0: %s: %016lx, %016lx\n", path, start, size); + return n; } @@ -86,17 +88,19 @@ static void ofd_memory_extent_nodes(void ulong size; ofdn_t n; struct page_extents *pe; + ulong cur_pfn = 1UL << d->arch.rma_order; + start = cur_pfn << PAGE_SHIFT; + size = 0; list_for_each_entry (pe, &d->arch.extent_list, pe_list) { - start = pe->pfn << PAGE_SHIFT; - size = 1UL << (pe->order + PAGE_SHIFT); - - n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, - start, size); - - BUG_ON(n <= 0); + size += 1UL << (pe->order + PAGE_SHIFT); + if (pe->order != cpu_extent_order()) + panic("we don't handle this yet\n"); } + n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, + start, size); + BUG_ON(n <= 0); } void ofd_memory_props(void *m, struct domain *d) diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/papr/xlate.c ---
a/xen/arch/powerpc/papr/xlate.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/papr/xlate.c Thu Dec 14 08:57:36 2006 -0700 @@ -19,7 +19,7 @@ */ #undef DEBUG -#undef DEBUG_FAIL +#undef DEBUG_LOW #include <xen/config.h> #include <xen/types.h> @@ -30,6 +30,17 @@ #include <asm/papr.h> #include <asm/hcalls.h> +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif +#ifdef DEBUG_LOW +#define DBG_LOW(fmt...) printk(fmt) +#else +#define DBG_LOW(fmt...) +#endif + #ifdef USE_PTE_INSERT static inline void pte_insert(union pte volatile *pte, ulong vsid, ulong rpn, ulong lrpn) @@ -106,11 +117,8 @@ static void pte_tlbie(union pte volatile } -static void h_enter(struct cpu_user_regs *regs) -{ - ulong flags = regs->gprs[4]; - ulong ptex = regs->gprs[5]; - +long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn) +{ union pte pte; union pte volatile *ppte; struct domain_htab *htab; @@ -129,14 +137,13 @@ static void h_enter(struct cpu_user_regs htab = &d->arch.htab; if (ptex > (1UL << htab->log_num_ptes)) { - regs->gprs[3] = H_Parameter; - printk("%s: bad ptex: 0x%lx\n", __func__, ptex); - return; + DBG("%s: bad ptex: 0x%lx\n", __func__, ptex); + return H_Parameter; } /* use local HPTE to avoid manual shifting & masking */ - pte.words.vsid = regs->gprs[6]; - pte.words.rpn = regs->gprs[7]; + pte.words.vsid = vsid; + pte.words.rpn = rpn; if ( pte.bits.l ) { /* large page? 
*/ /* figure out the page size for the selected large page */ @@ -150,10 +157,9 @@ static void h_enter(struct cpu_user_regs } if ( lp_size >= d->arch.large_page_sizes ) { - printk("%s: attempt to use unsupported lp_size %d\n", - __func__, lp_size); - regs->gprs[3] = H_Parameter; - return; + DBG("%s: attempt to use unsupported lp_size %d\n", + __func__, lp_size); + return H_Parameter; } /* get correct pgshift value */ @@ -168,31 +174,32 @@ static void h_enter(struct cpu_user_regs mfn = pfn2mfn(d, pfn, &mtype); if (mfn == INVALID_MFN) { - regs->gprs[3] = H_Parameter; - return; - } - + DBG("%s: Bad PFN: 0x%lx\n", __func__, pfn); + return H_Parameter; + } + + if (mtype == PFN_TYPE_IO && !d->is_privileged) { + /* only a privilaged dom can access outside IO space */ + DBG("%s: unprivileged access to physical page: 0x%lx\n", + __func__, pfn); + return H_Privilege; + } if (mtype == PFN_TYPE_IO) { - /* only a privilaged dom can access outside IO space */ - if ( !d->is_privileged ) { - regs->gprs[3] = H_Privilege; - printk("%s: unprivileged access to physical page: 0x%lx\n", - __func__, pfn); - return; - } - if ( !((pte.bits.w == 0) && (pte.bits.i == 1) && (pte.bits.g == 1)) ) { -#ifdef DEBUG_FAIL - printk("%s: expecting an IO WIMG " - "w=%x i=%d m=%d, g=%d\n word 0x%lx\n", __func__, - pte.bits.w, pte.bits.i, pte.bits.m, pte.bits.g, - pte.words.rpn); -#endif - regs->gprs[3] = H_Parameter; - return; - } + DBG("%s: expecting an IO WIMG " + "w=%x i=%d m=%d, g=%d\n word 0x%lx\n", __func__, + pte.bits.w, pte.bits.i, pte.bits.m, pte.bits.g, + pte.words.rpn); + return H_Parameter; + } + } + if (mtype == PFN_TYPE_GNTTAB) { + DBG("%s: Dom[%d] mapping grant table: 0x%lx\n", + __func__, d->domain_id, pfn << PAGE_SHIFT); + pte.bits.i = 0; + pte.bits.g = 0; } /* fixup the RPN field of our local PTE copy */ pte.bits.rpn = mfn | lp_bits; @@ -213,13 +220,13 @@ static void h_enter(struct cpu_user_regs BUG_ON(f == d); if (unlikely(!get_domain(f))) { - regs->gprs[3] = H_Rescinded; - return; + 
DBG("%s: Rescinded, no domain: 0x%lx\n", __func__, pfn); + return H_Rescinded; } if (unlikely(!get_page(pg, f))) { put_domain(f); - regs->gprs[3] = H_Rescinded; - return; + DBG("%s: Rescinded, no page: 0x%lx\n", __func__, pfn); + return H_Rescinded; } } @@ -276,17 +283,12 @@ static void h_enter(struct cpu_user_regs : "b" (ppte), "r" (pte.words.rpn), "r" (pte.words.vsid) : "memory"); - regs->gprs[3] = H_Success; - regs->gprs[4] = idx; - - return; - } - } - -#ifdef DEBUG + return idx; + } + } + /* If the PTEG is full then no additional values are returned. */ - printk("%s: PTEG FULL\n", __func__); -#endif + DBG("%s: PTEG FULL\n", __func__); if (pg != NULL) put_page(pg); @@ -294,7 +296,24 @@ static void h_enter(struct cpu_user_regs if (f != NULL) put_domain(f); - regs->gprs[3] = H_PTEG_Full; + return H_PTEG_Full; +} + +static void h_enter(struct cpu_user_regs *regs) +{ + ulong flags = regs->gprs[4]; + ulong ptex = regs->gprs[5]; + ulong vsid = regs->gprs[6]; + ulong rpn = regs->gprs[7]; + long ret; + + ret = pte_enter(flags, ptex, vsid, rpn); + + if (ret >= 0) { + regs->gprs[3] = H_Success; + regs->gprs[4] = ret; + } else + regs->gprs[3] = ret; } static void h_protect(struct cpu_user_regs *regs) @@ -308,13 +327,11 @@ static void h_protect(struct cpu_user_re union pte volatile *ppte; union pte lpte; -#ifdef DEBUG - printk("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__, - flags, ptex, avpn); -#endif + DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__, + flags, ptex, avpn); if ( ptex > (1UL << htab->log_num_ptes) ) { + DBG("%s: bad ptex: 0x%lx\n", __func__, ptex); regs->gprs[3] = H_Parameter; - printk("%s: bad ptex: 0x%lx\n", __func__, ptex); return; } ppte = &htab->map[ptex]; @@ -324,10 +341,8 @@ static void h_protect(struct cpu_user_re /* the AVPN param occupies the bit-space of the word */ if ( (flags & H_AVPN) && lpte.bits.avpn != avpn >> 7 ) { -#ifdef DEBUG_FAIL - printk("%s: %p: AVPN check failed: 0x%lx, 0x%lx\n", __func__, - ppte, 
lpte.words.vsid, lpte.words.rpn); -#endif + DBG_LOW("%s: %p: AVPN check failed: 0x%lx, 0x%lx\n", __func__, + ppte, lpte.words.vsid, lpte.words.rpn); regs->gprs[3] = H_Not_Found; return; } @@ -337,9 +352,7 @@ static void h_protect(struct cpu_user_re * we invalidate entires where the PAPR says to 0 the whole hi * dword, so the AVPN should catch this first */ -#ifdef DEBUG_FAIL - printk("%s: pte invalid\n", __func__); -#endif + DBG("%s: pte invalid\n", __func__); regs->gprs[3] = H_Not_Found; return; } @@ -374,7 +387,6 @@ static void h_protect(struct cpu_user_re static void h_clear_ref(struct cpu_user_regs *regs) { - ulong flags = regs->gprs[4]; ulong ptex = regs->gprs[5]; struct vcpu *v = get_current(); struct domain *d = v->domain; @@ -382,20 +394,20 @@ static void h_clear_ref(struct cpu_user_ union pte volatile *pte; union pte lpte; + DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx\n", __func__, + regs->gprs[4], ptex); + #ifdef DEBUG - printk("%s: flags: 0x%lx ptex: 0x%lx\n", __func__, - flags, ptex); -#endif - - if (flags != 0) { - printk("WARNING: %s: " - "flags are undefined and should be 0: 0x%lx\n", - __func__, flags); - } + if (regs->gprs[4] != 0) { + DBG("WARNING: %s: " + "flags are undefined and should be 0: 0x%lx\n", + __func__, regs->gprs[4]); + } +#endif if (ptex > (1UL << htab->log_num_ptes)) { + DBG("%s: bad ptex: 0x%lx\n", __func__, ptex); regs->gprs[3] = H_Parameter; - printk("%s: bad ptex: 0x%lx\n", __func__, ptex); return; } pte = &htab->map[ptex]; @@ -417,7 +429,6 @@ static void h_clear_ref(struct cpu_user_ static void h_clear_mod(struct cpu_user_regs *regs) { - ulong flags = regs->gprs[4]; ulong ptex = regs->gprs[5]; struct vcpu *v = get_current(); struct domain *d = v->domain; @@ -425,19 +436,20 @@ static void h_clear_mod(struct cpu_user_ union pte volatile *pte; union pte lpte; + DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx\n", __func__, + regs->gprs[4], ptex); + #ifdef DEBUG - printk("%s: flags: 0x%lx ptex: 0x%lx\n", __func__, - flags, ptex); -#endif - if 
(flags != 0) { - printk("WARNING: %s: " - "flags are undefined and should be 0: 0x%lx\n", - __func__, flags); - } - + if (regs->gprs[4] != 0) { + DBG("WARNING: %s: " + "flags are undefined and should be 0: 0x%lx\n", + __func__, regs->gprs[4]); + } +#endif + if (ptex > (1UL << htab->log_num_ptes)) { + DBG("%s: bad ptex: 0x%lx\n", __func__, ptex); regs->gprs[3] = H_Parameter; - printk("%s: bad ptex: 0x%lx\n", __func__, ptex); return; } pte = &htab->map[ptex]; @@ -466,63 +478,53 @@ static void h_clear_mod(struct cpu_user_ } } -static void h_remove(struct cpu_user_regs *regs) -{ - ulong flags = regs->gprs[4]; - ulong ptex = regs->gprs[5]; - ulong avpn = regs->gprs[6]; +long pte_remove(ulong flags, ulong ptex, ulong avpn, ulong *hi, ulong *lo) +{ struct vcpu *v = get_current(); struct domain *d = v->domain; struct domain_htab *htab = &d->arch.htab; union pte volatile *pte; union pte lpte; -#ifdef DEBUG - printk("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__, - flags, ptex, avpn); -#endif + DBG_LOW("%s: flags: 0x%lx ptex: 0x%lx avpn: 0x%lx\n", __func__, + flags, ptex, avpn); + if ( ptex > (1UL << htab->log_num_ptes) ) { - regs->gprs[3] = H_Parameter; - printk("%s: bad ptex: 0x%lx\n", __func__, ptex); - return; + DBG("%s: bad ptex: 0x%lx\n", __func__, ptex); + return H_Parameter; } pte = &htab->map[ptex]; lpte.words.vsid = pte->words.vsid; lpte.words.rpn = pte->words.rpn; if ((flags & H_AVPN) && lpte.bits.avpn != (avpn >> 7)) { -#ifdef DEBUG_FAIL - printk("%s: avpn doesn not match\n", __func__); -#endif - regs->gprs[3] = H_Not_Found; - return; + DBG_LOW("%s: AVPN does not match\n", __func__); + return H_Not_Found; } if ((flags & H_ANDCOND) && ((avpn & pte->words.vsid) != 0)) { -#ifdef DEBUG_FAIL - printk("%s: andcond does not match\n", __func__); -#endif - regs->gprs[3] = H_Not_Found; - return; - } - - regs->gprs[3] = H_Success; + DBG("%s: andcond does not match\n", __func__); + return H_Not_Found; + } + /* return old PTE in regs 4 and 5 */ - regs->gprs[4] = 
lpte.words.vsid; - regs->gprs[5] = lpte.words.rpn; - + *hi = lpte.words.vsid; + *lo = lpte.words.rpn; + +#ifdef DEBUG_LOW /* XXX - I'm very skeptical of doing ANYTHING if not bits.v */ /* XXX - I think the spec should be questioned in this case (MFM) */ if (lpte.bits.v == 0) { - printk("%s: removing invalid entry\n", __func__); - } + DBG_LOW("%s: removing invalid entry\n", __func__); + } +#endif if (lpte.bits.v) { ulong mfn = lpte.bits.rpn; if (!cpu_io_mfn(mfn)) { struct page_info *pg = mfn_to_page(mfn); struct domain *f = page_get_owner(pg); - + if (f != d) { put_domain(f); put_page(pg); @@ -536,6 +538,27 @@ static void h_remove(struct cpu_user_reg : "memory"); pte_tlbie(&lpte, ptex); + + return H_Success; +} + +static void h_remove(struct cpu_user_regs *regs) +{ + ulong flags = regs->gprs[4]; + ulong ptex = regs->gprs[5]; + ulong avpn = regs->gprs[6]; + ulong hi, lo; + long ret; + + ret = pte_remove(flags, ptex, avpn, &hi, &lo); + + regs->gprs[3] = ret; + + if (ret == H_Success) { + regs->gprs[4] = hi; + regs->gprs[5] = lo; + } + return; } static void h_read(struct cpu_user_regs *regs) @@ -547,12 +570,12 @@ static void h_read(struct cpu_user_regs struct domain_htab *htab = &d->arch.htab; union pte volatile *pte; - if (flags & H_READ_4) + if (flags & H_READ_4) ptex &= ~0x3UL; if (ptex > (1UL << htab->log_num_ptes)) { + DBG("%s: bad ptex: 0x%lx\n", __func__, ptex); regs->gprs[3] = H_Parameter; - printk("%s: bad ptex: 0x%lx\n", __func__, ptex); return; } pte = &htab->map[ptex]; diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/exceptions.S --- a/xen/arch/powerpc/powerpc64/exceptions.S Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/powerpc64/exceptions.S Thu Dec 14 08:57:36 2006 -0700 @@ -564,6 +564,22 @@ _GLOBAL(sleep) */ .globl spin_start spin_start: + /* We discovered by experiment that the ERAT must be flushed early. 
*/ + isync + slbia + isync + + /* Do a cache flush for our text, in case the loader didn't */ + LOADADDR(r9, _start) + LOADADDR(r8, _etext) +4: dcbf r0,r9 + icbi r0,r9 + addi r9,r9,0x20 /* up to a 4 way set per line */ + cmpld cr0,r9,r8 + blt 4b + sync + isync + /* Write our processor number as an acknowledgment that we're alive. */ LOADADDR(r14, __spin_ack) stw r3, 0(r14) @@ -575,7 +591,7 @@ spin_start: b . /* Find our index in the array of processor_area struct pointers. */ 2: LOADADDR(r14, global_cpu_table) - muli r15, r3, 8 + mulli r15, r3, 8 add r14, r14, r15 /* Spin until the pointer for our processor goes valid. */ 1: ld r15, 0(r14) diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/io.S --- a/xen/arch/powerpc/powerpc64/io.S Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/powerpc64/io.S Thu Dec 14 08:57:36 2006 -0700 @@ -23,6 +23,11 @@ #include <asm/processor.h> #include <asm/percpu.h> +/* There is no reason why I can't use a tlbie, which should be less + * "destructive" but useing SLBIE proves to be more stable result. + */ +#define INVALIDATE_ERAT_WITH_SLBIE + /* Xen runs in real mode (i.e. untranslated, MMU disabled). This avoids TLB * flushes and also makes it easy to access all domains' memory. However, on * PowerPC real mode accesses are cacheable, which is good for general @@ -34,12 +39,14 @@ * make the access, then re-enable it... */ +#ifdef INVALIDATE_ERAT_WITH_SLBIE /* Not all useful assemblers understand 'tlbiel'. * 'addr' is a GPR containing the address being accessed. 
*/ .macro tlbiel addr .long 0x7c000224 | (\addr << 11) .endm +#endif .macro DISABLE_DCACHE addr mfmsr r8 @@ -48,29 +55,53 @@ ori r6, r6, MSR_EE andc r5, r8, r6 mtmsr r5 + sync - /* set HID4.RM_CI */ +#ifdef INVALIDATE_ERAT_WITH_SLBIE + /* create an slbie entry for the io setting a high order bit + * to avoid any important SLBs */ + extldi r0, \addr, 36, 0 +#endif + /* setup HID4.RM_CI */ mfspr r9, SPRN_HID4 li r6, 0x100 sldi r6, r6, 32 - or r5, r9, r6 - tlbiel \addr /* invalidate the ERAT entry */ - sync - mtspr SPRN_HID4, r5 + or r10, r9, r6 + + /* Mark the processor as "in CI mode" */ + li r7,0 + mfspr r5, SPRN_PIR + li r6, MCK_CPU_STAT_CI + /* store that we are in a CI routine */ + stb r6, MCK_CPU_STAT_BASE(r5) + /* r7 = MCK_CPU_STAT_CI IO in progress */ + mr r7, r5 + lwsync + + /* switch modes */ + mtspr SPRN_HID4, r10 + /* invalidate the ERAT entry */ +#ifdef INVALIDATE_ERAT_WITH_SLBIE + slbie r0 +#else + tlbiel \addr +#endif isync - /* Mark the processor as "in CI mode" */ - mfspr r5, SPRN_PIR - li r6, MCK_CPU_STAT_CI - stb r6, MCK_CPU_STAT_BASE(r5) - sync .endm .macro ENABLE_DCACHE addr - /* re-zero HID4.RM_CI */ + /* r7 = 0, IO is complete */ + li r7, 0 + lwsync + /* restore HID4.RM_CI */ + mtspr SPRN_HID4, r9 + /* invalidate the ERAT entry */ +#ifdef INVALIDATE_ERAT_WITH_SLBIE + slbie r0 +#else tlbiel \addr /* invalidate the ERAT entry */ - sync - mtspr SPRN_HID4, r9 +#endif isync /* Mark the processor as "out of CI mode" */ @@ -83,9 +114,13 @@ mtmsr r8 .endm -/* The following assembly cannot use r8 or r9 since they hold original - * values of msr and hid4 repectively +/* The following assembly cannot use some registers since they hold original + * values of we need to keep */ +#undef r0 +#define r0 do_not_use_r0 +#undef r7 +#define r7 do_not_use_r7 #undef r8 #define r8 do_not_use_r8 #undef r9 diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970.c --- a/xen/arch/powerpc/powerpc64/ppc970.c Thu Dec 14 08:54:54 2006 -0700 +++ 
b/xen/arch/powerpc/powerpc64/ppc970.c Thu Dec 14 08:57:36 2006 -0700 @@ -30,6 +30,7 @@ #include <asm/powerpc64/procarea.h> #include <asm/powerpc64/processor.h> #include <asm/powerpc64/ppc970-hid.h> +#include "scom.h" #undef DEBUG #undef SERIALIZE @@ -38,48 +39,77 @@ struct cpu_caches cpu_caches = { .dline_size = 0x80, .log_dline_size = 7, .dlines_per_page = PAGE_SIZE >> 7, + .isize = (64 << 10), /* 64 KiB */ .iline_size = 0x80, .log_iline_size = 7, .ilines_per_page = PAGE_SIZE >> 7, }; + +void cpu_flush_icache(void) +{ + union hid1 hid1; + ulong flags; + ulong ea; + + local_irq_save(flags); + + /* uses special processor mode that forces a real address match on + * the whole line */ + hid1.word = mfhid1(); + hid1.bits.en_icbi = 1; + mthid1(hid1.word); + + for (ea = 0; ea < cpu_caches.isize; ea += cpu_caches.iline_size) + icbi(ea); + + sync(); + + hid1.bits.en_icbi = 0; + mthid1(hid1.word); + + local_irq_restore(flags); +} + + struct rma_settings { - int order; + int log; int rmlr_0; int rmlr_1_2; }; -static struct rma_settings rma_orders[] = { - { .order = 26, .rmlr_0 = 0, .rmlr_1_2 = 3, }, /* 64 MB */ - { .order = 27, .rmlr_0 = 1, .rmlr_1_2 = 3, }, /* 128 MB */ - { .order = 28, .rmlr_0 = 1, .rmlr_1_2 = 0, }, /* 256 MB */ - { .order = 30, .rmlr_0 = 0, .rmlr_1_2 = 2, }, /* 1 GB */ - { .order = 34, .rmlr_0 = 0, .rmlr_1_2 = 1, }, /* 16 GB */ - { .order = 38, .rmlr_0 = 0, .rmlr_1_2 = 0, }, /* 256 GB */ +static struct rma_settings rma_logs[] = { + { .log = 26, .rmlr_0 = 0, .rmlr_1_2 = 3, }, /* 64 MB */ + { .log = 27, .rmlr_0 = 1, .rmlr_1_2 = 3, }, /* 128 MB */ + { .log = 28, .rmlr_0 = 1, .rmlr_1_2 = 0, }, /* 256 MB */ + { .log = 30, .rmlr_0 = 0, .rmlr_1_2 = 2, }, /* 1 GB */ + { .log = 34, .rmlr_0 = 0, .rmlr_1_2 = 1, }, /* 16 GB */ + { .log = 38, .rmlr_0 = 0, .rmlr_1_2 = 0, }, /* 256 GB */ }; static uint log_large_page_sizes[] = { 4 + 20, /* (1 << 4) == 16M */ }; -static struct rma_settings *cpu_find_rma(unsigned int order) +static struct rma_settings 
*cpu_find_rma(unsigned int log) { int i; - for (i = 0; i < ARRAY_SIZE(rma_orders); i++) { - if (rma_orders[i].order == order) - return &rma_orders[i]; + + for (i = 0; i < ARRAY_SIZE(rma_logs); i++) { + if (rma_logs[i].log == log) + return &rma_logs[i]; } return NULL; } unsigned int cpu_default_rma_order_pages(void) { - return rma_orders[0].order - PAGE_SHIFT; -} - -int cpu_rma_valid(unsigned int log) -{ - return cpu_find_rma(log) != NULL; + return rma_logs[0].log - PAGE_SHIFT; +} + +int cpu_rma_valid(unsigned int order) +{ + return cpu_find_rma(order + PAGE_SHIFT) != NULL; } unsigned int cpu_large_page_orders(uint *sizes, uint max) @@ -163,8 +193,11 @@ void cpu_initialize(int cpuid) mtdec(timebase_freq); mthdec(timebase_freq); - hid0.bits.nap = 1; /* NAP */ + /* FIXME Do not set the NAP bit in HID0 until we have had a chance + * to audit the safe halt and idle loop code. */ + hid0.bits.nap = 0; /* NAP */ hid0.bits.dpm = 1; /* Dynamic Power Management */ + hid0.bits.nhr = 1; /* Not Hard Reset */ hid0.bits.hdice_en = 1; /* enable HDEC */ hid0.bits.en_therm = 0; /* ! 
Enable ext thermal ints */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970_machinecheck.c --- a/xen/arch/powerpc/powerpc64/ppc970_machinecheck.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/powerpc64/ppc970_machinecheck.c Thu Dec 14 08:57:36 2006 -0700 @@ -24,6 +24,8 @@ #include <public/xen.h> #include <asm/processor.h> #include <asm/percpu.h> +#include <asm/debugger.h> +#include "scom.h" #define MCK_SRR1_INSN_FETCH_UNIT 0x0000000000200000 /* 42 */ #define MCK_SRR1_LOAD_STORE 0x0000000000100000 /* 43 */ @@ -54,6 +56,8 @@ int cpu_machinecheck(struct cpu_user_reg if (mck_cpu_stats[mfpir()] != 0) printk("While in CI IO\n"); + show_backtrace_regs(regs); + printk("SRR1: 0x%016lx\n", regs->msr); if (regs->msr & MCK_SRR1_INSN_FETCH_UNIT) printk("42: Exception caused by Instruction Fetch Unit (IFU)\n" @@ -67,6 +71,7 @@ int cpu_machinecheck(struct cpu_user_reg case 0: printk("0b00: Likely caused by an asynchronous machine check,\n" " see SCOM Asynchronous Machine Check Register\n"); + cpu_scom_AMCR(); break; case MCK_SRR1_CAUSE_SLB_PAR: printk("0b01: Exception caused by an SLB parity error detected\n" @@ -116,5 +121,5 @@ int cpu_machinecheck(struct cpu_user_reg dump_segments(0); } - return 0; /* for now lets not recover; */ + return 0; /* for now lets not recover */ } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/ppc970_scom.c --- a/xen/arch/powerpc/powerpc64/ppc970_scom.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/powerpc64/ppc970_scom.c Thu Dec 14 08:57:36 2006 -0700 @@ -22,33 +22,17 @@ #include <xen/types.h> #include <xen/lib.h> #include <xen/console.h> +#include <xen/errno.h> +#include <asm/delay.h> +#include <asm/processor.h> +#include "scom.h" + +#undef CONFIG_SCOM #define SPRN_SCOMC 276 #define SPRN_SCOMD 277 - -static inline void mtscomc(ulong scomc) -{ - __asm__ __volatile__ ("mtspr %1, %0" : : "r" (scomc), "i"(SPRN_SCOMC)); -} - -static inline ulong mfscomc(void) -{ - ulong scomc; - __asm__ __volatile__ 
("mfspr %0, %1" : "=r" (scomc): "i"(SPRN_SCOMC)); - return scomc; -} - -static inline void mtscomd(ulong scomd) -{ - __asm__ __volatile__ ("mtspr %1, %0" : : "r" (scomd), "i"(SPRN_SCOMD)); -} - -static inline ulong mfscomd(void) -{ - ulong scomd; - __asm__ __volatile__ ("mfspr %0, %1" : "=r" (scomd): "i"(SPRN_SCOMD)); - return scomd; -} +#define SCOMC_READ 1 +#define SCOMC_WRITE (!(SCOMC_READ)) union scomc { struct scomc_bits { @@ -68,50 +52,133 @@ union scomc { }; -static inline ulong read_scom(ulong addr) +int cpu_scom_read(uint addr, ulong *d) { union scomc c; - ulong d; + ulong flags; - c.word = 0; - c.bits.addr = addr; - c.bits.RW = 0; + /* drop the low 8bits (including parity) */ + addr >>= 8; - mtscomc(c.word); - d = mfscomd(); - c.word = mfscomc(); - if (c.bits.failure) - panic("scom status: 0x%016lx\n", c.word); + /* these give iface errors because the addresses are not software + * accessible */ + BUG_ON(addr & 0x8000); - return d; + for (;;) { + c.word = 0; + c.bits.addr = addr; + c.bits.RW = SCOMC_READ; + + local_irq_save(flags); + asm volatile ( + "sync \n\t" + "mtspr %2, %0 \n\t" + "isync \n\t" + "mfspr %1, %3 \n\t" + "isync \n\t" + "mfspr %0, %2 \n\t" + "isync \n\t" + : "+r" (c.word), "=r" (*d) + : "i"(SPRN_SCOMC), "i"(SPRN_SCOMD)); + + local_irq_restore(flags); + /* WARNING! older 970s (pre FX) shift the bits right 1 position */ + + if (!c.bits.failure) + return 0; + + /* deal with errors */ + /* has SCOM been disabled? 
*/ + if (c.bits.disabled) + return -ENOSYS; + + /* we were passed a bad addr return -1 */ + if (c.bits.addr_error) + return -EINVAL; + + /* this is way bad and we will checkstop soon */ + BUG_ON(c.bits.proto_error); + + if (c.bits.iface_error) + udelay(10); + } } -static inline void write_scom(ulong addr, ulong val) +int cpu_scom_write(uint addr, ulong d) { union scomc c; + ulong flags; - c.word = 0; - c.bits.addr = addr; - c.bits.RW = 1; + /* drop the low 8bits (including parity) */ + addr >>= 8; - mtscomd(val); - mtscomc(c.word); - c.word = mfscomc(); - if (c.bits.failure) - panic("scom status: 0x%016lx\n", c.word); + /* these give iface errors because the addresses are not software + * accessible */ + BUG_ON(addr & 0x8000); + + for (;;) { + c.word = 0; + c.bits.addr = addr; + c.bits.RW = SCOMC_WRITE; + + local_irq_save(flags); + asm volatile( + "sync \n\t" + "mtspr %3, %1 \n\t" + "isync \n\t" + "mtspr %2, %0 \n\t" + "isync \n\t" + "mfspr %0, %2 \n\t" + "isync \n\t" + : "+r" (c.word) + : "r" (d), "i"(SPRN_SCOMC), "i"(SPRN_SCOMD)); + local_irq_restore(flags); + + if (!c.bits.failure) + return 0; + + /* has SCOM been disabled? 
*/ + if (c.bits.disabled) + return -ENOSYS; + + /* we were passed a bad addr return -1 */ + if (c.bits.addr_error) + return -EINVAL; + + /* this is way bad and we will checkstop soon */ + BUG_ON(c.bits.proto_error); + + /* check for iface and retry */ + if (c.bits.iface_error) + udelay(10); + } } - -#define SCOM_AMCS_REG 0x022601 -#define SCOM_AMCS_AND_MASK 0x022700 -#define SCOM_AMCS_OR_MASK 0x022800 -#define SCOM_CMCE 0x030901 -#define SCOM_PMCR 0x400801 void cpu_scom_init(void) { -#ifdef not_yet - console_start_sync(); - printk("scom PMCR: 0x%016lx\n", read_scom(SCOM_PMCR)); - console_end_sync(); +#ifdef CONFIG_SCOM + ulong val; + if (PVR_REV(mfpvr()) == 0x0300) { + /* these address are only good for 970FX */ + console_start_sync(); + if (!cpu_scom_read(SCOM_PTSR, &val)) + printk("SCOM PTSR: 0x%016lx\n", val); + + console_end_sync(); + } #endif } + +void cpu_scom_AMCR(void) +{ +#ifdef CONFIG_SCOM + ulong val; + + if (PVR_REV(mfpvr()) == 0x0300) { + /* these address are only good for 970FX */ + cpu_scom_read(SCOM_AMC_REG, &val); + printk("SCOM AMCR: 0x%016lx\n", val); + } +#endif +} + diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/powerpc64/traps.c --- a/xen/arch/powerpc/powerpc64/traps.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/powerpc64/traps.c Thu Dec 14 08:57:36 2006 -0700 @@ -48,7 +48,3 @@ void show_registers(struct cpu_user_regs console_end_sync(); } -void show_execution_state(struct cpu_user_regs *regs) -{ - show_registers(regs); -} diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/rtas.c --- a/xen/arch/powerpc/rtas.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/rtas.c Thu Dec 14 08:57:36 2006 -0700 @@ -13,12 +13,90 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 
2006 * * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #include <xen/config.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include "of-devtree.h" +#include "rtas.h" -int rtas_halt = -1; -int rtas_reboot = -1; +static int rtas_halt_token = -1; +static int rtas_reboot_token = -1; +int rtas_entry; +unsigned long rtas_msr; +unsigned long rtas_base; +unsigned long rtas_end; + +struct rtas_args { + int ra_token; + int ra_nargs; + int ra_nrets; + int ra_args[10]; +} __attribute__ ((aligned(8))); + +static int rtas_call(struct rtas_args *r) +{ + if (rtas_entry == 0) + return -ENOSYS; + + return prom_call(r, rtas_base, rtas_entry, rtas_msr); +} + +int __init rtas_init(void *m) +{ + static const char halt[] = "power-off"; + static const char reboot[] = "system-reboot"; + ofdn_t n; + + if (rtas_entry == 0) + return -ENOSYS; + + n = ofd_node_find(m, "/rtas"); + if (n <= 0) + return -ENOSYS; + + ofd_getprop(m, n, halt, + &rtas_halt_token, sizeof (rtas_halt_token)); + ofd_getprop(m, n, reboot, + &rtas_reboot_token, sizeof (rtas_reboot_token)); + return 1; +} + +int +rtas_halt(void) +{ + struct rtas_args r; + + if (rtas_halt_token == -1) + return -1; + + r.ra_token = rtas_halt_token; + r.ra_nargs = 2; + r.ra_nrets = 1; + r.ra_args[0] = 0; + r.ra_args[1] = 0; + + return rtas_call(&r); +} + +int +rtas_reboot(void) +{ + struct rtas_args r; + + if (rtas_reboot_token == -1) + return -ENOSYS; + + r.ra_token = rtas_reboot_token; + r.ra_nargs = 2; + r.ra_nrets = 1; + r.ra_args[0] = 0; + r.ra_args[1] = 0; + + return rtas_call(&r); +} diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/setup.c --- a/xen/arch/powerpc/setup.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/setup.c Thu Dec 14 08:57:36 2006 -0700 @@ -1,8 +1,8 @@ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your 
option) any later version. + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -35,8 +35,10 @@ #include <xen/gdbstub.h> #include <xen/symbols.h> #include <xen/keyhandler.h> +#include <xen/numa.h> #include <acm/acm_hooks.h> #include <public/version.h> +#include <asm/mpic.h> #include <asm/processor.h> #include <asm/desc.h> #include <asm/cache.h> @@ -47,6 +49,7 @@ #include "exceptions.h" #include "of-devtree.h" #include "oftree.h" +#include "rtas.h" #define DEBUG @@ -75,10 +78,7 @@ ulong oftree_end; ulong oftree_end; uint cpu_hard_id[NR_CPUS] __initdata; -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -cpumask_t cpu_online_map; /* missing ifdef in schedule.c */ cpumask_t cpu_present_map; -cpumask_t cpu_possible_map; /* XXX get this from ISA node in device tree */ char *vgabase; @@ -87,6 +87,8 @@ struct ns16550_defaults ns16550; extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[]; +static struct domain *idle_domain; + volatile struct processor_area * volatile global_cpu_table[NR_CPUS]; int is_kernel_text(unsigned long addr) @@ -110,12 +112,28 @@ static void __init do_initcalls(void) } } -static void hw_probe_attn(unsigned char key, struct cpu_user_regs *regs) + +void noinline __attn(void) { /* To continue the probe will step over the ATTN instruction. The * NOP is there to make sure there is something sane to "step * over" to. 
*/ - asm volatile(".long 0x00000200; nop"); + console_start_sync(); + asm volatile(".long 0x200;nop"); + console_end_sync(); +} + +static void key_hw_probe_attn(unsigned char key) +{ + __attn(); +} + +static void key_ofdump(unsigned char key) +{ + printk("ofdump:\n"); + /* make sure the OF devtree is good */ + ofd_walk((void *)oftree, "devtree", OFD_ROOT, + ofd_dump_props, OFD_DUMP_ALL); } static void percpu_init_areas(void) @@ -150,8 +168,6 @@ static void percpu_free_unused_areas(voi static void __init start_of_day(void) { - struct domain *idle_domain; - init_IRQ(); scheduler_init(); @@ -166,36 +182,19 @@ static void __init start_of_day(void) /* for some reason we need to set our own bit in the thread map */ cpu_set(0, cpu_sibling_map[0]); - percpu_free_unused_areas(); - - { - /* FIXME: Xen assumes that an online CPU is a schedualable - * CPU, but we just are not there yet. Remove this fragment when - * scheduling processors actually works. */ - int cpuid; - - printk("WARNING!: Taking all secondary CPUs offline\n"); - - for_each_online_cpu(cpuid) { - if (cpuid == 0) - continue; - cpu_clear(cpuid, cpu_online_map); - } - } - initialize_keytable(); /* Register another key that will allow for the the Harware Probe * to be contacted, this works with RiscWatch probes and should * work with Chronos and FSPs */ - register_irq_keyhandler('^', hw_probe_attn, "Trap to Hardware Probe"); + register_keyhandler('^', key_hw_probe_attn, "Trap to Hardware Probe"); + + /* allow the dumping of the devtree */ + register_keyhandler('D', key_ofdump , "Dump OF Devtree"); timer_init(); serial_init_postirq(); do_initcalls(); - schedulers_start(); -} - -extern void idle_loop(void); +} void startup_cpu_idle_loop(void) { @@ -208,6 +207,15 @@ void startup_cpu_idle_loop(void) /* Finally get off the boot stack. 
*/ reset_stack_and_jump(idle_loop); } + +/* The boot_pa is enough "parea" for the boot CPU to get thru + * initialization, it will ultimately get replaced later */ +static __init void init_boot_cpu(void) +{ + static struct processor_area boot_pa; + boot_pa.whoami = 0; + parea = &boot_pa; +} static void init_parea(int cpuid) { @@ -227,6 +235,7 @@ static void init_parea(int cpuid) pa->whoami = cpuid; pa->hard_id = cpu_hard_id[cpuid]; pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE); + mb(); /* This store has the effect of invoking secondary_cpu_init. */ global_cpu_table[cpuid] = pa; @@ -248,18 +257,34 @@ static int kick_secondary_cpus(int maxcp /* wait for it */ while (!cpu_online(cpuid)) cpu_relax(); + + numa_set_node(cpuid, 0); + numa_add_cpu(cpuid); } return 0; } /* This is the first C code that secondary processors invoke. */ -int secondary_cpu_init(int cpuid, unsigned long r4) -{ +void secondary_cpu_init(int cpuid, unsigned long r4) +{ + struct vcpu *vcpu; + cpu_initialize(cpuid); smp_generic_take_timebase(); + + /* If we are online, we must be able to ACK IPIs. */ + mpic_setup_this_cpu(); cpu_set(cpuid, cpu_online_map); - while(1); + + vcpu = alloc_vcpu(idle_domain, cpuid, cpuid); + BUG_ON(vcpu == NULL); + + set_current(idle_domain->vcpu[cpuid]); + idle_vcpu[cpuid] = current; + startup_cpu_idle_loop(); + + panic("should never get here\n"); } static void __init __start_xen(multiboot_info_t *mbi) @@ -277,6 +302,9 @@ static void __init __start_xen(multiboot /* Parse the command-line options. */ if ((mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0)) cmdline_parse(__va((ulong)mbi->cmdline)); + + /* we need to be able to identify this CPU early on */ + init_boot_cpu(); /* We initialise the serial devices very early so we can get debugging. 
*/ ns16550.io_base = 0x3f8; @@ -286,20 +314,12 @@ static void __init __start_xen(multiboot serial_init_preirq(); init_console(); -#ifdef CONSOLE_SYNC + /* let synchronize until we really get going */ console_start_sync(); -#endif - - /* we give the first RMA to the hypervisor */ - xenheap_phys_end = rma_size(cpu_default_rma_order_pages()); /* Check that we have at least one Multiboot module. */ if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) { panic("FATAL ERROR: Require at least one Multiboot module.\n"); - } - - if (!(mbi->flags & MBI_MEMMAP)) { - panic("FATAL ERROR: Bootloader provided no memory information.\n"); } /* OF dev tree is the last module */ @@ -312,14 +332,18 @@ static void __init __start_xen(multiboot mod[mbi->mods_count-1].mod_end = 0; --mbi->mods_count; + if (rtas_entry) { + rtas_init((void *)oftree); + /* remove rtas module from consideration */ + mod[mbi->mods_count-1].mod_start = 0; + mod[mbi->mods_count-1].mod_end = 0; + --mbi->mods_count; + } memory_init(mod, mbi->mods_count); #ifdef OF_DEBUG - printk("ofdump:\n"); - /* make sure the OF devtree is good */ - ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL); + key_ofdump(0); #endif - percpu_init_areas(); init_parea(0); @@ -330,6 +354,10 @@ static void __init __start_xen(multiboot if (opt_earlygdb) debugger_trap_immediate(); #endif + + start_of_day(); + + mpic_setup_this_cpu(); /* Deal with secondary processors. */ if (opt_nosmp || ofd_boot_cpu == -1) { @@ -339,7 +367,11 @@ static void __init __start_xen(multiboot kick_secondary_cpus(max_cpus); } - start_of_day(); + /* Secondary processors must be online before we call this. */ + schedulers_start(); + + /* This cannot be called before secondary cpus are marked online. */ + percpu_free_unused_areas(); /* Create initial domain 0. 
*/ dom0 = domain_create(0, 0); @@ -383,10 +415,10 @@ static void __init __start_xen(multiboot } init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE), - ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE)); + ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE)); if (initrd_start) init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE), - ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE)); + ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE)); init_trace_bufs(); @@ -395,8 +427,12 @@ static void __init __start_xen(multiboot /* Hide UART from DOM0 if we're using it */ serial_endboot(); + console_end_sync(); + domain_unpause_by_systemcontroller(dom0); - +#ifdef DEBUG_IPI + ipi_torture_test(); +#endif startup_cpu_idle_loop(); } @@ -414,7 +450,7 @@ void __init __start_xen_ppc( } else { /* booted by someone else that hopefully has a trap handler */ - trap(); + __builtin_trap(); } __start_xen(mbi); diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/shadow.c --- a/xen/arch/powerpc/shadow.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/shadow.c Thu Dec 14 08:57:36 2006 -0700 @@ -101,9 +101,6 @@ unsigned int shadow_set_allocation(struc addr = htab_alloc(d, order); - printk("%s: ibm,fpt-size should be: 0x%x\n", __func__, - d->arch.htab.log_num_ptes + LOG_PTE_SIZE); - if (addr == 0) return -ENOMEM; @@ -115,8 +112,8 @@ unsigned int shadow_set_allocation(struc } int shadow_domctl(struct domain *d, - xen_domctl_shadow_op_t *sc, - XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { if ( unlikely(d == current->domain) ) { diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/smp.c --- a/xen/arch/powerpc/smp.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/smp.c Thu Dec 14 08:57:36 2006 -0700 @@ -13,15 +13,18 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 
2005,2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Authors: Amos Waterland <apw@xxxxxxxxxx> */ -#include <asm/misc.h> #include <xen/cpumask.h> #include <xen/smp.h> #include <asm/flushtlb.h> +#include <asm/debugger.h> +#include <asm/mpic.h> +#include <asm/mach-default/irq_vectors.h> int smp_num_siblings = 1; int smp_num_cpus = 1; @@ -29,25 +32,56 @@ int ht_per_core = 1; void __flush_tlb_mask(cpumask_t mask, unsigned long addr) { - unimplemented(); -} - -void smp_send_event_check_mask(cpumask_t cpu_mask) -{ - unimplemented(); -} - -int smp_call_function(void (*func) (void *info), void *info, int unused, - int wait) -{ - unimplemented(); - return 0; + if (cpu_isset(smp_processor_id(), mask)) { + cpu_clear(smp_processor_id(), mask); + if (cpus_empty(mask)) { + /* only local */ + if (addr == FLUSH_ALL_ADDRS) + local_flush_tlb(); + else + local_flush_tlb_one(addr); + return; + } + } + /* if we are still here and the mask is non-empty, then we need to + * flush other TLBs so we flush em all */ + if (!cpus_empty(mask)) + unimplemented(); +} + +void smp_send_event_check_mask(cpumask_t mask) +{ + cpu_clear(smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, EVENT_CHECK_VECTOR); +} + + +int smp_call_function(void (*func) (void *info), void *info, int retry, + int wait) +{ + cpumask_t allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + return on_selected_cpus(allbutself, func, info, retry, wait); } void smp_send_stop(void) { - unimplemented(); -} + BUG(); +} + +struct call_data_struct { + void (*func) (void *info); + void *info; + int wait; + atomic_t started; + atomic_t finished; + cpumask_t selected; +}; + +static DEFINE_SPINLOCK(call_lock); +static struct call_data_struct call_data; int on_selected_cpus( cpumask_t selected, @@ -56,5 +90,125 @@ int on_selected_cpus( int retry, int wait) { - return 0; -} + int retval = 0, nr_cpus = cpus_weight(selected); + unsigned long start, stall = SECONDS(1); + + 
spin_lock(&call_lock); + + call_data.func = func; + call_data.info = info; + call_data.wait = wait; + atomic_set(&call_data.started, 0); + atomic_set(&call_data.finished, 0); + mb(); + + send_IPI_mask(selected, CALL_FUNCTION_VECTOR); + + /* We always wait for an initiation ACK from remote CPU. */ + for (start = NOW(); atomic_read(&call_data.started) != nr_cpus; ) { + if (NOW() > start + stall) { + printk("IPI start stall: %d ACKS to %d SYNS\n", + atomic_read(&call_data.started), nr_cpus); + start = NOW(); + } + } + + /* If told to, we wait for a completion ACK from remote CPU. */ + if (wait) { + for (start = NOW(); atomic_read(&call_data.finished) != nr_cpus; ) { + if (NOW() > start + stall) { + printk("IPI finish stall: %d ACKS to %d SYNS\n", + atomic_read(&call_data.finished), nr_cpus); + start = NOW(); + } + } + } + + spin_unlock(&call_lock); + + return retval; +} + +void smp_call_function_interrupt(struct cpu_user_regs *regs) +{ + + void (*func)(void *info) = call_data.func; + void *info = call_data.info; + int wait = call_data.wait; + + atomic_inc(&call_data.started); + mb(); + (*func)(info); + mb(); + + if (wait) + atomic_inc(&call_data.finished); + + return; +} + +void smp_event_check_interrupt(void) +{ + /* We are knocked out of NAP state at least. 
*/ + return; +} + +void smp_message_recv(int msg, struct cpu_user_regs *regs) +{ + switch(msg) { + case CALL_FUNCTION_VECTOR: + smp_call_function_interrupt(regs); + break; + case EVENT_CHECK_VECTOR: + smp_event_check_interrupt(); + break; + default: + BUG(); + break; + } +} + +#ifdef DEBUG_IPI +static void debug_ipi_ack(void *info) +{ + if (info) { + unsigned long start, stall = SECONDS(5); + for (start = NOW(); NOW() < start + stall; ); + printk("IPI recv on cpu #%d: %s\n", smp_processor_id(), (char *)info); + } + return; +} + +void ipi_torture_test(void) +{ + int cpu; + unsigned long before, after, delta; + unsigned long min = ~0, max = 0, mean = 0, sum = 0, trials = 0; + cpumask_t mask; + + cpus_clear(mask); + + while (trials < 1000000) { + for_each_online_cpu(cpu) { + cpu_set(cpu, mask); + before = mftb(); + on_selected_cpus(mask, debug_ipi_ack, NULL, 1, 1); + after = mftb(); + cpus_clear(mask); + + delta = after - before; + if (delta > max) max = delta; + if (delta < min) min = delta; + sum += delta; + trials++; + } + } + + mean = tb_to_ns(sum / trials); + + printk("IPI latency: min = %ld ticks, max = %ld ticks, mean = %ldns\n", + min, max, mean); + + smp_call_function(debug_ipi_ack, "Hi", 0, 1); +} +#endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/time.c --- a/xen/arch/powerpc/time.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/time.c Thu Dec 14 08:57:36 2006 -0700 @@ -25,7 +25,7 @@ #include <xen/sched.h> #include <asm/processor.h> #include <asm/current.h> -#include <asm/misc.h> +#include <asm/debugger.h> #define Dprintk(x...) 
printk(x) @@ -93,5 +93,4 @@ void do_settime(unsigned long secs, unsi void update_vcpu_system_time(struct vcpu *v) { - unimplemented(); } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/usercopy.c --- a/xen/arch/powerpc/usercopy.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/usercopy.c Thu Dec 14 08:57:36 2006 -0700 @@ -18,267 +18,33 @@ * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> */ -#include <xen/config.h> -#include <xen/mm.h> #include <xen/sched.h> +#include <xen/lib.h> #include <asm/current.h> -#include <asm/uaccess.h> -#include <public/xen.h> -#include <public/xencomm.h> - -#undef DEBUG -#ifdef DEBUG -static int xencomm_debug = 1; /* extremely verbose */ -#else -#define xencomm_debug 0 -#endif +#include <asm/page.h> +#include <asm/debugger.h> /* XXX need to return error, not panic, if domain passed a bad pointer */ -static unsigned long paddr_to_maddr(unsigned long paddr) +unsigned long paddr_to_maddr(unsigned long paddr) { struct vcpu *v = get_current(); struct domain *d = v->domain; - int mtype; - ulong pfn; + ulong gpfn; ulong offset; ulong pa = paddr; offset = pa & ~PAGE_MASK; - pfn = pa >> PAGE_SHIFT; + gpfn = pa >> PAGE_SHIFT; - pa = pfn2mfn(d, pfn, &mtype); + pa = gmfn_to_mfn(d, gpfn); if (pa == INVALID_MFN) { printk("%s: Dom:%d bad paddr: 0x%lx\n", __func__, d->domain_id, paddr); return 0; } - switch (mtype) { - case PFN_TYPE_RMA: - case PFN_TYPE_LOGICAL: - break; - case PFN_TYPE_FOREIGN: - /* I don't think this should ever happen, but I suppose it - * could be possible */ - printk("%s: Dom:%d paddr: 0x%lx type: FOREIGN\n", - __func__, d->domain_id, paddr); - WARN(); - break; - - case PFN_TYPE_IO: - default: - printk("%s: Dom:%d paddr: 0x%lx bad type: 0x%x\n", - __func__, d->domain_id, paddr, mtype); - WARN(); - return 0; - } pa <<= PAGE_SHIFT; pa |= offset; return pa; } - -/** - * xencomm_copy_from_guest: Copy a block of data from domain space. - * @to: Machine address. - * @from: Physical address to a xencomm buffer descriptor. 
- * @n: Number of bytes to copy. - * @skip: Number of bytes from the start to skip. - * - * Copy data from domain to hypervisor. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -unsigned long -xencomm_copy_from_guest(void *to, const void *from, unsigned int n, - unsigned int skip) -{ - struct xencomm_desc *desc; - unsigned int from_pos = 0; - unsigned int to_pos = 0; - unsigned int i = 0; - - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)from); - if (desc == NULL) - return n; - - if (desc->magic != XENCOMM_MAGIC) { - printk("%s: error: %p magic was 0x%x\n", - __func__, desc, desc->magic); - return n; - } - - /* iterate through the descriptor, copying up to a page at a time */ - while ((to_pos < n) && (i < desc->nr_addrs)) { - unsigned long src_paddr = desc->address[i]; - unsigned int pgoffset; - unsigned int chunksz; - unsigned int chunk_skip; - - if (src_paddr == XENCOMM_INVALID) { - i++; - continue; - } - - pgoffset = src_paddr % PAGE_SIZE; - chunksz = PAGE_SIZE - pgoffset; - - chunk_skip = min(chunksz, skip); - from_pos += chunk_skip; - chunksz -= chunk_skip; - skip -= chunk_skip; - - if (skip == 0) { - unsigned long src_maddr; - unsigned long dest = (unsigned long)to + to_pos; - unsigned int bytes = min(chunksz, n - to_pos); - - src_maddr = paddr_to_maddr(src_paddr + chunk_skip); - if (src_maddr == 0) - return n - to_pos; - - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest); - memcpy((void *)dest, (void *)src_maddr, bytes); - from_pos += bytes; - to_pos += bytes; - } - - i++; - } - - return n - to_pos; -} - -/** - * xencomm_copy_to_guest: Copy a block of data to domain space. - * @to: Physical address to xencomm buffer descriptor. - * @from: Machine address. - * @n: Number of bytes to copy. - * @skip: Number of bytes from the start to skip. - * - * Copy data from hypervisor to domain. 
- * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -unsigned long -xencomm_copy_to_guest(void *to, const void *from, unsigned int n, - unsigned int skip) -{ - struct xencomm_desc *desc; - unsigned int from_pos = 0; - unsigned int to_pos = 0; - unsigned int i = 0; - - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)to); - if (desc == NULL) - return n; - - if (desc->magic != XENCOMM_MAGIC) { - printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); - return n; - } - - /* iterate through the descriptor, copying up to a page at a time */ - while ((from_pos < n) && (i < desc->nr_addrs)) { - unsigned long dest_paddr = desc->address[i]; - unsigned int pgoffset; - unsigned int chunksz; - unsigned int chunk_skip; - - if (dest_paddr == XENCOMM_INVALID) { - i++; - continue; - } - - pgoffset = dest_paddr % PAGE_SIZE; - chunksz = PAGE_SIZE - pgoffset; - - chunk_skip = min(chunksz, skip); - to_pos += chunk_skip; - chunksz -= chunk_skip; - skip -= chunk_skip; - - if (skip == 0) { - unsigned long dest_maddr; - unsigned long source = (unsigned long)from + from_pos; - unsigned int bytes = min(chunksz, n - from_pos); - - dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip); - if (dest_maddr == 0) - return -1; - - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr); - memcpy((void *)dest_maddr, (void *)source, bytes); - from_pos += bytes; - to_pos += bytes; - } - - i++; - } - - return n - from_pos; -} - -/* Offset page addresses in 'handle' to skip 'bytes' bytes. Set completely - * exhausted pages to XENCOMM_INVALID. 
*/ -int xencomm_add_offset(void *handle, unsigned int bytes) -{ - struct xencomm_desc *desc; - int i = 0; - - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)handle); - if (desc == NULL) - return -1; - - if (desc->magic != XENCOMM_MAGIC) { - printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); - return -1; - } - - /* iterate through the descriptor incrementing addresses */ - while ((bytes > 0) && (i < desc->nr_addrs)) { - unsigned long dest_paddr = desc->address[i]; - unsigned int pgoffset; - unsigned int chunksz; - unsigned int chunk_skip; - - if (dest_paddr == XENCOMM_INVALID) { - i++; - continue; - } - - pgoffset = dest_paddr % PAGE_SIZE; - chunksz = PAGE_SIZE - pgoffset; - - chunk_skip = min(chunksz, bytes); - if (chunk_skip == chunksz) { - /* exhausted this page */ - desc->address[i] = XENCOMM_INVALID; - } else { - desc->address[i] += chunk_skip; - } - bytes -= chunk_skip; - - i++; - } - return 0; -} - -int xencomm_handle_is_null(void *ptr) -{ - struct xencomm_desc *desc; - - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)ptr); - if (desc == NULL) - return 1; - - return (desc->nr_addrs == 0); -} - diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/powerpc/xen.lds.S --- a/xen/arch/powerpc/xen.lds.S Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/powerpc/xen.lds.S Thu Dec 14 08:57:36 2006 -0700 @@ -12,12 +12,12 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_ __DYNAMIC = 0; */ PHDRS { - text PT_LOAD FILEHDR PHDRS; + text PT_LOAD; } SECTIONS { + . = 0x00400000; /* Read-only sections, merged into text segment: */ - PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS; .interp : { *(.interp) } :text .hash : { *(.hash) } .dynsym : { *(.dynsym) } @@ -111,8 +111,6 @@ SECTIONS SORT(CONSTRUCTORS) } - /* Xen addition */ - . = ALIGN(32); __setup_start = .; .setup.init : { *(.setup.init) } @@ -130,8 +128,6 @@ SECTIONS . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT); . 
= ALIGN(STACK_SIZE); __per_cpu_end = .; - - /* end Xen addition */ .data1 : { *(.data1) } .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/crash.c --- a/xen/arch/x86/crash.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/x86/crash.c Thu Dec 14 08:57:36 2006 -0700 @@ -58,9 +58,9 @@ static void smp_send_nmi_allbutself(void static void smp_send_nmi_allbutself(void) { cpumask_t allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - send_IPI_mask(allbutself, APIC_DM_NMI); + if ( !cpus_empty(allbutself) ) + send_IPI_mask(allbutself, APIC_DM_NMI); } static void nmi_shootdown_cpus(void) diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/x86/domain_build.c Thu Dec 14 08:57:36 2006 -0700 @@ -321,8 +321,11 @@ int construct_dom0(struct domain *d, if ( (rc = parseelfimage(&dsi)) != 0 ) return rc; - dom0_pae = (dsi.pae_kernel != PAEKERN_no); xen_pae = (CONFIG_PAGING_LEVELS == 3); + if (dsi.pae_kernel == PAEKERN_bimodal) + dom0_pae = xen_pae; + else + dom0_pae = (dsi.pae_kernel != PAEKERN_no); if ( dom0_pae != xen_pae ) { printk("PAE mode mismatch between Xen and DOM0 (xen=%s, dom0=%s)\n", @@ -330,7 +333,8 @@ int construct_dom0(struct domain *d, return -EINVAL; } - if ( xen_pae && dsi.pae_kernel == PAEKERN_extended_cr3 ) + if ( xen_pae && (dsi.pae_kernel == PAEKERN_extended_cr3 || + dsi.pae_kernel == PAEKERN_bimodal) ) set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); if ( (p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES)) != NULL ) diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/x86/mm.c Thu Dec 14 08:57:36 2006 -0700 @@ -2951,7 +2951,17 @@ long arch_memory_op(int op, XEN_GUEST_HA guest_physmap_add_page(d, xatp.gpfn, mfn); UNLOCK_BIGLOCK(d); - + + /* If we're doing FAST_FAULT_PATH, then shadow mode may have + cached the fact that this 
is an mmio region in the shadow + page tables. Blow the tables away to remove the cache. + This is pretty heavy handed, but this is a rare operation + (it might happen a dozen times during boot and then never + again), so it doesn't matter too much. */ + shadow_lock(d); + shadow_blow_tables(d); + shadow_unlock(d); + put_domain(d); break; diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/x86/mm/shadow/common.c Thu Dec 14 08:57:36 2006 -0700 @@ -791,7 +791,7 @@ void shadow_prealloc(struct domain *d, u /* Deliberately free all the memory we can: this will tear down all of * this domain's shadows */ -static void shadow_blow_tables(struct domain *d) +void shadow_blow_tables(struct domain *d) { struct list_head *l, *t; struct shadow_page_info *sp; @@ -3123,7 +3123,7 @@ static int shadow_log_dirty_op( out: shadow_unlock(d); domain_unpause(d); - return 0; + return rv; } diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Dec 14 08:57:36 2006 -0700 @@ -3488,6 +3488,9 @@ sh_update_cr3(struct vcpu *v) ? SH_type_l2h_shadow : SH_type_l2_shadow); } + else + /* The guest is not present: clear out the shadow. 
*/ + sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); } } #elif GUEST_PAGING_LEVELS == 4 diff -r ed56ef3e9716 -r 4762d73ced42 xen/arch/x86/numa.c --- a/xen/arch/x86/numa.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/arch/x86/numa.c Thu Dec 14 08:57:36 2006 -0700 @@ -214,7 +214,7 @@ void __init numa_initmem_init(unsigned l __cpuinit void numa_add_cpu(int cpu) { - set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); + cpu_set(cpu, node_to_cpumask[cpu_to_node(cpu)]); } void __cpuinit numa_set_node(int cpu, int node) diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/Makefile --- a/xen/common/Makefile Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/common/Makefile Thu Dec 14 08:57:36 2006 -0700 @@ -32,5 +32,7 @@ obj-$(crash_debug) += gdbstub.o obj-$(crash_debug) += gdbstub.o obj-$(xenoprof) += xenoprof.o +obj-$(CONFIG_XENCOMM) += xencomm.o + # Object file contains changeset and compiler information. version.o: $(BASEDIR)/include/xen/compile.h diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/domain.c --- a/xen/common/domain.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/common/domain.c Thu Dec 14 08:57:36 2006 -0700 @@ -238,7 +238,11 @@ void domain_kill(struct domain *d) void __domain_crash(struct domain *d) { - if ( d == current->domain ) + if ( test_bit(_DOMF_shutdown, &d->domain_flags) ) + { + /* Print nothing: the domain is already shutting down. 
*/ + } + else if ( d == current->domain ) { printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n", d->domain_id, current->vcpu_id, smp_processor_id()); @@ -346,16 +350,25 @@ void domain_destroy(struct domain *d) send_guest_global_virq(dom0, VIRQ_DOM_EXC); } -void vcpu_pause(struct vcpu *v) -{ - ASSERT(v != current); - +static void vcpu_pause_setup(struct vcpu *v) +{ spin_lock(&v->pause_lock); if ( v->pause_count++ == 0 ) set_bit(_VCPUF_paused, &v->vcpu_flags); spin_unlock(&v->pause_lock); - +} + +void vcpu_pause(struct vcpu *v) +{ + ASSERT(v != current); + vcpu_pause_setup(v); vcpu_sleep_sync(v); +} + +void vcpu_pause_nosync(struct vcpu *v) +{ + vcpu_pause_setup(v); + vcpu_sleep_nosync(v); } void vcpu_unpause(struct vcpu *v) diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/elf.c --- a/xen/common/elf.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/common/elf.c Thu Dec 14 08:57:36 2006 -0700 @@ -216,16 +216,6 @@ int parseelfimage(struct domain_setup_in return -EINVAL; } - /* Find the section-header strings table. */ - if ( ehdr->e_shstrndx == SHN_UNDEF ) - { - printk("ELF image has no section-header strings table (shstrtab).\n"); - return -EINVAL; - } - shdr = (Elf_Shdr *)(image + ehdr->e_shoff + - (ehdr->e_shstrndx*ehdr->e_shentsize)); - shstrtab = image + shdr->sh_offset; - dsi->__elfnote_section = NULL; dsi->__xen_guest_string = NULL; @@ -244,6 +234,16 @@ int parseelfimage(struct domain_setup_in /* Fall back to looking for the special '__xen_guest' section. */ if ( dsi->__elfnote_section == NULL ) { + /* Find the section-header strings table. 
*/ + if ( ehdr->e_shstrndx == SHN_UNDEF ) + { + printk("ELF image has no section-header strings table.\n"); + return -EINVAL; + } + shdr = (Elf_Shdr *)(image + ehdr->e_shoff + + (ehdr->e_shstrndx*ehdr->e_shentsize)); + shstrtab = image + shdr->sh_offset; + for ( h = 0; h < ehdr->e_shnum; h++ ) { shdr = (Elf_Shdr *)(image + ehdr->e_shoff + (h*ehdr->e_shentsize)); @@ -286,6 +286,8 @@ int parseelfimage(struct domain_setup_in } /* + * A "bimodal" ELF note indicates the kernel will adjust to the + * current paging mode, including handling extended cr3 syntax. * If we have ELF notes then PAE=yes implies that we must support * the extended cr3 syntax. Otherwise we need to find the * [extended-cr3] syntax in the __xen_guest string. @@ -294,9 +296,10 @@ int parseelfimage(struct domain_setup_in if ( dsi->__elfnote_section ) { p = xen_elfnote_string(dsi, XEN_ELFNOTE_PAE_MODE); - if ( p != NULL && strncmp(p, "yes", 3) == 0 ) + if ( p != NULL && strncmp(p, "bimodal", 7) == 0 ) + dsi->pae_kernel = PAEKERN_bimodal; + else if ( p != NULL && strncmp(p, "yes", 3) == 0 ) dsi->pae_kernel = PAEKERN_extended_cr3; - } else { diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/gdbstub.c --- a/xen/common/gdbstub.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/common/gdbstub.c Thu Dec 14 08:57:36 2006 -0700 @@ -42,6 +42,7 @@ #include <xen/init.h> #include <xen/smp.h> #include <xen/console.h> +#include <xen/errno.h> /* Printk isn't particularly safe just after we've trapped to the debugger. so avoid it. 
*/ diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/kexec.c --- a/xen/common/kexec.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/common/kexec.c Thu Dec 14 08:57:36 2006 -0700 @@ -140,13 +140,21 @@ void machine_crash_kexec(void) static void do_crashdump_trigger(unsigned char key) { - printk("triggering crashdump\n"); - machine_crash_kexec(); + int pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0); + if ( test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) ) + { + printk("'%c' pressed -> triggering crashdump\n", key); + machine_crash_kexec(); + } + else + { + printk("'%c' pressed -> no crash kernel loaded -- not triggering crashdump\n", key); + } } static __init int register_crashdump_trigger(void) { - register_keyhandler('c', do_crashdump_trigger, "trigger a crashdump"); + register_keyhandler('C', do_crashdump_trigger, "trigger a crashdump"); return 0; } __initcall(register_crashdump_trigger); diff -r ed56ef3e9716 -r 4762d73ced42 xen/common/sched_credit.c --- a/xen/common/sched_credit.c Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/common/sched_credit.c Thu Dec 14 08:57:36 2006 -0700 @@ -56,7 +56,12 @@ #define CSCHED_PRI_TS_UNDER -1 /* time-share w/ credits */ #define CSCHED_PRI_TS_OVER -2 /* time-share w/o credits */ #define CSCHED_PRI_IDLE -64 /* idle */ -#define CSCHED_PRI_TS_PARKED -65 /* time-share w/ capped credits */ + + +/* + * Flags + */ +#define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */ /* @@ -100,26 +105,21 @@ _MACRO(vcpu_wake_onrunq) \ _MACRO(vcpu_wake_runnable) \ _MACRO(vcpu_wake_not_runnable) \ + _MACRO(vcpu_park) \ + _MACRO(vcpu_unpark) \ _MACRO(tickle_local_idler) \ _MACRO(tickle_local_over) \ _MACRO(tickle_local_under) \ _MACRO(tickle_local_other) \ _MACRO(tickle_idlers_none) \ _MACRO(tickle_idlers_some) \ - _MACRO(vcpu_migrate) \ _MACRO(load_balance_idle) \ _MACRO(load_balance_over) \ _MACRO(load_balance_other) \ _MACRO(steal_trylock_failed) \ - _MACRO(steal_peer_down) \ _MACRO(steal_peer_idle) \ - _MACRO(steal_peer_running) \ - 
_MACRO(steal_peer_pinned) \ - _MACRO(steal_peer_migrating) \ - _MACRO(steal_peer_best_idler) \ - _MACRO(steal_loner_candidate) \ - _MACRO(steal_loner_signal) \ - _MACRO(cpu_pick) \ + _MACRO(migrate_queued) \ + _MACRO(migrate_running) \ _MACRO(dom_init) \ _MACRO(dom_destroy) \ _MACRO(vcpu_init) \ @@ -146,7 +146,7 @@ struct \ { \ CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \ - } stats + } stats; #define CSCHED_STATS_PRINTK() \ do \ @@ -155,14 +155,27 @@ CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \ } while ( 0 ) -#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++) +#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++) + +#define CSCHED_VCPU_STATS_RESET(_V) \ + do \ + { \ + memset(&(_V)->stats, 0, sizeof((_V)->stats)); \ + } while ( 0 ) + +#define CSCHED_VCPU_STAT_CRANK(_V, _X) (((_V)->stats._X)++) + +#define CSCHED_VCPU_STAT_SET(_V, _X, _Y) (((_V)->stats._X) = (_Y)) #else /* CSCHED_STATS */ -#define CSCHED_STATS_RESET() do {} while ( 0 ) -#define CSCHED_STATS_DEFINE() do {} while ( 0 ) -#define CSCHED_STATS_PRINTK() do {} while ( 0 ) -#define CSCHED_STAT_CRANK(_X) do {} while ( 0 ) +#define CSCHED_STATS_RESET() do {} while ( 0 ) +#define CSCHED_STATS_DEFINE() +#define CSCHED_STATS_PRINTK() do {} while ( 0 ) +#define CSCHED_STAT_CRANK(_X) do {} while ( 0 ) +#define CSCHED_VCPU_STATS_RESET(_V) do {} while ( 0 ) +#define CSCHED_VCPU_STAT_CRANK(_V, _X) do {} while ( 0 ) +#define CSCHED_VCPU_STAT_SET(_V, _X, _Y) do {} while ( 0 ) #endif /* CSCHED_STATS */ @@ -184,14 +197,18 @@ struct csched_vcpu { struct csched_dom *sdom; struct vcpu *vcpu; atomic_t credit; + uint16_t flags; int16_t pri; +#ifdef CSCHED_STATS struct { int credit_last; uint32_t credit_incr; uint32_t state_active; uint32_t state_idle; - uint32_t migrate; + uint32_t migrate_q; + uint32_t migrate_r; } stats; +#endif }; /* @@ -219,7 +236,7 @@ struct csched_private { uint32_t credit; int credit_balance; uint32_t runq_sort; - CSCHED_STATS_DEFINE(); + CSCHED_STATS_DEFINE() }; @@ -229,6 +246,15 @@ static struct csched_private 
csched_priv static struct csched_private csched_priv; + +static inline int +__cycle_cpu(int cpu, const cpumask_t *mask) +{ + int nxt = next_cpu(cpu, *mask); + if (nxt == NR_CPUS) + nxt = first_cpu(*mask); + return nxt; +} static inline int __vcpu_on_runq(struct csched_vcpu *svc) @@ -375,118 +401,138 @@ __csched_vcpu_check(struct vcpu *vc) #define CSCHED_VCPU_CHECK(_vc) #endif -/* - * Indicates which of two given idlers is most efficient to run - * an additional VCPU. - * - * Returns: - * 0: They are the same. - * negative: One is less efficient than Two. - * positive: One is more efficient than Two. - */ -static int -csched_idler_compare(int one, int two) -{ - cpumask_t idlers; - cpumask_t one_idlers; - cpumask_t two_idlers; - - idlers = csched_priv.idlers; - cpu_clear(one, idlers); - cpu_clear(two, idlers); - - if ( cpu_isset(one, cpu_core_map[two]) ) - { - cpus_and(one_idlers, idlers, cpu_sibling_map[one]); - cpus_and(two_idlers, idlers, cpu_sibling_map[two]); - } - else - { - cpus_and(one_idlers, idlers, cpu_core_map[one]); - cpus_and(two_idlers, idlers, cpu_core_map[two]); - } - - return cpus_weight(one_idlers) - cpus_weight(two_idlers); -} - static inline int -__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc) +__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu) { /* * Don't pick up work that's in the peer's scheduling tail. Also only pick * up work that's allowed to run on our CPU. 
*/ - if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) ) - { - CSCHED_STAT_CRANK(steal_peer_running); - return 0; - } - - if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) ) - { - CSCHED_STAT_CRANK(steal_peer_pinned); - return 0; - } - - return 1; -} - -static inline int -__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc) -{ - BUG_ON( is_idle_vcpu(vc) ); - - if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) ) - { - CSCHED_STAT_CRANK(steal_peer_pinned); - return 0; - } - - if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) ) - { - CSCHED_STAT_CRANK(steal_peer_migrating); - return 0; - } - - if ( csched_idler_compare(local_cpu, vc->processor) <= 0 ) - { - CSCHED_STAT_CRANK(steal_peer_best_idler); - return 0; - } - - return 1; -} - -static void -csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec) + return !test_bit(_VCPUF_running, &vc->vcpu_flags) && + cpu_isset(dest_cpu, vc->cpu_affinity); +} + +static int +csched_cpu_pick(struct vcpu *vc) +{ + cpumask_t cpus; + cpumask_t idlers; + int cpu; + + /* + * Pick from online CPUs in VCPU's affinity mask, giving a + * preference to its current processor if it's in there. + */ + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : __cycle_cpu(vc->processor, &cpus); + ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) ); + + /* + * Try to find an idle processor within the above constraints. + * + * In multi-core and multi-threaded CPUs, not all idle execution + * vehicles are equal! + * + * We give preference to the idle execution vehicle with the most + * idling neighbours in its grouping. This distributes work across + * distinct cores first and guarantees we don't do something stupid + * like run two VCPUs on co-hyperthreads while there are idle cores + * or sockets. 
+ */ + idlers = csched_priv.idlers; + cpu_set(cpu, idlers); + cpus_and(cpus, cpus, idlers); + cpu_clear(cpu, cpus); + + while ( !cpus_empty(cpus) ) + { + cpumask_t cpu_idlers; + cpumask_t nxt_idlers; + int nxt; + + nxt = __cycle_cpu(cpu, &cpus); + + if ( cpu_isset(cpu, cpu_core_map[nxt]) ) + { + ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) ); + cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]); + cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]); + } + else + { + ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) ); + cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]); + cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]); + } + + if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) ) + { + cpu = nxt; + cpu_clear(cpu, cpus); + } + else + { + cpus_andnot(cpus, cpus, nxt_idlers); + } + } + + return cpu; +} + +static inline void +__csched_vcpu_acct_start(struct csched_vcpu *svc) { struct csched_dom * const sdom = svc->sdom; unsigned long flags; - /* Update credits */ - atomic_sub(credit_dec, &svc->credit); - - /* Put this VCPU and domain back on the active list if it was idling */ + spin_lock_irqsave(&csched_priv.lock, flags); + if ( list_empty(&svc->active_vcpu_elem) ) { - spin_lock_irqsave(&csched_priv.lock, flags); - - if ( list_empty(&svc->active_vcpu_elem) ) - { - CSCHED_STAT_CRANK(acct_vcpu_active); - svc->stats.state_active++; - - sdom->active_vcpu_count++; - list_add(&svc->active_vcpu_elem, &sdom->active_vcpu); - if ( list_empty(&sdom->active_sdom_elem) ) - { - list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); - csched_priv.weight += sdom->weight; - } - } - - spin_unlock_irqrestore(&csched_priv.lock, flags); - } + CSCHED_VCPU_STAT_CRANK(svc, state_active); + CSCHED_STAT_CRANK(acct_vcpu_active); + + sdom->active_vcpu_count++; + list_add(&svc->active_vcpu_elem, &sdom->active_vcpu); + if ( list_empty(&sdom->active_sdom_elem) ) + { + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + csched_priv.weight += sdom->weight; + } + } + + 
spin_unlock_irqrestore(&csched_priv.lock, flags); +} + +static inline void +__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc) +{ + struct csched_dom * const sdom = svc->sdom; + + BUG_ON( list_empty(&svc->active_vcpu_elem) ); + + CSCHED_VCPU_STAT_CRANK(svc, state_idle); + CSCHED_STAT_CRANK(acct_vcpu_idle); + + sdom->active_vcpu_count--; + list_del_init(&svc->active_vcpu_elem); + if ( list_empty(&sdom->active_vcpu) ) + { + BUG_ON( csched_priv.weight < sdom->weight ); + list_del_init(&sdom->active_sdom_elem); + csched_priv.weight -= sdom->weight; + } +} + +static void +csched_vcpu_acct(unsigned int cpu) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(current); + + ASSERT( current->processor == cpu ); + ASSERT( svc->sdom != NULL ); /* * If this VCPU's priority was boosted when it last awoke, reset it. @@ -495,25 +541,30 @@ csched_vcpu_acct(struct csched_vcpu *svc */ if ( svc->pri == CSCHED_PRI_TS_BOOST ) svc->pri = CSCHED_PRI_TS_UNDER; -} - -static inline void -__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc) -{ - struct csched_dom * const sdom = svc->sdom; - - BUG_ON( list_empty(&svc->active_vcpu_elem) ); - - CSCHED_STAT_CRANK(acct_vcpu_idle); - svc->stats.state_idle++; - - sdom->active_vcpu_count--; - list_del_init(&svc->active_vcpu_elem); - if ( list_empty(&sdom->active_vcpu) ) - { - BUG_ON( csched_priv.weight < sdom->weight ); - list_del_init(&sdom->active_sdom_elem); - csched_priv.weight -= sdom->weight; + + /* + * Update credits + */ + atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit); + + /* + * Put this VCPU and domain back on the active list if it was + * idling. + * + * If it's been active a while, check if we'd be better off + * migrating it to run elsewhere (see multi-core and multi-thread + * support in csched_cpu_pick()). 
+ */ + if ( list_empty(&svc->active_vcpu_elem) ) + { + __csched_vcpu_acct_start(svc); + } + else if ( csched_cpu_pick(current) != cpu ) + { + CSCHED_VCPU_STAT_CRANK(svc, migrate_r); + CSCHED_STAT_CRANK(migrate_running); + set_bit(_VCPUF_migrating, &current->vcpu_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); } } @@ -536,15 +587,10 @@ csched_vcpu_init(struct vcpu *vc) svc->sdom = sdom; svc->vcpu = vc; atomic_set(&svc->credit, 0); + svc->flags = 0U; svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER; - memset(&svc->stats, 0, sizeof(svc->stats)); + CSCHED_VCPU_STATS_RESET(svc); vc->sched_priv = svc; - - CSCHED_VCPU_CHECK(vc); - - /* Attach fair-share VCPUs to the accounting list */ - if ( likely(sdom != NULL) ) - csched_vcpu_acct(svc, 0); /* Allocate per-PCPU info */ if ( unlikely(!CSCHED_PCPU(vc->processor)) ) @@ -554,7 +600,6 @@ csched_vcpu_init(struct vcpu *vc) } CSCHED_VCPU_CHECK(vc); - return 0; } @@ -573,7 +618,7 @@ csched_vcpu_destroy(struct vcpu *vc) spin_lock_irqsave(&csched_priv.lock, flags); if ( !list_empty(&svc->active_vcpu_elem) ) - __csched_vcpu_acct_idle_locked(svc); + __csched_vcpu_acct_stop_locked(svc); spin_unlock_irqrestore(&csched_priv.lock, flags); @@ -634,9 +679,16 @@ csched_vcpu_wake(struct vcpu *vc) * This allows wake-to-run latency sensitive VCPUs to preempt * more CPU resource intensive VCPUs without impacting overall * system fairness. - */ - if ( svc->pri == CSCHED_PRI_TS_UNDER ) + * + * The one exception is for VCPUs of capped domains unpausing + * after earning credits they had overspent. We don't boost + * those. 
+ */ + if ( svc->pri == CSCHED_PRI_TS_UNDER && + !(svc->flags & CSCHED_FLAG_VCPU_PARKED) ) + { svc->pri = CSCHED_PRI_TS_BOOST; + } /* Put the VCPU on the runq and tickle CPUs */ __runq_insert(cpu, svc); @@ -710,71 +762,8 @@ static void static void csched_dom_destroy(struct domain *dom) { - struct csched_dom * const sdom = CSCHED_DOM(dom); - CSCHED_STAT_CRANK(dom_destroy); - - xfree(sdom); -} - -static int -csched_cpu_pick(struct vcpu *vc) -{ - cpumask_t cpus; - int cpu, nxt; - - CSCHED_STAT_CRANK(cpu_pick); - - /* - * Pick from online CPUs in VCPU's affinity mask, giving a - * preference to its current processor if it's in there. - */ - cpus_and(cpus, cpu_online_map, vc->cpu_affinity); - ASSERT( !cpus_empty(cpus) ); - cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus); - - /* - * Try to find an idle processor within the above constraints. - */ - cpus_and(cpus, cpus, csched_priv.idlers); - if ( !cpus_empty(cpus) ) - { - cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus); - cpu_clear(cpu, cpus); - - /* - * In multi-core and multi-threaded CPUs, not all idle execution - * vehicles are equal! - * - * We give preference to the idle execution vehicle with the most - * idling neighbours in its grouping. This distributes work across - * distinct cores first and guarantees we don't do something stupid - * like run two VCPUs on co-hyperthreads while there are idle cores - * or sockets. 
- */ - while ( !cpus_empty(cpus) ) - { - nxt = first_cpu(cpus); - - if ( csched_idler_compare(cpu, nxt) < 0 ) - { - cpu = nxt; - cpu_clear(nxt, cpus); - } - else if ( cpu_isset(cpu, cpu_core_map[nxt]) ) - { - cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]); - } - else - { - cpus_andnot(cpus, cpus, cpu_core_map[nxt]); - } - - ASSERT( !cpu_isset(nxt, cpus) ); - } - } - - return cpu; + xfree(CSCHED_DOM(dom)); } /* @@ -963,11 +952,19 @@ csched_acct(void) */ if ( credit < 0 ) { - if ( sdom->cap != 0U && credit < -credit_cap ) - svc->pri = CSCHED_PRI_TS_PARKED; - else - svc->pri = CSCHED_PRI_TS_OVER; - + svc->pri = CSCHED_PRI_TS_OVER; + + /* Park running VCPUs of capped-out domains */ + if ( sdom->cap != 0U && + credit < -credit_cap && + !(svc->flags & CSCHED_FLAG_VCPU_PARKED) ) + { + CSCHED_STAT_CRANK(vcpu_park); + vcpu_pause_nosync(svc->vcpu); + svc->flags |= CSCHED_FLAG_VCPU_PARKED; + } + + /* Lower bound on credits */ if ( credit < -CSCHED_CREDITS_PER_TSLICE ) { CSCHED_STAT_CRANK(acct_min_credit); @@ -979,16 +976,30 @@ csched_acct(void) { svc->pri = CSCHED_PRI_TS_UNDER; + /* Unpark any capped domains whose credits go positive */ + if ( svc->flags & CSCHED_FLAG_VCPU_PARKED) + { + /* + * It's important to unset the flag AFTER the unpause() + * call to make sure the VCPU's priority is not boosted + * if it is woken up here. 
+ */ + CSCHED_STAT_CRANK(vcpu_unpark); + vcpu_unpause(svc->vcpu); + svc->flags &= ~CSCHED_FLAG_VCPU_PARKED; + } + + /* Upper bound on credits means VCPU stops earning */ if ( credit > CSCHED_CREDITS_PER_TSLICE ) { - __csched_vcpu_acct_idle_locked(svc); + __csched_vcpu_acct_stop_locked(svc); credit = 0; atomic_set(&svc->credit, credit); } } - svc->stats.credit_last = credit; - svc->stats.credit_incr = credit_fair; + CSCHED_VCPU_STAT_SET(svc, credit_last, credit); + CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair); credit_balance += credit; } } @@ -1004,21 +1015,14 @@ static void static void csched_tick(unsigned int cpu) { - struct csched_vcpu * const svc = CSCHED_VCPU(current); - struct csched_dom * const sdom = svc->sdom; - /* * Accounting for running VCPU - * - * Note: Some VCPUs, such as the idle tasks, are not credit scheduled. - */ - if ( likely(sdom != NULL) ) - { - csched_vcpu_acct(svc, CSCHED_CREDITS_PER_TICK); - } - - /* - * Accounting duty + */ + if ( !is_idle_vcpu(current) ) + csched_vcpu_acct(cpu); + + /* + * Host-wide accounting duty * * Note: Currently, this is always done by the master boot CPU. Eventually, * we could distribute or at the very least cycle the duty. @@ -1040,40 +1044,48 @@ csched_tick(unsigned int cpu) } static struct csched_vcpu * -csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri) -{ +csched_runq_steal(int peer_cpu, int cpu, int pri) +{ + const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu); + const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr; + struct csched_vcpu *speer; struct list_head *iter; - struct csched_vcpu *speer; struct vcpu *vc; - list_for_each( iter, &spc->runq ) - { - speer = __runq_elem(iter); - - /* - * If next available VCPU here is not of higher priority than ours, - * this PCPU is useless to us. - */ - if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri ) - { - CSCHED_STAT_CRANK(steal_peer_idle); - break; - } - - /* Is this VCPU is runnable on our PCPU? 
*/ - vc = speer->vcpu; - BUG_ON( is_idle_vcpu(vc) ); - - if ( __csched_queued_vcpu_is_stealable(cpu, vc) ) - { - /* We got a candidate. Grab it! */ - __runq_remove(speer); - vc->processor = cpu; - - return speer; - } - } - + /* + * Don't steal from an idle CPU's runq because it's about to + * pick up work from it itself. + */ + if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) ) + { + list_for_each( iter, &peer_pcpu->runq ) + { + speer = __runq_elem(iter); + + /* + * If next available VCPU here is not of strictly higher + * priority than ours, this PCPU is useless to us. + */ + if ( speer->pri <= pri ) + break; + + /* Is this VCPU is runnable on our PCPU? */ + vc = speer->vcpu; + BUG_ON( is_idle_vcpu(vc) ); + + if (__csched_vcpu_is_migrateable(vc, cpu)) + { + /* We got a candidate. Grab it! */ + CSCHED_VCPU_STAT_CRANK(speer, migrate_q); + CSCHED_STAT_CRANK(migrate_queued); + __runq_remove(speer); + vc->processor = cpu; + return speer; + } + } + } + + CSCHED_STAT_CRANK(steal_peer_idle); return NULL; } @@ -1081,11 +1093,10 @@ csched_load_balance(int cpu, struct csch csched_load_balance(int cpu, struct csched_vcpu *snext) { struct csched_vcpu *speer; - struct csched_pcpu *spc; - struct vcpu *peer_vcpu; cpumask_t workers; - cpumask_t loners; int peer_cpu; + + BUG_ON( cpu != snext->vcpu->processor ); if ( snext->pri == CSCHED_PRI_IDLE ) CSCHED_STAT_CRANK(load_balance_idle); @@ -1095,22 +1106,16 @@ csched_load_balance(int cpu, struct csch CSCHED_STAT_CRANK(load_balance_other); /* - * Peek at non-idling CPUs in the system - */ - cpus_clear(loners); + * Peek at non-idling CPUs in the system, starting with our + * immediate neighbour. + */ cpus_andnot(workers, cpu_online_map, csched_priv.idlers); cpu_clear(cpu, workers); - peer_cpu = cpu; - BUG_ON( peer_cpu != snext->vcpu->processor ); while ( !cpus_empty(workers) ) { - /* For each CPU of interest, starting with our neighbour... 
*/ - peer_cpu = next_cpu(peer_cpu, workers); - if ( peer_cpu == NR_CPUS ) - peer_cpu = first_cpu(workers); - + peer_cpu = __cycle_cpu(peer_cpu, &workers); cpu_clear(peer_cpu, workers); /* @@ -1126,83 +1131,13 @@ csched_load_balance(int cpu, struct csch continue; } - peer_vcpu = per_cpu(schedule_data, peer_cpu).curr; - spc = CSCHED_PCPU(peer_cpu); - - if ( unlikely(spc == NULL) ) - { - CSCHED_STAT_CRANK(steal_peer_down); - } - else if ( unlikely(is_idle_vcpu(peer_vcpu)) ) - { - /* - * Don't steal from an idle CPU's runq because it's about to - * pick up work from it itself. - */ - CSCHED_STAT_CRANK(steal_peer_idle); - } - else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) ) - { - if ( snext->pri == CSCHED_PRI_IDLE && - __csched_running_vcpu_is_stealable(cpu, peer_vcpu) ) - { - CSCHED_STAT_CRANK(steal_loner_candidate); - cpu_set(peer_cpu, loners); - } - } - else - { - /* Try to steal work from a remote CPU's runq. */ - speer = csched_runq_steal(spc, cpu, snext->pri); - if ( speer != NULL ) - { - spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock); - CSCHED_STAT_CRANK(vcpu_migrate); - speer->stats.migrate++; - return speer; - } - } - + /* + * Any work over there to steal? + */ + speer = csched_runq_steal(peer_cpu, cpu, snext->pri); spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock); - } - - /* - * If we failed to find any remotely queued VCPUs to move here, - * see if it would be more efficient to move any of the running - * remote VCPUs over here. - */ - while ( !cpus_empty(loners) ) - { - /* For each CPU of interest, starting with our neighbour... */ - peer_cpu = next_cpu(peer_cpu, loners); - if ( peer_cpu == NR_CPUS ) - peer_cpu = first_cpu(loners); - - cpu_clear(peer_cpu, loners); - - if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) ) - { - CSCHED_STAT_CRANK(steal_trylock_failed); - continue; - } - - peer_vcpu = per_cpu(schedule_data, peer_cpu).curr; - spc = CSCHED_PCPU(peer_cpu); - - /* Signal the first candidate only. 
*/ - if ( !is_idle_vcpu(peer_vcpu) && - is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) && - __csched_running_vcpu_is_stealable(cpu, peer_vcpu) ) - { - set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags); - spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock); - - CSCHED_STAT_CRANK(steal_loner_signal); - cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ); - break; - } - - spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock); + if ( speer != NULL ) + return speer; } /* Failed to find more important work elsewhere... */ @@ -1270,7 +1205,6 @@ csched_schedule(s_time_t now) ret.task = snext->vcpu; CSCHED_VCPU_CHECK(ret.task); - return ret; } @@ -1279,22 +1213,25 @@ csched_dump_vcpu(struct csched_vcpu *svc { struct csched_dom * const sdom = svc->sdom; - printk("[%i.%i] pri=%i cpu=%i", + printk("[%i.%i] pri=%i flags=%x cpu=%i", svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id, svc->pri, + svc->flags, svc->vcpu->processor); if ( sdom ) { - printk(" credit=%i (%d+%u) {a/i=%u/%u m=%u w=%u}", - atomic_read(&svc->credit), - svc->stats.credit_last, - svc->stats.credit_incr, - svc->stats.state_active, - svc->stats.state_idle, - svc->stats.migrate, - sdom->weight); + printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight); +#ifdef CSCHED_STATS + printk(" (%d+%u) {a/i=%u/%u m=%u+%u}", + svc->stats.credit_last, + svc->stats.credit_incr, + svc->stats.state_active, + svc->stats.state_idle, + svc->stats.migrate_q, + svc->stats.migrate_r); +#endif } printk("\n"); diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/cache.h --- a/xen/include/asm-powerpc/cache.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/cache.h Thu Dec 14 08:57:36 2006 -0700 @@ -70,4 +70,5 @@ struct cpu_caches { u32 ilines_per_page; }; extern struct cpu_caches cpu_caches; +extern void cpu_flush_icache(void); #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/config.h --- a/xen/include/asm-powerpc/config.h Thu Dec 14 08:54:54 2006 -0700 +++ 
b/xen/include/asm-powerpc/config.h Thu Dec 14 08:57:36 2006 -0700 @@ -21,7 +21,7 @@ #ifndef __PPC_CONFIG_H__ #define __PPC_CONFIG_H__ -#define CONFIG_MAMBO 1 +#define CONFIG_SYSTEMSIM 1 #define HYPERVISOR_VIRT_START 0x0 /* XXX temp hack for common/kernel.c */ @@ -50,6 +50,8 @@ extern char __bss_start[]; #define CONFIG_GDB 1 #define CONFIG_SMP 1 #define CONFIG_PCI 1 +#define CONFIG_NUMA 1 +#define CONFIG_CMDLINE_SIZE 512 #define NR_CPUS 16 #ifndef ELFSIZE diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/debugger.h --- a/xen/include/asm-powerpc/debugger.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/debugger.h Thu Dec 14 08:57:36 2006 -0700 @@ -13,13 +13,68 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #ifndef _ASM_DEBUGGER_H_ #define _ASM_DEBUGGER_H_ + +#include <public/xen.h> + +extern void show_backtrace_regs(struct cpu_user_regs *); +extern void show_backtrace(ulong sp, ulong lr, ulong pc); + +static inline void show_execution_state(struct cpu_user_regs *regs) +{ + show_registers(regs); +} + +extern void dump_execution_state(void); + +static inline void dump_all_execution_state(void) +{ + ulong sp; + ulong lr; + + dump_execution_state(); + sp = (ulong)__builtin_frame_address(0); + lr = (ulong)__builtin_return_address(0); + + show_backtrace(sp, lr, lr); +} + +static inline void __force_crash(void) +{ + dump_all_execution_state(); + __builtin_trap(); +} + +static inline void debugger_trap_immediate(void) +{ + dump_all_execution_state(); +#ifdef CRASH_DEBUG + __builtin_trap(); +#endif +} + +static inline void unimplemented(void) +{ +#ifdef VERBOSE + dump_all_execution_state(); +#endif +} + +extern void __warn(char *file, int line); +#define WARN() __warn(__FILE__, __LINE__) 
+#define WARN_ON(_p) do { if (_p) WARN(); } while ( 0 ) + +extern void __attn(void); +#define ATTN() __attn(); + +#define FORCE_CRASH() __force_crash() #ifdef CRASH_DEBUG @@ -32,8 +87,6 @@ static inline int debugger_trap_fatal( return vector; } -#define debugger_trap_immediate() __asm__ __volatile__ ("trap"); - #else /* CRASH_DEBUG */ static inline int debugger_trap_fatal( @@ -43,17 +96,6 @@ static inline int debugger_trap_fatal( return vector; } -static inline void debugger_trap_immediate(void) -{ - ulong sp; - ulong lr; - - sp = (ulong)__builtin_frame_address(0); - lr = (ulong)__builtin_return_address(0); - - show_backtrace(sp, lr, lr); -} - #endif /* CRASH_DEBUG */ #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/delay.h --- a/xen/include/asm-powerpc/delay.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/delay.h Thu Dec 14 08:57:36 2006 -0700 @@ -13,16 +13,28 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 
2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #ifndef _ASM_DELAY_H_ #define _ASM_DELAY_H_ +#include <asm/time.h> + extern unsigned long ticks_per_usec; #define __udelay udelay -extern void udelay(unsigned long usecs); +static inline void udelay(unsigned long usecs) +{ + unsigned long ticks = usecs * ticks_per_usec; + unsigned long s; + unsigned long e; + s = get_timebase(); + do { + e = get_timebase(); + } while ((e-s) < ticks); +} #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/domain.h --- a/xen/include/asm-powerpc/domain.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/domain.h Thu Dec 14 08:57:36 2006 -0700 @@ -40,6 +40,9 @@ struct arch_domain { /* list of extents beyond RMA */ struct list_head extent_list; + + uint foreign_mfn_count; + uint *foreign_mfns; /* I/O-port access bitmap mask. */ u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. */ @@ -86,7 +89,7 @@ struct arch_vcpu { struct slb_entry slb_entries[NUM_SLB_ENTRIES]; /* I/O-port access bitmap. */ - u8 *iobmp; /* Guest kernel virtual address of the bitmap. */ + XEN_GUEST_HANDLE(uint8_t) iobmp; /* Guest kernel virtual address of the bitmap. */ int iobmp_limit; /* Number of ports represented in the bitmap. */ int iopl; /* Current IOPL for this VCPU. */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/flushtlb.h --- a/xen/include/asm-powerpc/flushtlb.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/flushtlb.h Thu Dec 14 08:57:36 2006 -0700 @@ -24,7 +24,6 @@ #include <xen/config.h> #include <xen/percpu.h> #include <xen/types.h> -#include <asm/misc.h> /* The current time as shown by the virtual TLB clock. 
*/ extern u32 tlbflush_clock; diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/grant_table.h --- a/xen/include/asm-powerpc/grant_table.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/grant_table.h Thu Dec 14 08:57:36 2006 -0700 @@ -29,6 +29,10 @@ * Caller must own caller's BIGLOCK, is responsible for flushing the TLB, and * must hold a reference to the page. */ +extern long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn); +extern long pte_remove(ulong flags, ulong ptex, ulong avpn, + ulong *hi, ulong *lo); + int create_grant_host_mapping( unsigned long addr, unsigned long frame, unsigned int flags); int destroy_grant_host_mapping( @@ -41,8 +45,7 @@ int destroy_grant_host_mapping( (d), XENSHARE_writable); \ } while ( 0 ) -#define gnttab_shared_mfn(d, t, i) \ - ((virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i)) +#define gnttab_shared_mfn(d, t, i) (((ulong)((t)->shared) >> PAGE_SHIFT) + (i)) #define gnttab_shared_gmfn(d, t, i) \ (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) @@ -61,4 +64,9 @@ static inline void gnttab_clear_flag(uns clear_bit(lnr, laddr); } +static inline uint cpu_foreign_map_order(void) +{ + /* 16 GiB */ + return 34 - PAGE_SHIFT; +} #endif /* __ASM_PPC_GRANT_TABLE_H__ */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/guest_access.h --- a/xen/include/asm-powerpc/guest_access.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/guest_access.h Thu Dec 14 08:57:36 2006 -0700 @@ -21,82 +21,6 @@ #ifndef __PPC_GUEST_ACCESS_H__ #define __PPC_GUEST_ACCESS_H__ -extern unsigned long xencomm_copy_to_guest(void *to, const void *from, - unsigned int len, unsigned int skip); -extern unsigned long xencomm_copy_from_guest(void *to, const void *from, - unsigned int len, unsigned int skip); -extern int xencomm_add_offset(void *handle, unsigned int bytes); -extern int xencomm_handle_is_null(void *ptr); - - -/* Is the guest handle a NULL reference? 
*/ -#define guest_handle_is_null(hnd) \ - ((hnd).p == NULL || xencomm_handle_is_null((hnd).p)) - -/* Offset the given guest handle into the array it refers to. */ -#define guest_handle_add_offset(hnd, nr) ({ \ - const typeof((hnd).p) _ptr = (hnd).p; \ - xencomm_add_offset(_ptr, nr * sizeof(*_ptr)); \ -}) - -/* Cast a guest handle to the specified type of handle. */ -#define guest_handle_cast(hnd, type) ({ \ - type *_x = (hnd).p; \ - XEN_GUEST_HANDLE(type) _y; \ - set_xen_guest_handle(_y, _x); \ - _y; \ -}) - -/* Since we run in real mode, we can safely access all addresses. That also - * means our __routines are identical to our "normal" routines. */ -#define guest_handle_okay(hnd, nr) 1 - -/* - * Copy an array of objects to guest context via a guest handle. - * Optionally specify an offset into the guest array. - */ -#define copy_to_guest_offset(hnd, idx, ptr, nr) \ - __copy_to_guest_offset(hnd, idx, ptr, nr) - -/* Copy sub-field of a structure to guest context via a guest handle. */ -#define copy_field_to_guest(hnd, ptr, field) \ - __copy_field_to_guest(hnd, ptr, field) - -/* - * Copy an array of objects from guest context via a guest handle. - * Optionally specify an offset into the guest array. - */ -#define copy_from_guest_offset(ptr, hnd, idx, nr) \ - __copy_from_guest_offset(ptr, hnd, idx, nr) - -/* Copy sub-field of a structure from guest context via a guest handle. 
*/ -#define copy_field_from_guest(ptr, hnd, field) \ - __copy_field_from_guest(ptr, hnd, field) - -#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \ -}) - -#define __copy_field_to_guest(hnd, ptr, field) ({ \ - const int _off = offsetof(typeof(*ptr), field); \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \ -}) - -#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \ -}) - -#define __copy_field_from_guest(ptr, hnd, field) ({ \ - const int _off = offsetof(typeof(*ptr), field); \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \ -}) +#include <xen/xencomm.h> #endif /* __PPC_GUEST_ACCESS_H__ */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/mach-default/irq_vectors.h --- a/xen/include/asm-powerpc/mach-default/irq_vectors.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/mach-default/irq_vectors.h Thu Dec 14 08:57:36 2006 -0700 @@ -37,26 +37,10 @@ #define FAST_TRAP -1 /* 0x80 */ #define FIRST_SYSTEM_VECTOR -1 +#define CALL_FUNCTION_VECTOR 0x0 +#define EVENT_CHECK_VECTOR 0x1 + #if 0 - -/* - * Vectors 0-16 in some cases are used for ISA interrupts. - */ - -/* - * Special IRQ vectors used by the SMP architecture, 0xf0-0xff - * - * some of the following vectors are 'rare', they are merged - * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). 
- */ -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define INVALIDATE_TLB_VECTOR 0xfd -#define EVENT_CHECK_VECTOR 0xfc -#define CALL_FUNCTION_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xf0 /* diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/mm.h --- a/xen/include/asm-powerpc/mm.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/mm.h Thu Dec 14 08:57:36 2006 -0700 @@ -13,9 +13,10 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #ifndef _ASM_MM_H_ @@ -25,10 +26,10 @@ #include <xen/list.h> #include <xen/types.h> #include <xen/mm.h> -#include <asm/misc.h> #include <asm/system.h> #include <asm/flushtlb.h> -#include <asm/uaccess.h> +#include <asm/page.h> +#include <asm/debugger.h> #define memguard_guard_range(_p,_l) ((void)0) #define memguard_unguard_range(_p,_l) ((void)0) @@ -86,39 +87,38 @@ struct page_extents { /* page extent */ struct page_info *pg; uint order; - ulong pfn; }; /* The following page types are MUTUALLY EXCLUSIVE. */ -#define PGT_none (0<<29) /* no special uses of this page */ -#define PGT_RMA (1<<29) /* This page is an RMA page? */ -#define PGT_writable_page (7<<29) /* has writable mappings of this page? */ -#define PGT_type_mask (7<<29) /* Bits 29-31. */ +#define PGT_none (0UL<<29) /* no special uses of this page */ +#define PGT_RMA (1UL<<29) /* This page is an RMA page? */ +#define PGT_writable_page (7UL<<29) /* has writable mappings of this page? */ +#define PGT_type_mask (7UL<<29) /* Bits 29-31. */ /* Owning guest has pinned this page to its current type? */ #define _PGT_pinned 28 -#define PGT_pinned (1U<<_PGT_pinned) +#define PGT_pinned (1UL<<_PGT_pinned) /* Has this page been validated for use as its current type? 
*/ #define _PGT_validated 27 -#define PGT_validated (1U<<_PGT_validated) +#define PGT_validated (1UL<<_PGT_validated) /* 16-bit count of uses of this frame as its current type. */ -#define PGT_count_mask ((1U<<16)-1) +#define PGT_count_mask ((1UL<<16)-1) /* Cleared when the owning guest 'frees' this page. */ #define _PGC_allocated 31 -#define PGC_allocated (1U<<_PGC_allocated) +#define PGC_allocated (1UL<<_PGC_allocated) /* Set on a *guest* page to mark it out-of-sync with its shadow */ #define _PGC_out_of_sync 30 -#define PGC_out_of_sync (1U<<_PGC_out_of_sync) +#define PGC_out_of_sync (1UL<<_PGC_out_of_sync) /* Set when is using a page as a page table */ #define _PGC_page_table 29 -#define PGC_page_table (1U<<_PGC_page_table) +#define PGC_page_table (1UL<<_PGC_page_table) /* Set when using page for RMA */ #define _PGC_page_RMA 28 -#define PGC_page_RMA (1U<<_PGC_page_RMA) +#define PGC_page_RMA (1UL<<_PGC_page_RMA) /* 29-bit count of references to this frame. */ -#define PGC_count_mask ((1U<<28)-1) +#define PGC_count_mask ((1UL<<28)-1) #define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end) @@ -132,6 +132,13 @@ static inline u32 pickle_domptr(struct d #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d)) + +#define XENSHARE_writable 0 +#define XENSHARE_readonly 1 +extern void share_xen_page_with_guest( + struct page_info *page, struct domain *d, int readonly); +extern void share_xen_page_with_privileged_guests( + struct page_info *page, int readonly); extern struct page_info *frame_table; extern unsigned long max_page; @@ -218,16 +225,18 @@ typedef struct { } vm_assist_info_t; extern vm_assist_info_t vm_assist_info[]; -#define share_xen_page_with_guest(p, d, r) do { } while (0) -#define share_xen_page_with_privileged_guests(p, r) do { } while (0) /* hope that accesses to this will fail spectacularly */ -#define machine_to_phys_mapping ((u32 *)-1UL) - -extern int 
update_grant_va_mapping(unsigned long va, - unsigned long val, - struct domain *, - struct vcpu *); +#undef machine_to_phys_mapping +#define INVALID_M2P_ENTRY (~0UL) + +/* do nothing, its all calculated */ +#define set_gpfn_from_mfn(mfn, pfn) do { } while (0) +#define get_gpfn_from_mfn(mfn) (mfn) + +extern unsigned long mfn_to_gmfn(struct domain *d, unsigned long mfn); + +extern unsigned long paddr_to_maddr(unsigned long paddr); #define INVALID_MFN (~0UL) #define PFN_TYPE_NONE 0 @@ -235,29 +244,48 @@ extern int update_grant_va_mapping(unsig #define PFN_TYPE_LOGICAL 2 #define PFN_TYPE_IO 3 #define PFN_TYPE_FOREIGN 4 +#define PFN_TYPE_GNTTAB 5 extern ulong pfn2mfn(struct domain *d, ulong pfn, int *type); +static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gmfn) +{ + int mtype; + ulong mfn; + + mfn = pfn2mfn(d, gmfn, &mtype); + if (mfn != INVALID_MFN) { + switch (mtype) { + case PFN_TYPE_RMA: + case PFN_TYPE_LOGICAL: + break; + default: + WARN(); + mfn = INVALID_MFN; + break; + } + } + return mfn; +} + +extern int update_grant_va_mapping(unsigned long va, + unsigned long val, + struct domain *, + struct vcpu *); /* Arch-specific portion of memory_op hypercall. */ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg); - -/* XXX implement me? */ -#define set_gpfn_from_mfn(mfn, pfn) do { } while (0) -/* XXX only used for debug print right now... 
*/ -#define get_gpfn_from_mfn(mfn) (mfn) - -static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gmfn) -{ - return pfn2mfn(d, gmfn, NULL); -} - -#define mfn_to_gmfn(_d, mfn) (mfn) extern int allocate_rma(struct domain *d, unsigned int order_pages); extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages); extern void free_extents(struct domain *d); +extern int arch_domain_add_extent(struct domain *d, struct page_info *page, + int order); + extern int steal_page(struct domain *d, struct page_info *page, unsigned int memflags); +/* XXX these just exist until we can stop #including x86 code */ +#define access_ok(addr,size) 1 +#define array_access_ok(addr,count,size) 1 #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/msr.h --- a/xen/include/asm-powerpc/msr.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/msr.h Thu Dec 14 08:57:36 2006 -0700 @@ -51,9 +51,9 @@ #define MSR_RI ULL(0x0000000000000002) #define MSR_LE ULL(0x0000000000000001) -/* MSR bits set on the Mambo simulator */ +/* MSR bits set on the systemsim simulator */ #define MSR_SIM ULL(0x0000000020000000) -#define MSR_MAMBO ULL(0x0000000010000000) +#define MSR_SYSTEMSIM ULL(0x0000000010000000) /* On a trap, srr1's copy of msr defines some bits as follows: */ #define MSR_TRAP_FE ULL(0x0000000000100000) /* Floating Point Exception */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/page.h --- a/xen/include/asm-powerpc/page.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/page.h Thu Dec 14 08:57:36 2006 -0700 @@ -13,9 +13,10 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 
2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #ifndef _ASM_PAGE_H @@ -28,7 +29,6 @@ #ifndef __ASSEMBLY__ #include <xen/config.h> -#include <asm/misc.h> #include <asm/cache.h> #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) @@ -129,5 +129,6 @@ static inline int get_order_from_pages(u #define _PAGE_PAT 0x080UL #define _PAGE_PSE 0x080UL #define _PAGE_GLOBAL 0x100UL + #endif /* ! __ASSEMBLY__ */ #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/powerpc64/string.h --- a/xen/include/asm-powerpc/powerpc64/string.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/powerpc64/string.h Thu Dec 14 08:57:36 2006 -0700 @@ -37,4 +37,7 @@ extern int memcmp(const void *,const voi extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); +extern void *systemsim_memset(void *, int, ulong); +extern void *systemsim_memcpy(void *, const void *, ulong); + #endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/processor.h --- a/xen/include/asm-powerpc/processor.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/processor.h Thu Dec 14 08:57:36 2006 -0700 @@ -31,6 +31,85 @@ /* most assembler do not know this instruction */ #define HRFID .long 0x4c000224 +/* Processor Version Register (PVR) field extraction */ + +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +#define __is_processor(pv) (PVR_VER(mfspr(SPRN_PVR)) == (pv)) + +/* + * IBM has further subdivided the standard PowerPC 16-bit version and + * revision subfields of the PVR for the PowerPC 403s into the following: + */ + +#define PVR_FAM(pvr) (((pvr) >> 20) & 0xFFF) /* Family field */ +#define PVR_MEM(pvr) (((pvr) >> 16) & 0xF) /* Member field */ +#define PVR_CORE(pvr) (((pvr) >> 12) & 0xF) /* Core field */ +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define 
PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + +/* Processor Version Numbers */ + +#define PVR_403GA 0x00200000 +#define PVR_403GB 0x00200100 +#define PVR_403GC 0x00200200 +#define PVR_403GCX 0x00201400 +#define PVR_405GP 0x40110000 +#define PVR_STB03XXX 0x40310000 +#define PVR_NP405H 0x41410000 +#define PVR_NP405L 0x41610000 +#define PVR_601 0x00010000 +#define PVR_602 0x00050000 +#define PVR_603 0x00030000 +#define PVR_603e 0x00060000 +#define PVR_603ev 0x00070000 +#define PVR_603r 0x00071000 +#define PVR_604 0x00040000 +#define PVR_604e 0x00090000 +#define PVR_604r 0x000A0000 +#define PVR_620 0x00140000 +#define PVR_740 0x00080000 +#define PVR_750 PVR_740 +#define PVR_740P 0x10080000 +#define PVR_750P PVR_740P +#define PVR_7400 0x000C0000 +#define PVR_7410 0x800C0000 +#define PVR_7450 0x80000000 +#define PVR_8540 0x80200000 +#define PVR_8560 0x80200000 +/* + * For the 8xx processors, all of them report the same PVR family for + * the PowerPC core. The various versions of these processors must be + * differentiated by the version number in the Communication Processor + * Module (CPM). 
+ */ +#define PVR_821 0x00500000 +#define PVR_823 PVR_821 +#define PVR_850 PVR_821 +#define PVR_860 PVR_821 +#define PVR_8240 0x00810100 +#define PVR_8245 0x80811014 +#define PVR_8260 PVR_8240 + +/* 64-bit processors */ +/* XXX the prefix should be PVR_, we'll do a global sweep to fix it one day */ +#define PV_NORTHSTAR 0x0033 +#define PV_PULSAR 0x0034 +#define PV_POWER4 0x0035 +#define PV_ICESTAR 0x0036 +#define PV_SSTAR 0x0037 +#define PV_POWER4p 0x0038 +#define PV_970 0x0039 +#define PV_POWER5 0x003A +#define PV_POWER5p 0x003B +#define PV_970FX 0x003C +#define PV_630 0x0040 +#define PV_630p 0x0041 +#define PV_970MP 0x0044 +#define PV_BE 0x0070 + #ifndef __ASSEMBLY__ #include <xen/types.h> @@ -38,13 +117,10 @@ struct vcpu; struct vcpu; struct cpu_user_regs; extern int cpu_machinecheck(struct cpu_user_regs *); -extern void cpu_scom_init(void); extern void show_registers(struct cpu_user_regs *); -extern void show_execution_state(struct cpu_user_regs *); -extern void show_backtrace(ulong sp, ulong lr, ulong pc); extern unsigned int cpu_extent_order(void); extern unsigned int cpu_default_rma_order_pages(void); -extern int cpu_rma_valid(unsigned int log); +extern int cpu_rma_valid(unsigned int order); extern uint cpu_large_page_orders(uint *sizes, uint max); extern void cpu_initialize(int cpuid); extern void cpu_init_vcpu(struct vcpu *); @@ -54,13 +130,6 @@ extern void flush_segments(void); extern void flush_segments(void); extern void dump_segments(int valid); -/* XXX this could also land us in GDB */ -#define dump_execution_state() BUG() - -extern void __warn(char *file, int line); -#define WARN() __warn(__FILE__, __LINE__) -#define WARN_ON(_p) do { if (_p) WARN(); } while ( 0 ) - #define ARCH_HAS_PREFETCH static inline void prefetch(const void *x) {;} @@ -83,7 +152,8 @@ static inline void nop(void) { static inline void nop(void) { __asm__ __volatile__ ("nop"); } -#define cpu_relax() nop() +/* will need to address thread priorities when we go SMT */ +#define 
cpu_relax() barrier() static inline unsigned int mfpir(void) { @@ -207,13 +277,13 @@ static inline unsigned mfdsisr(void) return val; } -#ifdef CONFIG_MAMBO -static inline int on_mambo(void) -{ - return !!(mfmsr() & MSR_MAMBO); -} -#else /* CONFIG_MAMBO */ -static inline int on_mambo(void) { return 0; } +#ifdef CONFIG_SYSTEMSIM +static inline int on_systemsim(void) +{ + return !!(mfmsr() & MSR_SYSTEMSIM); +} +#else /* CONFIG_SYSTEMSIM */ +static inline int on_systemsim(void) { return 0; } #endif #endif /* __ASSEMBLY__ */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/smp.h --- a/xen/include/asm-powerpc/smp.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/smp.h Thu Dec 14 08:57:36 2006 -0700 @@ -25,6 +25,12 @@ #include <xen/cpumask.h> #include <xen/init.h> #include <asm/current.h> + +/* crap to make x86 "common code" happy */ +#define BAD_APICID 0xFFu +extern u8 x86_cpu_to_apicid[]; + + extern int smp_num_siblings; /* revisit when we support SMP */ @@ -35,4 +41,20 @@ extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_core_map[]; extern void __devinit smp_generic_take_timebase(void); extern void __devinit smp_generic_give_timebase(void); + +#define SA_INTERRUPT 0x20000000u +typedef int irqreturn_t; +extern int request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct cpu_user_regs *), + unsigned long irqflags, const char * devname, void *dev_id); +void smp_message_recv(int msg, struct cpu_user_regs *regs); +void smp_call_function_interrupt(struct cpu_user_regs *regs); +void smp_event_check_interrupt(void); +void send_IPI_mask(cpumask_t mask, int vector); + +#undef DEBUG_IPI +#ifdef DEBUG_IPI +void ipi_torture_test(void); #endif + +#endif diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-powerpc/spinlock.h --- a/xen/include/asm-powerpc/spinlock.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-powerpc/spinlock.h Thu Dec 14 08:57:36 2006 -0700 @@ -70,18 +70,15 @@ cas_u32(volatile u32 *ptr, u32 oval, u32 return 
tmp; } -typedef union { +typedef struct { volatile u32 lock; - struct { - s8 recurse_cpu; - u8 recurse_cnt; - s16 lock; - } fields; + s16 recurse_cpu; + u16 recurse_cnt; } spinlock_t; #define __UNLOCKED (0U) #define __LOCKED (~__UNLOCKED) -#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { __UNLOCKED } +#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { __UNLOCKED, -1, 0 } static inline void spin_lock_init(spinlock_t *lock) { *lock = (spinlock_t) SPIN_LOCK_UNLOCKED; @@ -181,17 +178,17 @@ static inline void _raw_spin_unlock_recu static inline void _raw_spin_unlock_recursive(spinlock_t *lock) { int cpu = smp_processor_id(); - if (likely(lock->fields.recurse_cpu != cpu)) { + if (likely(lock->recurse_cpu != cpu)) { spin_lock(lock); - lock->fields.recurse_cpu = cpu; - } - lock->fields.recurse_cnt++; + lock->recurse_cpu = cpu; + } + lock->recurse_cnt++; } static inline void _raw_spin_unlock_recursive(spinlock_t *lock) { - if (likely(--lock->fields.recurse_cnt == 0)) { - lock->fields.recurse_cpu = -1; + if (likely(--lock->recurse_cnt == 0)) { + lock->recurse_cpu = -1; spin_unlock(lock); } } @@ -200,19 +197,19 @@ static inline void _raw_spin_unlock_recu #define _raw_spin_lock_recursive(_lock) \ do { \ int cpu = smp_processor_id(); \ - if ( likely((_lock)->fields.recurse_cpu != cpu) ) \ + if ( likely((_lock)->recurse_cpu != cpu) ) \ { \ spin_lock(_lock); \ - (_lock)->fields.recurse_cpu = cpu; \ + (_lock)->recurse_cpu = cpu; \ } \ - (_lock)->fields.recurse_cnt++; \ + (_lock)->recurse_cnt++; \ } while ( 0 ) #define _raw_spin_unlock_recursive(_lock) \ do { \ - if ( likely(--(_lock)->fields.recurse_cnt == 0) ) \ + if ( likely(--(_lock)->recurse_cnt == 0) ) \ { \ - (_lock)->fields.recurse_cpu = -1; \ + (_lock)->recurse_cpu = -1; \ spin_unlock(_lock); \ } \ } while ( 0 ) diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/numa.h --- a/xen/include/asm-x86/numa.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-x86/numa.h Thu Dec 14 08:57:36 2006 -0700 @@ -37,7 +37,7 @@ extern void 
__init init_cpu_to_node(void static inline void clear_node_cpumask(int cpu) { - clear_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); + cpu_clear(cpu, node_to_cpumask[cpu_to_node(cpu)]); } /* Simple perfect hash to map physical addresses to node numbers */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-x86/page.h Thu Dec 14 08:57:36 2006 -0700 @@ -26,25 +26,37 @@ #endif /* Read a pte atomically from memory. */ -#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep)) -#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep)) -#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep)) -#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep)) +#define l1e_read_atomic(l1ep) \ + l1e_from_intpte(pte_read_atomic(&l1e_get_intpte(*(l1ep)))) +#define l2e_read_atomic(l2ep) \ + l2e_from_intpte(pte_read_atomic(&l2e_get_intpte(*(l2ep)))) +#define l3e_read_atomic(l3ep) \ + l3e_from_intpte(pte_read_atomic(&l3e_get_intpte(*(l3ep)))) +#define l4e_read_atomic(l4ep) \ + l4e_from_intpte(pte_read_atomic(&l4e_get_intpte(*(l4ep)))) /* Write a pte atomically to memory. 
*/ -#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e)) -#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e)) -#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e)) -#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e)) +#define l1e_write_atomic(l1ep, l1e) \ + pte_write_atomic(&l1e_get_intpte(*(l1ep)), l1e_get_intpte(l1e)) +#define l2e_write_atomic(l2ep, l2e) \ + pte_write_atomic(&l2e_get_intpte(*(l2ep)), l2e_get_intpte(l2e)) +#define l3e_write_atomic(l3ep, l3e) \ + pte_write_atomic(&l3e_get_intpte(*(l3ep)), l3e_get_intpte(l3e)) +#define l4e_write_atomic(l4ep, l4e) \ + pte_write_atomic(&l4e_get_intpte(*(l4ep)), l4e_get_intpte(l4e)) /* * Write a pte safely but non-atomically to memory. * The PTE may become temporarily not-present during the update. */ -#define l1e_write(l1ep, l1e) pte_write(l1ep, l1e_get_intpte(l1e)) -#define l2e_write(l2ep, l2e) pte_write(l2ep, l2e_get_intpte(l2e)) -#define l3e_write(l3ep, l3e) pte_write(l3ep, l3e_get_intpte(l3e)) -#define l4e_write(l4ep, l4e) pte_write(l4ep, l4e_get_intpte(l4e)) +#define l1e_write(l1ep, l1e) \ + pte_write(&l1e_get_intpte(*(l1ep)), l1e_get_intpte(l1e)) +#define l2e_write(l2ep, l2e) \ + pte_write(&l2e_get_intpte(*(l2ep)), l2e_get_intpte(l2e)) +#define l3e_write(l3ep, l3e) \ + pte_write(&l3e_get_intpte(*(l3ep)), l3e_get_intpte(l3e)) +#define l4e_write(l4ep, l4e) \ + pte_write(&l4e_get_intpte(*(l4ep)), l4e_get_intpte(l4e)) /* Get direct integer representation of a pte's contents (intpte_t). */ #define l1e_get_intpte(x) ((x).l1) diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-x86/shadow.h Thu Dec 14 08:57:36 2006 -0700 @@ -540,6 +540,9 @@ extern int shadow_remove_write_access(st * Returns non-zero if we need to flush TLBs. 
*/ extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn); +/* Remove all mappings from the shadows. */ +extern void shadow_blow_tables(struct domain *d); + void shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn); /* This is a HVM page that we thing is no longer a pagetable. diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_32/page-2level.h --- a/xen/include/asm-x86/x86_32/page-2level.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-x86/x86_32/page-2level.h Thu Dec 14 08:57:36 2006 -0700 @@ -28,9 +28,9 @@ typedef l2_pgentry_t root_pgentry_t; #endif /* !__ASSEMBLY__ */ -#define pte_read_atomic(ptep) (*(intpte_t *)(ptep)) -#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) -#define pte_write(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) +#define pte_read_atomic(ptep) (*(ptep)) +#define pte_write_atomic(ptep, pte) (*(ptep) = (pte)) +#define pte_write(ptep, pte) (*(ptep) = (pte)) /* root table */ #define root_get_pfn l2e_get_pfn diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_32/page-3level.h --- a/xen/include/asm-x86/x86_32/page-3level.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-x86/x86_32/page-3level.h Thu Dec 14 08:57:36 2006 -0700 @@ -38,22 +38,23 @@ typedef l3_pgentry_t root_pgentry_t; #endif /* !__ASSEMBLY__ */ -#define pte_read_atomic(ptep) ({ \ - intpte_t __pte = *(intpte_t *)(ptep), __npte; \ - while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \ - __pte = __npte; \ +#define pte_read_atomic(ptep) ({ \ + intpte_t __pte = *(ptep), __npte; \ + while ( (__npte = cmpxchg(ptep, __pte, __pte)) != __pte ) \ + __pte = __npte; \ __pte; }) -#define pte_write_atomic(ptep, pte) do { \ - intpte_t __pte = *(intpte_t *)(ptep), __npte; \ - while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \ - __pte = __npte; \ +#define pte_write_atomic(ptep, pte) do { \ + intpte_t __pte = *(ptep), __npte; \ + while ( (__npte = cmpxchg(ptep, __pte, (pte))) != __pte ) 
\ + __pte = __npte; \ } while ( 0 ) -#define pte_write(ptep, pte) do { \ - *((u32 *)(ptep)+0) = 0; \ - wmb(); \ - *((u32 *)(ptep)+1) = (pte) >> 32; \ - wmb(); \ - *((u32 *)(ptep)+0) = (pte) >> 0; \ +#define pte_write(ptep, pte) do { \ + u32 *__ptep_words = (u32 *)(ptep); \ + __ptep_words[0] = 0; \ + wmb(); \ + __ptep_words[1] = (pte) >> 32; \ + wmb(); \ + __ptep_words[0] = (pte) >> 0; \ } while ( 0 ) /* root table */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/asm-x86/x86_64/page.h Thu Dec 14 08:57:36 2006 -0700 @@ -43,9 +43,9 @@ typedef l4_pgentry_t root_pgentry_t; #endif /* !__ASSEMBLY__ */ -#define pte_read_atomic(ptep) (*(intpte_t *)(ptep)) -#define pte_write_atomic(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) -#define pte_write(ptep, pte) ((*(intpte_t *)(ptep)) = (pte)) +#define pte_read_atomic(ptep) (*(ptep)) +#define pte_write_atomic(ptep, pte) (*(ptep) = (pte)) +#define pte_write(ptep, pte) (*(ptep) = (pte)) /* Given a virtual address, get an entry offset into a linear page table. 
*/ #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT) diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/arch-powerpc.h --- a/xen/include/public/arch-powerpc.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/arch-powerpc.h Thu Dec 14 08:57:36 2006 -0700 @@ -73,6 +73,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #ifndef __ASSEMBLY__ +#define XENCOMM_INLINE_FLAG (1UL << 63) + typedef uint64_t xen_ulong_t; /* User-accessible registers: need to be saved/restored for every nested Xen diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/domctl.h --- a/xen/include/public/domctl.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/domctl.h Thu Dec 14 08:57:36 2006 -0700 @@ -385,6 +385,13 @@ typedef struct xen_domctl_settimeoffset typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); +#define XEN_DOMCTL_real_mode_area 26 +struct xen_domctl_real_mode_area { + uint32_t log; /* log2 of Real Mode Area size */ +}; +typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + struct xen_domctl { uint32_t cmd; uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ @@ -410,6 +417,7 @@ struct xen_domctl { struct xen_domctl_hypercall_init hypercall_init; struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_real_mode_area real_mode_area; uint8_t pad[128]; } u; }; diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/fbif.h --- a/xen/include/public/io/fbif.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/io/fbif.h Thu Dec 14 08:57:36 2006 -0700 @@ -1,18 +1,30 @@ /* * fbif.h -- Xen virtual frame buffer device * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the 
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2005 Anthony Liguori <aliguori@xxxxxxxxxx> * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@xxxxxxxxxx> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive for - * more details. 
*/ #ifndef __XEN_PUBLIC_IO_FBIF_H__ #define __XEN_PUBLIC_IO_FBIF_H__ - -#include <asm/types.h> /* Out events (frontend -> backend) */ @@ -31,20 +43,20 @@ struct xenfb_update { - __u8 type; /* XENFB_TYPE_UPDATE */ - __s32 x; /* source x */ - __s32 y; /* source y */ - __s32 width; /* rect width */ - __s32 height; /* rect height */ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ }; #define XENFB_OUT_EVENT_SIZE 40 union xenfb_out_event { - __u8 type; - struct xenfb_update update; - char pad[XENFB_OUT_EVENT_SIZE]; + uint8_t type; + struct xenfb_update update; + char pad[XENFB_OUT_EVENT_SIZE]; }; /* In events (backend -> frontend) */ @@ -58,8 +70,8 @@ union xenfb_out_event union xenfb_in_event { - __u8 type; - char pad[XENFB_IN_EVENT_SIZE]; + uint8_t type; + char pad[XENFB_IN_EVENT_SIZE]; }; /* shared page */ @@ -82,25 +94,25 @@ union xenfb_in_event struct xenfb_page { - __u32 in_cons, in_prod; - __u32 out_cons, out_prod; + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; - __s32 width; /* the width of the framebuffer (in pixels) */ - __s32 height; /* the height of the framebuffer (in pixels) */ - __u32 line_length; /* the length of a row of pixels (in bytes) */ - __u32 mem_length; /* the length of the framebuffer (in bytes) */ - __u8 depth; /* the depth of a pixel (in bits) */ + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ - /* - * Framebuffer page directory - * - * Each directory page holds PAGE_SIZE / sizeof(*pd) - * framebuffer pages, and can thus map up to PAGE_SIZE * - * PAGE_SIZE / sizeof(*pd) bytes. 
With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4, that's 4 Megs. Two directory - * pages should be enough for a while. - */ - unsigned long pd[2]; + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4, that's 4 Megs. Two directory + * pages should be enough for a while. + */ + unsigned long pd[2]; }; /* @@ -114,3 +126,13 @@ struct xenfb_page #endif #endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/kbdif.h --- a/xen/include/public/io/kbdif.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/io/kbdif.h Thu Dec 14 08:57:36 2006 -0700 @@ -1,18 +1,30 @@ /* * kbdif.h -- Xen virtual keyboard/mouse * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2005 Anthony Liguori <aliguori@xxxxxxxxxx> * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@xxxxxxxxxx> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive for - * more details. */ #ifndef __XEN_PUBLIC_IO_KBDIF_H__ #define __XEN_PUBLIC_IO_KBDIF_H__ - -#include <asm/types.h> /* In events (backend -> frontend) */ @@ -35,34 +47,34 @@ struct xenkbd_motion { - __u8 type; /* XENKBD_TYPE_MOTION */ - __s32 rel_x; /* relative X motion */ - __s32 rel_y; /* relative Y motion */ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ }; struct xenkbd_key { - __u8 type; /* XENKBD_TYPE_KEY */ - __u8 pressed; /* 1 if pressed; 0 otherwise */ - __u32 keycode; /* KEY_* from linux/input.h */ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ }; struct xenkbd_position { - __u8 type; /* XENKBD_TYPE_POS */ - __s32 abs_x; /* absolute X position (in FB pixels) */ - __s32 abs_y; /* absolute Y position (in FB pixels) */ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ }; #define XENKBD_IN_EVENT_SIZE 40 union xenkbd_in_event { - __u8 type; - struct xenkbd_motion motion; - struct xenkbd_key key; - struct xenkbd_position pos; - char pad[XENKBD_IN_EVENT_SIZE]; + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; }; /* Out events (frontend -> backend) */ 
@@ -77,8 +89,8 @@ union xenkbd_in_event union xenkbd_out_event { - __u8 type; - char pad[XENKBD_OUT_EVENT_SIZE]; + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; }; /* shared page */ @@ -101,8 +113,18 @@ union xenkbd_out_event struct xenkbd_page { - __u32 in_cons, in_prod; - __u32 out_cons, out_prod; + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; }; #endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/pciif.h --- a/xen/include/public/io/pciif.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/io/pciif.h Thu Dec 14 08:57:36 2006 -0700 @@ -25,7 +25,7 @@ #define __XEN_PCI_COMMON_H__ /* Be sure to bump this number if you change this file */ -#define XEN_PCI_MAGIC "7" +#define XEN_PCI_MAGIC "7" /* xen_pci_sharedinfo flags */ #define _XEN_PCIF_active (0) @@ -45,29 +45,39 @@ #define XEN_PCI_ERR_op_failed (-5) struct xen_pci_op { - /* IN: what action to perform: XEN_PCI_OP_* */ - uint32_t cmd; + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; - /* OUT: will contain an error number (if any) from errno.h */ - int32_t err; + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; - /* IN: which device to touch */ - uint32_t domain; /* PCI Domain/Segment */ - uint32_t bus; - uint32_t devfn; + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; - /* IN: which configuration registers to touch */ - int32_t offset; - int32_t size; + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; - /* IN/OUT: Contains the result after a READ or the value to WRITE */ - uint32_t value; + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; }; struct xen_pci_sharedinfo { - /* flags - XEN_PCIF_* */ - uint32_t flags; - struct xen_pci_op op; + /* flags - XEN_PCIF_* */ + 
uint32_t flags; + struct xen_pci_op op; }; #endif /* __XEN_PCI_COMMON_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/io/xenbus.h --- a/xen/include/public/io/xenbus.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/io/xenbus.h Thu Dec 14 08:57:36 2006 -0700 @@ -56,8 +56,18 @@ enum xenbus_state { */ XenbusStateClosing = 5, - XenbusStateClosed = 6 + XenbusStateClosed = 6 }; typedef enum xenbus_state XenbusState; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/memory.h --- a/xen/include/public/memory.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/memory.h Thu Dec 14 08:57:36 2006 -0700 @@ -248,7 +248,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t * XENMEM_memory_map. * arg == addr of xen_memory_map_t. */ -#define XENMEM_machine_memory_map 10 +#define XENMEM_machine_memory_map 10 /* * Set the pseudo-physical memory map of a domain, as returned by diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/sysctl.h Thu Dec 14 08:57:36 2006 -0700 @@ -119,7 +119,7 @@ struct xen_sysctl_perfc_op { uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ /* OUT variables. 
*/ uint32_t nr_counters; /* number of counters description */ - uint32_t nr_vals; /* number of values */ + uint32_t nr_vals; /* number of values */ /* counter information (or NULL) */ XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc; /* counter values (or NULL) */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/trace.h --- a/xen/include/public/trace.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/trace.h Thu Dec 14 08:57:36 2006 -0700 @@ -32,7 +32,7 @@ #define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ #define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ #define TRC_VMX 0x0008f000 /* Xen VMX trace */ -#define TRC_MEM 0x000af000 /* Xen memory trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ #define TRC_ALL 0xfffff000 /* Trace subclasses */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/public/xenoprof.h --- a/xen/include/public/xenoprof.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/public/xenoprof.h Thu Dec 14 08:57:36 2006 -0700 @@ -52,7 +52,7 @@ #define XENOPROF_last_op 14 #define MAX_OPROF_EVENTS 32 -#define MAX_OPROF_DOMAINS 25 +#define MAX_OPROF_DOMAINS 25 #define XENOPROF_CPU_TYPE_SIZE 64 /* Xenoprof performance events (not Xen events) */ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/xen/elfcore.h --- a/xen/include/xen/elfcore.h Thu Dec 14 08:54:54 2006 -0700 +++ b/xen/include/xen/elfcore.h Thu Dec 14 08:57:36 2006 -0700 @@ -87,7 +87,7 @@ typedef struct desctype desc; \ PAD32(sizeof(desctype)); \ } desc; \ - } __attribute__ ((packed)) type + } type #define CORE_STR "CORE" #define CORE_STR_LEN 5 /* including terminating zero */ @@ -119,7 +119,7 @@ typedef struct { crash_note_core_t core; crash_note_xen_core_t xen_regs; crash_note_xen_info_t xen_info; -} __attribute__ ((packed)) crash_note_t; +} crash_note_t; #define setup_crash_note(np, member, str, str_len, id) \ np->member.note.note.note.namesz = str_len; \ diff -r ed56ef3e9716 -r 4762d73ced42 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Thu Dec 14 
08:54:54 2006 -0700 +++ b/xen/include/xen/sched.h Thu Dec 14 08:57:36 2006 -0700 @@ -188,6 +188,7 @@ struct domain_setup_info #define PAEKERN_no 0 #define PAEKERN_yes 1 #define PAEKERN_extended_cr3 2 +#define PAEKERN_bimodal 3 unsigned int pae_kernel; /* Initialised by loader: Private. */ unsigned long elf_paddr_offset; @@ -437,6 +438,7 @@ static inline int vcpu_runnable(struct v } void vcpu_pause(struct vcpu *v); +void vcpu_pause_nosync(struct vcpu *v); void domain_pause(struct domain *d); void vcpu_unpause(struct vcpu *v); void domain_unpause(struct domain *d); diff -r ed56ef3e9716 -r 4762d73ced42 tools/libxc/powerpc64/utils.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/powerpc64/utils.c Thu Dec 14 08:57:36 2006 -0700 @@ -0,0 +1,211 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation 2006 + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> + */ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <inttypes.h> + +#include <xen/xen.h> +#include <xen/memory.h> +#include <xc_private.h> +#include <xg_private.h> +#include <xenctrl.h> + +#include "flatdevtree_env.h" +#include "flatdevtree.h" +#include "utils.h" + +unsigned long get_rma_pages(void *devtree) +{ + void *rma; + uint64_t rma_reg[2]; + int rc; + + rma = ft_find_node(devtree, "/memory@0"); + if (rma == NULL) { + DPRINTF("couldn't find /memory@0\n"); + return 0; + } + rc = ft_get_prop(devtree, rma, "reg", rma_reg, sizeof(rma_reg)); + if (rc < 0) { + DPRINTF("couldn't get /memory@0/reg\n"); + return 0; + } + if (rma_reg[0] != 0) { + DPRINTF("RMA did not start at 0\n"); + return 0; + } + return rma_reg[1] >> PAGE_SHIFT; +} + +int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array, + unsigned long nr_pages) +{ + int rc; + int i; + xen_pfn_t *p; + + *page_array = malloc(nr_pages * sizeof(xen_pfn_t)); + if (*page_array == NULL) { + perror("malloc"); + return -1; + } + + DPRINTF("xc_get_pfn_list\n"); + /* We know that the RMA is machine contiguous so lets just get the + * first MFN and fill the rest in ourselves */ + rc = xc_get_pfn_list(xc_handle, domid, *page_array, 1); + if (rc == -1) { + perror("Could not get the page frame list"); + return -1; + } + p = *page_array; + for (i = 1; i < nr_pages; i++) + p[i] = p[i - 1] + 1; + return 0; +} + +int install_image( + int xc_handle, + int domid, + xen_pfn_t *page_array, + void *image, + unsigned long paddr, + unsigned long size) +{ + uint8_t *img = image; + int i; + int rc = 0; + + if (paddr & ~PAGE_MASK) { + printf("*** unaligned address\n"); + return -1; + } + + for (i = 0; i < size; i += PAGE_SIZE) { + void *page = img + i; + xen_pfn_t pfn = (paddr + 
i) >> PAGE_SHIFT; + xen_pfn_t mfn = page_array[pfn]; + + rc = xc_copy_to_domain_page(xc_handle, domid, mfn, page); + if (rc < 0) { + perror("xc_copy_to_domain_page"); + break; + } + } + return rc; +} + +void *load_file(const char *path, unsigned long *filesize) +{ + void *img; + ssize_t size; + int fd; + + DPRINTF("load_file(%s)\n", path); + + fd = open(path, O_RDONLY); + if (fd < 0) { + perror(path); + return NULL; + } + + size = lseek(fd, 0, SEEK_END); + if (size < 0) { + perror(path); + close(fd); + return NULL; + } + lseek(fd, 0, SEEK_SET); + + img = malloc(size); + if (img == NULL) { + perror(path); + close(fd); + return NULL; + } + + size = read(fd, img, size); + if (size <= 0) { + perror(path); + close(fd); + free(img); + return NULL; + } + + if (filesize) + *filesize = size; + close(fd); + return img; +} + +int load_elf_kernel( + int xc_handle, + int domid, + const char *kernel_path, + struct domain_setup_info *dsi, + xen_pfn_t *page_array) +{ + struct load_funcs load_funcs; + char *kernel_img; + unsigned long kernel_size; + int rc; + + /* load the kernel ELF file */ + kernel_img = load_file(kernel_path, &kernel_size); + if (kernel_img == NULL) { + rc = -1; + goto out; + } + + DPRINTF("probe_elf\n"); + rc = probe_elf(kernel_img, kernel_size, &load_funcs); + if (rc < 0) { + rc = -1; + printf("%s is not an ELF file\n", kernel_path); + goto out; + } + + DPRINTF("parseimage\n"); + rc = (load_funcs.parseimage)(kernel_img, kernel_size, dsi); + if (rc < 0) { + rc = -1; + goto out; + } + + DPRINTF("loadimage\n"); + (load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid, + page_array, dsi); + _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |