[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User awilliam@xxxxxxxxxxxx
# Date 1171573779 25200
# Node ID 9529d667d0426eb02077cb33abdc6e434e27d293
# Parent ac18d251df63401e67c63b83d3face99f5a9aeb3
# Parent 9af0c7e4ff513c02f9bd1548f21612e9cf59b022
merge with xen-unstable.hg
---
 tools/libxc/xc_dom_powerpc64.c | 100 -
 tools/python/xen/xend/FlatDeviceTree.py | 359 ---
 xen/arch/powerpc/elf32.c | 7
 xen/arch/powerpc/of_handler/strncpy.c | 54
 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 5
 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 5
 linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 3
 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 2
 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 6
 tools/libxc/Makefile | 7
 tools/libxc/powerpc64/Makefile | 1
 tools/libxc/powerpc64/flatdevtree.c | 7
 tools/libxc/powerpc64/flatdevtree.h | 2
 tools/libxc/powerpc64/mk_flatdevtree.c | 650 ++++++
 tools/libxc/powerpc64/mk_flatdevtree.h | 47
 tools/libxc/powerpc64/utils.c | 25
 tools/libxc/powerpc64/utils.h | 3
 tools/libxc/powerpc64/xc_linux_build.c | 270 +-
 tools/libxc/powerpc64/xc_prose_build.c | 124 -
 tools/libxc/xc_dom.h | 5
 tools/libxc/xenguest.h | 3
 tools/libxc/xg_private.c | 12
 tools/python/xen/lowlevel/xc/xc.c | 27
 tools/python/xen/xend/image.py | 56
 unmodified_drivers/linux-2.6/platform-pci/platform-compat.c | 6
 xen/arch/powerpc/Makefile | 4
 xen/arch/powerpc/backtrace.c | 1
 xen/arch/powerpc/boot_of.c | 43
 xen/arch/powerpc/domain.c | 35
 xen/arch/powerpc/domain_build.c | 214 --
 xen/arch/powerpc/hcalls.c | 6
 xen/arch/powerpc/memory.c | 150 -
 xen/arch/powerpc/mm.c | 1
 xen/arch/powerpc/of-devtree.c | 2
 xen/arch/powerpc/of-devtree.h | 3
 xen/arch/powerpc/of_handler/Makefile | 2
 xen/arch/powerpc/of_handler/strlcpy.c | 58
 xen/arch/powerpc/ofd_fixup.c | 11
 xen/arch/powerpc/papr/xlate.c | 17
 xen/arch/powerpc/powerpc64/asm-offsets.c | 2
 xen/arch/powerpc/powerpc64/exceptions.S | 8
 xen/arch/powerpc/powerpc64/ppc970.c | 7
 xen/arch/powerpc/powerpc64/ppc970_scom.c | 4
 xen/arch/powerpc/powerpc64/traps.c | 10
 xen/arch/powerpc/setup.c | 23
 xen/arch/powerpc/sysctl.c | 10
 xen/arch/powerpc/time.c | 6
 xen/arch/powerpc/xen.lds.S | 23
 xen/arch/x86/domain.c | 52
 xen/arch/x86/domain_build.c | 10
 xen/arch/x86/domctl.c | 11
 xen/arch/x86/hvm/hvm.c | 9
 xen/arch/x86/hvm/io.c | 2
 xen/arch/x86/hvm/platform.c | 41
 xen/arch/x86/hvm/svm/intr.c | 2
 xen/arch/x86/hvm/svm/svm.c | 32
 xen/arch/x86/hvm/svm/vmcb.c | 4
 xen/arch/x86/hvm/vlapic.c | 1
 xen/arch/x86/hvm/vmx/vmcs.c | 2
 xen/arch/x86/hvm/vmx/vmx.c | 34
 xen/arch/x86/mm.c | 107 -
 xen/arch/x86/mm/Makefile | 3
 xen/arch/x86/mm/p2m.c | 699 +++++++
 xen/arch/x86/mm/paging.c | 143 +
 xen/arch/x86/mm/shadow/common.c | 1197 +++---
 xen/arch/x86/mm/shadow/multi.c | 245 +-
 xen/arch/x86/mm/shadow/multi.h | 2
 xen/arch/x86/mm/shadow/page-guest32.h | 5
 xen/arch/x86/mm/shadow/private.h | 107 -
 xen/arch/x86/mm/shadow/types.h | 12
 xen/arch/x86/setup.c | 2
 xen/arch/x86/sysctl.c | 1
 xen/arch/x86/traps.c | 10
 xen/arch/x86/x86_32/domain_page.c | 1
 xen/arch/x86/x86_64/traps.c | 1
 xen/common/libelf/libelf-dominfo.c | 557 ++---
 xen/common/libelf/libelf-loader.c | 125 -
 xen/common/libelf/libelf-private.h | 26
 xen/common/libelf/libelf-relocate.c | 375 +--
 xen/common/libelf/libelf-tools.c | 153 -
 xen/include/asm-powerpc/config.h | 5
 xen/include/asm-powerpc/domain.h | 7
 xen/include/asm-powerpc/init.h | 10
 xen/include/asm-powerpc/mm.h | 3
 xen/include/asm-powerpc/nmi.h | 6
 xen/include/asm-powerpc/processor.h | 1
 xen/include/asm-x86/domain.h | 96
 xen/include/asm-x86/mm.h | 63
 xen/include/asm-x86/p2m.h | 142 +
 xen/include/asm-x86/page.h | 5
 xen/include/asm-x86/paging.h | 376 +++
 xen/include/asm-x86/shadow.h | 375 ---
 xen/include/public/arch-ia64.h | 1
 xen/include/public/arch-powerpc.h | 22
 xen/include/public/arch-x86/xen.h | 1
 95 files changed, 4112 insertions(+), 3398 deletions(-)

diff -r ac18d251df63 -r 9529d667d042 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Thu Feb 15 14:09:39 2007 -0700
@@ -344,7 +344,7 @@ static void frontend_changed(struct xenb
 	switch (frontend_state) {
 	case XenbusStateInitialising:
 		if (dev->state == XenbusStateClosed) {
-			printk("%s: %s: prepare for reconnect\n",
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
 			       __FUNCTION__, dev->nodename);
 			xenbus_switch_state(dev, XenbusStateInitWait);
 		}
@@ -488,7 +488,8 @@ static int connect_ring(struct backend_i
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	printk("blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
+	printk(KERN_INFO
+	       "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
 	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);

 	/* Map the shared frame, irq etc. */
diff -r ac18d251df63 -r 9529d667d042 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Thu Feb 15 14:09:39 2007 -0700
@@ -272,7 +272,7 @@ static void tap_frontend_changed(struct
 	switch (frontend_state) {
 	case XenbusStateInitialising:
 		if (dev->state == XenbusStateClosed) {
-			printk("%s: %s: prepare for reconnect\n",
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
 			       __FUNCTION__, dev->nodename);
 			xenbus_switch_state(dev, XenbusStateInitWait);
 		}
@@ -369,7 +369,8 @@ static int connect_ring(struct backend_i
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	printk("blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
+	printk(KERN_INFO
+	       "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n",
 	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);

 	/* Map the shared frame, irq etc. */
diff -r ac18d251df63 -r 9529d667d042 linux-2.6-xen-sparse/drivers/xen/core/reboot.c
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu Feb 15 14:09:39 2007 -0700
@@ -100,6 +100,7 @@ static void shutdown_handler(struct xenb
 static void shutdown_handler(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
 {
+	extern void ctrl_alt_del(void);
 	char *str;
 	struct xenbus_transaction xbt;
 	int err;
@@ -129,7 +130,7 @@ static void shutdown_handler(struct xenb
 	if (strcmp(str, "poweroff") == 0)
 		shutting_down = SHUTDOWN_POWEROFF;
 	else if (strcmp(str, "reboot") == 0)
-		kill_proc(1, SIGINT, 1); /* interrupt init */
+		ctrl_alt_del();
 	else if (strcmp(str, "suspend") == 0)
 		shutting_down = SHUTDOWN_SUSPEND;
 	else if (strcmp(str, "halt") == 0)
diff -r ac18d251df63 -r 9529d667d042 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Thu Feb 15 14:09:39 2007 -0700
@@ -217,7 +217,7 @@ static void frontend_changed(struct xenb
 	switch (frontend_state) {
 	case XenbusStateInitialising:
 		if (dev->state == XenbusStateClosed) {
-			printk("%s: %s: prepare for reconnect\n",
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
 			       __FUNCTION__, dev->nodename);
 			if (be->netif) {
 				netif_disconnect(be->netif);
diff -r ac18d251df63 -r 9529d667d042 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Feb 15 14:09:39 2007 -0700
@@ -1505,7 +1505,7 @@ static void netif_release_rx_bufs(struct
 	int id, ref;

 	if (np->copying_receiver) {
-		printk("%s: fix me for copying receiver.\n", __FUNCTION__);
+		WPRINTK("%s: fix me for copying receiver.\n", __FUNCTION__);
 		return;
 	}
@@ -1555,8 +1555,8 @@ static void netif_release_rx_bufs(struct
 		xfer++;
 	}

-	printk("%s: %d xfer, %d noxfer, %d unused\n",
-	       __FUNCTION__, xfer, noxfer, unused);
+	IPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
+	        __FUNCTION__, xfer, noxfer, unused);

 	if (xfer) {
 		/* Some pages are no longer absent... */
diff -r ac18d251df63 -r 9529d667d042 tools/libxc/Makefile
--- a/tools/libxc/Makefile	Thu Feb 15 13:13:36 2007 -0700
+++ b/tools/libxc/Makefile	Thu Feb 15 14:09:39 2007 -0700
@@ -49,15 +49,8 @@ GUEST_SRCS-y += xc_dom_core.c xc_dom_boo
 GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
 GUEST_SRCS-y += xc_dom_elfloader.c
 GUEST_SRCS-y += xc_dom_binloader.c
-
-ifeq ($(CONFIG_POWERPC),y)
-# big endian boxes
-GUEST_SRCS-y += xc_dom_powerpc64.c
-else
-# little endian boxes
 GUEST_SRCS-y += xc_dom_x86.c
 GUEST_SRCS-y += xc_dom_ia64.c
-endif

 GUEST_SRCS-$(CONFIG_X86) += xc_dom_compat_linux.c
 GUEST_SRCS-$(CONFIG_IA64) += xc_dom_compat_linux.c
diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/Makefile
--- a/tools/libxc/powerpc64/Makefile	Thu Feb 15 13:13:36 2007 -0700
+++ b/tools/libxc/powerpc64/Makefile	Thu Feb 15 14:09:39 2007 -0700
@@ -1,4 +1,5 @@ GUEST_SRCS-y += powerpc64/flatdevtree.c
 GUEST_SRCS-y += powerpc64/flatdevtree.c
+GUEST_SRCS-y += powerpc64/mk_flatdevtree.c
 GUEST_SRCS-y += powerpc64/xc_linux_build.c
 GUEST_SRCS-y += powerpc64/xc_prose_build.c
 GUEST_SRCS-y += powerpc64/utils.c
diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/flatdevtree.c
--- a/tools/libxc/powerpc64/flatdevtree.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/tools/libxc/powerpc64/flatdevtree.c	Thu Feb 15 14:09:39 2007 -0700
@@ -14,7 +14,7 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
  * Copyright Pantelis Antoniou 2006
- * Copyright (C) IBM Corporation 2006
+ * Copyright IBM Corporation 2006, 2007
  * 2006 (c) MontaVista, Software, Inc.
  *
  * Authors: Pantelis Antoniou <pantelis@xxxxxxxxxxxxxxxxx>
@@ -209,7 +209,7 @@ void ft_add_rsvmap(struct ft_cxt *cxt, u
 	((u64 *) cxt->pres)[0] = cpu_to_be64(physaddr); /* phys = 0, size = 0, terminate */
 	((u64 *) cxt->pres)[1] = cpu_to_be64(size);
-	cxt->pres += 18;	/* advance */
+	cxt->pres += 16;	/* advance two u64s worth */
 	((u64 *) cxt->pres)[0] = 0;	/* phys = 0, size = 0, terminate */
 	((u64 *) cxt->pres)[1] = 0;
@@ -317,6 +317,9 @@ int ft_end_tree(struct ft_cxt *cxt)
 	/* the new strings start */
 	cxt->pstr_begin = cxt->p_begin + cxt->struct_size;
 	cxt->pstr = cxt->pstr_begin + cxt->strings_size;
+
+	/* mark the size of string structure in bph */
+	bph->size_dt_strings = cxt->strings_size;

 	return 0;
 }
diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/flatdevtree.h
--- a/tools/libxc/powerpc64/flatdevtree.h	Thu Feb 15 13:13:36 2007 -0700
+++ b/tools/libxc/powerpc64/flatdevtree.h	Thu Feb 15 14:09:39 2007 -0700
@@ -40,7 +40,7 @@ struct boot_param_header {
 	/* version 2 fields below */
 	u32 boot_cpuid_phys;	/* Physical CPU id we're booting on */
 	/* version 3 fields below */
-	u32 dt_strings_size;	/* size of the DT strings block */
+	u32 size_dt_strings;	/* size of the DT strings block */
 };

 struct ft_cxt {
diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/mk_flatdevtree.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/mk_flatdevtree.c	Thu Feb 15 14:09:39 2007 -0700
@@ -0,0 +1,650 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation 2007
+ *
+ * Authors: Ryan Harper <ryanh@xxxxxxxxxx>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <inttypes.h>
+#include <math.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/dir.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+
+#include <xc_private.h> /* for PERROR() */
+
+#include "mk_flatdevtree.h"
+
+static uint32_t current_phandle = 0;
+
+static uint32_t get_phandle(void)
+{
+    return current_phandle++;
+}
+
+static int readfile(const char *fullpath, void *data, int len)
+{
+    struct stat st;
+    int saved_errno;
+    int rc = -1;
+    int fd;
+
+    if ((fd = open(fullpath, O_RDONLY)) == -1) {
+        PERROR("%s: failed to open file %s", __func__, fullpath);
+        return -1;
+    }
+
+    if ((rc = fstat(fd, &st)) == -1) {
+        PERROR("%s: failed to stat fd %d", __func__, fd);
+        goto error;
+    }
+
+    if (S_ISREG(st.st_mode))
+        rc = read(fd, data, len);
+
+    close(fd);
+    return rc;
+
+error:
+    saved_errno = errno;
+    close(fd);
+    errno = saved_errno;
+    return -1;
+}
+
+/*
+ * @property - string to check against the filter list
+ * @filter - NULL terminated list of strings
+ *
+ * compare @property string to each string in @filter
+ *
+ * return 1 if @property matches any filter, otherwise 0
+ *
+ */
+static int match(const char *property, const char **filter)
+{
+    int i;
+
+    for (i=0; filter[i] != NULL; i++) {
+        /* compare the filter to property */
+        if (strncmp(property, filter[i], strlen(filter[i])) == 0)
+            return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * copy the node at @dirpath filtering out any properties that match in @propfilter
+ */
+static int copynode(struct ft_cxt *cxt, const char *dirpath, const char **propfilter)
+{
+    struct dirent *tree;
+    struct stat st;
+    DIR *dir;
+    char fullpath[MAX_PATH];
+    char *bname = NULL;
+    char *basec = NULL;
+    int saved_errno;
+
+    if ((dir = opendir(dirpath)) == NULL) {
+        PERROR("%s: failed to open dir %s", __func__, dirpath);
+        return -1;
+    }
+
+    while (1) {
+        if ((tree = readdir(dir)) == NULL)
+            break; /* reached end of directory entries */
+
+        /* ignore . and .. */
+        if (strcmp(tree->d_name,"."
) == 0 || strcmp(tree->d_name,"..") == 0) + continue; + + /* build full path name of the file, for stat() */ + if (snprintf(fullpath, sizeof(fullpath), "%s/%s", dirpath, + tree->d_name) >= sizeof(fullpath)) { + PERROR("%s: failed to build full path", __func__); + goto error; + } + + /* stat the entry */ + if (stat(fullpath, &st) < 0) { + PERROR("%s: failed to stat file %s", __func__, fullpath); + goto error; + } + + if (S_ISDIR(st.st_mode)) { + /* start a new node for a dir */ + ft_begin_node(cxt, tree->d_name); + + /* copy everything in this dir */ + if (copynode(cxt, fullpath, propfilter) < 0) { + PERROR("%s: failed to copy node @ %s", __func__, fullpath); + goto error; + } + + /* end the node */ + ft_end_node(cxt); + } + /* add files in dir as properties */ + else if (S_ISREG(st.st_mode)) { + + if ((basec = strdup(fullpath)) == NULL) { + PERROR("%s: failed to dupe string", __func__); + goto error; + } + + if ((bname = basename(basec)) == NULL) { + PERROR("%s: basename() failed", __func__); + goto error; + } + + /* only add files that don't match the property filter string */ + if (!match(bname, propfilter)) { + char data[BUFSIZE]; + int len; + + /* snarf the data and push into the property */ + if ((len = readfile(fullpath, data, sizeof(data))) < 0) { + PERROR("%s: failed to read data from file %s", __func__, + fullpath); + goto error; + } + ft_prop(cxt, tree->d_name, data, len); + + } + + /* strdup mallocs memory */ + if (basec != NULL ) { + free(basec); + basec = NULL; + } + + } + } + + closedir(dir); + return 0; + +error: + saved_errno = errno; + + /* strdup mallocs memory */ + if (basec != NULL ) { + free(basec); + basec = NULL; + } + + closedir(dir); + + errno = saved_errno; + return -1; +} + +static int find_cpu0(char *cpupath, int len) +{ + const char path[] = "/proc/device-tree/cpus"; + const char device[] = "device_type"; + const char dev_cpu[] = "cpu"; + const char reg[] = "reg"; + char data[sizeof(dev_cpu)]; + char prop[MAX_PATH]; + char node[MAX_PATH]; + struct dirent *tree; + struct stat st; + DIR* dir; + int saved_errno; + int found = 0; + + if ((dir = opendir(path)) == NULL) { + PERROR("%s: failed to open directory %s", __func__, path); + return -1; + } + + while (!found) { + + if ((tree = readdir(dir)) == NULL) + break; /* reached end of directory entries */ + + /* ignore ., .. */ + if (strcmp(tree->d_name,"." 
) == 0 || strcmp(tree->d_name,"..") == 0) + continue; + + /* build full path name of the file, for stat() */ + if (snprintf(node, sizeof(node), "%s/%s", path, + tree->d_name) >= sizeof(node)) { + PERROR("%s: failed to concat strings", __func__); + goto error; + } + + /* stat the entry */ + if (stat(node, &st) < 0) { + PERROR("%s: failed to stat file %s", __func__, node); + /* something funny happen in /proc/device-tree, but march onward */ + continue; + } + + /* for each dir, check the device_type property until we find 'cpu'*/ + if (S_ISDIR(st.st_mode)) { + + /* construct path to device_type */ + if (snprintf(prop, sizeof(prop), "%s/%s", node, + device) >= sizeof(prop)) { + PERROR("%s: failed to concat strings", __func__); + goto error; + } + + /* read device_type into buffer */ + if ((readfile(prop, data, sizeof(data))) < 0) { + PERROR("%s: failed to read data from file %s", __func__, prop); + goto error; + } + + /* if the device_type is 'cpu', and reg is 0 + * return the path where we found it */ + if (strcmp(data, "cpu") == 0) { + + /* construct path to reg */ + if (snprintf(prop, sizeof(prop), "%s/%s", node, + reg) >= sizeof(prop)) { + PERROR("%s: failed to concat strings", __func__); + goto error; + } + + /* using data buffer since reg and device_type values have same size */ + if ((readfile(prop, data, sizeof(data))) < 0) { + PERROR("%s: failed to read data from file %s", __func__, prop); + goto error; + } + + /* now check property "reg" for value 0 */ + if ((u32)*data == 0) { + if (snprintf(cpupath, len, "%s", node) >= len) { + PERROR("%s: failed to copy cpupath", __func__); + goto error; + } + found = 1; + } + } + } + } + + closedir(dir); + return found; + +error: + saved_errno = errno; + closedir(dir); + errno = saved_errno; + return -1; +} + +void free_devtree(struct ft_cxt *root) +{ + if ((root != NULL) && root->bph != NULL) { + free(root->bph); + root->bph = NULL; + } +} + +int make_devtree(struct ft_cxt *root, + uint32_t domid, + uint32_t mem_mb, + unsigned long rma_bytes, + unsigned long shadow_mb, + unsigned long initrd_base, + unsigned long initrd_len, + const char *bootargs, + uint64_t shared_info_paddr, + unsigned long console_evtchn, + uint64_t console_paddr, + unsigned long store_evtchn, + uint64_t store_paddr) +{ + struct boot_param_header *bph = NULL; + uint64_t val[2]; + uint32_t val32[2]; + unsigned long remaining; + unsigned long initrd_end = initrd_base + initrd_len; + int64_t shadow_mb_log; + uint64_t pft_size; + char cpupath[MAX_PATH]; + const char *propfilter[] = { "ibm", "linux,", NULL }; + char *cpupath_copy = NULL; + char *cpuname = NULL; + int saved_errno; + int dtb_fd = -1; + uint32_t cpu0_phandle = get_phandle(); + uint32_t xen_phandle = get_phandle(); + uint32_t rma_phandle = get_phandle(); + + /* initialize bph to prevent double free on error path */ + root->bph = NULL; + + /* carve out space for bph */ + if ((bph = (struct boot_param_header *)malloc(BPH_SIZE)) == NULL) { + PERROR("%s: Failed to malloc bph buffer size", __func__); + goto error; + } + + /* NB: struct ft_cxt root defined at top of file */ + /* root = Tree() */ + ft_begin(root, bph, BPH_SIZE); + + /* you MUST set reservations BEFORE _starting_the_tree_ */ + + /* reserve shared_info_t page */ + if (shared_info_paddr) { + val[0] = cpu_to_be64((u64) shared_info_paddr); + val[1] = cpu_to_be64((u64) PAGE_SIZE); + ft_add_rsvmap(root, val[0], val[1]); + } + + /* reserve console page for domU */ + if (console_paddr) { + val[0] = cpu_to_be64((u64) console_paddr); + val[1] = cpu_to_be64((u64) 
PAGE_SIZE); + ft_add_rsvmap(root, val[0], val[1]); + } + + /* reserve xen store page for domU */ + if (store_paddr) { + val[0] = cpu_to_be64((u64) store_paddr); + val[1] = cpu_to_be64((u64) PAGE_SIZE); + ft_add_rsvmap(root, val[0], val[1]); + } + + /* reserve space for initrd if needed */ + if ( initrd_len > 0 ) { + val[0] = cpu_to_be64((u64) initrd_base); + val[1] = cpu_to_be64((u64) initrd_len); + ft_add_rsvmap(root, val[0], val[1]); + } + + /* NB: ft_add_rsvmap() already terminates with a NULL reservation for us */ + + /* done with reservations, _starting_the_tree_ */ + ft_begin_tree(root); + + /* make root node */ + ft_begin_node(root, ""); + + /* root.addprop('device_type', 'chrp-but-not-really\0') */ + ft_prop_str(root, "device_type", "chrp-but-not-really"); + + /* root.addprop('#size-cells', 2) */ + ft_prop_int(root, "#size-cells", 2); + + /* root.addprop('#address-cells', 2) */ + ft_prop_int(root, "#address-cells", 2); + + /* root.addprop('model', 'Momentum,Maple-D\0') */ + ft_prop_str(root, "model", "Momentum,Maple-D"); + + /* root.addprop('compatible', 'Momentum,Maple\0') */ + ft_prop_str(root, "compatible", "Momentum,Maple"); + + /* start chosen node */ + ft_begin_node(root, "chosen"); + + /* chosen.addprop('cpu', cpu0.get_phandle()) */ + ft_prop_int(root, "cpu", cpu0_phandle); + + /* chosen.addprop('rma', rma.get_phandle()) */ + ft_prop_int(root, "memory", rma_phandle); + + /* chosen.addprop('linux,stdout-path', '/xen/console\0') */ + ft_prop_str(root, "linux,stdout-path", "/xen/console"); + + /* chosen.addprop('interrupt-controller, xen.get_phandle()) */ + ft_prop_int(root, "interrupt-controller", xen_phandle); + + /* chosen.addprop('bootargs', imghandler.cmdline + '\0') */ + if ( bootargs != NULL ) + ft_prop_str(root, "bootargs", bootargs); + + /* mark where the initrd is, if present */ + if ( initrd_len > 0 ) { + val[0] = cpu_to_be64((u64) initrd_base); + val[1] = cpu_to_be64((u64) initrd_end); + ft_prop(root, "linux,initrd-start", &(val[0]), sizeof(val[0])); + ft_prop(root, "linux,initrd-end", &(val[1]), sizeof(val[1])); + } + + /* end chosen node */ + ft_end_node(root); + + /* xen = root.addnode('xen') */ + ft_begin_node(root, "xen"); + + /* xen.addprop('version', 'Xen-3.0-unstable\0') */ + ft_prop_str(root, "compatible", "Xen-3.0-unstable"); + + /* xen.addprop('reg', long(imghandler.vm.domid), long(0)) */ + val[0] = cpu_to_be64((u64) domid); + val[1] = cpu_to_be64((u64) 0); + ft_prop(root, "reg", val, sizeof(val)); + + /* point to shared_info_t page base addr */ + val[0] = cpu_to_be64((u64) shared_info_paddr); + val[1] = cpu_to_be64((u64) PAGE_SIZE); + ft_prop(root, "shared-info", val, sizeof(val)); + + /* xen.addprop('domain-name', imghandler.vm.getName() + '\0') */ + /* libxc doesn't know the domain name, that is purely a xend thing */ + /* ft_prop_str(root, "domain-name", domain_name); */ + + /* add xen/linux,phandle for chosen/interrupt-controller */ + ft_prop_int(root, "linux,phandle", xen_phandle); + + if (console_paddr != 0) { + /* xencons = xen.addnode('console') */ + ft_begin_node(root, "console"); + + /* console_paddr */ + val[0] = cpu_to_be64((u64) console_paddr); + val[1] = cpu_to_be64((u64) PAGE_SIZE); + ft_prop(root, "reg", val, sizeof(val)); + + /* xencons.addprop('interrupts', console_evtchn, 0) */ + val32[0] = cpu_to_be32((u32) console_evtchn); + val32[1] = cpu_to_be32((u32) 0); + ft_prop(root, "interrupts", val32, sizeof(val32)); + + /* end of console */ + ft_end_node(root); + } + + if (store_paddr != 0) { + /* start store node */ + ft_begin_node(root, 
"store"); + + /* store paddr */ + val[0] = cpu_to_be64((u64) store_paddr); + val[1] = cpu_to_be64((u64) PAGE_SIZE); + ft_prop(root, "reg", val, sizeof(val)); + + /* store event channel */ + val32[0] = cpu_to_be32((u32) store_evtchn); + val32[1] = cpu_to_be32((u32) 0); + ft_prop(root, "interrupts", val32, sizeof(val32)); + + /* end of store */ + ft_end_node(root); + } + + /* end of xen node */ + ft_end_node(root); + + /* rma = root.addnode('memory@0') */ + ft_begin_node(root, "memory@0"); + + /* rma.addprop('reg', long(0), long(rma_bytes)) */ + val[0] = cpu_to_be64((u64) 0); + val[1] = cpu_to_be64((u64) rma_bytes); + ft_prop(root, "reg", val, sizeof(val)); + + /* rma.addprop('device_type', 'memory\0') */ + ft_prop_str(root, "device_type", "memory"); + + /* add linux,phandle for chosen/rma node */ + ft_prop_int(root, "linux,phandle", rma_phandle); + + /* end of memory@0 */ + ft_end_node(root); + + /* calculate remaining bytes from total - rma size */ + remaining = (mem_mb * 1024 * 1024) - rma_bytes; + + /* memory@<rma_bytes> is all remaining memory after RMA */ + if (remaining > 0) + { + char mem[MAX_PATH]; + + if (snprintf(mem, sizeof(mem), "memory@%lx", + rma_bytes) >= sizeof(mem)) { + PERROR("%s: failed to build memory string", __func__); + goto error; + } + + /* memory@<rma_bytes> is all remaining memory after RMA */ + ft_begin_node(root, mem); + + /* mem.addprop('reg', long(rma_bytes), long(remaining)) */ + val[0] = cpu_to_be64((u64) rma_bytes); + val[1] = cpu_to_be64((u64) remaining); + ft_prop(root, "reg", val, sizeof(val)); + + /* mem.addprop('device_type', 'memory\0') */ + ft_prop_str(root, "device_type", "memory"); + + /* end memory@<rma_bytes> node */ + ft_end_node(root); + } + + /* add CPU nodes */ + /* cpus = root.addnode('cpus') */ + ft_begin_node(root, "cpus"); + + /* cpus.addprop('smp-enabled') */ + ft_prop(root, "smp-enabled", NULL, 0); + + /* cpus.addprop('#size-cells', 0) */ + ft_prop_int(root, "#size-cells", 0); + + /* cpus.addprop('#address-cells', 1) */ + ft_prop_int(root, "#address-cells", 1); + + /* + * Copy all properties the system firmware gave us from a + * CPU node in the device tree. 
+ */ + if (find_cpu0(cpupath, sizeof(cpupath)) <= 0) { + PERROR("%s: failed find cpu0 node in host devtree", __func__); + goto error; + } + + /* get the basename from path to cpu device */ + if ((cpupath_copy = strdup(cpupath)) == NULL) { + PERROR("%s: failed to dupe string", __func__); + goto error; + } + if ((cpuname = basename(cpupath_copy)) == NULL) { + PERROR("%s: basename() failed", __func__); + goto error; + } + + /* start node for the cpu */ + ft_begin_node(root, cpuname); + + /* strdup() mallocs memory */ + if ( cpupath_copy != NULL ) { + free(cpupath_copy); + cpupath_copy = NULL; + } + + /* copy over most properties from host tree for cpu */ + if (copynode(root, cpupath, propfilter) < 0) { + PERROR("%s: failed to copy node", __func__); + goto error; + } + + /* calculate the pft-size */ + shadow_mb_log = (int)log2((double)shadow_mb); + pft_size = shadow_mb_log + 20; + + val32[0] = cpu_to_be32((u32) 0); + val32[1] = cpu_to_be32((u32) pft_size); + ft_prop(root, "ibm,pft-size", val32, sizeof(val32)); + + /* make phandle for cpu0 */ + ft_prop_int(root, "linux,phandle", cpu0_phandle); + + /* end <cpuname> node */ + ft_end_node(root); + + /* end cpus node */ + ft_end_node(root); + + /* end root node */ + ft_end_node(root); + + /* end of the tree */ + if (ft_end_tree(root) != 0) { + PERROR("%s: failed to end tree", __func__); + goto error; + } + + /* write a copy of the tree to a file */ + if ((dtb_fd = open(DTB_FILE , O_RDWR)) == -1) { + PERROR("%s: failed to open file %s", __func__, DTB_FILE); + goto error; + } + + if (write(dtb_fd, (const void *)bph, bph->totalsize) != bph->totalsize) { + PERROR("%s: failed to write blob to file", __func__); + goto error; + } + + return 0; + +error: + saved_errno = errno; + + /* strdup() mallocs memory */ + if ( cpupath_copy != NULL ) { + free(cpupath_copy); + cpupath_copy = NULL; + } + + /* free bph buffer */ + free_devtree(root); + + if (dtb_fd) + close(dtb_fd); + + errno = saved_errno; + return -1; +} diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/mk_flatdevtree.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/powerpc64/mk_flatdevtree.h Thu Feb 15 14:09:39 2007 -0700 @@ -0,0 +1,47 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright IBM Corporation 2007 + * + * Authors: Ryan Harper <ryanh@xxxxxxxxxx> + */ + +#ifndef MK_FLATDEVTREE_H +#define MK_FLATDEVTREE_H + +#include "flatdevtree_env.h" +#include "flatdevtree.h" + +extern void free_devtree(struct ft_cxt *root); +extern int make_devtree(struct ft_cxt *root, + uint32_t domid, + uint32_t mem_mb, + unsigned long rma_bytes, + unsigned long shadow_mb, + unsigned long initrd_base, + unsigned long initrd_len, + const char *bootargs, + uint64_t shared_info_paddr, + unsigned long console_evtchn, + uint64_t console_paddr, + unsigned long store_evtchn, + uint64_t store_paddr); + +#define MAX_PATH 200 +#define BUFSIZE 1024 +#define BPH_SIZE 16*1024 +#define DTB_FILE "/tmp/domU.dtb" + +#endif /* MK_FLATDEVTREE_H */ diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/utils.c --- a/tools/libxc/powerpc64/utils.c Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/libxc/powerpc64/utils.c Thu Feb 15 14:09:39 2007 -0700 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation 2006 + * Copyright IBM Corporation 2006, 2007 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> @@ -36,29 +36,6 @@ #include "flatdevtree_env.h" #include "flatdevtree.h" #include "utils.h" - -unsigned long get_rma_pages(void *devtree) -{ - void *rma; - uint64_t rma_reg[2]; - int rc; - - rma = ft_find_node(devtree, "/memory@0"); - if (rma == NULL) { - DPRINTF("couldn't find /memory@0\n"); - return 0; - } - rc = ft_get_prop(devtree, rma, "reg", rma_reg, sizeof(rma_reg)); - if (rc < 0) { - DPRINTF("couldn't get /memory@0/reg\n"); - return 0; - } - if (rma_reg[0] != 0) { - DPRINTF("RMA did not start at 0\n"); - return 0; - } - return rma_reg[1] >> PAGE_SHIFT; -} int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array, unsigned long nr_pages) diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/utils.h --- a/tools/libxc/powerpc64/utils.h Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/libxc/powerpc64/utils.h Thu Feb 15 14:09:39 2007 -0700 @@ -13,13 +13,12 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright (C) IBM Corporation 2006 + * Copyright IBM Corporation 2006, 2007 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ -extern unsigned long get_rma_pages(void *devtree); extern int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array, unsigned long nr_pages); extern int install_image(int xc_handle, int domid, xen_pfn_t *page_array, diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/xc_linux_build.c --- a/tools/libxc/powerpc64/xc_linux_build.c Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/libxc/powerpc64/xc_linux_build.c Thu Feb 15 14:09:39 2007 -0700 @@ -13,9 +13,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* - * Copyright (C) IBM Corporation 2006 + * Copyright IBM Corporation 2006, 2007 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Ryan Harper <ryanh@xxxxxxxxxx> */ #include <stdio.h> @@ -32,10 +33,12 @@ #include <xc_private.h> #include <xg_private.h> #include <xenctrl.h> +#include <xen/arch-powerpc.h> #include "flatdevtree_env.h" #include "flatdevtree.h" #include "utils.h" +#include "mk_flatdevtree.h" #define INITRD_ADDR (24UL << 20) #define DEVTREE_ADDR (16UL << 20) @@ -78,85 +81,6 @@ static int init_boot_vcpu( return rc; } -static int load_devtree( - int xc_handle, - int domid, - xen_pfn_t *page_array, - void *devtree, - unsigned long devtree_addr, - uint64_t initrd_base, - unsigned long initrd_len, - start_info_t *start_info __attribute__((unused)), - unsigned long start_info_addr) -{ - uint32_t si[4] = {0, start_info_addr, 0, 0x1000}; - struct boot_param_header *header; - void *chosen; - void *xen; - uint64_t initrd_end = initrd_base + initrd_len; - unsigned int devtree_size; - int rc = 0; - - DPRINTF("adding initrd props\n"); - - chosen = ft_find_node(devtree, "/chosen"); - if (chosen == NULL) { - DPRINTF("couldn't find /chosen\n"); - return -1; - } - - xen = ft_find_node(devtree, "/xen"); - if (xen == NULL) { - DPRINTF("couldn't find /xen\n"); - return -1; - } - - /* initrd-start */ - rc = ft_set_prop(&devtree, chosen, "linux,initrd-start", - &initrd_base, sizeof(initrd_base)); - if (rc < 0) { - DPRINTF("couldn't set /chosen/linux,initrd-start\n"); - return rc; - } - - /* initrd-end */ - rc = ft_set_prop(&devtree, chosen, "linux,initrd-end", - &initrd_end, sizeof(initrd_end)); - if (rc < 0) { - DPRINTF("couldn't set /chosen/linux,initrd-end\n"); - return rc; - } - - rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len); - if (rc < 0) { - DPRINTF("couldn't set initrd reservation\n"); - return ~0UL; - } - - /* start-info (XXX being removed soon) */ - rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si)); - if (rc < 0) { - DPRINTF("couldn't set /xen/start-info\n"); - return rc; - } - - header = devtree; - devtree_size = header->totalsize; - { - static const char dtb[] = "/tmp/xc_domU.dtb"; - int dfd = creat(dtb, 0666); - if (dfd != -1) { - write(dfd, devtree, devtree_size); - close(dfd); - } else - DPRINTF("could not open(\"%s\")\n", dtb); - } - - DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, devtree_size); - return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR, - devtree_size); -} - static int load_initrd( int xc_handle, int domid, @@ -186,46 +110,46 @@ out: return rc; } -static unsigned long create_start_info( - void *devtree, start_info_t *start_info, - unsigned int console_evtchn, unsigned int store_evtchn, - unsigned long nr_pages, unsigned long rma_pages) -{ - unsigned long start_info_addr; - uint64_t rma_top; - int rc; - - memset(start_info, 0, sizeof(*start_info)); - snprintf(start_info->magic, sizeof(start_info->magic), - "xen-%d.%d-powerpc64HV", 3, 0); - - rma_top = rma_pages << PAGE_SHIFT; - DPRINTF("RMA top = 0x%"PRIX64"\n", rma_top); - - start_info->nr_pages = nr_pages; - start_info->shared_info = rma_top - PAGE_SIZE; - start_info->store_mfn = (rma_top >> PAGE_SHIFT) - 2; - start_info->store_evtchn = store_evtchn; - start_info->console.domU.mfn = (rma_top >> PAGE_SHIFT) - 3; - start_info->console.domU.evtchn = console_evtchn; - start_info_addr = rma_top - 4*PAGE_SIZE; - - rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE); - if (rc < 0) { - DPRINTF("couldn't set start_info reservation\n"); - return ~0UL; - } - - 
-    return start_info_addr;
-}
-
 static void free_page_array(xen_pfn_t *page_array)
 {
     free(page_array);
 }
-
+static int check_memory_config(int rma_log, unsigned int mem_mb)
+{
+    u64 mem_kb = ((u64)mem_mb << 10);
+    u64 rma_kb = (1ULL << rma_log) >> 10;
+
+    switch(rma_log)
+    {
+    case 26:
+    case 27:
+    case 28:
+    case 30:
+    case 34:
+    case 38:
+        if (mem_kb < rma_kb) {
+            DPRINTF("Domain memory must be at least %dMB\n",
+                    (int)((1ULL << rma_log) >> 20));
+            break;
+        }
+
+        if (mem_kb % (16 << 10)) {
+            DPRINTF("Domain memory %dMB must be a multiple of 16MB\n",
+                    mem_mb);
+
+            break;
+        }
+
+        /* rma_log and mem_mb OK */
+        return 0;
+
+    default:
+        DPRINTF("Invalid rma_log (%d)\n", rma_log);
+    }
+
+    return 1;
+}

 int xc_linux_build(int xc_handle,
                    uint32_t domid,
@@ -238,10 +162,8 @@ int xc_linux_build(int xc_handle,
                    unsigned int store_evtchn,
                    unsigned long *store_mfn,
                    unsigned int console_evtchn,
-                   unsigned long *console_mfn,
-                   void *devtree)
-{
-    start_info_t start_info;
+                   unsigned long *console_mfn)
+{
     struct domain_setup_info dsi;
     xen_pfn_t *page_array = NULL;
     unsigned long nr_pages;
@@ -249,19 +171,59 @@ int xc_linux_build(int xc_handle,
     unsigned long kern_addr;
     unsigned long initrd_base = 0;
     unsigned long initrd_len = 0;
-    unsigned long start_info_addr;
     unsigned long rma_pages;
+    unsigned long shadow_mb;
+    u64 shared_info_paddr;
+    u64 store_paddr;
+    u64 console_paddr;
+    u32 remaining_kb;
+    u32 extent_order;
+    u64 nr_extents;
+    int rma_log = 26; /* 64MB RMA */
     int rc = 0;
+    int op;
+    struct ft_cxt devtree;

     DPRINTF("%s\n", __func__);

     nr_pages = mem_mb << (20 - PAGE_SHIFT);
     DPRINTF("nr_pages 0x%lx\n", nr_pages);

-    rma_pages = get_rma_pages(devtree);
+    rma_pages = (1 << rma_log) >> PAGE_SHIFT;
     if (rma_pages == 0) {
-        rc = -1;
-        goto out;
+        rc = -1;
+        goto out;
+    }
+
+    /* validate rma_log and domain memory config */
+    if (check_memory_config(rma_log, mem_mb)) {
+        rc = -1;
+        goto out;
+    }
+
+    /* alloc RMA */
+    if (xc_alloc_real_mode_area(xc_handle, domid, rma_log)) {
+        rc = -1;
+        goto out;
+    }
+
+    /* subtract already allocated RMA to determine remaining KB to alloc */
+    remaining_kb = (nr_pages - rma_pages) * (PAGE_SIZE / 1024);
+    DPRINTF("totalmem - RMA = %dKB\n", remaining_kb);
+
+    /* to allocate in 16MB chunks, we need to determine the order of
+     * the number of PAGE_SIZE pages contained in 16MB. */
+    extent_order = 24 - 12; /* extent_order = log2((1 << 24) / (1 << 12)) */
+    nr_extents = (remaining_kb / (PAGE_SIZE/1024)) >> extent_order;
+    DPRINTF("allocating memory in %llu chunks of %luMB\n", nr_extents,
+            (((1 << extent_order) >> 10) * PAGE_SIZE) >> 10);
+
+    /* now allocate the remaining memory as large-order allocations */
+    DPRINTF("increase_reservation(%u, %llu, %u)\n", domid, nr_extents, extent_order);
+    if (xc_domain_memory_increase_reservation(xc_handle, domid, nr_extents,
+                                              extent_order, 0, NULL)) {
+        rc = -1;
+        goto out;
     }

     if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) {
@@ -285,27 +247,46 @@ int xc_linux_build(int xc_handle,
         }
     }

-    /* start_info stuff: about to be removed */
-    start_info_addr = create_start_info(devtree, &start_info, console_evtchn,
-                                        store_evtchn, nr_pages, rma_pages);
-    *console_mfn = page_array[start_info.console.domU.mfn];
-    *store_mfn = page_array[start_info.store_mfn];
-    if (install_image(xc_handle, domid, page_array, &start_info,
-                      start_info_addr, sizeof(start_info_t))) {
-        rc = -1;
-        goto out;
-    }
-
-    if (devtree) {
-        DPRINTF("loading flattened device tree\n");
-        devtree_addr = DEVTREE_ADDR;
-        if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr,
-                         initrd_base, initrd_len, &start_info,
-                         start_info_addr)) {
-            DPRINTF("couldn't load flattened device tree.\n");
-            rc = -1;
-            goto out;
-        }
+    /* fetch the current shadow_memory value for this domain */
+    op = XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION;
+    if (xc_shadow_control(xc_handle, domid, op, NULL, 0,
+                          &shadow_mb, 0, NULL) < 0 ) {
+        rc = -1;
+        goto out;
+    }
+
+    /* determine shared_info, console, and store paddr */
+    shared_info_paddr = (rma_pages << PAGE_SHIFT) -
+                        (RMA_SHARED_INFO * PAGE_SIZE);
+    console_paddr = (rma_pages << PAGE_SHIFT) - (RMA_CONSOLE * PAGE_SIZE);
+    store_paddr = (rma_pages << PAGE_SHIFT) - (RMA_STORE * PAGE_SIZE);
+
+    /* map paddrs to mfns */
+    *store_mfn = page_array[(xen_pfn_t)(store_paddr >> PAGE_SHIFT)];
+    *console_mfn = page_array[(xen_pfn_t)(console_paddr >> PAGE_SHIFT)];
+    DPRINTF("console_mfn->%08lx store_mfn->%08lx\n", *console_mfn,
+            *store_mfn);
+
+    /* build the devtree here */
+    DPRINTF("constructing devtree\n");
+    if (make_devtree(&devtree, domid, mem_mb, (rma_pages << PAGE_SHIFT),
+                     shadow_mb, initrd_base, initrd_len, cmdline,
+                     shared_info_paddr, console_evtchn, console_paddr,
+                     store_evtchn, store_paddr) < 0) {
+        DPRINTF("failed to create flattened device tree\n");
+        rc = -1;
+        goto out;
+    }
+
+    devtree_addr = DEVTREE_ADDR;
+    DPRINTF("loading flattened device tree to 0x%lx[0x%x]\n",
+            devtree_addr, devtree.bph->totalsize);
+
+    if (install_image(xc_handle, domid, page_array, (void *)devtree.bph,
+                      devtree_addr, devtree.bph->totalsize)) {
+        DPRINTF("couldn't load flattened device tree.\n");
+        rc = -1;
+        goto out;
     }

     if (init_boot_vcpu(xc_handle, domid, &dsi, devtree_addr, kern_addr)) {
@@ -314,6 +295,7 @@
 out:
+    free_devtree(&devtree);
     free_page_array(page_array);
     return rc;
 }
diff -r ac18d251df63 -r 9529d667d042 tools/libxc/powerpc64/xc_prose_build.c
--- a/tools/libxc/powerpc64/xc_prose_build.c	Thu Feb 15 13:13:36 2007 -0700
+++ b/tools/libxc/powerpc64/xc_prose_build.c	Thu Feb 15 14:09:39 2007 -0700
@@ -13,7 +13,7 @@
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* - * Copyright (C) IBM Corporation 2006 + * Copyright IBM Corporation 2006, 2007 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> * Jonathan Appavoo <jappavoo@xxxxxxxxxx> @@ -34,18 +34,14 @@ #include <xg_private.h> #include <xenctrl.h> -#include "flatdevtree_env.h" -#include "flatdevtree.h" #include "utils.h" #define INITRD_ADDR (24UL << 20) -#define DEVTREE_ADDR (16UL << 20) static int init_boot_vcpu( int xc_handle, int domid, struct domain_setup_info *dsi, - unsigned long devtree_addr, unsigned long kern_addr) { vcpu_guest_context_t ctxt; @@ -55,7 +51,7 @@ static int init_boot_vcpu( ctxt.user_regs.pc = dsi->v_kernentry; ctxt.user_regs.msr = 0; ctxt.user_regs.gprs[1] = 0; /* Linux uses its own stack */ - ctxt.user_regs.gprs[3] = devtree_addr; + ctxt.user_regs.gprs[3] = 0; ctxt.user_regs.gprs[4] = kern_addr; ctxt.user_regs.gprs[5] = 0; /* reserved for specifying OF handler */ /* There is a buggy kernel that does not zero the "local_paca", so @@ -79,85 +75,6 @@ static int init_boot_vcpu( return rc; } -static int load_devtree( - int xc_handle, - int domid, - xen_pfn_t *page_array, - void *devtree, - unsigned long devtree_addr, - uint64_t initrd_base, - unsigned long initrd_len, - start_info_t *start_info __attribute__((unused)), - unsigned long start_info_addr) -{ - uint32_t si[4] = {0, start_info_addr, 0, 0x1000}; - struct boot_param_header *header; - void *chosen; - void *xen; - uint64_t initrd_end = initrd_base + initrd_len; - unsigned int devtree_size; - int rc = 0; - - DPRINTF("adding initrd props\n"); - - chosen = ft_find_node(devtree, "/chosen"); - if (chosen == NULL) { - DPRINTF("couldn't find /chosen\n"); - return -1; - } - - xen = ft_find_node(devtree, "/xen"); - if (xen == NULL) { - DPRINTF("couldn't find /xen\n"); - return -1; - } - - /* initrd-start */ - rc = ft_set_prop(&devtree, chosen, "linux,initrd-start", - &initrd_base, sizeof(initrd_base)); - if (rc < 0) { - DPRINTF("couldn't set /chosen/linux,initrd-start\n"); - return rc; - } - - /* initrd-end */ - rc = ft_set_prop(&devtree, chosen, "linux,initrd-end", - &initrd_end, sizeof(initrd_end)); - if (rc < 0) { - DPRINTF("couldn't set /chosen/linux,initrd-end\n"); - return rc; - } - - rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len); - if (rc < 0) { - DPRINTF("couldn't set initrd reservation\n"); - return ~0UL; - } - - /* start-info (XXX being removed soon) */ - rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si)); - if (rc < 0) { - DPRINTF("couldn't set /xen/start-info\n"); - return rc; - } - - header = devtree; - devtree_size = header->totalsize; - { - static const char dtb[] = "/tmp/xc_domU.dtb"; - int dfd = creat(dtb, 0666); - if (dfd != -1) { - write(dfd, devtree, devtree_size); - close(dfd); - } else - DPRINTF("could not open(\"%s\")\n", dtb); - } - - DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, devtree_size); - return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR, - devtree_size); -} - static int load_initrd( int xc_handle, int domid, @@ -188,13 +105,12 @@ out: } static unsigned long create_start_info( - void *devtree, start_info_t *start_info, + start_info_t *start_info, unsigned int console_evtchn, unsigned int store_evtchn, unsigned long nr_pages, unsigned long rma_pages, const char *cmdline) { unsigned long start_info_addr; uint64_t rma_top; - int rc; memset(start_info, 0, sizeof(*start_info)); snprintf(start_info->magic, sizeof(start_info->magic), @@ -213,12 +129,6 @@ static unsigned long create_start_info( /* just in case we truncated cmdline with strncpy add 
0 at the end */ start_info->cmd_line[MAX_GUEST_CMDLINE-1]=0; start_info_addr = rma_top - 4*PAGE_SIZE; - - rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE); - if (rc < 0) { - DPRINTF("couldn't set start_info reservation\n"); - return ~0UL; - } return start_info_addr; } @@ -239,14 +149,12 @@ int xc_prose_build(int xc_handle, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, - unsigned long *console_mfn, - void *devtree) + unsigned long *console_mfn) { start_info_t start_info; struct domain_setup_info dsi; xen_pfn_t *page_array = NULL; unsigned long nr_pages; - unsigned long devtree_addr = 0; unsigned long kern_addr; unsigned long initrd_base = 0; unsigned long initrd_len = 0; @@ -261,11 +169,7 @@ int xc_prose_build(int xc_handle, nr_pages = mem_mb << (20 - PAGE_SHIFT); DPRINTF("nr_pages 0x%lx\n", nr_pages); - rma_pages = get_rma_pages(devtree); - if (rma_pages == 0) { - rc = -1; - goto out; - } + rma_pages = (1 << 26) >> PAGE_SHIFT; /* 64 MB */ if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) { rc = -1; @@ -289,9 +193,9 @@ int xc_prose_build(int xc_handle, } /* start_info stuff: about to be removed */ - start_info_addr = create_start_info(devtree, &start_info, console_evtchn, + start_info_addr = create_start_info(&start_info, console_evtchn, store_evtchn, nr_pages, - rma_pages, cmdline); + rma_pages, cmdline); *console_mfn = page_array[start_info.console.domU.mfn]; *store_mfn = page_array[start_info.store_mfn]; if (install_image(xc_handle, domid, page_array, &start_info, @@ -300,19 +204,7 @@ int xc_prose_build(int xc_handle, goto out; } - if (devtree) { - DPRINTF("loading flattened device tree\n"); - devtree_addr = DEVTREE_ADDR; - if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr, - initrd_base, initrd_len, &start_info, - start_info_addr)) { - DPRINTF("couldn't load flattened device tree.\n"); - rc = -1; - goto out; - } - } - - if (init_boot_vcpu(xc_handle, domid, &dsi, devtree_addr, kern_addr)) { + if (init_boot_vcpu(xc_handle, domid, &dsi, kern_addr)) { rc = -1; goto out; } diff -r ac18d251df63 -r 9529d667d042 tools/libxc/xc_dom.h --- a/tools/libxc/xc_dom.h Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/libxc/xc_dom.h Thu Feb 15 14:09:39 2007 -0700 @@ -6,11 +6,6 @@ typedef uint64_t xen_vaddr_t; typedef uint64_t xen_paddr_t; - -/* FIXME: temporary hack ... */ -#ifndef PRIpfn -#define PRIpfn "lx" -#endif struct xc_dom_seg { xen_vaddr_t vstart; diff -r ac18d251df63 -r 9529d667d042 tools/libxc/xc_dom_powerpc64.c --- a/tools/libxc/xc_dom_powerpc64.c Thu Feb 15 13:13:36 2007 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ -/* - * Xen domain builder -- ia64 bits. - * - * Most architecture-specific code for ia64 goes here. - * - fill architecture-specific structs. - * - * This code is licenced under the GPL. - * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>. 
- * - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <inttypes.h> - -#include <xen/xen.h> - -#include "xg_private.h" -#include "xc_dom.h" - -/* ------------------------------------------------------------------------ */ - -static int alloc_magic_pages(struct xc_dom_image *dom) -{ - /* allocate special pages */ - dom->low_top--; /* shared_info */ - dom->xenstore_pfn = --dom->low_top; - dom->console_pfn = --dom->low_top; - dom->start_info_pfn = --dom->low_top; - return 0; -} - -static int start_info(struct xc_dom_image *dom) -{ - start_info_t *si = - xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1); - - xc_dom_printf("%s\n", __FUNCTION__); - - snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0); - - si->nr_pages = dom->total_pages; - si->shared_info = (dom->total_pages - 1) << PAGE_SHIFT; - si->store_mfn = dom->xenstore_pfn; - si->store_evtchn = dom->store_evtchn; - si->console.domU.mfn = dom->console_pfn; - si->console.domU.evtchn = dom->console_evtchn; - return 0; -} - -static int shared_info(struct xc_dom_image *dom, void *ptr) -{ - shared_info_t *shared_info = ptr; - int i; - - xc_dom_printf("%s: called\n", __FUNCTION__); - - memset(shared_info, 0, sizeof(*shared_info)); - return 0; -} - -static int vcpu(struct xc_dom_image *dom, void *ptr) -{ - vcpu_guest_context_t *ctxt = ptr; - - xc_dom_printf("%s: called\n", __FUNCTION__); - - /* clear everything */ - memset(ctxt, 0, sizeof(*ctxt)); - - memset(&ctxt->user_regs, 0x55, sizeof(ctxt.user_regs)); - ctxt->user_regs.pc = dsi->v_kernentry; - ctxt->user_regs.msr = 0; - ctxt->user_regs.gprs[1] = 0; /* Linux uses its own stack */ - ctxt->user_regs.gprs[3] = devtree_addr; - ctxt->user_regs.gprs[4] = kern_addr; - ctxt->user_regs.gprs[5] = 0; - - /* There is a buggy kernel that does not zero the "local_paca", so - * we must make sure this register is 0 */ - ctxt->user_regs.gprs[13] = 0; - - return 0; -} - -/* ------------------------------------------------------------------------ */ - -static struct xc_dom_arch xc_dom_arch = { - .guest_type = "xen-3.0-powerpc64", - .page_shift = FIXME, - .alloc_magic_pages = alloc_magic_pages, - .start_info = start_info, - .shared_info = shared_info, - .vcpu = vcpu, -}; - -static void __init register_arch_hooks(void) -{ - xc_dom_register_arch_hooks(&xc_dom_arch); -} diff -r ac18d251df63 -r 9529d667d042 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/libxc/xenguest.h Thu Feb 15 14:09:39 2007 -0700 @@ -153,7 +153,6 @@ int xc_prose_build(int xc_handle, unsigned int store_evtchn, unsigned long *store_mfn, unsigned int console_evtchn, - unsigned long *console_mfn, - void *arch_args); + unsigned long *console_mfn); #endif /* XENGUEST_H */ diff -r ac18d251df63 -r 9529d667d042 tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/libxc/xg_private.c Thu Feb 15 14:09:39 2007 -0700 @@ -216,3 +216,15 @@ int xc_hvm_restore(int xc_handle, int io errno = ENOSYS; return -1; } + +__attribute__((weak)) int xc_get_hvm_param( + int handle, domid_t dom, int param, unsigned long *value) +{ + return -ENOSYS; +} + +__attribute__((weak)) int xc_set_hvm_param( + int handle, domid_t dom, int param, unsigned long value) +{ + return -ENOSYS; +} diff -r ac18d251df63 -r 9529d667d042 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/python/xen/lowlevel/xc/xc.c Thu Feb 15 14:09:39 2007 -0700 @@ -966,7 +966,7 @@ static PyObject 
*pyxc_alloc_real_mode_ar return NULL; if ( xc_alloc_real_mode_area(self->xc_handle, dom, log) ) - return PyErr_SetFromErrno(xc_error); + return pyxc_error_to_exception(); Py_INCREF(zero); return zero; @@ -980,33 +980,32 @@ static PyObject *pyxc_prose_build(XcObje char *image, *ramdisk = NULL, *cmdline = "", *features = NULL; int flags = 0; int store_evtchn, console_evtchn; + unsigned int mem_mb; unsigned long store_mfn = 0; unsigned long console_mfn = 0; - void *arch_args = NULL; int unused; static char *kwd_list[] = { "dom", "store_evtchn", - "console_evtchn", "image", + "console_evtchn", "image", "memsize", /* optional */ "ramdisk", "cmdline", "flags", - "features", "arch_args", NULL }; - - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssiss#", kwd_list, - &dom, &store_evtchn, + "features", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiis|ssis#", kwd_list, + &dom, &store_evtchn, &mem_mb, &console_evtchn, &image, /* optional */ &ramdisk, &cmdline, &flags, - &features, &arch_args, &unused) ) - return NULL; - - if ( xc_prose_build(self->xc_handle, dom, image, + &features, &unused) ) + return NULL; + + if ( xc_prose_build(self->xc_handle, dom, mem_mb, image, ramdisk, cmdline, features, flags, store_evtchn, &store_mfn, - console_evtchn, &console_mfn, - arch_args) != 0 ) { + console_evtchn, &console_mfn) != 0 ) { if (!errno) errno = EINVAL; - return PyErr_SetFromErrno(xc_error); + return pyxc_error_to_exception(); } return Py_BuildValue("{s:i,s:i}", "store_mfn", store_mfn, diff -r ac18d251df63 -r 9529d667d042 tools/python/xen/xend/FlatDeviceTree.py --- a/tools/python/xen/xend/FlatDeviceTree.py Thu Feb 15 13:13:36 2007 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,359 +0,0 @@ -#!/usr/bin/env python -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of version 2.1 of the GNU Lesser General Public -# License as published by the Free Software Foundation. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# Copyright (C) IBM Corp. 
2006 -# -# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> - -import os -import sys -import struct -import stat -import re -import glob -import math - -_host_devtree_root = '/proc/device-tree' - -_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning -_OF_DT_BEGIN_NODE = 0x1 -_OF_DT_END_NODE = 0x2 -_OF_DT_PROP = 0x3 -_OF_DT_END = 0x9 - -def _bincat(seq, separator=''): - '''Concatenate the contents of seq into a bytestream.''' - strs = [] - for item in seq: - if isinstance(item, int): - strs.append(struct.pack(">I", item)) - elif isinstance(item, long): - strs.append(struct.pack(">Q", item)) - else: - try: - strs.append(item.to_bin()) - except AttributeError, e: - strs.append(item) - return separator.join(strs) - -def _alignup(val, alignment): - return (val + alignment - 1) & ~(alignment - 1) - -def _pad(buf, alignment): - '''Pad bytestream with NULLs to specified alignment.''' - padlen = _alignup(len(buf), alignment) - return buf + '\0' * (padlen - len(buf)) - # not present in Python 2.3: - #return buf.ljust(_padlen, '\0') - -def _indent(item): - indented = [] - for line in str(item).splitlines(True): - indented.append(' ' + line) - return ''.join(indented) - -class _Property: - _nonprint = re.compile('[\000-\037\200-\377]') - def __init__(self, node, name, value): - self.node = node - self.value = value - self.name = name - self.node.tree.stradd(name) - - def __str__(self): - result = self.name - if self.value: - searchtext = self.value - # it's ok for a string to end in NULL - if searchtext.find('\000') == len(searchtext)-1: - searchtext = searchtext[:-1] - m = self._nonprint.search(searchtext) - if m: - bytes = struct.unpack("B" * len(self.value), self.value) - hexbytes = [ '%02x' % b for b in bytes ] - words = [] - for i in range(0, len(self.value), 4): - words.append(''.join(hexbytes[i:i+4])) - v = '<' + ' '.join(words) + '>' - else: - v = '"%s"' % self.value - result += ': ' + v - return result - - def to_bin(self): - offset = self.node.tree.stroffset(self.name) - return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \ - + _pad(self.value, 4) - -class _Node: - def __init__(self, tree, name): - self.tree = tree - self.name = name - self.props = {} - self.children = {} - self.phandle = 0 - - def __str__(self): - propstrs = [ _indent(prop) for prop in self.props.values() ] - childstrs = [ _indent(child) for child in self.children.values() ] - return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs), - '\n'.join(childstrs)) - - def to_bin(self): - name = _pad(self.name + '\0', 4) - return struct.pack('>I', _OF_DT_BEGIN_NODE) + \ - name + \ - _bincat(self.props.values()) + \ - _bincat(self.children.values()) + \ - struct.pack('>I', _OF_DT_END_NODE) - - def addprop(self, propname, *cells): - '''setprop with duplicate error-checking.''' - if propname in self.props: - raise AttributeError('%s/%s already exists' % (self.name, propname)) - self.setprop(propname, *cells) - - def setprop(self, propname, *cells): - self.props[propname] = _Property(self, propname, _bincat(cells)) - - def addnode(self, nodename): - '''newnode with duplicate error-checking.''' - if nodename in self.children: - raise AttributeError('%s/%s already exists' % (self.name, nodename)) - return self.newnode(nodename) - - def newnode(self, nodename): - node = _Node(self.tree, nodename) - self.children[nodename] = node - return node - - def getprop(self, propname): - return self.props[propname] - - def getchild(self, nodename): - return self.children[nodename] - - def get_phandle(self): - if 
self.phandle: - return self.phandle - self.phandle = self.tree.alloc_phandle() - self.addprop('linux,phandle', self.phandle) - return self.phandle - -class _Header: - def __init__(self): - self.magic = 0 - self.totalsize = 0 - self.off_dt_struct = 0 - self.off_dt_strings = 0 - self.off_mem_rsvmap = 0 - self.version = 0 - self.last_comp_version = 0 - self.boot_cpuid_phys = 0 - self.size_dt_strings = 0 - def to_bin(self): - return struct.pack('>9I', - self.magic, - self.totalsize, - self.off_dt_struct, - self.off_dt_strings, - self.off_mem_rsvmap, - self.version, - self.last_comp_version, - self.boot_cpuid_phys, - self.size_dt_strings) - -class _StringBlock: - def __init__(self): - self.table = [] - def to_bin(self): - return _bincat(self.table, '\0') + '\0' - def add(self, str): - self.table.append(str) - def getoffset(self, str): - return self.to_bin().index(str + '\0') - -class Tree(_Node): - def __init__(self): - self.last_phandle = 0 - self.strings = _StringBlock() - self.reserved = [(0, 0)] - _Node.__init__(self, self, '\0') - - def alloc_phandle(self): - self.last_phandle += 1 - return self.last_phandle - - def stradd(self, str): - return self.strings.add(str) - - def stroffset(self, str): - return self.strings.getoffset(str) - - def reserve(self, start, len): - self.reserved.insert(0, (start, len)) - - def to_bin(self): - # layout: - # header - # reservation map - # string block - # data block - - datablock = _Node.to_bin(self) - - r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ] - reserved = _bincat(r) - - strblock = _pad(self.strings.to_bin(), 4) - strblocklen = len(strblock) - - header = _Header() - header.magic = _OF_DT_HEADER - header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8) - header.off_dt_strings = header.off_mem_rsvmap + len(reserved) - header.off_dt_struct = header.off_dt_strings + strblocklen - header.version = 0x10 - header.last_comp_version = 0x10 - header.boot_cpuid_phys = 0 - header.size_dt_strings = strblocklen - - payload = reserved + \ - strblock + \ - datablock + \ - struct.pack('>I', _OF_DT_END) - header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8) - return _pad(header.to_bin(), 8) + payload - -def _readfile(fullpath): - '''Return full contents of a file.''' - f = file(fullpath, 'r') - data = f.read() - f.close() - return data - -def _find_first_cpu(dirpath): - '''Find the first node of type 'cpu' in a directory tree.''' - cpulist = glob.glob(os.path.join(dirpath, 'cpus', '*')) - for node in cpulist: - try: - data = _readfile(os.path.join(node, 'device_type')) - except IOError: - continue - if 'cpu' in data: - return node - raise IOError("couldn't find any CPU nodes under " + dirpath) - -def _copynode(node, dirpath, propfilter): - '''Copy all properties and children nodes from a directory tree.''' - dirents = os.listdir(dirpath) - for dirent in dirents: - fullpath = os.path.join(dirpath, dirent) - st = os.lstat(fullpath) - if stat.S_ISDIR(st.st_mode): - child = node.addnode(dirent) - _copynode(child, fullpath, propfilter) - elif stat.S_ISREG(st.st_mode) and propfilter(fullpath): - node.addprop(dirent, _readfile(fullpath)) - -def build(imghandler): - '''Construct a device tree by combining the domain's configuration and - the host's device tree.''' - root = Tree() - - # 1st reseravtion entry used for start_info, console, store, shared_info - root.reserve(0x3ffc000, 0x4000) - - # 2nd reservation enrty used for initrd, later on when we load the - # initrd we may fill this in with zeroes which signifies the end - # of the 
reservation map. So as to avoid adding a zero map now we - # put some bogus yet sensible numbers here. - root.reserve(0x1000000, 0x1000) - - root.addprop('device_type', 'chrp-but-not-really\0') - root.addprop('#size-cells', 2) - root.addprop('#address-cells', 2) - root.addprop('model', 'Momentum,Maple-D\0') - root.addprop('compatible', 'Momentum,Maple\0') - - xen = root.addnode('xen') - xen.addprop('start-info', long(0x3ffc000), long(0x1000)) - xen.addprop('version', 'Xen-3.0-unstable\0') - xen.addprop('reg', long(imghandler.vm.domid), long(0)) - xen.addprop('domain-name', imghandler.vm.getName() + '\0') - xencons = xen.addnode('console') - xencons.addprop('interrupts', 1, 0) - - # add memory nodes - totalmem = imghandler.vm.getMemoryTarget() * 1024 - rma_log = 26 ### imghandler.vm.info.get('powerpc_rma_log') - rma_bytes = 1 << rma_log - - # RMA node - rma = root.addnode('memory@0') - rma.addprop('reg', long(0), long(rma_bytes)) - rma.addprop('device_type', 'memory\0') - - # all the rest in a single node - remaining = totalmem - rma_bytes - if remaining > 0: - mem = root.addnode('memory@1') - mem.addprop('reg', long(rma_bytes), long(remaining)) - mem.addprop('device_type', 'memory\0') - - # add CPU nodes - cpus = root.addnode('cpus') - cpus.addprop('smp-enabled') - cpus.addprop('#size-cells', 0) - cpus.addprop('#address-cells', 1) - - # Copy all properties the system firmware gave us, except for 'linux,' - # properties, from the first CPU node in the device tree. Do this once for - # every vcpu. Hopefully all cpus are identical... - cpu0 = None - cpu0path = _find_first_cpu(_host_devtree_root) - def _nolinuxprops(fullpath): - return not os.path.basename(fullpath).startswith('linux,') - for i in range(imghandler.vm.getVCpuCount()): - # create new node and copy all properties - cpu = cpus.addnode('PowerPC,970@%d' % i) - _copynode(cpu, cpu0path, _nolinuxprops) - - # overwrite what we need to - shadow_mb = imghandler.vm.info.get('shadow_memory', 1) - shadow_mb_log = int(math.log(shadow_mb, 2)) - pft_size = shadow_mb_log + 20 - cpu.setprop('ibm,pft-size', 0, pft_size) - - # set default CPU - if cpu0 == None: - cpu0 = cpu - - chosen = root.addnode('chosen') - chosen.addprop('cpu', cpu0.get_phandle()) - chosen.addprop('memory', rma.get_phandle()) - chosen.addprop('linux,stdout-path', '/xen/console\0') - chosen.addprop('interrupt-controller', xen.get_phandle()) - chosen.addprop('bootargs', imghandler.cmdline + '\0') - # xc_linux_load.c will overwrite these 64-bit properties later - chosen.addprop('linux,initrd-start', long(0)) - chosen.addprop('linux,initrd-end', long(0)) - - if 1: - f = file('/tmp/domU.dtb', 'w') - f.write(root.to_bin()) - f.close() - - return root diff -r ac18d251df63 -r 9529d667d042 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Thu Feb 15 13:13:36 2007 -0700 +++ b/tools/python/xen/xend/image.py Thu Feb 15 14:09:39 2007 -0700 @@ -30,7 +30,6 @@ from xen.xend.server.netif import random from xen.xend.server.netif import randomMAC from xen.xend.xenstore.xswatch import xswatch from xen.xend import arch -from xen.xend import FlatDeviceTree xc = xen.lowlevel.xc.xc() @@ -219,39 +218,7 @@ class PPC_LinuxImageHandler(LinuxImageHa class PPC_LinuxImageHandler(LinuxImageHandler): ostype = "linux" - - def configure(self, vmConfig, imageConfig, deviceConfig): - LinuxImageHandler.configure(self, vmConfig, imageConfig, deviceConfig) - self.imageConfig = imageConfig - - def buildDomain(self): - store_evtchn = self.vm.getStorePort() - console_evtchn = self.vm.getConsolePort() 
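For readers following the removal of FlatDeviceTree.py above: the flattened tree it emitted is a 9-word big-endian header, a reservation map, a string block and a structure block, each piece padded to its alignment. A minimal C sketch of the same layout (names here are illustrative, not taken from the Xen tree, which now builds the tree in libxc):

    #include <stdint.h>

    #define OF_DT_HEADER     0xd00dfeedU  /* magic, as in the deleted module */
    #define OF_DT_BEGIN_NODE 0x1
    #define OF_DT_END_NODE   0x2
    #define OF_DT_PROP       0x3
    #define OF_DT_END        0x9

    struct fdt_header {                   /* all fields big-endian */
        uint32_t magic, totalsize;
        uint32_t off_dt_struct, off_dt_strings, off_mem_rsvmap;
        uint32_t version, last_comp_version;  /* both 0x10 above */
        uint32_t boot_cpuid_phys, size_dt_strings;
    };

    /* Round val up to alignment (a power of two), as _alignup() did. */
    static inline uint32_t alignup(uint32_t val, uint32_t alignment)
    {
        return (val + alignment - 1) & ~(alignment - 1);
    }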
- - mem_mb = self.getRequiredInitialReservation() / 1024 - - log.debug("domid = %d", self.vm.getDomid()) - log.debug("memsize = %d", mem_mb) - log.debug("image = %s", self.kernel) - log.debug("store_evtchn = %d", store_evtchn) - log.debug("console_evtchn = %d", console_evtchn) - log.debug("cmdline = %s", self.cmdline) - log.debug("ramdisk = %s", self.ramdisk) - log.debug("vcpus = %d", self.vm.getVCpuCount()) - log.debug("features = %s", self.vm.getFeatures()) - - devtree = FlatDeviceTree.build(self) - - return xc.linux_build(domid = self.vm.getDomid(), - memsize = mem_mb, - image = self.kernel, - store_evtchn = store_evtchn, - console_evtchn = console_evtchn, - cmdline = self.cmdline, - ramdisk = self.ramdisk, - features = self.vm.getFeatures(), - arch_args = devtree.to_bin()) - + def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): """@param shadow_mem_kb The configured shadow memory, in KiB. @param maxmem_kb The configured maxmem, in KiB. @@ -261,13 +228,10 @@ class PPC_LinuxImageHandler(LinuxImageHa return max(maxmem_kb / 64, shadow_mem_kb) -class PPC_ProseImageHandler(LinuxImageHandler): + +class PPC_ProseImageHandler(PPC_LinuxImageHandler): ostype = "prose" - - def configure(self, imageConfig, deviceConfig): - LinuxImageHandler.configure(self, imageConfig, deviceConfig) - self.imageConfig = imageConfig def buildDomain(self): store_evtchn = self.vm.getStorePort() @@ -285,8 +249,6 @@ class PPC_ProseImageHandler(LinuxImageHa log.debug("vcpus = %d", self.vm.getVCpuCount()) log.debug("features = %s", self.vm.getFeatures()) - devtree = FlatDeviceTree.build(self) - return xc.arch_prose_build(dom = self.vm.getDomid(), memsize = mem_mb, image = self.kernel, @@ -294,17 +256,7 @@ class PPC_ProseImageHandler(LinuxImageHa console_evtchn = console_evtchn, cmdline = self.cmdline, ramdisk = self.ramdisk, - features = self.vm.getFeatures(), - arch_args = devtree.to_bin()) - - def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): - """@param shadow_mem_kb The configured shadow memory, in KiB. - @param maxmem_kb The configured maxmem, in KiB. - @return The corresponding required amount of shadow memory, also in - KiB. - PowerPC currently uses "shadow memory" to refer to the hash table.""" - return max(maxmem_kb / 64, shadow_mem_kb) - + features = self.vm.getFeatures()) class HVMImageHandler(ImageHandler): diff -r ac18d251df63 -r 9529d667d042 unmodified_drivers/linux-2.6/platform-pci/platform-compat.c --- a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c Thu Feb 15 13:13:36 2007 -0700 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c Thu Feb 15 14:09:39 2007 -0700 @@ -12,6 +12,12 @@ static int system_state = 1; static int system_state = 1; EXPORT_SYMBOL(system_state); #endif + +static inline void ctrl_alt_del(void) +{ + kill_proc(1, SIGINT, 1); /* interrupt init */ +} +EXPORT_SYMBOL(ctrl_alt_del); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) size_t strcspn(const char *s, const char *reject) diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/Makefile --- a/xen/arch/powerpc/Makefile Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/Makefile Thu Feb 15 14:09:39 2007 -0700 @@ -51,8 +51,6 @@ obj-$(builtin_dom0) += dom0.o obj-y += firmware_image.o -obj-y += elf32.o - # These are extra warnings like for the arch/ppc directory but may not # allow the rest of the tree to build. 
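The sizing rule kept in getRequiredShadowMemory() above (max(maxmem_kb / 64, shadow_mem_kb), where "shadow memory" on PowerPC means the hash table) lines up with the ibm,pft-size encoding used by the deleted device-tree code. A worked example, with an assumed 1 GiB guest:

    /* maxmem_kb = 1 GiB = 1048576 KiB
     * HTAB      = max(1048576 / 64, shadow_mem_kb) = 16384 KiB = 16 MiB
     * shadow_mb = 16  ->  log2(16) = 4
     * ibm,pft-size = log2(shadow_mb) + 20 = 24, i.e. a 1 << 24 byte
     * (16 MiB) page frame table - the same 1/64-of-maxmem figure. */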
PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations @@ -64,7 +62,7 @@ CFLAGS += $(PPC_C_WARNINGS) # objects into a single ELF segment and to not link in any additional # objects that gcc would normally like to # -OMAGIC = -N -nodefaultlibs -nostartfiles +OMAGIC = -nodefaultlibs -nostartfiles firmware: of_handler/built_in.o $(TARGET_SUBARCH)/memcpy.o of-devtree.o $(CC) $(CFLAGS) $(OMAGIC) -e __ofh_start -Wl,-Ttext,0x0 $^ -o $@ diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/backtrace.c --- a/xen/arch/powerpc/backtrace.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/backtrace.c Thu Feb 15 14:09:39 2007 -0700 @@ -198,7 +198,6 @@ void show_backtrace_regs(struct cpu_user console_start_sync(); show_registers(regs); - printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr()); printk("hid4 0x%016lx\n", regs->hid4); printk("---[ backtrace ]---\n"); show_backtrace(regs->gprs[1], regs->lr, regs->pc); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/boot_of.c --- a/xen/arch/powerpc/boot_of.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/boot_of.c Thu Feb 15 14:09:39 2007 -0700 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005, 2006 + * Copyright IBM Corp. 2005, 2006, 2007 * * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> * Hollis Blanchard <hollisb@xxxxxxxxxx> @@ -43,6 +43,14 @@ static int of_out; static int of_out; static ulong eomem; +/* Track memory during early boot with a limited per-page bitmap. We need an + * allocator to tell us where we can place RTAS, our copy of the device tree. + * We could examine the "available" properties in memory nodes, but we + * apparently can't depend on firmware to update those when we call "claim". So + * we need to track it ourselves. + * We can't dynamically allocate the bitmap, because we would need something + * to tell us where it's safe to allocate... + */ #define MEM_AVAILABLE_PAGES ((32 << 20) >> PAGE_SHIFT) static DECLARE_BITMAP(mem_available_pages, MEM_AVAILABLE_PAGES); @@ -530,6 +538,37 @@ static ulong boot_of_alloc(ulong size) pos = pos + i; } +} + +int boot_of_mem_avail(int pos, ulong *startpage, ulong *endpage) +{ + ulong freebit; + ulong usedbit; + + if (pos >= MEM_AVAILABLE_PAGES) + /* Stop iterating. */ + return -1; + + /* Find first free page. */ + freebit = find_next_zero_bit(mem_available_pages, MEM_AVAILABLE_PAGES, pos); + if (freebit >= MEM_AVAILABLE_PAGES) { + /* We know everything after MEM_AVAILABLE_PAGES is still free. */ + *startpage = MEM_AVAILABLE_PAGES << PAGE_SHIFT; + *endpage = ~0UL; + return freebit; + } + *startpage = freebit << PAGE_SHIFT; + + /* Now find first used page after that. */ + usedbit = find_next_bit(mem_available_pages, MEM_AVAILABLE_PAGES, freebit); + if (usedbit >= MEM_AVAILABLE_PAGES) { + /* We know everything after MEM_AVAILABLE_PAGES is still free. */ + *endpage = ~0UL; + return usedbit; + } + + *endpage = usedbit << PAGE_SHIFT; + return usedbit; } static ulong boot_of_mem_init(void) @@ -1302,7 +1341,7 @@ multiboot_info_t __init *boot_of_init( __func__, r3, r4, vec, r6, r7, orig_msr); - if ((vec >= (ulong)_start) && (vec <= (ulong)_end)) { + if (is_kernel(vec)) { of_panic("Hmm.. OF[0x%lx] seems to have stepped on our image " "that ranges: %p .. 
%p.\n", vec, _start, _end); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/domain.c Thu Feb 15 14:09:39 2007 -0700 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005, 2006 + * Copyright IBM Corp. 2005, 2006, 2007 * * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ @@ -105,13 +105,13 @@ void arch_domain_destroy(struct domain * static void machine_fail(const char *s) { - printk("%s failed, manual powercycle required!\n", s); + printk("%s failed, manual powercycle required!\n" + " spinning....\n", s); for (;;) sleep(); } void machine_halt(void) { - printk("machine_halt called: spinning....\n"); console_start_sync(); printk("%s called\n", __func__); rtas_halt(); @@ -121,7 +121,6 @@ void machine_halt(void) void machine_restart(char * __unused) { - printk("machine_restart called: spinning....\n"); console_start_sync(); printk("%s called\n", __func__); rtas_reboot(); @@ -152,22 +151,32 @@ void vcpu_destroy(struct vcpu *v) int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c) { + struct domain *d = v->domain; + memcpy(&v->arch.ctxt, &c.nat->user_regs, sizeof(c.nat->user_regs)); - printk("Domain[%d].%d: initializing\n", - v->domain->domain_id, v->vcpu_id); - - if (v->domain->arch.htab.order == 0) - panic("Page table never allocated for Domain: %d\n", - v->domain->domain_id); - if (v->domain->arch.rma_order == 0) - panic("RMA never allocated for Domain: %d\n", - v->domain->domain_id); + printk("Domain[%d].%d: initializing\n", d->domain_id, v->vcpu_id); + + if (d->arch.htab.order == 0) + panic("Page table never allocated for Domain: %d\n", d->domain_id); + if (d->arch.rma_order == 0) + panic("RMA never allocated for Domain: %d\n", d->domain_id); + + d->shared_info->wc_sec = dom0->shared_info->wc_sec; + d->shared_info->wc_nsec = dom0->shared_info->wc_nsec; + d->shared_info->arch.boot_timebase = dom0->shared_info->arch.boot_timebase; set_bit(_VCPUF_initialised, &v->vcpu_flags); cpu_init_vcpu(v); + return 0; +} + +int arch_vcpu_reset(struct vcpu *v) +{ + panic("%s: called for Dom%d[%d]\n", + __func__, v->domain->domain_id, v->vcpu_id); return 0; } diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/domain_build.c --- a/xen/arch/powerpc/domain_build.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/domain_build.c Thu Feb 15 14:09:39 2007 -0700 @@ -20,19 +20,18 @@ #include <xen/config.h> #include <xen/lib.h> -#include <xen/elf.h> #include <xen/sched.h> #include <xen/init.h> #include <xen/ctype.h> #include <xen/iocap.h> #include <xen/shadow.h> +#include <xen/domain.h> #include <xen/version.h> #include <asm/processor.h> #include <asm/papr.h> +#include <public/arch-powerpc.h> +#include <public/libelf.h> #include "oftree.h" - -extern int parseelfimage_32(struct domain_setup_info *dsi); -extern int loadelfimage_32(struct domain_setup_info *dsi); /* opt_dom0_mem: memory allocated to domain 0. 
*/ static unsigned int dom0_nrpages; @@ -51,63 +50,18 @@ static unsigned int opt_dom0_shadow; static unsigned int opt_dom0_shadow; boolean_param("dom0_shadow", opt_dom0_shadow); -int elf_sanity_check(const Elf_Ehdr *ehdr) -{ - if (IS_ELF(*ehdr)) - /* we are happy with either */ - if ((ehdr->e_ident[EI_CLASS] == ELFCLASS32 - && ehdr->e_machine == EM_PPC) - || (ehdr->e_ident[EI_CLASS] == ELFCLASS64 - && ehdr->e_machine == EM_PPC64)) { - if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB - && ehdr->e_type == ET_EXEC) - return 1; - } - printk("DOM0 image is not a Xen-compatible Elf image.\n"); - return 0; -} - /* adapted from common/elf.c */ #define RM_MASK(a,l) ((a) & ((1UL << (l)) - 1)) - -static int rm_loadelfimage_64(struct domain_setup_info *dsi, ulong rma) -{ - char *elfbase = (char *)dsi->image_addr; - Elf64_Ehdr *ehdr = (Elf64_Ehdr *)dsi->image_addr; - Elf64_Phdr *phdr; - int h; - - for (h = 0; h < ehdr->e_phnum; h++ ) - { - phdr = (Elf64_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize)); - if (!((phdr->p_type == PT_LOAD) && - ((phdr->p_flags & (PF_W|PF_X)) != 0))) - continue; - - if (phdr->p_filesz != 0) - memcpy((char *)(rma + RM_MASK(phdr->p_paddr, 42)), - elfbase + phdr->p_offset, - phdr->p_filesz); - if (phdr->p_memsz > phdr->p_filesz) - memset((char *)(rma + RM_MASK(phdr->p_paddr, 42) + phdr->p_filesz), - 0, phdr->p_memsz - phdr->p_filesz); - } - -#ifdef NOT_YET - loadelfsymtab(dsi, 1); -#endif - - return 0; -} int construct_dom0(struct domain *d, unsigned long image_start, unsigned long image_len, unsigned long initrd_start, unsigned long initrd_len, char *cmdline) { + struct elf_binary elf; + struct elf_dom_parms parms; int rc; struct vcpu *v = d->vcpu[0]; - struct domain_setup_info dsi; ulong dst; u64 *ofh_tree; uint rma_nrpages = 1 << d->arch.rma_order; @@ -115,11 +69,8 @@ int construct_dom0(struct domain *d, ulong rma = page_to_maddr(d->arch.rma_page); start_info_t *si; ulong eomem; - int am64 = 1; int preempt = 0; - ulong msr; - ulong pc; - ulong r2; + int vcpu; /* Sanity! */ BUG_ON(d->domain_id != 0); @@ -130,26 +81,27 @@ int construct_dom0(struct domain *d, cpu_init_vcpu(v); - memset(&dsi, 0, sizeof(struct domain_setup_info)); - dsi.image_addr = image_start; - dsi.image_len = image_len; - - printk("Trying Dom0 as 64bit ELF\n"); - if ((rc = parseelfimage(&dsi)) != 0) { - printk("Trying Dom0 as 32bit ELF\n"); - if ((rc = parseelfimage_32(&dsi)) != 0) - return rc; - am64 = 0; - } + printk("*** LOADING DOMAIN 0 ***\n"); + + rc = elf_init(&elf, (void *)image_start, image_len); + if (rc) + return rc; +#ifdef VERBOSE + elf_set_verbose(&elf); +#endif + elf_parse_binary(&elf); + if (0 != (elf_xen_parse(&elf, &parms))) + return rc; + + printk("Dom0 kernel: %s, paddr 0x%" PRIx64 " -> 0x%" PRIx64 "\n", + elf_64bit(&elf) ? "64-bit" : "32-bit", + elf.pstart, elf.pend); /* elf contains virtual addresses that can have the upper bits * masked while running in real mode, so we do the masking as well * as well */ - dsi.v_kernstart = RM_MASK(dsi.v_kernstart, 42); - dsi.v_kernend = RM_MASK(dsi.v_kernend, 42); - dsi.v_kernentry = RM_MASK(dsi.v_kernentry, 42); - - printk("*** LOADING DOMAIN 0 ***\n"); + parms.virt_kend = RM_MASK(parms.virt_kend, 42); + parms.virt_entry = RM_MASK(parms.virt_entry, 42); /* By default DOM0 is allocated all available memory. 
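The open-coded ELF handling deleted above gives way to the common libelf code; the essential calling sequence, condensed from the construct_dom0() hunks (a sketch with error paths trimmed, not the exact tree code):

    static int load_dom0_kernel(void *image, ulong len, void *dest)
    {
        struct elf_binary elf;
        struct elf_dom_parms parms;

        if (elf_init(&elf, image, len))      /* sanity-check the header  */
            return -1;
        elf_parse_binary(&elf);              /* find the paddr range     */
        if (elf_xen_parse(&elf, &parms))     /* read the Xen guest notes */
            return -1;

        /* Real mode sees only the low 42 bits, hence the RM_MASK(x, 42)
         * calls above: e.g. 0xc000000000400000 masks down to 0x400000. */

        elf.dest = dest;                     /* physical load target     */
        elf_load_binary(&elf);
        return elf_64bit(&elf) ? 64 : 32;
    }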
*/ d->max_pages = ~0U; @@ -210,6 +162,27 @@ int construct_dom0(struct domain *d, /* put stack below everything */ v->arch.ctxt.gprs[1] = dst - STACK_FRAME_OVERHEAD; + /* startup secondary processors */ + if ( opt_dom0_max_vcpus == 0 ) + opt_dom0_max_vcpus = num_online_cpus(); + if ( opt_dom0_max_vcpus > num_online_cpus() ) + opt_dom0_max_vcpus = num_online_cpus(); + if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) + opt_dom0_max_vcpus = MAX_VIRT_CPUS; +#ifdef BITS_PER_GUEST_LONG + if ( opt_dom0_max_vcpus > BITS_PER_GUEST_LONG(d) ) + opt_dom0_max_vcpus = BITS_PER_GUEST_LONG(d); +#endif + printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); + + for (vcpu = 1; vcpu < opt_dom0_max_vcpus; vcpu++) { + if (NULL == alloc_vcpu(dom0, vcpu, vcpu)) + panic("Error creating domain 0 vcpu %d\n", vcpu); + /* for now we pin Dom0 VCPUs to their corresponding CPUs */ + if (cpu_isset(vcpu, cpu_online_map)) + dom0->vcpu[vcpu]->cpu_affinity = cpumask_of_cpu(vcpu); + } + /* copy relative to Xen */ dst += rma; @@ -229,75 +202,56 @@ int construct_dom0(struct domain *d, printk("loading OFD: 0x%lx RMA: 0x%lx, 0x%lx\n", dst, dst - rma, oftree_len); memcpy((void *)dst, (void *)oftree, oftree_len); - dst = ALIGN_UP(dst + oftree_len, PAGE_SIZE); - if (am64) { - ulong kbase; - ulong *fdesc; - - printk("loading 64-bit Dom0: 0x%lx, in RMA:0x%lx\n", dst, dst - rma); - rm_loadelfimage_64(&dsi, dst); - - kbase = dst; - /* move dst to end of bss */ - dst = ALIGN_UP(dsi.v_kernend + dst, PAGE_SIZE); - - if ( initrd_len > 0 ) { - ASSERT( (dst - rma) + image_len < eomem ); - - printk("loading initrd: 0x%lx, 0x%lx\n", dst, initrd_len); - memcpy((void *)dst, (void *)initrd_start, initrd_len); - - si->mod_start = dst - rma; - si->mod_len = image_len; - - dst = ALIGN_UP(dst + initrd_len, PAGE_SIZE); - } else { - printk("no initrd\n"); - si->mod_start = 0; - si->mod_len = 0; - } - /* it may be a function descriptor */ - fdesc = (ulong *)(dsi.v_kernstart + dsi.v_kernentry + kbase); - - if (fdesc[2] == 0 - && ((fdesc[0] >= dsi.v_kernstart) - && (fdesc[0] < dsi.v_kernend)) /* text entry is in range */ - && ((fdesc[1] >= dsi.v_kernstart) /* toc can be > image */ - && (fdesc[1] < (dsi.v_kernend + (0x7fff * sizeof (ulong)))))) { - /* it is almost certainly a function descriptor */ - pc = RM_MASK(fdesc[0], 42) + kbase - rma; - r2 = RM_MASK(fdesc[1], 42) + kbase - rma; - } else { - pc = ((ulong)fdesc) - rma; - r2 = 0; - } - msr = MSR_SF; + /* Load the dom0 kernel. */ + elf.dest = (void *)dst; + elf_load_binary(&elf); + v->arch.ctxt.pc = dst - rma; + dst = ALIGN_UP(dst + parms.virt_kend, PAGE_SIZE); + + /* Load the initrd. 
*/ + if (initrd_len > 0) { + ASSERT((dst - rma) + image_len < eomem); + + printk("loading initrd: 0x%lx, 0x%lx\n", dst, initrd_len); + memcpy((void *)dst, (void *)initrd_start, initrd_len); + + si->mod_start = dst - rma; + si->mod_len = image_len; + + dst = ALIGN_UP(dst + initrd_len, PAGE_SIZE); } else { - printk("loading 32-bit Dom0: 0x%lx, in RMA:0x%lx\n", - dsi.v_kernstart + rma, dsi.v_kernstart); - dsi.v_start = rma; - loadelfimage_32(&dsi); - - pc = dsi.v_kernentry; - r2 = 0; - msr = 0; - } - + printk("no initrd\n"); + si->mod_start = 0; + si->mod_len = 0; + } + + if (elf_64bit(&elf)) { + v->arch.ctxt.msr = MSR_SF; + } else { + v->arch.ctxt.msr = 0; + } + v->arch.ctxt.gprs[2] = 0; v->arch.ctxt.gprs[3] = si->mod_start; v->arch.ctxt.gprs[4] = si->mod_len; + + printk("dom0 initial register state:\n" + " pc %016lx msr %016lx\n" + " r1 %016lx r2 %016lx r3 %016lx\n" + " r4 %016lx r5 %016lx\n", + v->arch.ctxt.pc, + v->arch.ctxt.msr, + v->arch.ctxt.gprs[1], + v->arch.ctxt.gprs[2], + v->arch.ctxt.gprs[3], + v->arch.ctxt.gprs[4], + v->arch.ctxt.gprs[5]); memset(si->cmd_line, 0, sizeof(si->cmd_line)); if ( cmdline != NULL ) strlcpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line)); - v->arch.ctxt.msr = msr; - v->arch.ctxt.pc = pc; - v->arch.ctxt.gprs[2] = r2; - - printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2); - ofd_dom0_fixup(d, *ofh_tree + rma, si); set_bit(_VCPUF_initialised, &v->vcpu_flags); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/elf32.c --- a/xen/arch/powerpc/elf32.c Thu Feb 15 13:13:36 2007 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -#define parseelfimage parseelfimage_32 -#define loadelfimage loadelfimage_32 -#define xen_elfnote_string xen_elfnote_string32 -#define xen_elfnote_numeric xen_elfnote_numeric32 -#define ELFSIZE 32 -#include "../../common/elf.c" - diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/hcalls.c --- a/xen/arch/powerpc/hcalls.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/hcalls.c Thu Feb 15 14:09:39 2007 -0700 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright IBM Corp. 2005, 2006, 2007 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> */ @@ -130,7 +130,7 @@ static void register_papr_hcall(ulong nu static void init_papr_hcalls(void) { - inithcall_t *hcall; + init_hcall_t *hcall; int i; /* initialize PAPR hcall table */ @@ -140,7 +140,7 @@ static void init_papr_hcalls(void) register_papr_hcall(i, do_ni_papr_hypercall); /* register the PAPR hcalls */ - for (hcall = &__inithcall_start; hcall < &__inithcall_end; hcall++) { + for (hcall = &__init_hcall_start; hcall < &__init_hcall_end; hcall++) { register_papr_hcall(hcall->number, hcall->handler); } } diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/memory.c --- a/xen/arch/powerpc/memory.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/memory.c Thu Feb 15 14:09:39 2007 -0700 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2006 + * Copyright IBM Corp. 2006, 2007 * * Authors: Dan Poff <poff@xxxxxxxxxx> * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> @@ -25,7 +25,7 @@ #include "oftree.h" #include "rtas.h" -#undef DEBUG +#define DEBUG #ifdef DEBUG #define DBG(fmt...) 
printk(fmt) #else @@ -42,8 +42,6 @@ unsigned long xenheap_phys_end; unsigned long xenheap_phys_end; static uint nr_pages; static ulong xenheap_size; -static ulong save_start; -static ulong save_end; struct membuf { ulong start; @@ -51,30 +49,6 @@ struct membuf { }; typedef void (*walk_mem_fn)(struct membuf *, uint); - -static ulong free_xenheap(ulong start, ulong end) -{ - start = ALIGN_UP(start, PAGE_SIZE); - end = ALIGN_DOWN(end, PAGE_SIZE); - - DBG("%s: 0x%lx - 0x%lx\n", __func__, start, end); - - /* need to do this better */ - if (save_start <= end && save_start >= start) { - DBG("%s: Go around the saved area: 0x%lx - 0x%lx\n", - __func__, save_start, save_end); - init_xenheap_pages(start, ALIGN_DOWN(save_start, PAGE_SIZE)); - xenheap_size += ALIGN_DOWN(save_start, PAGE_SIZE) - start; - - init_xenheap_pages(ALIGN_UP(save_end, PAGE_SIZE), end); - xenheap_size += end - ALIGN_UP(save_end, PAGE_SIZE); - } else { - init_xenheap_pages(start, end); - xenheap_size += end - start; - } - - return ALIGN_UP(end, PAGE_SIZE); -} static void set_max_page(struct membuf *mb, uint entries) { @@ -113,6 +87,7 @@ static void heap_init(struct membuf *mb, start_blk = xenheap_phys_end; } + DBG("boot free: %016lx - %016lx\n", start_blk, end_blk); init_boot_pages(start_blk, end_blk); total_pages += (end_blk - start_blk) >> PAGE_SHIFT; } @@ -141,72 +116,31 @@ static void ofd_walk_mem(void *m, walk_m } } -static void setup_xenheap(module_t *mod, int mcount) -{ - int i; - ulong freemem; - - freemem = ALIGN_UP((ulong)_end, PAGE_SIZE); - - for (i = 0; i < mcount; i++) { - u32 s; - - if (mod[i].mod_end == mod[i].mod_start) - continue; - - s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE); - - if (mod[i].mod_start > (ulong)_start && - mod[i].mod_start < (ulong)_end) { - /* mod was linked in */ - continue; - } - - if (s < freemem) - panic("module addresses must assend\n"); - - free_xenheap(freemem, s); - freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE); - - } - - /* the rest of the xenheap, starting at the end of modules */ - free_xenheap(freemem, xenheap_phys_end); -} - void memory_init(module_t *mod, int mcount) { ulong eomem; - ulong heap_start; + ulong bitmap_start = ~0UL; + ulong bitmap_end = 0; + ulong bitmap_size; ulong xh_pages; + ulong start; + ulong end; + int pos; /* lets find out how much memory there is and set max_page */ max_page = 0; printk("Physical RAM map:\n"); ofd_walk_mem((void *)oftree, set_max_page); eomem = max_page << PAGE_SHIFT; - - if (eomem == 0){ + if (eomem == 0) { panic("ofd_walk_mem() failed\n"); } - /* find the portion of memory we need to keep safe */ - save_start = oftree; - save_end = oftree_end; - if (rtas_base) { - if (save_start > rtas_base) - save_start = rtas_base; - if (save_end < rtas_end) - save_end = rtas_end; - } - - /* minimum heap has to reach to the end of all Xen required memory */ - xh_pages = ALIGN_UP(save_end, PAGE_SIZE) >> PAGE_SHIFT; - xh_pages += opt_xenheap_megabytes << (20 - PAGE_SHIFT); + xh_pages = opt_xenheap_megabytes << (20 - PAGE_SHIFT); /* While we are allocating HTABS from The Xen Heap we need it to * be larger */ - xh_pages += nr_pages >> 5; + xh_pages += nr_pages >> 5; xenheap_phys_end = xh_pages << PAGE_SHIFT; printk("End of Xen Area: %luMiB (%luKiB)\n", @@ -214,17 +148,20 @@ void memory_init(module_t *mod, int mcou printk("End of RAM: %luMiB (%luKiB)\n", eomem >> 20, eomem >> 10); - /* Architecturally the first 4 pages are exception hendlers, we - * will also be copying down some code there */ - heap_start = 4 << PAGE_SHIFT; - if (oftree < (ulong)_start) - 
heap_start = ALIGN_UP(oftree_end, PAGE_SIZE); - - heap_start = init_boot_allocator(heap_start); - if (heap_start > (ulong)_start) { - panic("space below _start (%p) is not enough memory " - "for heap (0x%lx)\n", _start, heap_start); - } + /* The boot allocator requires one bit per page. Find a spot for it. */ + bitmap_size = max_page / 8; + pos = boot_of_mem_avail(0, &start, &end); + while (pos >= 0) { + if (end - start >= bitmap_size) { + bitmap_start = start; + bitmap_end = init_boot_allocator(bitmap_start); + printk("boot allocator @ %lx - %lx\n", bitmap_start, bitmap_end); + break; + } + pos = boot_of_mem_avail(pos, &start, &end); + } + if (bitmap_start == ~0UL) + panic("Couldn't find 0x%lx bytes for boot allocator.", bitmap_size); /* allow everything else to be allocated */ total_pages = 0; @@ -242,12 +179,39 @@ void memory_init(module_t *mod, int mcou numa_initmem_init(0, max_page); + /* Domain heap gets all the unclaimed memory. */ end_boot_allocator(); - /* Add memory between the beginning of the heap and the beginning - * of our text */ - free_xenheap(heap_start, (ulong)_start); - setup_xenheap(mod, mcount); + /* Create initial xen heap by finding non-reserved memory. */ + pos = boot_of_mem_avail(0, &start, &end); + while (pos >= 0) { + if (end == ~0UL) + end = xenheap_phys_end; + + /* Problem: the bitmap itself is not reserved. */ + if ((start >= bitmap_start) && (start < bitmap_end)) { + /* Start is inside bitmap. */ + start = bitmap_end; + } + if ((end > bitmap_start) && (end <= bitmap_end)) { + /* End is inside bitmap. */ + end = bitmap_start; + } + if ((start < bitmap_start) && (end > bitmap_end)) { + /* Range encompasses bitmap. First free low part, then high. */ + xenheap_size += bitmap_start - start; + DBG("xenheap: %016lx - %016lx\n", start, bitmap_start); + init_xenheap_pages(start, bitmap_start); + start = bitmap_end; + } + + xenheap_size += end - start; + DBG("xenheap: %016lx - %016lx\n", start, end); + init_xenheap_pages(start, end); + + pos = boot_of_mem_avail(pos, &start, &end); + } + printk("Xen Heap: %luMiB (%luKiB)\n", xenheap_size >> 20, xenheap_size >> 10); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/mm.c Thu Feb 15 14:09:39 2007 -0700 @@ -28,6 +28,7 @@ #include <asm/init.h> #include <asm/page.h> #include <asm/string.h> +#include <public/arch-powerpc.h> #ifdef VERBOSE #define MEM_LOG(_f, _a...) \ diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/of-devtree.c --- a/xen/arch/powerpc/of-devtree.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/of-devtree.c Thu Feb 15 14:09:39 2007 -0700 @@ -358,7 +358,7 @@ static ofdn_t ofd_node_create( n->on_io = 0; n->on_pathlen = pathlen; n->on_last = ofd_pathsplit_left(path, '/', pathlen); - strlcpy(n->on_path, path, pathlen); + strlcpy(n->on_path, path, pathlen + 1); return pos; } diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/of-devtree.h --- a/xen/arch/powerpc/of-devtree.h Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/of-devtree.h Thu Feb 15 14:09:39 2007 -0700 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright IBM Corp. 
2005, 2006, 2007 * * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ @@ -23,6 +23,7 @@ #include <xen/types.h> #include <xen/string.h> +#include <xen/kernel.h> #include <public/xen.h> enum { diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/of_handler/Makefile --- a/xen/arch/powerpc/of_handler/Makefile Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/of_handler/Makefile Thu Feb 15 14:09:39 2007 -0700 @@ -27,5 +27,5 @@ obj-y += strcmp.o obj-y += strcmp.o obj-y += strlen.o obj-y += strncmp.o -obj-y += strncpy.o +obj-y += strlcpy.o obj-y += strnlen.o diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/of_handler/strlcpy.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/powerpc/of_handler/strlcpy.c Thu Feb 15 14:09:39 2007 -0700 @@ -0,0 +1,58 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2005, 2007 + * + * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> + * Hollis Blanchard <hollisb@xxxxxxxxxx> + */ + +#include <xen/string.h> + +size_t +strlcpy(char *dest, const char *src, size_t n) +{ + size_t ret; + char *dp; + + /* cases to consider: + * dest is NULL, s is NULL; + * src is empty (0); + * src is not empty, less than n; + * src is not empty, equal to n; + * src is not empty, greater than n; + */ + + if (n <= 0) { + return 0; + } + + dp = dest; + + do { + *dp++ = *src; + --n; + ++src; + } while ((*src != '\0') && (n > 1)); + + ret = n; + + /* clear remainder of buffer (if any); ANSI semantics */ + while (n > 0) { + *dp++ = '\0'; + --n; + } + return ret; +} diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/of_handler/strncpy.c --- a/xen/arch/powerpc/of_handler/strncpy.c Thu Feb 15 13:13:36 2007 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (C) IBM Corp. 
2005 - * - * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> - */ - -#include <xen/string.h> - -char * -strncpy(char *dest, const char *src, size_t n) -{ - char *dp; - - /* cases to consider: - * dest is NULL, s is NULL; - * src is empty (0); - * src is not empty, less than n; - * src is not empty, equal to n; - * src is not empty, greater than n; - */ - - if (n <= 0) { - return dest; - } - - dp = dest; - - do { - *dp++ = *src; - --n; - ++src; - } while ((*src != '\0') && (n > 0)); - - /* clear remainder of buffer (if any); ANSI semantics */ - while (n > 0) { - *dp++ = '\0'; - --n; - } - return dest; -} diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/ofd_fixup.c --- a/xen/arch/powerpc/ofd_fixup.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/ofd_fixup.c Thu Feb 15 14:09:39 2007 -0700 @@ -178,11 +178,20 @@ static ofdn_t ofd_cpus_props(void *m, st if (ofd_boot_cpu == -1) ofd_boot_cpu = c; while (c > 0) { - /* Since we are not MP yet we prune all but the booting cpu */ + /* We do not use the OF tree to identify secondary processors + * so we must prune them from the tree */ if (c == ofd_boot_cpu) { + ofdn_t p; + ibm_pft_size[1] = d->arch.htab.log_num_ptes + LOG_PTE_SIZE; ofd_prop_add(m, c, "ibm,pft-size", ibm_pft_size, sizeof (ibm_pft_size)); + + /* get rid of non-standard properties */ + p = ofd_prop_find(m, c, "cpu#"); + if (p > 0) { + ofd_prop_remove(m, c, p); + } /* FIXME: Check the the "l2-cache" property who's * contents is an orphaned phandle? */ diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/papr/xlate.c --- a/xen/arch/powerpc/papr/xlate.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/papr/xlate.c Thu Feb 15 14:09:39 2007 -0700 @@ -72,6 +72,20 @@ static inline void pte_insert(union pte } #endif +/* + * POWER Arch 2.03 Sec 4.12.1 (Yes 970 is one) + * + * when a tlbsync instruction has been executed by a processor in a + * given partition, a ptesync instruction must be executed by that + * processor before a tlbie or tlbsync instruction is executed by + * another processor in that partition. + * + * So for now, here is a BFLock to deal with it, the lock should be per-domain. + * + * XXX Will need to audit all tlb usage soon enough. 
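A subtlety with the strlcpy() introduced a few hunks up (note it is a local variant: it returns the space left in the buffer rather than strlen(src)): the size argument must cover the NUL terminator, which is exactly why of-devtree.c above now passes pathlen + 1. A worked case against the implementation as added:

    char buf[8];

    strlcpy(buf, "cpus", 4);   /* buf = "cpu\0"  - last char truncated */
    strlcpy(buf, "cpus", 5);   /* buf = "cpus\0" - hence pathlen + 1   */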
+ */ + +static DEFINE_SPINLOCK(native_tlbie_lock); static void pte_tlbie(union pte volatile *pte, ulong ptex) { ulong va; @@ -91,6 +105,7 @@ static void pte_tlbie(union pte volatile va = (pi << 12) | (vsid << 28); va &= ~(0xffffULL << 48); + spin_lock(&native_tlbie_lock); #ifndef FLUSH_THE_WHOLE_THING if (pte->bits.l) { va |= (pte->bits.rpn & 1); @@ -114,7 +129,7 @@ static void pte_tlbie(union pte volatile } } #endif - + spin_unlock(&native_tlbie_lock); } long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn) diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/powerpc64/asm-offsets.c --- a/xen/arch/powerpc/powerpc64/asm-offsets.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/powerpc64/asm-offsets.c Thu Feb 15 14:09:39 2007 -0700 @@ -48,6 +48,8 @@ void __dummy__(void) OFFSET(UREGS_ctr, struct cpu_user_regs, ctr); OFFSET(UREGS_xer, struct cpu_user_regs, xer); OFFSET(UREGS_hid4, struct cpu_user_regs, hid4); + OFFSET(UREGS_dar, struct cpu_user_regs, dar); + OFFSET(UREGS_dsisr, struct cpu_user_regs, dsisr); OFFSET(UREGS_cr, struct cpu_user_regs, cr); OFFSET(UREGS_fpscr, struct cpu_user_regs, fpscr); DEFINE(UREGS_sizeof, sizeof(struct cpu_user_regs)); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/powerpc64/exceptions.S --- a/xen/arch/powerpc/powerpc64/exceptions.S Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/powerpc64/exceptions.S Thu Feb 15 14:09:39 2007 -0700 @@ -373,9 +373,15 @@ ex_machcheck_continued: * a better way, but this works for now. */ ex_program_continued: SAVE_GPRS r14, r31, r1 /* save all the non-volatiles */ - /* save hid4 for debug */ + + /* save these for debug, not needed for restore */ mfspr r14, SPRN_HID4 std r14, UREGS_hid4(r1) + mfdar r14 + std r14, UREGS_dar(r1) + mfdsisr r14 + stw r14, UREGS_dsisr(r1) + mr r14, r0 EXCEPTION_SAVE_STATE r1 mr r4, r14 diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/powerpc64/ppc970.c --- a/xen/arch/powerpc/powerpc64/ppc970.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/powerpc64/ppc970.c Thu Feb 15 14:09:39 2007 -0700 @@ -129,7 +129,6 @@ unsigned int cpu_extent_order(void) return log_large_page_sizes[0] - PAGE_SHIFT; } - /* This is more a platform thing than a CPU thing, but we only have * one platform now */ int cpu_io_mfn(ulong mfn) @@ -141,6 +140,12 @@ int cpu_io_mfn(ulong mfn) return 0; } + +int cpu_threads(int cpuid) +{ + return 1; +} + static u64 cpu0_hids[6]; static u64 cpu0_hior; diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/powerpc64/ppc970_scom.c --- a/xen/arch/powerpc/powerpc64/ppc970_scom.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/powerpc64/ppc970_scom.c Thu Feb 15 14:09:39 2007 -0700 @@ -158,7 +158,7 @@ void cpu_scom_init(void) { #ifdef CONFIG_SCOM ulong val; - if (PVR_REV(mfpvr()) == 0x0300) { + if (PVR_REV(mfpvr()) == PV_970FX) { /* these address are only good for 970FX */ console_start_sync(); if (!cpu_scom_read(SCOM_PTSR, &val)) @@ -174,7 +174,7 @@ void cpu_scom_AMCR(void) #ifdef CONFIG_SCOM ulong val; - if (PVR_REV(mfpvr()) == 0x0300) { + if (PVR_REV(mfpvr()) == PV_970FX) { /* these address are only good for 970FX */ cpu_scom_read(SCOM_AMC_REG, &val); printk("SCOM AMCR: 0x%016lx\n", val); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/powerpc64/traps.c --- a/xen/arch/powerpc/powerpc64/traps.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/powerpc64/traps.c Thu Feb 15 14:09:39 2007 -0700 @@ -41,7 +41,15 @@ void show_registers(struct cpu_user_regs regs->pc, regs->msr, regs->lr, regs->ctr, regs->srr0, regs->srr1); - for (i=0; i<32; i+=4) { + + 
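For reference while reading the DAR/DSISR saving just added to asm-offsets.c and exceptions.S, and the printing in the traps.c hunk below: mfdar()/mfdsisr() are the usual one-instruction SPR readers. A sketch (DAR is SPR 19 and DSISR is SPR 18 per the architecture; the wrappers in the tree may differ in detail):

    /* Data Address Register: the faulting effective address. */
    static inline unsigned long mfdar(void)
    {
        unsigned long dar;
        asm volatile("mfspr %0, 19" : "=r" (dar));
        return dar;
    }

    /* DSISR: why the data storage interrupt was taken (32-bit). */
    static inline unsigned int mfdsisr(void)
    {
        unsigned int dsisr;
        asm volatile("mfspr %0, 18" : "=r" (dsisr));
        return dsisr;
    }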
/* These come in handy for debugging but are not always saved, so + * what is "actually" in the register should be good */ + printk("dar %016lx dsisr %08x *** saved\n" + "dar %016lx dsisr %08x *** actual\n", + regs->dar, regs->dsisr, + mfdar(), mfdsisr()); + + for (i = 0; i < 32; i += 4) { printk("r%02i: %016lx %016lx %016lx %016lx\n", i, regs->gprs[i], regs->gprs[i+1], regs->gprs[i+2], regs->gprs[i+3]); } diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/setup.c --- a/xen/arch/powerpc/setup.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/setup.c Thu Feb 15 14:09:39 2007 -0700 @@ -36,6 +36,7 @@ #include <xen/symbols.h> #include <xen/keyhandler.h> #include <xen/numa.h> +#include <xen/rcupdate.h> #include <acm/acm_hooks.h> #include <public/version.h> #include <asm/mpic.h> @@ -166,9 +167,6 @@ static void __init start_of_day(void) set_current(idle_domain->vcpu[0]); idle_vcpu[0] = current; - /* for some reason we need to set our own bit in the thread map */ - cpu_set(0, cpu_sibling_map[0]); - initialize_keytable(); /* Register another key that will allow for the the Harware Probe * to be contacted, this works with RiscWatch probes and should @@ -179,6 +177,7 @@ static void __init start_of_day(void) register_keyhandler('D', key_ofdump , "Dump OF Devtree"); timer_init(); + rcu_init(); serial_init_postirq(); do_initcalls(); } @@ -234,6 +233,21 @@ static int kick_secondary_cpus(int maxcp int cpuid; for_each_present_cpu(cpuid) { + int threads; + int i; + + threads = cpu_threads(cpuid); + for (i = 0; i < threads; i++) + cpu_set(i, cpu_sibling_map[cpuid]); + + /* For now everything is single core */ + cpu_set(cpuid, cpu_core_map[cpuid]); + + rcu_online_cpu(cpuid); + + numa_set_node(cpuid, 0); + numa_add_cpu(cpuid); + if (cpuid == 0) continue; if (cpuid >= maxcpus) @@ -244,9 +258,6 @@ static int kick_secondary_cpus(int maxcp /* wait for it */ while (!cpu_online(cpuid)) cpu_relax(); - - numa_set_node(cpuid, 0); - numa_add_cpu(cpuid); } return 0; diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/sysctl.c --- a/xen/arch/powerpc/sysctl.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/sysctl.c Thu Feb 15 14:09:39 2007 -0700 @@ -41,9 +41,13 @@ long arch_do_sysctl(struct xen_sysctl *s { xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; - pi->threads_per_core = 1; - pi->cores_per_socket = 1; - pi->sockets_per_node = 1; + pi->threads_per_core = + cpus_weight(cpu_sibling_map[0]); + pi->cores_per_socket = + cpus_weight(cpu_core_map[0]) / pi->threads_per_core; + pi->sockets_per_node = + num_online_cpus() / cpus_weight(cpu_core_map[0]); + pi->nr_nodes = 1; pi->total_pages = total_pages; pi->free_pages = avail_domheap_pages(); diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/time.c --- a/xen/arch/powerpc/time.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/time.c Thu Feb 15 14:09:39 2007 -0700 @@ -85,12 +85,6 @@ void send_timer_event(struct vcpu *v) vcpu_unblock(v); } -/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ -void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) -{ - unimplemented(); -} - void update_vcpu_system_time(struct vcpu *v) { } diff -r ac18d251df63 -r 9529d667d042 xen/arch/powerpc/xen.lds.S --- a/xen/arch/powerpc/xen.lds.S Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/powerpc/xen.lds.S Thu Feb 15 14:09:39 2007 -0700 @@ -17,6 +17,8 @@ SECTIONS SECTIONS { . 
= 0x00400000; + PROVIDE(_text = .); + PROVIDE(_stext = .); /* Read-only sections, merged into text segment: */ .interp : { *(.interp) } :text .hash : { *(.hash) } @@ -111,17 +113,26 @@ SECTIONS SORT(CONSTRUCTORS) } + . = ALIGN(4096); + __init_begin = .; + _sinittext = .; + .init.text : { *(.init.text) } : text + _einittext = .; + .init.data : { *(.init.data) } : text . = ALIGN(32); __setup_start = .; - .init.setup : { *(.init.setup) } + .init.setup : { *(.init.setup) } : text __setup_end = .; __initcall_start = .; - .initcall.init : { *(.initcall1.init) } + .initcall.init : { *(.initcall1.init) } : text __initcall_end = .; - __inithcall_start = .; - .inithcall.text : { *(.inithcall.text) } - __inithcall_end = .; - + __init_hcall_start = .; + .init_hcall.init : { *(.init_hcall.init) } : text + __init_hcall_end = .; + __builtin_cmdline : { *(__builtin_cmdline) } : text + . = ALIGN(4096); + __init_end = .; + __per_cpu_start = .; .data.percpu : { *(.data.percpu) } __per_cpu_data_end = .; diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/domain.c Thu Feb 15 14:09:39 2007 -0700 @@ -37,7 +37,7 @@ #include <asm/i387.h> #include <asm/mpspec.h> #include <asm/ldt.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/msr.h> @@ -331,6 +331,7 @@ int vcpu_initialise(struct vcpu *v) pae_l3_cache_init(&v->arch.pae_l3_cache); + paging_vcpu_init(v); if ( is_hvm_domain(d) ) { @@ -424,7 +425,7 @@ int arch_domain_create(struct domain *d) HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; #endif - shadow_domain_init(d); + paging_domain_init(d); if ( !is_idle_domain(d) ) { @@ -464,7 +465,7 @@ void arch_domain_destroy(struct domain * hvm_domain_destroy(d); } - shadow_final_teardown(d); + paging_final_teardown(d); free_xenheap_pages( d->arch.mm_perdomain_pt, @@ -613,7 +614,7 @@ int arch_set_info_guest( { cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); - if ( shadow_mode_refcounts(d) + if ( paging_mode_refcounts(d) ? !get_page(mfn_to_page(cr3_pfn), d) : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_base_page_table) ) @@ -631,7 +632,7 @@ int arch_set_info_guest( cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); - if ( shadow_mode_refcounts(d) + if ( paging_mode_refcounts(d) ? 
!get_page(mfn_to_page(cr3_pfn), d) : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_l3_page_table) ) @@ -652,8 +653,8 @@ int arch_set_info_guest( /* Don't redo final setup */ set_bit(_VCPUF_initialised, &v->vcpu_flags); - if ( shadow_mode_enabled(d) ) - shadow_update_paging_modes(v); + if ( paging_mode_enabled(d) ) + paging_update_paging_modes(v); update_cr3(v); @@ -1406,7 +1407,7 @@ static void vcpu_destroy_pagetables(stru if ( pfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); @@ -1427,7 +1428,7 @@ static void vcpu_destroy_pagetables(stru pfn = pagetable_get_pfn(v->arch.guest_table); if ( pfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); @@ -1443,7 +1444,7 @@ static void vcpu_destroy_pagetables(stru pfn = pagetable_get_pfn(v->arch.guest_table_user); if ( pfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(pfn)); else put_page_and_type(mfn_to_page(pfn)); @@ -1464,8 +1465,8 @@ void domain_relinquish_resources(struct for_each_vcpu ( d, v ) vcpu_destroy_pagetables(v); - /* Tear down shadow mode stuff. */ - shadow_teardown(d); + /* Tear down paging-assistance stuff. */ + paging_teardown(d); /* * Relinquish GDT mappings. No need for explicit unmapping of the LDT as @@ -1484,35 +1485,12 @@ void domain_relinquish_resources(struct void arch_dump_domain_info(struct domain *d) { - if ( shadow_mode_enabled(d) ) - { - printk(" shadow mode: "); - if ( d->arch.shadow.mode & SHM2_enable ) - printk("enabled "); - if ( shadow_mode_refcounts(d) ) - printk("refcounts "); - if ( shadow_mode_log_dirty(d) ) - printk("log_dirty "); - if ( shadow_mode_translate(d) ) - printk("translate "); - if ( shadow_mode_external(d) ) - printk("external "); - printk("\n"); - } + paging_dump_domain_info(d); } void arch_dump_vcpu_info(struct vcpu *v) { - if ( shadow_mode_enabled(v->domain) ) - { - if ( v->arch.shadow.mode ) - printk(" shadowed %u-on-%u, %stranslated\n", - v->arch.shadow.mode->guest_levels, - v->arch.shadow.mode->shadow_levels, - shadow_vcpu_mode_translate(v) ? 
"" : "not "); - else - printk(" not shadowed\n"); - } + paging_dump_vcpu_info(v); } /* diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/domain_build.c Thu Feb 15 14:09:39 2007 -0700 @@ -25,7 +25,7 @@ #include <asm/processor.h> #include <asm/desc.h> #include <asm/i387.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <public/version.h> #include <public/libelf.h> @@ -777,8 +777,8 @@ int construct_dom0(struct domain *d, (void)alloc_vcpu(d, i, i); /* Set up CR3 value for write_ptbase */ - if ( shadow_mode_enabled(v->domain) ) - shadow_update_paging_modes(v); + if ( paging_mode_enabled(v->domain) ) + paging_update_paging_modes(v); else update_cr3(v); @@ -918,8 +918,8 @@ int construct_dom0(struct domain *d, regs->eflags = X86_EFLAGS_IF; if ( opt_dom0_shadow ) - if ( shadow_enable(d, SHM2_enable) == 0 ) - shadow_update_paging_modes(v); + if ( paging_enable(d, PG_SH_enable) == 0 ) + paging_update_paging_modes(v); if ( supervisor_mode_kernel ) { diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/domctl.c Thu Feb 15 14:09:39 2007 -0700 @@ -19,7 +19,7 @@ #include <xen/trace.h> #include <xen/console.h> #include <xen/iocap.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/irq.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> @@ -42,7 +42,7 @@ long arch_do_domctl( d = get_domain_by_id(domctl->domain); if ( d != NULL ) { - ret = shadow_domctl(d, + ret = paging_domctl(d, &domctl->u.shadow_op, guest_handle_cast(u_domctl, void)); put_domain(d); @@ -398,6 +398,7 @@ long arch_do_domctl( put_domain(d); } + break; case XEN_DOMCTL_get_address_size: { @@ -411,7 +412,11 @@ long arch_do_domctl( ret = 0; put_domain(d); - } + + if ( copy_to_guest(u_domctl, domctl, 1) ) + ret = -EFAULT; + } + break; default: ret = -ENOSYS; diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/hvm.c Thu Feb 15 14:09:39 2007 -0700 @@ -30,11 +30,10 @@ #include <xen/hypercall.h> #include <xen/guest_access.h> #include <xen/event.h> -#include <xen/shadow.h> #include <asm/current.h> #include <asm/e820.h> #include <asm/io.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -155,7 +154,7 @@ int hvm_domain_initialise(struct domain spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); spin_lock_init(&d->arch.hvm_domain.irq_lock); - rc = shadow_enable(d, SHM2_refcounts|SHM2_translate|SHM2_external); + rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external); if ( rc != 0 ) return rc; @@ -383,7 +382,7 @@ static int __hvm_copy(void *buf, paddr_t count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo); if ( virt ) - mfn = get_mfn_from_gpfn(shadow_gva_to_gfn(current, addr)); + mfn = get_mfn_from_gpfn(paging_gva_to_gfn(current, addr)); else mfn = get_mfn_from_gpfn(addr >> PAGE_SHIFT); @@ -600,7 +599,7 @@ void hvm_do_hypercall(struct cpu_user_re return; } - if ( current->arch.shadow.mode->guest_levels == 4 ) + if ( current->arch.paging.mode->guest_levels == 4 ) { pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi, pregs->rsi, diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/io.c Thu Feb 15 14:09:39 2007 -0700 @@ -32,7 +32,7 @@ #include 
<asm/processor.h> #include <asm/msr.h> #include <asm/apic.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/vpt.h> diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/platform.c Thu Feb 15 14:09:39 2007 -0700 @@ -21,7 +21,6 @@ #include <xen/config.h> #include <xen/types.h> #include <xen/mm.h> -#include <xen/shadow.h> #include <xen/domain_page.h> #include <asm/page.h> #include <xen/event.h> @@ -29,6 +28,7 @@ #include <xen/sched.h> #include <asm/regs.h> #include <asm/x86_emulate.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/io.h> @@ -690,6 +690,39 @@ static int mmio_decode(int address_bytes } else return DECODE_failure; + case 0xFE: + case 0xFF: + { + unsigned char ins_subtype = (opcode[1] >> 3) & 7; + + if ( opcode[0] == 0xFE ) { + *op_size = BYTE; + GET_OP_SIZE_FOR_BYTE(size_reg); + } else { + GET_OP_SIZE_FOR_NONEBYTE(*op_size); + size_reg = *op_size; + } + + mmio_op->immediate = 1; + mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); + mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); + + switch ( ins_subtype ) { + case 0: /* inc */ + mmio_op->instr = INSTR_ADD; + return DECODE_success; + + case 1: /* dec */ + mmio_op->instr = INSTR_SUB; + return DECODE_success; + + default: + printk("%x/%x, This opcode isn't handled yet!\n", + *opcode, ins_subtype); + return DECODE_failure; + } + } + case 0x0F: break; @@ -809,7 +842,7 @@ void send_pio_req(unsigned long port, un if ( value_is_ptr ) /* get physical address of data */ { if ( hvm_paging_enabled(current) ) - p->data = shadow_gva_to_gpa(current, value); + p->data = paging_gva_to_gpa(current, value); else p->data = value; /* guest VA == guest PA */ } @@ -865,7 +898,7 @@ static void send_mmio_req(unsigned char if ( value_is_ptr ) { if ( hvm_paging_enabled(v) ) - p->data = shadow_gva_to_gpa(v, value); + p->data = paging_gva_to_gpa(v, value); else p->data = value; /* guest VA == guest PA */ } @@ -981,7 +1014,7 @@ void handle_mmio(unsigned long gpa) if ( ad_size == WORD ) addr &= 0xFFFF; addr += hvm_get_segment_base(v, x86_seg_es); - if ( shadow_gva_to_gpa(v, addr) == gpa ) + if ( paging_gva_to_gpa(v, addr) == gpa ) { enum x86_segment seg; diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/svm/intr.c Thu Feb 15 14:09:39 2007 -0700 @@ -24,10 +24,10 @@ #include <xen/lib.h> #include <xen/trace.h> #include <xen/errno.h> -#include <xen/shadow.h> #include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/msr.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/io.h> #include <asm/hvm/support.h> diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Feb 15 14:09:39 2007 -0700 @@ -29,7 +29,8 @@ #include <xen/domain_page.h> #include <asm/current.h> #include <asm/io.h> -#include <asm/shadow.h> +#include <asm/paging.h> +#include <asm/p2m.h> #include <asm/regs.h> #include <asm/cpufeature.h> #include <asm/processor.h> @@ -491,9 +492,6 @@ int svm_vmcb_restore(struct vcpu *v, str v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_svm.cpu_cr3 = c->cr3; 
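Back in the platform.c hunk above: the new 0xFE/0xFF case decodes the x86 inc/dec group by the ModRM reg field and rewrites it as an add/sub with an immediate of 1, so the existing arithmetic emulation paths handle it unchanged. A worked decode, with an assumed instruction:

    /* bytes: ff 40 10  =  incl 0x10(%eax)
     * opcode[0] = 0xff -> full operand size (GET_OP_SIZE_FOR_NONEBYTE)
     * opcode[1] = 0x40 -> subtype = (0x40 >> 3) & 7 = 0 -> inc
     * result: mmio_op->instr = INSTR_ADD, immediate operand of 1,
     * memory operand taken from the faulting MMIO address. */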
} @@ -560,7 +558,7 @@ int svm_vmcb_restore(struct vcpu *v, str vmcb->sysenter_esp = c->sysenter_esp; vmcb->sysenter_eip = c->sysenter_eip; - shadow_update_paging_modes(v); + paging_update_paging_modes(v); return 0; bad_cr3: @@ -1095,7 +1093,7 @@ static int svm_do_page_fault(unsigned lo "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", va, (unsigned long)current->arch.hvm_svm.vmcb->rip, (unsigned long)regs->error_code); - return shadow_fault(va, regs); + return paging_fault(va, regs); } @@ -1730,7 +1728,7 @@ static int svm_set_cr0(unsigned long val v->arch.guest_table = pagetable_from_pfn(mfn); if ( old_base_mfn ) put_page(mfn_to_page(old_base_mfn)); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); @@ -1753,7 +1751,7 @@ static int svm_set_cr0(unsigned long val svm_inject_exception(v, TRAP_gp_fault, 1, 0); return 0; } - shadow_update_paging_modes(v); + paging_update_paging_modes(v); } else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) { @@ -1763,7 +1761,7 @@ static int svm_set_cr0(unsigned long val clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); } /* we should take care of this kind of situation */ - shadow_update_paging_modes(v); + paging_update_paging_modes(v); } return 1; @@ -1866,7 +1864,7 @@ static int mov_to_cr(int gpreg, int cr, mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(v->arch.guest_table)) goto bad_cr3; - shadow_update_cr3(v); + paging_update_cr3(v); } else { @@ -1917,7 +1915,7 @@ static int mov_to_cr(int gpreg, int cr, v->arch.guest_table = pagetable_from_pfn(mfn); if ( old_base_mfn ) put_page(mfn_to_page(old_base_mfn)); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); @@ -1946,7 +1944,7 @@ static int mov_to_cr(int gpreg, int cr, * all TLB entries except global entries. 
*/ if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); break; case 8: @@ -2289,7 +2287,7 @@ void svm_handle_invlpg(const short invlp __update_guest_eip (vmcb, inst_len); } - shadow_invlpg(v, g_vaddr); + paging_invlpg(v, g_vaddr); } @@ -2660,7 +2658,7 @@ void walk_shadow_and_guest_pt(unsigned l struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; paddr_t gpa; - gpa = shadow_gva_to_gpa(current, gva); + gpa = paging_gva_to_gpa(current, gva); printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3); if( !svm_paging_enabled(v) || mmio_space(gpa) ) return; @@ -2681,7 +2679,7 @@ void walk_shadow_and_guest_pt(unsigned l shadow_sync_va(v, gva); gpte.l1 = 0; - __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], + __copy_from_user(&gpte, &__linear_l1_table[ l1_linear_offset(gva) ], sizeof(gpte) ); printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) ); @@ -2726,7 +2724,7 @@ asmlinkage void svm_vmexit_handler(struc if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) { if (svm_paging_enabled(v) && - !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2))) + !mmio_space(paging_gva_to_gpa(current, vmcb->exitinfo2))) { printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64"," "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", " @@ -2736,7 +2734,7 @@ asmlinkage void svm_vmexit_handler(struc (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2, (u64)vmcb->exitintinfo.bytes, - (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2)); + (u64)paging_gva_to_gpa(current, vmcb->exitinfo2)); } else { diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Feb 15 14:09:39 2007 -0700 @@ -23,10 +23,10 @@ #include <xen/mm.h> #include <xen/lib.h> #include <xen/errno.h> -#include <xen/shadow.h> #include <asm/cpufeature.h> #include <asm/processor.h> #include <asm/msr.h> +#include <asm/paging.h> #include <asm/hvm/hvm.h> #include <asm/hvm/io.h> #include <asm/hvm/support.h> @@ -196,7 +196,7 @@ static int construct_vmcb(struct vcpu *v read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK; - shadow_update_paging_modes(v); + paging_update_paging_modes(v); vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP; diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/vlapic.c Thu Feb 15 14:09:39 2007 -0700 @@ -22,7 +22,6 @@ #include <xen/types.h> #include <xen/mm.h> #include <xen/xmalloc.h> -#include <xen/shadow.h> #include <xen/domain_page.h> #include <asm/page.h> #include <xen/event.h> diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Feb 15 14:09:39 2007 -0700 @@ -448,7 +448,7 @@ static void construct_vmcs(struct vcpu * vmx_vmcs_exit(v); - shadow_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ + paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ } int vmx_create_vmcs(struct vcpu *v) diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Feb 15 14:09:39 2007 -0700 @@ -35,12 +35,13 @@ #include <asm/types.h> #include <asm/msr.h> #include <asm/spinlock.h> +#include 
<asm/paging.h> +#include <asm/p2m.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> #include <asm/hvm/vmx/vmx.h> #include <asm/hvm/vmx/vmcs.h> #include <asm/hvm/vmx/cpu.h> -#include <asm/shadow.h> #include <public/sched.h> #include <public/hvm/ioreq.h> #include <asm/hvm/vpic.h> @@ -484,9 +485,6 @@ int vmx_vmcs_restore(struct vcpu *v, str v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_vmx.cpu_cr3 = c->cr3; } @@ -556,7 +554,7 @@ int vmx_vmcs_restore(struct vcpu *v, str vmx_vmcs_exit(v); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); return 0; bad_cr3: @@ -1126,7 +1124,7 @@ static int vmx_do_page_fault(unsigned lo } #endif - result = shadow_fault(va, regs); + result = paging_fault(va, regs); TRACE_VMEXIT(2, result); #if 0 @@ -1277,7 +1275,7 @@ static void vmx_do_invlpg(unsigned long * We do the safest things first, then try to update the shadow * copying from guest */ - shadow_invlpg(v, va); + paging_invlpg(v, va); } @@ -1691,9 +1689,6 @@ static int vmx_world_restore(struct vcpu v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_vmx.cpu_cr3 = c->cr3; } @@ -1753,7 +1748,7 @@ static int vmx_world_restore(struct vcpu __vmwrite(GUEST_LDTR_BASE, c->ldtr_base); __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); return 0; bad_cr3: @@ -1906,14 +1901,11 @@ static int vmx_set_cr0(unsigned long val v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - shadow_update_paging_modes(v); + paging_update_paging_modes(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - /* - * arch->shadow_table should hold the next CR3 for shadow - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vmx.cpu_cr3, mfn); } @@ -1981,7 +1973,7 @@ static int vmx_set_cr0(unsigned long val vm_entry_value &= ~VM_ENTRY_IA32E_MODE; __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); } - shadow_update_paging_modes(v); + paging_update_paging_modes(v); } return 1; @@ -2070,7 +2062,7 @@ static int mov_to_cr(int gp, int cr, str mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); if (mfn != pagetable_get_pfn(v->arch.guest_table)) goto bad_cr3; - shadow_update_cr3(v); + paging_update_cr3(v); } else { /* * If different, make a shadow. Check if the PDBR is valid @@ -2084,9 +2076,6 @@ static int mov_to_cr(int gp, int cr, str v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); - /* - * arch.shadow_table should now hold the next CR3 for shadow - */ v->arch.hvm_vmx.cpu_cr3 = value; update_cr3(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); @@ -2120,9 +2109,6 @@ static int mov_to_cr(int gp, int cr, str HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", (unsigned long) (mfn << PAGE_SHIFT)); - /* - * arch->shadow_table should hold the next CR3 for shadow - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", v->arch.hvm_vmx.cpu_cr3, mfn); @@ -2148,7 +2134,7 @@ static int mov_to_cr(int gp, int cr, str * all TLB entries except global entries. 
*/ if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); break; case 8: diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm.c Thu Feb 15 14:09:39 2007 -0700 @@ -99,6 +99,7 @@ #include <xen/event.h> #include <xen/iocap.h> #include <xen/guest_access.h> +#include <asm/paging.h> #include <asm/shadow.h> #include <asm/page.h> #include <asm/flushtlb.h> @@ -373,9 +374,6 @@ void write_ptbase(struct vcpu *v) /* Should be called after CR3 is updated. * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. * - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, - * shadow_vtable, etc). - * * Uses values found in vcpu->arch.(guest_table and guest_table_user), and * for HVM guests, arch.monitor_table and hvm's guest CR3. * @@ -385,9 +383,9 @@ void update_cr3(struct vcpu *v) { unsigned long cr3_mfn=0; - if ( shadow_mode_enabled(v->domain) ) - { - shadow_update_cr3(v); + if ( paging_mode_enabled(v->domain) ) + { + paging_update_cr3(v); return; } @@ -615,7 +613,7 @@ get_page_from_l1e( * qemu-dm helper process in dom0 to map the domain's memory without * messing up the count of "real" writable mappings.) */ okay = (((l1e_get_flags(l1e) & _PAGE_RW) && - !(unlikely(shadow_mode_external(d) && (d != current->domain)))) + !(unlikely(paging_mode_external(d) && (d != current->domain)))) ? get_page_and_type(page, d, PGT_writable_page) : get_page(page, d)); if ( !okay ) @@ -804,9 +802,9 @@ void put_page_from_l1e(l1_pgentry_t l1e, } /* Remember we didn't take a type-count of foreign writable mappings - * to shadow external domains */ + * to paging-external domains */ if ( (l1e_get_flags(l1e) & _PAGE_RW) && - !(unlikely((e != d) && shadow_mode_external(e))) ) + !(unlikely((e != d) && paging_mode_external(e))) ) { put_page_and_type(page); } @@ -976,6 +974,19 @@ static void pae_flush_pgd( l3_pgentry_t *l3tab_ptr; struct pae_l3_cache *cache; + if ( unlikely(shadow_mode_enabled(d)) ) + { + cpumask_t m = CPU_MASK_NONE; + /* Re-shadow this l3 table on any vcpus that are using it */ + for_each_vcpu ( d, v ) + if ( pagetable_get_pfn(v->arch.guest_table) == mfn ) + { + paging_update_cr3(v); + cpus_or(m, m, v->vcpu_dirty_cpumask); + } + flush_tlb_mask(m); + } + /* If below 4GB then the pgdir is not shadowed in low memory. 
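
The pae_flush_pgd() hunk above accumulates the dirty-CPU masks of every vcpu that currently has the modified l3 table loaded, then issues one combined TLB flush instead of flushing per vcpu. A hedged userspace sketch of that accumulate-then-flush pattern (a plain bitmask stands in for cpumask_t; names are invented):

#include <stdio.h>

struct fake_vcpu { unsigned long guest_table_pfn; unsigned long dirty_cpus; };

static void flush_tlb_mask_sketch(unsigned long mask)
{
    printf("flush TLBs on cpu mask %#lx\n", mask);   /* one IPI round */
}

int main(void)
{
    struct fake_vcpu vcpus[] = {
        { .guest_table_pfn = 0x1000, .dirty_cpus = 0x3 },
        { .guest_table_pfn = 0x2000, .dirty_cpus = 0x4 },  /* other pgd */
        { .guest_table_pfn = 0x1000, .dirty_cpus = 0x8 },
    };
    unsigned long modified_pfn = 0x1000, mask = 0;

    for (unsigned int i = 0; i < 3; i++)
        if (vcpus[i].guest_table_pfn == modified_pfn)
            mask |= vcpus[i].dirty_cpus;   /* cpus_or() equivalent */

    flush_tlb_mask_sketch(mask);           /* prints 0xb */
    return 0;
}
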
*/ if ( !l3tab_needs_shadow(mfn) ) return; @@ -1259,20 +1270,13 @@ static inline int update_intpte(intpte_t { int rv = 1; #ifndef PTE_UPDATE_WITH_CMPXCHG - if ( unlikely(shadow_mode_enabled(v->domain)) ) - rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); - else - rv = (!__copy_to_user(p, &new, sizeof(new))); + rv = paging_write_guest_entry(v, p, new, _mfn(mfn)); #else { intpte_t t = old; for ( ; ; ) { - if ( unlikely(shadow_mode_enabled(v->domain)) ) - rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); - else - rv = (!cmpxchg_user(p, t, new)); - + rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); if ( unlikely(rv == 0) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte @@ -1310,7 +1314,7 @@ static int mod_l1_entry(l1_pgentry_t *pl if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) return 0; - if ( unlikely(shadow_mode_refcounts(d)) ) + if ( unlikely(paging_mode_refcounts(d)) ) return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) @@ -1572,7 +1576,7 @@ void free_page_type(struct page_info *pa */ queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); - if ( unlikely(shadow_mode_enabled(owner)) ) + if ( unlikely(paging_mode_enabled(owner)) ) { /* A page table is dirtied when its type count becomes zero. */ mark_dirty(owner, page_to_mfn(page)); @@ -1771,7 +1775,7 @@ int new_guest_cr3(unsigned long mfn) #ifdef CONFIG_COMPAT if ( IS_COMPAT(d) ) { - okay = shadow_mode_refcounts(d) + okay = paging_mode_refcounts(d) ? 0 /* Old code was broken, but what should it be? */ : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW| @@ -1788,7 +1792,7 @@ int new_guest_cr3(unsigned long mfn) return 1; } #endif - okay = shadow_mode_refcounts(d) + okay = paging_mode_refcounts(d) ? 
get_page_from_pagenr(mfn, d) : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); if ( unlikely(!okay) ) @@ -1808,7 +1812,7 @@ int new_guest_cr3(unsigned long mfn) if ( likely(old_base_mfn != 0) ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(old_base_mfn)); else put_page_and_type(mfn_to_page(old_base_mfn)); @@ -1861,7 +1865,7 @@ static int set_foreigndom(domid_t domid) d->domain_id); okay = 0; } - else if ( unlikely(shadow_mode_translate(d)) ) + else if ( unlikely(paging_mode_translate(d)) ) { MEM_LOG("Cannot mix foreign mappings with translated domains"); okay = 0; @@ -2007,7 +2011,7 @@ int do_mmuext_op( if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) break; - if ( shadow_mode_refcounts(FOREIGNDOM) ) + if ( paging_mode_refcounts(FOREIGNDOM) ) break; okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); @@ -2032,7 +2036,7 @@ int do_mmuext_op( break; case MMUEXT_UNPIN_TABLE: - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) break; if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) @@ -2070,7 +2074,7 @@ int do_mmuext_op( } if (likely(mfn != 0)) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) okay = get_page_from_pagenr(mfn, d); else okay = get_page_and_type_from_pagenr( @@ -2087,7 +2091,7 @@ int do_mmuext_op( v->arch.guest_table_user = pagetable_from_pfn(mfn); if ( old_mfn != 0 ) { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) put_page(mfn_to_page(old_mfn)); else put_page_and_type(mfn_to_page(old_mfn)); @@ -2101,8 +2105,8 @@ int do_mmuext_op( break; case MMUEXT_INVLPG_LOCAL: - if ( !shadow_mode_enabled(d) - || shadow_invlpg(v, op.arg1.linear_addr) != 0 ) + if ( !paging_mode_enabled(d) + || paging_invlpg(v, op.arg1.linear_addr) != 0 ) local_flush_tlb_one(op.arg1.linear_addr); break; @@ -2149,7 +2153,7 @@ int do_mmuext_op( unsigned long ptr = op.arg1.linear_addr; unsigned long ents = op.arg2.nr_ents; - if ( shadow_mode_external(d) ) + if ( paging_mode_external(d) ) { MEM_LOG("ignoring SET_LDT hypercall from external " "domain %u", d->domain_id); @@ -2298,9 +2302,9 @@ int do_mmu_update( case PGT_l3_page_table: case PGT_l4_page_table: { - if ( shadow_mode_refcounts(d) ) + if ( paging_mode_refcounts(d) ) { - MEM_LOG("mmu update on shadow-refcounted domain!"); + MEM_LOG("mmu update on auto-refcounted domain!"); break; } @@ -2351,13 +2355,7 @@ int do_mmu_update( if ( unlikely(!get_page_type(page, PGT_writable_page)) ) break; - if ( unlikely(shadow_mode_enabled(d)) ) - okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); - else - { - *(intpte_t *)va = req.val; - okay = 1; - } + okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); put_page_type(page); } @@ -2380,9 +2378,9 @@ int do_mmu_update( break; } - if ( unlikely(shadow_mode_translate(FOREIGNDOM)) ) + if ( unlikely(paging_mode_translate(FOREIGNDOM)) ) { - MEM_LOG("Mach-phys update on shadow-translate guest"); + MEM_LOG("Mach-phys update on auto-translate guest"); break; } @@ -2472,7 +2470,7 @@ static int create_grant_pte_mapping( goto failed; } - if ( !shadow_mode_refcounts(d) ) + if ( !paging_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); put_page_type(page); @@ -2578,7 +2576,7 @@ static int create_grant_va_mapping( if ( !okay ) return GNTST_general_error; - if ( !shadow_mode_refcounts(d) ) + if ( !paging_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); return GNTST_okay; @@ -2704,7 +2702,7 @@ int do_update_va_mapping(unsigned long v perfc_incrc(calls_to_update_va); - if ( 
unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) + if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) ) return -EINVAL; LOCK_BIGLOCK(d); @@ -2744,8 +2742,8 @@ int do_update_va_mapping(unsigned long v switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: - if ( !shadow_mode_enabled(d) - || (shadow_invlpg(current, va) != 0) ) + if ( !paging_mode_enabled(d) + || (paging_invlpg(current, va) != 0) ) local_flush_tlb_one(va); break; case UVMF_ALL: @@ -2980,7 +2978,7 @@ long arch_memory_op(int op, XEN_GUEST_HA break; } - if ( !shadow_mode_translate(d) || (mfn == 0) ) + if ( !paging_mode_translate(d) || (mfn == 0) ) { put_domain(d); return -EINVAL; @@ -3235,17 +3233,12 @@ static int ptwr_emulated_update( if ( do_cmpxchg ) { int okay; + intpte_t t = old; ol1e = l1e_from_intpte(old); - if ( shadow_mode_enabled(d) ) - { - intpte_t t = old; - okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, - &t, val, _mfn(mfn)); - okay = (okay && t == old); - } - else - okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); + okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e, + &t, val, _mfn(mfn)); + okay = (okay && t == old); if ( !okay ) { diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/Makefile --- a/xen/arch/x86/mm/Makefile Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/Makefile Thu Feb 15 14:09:39 2007 -0700 @@ -1,1 +1,4 @@ subdir-y += shadow subdir-y += shadow + +obj-y += paging.o +obj-y += p2m.o diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/p2m.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/mm/p2m.c Thu Feb 15 14:09:39 2007 -0700 @@ -0,0 +1,699 @@ +/****************************************************************************** + * arch/x86/mm/p2m.c + * + * physical-to-machine mappings for automatically-translated domains. + * + * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices. + * Parts of this code are Copyright (c) 2006 by XenSource Inc. + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <asm/domain.h> +#include <asm/page.h> +#include <asm/paging.h> +#include <asm/p2m.h> + +/* Debugging and auditing of the P2M code? */ +#define P2M_AUDIT 0 +#define P2M_DEBUGGING 1 + +/* The P2M lock. This protects all updates to the p2m table. 
+ * Updates are expected to be safe against concurrent reads, + * which do *not* require the lock */ + +#define p2m_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.p2m.lock); \ + (_d)->arch.p2m.locker = -1; \ + (_d)->arch.p2m.locker_function = "nobody"; \ + } while (0) + +#define p2m_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.p2m.locker == current->processor) )\ + { \ + printk("Error: p2m lock held by %s\n", \ + (_d)->arch.p2m.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.p2m.lock); \ + ASSERT((_d)->arch.p2m.locker == -1); \ + (_d)->arch.p2m.locker = current->processor; \ + (_d)->arch.p2m.locker_function = __func__; \ + } while (0) + +#define p2m_unlock(_d) \ + do { \ + ASSERT((_d)->arch.p2m.locker == current->processor); \ + (_d)->arch.p2m.locker = -1; \ + (_d)->arch.p2m.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.p2m.lock); \ + } while (0) + + + +/* Printouts */ +#define P2M_PRINTK(_f, _a...) \ + debugtrace_printk("p2m: %s(): " _f, __func__, ##_a) +#define P2M_ERROR(_f, _a...) \ + printk("pg error: %s(): " _f, __func__, ##_a) +#if P2M_DEBUGGING +#define P2M_DEBUG(_f, _a...) \ + debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a) +#else +#define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0) +#endif + + +/* Override macros from asm/page.h to make them work with mfn_t */ +#undef mfn_to_page +#define mfn_to_page(_m) (frame_table + mfn_x(_m)) +#undef mfn_valid +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) +#undef page_to_mfn +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) + + + +// Find the next level's P2M entry, checking for out-of-range gfn's... +// Returns NULL on error. +// +static l1_pgentry_t * +p2m_find_entry(void *table, unsigned long *gfn_remainder, + unsigned long gfn, u32 shift, u32 max) +{ + u32 index; + + index = *gfn_remainder >> shift; + if ( index >= max ) + { + P2M_DEBUG("gfn=0x%lx out of range " + "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", + gfn, *gfn_remainder, shift, index, max); + return NULL; + } + *gfn_remainder &= (1 << shift) - 1; + return (l1_pgentry_t *)table + index; +} + +// Walk one level of the P2M table, allocating a new table if required. +// Returns 0 on error. 
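
p2m_find_entry() above (and p2m_next_level() below, which calls it once per level) treats the gfn as a radix-tree key: the top bits of the remaining gfn index the current table, and the low bits are carried down to the next level. A small sketch of that decomposition, assuming the usual x86-64 geometry of nine index bits per level (constants here are illustrative, not taken from the patch):

#include <stdio.h>

int main(void)
{
    unsigned long gfn = 0x12345, remainder = gfn;
    /* L4..L1 shifts in page-frame units: 27, 18, 9, 0 on 4-level x86-64 */
    unsigned int shifts[] = { 27, 18, 9, 0 };

    for (int level = 4; level >= 1; level--) {
        unsigned int shift = shifts[4 - level];
        printf("level %d index = %#lx\n", level, remainder >> shift);
        if (shift)
            remainder &= (1UL << shift) - 1;   /* *gfn_remainder update */
    }
    return 0;
}
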
+// +static int +p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, + unsigned long *gfn_remainder, unsigned long gfn, u32 shift, + u32 max, unsigned long type) +{ + l1_pgentry_t *p2m_entry; + l1_pgentry_t new_entry; + void *next; + ASSERT(d->arch.p2m.alloc_page); + + if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, + shift, max)) ) + return 0; + + if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) + { + struct page_info *pg = d->arch.p2m.alloc_page(d); + if ( pg == NULL ) + return 0; + list_add_tail(&pg->list, &d->arch.p2m.pages); + pg->u.inuse.type_info = type | 1 | PGT_validated; + pg->count_info = 1; + + new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), + __PAGE_HYPERVISOR|_PAGE_USER); + + switch ( type ) { + case PGT_l3_page_table: + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 4); + break; + case PGT_l2_page_table: + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 3); + break; + case PGT_l1_page_table: + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 2); + break; + default: + BUG(); + break; + } + } + *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); + next = map_domain_page(mfn_x(*table_mfn)); + unmap_domain_page(*table); + *table = next; + + return 1; +} + +// Returns 0 on error (out of memory) +static int +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +{ + // XXX -- this might be able to be faster iff current->domain == d + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); + void *table =map_domain_page(mfn_x(table_mfn)); + unsigned long gfn_remainder = gfn; + l1_pgentry_t *p2m_entry; + l1_pgentry_t entry_content; + int rv=0; + +#if CONFIG_PAGING_LEVELS >= 4 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L4_PAGETABLE_SHIFT - PAGE_SHIFT, + L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) + goto out; +#endif +#if CONFIG_PAGING_LEVELS >= 3 + // When using PAE Xen, we only allow 33 bits of pseudo-physical + // address in translated guests (i.e. 8 GBytes). This restriction + // comes from wanting to map the P2M table into the 16MB RO_MPT hole + // in Xen's address space for translated PV guests. + // + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L3_PAGETABLE_SHIFT - PAGE_SHIFT, + (CONFIG_PAGING_LEVELS == 3 + ? 8 + : L3_PAGETABLE_ENTRIES), + PGT_l2_page_table) ) + goto out; +#endif + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) + goto out; + + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, + 0, L1_PAGETABLE_ENTRIES); + ASSERT(p2m_entry); + + /* Track the highest gfn for which we have ever had a valid mapping */ + if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) + d->arch.p2m.max_mapped_pfn = gfn; + + if ( mfn_valid(mfn) ) + entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); + else + entry_content = l1e_empty(); + + /* level 1 entry */ + paging_write_p2m_entry(d, gfn, p2m_entry, entry_content, 1); + + /* Success */ + rv = 1; + + out: + unmap_domain_page(table); + return rv; +} + + +/* Init the datastructures for later use by the p2m code */ +void p2m_init(struct domain *d) +{ + p2m_lock_init(d); + INIT_LIST_HEAD(&d->arch.p2m.pages); +} + + +// Allocate a new p2m table for a domain. +// +// The structure of the p2m table is that of a pagetable for xen (i.e. it is +// controlled by CONFIG_PAGING_LEVELS). +// +// The alloc_page and free_page functions will be used to get memory to +// build the p2m, and to release it again at the end of day. 
+// +// Returns 0 for success or -errno. +// +int p2m_alloc_table(struct domain *d, + struct page_info * (*alloc_page)(struct domain *d), + void (*free_page)(struct domain *d, struct page_info *pg)) + +{ + mfn_t mfn; + struct list_head *entry; + struct page_info *page, *p2m_top; + unsigned int page_count = 0; + unsigned long gfn; + + p2m_lock(d); + + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) + { + P2M_ERROR("p2m already allocated for this domain\n"); + p2m_unlock(d); + return -EINVAL; + } + + P2M_PRINTK("allocating p2m table\n"); + + d->arch.p2m.alloc_page = alloc_page; + d->arch.p2m.free_page = free_page; + + p2m_top = d->arch.p2m.alloc_page(d); + if ( p2m_top == NULL ) + { + p2m_unlock(d); + return -ENOMEM; + } +list_add_tail(&p2m_top->list, &d->arch.p2m.pages); + + p2m_top->count_info = 1; + p2m_top->u.inuse.type_info = +#if CONFIG_PAGING_LEVELS == 4 + PGT_l4_page_table +#elif CONFIG_PAGING_LEVELS == 3 + PGT_l3_page_table +#elif CONFIG_PAGING_LEVELS == 2 + PGT_l2_page_table +#endif + | 1 | PGT_validated; + + d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top)); + + P2M_PRINTK("populating p2m table\n"); + + /* Initialise physmap tables for slot zero. Other code assumes this. */ + gfn = 0; +mfn = _mfn(INVALID_MFN); + if ( !set_p2m_entry(d, gfn, mfn) ) + goto error; + + for ( entry = d->page_list.next; + entry != &d->page_list; + entry = entry->next ) + { + page = list_entry(entry, struct page_info, list); + mfn = page_to_mfn(page); + gfn = get_gpfn_from_mfn(mfn_x(mfn)); + page_count++; + if ( +#ifdef __x86_64__ + (gfn != 0x5555555555555555L) +#else + (gfn != 0x55555555L) +#endif + && gfn != INVALID_M2P_ENTRY + && !set_p2m_entry(d, gfn, mfn) ) + goto error; + } + + P2M_PRINTK("p2m table initialised (%u pages)\n", page_count); + p2m_unlock(d); + return 0; + + error: + P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" + PRI_mfn "\n", gfn, mfn_x(mfn)); + p2m_unlock(d); + return -ENOMEM; +} + +void p2m_teardown(struct domain *d) +/* Return all the p2m pages to Xen. + * We know we don't have any extra mappings to these pages */ +{ + struct list_head *entry, *n; + struct page_info *pg; + + p2m_lock(d); + d->arch.phys_table = pagetable_null(); + + list_for_each_safe(entry, n, &d->arch.p2m.pages) + { + pg = list_entry(entry, struct page_info, list); + list_del(entry); + d->arch.p2m.free_page(d, pg); + } + p2m_unlock(d); +} + +mfn_t +gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) +/* Read another domain's p2m entries */ +{ + mfn_t mfn; + paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + + ASSERT(paging_mode_translate(d)); + mfn = pagetable_get_mfn(d->arch.phys_table); + + + if ( gpfn > d->arch.p2m.max_mapped_pfn ) + /* This pfn is higher than the highest the p2m map currently holds */ + return _mfn(INVALID_MFN); + +#if CONFIG_PAGING_LEVELS >= 4 + { + l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); + l4e += l4_table_offset(addr); + if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l4e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l4e_get_pfn(*l4e)); + unmap_domain_page(l4e); + } +#endif +#if CONFIG_PAGING_LEVELS >= 3 + { + l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); +#if CONFIG_PAGING_LEVELS == 3 + /* On PAE hosts the p2m has eight l3 entries, not four (see + * shadow_set_p2m_entry()) so we can't use l3_table_offset. + * Instead, just count the number of l3es from zero. It's safe + * to do this because we already checked that the gfn is within + * the bounds of the p2m. 
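
The comment above flags the key subtlety in gfn_to_mfn_foreign() on PAE hosts: the p2m's top level has eight l3 slots, so the indexing line just after this comment uses addr >> L3_PAGETABLE_SHIFT directly, where l3_table_offset() would mask the index down to the four entries a PAE guest table has. A toy comparison of the two indexings (the shift is the standard PAE value, used only for illustration):

#include <stdio.h>

#define L3_SHIFT 30   /* PAE: each l3 entry covers 1GB */

int main(void)
{
    unsigned long long addr = 5ULL << 30;   /* frame in the 5-6GB range */
    unsigned int raw   = (unsigned int)(addr >> L3_SHIFT);      /* 0..7 */
    unsigned int guest = raw & 3;           /* l3_table_offset(): 0..3 */

    printf("p2m l3 slot %u vs guest-style slot %u\n", raw, guest);
    return 0;   /* prints 5 vs 1: masking would pick the wrong entry */
}
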
*/ + l3e += (addr >> L3_PAGETABLE_SHIFT); +#else + l3e += l3_table_offset(addr); +#endif + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l3e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l3e_get_pfn(*l3e)); + unmap_domain_page(l3e); + } +#endif + + l2e = map_domain_page(mfn_x(mfn)); + l2e += l2_table_offset(addr); + if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l2e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l2e_get_pfn(*l2e)); + unmap_domain_page(l2e); + + l1e = map_domain_page(mfn_x(mfn)); + l1e += l1_table_offset(addr); + if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) + { + unmap_domain_page(l1e); + return _mfn(INVALID_MFN); + } + mfn = _mfn(l1e_get_pfn(*l1e)); + unmap_domain_page(l1e); + + return mfn; +} + +#if P2M_AUDIT +static void audit_p2m(struct domain *d) +{ + struct list_head *entry; + struct page_info *page; + struct domain *od; + unsigned long mfn, gfn, m2pfn, lp2mfn = 0; + mfn_t p2mfn; + unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; + int test_linear; + + if ( !paging_mode_translate(d) ) + return; + + //P2M_PRINTK("p2m audit starts\n"); + + test_linear = ( (d == current->domain) + && !pagetable_is_null(current->arch.monitor_table) ); + if ( test_linear ) + local_flush_tlb(); + + /* Audit part one: walk the domain's page allocation list, checking + * the m2p entries. */ + for ( entry = d->page_list.next; + entry != &d->page_list; + entry = entry->next ) + { + page = list_entry(entry, struct page_info, list); + mfn = mfn_x(page_to_mfn(page)); + + // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn); + + od = page_get_owner(page); + + if ( od != d ) + { + P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", + mfn, od, (od?od->domain_id:-1), d, d->domain_id); + continue; + } + + gfn = get_gpfn_from_mfn(mfn); + if ( gfn == INVALID_M2P_ENTRY ) + { + orphans_i++; + //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", + // mfn); + continue; + } + + if ( gfn == 0x55555555 ) + { + orphans_d++; + //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", + // mfn); + continue; + } + + p2mfn = gfn_to_mfn_foreign(d, gfn); + if ( mfn_x(p2mfn) != mfn ) + { + mpbad++; + P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" + " (-> gfn %#lx)\n", + mfn, gfn, mfn_x(p2mfn), + (mfn_valid(p2mfn) + ? get_gpfn_from_mfn(mfn_x(p2mfn)) + : -1u)); + /* This m2p entry is stale: the domain has another frame in + * this physical slot. No great disaster, but for neatness, + * blow away the m2p entry. */ + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + } + + if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) ) + { + lp2mfn = mfn_x(gfn_to_mfn_current(gfn)); + if ( lp2mfn != mfn_x(p2mfn) ) + { + P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " + "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn)); + } + } + + // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", + // mfn, gfn, p2mfn, lp2mfn); + } + + /* Audit part two: walk the domain's p2m table, checking the entries. 
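
Audit part one, above, checks for every frame the domain owns that the m2p entry round-trips through the p2m — gfn = m2p[mfn] should map back to the same mfn — with the 0x5555... debug poison and INVALID_M2P_ENTRY counted as benign orphans; part two, which follows, does the inverse walk. A compact sketch of the part-one round-trip check over toy arrays:

#include <stdio.h>

#define INVALID (~0UL)

int main(void)
{
    /* Toy m2p and p2m tables, indexed by mfn and gfn respectively. */
    unsigned long m2p[4] = { 2, INVALID, 0, 1 };
    unsigned long p2m[4] = { 2, 0, 0, INVALID };   /* p2m[1] is stale */
    unsigned long mismatches = 0;

    for (unsigned long mfn = 0; mfn < 4; mfn++) {
        unsigned long gfn = m2p[mfn];
        if (gfn == INVALID)
            continue;                   /* orphan: counted, not an error */
        if (p2m[gfn] != mfn)
            mismatches++;               /* would be reported and repaired */
    }
    printf("%lu mismatched m2p entries\n", mismatches);
    return 0;
}
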
*/ + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) + { + l2_pgentry_t *l2e; + l1_pgentry_t *l1e; + int i1, i2; + +#if CONFIG_PAGING_LEVELS == 4 + l4_pgentry_t *l4e; + l3_pgentry_t *l3e; + int i3, i4; + l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#elif CONFIG_PAGING_LEVELS == 3 + l3_pgentry_t *l3e; + int i3; + l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#else /* CONFIG_PAGING_LEVELS == 2 */ + l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); +#endif + + gfn = 0; +#if CONFIG_PAGING_LEVELS >= 3 +#if CONFIG_PAGING_LEVELS >= 4 + for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) + { + if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); +#endif /* now at levels 3 or 4... */ + for ( i3 = 0; + i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); + i3++ ) + { + if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); +#endif /* all levels... */ + for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) + { + if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) + { + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); + + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) + { + if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) + continue; + mfn = l1e_get_pfn(l1e[i1]); + ASSERT(mfn_valid(_mfn(mfn))); + m2pfn = get_gpfn_from_mfn(mfn); + if ( m2pfn != gfn ) + { + pmbad++; + P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" + " -> gfn %#lx\n", gfn, mfn, m2pfn); + BUG(); + } + } + unmap_domain_page(l1e); + } +#if CONFIG_PAGING_LEVELS >= 3 + unmap_domain_page(l2e); + } +#if CONFIG_PAGING_LEVELS >= 4 + unmap_domain_page(l3e); + } +#endif +#endif + +#if CONFIG_PAGING_LEVELS == 4 + unmap_domain_page(l4e); +#elif CONFIG_PAGING_LEVELS == 3 + unmap_domain_page(l3e); +#else /* CONFIG_PAGING_LEVELS == 2 */ + unmap_domain_page(l2e); +#endif + + } + + //P2M_PRINTK("p2m audit complete\n"); + //if ( orphans_i | orphans_d | mpbad | pmbad ) + // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", + // orphans_i + orphans_d, orphans_i, orphans_d, + if ( mpbad | pmbad ) + P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", + pmbad, mpbad); +} +#else +#define audit_p2m(_d) do { (void)(_d); } while(0) +#endif /* P2M_AUDIT */ + + + +static void +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) +{ + if ( !paging_mode_translate(d) ) + return; + P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); + + ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn); + //ASSERT(mfn_to_gfn(d, mfn) == gfn); + + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); +} + +void +guest_physmap_remove_page(struct domain *d, unsigned long gfn, + unsigned long mfn) +{ + p2m_lock(d); + audit_p2m(d); + p2m_remove_page(d, gfn, mfn); + audit_p2m(d); + p2m_unlock(d); +} + +void +guest_physmap_add_page(struct domain *d, unsigned long gfn, + unsigned long mfn) +{ + unsigned long ogfn; + mfn_t omfn; + + if ( !paging_mode_translate(d) ) + return; + + p2m_lock(d); + audit_p2m(d); + + P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn); + + omfn = gfn_to_mfn(d, gfn); + if ( mfn_valid(omfn) ) + { + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); + set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); + } + + ogfn = 
mfn_to_gfn(d, _mfn(mfn)); + if ( +#ifdef __x86_64__ + (ogfn != 0x5555555555555555L) +#else + (ogfn != 0x55555555L) +#endif + && (ogfn != INVALID_M2P_ENTRY) + && (ogfn != gfn) ) + { + /* This machine frame is already mapped at another physical address */ + P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", + mfn, ogfn, gfn); + if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) ) + { + P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", + ogfn , mfn_x(omfn)); + if ( mfn_x(omfn) == mfn ) + p2m_remove_page(d, ogfn, mfn); + } + } + + set_p2m_entry(d, gfn, _mfn(mfn)); + set_gpfn_from_mfn(mfn, gfn); + + audit_p2m(d); + p2m_unlock(d); +} + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/paging.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/mm/paging.c Thu Feb 15 14:09:39 2007 -0700 @@ -0,0 +1,143 @@ +/****************************************************************************** + * arch/x86/paging.c + * + * x86 specific paging support + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) + * Copyright (c) 2007 XenSource Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <xen/init.h> +#include <asm/paging.h> +#include <asm/shadow.h> +#include <asm/p2m.h> + +/* Xen command-line option to enable hardware-assisted paging */ +int opt_hap_enabled = 0; +boolean_param("hap", opt_hap_enabled); + +/* Printouts */ +#define PAGING_PRINTK(_f, _a...) \ + debugtrace_printk("pg: %s(): " _f, __func__, ##_a) +#define PAGING_ERROR(_f, _a...) \ + printk("pg error: %s(): " _f, __func__, ##_a) +#define PAGING_DEBUG(flag, _f, _a...) \ + do { \ + if (PAGING_DEBUG_ ## flag) \ + debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \ + } while (0) + + +/* Domain paging struct initialization. */ +void paging_domain_init(struct domain *d) +{ + p2m_init(d); + shadow_domain_init(d); +} + +/* vcpu paging struct initialization goes here */ +void paging_vcpu_init(struct vcpu *v) +{ + shadow_vcpu_init(v); +} + + +int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl) +{ + /* Here, dispatch domctl to the appropriate paging code */ + return shadow_domctl(d, sc, u_domctl); +} + +/* Call when destroying a domain */ +void paging_teardown(struct domain *d) +{ + shadow_teardown(d); + /* Call other modes' teardown code here */ +} + +/* Call once all of the references to the domain have gone away */ +void paging_final_teardown(struct domain *d) +{ + shadow_teardown(d); + /* Call other modes' final teardown code here */ +} + +/* Enable an arbitrary paging-assistance mode. Call once at domain + * creation. 
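
paging_enable(), which follows, is deliberately a thin dispatcher: the mode word carries feature bits, and for now only the shadow bit selects a backend, with everything else rejected until hardware-assisted paging is wired in. A hedged sketch of the same bit-dispatch shape (flag values and names invented for illustration):

#include <stdio.h>
#include <errno.h>

#define PG_SH_ENABLE  (1u << 0)   /* illustrative bit assignments only */
#define PG_HAP_ENABLE (1u << 1)

static int shadow_enable_sketch(unsigned int mode)
{
    printf("shadow mode enabled, flags %#x\n", mode);
    return 0;
}

static int paging_enable_sketch(unsigned int mode)
{
    if (mode & PG_SH_ENABLE)
        return shadow_enable_sketch(mode);
    return -EINVAL;                /* no other backends supported yet */
}

int main(void)
{
    printf("hap-only request -> %d\n", paging_enable_sketch(PG_HAP_ENABLE));
    return paging_enable_sketch(PG_SH_ENABLE);
}
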
*/ +int paging_enable(struct domain *d, u32 mode) +{ + if ( mode & PG_SH_enable ) + return shadow_enable(d, mode); + else + /* No other modes supported yet */ + return -EINVAL; +} + +/* Print paging-assistance info to the console */ +void paging_dump_domain_info(struct domain *d) +{ + if ( paging_mode_enabled(d) ) + { + printk(" paging assistance: "); + if ( paging_mode_shadow(d) ) + printk("shadow "); + if ( paging_mode_hap(d) ) + printk("hap "); + if ( paging_mode_refcounts(d) ) + printk("refcounts "); + if ( paging_mode_log_dirty(d) ) + printk("log_dirty "); + if ( paging_mode_translate(d) ) + printk("translate "); + if ( paging_mode_external(d) ) + printk("external "); + printk("\n"); + } +} + +void paging_dump_vcpu_info(struct vcpu *v) +{ + if ( paging_mode_enabled(v->domain) ) + { + printk(" paging assistance: "); + if ( paging_mode_shadow(v->domain) ) + { + if ( v->arch.paging.mode ) + printk("shadowed %u-on-%u, %stranslated\n", + v->arch.paging.mode->guest_levels, + v->arch.paging.mode->shadow.shadow_levels, + paging_vcpu_mode_translate(v) ? "" : "not "); + else + printk("not shadowed\n"); + } + else if ( paging_mode_hap(v->domain) && v->arch.paging.mode ) + printk("hap, %u levels\n", + v->arch.paging.mode->guest_levels); + else + printk("none\n"); + } +} + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/shadow/common.c Thu Feb 15 14:09:39 2007 -0700 @@ -47,12 +47,27 @@ void shadow_domain_init(struct domain *d int i; shadow_lock_init(d); for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); -} - + INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); + INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); + INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); +} + +/* Setup the shadow-specfic parts of a vcpu struct. Note: The most important + * job is to initialize the update_paging_modes() function pointer, which is + * used to initialized the rest of resources. Therefore, it really does not + * matter to have v->arch.paging.mode pointing to any mode, as long as it can + * be compiled. 
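
shadow_vcpu_init(), just below, seeds v->arch.paging.mode, and the emulation hunks after it show why: callers now reach every paging operation through that per-vcpu table of function pointers instead of naming the shadow code directly, which is what lets a hap backend slot in later. A minimal sketch of the indirection, with invented, simplified names standing in for the real ops structure:

#include <stdio.h>

struct paging_mode_sketch {
    unsigned int guest_levels;
    int (*write_entry)(unsigned long addr, unsigned long val);
};

static int shadow_write_entry(unsigned long addr, unsigned long val)
{
    printf("shadow backend: write %#lx -> %#lx\n", val, addr);
    return 0;
}

static const struct paging_mode_sketch shadow_mode_3on3 = {
    .guest_levels = 3,
    .write_entry  = shadow_write_entry,
};

struct vcpu_sketch { const struct paging_mode_sketch *mode; };

int main(void)
{
    struct vcpu_sketch v = { .mode = &shadow_mode_3on3 };
    /* Callers never name the backend; they go through the mode table. */
    return v.mode->write_entry(0x1000, 0xbeef);
}
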
+ */ +void shadow_vcpu_init(struct vcpu *v) +{ +#if CONFIG_PAGING_LEVELS == 4 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); +#elif CONFIG_PAGING_LEVELS == 3 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); +#elif CONFIG_PAGING_LEVELS == 2 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); +#endif +} #if SHADOW_AUDIT int shadow_audit_enable = 0; @@ -265,7 +280,7 @@ hvm_emulate_write(enum x86_segment seg, if ( rc ) return rc; - return v->arch.shadow.mode->x86_emulate_write( + return v->arch.paging.mode->shadow.x86_emulate_write( v, addr, &val, bytes, sh_ctxt); } @@ -288,7 +303,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg if ( rc ) return rc; - return v->arch.shadow.mode->x86_emulate_cmpxchg( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( v, addr, old, new, bytes, sh_ctxt); } @@ -312,7 +327,7 @@ hvm_emulate_cmpxchg8b(enum x86_segment s if ( rc ) return rc; - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt); } @@ -353,7 +368,7 @@ pv_emulate_write(enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - return v->arch.shadow.mode->x86_emulate_write( + return v->arch.paging.mode->shadow.x86_emulate_write( v, offset, &val, bytes, sh_ctxt); } @@ -368,7 +383,7 @@ pv_emulate_cmpxchg(enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - return v->arch.shadow.mode->x86_emulate_cmpxchg( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( v, offset, old, new, bytes, sh_ctxt); } @@ -384,7 +399,7 @@ pv_emulate_cmpxchg8b(enum x86_segment se struct sh_emulate_ctxt *sh_ctxt = container_of(ctxt, struct sh_emulate_ctxt, ctxt); struct vcpu *v = current; - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt); } @@ -721,7 +736,7 @@ static inline int chunk_is_available(str int i; for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) - if ( !list_empty(&d->arch.shadow.freelists[i]) ) + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) return 1; return 0; } @@ -783,7 +798,7 @@ void shadow_prealloc(struct domain *d, u /* Stage one: walk the list of pinned pages, unpinning them */ perfc_incrc(shadow_prealloc_1); - list_for_each_backwards_safe(l, t, &d->arch.shadow.pinned_shadows) + list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); smfn = shadow_page_to_mfn(sp); @@ -823,9 +838,9 @@ void shadow_prealloc(struct domain *d, u SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n" " shadow pages total = %u, free = %u, p2m=%u\n", 1 << order, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); BUG(); } @@ -840,7 +855,7 @@ static void shadow_blow_tables(struct do int i; /* Pass one: unpin all pinned pages */ - list_for_each_backwards_safe(l,t, &d->arch.shadow.pinned_shadows) + list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); smfn = shadow_page_to_mfn(sp); @@ -905,9 +920,9 @@ mfn_t shadow_alloc(struct domain *d, /* Find smallest order which can satisfy the request. 
*/ for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) - if ( !list_empty(&d->arch.shadow.freelists[i]) ) + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) { - sp = list_entry(d->arch.shadow.freelists[i].next, + sp = list_entry(d->arch.paging.shadow.freelists[i].next, struct shadow_page_info, list); list_del(&sp->list); @@ -916,10 +931,10 @@ mfn_t shadow_alloc(struct domain *d, { i--; sp->order = i; - list_add_tail(&sp->list, &d->arch.shadow.freelists[i]); + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]); sp += 1 << i; } - d->arch.shadow.free_pages -= 1 << order; + d->arch.paging.shadow.free_pages -= 1 << order; /* Init page info fields and clear the pages */ for ( i = 0; i < 1<<order ; i++ ) @@ -976,7 +991,7 @@ void shadow_free(struct domain *d, mfn_t ASSERT(shadow_type != SH_type_p2m_table); order = shadow_order(shadow_type); - d->arch.shadow.free_pages += 1 << order; + d->arch.paging.shadow.free_pages += 1 << order; for ( i = 0; i < 1<<order; i++ ) { @@ -985,8 +1000,8 @@ void shadow_free(struct domain *d, mfn_t for_each_vcpu(d, v) { /* No longer safe to look for a writeable mapping in this shadow */ - if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) - v->arch.shadow.last_writeable_pte_smfn = 0; + if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) + v->arch.paging.shadow.last_writeable_pte_smfn = 0; } #endif /* Strip out the type: this is now a free shadow page */ @@ -1019,7 +1034,7 @@ void shadow_free(struct domain *d, mfn_t } sp->order = order; - list_add_tail(&sp->list, &d->arch.shadow.freelists[order]); + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); } /* Divert some memory from the pool to be used by the p2m mapping. @@ -1033,19 +1048,19 @@ void shadow_free(struct domain *d, mfn_t * returns non-zero on success. */ static int -shadow_alloc_p2m_pages(struct domain *d) +sh_alloc_p2m_pages(struct domain *d) { struct page_info *pg; u32 i; ASSERT(shadow_locked_by_me(d)); - if ( d->arch.shadow.total_pages + if ( d->arch.paging.shadow.total_pages < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) ) return 0; /* Not enough shadow memory: need to increase it first */ pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); - d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); - d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); + d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); + d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++) { /* Unlike shadow pages, mark p2m pages as owned by the domain. @@ -1055,34 +1070,59 @@ shadow_alloc_p2m_pages(struct domain *d) * believed to be a concern. */ page_set_owner(&pg[i], d); - list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist); + pg->count_info = 1; + list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist); } return 1; } // Returns 0 if no memory is available... 
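
shadow_alloc(), earlier in this hunk, satisfies an order-n request buddy-style: it takes the smallest free chunk of order >= n and, while walking down to the requested order, returns the front half of each split to the matching freelist and keeps carving the back half (the sp += 1 << i step). A self-contained sketch of that split loop over plain page indices:

#include <stdio.h>

#define MAX_ORDER 2

int main(void)
{
    unsigned int chunk_order = MAX_ORDER;  /* found a free order-2 chunk */
    unsigned int want = 0;                 /* caller asked for order 0   */
    unsigned long base = 0;                /* page index of the chunk    */

    while (chunk_order != want) {
        chunk_order--;
        /* Return the front half to freelists[chunk_order]... */
        printf("free half: pages [%lu, %lu) at order %u\n",
               base, base + (1UL << chunk_order), chunk_order);
        /* ...and keep carving the back half. */
        base += 1UL << chunk_order;
    }
    printf("allocated page %lu at order %u\n", base, want);
    return 0;
}
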
-mfn_t +struct page_info * shadow_alloc_p2m_page(struct domain *d) { struct list_head *entry; struct page_info *pg; mfn_t mfn; void *p; - - if ( list_empty(&d->arch.shadow.p2m_freelist) && - !shadow_alloc_p2m_pages(d) ) - return _mfn(0); - entry = d->arch.shadow.p2m_freelist.next; + + shadow_lock(d); + + if ( list_empty(&d->arch.paging.shadow.p2m_freelist) && + !sh_alloc_p2m_pages(d) ) + { + shadow_unlock(d); + return NULL; + } + entry = d->arch.paging.shadow.p2m_freelist.next; list_del(entry); - list_add_tail(entry, &d->arch.shadow.p2m_inuse); + + shadow_unlock(d); + pg = list_entry(entry, struct page_info, list); - pg->count_info = 1; mfn = page_to_mfn(pg); p = sh_map_domain_page(mfn); clear_page(p); sh_unmap_domain_page(p); - return mfn; + return pg; +} + +void +shadow_free_p2m_page(struct domain *d, struct page_info *pg) +{ + ASSERT(page_get_owner(pg) == d); + /* Should have just the one ref we gave it in alloc_p2m_page() */ + if ( (pg->count_info & PGC_count_mask) != 1 ) + { + SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", + pg->count_info, pg->u.inuse.type_info); + } + /* Free should not decrement domain's total allocation, since + * these pages were allocated without an owner. */ + page_set_owner(pg, NULL); + free_domheap_pages(pg, 0); + d->arch.paging.shadow.p2m_pages--; + perfc_decr(shadow_alloc_count); } #if CONFIG_PAGING_LEVELS == 3 @@ -1130,344 +1170,6 @@ static void p2m_install_entry_in_monitor } #endif -// Find the next level's P2M entry, checking for out-of-range gfn's... -// Returns NULL on error. -// -static l1_pgentry_t * -p2m_find_entry(void *table, unsigned long *gfn_remainder, - unsigned long gfn, u32 shift, u32 max) -{ - u32 index; - - index = *gfn_remainder >> shift; - if ( index >= max ) - { - SHADOW_DEBUG(P2M, "gfn=0x%lx out of range " - "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", - gfn, *gfn_remainder, shift, index, max); - return NULL; - } - *gfn_remainder &= (1 << shift) - 1; - return (l1_pgentry_t *)table + index; -} - -// Walk one level of the P2M table, allocating a new table if required. -// Returns 0 on error. 
-// -static int -p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, - unsigned long *gfn_remainder, unsigned long gfn, u32 shift, - u32 max, unsigned long type) -{ - l1_pgentry_t *p2m_entry; - void *next; - - if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, - shift, max)) ) - return 0; - - if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) - { - mfn_t mfn = shadow_alloc_p2m_page(d); - if ( mfn_x(mfn) == 0 ) - return 0; - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); - mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated; - mfn_to_page(mfn)->count_info = 1; -#if CONFIG_PAGING_LEVELS == 3 - if (type == PGT_l2_page_table) - { - struct vcpu *v; - /* We have written to the p2m l3: need to sync the per-vcpu - * copies of it in the monitor tables */ - p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); - /* Also, any vcpus running on shadows of the p2m need to - * reload their CR3s so the change propagates to the shadow */ - ASSERT(shadow_locked_by_me(d)); - for_each_vcpu(d, v) - { - if ( pagetable_get_pfn(v->arch.guest_table) - == pagetable_get_pfn(d->arch.phys_table) - && v->arch.shadow.mode != NULL ) - v->arch.shadow.mode->update_cr3(v, 0); - } - } -#endif - /* The P2M can be shadowed: keep the shadows synced */ - if ( d->vcpu[0] != NULL ) - (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, - p2m_entry, sizeof *p2m_entry); - } - *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); - next = sh_map_domain_page(*table_mfn); - sh_unmap_domain_page(*table); - *table = next; - - return 1; -} - -// Returns 0 on error (out of memory) -int -shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) -{ - // XXX -- this might be able to be faster iff current->domain == d - mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); - void *table = sh_map_domain_page(table_mfn); - unsigned long gfn_remainder = gfn; - l1_pgentry_t *p2m_entry; - int rv=0; - -#if CONFIG_PAGING_LEVELS >= 4 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L4_PAGETABLE_SHIFT - PAGE_SHIFT, - L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) - goto out; -#endif -#if CONFIG_PAGING_LEVELS >= 3 - // When using PAE Xen, we only allow 33 bits of pseudo-physical - // address in translated guests (i.e. 8 GBytes). This restriction - // comes from wanting to map the P2M table into the 16MB RO_MPT hole - // in Xen's address space for translated PV guests. - // - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L3_PAGETABLE_SHIFT - PAGE_SHIFT, - (CONFIG_PAGING_LEVELS == 3 - ? 8 - : L3_PAGETABLE_ENTRIES), - PGT_l2_page_table) ) - goto out; -#endif - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L2_PAGETABLE_SHIFT - PAGE_SHIFT, - L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) - goto out; - - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, - 0, L1_PAGETABLE_ENTRIES); - ASSERT(p2m_entry); - if ( mfn_valid(mfn) ) - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); - else - *p2m_entry = l1e_empty(); - - /* Track the highest gfn for which we have ever had a valid mapping */ - if ( mfn_valid(mfn) && (gfn > d->arch.max_mapped_pfn) ) - d->arch.max_mapped_pfn = gfn; - - /* The P2M can be shadowed: keep the shadows synced */ - if ( d->vcpu[0] != NULL ) - (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, - p2m_entry, sizeof(*p2m_entry)); - - /* Success */ - rv = 1; - - out: - sh_unmap_domain_page(table); - return rv; -} - -// Allocate a new p2m table for a domain. 
-// -// The structure of the p2m table is that of a pagetable for xen (i.e. it is -// controlled by CONFIG_PAGING_LEVELS). -// -// Returns 0 if p2m table could not be initialized -// -static int -shadow_alloc_p2m_table(struct domain *d) -{ - mfn_t p2m_top, mfn; - struct list_head *entry; - struct page_info *page; - unsigned int page_count = 0; - unsigned long gfn; - - SHADOW_PRINTK("allocating p2m table\n"); - ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0); - - p2m_top = shadow_alloc_p2m_page(d); - mfn_to_page(p2m_top)->count_info = 1; - mfn_to_page(p2m_top)->u.inuse.type_info = -#if CONFIG_PAGING_LEVELS == 4 - PGT_l4_page_table -#elif CONFIG_PAGING_LEVELS == 3 - PGT_l3_page_table -#elif CONFIG_PAGING_LEVELS == 2 - PGT_l2_page_table -#endif - | 1 | PGT_validated; - - if ( mfn_x(p2m_top) == 0 ) - return 0; - - d->arch.phys_table = pagetable_from_mfn(p2m_top); - - SHADOW_PRINTK("populating p2m table\n"); - - /* Initialise physmap tables for slot zero. Other code assumes this. */ - gfn = 0; - mfn = _mfn(INVALID_MFN); - if ( !shadow_set_p2m_entry(d, gfn, mfn) ) - goto error; - - /* Build a p2m map that matches the m2p entries for this domain's - * allocated pages. Skip any pages that have an explicitly invalid - * or obviously bogus m2p entry. */ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); - mfn = page_to_mfn(page); - gfn = get_gpfn_from_mfn(mfn_x(mfn)); - page_count++; - if ( -#ifdef __x86_64__ - (gfn != 0x5555555555555555L) -#else - (gfn != 0x55555555L) -#endif - && gfn != INVALID_M2P_ENTRY - && (gfn < - (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) - && !shadow_set_p2m_entry(d, gfn, mfn) ) - goto error; - } - - SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count); - return 1; - - error: - SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" - SH_PRI_mfn "\n", gfn, mfn_x(mfn)); - return 0; -} - -mfn_t -sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) -/* Read another domain's p2m entries */ -{ - mfn_t mfn; - paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; - l2_pgentry_t *l2e; - l1_pgentry_t *l1e; - - ASSERT(shadow_mode_translate(d)); - mfn = pagetable_get_mfn(d->arch.phys_table); - - - if ( gpfn > d->arch.max_mapped_pfn ) - /* This pfn is higher than the highest the p2m map currently holds */ - return _mfn(INVALID_MFN); - -#if CONFIG_PAGING_LEVELS >= 4 - { - l4_pgentry_t *l4e = sh_map_domain_page(mfn); - l4e += l4_table_offset(addr); - if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l4e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l4e_get_pfn(*l4e)); - sh_unmap_domain_page(l4e); - } -#endif -#if CONFIG_PAGING_LEVELS >= 3 - { - l3_pgentry_t *l3e = sh_map_domain_page(mfn); -#if CONFIG_PAGING_LEVELS == 3 - /* On PAE hosts the p2m has eight l3 entries, not four (see - * shadow_set_p2m_entry()) so we can't use l3_table_offset. - * Instead, just count the number of l3es from zero. It's safe - * to do this because we already checked that the gfn is within - * the bounds of the p2m. 
*/ - l3e += (addr >> L3_PAGETABLE_SHIFT); -#else - l3e += l3_table_offset(addr); -#endif - if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l3e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l3e_get_pfn(*l3e)); - sh_unmap_domain_page(l3e); - } -#endif - - l2e = sh_map_domain_page(mfn); - l2e += l2_table_offset(addr); - if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l2e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l2e_get_pfn(*l2e)); - sh_unmap_domain_page(l2e); - - l1e = sh_map_domain_page(mfn); - l1e += l1_table_offset(addr); - if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) - { - sh_unmap_domain_page(l1e); - return _mfn(INVALID_MFN); - } - mfn = _mfn(l1e_get_pfn(*l1e)); - sh_unmap_domain_page(l1e); - - return mfn; -} - -unsigned long -shadow_gfn_to_mfn_foreign(unsigned long gpfn) -{ - return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn)); -} - - -static void shadow_p2m_teardown(struct domain *d) -/* Return all the p2m pages to Xen. - * We know we don't have any extra mappings to these pages */ -{ - struct list_head *entry, *n; - struct page_info *pg; - - d->arch.phys_table = pagetable_null(); - - list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse) - { - pg = list_entry(entry, struct page_info, list); - list_del(entry); - /* Should have just the one ref we gave it in alloc_p2m_page() */ - if ( (pg->count_info & PGC_count_mask) != 1 ) - { - SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n", - pg->count_info, pg->u.inuse.type_info); - } - ASSERT(page_get_owner(pg) == d); - /* Free should not decrement domain's total allocation, since - * these pages were allocated without an owner. */ - page_set_owner(pg, NULL); - free_domheap_pages(pg, 0); - d->arch.shadow.p2m_pages--; - perfc_decr(shadow_alloc_count); - } - list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist) - { - list_del(entry); - pg = list_entry(entry, struct page_info, list); - ASSERT(page_get_owner(pg) == d); - /* Free should not decrement domain's total allocation. */ - page_set_owner(pg, NULL); - free_domheap_pages(pg, 0); - d->arch.shadow.p2m_pages--; - perfc_decr(shadow_alloc_count); - } - ASSERT(d->arch.shadow.p2m_pages == 0); -} - /* Set the pool of shadow pages to the required number of pages. * Input will be rounded up to at least shadow_min_acceptable_pages(), * plus space for the p2m table. 
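
sh_set_allocation(), in the next hunk, first rounds the requested page count up to a multiple of 1<<SHADOW_MAX_ORDER so the pool can always be grown and shrunk in whole max-order chunks. The round-up is the usual power-of-two mask idiom; a quick sketch (order value here is illustrative):

#include <stdio.h>

#define SKETCH_MAX_ORDER 2   /* illustrative; chunk = 4 pages */

static unsigned int round_up_pow2(unsigned int pages, unsigned int order)
{
    unsigned int mask = (1u << order) - 1;
    return (pages + mask) & ~mask;   /* same idiom as sh_set_allocation() */
}

int main(void)
{
    for (unsigned int p = 0; p <= 9; p++)
        printf("%u -> %u\n", p, round_up_pow2(p, SKETCH_MAX_ORDER));
    return 0;
}
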
@@ -1491,11 +1193,11 @@ static unsigned int sh_set_allocation(st pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1); SHADOW_PRINTK("current %i target %i\n", - d->arch.shadow.total_pages, pages); - - while ( d->arch.shadow.total_pages != pages ) - { - if ( d->arch.shadow.total_pages < pages ) + d->arch.paging.shadow.total_pages, pages); + + while ( d->arch.paging.shadow.total_pages != pages ) + { + if ( d->arch.paging.shadow.total_pages < pages ) { /* Need to allocate more memory from domheap */ sp = (struct shadow_page_info *) @@ -1505,8 +1207,8 @@ static unsigned int sh_set_allocation(st SHADOW_PRINTK("failed to allocate shadow pages.\n"); return -ENOMEM; } - d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER; - d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER; for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) { sp[j].type = 0; @@ -1518,18 +1220,18 @@ static unsigned int sh_set_allocation(st } sp->order = SHADOW_MAX_ORDER; list_add_tail(&sp->list, - &d->arch.shadow.freelists[SHADOW_MAX_ORDER]); + &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]); } - else if ( d->arch.shadow.total_pages > pages ) + else if ( d->arch.paging.shadow.total_pages > pages ) { /* Need to return memory to domheap */ shadow_prealloc(d, SHADOW_MAX_ORDER); - ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER])); - sp = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next, + ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER])); + sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next, struct shadow_page_info, list); list_del(&sp->list); - d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; - d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; + d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER); } @@ -1547,7 +1249,7 @@ static unsigned int sh_set_allocation(st /* Return the size of the shadow pool, rounded up to the nearest MB */ static unsigned int shadow_get_allocation(struct domain *d) { - unsigned int pg = d->arch.shadow.total_pages; + unsigned int pg = d->arch.paging.shadow.total_pages; return ((pg >> (20 - PAGE_SHIFT)) + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); } @@ -1583,7 +1285,7 @@ static void sh_hash_audit_bucket(struct if ( !(SHADOW_AUDIT_ENABLE) ) return; - sp = d->arch.shadow.hash_table[bucket]; + sp = d->arch.paging.shadow.hash_table[bucket]; while ( sp ) { /* Not a shadow? 
*/ @@ -1608,7 +1310,7 @@ static void sh_hash_audit_bucket(struct if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) { - SHADOW_ERROR("MFN %#lx shadowed (by %#"SH_PRI_mfn")" + SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" " but has typecount %#lx\n", sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), gpg->u.inuse.type_info); @@ -1652,13 +1354,13 @@ static int shadow_hash_alloc(struct doma struct shadow_page_info **table; ASSERT(shadow_locked_by_me(d)); - ASSERT(!d->arch.shadow.hash_table); + ASSERT(!d->arch.paging.shadow.hash_table); table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); if ( !table ) return 1; memset(table, 0, SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *)); - d->arch.shadow.hash_table = table; + d->arch.paging.shadow.hash_table = table; return 0; } @@ -1667,10 +1369,10 @@ static void shadow_hash_teardown(struct static void shadow_hash_teardown(struct domain *d) { ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); - - xfree(d->arch.shadow.hash_table); - d->arch.shadow.hash_table = NULL; + ASSERT(d->arch.paging.shadow.hash_table); + + xfree(d->arch.paging.shadow.hash_table); + d->arch.paging.shadow.hash_table = NULL; } @@ -1683,7 +1385,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, key_t key; ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); + ASSERT(d->arch.paging.shadow.hash_table); ASSERT(t); sh_hash_audit(d); @@ -1692,16 +1394,16 @@ mfn_t shadow_hash_lookup(struct vcpu *v, key = sh_hash(n, t); sh_hash_audit_bucket(d, key); - sp = d->arch.shadow.hash_table[key]; + sp = d->arch.paging.shadow.hash_table[key]; prev = NULL; while(sp) { if ( sp->backpointer == n && sp->type == t ) { /* Pull-to-front if 'sp' isn't already the head item */ - if ( unlikely(sp != d->arch.shadow.hash_table[key]) ) + if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) ) { - if ( unlikely(d->arch.shadow.hash_walking != 0) ) + if ( unlikely(d->arch.paging.shadow.hash_walking != 0) ) /* Can't reorder: someone is walking the hash chains */ return shadow_page_to_mfn(sp); else @@ -1710,8 +1412,8 @@ mfn_t shadow_hash_lookup(struct vcpu *v, /* Delete sp from the list */ prev->next_shadow = sp->next_shadow; /* Re-insert it at the head of the list */ - sp->next_shadow = d->arch.shadow.hash_table[key]; - d->arch.shadow.hash_table[key] = sp; + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; + d->arch.paging.shadow.hash_table[key] = sp; } } else @@ -1737,7 +1439,7 @@ void shadow_hash_insert(struct vcpu *v, key_t key; ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); + ASSERT(d->arch.paging.shadow.hash_table); ASSERT(t); sh_hash_audit(d); @@ -1748,8 +1450,8 @@ void shadow_hash_insert(struct vcpu *v, /* Insert this shadow at the top of the bucket */ sp = mfn_to_shadow_page(smfn); - sp->next_shadow = d->arch.shadow.hash_table[key]; - d->arch.shadow.hash_table[key] = sp; + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; + d->arch.paging.shadow.hash_table[key] = sp; sh_hash_audit_bucket(d, key); } @@ -1763,7 +1465,7 @@ void shadow_hash_delete(struct vcpu *v, key_t key; ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_table); + ASSERT(d->arch.paging.shadow.hash_table); ASSERT(t); sh_hash_audit(d); @@ -1773,13 +1475,13 @@ void shadow_hash_delete(struct vcpu *v, sh_hash_audit_bucket(d, key); sp = mfn_to_shadow_page(smfn); - if ( d->arch.shadow.hash_table[key] == sp ) + if ( d->arch.paging.shadow.hash_table[key] == sp ) /* Easy case: 
we're deleting the head item. */ - d->arch.shadow.hash_table[key] = sp->next_shadow; + d->arch.paging.shadow.hash_table[key] = sp->next_shadow; else { /* Need to search for the one we want */ - x = d->arch.shadow.hash_table[key]; + x = d->arch.paging.shadow.hash_table[key]; while ( 1 ) { ASSERT(x); /* We can't have hit the end, since our target is @@ -1818,15 +1520,15 @@ static void hash_foreach(struct vcpu *v, /* Say we're here, to stop hash-lookups reordering the chains */ ASSERT(shadow_locked_by_me(d)); - ASSERT(d->arch.shadow.hash_walking == 0); - d->arch.shadow.hash_walking = 1; + ASSERT(d->arch.paging.shadow.hash_walking == 0); + d->arch.paging.shadow.hash_walking = 1; for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) { /* WARNING: This is not safe against changes to the hash table. * The callback *must* return non-zero if it has inserted or * deleted anything from the hash (lookups are OK, though). */ - for ( x = d->arch.shadow.hash_table[i]; x; x = x->next_shadow ) + for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow ) { if ( callback_mask & (1 << x->type) ) { @@ -1839,7 +1541,7 @@ static void hash_foreach(struct vcpu *v, } if ( done ) break; } - d->arch.shadow.hash_walking = 0; + d->arch.paging.shadow.hash_walking = 0; } @@ -2008,27 +1710,27 @@ int sh_remove_write_access(struct vcpu * * and that mapping is likely to be in the current pagetable, * in the guest's linear map (on non-HIGHPTE linux and windows)*/ -#define GUESS(_a, _h) do { \ - if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) ) \ - perfc_incrc(shadow_writeable_h_ ## _h); \ - if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ - return 1; \ +#define GUESS(_a, _h) do { \ + if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \ + perfc_incrc(shadow_writeable_h_ ## _h); \ + if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ + return 1; \ } while (0) - if ( v->arch.shadow.mode->guest_levels == 2 ) + if ( v->arch.paging.mode->guest_levels == 2 ) { if ( level == 1 ) /* 32bit non-PAE w2k3: linear map at 0xC0000000 */ GUESS(0xC0000000UL + (fault_addr >> 10), 1); /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); } #if CONFIG_PAGING_LEVELS >= 3 - else if ( v->arch.shadow.mode->guest_levels == 3 ) + else if ( v->arch.paging.mode->guest_levels == 3 ) { /* 32bit PAE w2k3: linear map at 0xC0000000 */ switch ( level ) @@ -2038,11 +1740,11 @@ int sh_remove_write_access(struct vcpu * } /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); } #if CONFIG_PAGING_LEVELS >= 4 - else if ( v->arch.shadow.mode->guest_levels == 4 ) + else if ( v->arch.paging.mode->guest_levels == 4 ) { /* 64bit w2k3: linear map at 0x0000070000000000 */ switch ( level ) @@ -2054,7 +1756,7 @@ int sh_remove_write_access(struct vcpu * /* 64bit Linux direct map at 0xffff810000000000; older kernels * had it at 0x0000010000000000UL */ - gfn = sh_mfn_to_gfn(v->domain, gmfn); + gfn = mfn_to_gfn(v->domain, gmfn); GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4); GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4); } @@ -2073,10 +1775,10 @@ int sh_remove_write_access(struct vcpu * * the writeable mapping by looking at the same MFN where the last * brute-force search succeeded. 
*/ - if ( v->arch.shadow.last_writeable_pte_smfn != 0 ) + if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 ) { unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); - mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn); + mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn); int shtype = mfn_to_shadow_page(last_smfn)->type; if ( callbacks[shtype] ) @@ -2431,7 +2133,7 @@ static void sh_update_paging_modes(struc static void sh_update_paging_modes(struct vcpu *v) { struct domain *d = v->domain; - struct shadow_paging_mode *old_mode = v->arch.shadow.mode; + struct paging_mode *old_mode = v->arch.paging.mode; mfn_t old_guest_table; ASSERT(shadow_locked_by_me(d)); @@ -2446,8 +2148,8 @@ static void sh_update_paging_modes(struc // First, tear down any old shadow tables held by this vcpu. // - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); + if ( v->arch.paging.mode ) + v->arch.paging.mode->shadow.detach_old_tables(v); if ( !is_hvm_domain(d) ) { @@ -2456,17 +2158,17 @@ static void sh_update_paging_modes(struc /// #if CONFIG_PAGING_LEVELS == 4 if ( pv_32bit_guest(v) ) - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); else - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); #elif CONFIG_PAGING_LEVELS == 3 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #elif CONFIG_PAGING_LEVELS == 2 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); #else #error unexpected paging mode #endif - v->arch.shadow.translate_enabled = !!shadow_mode_translate(d); + v->arch.paging.translate_enabled = !!shadow_mode_translate(d); } else { @@ -2476,8 +2178,8 @@ static void sh_update_paging_modes(struc ASSERT(shadow_mode_translate(d)); ASSERT(shadow_mode_external(d)); - v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v); - if ( !v->arch.shadow.translate_enabled ) + v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); + if ( !v->arch.paging.translate_enabled ) { /* Set v->arch.guest_table to use the p2m map, and choose * the appropriate shadow mode */ @@ -2485,11 +2187,11 @@ static void sh_update_paging_modes(struc #if CONFIG_PAGING_LEVELS == 2 v->arch.guest_table = pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); #elif CONFIG_PAGING_LEVELS == 3 v->arch.guest_table = pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #else /* CONFIG_PAGING_LEVELS == 4 */ { l4_pgentry_t *l4e; @@ -2501,7 +2203,7 @@ static void sh_update_paging_modes(struc pagetable_from_pfn(l4e_get_pfn(l4e[0])); sh_unmap_domain_page(l4e); } - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); #endif /* Fix up refcounts on guest_table */ get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d); @@ -2514,7 +2216,7 @@ static void sh_update_paging_modes(struc if ( hvm_long_mode_enabled(v) ) { // long mode guest... 
- v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4); } else @@ -2523,7 +2225,7 @@ static void sh_update_paging_modes(struc { #if CONFIG_PAGING_LEVELS >= 3 // 32-bit PAE mode guest... - v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3); #else SHADOW_ERROR("PAE not supported in 32-bit Xen\n"); @@ -2535,10 +2237,10 @@ static void sh_update_paging_modes(struc { // 32-bit 2 level guest... #if CONFIG_PAGING_LEVELS >= 3 - v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2); #else - v->arch.shadow.mode = + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2); #endif } @@ -2546,25 +2248,25 @@ static void sh_update_paging_modes(struc if ( pagetable_is_null(v->arch.monitor_table) ) { - mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); + mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(mmfn); make_cr3(v, mfn_x(mmfn)); hvm_update_host_cr3(v); } - if ( v->arch.shadow.mode != old_mode ) + if ( v->arch.paging.mode != old_mode ) { SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u " "(was g=%u s=%u)\n", d->domain_id, v->vcpu_id, is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1, - v->arch.shadow.mode->guest_levels, - v->arch.shadow.mode->shadow_levels, + v->arch.paging.mode->guest_levels, + v->arch.paging.mode->shadow.shadow_levels, old_mode ? old_mode->guest_levels : 0, - old_mode ? old_mode->shadow_levels : 0); + old_mode ? old_mode->shadow.shadow_levels : 0); if ( old_mode && - (v->arch.shadow.mode->shadow_levels != - old_mode->shadow_levels) ) + (v->arch.paging.mode->shadow.shadow_levels != + old_mode->shadow.shadow_levels) ) { /* Need to make a new monitor table for the new mode */ mfn_t new_mfn, old_mfn; @@ -2584,9 +2286,9 @@ static void sh_update_paging_modes(struc old_mfn = pagetable_get_mfn(v->arch.monitor_table); v->arch.monitor_table = pagetable_null(); - new_mfn = v->arch.shadow.mode->make_monitor_table(v); + new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(new_mfn); - SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", mfn_x(new_mfn)); /* Don't be running on the old monitor table when we @@ -2596,7 +2298,7 @@ static void sh_update_paging_modes(struc if ( v == current ) write_ptbase(v); hvm_update_host_cr3(v); - old_mode->destroy_monitor_table(v, old_mfn); + old_mode->shadow.destroy_monitor_table(v, old_mfn); } } @@ -2606,7 +2308,7 @@ static void sh_update_paging_modes(struc // This *does* happen, at least for CR4.PGE... 
} - v->arch.shadow.mode->update_cr3(v, 0); + v->arch.paging.mode->update_cr3(v, 0); } void shadow_update_paging_modes(struct vcpu *v) @@ -2626,9 +2328,7 @@ static void sh_new_mode(struct domain *d ASSERT(shadow_locked_by_me(d)); ASSERT(d != current->domain); - d->arch.shadow.mode = new_mode; - if ( new_mode & SHM2_translate ) - shadow_audit_p2m(d); + d->arch.paging.mode = new_mode; for_each_vcpu(d, v) sh_update_paging_modes(v); } @@ -2642,75 +2342,75 @@ int shadow_enable(struct domain *d, u32 unsigned int old_pages; int rv = 0; - mode |= SHM2_enable; + mode |= PG_SH_enable; domain_pause(d); - shadow_lock(d); /* Sanity check the arguments */ if ( (d == current->domain) || shadow_mode_enabled(d) || - ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) || - ((mode & SHM2_external) && !(mode & SHM2_translate)) ) + ((mode & PG_translate) && !(mode & PG_refcounts)) || + ((mode & PG_external) && !(mode & PG_translate)) ) { rv = -EINVAL; - goto out; - } - - // XXX -- eventually would like to require that all memory be allocated - // *after* shadow_enabled() is called... So here, we would test to make - // sure that d->page_list is empty. -#if 0 - spin_lock(&d->page_alloc_lock); - if ( !list_empty(&d->page_list) ) - { - spin_unlock(&d->page_alloc_lock); - rv = -EINVAL; - goto out; - } - spin_unlock(&d->page_alloc_lock); -#endif + goto out_unlocked; + } /* Init the shadow memory allocation if the user hasn't done so */ - old_pages = d->arch.shadow.total_pages; + old_pages = d->arch.paging.shadow.total_pages; if ( old_pages == 0 ) - if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ + { + unsigned int r; + shadow_lock(d); + r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */ + shadow_unlock(d); + if ( r != 0 ) { sh_set_allocation(d, 0, NULL); rv = -ENOMEM; - goto out; - } + goto out_unlocked; + } + } + + /* Init the P2M table. Must be done before we take the shadow lock + * to avoid possible deadlock. */ + if ( mode & PG_translate ) + { + rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page); + if (rv != 0) + goto out_unlocked; + } + + shadow_lock(d); + + /* Sanity check again with the lock held */ + if ( shadow_mode_enabled(d) ) + { + rv = -EINVAL; + goto out_locked; + } /* Init the hash table */ if ( shadow_hash_alloc(d) != 0 ) { - sh_set_allocation(d, old_pages, NULL); rv = -ENOMEM; - goto out; - } - - /* Init the P2M table */ - if ( mode & SHM2_translate ) - if ( !shadow_alloc_p2m_table(d) ) - { - shadow_hash_teardown(d); - sh_set_allocation(d, old_pages, NULL); - shadow_p2m_teardown(d); - rv = -ENOMEM; - goto out; - } + goto out_locked; + } #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) /* We assume we're dealing with an older 64bit linux guest until we * see the guest use more than one l4 per vcpu. 
*/ - d->arch.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; + d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; #endif /* Update the bits */ sh_new_mode(d, mode); - shadow_audit_p2m(d); - out: + + out_locked: shadow_unlock(d); + out_unlocked: + if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) ) + p2m_teardown(d); domain_unpause(d); return rv; } @@ -2721,6 +2421,8 @@ void shadow_teardown(struct domain *d) { struct vcpu *v; mfn_t mfn; + struct list_head *entry, *n; + struct page_info *pg; ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); ASSERT(d != current->domain); @@ -2733,48 +2435,55 @@ void shadow_teardown(struct domain *d) /* Release the shadow and monitor tables held by each vcpu */ for_each_vcpu(d, v) { - if ( v->arch.shadow.mode ) + if ( v->arch.paging.mode ) { - v->arch.shadow.mode->detach_old_tables(v); + v->arch.paging.mode->shadow.detach_old_tables(v); if ( shadow_mode_external(d) ) { mfn = pagetable_get_mfn(v->arch.monitor_table); if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) - v->arch.shadow.mode->destroy_monitor_table(v, mfn); + v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn); v->arch.monitor_table = pagetable_null(); } } } } - if ( d->arch.shadow.total_pages != 0 ) + list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist) + { + list_del(entry); + pg = list_entry(entry, struct page_info, list); + shadow_free_p2m_page(d, pg); + } + + if ( d->arch.paging.shadow.total_pages != 0 ) { SHADOW_PRINTK("teardown of domain %u starts." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); /* Destroy all the shadows and release memory to domheap */ sh_set_allocation(d, 0, NULL); /* Release the hash table back to xenheap */ - if (d->arch.shadow.hash_table) + if (d->arch.paging.shadow.hash_table) shadow_hash_teardown(d); /* Release the log-dirty bitmap of dirtied pages */ sh_free_log_dirty_bitmap(d); /* Should not have any more memory held */ SHADOW_PRINTK("teardown done." " Shadow pages total = %u, free = %u, p2m=%u\n", - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); - ASSERT(d->arch.shadow.total_pages == 0); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); + ASSERT(d->arch.paging.shadow.total_pages == 0); } /* We leave the "permanent" shadow modes enabled, but clear the * log-dirty mode bit. We don't want any more mark_dirty() * calls now that we've torn down the bitmap */ - d->arch.shadow.mode &= ~SHM2_log_dirty; + d->arch.paging.mode &= ~PG_log_dirty; shadow_unlock(d); } @@ -2782,30 +2491,28 @@ void shadow_final_teardown(struct domain void shadow_final_teardown(struct domain *d) /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */ { - SHADOW_PRINTK("dom %u final teardown starts." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); /* Double-check that the domain didn't have any shadow memory. * It is possible for a domain that never got domain_kill()ed * to get here with its shadow allocation intact. 
*/ - if ( d->arch.shadow.total_pages != 0 ) + if ( d->arch.paging.shadow.total_pages != 0 ) shadow_teardown(d); /* It is now safe to pull down the p2m map. */ - if ( d->arch.shadow.p2m_pages != 0 ) - shadow_p2m_teardown(d); + p2m_teardown(d); SHADOW_PRINTK("dom %u final teardown done." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); } static int shadow_one_bit_enable(struct domain *d, u32 mode) @@ -2814,12 +2521,14 @@ static int shadow_one_bit_enable(struct ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ - if ( d == current->domain || (d->arch.shadow.mode & mode) ) + if ( d == current->domain || (d->arch.paging.mode & mode) ) { return -EINVAL; } - if ( d->arch.shadow.mode == 0 ) + mode |= PG_SH_enable; + + if ( d->arch.paging.mode == 0 ) { /* Init the shadow memory allocation and the hash table */ if ( sh_set_allocation(d, 1, NULL) != 0 @@ -2831,7 +2540,7 @@ static int shadow_one_bit_enable(struct } /* Update the bits */ - sh_new_mode(d, d->arch.shadow.mode | mode); + sh_new_mode(d, d->arch.paging.mode | mode); return 0; } @@ -2843,26 +2552,26 @@ static int shadow_one_bit_disable(struct ASSERT(shadow_locked_by_me(d)); /* Sanity check the call */ - if ( d == current->domain || !(d->arch.shadow.mode & mode) ) + if ( d == current->domain || !(d->arch.paging.mode & mode) ) { return -EINVAL; } /* Update the bits */ - sh_new_mode(d, d->arch.shadow.mode & ~mode); - if ( d->arch.shadow.mode == 0 ) + sh_new_mode(d, d->arch.paging.mode & ~mode); + if ( d->arch.paging.mode == 0 ) { /* Get this domain off shadows */ SHADOW_PRINTK("un-shadowing of domain %u starts." " Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); for_each_vcpu(d, v) { - if ( v->arch.shadow.mode ) - v->arch.shadow.mode->detach_old_tables(v); + if ( v->arch.paging.mode ) + v->arch.paging.mode->shadow.detach_old_tables(v); #if CONFIG_PAGING_LEVELS == 4 if ( !(v->arch.flags & TF_kernel_mode) ) make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); @@ -2885,9 +2594,9 @@ static int shadow_one_bit_disable(struct SHADOW_PRINTK("un-shadowing of domain %u done." 
" Shadow pages total = %u, free = %u, p2m=%u\n", d->domain_id, - d->arch.shadow.total_pages, - d->arch.shadow.free_pages, - d->arch.shadow.p2m_pages); + d->arch.paging.shadow.total_pages, + d->arch.paging.shadow.free_pages, + d->arch.paging.shadow.p2m_pages); } return 0; @@ -2909,7 +2618,7 @@ static int shadow_test_enable(struct dom goto out; } - ret = shadow_one_bit_enable(d, SHM2_enable); + ret = shadow_one_bit_enable(d, PG_SH_enable); out: shadow_unlock(d); domain_unpause(d); @@ -2923,7 +2632,7 @@ static int shadow_test_disable(struct do domain_pause(d); shadow_lock(d); - ret = shadow_one_bit_disable(d, SHM2_enable); + ret = shadow_one_bit_disable(d, PG_SH_enable); shadow_unlock(d); domain_unpause(d); @@ -2933,19 +2642,19 @@ static int static int sh_alloc_log_dirty_bitmap(struct domain *d) { - ASSERT(d->arch.shadow.dirty_bitmap == NULL); - d->arch.shadow.dirty_bitmap_size = + ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL); + d->arch.paging.shadow.dirty_bitmap_size = (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); - d->arch.shadow.dirty_bitmap = + d->arch.paging.shadow.dirty_bitmap = xmalloc_array(unsigned long, - d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG); - if ( d->arch.shadow.dirty_bitmap == NULL ) - { - d->arch.shadow.dirty_bitmap_size = 0; + d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG); + if ( d->arch.paging.shadow.dirty_bitmap == NULL ) + { + d->arch.paging.shadow.dirty_bitmap_size = 0; return -ENOMEM; } - memset(d->arch.shadow.dirty_bitmap, 0, d->arch.shadow.dirty_bitmap_size/8); + memset(d->arch.paging.shadow.dirty_bitmap, 0, d->arch.paging.shadow.dirty_bitmap_size/8); return 0; } @@ -2953,11 +2662,11 @@ static void static void sh_free_log_dirty_bitmap(struct domain *d) { - d->arch.shadow.dirty_bitmap_size = 0; - if ( d->arch.shadow.dirty_bitmap ) - { - xfree(d->arch.shadow.dirty_bitmap); - d->arch.shadow.dirty_bitmap = NULL; + d->arch.paging.shadow.dirty_bitmap_size = 0; + if ( d->arch.paging.shadow.dirty_bitmap ) + { + xfree(d->arch.paging.shadow.dirty_bitmap); + d->arch.paging.shadow.dirty_bitmap = NULL; } } @@ -2989,7 +2698,7 @@ static int shadow_log_dirty_enable(struc goto out; } - ret = shadow_one_bit_enable(d, SHM2_log_dirty); + ret = shadow_one_bit_enable(d, PG_log_dirty); if ( ret != 0 ) sh_free_log_dirty_bitmap(d); @@ -3005,7 +2714,7 @@ static int shadow_log_dirty_disable(stru domain_pause(d); shadow_lock(d); - ret = shadow_one_bit_disable(d, SHM2_log_dirty); + ret = shadow_one_bit_disable(d, PG_log_dirty); if ( !shadow_mode_log_dirty(d) ) sh_free_log_dirty_bitmap(d); shadow_unlock(d); @@ -3017,100 +2726,52 @@ static int shadow_log_dirty_disable(stru /**************************************************************************/ /* P2M map manipulations */ -static void -sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) -{ - struct vcpu *v; - - if ( !shadow_mode_translate(d) ) - return; - - v = current; - if ( v->domain != d ) - v = d->vcpu[0]; - - SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn); - - ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn); - //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn); - - if ( v != NULL ) - { - sh_remove_all_shadows_and_parents(v, _mfn(mfn)); - if ( sh_remove_all_mappings(v, _mfn(mfn)) ) - flush_tlb_mask(d->domain_dirty_cpumask); - } - - shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); -} - +/* shadow specific code which should be called when P2M table entry is updated + * with new content. 
It is responsible for updating the entry, as well as other + * shadow processing jobs. */ void -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ +shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, + l1_pgentry_t new, unsigned int level) +{ + struct domain *d = v->domain; + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); + mfn_t mfn; + shadow_lock(d); - shadow_audit_p2m(d); - sh_p2m_remove_page(d, gfn, mfn); - shadow_audit_p2m(d); - shadow_unlock(d); -} - -void -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ - unsigned long ogfn; - mfn_t omfn; - - if ( !shadow_mode_translate(d) ) - return; - - shadow_lock(d); - shadow_audit_p2m(d); - - SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn); - - omfn = sh_gfn_to_mfn(d, gfn); - if ( mfn_valid(omfn) ) - { - /* Get rid of the old mapping, especially any shadows */ - struct vcpu *v = current; - if ( v->domain != d ) - v = d->vcpu[0]; - if ( v != NULL ) - { - sh_remove_all_shadows_and_parents(v, omfn); - if ( sh_remove_all_mappings(v, omfn) ) - flush_tlb_mask(d->domain_dirty_cpumask); - } - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); - } - - ogfn = sh_mfn_to_gfn(d, _mfn(mfn)); - if ( -#ifdef __x86_64__ - (ogfn != 0x5555555555555555L) -#else - (ogfn != 0x55555555L) -#endif - && (ogfn != INVALID_M2P_ENTRY) - && (ogfn != gfn) ) - { - /* This machine frame is already mapped at another physical address */ - SHADOW_DEBUG(P2M, "aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", - mfn, ogfn, gfn); - if ( mfn_valid(omfn = sh_gfn_to_mfn(d, ogfn)) ) - { - SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", - ogfn , mfn_x(omfn)); - if ( mfn_x(omfn) == mfn ) - sh_p2m_remove_page(d, ogfn, mfn); - } - } - - shadow_set_p2m_entry(d, gfn, _mfn(mfn)); - set_gpfn_from_mfn(mfn, gfn); + + /* handle physmap_add and physmap_remove */ + mfn = gfn_to_mfn(d, gfn); + if ( v != NULL && level == 1 && mfn_valid(mfn) ) { + sh_remove_all_shadows_and_parents(v, mfn); + if ( sh_remove_all_mappings(v, mfn) ) + flush_tlb_mask(d->domain_dirty_cpumask); + } + + /* update the entry with new content */ + safe_write_pte(p, new); + + /* The P2M can be shadowed: keep the shadows synced */ + if ( d->vcpu[0] != NULL ) + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p)); + + /* install P2M in monitors for PAE Xen */ +#if CONFIG_PAGING_LEVELS == 3 + if ( level == 3 ) { + struct vcpu *v; + /* We have written to the p2m l3: need to sync the per-vcpu + * copies of it in the monitor tables */ + p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p); + /* Also, any vcpus running on shadows of the p2m need to + * reload their CR3s so the change propagates to the shadow */ + for_each_vcpu(d, v) { + if ( pagetable_get_pfn(v->arch.guest_table) + == pagetable_get_pfn(d->arch.phys_table) + && v->arch.paging.mode != NULL ) + v->arch.paging.mode->update_cr3(v, 0); + } + } +#endif #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) /* If we're doing FAST_FAULT_PATH, then shadow mode may have @@ -3122,7 +2783,6 @@ shadow_guest_physmap_add_page(struct dom shadow_blow_tables(d); #endif - shadow_audit_p2m(d); shadow_unlock(d); } @@ -3151,11 +2811,11 @@ static int shadow_log_dirty_op( SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", (clean) ?
"clean" : "peek", d->domain_id, - d->arch.shadow.fault_count, - d->arch.shadow.dirty_count); - - sc->stats.fault_count = d->arch.shadow.fault_count; - sc->stats.dirty_count = d->arch.shadow.dirty_count; + d->arch.paging.shadow.fault_count, + d->arch.paging.shadow.dirty_count); + + sc->stats.fault_count = d->arch.paging.shadow.fault_count; + sc->stats.dirty_count = d->arch.paging.shadow.dirty_count; if ( clean ) { @@ -3164,22 +2824,22 @@ static int shadow_log_dirty_op( * but for now, we just unshadow everything except Xen. */ shadow_blow_tables(d); - d->arch.shadow.fault_count = 0; - d->arch.shadow.dirty_count = 0; + d->arch.paging.shadow.fault_count = 0; + d->arch.paging.shadow.dirty_count = 0; } if ( guest_handle_is_null(sc->dirty_bitmap) ) /* caller may have wanted just to clean the state or access stats. */ peek = 0; - if ( (peek || clean) && (d->arch.shadow.dirty_bitmap == NULL) ) + if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) ) { rv = -EINVAL; /* perhaps should be ENOMEM? */ goto out; } - if ( sc->pages > d->arch.shadow.dirty_bitmap_size ) - sc->pages = d->arch.shadow.dirty_bitmap_size; + if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size ) + sc->pages = d->arch.paging.shadow.dirty_bitmap_size; #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */ for ( i = 0; i < sc->pages; i += CHUNK ) @@ -3192,7 +2852,7 @@ static int shadow_log_dirty_op( { if ( copy_to_guest_offset( sc->dirty_bitmap, i/8, - (uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), bytes) ) + (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) ) { rv = -EFAULT; goto out; @@ -3200,7 +2860,7 @@ static int shadow_log_dirty_op( } if ( clean ) - memset((uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), 0, bytes); + memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes); } #undef CHUNK @@ -3221,7 +2881,7 @@ void sh_mark_dirty(struct domain *d, mfn if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) return; - ASSERT(d->arch.shadow.dirty_bitmap != NULL); + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); /* We /really/ mean PFN here, even for non-translated guests. */ pfn = get_gpfn_from_mfn(mfn_x(gmfn)); @@ -3235,24 +2895,24 @@ void sh_mark_dirty(struct domain *d, mfn return; /* N.B. Can use non-atomic TAS because protected by shadow_lock. */ - if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) ) + if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) ) { - if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) ) + if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) { SHADOW_DEBUG(LOGDIRTY, - "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n", + "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n", mfn_x(gmfn), pfn, d->domain_id); - d->arch.shadow.dirty_count++; + d->arch.paging.shadow.dirty_count++; } } else { SHADOW_PRINTK("mark_dirty OOR! " - "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n" + "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n" "owner=%d c=%08x t=%" PRtype_info "\n", mfn_x(gmfn), pfn, - d->arch.shadow.dirty_bitmap_size, + d->arch.paging.shadow.dirty_bitmap_size, d->domain_id, (page_get_owner(mfn_to_page(gmfn)) ? 
page_get_owner(mfn_to_page(gmfn))->domain_id @@ -3292,7 +2952,7 @@ int shadow_domctl(struct domain *d, return rc; if ( is_hvm_domain(d) ) return -EINVAL; - if ( d->arch.shadow.mode & SHM2_enable ) + if ( d->arch.paging.mode & PG_SH_enable ) if ( (rc = shadow_test_disable(d)) != 0 ) return rc; return 0; @@ -3304,7 +2964,7 @@ int shadow_domctl(struct domain *d, return shadow_log_dirty_enable(d); case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: - return shadow_enable(d, SHM2_refcounts|SHM2_translate); + return shadow_enable(d, PG_refcounts|PG_translate); case XEN_DOMCTL_SHADOW_OP_CLEAN: case XEN_DOMCTL_SHADOW_OP_PEEK: @@ -3313,7 +2973,7 @@ int shadow_domctl(struct domain *d, case XEN_DOMCTL_SHADOW_OP_ENABLE: if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY ) return shadow_log_dirty_enable(d); - return shadow_enable(d, sc->mode << SHM2_shift); + return shadow_enable(d, sc->mode << PG_mode_shift); case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: sc->mb = shadow_get_allocation(d); @@ -3390,7 +3050,7 @@ void shadow_audit_tables(struct vcpu *v) else { /* Audit only the current mode's tables */ - switch ( v->arch.shadow.mode->guest_levels ) + switch ( v->arch.paging.mode->guest_levels ) { case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break; case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE @@ -3405,199 +3065,6 @@ void shadow_audit_tables(struct vcpu *v) } #endif /* Shadow audit */ - - -/**************************************************************************/ -/* Auditing p2m tables */ - -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M - -void shadow_audit_p2m(struct domain *d) -{ - struct list_head *entry; - struct page_info *page; - struct domain *od; - unsigned long mfn, gfn, m2pfn, lp2mfn = 0; - mfn_t p2mfn; - unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; - int test_linear; - - if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) ) - return; - - //SHADOW_PRINTK("p2m audit starts\n"); - - test_linear = ( (d == current->domain) - && !pagetable_is_null(current->arch.monitor_table) ); - if ( test_linear ) - local_flush_tlb(); - - /* Audit part one: walk the domain's page allocation list, checking - * the m2p entries. */ - for ( entry = d->page_list.next; - entry != &d->page_list; - entry = entry->next ) - { - page = list_entry(entry, struct page_info, list); - mfn = mfn_x(page_to_mfn(page)); - - // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); - - od = page_get_owner(page); - - if ( od != d ) - { - SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", - mfn, od, (od?od->domain_id:-1), d, d->domain_id); - continue; - } - - gfn = get_gpfn_from_mfn(mfn); - if ( gfn == INVALID_M2P_ENTRY ) - { - orphans_i++; - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", - // mfn); - continue; - } - - if ( gfn == 0x55555555 ) - { - orphans_d++; - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", - // mfn); - continue; - } - - p2mfn = sh_gfn_to_mfn_foreign(d, gfn); - if ( mfn_x(p2mfn) != mfn ) - { - mpbad++; - SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" - " (-> gfn %#lx)\n", - mfn, gfn, mfn_x(p2mfn), - (mfn_valid(p2mfn) - ? get_gpfn_from_mfn(mfn_x(p2mfn)) - : -1u)); - /* This m2p entry is stale: the domain has another frame in - * this physical slot. No great disaster, but for neatness, - * blow away the m2p entry. 
*/ - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); - } - - if ( test_linear && (gfn <= d->arch.max_mapped_pfn) ) - { - lp2mfn = gfn_to_mfn_current(gfn); - if ( mfn_x(lp2mfn) != mfn_x(p2mfn) ) - { - SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " - "(!= mfn %#lx)\n", gfn, - mfn_x(lp2mfn), mfn_x(p2mfn)); - } - } - - // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", - // mfn, gfn, p2mfn, lp2mfn); - } - - /* Audit part two: walk the domain's p2m table, checking the entries. */ - if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) - { - l2_pgentry_t *l2e; - l1_pgentry_t *l1e; - int i1, i2; - -#if CONFIG_PAGING_LEVELS == 4 - l4_pgentry_t *l4e; - l3_pgentry_t *l3e; - int i3, i4; - l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#elif CONFIG_PAGING_LEVELS == 3 - l3_pgentry_t *l3e; - int i3; - l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#else /* CONFIG_PAGING_LEVELS == 2 */ - l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); -#endif - - gfn = 0; -#if CONFIG_PAGING_LEVELS >= 3 -#if CONFIG_PAGING_LEVELS >= 4 - for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) - { - if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4]))); -#endif /* now at levels 3 or 4... */ - for ( i3 = 0; - i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); - i3++ ) - { - if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3]))); -#endif /* all levels... */ - for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) - { - if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) - { - gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); - continue; - } - l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2]))); - - for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) - { - if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) - continue; - mfn = l1e_get_pfn(l1e[i1]); - ASSERT(mfn_valid(_mfn(mfn))); - m2pfn = get_gpfn_from_mfn(mfn); - if ( m2pfn != gfn ) - { - pmbad++; - SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx" - " -> gfn %#lx\n", gfn, mfn, m2pfn); - BUG(); - } - } - sh_unmap_domain_page(l1e); - } -#if CONFIG_PAGING_LEVELS >= 3 - sh_unmap_domain_page(l2e); - } -#if CONFIG_PAGING_LEVELS >= 4 - sh_unmap_domain_page(l3e); - } -#endif -#endif - -#if CONFIG_PAGING_LEVELS == 4 - sh_unmap_domain_page(l4e); -#elif CONFIG_PAGING_LEVELS == 3 - sh_unmap_domain_page(l3e); -#else /* CONFIG_PAGING_LEVELS == 2 */ - sh_unmap_domain_page(l2e); -#endif - - } - - //SHADOW_PRINTK("p2m audit complete\n"); - //if ( orphans_i | orphans_d | mpbad | pmbad ) - // SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", - // orphans_i + orphans_d, orphans_i, orphans_d, - if ( mpbad | pmbad ) - SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", - pmbad, mpbad); -} - -#endif /* p2m audit */ /* * Local variables: diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Feb 15 14:09:39 2007 -0700 @@ -237,7 +237,8 @@ guest_walk_tables(struct vcpu *v, unsign #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... 
*/ /* Get l4e from the top level table */ gw->l4mfn = pagetable_get_mfn(v->arch.guest_table); - gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va); + gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable + + guest_l4_table_offset(va); /* Walk down to the l3e */ if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0; gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e)); @@ -248,9 +249,8 @@ guest_walk_tables(struct vcpu *v, unsign gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn)) + guest_l3_table_offset(va); #else /* PAE only... */ - /* Get l3e from the top level table */ - gw->l3mfn = pagetable_get_mfn(v->arch.guest_table); - gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va); + /* Get l3e from the cache of the guest's top level table */ + gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)]; #endif /* PAE or 64... */ /* Walk down to the l2e */ if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0; @@ -264,7 +264,8 @@ guest_walk_tables(struct vcpu *v, unsign #else /* 32-bit only... */ /* Get l2e from the top level table */ gw->l2mfn = pagetable_get_mfn(v->arch.guest_table); - gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va); + gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable + + guest_l2_table_offset(va); #endif /* All levels... */ if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0; @@ -353,21 +354,21 @@ static inline void print_gw(walk_t *gw) SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va); #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ - SHADOW_PRINTK(" l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn)); + SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn)); SHADOW_PRINTK(" l4e=%p\n", gw->l4e); if ( gw->l4e ) SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4); + SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn)); #endif /* PAE or 64... */ - SHADOW_PRINTK(" l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn)); SHADOW_PRINTK(" l3e=%p\n", gw->l3e); if ( gw->l3e ) SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3); #endif /* All levels... */ - SHADOW_PRINTK(" l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn)); + SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn)); SHADOW_PRINTK(" l2e=%p\n", gw->l2e); if ( gw->l2e ) SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2); - SHADOW_PRINTK(" l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn)); + SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn)); SHADOW_PRINTK(" l1e=%p\n", gw->l1e); if ( gw->l1e ) SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1); @@ -1572,7 +1573,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf #if GUEST_PAGING_LEVELS == 4 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) if ( shadow_type == SH_type_l4_64_shadow && - unlikely(v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) + unlikely(v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) { /* We're shadowing a new l4, but we've been assuming the guest uses * only one l4 per vcpu and context switches using an l4 entry. 
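The comment above introduces the SHOPT_LINUX_L3_TOPLEVEL heuristic that the following hunks move under d->arch.paging.shadow.opt_flags: pinning l3 shadows only pays off while the guest really does keep one l4 per vcpu. A sketch of the back-off rule the code applies, using illustrative names rather than the hypervisor's own:

    /* Sketch only: sh_make_shadow() walks the pinned-shadows list counting
     * l4 shadows and vcpus, then in effect applies this test to decide
     * whether to keep pinning l3 top-level shadows. */
    static int keep_l3_pinning(int l4count, int vcpus)
    {
        /* More than two l4 shadows per vcpu means this is not the old
         * one-l4-per-vcpu Linux pattern, so unpin and stop pinning. */
        return l4count <= 2 * vcpus;
    }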
@@ -1584,7 +1585,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf struct shadow_page_info *sp; struct vcpu *v2; int l4count = 0, vcpus = 0; - list_for_each(l, &v->domain->arch.shadow.pinned_shadows) + list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); if ( sp->type == SH_type_l4_64_shadow ) @@ -1595,13 +1596,13 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf if ( l4count > 2 * vcpus ) { /* Unpin all the pinned l3 tables, and don't pin any more. */ - list_for_each_safe(l, t, &v->domain->arch.shadow.pinned_shadows) + list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows) { sp = list_entry(l, struct shadow_page_info, list); if ( sp->type == SH_type_l3_64_shadow ) sh_unpin(v, shadow_page_to_mfn(sp)); } - v->domain->arch.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; + v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; } } #endif @@ -1641,7 +1642,7 @@ make_fl1_shadow(struct vcpu *v, gfn_t gf mfn_t smfn = shadow_alloc(v->domain, SH_type_fl1_shadow, (unsigned long) gfn_x(gfn)); - SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n", + SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n", gfn_x(gfn), mfn_x(smfn)); set_fl1_shadow_status(v, gfn, smfn); @@ -1851,7 +1852,7 @@ static shadow_l2e_t * shadow_get_and_cre #elif GUEST_PAGING_LEVELS == 3 /* PAE... */ /* We never demand-shadow PAE l3es: they are only created in * sh_update_cr3(). Check if the relevant sl3e is present. */ - shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) + shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table) + shadow_l3_linear_offset(gw->va); if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) return NULL; @@ -2358,7 +2359,7 @@ static int validate_gl1e(struct vcpu *v, gfn = guest_l1e_get_gfn(*new_gl1e); gmfn = vcpu_gfn_to_mfn(v, gfn); - mmio = (is_hvm_vcpu(v) && shadow_vcpu_mode_translate(v) && !mfn_valid(gmfn)); + mmio = (is_hvm_vcpu(v) && paging_vcpu_mode_translate(v) && !mfn_valid(gmfn)); l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, ft_prefetch, mmio); @@ -2506,7 +2507,7 @@ static inline void check_for_early_unsha static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn) { #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW - if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) && + if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) && sh_mfn_is_a_page_table(gmfn) ) { u32 flags = mfn_to_page(gmfn)->shadow_flags; @@ -2516,7 +2517,7 @@ static inline void check_for_early_unsha sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); } } - v->arch.shadow.last_emulated_mfn = mfn_x(gmfn); + v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn); #endif } @@ -2524,7 +2525,7 @@ static inline void reset_early_unshadow( static inline void reset_early_unshadow(struct vcpu *v) { #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW - v->arch.shadow.last_emulated_mfn = INVALID_MFN; + v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN; #endif } @@ -2589,7 +2590,7 @@ static void sh_prefetch(struct vcpu *v, gfn = guest_l1e_get_gfn(gl1e); gmfn = vcpu_gfn_to_mfn(v, gfn); mmio = ( is_hvm_vcpu(v) - && shadow_vcpu_mode_translate(v) + && paging_vcpu_mode_translate(v) && mmio_space(gfn_to_paddr(gfn)) ); /* Propagate the entry. 
Safe to use a pointer to our local @@ -2631,6 +2632,7 @@ static int sh_page_fault(struct vcpu *v, SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", v->domain->domain_id, v->vcpu_id, va, regs->error_code); + perfc_incrc(shadow_fault); // // XXX: Need to think about eventually mapping superpages directly in the // shadow (when possible), as opposed to splintering them into a @@ -2651,7 +2653,7 @@ static int sh_page_fault(struct vcpu *v, if ( sh_l1e_is_gnp(sl1e) ) { if ( likely(!is_hvm_domain(d) || - shadow_vcpu_mode_translate(v)) ) + paging_vcpu_mode_translate(v)) ) { /* Not-present in a guest PT: pass to the guest as * a not-present fault (by flipping two bits). */ @@ -2701,7 +2703,7 @@ static int sh_page_fault(struct vcpu *v, if ( unlikely(shadow_locked_by_me(d)) ) { SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n", - d->arch.shadow.locker_function); + d->arch.paging.shadow.locker_function); return 0; } @@ -2726,7 +2728,7 @@ static int sh_page_fault(struct vcpu *v, // if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) ) { - if ( is_hvm_domain(d) && !shadow_vcpu_mode_translate(v) ) + if ( is_hvm_domain(d) && !paging_vcpu_mode_translate(v) ) { /* Not present in p2m map, means this is mmio */ gpa = va; @@ -2784,13 +2786,13 @@ static int sh_page_fault(struct vcpu *v, gfn = guest_l1e_get_gfn(gw.eff_l1e); gmfn = vcpu_gfn_to_mfn(v, gfn); mmio = (is_hvm_domain(d) - && shadow_vcpu_mode_translate(v) + && paging_vcpu_mode_translate(v) && mmio_space(gfn_to_paddr(gfn))); if ( !mmio && !mfn_valid(gmfn) ) { perfc_incrc(shadow_fault_bail_bad_gfn); - SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", + SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", gfn_x(gfn), mfn_x(gmfn)); goto not_a_shadow_fault; } @@ -2848,7 +2850,7 @@ static int sh_page_fault(struct vcpu *v, } perfc_incrc(shadow_fault_fixed); - d->arch.shadow.fault_count++; + d->arch.paging.shadow.fault_count++; reset_early_unshadow(v); done: @@ -2949,7 +2951,7 @@ sh_invlpg(struct vcpu *v, unsigned long return 0; } #elif SHADOW_PAGING_LEVELS == 3 - if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)]) + if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)]) & _PAGE_PRESENT) ) // no need to flush anything if there's no SL2... return 0; @@ -3120,7 +3122,7 @@ sh_update_linear_entries(struct vcpu *v) } /* Shadow l3 tables are made up by sh_update_cr3 */ - sl3e = v->arch.shadow.l3table; + sl3e = v->arch.paging.shadow.l3table; for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) { @@ -3161,15 +3163,14 @@ sh_update_linear_entries(struct vcpu *v) #if GUEST_PAGING_LEVELS == 2 /* Shadow l3 tables were built by sh_update_cr3 */ if ( shadow_mode_external(d) ) - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; else BUG(); /* PV 2-on-3 is not supported yet */ #else /* GUEST_PAGING_LEVELS == 3 */ - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; - /* Always safe to use guest_vtable, because it's globally mapped */ - guest_l3e = v->arch.guest_vtable; + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; + guest_l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e; #endif /* GUEST_PAGING_LEVELS */ @@ -3267,38 +3268,36 @@ sh_update_linear_entries(struct vcpu *v) } -/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[]. +/* Removes vcpu->arch.paging.shadow.guest_vtable and vcpu->arch.shadow_table[]. * Does all appropriate management/bookkeeping/refcounting/etc... 
*/ static void sh_detach_old_tables(struct vcpu *v) { - struct domain *d = v->domain; mfn_t smfn; int i = 0; //// - //// vcpu->arch.guest_vtable + //// vcpu->arch.paging.shadow.guest_vtable //// - if ( v->arch.guest_vtable ) - { -#if GUEST_PAGING_LEVELS == 4 + +#if GUEST_PAGING_LEVELS == 3 + /* PAE guests don't have a mapping of the guest top-level table */ + ASSERT(v->arch.paging.shadow.guest_vtable == NULL); +#else + if ( v->arch.paging.shadow.guest_vtable ) + { + struct domain *d = v->domain; if ( shadow_mode_external(d) || shadow_mode_translate(d) ) - sh_unmap_domain_page_global(v->arch.guest_vtable); -#elif GUEST_PAGING_LEVELS == 3 - if ( 1 || shadow_mode_external(d) || shadow_mode_translate(d) ) - sh_unmap_domain_page_global(v->arch.guest_vtable); -#elif GUEST_PAGING_LEVELS == 2 - if ( shadow_mode_external(d) || shadow_mode_translate(d) ) - sh_unmap_domain_page_global(v->arch.guest_vtable); -#endif - v->arch.guest_vtable = NULL; - } + sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); + v->arch.paging.shadow.guest_vtable = NULL; + } +#endif + //// //// vcpu->arch.shadow_table[] //// - #if GUEST_PAGING_LEVELS == 3 /* PAE guests have four shadow_table entries */ @@ -3370,7 +3369,7 @@ sh_set_toplevel_shadow(struct vcpu *v, install_new_entry: /* Done. Install it */ - SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n", + SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot, mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); v->arch.shadow_table[slot] = new_entry; @@ -3397,7 +3396,9 @@ sh_update_cr3(struct vcpu *v, int do_loc struct domain *d = v->domain; mfn_t gmfn; #if GUEST_PAGING_LEVELS == 3 + guest_l3e_t *gl3e; u32 guest_idx=0; + int i; #endif /* Don't do anything on an uninitialised vcpu */ @@ -3410,7 +3411,7 @@ sh_update_cr3(struct vcpu *v, int do_loc if ( do_locking ) shadow_lock(v->domain); ASSERT(shadow_locked_by_me(v->domain)); - ASSERT(v->arch.shadow.mode); + ASSERT(v->arch.paging.mode); //// //// vcpu->arch.guest_table is already set @@ -3425,7 +3426,7 @@ sh_update_cr3(struct vcpu *v, int do_loc ASSERT(shadow_mode_external(d)); // Is paging enabled on this vcpu? - if ( shadow_vcpu_mode_translate(v) ) + if ( paging_vcpu_mode_translate(v) ) { gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3))); gmfn = vcpu_gfn_to_mfn(v, gfn); @@ -3456,55 +3457,54 @@ sh_update_cr3(struct vcpu *v, int do_loc //// - //// vcpu->arch.guest_vtable + //// vcpu->arch.paging.shadow.guest_vtable //// #if GUEST_PAGING_LEVELS == 4 if ( shadow_mode_external(d) || shadow_mode_translate(d) ) { - if ( v->arch.guest_vtable ) - sh_unmap_domain_page_global(v->arch.guest_vtable); - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); + if ( v->arch.paging.shadow.guest_vtable ) + sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); + v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn); } else - v->arch.guest_vtable = __linear_l4_table; + v->arch.paging.shadow.guest_vtable = __linear_l4_table; #elif GUEST_PAGING_LEVELS == 3 - if ( v->arch.guest_vtable ) - sh_unmap_domain_page_global(v->arch.guest_vtable); - if ( shadow_mode_external(d) ) - { - if ( shadow_vcpu_mode_translate(v) ) - /* Paging enabled: find where in the page the l3 table is */ - guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); - else - /* Paging disabled: l3 is at the start of a page (in the p2m) */ - guest_idx = 0; - - // Ignore the low 2 bits of guest_idx -- they are really just - // cache control. 
- guest_idx &= ~3; - - // XXX - why does this need a global map? - v->arch.guest_vtable = - (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx; - } + /* On PAE guests we don't use a mapping of the guest's own top-level + * table. We cache the current state of that table and shadow that, + * until the next CR3 write makes us refresh our cache. */ + ASSERT(v->arch.paging.shadow.guest_vtable == NULL); + + if ( shadow_mode_external(d) && paging_vcpu_mode_translate(v) ) + /* Paging enabled: find where in the page the l3 table is */ + guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); else - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); + /* Paging disabled or PV: l3 is at the start of a page */ + guest_idx = 0; + + // Ignore the low 2 bits of guest_idx -- they are really just + // cache control. + guest_idx &= ~3; + + gl3e = ((guest_l3e_t *)sh_map_domain_page(gmfn)) + guest_idx; + for ( i = 0; i < 4 ; i++ ) + v->arch.paging.shadow.gl3e[i] = gl3e[i]; + sh_unmap_domain_page(gl3e); #elif GUEST_PAGING_LEVELS == 2 if ( shadow_mode_external(d) || shadow_mode_translate(d) ) { - if ( v->arch.guest_vtable ) - sh_unmap_domain_page_global(v->arch.guest_vtable); - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); + if ( v->arch.paging.shadow.guest_vtable ) + sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); + v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn); } else - v->arch.guest_vtable = __linear_l2_table; + v->arch.paging.shadow.guest_vtable = __linear_l2_table; #else #error this should never happen #endif #if 0 - printk("%s %s %d gmfn=%05lx guest_vtable=%p\n", - __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable); + printk("%s %s %d gmfn=%05lx shadow.guest_vtable=%p\n", + __func__, __FILE__, __LINE__, gmfn, v->arch.paging.shadow.guest_vtable); #endif //// @@ -3522,10 +3522,10 @@ sh_update_cr3(struct vcpu *v, int do_loc /* PAE guests have four shadow_table entries, based on the * current values of the guest's four l3es. */ { - int i, flush = 0; + int flush = 0; gfn_t gl2gfn; mfn_t gl2mfn; - guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable; + guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e; /* First, make all four entries read-only. */ for ( i = 0; i < 4; i++ ) { @@ -3566,7 +3566,7 @@ sh_update_cr3(struct vcpu *v, int do_loc #endif /// - /// v->arch.shadow.l3table + /// v->arch.paging.shadow.l3table /// #if SHADOW_PAGING_LEVELS == 3 { @@ -3581,7 +3581,7 @@ sh_update_cr3(struct vcpu *v, int do_loc /* 3-on-3: make a PAE l3 that points at the four l2 pages */ smfn = pagetable_get_mfn(v->arch.shadow_table[i]); #endif - v->arch.shadow.l3table[i] = + v->arch.paging.shadow.l3table[i] = (mfn_x(smfn) == 0) ? shadow_l3e_empty() : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT); @@ -3605,8 +3605,8 @@ sh_update_cr3(struct vcpu *v, int do_loc /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated. 
* Don't use make_cr3 because (a) we know it's below 4GB, and * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */ - ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL); - v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table); + ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL); + v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table); #else /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0])); @@ -3622,7 +3622,7 @@ sh_update_cr3(struct vcpu *v, int do_loc ASSERT(is_hvm_domain(d)); #if SHADOW_PAGING_LEVELS == 3 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ - hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.shadow.l3table)); + hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.paging.shadow.l3table)); #else /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.shadow_table[0])); @@ -3665,7 +3665,7 @@ static int sh_guess_wrmap(struct vcpu *v if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) return 0; #elif SHADOW_PAGING_LEVELS == 3 - sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) + sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table) + shadow_l3_linear_offset(vaddr); if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) return 0; @@ -3709,7 +3709,7 @@ int sh_rm_write_access_from_l1(struct vc (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn); #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC /* Remember the last shadow that we shot a writeable mapping in */ - v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); + v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); #endif if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info & PGT_count_mask) == 0 ) @@ -4050,8 +4050,8 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, #define AUDIT_FAIL(_level, _fmt, _a...) do { \ printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \ - "gl" #_level "mfn = %" SH_PRI_mfn \ - " sl" #_level "mfn = %" SH_PRI_mfn \ + "gl" #_level "mfn = %" PRI_mfn \ + " sl" #_level "mfn = %" PRI_mfn \ " &gl" #_level "e = %p &sl" #_level "e = %p" \ " gl" #_level "e = %" SH_PRI_gpte \ " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \ @@ -4105,7 +4105,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g != PGT_writable_page ) return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */ else - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); + return gfn_to_mfn(v->domain, gfn_x(gfn)); } @@ -4156,7 +4156,7 @@ int sh_audit_l1_table(struct vcpu *v, mf gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); } } @@ -4219,8 +4219,8 @@ int sh_audit_l2_table(struct vcpu *v, mf SH_type_l1_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn - " (--> %" SH_PRI_mfn ")" - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " (--> %" PRI_mfn ")" + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 
0 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)), @@ -4262,7 +4262,7 @@ int sh_audit_l3_table(struct vcpu *v, mf : SH_type_l2_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); } }); @@ -4297,7 +4297,7 @@ int sh_audit_l4_table(struct vcpu *v, mf SH_type_l3_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, + " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); } }); @@ -4314,30 +4314,29 @@ int sh_audit_l4_table(struct vcpu *v, mf /**************************************************************************/ /* Entry points into this mode of the shadow code. * This will all be mangled by the preprocessor to uniquify everything. */ -struct shadow_paging_mode sh_paging_mode = { - .page_fault = sh_page_fault, - .invlpg = sh_invlpg, - .gva_to_gpa = sh_gva_to_gpa, - .gva_to_gfn = sh_gva_to_gfn, - .update_cr3 = sh_update_cr3, - .map_and_validate_gl1e = sh_map_and_validate_gl1e, - .map_and_validate_gl2e = sh_map_and_validate_gl2e, - .map_and_validate_gl2he = sh_map_and_validate_gl2he, - .map_and_validate_gl3e = sh_map_and_validate_gl3e, - .map_and_validate_gl4e = sh_map_and_validate_gl4e, - .detach_old_tables = sh_detach_old_tables, - .x86_emulate_write = sh_x86_emulate_write, - .x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, - .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, - .make_monitor_table = sh_make_monitor_table, - .destroy_monitor_table = sh_destroy_monitor_table, - .guest_map_l1e = sh_guest_map_l1e, - .guest_get_eff_l1e = sh_guest_get_eff_l1e, +struct paging_mode sh_paging_mode = { + .page_fault = sh_page_fault, + .invlpg = sh_invlpg, + .gva_to_gpa = sh_gva_to_gpa, + .gva_to_gfn = sh_gva_to_gfn, + .update_cr3 = sh_update_cr3, + .update_paging_modes = shadow_update_paging_modes, + .write_p2m_entry = shadow_write_p2m_entry, + .write_guest_entry = shadow_write_guest_entry, + .cmpxchg_guest_entry = shadow_cmpxchg_guest_entry, + .guest_map_l1e = sh_guest_map_l1e, + .guest_get_eff_l1e = sh_guest_get_eff_l1e, + .guest_levels = GUEST_PAGING_LEVELS, + .shadow.detach_old_tables = sh_detach_old_tables, + .shadow.x86_emulate_write = sh_x86_emulate_write, + .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, + .shadow.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, + .shadow.make_monitor_table = sh_make_monitor_table, + .shadow.destroy_monitor_table = sh_destroy_monitor_table, #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC - .guess_wrmap = sh_guess_wrmap, -#endif - .guest_levels = GUEST_PAGING_LEVELS, - .shadow_levels = SHADOW_PAGING_LEVELS, + .shadow.guess_wrmap = sh_guess_wrmap, +#endif + .shadow.shadow_levels = SHADOW_PAGING_LEVELS, }; /* diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/shadow/multi.h --- a/xen/arch/x86/mm/shadow/multi.h Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/shadow/multi.h Thu Feb 15 14:09:39 2007 -0700 @@ -115,5 +115,5 @@ SHADOW_INTERNAL_NAME(sh_destroy_monitor_ (struct vcpu *v, mfn_t mmfn); #endif -extern struct shadow_paging_mode +extern struct paging_mode SHADOW_INTERNAL_NAME(sh_paging_mode, SHADOW_LEVELS, GUEST_LEVELS); diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/shadow/page-guest32.h --- a/xen/arch/x86/mm/shadow/page-guest32.h Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/shadow/page-guest32.h Thu Feb 15 14:09:39 2007 -0700 @@ -87,11 +87,6 @@ static inline 
l2_pgentry_32_t l2e_from_p #define l2_table_offset_32(a) \ (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1)) -#define linear_l1_table_32 \ - ((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START)) - -#define linear_pg_table_32 linear_l1_table_32 - #endif /* __X86_PAGE_GUEST_H__ */ /* diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/shadow/private.h --- a/xen/arch/x86/mm/shadow/private.h Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/shadow/private.h Thu Feb 15 14:09:39 2007 -0700 @@ -41,13 +41,12 @@ #define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ #define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ #define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ #ifdef NDEBUG #define SHADOW_AUDIT 0 #define SHADOW_AUDIT_ENABLE 0 #else -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ +#define SHADOW_AUDIT 0x15 /* Basic audit of all */ #define SHADOW_AUDIT_ENABLE shadow_audit_enable extern int shadow_audit_enable; #endif @@ -84,9 +83,9 @@ extern int shadow_audit_enable; #define SHADOW_DEBUG_PROPAGATE 1 #define SHADOW_DEBUG_MAKE_SHADOW 1 #define SHADOW_DEBUG_DESTROY_SHADOW 1 -#define SHADOW_DEBUG_P2M 0 #define SHADOW_DEBUG_A_AND_D 1 #define SHADOW_DEBUG_EMULATE 1 +#define SHADOW_DEBUG_P2M 1 #define SHADOW_DEBUG_LOGDIRTY 0 /****************************************************************************** @@ -108,36 +107,36 @@ extern int shadow_audit_enable; #error shadow.h currently requires CONFIG_SMP #endif -#define shadow_lock_init(_d) \ - do { \ - spin_lock_init(&(_d)->arch.shadow.lock); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ +#define shadow_lock_init(_d) \ + do { \ + spin_lock_init(&(_d)->arch.paging.shadow.lock); \ + (_d)->arch.paging.shadow.locker = -1; \ + (_d)->arch.paging.shadow.locker_function = "nobody"; \ } while (0) #define shadow_locked_by_me(_d) \ - (current->processor == (_d)->arch.shadow.locker) - -#define shadow_lock(_d) \ - do { \ - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ - { \ - printk("Error: shadow lock held by %s\n", \ - (_d)->arch.shadow.locker_function); \ - BUG(); \ - } \ - spin_lock(&(_d)->arch.shadow.lock); \ - ASSERT((_d)->arch.shadow.locker == -1); \ - (_d)->arch.shadow.locker = current->processor; \ - (_d)->arch.shadow.locker_function = __func__; \ + (current->processor == (_d)->arch.paging.shadow.locker) + +#define shadow_lock(_d) \ + do { \ + if ( unlikely((_d)->arch.paging.shadow.locker == current->processor) )\ + { \ + printk("Error: shadow lock held by %s\n", \ + (_d)->arch.paging.shadow.locker_function); \ + BUG(); \ + } \ + spin_lock(&(_d)->arch.paging.shadow.lock); \ + ASSERT((_d)->arch.paging.shadow.locker == -1); \ + (_d)->arch.paging.shadow.locker = current->processor; \ + (_d)->arch.paging.shadow.locker_function = __func__; \ } while (0) -#define shadow_unlock(_d) \ - do { \ - ASSERT((_d)->arch.shadow.locker == current->processor); \ - (_d)->arch.shadow.locker = -1; \ - (_d)->arch.shadow.locker_function = "nobody"; \ - spin_unlock(&(_d)->arch.shadow.lock); \ +#define shadow_unlock(_d) \ + do { \ + ASSERT((_d)->arch.paging.shadow.locker == current->processor); \ + (_d)->arch.paging.shadow.locker = -1; \ + (_d)->arch.paging.shadow.locker_function = "nobody"; \ + spin_unlock(&(_d)->arch.paging.shadow.lock); \ } while (0) @@ -151,13 +150,6 @@ extern void shadow_audit_tables(struct v #else #define shadow_audit_tables(_v) do {} while(0) #endif - -#if SHADOW_AUDIT & 
SHADOW_AUDIT_P2M -extern void shadow_audit_p2m(struct domain *d); -#else -#define shadow_audit_p2m(_d) do {} while(0) -#endif - /****************************************************************************** * Macro for dealing with the naming of the internal names of the @@ -304,7 +296,7 @@ static inline int sh_type_is_pinnable(st * page. When we're shadowing those kernels, we have to pin l3 * shadows so they don't just evaporate on every context switch. * For all other guests, we'd rather use the up-pointer field in l3s. */ - if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) + if ( unlikely((v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) && CONFIG_PAGING_LEVELS >= 4 && t == SH_type_l3_64_shadow) ) return 1; @@ -379,12 +371,11 @@ void sh_install_xen_entries_in_l2(struct void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); /* Update the shadows in response to a pagetable write from Xen */ -extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); +int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size); /* Update the shadows in response to a pagetable write from a HVM guest */ -extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, - void *entry, u32 size); +void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, + void *entry, u32 size); /* Remove all writeable mappings of a guest frame from the shadows. * Returns non-zero if we need to flush TLBs. @@ -394,6 +385,21 @@ extern int sh_remove_write_access(struct unsigned int level, unsigned long fault_addr); +/* Allocate/free functions for passing to the P2M code. */ +struct page_info *shadow_alloc_p2m_page(struct domain *d); +void shadow_free_p2m_page(struct domain *d, struct page_info *pg); + +/* Functions that atomically write PT/P2M entries and update state */ +void shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, + l1_pgentry_t *p, l1_pgentry_t new, + unsigned int level); +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t new, mfn_t gmfn); +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, + intpte_t *old, intpte_t new, mfn_t gmfn); + + + /****************************************************************************** * Flags used in the return value of the shadow_set_lXe() functions... */ @@ -477,19 +483,6 @@ sh_unmap_domain_page_global(void *p) unmap_domain_page_global(p); } -static inline mfn_t -pagetable_get_mfn(pagetable_t pt) -{ - return _mfn(pagetable_get_pfn(pt)); -} - -static inline pagetable_t -pagetable_from_mfn(mfn_t mfn) -{ - return pagetable_from_pfn(mfn_x(mfn)); -} - - /****************************************************************************** * Log-dirty mode bitmap handling */ @@ -502,13 +495,13 @@ sh_mfn_is_dirty(struct domain *d, mfn_t { unsigned long pfn; ASSERT(shadow_mode_log_dirty(d)); - ASSERT(d->arch.shadow.dirty_bitmap != NULL); + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); /* We /really/ mean PFN here, even for non-translated guests. 
*/ pfn = get_gpfn_from_mfn(mfn_x(gmfn)); if ( likely(VALID_M2P(pfn)) - && likely(pfn < d->arch.shadow.dirty_bitmap_size) - && test_bit(pfn, d->arch.shadow.dirty_bitmap) ) + && likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) + && test_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) return 1; return 0; @@ -612,7 +605,7 @@ static inline int sh_pin(struct vcpu *v, sp->pinned = 1; } /* Put it at the head of the list of pinned shadows */ - list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows); + list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows); return 1; } diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/mm/shadow/types.h --- a/xen/arch/x86/mm/shadow/types.h Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/mm/shadow/types.h Thu Feb 15 14:09:39 2007 -0700 @@ -414,15 +414,9 @@ static inline mfn_t static inline mfn_t vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn) { - if ( !shadow_vcpu_mode_translate(v) ) + if ( !paging_vcpu_mode_translate(v) ) return _mfn(gfn_x(gfn)); - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); -} - -static inline gfn_t -mfn_to_gfn(struct domain *d, mfn_t mfn) -{ - return _gfn(sh_mfn_to_gfn(d, mfn)); + return gfn_to_mfn(v->domain, gfn_x(gfn)); } static inline paddr_t @@ -453,10 +447,8 @@ struct shadow_walk_t guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */ guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */ guest_l1e_t eff_l1e; /* Effective level 1 entry */ -#if GUEST_PAGING_LEVELS >= 3 #if GUEST_PAGING_LEVELS >= 4 mfn_t l4mfn; /* MFN that the level 4 entry is in */ -#endif mfn_t l3mfn; /* MFN that the level 3 entry is in */ #endif mfn_t l2mfn; /* MFN that the level 2 entry is in */ diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/setup.c Thu Feb 15 14:09:39 2007 -0700 @@ -29,7 +29,7 @@ #include <asm/mpspec.h> #include <asm/apic.h> #include <asm/desc.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/e820.h> #include <acm/acm_hooks.h> #include <xen/kexec.h> diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/sysctl.c --- a/xen/arch/x86/sysctl.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/sysctl.c Thu Feb 15 14:09:39 2007 -0700 @@ -19,7 +19,6 @@ #include <xen/trace.h> #include <xen/console.h> #include <xen/iocap.h> -#include <asm/shadow.h> #include <asm/irq.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/traps.c Thu Feb 15 14:09:39 2007 -0700 @@ -46,7 +46,7 @@ #include <xen/nmi.h> #include <xen/version.h> #include <xen/kexec.h> -#include <asm/shadow.h> +#include <asm/paging.h> #include <asm/system.h> #include <asm/io.h> #include <asm/atomic.h> @@ -860,8 +860,8 @@ static int fixup_page_fault(unsigned lon if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) { - if ( shadow_mode_external(d) && guest_mode(regs) ) - return shadow_fault(addr, regs); + if ( paging_mode_external(d) && guest_mode(regs) ) + return paging_fault(addr, regs); if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( addr - GDT_LDT_VIRT_START, regs); @@ -876,8 +876,8 @@ static int fixup_page_fault(unsigned lon ptwr_do_page_fault(v, addr, regs) ) return EXCRET_fault_fixed; - if ( shadow_mode_enabled(d) ) - return shadow_fault(addr, regs); + if ( paging_mode_enabled(d) ) + return paging_fault(addr, regs); return 0; } diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/x86_32/domain_page.c --- 
a/xen/arch/x86/x86_32/domain_page.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/x86_32/domain_page.c Thu Feb 15 14:09:39 2007 -0700 @@ -11,7 +11,6 @@ #include <xen/mm.h> #include <xen/perfc.h> #include <xen/domain_page.h> -#include <xen/shadow.h> #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/hardirq.h> diff -r ac18d251df63 -r 9529d667d042 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/arch/x86/x86_64/traps.c Thu Feb 15 14:09:39 2007 -0700 @@ -16,7 +16,6 @@ #include <asm/flushtlb.h> #include <asm/msr.h> #include <asm/page.h> -#include <asm/shadow.h> #include <asm/shared.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> diff -r ac18d251df63 -r 9529d667d042 xen/common/libelf/libelf-dominfo.c --- a/xen/common/libelf/libelf-dominfo.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/common/libelf/libelf-dominfo.c Thu Feb 15 14:09:39 2007 -0700 @@ -15,61 +15,63 @@ const char *elf_xen_feature_names[] = { [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb" }; const int elf_xen_features = - sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]); +sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]); int elf_xen_parse_features(const char *features, - uint32_t *supported, - uint32_t *required) + uint32_t *supported, + uint32_t *required) { char feature[64]; int pos, len, i; - if (NULL == features) - return 0; - for (pos = 0; features[pos] != '\0'; pos += len) - { - memset(feature, 0, sizeof(feature)); - for (len = 0;; len++) - { - if (len >= sizeof(feature)-1) - break; - if (features[pos + len] == '\0') - break; - if (features[pos + len] == '|') - { - len++; - break; - } - feature[len] = features[pos + len]; - } - - for (i = 0; i < elf_xen_features; i++) - { - if (!elf_xen_feature_names[i]) - continue; - if (NULL != required && feature[0] == '!') - { - /* required */ - if (0 == strcmp(feature + 1, elf_xen_feature_names[i])) - { - elf_xen_feature_set(i, supported); - elf_xen_feature_set(i, required); - break; - } - } - else - { - /* supported */ - if (0 == strcmp(feature, elf_xen_feature_names[i])) - { - elf_xen_feature_set(i, supported); - break; - } - } - } - if (i == elf_xen_features) - return -1; - } + if ( features == NULL ) + return 0; + + for ( pos = 0; features[pos] != '\0'; pos += len ) + { + memset(feature, 0, sizeof(feature)); + for ( len = 0;; len++ ) + { + if ( len >= sizeof(feature)-1 ) + break; + if ( features[pos + len] == '\0' ) + break; + if ( features[pos + len] == '|' ) + { + len++; + break; + } + feature[len] = features[pos + len]; + } + + for ( i = 0; i < elf_xen_features; i++ ) + { + if ( !elf_xen_feature_names[i] ) + continue; + if ( (required != NULL) && (feature[0] == '!') ) + { + /* required */ + if ( !strcmp(feature + 1, elf_xen_feature_names[i]) ) + { + elf_xen_feature_set(i, supported); + elf_xen_feature_set(i, required); + break; + } + } + else + { + /* supported */ + if ( !strcmp(feature, elf_xen_feature_names[i]) ) + { + elf_xen_feature_set(i, supported); + break; + } + } + } + if ( i == elf_xen_features ) + return -1; + } + return 0; } @@ -77,26 +79,26 @@ int elf_xen_parse_features(const char *f /* xen elf notes */ int elf_xen_parse_note(struct elf_binary *elf, - struct elf_dom_parms *parms, - const elf_note *note) + struct elf_dom_parms *parms, + const elf_note *note) { /* *INDENT-OFF* */ static const struct { - char *name; - int str; + char *name; + int str; } note_desc[] = { - [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0}, - [XEN_ELFNOTE_HYPERCALL_PAGE] = { 
"HYPERCALL_PAGE", 0}, - [XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0}, - [XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0}, - [XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0}, - [XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1}, - [XEN_ELFNOTE_GUEST_OS] = { "GUEST_OS", 1}, - [XEN_ELFNOTE_GUEST_VERSION] = { "GUEST_VERSION", 1}, - [XEN_ELFNOTE_LOADER] = { "LOADER", 1}, - [XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1}, - [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1}, - [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1}, + [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0}, + [XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0}, + [XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0}, + [XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0}, + [XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0}, + [XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1}, + [XEN_ELFNOTE_GUEST_OS] = { "GUEST_OS", 1}, + [XEN_ELFNOTE_GUEST_VERSION] = { "GUEST_VERSION", 1}, + [XEN_ELFNOTE_LOADER] = { "LOADER", 1}, + [XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1}, + [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1}, + [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1}, }; /* *INDENT-ON* */ @@ -104,73 +106,73 @@ int elf_xen_parse_note(struct elf_binary uint64_t val = 0; int type = elf_uval(elf, note, type); - if ((type >= sizeof(note_desc) / sizeof(note_desc[0])) || - (NULL == note_desc[type].name)) - { - elf_err(elf, "%s: unknown xen elf note (0x%x)\n", - __FUNCTION__, type); - return -1; - } - - if (note_desc[type].str) - { - str = elf_note_desc(elf, note); - elf_msg(elf, "%s: %s = \"%s\"\n", __FUNCTION__, - note_desc[type].name, str); + if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) || + (note_desc[type].name == NULL) ) + { + elf_msg(elf, "%s: unknown xen elf note (0x%x)\n", + __FUNCTION__, type); + return 0; + } + + if ( note_desc[type].str ) + { + str = elf_note_desc(elf, note); + elf_msg(elf, "%s: %s = \"%s\"\n", __FUNCTION__, + note_desc[type].name, str); } else { - val = elf_note_numeric(elf, note); - elf_msg(elf, "%s: %s = 0x%" PRIx64 "\n", __FUNCTION__, - note_desc[type].name, val); - } - - switch (type) + val = elf_note_numeric(elf, note); + elf_msg(elf, "%s: %s = 0x%" PRIx64 "\n", __FUNCTION__, + note_desc[type].name, val); + } + + switch ( type ) { case XEN_ELFNOTE_LOADER: - safe_strcpy(parms->loader, str); - break; + safe_strcpy(parms->loader, str); + break; case XEN_ELFNOTE_GUEST_OS: - safe_strcpy(parms->guest_os, str); - break; + safe_strcpy(parms->guest_os, str); + break; case XEN_ELFNOTE_GUEST_VERSION: - safe_strcpy(parms->guest_ver, str); - break; + safe_strcpy(parms->guest_ver, str); + break; case XEN_ELFNOTE_XEN_VERSION: - safe_strcpy(parms->xen_ver, str); - break; + safe_strcpy(parms->xen_ver, str); + break; case XEN_ELFNOTE_PAE_MODE: - if (0 == strcmp(str, "yes")) - parms->pae = 2 /* extended_cr3 */; - if (strstr(str, "bimodal")) - parms->pae = 3 /* bimodal */; - break; + if ( !strcmp(str, "yes") ) + parms->pae = 2 /* extended_cr3 */; + if ( strstr(str, "bimodal") ) + parms->pae = 3 /* bimodal */; + break; case XEN_ELFNOTE_BSD_SYMTAB: - if (0 == strcmp(str, "yes")) - parms->bsd_symtab = 1; - break; + if ( !strcmp(str, "yes") ) + parms->bsd_symtab = 1; + break; case XEN_ELFNOTE_VIRT_BASE: - parms->virt_base = val; - break; + parms->virt_base = val; + break; case XEN_ELFNOTE_ENTRY: - parms->virt_entry = val; - break; + parms->virt_entry = val; + break; case XEN_ELFNOTE_PADDR_OFFSET: - parms->elf_paddr_offset = val; - break; + parms->elf_paddr_offset = val; + break; case XEN_ELFNOTE_HYPERCALL_PAGE: - parms->virt_hypercall = val; - break; + parms->virt_hypercall 
= val; + break; case XEN_ELFNOTE_HV_START_LOW: - parms->virt_hv_start_low = val; - break; + parms->virt_hv_start_low = val; + break; case XEN_ELFNOTE_FEATURES: - if (0 != elf_xen_parse_features(str, parms->f_supported, - parms->f_required)) - return -1; - break; + if ( elf_xen_parse_features(str, parms->f_supported, + parms->f_required) ) + return -1; + break; } return 0; @@ -180,83 +182,85 @@ int elf_xen_parse_note(struct elf_binary /* __xen_guest section */ int elf_xen_parse_guest_info(struct elf_binary *elf, - struct elf_dom_parms *parms) + struct elf_dom_parms *parms) { const char *h; char name[32], value[128]; int len; h = parms->guest_info; - while (*h) - { - memset(name, 0, sizeof(name)); - memset(value, 0, sizeof(value)); - for (len = 0;; len++, h++) { - if (len >= sizeof(name)-1) - break; - if (*h == '\0') - break; - if (*h == ',') - { - h++; - break; - } - if (*h == '=') - { - h++; - for (len = 0;; len++, h++) { - if (len >= sizeof(value)-1) - break; - if (*h == '\0') - break; - if (*h == ',') - { - h++; - break; - } - value[len] = *h; - } - break; - } - name[len] = *h; - } - elf_msg(elf, "%s: %s=\"%s\"\n", __FUNCTION__, name, value); - - /* strings */ - if (0 == strcmp(name, "LOADER")) - safe_strcpy(parms->loader, value); - if (0 == strcmp(name, "GUEST_OS")) - safe_strcpy(parms->guest_os, value); - if (0 == strcmp(name, "GUEST_VER")) - safe_strcpy(parms->guest_ver, value); - if (0 == strcmp(name, "XEN_VER")) - safe_strcpy(parms->xen_ver, value); - if (0 == strcmp(name, "PAE")) - { - if (0 == strcmp(value, "yes[extended-cr3]")) - parms->pae = 2 /* extended_cr3 */; - else if (0 == strncmp(value, "yes", 3)) - parms->pae = 1 /* yes */; - } - if (0 == strcmp(name, "BSD_SYMTAB")) - parms->bsd_symtab = 1; - - /* longs */ - if (0 == strcmp(name, "VIRT_BASE")) - parms->virt_base = strtoull(value, NULL, 0); - if (0 == strcmp(name, "VIRT_ENTRY")) - parms->virt_entry = strtoull(value, NULL, 0); - if (0 == strcmp(name, "ELF_PADDR_OFFSET")) - parms->elf_paddr_offset = strtoull(value, NULL, 0); - if (0 == strcmp(name, "HYPERCALL_PAGE")) - parms->virt_hypercall = (strtoull(value, NULL, 0) << 12) + - parms->virt_base; - - /* other */ - if (0 == strcmp(name, "FEATURES")) - if (0 != elf_xen_parse_features(value, parms->f_supported, - parms->f_required)) - return -1; + while ( *h ) + { + memset(name, 0, sizeof(name)); + memset(value, 0, sizeof(value)); + for ( len = 0;; len++, h++ ) + { + if ( len >= sizeof(name)-1 ) + break; + if ( *h == '\0' ) + break; + if ( *h == ',' ) + { + h++; + break; + } + if ( *h == '=' ) + { + h++; + for ( len = 0;; len++, h++ ) + { + if ( len >= sizeof(value)-1 ) + break; + if ( *h == '\0' ) + break; + if ( *h == ',' ) + { + h++; + break; + } + value[len] = *h; + } + break; + } + name[len] = *h; + } + elf_msg(elf, "%s: %s=\"%s\"\n", __FUNCTION__, name, value); + + /* strings */ + if ( !strcmp(name, "LOADER") ) + safe_strcpy(parms->loader, value); + if ( !strcmp(name, "GUEST_OS") ) + safe_strcpy(parms->guest_os, value); + if ( !strcmp(name, "GUEST_VER") ) + safe_strcpy(parms->guest_ver, value); + if ( !strcmp(name, "XEN_VER") ) + safe_strcpy(parms->xen_ver, value); + if ( !strcmp(name, "PAE") ) + { + if ( !strcmp(value, "yes[extended-cr3]") ) + parms->pae = 2 /* extended_cr3 */; + else if ( !strncmp(value, "yes", 3) ) + parms->pae = 1 /* yes */; + } + if ( !strcmp(name, "BSD_SYMTAB") ) + parms->bsd_symtab = 1; + + /* longs */ + if ( !strcmp(name, "VIRT_BASE") ) + parms->virt_base = strtoull(value, NULL, 0); + if ( !strcmp(name, "VIRT_ENTRY") ) + parms->virt_entry = 
strtoull(value, NULL, 0); + if ( !strcmp(name, "ELF_PADDR_OFFSET") ) + parms->elf_paddr_offset = strtoull(value, NULL, 0); + if ( !strcmp(name, "HYPERCALL_PAGE") ) + parms->virt_hypercall = (strtoull(value, NULL, 0) << 12) + + parms->virt_base; + + /* other */ + if ( !strcmp(name, "FEATURES") ) + if ( elf_xen_parse_features(value, parms->f_supported, + parms->f_required) ) + return -1; } return 0; } @@ -265,54 +269,59 @@ int elf_xen_parse_guest_info(struct elf_ /* sanity checks */ static int elf_xen_note_check(struct elf_binary *elf, - struct elf_dom_parms *parms) -{ - if (NULL == parms->elf_note_start && NULL == parms->guest_info) { - int machine = elf_uval(elf, elf->ehdr, e_machine); - if (EM_386 == machine || EM_X86_64 == machine) { - elf_err(elf, "%s: ERROR: Not a Xen-ELF image: " - "No ELF notes or '__xen_guest' section found.\n", - __FUNCTION__); - return -1; - } - return 0; + struct elf_dom_parms *parms) +{ + if ( (parms->elf_note_start == NULL) && (parms->guest_info == NULL) ) + { + int machine = elf_uval(elf, elf->ehdr, e_machine); + if ( (machine == EM_386) || (machine == EM_X86_64) ) + { + elf_err(elf, "%s: ERROR: Not a Xen-ELF image: " + "No ELF notes or '__xen_guest' section found.\n", + __FUNCTION__); + return -1; + } + return 0; } /* Check the contents of the Xen notes or guest string. */ - if ( ( 0 == strlen(parms->loader) || strncmp(parms->loader, "generic", 7) ) && - ( 0 == strlen(parms->guest_os) || strncmp(parms->guest_os, "linux", 5) ) ) - { - elf_err(elf, "%s: ERROR: Will only load images built for the generic " - "loader or Linux images", __FUNCTION__); - return -1; - } - - if ( 0 == strlen(parms->xen_ver) || strncmp(parms->xen_ver, "xen-3.0", 7) ) - { - elf_err(elf, "%s: ERROR: Xen will only load images built for Xen v3.0\n", - __FUNCTION__); - return -1; + if ( ((strlen(parms->loader) == 0) || + strncmp(parms->loader, "generic", 7)) && + ((strlen(parms->guest_os) == 0) || + strncmp(parms->guest_os, "linux", 5)) ) + { + elf_err(elf, "%s: ERROR: Will only load images built for the generic " + "loader or Linux images", __FUNCTION__); + return -1; + } + + if ( (strlen(parms->xen_ver) == 0) || + strncmp(parms->xen_ver, "xen-3.0", 7) ) + { + elf_err(elf, "%s: ERROR: Xen will only load images built " + "for Xen v3.0\n", __FUNCTION__); + return -1; } return 0; } static int elf_xen_addr_calc_check(struct elf_binary *elf, - struct elf_dom_parms *parms) -{ - if (UNSET_ADDR != parms->elf_paddr_offset && - UNSET_ADDR == parms->virt_base ) - { - elf_err(elf, "%s: ERROR: ELF_PADDR_OFFSET set, VIRT_BASE unset\n", - __FUNCTION__); + struct elf_dom_parms *parms) +{ + if ( (parms->elf_paddr_offset != UNSET_ADDR) && + (parms->virt_base == UNSET_ADDR) ) + { + elf_err(elf, "%s: ERROR: ELF_PADDR_OFFSET set, VIRT_BASE unset\n", + __FUNCTION__); return -1; } /* Initial guess for virt_base is 0 if it is not explicitly defined. */ - if (UNSET_ADDR == parms->virt_base) - { - parms->virt_base = 0; - elf_msg(elf, "%s: VIRT_BASE unset, using 0x%" PRIx64 "\n", - __FUNCTION__, parms->virt_base); + if ( parms->virt_base == UNSET_ADDR ) + { + parms->virt_base = 0; + elf_msg(elf, "%s: VIRT_BASE unset, using 0x%" PRIx64 "\n", + __FUNCTION__, parms->virt_base); } /* @@ -324,22 +333,22 @@ static int elf_xen_addr_calc_check(struc * If we are using the modern ELF notes interface then the default * is 0. 
*/ - if (UNSET_ADDR == parms->elf_paddr_offset) - { - if (parms->elf_note_start) - parms->elf_paddr_offset = 0; - else - parms->elf_paddr_offset = parms->virt_base; - elf_msg(elf, "%s: ELF_PADDR_OFFSET unset, using 0x%" PRIx64 "\n", - __FUNCTION__, parms->elf_paddr_offset); + if ( parms->elf_paddr_offset == UNSET_ADDR ) + { + if ( parms->elf_note_start ) + parms->elf_paddr_offset = 0; + else + parms->elf_paddr_offset = parms->virt_base; + elf_msg(elf, "%s: ELF_PADDR_OFFSET unset, using 0x%" PRIx64 "\n", + __FUNCTION__, parms->elf_paddr_offset); } parms->virt_offset = parms->virt_base - parms->elf_paddr_offset; parms->virt_kstart = elf->pstart + parms->virt_offset; parms->virt_kend = elf->pend + parms->virt_offset; - if (UNSET_ADDR == parms->virt_entry) - parms->virt_entry = elf_uval(elf, elf->ehdr, e_entry); + if ( parms->virt_entry == UNSET_ADDR ) + parms->virt_entry = elf_uval(elf, elf->ehdr, e_entry); elf_msg(elf, "%s: addresses:\n", __FUNCTION__); elf_msg(elf, " virt_base = 0x%" PRIx64 "\n", parms->virt_base); @@ -355,7 +364,7 @@ static int elf_xen_addr_calc_check(struc (parms->virt_base > parms->virt_kstart) ) { elf_err(elf, "%s: ERROR: ELF start or entries are out of bounds.\n", - __FUNCTION__); + __FUNCTION__); return -1; } @@ -366,7 +375,7 @@ static int elf_xen_addr_calc_check(struc /* glue it all together ... */ int elf_xen_parse(struct elf_binary *elf, - struct elf_dom_parms *parms) + struct elf_dom_parms *parms) { const elf_note *note; const elf_shdr *shdr; @@ -382,39 +391,49 @@ int elf_xen_parse(struct elf_binary *elf /* find and parse elf notes */ count = elf_shdr_count(elf); - for (i = 0; i < count; i++) - { - shdr = elf_shdr_by_index(elf, i); - if (0 == strcmp(elf_section_name(elf, shdr), "__xen_guest")) - parms->guest_info = elf_section_start(elf, shdr); - if (elf_uval(elf, shdr, sh_type) != SHT_NOTE) - continue; - parms->elf_note_start = elf_section_start(elf, shdr); - parms->elf_note_end = elf_section_end(elf, shdr); - for (note = parms->elf_note_start; - (void *)note < parms->elf_note_end; - note = elf_note_next(elf, note)) - { - if (0 != strcmp(elf_note_name(elf, note), "Xen")) - continue; - if (0 != elf_xen_parse_note(elf, parms, note)) - return -1; - xen_elfnotes++; - } - } - - if (!xen_elfnotes && parms->guest_info) - { - parms->elf_note_start = NULL; - parms->elf_note_end = NULL; - elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__, - parms->guest_info); - elf_xen_parse_guest_info(elf, parms); - } - - if (0 != elf_xen_note_check(elf, parms)) - return -1; - if (0 != elf_xen_addr_calc_check(elf, parms)) - return -1; - return 0; -} + for ( i = 0; i < count; i++ ) + { + shdr = elf_shdr_by_index(elf, i); + if ( !strcmp(elf_section_name(elf, shdr), "__xen_guest") ) + parms->guest_info = elf_section_start(elf, shdr); + if ( elf_uval(elf, shdr, sh_type) != SHT_NOTE ) + continue; + parms->elf_note_start = elf_section_start(elf, shdr); + parms->elf_note_end = elf_section_end(elf, shdr); + for ( note = parms->elf_note_start; + (void *)note < parms->elf_note_end; + note = elf_note_next(elf, note) ) + { + if ( strcmp(elf_note_name(elf, note), "Xen") ) + continue; + if ( elf_xen_parse_note(elf, parms, note) ) + return -1; + xen_elfnotes++; + } + } + + if ( !xen_elfnotes && parms->guest_info ) + { + parms->elf_note_start = NULL; + parms->elf_note_end = NULL; + elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__, + parms->guest_info); + elf_xen_parse_guest_info(elf, parms); + } + + if ( elf_xen_note_check(elf, parms) != 0 ) + return -1; + if ( elf_xen_addr_calc_check(elf, 
parms) != 0 ) + return -1; + return 0; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ac18d251df63 -r 9529d667d042 xen/common/libelf/libelf-loader.c --- a/xen/common/libelf/libelf-loader.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/common/libelf/libelf-loader.c Thu Feb 15 14:09:39 2007 -0700 @@ -11,10 +11,10 @@ int elf_init(struct elf_binary *elf, con const elf_shdr *shdr; uint64_t i, count, section, offset; - if (!elf_is_elfbinary(image)) + if ( !elf_is_elfbinary(image) ) { - elf_err(elf, "%s: not an ELF binary\n", __FUNCTION__); - return -1; + elf_err(elf, "%s: not an ELF binary\n", __FUNCTION__); + return -1; } memset(elf, 0, sizeof(*elf)); @@ -26,46 +26,46 @@ int elf_init(struct elf_binary *elf, con /* sanity check phdr */ offset = elf_uval(elf, elf->ehdr, e_phoff) + - elf_uval(elf, elf->ehdr, e_phentsize) * elf_phdr_count(elf); - if (offset > elf->size) + elf_uval(elf, elf->ehdr, e_phentsize) * elf_phdr_count(elf); + if ( offset > elf->size ) { - elf_err(elf, "%s: phdr overflow (off %" PRIx64 " > size %lx)\n", - __FUNCTION__, offset, (unsigned long)elf->size); - return -1; + elf_err(elf, "%s: phdr overflow (off %" PRIx64 " > size %lx)\n", + __FUNCTION__, offset, (unsigned long)elf->size); + return -1; } /* sanity check shdr */ offset = elf_uval(elf, elf->ehdr, e_shoff) + - elf_uval(elf, elf->ehdr, e_shentsize) * elf_shdr_count(elf); - if (offset > elf->size) + elf_uval(elf, elf->ehdr, e_shentsize) * elf_shdr_count(elf); + if ( offset > elf->size ) { - elf_err(elf, "%s: shdr overflow (off %" PRIx64 " > size %lx)\n", - __FUNCTION__, offset, (unsigned long)elf->size); - return -1; + elf_err(elf, "%s: shdr overflow (off %" PRIx64 " > size %lx)\n", + __FUNCTION__, offset, (unsigned long)elf->size); + return -1; } /* find section string table */ section = elf_uval(elf, elf->ehdr, e_shstrndx); shdr = elf_shdr_by_index(elf, section); - if (NULL != shdr) - elf->sec_strtab = elf_section_start(elf, shdr); + if ( shdr != NULL ) + elf->sec_strtab = elf_section_start(elf, shdr); /* find symbol table, symbol string table */ count = elf_shdr_count(elf); - for (i = 0; i < count; i++) + for ( i = 0; i < count; i++ ) { - shdr = elf_shdr_by_index(elf, i); - if (elf_uval(elf, shdr, sh_type) != SHT_SYMTAB) - continue; - elf->sym_tab = shdr; - shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link)); - if (NULL == shdr) - { - elf->sym_tab = NULL; - continue; - } - elf->sym_strtab = elf_section_start(elf, shdr); - break; + shdr = elf_shdr_by_index(elf, i); + if ( elf_uval(elf, shdr, sh_type) != SHT_SYMTAB ) + continue; + elf->sym_tab = shdr; + shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link)); + if ( shdr == NULL ) + { + elf->sym_tab = NULL; + continue; + } + elf->sym_strtab = elf_section_start(elf, shdr); + break; } return 0; } @@ -91,24 +91,24 @@ void elf_parse_binary(struct elf_binary uint64_t i, count, paddr, memsz; count = elf_uval(elf, elf->ehdr, e_phnum); - for (i = 0; i < count; i++) + for ( i = 0; i < count; i++ ) { - phdr = elf_phdr_by_index(elf, i); - if (!elf_phdr_is_loadable(elf, phdr)) - continue; - paddr = elf_uval(elf, phdr, p_paddr); - memsz = elf_uval(elf, phdr, p_memsz); - elf_msg(elf, "%s: phdr: paddr=0x%" PRIx64 - " memsz=0x%" PRIx64 "\n", __FUNCTION__, paddr, memsz); - if (low > paddr) - low = paddr; - if (high < paddr + memsz) - high = paddr + memsz; + phdr = elf_phdr_by_index(elf, i); + if ( !elf_phdr_is_loadable(elf, phdr) ) + continue; + paddr = elf_uval(elf, phdr, 
p_paddr); + memsz = elf_uval(elf, phdr, p_memsz); + elf_msg(elf, "%s: phdr: paddr=0x%" PRIx64 + " memsz=0x%" PRIx64 "\n", __FUNCTION__, paddr, memsz); + if ( low > paddr ) + low = paddr; + if ( high < paddr + memsz ) + high = paddr + memsz; } elf->pstart = low; elf->pend = high; elf_msg(elf, "%s: memory: 0x%" PRIx64 " -> 0x%" PRIx64 "\n", - __FUNCTION__, elf->pstart, elf->pend); + __FUNCTION__, elf->pstart, elf->pend); } void elf_load_binary(struct elf_binary *elf) @@ -118,18 +118,20 @@ void elf_load_binary(struct elf_binary * char *dest; count = elf_uval(elf, elf->ehdr, e_phnum); - for (i = 0; i < count; i++) + for ( i = 0; i < count; i++ ) { - phdr = elf_phdr_by_index(elf, i); - if (!elf_phdr_is_loadable(elf, phdr)) - continue; - paddr = elf_uval(elf, phdr, p_paddr); - offset = elf_uval(elf, phdr, p_offset); - filesz = elf_uval(elf, phdr, p_filesz); - memsz = elf_uval(elf, phdr, p_memsz); - dest = elf_get_ptr(elf, paddr); - memcpy(dest, elf->image + offset, filesz); - memset(dest + filesz, 0, memsz - filesz); + phdr = elf_phdr_by_index(elf, i); + if ( !elf_phdr_is_loadable(elf, phdr) ) + continue; + paddr = elf_uval(elf, phdr, p_paddr); + offset = elf_uval(elf, phdr, p_offset); + filesz = elf_uval(elf, phdr, p_filesz); + memsz = elf_uval(elf, phdr, p_memsz); + dest = elf_get_ptr(elf, paddr); + elf_msg(elf, "%s: phdr %" PRIu64 " at 0x%p -> 0x%p\n", + __func__, i, dest, dest + filesz); + memcpy(dest, elf->image + offset, filesz); + memset(dest + filesz, 0, memsz - filesz); } } @@ -144,13 +146,24 @@ uint64_t elf_lookup_addr(struct elf_bina uint64_t value; sym = elf_sym_by_name(elf, symbol); - if (NULL == sym) + if ( sym == NULL ) { - elf_err(elf, "%s: not found: %s\n", __FUNCTION__, symbol); - return -1; + elf_err(elf, "%s: not found: %s\n", __FUNCTION__, symbol); + return -1; } + value = elf_uval(elf, sym, st_value); elf_msg(elf, "%s: symbol \"%s\" at 0x%" PRIx64 "\n", __FUNCTION__, - symbol, value); + symbol, value); return value; } + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ac18d251df63 -r 9529d667d042 xen/common/libelf/libelf-private.h --- a/xen/common/libelf/libelf-private.h Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/common/libelf/libelf-private.h Thu Feb 15 14:09:39 2007 -0700 @@ -12,9 +12,9 @@ #include <public/libelf.h> #define elf_msg(elf, fmt, args ... ) \ - if (elf->verbose) printk(fmt, ## args ) + if (elf->verbose) printk(fmt, ## args ) #define elf_err(elf, fmt, args ... ) \ - printk(fmt, ## args ) + printk(fmt, ## args ) #define strtoull(str, end, base) simple_strtoull(str, end, base) #define bswap_16(x) swab16(x) @@ -43,12 +43,12 @@ #include "xc_private.h" #define elf_msg(elf, fmt, args ... ) \ - if (elf->log && elf->verbose) fprintf(elf->log, fmt , ## args ) -#define elf_err(elf, fmt, args ... ) do { \ - if (elf->log) \ - fprintf(elf->log, fmt , ## args ); \ - xc_set_error(XC_INVALID_KERNEL, fmt , ## args ); \ - } while (0) + if (elf->log && elf->verbose) fprintf(elf->log, fmt , ## args ) +#define elf_err(elf, fmt, args ... 
) do { \ + if (elf->log) \ + fprintf(elf->log, fmt , ## args ); \ + xc_set_error(XC_INVALID_KERNEL, fmt , ## args ); \ +} while (0) #define safe_strcpy(d,s) \ do { strncpy((d),(s),sizeof((d))-1); \ @@ -58,3 +58,13 @@ do { strncpy((d),(s),sizeof((d))-1); #endif #endif /* __LIBELF_PRIVATE_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r ac18d251df63 -r 9529d667d042 xen/common/libelf/libelf-relocate.c --- a/xen/common/libelf/libelf-relocate.c Thu Feb 15 13:13:36 2007 -0700 +++ b/xen/common/libelf/libelf-relocate.c Thu Feb 15 14:09:39 2007 -0700 @@ -46,22 +46,22 @@ static const char *rel_names_i386[] = { }; static int elf_reloc_i386(struct elf_binary *elf, int type, - uint64_t addr, uint64_t value) + uint64_t addr, uint64_t value) { void *ptr = elf_get_ptr(elf, addr); uint32_t *u32; - switch (type) + switch ( type ) { case 1 /* R_386_32 */ : - u32 = ptr; - *u32 += elf->reloc_offset; - break; + u32 = ptr; + *u32 += elf->reloc_offset; + break; case 2 /* R_386_PC32 */ : - /* nothing */ - break; + /* nothing */ + break; default: - return -1; + return -1; } return 0; } @@ -96,54 +96,57 @@ static const char *rel_names_x86_64[] = }; static int elf_reloc_x86_64(struct elf_binary *elf, int type, - uint64_t addr, uint64_t value) + uint64_t addr, uint64_t value) { void *ptr = elf_get_ptr(elf, addr); uint64_t *u64; uint32_t *u32; int32_t *s32; - switch (type) + switch ( type ) { case 1 /* R_X86_64_64 */ : - u64 = ptr; - value += elf->reloc_offset; - *u64 = value; - break; + u64 = ptr; + value += elf->reloc_offset; + *u64 = value; + break; case 2 /* R_X86_64_PC32 */ : - u32 = ptr; - *u32 = value - addr; - if (*u32 != (uint32_t) (value - addr)) - { - elf_err(elf, "R_X86_64_PC32 overflow: 0x%" PRIx32 " != 0x%" PRIx32 "\n", - *u32, (uint32_t) (value - addr)); - return -1; - } - break; + u32 = ptr; + *u32 = value - addr; + if ( *u32 != (uint32_t)(value - addr) ) + { + elf_err(elf, "R_X86_64_PC32 overflow: 0x%" PRIx32 + " != 0x%" PRIx32 "\n", + *u32, (uint32_t) (value - addr)); + return -1; + } + break; case 10 /* R_X86_64_32 */ : - u32 = ptr; - value += elf->reloc_offset; - *u32 = value; - if (*u32 != value) - { - elf_err(elf, "R_X86_64_32 overflow: 0x%" PRIx32 " != 0x%" PRIx64 "\n", - *u32, value); - return -1; - } - break; + u32 = ptr; + value += elf->reloc_offset; + *u32 = value; + if ( *u32 != value ) + { + elf_err(elf, "R_X86_64_32 overflow: 0x%" PRIx32 + " != 0x%" PRIx64 "\n", + *u32, value); + return -1; + } + break; case 11 /* R_X86_64_32S */ : - s32 = ptr; - value += elf->reloc_offset; - *s32 = value; - if (*s32 != (int64_t) value) - { - elf_err(elf, "R_X86_64_32S overflow: 0x%" PRIx32 " != 0x%" PRIx64 "\n", - *s32, (int64_t) value); - return -1; - } - break; + s32 = ptr; + value += elf->reloc_offset; + *s32 = value; + if ( *s32 != (int64_t) value ) + { + elf_err(elf, "R_X86_64_32S overflow: 0x%" PRIx32 + " != 0x%" PRIx64 "\n", + *s32, (int64_t) value); + return -1; + } + break; default: - return -1; + return -1; } return 0; } @@ -154,19 +157,19 @@ static struct relocs { const char **names; int count; int (*func) (struct elf_binary * elf, int type, uint64_t addr, - uint64_t value); + uint64_t value); } relocs[] = /* *INDENT-OFF* */ { [EM_386] = { - .names = rel_names_i386, - .count = sizeof(rel_names_i386) / sizeof(rel_names_i386[0]), - .func = elf_reloc_i386, + .names = rel_names_i386, + .count = sizeof(rel_names_i386) / sizeof(rel_names_i386[0]), + .func = elf_reloc_i386, }, [EM_X86_64] = { - 
.names = rel_names_x86_64, - .count = sizeof(rel_names_x86_64) / sizeof(rel_names_x86_64[0]), - .func = elf_reloc_x86_64, + .names = rel_names_x86_64, + .count = sizeof(rel_names_x86_64) / sizeof(rel_names_x86_64[0]), + .func = elf_reloc_x86_64, } }; /* *INDENT-ON* */ @@ -175,18 +178,18 @@ static struct relocs { static const char *rela_name(int machine, int type) { - if (machine > sizeof(relocs) / sizeof(relocs[0])) - return "unknown mach"; - if (!relocs[machine].names) - return "unknown mach"; - if (type > relocs[machine].count) - return "unknown rela"; + if ( machine > sizeof(relocs) / sizeof(relocs[0]) ) + return "unknown mach"; + if ( !relocs[machine].names ) + return "unknown mach"; + if ( type > relocs[machine].count ) + return "unknown rela"; return relocs[machine].names[type]; } static int elf_reloc_section(struct elf_binary *elf, - const elf_shdr * rels, - const elf_shdr * sect, const elf_shdr * syms) + const elf_shdr * rels, + const elf_shdr * sect, const elf_shdr * syms) { const void *ptr, *end; const elf_shdr *shdr; @@ -204,18 +207,18 @@ static int elf_reloc_section(struct elf_ int machine; machine = elf_uval(elf, elf->ehdr, e_machine); - if (machine >= sizeof(relocs) / sizeof(relocs[0]) || - NULL == relocs[machine].func) - { - elf_err(elf, "%s: can't handle machine %d\n", - __FUNCTION__, machine); - return -1; - } - if (elf_swap(elf)) - { - elf_err(elf, "%s: non-native byte order, relocation not supported\n", - __FUNCTION__); - return -1; + if ( (machine >= (sizeof(relocs) / sizeof(relocs[0]))) || + (relocs[machine].func == NULL) ) + { + elf_err(elf, "%s: can't handle machine %d\n", + __FUNCTION__, machine); + return -1; + } + if ( elf_swap(elf) ) + { + elf_err(elf, "%s: non-native byte order, relocation not supported\n", + __FUNCTION__); + return -1; } s_type = elf_uval(elf, rels, sh_type); @@ -223,89 +226,89 @@ static int elf_reloc_section(struct elf_ ptr = elf_section_start(elf, rels); end = elf_section_end(elf, rels); - for (; ptr < end; ptr += rsize) - { - switch (s_type) - { - case SHT_REL: - rel = ptr; - r_offset = elf_uval(elf, rel, r_offset); - r_info = elf_uval(elf, rel, r_info); - r_addend = 0; - break; - case SHT_RELA: - rela = ptr; - r_offset = elf_uval(elf, rela, r_offset); - r_info = elf_uval(elf, rela, r_info); - r_addend = elf_uval(elf, rela, r_addend); - break; - default: - /* can't happen */ - return -1; - } - if (elf_64bit(elf)) - { - r_type = ELF64_R_TYPE(r_info); - r_sym = ELF64_R_SYM(r_info); - } - else - { - r_type = ELF32_R_TYPE(r_info); - r_sym = ELF32_R_SYM(r_info); - } - - sym = elf_sym_by_index(elf, r_sym); - shndx = elf_uval(elf, sym, st_shndx); - switch (shndx) - { - case SHN_UNDEF: - sname = "*UNDEF*"; - sbase = 0; - break; - case SHN_COMMON: - elf_err(elf, "%s: invalid section: %" PRId64 "\n", - __FUNCTION__, shndx); - return -1; - case SHN_ABS: - sname = "*ABS*"; - sbase = 0; - break; - default: - shdr = elf_shdr_by_index(elf, shndx); - if (NULL == shdr) - { - elf_err(elf, "%s: invalid section: %" PRId64 "\n", - __FUNCTION__, shndx); - return -1; - } - sname = elf_section_name(elf, shdr); - sbase = elf_uval(elf, shdr, sh_addr); - } - - addr = r_offset; - value = elf_uval(elf, sym, st_value); - value += r_addend; - - if (elf->log && elf->verbose > 1) - { - uint64_t st_name = elf_uval(elf, sym, st_name); - const char *name = st_name ? 
elf->sym_strtab + st_name : "*NONE*"; - - elf_msg(elf, - "%s: type %s [%d], off 0x%" PRIx64 ", add 0x%" PRIx64 "," - " sym %s [0x%" PRIx64 "], sec %s [0x%" PRIx64 "]" - " -> addr 0x%" PRIx64 " value 0x%" PRIx64 "\n", - __FUNCTION__, rela_name(machine, r_type), r_type, r_offset, - r_addend, name, elf_uval(elf, sym, st_value), sname, sbase, - addr, value); - } - - if (-1 == relocs[machine].func(elf, r_type, addr, value)) - { - elf_err(elf, "%s: unknown/unsupported reloc type %s [%d]\n", - __FUNCTION__, rela_name(machine, r_type), r_type); - return -1; - } + for ( ; ptr < end; ptr += rsize ) + { + switch ( s_type ) + { + case SHT_REL: + rel = ptr; + r_offset = elf_uval(elf, rel, r_offset); + r_info = elf_uval(elf, rel, r_info); + r_addend = 0; + break; + case SHT_RELA: + rela = ptr; + r_offset = elf_uval(elf, rela, r_offset); + r_info = elf_uval(elf, rela, r_info); + r_addend = elf_uval(elf, rela, r_addend); + break; + default: + /* can't happen */ + return -1; + } + if ( elf_64bit(elf) ) + { + r_type = ELF64_R_TYPE(r_info); + r_sym = ELF64_R_SYM(r_info); + } + else + { + r_type = ELF32_R_TYPE(r_info); + r_sym = ELF32_R_SYM(r_info); + } + + sym = elf_sym_by_index(elf, r_sym); + shndx = elf_uval(elf, sym, st_shndx); + switch ( shndx ) + { + case SHN_UNDEF: + sname = "*UNDEF*"; + sbase = 0; + break; + case SHN_COMMON: + elf_err(elf, "%s: invalid section: %" PRId64 "\n", + __FUNCTION__, shndx); + return -1; + case SHN_ABS: + sname = "*ABS*"; + sbase = 0; + break; + default: + shdr = elf_shdr_by_index(elf, shndx); + if ( shdr == NULL ) + { + elf_err(elf, "%s: invalid section: %" PRId64 "\n", + __FUNCTION__, shndx); + return -1; + } + sname = elf_section_name(elf, shdr); + sbase = elf_uval(elf, shdr, sh_addr); + } + + addr = r_offset; + value = elf_uval(elf, sym, st_value); + value += r_addend; + + if ( elf->log && (elf->verbose > 1) ) + { + uint64_t st_name = elf_uval(elf, sym, st_name); + const char *name = st_name ? 
elf->sym_strtab + st_name : "*NONE*";
+
+            elf_msg(elf,
+                    "%s: type %s [%d], off 0x%" PRIx64 ", add 0x%" PRIx64 ","
+                    " sym %s [0x%" PRIx64 "], sec %s [0x%" PRIx64 "]"
+                    " -> addr 0x%" PRIx64 " value 0x%" PRIx64 "\n",
+                    __FUNCTION__, rela_name(machine, r_type), r_type, r_offset,
+                    r_addend, name, elf_uval(elf, sym, st_value), sname, sbase,
+                    addr, value);
+        }
+
+        if ( relocs[machine].func(elf, r_type, addr, value) == -1 )
+        {
+            elf_err(elf, "%s: unknown/unsupported reloc type %s [%d]\n",
+                    __FUNCTION__, rela_name(machine, r_type), r_type);
+            return -1;
+        }
     }
     return 0;
 }
@@ -316,30 +319,40 @@ int elf_reloc(struct elf_binary *elf)
     uint64_t i, count, type;
 
     count = elf_shdr_count(elf);
-    for (i = 0; i < count; i++)
-    {
-        rels = elf_shdr_by_index(elf, i);
-        type = elf_uval(elf, rels, sh_type);
-        if (type != SHT_REL && type != SHT_RELA)
-            continue;
-
-        sect = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_info));
-        syms = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_link));
-        if (NULL == sect || NULL == syms)
-            continue;
-
-        if (!(elf_uval(elf, sect, sh_flags) & SHF_ALLOC))
-        {
-            elf_msg(elf, "%s: relocations for %s, skipping\n",
-                    __FUNCTION__, elf_section_name(elf, sect));
-            continue;
-        }
-
-        elf_msg(elf, "%s: relocations for %s @ 0x%" PRIx64 "\n",
-                __FUNCTION__, elf_section_name(elf, sect),
-                elf_uval(elf, sect, sh_addr));
-        if (0 != elf_reloc_section(elf, rels, sect, syms))
-            return -1;
-    }
-    return 0;
-}
+    for ( i = 0; i < count; i++ )
+    {
+        rels = elf_shdr_by_index(elf, i);
+        type = elf_uval(elf, rels, sh_type);
+        if ( (type != SHT_REL) && (type != SHT_RELA) )
+            continue;
+
+        sect = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_info));
+        syms = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_link));
+        if ( NULL == sect || NULL == syms )
+            continue;
+
+        if ( !(elf_uval(elf, sect, sh_flags) & SHF_ALLOC) )
+        {
+            elf_msg(elf, "%s: relocations for %s, skipping\n",
+                    __FUNCTION__, elf_section_name(elf, sect));
+            continue;
+        }
+
+        elf_msg(elf, "%s: relocations for %s @ 0x%" PRIx64 "\n",
+                __FUNCTION__, elf_section_name(elf, sect),
+                elf_uval(elf, sect, sh_addr));
+        if ( elf_reloc_section(elf, rels, sect, syms) != 0 )
+            return -1;
+    }
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ac18d251df63 -r 9529d667d042 xen/common/libelf/libelf-tools.c
--- a/xen/common/libelf/libelf-tools.c  Thu Feb 15 13:13:36 2007 -0700
+++ b/xen/common/libelf/libelf-tools.c  Thu Feb 15 14:09:39 2007 -0700
@@ -7,7 +7,7 @@
 /* ------------------------------------------------------------------------ */
 
 uint64_t elf_access_unsigned(struct elf_binary * elf, const void *ptr,
-                  uint64_t offset, size_t size)
+                             uint64_t offset, size_t size)
 {
     int need_swap = elf_swap(elf);
     const uint8_t *u8;
@@ -15,27 +15,27 @@ uint64_t elf_access_unsigned(struct elf_
     const uint32_t *u32;
     const uint64_t *u64;
 
-    switch (size)
-    {
-    case 1:
-        u8 = ptr + offset;
-        return *u8;
-    case 2:
-        u16 = ptr + offset;
-        return need_swap ? bswap_16(*u16) : *u16;
-    case 4:
-        u32 = ptr + offset;
-        return need_swap ? bswap_32(*u32) : *u32;
-    case 8:
-        u64 = ptr + offset;
-        return need_swap ? bswap_64(*u64) : *u64;
-    default:
-        return 0;
+    switch ( size )
+    {
+    case 1:
+        u8 = ptr + offset;
+        return *u8;
+    case 2:
+        u16 = ptr + offset;
+        return need_swap ? bswap_16(*u16) : *u16;
+    case 4:
+        u32 = ptr + offset;
+        return need_swap ? bswap_32(*u32) : *u32;
+    case 8:
+        u64 = ptr + offset;
+        return need_swap ? bswap_64(*u64) : *u64;
+    default:
+        return 0;
     }
 }
 
 int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
-                 uint64_t offset, size_t size)
+                          uint64_t offset, size_t size)
 {
     int need_swap = elf_swap(elf);
     const int8_t *s8;
@@ -43,22 +43,22 @@ int64_t elf_access_signed(struct elf_bin
     const int32_t *s32;
    const int64_t *s64;
 
-    switch (size)
-    {
-    case 1:
-        s8 = ptr + offset;
-        return *s8;
-    case 2:
-        s16 = ptr + offset;
-        return need_swap ? bswap_16(*s16) : *s16;
-    case 4:
-        s32 = ptr + offset;
-        return need_swap ? bswap_32(*s32) : *s32;
-    case 8:
-        s64 = ptr + offset;
-        return need_swap ? bswap_64(*s64) : *s64;
-    default:
-        return 0;
+    switch ( size )
+    {
+    case 1:
+        s8 = ptr + offset;
+        return *s8;
+    case 2:
+        s16 = ptr + offset;
+        return need_swap ? bswap_16(*s16) : *s16;
+    case 4:
+        s32 = ptr + offset;
+        return need_swap ? bswap_32(*s32) : *s32;
+    case 8:
+        s64 = ptr + offset;
+        return need_swap ? bswap_64(*s64) : *s64;
+    default:
+        return 0;
     }
 }
 
@@ -88,11 +88,12 @@ const elf_shdr *elf_shdr_by_name(struct
     const char *sname;
     int i;
 
-    for (i = 0; i < count; i++) {
-        shdr = elf_shdr_by_index(elf, i);
-        sname = elf_section_name(elf, shdr);
-        if (sname && 0 == strcmp(sname, name))
-            return shdr;
+    for ( i = 0; i < count; i++ )
+    {
+        shdr = elf_shdr_by_index(elf, i);
+        sname = elf_section_name(elf, shdr);
+        if ( sname && !strcmp(sname, name) )
+            return shdr;
     }
     return NULL;
 }
@@ -100,31 +101,35 @@ const elf_shdr *elf_shdr_by_index(struct
 const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index)
 {
     uint64_t count = elf_shdr_count(elf);
-    const void *ptr = NULL;
-
-    if (index < count)
-        ptr = elf->image
-            + elf_uval(elf, elf->ehdr, e_shoff)
-            + elf_uval(elf, elf->ehdr, e_shentsize) * index;
+    const void *ptr;
+
+    if ( index >= count )
+        return NULL;
+
+    ptr = (elf->image
+           + elf_uval(elf, elf->ehdr, e_shoff)
+           + elf_uval(elf, elf->ehdr, e_shentsize) * index);
     return ptr;
 }
 
 const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index)
 {
     uint64_t count = elf_uval(elf, elf->ehdr, e_phnum);
-    const void *ptr = NULL;
-
-    if (index < count)
-        ptr = elf->image
-            + elf_uval(elf, elf->ehdr, e_phoff)
-            + elf_uval(elf, elf->ehdr, e_phentsize) * index;
+    const void *ptr;
+
+    if ( index >= count )
+        return NULL;
+
+    ptr = (elf->image
+           + elf_uval(elf, elf->ehdr, e_phoff)
+           + elf_uval(elf, elf->ehdr, e_phentsize) * index);
     return ptr;
 }
 
 const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr)
 {
-    if (NULL == elf->sec_strtab)
-        return "unknown";
+    if ( elf->sec_strtab == NULL )
+        return "unknown";
     return elf->sec_strtab + elf_uval(elf, shdr, sh_name);
 }
@@ -136,7 +141,7 @@ const void *elf_section_end(struct elf_b
 const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr)
 {
     return elf->image
-        + elf_uval(elf, shdr, sh_offset) + elf_uval(elf, shdr, sh_size);
+           + elf_uval(elf, shdr, sh_offset) + elf_uval(elf, shdr, sh_size);
 }
 
 const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol)
@@ -146,16 +151,16 @@ const elf_sym *elf_sym_by_name(struct el
     const elf_sym *sym;
     uint64_t info, name;
 
-    for (; ptr < end; ptr += elf_size(elf, sym))
-    {
-        sym = ptr;
-        info = elf_uval(elf, sym, st_info);
-        name = elf_uval(elf, sym, st_name);
-        if (ELF32_ST_BIND(info) != STB_GLOBAL)
-            continue;
-        if (strcmp(elf->sym_strtab + name, symbol) != 0)
-            continue;
-        return sym;
+    for ( ; ptr < end; ptr += elf_size(elf, sym) )
+    {
+        sym = ptr;
+        info = elf_uval(elf, sym, st_info);
+        name = elf_uval(elf, sym, st_name);
+        if ( ELF32_ST_BIND(info) != STB_GLOBAL )
+            continue;
+        if ( strcmp(elf->sym_strtab + name, symbol) )
+            continue;
+        return sym;
     }
     return NULL;
 }
@@ -192,9 +197,9 @@ uint64_t elf_note_numeric(struct elf_bin
     case 2:
     case 4:
     case 8:
-        return elf_access_unsigned(elf, desc, 0, descsz);
-    default:
-        return 0;
+        return elf_access_unsigned(elf, desc, 0, descsz);
+    default:
+        return 0;
     }
 }
 
 const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note)
@@ -211,9 +216,7 @@ int elf_is_elfbinary(const void *image)
 {
     const Elf32_Ehdr *ehdr = image;
 
-    if (IS_ELF(*ehdr))
-        return 1;
-    return 0;
+    return IS_ELF(*ehdr);
 }
 
 int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr)
@@ -223,3 +226,13 @@ int elf_phdr_is_loadable(struct elf_bina
     return ((p_type == PT_LOAD) && (p_flags & (PF_W | PF_X)) != 0);
 }
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r ac18d251df63 -r 9529d667d042 xen/include/asm-powerpc/config.h
--- a/xen/include/asm-powerpc/config.h  Thu Feb 15 13:13:36 2007 -0700
+++ b/xen/include/asm-powerpc/config.h  Thu Feb 15 14:09:39 2007 -0700
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005
+ * Copyright IBM Corp. 2005, 2006, 2007
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
@@ -34,9 +34,6 @@
 #define U(x) (x ## U)
 #define UL(x) (x ## UL)
 
-extern char _start[];
-extern char _end[];
-extern char _etext[];
 extern char __bss_start[];
 
 #endif
diff -r ac18d251df63 -r 9529d667d042 xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h  Thu Feb 15 13:13:36 2007 -0700
+++ b/xen/include/asm-powerpc/domain.h  Thu Feb 15 14:09:39 2007 -0700
@@ -107,13 +107,6 @@ extern void save_float(struct vcpu *);
 extern void save_float(struct vcpu *);
 extern void load_float(struct vcpu *);
 
-#define RMA_SHARED_INFO 1
-#define RMA_START_INFO 2
-#define RMA_LAST_DOM0 2
-/* these are not used for dom0 so they should be last */
-#define RMA_CONSOLE 3
-#define RMA_LAST_DOMU 3
-
 #define rma_size(rma_order) (1UL << ((rma_order) + PAGE_SHIFT))
 
 static inline ulong rma_addr(struct arch_domain *ad, int type)
diff -r ac18d251df63 -r 9529d667d042 xen/include/asm-powerpc/init.h
--- a/xen/include/asm-powerpc/init.h  Thu Feb 15 13:13:36 2007 -0700
+++ b/xen/include/asm-powerpc/init.h  Thu Feb 15 14:09:39 2007 -0700
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2006
+ * Copyright IBM Corp. 2006, 2007
  *
  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
  */
@@ -27,14 +27,14 @@ typedef struct {
 typedef struct {
     unsigned long number;
     hcall_handler_t handler;
-} inithcall_t;
-extern inithcall_t __inithcall_start, __inithcall_end;
+} init_hcall_t;
+extern init_hcall_t __init_hcall_start, __init_hcall_end;
 
 #define __init_papr_hcall(nr, fn) \
-    static inithcall_t __inithcall_##fn __init_hcall \
+    static init_hcall_t __init_hcall_##fn __init_hcall \
     = { .number = nr, .handler = fn }
 
 #define __init_hcall \
-    __attribute_used__ __attribute__ ((__section__ (".inithcall.text")))
+    __attribute_used__ __attribute__ ((__section__ (".init_hcall.init")))
 
 #endif /* _XEN_ASM_INIT_H */
diff -r ac18d251df63 -r 9529d667d042 xen/include/asm-powerpc/mm.h
--- a/xen/include/asm-powerpc/mm.h  Thu Feb 15 13:13:36 2007 -0700
+++ b/xen/include/asm-powerpc/mm.h  Thu Feb 15 14:09:39 2007 -0700
@@ -13,7 +13,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  *
- * Copyright (C) IBM Corp. 2005, 2006
+ * Copyright IBM Corp. 2005, 2006, 2007
 *

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
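
A note on the elf_reloc() loop reformatted above: a SHT_REL/SHT_RELA section names its operands through two header fields, sh_info (index of the section whose contents get patched) and sh_link (index of the symbol table used to resolve the relocation's symbols), which is why both are fetched with elf_shdr_by_index() before any entry is processed. A compact illustrative sketch of that pairing, using the standard Linux <elf.h> types rather than libelf's accessors (the struct image wrapper is hypothetical):

/* Illustrative sketch only, not the Xen libelf code. Shows how each
 * relocation section is paired with its target section and symtab. */
#include <elf.h>        /* Elf64_Shdr, SHT_REL, SHT_RELA, SHF_ALLOC */

struct image { Elf64_Shdr *shdrs; unsigned int nr_shdrs; };

static void walk_reloc_sections(struct image *img)
{
    unsigned int i;

    for ( i = 0; i < img->nr_shdrs; i++ )
    {
        Elf64_Shdr *rels = &img->shdrs[i];

        if ( rels->sh_type != SHT_REL && rels->sh_type != SHT_RELA )
            continue;

        /* sh_info: index of the section the relocations patch;
         * sh_link: index of the symbol table they resolve against. */
        Elf64_Shdr *sect = &img->shdrs[rels->sh_info];
        Elf64_Shdr *syms = &img->shdrs[rels->sh_link];

        /* Only sections actually loaded into memory need patching,
         * hence the SHF_ALLOC test in elf_reloc() above. */
        if ( !(sect->sh_flags & SHF_ALLOC) )
            continue;

        /* ... apply each Elf64_Rel/Elf64_Rela entry here ... */
        (void)syms;
    }
}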
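The elf_access_unsigned()/elf_access_signed() hunks are whitespace-only, but the pattern they carry is the heart of this libelf: every multi-byte ELF field is read through one accessor that byte-swaps when the image's data encoding differs from the host's, so the same loader code handles big-endian PowerPC images and little-endian x86 images alike. A minimal standalone sketch of that pattern, assuming glibc's <byteswap.h>; the memcpy() is an illustrative addition (the original casts and dereferences directly), not part of libelf:

/* Illustrative sketch only, not the Xen libelf code. Reads an unsigned
 * field of 'size' bytes at ptr + offset, byte-swapping when 'need_swap'
 * is set (image endianness != host endianness). */
#include <stdint.h>
#include <string.h>
#include <byteswap.h>   /* bswap_16 / bswap_32 / bswap_64 (glibc) */

static uint64_t access_unsigned(const void *ptr, uint64_t offset,
                                size_t size, int need_swap)
{
    uint16_t u16;
    uint32_t u32;
    uint64_t u64;

    switch ( size )
    {
    case 1:
        return *((const uint8_t *)ptr + offset);
    case 2:
        /* memcpy() sidesteps alignment constraints on strict targets. */
        memcpy(&u16, (const uint8_t *)ptr + offset, sizeof(u16));
        return need_swap ? bswap_16(u16) : u16;
    case 4:
        memcpy(&u32, (const uint8_t *)ptr + offset, sizeof(u32));
        return need_swap ? bswap_32(u32) : u32;
    case 8:
        memcpy(&u64, (const uint8_t *)ptr + offset, sizeof(u64));
        return need_swap ? bswap_64(u64) : u64;
    default:
        return 0;       /* unsupported width, matching libelf */
    }
}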
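The asm-powerpc/init.h hunk renames the PAPR hypercall registration machinery (inithcall_t becomes init_hcall_t, and the records move to a .init_hcall.init section). The technique is a linker-section table: each __init_papr_hcall(nr, fn) expands to a static {number, handler} record placed in the dedicated section, and start/end symbols supplied by the linker script let boot code walk every record with no central list. A sketch of such a walker; register_hcall() and the simplified handler signature are hypothetical stand-ins, and the real consumer is Xen's PowerPC hcall dispatch code, presumably xen/arch/powerpc/hcalls.c:

/* Illustrative sketch only. Mirrors the init.h definitions above to
 * show how a linker-assembled registration table is consumed. */
typedef int (*hcall_handler_t)(void);   /* signature simplified here */

typedef struct {
    unsigned long number;
    hcall_handler_t handler;
} init_hcall_t;

/* Emitted by the linker script, bracketing the .init_hcall.init section. */
extern init_hcall_t __init_hcall_start, __init_hcall_end;

/* Hypothetical helper that enters one handler in the dispatch table. */
extern void register_hcall(unsigned long number, hcall_handler_t handler);

static void register_papr_hcalls(void)
{
    init_hcall_t *entry;

    /* Every __init_papr_hcall() record from every object file lands
     * contiguously in one section, so a pointer walk visits them all. */
    for ( entry = &__init_hcall_start; entry < &__init_hcall_end; entry++ )
        register_hcall(entry->number, entry->handler);
}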