[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] libxc domain builder rewrite, core bits.



# HG changeset patch
# User Emmanuel Ackaouy <ack@xxxxxxxxxxxxx>
# Date 1169763412 0
# Node ID fd50500eee7ce4477b9e80413f1a2ea0dfe661c8
# Parent  50d9e2ddc377a746eb094578b264bba67045cbce
libxc domain builder rewrite, core bits.

Signed-off-by: Gerd Hoffmann <kraxel@xxxxxxx>
---
 tools/libxc/Makefile           |   14 
 tools/libxc/xc_dom.h           |  261 +++++++++++++
 tools/libxc/xc_dom_binloader.c |  294 +++++++++++++++
 tools/libxc/xc_dom_boot.c      |  515 +++++++++++++++++++++++++++
 tools/libxc/xc_dom_core.c      |  773 +++++++++++++++++++++++++++++++++++++++++
 tools/libxc/xc_dom_elfloader.c |  283 +++++++++++++++
 tools/libxc/xc_dom_ia64.c      |  118 ++++++
 tools/libxc/xc_dom_powerpc64.c |  100 +++++
 tools/libxc/xc_dom_x86.c       |  559 +++++++++++++++++++++++++++++
 9 files changed, 2917 insertions(+)

diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/Makefile
--- a/tools/libxc/Makefile      Thu Jan 25 22:16:52 2007 +0000
+++ b/tools/libxc/Makefile      Thu Jan 25 22:16:52 2007 +0000
@@ -44,6 +44,20 @@ libelf-relocate.o: libelf-relocate.c lib
 
 # add libelf bits to libxc
 GUEST_SRCS-y += $(LIBELF_SRCS)
+
+# new domain builder
+GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c
+GUEST_SRCS-y += xc_dom_elfloader.c
+GUEST_SRCS-y += xc_dom_binloader.c
+
+ifeq ($(CONFIG_POWERPC),y)
+# big endian boxes
+GUEST_SRCS-y += xc_dom_powerpc64.c
+else
+# little endian boxes
+GUEST_SRCS-y += xc_dom_x86.c
+GUEST_SRCS-y += xc_dom_ia64.c
+endif
 
 -include $(XEN_TARGET_ARCH)/Makefile
 
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom.h      Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,261 @@
+#include <xen/libelf.h>
+
+#define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
+
+/* --- typedefs and structs ---------------------------------------- */
+
+typedef uint64_t xen_vaddr_t;
+typedef uint64_t xen_paddr_t;
+
+/* FIXME: temporary hack ... */
+#ifndef PRIpfn
+#define PRIpfn "lx"
+#endif
+
+struct xc_dom_seg {
+    xen_vaddr_t vstart;
+    xen_vaddr_t vend;
+    xen_pfn_t pfn;
+};
+
+struct xc_dom_mem {
+    struct xc_dom_mem *next;
+    void *mmap_ptr;
+    size_t mmap_len;
+    unsigned char memory[0];
+};
+
+struct xc_dom_phys {
+    struct xc_dom_phys *next;
+    void *ptr;
+    xen_pfn_t first;
+    xen_pfn_t count;
+};
+
+struct xc_dom_image {
+    /* files */
+    void *kernel_blob;
+    size_t kernel_size;
+    void *ramdisk_blob;
+    size_t ramdisk_size;
+
+    /* arguments and parameters */
+    char *cmdline;
+    uint32_t f_requested[XENFEAT_NR_SUBMAPS];
+
+    /* info from (elf) kernel image */
+    struct elf_dom_parms parms;
+    char *guest_type;
+
+    /* memory layout */
+    struct xc_dom_seg kernel_seg;
+    struct xc_dom_seg ramdisk_seg;
+    struct xc_dom_seg p2m_seg;
+    struct xc_dom_seg pgtables_seg;
+    xen_pfn_t start_info_pfn;
+    xen_pfn_t console_pfn;
+    xen_pfn_t xenstore_pfn;
+    xen_pfn_t shared_info_pfn;
+    xen_pfn_t bootstack_pfn;
+    xen_vaddr_t virt_alloc_end;
+    xen_vaddr_t bsd_symtab_start;
+
+    /* initial page tables */
+    unsigned int pgtables;
+    unsigned int pg_l4;
+    unsigned int pg_l3;
+    unsigned int pg_l2;
+    unsigned int pg_l1;
+    unsigned int alloc_bootstack;
+    unsigned int extra_pages;
+    xen_vaddr_t virt_pgtab_end;
+
+    /* other state info */
+    uint32_t f_active[XENFEAT_NR_SUBMAPS];
+    xen_pfn_t *p2m_host;
+    void *p2m_guest;
+
+    /* physical memory */
+    xen_pfn_t total_pages;
+    struct xc_dom_phys *phys_pages;
+
+    /* malloc memory pool */
+    struct xc_dom_mem *memblocks;
+
+    /* memory footprint stats */
+    size_t alloc_malloc;
+    size_t alloc_mem_map;
+    size_t alloc_file_map;
+    size_t alloc_domU_map;
+
+    /* misc xen domain config stuff */
+    unsigned long flags;
+    unsigned int console_evtchn;
+    unsigned int xenstore_evtchn;
+    xen_pfn_t shared_info_mfn;
+
+    int guest_xc;
+    domid_t guest_domid;
+    int shadow_enabled;
+
+    int xen_version;
+    xen_capabilities_info_t xen_caps;
+
+    /* kernel loader, arch hooks */
+    struct xc_dom_loader *kernel_loader;
+    void *private_loader;
+
+    /* arch hooks */
+    struct xc_dom_arch *arch_hooks;
+};
+
+/* --- pluggable kernel loader ------------------------------------- */
+
+struct xc_dom_loader {
+    char *name;
+    int (*probe) (struct xc_dom_image * dom);
+    int (*parser) (struct xc_dom_image * dom);
+    int (*loader) (struct xc_dom_image * dom);
+
+    struct xc_dom_loader *next;
+};
+
+#define __init __attribute__ ((constructor))
+void xc_dom_register_loader(struct xc_dom_loader *loader);
+
+/* --- arch specific hooks ----------------------------------------- */
+
+struct xc_dom_arch {
+    /* pagetable setup */
+    int (*alloc_magic_pages) (struct xc_dom_image * dom);
+    int (*count_pgtables) (struct xc_dom_image * dom);
+    int (*setup_pgtables) (struct xc_dom_image * dom);
+
+    /* arch-specific data structs setup */
+    int (*start_info) (struct xc_dom_image * dom);
+    int (*shared_info) (struct xc_dom_image * dom, void *shared_info);
+    int (*vcpu) (struct xc_dom_image * dom, void *vcpu_ctxt);
+
+    char *guest_type;
+    int page_shift;
+    int sizeof_pfn;
+
+    struct xc_dom_arch *next;
+};
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks);
+
+#define XC_DOM_PAGE_SHIFT(dom)  ((dom)->arch_hooks->page_shift)
+#define XC_DOM_PAGE_SIZE(dom)   (1 << (dom)->arch_hooks->page_shift)
+
+/* --- main functions ---------------------------------------------- */
+
+struct xc_dom_image *xc_dom_allocate(const char *cmdline, const char *features);
+void xc_dom_release_phys(struct xc_dom_image *dom);
+void xc_dom_release(struct xc_dom_image *dom);
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
+
+size_t xc_dom_check_gzip(void *blob, size_t ziplen);
+int xc_dom_do_gunzip(void *src, size_t srclen, void *dst, size_t dstlen);
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size);
+
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename);
+int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename);
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
+                     size_t memsize);
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+                      size_t memsize);
+
+int xc_dom_parse_image(struct xc_dom_image *dom);
+int xc_dom_build_image(struct xc_dom_image *dom);
+int xc_dom_update_guest_p2m(struct xc_dom_image *dom);
+
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, int xc, domid_t domid);
+int xc_dom_boot_mem_init(struct xc_dom_image *dom);
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+                          xen_pfn_t count);
+int xc_dom_boot_image(struct xc_dom_image *dom);
+int xc_dom_compat_check(struct xc_dom_image *dom);
+
+/* --- debugging bits ---------------------------------------------- */
+
+extern FILE *xc_dom_logfile;
+
+void xc_dom_loginit(void);
+int xc_dom_printf(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
+int xc_dom_panic_func(const char *file, int line, xc_error_code err,
+                     const char *fmt, ...)
+    __attribute__ ((format(printf, 4, 5)));
+#define xc_dom_panic(err, fmt, args...) \
+       xc_dom_panic_func(__FILE__, __LINE__, err, fmt, ## args)
+#define xc_dom_trace(mark) \
+       xc_dom_printf("%s:%d: trace %s\n", __FILE__, __LINE__, mark)
+
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom);
+
+/* --- simple memory pool ------------------------------------------ */
+
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+                           const char *filename, size_t * size);
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str);
+
+/* --- alloc memory pool ------------------------------------------- */
+
+int xc_dom_alloc_page(struct xc_dom_image *dom, char *name);
+int xc_dom_alloc_segment(struct xc_dom_image *dom,
+                        struct xc_dom_seg *seg, char *name,
+                        xen_vaddr_t start, xen_vaddr_t size);
+
+/* --- misc bits --------------------------------------------------- */
+
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first,
+                       xen_pfn_t count);
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn);
+void xc_dom_unmap_all(struct xc_dom_image *dom);
+
+static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
+                                     struct xc_dom_seg *seg)
+{
+    xen_vaddr_t segsize = seg->vend - seg->vstart;
+    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    xen_pfn_t pages = (segsize + page_size - 1) / page_size;
+
+    return xc_dom_pfn_to_ptr(dom, seg->pfn, pages);
+}
+
+static inline void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom,
+                                       xen_vaddr_t vaddr)
+{
+    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    xen_pfn_t page = (vaddr - dom->parms.virt_base) / page_size;
+    unsigned int offset = (vaddr - dom->parms.virt_base) % page_size;
+    void *ptr = xc_dom_pfn_to_ptr(dom, page, 0);
+
+    if (!ptr)
+       return NULL;
+    return ptr + offset;
+}
+
+static inline int xc_dom_feature_translated(struct xc_dom_image *dom)
+{
+    return elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active);
+}
+
+static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    if (dom->shadow_enabled)
+       return pfn;
+    return dom->p2m_host[pfn];
+}
+
+static inline xen_pfn_t xc_dom_p2m_guest(struct xc_dom_image *dom,
+                                        xen_pfn_t pfn)
+{
+    if (xc_dom_feature_translated(dom))
+       return pfn;
+    return dom->p2m_host[pfn];
+}
+
+/* --- arch bits --------------------------------------------------- */
+
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_binloader.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_binloader.c    Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,294 @@
+/******************************************************************************
+ *
+ * Loads simple binary images. It's like a .COM file in MS-DOS. No headers are
+ * present. The only requirement is that it must have a xen_bin_image table
+ * somewhere in the first 8192 bytes, starting on a 32-bit aligned address.
+ * Those familiar with the multiboot specification should recognize this, it's
+ * (almost) the same as the multiboot header.
+ * The layout of the xen_bin_image table is:
+ *
+ * Offset Type Name          Note
+ * 0      uint32_t  magic         required
+ * 4      uint32_t  flags         required
+ * 8      uint32_t  checksum      required
+ * 12     uint32_t  header_addr   required
+ * 16     uint32_t  load_addr     required
+ * 20     uint32_t  load_end_addr required
+ * 24     uint32_t  bss_end_addr  required
+ * 28     uint32_t  entry_addr    required
+ *
+ * - magic
+ *   Magic number identifying the table. For images to be loaded by Xen 3, the
+ *   magic value is 0x336ec578 ("xEn3" with the 0x80 bit of the "E" set).
+ * - flags
+ *   bit 0: indicates whether the image needs to be loaded on a page boundary
+ *   bit 1: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ *          that memory info should be passed to the image)
+ *   bit 2: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ *          that the bootloader should pass video mode info to the image)
+ *   bit 16: reserved, must be 1 (the multiboot spec uses this bit to indicate
+ *           that the values in the fields header_addr - entry_addr are
+ *           valid)
+ *   All other bits should be set to 0.
+ * - checksum
+ *   When added to "magic" and "flags", the resulting value should be 0.
+ * - header_addr
+ *   Contains the virtual address corresponding to the beginning of the
+ *   table - the memory location at which the magic value is supposed to be
+ *   loaded. This field serves to synchronize the mapping between OS image
+ *   offsets and virtual memory addresses.
+ * - load_addr
+ *   Contains the virtual address of the beginning of the text segment. The
+ *   offset in the OS image file at which to start loading is defined by the
+ *   offset at which the table was found, minus (header addr - load addr).
+ *   load addr must be less than or equal to header addr.
+ * - load_end_addr
+ *   Contains the virtual address of the end of the data segment.
+ *   (load_end_addr - load_addr) specifies how much data to load. This implies
+ *   that the text and data segments must be consecutive in the OS image. If
+ *   this field is zero, the domain builder assumes that the text and data
+ *   segments occupy the whole OS image file.
+ * - bss_end_addr
+ *   Contains the virtual address of the end of the bss segment. The domain
+ *   builder initializes this area to zero, and reserves the memory it occupies
+ *   to avoid placing boot modules and other data relevant to the loaded image
+ *   in that area. If this field is zero, the domain builder assumes that no bss
+ *   segment is present.
+ * - entry_addr
+ *   The virtual address at which to start execution of the loaded image.
+ *
+ * Some of the field descriptions were copied from "The Multiboot
+ * Specification", Copyright 1995, 96 Bryan Ford <baford@xxxxxxxxxxx>,
+ * Erich Stefan Boleyn <erich@xxxxxxxx> Copyright 1999, 2000, 2001, 2002
+ * Free Software Foundation, Inc.
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE_X86-1))&PAGE_MASK_X86)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK_X86)
+
+struct xen_bin_image_table
+{
+    uint32_t magic;
+    uint32_t flags;
+    uint32_t checksum;
+    uint32_t header_addr;
+    uint32_t load_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t entry_addr;
+};
+
+#define XEN_MULTIBOOT_MAGIC3 0x336ec578
+
+#define XEN_MULTIBOOT_FLAG_ALIGN4K     0x00000001
+#define XEN_MULTIBOOT_FLAG_NEEDMEMINFO 0x00000002
+#define XEN_MULTIBOOT_FLAG_NEEDVIDINFO 0x00000004
+#define XEN_MULTIBOOT_FLAG_ADDRSVALID  0x00010000
+#define XEN_MULTIBOOT_FLAG_PAE_SHIFT   14
+#define XEN_MULTIBOOT_FLAG_PAE_MASK    (3 << XEN_MULTIBOOT_FLAG_PAE_SHIFT)
+
+/* Flags we test for */
+#define FLAGS_MASK     ((~ 0) & (~ XEN_MULTIBOOT_FLAG_ALIGN4K) & \
+                               (~ XEN_MULTIBOOT_FLAG_PAE_MASK))
+#define FLAGS_REQUIRED XEN_MULTIBOOT_FLAG_ADDRSVALID
+
+/* --------------------------------------------------------------------- */
+
+static struct xen_bin_image_table *find_table(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *table;
+    uint32_t *probe_ptr;
+    uint32_t *probe_end;
+
+    probe_ptr = dom->kernel_blob;
+    probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table);
+    if ((void*)probe_end > dom->kernel_blob + 8192)
+        probe_end = dom->kernel_blob + 8192;
+
+    for (table = NULL; probe_ptr < probe_end; probe_ptr++)
+    {
+        if (XEN_MULTIBOOT_MAGIC3 == *probe_ptr)
+        {
+            table = (struct xen_bin_image_table *) probe_ptr;
+            /* Checksum correct? */
+            if (0 == table->magic + table->flags + table->checksum)
+            {
+                return table;
+            }
+        }
+    }
+    return NULL;
+}
+
+static int xc_dom_probe_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *table;
+
+    table = find_table(dom);
+    if (!table)
+        return -EINVAL;
+    return 0;
+}
+
+static int xc_dom_parse_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t pae_flags;
+
+    image_info = find_table(dom);
+    if (!image_info)
+        return -EINVAL;
+
+    xc_dom_printf("%s: multiboot header fields\n", __FUNCTION__);
+    xc_dom_printf("  flags:         0x%" PRIx32 "\n", image_info->flags);
+    xc_dom_printf("  header_addr:   0x%" PRIx32 "\n", image_info->header_addr);
+    xc_dom_printf("  load_addr:     0x%" PRIx32 "\n", image_info->load_addr);
+    xc_dom_printf("  load_end_addr: 0x%" PRIx32 "\n", image_info->load_end_addr);
+    xc_dom_printf("  bss_end_addr:  0x%" PRIx32 "\n", image_info->bss_end_addr);
+    xc_dom_printf("  entry_addr:    0x%" PRIx32 "\n", image_info->entry_addr);
+
+    /* Check the flags */
+    if ( FLAGS_REQUIRED != (image_info->flags & FLAGS_MASK) )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: xen_bin_image_table flags required "
+                     "0x%08" PRIx32 " found 0x%08" PRIx32 "\n",
+                     __FUNCTION__, FLAGS_REQUIRED, image_info->flags & FLAGS_MASK);
+        return -EINVAL;
+    }
+
+    /* Sanity check on the addresses */
+    if ( image_info->header_addr < image_info->load_addr ||
+         ((char *) image_info - image) <
+         (image_info->header_addr - image_info->load_addr) )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid header_addr.",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    xc_dom_printf("%s: calculated addresses\n", __FUNCTION__);
+    xc_dom_printf("  start_addr:    0x%" PRIx32 "\n", start_addr);
+    xc_dom_printf("  load_end_addr: 0x%" PRIx32 "\n", load_end_addr);
+    xc_dom_printf("  bss_end_addr:  0x%" PRIx32 "\n", bss_end_addr);
+
+    if ( start_addr + image_size < load_end_addr )
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid load_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( bss_end_addr < load_end_addr)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: Invalid bss_end_addr.\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    dom->kernel_seg.vstart = image_info->load_addr;
+    dom->kernel_seg.vend   = bss_end_addr;
+    dom->parms.virt_base   = start_addr;
+    dom->parms.virt_entry  = image_info->entry_addr;
+
+    pae_flags = image_info->flags & XEN_MULTIBOOT_FLAG_PAE_MASK;
+    switch (pae_flags >> XEN_MULTIBOOT_FLAG_PAE_SHIFT) {
+    case 0:
+       dom->guest_type = "xen-3.0-x86_32";
+       break;
+    case 1:
+       dom->guest_type = "xen-3.0-x86_32p";
+       break;
+    case 2:
+       dom->guest_type = "xen-3.0-x86_64";
+       break;
+    case 3:
+       /* Kernel detects PAE at runtime.  So try to figure out whether
+        * xen supports PAE and advertise a PAE-capable kernel in case
+        * it does. */
+       dom->guest_type = "xen-3.0-x86_32";
+       if (strstr(dom->xen_caps, "xen-3.0-x86_32p")) {
+           xc_dom_printf("%s: PAE fixup\n", __FUNCTION__);
+           dom->guest_type = "xen-3.0-x86_32p";
+           dom->parms.pae  = 2;
+       }
+       break;
+    }
+    return 0;
+}
+
+/* Copy the loadable part of the image into the kernel segment and zero the
+ * bss behind it.  Returns 0 on success, -EINVAL when no image table is found
+ * or when the kernel segment start cannot be translated to a host pointer. */
+static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    char *dest;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t skip, text_size, bss_size;
+
+    image_info = find_table(dom);
+    if (!image_info)
+        return -EINVAL;
+
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    /* It's possible that we need to skip the first part of the image */
+    skip = image_info->load_addr - start_addr;
+    text_size = load_end_addr - image_info->load_addr;
+    bss_size = bss_end_addr - load_end_addr;
+
+    xc_dom_printf("%s: calculated sizes\n", __FUNCTION__);
+    xc_dom_printf("  skip:      0x%" PRIx32 "\n", skip);
+    xc_dom_printf("  text_size: 0x%" PRIx32 "\n", text_size);
+    xc_dom_printf("  bss_size:  0x%" PRIx32 "\n", bss_size);
+
+    dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart);
+    if (!dest)
+        return -EINVAL;
+    memcpy(dest, image + skip, text_size);
+    memset(dest + text_size, 0, bss_size);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct xc_dom_loader bin_loader = {
+    .name = "multiboot-binary",
+    .probe = xc_dom_probe_bin_kernel,
+    .parser = xc_dom_parse_bin_kernel,
+    .loader = xc_dom_load_bin_kernel,
+};
+
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&bin_loader);
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * End:
+ */
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_boot.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_boot.c Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,515 @@
+/*
+ * Xen domain builder -- xen booter.
+ *
+ * This is the code which actually boots a fresh
+ * prepared domain image as xen guest domain.
+ *
+ * ==>  this is the only domain builder code piece
+ *          where xen hypercalls are allowed        <==
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+#include <xen/hvm/params.h>
+
+/* ------------------------------------------------------------------------ */
+
+static int setup_hypercall_page(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    xen_pfn_t pfn;
+    int rc;
+
+    if (-1 == dom->parms.virt_hypercall)
+       return 0;
+    pfn = (dom->parms.virt_hypercall - dom->parms.virt_base)
+       >> XC_DOM_PAGE_SHIFT(dom);
+
+    xc_dom_printf("%s: vaddr=0x%" PRIx64 " pfn=0x%" PRIpfn "\n", __FUNCTION__,
+                 dom->parms.virt_hypercall, pfn);
+    domctl.cmd = XEN_DOMCTL_hypercall_init;
+    domctl.domain = dom->guest_domid;
+    domctl.u.hypercall_init.gmfn = xc_dom_p2m_guest(dom, pfn);
+    rc = do_domctl(dom->guest_xc, &domctl);
+    if (0 != rc)
+       xc_dom_panic(XC_INTERNAL_ERROR, "%s: HYPERCALL_INIT failed (rc=%d)\n",
+                    __FUNCTION__, rc);
+    return rc;
+}
+
+static int launch_vm(int xc, domid_t domid, void *ctxt)
+{
+    DECLARE_DOMCTL;
+    int rc;
+
+    xc_dom_printf("%s: called, ctxt=%p\n", __FUNCTION__, ctxt);
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.domain = domid;
+    domctl.u.vcpucontext.vcpu = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, ctxt);
+    rc = do_domctl(xc, &domctl);
+    if (0 != rc)
+       xc_dom_panic(XC_INTERNAL_ERROR,
+                    "%s: SETVCPUCONTEXT failed (rc=%d)\n", __FUNCTION__, rc);
+    return rc;
+}
+
+static int clear_page(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    xen_pfn_t dst;
+    int rc;
+
+    if (0 == pfn)
+       return 0;
+
+    dst = xc_dom_p2m_host(dom, pfn);
+    xc_dom_printf("%s: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
+                 __FUNCTION__, pfn, dst);
+    rc = xc_clear_domain_page(dom->guest_xc, dom->guest_domid, dst);
+    if (0 != rc)
+       xc_dom_panic(XC_INTERNAL_ERROR,
+                    "%s: xc_clear_domain_page failed (pfn 0x%" PRIpfn
+                    ", rc=%d)\n", __FUNCTION__, pfn, rc);
+    return rc;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: x86 bits                                                     */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+
+static int x86_compat(int xc, domid_t domid, char *guest_type)
+{
+#ifdef XEN_DOMCTL_set_compat
+    static const struct {
+       char           *guest;
+       unsigned long  cmd;
+    } types[] = {
+       { "xen-3.0-x86_32p", XEN_DOMCTL_set_compat },
+       { "xen-3.0-x86_64",  XEN_DOMCTL_set_native },
+    };
+    DECLARE_DOMCTL;
+    int i,rc;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    for (i = 0; i < sizeof(types)/sizeof(types[0]); i++)
+       if (0 == strcmp(types[i].guest, guest_type))
+           domctl.cmd = types[i].cmd;
+    if (0 == domctl.cmd)
+       /* nothing to do */
+       return 0;
+
+    xc_dom_printf("%s: guest %s, cmd %d\n", __FUNCTION__,
+                 guest_type, domctl.cmd);
+    rc = do_domctl(xc, &domctl);
+    if (0 != rc)
+       xc_dom_printf("%s: warning: failed (rc=%d)\n",
+                     __FUNCTION__, rc);
+    return rc;
+#else
+    xc_dom_printf("%s: compiled without compat/native switching\n", __FUNCTION__);
+    return 0;
+#endif /* XEN_DOMCTL_set_compat */
+}
+
+
+static int x86_shadow(int xc, domid_t domid)
+{
+    int rc, mode;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    mode = XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT |
+       XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE;
+
+    rc = xc_shadow_control(xc, domid,
+                          XEN_DOMCTL_SHADOW_OP_ENABLE,
+                          NULL, 0, NULL, mode, NULL);
+    if (0 != rc)
+    {
+       xc_dom_panic(XC_INTERNAL_ERROR,
+                    "%s: SHADOW_OP_ENABLE (mode=0x%x) failed (rc=%d)\n",
+                    __FUNCTION__, mode, rc);
+       return rc;
+    }
+    xc_dom_printf("%s: shadow enabled (mode=0x%x)\n", __FUNCTION__, mode);
+    return rc;
+}
+
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    int rc = 0;
+
+    x86_compat(dom->guest_xc, dom->guest_domid, dom->guest_type);
+    if (xc_dom_feature_translated(dom))
+    {
+       dom->shadow_enabled = 1;
+       rc = x86_shadow(dom->guest_xc, dom->guest_domid);
+    }
+    return rc;
+}
+
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    static const struct {
+       char *guest;
+       unsigned long pgd_type;
+    } types[] = {
+       { "xen-3.0-x86_32",  MMUEXT_PIN_L2_TABLE},
+       { "xen-3.0-x86_32p", MMUEXT_PIN_L3_TABLE},
+       { "xen-3.0-x86_64",  MMUEXT_PIN_L4_TABLE},
+    };
+    unsigned long pgd_type = 0;
+    shared_info_t *shared_info;
+    xen_pfn_t shinfo;
+    int i, rc;
+
+    for (i = 0; i < sizeof(types) / sizeof(types[0]); i++)
+       if (0 == strcmp(types[i].guest, dom->guest_type))
+           pgd_type = types[i].pgd_type;
+
+    if (!xc_dom_feature_translated(dom))
+    {
+       /* paravirtualized guest */
+       xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
+       rc = pin_table(dom->guest_xc, pgd_type,
+                      xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
+                      dom->guest_domid);
+       if (0 != rc)
+       {
+           xc_dom_panic(XC_INTERNAL_ERROR,
+                        "%s: pin_table failed (pfn 0x%" PRIpfn ", rc=%d)\n",
+                        __FUNCTION__, dom->pgtables_seg.pfn, rc);
+           return rc;
+       }
+       shinfo = dom->shared_info_mfn;
+    }
+    else
+    {
+       /* paravirtualized guest with auto-translation */
+       struct xen_add_to_physmap xatp;
+       int i;
+
+       /* Map shared info frame into guest physmap. */
+       xatp.domid = dom->guest_domid;
+       xatp.space = XENMAPSPACE_shared_info;
+       xatp.idx = 0;
+       xatp.gpfn = dom->shared_info_pfn;
+       rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
+       if (rc != 0)
+       {
+           xc_dom_panic(XC_INTERNAL_ERROR, "%s: mapping shared_info failed "
+                        "(pfn=0x%" PRIpfn ", rc=%d)\n",
+                        __FUNCTION__, xatp.gpfn, rc);
+           return rc;
+       }
+
+       /* Map grant table frames into guest physmap. */
+       for (i = 0;; i++)
+       {
+           xatp.domid = dom->guest_domid;
+           xatp.space = XENMAPSPACE_grant_table;
+           xatp.idx = i;
+           xatp.gpfn = dom->total_pages + i;
+           rc = xc_memory_op(dom->guest_xc, XENMEM_add_to_physmap, &xatp);
+           if (rc != 0)
+           {
+               if (i > 0 && errno == EINVAL)
+               {
+                   xc_dom_printf("%s: %d grant tables mapped\n", __FUNCTION__,
+                                 i);
+                   break;
+               }
+               xc_dom_panic(XC_INTERNAL_ERROR,
+                            "%s: mapping grant tables failed " "(pfn=0x%"
+                            PRIpfn ", rc=%d)\n", __FUNCTION__, xatp.gpfn, rc);
+               return rc;
+           }
+       }
+       shinfo = dom->shared_info_pfn;
+    }
+
+    /* setup shared_info page */
+    xc_dom_printf("%s: shared_info: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "\n",
+                 __FUNCTION__, dom->shared_info_pfn, dom->shared_info_mfn);
+    shared_info = xc_map_foreign_range(dom->guest_xc, dom->guest_domid,
+                                      PAGE_SIZE_X86,
+                                      PROT_READ | PROT_WRITE,
+                                      shinfo);
+    if (NULL == shared_info)
+       return -1;
+    dom->arch_hooks->shared_info(dom, shared_info);
+    munmap(shared_info, PAGE_SIZE_X86);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: ia64                                                         */
+
+#elif defined(__ia64__)
+
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    int rc;
+
+    xc_dom_printf("%s: setup firmware\n", __FUNCTION__);
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.cmd = XEN_DOMCTL_arch_setup;
+    domctl.domain = dom->guest_domid;
+    domctl.u.arch_setup.flags = 0;
+    domctl.u.arch_setup.bp = (dom->start_info_pfn << PAGE_SHIFT)
+       + sizeof(start_info_t);
+    domctl.u.arch_setup.maxmem = dom->total_pages << PAGE_SHIFT;
+    rc = do_domctl(dom->guest_xc, &domctl);
+    return rc;
+}
+
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: powerpc                                                      */
+
+#elif defined(__powerpc64__)
+
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+/* powerpc: late setup is a stub until device tree loading is implemented. */
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    start_info_t *si = xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+
+    (void)si; /* only referenced by the disabled devtree code below */
+    xc_dom_printf("%s: TODO: setup devtree\n", __FUNCTION__);
+#if 0
+    load_devtree(dom->guest_xc,
+                dom->guest_domid,
+                dom->p2m_host,
+                devtree,           // FIXME
+                devtree_addr,      // FIXME
+                dom->ramdisk_seg.vstart,
+                dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart,
+                si,
+                dom->start_info_pfn << PAGE_SHIFT);
+#endif
+    return 0; /* was "return rc", but no rc is declared in this function */
+}
+
+/* ------------------------------------------------------------------------ */
+/* arch stuff: other                                                        */
+
+#else
+
+static int arch_setup_early(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+static int arch_setup_late(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: doing nothing\n", __FUNCTION__);
+    return 0;
+}
+
+#endif /* arch stuff */
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Check that the hypervisor supports the guest type of the parsed kernel.
+ *
+ * dom->xen_caps is treated as a space separated list of guest types; each
+ * entry is logged and compared against dom->guest_type.
+ *
+ * Returns the number of matching entries (non-zero means compatible).
+ * Returns 0 and raises XC_INVALID_KERNEL when nothing matches.
+ */
+int xc_dom_compat_check(struct xc_dom_image *dom)
+{
+    xen_capabilities_info_t xen_caps;
+    char *item, *ptr;
+    int match, found = 0;
+
+    /*
+     * strtok_r() modifies its input, so work on a private copy.  The copy
+     * is bounded and always terminated, so an unterminated capability
+     * string cannot overrun the local buffer.
+     */
+    strncpy(xen_caps, dom->xen_caps, sizeof(xen_caps) - 1);
+    xen_caps[sizeof(xen_caps) - 1] = '\0';
+
+    for (item = strtok_r(xen_caps, " ", &ptr);
+         NULL != item; item = strtok_r(NULL, " ", &ptr))
+    {
+        match = (0 == strcmp(dom->guest_type, item));
+        xc_dom_printf("%s: supported guest type: %s%s\n", __FUNCTION__,
+                      item, match ? " <= matches" : "");
+        if (match)
+            found++;
+    }
+    if (!found)
+        xc_dom_panic(XC_INVALID_KERNEL,
+                     "%s: guest type %s not supported by xen kernel, sorry\n",
+                     __FUNCTION__, dom->guest_type);
+
+    return found;
+}
+
+/*
+ * Bind the image to a hypervisor handle and domain id, and cache the
+ * hypervisor version and capability string in the image.
+ *
+ * Returns 0 on success, -1 (after raising XC_INTERNAL_ERROR) when the
+ * capabilities cannot be read.
+ */
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, int xc, domid_t domid)
+{
+    dom->guest_xc = xc;
+    dom->guest_domid = domid;
+
+    dom->xen_version = xc_version(dom->guest_xc, XENVER_version, NULL);
+    if (xc_version(xc, XENVER_capabilities, &dom->xen_caps) < 0) {
+        /* newline added so the log entry is terminated like other panics */
+        xc_dom_panic(XC_INTERNAL_ERROR, "can't get xen capabilities\n");
+        return -1;
+    }
+    /*
+     * The value is decoded as major in the upper 16 bits and minor in the
+     * lower 16 bits; masking with 0xff would truncate large minor numbers.
+     */
+    xc_dom_printf("%s: ver %d.%d, caps %s\n", __FUNCTION__,
+                  dom->xen_version >> 16, dom->xen_version & 0xffff,
+                  dom->xen_caps);
+    return 0;
+}
+
+/*
+ * Run the early arch hook, then populate the guest physmap with
+ * dom->total_pages pages taken from dom->p2m_host.
+ *
+ * Returns 0 on success, otherwise the non-zero code of the failing step.
+ */
+int xc_dom_boot_mem_init(struct xc_dom_image *dom)
+{
+    long rc;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    rc = arch_setup_early(dom);
+    if (rc != 0)
+        return rc;
+
+    /* allocate guest memory */
+    rc = xc_domain_memory_populate_physmap(dom->guest_xc, dom->guest_domid,
+                                           dom->total_pages, 0, 0,
+                                           dom->p2m_host);
+    if (rc == 0)
+        return 0;
+
+    xc_dom_panic(XC_OUT_OF_MEMORY,
+                 "%s: can't allocate low memory for domain\n",
+                 __FUNCTION__);
+    return rc;
+}
+
+/*
+ * Map 'count' guest pages starting at guest pfn 'pfn' into this process.
+ *
+ * A window of count pages is mmap'ed on the privcmd handle and each page is
+ * then bound to the machine frame that xc_dom_p2m_host() reports for the
+ * matching pfn.
+ *
+ * Returns the start of the mapping, or NULL (after raising
+ * XC_INTERNAL_ERROR) on failure.  On failure the window is unmapped again.
+ */
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+                           xen_pfn_t count)
+{
+    int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    privcmd_mmap_entry_t *entries;
+    void *ptr;
+    xen_pfn_t i;    /* same type as count: no signed/unsigned compare */
+    int rc;
+
+    entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
+    if (NULL == entries)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [malloc]\n", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    ptr = mmap(NULL, count << page_shift, PROT_READ | PROT_WRITE,
+               MAP_SHARED, dom->guest_xc, 0);
+    if (MAP_FAILED == ptr)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [mmap]\n", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    for (i = 0; i < count; i++)
+    {
+        /* widen before shifting so large mappings cannot overflow int */
+        entries[i].va = (uintptr_t) ptr + ((uintptr_t) i << page_shift);
+        entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
+        entries[i].npages = 1;
+    }
+
+    rc = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid, entries,
+                               count);
+    if (rc < 0)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [xenctl, rc=%d]\n", __FUNCTION__, pfn, count, rc);
+        /* do not leak the address space window reserved above */
+        munmap(ptr, count << page_shift);
+        return NULL;
+    }
+    return ptr;
+}
+
+/*
+ * Finish setting up a built image inside its domain and start the guest.
+ *
+ * Steps, in order: query domain info (shared info frame), check guest type
+ * compatibility, write the guest p2m and page tables, clear the console and
+ * xenstore pages, fill the start info page, install the hypercall page, run
+ * the late arch hook, build the vcpu context, drop all mappings, launch.
+ *
+ * Returns the result of launch_vm() when every step succeeds, otherwise
+ * the non-zero code of the first failing step.
+ */
+int xc_dom_boot_image(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    void *ctxt;
+    int rc;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* collect some info */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = dom->guest_domid;
+    rc = do_domctl(dom->guest_xc, &domctl);
+    if (0 != rc)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: getdomaininfo failed (rc=%d)\n", __FUNCTION__, rc);
+        return rc;
+    }
+    if (domctl.domain != dom->guest_domid)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: Huh? domid mismatch (%d != %d)\n", __FUNCTION__,
+                     domctl.domain, dom->guest_domid);
+        return -1;
+    }
+    dom->shared_info_mfn = domctl.u.getdomaininfo.shared_info_frame;
+
+    /* sanity checks */
+    if (!xc_dom_compat_check(dom))
+        return -1;
+
+    /* initial mm setup */
+    if (0 != (rc = xc_dom_update_guest_p2m(dom)))
+        return rc;
+    if (dom->arch_hooks->setup_pgtables)
+        if (0 != (rc = dom->arch_hooks->setup_pgtables(dom)))
+            return rc;
+
+    if (0 != (rc = clear_page(dom, dom->console_pfn)))
+        return rc;
+    if (0 != (rc = clear_page(dom, dom->xenstore_pfn)))
+        return rc;
+
+    /* start info page */
+    if (dom->arch_hooks->start_info)
+        dom->arch_hooks->start_info(dom);
+
+    /* hypercall page */
+    if (0 != (rc = setup_hypercall_page(dom)))
+        return rc;
+    xc_dom_log_memory_footprint(dom);
+
+    /* arch-specific late setup */
+    if (0 != (rc = arch_setup_late(dom)))
+        return rc;
+
+    /* let the vm run */
+    ctxt = xc_dom_malloc(dom, PAGE_SIZE * 2 /* FIXME */ );
+    if (NULL == ctxt)
+    {
+        /* previously a failed allocation went straight into memset() */
+        xc_dom_panic(XC_OUT_OF_MEMORY,
+                     "%s: can't allocate vcpu context\n", __FUNCTION__);
+        return -1;
+    }
+    memset(ctxt, 0, PAGE_SIZE * 2);
+    if (0 != (rc = dom->arch_hooks->vcpu(dom, ctxt)))
+        return rc;
+    xc_dom_unmap_all(dom);
+    rc = launch_vm(dom->guest_xc, dom->guest_domid, ctxt);
+
+    return rc;
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_core.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_core.c Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,773 @@
+/*
+ * Xen domain builder -- core bits.
+ *
+ * The core code goes here:
+ *   - allocate and release domain structs.
+ *   - memory management functions.
+ *   - misc helper functions.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+/* debugging                                                                */
+
+FILE *xc_dom_logfile = NULL;  /* builder log stream; NULL until xc_dom_loginit() opens it */
+
+/*
+ * Open the domain builder log file (append mode, unbuffered) once.
+ *
+ * Later calls are no-ops while the log is open.  If the file cannot be
+ * opened, xc_dom_logfile stays NULL and logging remains disabled; the
+ * original passed the NULL stream on to setvbuf().
+ */
+void xc_dom_loginit(void)
+{
+    FILE *fp;
+
+    if (xc_dom_logfile)
+        return;
+
+    fp = fopen("/var/log/xen/domain-builder-ng.log", "a");
+    if (NULL == fp)
+        return;
+
+    setvbuf(fp, NULL, _IONBF, 0);
+    xc_dom_logfile = fp;
+    xc_dom_printf("### ----- xc domain builder logfile opened -----\n");
+}
+
+/*
+ * printf-style logging into the builder log file.
+ *
+ * Does nothing and returns 0 when no log file is open.  Messages longer
+ * than the internal 1024 byte buffer are truncated.
+ *
+ * Returns the fwrite() item count: 1 when the message was written, 0 when
+ * nothing was written (empty message, formatting error, write error).
+ */
+int xc_dom_printf(const char *fmt, ...)
+{
+    va_list args;
+    char buf[1024];
+    int rc;
+
+    if (!xc_dom_logfile)
+        return 0;
+
+    va_start(args, fmt);
+    rc = vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+
+    /* formatting error or empty message: nothing to write */
+    if (rc <= 0)
+        return 0;
+    /*
+     * vsnprintf() reports the length the full message would have had;
+     * clamp it so fwrite() never reads past the end of buf.
+     */
+    if ((size_t)rc >= sizeof(buf))
+        rc = sizeof(buf) - 1;
+    rc = fwrite(buf, rc, 1, xc_dom_logfile);
+
+    return rc;
+}
+
+/*
+ * Record an error: format the message, store it via xc_set_error() and
+ * print it, prefixed with "file:line: panic: ", to the log file (or to
+ * stderr when no log file is open).
+ *
+ * Returns the fprintf() result.
+ */
+int xc_dom_panic_func(const char *file, int line, xc_error_code err,
+                      const char *fmt, ...)
+{
+    char msg[XC_MAX_ERROR_MSG_LEN];
+    char pos[256];
+    va_list args;
+    FILE *out = xc_dom_logfile ? xc_dom_logfile : stderr;
+
+    snprintf(pos, sizeof(pos), "%s:%d: panic: ", file, line);
+
+    va_start(args, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, args);
+    va_end(args);
+
+    xc_set_error(err, "%s", msg);
+    return fprintf(out, "%s%s", pos, msg);
+}
+
+/*
+ * Log a byte count under the given name, scaled to MB above 32 MB, to kB
+ * above 32 kB and printed as plain bytes otherwise.
+ */
+static void print_mem(const char *name, size_t mem)
+{
+    /* size_t is unsigned: %zu is the matching conversion, not %zd */
+    if (mem > 32 * 1024 * 1024)
+        xc_dom_printf("%-24s : %zu MB\n", name, mem / (1024 * 1024));
+    else if (mem > 32 * 1024)
+        xc_dom_printf("%-24s : %zu kB\n", name, mem / 1024);
+    else
+        xc_dom_printf("%-24s : %zu bytes\n", name, mem);
+}
+
+/* Log the builder's own memory usage counters, grouped by kind. */
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom)
+{
+    const size_t heap_bytes = dom->alloc_malloc;
+    const size_t anon_bytes = dom->alloc_mem_map;
+    const size_t file_bytes = dom->alloc_file_map;
+    const size_t domU_bytes = dom->alloc_domU_map;
+
+    xc_dom_printf("domain builder memory footprint\n");
+
+    xc_dom_printf("   allocated\n");
+    print_mem("      malloc", heap_bytes);
+    print_mem("      anon mmap", anon_bytes);
+
+    xc_dom_printf("   mapped\n");
+    print_mem("      file mmap", file_bytes);
+    print_mem("      domU mmap", domU_bytes);
+}
+
+/* ------------------------------------------------------------------------ */
+/* simple memory pool                                                       */
+
+/*
+ * Allocate 'size' bytes of zeroed memory owned by the image.
+ *
+ * The block is linked into dom->memblocks, so it is released together
+ * with the image and must not be passed to free() by the caller.
+ * Allocations above 100 kB are logged.
+ *
+ * Returns the memory, or NULL when the allocation fails or when adding
+ * the bookkeeping header to 'size' would overflow.
+ */
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
+{
+    struct xc_dom_mem *block;
+
+    /* header + payload must not wrap around size_t */
+    if (size > SIZE_MAX - sizeof(*block))
+        return NULL;
+
+    block = calloc(1, sizeof(*block) + size);
+    if (NULL == block)
+        return NULL;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block) + size;
+    if (size > 100 * 1024)
+        print_mem(__FUNCTION__, size);
+    return block->memory;
+}
+
+/*
+ * Allocate 'size' bytes of anonymous mmap'ed memory owned by the image.
+ *
+ * A bookkeeping record holding the mapping address and length is linked
+ * into dom->memblocks.  Allocations above 100 kB are logged.
+ *
+ * Returns the mapping, or NULL when either allocation fails.
+ */
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
+{
+    struct xc_dom_mem *rec = calloc(1, sizeof(*rec));
+
+    if (rec == NULL)
+        return NULL;
+
+    rec->mmap_len = size;
+    rec->mmap_ptr = mmap(NULL, rec->mmap_len,
+                         PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                         -1, 0);
+    if (rec->mmap_ptr == MAP_FAILED)
+    {
+        free(rec);
+        return NULL;
+    }
+
+    /* hand ownership to the image and account for it */
+    rec->next = dom->memblocks;
+    dom->memblocks = rec;
+    dom->alloc_malloc += sizeof(*rec);
+    dom->alloc_mem_map += rec->mmap_len;
+
+    if (size > 100 * 1024)
+        print_mem(__FUNCTION__, size);
+    return rec->mmap_ptr;
+}
+
+/*
+ * Map a file read-only into memory owned by the image.
+ *
+ * On success *size receives the file length and a bookkeeping record for
+ * the mapping is linked into dom->memblocks.  Mappings above 100 kB are
+ * logged.
+ *
+ * Returns the mapping, or NULL when the file cannot be opened, its length
+ * cannot be determined, or the allocation or mmap fails.  *size is left
+ * untouched unless the length was determined.
+ */
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+                            const char *filename, size_t * size)
+{
+    struct xc_dom_mem *block = NULL;
+    off_t len;
+    int fd = -1;
+
+    fd = open(filename, O_RDONLY);
+    if (-1 == fd)
+        goto err;
+
+    /* a failed lseek() returns -1, which must not be used as a length */
+    len = lseek(fd, 0, SEEK_END);
+    if ((off_t) -1 == len)
+        goto err;
+    *size = len;
+
+    block = calloc(1, sizeof(*block));
+    if (NULL == block)
+        goto err;
+    block->mmap_len = *size;
+    block->mmap_ptr = mmap(NULL, block->mmap_len, PROT_READ, MAP_SHARED,
+                           fd, 0);
+    if (MAP_FAILED == block->mmap_ptr)
+        goto err;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_file_map += block->mmap_len;
+    close(fd);
+    if (*size > 100 * 1024)
+        print_mem(__FUNCTION__, *size);
+    return block->mmap_ptr;
+
+  err:
+    if (-1 != fd)
+        close(fd);
+    free(block);
+    return NULL;
+}
+
+/*
+ * Release every block on dom->memblocks: unmap the ones that carry a
+ * mapping, then free the bookkeeping record itself.
+ */
+static void xc_dom_free_all(struct xc_dom_image *dom)
+{
+    struct xc_dom_mem *cur, *following;
+
+    for (cur = dom->memblocks; cur != NULL; cur = following)
+    {
+        following = cur->next;
+        dom->memblocks = following;
+        if (cur->mmap_ptr)
+            munmap(cur->mmap_ptr, cur->mmap_len);
+        free(cur);
+    }
+}
+
+/*
+ * Duplicate a string into memory obtained from xc_dom_malloc().
+ * Returns the copy, or NULL when the allocation fails.
+ */
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str)
+{
+    const size_t bytes = strlen(str) + 1;    /* including the terminator */
+    char *copy = xc_dom_malloc(dom, bytes);
+
+    if (copy != NULL)
+        memcpy(copy, str, bytes);
+    return copy;
+}
+
+/* ------------------------------------------------------------------------ */
+/* read files, copy memory blocks, with transparent gunzip                  */
+
+/*
+ * Decide whether a blob is gzip compressed and how big a buffer is needed
+ * to unpack it.
+ *
+ * The blob must start with the gzip magic bytes 0x1f 0x8b.  The unpacked
+ * size is read from the last four bytes of the blob (little endian) and
+ * is only trusted when it lies between ziplen and 8 * ziplen.
+ *
+ * Returns 0 when the blob is not gzipped, is too short, or the size looks
+ * implausible; otherwise the unpacked size plus 16 bytes of slack.
+ */
+size_t xc_dom_check_gzip(void *blob, size_t ziplen)
+{
+    const unsigned char *bytes = blob;
+    const unsigned char *gzlen;
+    size_t unziplen;
+
+    /* need the two magic bytes plus the four byte size trailer */
+    if (NULL == bytes || ziplen < 6)
+        return 0;
+
+    if (0x1f != bytes[0] || 0x8b != bytes[1])
+        /* not gzipped */
+        return 0;
+
+    gzlen = bytes + ziplen - 4;
+    /* widen each byte first: shifting an int by 24 can hit the sign bit */
+    unziplen = (size_t) gzlen[3] << 24 | (size_t) gzlen[2] << 16 |
+        (size_t) gzlen[1] << 8 | (size_t) gzlen[0];
+    if (unziplen < ziplen ||
+        (ziplen <= (size_t) -1 / 8 && unziplen > ziplen * 8))
+    {
+        xc_dom_printf
+            ("%s: size (zip %zd, unzip %zd) looks insane, skip gunzip\n",
+             __FUNCTION__, ziplen, unziplen);
+        return 0;
+    }
+
+    return unziplen + 16;
+}
+
+/*
+ * Inflate a complete gzip stream from src (srclen bytes) into dst, which
+ * provides dstlen bytes of room.
+ *
+ * Returns 0 on success, -1 (after raising XC_INTERNAL_ERROR) when zlib
+ * cannot be initialized or the stream does not finish within dst.
+ */
+int xc_dom_do_gunzip(void *src, size_t srclen, void *dst, size_t dstlen)
+{
+    z_stream zStream;
+    int rc;
+
+    memset(&zStream, 0, sizeof(zStream));
+    zStream.next_in = src;
+    zStream.avail_in = srclen;
+    zStream.next_out = dst;
+    zStream.avail_out = dstlen;
+    /* +32 means "handle gzip" */
+    rc = inflateInit2(&zStream, (MAX_WBITS + 32));
+    if (rc != Z_OK)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: inflateInit2 failed (rc=%d)\n", __FUNCTION__, rc);
+        return -1;
+    }
+    rc = inflate(&zStream, Z_FINISH);
+    /* release zlib's internal state on success and on failure alike */
+    inflateEnd(&zStream);
+    if (rc != Z_STREAM_END)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: inflate failed (rc=%d)\n", __FUNCTION__, rc);
+        return -1;
+    }
+
+    xc_dom_printf("%s: unzip ok, 0x%zx -> 0x%zx\n",
+                  __FUNCTION__, srclen, dstlen);
+    return 0;
+}
+
+/*
+ * Transparently unpack *blob when xc_dom_check_gzip() accepts it.
+ *
+ * On success *blob and *size describe the unpacked buffer (allocated via
+ * xc_dom_malloc(), sized as reported by xc_dom_check_gzip()).  A blob that
+ * is not accepted as gzip is left untouched.
+ *
+ * Returns 0 on success or when nothing had to be done, -1 on failure.
+ */
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size)
+{
+    const size_t unziplen = xc_dom_check_gzip(*blob, *size);
+    void *unzip;
+
+    if (unziplen == 0)
+        return 0;
+
+    unzip = xc_dom_malloc(dom, unziplen);
+    if (unzip == NULL ||
+        xc_dom_do_gunzip(*blob, *size, unzip, unziplen) == -1)
+        return -1;
+
+    *blob = unzip;
+    *size = unziplen;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* domain memory                                                            */
+
+/*
+ * Translate a guest pfn into a pointer usable by the builder.
+ *
+ * count != 0: the range [pfn, pfn + count) must either lie completely
+ * inside one already known block (a pointer into it is returned) or touch
+ * none, in which case a new block is created: a mapping of the guest's
+ * pages when dom->guest_domid is set, anonymous memory otherwise.
+ * count == 0: lookup only; the pfn must belong to a known block.
+ *
+ * Returns NULL when the request lies outside the guest's memory, partially
+ * overlaps a known block, or the mapping/allocation fails.
+ */
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
+                        xen_pfn_t count)
+{
+    struct xc_dom_phys *phys;
+    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    char *mode = "unset";
+
+    /*
+     * The whole range, not just its first page, has to stay inside the
+     * guest.  The check is split up so that pfn + count cannot wrap.
+     */
+    if (pfn > dom->total_pages ||
+        count > dom->total_pages ||
+        pfn > dom->total_pages - count)
+    {
+        xc_dom_printf("%s: pfn out of range (0x%" PRIpfn "+0x%" PRIpfn
+                      " > 0x%" PRIpfn ")\n",
+                      __FUNCTION__, pfn, count, dom->total_pages);
+        return NULL;
+    }
+
+    /* already allocated? */
+    for (phys = dom->phys_pages; NULL != phys; phys = phys->next)
+    {
+        if (pfn >= phys->first + phys->count)
+            continue;
+        if (count)
+        {
+            /* size given: must be completely within the already
+               allocated block */
+            if (pfn + count <= phys->first)
+                continue;
+            if (pfn < phys->first || pfn + count > phys->first + phys->count)
+            {
+                xc_dom_printf("%s: request overlaps allocated block"
+                              " (req 0x%" PRIpfn "+0x%" PRIpfn ","
+                              " blk 0x%" PRIpfn "+0x%" PRIpfn ")\n",
+                              __FUNCTION__, pfn, count, phys->first,
+                              phys->count);
+                return NULL;
+            }
+        }
+        else
+        {
+            /* no size given: block must be allocated already,
+               just hand out a pointer to it */
+            if (pfn < phys->first)
+                continue;
+        }
+        return phys->ptr + ((pfn - phys->first) << page_shift);
+    }
+
+    /* allocating is allowed with size specified only */
+    if (0 == count)
+    {
+        xc_dom_printf("%s: no block found, no size given,"
+                      " can't malloc (pfn 0x%" PRIpfn ")\n", __FUNCTION__, pfn);
+        return NULL;
+    }
+
+    /* not found, no overlap => allocate */
+    phys = xc_dom_malloc(dom, sizeof(*phys));
+    if (NULL == phys)
+        return NULL;
+    memset(phys, 0, sizeof(*phys));
+    phys->first = pfn;
+    phys->count = count;
+
+    if (dom->guest_domid)
+    {
+        mode = "domU mapping";
+        phys->ptr = xc_dom_boot_domU_map(dom, phys->first, phys->count);
+        if (NULL == phys->ptr)
+            return NULL;
+        dom->alloc_domU_map += phys->count << page_shift;
+    }
+    else
+    {
+        mode = "anonymous memory";
+        phys->ptr = mmap(NULL, phys->count << page_shift,
+                         PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+                         -1, 0);
+        if (MAP_FAILED == phys->ptr)
+        {
+            xc_dom_panic(XC_OUT_OF_MEMORY,
+                         "%s: oom: can't allocate 0x%" PRIpfn " pages\n",
+                         __FUNCTION__, count);
+            return NULL;
+        }
+        dom->alloc_mem_map += phys->count << page_shift;
+    }
+
+#if 1
+    xc_dom_printf("%s: %s: pfn 0x%" PRIpfn "+0x%" PRIpfn " at %p\n",
+                  __FUNCTION__, mode, phys->first, phys->count, phys->ptr);
+#endif
+    /* only fully set up blocks become visible on the list */
+    phys->next = dom->phys_pages;
+    dom->phys_pages = phys;
+    return phys->ptr;
+}
+
+/*
+ * Reserve a page-granular virtual address range for a named segment and
+ * zero its backing pages.
+ *
+ * start == 0 places the segment at the current end of the allocated
+ * range (dom->virt_alloc_end).  An explicit start must be page aligned and
+ * must not lie below that end.  On success seg->vstart, seg->vend and
+ * seg->pfn are filled in and dom->virt_alloc_end moves to seg->vend.
+ *
+ * Returns 0 on success, -1 on a bad start address or when the pages
+ * cannot be mapped.
+ */
+int xc_dom_alloc_segment(struct xc_dom_image *dom,
+                         struct xc_dom_seg *seg, char *name,
+                         xen_vaddr_t start, xen_vaddr_t size)
+{
+    const unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    const xen_pfn_t pages = (size + page_size - 1) / page_size;
+    const xen_pfn_t bytes = pages * page_size;    /* rounded-up length */
+    void *mem;
+
+    if (start == 0)
+        start = dom->virt_alloc_end;
+
+    if ((start & (page_size - 1)) != 0)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: segment start isn't page aligned (0x%" PRIx64 ")\n",
+                     __FUNCTION__, start);
+        return -1;
+    }
+    if (start < dom->virt_alloc_end)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: segment start too low (0x%" PRIx64 " < 0x%" PRIx64
+                     ")\n", __FUNCTION__, start, dom->virt_alloc_end);
+        return -1;
+    }
+
+    seg->vstart = start;
+    seg->vend = start + bytes;
+    seg->pfn = (seg->vstart - dom->parms.virt_base) / page_size;
+    dom->virt_alloc_end = seg->vend;
+
+    xc_dom_printf("%-20s:   %-12s : 0x%" PRIx64 " -> 0x%" PRIx64
+                  "  (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)\n",
+                  __FUNCTION__, name, seg->vstart, seg->vend, seg->pfn, pages);
+
+    /* map and clear pages */
+    mem = xc_dom_seg_to_ptr(dom, seg);
+    if (mem == NULL)
+        return -1;
+    memset(mem, 0, bytes);
+
+    return 0;
+}
+
+/*
+ * Reserve one page at the current end of the allocated virtual range and
+ * return its pfn (relative to dom->parms.virt_base).  The page is only
+ * accounted for here, it is neither mapped nor cleared.
+ */
+int xc_dom_alloc_page(struct xc_dom_image *dom, char *name)
+{
+    const unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    const xen_vaddr_t vaddr = dom->virt_alloc_end;
+    const xen_pfn_t pfn = (vaddr - dom->parms.virt_base) / page_size;
+
+    dom->virt_alloc_end = vaddr + page_size;
+
+    xc_dom_printf("%-20s:   %-12s : 0x%" PRIx64 " (pfn 0x%" PRIpfn ")\n",
+                  __FUNCTION__, name, vaddr, pfn);
+    return pfn;
+}
+
+/*
+ * Unmap the block that contains 'pfn' and unlink it from dom->phys_pages.
+ * The block record itself is not freed here.  A pfn that belongs to no
+ * block is only logged.
+ */
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    const unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    struct xc_dom_phys **link, *phys;
+
+    /* walk the list through the pointer that refers to each node */
+    for (link = &dom->phys_pages; (phys = *link) != NULL; link = &phys->next)
+    {
+        if (pfn >= phys->first && pfn < phys->first + phys->count)
+            break;
+    }
+    if (phys == NULL)
+    {
+        xc_dom_printf("%s: Huh? no mapping with pfn 0x%" PRIpfn "\n",
+                      __FUNCTION__, pfn);
+        return;
+    }
+
+    munmap(phys->ptr, phys->count << page_shift);
+    *link = phys->next;
+}
+
+/* Unmap every block on dom->phys_pages, always taking the list head. */
+void xc_dom_unmap_all(struct xc_dom_image *dom)
+{
+    struct xc_dom_phys *head;
+
+    while ((head = dom->phys_pages) != NULL)
+        xc_dom_unmap_one(dom, head->first);
+}
+
+/* ------------------------------------------------------------------------ */
+/* pluggable kernel loaders                                                 */
+
+static struct xc_dom_loader *first_loader = NULL;  /* head of the registered kernel loader list */
+static struct xc_dom_arch *first_hook = NULL;      /* head of the registered arch hooks list */
+
+/* Push a kernel loader onto the front of the loader list. */
+void xc_dom_register_loader(struct xc_dom_loader *loader)
+{
+    struct xc_dom_loader *old_head = first_loader;
+
+    loader->next = old_head;
+    first_loader = loader;
+}
+
+/*
+ * Probe the registered loaders in list order and return the first one
+ * whose probe() accepts the image (returns 0).  Each attempt is logged.
+ *
+ * Returns NULL and raises XC_INVALID_KERNEL when no loader matches.
+ */
+static struct xc_dom_loader *xc_dom_find_loader(struct xc_dom_image *dom)
+{
+    struct xc_dom_loader *cand;
+
+    for (cand = first_loader; cand != NULL; cand = cand->next)
+    {
+        xc_dom_printf("%s: trying %s loader ... ", __FUNCTION__, cand->name);
+        if (cand->probe(dom) != 0)
+        {
+            xc_dom_printf("failed\n");
+            continue;
+        }
+        xc_dom_printf("OK\n");
+        return cand;
+    }
+
+    xc_dom_panic(XC_INVALID_KERNEL, "%s: no loader found\n", __FUNCTION__);
+    return NULL;
+}
+
+/* Push a set of arch hooks onto the front of the hooks list. */
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks)
+{
+    struct xc_dom_arch *old_head = first_hook;
+
+    hooks->next = old_head;
+    first_hook = hooks;
+}
+
+/*
+ * Look up the arch hooks registered for an exact guest type string.
+ * Returns NULL and raises XC_INVALID_KERNEL when none is registered.
+ */
+static struct xc_dom_arch *xc_dom_find_arch_hooks(char *guest_type)
+{
+    struct xc_dom_arch *cand;
+
+    for (cand = first_hook; cand != NULL; cand = cand->next)
+    {
+        if (strcmp(cand->guest_type, guest_type) == 0)
+            return cand;
+    }
+
+    xc_dom_panic(XC_INVALID_KERNEL,
+                 "%s: not found (type %s)\n", __FUNCTION__, guest_type);
+    return NULL;
+}
+
+/* ------------------------------------------------------------------------ */
+/* public interface                                                         */
+
+/*
+ * Destroy an image: drop any remaining page mappings, release all
+ * pool memory, then free the image structure itself.
+ */
+void xc_dom_release(struct xc_dom_image *dom)
+{
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    if (dom->phys_pages != NULL)
+    {
+        xc_dom_unmap_all(dom);
+    }
+    xc_dom_free_all(dom);
+    free(dom);
+}
+
+/*
+ * Allocate and initialize a new, empty domain image.
+ *
+ * cmdline  - kernel command line, copied into the image; may be NULL.
+ * features - feature string handed to elf_xen_parse_features(); may be
+ *            NULL.
+ *
+ * Returns the new image, or NULL when memory runs out (including for the
+ * command line copy).  Release the image with xc_dom_release().
+ */
+struct xc_dom_image *xc_dom_allocate(const char *cmdline, const char *features)
+{
+    struct xc_dom_image *dom;
+
+    /* both arguments are optional: never hand NULL to a %s conversion */
+    xc_dom_printf("%s: cmdline=\"%s\", features=\"%s\"\n",
+                  __FUNCTION__, cmdline ? cmdline : "(null)",
+                  features ? features : "(null)");
+    dom = malloc(sizeof(*dom));
+    if (!dom)
+        return NULL;
+
+    memset(dom, 0, sizeof(*dom));
+    if (cmdline)
+    {
+        dom->cmdline = xc_dom_strdup(dom, cmdline);
+        if (NULL == dom->cmdline)
+            goto err;
+    }
+    if (features)
+        elf_xen_parse_features(features, dom->f_requested, NULL);
+
+    dom->parms.virt_base = UNSET_ADDR;
+    dom->parms.virt_entry = UNSET_ADDR;
+    dom->parms.virt_hypercall = UNSET_ADDR;
+    dom->parms.virt_hv_start_low = UNSET_ADDR;
+    dom->parms.elf_paddr_offset = UNSET_ADDR;
+
+    dom->alloc_malloc += sizeof(*dom);
+    return dom;
+
+  err:
+    xc_dom_release(dom);
+    return NULL;
+}
+
+/*
+ * Use a file as kernel image: map it and gunzip it transparently.
+ * Returns -1 when the file cannot be mapped, otherwise the result of
+ * xc_dom_try_gunzip().
+ */
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename)
+{
+    void *mapped;
+
+    xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename);
+
+    mapped = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size);
+    dom->kernel_blob = mapped;
+    if (mapped == NULL)
+        return -1;
+
+    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Use a file as ramdisk: map it as is.  No gunzip is attempted here,
+ * unlike for the kernel.
+ * Returns 0 on success, -1 when the file cannot be mapped.
+ */
+int xc_dom_ramdisk_file(struct xc_dom_image *dom, const char *filename)
+{
+    void *mapped;
+
+    xc_dom_printf("%s: filename=\"%s\"\n", __FUNCTION__, filename);
+
+    mapped = xc_dom_malloc_filemap(dom, filename, &dom->ramdisk_size);
+    dom->ramdisk_blob = mapped;
+
+    return (mapped == NULL) ? -1 : 0;
+}
+
+/*
+ * Use a caller-provided memory buffer as kernel image and gunzip it
+ * transparently.  The buffer itself is not copied.
+ * Returns the result of xc_dom_try_gunzip().
+ */
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
+                      size_t memsize)
+{
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    dom->kernel_size = memsize;
+    dom->kernel_blob = (void *)mem;
+
+    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Use a caller-provided memory buffer as ramdisk.  The buffer is neither
+ * copied nor unpacked here.  Always returns 0.
+ */
+int xc_dom_ramdisk_mem(struct xc_dom_image *dom, const void *mem,
+                       size_t memsize)
+{
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    dom->ramdisk_size = memsize;
+    dom->ramdisk_blob = (void *)mem;
+
+    return 0;
+}
+
+/*
+ * Pick a loader for the kernel blob, let it parse the image, and merge
+ * the feature sets.
+ *
+ * The active features are the union of those requested on the command
+ * line and those the kernel requires; every one of them must be among the
+ * features the kernel supports.
+ *
+ * Returns 0 on success, -1 when no loader matches, parsing fails, the
+ * parser leaves guest_type unset, or an unsupported feature is active.
+ */
+int xc_dom_parse_image(struct xc_dom_image *dom)
+{
+    int idx;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* parse kernel image */
+    dom->kernel_loader = xc_dom_find_loader(dom);
+    if (dom->kernel_loader == NULL)
+        return -1;
+    if (dom->kernel_loader->parser(dom) != 0)
+        return -1;
+    if (dom->guest_type == NULL)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "%s: guest_type not set\n", __FUNCTION__);
+        return -1;
+    }
+
+    /* check features */
+    for (idx = 0; idx < XENFEAT_NR_SUBMAPS; idx++)
+    {
+        dom->f_active[idx] |= dom->f_requested[idx];        /* cmd line */
+        dom->f_active[idx] |= dom->parms.f_required[idx];   /* kernel   */
+        if ((dom->f_active[idx] & dom->parms.f_supported[idx]) ==
+            dom->f_active[idx])
+            continue;
+
+        xc_dom_panic(XC_INVALID_PARAM,
+                     "%s: unsupported feature requested\n", __FUNCTION__);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Size the guest: select the arch hooks for dom->guest_type, convert
+ * mem_mb into a page count and set up an identity host p2m table.
+ *
+ * Returns 0 on success, -1 when no arch hooks exist for the guest type or
+ * the p2m table cannot be allocated.
+ */
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb)
+{
+    unsigned int page_shift;
+    xen_pfn_t nr_pages, pfn;
+
+    dom->arch_hooks = xc_dom_find_arch_hooks(dom->guest_type);
+    if (NULL == dom->arch_hooks)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR, "%s: arch hooks not set\n",
+                     __FUNCTION__);
+        return -1;
+    }
+
+    page_shift = XC_DOM_PAGE_SHIFT(dom);
+    /* widen first so the shift is not carried out in unsigned int */
+    nr_pages = (xen_pfn_t) mem_mb << (20 - page_shift);
+
+    xc_dom_printf("%s: mem %d MB, pages 0x%" PRIpfn " pages, %dk each\n",
+                  __FUNCTION__, mem_mb, nr_pages, 1 << (page_shift-10));
+    dom->total_pages = nr_pages;
+
+    xc_dom_printf("%s: 0x%" PRIpfn " pages\n",
+                  __FUNCTION__, dom->total_pages);
+
+    /* setup initial p2m */
+    dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) * dom->total_pages);
+    if (NULL == dom->p2m_host)
+    {
+        /* the fill loop below would otherwise write through NULL */
+        xc_dom_panic(XC_OUT_OF_MEMORY,
+                     "%s: can't allocate p2m table for 0x%" PRIpfn " pages\n",
+                     __FUNCTION__, dom->total_pages);
+        return -1;
+    }
+    for (pfn = 0; pfn < dom->total_pages; pfn++)
+        dom->p2m_host[pfn] = pfn;
+    return 0;
+}
+
+/*
+ * Copy the host p2m table into the guest's p2m table, converting each
+ * entry to the guest's pfn width (arch_hooks->sizeof_pfn: 4 or 8 bytes).
+ * INVALID_P2M_ENTRY becomes the all-ones value of the target width.
+ *
+ * Does nothing when the guest has no p2m table.
+ * Returns 0 on success, -1 on an unsupported pfn width.
+ */
+int xc_dom_update_guest_p2m(struct xc_dom_image *dom)
+{
+    uint32_t *dst32;
+    uint64_t *dst64;
+    xen_pfn_t idx, entry;
+
+    if (!dom->p2m_guest)
+        return 0;
+
+    switch (dom->arch_hooks->sizeof_pfn)
+    {
+    case 4:
+        xc_dom_printf("%s: dst 32bit, pages 0x%" PRIpfn " \n",
+                      __FUNCTION__, dom->total_pages);
+        dst32 = dom->p2m_guest;
+        for (idx = 0; idx < dom->total_pages; idx++)
+        {
+            entry = dom->p2m_host[idx];
+            if (entry == INVALID_P2M_ENTRY)
+                dst32[idx] = (uint32_t) - 1;
+            else
+                dst32[idx] = entry;
+        }
+        break;
+    case 8:
+        xc_dom_printf("%s: dst 64bit, pages 0x%" PRIpfn " \n",
+                      __FUNCTION__, dom->total_pages);
+        dst64 = dom->p2m_guest;
+        for (idx = 0; idx < dom->total_pages; idx++)
+        {
+            entry = dom->p2m_host[idx];
+            if (entry == INVALID_P2M_ENTRY)
+                dst64[idx] = (uint64_t) - 1;
+            else
+                dst64[idx] = entry;
+        }
+        break;
+    default:
+        xc_dom_panic(XC_INTERNAL_ERROR,
+                     "sizeof_pfn is invalid (is %d, can be 4 or 8)",
+                     dom->arch_hooks->sizeof_pfn);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Lay out and fill the guest's initial memory image.
+ *
+ * In order: allocate the kernel segment and run the loader, place the
+ * ramdisk (unpacking it when it is gzipped), let the arch hooks allocate
+ * their magic pages, reserve the page table segment when the arch counts
+ * any page tables, and reserve a boot stack page when requested.
+ *
+ * Returns 0 on success, -1 as soon as one step fails.
+ */
+int xc_dom_build_image(struct xc_dom_image *dom)
+{
+    unsigned int page_size;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* check for arch hooks */
+    if (dom->arch_hooks == NULL)
+    {
+        xc_dom_panic(XC_INTERNAL_ERROR, "%s: arch hooks not set\n",
+                     __FUNCTION__);
+        return -1;
+    }
+    page_size = XC_DOM_PAGE_SIZE(dom);
+
+    /* load kernel */
+    if (xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
+                             dom->kernel_seg.vstart,
+                             dom->kernel_seg.vend -
+                             dom->kernel_seg.vstart) != 0)
+        return -1;
+    if (dom->kernel_loader->loader(dom) != 0)
+        return -1;
+
+    /* load ramdisk */
+    if (dom->ramdisk_blob)
+    {
+        const size_t unziplen =
+            xc_dom_check_gzip(dom->ramdisk_blob, dom->ramdisk_size);
+        const size_t ramdisklen = unziplen ? unziplen : dom->ramdisk_size;
+        void *ramdiskmap;
+
+        if (xc_dom_alloc_segment(dom, &dom->ramdisk_seg, "ramdisk", 0,
+                                 ramdisklen) != 0)
+            return -1;
+        ramdiskmap = xc_dom_seg_to_ptr(dom, &dom->ramdisk_seg);
+        if (0 == unziplen)
+        {
+            /* plain ramdisk: copy it over unchanged */
+            memcpy(ramdiskmap, dom->ramdisk_blob, dom->ramdisk_size);
+        }
+        else if (xc_dom_do_gunzip(dom->ramdisk_blob, dom->ramdisk_size,
+                                  ramdiskmap, ramdisklen) == -1)
+        {
+            return -1;
+        }
+    }
+
+    /* allocate other pages */
+    if (dom->arch_hooks->alloc_magic_pages(dom) != 0)
+        return -1;
+    if (dom->arch_hooks->count_pgtables)
+    {
+        dom->arch_hooks->count_pgtables(dom);
+        if (dom->pgtables > 0 &&
+            xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
+                                 dom->pgtables * page_size) != 0)
+            return -1;
+    }
+    if (dom->alloc_bootstack)
+        dom->bootstack_pfn = xc_dom_alloc_page(dom, "boot stack");
+
+    xc_dom_printf("%-20s: virt_alloc_end : 0x%" PRIx64 "\n",
+                  __FUNCTION__, dom->virt_alloc_end);
+    xc_dom_printf("%-20s: virt_pgtab_end : 0x%" PRIx64 "\n",
+                  __FUNCTION__, dom->virt_pgtab_end);
+    return 0;
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_elfloader.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_elfloader.c    Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,283 @@
+/*
+ * Xen domain builder -- ELF bits.
+ *
+ * Parse and load ELF kernel images.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+#define XEN_VER "xen-3.0"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Derive the guest type string from the kernel's ELF machine type.
+ *
+ * For i386 kernels the PAE mode recorded in dom->parms.pae selects between
+ * the PAE and non-PAE flavour; a bimodal kernel (pae == 3) is treated as
+ * PAE only when dom->xen_caps contains "xen-3.0-x86_32p".
+ * Machine types not handled here yield "xen-3.0-unknown".
+ */
+static char *xc_dom_guest_type(struct xc_dom_image *dom,
+                               struct elf_binary *elf)
+{
+    uint64_t machine = elf_uval(elf, elf->ehdr, e_machine);
+    int use_pae;
+
+    if (EM_386 == machine)
+    {
+        if (3 /* PAEKERN_bimodal */ == dom->parms.pae)
+            use_pae = (NULL != strstr(dom->xen_caps, "xen-3.0-x86_32p"));
+        else
+            use_pae = (PAEKERN_extended_cr3 == dom->parms.pae ||
+                       PAEKERN_yes == dom->parms.pae);
+        return use_pae ? "xen-3.0-x86_32p" : "xen-3.0-x86_32";
+    }
+
+    switch (machine) {
+    case EM_X86_64:
+        return "xen-3.0-x86_64";
+    case EM_IA_64:
+        if (elf_msb(elf))
+            return "xen-3.0-ia64be";
+        return "xen-3.0-ia64";
+    case EM_PPC64:
+        return "xen-3.0-powerpc64";
+    default:
+        return "xen-3.0-unknown";
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* parse elf binary                                                         */
+
+/*
+ * Verify that a kernel image is attached to @dom and that
+ * elf_is_elfbinary() accepts it.
+ *
+ * Returns 0 on success and -EINVAL otherwise.  The failure is reported
+ * via xc_dom_panic() only when @verbose is non-zero, so the probe path
+ * can call this quietly.
+ */
+static int check_elf_kernel(struct xc_dom_image *dom, int verbose)
+{
+    if (dom->kernel_blob == NULL)
+    {
+        if (verbose)
+        {
+            xc_dom_panic(XC_INTERNAL_ERROR, "%s: no kernel image loaded\n",
+                         __FUNCTION__);
+        }
+        return -EINVAL;
+    }
+
+    if (elf_is_elfbinary(dom->kernel_blob))
+        return 0;
+
+    if (verbose)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: kernel is not an ELF image\n",
+                     __FUNCTION__);
+    }
+    return -EINVAL;
+}
+
+/*
+ * Loader probe hook: returns 0 if the kernel blob looks like an ELF image,
+ * -EINVAL otherwise.  Runs check_elf_kernel() in quiet mode (verbose == 0),
+ * so a mismatch is not reported as an error.
+ */
+static int xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
+{
+    return check_elf_kernel(dom, 0);
+}
+
+/*
+ * Lay out (load == 0) or write (load != 0) a BSD-style symbol table
+ * behind the kernel image.
+ *
+ * The table consists of an int holding its size, a copy of the ELF header,
+ * a copy of the section headers, and the contents of the SHT_SYMTAB
+ * sections together with the SHT_STRTAB sections they link to.  Section
+ * offsets in the copied headers are rewritten relative to the copied ELF
+ * header.
+ *
+ * load == 0 (parse pass): works on a scratch buffer, picks
+ *     dom->bsd_symtab_start and grows dom->kernel_seg.vend to cover the
+ *     table.  If no table sections are found bsd_symtab_start is reset
+ *     to 0.
+ * load != 0 (load pass): writes headers and section contents into guest
+ *     memory at dom->bsd_symtab_start; does nothing if that is 0.
+ *
+ * Returns 0 normally (including the "nothing to do" cases) and -1 if
+ * elf_init() rejects the copied headers.
+ */
+static int xc_dom_load_elf_symtab(struct xc_dom_image *dom,
+                                  struct elf_binary *elf, int load)
+{
+    struct elf_binary syms;
+    const elf_shdr *shdr, *shdr2;
+    xen_vaddr_t symtab, maxaddr;
+    char *hdr;
+    size_t size;
+    int h, count, type, i, tables = 0;
+
+    if (elf_swap(elf)) {
+        xc_dom_printf("%s: non-native byte order, bsd symtab not supported\n",
+                      __FUNCTION__);
+        return 0;
+    }
+
+    if (load) {
+        if (!dom->bsd_symtab_start)
+            return 0;
+        size = dom->kernel_seg.vend - dom->bsd_symtab_start;
+        hdr  = xc_dom_vaddr_to_ptr(dom, dom->bsd_symtab_start);
+        /* leading int: size of everything that follows it */
+        *(int *)hdr = size - sizeof(int);
+    } else {
+        size = sizeof(int) + elf_size(elf, elf->ehdr) +
+            elf_shdr_count(elf) * elf_size(elf, shdr);
+        hdr = xc_dom_malloc(dom, size);
+        if (hdr == NULL)
+            return 0;
+        /*
+         * Round using the kernel's own elf handle: syms is not set up
+         * until the elf_init() call further down, so it must not be
+         * consulted here.
+         */
+        dom->bsd_symtab_start = elf_round_up(elf, dom->kernel_seg.vend);
+    }
+
+    /* copy ELF header and section headers behind the size field */
+    memcpy(hdr + sizeof(int),
+           elf->image,
+           elf_size(elf, elf->ehdr));
+    memcpy(hdr + sizeof(int) + elf_size(elf, elf->ehdr),
+           elf->image + elf_uval(elf, elf->ehdr, e_shoff),
+           elf_shdr_count(elf) * elf_size(elf, shdr));
+
+    /* the copy has no program headers; section headers follow the ehdr */
+    if (elf_64bit(elf)) {
+        Elf64_Ehdr *ehdr = (Elf64_Ehdr *)(hdr + sizeof(int));
+        ehdr->e_phoff = 0;
+        ehdr->e_phentsize = 0;
+        ehdr->e_phnum = 0;
+        ehdr->e_shoff = elf_size(elf, elf->ehdr);
+        ehdr->e_shstrndx = SHN_UNDEF;
+    } else {
+        Elf32_Ehdr *ehdr = (Elf32_Ehdr *)(hdr + sizeof(int));
+        ehdr->e_phoff = 0;
+        ehdr->e_phentsize = 0;
+        ehdr->e_phnum = 0;
+        ehdr->e_shoff = elf_size(elf, elf->ehdr);
+        ehdr->e_shstrndx = SHN_UNDEF;
+    }
+    if (0 != elf_init(&syms, hdr + sizeof(int), size - sizeof(int)))
+        return -1;
+    if (xc_dom_logfile)
+        elf_set_logfile(&syms, xc_dom_logfile, 1);
+
+    symtab = dom->bsd_symtab_start + sizeof(int);
+    maxaddr = elf_round_up(&syms, symtab + elf_size(&syms, syms.ehdr) +
+                           elf_shdr_count(&syms) * elf_size(&syms, shdr));
+
+    xc_dom_printf("%s/%s: bsd_symtab_start=%" PRIx64 ", kernel.end=0x%" PRIx64
+                  " -- symtab=0x%" PRIx64 ", maxaddr=0x%" PRIx64 "\n",
+                  __FUNCTION__, load ? "load" : "parse",
+                  dom->bsd_symtab_start, dom->kernel_seg.vend, symtab, maxaddr);
+
+    count = elf_shdr_count(&syms);
+    for (h = 0; h < count; h++)
+    {
+        shdr = elf_shdr_by_index(&syms, h);
+        type = elf_uval(&syms, shdr, sh_type);
+        if (type == SHT_STRTAB)
+        {
+            /* Look for a symtab @i linked to strtab @h. */
+            for (i = 0; i < count; i++) {
+                shdr2 = elf_shdr_by_index(&syms, i);
+                if ((elf_uval(&syms, shdr2, sh_type) == SHT_SYMTAB) &&
+                    (elf_uval(&syms, shdr2, sh_link) == h))
+                    break;
+            }
+            /* Skip strtab @h if we found no corresponding symtab @i. */
+            if (i == count)
+            {
+                if (elf_64bit(&syms))
+                    *(Elf64_Off*)(&shdr->e64.sh_offset) = 0;
+                else
+                    *(Elf32_Off*)(&shdr->e32.sh_offset) = 0;
+                continue;
+            }
+        }
+
+        if ((type == SHT_STRTAB) || (type == SHT_SYMTAB))
+        {
+            /* Mangled to be based on ELF header location. */
+            if (elf_64bit(&syms))
+                *(Elf64_Off*)(&shdr->e64.sh_offset) = maxaddr - symtab;
+            else
+                *(Elf32_Off*)(&shdr->e32.sh_offset) = maxaddr - symtab;
+            size = elf_uval(&syms, shdr, sh_size);
+            maxaddr = elf_round_up(&syms, maxaddr + size);
+            tables++;
+            xc_dom_printf("%s: h=%d %s, size=0x%zx, maxaddr=0x%" PRIx64 "\n",
+                          __FUNCTION__, h,
+                          type == SHT_SYMTAB ? "symtab" : "strtab",
+                          size, maxaddr);
+
+            if (load) {
+                /* copy the section body from the kernel image */
+                shdr2 = elf_shdr_by_index(elf, h);
+                memcpy((void*)elf_section_start(&syms, shdr),
+                       elf_section_start(elf, shdr2),
+                       size);
+            }
+        }
+
+        /*
+         * Name is NULL.  sh_name is a 32-bit Word in both ELF classes, so
+         * the whole field has to be cleared.
+         */
+        if (elf_64bit(&syms))
+            *(Elf64_Word*)(&shdr->e64.sh_name) = 0;
+        else
+            *(Elf32_Word*)(&shdr->e32.sh_name) = 0;
+    }
+
+    if (0 == tables)
+    {
+        xc_dom_printf("%s: no symbol table present\n", __FUNCTION__);
+        dom->bsd_symtab_start = 0;
+        return 0;
+    }
+    if (!load)
+        dom->kernel_seg.vend = maxaddr;
+    return 0;
+}
+
+/*
+ * Loader parse hook for ELF kernels.
+ *
+ * Sets up an elf_binary for the kernel blob (kept in dom->private_loader
+ * for the load hook), extracts the Xen meta information into dom->parms,
+ * and fills in dom->kernel_seg and dom->guest_type.  When the kernel asks
+ * for a BSD symbol table the kernel segment is extended to make room for
+ * it.
+ *
+ * Returns 0 on success or a non-zero error code: -EINVAL for a missing,
+ * non-ELF or shstrtab-less image, -ENOMEM if the elf_binary cannot be
+ * allocated, otherwise whatever elf_init() / elf_xen_parse() returned.
+ */
+static int xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *elf;
+    int rc;
+
+    rc = check_elf_kernel(dom, 1);
+    if (0 != rc)
+        return rc;
+
+    elf = xc_dom_malloc(dom, sizeof(*elf));
+    if (NULL == elf)
+        return -ENOMEM;         /* elf_init() would dereference NULL */
+    dom->private_loader = elf;
+    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size);
+    if (xc_dom_logfile)
+        elf_set_logfile(elf, xc_dom_logfile, 1);
+    if (0 != rc)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: corrupted ELF image\n",
+                     __FUNCTION__);
+        return rc;
+    }
+
+    /* Find the section-header strings table. */
+    if (NULL == elf->sec_strtab)
+    {
+        xc_dom_panic(XC_INVALID_KERNEL, "%s: ELF image has no shstrtab\n",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* parse binary and get xen meta info */
+    elf_parse_binary(elf);
+    if (0 != (rc = elf_xen_parse(elf, &dom->parms)))
+        return rc;
+
+    /* find kernel segment */
+    dom->kernel_seg.vstart = dom->parms.virt_kstart;
+    dom->kernel_seg.vend   = dom->parms.virt_kend;
+
+    /* parse pass only: sizes the symtab and moves kernel_seg.vend */
+    if (dom->parms.bsd_symtab)
+        xc_dom_load_elf_symtab(dom, elf, 0);
+
+    dom->guest_type = xc_dom_guest_type(dom, elf);
+    xc_dom_printf("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
+                  __FUNCTION__, dom->guest_type,
+                  dom->kernel_seg.vstart, dom->kernel_seg.vend);
+    return 0;
+}
+
+/*
+ * Loader load hook for ELF kernels: copies the kernel into the mapped
+ * kernel segment using the elf_binary prepared by the parse hook, then
+ * writes the BSD symbol table if the kernel requested one.
+ * Always returns 0.
+ */
+static int xc_dom_load_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *kernel_elf = dom->private_loader;
+
+    kernel_elf->dest = xc_dom_seg_to_ptr(dom, &dom->kernel_seg);
+    elf_load_binary(kernel_elf);
+
+    if (dom->parms.bsd_symtab)
+    {
+        xc_dom_load_elf_symtab(dom, kernel_elf, 1);
+    }
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Loader descriptor for ELF kernels: ties the probe/parse/load hooks above
+ * to the name "ELF-generic". */
+static struct xc_dom_loader elf_loader = {
+    .name = "ELF-generic",
+    .probe = xc_dom_probe_elf_kernel,
+    .parser = xc_dom_parse_elf_kernel,
+    .loader = xc_dom_load_elf_kernel,
+};
+
+/*
+ * Registers the ELF loader with the domain builder core.
+ * NOTE(review): presumably __init marks this as a constructor so it runs
+ * automatically when the library is loaded -- confirm where __init is
+ * defined.
+ */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&elf_loader);
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_ia64.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_ia64.c Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,118 @@
+/*
+ * Xen domain builder -- ia64 bits.
+ *
+ * Most architecture-specific code for ia64 goes here.
+ *   - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/ia64.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Assign the special pages for an ia64 guest: the last three pfns of the
+ * domain (total_pages - 3 .. total_pages - 1) become start info, xenstore
+ * and console page, in that order.  Always returns 0.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    /* special pages live at the very top of guest memory */
+    dom->start_info_pfn = dom->total_pages - 3;
+    dom->xenstore_pfn = dom->total_pages - 2;
+    dom->console_pfn = dom->total_pages - 1;
+    return 0;
+}
+
+/*
+ * Fill the ia64 start_info page and the xen_ia64_boot_param block that
+ * directly follows it in the same page.
+ *
+ * The magic string is the guest type; ramdisk location and command line
+ * are recorded in both structures when present.  Always returns 0.
+ */
+static int start_info_ia64(struct xc_dom_image *dom)
+{
+    start_info_ia64_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    struct xen_ia64_boot_param_ia64 *bp =
+        (struct xen_ia64_boot_param_ia64 *)(start_info + 1);
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    /*
+     * guest_type is data, not a format string: copy it with "%s" and
+     * bound the write by the size of the magic field.
+     */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+             dom->guest_type);
+    start_info->flags = dom->flags;
+    start_info->nr_pages = dom->total_pages;
+    start_info->store_mfn = dom->xenstore_pfn;
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = dom->console_pfn;
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+        bp->initrd_start = start_info->mod_start;
+        bp->initrd_size = start_info->mod_len;
+    }
+    if (dom->cmdline)
+    {
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+        /*
+         * Address of cmd_line inside the start info page.  The offset
+         * must come from the ia64 layout that is being filled in, not
+         * from the build host's native start_info_t.
+         */
+        bp->command_line = (dom->start_info_pfn << PAGE_SHIFT_IA64)
+            + offsetof(start_info_ia64_t, cmd_line);
+    }
+    return 0;
+}
+
+/*
+ * Initialise the ia64 shared info page at @ptr: everything is zeroed,
+ * the start info pfn is recorded and evtchn_upcall_mask is set to 1 for
+ * every vcpu slot.  Always returns 0.
+ */
+static int shared_info_ia64(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_ia64_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    shinfo->arch.start_info_pfn = dom->start_info_pfn;
+    while (cpu < MAX_VIRT_CPUS)
+    {
+        shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+        cpu++;
+    }
+    return 0;
+}
+
+extern unsigned long xc_ia64_fpsr_default(void);
+
+/*
+ * Fill the initial vcpu context for an ia64 guest.
+ *
+ * The context is zeroed; the instruction pointer is set to the kernel
+ * entry point and r28 to the guest-physical address just past the
+ * start_info structure (where start_info_ia64() places the boot
+ * parameters).  Always returns 0.
+ */
+static int vcpu_ia64(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_ia64_t *ctxt = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->flags = 0;
+    /* all necessary bits filled by hypervisor */
+    ctxt->user_regs.cr_ipsr = 0;
+    ctxt->user_regs.cr_iip = dom->parms.virt_entry;
+    ctxt->user_regs.cr_ifs = (uint64_t) 1 << 63;
+#ifdef __ia64__                        /* FIXME */
+    ctxt->user_regs.ar_fpsr = xc_ia64_fpsr_default();
+#endif
+    ctxt->user_regs.r28 = (dom->start_info_pfn << PAGE_SHIFT_IA64)
+        + sizeof(start_info_ia64_t);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Architecture hooks for "xen-3.0-ia64" guests.  No page table hooks are
+ * set here. */
+static struct xc_dom_arch xc_dom_arch = {
+    .guest_type = "xen-3.0-ia64",
+    .page_shift = PAGE_SHIFT_IA64,
+    .alloc_magic_pages = alloc_magic_pages,
+    .start_info = start_info_ia64,
+    .shared_info = shared_info_ia64,
+    .vcpu = vcpu_ia64,
+};
+
+/*
+ * Registers the ia64 hooks with the domain builder core.
+ * NOTE(review): presumably __init makes this run automatically at library
+ * load time -- confirm where __init is defined.
+ */
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_arch);
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_powerpc64.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_powerpc64.c    Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,100 @@
+/*
+ * Xen domain builder -- powerpc64 bits.
+ *
+ * Most architecture-specific code for powerpc64 goes here.
+ *   - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Carve the special pages off the top of the low memory area: each page
+ * lowers dom->low_top by one.  The first page is left for shared_info,
+ * the next three become xenstore, console and start info page.
+ * Always returns 0.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    /* shared_info: reserved, pfn not recorded here */
+    dom->low_top--;
+
+    dom->low_top--;
+    dom->xenstore_pfn = dom->low_top;
+
+    dom->low_top--;
+    dom->console_pfn = dom->low_top;
+
+    dom->low_top--;
+    dom->start_info_pfn = dom->low_top;
+    return 0;
+}
+
+/*
+ * Fill the start_info page of a powerpc64 guest: magic string, number of
+ * pages, shared info address and the xenstore/console pages with their
+ * event channels.  Always returns 0.
+ *
+ * NOTE(review): shared_info is placed at (total_pages - 1) while
+ * alloc_magic_pages() reserves its pages below dom->low_top -- confirm
+ * that both refer to the same page.
+ */
+static int start_info(struct xc_dom_image *dom)
+{
+    start_info_t *si =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+
+    xc_dom_printf("%s\n", __FUNCTION__);
+
+    snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0);
+
+    si->nr_pages = dom->total_pages;
+    si->shared_info = (dom->total_pages - 1) << PAGE_SHIFT;
+    si->store_mfn = dom->xenstore_pfn;
+    /* same field the ia64 and x86 builders use for the store channel */
+    si->store_evtchn = dom->xenstore_evtchn;
+    si->console.domU.mfn = dom->console_pfn;
+    si->console.domU.evtchn = dom->console_evtchn;
+    return 0;
+}
+
+/*
+ * Initialise the powerpc64 shared info page at @ptr by zeroing it.
+ * @dom is not used.  Always returns 0.
+ */
+static int shared_info(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_t *shared_info = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shared_info, 0, sizeof(*shared_info));
+    return 0;
+}
+
+/*
+ * Fill the initial vcpu context for a powerpc64 guest.
+ *
+ * The context is zeroed, the user registers are then filled with the
+ * byte pattern 0x55 and the registers that carry boot information are
+ * set explicitly.  Always returns 0.
+ */
+static int vcpu(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_t *ctxt = ptr;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* ctxt is a pointer, so the register block is ctxt->user_regs */
+    memset(&ctxt->user_regs, 0x55, sizeof(ctxt->user_regs));
+    /* entry point as recorded by the kernel parser, like the other archs */
+    ctxt->user_regs.pc = dom->parms.virt_entry;
+    ctxt->user_regs.msr = 0;
+    ctxt->user_regs.gprs[1] = 0; /* Linux uses its own stack */
+    /*
+     * NOTE(review): devtree_addr and kern_addr are not declared in this
+     * function; they look like leftovers from an older builder and need
+     * to be derived from @dom before this file can be built.
+     */
+    ctxt->user_regs.gprs[3] = devtree_addr;
+    ctxt->user_regs.gprs[4] = kern_addr;
+    ctxt->user_regs.gprs[5] = 0;
+
+    /* There is a buggy kernel that does not zero the "local_paca", so
+     * we must make sure this register is 0 */
+    ctxt->user_regs.gprs[13] = 0;
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Architecture hooks for "xen-3.0-powerpc64" guests.
+ * NOTE(review): page_shift is still the placeholder FIXME and must be
+ * replaced by the real page shift before this file can be built. */
+static struct xc_dom_arch xc_dom_arch = {
+    .guest_type = "xen-3.0-powerpc64",
+    .page_shift = FIXME,
+    .alloc_magic_pages = alloc_magic_pages,
+    .start_info = start_info,
+    .shared_info = shared_info,
+    .vcpu = vcpu,
+};
+
+/*
+ * Registers the powerpc64 hooks with the domain builder core.
+ * NOTE(review): presumably __init makes this run automatically at library
+ * load time -- confirm where __init is defined.
+ */
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_arch);
+}
diff -r 50d9e2ddc377 -r fd50500eee7c tools/libxc/xc_dom_x86.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xc_dom_x86.c  Thu Jan 25 22:16:52 2007 +0000
@@ -0,0 +1,559 @@
+/*
+ * Xen domain builder -- i386 and x86_64 bits.
+ *
+ * Most architecture-specific code for x86 goes here.
+ *   - prepare page tables.
+ *   - fill architecture-specific structs.
+ *
+ * This code is licenced under the GPL.
+ * written 2006 by Gerd Hoffmann <kraxel@xxxxxxx>.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/e820.h>
+
+#include "xg_private.h"
+#include "xc_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Mask with the low @bits bits set; @bits must be smaller than the width
+ * of xen_vaddr_t. */
+#define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
+/* Clear the bits selected by @mask, i.e. align @addr down. */
+#define round_down(addr, mask)   ((addr) & ~(mask))
+/* Set the bits selected by @mask: this yields the LAST address of the
+ * aligned region containing @addr, not the start of the next region. */
+#define round_up(addr, mask)     ((addr) | (mask))
+
+/*
+ * Count the page tables of one level needed to map [@start, @end].
+ *
+ * @bits is the log2 of the address range a single table of that level
+ * covers.  0 means the level is unused (result 0).  A value equal to the
+ * host's unsigned long width, or one that covers the whole xen_vaddr_t
+ * range, denotes the top-level table, of which exactly one is needed.
+ * Otherwise the result is the number of 2^@bits-aligned regions touched
+ * by the range.
+ */
+static inline unsigned long
+nr_page_tables(xen_vaddr_t start, xen_vaddr_t end, unsigned long bits)
+{
+    xen_vaddr_t mask;
+    int whole_space;
+    int tables;
+
+    if (0 == bits)
+        return 0;               /* unused */
+
+    /* shifting by the full type width is undefined, so special-case it */
+    whole_space = (bits >= 8 * sizeof(xen_vaddr_t));
+    mask = whole_space ? ~(xen_vaddr_t)0 : bits_to_mask(bits);
+
+    if (whole_space || 8 * sizeof(unsigned long) == bits)
+    {
+        /* must be pgd, need one */
+        start = 0;
+        end = -1;
+        tables = 1;
+    }
+    else
+    {
+        start = round_down(start, mask);
+        end = round_up(end, mask);
+        tables = ((end - start) >> bits) + 1;
+    }
+
+    xc_dom_printf("%s: 0x%016" PRIx64 "/%lu: 0x%016" PRIx64
+                  " -> 0x%016" PRIx64 ", %d table(s)\n",
+                  __FUNCTION__, mask, bits, start, end, tables);
+    return tables;
+}
+
+/*
+ * Work out how many page tables of each level are needed to map the guest
+ * from dom->parms.virt_base up to the end of everything allocated so far
+ * plus the page tables themselves plus the extra pages (boot stack,
+ * dom->extra_pages).
+ *
+ * Since the page tables are part of the mapped area the count is found by
+ * iteration: assume a number of pages, count the tables needed to map up
+ * to that end address, and repeat until the tables fit below it.
+ *
+ * @lN_bits is the log2 of the range covered by one table of level N
+ * (0 = level not used).  With @pae set one additional L2 table is
+ * reserved when the mapping ends below 0xc0000000.
+ *
+ * Results go to dom->pg_l1..pg_l4, dom->pgtables and dom->virt_pgtab_end.
+ * Always returns 0.
+ */
+static int count_pgtables(struct xc_dom_image *dom, int pae,
+                          int l4_bits, int l3_bits, int l2_bits, int l1_bits)
+{
+    int pages, extra_pages;
+    xen_vaddr_t try_virt_end;
+
+    extra_pages = dom->alloc_bootstack ? 1 : 0;
+    extra_pages += dom->extra_pages;
+    pages = extra_pages;
+    for (;;)
+    {
+        /* last byte of the area one L1 table covers beyond the guess */
+        try_virt_end = round_up(dom->virt_alloc_end + pages * PAGE_SIZE_X86,
+                                bits_to_mask(l1_bits));
+        dom->pg_l4 =
+            nr_page_tables(dom->parms.virt_base, try_virt_end, l4_bits);
+        dom->pg_l3 =
+            nr_page_tables(dom->parms.virt_base, try_virt_end, l3_bits);
+        dom->pg_l2 =
+            nr_page_tables(dom->parms.virt_base, try_virt_end, l2_bits);
+        dom->pg_l1 =
+            nr_page_tables(dom->parms.virt_base, try_virt_end, l1_bits);
+        if (pae && try_virt_end < 0xc0000000)
+        {
+            xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n",
+                          __FUNCTION__);
+            dom->pg_l2++;
+        }
+        dom->pgtables = dom->pg_l4 + dom->pg_l3 + dom->pg_l2 + dom->pg_l1;
+        pages = dom->pgtables + extra_pages;
+        /* done once tables and extra pages fit below the mapped end */
+        if (dom->virt_alloc_end + pages * PAGE_SIZE_X86 <= try_virt_end + 1)
+            break;
+    }
+    dom->virt_pgtab_end = try_virt_end + 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* i386 pagetables                                                          */
+
+/* Flag bits OR-ed into the page table entries written by the i386 and
+ * PAE setup functions below (L3_PROT is used by the PAE variant only). */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
+
+/* Page table count for non-PAE i386: no L4/L3 level, a single L2 table
+ * covering the whole 32-bit space, and L1 tables covering
+ * 2^L2_PAGETABLE_SHIFT_I386 bytes each. */
+static int count_pgtables_x86_32(struct xc_dom_image *dom)
+{
+    return count_pgtables(dom, 0, 0, 0, 32, L2_PAGETABLE_SHIFT_I386);
+}
+
+/* Page table count for PAE i386: no L4 level, a single L3 table covering
+ * the whole 32-bit space, plus L2 and L1 tables.  The pae flag lets
+ * count_pgtables() reserve the extra L2 table when needed. */
+static int count_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+    return count_pgtables(dom, 1, 0, 32,
+                         L3_PAGETABLE_SHIFT_PAE, L2_PAGETABLE_SHIFT_PAE);
+}
+
+/* Convert a frame number to the address of the frame's first byte; the
+ * cast widens to xen_paddr_t before shifting. */
+#define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
+
+/*
+ * Write the initial two-level (non-PAE) i386 page tables.
+ *
+ * The L2 table is the first page of dom->pgtables_seg, the L1 tables
+ * follow after dom->pg_l2 pages.  Every page from dom->parms.virt_base up
+ * to dom->virt_pgtab_end is mapped to the frame xc_dom_p2m_guest() returns
+ * for its pfn; pages inside the page table segment are mapped read-only.
+ * Always returns 0.
+ */
+static int setup_pgtables_x86_32(struct xc_dom_image *dom)
+{
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l2;
+    l2_pgentry_32_t *l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+    l1_pgentry_32_t *l1tab = NULL;
+    unsigned long l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+        addr += PAGE_SIZE_X86)
+    {
+       /* l1tab == NULL: first page, or the previous L1 table is full */
+       if (NULL == l1tab)
+       {
+           /* get L1 tab, make L2 entry */
+           l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+           l2off = l2_table_offset_i386(addr);
+           l2tab[l2off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+           l1pfn++;
+       }
+
+       /* make L1 entry */
+       l1off = l1_table_offset_i386(addr);
+       pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+       l1tab[l1off] =
+           pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+       if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+           l1tab[l1off] &= ~_PAGE_RW;  /* page tables are r/o */
+       /* last slot used: force a fresh L1 table on the next iteration */
+       if (L1_PAGETABLE_ENTRIES_I386 - 1 == l1off)
+           l1tab = NULL;
+    }
+    return 0;
+}
+
+/*
+ * Write the initial three-level PAE page tables.
+ *
+ * Within dom->pgtables_seg the L3 table comes first, followed by
+ * dom->pg_l3 .. L2 tables and then the L1 tables.  Every page from
+ * dom->parms.virt_base up to dom->virt_pgtab_end is mapped to the frame
+ * xc_dom_p2m_guest() returns for its pfn; pages inside the page table
+ * segment are mapped read-only.  If the mapping ends at or below
+ * 0xc0000000 the next unused L2 table is additionally hooked into L3
+ * slot 3 (the page count_pgtables() reserved for that case).
+ * Always returns 0.
+ */
+static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+    xen_pfn_t l3pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l3;
+    xen_pfn_t l1pfn = dom->pgtables_seg.pfn + dom->pg_l3 + dom->pg_l2;
+    l3_pgentry_64_t *l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+    l2_pgentry_64_t *l2tab = NULL;
+    l1_pgentry_64_t *l1tab = NULL;
+    unsigned long l3off, l2off, l1off;
+    xen_vaddr_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+        addr += PAGE_SIZE_X86)
+    {
+       /* a NULL table pointer means "start a new table at this level" */
+       if (NULL == l2tab)
+       {
+           /* get L2 tab, make L3 entry */
+           l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+           l3off = l3_table_offset_pae(addr);
+           l3tab[l3off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+           l2pfn++;
+       }
+
+       if (NULL == l1tab)
+       {
+           /* get L1 tab, make L2 entry */
+           l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+           l2off = l2_table_offset_pae(addr);
+           l2tab[l2off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+           /* L2 table full: the next L1 table needs a new L2 table */
+           if (L2_PAGETABLE_ENTRIES_PAE - 1 == l2off)
+               l2tab = NULL;
+           l1pfn++;
+       }
+
+       /* make L1 entry */
+       l1off = l1_table_offset_pae(addr);
+       pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+       l1tab[l1off] =
+           pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+       if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+           l1tab[l1off] &= ~_PAGE_RW;  /* page tables are r/o */
+       if (L1_PAGETABLE_ENTRIES_PAE - 1 == l1off)
+           l1tab = NULL;
+    }
+
+    /* l2pfn now names the first L2 page the loop did not use */
+    if (dom->virt_pgtab_end <= 0xc0000000)
+    {
+       xc_dom_printf("%s: PAE: extra l2 page table for l3#3\n", __FUNCTION__);
+       l3tab[3] = pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+    }
+    return 0;
+}
+
+#undef L1_PROT
+#undef L2_PROT
+#undef L3_PROT
+
+/* ------------------------------------------------------------------------ */
+/* x86_64 pagetables                                                        */
+
+/* Page table count for x86_64: each level is given the shift of the level
+ * above it (the range one table of that level covers); for the L4 table
+ * that is L4_PAGETABLE_SHIFT_X86_64 + 9. */
+static int count_pgtables_x86_64(struct xc_dom_image *dom)
+{
+    return count_pgtables(dom, 0,
+                         L4_PAGETABLE_SHIFT_X86_64 + 9,
+                         L4_PAGETABLE_SHIFT_X86_64,
+                         L3_PAGETABLE_SHIFT_X86_64, L2_PAGETABLE_SHIFT_X86_64);
+}
+
+/* Flag bits OR-ed into the page table entries written by
+ * setup_pgtables_x86_64(); L2 to L4 entries use the same set. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+/*
+ * Write the initial four-level x86_64 page tables.
+ *
+ * Within dom->pgtables_seg the tables are laid out level by level: the L4
+ * table first, then dom->pg_l3 L3 tables, dom->pg_l2 L2 tables and finally
+ * the L1 tables.  Every page from dom->parms.virt_base up to
+ * dom->virt_pgtab_end is mapped to the frame xc_dom_p2m_guest() returns
+ * for its pfn; pages inside the page table segment are mapped read-only.
+ * Always returns 0.
+ */
+static int setup_pgtables_x86_64(struct xc_dom_image *dom)
+{
+    xen_pfn_t l4pfn = dom->pgtables_seg.pfn;
+    xen_pfn_t l3pfn = dom->pgtables_seg.pfn + dom->pg_l4;
+    xen_pfn_t l2pfn = dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3;
+    xen_pfn_t l1pfn =
+       dom->pgtables_seg.pfn + dom->pg_l4 + dom->pg_l3 + dom->pg_l2;
+    l4_pgentry_64_t *l4tab = xc_dom_pfn_to_ptr(dom, l4pfn, 1);
+    l3_pgentry_64_t *l3tab = NULL;
+    l2_pgentry_64_t *l2tab = NULL;
+    l1_pgentry_64_t *l1tab = NULL;
+    uint64_t l4off, l3off, l2off, l1off;
+    uint64_t addr;
+    xen_pfn_t pgpfn;
+
+    for (addr = dom->parms.virt_base; addr < dom->virt_pgtab_end;
+        addr += PAGE_SIZE_X86)
+    {
+       /* a NULL table pointer means "start a new table at this level" */
+       if (NULL == l3tab)
+       {
+           /* get L3 tab, make L4 entry */
+           l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+           l4off = l4_table_offset_x86_64(addr);
+           l4tab[l4off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
+           l3pfn++;
+       }
+
+       if (NULL == l2tab)
+       {
+           /* get L2 tab, make L3 entry */
+           l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+           l3off = l3_table_offset_x86_64(addr);
+           l3tab[l3off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+           /* L3 table full: the next L2 table needs a new L3 table */
+           if (L3_PAGETABLE_ENTRIES_X86_64 - 1 == l3off)
+               l3tab = NULL;
+           l2pfn++;
+       }
+
+       if (NULL == l1tab)
+       {
+           /* get L1 tab, make L2 entry */
+           l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+           l2off = l2_table_offset_x86_64(addr);
+           l2tab[l2off] =
+               pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+           /* L2 table full: the next L1 table needs a new L2 table */
+           if (L2_PAGETABLE_ENTRIES_X86_64 - 1 == l2off)
+               l2tab = NULL;
+           l1pfn++;
+       }
+
+       /* make L1 entry */
+       l1off = l1_table_offset_x86_64(addr);
+       pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86;
+       l1tab[l1off] =
+           pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+       if (addr >= dom->pgtables_seg.vstart && addr < dom->pgtables_seg.vend)
+           l1tab[l1off] &= ~_PAGE_RW;  /* page tables are r/o */
+       if (L1_PAGETABLE_ENTRIES_X86_64 - 1 == l1off)
+           l1tab = NULL;
+    }
+    return 0;
+}
+
+#undef L1_PROT
+#undef L2_PROT
+#undef L3_PROT
+#undef L4_PROT
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Allocate the x86 special areas: the phys2mach table (one entry of
+ * arch_hooks->sizeof_pfn bytes per guest page) followed by the start
+ * info, xenstore and console pages, plus a shared info page for
+ * translated guests.  Also requests a boot stack page.
+ *
+ * Returns 0 on success, -1 if the phys2mach segment cannot be allocated.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    size_t p2m_size = dom->total_pages * dom->arch_hooks->sizeof_pfn;
+
+    /* allocate phys2mach table */
+    if (0 != xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach", 0,
+                                  p2m_size))
+        return -1;
+    dom->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);
+
+    /* allocate special pages */
+    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
+    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
+    dom->console_pfn = xc_dom_alloc_page(dom, "console");
+    if (xc_dom_feature_translated(dom))
+        dom->shared_info_pfn = xc_dom_alloc_page(dom, "shared info");
+    dom->alloc_bootstack = 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Fill the start_info page of a 32-bit x86 guest.
+ *
+ * shared_info is the guest pfn for translated guests and the machine
+ * frame otherwise; xenstore and console pages are passed as the frames
+ * xc_dom_p2m_guest() returns.  Ramdisk and command line are recorded when
+ * present.  Always returns 0.
+ */
+static int start_info_x86_32(struct xc_dom_image *dom)
+{
+    start_info_x86_32_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo = xc_dom_feature_translated(dom)
+        ? dom->shared_info_pfn : dom->shared_info_mfn;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /*
+     * guest_type is data, not a format string: copy it with "%s" and
+     * bound the write by the size of the magic field.
+     */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+             dom->guest_type);
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+    }
+    if (dom->cmdline)
+    {
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        /* strncpy does not terminate on truncation */
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Fill the start_info page of a 64-bit x86 guest.
+ *
+ * shared_info is the guest pfn for translated guests and the machine
+ * frame otherwise; xenstore and console pages are passed as the frames
+ * xc_dom_p2m_guest() returns.  Ramdisk and command line are recorded when
+ * present.  Always returns 0.
+ */
+static int start_info_x86_64(struct xc_dom_image *dom)
+{
+    start_info_x86_64_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo = xc_dom_feature_translated(dom)
+        ? dom->shared_info_pfn : dom->shared_info_mfn;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /*
+     * guest_type is data, not a format string: copy it with "%s" and
+     * bound the write by the size of the magic field.
+     */
+    snprintf(start_info->magic, sizeof(start_info->magic), "%s",
+             dom->guest_type);
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = dom->pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m_guest(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if (dom->ramdisk_blob)
+    {
+        start_info->mod_start = dom->ramdisk_seg.vstart;
+        start_info->mod_len = dom->ramdisk_seg.vend - dom->ramdisk_seg.vstart;
+    }
+    if (dom->cmdline)
+    {
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        /* strncpy does not terminate on truncation */
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+    return 0;
+}
+
+/*
+ * Initialise the 32-bit x86 shared info page at @ptr: zero it and set
+ * evtchn_upcall_mask to 1 for every vcpu slot.  @dom is not used.
+ * Always returns 0.
+ */
+static int shared_info_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_32_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+        shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+        cpu++;
+    }
+    return 0;
+}
+
+/*
+ * Initialise the 64-bit x86 shared info page at @ptr: zero it and set
+ * evtchn_upcall_mask to 1 for every vcpu slot.  @dom is not used.
+ * Always returns 0.
+ */
+static int shared_info_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_64_t *shinfo = ptr;
+    int cpu = 0;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    memset(shinfo, 0, sizeof(*shinfo));
+    while (cpu < MAX_VIRT_CPUS)
+    {
+        shinfo->vcpu_info[cpu].evtchn_upcall_mask = 1;
+        cpu++;
+    }
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Fill the initial vcpu context for a 32-bit x86 guest.
+ *
+ * The context is zeroed, then: all 256 trap slots and both callbacks get
+ * the flat kernel code selector, the segment registers get the flat
+ * kernel selectors, eip is the kernel entry point, esp/kernel_sp point to
+ * the end of the boot stack page, esi to the start info page, and cr3 to
+ * the frame of the first page table page.  For pae modes 2 and 3 the
+ * pae_extended_cr3 vm assist is requested.  Always returns 0.
+ */
+static int vcpu_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_x86_32_t *ctxt = ptr;
+    xen_vaddr_t stack_top, start_info_va;
+    xen_pfn_t cr3_pfn;
+    int vec;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* virtual addresses handed to the guest in registers */
+    stack_top =
+        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    start_info_va =
+        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
+
+    /* Virtual IDT is empty at start-of-day. */
+    vec = 0;
+    while (vec < 256)
+    {
+        ctxt->trap_ctxt[vec].vector = vec;
+        ctxt->trap_ctxt[vec].cs = FLAT_KERNEL_CS_X86_32;
+        vec++;
+    }
+
+    /* No callback handlers. */
+    ctxt->event_callback_cs = FLAT_KERNEL_CS_X86_32;
+    ctxt->failsafe_callback_cs = FLAT_KERNEL_CS_X86_32;
+
+    /* flat segments */
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_32;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_32;
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_32;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_32;
+
+    /* entry point, stack and start info pointer */
+    ctxt->user_regs.eip = dom->parms.virt_entry;
+    ctxt->user_regs.esp = stack_top;
+    ctxt->user_regs.esi = start_info_va;
+    ctxt->user_regs.eflags = 1 << 9;   /* Interrupt Enable */
+
+    ctxt->kernel_ss = FLAT_KERNEL_SS_X86_32;
+    ctxt->kernel_sp = stack_top;
+
+    ctxt->flags = VGCF_in_kernel_X86_32;
+    switch (dom->parms.pae)
+    {
+    case 2 /* extended_cr3 */:
+    case 3 /* bimodal */:
+        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
+        break;
+    default:
+        break;
+    }
+
+    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
+    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_32(cr3_pfn);
+    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
+
+    return 0;
+}
+
+/*
+ * Fill the initial vcpu context for a 64-bit x86 guest.
+ *
+ * The context is zeroed, then: all 256 trap slots get the flat kernel
+ * code selector, the segment registers get the flat kernel selectors,
+ * rip is the kernel entry point, rsp/kernel_sp point to the end of the
+ * boot stack page, rsi to the start info page, and cr3 to the frame of
+ * the first page table page.  Always returns 0.
+ */
+static int vcpu_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    vcpu_guest_context_x86_64_t *ctxt = ptr;
+    xen_vaddr_t stack_top, start_info_va;
+    xen_pfn_t cr3_pfn;
+    int vec;
+
+    xc_dom_printf("%s: called\n", __FUNCTION__);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    /* virtual addresses handed to the guest in registers */
+    stack_top =
+        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
+    start_info_va =
+        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
+
+    /* Virtual IDT is empty at start-of-day. */
+    vec = 0;
+    while (vec < 256)
+    {
+        ctxt->trap_ctxt[vec].vector = vec;
+        ctxt->trap_ctxt[vec].cs = FLAT_KERNEL_CS_X86_64;
+        vec++;
+    }
+
+    /* flat segments */
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
+
+    /* entry point, stack and start info pointer */
+    ctxt->user_regs.rip = dom->parms.virt_entry;
+    ctxt->user_regs.rsp = stack_top;
+    ctxt->user_regs.rsi = start_info_va;
+    ctxt->user_regs.rflags = 1 << 9;   /* Interrupt Enable */
+
+    ctxt->kernel_ss = FLAT_KERNEL_SS_X86_64;
+    ctxt->kernel_sp = stack_top;
+
+    ctxt->flags = VGCF_in_kernel_X86_64;
+    cr3_pfn = xc_dom_p2m_guest(dom, dom->pgtables_seg.pfn);
+    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_64(cr3_pfn);
+    xc_dom_printf("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "\n",
+                  __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Hook table for guest type "xen-3.0-x86_32": the x86_32 page table
+ * routines together with the 32-bit start_info / shared_info / vcpu setup.
+ */
+static struct xc_dom_arch xc_dom_32 = {
+    /* identification and layout */
+    .guest_type = "xen-3.0-x86_32",
+    .sizeof_pfn = 4,   /* presumably bytes per guest pfn entry -- confirm */
+    .page_shift = PAGE_SHIFT_X86,
+
+    /* memory setup */
+    .alloc_magic_pages = alloc_magic_pages,
+    .count_pgtables = count_pgtables_x86_32,
+    .setup_pgtables = setup_pgtables_x86_32,
+
+    /* boot-time structures */
+    .vcpu = vcpu_x86_32,
+    .start_info = start_info_x86_32,
+    .shared_info = shared_info_x86_32,
+};
+/*
+ * Hook table for guest type "xen-3.0-x86_32p": uses the x86_32_pae page
+ * table routines, while start_info, shared_info and vcpu setup are the
+ * same 32-bit functions used by xc_dom_32.
+ */
+static struct xc_dom_arch xc_dom_32_pae = {
+    /* identification and layout */
+    .guest_type = "xen-3.0-x86_32p",
+    .sizeof_pfn = 4,   /* presumably bytes per guest pfn entry -- confirm */
+    .page_shift = PAGE_SHIFT_X86,
+
+    /* memory setup */
+    .alloc_magic_pages = alloc_magic_pages,
+    .count_pgtables = count_pgtables_x86_32_pae,
+    .setup_pgtables = setup_pgtables_x86_32_pae,
+
+    /* boot-time structures */
+    .vcpu = vcpu_x86_32,
+    .start_info = start_info_x86_32,
+    .shared_info = shared_info_x86_32,
+};
+
+/*
+ * Hook table for guest type "xen-3.0-x86_64": the x86_64 page table
+ * routines together with the 64-bit start_info / shared_info / vcpu setup.
+ */
+static struct xc_dom_arch xc_dom_64 = {
+    /* identification and layout */
+    .guest_type = "xen-3.0-x86_64",
+    .sizeof_pfn = 8,   /* presumably bytes per guest pfn entry -- confirm */
+    .page_shift = PAGE_SHIFT_X86,
+
+    /* memory setup */
+    .alloc_magic_pages = alloc_magic_pages,
+    .count_pgtables = count_pgtables_x86_64,
+    .setup_pgtables = setup_pgtables_x86_64,
+
+    /* boot-time structures */
+    .vcpu = vcpu_x86_64,
+    .start_info = start_info_x86_64,
+    .shared_info = shared_info_x86_64,
+};
+
+/*
+ * Register the three x86 hook tables with the domain builder core.
+ * NOTE(review): __init presumably makes this run automatically at load
+ * time -- confirm against the macro's definition.
+ */
+static void __init register_arch_hooks(void)
+{
+    /* Walked front to back, so the order is x86_32, x86_32p, x86_64. */
+    struct xc_dom_arch *tables[] = { &xc_dom_32, &xc_dom_32_pae, &xc_dom_64 };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(tables) / sizeof(tables[0]); k++)
+    {
+        xc_dom_register_arch_hooks(tables[k]);
+    }
+}

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.