[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 1bab7d65171b762bb3cf1ae426bc6c403f847ebf # Parent 4ba0982264290acfa208304b4e3343ec8c3ec903 # Parent 3e6325b73474b3764573178152503af27a914ab8 merge with xen-unstable.hg --- xen/arch/powerpc/htab.c | 68 -- .hgignore | 2 extras/mini-os/Makefile | 3 linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 7 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 42 - linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c | 56 + linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c | 2 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 11 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 10 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 4 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 8 linux-2.6-xen-sparse/drivers/xen/blktap/common.h | 1 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c | 23 linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 16 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 10 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 67 +- linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c | 12 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 21 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 34 - linux-2.6-xen-sparse/include/xen/xenbus.h | 6 tools/blktap/drivers/block-aio.c | 19 tools/blktap/drivers/block-qcow.c | 19 tools/blktap/drivers/tapdisk.c | 1 tools/blktap/lib/xs_api.c | 23 tools/libxc/ia64/xc_ia64_stubs.c | 16 tools/libxc/powerpc64/xc_linux_build.c | 4 tools/libxc/xenctrl.h | 5 tools/python/xen/xend/FlatDeviceTree.py | 323 ++++++++++ tools/python/xen/xend/XendCheckpoint.py | 6 tools/python/xen/xend/XendDomain.py | 3 tools/python/xen/xend/XendDomainInfo.py | 63 + tools/python/xen/xend/arch.py | 32 tools/python/xen/xend/image.py | 143 ++-- tools/python/xen/xend/server/DevController.py | 6 tools/python/xen/xend/server/XMLRPCServer.py | 3 tools/python/xen/xend/server/blkif.py | 16 tools/python/xen/xm/migrate.py | 3 tools/python/xen/xm/shutdown.py | 49 + 
xen/arch/ia64/xen/dom0_ops.c | 4 xen/arch/powerpc/Makefile | 31 xen/arch/powerpc/Rules.mk | 2 xen/arch/powerpc/backtrace.c | 193 +++++ xen/arch/powerpc/boot_of.c | 208 ++++-- xen/arch/powerpc/dart_u3.c | 8 xen/arch/powerpc/dom0_ops.c | 20 xen/arch/powerpc/domain.c | 58 - xen/arch/powerpc/domain_build.c | 60 + xen/arch/powerpc/exceptions.c | 2 xen/arch/powerpc/exceptions.h | 3 xen/arch/powerpc/external.c | 3 xen/arch/powerpc/iommu.c | 17 xen/arch/powerpc/memory.c | 206 ++++++ xen/arch/powerpc/mm.c | 298 ++++++++- xen/arch/powerpc/mpic.c | 6 xen/arch/powerpc/ofd_fixup.c | 101 --- xen/arch/powerpc/ofd_fixup_memory.c | 107 +++ xen/arch/powerpc/oftree.h | 8 xen/arch/powerpc/papr/tce.c | 6 xen/arch/powerpc/papr/xlate.c | 46 + xen/arch/powerpc/powerpc64/exceptions.S | 37 + xen/arch/powerpc/powerpc64/ppc970.c | 112 ++- xen/arch/powerpc/setup.c | 207 +++--- xen/arch/powerpc/shadow.c | 159 ++++ xen/arch/powerpc/xen.lds.S | 10 xen/arch/x86/hvm/io.c | 10 xen/arch/x86/hvm/platform.c | 32 xen/arch/x86/hvm/svm/intr.c | 43 - xen/arch/x86/hvm/svm/svm.c | 5 xen/arch/x86/hvm/vlapic.c | 14 xen/arch/x86/hvm/vmx/io.c | 13 xen/arch/x86/hvm/vmx/vmx.c | 29 xen/arch/x86/mm/shadow/multi.c | 66 -- xen/arch/x86/physdev.c | 5 xen/arch/x86/time.c | 4 xen/arch/x86/traps.c | 5 xen/arch/x86/x86_32/traps.c | 46 + xen/arch/x86/x86_64/traps.c | 43 - xen/common/perfc.c | 4 xen/include/asm-ia64/mm.h | 1 xen/include/asm-powerpc/config.h | 4 xen/include/asm-powerpc/current.h | 4 xen/include/asm-powerpc/domain.h | 7 xen/include/asm-powerpc/htab.h | 4 xen/include/asm-powerpc/mm.h | 183 +++-- xen/include/asm-powerpc/powerpc64/procarea.h | 1 xen/include/asm-powerpc/processor.h | 7 xen/include/asm-powerpc/shadow.h | 16 xen/include/asm-powerpc/smp.h | 4 xen/include/asm-powerpc/types.h | 19 xen/include/asm-x86/mm.h | 1 xen/include/asm-x86/page.h | 7 xen/include/asm-x86/processor.h | 7 xen/include/asm-x86/x86_32/page-2level.h | 3 xen/include/asm-x86/x86_32/page-3level.h | 2 xen/include/asm-x86/x86_32/page.h | 9 
xen/include/asm-x86/x86_64/page.h | 11 xen/include/public/arch-ia64.h | 3 xen/include/public/arch-powerpc.h | 3 xen/include/public/arch-x86_32.h | 17 xen/include/public/arch-x86_64.h | 3 xen/include/public/domctl.h | 21 xen/include/public/sysctl.h | 16 xen/include/public/xen.h | 1 103 files changed, 2690 insertions(+), 1032 deletions(-) diff -r 4ba098226429 -r 1bab7d65171b .hgignore --- a/.hgignore Fri Sep 01 12:52:12 2006 -0600 +++ b/.hgignore Fri Sep 01 13:04:02 2006 -0600 @@ -203,6 +203,8 @@ ^xen/arch/powerpc/firmware$ ^xen/arch/powerpc/firmware_image$ ^xen/arch/powerpc/xen\.lds$ +^xen/arch/powerpc/.xen-syms$ +^xen/arch/powerpc/xen-syms.S$ ^unmodified_drivers/linux-2.6/\.tmp_versions ^unmodified_drivers/linux-2.6/.*\.cmd$ ^unmodified_drivers/linux-2.6/.*\.ko$ diff -r 4ba098226429 -r 1bab7d65171b extras/mini-os/Makefile --- a/extras/mini-os/Makefile Fri Sep 01 12:52:12 2006 -0600 +++ b/extras/mini-os/Makefile Fri Sep 01 13:04:02 2006 -0600 @@ -7,9 +7,12 @@ include $(XEN_ROOT)/Config.mk # Set TARGET_ARCH override TARGET_ARCH := $(XEN_TARGET_ARCH) +XEN_INTERFACE_VERSION := 0x00030203 + # NB. '-Wcast-qual' is nasty, so I omitted it. CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline +CFLAGS += -D__XEN_INTERFACE_VERSION__=$(XEN_INTERFACE_VERSION) ASFLAGS = -D__ASSEMBLY__ diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Fri Sep 01 13:04:02 2006 -0600 @@ -1380,8 +1380,10 @@ legacy_init_iomem_resources(struct e820e * so we try it repeatedly and let the resource manager * test it. 
*/ +#ifndef CONFIG_XEN request_resource(res, code_resource); request_resource(res, data_resource); +#endif #ifdef CONFIG_KEXEC request_resource(res, &crashk_res); #endif @@ -1454,11 +1456,8 @@ static void __init register_memory(void) int i; /* Nothing to do if not running in dom0. */ - if (!is_initial_xendomain()) { - legacy_init_iomem_resources(e820.map, e820.nr_map, - &code_resource, &data_resource); + if (!is_initial_xendomain()) return; - } #ifdef CONFIG_XEN machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE); diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Fri Sep 01 13:04:02 2006 -0600 @@ -22,15 +22,6 @@ #define ISA_START_ADDRESS 0x0 #define ISA_END_ADDRESS 0x100000 -#if 0 /* not PAE safe */ -/* These hacky macros avoid phys->machine translations. */ -#define __direct_pte(x) ((pte_t) { (x) } ) -#define __direct_mk_pte(page_nr,pgprot) \ - __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) -#define direct_mk_pte_phys(physpage, pgprot) \ - __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) -#endif - static int direct_remap_area_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long address, @@ -66,17 +57,16 @@ static int __direct_remap_pfn_range(stru for (i = 0; i < size; i += PAGE_SIZE) { if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) { - /* Fill in the PTE pointers. */ + /* Flush a full batch after filling in the PTE ptrs. */ rc = apply_to_page_range(mm, start_address, address - start_address, direct_remap_area_pte_fn, &w); if (rc) goto out; - w = u; rc = -EFAULT; if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) goto out; - v = u; + v = w = u; start_address = address; } @@ -92,7 +82,7 @@ static int __direct_remap_pfn_range(stru } if (v != u) { - /* get the ptep's filled in */ + /* Final batch. 
*/ rc = apply_to_page_range(mm, start_address, address - start_address, direct_remap_area_pte_fn, &w); @@ -178,32 +168,6 @@ int touch_pte_range(struct mm_struct *mm } EXPORT_SYMBOL(touch_pte_range); - -void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot) -{ - int error; - - struct vm_struct *vma; - vma = get_vm_area (vm_size, VM_IOREMAP); - - if (vma == NULL) { - printk ("ioremap.c,vm_map_xen_pages(): " - "Failed to get VMA area\n"); - return NULL; - } - - error = direct_kernel_remap_pfn_range((unsigned long) vma->addr, - maddr >> PAGE_SHIFT, vm_size, - prot, DOMID_SELF ); - if (error == 0) { - return vma->addr; - } else { - printk ("ioremap.c,vm_map_xen_pages(): " - "Failed to map xen shared pages into kernel space\n"); - return NULL; - } -} -EXPORT_SYMBOL(vm_map_xen_pages); /* * Does @address reside within a non-highmem page that is local to this virtual diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c --- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c Fri Sep 01 13:04:02 2006 -0600 @@ -26,6 +26,7 @@ #include <xen/evtchn.h> #include "op_counter.h" +#include <xen/driver_util.h> #include <xen/interface/xen.h> #include <xen/interface/xenoprof.h> #include <../../../drivers/oprofile/cpu_buffer.h> @@ -33,8 +34,6 @@ static int xenoprof_start(void); static void xenoprof_stop(void); - -void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot); static int xenoprof_enabled = 0; static unsigned int num_events = 0; @@ -373,9 +372,9 @@ static int xenoprof_set_passive(int * p_ { int ret; int i, j; - int vm_size; int npages; struct xenoprof_buf *buf; + struct vm_struct *area; pgprot_t prot = __pgprot(_KERNPG_TABLE); if (!is_primary) @@ -391,19 +390,29 @@ static int xenoprof_set_passive(int * p_ for (i = 0; i < pdoms; i++) { passive_domains[i].domain_id = p_domains[i]; passive_domains[i].max_samples = 2048; - ret = 
HYPERVISOR_xenoprof_op(XENOPROF_set_passive, &passive_domains[i]); + ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, + &passive_domains[i]); if (ret) - return ret; + goto out; npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1; - vm_size = npages * PAGE_SIZE; - - p_shared_buffer[i] = (char *)vm_map_xen_pages(passive_domains[i].buf_maddr, - vm_size, prot); - if (!p_shared_buffer[i]) { + + area = alloc_vm_area(npages * PAGE_SIZE); + if (area == NULL) { ret = -ENOMEM; goto out; } + + ret = direct_kernel_remap_pfn_range( + (unsigned long)area->addr, + passive_domains[i].buf_maddr >> PAGE_SHIFT, + npages * PAGE_SIZE, prot, DOMID_SELF); + if (ret) { + vunmap(area->addr); + goto out; + } + + p_shared_buffer[i] = area->addr; for (j = 0; j < passive_domains[i].nbuf; j++) { buf = (struct xenoprof_buf *) @@ -473,11 +482,9 @@ int __init oprofile_arch_init(struct opr int __init oprofile_arch_init(struct oprofile_operations * ops) { struct xenoprof_init init; - struct xenoprof_buf * buf; - int vm_size; - int npages; - int ret; - int i; + struct xenoprof_buf *buf; + int npages, ret, i; + struct vm_struct *area; init.max_samples = 16; ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init); @@ -495,14 +502,23 @@ int __init oprofile_arch_init(struct opr num_events = OP_MAX_COUNTER; npages = (init.bufsize * nbuf - 1) / PAGE_SIZE + 1; - vm_size = npages * PAGE_SIZE; - - shared_buffer = (char *)vm_map_xen_pages(init.buf_maddr, - vm_size, prot); - if (!shared_buffer) { + + area = alloc_vm_area(npages * PAGE_SIZE); + if (area == NULL) { ret = -ENOMEM; goto out; } + + ret = direct_kernel_remap_pfn_range( + (unsigned long)area->addr, + init.buf_maddr >> PAGE_SHIFT, + npages * PAGE_SIZE, prot, DOMID_SELF); + if (ret) { + vunmap(area->addr); + goto out; + } + + shared_buffer = area->addr; for (i=0; i< nbuf; i++) { buf = (struct xenoprof_buf*) diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c --- 
a/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/e820-xen.c Fri Sep 01 13:04:02 2006 -0600 @@ -255,8 +255,10 @@ void __init e820_reserve_resources(struc * so we try it repeatedly and let the resource manager * test it. */ +#ifndef CONFIG_XEN request_resource(res, &code_resource); request_resource(res, &data_resource); +#endif #ifdef CONFIG_KEXEC request_resource(res, &crashk_res); #endif diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Fri Sep 01 13:04:02 2006 -0600 @@ -846,7 +846,7 @@ void __init setup_arch(char **cmdline_p) if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Make sure we have a large enough P->M table. */ - phys_to_machine_mapping = alloc_bootmem( + phys_to_machine_mapping = alloc_bootmem_pages( end_pfn * sizeof(unsigned long)); memset(phys_to_machine_mapping, ~0, end_pfn * sizeof(unsigned long)); @@ -863,7 +863,7 @@ void __init setup_arch(char **cmdline_p) * list of frames that make up the p2m table. Used by * save/restore. 
*/ - pfn_to_mfn_frame_list_list = alloc_bootmem(PAGE_SIZE); + pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = virt_to_mfn(pfn_to_mfn_frame_list_list); @@ -873,7 +873,7 @@ void __init setup_arch(char **cmdline_p) k++; BUG_ON(k>=fpp); pfn_to_mfn_frame_list[k] = - alloc_bootmem(PAGE_SIZE); + alloc_bootmem_pages(PAGE_SIZE); pfn_to_mfn_frame_list_list[k] = virt_to_mfn(pfn_to_mfn_frame_list[k]); j=0; @@ -944,9 +944,10 @@ void __init setup_arch(char **cmdline_p) BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)); e820_reserve_resources(machine_e820, memmap.nr_entries); - } else -#endif + } +#else e820_reserve_resources(e820.map, e820.nr_map); +#endif request_resource(&iomem_resource, &video_ram_resource); diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Sep 01 13:04:02 2006 -0600 @@ -301,11 +301,11 @@ static void frontend_changed(struct xenb struct backend_info *be = dev->dev.driver_data; int err; - DPRINTK(""); + DPRINTK("%s", xenbus_strstate(frontend_state)); switch (frontend_state) { case XenbusStateInitialising: - if (dev->state == XenbusStateClosing) { + if (dev->state == XenbusStateClosed) { printk("%s: %s: prepare for reconnect\n", __FUNCTION__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); @@ -331,8 +331,12 @@ static void frontend_changed(struct xenb xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ case XenbusStateUnknown: - case XenbusStateClosed: device_unregister(&dev->dev); break; diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c 
Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Sep 01 13:04:02 2006 -0600 @@ -273,7 +273,7 @@ static void backend_changed(struct xenbu xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); down(&bd->bd_sem); - if (info->users > 0) + if (info->users > 0 && system_state == SYSTEM_RUNNING) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else @@ -360,7 +360,7 @@ static void blkfront_closing(struct xenb xlvbd_del(info); - xenbus_switch_state(dev, XenbusStateClosed); + xenbus_frontend_closed(dev); } diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Fri Sep 01 13:04:02 2006 -0600 @@ -114,8 +114,8 @@ typedef struct domid_translate { } domid_translate_t ; -domid_translate_t translate_domid[MAX_TAP_DEV]; -tap_blkif_t *tapfds[MAX_TAP_DEV]; +static domid_translate_t translate_domid[MAX_TAP_DEV]; +static tap_blkif_t *tapfds[MAX_TAP_DEV]; static int __init set_blkif_reqs(char *str) { @@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif "ring does not exist!\n"); print_dbug = 0; /*We only print this message once*/ } - return 1; + return 0; } info = tapfds[blkif->dev_num]; @@ -1127,7 +1127,7 @@ static int do_block_io_op(blkif_t *blkif WPRINTK("Can't get UE info!\n"); print_dbug = 0; } - return 1; + return 0; } while (rc != rp) { diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/common.h --- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h Fri Sep 01 13:04:02 2006 -0600 @@ -91,6 +91,7 @@ void tap_blkif_free(blkif_t *blkif); void tap_blkif_free(blkif_t *blkif); int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); +void tap_blkif_unmap(blkif_t *blkif); #define blkif_get(_b) 
(atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Fri Sep 01 13:04:02 2006 -0600 @@ -135,20 +135,25 @@ int tap_blkif_map(blkif_t *blkif, unsign return 0; } +void tap_blkif_unmap(blkif_t *blkif) +{ + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + if (blkif->blk_ring.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_ring.sring = NULL; + } +} + void tap_blkif_free(blkif_t *blkif) { atomic_dec(&blkif->refcnt); wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - /* Already disconnected? */ - if (blkif->irq) - unbind_from_irqhandler(blkif->irq, blkif); - - if (blkif->blk_ring.sring) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - } - + tap_blkif_unmap(blkif); kmem_cache_free(blkif_cachep, blkif); } diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Fri Sep 01 13:04:02 2006 -0600 @@ -247,6 +247,11 @@ static void tap_frontend_changed(struct switch (frontend_state) { case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } break; case XenbusStateInitialised: @@ -264,11 +269,20 @@ static void tap_frontend_changed(struct break; case XenbusStateClosing: + if (be->blkif->xenblkd) { + kthread_stop(be->blkif->xenblkd); + be->blkif->xenblkd = NULL; + } + tap_blkif_unmap(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if 
(xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ case XenbusStateUnknown: - case XenbusStateClosed: device_unregister(&dev->dev); break; diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Sep 01 13:04:02 2006 -0600 @@ -228,13 +228,13 @@ static void frontend_changed(struct xenb { struct backend_info *be = dev->dev.driver_data; - DPRINTK(""); + DPRINTK("%s", xenbus_strstate(frontend_state)); be->frontend_state = frontend_state; switch (frontend_state) { case XenbusStateInitialising: - if (dev->state == XenbusStateClosing) { + if (dev->state == XenbusStateClosed) { printk("%s: %s: prepare for reconnect\n", __FUNCTION__, dev->nodename); if (be->netif) { @@ -260,8 +260,12 @@ static void frontend_changed(struct xenb xenbus_switch_state(dev, XenbusStateClosing); break; + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ case XenbusStateUnknown: - case XenbusStateClosed: if (be->netif != NULL) kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); device_unregister(&dev->dev); diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Sep 01 13:04:02 2006 -0600 @@ -193,6 +193,7 @@ static void netfront_closing(struct xenb static void end_access(int, void *); static void netif_disconnect_backend(struct netfront_info *); +static int open_netdev(struct netfront_info *); static void close_netdev(struct netfront_info *); static void netif_free(struct netfront_info *); @@ -263,15 +264,22 @@ static int __devinit netfront_probe(stru dev->dev.driver_data = info; err = talk_to_backend(dev, info); - if (err) { - 
xennet_sysfs_delif(info->netdev); - unregister_netdev(netdev); - free_netdev(netdev); - dev->dev.driver_data = NULL; - return err; - } + if (err) + goto fail_backend; + + err = open_netdev(info); + if (err) + goto fail_open; return 0; + + fail_open: + xennet_sysfs_delif(info->netdev); + unregister_netdev(netdev); + fail_backend: + free_netdev(netdev); + dev->dev.driver_data = NULL; + return err; } @@ -478,7 +486,7 @@ static void backend_changed(struct xenbu struct netfront_info *np = dev->dev.driver_data; struct net_device *netdev = np->netdev; - DPRINTK("\n"); + DPRINTK("%s\n", xenbus_strstate(backend_state)); switch (backend_state) { case XenbusStateInitialising: @@ -1887,27 +1895,9 @@ create_netdev(int handle, int copying_re SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &dev->dev); - err = register_netdev(netdev); - if (err) { - printk(KERN_WARNING "%s> register_netdev err=%d\n", - __FUNCTION__, err); - goto exit_free_rx; - } - - err = xennet_sysfs_addif(netdev); - if (err) { - /* This can be non-fatal: it only means no tuning parameters */ - printk(KERN_WARNING "%s> add sysfs failed err=%d\n", - __FUNCTION__, err); - } - np->netdev = netdev; - return netdev; - - exit_free_rx: - gnttab_free_grant_references(np->gref_rx_head); exit_free_tx: gnttab_free_grant_references(np->gref_tx_head); exit: @@ -1946,11 +1936,10 @@ static void netfront_closing(struct xenb { struct netfront_info *info = dev->dev.driver_data; - DPRINTK("netfront_closing: %s removed\n", dev->nodename); + DPRINTK("%s\n", dev->nodename); close_netdev(info); - - xenbus_switch_state(dev, XenbusStateClosed); + xenbus_frontend_closed(dev); } @@ -1966,6 +1955,26 @@ static int __devexit netfront_remove(str return 0; } + +static int open_netdev(struct netfront_info *info) +{ + int err; + + err = register_netdev(info->netdev); + if (err) { + printk(KERN_WARNING "%s: register_netdev err=%d\n", + __FUNCTION__, err); + return err; + } + + err = xennet_sysfs_addif(info->netdev); + if (err) { + /* This can be 
non-fatal: it only means no tuning parameters */ + printk(KERN_WARNING "%s: add sysfs failed err=%d\n", + __FUNCTION__, err); + } + return 0; +} static void close_netdev(struct netfront_info *info) { diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_backend_client.c Fri Sep 01 13:04:02 2006 -0600 @@ -132,4 +132,16 @@ int xenbus_unmap_ring(struct xenbus_devi } EXPORT_SYMBOL_GPL(xenbus_unmap_ring); +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + MODULE_LICENSE("Dual BSD/GPL"); diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Fri Sep 01 13:04:02 2006 -0600 @@ -41,6 +41,20 @@ extern char *kasprintf(const char *fmt, #define DPRINTK(fmt, args...) \ pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +char *xenbus_strstate(enum xenbus_state state) +{ + static char *name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? 
name[state] : "INVALID"; +} + int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, @@ -124,6 +138,13 @@ int xenbus_switch_state(struct xenbus_de } EXPORT_SYMBOL_GPL(xenbus_switch_state); +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); + complete(&dev->down); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_frontend_closed); /** * Return the path to the error node for the given device, or NULL on failure. diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Sep 01 13:04:02 2006 -0600 @@ -73,6 +73,7 @@ static int xenbus_probe_backend(const ch static int xenbus_dev_probe(struct device *_dev); static int xenbus_dev_remove(struct device *_dev); +static void xenbus_dev_shutdown(struct device *_dev); /* If something in array of ids matches this device, return it. 
*/ static const struct xenbus_device_id * @@ -192,6 +193,7 @@ static struct xen_bus_type xenbus_fronte .match = xenbus_match, .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, }, .dev = { .bus_id = "xen", @@ -246,6 +248,7 @@ static struct xen_bus_type xenbus_backen .match = xenbus_match, .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, +// .shutdown = xenbus_dev_shutdown, .uevent = xenbus_uevent_backend, }, .dev = { @@ -316,8 +319,9 @@ static void otherend_changed(struct xenb state = xenbus_read_driver_state(dev->otherend); - DPRINTK("state is %d, %s, %s", - state, dev->otherend_watch.node, vec[XS_WATCH_PATH]); + DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state), + dev->otherend_watch.node, vec[XS_WATCH_PATH]); + if (drv->otherend_changed) drv->otherend_changed(dev, state); } @@ -348,7 +352,7 @@ static int xenbus_dev_probe(struct devic const struct xenbus_device_id *id; int err; - DPRINTK(""); + DPRINTK("%s", dev->nodename); if (!drv->probe) { err = -ENODEV; @@ -393,7 +397,7 @@ static int xenbus_dev_remove(struct devi struct xenbus_device *dev = to_xenbus_device(_dev); struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); - DPRINTK(""); + DPRINTK("%s", dev->nodename); free_otherend_watch(dev); free_otherend_details(dev); @@ -403,6 +407,27 @@ static int xenbus_dev_remove(struct devi xenbus_switch_state(dev, XenbusStateClosed); return 0; +} + +static void xenbus_dev_shutdown(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned long timeout = 5*HZ; + + DPRINTK("%s", dev->nodename); + + get_device(&dev->dev); + if (dev->state != XenbusStateConnected) { + printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__, + dev->nodename, xenbus_strstate(dev->state)); + goto out; + } + xenbus_switch_state(dev, XenbusStateClosing); + timeout = wait_for_completion_timeout(&dev->down, timeout); + if (!timeout) + printk("%s: %s timeout closing device\n", __FUNCTION__, 
dev->nodename); + out: + put_device(&dev->dev); } static int xenbus_register_driver_common(struct xenbus_driver *drv, @@ -587,6 +612,7 @@ static int xenbus_probe_node(struct xen_ tmpstring += strlen(tmpstring) + 1; strcpy(tmpstring, type); xendev->devicetype = tmpstring; + init_completion(&xendev->down); xendev->dev.parent = &bus->dev; xendev->dev.bus = &bus->bus; diff -r 4ba098226429 -r 1bab7d65171b linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 12:52:12 2006 -0600 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Fri Sep 01 13:04:02 2006 -0600 @@ -37,6 +37,7 @@ #include <linux/device.h> #include <linux/notifier.h> #include <linux/mutex.h> +#include <linux/completion.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> #include <xen/interface/io/xenbus.h> @@ -74,6 +75,7 @@ struct xenbus_device { struct xenbus_watch otherend_watch; struct device dev; enum xenbus_state state; + struct completion down; }; static inline struct xenbus_device *to_xenbus_device(struct device *dev) @@ -297,4 +299,8 @@ void xenbus_dev_fatal(struct xenbus_devi int __init xenbus_dev_init(void); +char *xenbus_strstate(enum xenbus_state state); +int xenbus_dev_is_online(struct xenbus_device *dev); +int xenbus_frontend_closed(struct xenbus_device *dev); + #endif /* _XEN_XENBUS_H */ diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-aio.c --- a/tools/blktap/drivers/block-aio.c Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/blktap/drivers/block-aio.c Fri Sep 01 13:04:02 2006 -0600 @@ -52,7 +52,7 @@ */ #define REQUEST_ASYNC_FD 1 -#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8) +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) struct pending_aio { td_callback_t cb; @@ -146,7 +146,7 @@ int tdaio_open (struct td_state *s, cons struct tdaio_state *prv = (struct tdaio_state *)s->private; s->private = prv; - DPRINTF("XXX: block-aio open('%s')", name); + DPRINTF("block-aio open('%s')", name); /* 
Initialize AIO */ prv->iocb_free_count = MAX_AIO_REQS; prv->iocb_queued = 0; @@ -156,9 +156,18 @@ int tdaio_open (struct td_state *s, cons if (prv->poll_fd < 0) { ret = prv->poll_fd; - DPRINTF("Couldn't get fd for AIO poll support. This is " - "probably because your kernel does not have the " - "aio-poll patch applied.\n"); + if (ret == -EAGAIN) { + DPRINTF("Couldn't setup AIO context. If you are " + "trying to concurrently use a large number " + "of blktap-based disks, you may need to " + "increase the system-wide aio request limit. " + "(e.g. 'echo echo 1048576 > /proc/sys/fs/" + "aio-max-nr')\n"); + } else { + DPRINTF("Couldn't get fd for AIO poll support. This " + "is probably because your kernel does not " + "have the aio-poll patch applied.\n"); + } goto done; } diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/block-qcow.c --- a/tools/blktap/drivers/block-qcow.c Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/blktap/drivers/block-qcow.c Fri Sep 01 13:04:02 2006 -0600 @@ -51,7 +51,7 @@ /******AIO DEFINES******/ #define REQUEST_ASYNC_FD 1 #define MAX_QCOW_IDS 0xFFFF -#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8) +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) struct pending_aio { td_callback_t cb; @@ -176,10 +176,21 @@ static int init_aio_state(struct td_stat s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD; s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx); - if (s->poll_fd < 0) { - DPRINTF("Retrieving Async poll fd failed\n"); + if (s->poll_fd < 0) { + if (s->poll_fd == -EAGAIN) { + DPRINTF("Couldn't setup AIO context. If you are " + "trying to concurrently use a large number " + "of blktap-based disks, you may need to " + "increase the system-wide aio request limit. " + "(e.g. 'echo echo 1048576 > /proc/sys/fs/" + "aio-max-nr')\n"); + } else { + DPRINTF("Couldn't get fd for AIO poll support. 
This " + "is probably because your kernel does not " + "have the aio-poll patch applied.\n"); + } goto fail; - } + } for (i=0;i<MAX_AIO_REQS;i++) s->iocb_free[i] = &s->iocb_list[i]; diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/drivers/tapdisk.c --- a/tools/blktap/drivers/tapdisk.c Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/blktap/drivers/tapdisk.c Fri Sep 01 13:04:02 2006 -0600 @@ -110,6 +110,7 @@ static void unmap_disk(struct td_state * free(s->fd_entry); free(s->blkif); free(s->ring_info); + free(s->private); free(s); return; diff -r 4ba098226429 -r 1bab7d65171b tools/blktap/lib/xs_api.c --- a/tools/blktap/lib/xs_api.c Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/blktap/lib/xs_api.c Fri Sep 01 13:04:02 2006 -0600 @@ -204,7 +204,7 @@ int convert_dev_name_to_num(char *name) int convert_dev_name_to_num(char *name) { char *p_sd, *p_hd, *p_xvd, *p_plx, *p, *alpha,*ptr; int majors[10] = {3,22,33,34,56,57,88,89,90,91}; - int maj,i; + int maj,i,ret = 0; asprintf(&p_sd,"/dev/sd"); asprintf(&p_hd,"/dev/hd"); @@ -221,7 +221,7 @@ int convert_dev_name_to_num(char *name) *ptr++; } *p++; - return BASE_DEV_VAL + (16*i) + atoi(p); + ret = BASE_DEV_VAL + (16*i) + atoi(p); } else if (strstr(name, p_hd) != NULL) { p = name + strlen(p_hd); for (i = 0, ptr = alpha; i < strlen(alpha); i++) { @@ -229,7 +229,7 @@ int convert_dev_name_to_num(char *name) *ptr++; } *p++; - return (majors[i/2]*256) + atoi(p); + ret = (majors[i/2]*256) + atoi(p); } else if (strstr(name, p_xvd) != NULL) { p = name + strlen(p_xvd); @@ -238,17 +238,24 @@ int convert_dev_name_to_num(char *name) *ptr++; } *p++; - return (202*256) + (16*i) + atoi(p); + ret = (202*256) + (16*i) + atoi(p); } else if (strstr(name, p_plx) != NULL) { p = name + strlen(p_plx); - return atoi(p); + ret = atoi(p); } else { DPRINTF("Unknown device type, setting to default.\n"); - return BASE_DEV_VAL; - } - return 0; + ret = BASE_DEV_VAL; + } + + free(p_sd); + free(p_hd); + free(p_xvd); + free(p_plx); + free(alpha); + + return ret; } /** 
diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/ia64/xc_ia64_stubs.c --- a/tools/libxc/ia64/xc_ia64_stubs.c Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/libxc/ia64/xc_ia64_stubs.c Fri Sep 01 13:04:02 2006 -0600 @@ -36,7 +36,6 @@ xc_ia64_get_pfn_list(int xc_handle, uint struct xen_domctl domctl; int num_pfns,ret; unsigned int __start_page, __nr_pages; - unsigned long max_pfns; xen_pfn_t *__pfn_buf; __start_page = start_page; @@ -44,27 +43,22 @@ xc_ia64_get_pfn_list(int xc_handle, uint __pfn_buf = pfn_buf; while (__nr_pages) { - max_pfns = ((unsigned long)__start_page << 32) | __nr_pages; domctl.cmd = XEN_DOMCTL_getmemlist; - domctl.domain = (domid_t)domid; - domctl.u.getmemlist.max_pfns = max_pfns; + domctl.domain = (domid_t)domid; + domctl.u.getmemlist.max_pfns = __nr_pages; + domctl.u.getmemlist.start_pfn =__start_page; domctl.u.getmemlist.num_pfns = 0; set_xen_guest_handle(domctl.u.getmemlist.buffer, __pfn_buf); - if ((max_pfns != -1UL) - && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) { + if (mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0) { PERROR("Could not lock pfn list buffer"); return -1; } ret = do_domctl(xc_handle, &domctl); - if (max_pfns != -1UL) - (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)); + (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)); - if (max_pfns == -1UL) - return 0; - num_pfns = domctl.u.getmemlist.num_pfns; __start_page += num_pfns; __nr_pages -= num_pfns; diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/powerpc64/xc_linux_build.c --- a/tools/libxc/powerpc64/xc_linux_build.c Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/libxc/powerpc64/xc_linux_build.c Fri Sep 01 13:04:02 2006 -0600 @@ -309,7 +309,7 @@ static unsigned long create_start_info(s si->store_evtchn = store_evtchn; si->console.domU.mfn = si->nr_pages - 3; si->console.domU.evtchn = console_evtchn; - si_addr = eomem - (PAGE_SIZE * 4); + si_addr = (si->nr_pages - 4) << PAGE_SHIFT; return si_addr; } @@ -388,7 +388,7 @@ int xc_linux_build(int 
xc_handle, } si_addr = create_start_info(&si, console_evtchn, store_evtchn); - *console_mfn = page_array[si.console_mfn]; + *console_mfn = page_array[si.console.domU.mfn]; *store_mfn = page_array[si.store_mfn]; if (install_image(xc_handle, domid, page_array, &si, si_addr, diff -r 4ba098226429 -r 1bab7d65171b tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/libxc/xenctrl.h Fri Sep 01 13:04:02 2006 -0600 @@ -8,6 +8,11 @@ #ifndef XENCTRL_H #define XENCTRL_H + +/* Tell the Xen public headers we are a user-space tools build. */ +#ifndef __XEN_TOOLS__ +#define __XEN_TOOLS__ 1 +#endif #include <stddef.h> #include <stdint.h> diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Fri Sep 01 13:04:02 2006 -0600 @@ -161,10 +161,12 @@ def restore(xd, fd): if handler.store_mfn is None or handler.console_mfn is None: raise XendError('Could not read store/console MFN') + #Block until src closes connection + os.read(fd, 1) dominfo.unpause() - + dominfo.completeRestore(handler.store_mfn, handler.console_mfn) - + return dominfo except: dominfo.destroy() diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/XendDomain.py Fri Sep 01 13:04:02 2006 -0600 @@ -431,7 +431,8 @@ class XendDomain: sock.send("receive\n") sock.recv(80) XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst) - + dominfo.testDeviceComplete() + sock.close() def domain_save(self, domid, dst): """Start saving a domain to file. 
diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Sep 01 13:04:02 2006 -0600 @@ -30,7 +30,6 @@ import time import time import threading import os -import math import xen.lowlevel.xc from xen.util import asserts @@ -703,6 +702,9 @@ class XendDomainInfo: if security[idx][0] == 'ssidref': to_store['security/ssidref'] = str(security[idx][1]) + if not self.readVm('xend/restart_count'): + to_store['xend/restart_count'] = str(0) + log.debug("Storing VM details: %s", to_store) self.writeVm(to_store) @@ -823,6 +825,9 @@ class XendDomainInfo: def setResume(self, state): self.info['resume'] = state + + def getRestartCount(self): + return self.readVm('xend/restart_count') def refreshShutdown(self, xeninfo = None): # If set at the end of this method, a restart is required, with the @@ -1280,34 +1285,28 @@ class XendDomainInfo: for v in range(0, self.info['max_vcpu_id']+1): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) - # set domain maxmem in KiB - xc.domain_setmaxmem(self.domid, self.info['maxmem'] * 1024) - - m = self.image.getDomainMemory(self.info['memory'] * 1024) + # set memory limit + maxmem = self.image.getRequiredMemory(self.info['maxmem'] * 1024) + xc.domain_setmaxmem(self.domid, maxmem) + + mem_kb = self.image.getRequiredMemory(self.info['memory'] * 1024) # get the domain's shadow memory requirement - sm = int(math.ceil(self.image.getDomainShadowMemory(m) / 1024.0)) - if self.info['shadow_memory'] > sm: - sm = self.info['shadow_memory'] + shadow_kb = self.image.getRequiredShadowMemory(mem_kb) + shadow_kb_req = self.info['shadow_memory'] * 1024 + if shadow_kb_req > shadow_kb: + shadow_kb = shadow_kb_req + shadow_mb = (shadow_kb + 1023) / 1024 # Make sure there's enough RAM available for the domain - balloon.free(m + sm * 1024) + balloon.free(mem_kb + shadow_mb * 1024) # Set up the shadow memory - sm = 
xc.shadow_mem_control(self.domid, mb=sm) - self.info['shadow_memory'] = sm - - init_reservation = self.info['memory'] * 1024 - if os.uname()[4] in ('ia64', 'ppc64'): - # Workaround for architectures that don't yet support - # ballooning. - init_reservation = m - # Following line from xiantao.zhang@xxxxxxxxx - # Needed for IA64 until supports ballooning -- okay for PPC64? - xc.domain_setmaxmem(self.domid, m) - - xc.domain_memory_increase_reservation(self.domid, init_reservation, - 0, 0) + shadow_cur = xc.shadow_mem_control(self.domid, shadow_mb) + self.info['shadow_memory'] = shadow_cur + + # initial memory allocation + xc.domain_memory_increase_reservation(self.domid, mem_kb, 0, 0) self.createChannels() @@ -1495,6 +1494,21 @@ class XendDomainInfo: if rc != 0: raise XendError("Device of type '%s' refuses migration." % n) + def testDeviceComplete(self): + """ For Block IO migration safety we must ensure that + the device has shutdown correctly, i.e. all blocks are + flushed to disk + """ + while True: + test = 0 + for i in self.getDeviceController('vbd').deviceIDs(): + test = 1 + log.info("Dev %s still active, looping...", i) + time.sleep(0.1) + + if test == 0: + break + def migrateDevices(self, network, dst, step, domName=''): """Notify the devices about migration """ @@ -1615,6 +1629,9 @@ class XendDomainInfo: try: new_dom = XendDomain.instance().domain_create(config) new_dom.unpause() + rst_cnt = self.readVm('xend/restart_count') + rst_cnt = int(rst_cnt) + 1 + self.writeVm('xend/restart_count', str(rst_cnt)) new_dom.removeVm(RESTART_IN_PROGRESS) except: if new_dom: diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/image.py Fri Sep 01 13:04:02 2006 -0600 @@ -27,6 +27,8 @@ from xen.xend.XendLogging import log from xen.xend.XendLogging import log from xen.xend.server.netif import randomMAC from xen.xend.xenstore.xswatch import xswatch +from xen.xend 
import arch +from xen.xend import FlatDeviceTree xc = xen.lowlevel.xc.xc() @@ -141,19 +143,10 @@ class ImageHandler: raise VmError('Building domain failed: ostype=%s dom=%d err=%s' % (self.ostype, self.vm.getDomid(), str(result))) - - def getDomainMemory(self, mem_kb): - """@return The memory required, in KiB, by the domain to store the - given amount, also in KiB.""" - if os.uname()[4] != 'ia64': - # A little extra because auto-ballooning is broken w.r.t. HVM - # guests. Also, slack is necessary for live migration since that - # uses shadow page tables. - if 'hvm' in xc.xeninfo()['xen_caps']: - mem_kb += 4*1024; + def getRequiredMemory(self, mem_kb): return mem_kb - def getDomainShadowMemory(self, mem_kb): + def getRequiredShadowMemory(self, mem_kb): """@return The minimum shadow memory required, in KiB, for a domain with mem_kb KiB of RAM.""" # PV domains don't need any shadow memory @@ -197,9 +190,39 @@ class LinuxImageHandler(ImageHandler): ramdisk = self.ramdisk, features = self.vm.getFeatures()) +class PPC_LinuxImageHandler(LinuxImageHandler): + + ostype = "linux" + + def configure(self, imageConfig, deviceConfig): + LinuxImageHandler.configure(self, imageConfig, deviceConfig) + self.imageConfig = imageConfig + + def buildDomain(self): + store_evtchn = self.vm.getStorePort() + console_evtchn = self.vm.getConsolePort() + + log.debug("dom = %d", self.vm.getDomid()) + log.debug("image = %s", self.kernel) + log.debug("store_evtchn = %d", store_evtchn) + log.debug("console_evtchn = %d", console_evtchn) + log.debug("cmdline = %s", self.cmdline) + log.debug("ramdisk = %s", self.ramdisk) + log.debug("vcpus = %d", self.vm.getVCpuCount()) + log.debug("features = %s", self.vm.getFeatures()) + + devtree = FlatDeviceTree.build(self) + + return xc.linux_build(dom = self.vm.getDomid(), + image = self.kernel, + store_evtchn = store_evtchn, + console_evtchn = console_evtchn, + cmdline = self.cmdline, + ramdisk = self.ramdisk, + features = self.vm.getFeatures(), + arch_args = 
devtree.to_bin()) + class HVMImageHandler(ImageHandler): - - ostype = "hvm" def configure(self, imageConfig, deviceConfig): ImageHandler.configure(self, imageConfig, deviceConfig) @@ -282,7 +305,7 @@ class HVMImageHandler(ImageHandler): for (name, info) in deviceConfig: if name == 'vbd': uname = sxp.child_value(info, 'uname') - if 'file:' in uname: + if uname is not None and 'file:' in uname: (_, vbdparam) = string.split(uname, ':', 1) if not os.path.isfile(vbdparam): raise VmError('Disk image does not exist: %s' % @@ -355,32 +378,6 @@ class HVMImageHandler(ImageHandler): os.waitpid(self.pid, 0) self.pid = 0 - def getDomainMemory(self, mem_kb): - """@see ImageHandler.getDomainMemory""" - if os.uname()[4] == 'ia64': - page_kb = 16 - # ROM size for guest firmware, ioreq page and xenstore page - extra_pages = 1024 + 2 - else: - page_kb = 4 - # This was derived emperically: - # 2.4 MB overhead per 1024 MB RAM + 8 MB constant - # + 4 to avoid low-memory condition - extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12; - extra_pages = int( math.ceil( extra_mb*1024 / page_kb )) - return mem_kb + extra_pages * page_kb - - def getDomainShadowMemory(self, mem_kb): - """@return The minimum shadow memory required, in KiB, for a domain - with mem_kb KiB of RAM.""" - if os.uname()[4] in ('ia64', 'ppc64'): - # Explicit shadow memory is not a concept - return 0 - else: - # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than - # the minimum that Xen would allocate if no value were given. - return 1024 * self.vm.getVCpuCount() + mem_kb / 256 - def register_shutdown_watch(self): """ add xen store watch on control/shutdown """ self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \ @@ -417,15 +414,51 @@ class HVMImageHandler(ImageHandler): return 1 # Keep watching -"""Table of image handler classes for virtual machine images. Indexed by -image type. 
-"""
-imageHandlerClasses = {} - - -for h in LinuxImageHandler, HVMImageHandler: - imageHandlerClasses[h.ostype] = h - +class IA64_HVM_ImageHandler(HVMImageHandler): + + ostype = "hvm" + + def getRequiredMemory(self, mem_kb): + page_kb = 16 + # ROM size for guest firmware, ioreq page and xenstore page + extra_pages = 1024 + 2 + return mem_kb + extra_pages * page_kb + + def getRequiredShadowMemory(self, mem_kb): + # Explicit shadow memory is not a concept + return 0 + +class X86_HVM_ImageHandler(HVMImageHandler): + + ostype = "hvm" + + def getRequiredMemory(self, mem_kb): + page_kb = 4 + # This was derived empirically: + # 2.4 MB overhead per 1024 MB RAM + 8 MB constant + # + 4 to avoid low-memory condition + extra_mb = (2.4/1024) * (mem_kb/1024.0) + 12; + extra_pages = int( math.ceil( extra_mb*1024 / page_kb )) + return mem_kb + extra_pages * page_kb + + def getRequiredShadowMemory(self, mem_kb): + # 1MB per vcpu plus 4Kib/Mib of RAM. This is higher than + # the minimum that Xen would allocate if no value were given. + return 1024 * self.vm.getVCpuCount() + mem_kb / 256 + +_handlers = { + "powerpc": { + "linux": PPC_LinuxImageHandler, + }, + "ia64": { + "linux": LinuxImageHandler, + "hvm": IA64_HVM_ImageHandler, + }, + "x86": { + "linux": LinuxImageHandler, + "hvm": X86_HVM_ImageHandler, + }, +} def findImageHandlerClass(image): """Find the image handler class for an image config. 
@@ -433,10 +466,10 @@ def findImageHandlerClass(image): @param image config @return ImageHandler subclass or None """ - ty = sxp.name(image) - if ty is None: + type = sxp.name(image) + if type is None: raise VmError('missing image type') - imageClass = imageHandlerClasses.get(ty) - if imageClass is None: - raise VmError('unknown image type: ' + ty) - return imageClass + try: + return _handlers[arch.type][type] + except KeyError: + raise VmError('unknown image type: ' + type) diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/server/DevController.py Fri Sep 01 13:04:02 2006 -0600 @@ -207,6 +207,9 @@ class DevController: devid = int(devid) + # Modify online status /before/ updating state (latter is watched by + # drivers, so this ordering avoids a race). + self.writeBackend(devid, 'online', "0") self.writeBackend(devid, 'state', str(xenbusState['Closing'])) @@ -406,7 +409,8 @@ class DevController: 'domain' : self.vm.getName(), 'frontend' : frontpath, 'frontend-id' : "%i" % self.vm.getDomid(), - 'state' : str(xenbusState['Initialising']) + 'state' : str(xenbusState['Initialising']), + 'online' : "1" }) return (backpath, frontpath) diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/XMLRPCServer.py --- a/tools/python/xen/xend/server/XMLRPCServer.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/server/XMLRPCServer.py Fri Sep 01 13:04:02 2006 -0600 @@ -78,7 +78,8 @@ methods = ['device_create', 'device_conf methods = ['device_create', 'device_configure', 'destroyDevice', 'getDeviceSxprs', 'setMemoryTarget', 'setName', 'setVCpuCount', 'shutdown', - 'send_sysrq', 'getVCPUInfo', 'waitForDevices'] + 'send_sysrq', 'getVCPUInfo', 'waitForDevices', + 'getRestartCount'] exclude = ['domain_create', 'domain_restore'] diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/server/blkif.py --- 
a/tools/python/xen/xend/server/blkif.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xend/server/blkif.py Fri Sep 01 13:04:02 2006 -0600 @@ -52,10 +52,18 @@ class BlkifController(DevController): except ValueError: dev_type = "disk" - try: - (typ, params) = string.split(uname, ':', 1) - except ValueError: - (typ, params) = ("", "") + if uname is None: + if dev_type == 'cdrom': + (typ, params) = ("", "") + else: + raise VmError( + 'Block device must have physical details specified') + else: + try: + (typ, params) = string.split(uname, ':', 1) + except ValueError: + (typ, params) = ("", "") + back = { 'dev' : dev, 'type' : typ, 'params' : params, diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/migrate.py --- a/tools/python/xen/xm/migrate.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xm/migrate.py Fri Sep 01 13:04:02 2006 -0600 @@ -57,7 +57,8 @@ def main(argv): opts.usage() return if len(args) != 2: - opts.err('Invalid arguments: ' + str(args)) + opts.usage() + sys.exit(1) dom = args[0] dst = args[1] server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.resource, opts.vals.port) diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xm/shutdown.py --- a/tools/python/xen/xm/shutdown.py Fri Sep 01 12:52:12 2006 -0600 +++ b/tools/python/xen/xm/shutdown.py Fri Sep 01 13:04:02 2006 -0600 @@ -48,21 +48,48 @@ gopts.opt('reboot', short='R', fn=set_true, default=0, use='Shutdown and reboot.') +def wait_reboot(opts, doms, rcs): + while doms: + alive = server.xend.domains(0) + reboot = [] + for d in doms: + if d in alive: + rc = server.xend.domain.getRestartCount(d) + if rc == rcs[d]: continue + reboot.append(d) + else: + opts.info("Domain %s destroyed because it failed to reboot" % d) + doms.remove(d) + for d in reboot: + opts.info("Domain %s rebooted" % d) + doms.remove(d) + time.sleep(1) + opts.info("All domains rebooted") + +def wait_shutdown(opts, doms): + while doms: + alive = server.xend.domains(0) + dead = [] + for d in doms: + if d in 
alive: continue + dead.append(d) + for d in dead: + opts.info("Domain %s terminated" % d) + doms.remove(d) + time.sleep(1) + opts.info("All domains terminated") + def shutdown(opts, doms, mode, wait): + rcs = {} for d in doms: + rcs[d] = server.xend.domain.getRestartCount(d) server.xend.domain.shutdown(d, mode) + if wait: - while doms: - alive = server.xend.domains(0) - dead = [] - for d in doms: - if d in alive: continue - dead.append(d) - for d in dead: - opts.info("Domain %s terminated" % d) - doms.remove(d) - time.sleep(1) - opts.info("All domains terminated") + if mode == 'reboot': + wait_reboot(opts, doms, rcs) + else: + wait_shutdown(opts, doms) def shutdown_mode(opts): if opts.vals.halt and opts.vals.reboot: diff -r 4ba098226429 -r 1bab7d65171b xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/ia64/xen/dom0_ops.c Fri Sep 01 13:04:02 2006 -0600 @@ -40,8 +40,8 @@ long arch_do_domctl(xen_domctl_t *op, XE { unsigned long i; struct domain *d = find_domain_by_id(op->domain); - unsigned long start_page = op->u.getmemlist.max_pfns >> 32; - unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff; + unsigned long start_page = op->u.getmemlist.start_pfn; + unsigned long nr_pages = op->u.getmemlist.max_pfns; unsigned long mfn; if ( d == NULL ) { diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Makefile --- a/xen/arch/powerpc/Makefile Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/Makefile Fri Sep 01 13:04:02 2006 -0600 @@ -6,6 +6,7 @@ subdir-y += papr subdir-y += papr obj-y += audit.o +obj-y += backtrace.o obj-y += bitops.o obj-y += boot_of.o obj-y += dart.o @@ -19,19 +20,21 @@ obj-y += external.o obj-y += external.o obj-y += float.o obj-y += hcalls.o -obj-y += htab.o obj-y += iommu.o obj-y += irq.o obj-y += mambo.o +obj-y += memory.o obj-y += mm.o obj-y += mpic.o obj-y += mpic_init.o obj-y += of-devtree.o obj-y += of-devwalk.o obj-y += ofd_fixup.o +obj-y += ofd_fixup_memory.o obj-y += 
physdev.o obj-y += rtas.o obj-y += setup.o +obj-y += shadow.o obj-y += smp.o obj-y += time.o obj-y += usercopy.o @@ -47,6 +50,7 @@ obj-y += elf32.o # These are extra warnings like for the arch/ppc directory but may not # allow the rest of the tree to build. PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations +PPC_C_WARNINGS += -Wshadow CFLAGS += $(PPC_C_WARNINGS) LINK=0x400000 @@ -91,8 +95,27 @@ start.o: boot/start.S start.o: boot/start.S $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ -$(TARGET)-syms: start.o $(ALL_OBJS) xen.lds - $(CC) $(CFLAGS) $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds start.o $(ALL_OBJS) -o $@ +TARGET_OPTS = $(OMAGIC) -Wl,-Ttext,$(xen_link_base),-T,xen.lds +TARGET_OPTS += start.o $(ALL_OBJS) + +.xen-syms: start.o $(ALL_OBJS) xen.lds + $(CC) $(CFLAGS) $(TARGET_OPTS) -o $@ + +NM=$(CROSS_COMPILE)nm +new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) + +ifeq ($(new_nm),y) +NM := $(NM) --synthetic +endif + +xen-syms.S: .xen-syms + $(NM) -n $^ | $(BASEDIR)/tools/symbols > $@ + +xen-syms.o: xen-syms.S + $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ + +$(TARGET)-syms: start.o $(ALL_OBJS) xen-syms.o xen.lds + $(CC) $(CFLAGS) $(TARGET_OPTS) xen-syms.o -o $@ $(TARGET).bin: $(TARGET)-syms $(CROSS_COMPILE)objcopy --output-target=binary $< $@ @@ -122,4 +145,4 @@ dom0.bin: $(DOM0_IMAGE) clean:: $(MAKE) -f $(BASEDIR)/Rules.mk -C of_handler clean - rm -f firmware firmware_image dom0.bin + rm -f firmware firmware_image dom0.bin .xen-syms diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/Rules.mk --- a/xen/arch/powerpc/Rules.mk Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/Rules.mk Fri Sep 01 13:04:02 2006 -0600 @@ -4,7 +4,7 @@ LD := $(CROSS_COMPILE)ld LD := $(CROSS_COMPILE)ld # These are goodess that applies to all source. -C_WARNINGS := -Wpointer-arith -Wredundant-decls +C_WARNINGS := -Wredundant-decls # _no_ common code can have packed data structures or we are in touble. 
C_WARNINGS += -Wpacked diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/boot_of.c --- a/xen/arch/powerpc/boot_of.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/boot_of.c Fri Sep 01 13:04:02 2006 -0600 @@ -26,10 +26,14 @@ #include <xen/spinlock.h> #include <xen/serial.h> #include <xen/time.h> +#include <xen/sched.h> #include <asm/page.h> #include <asm/io.h> #include "exceptions.h" #include "of-devtree.h" + +/* Secondary processors use this for handshaking with main processor. */ +volatile unsigned int __spin_ack; static ulong of_vec; static ulong of_msr; @@ -322,17 +326,18 @@ static void __init of_test(const char *o } } -static int __init of_claim(void * virt, u32 size) +static int __init of_claim(u32 virt, u32 size, u32 align) { int rets[1] = { OF_FAILURE }; - of_call("claim", 3, 1, rets, virt, size, 0/*align*/); + of_call("claim", 3, 1, rets, virt, size, align); if (rets[0] == OF_FAILURE) { - DBG("%s 0x%p 0x%08x -> FAIL\n", __func__, virt, size); + DBG("%s 0x%08x 0x%08x 0x%08x -> FAIL\n", __func__, virt, size, align); return OF_FAILURE; } - DBG("%s 0x%p 0x%08x -> 0x%x\n", __func__, virt, size, rets[0]); + DBG("%s 0x%08x 0x%08x 0x%08x -> 0x%08x\n", __func__, virt, size, align, + rets[0]); return rets[0]; } @@ -683,32 +688,53 @@ static int boot_of_fixup_chosen(void *me } static ulong space_base; -static ulong find_space(u32 size, ulong align, multiboot_info_t *mbi) + +/* + * The following function is necessary because we cannot depend on all + * FW to actually allocate us any space, so we look for it _hoping_ + * that at least is will fail if we try to claim something that + * belongs to FW. This hope does not seem to be true on some version + * of PIBS. 
+ */ +static ulong find_space(u32 size, u32 align, multiboot_info_t *mbi) { memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr); ulong eomem = ((u64)map->length_high << 32) | (u64)map->length_low; ulong base; - of_printf("%s base=0x%016lx eomem=0x%016lx size=0x%08x align=0x%lx\n", + if (size == 0) + return 0; + + if (align == 0) + of_panic("cannot call %s() with align of 0\n", __func__); + +#ifdef BROKEN_CLAIM_WORKAROUND + { + static int broken_claim; + if (!broken_claim) { + /* just try and claim it to the FW chosen address */ + base = of_claim(0, size, align); + if (base != OF_FAILURE) + return base; + of_printf("%s: Firmware does not allocate memory for you\n", + __func__); + broken_claim = 1; + } + } +#endif + + of_printf("%s base=0x%016lx eomem=0x%016lx size=0x%08x align=0x%x\n", __func__, space_base, eomem, size, align); base = ALIGN_UP(space_base, PAGE_SIZE); - if ((base + size) >= 0x4000000) return 0; - if (base + size > eomem) of_panic("not enough RAM\n"); - - if (size == 0) return base; - if (of_claim((void*)base, size) != OF_FAILURE) { - space_base = base + size; - return base; - } else { - for(base += 0x100000; (base+size) < 0x4000000; base += 0x100000) { - of_printf("Trying 0x%016lx\n", base); - if (of_claim((void*)base, size) != OF_FAILURE) { - space_base = base + size; - return base; - } - } - return 0; - } + + while ((base + size) < rma_size(cpu_default_rma_order_pages())) { + if (of_claim(base, size, 0) != OF_FAILURE) { + space_base = base + size; + return base; + } + base += (PAGE_SIZE > align) ? 
PAGE_SIZE : align; + } + of_panic("Cannot find memory in the RMA\n"); } /* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges @@ -834,9 +860,8 @@ static void boot_of_module(ulong r3, ulo static module_t mods[3]; void *oftree; ulong oftree_sz = 48 * PAGE_SIZE; - char *mod0_start; + ulong mod0_start; ulong mod0_size; - ulong mod0; static const char sepr[] = " -- "; extern char dom0_start[] __attribute__ ((weak)); extern char dom0_size[] __attribute__ ((weak)); @@ -844,59 +869,48 @@ static void boot_of_module(ulong r3, ulo if ((r3 > 0) && (r4 > 0)) { /* was it handed to us in registers ? */ - mod0_start = (void *)r3; + mod0_start = r3; mod0_size = r4; + of_printf("%s: Dom0 was loaded and found using r3/r4:" + "0x%lx[size 0x%lx]\n", + __func__, mod0_start, mod0_size); } else { /* see if it is in the boot params */ p = strstr((char *)((ulong)mbi->cmdline), "dom0_start="); if ( p != NULL) { p += 11; - mod0_start = (char *)simple_strtoul(p, NULL, 0); + mod0_start = simple_strtoul(p, NULL, 0); p = strstr((char *)((ulong)mbi->cmdline), "dom0_size="); p += 10; mod0_size = simple_strtoul(p, NULL, 0); - - of_printf("mod0: %o %c %c %c\n", - mod0_start[0], - mod0_start[1], - mod0_start[2], - mod0_start[3]); - + of_printf("%s: Dom0 was loaded and found using cmdline:" + "0x%lx[size 0x%lx]\n", + __func__, mod0_start, mod0_size); } else if ( ((ulong)dom0_start != 0) && ((ulong)dom0_size != 0) ) { /* was it linked in ? 
*/ - mod0_start = dom0_start; + mod0_start = (ulong)dom0_start; mod0_size = (ulong)dom0_size; - of_printf("%s: linked in module copied after _end " - "(start 0x%p size 0x%lx)\n", + of_printf("%s: Dom0 is linked in: 0x%lx[size 0x%lx]\n", __func__, mod0_start, mod0_size); } else { - mod0_start = _end; + mod0_start = (ulong)_end; mod0_size = 0; - } + of_printf("%s: FYI Dom0 is unknown, will be caught later\n", + __func__); + } + } + + if (mod0_size > 0) { + const char *c = (const char *)mod0_start; + + of_printf("mod0: %o %c %c %c\n", c[0], c[1], c[2], c[3]); } space_base = (ulong)_end; - mod0 = find_space(mod0_size, PAGE_SIZE, mbi); - - /* three cases - * 1) mod0_size is not 0 and the image can be copied - * 2) mod0_size is not 0 and the image cannot be copied - * 3) mod0_size is 0 - */ - if (mod0_size > 0) { - if (mod0 != 0) { - memcpy((void *)mod0, mod0_start, mod0_size); - mods[0].mod_start = mod0; - mods[0].mod_end = mod0 + mod0_size; - } else { - of_panic("No space to copy mod0\n"); - } - } else { - mods[0].mod_start = mod0; - mods[0].mod_end = mod0; - } + mods[0].mod_start = mod0_start; + mods[0].mod_end = mod0_start + mod0_size; of_printf("%s: mod[0] @ 0x%016x[0x%x]\n", __func__, mods[0].mod_start, mods[0].mod_end); @@ -909,15 +923,22 @@ static void boot_of_module(ulong r3, ulo /* snapshot the tree */ oftree = (void*)find_space(oftree_sz, PAGE_SIZE, mbi); - if (oftree == 0) of_panic("Could not allocate OFD tree\n"); + if (oftree == 0) + of_panic("Could not allocate OFD tree\n"); of_printf("creating oftree\n"); of_test("package-to-path"); - ofd_create(oftree, oftree_sz); + oftree = ofd_create(oftree, oftree_sz); pkg_save(oftree); + + if (ofd_size(oftree) > oftree_sz) + of_panic("Could not fit all of native devtree\n"); boot_of_fixup_refs(oftree); boot_of_fixup_chosen(oftree); + + if (ofd_size(oftree) > oftree_sz) + of_panic("Could not fit all devtree fixups\n"); ofd_walk(oftree, OFD_ROOT, /* add_hype_props */ NULL, 2); @@ -937,7 +958,7 @@ static int __init 
boot_of_cpus(void) static int __init boot_of_cpus(void) { int cpus; - int cpu; + int cpu, bootcpu, logical; int result; u32 cpu_clock[2]; @@ -962,10 +983,68 @@ static int __init boot_of_cpus(void) cpu_khz /= 1000; of_printf("OF: clock-frequency = %ld KHz\n", cpu_khz); - /* FIXME: should not depend on the boot CPU bring the first child */ + /* Look up which CPU we are running on right now. */ + result = of_getprop(bof_chosen, "cpu", &bootcpu, sizeof (bootcpu)); + if (result == OF_FAILURE) + of_panic("Failed to look up boot cpu\n"); + cpu = of_getpeer(cpu); - while (cpu > 0) { - of_start_cpu(cpu, (ulong)spin_start, 0); + + /* We want a continuous logical cpu number space. */ + cpu_set(0, cpu_present_map); + cpu_set(0, cpu_online_map); + cpu_set(0, cpu_possible_map); + + /* Spin up all CPUS, even if there are more than NR_CPUS, because + * Open Firmware has them spinning on cache lines which will + * eventually be scrubbed, which could lead to random CPU activation. + */ + for (logical = 1; cpu > 0; logical++) { + unsigned int cpuid, ping, pong; + unsigned long now, then, timeout; + + if (cpu == bootcpu) { + of_printf("skipping boot cpu!\n"); + continue; + } + + result = of_getprop(cpu, "reg", &cpuid, sizeof(cpuid)); + if (result == OF_FAILURE) + of_panic("cpuid lookup failed\n"); + + of_printf("spinning up secondary processor #%d: ", logical); + + __spin_ack = ~0x0; + ping = __spin_ack; + pong = __spin_ack; + of_printf("ping = 0x%x: ", ping); + + mb(); + result = of_start_cpu(cpu, (ulong)spin_start, logical); + if (result == OF_FAILURE) + of_panic("start cpu failed\n"); + + /* We will give the secondary processor five seconds to reply. 
*/ + then = mftb(); + timeout = then + (5 * timebase_freq); + + do { + now = mftb(); + if (now >= timeout) { + of_printf("BROKEN: "); + break; + } + + mb(); + pong = __spin_ack; + } while (pong == ping); + of_printf("pong = 0x%x\n", pong); + + if (pong != ping) { + cpu_set(logical, cpu_present_map); + cpu_set(logical, cpu_possible_map); + } + cpu = of_getpeer(cpu); } return 1; @@ -1013,6 +1092,7 @@ multiboot_info_t __init *boot_of_init( boot_of_rtas(); /* end of OF */ + of_printf("Quiescing Open Firmware ...\n"); of_call("quiesce", 0, 0, NULL); return &mbi; diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dart_u3.c --- a/xen/arch/powerpc/dart_u3.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/dart_u3.c Fri Sep 01 13:04:02 2006 -0600 @@ -55,10 +55,10 @@ static void u3_inv_all(void) dc.reg.dc_invtlb = 1; out_32(dart_ctl_reg, dc.dc_word); - do { - dc.dc_word = in_32(dart_ctl_reg); - r++; - } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l))); + do { + dc.dc_word = in_32(dart_ctl_reg); + r++; + } while ((dc.reg.dc_invtlb == 1) && (r < (1 << l))); if (r == (1 << l)) { if (l < 4) { diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/dom0_ops.c --- a/xen/arch/powerpc/dom0_ops.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/dom0_ops.c Fri Sep 01 13:04:02 2006 -0600 @@ -23,16 +23,20 @@ #include <xen/lib.h> #include <xen/sched.h> #include <xen/guest_access.h> +#include <xen/shadow.h> #include <public/xen.h> #include <public/domctl.h> #include <public/sysctl.h> +void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *); void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c) { memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs)); /* XXX fill in rest of vcpu_guest_context_t */ } +long arch_do_domctl(struct xen_domctl *domctl, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); long arch_do_domctl(struct xen_domctl *domctl, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { @@ -75,6 +79,19 @@ long arch_do_domctl(struct xen_domctl *d } 
} break; + case XEN_DOMCTL_shadow_op: + { + struct domain *d; + ret = -ESRCH; + d = find_domain_by_id(domctl->domain); + if ( d != NULL ) + { + ret = shadow_domctl(d, &domctl->u.shadow_op, u_domctl); + put_domain(d); + copy_to_guest(u_domctl, domctl, 1); + } + } + break; default: ret = -ENOSYS; @@ -84,6 +101,8 @@ long arch_do_domctl(struct xen_domctl *d return ret; } +long arch_do_sysctl(struct xen_sysctl *sysctl, + XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl); long arch_do_sysctl(struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) { @@ -109,6 +128,7 @@ long arch_do_sysctl(struct xen_sysctl *s break; default: + printk("%s: unsupported sysctl: 0x%x\n", __func__, (sysctl->cmd)); ret = -ENOSYS; break; } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain.c --- a/xen/arch/powerpc/domain.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/domain.c Fri Sep 01 13:04:02 2006 -0600 @@ -27,6 +27,8 @@ #include <xen/domain.h> #include <xen/console.h> #include <xen/shutdown.h> +#include <xen/shadow.h> +#include <xen/mm.h> #include <asm/htab.h> #include <asm/current.h> #include <asm/hcalls.h> @@ -75,7 +77,8 @@ int arch_domain_create(struct domain *d) { unsigned long rma_base; unsigned long rma_sz; - uint htab_order; + uint rma_order_pages; + int rc; if (d->domain_id == IDLE_DOMAIN_ID) { d->shared_info = (void *)alloc_xenheap_page(); @@ -84,44 +87,31 @@ int arch_domain_create(struct domain *d) return 0; } - d->arch.rma_order = cpu_rma_order(); - rma_sz = rma_size(d->arch.rma_order); - /* allocate the real mode area */ - d->max_pages = 1UL << d->arch.rma_order; + rma_order_pages = cpu_default_rma_order_pages(); + d->max_pages = 1UL << rma_order_pages; d->tot_pages = 0; - d->arch.rma_page = alloc_domheap_pages(d, d->arch.rma_order, 0); - if (NULL == d->arch.rma_page) - return 1; + + rc = allocate_rma(d, rma_order_pages); + if (rc) + return rc; rma_base = page_to_maddr(d->arch.rma_page); - - BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */ - - 
printk("clearing RMO: 0x%lx[0x%lx]\n", rma_base, rma_sz); - memset((void *)rma_base, 0, rma_sz); + rma_sz = rma_size(rma_order_pages); d->shared_info = (shared_info_t *) (rma_addr(&d->arch, RMA_SHARED_INFO) + rma_base); - d->arch.large_page_sizes = 1; - d->arch.large_page_shift[0] = 24; /* 16 M for 970s */ - - /* FIXME: we need to the the maximum addressible memory for this - * domain to calculate this correctly. It should probably be set - * by the managment tools */ - htab_order = d->arch.rma_order - 6; /* (1/64) */ - if (test_bit(_DOMF_privileged, &d->domain_flags)) { - /* bump the htab size of privleged domains */ - ++htab_order; - } - htab_alloc(d, htab_order); + d->arch.large_page_sizes = cpu_large_page_orders( + d->arch.large_page_order, ARRAY_SIZE(d->arch.large_page_order)); + + INIT_LIST_HEAD(&d->arch.extent_list); return 0; } void arch_domain_destroy(struct domain *d) { - htab_free(d); + shadow_teardown(d); } void machine_halt(void) @@ -162,6 +152,16 @@ int arch_set_info_guest(struct vcpu *v, int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c) { memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs)); + + printf("Domain[%d].%d: initializing\n", + v->domain->domain_id, v->vcpu_id); + + if (v->domain->arch.htab.order == 0) + panic("Page table never allocated for Domain: %d\n", + v->domain->domain_id); + if (v->domain->arch.rma_order == 0) + panic("RMA never allocated for Domain: %d\n", + v->domain->domain_id); set_bit(_VCPUF_initialised, &v->vcpu_flags); @@ -253,17 +253,19 @@ void continue_running(struct vcpu *same) void continue_running(struct vcpu *same) { /* nothing to do */ + return; } void sync_vcpu_execstate(struct vcpu *v) { - /* XXX for now, for domain destruction, make this non-fatal */ - printf("%s: called\n", __func__); + /* do nothing */ + return; } void domain_relinquish_resources(struct domain *d) { free_domheap_pages(d->arch.rma_page, d->arch.rma_order); + free_extents(d); } void arch_dump_domain_info(struct domain *d) diff -r 
4ba098226429 -r 1bab7d65171b xen/arch/powerpc/domain_build.c --- a/xen/arch/powerpc/domain_build.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/domain_build.c Fri Sep 01 13:04:02 2006 -0600 @@ -25,6 +25,7 @@ #include <xen/init.h> #include <xen/ctype.h> #include <xen/iocap.h> +#include <xen/shadow.h> #include <xen/version.h> #include <asm/processor.h> #include <asm/papr.h> @@ -34,17 +35,21 @@ extern int loadelfimage_32(struct domain extern int loadelfimage_32(struct domain_setup_info *dsi); /* opt_dom0_mem: memory allocated to domain 0. */ -static unsigned int opt_dom0_mem; +static unsigned int dom0_nrpages; static void parse_dom0_mem(char *s) { - unsigned long long bytes = parse_size_and_unit(s); - /* If no unit is specified we default to kB units, not bytes. */ - if (isdigit(s[strlen(s)-1])) - opt_dom0_mem = (unsigned int)bytes; - else - opt_dom0_mem = (unsigned int)(bytes >> 10); + unsigned long long bytes; + + bytes = parse_size_and_unit(s); + dom0_nrpages = bytes >> PAGE_SHIFT; } custom_param("dom0_mem", parse_dom0_mem); + +static unsigned int opt_dom0_max_vcpus; +integer_param("dom0_max_vcpus", opt_dom0_max_vcpus); + +static unsigned int opt_dom0_shadow; +boolean_param("dom0_shadow", opt_dom0_shadow); int elf_sanity_check(Elf_Ehdr *ehdr) { @@ -105,11 +110,13 @@ int construct_dom0(struct domain *d, struct domain_setup_info dsi; ulong dst; u64 *ofh_tree; + uint rma_nrpages = 1 << d->arch.rma_order; ulong rma_sz = rma_size(d->arch.rma_order); ulong rma = page_to_maddr(d->arch.rma_page); start_info_t *si; ulong eomem; int am64 = 1; + int preempt = 0; ulong msr; ulong pc; ulong r2; @@ -118,13 +125,18 @@ int construct_dom0(struct domain *d, BUG_ON(d->domain_id != 0); BUG_ON(d->vcpu[0] == NULL); + if (image_len == 0) + panic("No Dom0 image supplied\n"); + cpu_init_vcpu(v); memset(&dsi, 0, sizeof(struct domain_setup_info)); dsi.image_addr = image_start; dsi.image_len = image_len; + printk("Trying Dom0 as 64bit ELF\n"); if ((rc = parseelfimage(&dsi)) != 0) { + 
printk("Trying Dom0 as 32bit ELF\n"); if ((rc = parseelfimage_32(&dsi)) != 0) return rc; am64 = 0; @@ -141,7 +153,33 @@ int construct_dom0(struct domain *d, /* By default DOM0 is allocated all available memory. */ d->max_pages = ~0U; - d->tot_pages = 1UL << d->arch.rma_order; + + /* default is the max(1/16th of memory, CONFIG_MIN_DOM0_PAGES) */ + if (dom0_nrpages == 0) { + dom0_nrpages = total_pages >> 4; + + if (dom0_nrpages < CONFIG_MIN_DOM0_PAGES) + dom0_nrpages = CONFIG_MIN_DOM0_PAGES; + } + + /* make sure we are at least as big as the RMA */ + if (dom0_nrpages > rma_nrpages) + dom0_nrpages = allocate_extents(d, dom0_nrpages, rma_nrpages); + + ASSERT(d->tot_pages == dom0_nrpages); + ASSERT(d->tot_pages >= rma_nrpages); + + if (opt_dom0_shadow == 0) { + /* 1/64 of memory */ + opt_dom0_shadow = (d->tot_pages >> 6) >> (20 - PAGE_SHIFT); + } + + do { + shadow_set_allocation(d, opt_dom0_shadow, &preempt); + } while (preempt); + if (shadow_get_allocation(d) == 0) + panic("shadow allocation failed 0x%x < 0x%x\n", + shadow_get_allocation(d), opt_dom0_shadow); ASSERT( image_len < rma_sz ); @@ -156,10 +194,6 @@ int construct_dom0(struct domain *d, printk("shared_info: 0x%lx,%p\n", si->shared_info, d->shared_info); eomem = si->shared_info; - - /* allow dom0 to access all of system RAM */ - d->arch.logical_base_pfn = 128 << (20 - PAGE_SHIFT); /* 128 MB */ - d->arch.logical_end_pfn = max_page; /* number of pages accessible */ si->nr_pages = rma_sz >> PAGE_SHIFT; @@ -265,7 +299,7 @@ int construct_dom0(struct domain *d, printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2); - ofd_dom0_fixup(d, *ofh_tree + rma, si, dst - rma); + ofd_dom0_fixup(d, *ofh_tree + rma, si); set_bit(_VCPUF_initialised, &v->vcpu_flags); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.c --- a/xen/arch/powerpc/exceptions.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/exceptions.c Fri Sep 01 13:04:02 2006 -0600 @@ -82,6 +82,8 @@ void program_exception(struct cpu_user_r 
show_registers(regs); printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr()); printk("hid4 0x%016lx\n", regs->hid4); + printk("---[ backtrace ]---\n"); + show_backtrace(regs->gprs[1], regs->lr, regs->pc); panic("%s: 0x%lx\n", __func__, cookie); #endif /* CRASH_DEBUG */ } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/exceptions.h --- a/xen/arch/powerpc/exceptions.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/exceptions.h Fri Sep 01 13:04:02 2006 -0600 @@ -51,7 +51,4 @@ extern char exception_vectors[]; extern char exception_vectors[]; extern char exception_vectors_end[]; extern int spin_start[]; -extern int firmware_image_start[0]; -extern int firmware_image_size[0]; - #endif diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/external.c --- a/xen/arch/powerpc/external.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/external.c Fri Sep 01 13:04:02 2006 -0600 @@ -175,8 +175,7 @@ void init_IRQ(void) void ack_APIC_irq(void) { - printk("%s: EOI the whole MPIC?\n", __func__); - for (;;); + panic("%s: EOI the whole MPIC?\n", __func__); } void ack_bad_irq(unsigned int irq) diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/iommu.c --- a/xen/arch/powerpc/iommu.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/iommu.c Fri Sep 01 13:04:02 2006 -0600 @@ -52,17 +52,14 @@ int iommu_put(u32 buid, ulong ioba, unio pfn = tce.tce_bits.tce_rpn; mfn = pfn2mfn(d, pfn, &mtype); - if (mtype != 0) { - panic("we don't do non-RMO memory yet\n"); + if (mfn > 0) { +#ifdef DEBUG + printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__, + ioba, pfn, mfn); +#endif + tce.tce_bits.tce_rpn = mfn; + return iommu_phbs[buid].iommu_put(ioba, tce); } - -#ifdef DEBUG - printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__, - ioba, pfn, mfn); -#endif - tce.tce_bits.tce_rpn = mfn; - - return iommu_phbs[buid].iommu_put(ioba, tce); } return -1; } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mm.c --- a/xen/arch/powerpc/mm.c Fri Sep 01 12:52:12 2006 -0600 +++ 
b/xen/arch/powerpc/mm.c Fri Sep 01 13:04:02 2006 -0600 @@ -13,9 +13,10 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #include <xen/config.h> @@ -23,9 +24,18 @@ #include <xen/shadow.h> #include <xen/kernel.h> #include <xen/sched.h> +#include <xen/perfc.h> #include <asm/misc.h> #include <asm/init.h> #include <asm/page.h> + +#ifdef VERBOSE +#define MEM_LOG(_f, _a...) \ + printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \ + current->domain->domain_id , __LINE__ , ## _a ) +#else +#define MEM_LOG(_f, _a...) ((void)0) +#endif /* Frame table and its size in pages. */ struct page_info *frame_table; @@ -53,16 +63,128 @@ int steal_page(struct domain *d, struct return 1; } - -int get_page_type(struct page_info *page, u32 type) -{ - panic("%s called\n", __func__); - return 1; -} - void put_page_type(struct page_info *page) { - panic("%s called\n", __func__); + unsigned long nx, x, y = page->u.inuse.type_info; + + do { + x = y; + nx = x - 1; + + ASSERT((x & PGT_count_mask) != 0); + + /* + * The page should always be validated while a reference is held. The + * exception is during domain destruction, when we forcibly invalidate + * page-table pages if we detect a referential loop. + * See domain.c:relinquish_list(). + */ + ASSERT((x & PGT_validated) || + test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags)); + + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + /* Record TLB information for flush later. */ + page->tlbflush_timestamp = tlbflush_current_time(); + } + else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == + (PGT_pinned | 1)) ) + { + /* Page is now only pinned. Make the back pointer mutable again. 
*/ + nx |= PGT_va_mutable; + } + } + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); +} + + +int get_page_type(struct page_info *page, unsigned long type) +{ + unsigned long nx, x, y = page->u.inuse.type_info; + + again: + do { + x = y; + nx = x + 1; + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); + return 0; + } + else if ( unlikely((x & PGT_count_mask) == 0) ) + { + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) + { + if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) + { + /* + * On type change we check to flush stale TLB + * entries. This may be unnecessary (e.g., page + * was GDT/LDT) but those circumstances should be + * very rare. + */ + cpumask_t mask = + page_get_owner(page)->domain_dirty_cpumask; + tlbflush_filter(mask, page->tlbflush_timestamp); + + if ( unlikely(!cpus_empty(mask)) ) + { + perfc_incrc(need_flush_tlb_flush); + flush_tlb_mask(mask); + } + } + + /* We lose existing type, back pointer, and validity. */ + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); + nx |= type; + + /* No special validation needed for writable pages. */ + /* Page tables and GDT/LDT need to be scanned for validity. */ + if ( type == PGT_writable_page ) + nx |= PGT_validated; + } + } + else + { + if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) + { + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + { + return 0; + } + else if ( (x & PGT_va_mask) == PGT_va_mutable ) + { + /* The va backpointer is mutable, hence we update it. */ + nx &= ~PGT_va_mask; + nx |= type; /* we know the actual type is correct */ + } + else if ( (type & PGT_va_mask) != PGT_va_mutable ) + { + ASSERT((type & PGT_va_mask) != (x & PGT_va_mask)); + + /* This table is possibly mapped at multiple locations. */ + nx &= ~PGT_va_mask; + nx |= PGT_va_unknown; + } + } + if ( unlikely(!(x & PGT_validated)) ) + { + /* Someone else is updating validation of this page. Wait... 
*/ + while ( (y = page->u.inuse.type_info) == x ) + cpu_relax(); + goto again; + } + } + } + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); + + if ( unlikely(!(nx & PGT_validated)) ) + { + /* Noone else is updating simultaneously. */ + __set_bit(_PGT_validated, &page->u.inuse.type_info); + } + + return 1; } void __init init_frametable(void) @@ -107,44 +229,148 @@ extern void copy_page(void *dp, void *sp } } +static int mfn_in_hole(ulong mfn) +{ + /* totally cheating */ + if (mfn >= (0xf0000000UL >> PAGE_SHIFT) && + mfn < (((1UL << 32) - 1) >> PAGE_SHIFT)) + return 1; + + return 0; +} + +static uint add_extent(struct domain *d, struct page_info *pg, uint order) +{ + struct page_extents *pe; + + pe = xmalloc(struct page_extents); + if (pe == NULL) + return 0; + + pe->pg = pg; + pe->order = order; + pe->pfn = page_to_mfn(pg); + + list_add_tail(&pe->pe_list, &d->arch.extent_list); + + return pe->pfn; +} + +void free_extents(struct domain *d) +{ + /* we just need to free the memory behind list */ + struct list_head *list; + struct list_head *ent; + struct list_head *next; + + list = &d->arch.extent_list; + ent = list->next; + + while (ent != list) { + next = ent->next; + xfree(ent); + ent = next; + } +} + +uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages) +{ + uint ext_order; + uint ext_nrpages; + uint total_nrpages; + struct page_info *pg; + + ext_order = cpu_extent_order(); + ext_nrpages = 1 << ext_order; + + total_nrpages = rma_nrpages; + + /* We only allocate in nr_extsz chunks so if you are not divisible + * you get more than you asked for */ + while (total_nrpages < nrpages) { + pg = alloc_domheap_pages(d, ext_order, 0); + if (pg == NULL) + return total_nrpages; + + if (add_extent(d, pg, ext_order) == 0) { + free_domheap_pages(pg, ext_order); + return total_nrpages; + } + total_nrpages += ext_nrpages; + } + + return total_nrpages; +} + +int allocate_rma(struct domain *d, unsigned int order_pages) +{ + ulong rma_base; 
+ ulong rma_sz = rma_size(order_pages); + + d->arch.rma_page = alloc_domheap_pages(d, order_pages, 0); + if (d->arch.rma_page == NULL) { + DPRINTK("Could not allocate order_pages=%d RMA for domain %u\n", + order_pages, d->domain_id); + return -ENOMEM; + } + d->arch.rma_order = order_pages; + + rma_base = page_to_maddr(d->arch.rma_page); + BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */ + + /* XXX */ + printk("clearing RMA: 0x%lx[0x%lx]\n", rma_base, rma_sz); + memset((void *)rma_base, 0, rma_sz); + + return 0; +} + ulong pfn2mfn(struct domain *d, long pfn, int *type) { ulong rma_base_mfn = page_to_mfn(d->arch.rma_page); ulong rma_size_mfn = 1UL << d->arch.rma_order; - ulong mfn; - int t; + struct page_extents *pe; if (pfn < rma_size_mfn) { - mfn = pfn + rma_base_mfn; - t = PFN_TYPE_RMA; - } else if (pfn >= d->arch.logical_base_pfn && - pfn < d->arch.logical_end_pfn) { - if (test_bit(_DOMF_privileged, &d->domain_flags)) { - /* This hack allows dom0 to map all memory, necessary to - * initialize domU state. */ - mfn = pfn; - } else { - panic("we do not handle the logical area yet\n"); - mfn = 0; - } - - t = PFN_TYPE_LOGICAL; - } else { - /* don't know */ - mfn = pfn; - t = PFN_TYPE_IO; - } - - if (type != NULL) - *type = t; - - return mfn; + if (type) + *type = PFN_TYPE_RMA; + return pfn + rma_base_mfn; + } + + if (test_bit(_DOMF_privileged, &d->domain_flags) && + mfn_in_hole(pfn)) { + if (type) + *type = PFN_TYPE_IO; + return pfn; + } + + /* quick tests first */ + list_for_each_entry (pe, &d->arch.extent_list, pe_list) { + uint end_pfn = pe->pfn + (1 << pe->order); + + if (pfn >= pe->pfn && pfn < end_pfn) { + if (type) + *type = PFN_TYPE_LOGICAL; + return page_to_mfn(pe->pg) + (pfn - pe->pfn); + } + } + + /* This hack allows dom0 to map all memory, necessary to + * initialize domU state. 
*/ + if (test_bit(_DOMF_privileged, &d->domain_flags)) { + if (type) + *type = PFN_TYPE_REMOTE; + return pfn; + } + + BUG(); + return 0; } void guest_physmap_add_page( struct domain *d, unsigned long gpfn, unsigned long mfn) { - panic("%s\n", __func__); + printk("%s(%d, 0x%lx, 0x%lx)\n", __func__, d->domain_id, gpfn, mfn); } void guest_physmap_remove_page( struct domain *d, unsigned long gpfn, unsigned long mfn) diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/mpic.c --- a/xen/arch/powerpc/mpic.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/mpic.c Fri Sep 01 13:04:02 2006 -0600 @@ -498,10 +498,10 @@ static void mpic_enable_irq(unsigned int #ifdef CONFIG_MPIC_BROKEN_U3 if (mpic->flags & MPIC_BROKEN_U3) { - unsigned int src = irq - mpic->irq_offset; - if (mpic_is_ht_interrupt(mpic, src) && + unsigned int bsrc = irq - mpic->irq_offset; + if (mpic_is_ht_interrupt(mpic, bsrc) && (irq_desc[irq].status & IRQ_LEVEL)) - mpic_ht_end_irq(mpic, src); + mpic_ht_end_irq(mpic, bsrc); } #endif /* CONFIG_MPIC_BROKEN_U3 */ } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup.c --- a/xen/arch/powerpc/ofd_fixup.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/ofd_fixup.c Fri Sep 01 13:04:02 2006 -0600 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 
2005, 2006 * * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ @@ -24,6 +24,7 @@ #include <xen/version.h> #include <public/xen.h> #include "of-devtree.h" +#include "oftree.h" #undef RTAS @@ -316,91 +317,6 @@ static ofdn_t ofd_rtas_props(void *m) } #endif -struct mem_reg { - u64 addr; - u64 sz; -}; - -static ofdn_t ofd_memory_chunk_create(void *m, ofdn_t p, - const char *ppath, - const char *name, - const char *dt, - ulong start, ulong size) -{ - struct mem_reg reg; - char path[128]; - ulong l; - u32 v; - ofdn_t n; - ulong nl = strlen(name) + 1; - ulong dtl = strlen(dt) + 1; - - l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start); - n = ofd_node_add(m, p, path, l + 1); - ofd_prop_add(m, n, "name", name, nl); - - v = 1; - ofd_prop_add(m, n, "#address-cells", &v, sizeof (v)); - v = 0; - ofd_prop_add(m, n, "#size-cells", &v, sizeof (v)); - - ofd_prop_add(m, n, "device_type", dt, dtl); - - /* physical addresses usable without regard to OF */ - reg.addr = start; - reg.sz = size; - ofd_prop_add(m, n, "reg", ®, sizeof (reg)); - - return n; -} - -static ofdn_t ofd_memory_props(void *m, struct domain *d, ulong eoload) -{ - ofdn_t n = -1; - ulong start = 0; - static char name[] = "memory"; - ulong mem_size = rma_size(d->arch.rma_order); - ulong chunk_size = rma_size(d->arch.rma_order); - - /* Remove all old memory props */ - do { - ofdn_t old; - - old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", - name, sizeof(name)); - if (old <= 0) break; - - ofd_node_prune(m, old); - } while (1); - - while (start < mem_size) { - ulong size = (mem_size < chunk_size) ? 
mem_size : chunk_size; - - n = ofd_memory_chunk_create(m, OFD_ROOT, "", "memory", "memory", - start, size); - - if (start == 0) { - /* We are processing the first and RMA chunk */ - - /* free list of physical addresses available after OF and - * client program have been accounted for */ - struct mem_reg avail[] = { - /* 0 til OF @ 32MiB - 16KiB stack */ - { .addr = 0, .sz = ((32 << 20) - (16 << 10)) }, - /* end of loaded material to the end the chunk - 1 page */ - { .addr = eoload, .sz = chunk_size - eoload - PAGE_SIZE }, - /* the last page is reserved for xen_start_info */ - }; - ofd_prop_add(m, n, "available", &avail, - sizeof (avail)); - } - - start += size; - mem_size -= size; - } - return n; -} - static ofdn_t ofd_xen_props(void *m, struct domain *d, start_info_t *si) { ofdn_t n; @@ -440,9 +356,8 @@ static ofdn_t ofd_xen_props(void *m, str } return n; } -extern int ofd_dom0_fixup( - struct domain *d, ulong oftree, start_info_t *si, ulong dst); -int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si, ulong eoload) + +int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si) { void *m; const ofdn_t n = OFD_ROOT; @@ -470,8 +385,8 @@ int ofd_dom0_fixup(struct domain *d, ulo printk("Add /chosen props\n"); ofd_chosen_props(m, (char *)si->cmd_line); - printk("fix /memory@0 props\n"); - ofd_memory_props(m, d, eoload); + printk("fix /memory props\n"); + ofd_memory_props(m, d); printk("fix /xen props\n"); ofd_xen_props(m, d, si); @@ -497,8 +412,8 @@ int ofd_dom0_fixup(struct domain *d, ulo r = ofd_prop_add(m, n, "ibm,partition-no", &did, sizeof(did)); ASSERT( r > 0 ); - const char dom0[] = "dom0"; - r = ofd_prop_add(m, n, "ibm,partition-name", dom0, sizeof (dom0)); + const char d0[] = "dom0"; + r = ofd_prop_add(m, n, "ibm,partition-name", d0, sizeof (d0)); ASSERT( r > 0 ); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/oftree.h --- a/xen/arch/powerpc/oftree.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/oftree.h Fri Sep 01 13:04:02 2006 
-0600 @@ -20,14 +20,18 @@ #ifndef _OFTREE_H #define _OFTREE_H +#include <xen/multiboot.h> extern ulong oftree; extern ulong oftree_len; +extern ulong oftree_end; -extern int ofd_dom0_fixup( - struct domain *d, ulong oftree, start_info_t *si, ulong dst); +extern int ofd_dom0_fixup(struct domain *d, ulong mem, start_info_t *si); +extern void ofd_memory_props(void *m, struct domain *d); extern int firmware_image_start[0]; extern int firmware_image_size[0]; +extern void memory_init(module_t *mod, int mcount); + #endif /* #ifndef _OFTREE_H */ diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/tce.c --- a/xen/arch/powerpc/papr/tce.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/papr/tce.c Fri Sep 01 13:04:02 2006 -0600 @@ -47,7 +47,7 @@ static void h_put_tce(struct cpu_user_re regs->gprs[3] = H_Success; } } - + static void h_get_tce(struct cpu_user_regs *regs) { u32 liobn = regs->gprs[4]; @@ -57,7 +57,7 @@ static void h_get_tce(struct cpu_user_re printk("%s: liobn: 0x%x ioba: 0x%lx \n", __func__, liobn, ioba); #endif regs->gprs[3] = H_Function; - for(;;) ; + BUG(); } static void h_stuff_tce(struct cpu_user_regs *regs) @@ -76,7 +76,7 @@ static void h_stuff_tce(struct cpu_user_ count); #endif regs->gprs[3] = H_Function; - for(;;); + BUG(); } __init_papr_hcall(H_PUT_TCE, h_put_tce); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/papr/xlate.c --- a/xen/arch/powerpc/papr/xlate.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/papr/xlate.c Fri Sep 01 13:04:02 2006 -0600 @@ -30,12 +30,6 @@ #include <asm/papr.h> #include <asm/hcalls.h> -static void not_yet(struct cpu_user_regs *regs) -{ - printk("not implemented yet: 0x%lx\n", regs->gprs[3]); - for (;;); -} - #ifdef USE_PTE_INSERT static inline void pte_insert(union pte volatile *pte, ulong vsid, ulong rpn, ulong lrpn) @@ -160,13 +154,13 @@ static void h_enter(struct cpu_user_regs } /* get correct pgshift value */ - pgshift = d->arch.large_page_shift[lp_size]; + pgshift = 
d->arch.large_page_order[lp_size] + PAGE_SHIFT; } /* get the correct logical RPN in terms of 4K pages need to mask * off lp bits and unused arpn bits if this is a large page */ - lpn = ~0ULL << (pgshift - 12); + lpn = ~0ULL << (pgshift - PAGE_SHIFT); lpn = pte.bits.rpn & lpn; rpn = pfn2mfn(d, lpn, &mtype); @@ -493,8 +487,42 @@ static void h_remove(struct cpu_user_reg pte_tlbie(&lpte, ptex); } +static void h_read(struct cpu_user_regs *regs) +{ + ulong flags = regs->gprs[4]; + ulong ptex = regs->gprs[5]; + struct vcpu *v = get_current(); + struct domain *d = v->domain; + struct domain_htab *htab = &d->arch.htab; + union pte volatile *pte; + + if (flags & H_READ_4) + ptex &= ~0x3UL; + + if (ptex > (1UL << htab->log_num_ptes)) { + regs->gprs[3] = H_Parameter; + printk("%s: bad ptex: 0x%lx\n", __func__, ptex); + return; + } + pte = &htab->map[ptex]; + regs->gprs[4] = pte[0].words.vsid; + regs->gprs[5] = pte[0].words.rpn; + + if (!(flags & H_READ_4)) { + /* dump another 3 PTEs */ + regs->gprs[6] = pte[1].words.vsid; + regs->gprs[7] = pte[1].words.rpn; + regs->gprs[8] = pte[2].words.vsid; + regs->gprs[9] = pte[2].words.rpn; + regs->gprs[10] = pte[3].words.vsid; + regs->gprs[11] = pte[3].words.rpn; + } + + regs->gprs[3] = H_Success; +} + __init_papr_hcall(H_ENTER, h_enter); -__init_papr_hcall(H_READ, not_yet); +__init_papr_hcall(H_READ, h_read); __init_papr_hcall(H_REMOVE, h_remove); __init_papr_hcall(H_CLEAR_MOD, h_clear_mod); __init_papr_hcall(H_CLEAR_REF, h_clear_ref); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/exceptions.S --- a/xen/arch/powerpc/powerpc64/exceptions.S Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/powerpc64/exceptions.S Fri Sep 01 13:04:02 2006 -0600 @@ -514,6 +514,43 @@ _GLOBAL(sleep) mtmsrd r3 blr +/* The primary processor issues a firmware call to spin us up at this + * address, passing our CPU number in r3. We only need a function + * entry point instead of a descriptor since this is never called from + * C code. 
+ */ .globl spin_start spin_start: + /* Write our processor number as an acknowledgment that we're alive. */ + LOADADDR(r14, __spin_ack) + stw r3, 0(r14) + sync + /* If NR_CPUS is too small, we should just spin forever. */ + LOADADDR(r15, NR_CPUS) + cmpd r3, r15 + blt 2f b . + /* Find our index in the array of processor_area struct pointers. */ +2: LOADADDR(r14, global_cpu_table) + muli r15, r3, 8 + add r14, r14, r15 + /* Spin until the pointer for our processor goes valid. */ +1: ld r15, 0(r14) + cmpldi r15, 0 + beq 1b + /* Dereference the pointer and load our stack pointer. */ + isync + ld r1, PAREA_stack(r15) + li r14, STACK_FRAME_OVERHEAD + sub r1, r1, r14 + /* Load up the TOC and entry point for the C function to be called. */ + LOADADDR(r14, secondary_cpu_init) + ld r2, 8(r14) + ld r11, 0(r14) + mtctr r11 + /* Warning: why do we need this synchronizing instruction on 970FX? */ + isync + /* Jump into C code now. */ + bctrl + nop + b . diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/powerpc64/ppc970.c --- a/xen/arch/powerpc/powerpc64/ppc970.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/powerpc64/ppc970.c Fri Sep 01 13:04:02 2006 -0600 @@ -13,9 +13,10 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 
2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> */ #include <xen/config.h> @@ -31,25 +32,68 @@ #undef SERIALIZE -unsigned int cpu_rma_order(void) +extern volatile struct processor_area * volatile global_cpu_table[]; + +struct rma_settings { + int order; + int rmlr0; + int rmlr12; +}; + +static struct rma_settings rma_orders[] = { + { .order = 26, .rmlr0 = 0, .rmlr12 = 3, }, /* 64 MB */ + { .order = 27, .rmlr0 = 1, .rmlr12 = 3, }, /* 128 MB */ + { .order = 28, .rmlr0 = 1, .rmlr12 = 0, }, /* 256 MB */ + { .order = 30, .rmlr0 = 0, .rmlr12 = 2, }, /* 1 GB */ + { .order = 34, .rmlr0 = 0, .rmlr12 = 1, }, /* 16 GB */ + { .order = 38, .rmlr0 = 0, .rmlr12 = 0, }, /* 256 GB */ +}; + +static uint log_large_page_sizes[] = { + 4 + 20, /* (1 << 4) == 16M */ +}; + +static struct rma_settings *cpu_find_rma(unsigned int order) { - /* XXX what about non-HV mode? */ - uint rma_log_size = 6 + 20; /* 64M */ - return rma_log_size - PAGE_SHIFT; + int i; + for (i = 0; i < ARRAY_SIZE(rma_orders); i++) { + if (rma_orders[i].order == order) + return &rma_orders[i]; + } + return NULL; } -void cpu_initialize(void) +unsigned int cpu_default_rma_order_pages(void) { - ulong stack; + return rma_orders[0].order - PAGE_SHIFT; +} - parea = xmalloc(struct processor_area); +unsigned int cpu_large_page_orders(uint *sizes, uint max) +{ + uint i = 0; + + while (i < max && i < ARRAY_SIZE(log_large_page_sizes)) { + sizes[i] = log_large_page_sizes[i] - PAGE_SHIFT; + ++i; + } + + return i; +} + +unsigned int cpu_extent_order(void) +{ + return log_large_page_sizes[0] - PAGE_SHIFT; +} + +void cpu_initialize(int cpuid) +{ + ulong r1, r2; + __asm__ __volatile__ ("mr %0, 1" : "=r" (r1)); + __asm__ __volatile__ ("mr %0, 2" : "=r" (r2)); + + /* This is SMP safe because the compiler must use r13 for it. 
*/ + parea = global_cpu_table[cpuid]; ASSERT(parea != NULL); - - stack = (ulong)alloc_xenheap_pages(STACK_ORDER); - - ASSERT(stack != 0); - parea->hyp_stack_base = (void *)(stack + STACK_SIZE); - printk("stack is here: %p\n", parea->hyp_stack_base); mthsprg0((ulong)parea); /* now ready for exceptions */ @@ -79,7 +123,10 @@ void cpu_initialize(void) s |= 1UL << (63-3); /* ser-gp */ hid0.word |= s; #endif - printk("hid0: 0x%016lx\n", hid0.word); + + printk("CPU #%d: Hello World! SP = %lx TOC = %lx HID0 = %lx\n", + smp_processor_id(), r1, r2, hid0.word); + mthid0(hid0.word); union hid1 hid1; @@ -115,45 +162,22 @@ void cpu_init_vcpu(struct vcpu *v) { struct domain *d = v->domain; union hid4 hid4; - ulong rma_base = page_to_maddr(d->arch.rma_page); - ulong rma_size = rma_size(d->arch.rma_order); + struct rma_settings *rma_settings; hid4.word = mfhid4(); hid4.bits.lpes0 = 0; /* exceptions set MSR_HV=1 */ hid4.bits.lpes1 = 1; /* RMA applies */ - hid4.bits.rmor = rma_base >> 26; + hid4.bits.rmor = page_to_maddr(d->arch.rma_page) >> 26; hid4.bits.lpid01 = d->domain_id & 3; hid4.bits.lpid25 = (d->domain_id >> 2) & 0xf; - switch (rma_size) { - case 256ULL << 30: /* 256 GB */ - hid4.bits.rmlr0 = 0; - hid4.bits.rmlr12 = 0; - break; - case 16ULL << 30: /* 16 GB */ - hid4.bits.rmlr0 = 0; - hid4.bits.rmlr12 = 1; - break; - case 1ULL << 30: /* 1 GB */ - hid4.bits.rmlr0 = 0; - hid4.bits.rmlr12 = 2; - break; - case 64ULL << 20: /* 64 MB */ - hid4.bits.rmlr0 = 0; - hid4.bits.rmlr12 = 3; - break; - case 256ULL << 20: /* 256 MB */ - hid4.bits.rmlr0 = 1; - hid4.bits.rmlr12 = 0; - break; - case 128ULL << 20: /* 128 MB */ - hid4.bits.rmlr0 = 1; - hid4.bits.rmlr12 = 3; - break; - } + rma_settings = cpu_find_rma(d->arch.rma_order + PAGE_SHIFT); + ASSERT(rma_settings != NULL); + hid4.bits.rmlr0 = rma_settings->rmlr0; + hid4.bits.rmlr12 = rma_settings->rmlr12; v->arch.cpu.hid4.word = hid4.word; } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/setup.c --- a/xen/arch/powerpc/setup.c Fri 
Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/setup.c Fri Sep 01 13:04:02 2006 -0600 @@ -43,9 +43,9 @@ #include <asm/percpu.h> #include "exceptions.h" #include "of-devtree.h" +#include "oftree.h" #define DEBUG -unsigned long xenheap_phys_end; /* opt_noht: If true, Hyperthreading is ignored. */ int opt_noht = 0; @@ -53,6 +53,14 @@ boolean_param("noht", opt_noht); int opt_earlygdb = 0; boolean_param("earlygdb", opt_earlygdb); + +/* opt_nosmp: If true, secondary processors are ignored. */ +static int opt_nosmp = 0; +boolean_param("nosmp", opt_nosmp); + +/* maxcpus: maximum number of CPUs to activate. */ +static unsigned int max_cpus = NR_CPUS; +integer_param("maxcpus", max_cpus); u32 tlbflush_clock = 1U; DEFINE_PER_CPU(u32, tlbflush_time); @@ -61,9 +69,12 @@ unsigned long wait_init_idle; unsigned long wait_init_idle; ulong oftree; ulong oftree_len; +ulong oftree_end; cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; cpumask_t cpu_online_map; /* missing ifdef in schedule.c */ +cpumask_t cpu_present_map; +cpumask_t cpu_possible_map; /* XXX get this from ISA node in device tree */ ulong isa_io_base; @@ -74,6 +85,8 @@ extern void idle_loop(void); /* move us to a header file */ extern void initialize_keytable(void); + +volatile struct processor_area * volatile global_cpu_table[NR_CPUS]; int is_kernel_text(unsigned long addr) { @@ -169,6 +182,21 @@ static void __init start_of_day(void) percpu_free_unused_areas(); + { + /* FIXME: Xen assumes that an online CPU is a schedualable + * CPU, but we just are not there yet. Remove this fragment when + * scheduling processors actually works. 
*/ + int cpuid; + + printk("WARNING!: Taking all secondary CPUs offline\n"); + + for_each_online_cpu(cpuid) { + if (cpuid == 0) + continue; + cpu_clear(cpuid, cpu_online_map); + } + } + initialize_keytable(); /* Register another key that will allow for the the Harware Probe * to be contacted, this works with RiscWatch probes and should @@ -193,17 +221,60 @@ void startup_cpu_idle_loop(void) reset_stack_and_jump(idle_loop); } +static void init_parea(int cpuid) +{ + /* Be careful not to shadow the global variable. */ + volatile struct processor_area *pa; + void *stack; + + pa = xmalloc(struct processor_area); + if (pa == NULL) + panic("%s: failed to allocate parea for cpu #%d\n", __func__, cpuid); + + stack = alloc_xenheap_pages(STACK_ORDER); + if (stack == NULL) + panic("%s: failed to allocate stack (order %d) for cpu #%d\n", + __func__, STACK_ORDER, cpuid); + + pa->whoami = cpuid; + pa->hyp_stack_base = (void *)((ulong)stack + STACK_SIZE); + + /* This store has the effect of invoking secondary_cpu_init. */ + global_cpu_table[cpuid] = pa; + mb(); +} + +static int kick_secondary_cpus(int maxcpus) +{ + int cpuid; + + for_each_present_cpu(cpuid) { + if (cpuid == 0) + continue; + if (cpuid >= maxcpus) + break; + init_parea(cpuid); + cpu_set(cpuid, cpu_online_map); + cpu_set(cpuid, cpu_possible_map); + } + + return 0; +} + +/* This is the first C code that secondary processors invoke. 
*/ +int secondary_cpu_init(int cpuid, unsigned long r4); +int secondary_cpu_init(int cpuid, unsigned long r4) +{ + cpu_initialize(cpuid); + while(1); +} + static void __init __start_xen(multiboot_info_t *mbi) { char *cmdline; module_t *mod = (module_t *)((ulong)mbi->mods_addr); - ulong heap_start; - ulong modules_start, modules_size; - ulong eomem = 0; - ulong heap_size = 0; - ulong bytes = 0; - ulong freemem = (ulong)_end; - ulong oftree_end; + ulong dom0_start, dom0_len; + ulong initrd_start, initrd_len; memcpy(0, exception_vectors, exception_vectors_end - exception_vectors); synchronize_caches(0, exception_vectors_end - exception_vectors); @@ -226,6 +297,9 @@ static void __init __start_xen(multiboot console_start_sync(); #endif + /* we give the first RMA to the hypervisor */ + xenheap_phys_end = rma_size(cpu_default_rma_order_pages()); + /* Check that we have at least one Multiboot module. */ if (!(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0)) { panic("FATAL ERROR: Require at least one Multiboot module.\n"); @@ -234,10 +308,6 @@ static void __init __start_xen(multiboot if (!(mbi->flags & MBI_MEMMAP)) { panic("FATAL ERROR: Bootloader provided no memory information.\n"); } - - /* mark the begining of images */ - modules_start = mod[0].mod_start; - modules_size = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; /* OF dev tree is the last module */ oftree = mod[mbi->mods_count-1].mod_start; @@ -249,71 +319,7 @@ static void __init __start_xen(multiboot mod[mbi->mods_count-1].mod_end = 0; --mbi->mods_count; - printk("Physical RAM map:\n"); - - /* lets find out how much memory there is */ - while (bytes < mbi->mmap_length) { - u64 end; - u64 addr; - u64 size; - - memory_map_t *map = (memory_map_t *)((ulong)mbi->mmap_addr + bytes); - addr = ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low; - size = ((u64)map->length_high << 32) | (u64)map->length_low; - end = addr + size; - - printk(" %016lx - %016lx (usable)\n", addr, end); - - if (addr > eomem) { - 
printk("found a hole skipping remainder of memory at:\n" - " %016lx and beyond\n", addr); - break; - } - if (end > eomem) { - eomem = end; - } - bytes += map->size + 4; - } - - printk("System RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10); - - /* top of memory */ - max_page = PFN_DOWN(ALIGN_DOWN(eomem, PAGE_SIZE)); - total_pages = max_page; - - /* Architecturally the first 4 pages are exception hendlers, we - * will also be copying down some code there */ - heap_start = init_boot_allocator(4 << PAGE_SHIFT); - - /* we give the first RMA to the hypervisor */ - xenheap_phys_end = rma_size(cpu_rma_order()); - - /* allow everything else to be allocated */ - init_boot_pages(xenheap_phys_end, eomem); - init_frametable(); - end_boot_allocator(); - - /* Add memory between the beginning of the heap and the beginning - * of out text */ - init_xenheap_pages(heap_start, (ulong)_start); - - /* move the modules to just after _end */ - if (modules_start) { - printk("modules at: %016lx - %016lx\n", modules_start, - modules_start + modules_size); - freemem = ALIGN_UP(freemem, PAGE_SIZE); - memmove((void *)freemem, (void *)modules_start, modules_size); - - oftree -= modules_start - freemem; - modules_start = freemem; - freemem += modules_size; - printk(" moved to: %016lx - %016lx\n", modules_start, - modules_start + modules_size); - } - - /* the rest of the xenheap, starting at the end of modules */ - init_xenheap_pages(freemem, xenheap_phys_end); - + memory_init(mod, mbi->mods_count); #ifdef OF_DEBUG printk("ofdump:\n"); @@ -321,13 +327,10 @@ static void __init __start_xen(multiboot ofd_walk((void *)oftree, OFD_ROOT, ofd_dump_props, OFD_DUMP_ALL); #endif - heap_size = xenheap_phys_end - heap_start; - - printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10); - percpu_init_areas(); - cpu_initialize(); + init_parea(0); + cpu_initialize(0); #ifdef CONFIG_GDB initialise_gdb(); @@ -335,6 +338,14 @@ static void __init __start_xen(multiboot debugger_trap_immediate(); #endif 
+ /* Deal with secondary processors. */ + if (opt_nosmp) { + printk("nosmp: leaving secondary processors spinning forever\n"); + } else { + printk("spinning up at most %d total processors ...\n", max_cpus); + kick_secondary_cpus(max_cpus); + } + start_of_day(); /* Create initial domain 0. */ @@ -353,22 +364,26 @@ static void __init __start_xen(multiboot /* Scrub RAM that is still free and so may go to an unprivileged domain. */ scrub_heap_pages(); - /* - * We're going to setup domain0 using the module(s) that we - * stashed safely above our heap. The second module, if present, - * is an initrd ramdisk. The last module is the OF devtree. - */ - if (construct_dom0(dom0, - modules_start, - mod[0].mod_end-mod[0].mod_start, - (mbi->mods_count == 1) ? 0 : - modules_start + - (mod[1].mod_start-mod[0].mod_start), - (mbi->mods_count == 1) ? 0 : - mod[mbi->mods_count-1].mod_end - mod[1].mod_start, + dom0_start = mod[0].mod_start; + dom0_len = mod[0].mod_end - mod[0].mod_start; + if (mbi->mods_count > 1) { + initrd_start = mod[1].mod_start; + initrd_len = mod[1].mod_end - mod[1].mod_start; + } else { + initrd_start = 0; + initrd_len = 0; + } + if (construct_dom0(dom0, dom0_start, dom0_len, + initrd_start, initrd_len, cmdline) != 0) { panic("Could not set up DOM0 guest OS\n"); } + + init_xenheap_pages(ALIGN_UP(dom0_start, PAGE_SIZE), + ALIGN_DOWN(dom0_start + dom0_len, PAGE_SIZE)); + if (initrd_start) + init_xenheap_pages(ALIGN_UP(initrd_start, PAGE_SIZE), + ALIGN_DOWN(initrd_start + initrd_len, PAGE_SIZE)); init_trace_bufs(); @@ -407,6 +422,8 @@ void arch_get_xen_caps(xen_capabilities_ void arch_get_xen_caps(xen_capabilities_info_t info) { } + + /* * Local variables: diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/xen.lds.S --- a/xen/arch/powerpc/xen.lds.S Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/powerpc/xen.lds.S Fri Sep 01 13:04:02 2006 -0600 @@ -10,11 +10,15 @@ SEARCH_DIR("=/usr/local/lib64"); SEARCH_ SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); 
SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib"); /* Do we need any of these for elf? __DYNAMIC = 0; */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS; +} SECTIONS { /* Read-only sections, merged into text segment: */ PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + SIZEOF_HEADERS; - .interp : { *(.interp) } + .interp : { *(.interp) } :text .hash : { *(.hash) } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } @@ -103,7 +107,7 @@ SECTIONS PROVIDE (__fini_array_end = .); .data : { - *(.data .data.* .gnu.linkonce.d.*) + *(.data .gnu.linkonce.d.*) SORT(CONSTRUCTORS) } @@ -121,7 +125,7 @@ SECTIONS __inithcall_end = .; __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } :text + .data.percpu : { *(.data.percpu) } __per_cpu_data_end = .; . = __per_cpu_start + (NR_CPUS << PERCPU_SHIFT); . = ALIGN(STACK_SIZE); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/io.c --- a/xen/arch/x86/hvm/io.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/io.c Fri Sep 01 13:04:02 2006 -0600 @@ -646,9 +646,13 @@ static void hvm_mmio_assist(struct cpu_u break; case INSTR_BT: - index = operand_index(src); - value = get_reg_value(size, index, 0, regs); - + if ( src & REGISTER ) + { + index = operand_index(src); + value = get_reg_value(size, index, 0, regs); + } + else if ( src & IMMEDIATE ) + value = mmio_opp->immediate; if (p->u.data & (1 << (value & ((1 << 5) - 1)))) regs->eflags |= X86_EFLAGS_CF; else diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/platform.c --- a/xen/arch/x86/hvm/platform.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/platform.c Fri Sep 01 13:04:02 2006 -0600 @@ -652,6 +652,23 @@ static int hvm_decode(int realmode, unsi instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); return DECODE_success; + case 0xBA: + if (((opcode[1] >> 3) & 7) == 4) /* BT $imm8, m16/32/64 */ + { + instr->instr = INSTR_BT; + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); + instr->immediate = + (signed 
char)get_immediate(realmode, opcode+1, BYTE); + instr->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); + return DECODE_success; + } + else + { + printf("0f %x, This opcode subtype isn't handled yet\n", *opcode); + return DECODE_failure; + } + default: printf("0f %x, This opcode isn't handled yet\n", *opcode); return DECODE_failure; @@ -1002,10 +1019,17 @@ void handle_mmio(unsigned long va, unsig mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */ mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */ - index = operand_index(mmio_inst.operand[0]); - size = operand_size(mmio_inst.operand[0]); - value = get_reg_value(size, index, 0, regs); - + if ( mmio_inst.operand[0] & REGISTER ) + { + index = operand_index(mmio_inst.operand[0]); + size = operand_size(mmio_inst.operand[0]); + value = get_reg_value(size, index, 0, regs); + } + else if ( mmio_inst.operand[0] & IMMEDIATE ) + { + mmio_opp->immediate = mmio_inst.immediate; + value = mmio_inst.immediate; + } send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1, mmio_inst.op_size, 0, IOREQ_READ, 0); break; diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/svm/intr.c Fri Sep 01 13:04:02 2006 -0600 @@ -79,22 +79,22 @@ asmlinkage void svm_intr_assist(void) ASSERT(vmcb); /* Check if an Injection is active */ - /* Previous Interrupt delivery caused this Intercept? */ - if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) { - v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector; + /* Previous Interrupt delivery caused this Intercept? 
*/ + if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) { + v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector; // printk("Injecting PF#: saving IRQ from ExitInfo\n"); - vmcb->exitintinfo.bytes = 0; - re_injecting = 1; - } + vmcb->exitintinfo.bytes = 0; + re_injecting = 1; + } /* Guest's interrputs masked? */ rflags = vmcb->rflags; if (irq_masked(rflags)) { HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags); - /* bail out, we won't be injecting an interrupt this time */ - return; + /* bail out, we won't be injecting an interrupt this time */ + return; } - + /* Previous interrupt still pending? */ if (vmcb->vintr.fields.irq) { // printk("Re-injecting IRQ from Vintr\n"); @@ -115,27 +115,24 @@ asmlinkage void svm_intr_assist(void) if ( v->vcpu_id == 0 ) hvm_pic_assist(v); + + if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { + pic_set_irq(pic, pt->irq, 0); + pic_set_irq(pic, pt->irq, 1); + } + callback_irq = v->domain->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; - - /* Before we deal with PIT interrupts, let's check for - interrupts set by the device model or paravirtualised event - channel interrupts. - */ - if ( cpu_has_pending_irq(v) ) { - intr_vector = cpu_get_interrupt(v, &intr_type); - } - else if ( callback_irq != 0 && local_events_need_delivery() ) { + if ( callback_irq != 0 && + local_events_need_delivery() ) { /*inject para-device call back irq*/ v->vcpu_info->evtchn_upcall_mask = 1; pic_set_irq(pic, callback_irq, 0); pic_set_irq(pic, callback_irq, 1); - intr_vector = callback_irq; } - else if ( (v->vcpu_id == 0) && pt->enabled && pt->pending_intr_nr ) { - pic_set_irq(pic, pt->irq, 0); - pic_set_irq(pic, pt->irq, 1); + + if ( cpu_has_pending_irq(v) ) intr_vector = cpu_get_interrupt(v, &intr_type); - } + } /* have we got an interrupt to inject? 
*/ diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Fri Sep 01 13:04:02 2006 -0600 @@ -243,6 +243,7 @@ static void svm_store_cpu_guest_regs( { /* Returning the guest's regs */ crs[0] = v->arch.hvm_svm.cpu_shadow_cr0; + crs[2] = v->arch.hvm_svm.cpu_cr2; crs[3] = v->arch.hvm_svm.cpu_cr3; crs[4] = v->arch.hvm_svm.cpu_shadow_cr4; } @@ -2793,9 +2794,7 @@ asmlinkage void svm_vmexit_handler(struc break; case VMEXIT_INTR: - raise_softirq(SCHEDULE_SOFTIRQ); - break; - + break; case VMEXIT_INVD: svm_vmexit_do_invd(vmcb); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/vlapic.c Fri Sep 01 13:04:02 2006 -0600 @@ -919,6 +919,20 @@ int cpu_has_apic_interrupt(struct vcpu* return 0; } +/* check to see if there is pending interrupt */ +int cpu_has_pending_irq(struct vcpu *v) +{ + struct hvm_domain *plat = &v->domain->arch.hvm_domain; + + /* APIC */ + if ( cpu_has_apic_interrupt(v) ) return 1; + + /* PIC */ + if ( !vlapic_accept_pic_intr(v) ) return 0; + + return plat->interrupt_request; +} + void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) { struct vlapic *vlapic = VLAPIC(v); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/io.c Fri Sep 01 13:04:02 2006 -0600 @@ -68,19 +68,6 @@ static inline int is_interruptibility_st return interruptibility; } -/* check to see if there is pending interrupt */ -int cpu_has_pending_irq(struct vcpu *v) -{ - struct hvm_domain *plat = &v->domain->arch.hvm_domain; - - /* APIC */ - if ( cpu_has_apic_interrupt(v) ) return 1; - - /* PIC */ - if ( !vlapic_accept_pic_intr(v) ) return 0; - - return plat->interrupt_request; -} asmlinkage void vmx_intr_assist(void) { diff -r 4ba098226429 -r 1bab7d65171b 
xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Sep 01 13:04:02 2006 -0600 @@ -46,6 +46,8 @@ #include <asm/hvm/vpic.h> #include <asm/hvm/vlapic.h> +extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); + static DEFINE_PER_CPU(unsigned long, trace_values[5]); #define TRACE_VMEXIT(index,value) this_cpu(trace_values)[index]=value @@ -518,6 +520,7 @@ static void vmx_store_cpu_guest_regs( if ( crs != NULL ) { __vmread(CR0_READ_SHADOW, &crs[0]); + crs[2] = v->arch.hvm_vmx.cpu_cr2; __vmread(GUEST_CR3, &crs[3]); __vmread(CR4_READ_SHADOW, &crs[4]); } @@ -953,8 +956,6 @@ static void vmx_vmexit_do_cpuid(struct c bitmaskof(X86_FEATURE_MWAIT) ); edx &= ~( bitmaskof(X86_FEATURE_HT) | - bitmaskof(X86_FEATURE_MCA) | - bitmaskof(X86_FEATURE_MCE) | bitmaskof(X86_FEATURE_ACPI) | bitmaskof(X86_FEATURE_ACC) ); } @@ -1615,6 +1616,7 @@ static int mov_to_cr(int gp, int cr, str unsigned long value; unsigned long old_cr; struct vcpu *v = current; + struct vlapic *vlapic = VLAPIC(v); switch ( gp ) { CASE_GET_REG(EAX, eax); @@ -1758,6 +1760,12 @@ static int mov_to_cr(int gp, int cr, str shadow_update_paging_modes(v); break; } + case 8: + { + vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); + vlapic_update_ppr(vlapic); + break; + } default: printk("invalid cr: %d\n", gp); __hvm_bug(regs); @@ -1771,13 +1779,20 @@ static int mov_to_cr(int gp, int cr, str */ static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) { - unsigned long value; + unsigned long value = 0; struct vcpu *v = current; - - if ( cr != 3 ) + struct vlapic *vlapic = VLAPIC(v); + + if ( cr != 3 && cr != 8) __hvm_bug(regs); - value = (unsigned long) v->arch.hvm_vmx.cpu_cr3; + if ( cr == 3 ) + value = (unsigned long) v->arch.hvm_vmx.cpu_cr3; + else if ( cr == 8 ) + { + value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI); + value = (value & 0xF0) >> 4; + } switch ( gp ) { CASE_SET_REG(EAX, eax); @@ -1888,7 +1903,7 @@ 
static inline void vmx_do_msr_read(struc } rdmsr_safe(regs->ecx, regs->eax, regs->edx); - break; + return; } regs->eax = msr_content & 0xFFFFFFFF; diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/mm/shadow/multi.c Fri Sep 01 13:04:02 2006 -0600 @@ -2861,11 +2861,11 @@ static int sh_page_fault(struct vcpu *v, // bunch of 4K maps. // + shadow_lock(d); + SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", v->domain->domain_id, v->vcpu_id, va, regs->error_code); - shadow_lock(d); - shadow_audit_tables(v); if ( guest_walk_tables(v, va, &gw, 1) != 0 ) @@ -3291,12 +3291,6 @@ sh_update_linear_entries(struct vcpu *v) { ml3e = __linear_l3_table; l3mfn = _mfn(l4e_get_pfn(__linear_l4_table[0])); -#if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables are made up by update_cr3 */ - sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; -#else - sl3e = v->arch.shadow_vtable; -#endif } else { @@ -3306,13 +3300,15 @@ sh_update_linear_entries(struct vcpu *v) l3mfn = _mfn(l4e_get_pfn(ml4e[0])); ml3e = sh_map_domain_page(l3mfn); sh_unmap_domain_page(ml4e); + } + #if GUEST_PAGING_LEVELS == 2 - /* Shadow l3 tables are made up by update_cr3 */ - sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; + /* Shadow l3 tables are made up by update_cr3 */ + sl3e = v->arch.hvm_vcpu.hvm_lowmem_l3tab; #else - sl3e = sh_map_domain_page(pagetable_get_mfn(v->arch.shadow_table)); -#endif - } + /* Always safe to use shadow_vtable, because it's globally mapped */ + sl3e = v->arch.shadow_vtable; +#endif for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) { @@ -3324,12 +3320,7 @@ sh_update_linear_entries(struct vcpu *v) } if ( v != current ) - { sh_unmap_domain_page(ml3e); -#if GUEST_PAGING_LEVELS != 2 - sh_unmap_domain_page(sl3e); -#endif - } } #elif CONFIG_PAGING_LEVELS == 3 @@ -3361,31 +3352,10 @@ sh_update_linear_entries(struct vcpu *v) #else /* GUEST_PAGING_LEVELS == 3 */ - /* Use local vcpu's mappings if we can; otherwise make new 
mappings */ - if ( v == current ) - { - shadow_l3e = v->arch.shadow_vtable; - if ( !shadow_mode_external(d) ) - guest_l3e = v->arch.guest_vtable; - } - else - { - mfn_t smfn; - int idx; - - /* Map the shadow l3 */ - smfn = pagetable_get_mfn(v->arch.shadow_table); - idx = shadow_l3_index(&smfn, guest_index(v->arch.shadow_vtable)); - shadow_l3e = sh_map_domain_page(smfn); - shadow_l3e += idx; - if ( !shadow_mode_external(d) ) - { - /* Also the guest l3 */ - mfn_t gmfn = pagetable_get_mfn(v->arch.guest_table); - guest_l3e = sh_map_domain_page(gmfn); - guest_l3e += guest_index(v->arch.guest_vtable); - } - } + /* Always safe to use *_vtable, because they're globally mapped */ + shadow_l3e = v->arch.shadow_vtable; + guest_l3e = v->arch.guest_vtable; + #endif /* GUEST_PAGING_LEVELS */ /* Choose where to write the entries, using linear maps if possible */ @@ -3443,14 +3413,6 @@ sh_update_linear_entries(struct vcpu *v) if ( v != current || !shadow_mode_external(d) ) sh_unmap_domain_page(l2e); -#if GUEST_PAGING_LEVELS == 3 - if ( v != current) - { - sh_unmap_domain_page(shadow_l3e); - if ( !shadow_mode_external(d) ) - sh_unmap_domain_page(guest_l3e); - } -#endif } #elif CONFIG_PAGING_LEVELS == 2 @@ -3601,7 +3563,7 @@ sh_detach_old_tables(struct vcpu *v) v->arch.shadow_vtable ) { // Q: why does this need to use (un)map_domain_page_*global* ? - // + /* A: so sh_update_linear_entries can operate on other vcpus */ sh_unmap_domain_page_global(v->arch.shadow_vtable); v->arch.shadow_vtable = NULL; } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/physdev.c Fri Sep 01 13:04:02 2006 -0600 @@ -96,10 +96,11 @@ long do_physdev_op(int cmd, XEN_GUEST_HA if ( !IS_PRIV(current->domain) ) break; + irq = irq_op.irq; ret = -EINVAL; - if ( (irq = irq_op.irq) >= NR_IRQS ) + if ( (irq < 0) || (irq >= NR_IRQS) ) break; - + irq_op.vector = assign_irq_vector(irq); ret = copy_to_guest(arg, &irq_op, 1) ? 
-EFAULT : 0; break; diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/time.c --- a/xen/arch/x86/time.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/time.c Fri Sep 01 13:04:02 2006 -0600 @@ -676,7 +676,7 @@ static inline void __update_vcpu_system_ struct vcpu_time_info *u; t = &this_cpu(cpu_time); - u = &v->domain->shared_info->vcpu_info[v->vcpu_id].time; + u = &v->vcpu_info->time; version_update_begin(&u->version); @@ -690,7 +690,7 @@ static inline void __update_vcpu_system_ void update_vcpu_system_time(struct vcpu *v) { - if ( v->domain->shared_info->vcpu_info[v->vcpu_id].time.tsc_timestamp != + if ( v->vcpu_info->time.tsc_timestamp != this_cpu(cpu_time).local_tsc_stamp ) __update_vcpu_system_time(v); } diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/traps.c Fri Sep 01 13:04:02 2006 -0600 @@ -339,7 +339,6 @@ asmlinkage void fatal_trap(int trapnr, s asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs) { int cpu = smp_processor_id(); - unsigned long cr2; static char *trapstr[] = { "divide error", "debug", "nmi", "bkpt", "overflow", "bounds", "invalid opcode", "device not available", "double fault", @@ -356,7 +355,7 @@ asmlinkage void fatal_trap(int trapnr, s if ( trapnr == TRAP_page_fault ) { - __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : ); + unsigned long cr2 = read_cr2(); printk("Faulting linear address: %p\n", _p(cr2)); show_page_walk(cr2); } @@ -911,7 +910,7 @@ asmlinkage int do_page_fault(struct cpu_ ASSERT(!in_irq()); - __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : ); + addr = read_cr2(); DEBUGGER_trap_entry(TRAP_page_fault, regs); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/x86_32/traps.c Fri Sep 01 13:04:02 2006 -0600 @@ -21,11 +21,28 @@ /* All CPUs have their own IDT to allow int80 direct trap. 
*/ idt_entry_t *idt_tables[NR_CPUS] __read_mostly; +static void print_xen_info(void) +{ + char taint_str[TAINT_STRING_MAX_LEN]; + char debug = 'n', *arch = "x86_32"; + +#ifndef NDEBUG + debug = 'y'; +#endif + +#ifdef CONFIG_X86_PAE + arch = "x86_32p"; +#endif + + printk("----[ Xen-%d.%d%s %s debug=%c %s ]----\n", + xen_major_version(), xen_minor_version(), xen_extra_version(), + arch, debug, print_tainted(taint_str)); +} + void show_registers(struct cpu_user_regs *regs) { struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; - char taint_str[TAINT_STRING_MAX_LEN]; const char *context; if ( hvm_guest(current) && guest_mode(regs) ) @@ -35,25 +52,29 @@ void show_registers(struct cpu_user_regs } else { - context = guest_mode(regs) ? "guest" : "hypervisor"; - if ( !guest_mode(regs) ) { + context = "hypervisor"; fault_regs.esp = (unsigned long)®s->esp; fault_regs.ss = read_segment_register(ss); fault_regs.ds = read_segment_register(ds); fault_regs.es = read_segment_register(es); fault_regs.fs = read_segment_register(fs); fault_regs.gs = read_segment_register(gs); + fault_crs[2] = read_cr2(); + } + else + { + context = "guest"; + fault_crs[2] = current->vcpu_info->arch.cr2; } fault_crs[0] = read_cr0(); fault_crs[3] = read_cr3(); - } - - printk("----[ Xen-%d.%d%s %s ]----\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + fault_crs[4] = read_cr4(); + } + + print_xen_info(); printk("CPU: %d\nEIP: %04x:[<%08x>]", smp_processor_id(), fault_regs.cs, fault_regs.eip); if ( !guest_mode(regs) ) @@ -63,7 +84,8 @@ void show_registers(struct cpu_user_regs fault_regs.eax, fault_regs.ebx, fault_regs.ecx, fault_regs.edx); printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n", fault_regs.esi, fault_regs.edi, fault_regs.ebp, fault_regs.esp); - printk("cr0: %08lx cr3: %08lx\n", fault_crs[0], fault_crs[3]); + printk("cr0: %08lx cr4: %08lx cr3: %08lx cr2: %08lx\n", + fault_crs[0], fault_crs[4], fault_crs[3], fault_crs[2]); 
printk("ds: %04x es: %04x fs: %04x gs: %04x " "ss: %04x cs: %04x\n", fault_regs.ds, fault_regs.es, fault_regs.fs, @@ -125,7 +147,6 @@ asmlinkage void do_double_fault(void) { struct tss_struct *tss = &doublefault_tss; unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1; - char taint_str[TAINT_STRING_MAX_LEN]; watchdog_disable(); @@ -133,9 +154,8 @@ asmlinkage void do_double_fault(void) /* Find information saved during fault and dump it to the console. */ tss = &init_tss[cpu]; - printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + printk("*** DOUBLE FAULT ***\n"); + print_xen_info(); printk("CPU: %d\nEIP: %04x:[<%08x>]", cpu, tss->cs, tss->eip); print_symbol(" %s\n", tss->eip); diff -r 4ba098226429 -r 1bab7d65171b xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/arch/x86/x86_64/traps.c Fri Sep 01 13:04:02 2006 -0600 @@ -21,11 +21,24 @@ #include <public/callback.h> +static void print_xen_info(void) +{ + char taint_str[TAINT_STRING_MAX_LEN]; + char debug = 'n'; + +#ifndef NDEBUG + debug = 'y'; +#endif + + printk("----[ Xen-%d.%d%s x86_64 debug=%c %s ]----\n", + xen_major_version(), xen_minor_version(), xen_extra_version(), + debug, print_tainted(taint_str)); +} + void show_registers(struct cpu_user_regs *regs) { struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; - char taint_str[TAINT_STRING_MAX_LEN]; const char *context; if ( hvm_guest(current) && guest_mode(regs) ) @@ -35,18 +48,27 @@ void show_registers(struct cpu_user_regs } else { - context = guest_mode(regs) ? 
"guest" : "hypervisor"; + if ( guest_mode(regs) ) + { + context = "guest"; + fault_crs[2] = current->vcpu_info->arch.cr2; + } + else + { + context = "hypervisor"; + fault_crs[2] = read_cr2(); + } + fault_crs[0] = read_cr0(); fault_crs[3] = read_cr3(); + fault_crs[4] = read_cr4(); fault_regs.ds = read_segment_register(ds); fault_regs.es = read_segment_register(es); fault_regs.fs = read_segment_register(fs); fault_regs.gs = read_segment_register(gs); } - printk("----[ Xen-%d.%d%s %s ]----\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + print_xen_info(); printk("CPU: %d\nRIP: %04x:[<%016lx>]", smp_processor_id(), fault_regs.cs, fault_regs.rip); if ( !guest_mode(regs) ) @@ -62,8 +84,9 @@ void show_registers(struct cpu_user_regs fault_regs.r9, fault_regs.r10, fault_regs.r11); printk("r12: %016lx r13: %016lx r14: %016lx\n", fault_regs.r12, fault_regs.r13, fault_regs.r14); - printk("r15: %016lx cr0: %016lx cr3: %016lx\n", - fault_regs.r15, fault_crs[0], fault_crs[3]); + printk("r15: %016lx cr0: %016lx cr4: %016lx\n", + fault_regs.r15, fault_crs[0], fault_crs[4]); + printk("cr3: %016lx cr2: %016lx\n", fault_crs[3], fault_crs[2]); printk("ds: %04x es: %04x fs: %04x gs: %04x " "ss: %04x cs: %04x\n", fault_regs.ds, fault_regs.es, fault_regs.fs, @@ -121,7 +144,6 @@ asmlinkage void do_double_fault(struct c asmlinkage void do_double_fault(struct cpu_user_regs *regs) { unsigned int cpu, tr; - char taint_str[TAINT_STRING_MAX_LEN]; asm ( "str %0" : "=r" (tr) ); cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2; @@ -131,9 +153,8 @@ asmlinkage void do_double_fault(struct c console_force_unlock(); /* Find information saved during fault and dump it to the console. 
*/ - printk("*** DOUBLE FAULT: Xen-%d.%d%s %s\n", - xen_major_version(), xen_minor_version(), xen_extra_version(), - print_tainted(taint_str)); + printk("*** DOUBLE FAULT ***\n"); + print_xen_info(); printk("CPU: %d\nRIP: %04x:[<%016lx>]", cpu, regs->cs, regs->rip); print_symbol(" %s", regs->rip); diff -r 4ba098226429 -r 1bab7d65171b xen/common/perfc.c --- a/xen/common/perfc.c Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/common/perfc.c Fri Sep 01 13:04:02 2006 -0600 @@ -136,8 +136,8 @@ static xen_sysctl_perfc_val_t *perfc_val static xen_sysctl_perfc_val_t *perfc_vals; static int perfc_nbr_vals; static int perfc_init = 0; -static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc, - XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val) +static int perfc_copy_info(XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc, + XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val) { unsigned int i, j; unsigned int v = 0; diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-ia64/mm.h Fri Sep 01 13:04:02 2006 -0600 @@ -451,7 +451,6 @@ extern u64 translate_domain_pte(u64 ptev #define INVALID_M2P_ENTRY (~0UL) #define VALID_M2P(_e) (!((_e) & (1UL<<63))) -#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e)) #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/config.h --- a/xen/include/asm-powerpc/config.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/config.h Fri Sep 01 13:04:02 2006 -0600 @@ -47,11 +47,13 @@ extern char __bss_start[]; /* this should be per processor, but for now */ #define CACHE_LINE_SIZE 128 +/* 256M - 64M of Xen space seems like a nice number */ +#define CONFIG_MIN_DOM0_PAGES (192 << (20 - PAGE_SHIFT)) #define CONFIG_SHADOW 1 #define CONFIG_GDB 1 #define CONFIG_SMP 1 #define CONFIG_PCI 1 -#define NR_CPUS 1 
+#define NR_CPUS 16 #ifndef ELFSIZE #define ELFSIZE 64 diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/current.h --- a/xen/include/asm-powerpc/current.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/current.h Fri Sep 01 13:04:02 2006 -0600 @@ -27,7 +27,7 @@ struct vcpu; -register struct processor_area *parea asm("r13"); +register volatile struct processor_area *parea asm("r13"); static inline struct vcpu *get_current(void) { @@ -66,7 +66,7 @@ static inline struct cpu_user_regs *gues static inline void reset_stack_and_jump(void (*f)(void)) { - void _reset_stack_and_jump(void (*f)(void), struct cpu_user_regs *regs); + void _reset_stack_and_jump(void (*)(void), struct cpu_user_regs *); struct cpu_user_regs *regs = guest_cpu_user_regs(); #ifdef TRACK_RESUME diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/domain.h --- a/xen/include/asm-powerpc/domain.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/domain.h Fri Sep 01 13:04:02 2006 -0600 @@ -38,15 +38,14 @@ struct arch_domain { struct page_info *rma_page; uint rma_order; - /* This is regular memory, only available thru translataion */ - ulong logical_base_pfn; - ulong logical_end_pfn; + /* list of extents beyond RMA */ + struct list_head extent_list; /* I/O-port access bitmap mask. */ u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. 
*/ uint large_page_sizes; - char large_page_shift[4]; + uint large_page_order[4]; } __cacheline_aligned; struct slb_entry { diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/htab.h --- a/xen/include/asm-powerpc/htab.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/htab.h Fri Sep 01 13:04:02 2006 -0600 @@ -133,8 +133,4 @@ struct domain_htab { union pte *map; /* access the htab like an array */ ulong *shadow; /* idx -> logical translation array */ }; - -struct domain; -extern void htab_alloc(struct domain *d, uint order); -extern void htab_free(struct domain *d); #endif diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/mm.h --- a/xen/include/asm-powerpc/mm.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/mm.h Fri Sep 01 13:04:02 2006 -0600 @@ -24,6 +24,7 @@ #include <public/xen.h> #include <xen/list.h> #include <xen/types.h> +#include <xen/mm.h> #include <asm/misc.h> #include <asm/system.h> #include <asm/flushtlb.h> @@ -33,7 +34,6 @@ #define memguard_unguard_range(_p,_l) ((void)0) extern unsigned long xenheap_phys_end; -#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end) /* * Per-page-frame information. @@ -43,7 +43,6 @@ extern unsigned long xenheap_phys_end; * 2. Provide a PFN_ORDER() macro for accessing the order of a free page. */ #define PFN_ORDER(_pfn) ((_pfn)->u.free.order) -#define PRtype_info "016lx" /* XXX copy-and-paste job; re-examine me */ struct page_info @@ -63,7 +62,7 @@ struct page_info /* Page is in use: ((count_info & PGC_count_mask) != 0). */ struct { /* Owner of this page (NULL if page is anonymous). */ - struct domain *_domain; + u32 _domain; /* Type reference count and various PGT_xxx flags and fields. */ unsigned long type_info; } inuse; @@ -80,80 +79,132 @@ struct page_info }; +struct page_extents { + /* Each frame can be threaded onto a doubly-linked list. 
*/ + struct list_head pe_list; + + /* page extent */ + struct page_info *pg; + uint order; + ulong pfn; +}; + /* The following page types are MUTUALLY EXCLUSIVE. */ #define PGT_none (0<<29) /* no special uses of this page */ -#define PGT_l1_page_table (1<<29) /* using this page as an L1 page table? */ -#define PGT_l2_page_table (2<<29) /* using this page as an L2 page table? */ -#define PGT_l3_page_table (3<<29) /* using this page as an L3 page table? */ -#define PGT_l4_page_table (4<<29) /* using this page as an L4 page table? */ -#define PGT_gdt_page (5<<29) /* using this page in a GDT? */ -#define PGT_ldt_page (6<<29) /* using this page in an LDT? */ +#define PGT_RMA (1<<29) /* This page is an RMA page? */ #define PGT_writable_page (7<<29) /* has writable mappings of this page? */ #define PGT_type_mask (7<<29) /* Bits 29-31. */ + + /* Owning guest has pinned this page to its current type? */ +#define _PGT_pinned 28 +#define PGT_pinned (1U<<_PGT_pinned) /* Has this page been validated for use as its current type? */ -#define _PGT_validated 28 +#define _PGT_validated 27 #define PGT_validated (1U<<_PGT_validated) - /* Owning guest has pinned this page to its current type? */ -#define _PGT_pinned 27 -#define PGT_pinned (1U<<_PGT_pinned) - /* The 10 most significant bits of virt address if this is a page table. */ -#define PGT_va_shift 17 -#define PGT_va_mask (((1U<<10)-1)<<PGT_va_shift) + + /* The 27 most significant bits of virt address if this is a page table. */ +#define PGT_va_shift 32 +#define PGT_va_mask ((unsigned long)((1U<<28)-1)<<PGT_va_shift) /* Is the back pointer still mutable (i.e. not fixed yet)? */ -#define PGT_va_mutable (((1U<<10)-1)<<PGT_va_shift) +#define PGT_va_mutable ((unsigned long)((1U<<28)-1)<<PGT_va_shift) /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */ -#define PGT_va_unknown (((1U<<10)-2)<<PGT_va_shift) - /* 17-bit count of uses of this frame as its current type. 
*/ -#define PGT_count_mask ((1U<<17)-1) +#define PGT_va_unknown ((unsigned long)((1U<<28)-2)<<PGT_va_shift) + + /* 16-bit count of uses of this frame as its current type. */ +#define PGT_count_mask ((1U<<16)-1) /* Cleared when the owning guest 'frees' this page. */ #define _PGC_allocated 31 #define PGC_allocated (1U<<_PGC_allocated) - /* 31-bit count of references to this frame. */ -#define PGC_count_mask ((1U<<31)-1) + /* Set on a *guest* page to mark it out-of-sync with its shadow */ +#define _PGC_out_of_sync 30 +#define PGC_out_of_sync (1U<<_PGC_out_of_sync) + /* Set when is using a page as a page table */ +#define _PGC_page_table 29 +#define PGC_page_table (1U<<_PGC_page_table) + /* 29-bit count of references to this frame. */ +#define PGC_count_mask ((1U<<29)-1) + +#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end) + +static inline struct domain *unpickle_domptr(u32 _domain) +{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); } + +static inline u32 pickle_domptr(struct domain *domain) +{ return (domain == NULL) ? 
0 : (u32)__pa(domain); } + +#define PRtype_info "016lx"/* should only be used for printk's */ + +#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) +#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d)) + +extern struct page_info *frame_table; +extern unsigned long max_page; +extern unsigned long total_pages; +void init_frametable(void); static inline void put_page(struct page_info *page) { -#if 0 - int count; - - count = atomic_dec_return(&page->count_info); - - if ( unlikely((count & PGC_count_mask) == 0) ) + u32 nx, x, y = page->count_info; + + do { + x = y; + nx = x - 1; + } + while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) ); + + if ( unlikely((nx & PGC_count_mask) == 0) ) { + panic("about to free page\n"); free_domheap_page(page); -#else - trap(); -#endif + } } static inline int get_page(struct page_info *page, struct domain *domain) { -#if 0 - int count; - - count = atomic_inc_return(&page->count_info); - - if (((count & PGC_count_mask) == 0) || /* Count overflow? */ - ((count & PGC_count_mask) == 1) || /* Wasn't allocated? */ - ((page->domain != domain))) /* Wrong owner? */ - { - atomic_dec(&page->count_info); - return 0; - } - -#else - trap(); -#endif + u32 x, nx, y = page->count_info; + u32 d, nd = page->u.inuse._domain; + u32 _domain = pickle_domptr(domain); + + do { + x = y; + nx = x + 1; + d = nd; + if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ + unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ + unlikely(d != _domain) ) /* Wrong owner? 
*/ + { + return 0; + } + y = cmpxchg(&page->count_info, x, nx); + } + while ( unlikely(y != x) ); + return 1; +} + +extern void put_page_type(struct page_info *page); +extern int get_page_type(struct page_info *page, unsigned long type); + +static inline void put_page_and_type(struct page_info *page) +{ + put_page_type(page); + put_page(page); } static inline int get_page_and_type(struct page_info *page, struct domain *domain, - u32 type) -{ - trap(); - return 1; + unsigned long type) +{ + int rc = get_page(page, domain); + + if ( likely(rc) && unlikely(!get_page_type(page, type)) ) + { + put_page(page); + rc = 0; + } + + return rc; } static inline int page_is_removable(struct page_info *page) @@ -161,16 +212,9 @@ static inline int page_is_removable(stru return ((page->count_info & PGC_count_mask) == 1); } -int get_page_type(struct page_info *page, u32 type); - #define set_machinetophys(_mfn, _pfn) (trap(), 0) extern void synchronise_pagetables(unsigned long cpu_mask); - -static inline void put_page_and_type(struct page_info *page) -{ - trap(); -} /* XXX don't know what this is for */ typedef struct { @@ -179,17 +223,10 @@ typedef struct { } vm_assist_info_t; extern vm_assist_info_t vm_assist_info[]; -#define page_get_owner(_p) ((_p)->u.inuse._domain) -#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = _d) - #define share_xen_page_with_guest(p, d, r) do { } while (0) #define share_xen_page_with_privileged_guests(p, r) do { } while (0) -extern struct page_info *frame_table; extern unsigned long frame_table_size; -extern unsigned long max_page; -extern unsigned long total_pages; -void init_frametable(void); /* hope that accesses to this will fail spectacularly */ #define machine_to_phys_mapping ((u32 *)-1UL) @@ -199,12 +236,12 @@ extern int update_grant_va_mapping(unsig struct domain *, struct vcpu *); -extern void put_page_type(struct page_info *page); - -#define PFN_TYPE_RMA 0 -#define PFN_TYPE_LOGICAL 1 -#define PFN_TYPE_IO 2 -extern ulong pfn2mfn(struct 
domain *d, long mfn, int *type); +#define PFN_TYPE_RMA 1 +#define PFN_TYPE_LOGICAL 2 +#define PFN_TYPE_IO 3 +#define PFN_TYPE_REMOTE 4 + +extern ulong pfn2mfn(struct domain *d, long pfn, int *type); /* Arch-specific portion of memory_op hypercall. */ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg); @@ -221,6 +258,10 @@ static inline unsigned long gmfn_to_mfn( #define mfn_to_gmfn(_d, mfn) (mfn) +extern int allocate_rma(struct domain *d, unsigned int order_pages); +extern uint allocate_extents(struct domain *d, uint nrpages, uint rma_nrpages); +extern void free_extents(struct domain *d); + extern int steal_page(struct domain *d, struct page_info *page, unsigned int memflags); diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/powerpc64/procarea.h --- a/xen/include/asm-powerpc/powerpc64/procarea.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/powerpc64/procarea.h Fri Sep 01 13:04:02 2006 -0600 @@ -28,6 +28,7 @@ struct gdb_state; struct processor_area { + unsigned int whoami; struct vcpu *cur_vcpu; void *hyp_stack_base; ulong saved_regs[2]; diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/processor.h --- a/xen/include/asm-powerpc/processor.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/processor.h Fri Sep 01 13:04:02 2006 -0600 @@ -39,8 +39,11 @@ struct cpu_user_regs; struct cpu_user_regs; extern void show_registers(struct cpu_user_regs *); extern void show_execution_state(struct cpu_user_regs *); -extern unsigned int cpu_rma_order(void); -extern void cpu_initialize(void); +extern void show_backtrace(ulong sp, ulong lr, ulong pc); +extern unsigned int cpu_extent_order(void); +extern unsigned int cpu_default_rma_order_pages(void); +extern uint cpu_large_page_orders(uint *sizes, uint max); +extern void cpu_initialize(int cpuid); extern void cpu_init_vcpu(struct vcpu *); extern void save_cpu_sprs(struct vcpu *); extern void load_cpu_sprs(struct vcpu *); diff -r 4ba098226429 -r 1bab7d65171b 
xen/include/asm-powerpc/shadow.h --- a/xen/include/asm-powerpc/shadow.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/shadow.h Fri Sep 01 13:04:02 2006 -0600 @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * Copyright (C) IBM Corp. 2005 + * Copyright (C) IBM Corp. 2005, 2006 * * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> */ @@ -55,4 +55,18 @@ static inline void mark_dirty(struct dom { return; } +#define gnttab_mark_dirty(d, f) mark_dirty((d), (f)) + +extern int shadow_domctl(struct domain *d, + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); +extern unsigned int shadow_teardown(struct domain *d); +extern unsigned int shadow_set_allocation( + struct domain *d, unsigned int megabytes, int *preempted); + +/* Return the size of the shadow pool, rounded up to the nearest MB */ +static inline unsigned int shadow_get_allocation(struct domain *d) +{ + return (1ULL << (d->arch.htab.order + PAGE_SHIFT)) >> 20; +} #endif diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/smp.h --- a/xen/include/asm-powerpc/smp.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/smp.h Fri Sep 01 13:04:02 2006 -0600 @@ -28,8 +28,8 @@ extern int smp_num_siblings; /* revisit when we support SMP */ #define get_hard_smp_processor_id(i) i -#define hard_smp_processor_id() 0 -#define raw_smp_processor_id() 0 +#define raw_smp_processor_id() (parea->whoami) +#define hard_smp_processor_id() raw_smp_processor_id() extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-powerpc/types.h --- a/xen/include/asm-powerpc/types.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-powerpc/types.h Fri Sep 01 13:04:02 2006 -0600 @@ -3,8 +3,18 @@ #ifndef _PPC_TYPES_H #define _PPC_TYPES_H +#include <xen/config.h> + +#if defined(__ppc__) +#define BYTES_PER_LONG 4 +#define 
BITS_PER_LONG 32 +#elif defined(__PPC64__) +#define BYTES_PER_LONG 8 +#define BITS_PER_LONG 64 +#endif + +#ifndef __ASSEMBLY__ typedef unsigned short umode_t; - /* * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the @@ -31,8 +41,6 @@ typedef unsigned long __u64; #endif #endif -#include <xen/config.h> - typedef signed char s8; typedef unsigned char u8; @@ -45,14 +53,10 @@ typedef unsigned int u32; #if defined(__ppc__) typedef signed long long s64; typedef unsigned long long u64; -#define BYTES_PER_LONG 4 -#define BITS_PER_LONG 32 typedef unsigned int size_t; #elif defined(__PPC64__) typedef signed long s64; typedef unsigned long u64; -#define BYTES_PER_LONG 8 -#define BITS_PER_LONG 64 typedef unsigned long size_t; #endif @@ -66,4 +70,5 @@ typedef u64 dma64_addr_t; typedef unsigned short xmem_bufctl_t; +#endif /* __ASSEMBLY__ */ #endif diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/mm.h Fri Sep 01 13:04:02 2006 -0600 @@ -338,7 +338,6 @@ int check_descriptor(struct desc_struct #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START) #define INVALID_M2P_ENTRY (~0UL) #define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1)))) -#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e)) #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn)) #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/page.h Fri Sep 01 13:04:02 2006 -0600 @@ -300,13 +300,6 @@ void setup_idle_pagetable(void); #define _PAGE_GNTTAB 0 #endif -/* - * Disallow unused flag bits plus PAT, PSE and GLOBAL. - * Also disallow GNTTAB if we are using it for grant-table debugging. - * Permit the NX bit if the hardware supports it. 
- */ -#define BASE_DISALLOW_MASK ((0xFFFFF180U | _PAGE_GNTTAB) & ~_PAGE_NX) - #define __PAGE_HYPERVISOR \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) #define __PAGE_HYPERVISOR_NOCACHE \ diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/processor.h Fri Sep 01 13:04:02 2006 -0600 @@ -288,6 +288,13 @@ static inline void write_cr0(unsigned lo static inline void write_cr0(unsigned long val) { __asm__("mov %0,%%cr0": :"r" ((unsigned long)val)); +} + +static inline unsigned long read_cr2(void) +{ + unsigned long __cr2; + __asm__("mov %%cr2,%0\n\t" :"=r" (__cr2)); + return __cr2; } static inline unsigned long read_cr4(void) diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-2level.h --- a/xen/include/asm-x86/x86_32/page-2level.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/x86_32/page-2level.h Fri Sep 01 13:04:02 2006 -0600 @@ -53,7 +53,4 @@ typedef l2_pgentry_t root_pgentry_t; #define get_pte_flags(x) ((int)(x) & 0xFFF) #define put_pte_flags(x) ((intpte_t)((x) & 0xFFF)) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK - #endif /* __X86_32_PAGE_2LEVEL_H__ */ diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page-3level.h --- a/xen/include/asm-x86/x86_32/page-3level.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/x86_32/page-3level.h Fri Sep 01 13:04:02 2006 -0600 @@ -66,8 +66,6 @@ typedef l3_pgentry_t root_pgentry_t; #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF)) #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF)) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK #define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */ #endif /* __X86_32_PAGE_3LEVEL_H__ */ diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_32/page.h --- 
a/xen/include/asm-x86/x86_32/page.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/x86_32/page.h Fri Sep 01 13:04:02 2006 -0600 @@ -26,6 +26,15 @@ extern unsigned int PAGE_HYPERVISOR_NOCA #define GRANT_PTE_FLAGS \ (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB) +/* + * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Permit the NX bit if the hardware supports it. + */ +#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) + +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) + #endif /* __X86_32_PAGE_H__ */ /* diff -r 4ba098226429 -r 1bab7d65171b xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/asm-x86/x86_64/page.h Fri Sep 01 13:04:02 2006 -0600 @@ -75,8 +75,15 @@ typedef l4_pgentry_t root_pgentry_t; #define _PAGE_NX_BIT (1U<<23) #define _PAGE_NX (cpu_has_nx ? _PAGE_NX_BIT : 0U) -#define L1_DISALLOW_MASK BASE_DISALLOW_MASK -#define L2_DISALLOW_MASK BASE_DISALLOW_MASK +/* + * Disallow unused flag bits plus PAT, PSE and GLOBAL. + * Permit the NX bit if the hardware supports it. + * Note that range [62:52] is available for software use on x86/64. 
+ */ +#define BASE_DISALLOW_MASK (0xFF000180U & ~_PAGE_NX) + +#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB) +#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK) #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */) diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/arch-ia64.h Fri Sep 01 13:04:02 2006 -0600 @@ -18,15 +18,12 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif #ifndef __ASSEMBLY__ -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. */ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-powerpc.h --- a/xen/include/public/arch-powerpc.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/arch-powerpc.h Fri Sep 01 13:04:02 2006 -0600 @@ -29,7 +29,6 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_ ## name #define set_xen_guest_handle(hnd, val) \ do { \ if (sizeof ((hnd).__pad)) \ @@ -42,8 +41,6 @@ #endif #ifndef __ASSEMBLY__ -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. 
*/ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/arch-x86_32.h Fri Sep 01 13:04:02 2006 -0600 @@ -28,14 +28,7 @@ #endif /* Structural guest handles introduced in 0x00030201. */ -#if (defined(__XEN__) || defined(__XEN_TOOLS__)) && !defined(__ASSEMBLY__) -typedef uint64_t __attribute__((aligned(8))) uint64_aligned_t; -#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ - typedef struct { type *p; } \ - __guest_handle_ ## name; \ - typedef struct { union { type *p; uint64_aligned_t q; }; } \ - __guest_handle_64_ ## name -#elif __XEN_INTERFACE_VERSION__ >= 0x00030201 +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 #define __DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name #else @@ -45,15 +38,9 @@ typedef uint64_t __attribute__((aligned( #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#define set_xen_guest_handle(hnd, val) \ - do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ - (hnd).p = val; \ - } while ( 0 ) -#else -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #endif #ifndef __ASSEMBLY__ diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/arch-x86_64.h Fri Sep 01 13:04:02 2006 -0600 @@ -39,15 +39,12 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name -#define XEN_GUEST_HANDLE_64(name) 
__guest_handle_ ## name #define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif #ifndef __ASSEMBLY__ -typedef uint64_t uint64_aligned_t; - /* Guest handles for primitive C types. */ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); __DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/domctl.h --- a/xen/include/public/domctl.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/domctl.h Fri Sep 01 13:04:02 2006 -0600 @@ -16,12 +16,10 @@ #include "xen.h" -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000001 - -#define uint64_t uint64_aligned_t +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000003 struct xenctl_cpumap { - XEN_GUEST_HANDLE_64(uint8_t) bitmap; + XEN_GUEST_HANDLE(uint8_t) bitmap; uint32_t nr_cpus; }; @@ -72,8 +70,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdo #define XEN_DOMCTL_getmemlist 6 struct xen_domctl_getmemlist { /* IN variables. */ + /* Max entries to write to output buffer. */ uint64_t max_pfns; - XEN_GUEST_HANDLE_64(ulong) buffer; + /* Start index in guest's page list. */ + uint64_t start_pfn; + XEN_GUEST_HANDLE(xen_pfn_t) buffer; /* OUT variables. */ uint64_t num_pfns; }; @@ -110,7 +111,7 @@ struct xen_domctl_getpageframeinfo2 { /* IN variables. */ uint64_t num; /* IN/OUT variables. */ - XEN_GUEST_HANDLE_64(ulong) array; + XEN_GUEST_HANDLE(ulong) array; }; typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); @@ -184,7 +185,7 @@ struct xen_domctl_shadow_op { uint32_t mb; /* Shadow memory allocation in MB */ /* OP_PEEK / OP_CLEAN */ - XEN_GUEST_HANDLE_64(ulong) dirty_bitmap; + XEN_GUEST_HANDLE(ulong) dirty_bitmap; uint64_t pages; /* Size of buffer. Updated with actual size. 
*/ struct xen_domctl_shadow_op_stats stats; }; @@ -204,8 +205,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_m #define XEN_DOMCTL_setvcpucontext 12 #define XEN_DOMCTL_getvcpucontext 13 struct xen_domctl_vcpucontext { - uint32_t vcpu; /* IN */ - XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ + uint32_t vcpu; /* IN */ + XEN_GUEST_HANDLE(vcpu_guest_context_t) ctxt; /* IN/OUT */ }; typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); @@ -378,8 +379,6 @@ typedef struct xen_domctl xen_domctl_t; typedef struct xen_domctl xen_domctl_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); -#undef uint64_t - #endif /* __XEN_PUBLIC_DOMCTL_H__ */ /* diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/sysctl.h Fri Sep 01 13:04:02 2006 -0600 @@ -16,9 +16,7 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000001 - -#define uint64_t uint64_aligned_t +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000002 /* * Read console content from Xen buffer ring. @@ -26,8 +24,8 @@ #define XEN_SYSCTL_readconsole 1 struct xen_sysctl_readconsole { /* IN variables. */ - uint32_t clear; /* Non-zero -> clear after reading. */ - XEN_GUEST_HANDLE_64(char) buffer; /* Buffer start */ + uint32_t clear; /* Non-zero -> clear after reading. */ + XEN_GUEST_HANDLE(char) buffer; /* Buffer start */ /* IN/OUT variables. 
*/ uint32_t count; /* In: Buffer size; Out: Used buffer size */ }; @@ -105,9 +103,9 @@ struct xen_sysctl_perfc_op { uint32_t nr_counters; /* number of counters description */ uint32_t nr_vals; /* number of values */ /* counter information (or NULL) */ - XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t) desc; /* counter values (or NULL) */ - XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; + XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t) val; }; typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); @@ -117,7 +115,7 @@ struct xen_sysctl_getdomaininfolist { /* IN variables. */ domid_t first_domain; uint32_t max_domains; - XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t) buffer; /* OUT variables. */ uint32_t num_domains; }; @@ -140,8 +138,6 @@ typedef struct xen_sysctl xen_sysctl_t; typedef struct xen_sysctl xen_sysctl_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); -#undef uint64_t - #endif /* __XEN_PUBLIC_SYSCTL_H__ */ /* diff -r 4ba098226429 -r 1bab7d65171b xen/include/public/xen.h --- a/xen/include/public/xen.h Fri Sep 01 12:52:12 2006 -0600 +++ b/xen/include/public/xen.h Fri Sep 01 13:04:02 2006 -0600 @@ -63,6 +63,7 @@ #define __HYPERVISOR_hvm_op 34 #define __HYPERVISOR_sysctl 35 #define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/FlatDeviceTree.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/FlatDeviceTree.py Fri Sep 01 13:04:02 2006 -0600 @@ -0,0 +1,323 @@ +#!/usr/bin/env python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright (C) IBM Corp. 2006 +# +# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + +import os +import sys +import struct +import stat +import re + +_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning +_OF_DT_BEGIN_NODE = 0x1 +_OF_DT_END_NODE = 0x2 +_OF_DT_PROP = 0x3 +_OF_DT_END = 0x9 + +def _bincat(seq, separator=''): + '''Concatenate the contents of seq into a bytestream.''' + strs = [] + for item in seq: + if type(item) == type(0): + strs.append(struct.pack(">I", item)) + else: + try: + strs.append(item.to_bin()) + except AttributeError, e: + strs.append(item) + return separator.join(strs) + +def _alignup(val, alignment): + return (val + alignment - 1) & ~(alignment - 1) + +def _pad(buf, alignment): + '''Pad bytestream with NULLs to specified alignment.''' + padlen = _alignup(len(buf), alignment) + return buf + '\0' * (padlen - len(buf)) + # not present in Python 2.3: + #return buf.ljust(_padlen, '\0') + +def _indent(item): + indented = [] + for line in str(item).splitlines(True): + indented.append(' ' + line) + return ''.join(indented) + +class _Property: + _nonprint = re.compile('[\000-\037\200-\377]') + def __init__(self, node, name, value): + self.node = node + self.value = value + self.name = name + self.node.tree.stradd(name) + + def __str__(self): + result = self.name + if self.value: + searchtext = self.value + # it's ok for a string to end in NULL + if searchtext.find('\000') == len(searchtext)-1: + searchtext = searchtext[:-1] + m = 
self._nonprint.search(searchtext) + if m: + bytes = struct.unpack("B" * len(self.value), self.value) + hexbytes = [ '%02x' % b for b in bytes ] + words = [] + for i in range(0, len(self.value), 4): + words.append(''.join(hexbytes[i:i+4])) + v = '<' + ' '.join(words) + '>' + else: + v = '"%s"' % self.value + result += ': ' + v + return result + + def to_bin(self): + offset = self.node.tree.stroffset(self.name) + return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \ + + _pad(self.value, 4) + +class _Node: + def __init__(self, tree, name): + self.tree = tree + self.name = name + self.props = {} + self.children = {} + self.phandle = 0 + + def __str__(self): + propstrs = [ _indent(prop) for prop in self.props.values() ] + childstrs = [ _indent(child) for child in self.children.values() ] + return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs), + '\n'.join(childstrs)) + + def to_bin(self): + name = _pad(self.name + '\0', 4) + return struct.pack('>I', _OF_DT_BEGIN_NODE) + \ + name + \ + _bincat(self.props.values()) + \ + _bincat(self.children.values()) + \ + struct.pack('>I', _OF_DT_END_NODE) + + def addprop(self, propname, *cells): + '''setprop with duplicate error-checking.''' + if propname in self.props: + raise AttributeError('%s/%s already exists' % (self.name, propname)) + self.setprop(propname, *cells) + + def setprop(self, propname, *cells): + self.props[propname] = _Property(self, propname, _bincat(cells)) + + def addnode(self, nodename): + '''newnode with duplicate error-checking.''' + if nodename in self.children: + raise AttributeError('%s/%s already exists' % (self.name, nodename)) + return self.newnode(nodename) + + def newnode(self, nodename): + node = _Node(self.tree, nodename) + self.children[nodename] = node + return node + + def getprop(self, propname): + return self.props[propname] + + def getchild(self, nodename): + return self.children[nodename] + + def get_phandle(self): + if self.phandle: + return self.phandle + self.phandle = 
self.tree.alloc_phandle() + self.addprop('linux,phandle', self.phandle) + return self.phandle + +class _Header: + def __init__(self): + self.magic = 0 + self.totalsize = 0 + self.off_dt_struct = 0 + self.off_dt_strings = 0 + self.off_mem_rsvmap = 0 + self.version = 0 + self.last_comp_version = 0 + self.boot_cpuid_phys = 0 + self.size_dt_strings = 0 + def to_bin(self): + return struct.pack('>9I', + self.magic, + self.totalsize, + self.off_dt_struct, + self.off_dt_strings, + self.off_mem_rsvmap, + self.version, + self.last_comp_version, + self.boot_cpuid_phys, + self.size_dt_strings) + +class _StringBlock: + def __init__(self): + self.table = [] + def to_bin(self): + return _bincat(self.table, '\0') + '\0' + def add(self, str): + self.table.append(str) + def getoffset(self, str): + return self.to_bin().index(str + '\0') + +class Tree(_Node): + def __init__(self): + self.last_phandle = 0 + self.strings = _StringBlock() + self.reserved = [(0, 0)] + _Node.__init__(self, self, '\0') + + def alloc_phandle(self): + self.last_phandle += 1 + return self.last_phandle + + def stradd(self, str): + return self.strings.add(str) + + def stroffset(self, str): + return self.strings.getoffset(str) + + def reserve(self, start, len): + self.reserved.insert(0, (start, len)) + + def to_bin(self): + # layout: + # header + # reservation map + # string block + # data block + + datablock = _Node.to_bin(self) + + r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ] + reserved = _bincat(r) + + strblock = _pad(self.strings.to_bin(), 4) + strblocklen = len(strblock) + + header = _Header() + header.magic = _OF_DT_HEADER + header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8) + header.off_dt_strings = header.off_mem_rsvmap + len(reserved) + header.off_dt_struct = header.off_dt_strings + strblocklen + header.version = 0x10 + header.last_comp_version = 0x10 + header.boot_cpuid_phys = 0 + header.size_dt_strings = strblocklen + + payload = reserved + \ + strblock + \ + datablock 
+ \ + struct.pack('>I', _OF_DT_END) + header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8) + return _pad(header.to_bin(), 8) + payload + +_host_devtree_root = '/proc/device-tree' +def _getprop(propname): + '''Extract a property from the system's device tree.''' + f = file(os.path.join(_host_devtree_root, propname), 'r') + data = f.read() + f.close() + return data + +def _copynode(node, dirpath, propfilter): + '''Extract all properties from a node in the system's device tree.''' + dirents = os.listdir(dirpath) + for dirent in dirents: + fullpath = os.path.join(dirpath, dirent) + st = os.lstat(fullpath) + if stat.S_ISDIR(st.st_mode): + child = node.addnode(dirent) + _copytree(child, fullpath, propfilter) + elif stat.S_ISREG(st.st_mode) and propfilter(fullpath): + node.addprop(dirent, _getprop(fullpath)) + +def _copytree(node, dirpath, propfilter): + path = os.path.join(_host_devtree_root, dirpath) + _copynode(node, path, propfilter) + +def build(imghandler): + '''Construct a device tree by combining the domain's configuration and + the host's device tree.''' + root = Tree() + + # 4 pages: start_info, console, store, shared_info + root.reserve(0x3ffc000, 0x4000) + + root.addprop('device_type', 'chrp-but-not-really\0') + root.addprop('#size-cells', 2) + root.addprop('#address-cells', 2) + root.addprop('model', 'Momentum,Maple-D\0') + root.addprop('compatible', 'Momentum,Maple\0') + + xen = root.addnode('xen') + xen.addprop('start-info', 0, 0x3ffc000, 0, 0x1000) + xen.addprop('version', 'Xen-3.0-unstable\0') + xen.addprop('reg', 0, imghandler.vm.domid, 0, 0) + xen.addprop('domain-name', imghandler.vm.getName() + '\0') + xencons = xen.addnode('console') + xencons.addprop('interrupts', 1, 0) + + # XXX split out RMA node + mem = root.addnode('memory@0') + totalmem = imghandler.vm.getMemoryTarget() * 1024 + mem.addprop('reg', 0, 0, 0, totalmem) + mem.addprop('device_type', 'memory\0') + + cpus = root.addnode('cpus') + cpus.addprop('smp-enabled') + 
cpus.addprop('#size-cells', 0) + cpus.addprop('#address-cells', 1) + + # Copy all properties the system firmware gave us, except for 'linux,' + # properties, from 'cpus/@0', once for every vcpu. Hopefully all cpus are + # identical... + cpu0 = None + def _nolinuxprops(fullpath): + return not os.path.basename(fullpath).startswith('linux,') + for i in range(imghandler.vm.getVCpuCount()): + cpu = cpus.addnode('PowerPC,970@0') + _copytree(cpu, 'cpus/PowerPC,970@0', _nolinuxprops) + # and then overwrite what we need to + pft_size = imghandler.vm.info.get('pft-size', 0x14) + cpu.setprop('ibm,pft-size', 0, pft_size) + + # set default CPU + if cpu0 == None: + cpu0 = cpu + + chosen = root.addnode('chosen') + chosen.addprop('cpu', cpu0.get_phandle()) + chosen.addprop('memory', mem.get_phandle()) + chosen.addprop('linux,stdout-path', '/xen/console\0') + chosen.addprop('interrupt-controller', xen.get_phandle()) + chosen.addprop('bootargs', imghandler.cmdline + '\0') + # xc_linux_load.c will overwrite these 64-bit properties later + chosen.addprop('linux,initrd-start', 0, 0) + chosen.addprop('linux,initrd-end', 0, 0) + + if 1: + f = file('/tmp/domU.dtb', 'w') + f.write(root.to_bin()) + f.close() + + return root diff -r 4ba098226429 -r 1bab7d65171b tools/python/xen/xend/arch.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/python/xen/xend/arch.py Fri Sep 01 13:04:02 2006 -0600 @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Copyright (C) IBM Corp. 2006 +# +# Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + +import os + +_types = { + "i386": "x86", + "i486": "x86", + "i586": "x86", + "i686": "x86", + "x86_64": "x86", + "ia64": "ia64", + "ppc": "powerpc", + "ppc64": "powerpc", +} +type = _types.get(os.uname()[4], "unknown") diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/backtrace.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/powerpc/backtrace.c Fri Sep 01 13:04:02 2006 -0600 @@ -0,0 +1,193 @@ +/* + * Routines providing a simple monitor for use on the PowerMac. + * + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/console.h> +#include <xen/sched.h> +#include <xen/symbols.h> + +static char namebuf[KSYM_NAME_LEN+1]; + +/* Shamelessly lifted from Linux Xmon try to keep pristene */ +#ifdef __powerpc64__ +#define LRSAVE_OFFSET 0x10 +#define REG_FRAME_MARKER 0x7265677368657265ul /* "regshere" */ +#define MARKER_OFFSET 0x60 +#define REGS_OFFSET 0x70 +#define REG "%016lX" +#else +#define LRSAVE_OFFSET 4 +#define REG_FRAME_MARKER 0x72656773 +#define MARKER_OFFSET 8 +#define REGS_OFFSET 16 +#define REG "%08lX" +#endif + +#define TRAP(regs) ((regs)->entry_vector & ~0xF) +static int xmon_depth_to_print = 64; + +/* Very cheap human name for vector lookup. 
*/ +static +const char *getvecname(unsigned long vec) +{ + char *ret; + + switch (vec) { + case 0x100: ret = "(System Reset)"; break; + case 0x200: ret = "(Machine Check)"; break; + case 0x300: ret = "(Data Access)"; break; + case 0x380: ret = "(Data SLB Access)"; break; + case 0x400: ret = "(Instruction Access)"; break; + case 0x480: ret = "(Instruction SLB Access)"; break; + case 0x500: ret = "(Hardware Interrupt)"; break; + case 0x600: ret = "(Alignment)"; break; + case 0x700: ret = "(Program Check)"; break; + case 0x800: ret = "(FPU Unavailable)"; break; + case 0x900: ret = "(Decrementer)"; break; + case 0xc00: ret = "(System Call)"; break; + case 0xd00: ret = "(Single Step)"; break; + case 0xf00: ret = "(Performance Monitor)"; break; + case 0xf20: ret = "(Altivec Unavailable)"; break; + case 0x1300: ret = "(Instruction Breakpoint)"; break; + default: ret = ""; + } + return ret; +} + +static int mread(unsigned long adrs, void *buf, int size) +{ + memcpy(buf, (void *)adrs, size); + return size; +} + +static void get_function_bounds(unsigned long pc, unsigned long *startp, + unsigned long *endp) +{ + unsigned long size, offset; + const char *name; + + *startp = *endp = 0; + if (pc == 0) + return; + + name = symbols_lookup(pc, &size, &offset, namebuf); + if (name != NULL) { + *startp = pc - offset; + *endp = pc - offset + size; + } +} + +/* Print an address in numeric and symbolic form (if possible) */ +static void xmon_print_symbol(unsigned long address, const char *mid, + const char *after) +{ + const char *name = NULL; + unsigned long offset, size; + + printf(REG, address); + + name = symbols_lookup(address, &size, &offset, namebuf); + if (name) { + printf("%s%s+%#lx/%#lx", mid, name, offset, size); + } + printf("%s", after); +} + +static void backtrace( + unsigned long sp, unsigned long lr, unsigned long pc) +{ + unsigned long ip; + unsigned long newsp; + unsigned long marker; + int count = 0; + struct cpu_user_regs regs; + + do { + if (sp > xenheap_phys_end) 
{ + if (sp != 0) + printf("SP (%lx) is not in xen space\n", sp); + break; + } + + if (!mread(sp + LRSAVE_OFFSET, &ip, sizeof(unsigned long)) + || !mread(sp, &newsp, sizeof(unsigned long))) { + printf("Couldn't read stack frame at %lx\n", sp); + break; + } + + /* + * For the first stack frame, try to work out if + * LR and/or the saved LR value in the bottommost + * stack frame are valid. + */ + if ((pc | lr) != 0) { + unsigned long fnstart, fnend; + unsigned long nextip; + int printip = 1; + + get_function_bounds(pc, &fnstart, &fnend); + nextip = 0; + if (newsp > sp) + mread(newsp + LRSAVE_OFFSET, &nextip, + sizeof(unsigned long)); + if (lr == ip) { + if (lr >= xenheap_phys_end + || (fnstart <= lr && lr < fnend)) + printip = 0; + } else if (lr == nextip) { + printip = 0; + } else if (lr < xenheap_phys_end + && !(fnstart <= lr && lr < fnend)) { + printf("[link register ] "); + xmon_print_symbol(lr, " ", "\n"); + } + if (printip) { + printf("["REG"] ", sp); + xmon_print_symbol(ip, " ", " (unreliable)\n"); + } + pc = lr = 0; + + } else { + printf("["REG"] ", sp); + xmon_print_symbol(ip, " ", "\n"); + } + + /* Look for "regshere" marker to see if this is + an exception frame. 
*/ + if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long)) + && marker == REG_FRAME_MARKER) { + if (mread(sp + REGS_OFFSET, &regs, sizeof(regs)) + != sizeof(regs)) { + printf("Couldn't read registers at %lx\n", + sp + REGS_OFFSET); + break; + } + printf("--- Exception: %x %s at ", regs.entry_vector, + getvecname(TRAP(&regs))); + pc = regs.pc; + lr = regs.lr; + xmon_print_symbol(pc, " ", "\n"); + } + + if (newsp == 0) + break; + + sp = newsp; + } while (count++ < xmon_depth_to_print); +} + +void show_backtrace(ulong sp, ulong lr, ulong pc) +{ + console_start_sync(); + backtrace(sp, lr, pc); + console_end_sync(); +} diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/memory.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/powerpc/memory.c Fri Sep 01 13:04:02 2006 -0600 @@ -0,0 +1,206 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 
2006 + * + * Authors: Dan Poff <poff@xxxxxxxxxx> + * Jimi Xenidis <jimix@xxxxxxxxxxxxxx> + */ +#include <xen/sched.h> +#include <xen/mm.h> +#include "of-devtree.h" +#include "oftree.h" + +unsigned long xenheap_phys_end; +struct membuf { + ulong start; + ulong size; +}; + +typedef void (*walk_mem_fn)(struct membuf *, uint); + +static ulong free_xenheap(ulong start, ulong end) +{ + start = ALIGN_UP(start, PAGE_SIZE); + end = ALIGN_DOWN(end, PAGE_SIZE); + + printk("%s: 0x%lx - 0x%lx\n", __func__, start, end); + + if (oftree <= end && oftree >= start) { + printk("%s: Go around the devtree: 0x%lx - 0x%lx\n", + __func__, oftree, oftree_end); + init_xenheap_pages(start, ALIGN_DOWN(oftree, PAGE_SIZE)); + init_xenheap_pages(ALIGN_UP(oftree_end, PAGE_SIZE), end); + } else { + init_xenheap_pages(start, end); + } + + return ALIGN_UP(end, PAGE_SIZE); +} + +static void set_max_page(struct membuf *mb, uint entries) +{ + int i; + + for (i = 0; i < entries; i++) { + ulong end_page; + + end_page = (mb[i].start + mb[i].size) >> PAGE_SHIFT; + + if (end_page > max_page) + max_page = end_page; + } +} + +/* mark all memory from modules onward as unused */ +static void heap_init(struct membuf *mb, uint entries) +{ + int i; + ulong start_blk; + ulong end_blk = 0; + + for (i = 0; i < entries; i++) { + start_blk = mb[i].start; + end_blk = start_blk + mb[i].size; + + if (start_blk < xenheap_phys_end) { + if (xenheap_phys_end > end_blk) { + panic("xenheap spans LMB\n"); + } + if (xenheap_phys_end == end_blk) + continue; + + start_blk = xenheap_phys_end; + } + + init_boot_pages(start_blk, end_blk); + total_pages += (end_blk - start_blk) >> PAGE_SHIFT; + } +} + +static void ofd_walk_mem(void *m, walk_mem_fn fn) +{ + ofdn_t n; + uint p_len; + struct membuf mb[8]; + static char name[] = "memory"; + + n = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", name, sizeof(name)); + while (n > 0) { + + p_len = ofd_getprop(m, n, "reg", mb, sizeof (mb)); + if (p_len <= 0) { + panic("ofd_getprop(): 
failed\n"); + } + if (p_len > sizeof(mb)) + panic("%s: buffer is not big enuff for this firmware: " + "0x%lx < 0x%x\n", __func__, sizeof(mb), p_len); + + fn(mb, p_len / sizeof(mb[0])); + n = ofd_node_find_next(m, n); + } +} + +static void setup_xenheap(module_t *mod, int mcount) +{ + int i; + ulong freemem; + + freemem = ALIGN_UP((ulong)_end, PAGE_SIZE); + + for (i = 0; i < mcount; i++) { + u32 s; + + if(mod[i].mod_end == mod[i].mod_start) + continue; + + s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE); + + if (mod[i].mod_start > (ulong)_start && + mod[i].mod_start < (ulong)_end) { + /* mod was linked in */ + continue; + } + + if (s < freemem) + panic("module addresses must assend\n"); + + free_xenheap(freemem, s); + freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE); + + } + + /* the rest of the xenheap, starting at the end of modules */ + free_xenheap(freemem, xenheap_phys_end); +} + +void memory_init(module_t *mod, int mcount) +{ + ulong eomem; + ulong heap_start, heap_size; + + printk("Physical RAM map:\n"); + + /* lets find out how much memory there is and set max_page */ + max_page = 0; + ofd_walk_mem((void *)oftree, set_max_page); + eomem = max_page << PAGE_SHIFT; + + if (eomem == 0){ + panic("ofd_walk_mem() failed\n"); + } + printk("End of RAM: %luMB (%lukB)\n", eomem >> 20, eomem >> 10); + + /* Architecturally the first 4 pages are exception hendlers, we + * will also be copying down some code there */ + heap_start = 4 << PAGE_SHIFT; + if (oftree < (ulong)_start) + heap_start = ALIGN_UP(oftree_end, PAGE_SIZE); + + heap_start = init_boot_allocator(heap_start); + if (heap_start > (ulong)_start) { + panic("space below _start (%p) is not enough memory " + "for heap (0x%lx)\n", _start, heap_start); + } + + /* allow everything else to be allocated */ + total_pages = 0; + ofd_walk_mem((void *)oftree, heap_init); + if (total_pages == 0) + panic("heap_init: failed"); + + if (total_pages > max_page) + panic("total_pages > max_page: 0x%lx > 0x%lx\n", + total_pages, max_page); 
+ + printk("total_pages: 0x%016lx\n", total_pages); + + init_frametable(); + end_boot_allocator(); + + /* Add memory between the beginning of the heap and the beginning + * of out text */ + free_xenheap(heap_start, (ulong)_start); + + heap_size = xenheap_phys_end - heap_start; + printk("Xen heap: %luMB (%lukB)\n", heap_size >> 20, heap_size >> 10); + + setup_xenheap(mod, mcount); + + eomem = avail_domheap_pages(); + printk("Domheap pages: 0x%lx %luMB (%lukB)\n", eomem, + (eomem << PAGE_SHIFT) >> 20, + (eomem << PAGE_SHIFT) >> 10); +} diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/ofd_fixup_memory.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/powerpc/ofd_fixup_memory.c Fri Sep 01 13:04:02 2006 -0600 @@ -0,0 +1,107 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 
2006 + * + * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <public/xen.h> +#include "of-devtree.h" +#include "oftree.h" + +static char memory[] = "memory"; + +struct mem_reg { + u64 addr; + u64 sz; +}; + +static void ofd_memory_clean(void *m) +{ + ofdn_t old; + + /* Remove all old memory props */ + do { + old = ofd_node_find_by_prop(m, OFD_ROOT, "device_type", + memory, sizeof(memory)); + if (old <= 0) + break; + + ofd_node_prune(m, old); + } while (1); +} + +static ofdn_t ofd_memory_node_create( + void *m, ofdn_t p, const char *ppath, const char *name, + const char *dt, ulong start, ulong size) +{ + struct mem_reg reg; + char path[128]; + ulong l; + ofdn_t n; + ulong nl = strlen(name) + 1; + ulong dtl = strlen(dt) + 1; + + l = snprintf(path, sizeof (path), "%s/%s@%lx", ppath, name, start); + n = ofd_node_add(m, p, path, l + 1); + ofd_prop_add(m, n, "name", name, nl); + ofd_prop_add(m, n, "device_type", dt, dtl); + + /* physical addresses usable without regard to OF */ + reg.addr = start; + reg.sz = size; + ofd_prop_add(m, n, "reg", &reg, sizeof (reg)); + + return n; +} + +static void ofd_memory_rma_node(void *m, struct domain *d) +{ + ulong size = rma_size(d->arch.rma_order); + ofdn_t n; + + n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, 0, size); + BUG_ON(n <= 0); +} + +static void ofd_memory_extent_nodes(void *m, struct domain *d) +{ + ulong start; + ulong size; + ofdn_t n; + struct page_extents *pe; + + list_for_each_entry (pe, &d->arch.extent_list, pe_list) { + + start = pe->pfn << PAGE_SHIFT; + size = 1UL << (pe->order + PAGE_SHIFT); + + n = ofd_memory_node_create(m, OFD_ROOT, "", memory, memory, + start, size); + + BUG_ON(n <= 0); + } +} + +void ofd_memory_props(void *m, struct domain *d) +{ + ofd_memory_clean(m); + ofd_memory_rma_node(m, d); + ofd_memory_extent_nodes(m,d); +} diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/shadow.c --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/xen/arch/powerpc/shadow.c Fri Sep 01 13:04:02 2006 -0600 @@ -0,0 +1,159 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 2006 + * + * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx> + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/shadow.h> + +static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size) +{ + ulong sdr1_htabsize; + + ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0); + ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX); + ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE); + + sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS; + + return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK)); +} + +static ulong htab_alloc(struct domain *d, uint order) +{ + ulong htab_raddr; + uint log_htab_bytes = order + PAGE_SHIFT; + uint htab_bytes = 1UL << log_htab_bytes; + + /* we use xenheap pages to keep domheap pages usefull for domains */ + + if (order < 6) + order = 6; /* architectural minimum is 2^18 */ + if (order > 34) + order = 34; /* architectural minimum is 2^46 */ + + htab_raddr = (ulong)alloc_xenheap_pages(order); + if (htab_raddr > 0) { + ASSERT((htab_raddr & (htab_bytes - 1)) == 0); + + d->arch.htab.order = order; + d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE; + 
d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes); + d->arch.htab.map = (union pte *)htab_raddr; + } + return htab_raddr; +} + +static void htab_free(struct domain *d) +{ + ulong htab_raddr = GET_HTAB(d); + + free_xenheap_pages((void *)htab_raddr, d->arch.htab.order); +} + + +unsigned int shadow_teardown(struct domain *d) +{ + htab_free(d); + return 0; +} + +unsigned int shadow_set_allocation(struct domain *d, + unsigned int megabytes, + int *preempted) +{ + unsigned int rc; + uint pages; + uint p; + uint order; + ulong addr; + + + if (d->arch.htab.order) + return -EBUSY; + + if (megabytes == 0) { + /* old management tools */ + megabytes = 1; /* 1/64th of 64M */ + printk("%s: Fix management tools to set and get shadow/htab values\n" + " using %d MiB htab\n", + __func__, megabytes); + } + pages = megabytes << (20 - PAGE_SHIFT); + order = fls(pages) - 1; /* log2 truncated */ + if (pages & ((1 << order) - 1)) + ++order; /* round up */ + + addr = htab_alloc(d, order); + + printk("%s: ibm,fpt-size should be: 0x%x\n", __func__, + d->arch.htab.log_num_ptes + LOG_PTE_SIZE); + + if (addr == 0) + return -ENOMEM; + + /* XXX make this a continuation */ + for (p = 0; p < (1 << order); p++) + clear_page((void *)(addr + (p << PAGE_SHIFT))); + + return rc; +} + +int shadow_domctl(struct domain *d, + xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) +{ + if ( unlikely(d == current->domain) ) + { + DPRINTK("Don't try to do a shadow op on yourself!\n"); + return -EINVAL; + } + + switch ( sc->op ) + { + case XEN_DOMCTL_SHADOW_OP_OFF: + DPRINTK("Shadow is mandatory!\n"); + return -EINVAL; + + case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: + sc->mb = shadow_get_allocation(d); + return 0; + + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: { + int rc; + int preempted = 0; + + rc = shadow_set_allocation(d, sc->mb, &preempted); + + if (preempted) + /* Not finished. Set up to re-run the call. 
*/ + rc = hypercall_create_continuation( + __HYPERVISOR_domctl, "h", u_domctl); + else + /* Finished. Return the new allocation */ + sc->mb = shadow_get_allocation(d); + return rc; + } + + default: + printk("Bad shadow op %u\n", sc->op); + BUG(); + return -EINVAL; + } +} diff -r 4ba098226429 -r 1bab7d65171b xen/arch/powerpc/htab.c --- a/xen/arch/powerpc/htab.c Fri Sep 01 12:52:12 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (C) IBM Corp. 2005 - * - * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> - */ - -#include <xen/config.h> -#include <xen/sched.h> - -static ulong htab_calc_sdr1(ulong htab_addr, ulong log_htab_size) -{ - ulong sdr1_htabsize; - - ASSERT((htab_addr & ((1UL << log_htab_size) - 1)) == 0); - ASSERT(log_htab_size <= SDR1_HTABSIZE_MAX); - ASSERT(log_htab_size >= HTAB_MIN_LOG_SIZE); - - sdr1_htabsize = log_htab_size - LOG_PTEG_SIZE - SDR1_HTABSIZE_BASEBITS; - - return (htab_addr | (sdr1_htabsize & SDR1_HTABSIZE_MASK)); -} - -void htab_alloc(struct domain *d, uint order) -{ - ulong htab_raddr; - ulong log_htab_bytes = order + PAGE_SHIFT; - ulong htab_bytes = 1UL << log_htab_bytes; - - /* XXX use alloc_domheap_pages instead? 
*/ - htab_raddr = (ulong)alloc_xenheap_pages(order); - ASSERT(htab_raddr != 0); - /* XXX check alignment guarantees */ - ASSERT((htab_raddr & (htab_bytes - 1)) == 0); - - /* XXX slow. move memset out to service partition? */ - memset((void *)htab_raddr, 0, htab_bytes); - - d->arch.htab.order = order; - d->arch.htab.log_num_ptes = log_htab_bytes - LOG_PTE_SIZE; - d->arch.htab.sdr1 = htab_calc_sdr1(htab_raddr, log_htab_bytes); - d->arch.htab.map = (union pte *)htab_raddr; - d->arch.htab.shadow = xmalloc_array(ulong, - 1UL << d->arch.htab.log_num_ptes); - ASSERT(d->arch.htab.shadow != NULL); -} - -void htab_free(struct domain *d) -{ - ulong htab_raddr = GET_HTAB(d); - - free_xenheap_pages((void *)htab_raddr, d->arch.htab.order); - xfree(d->arch.htab.shadow); -} - _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |