[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Merged.
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID 9d86c1a70f347b49393fa26796df4512bb114ebb # Parent b09dbe439169a2348c59b30fbdefe3f19e30c766 # Parent e5c17d2d85a4dc189b98a0ed5a5921d2cda309c3 Merged. --- linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile | 22 linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile | 20 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S | 2 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c | 4 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 14 linux-2.6-xen-sparse/arch/ia64/Kconfig | 36 + linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre | 6 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 7 linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c | 2 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 27 linux-2.6-xen-sparse/drivers/xen/Kconfig | 26 linux-2.6-xen-sparse/drivers/xen/Makefile | 9 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 21 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h | 1 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 1 linux-2.6-xen-sparse/drivers/xen/core/Makefile | 16 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c | 9 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c | 2 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 288 ++++++++-- linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 26 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 173 ++++-- linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 14 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h | 6 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 7 linux-2.6-xen-sparse/include/xen/public/privcmd.h | 16 tools/debugger/libxendebug/xendebug.c | 7 tools/firmware/hvmloader/Makefile | 4 tools/firmware/rombios/Makefile | 4 tools/firmware/vmxassist/vm86.c | 65 ++ tools/ioemu/hw/cirrus_vga.c | 17 tools/ioemu/hw/pc.c | 5 tools/ioemu/hw/vga.c | 2 tools/ioemu/vl.c | 36 - tools/ioemu/vl.h | 2 tools/libxc/xc_core.c | 8 tools/libxc/xc_domain.c | 10 tools/libxc/xc_hvm_build.c | 6 
tools/libxc/xc_ia64_stubs.c | 12 tools/libxc/xc_linux.c | 2 tools/libxc/xc_linux_build.c | 76 +- tools/libxc/xc_linux_restore.c | 214 ++++++- tools/libxc/xc_linux_save.c | 57 + tools/libxc/xc_load_aout9.c | 4 tools/libxc/xc_load_bin.c | 4 tools/libxc/xc_load_elf.c | 19 tools/libxc/xc_pagetab.c | 2 tools/libxc/xc_private.c | 62 +- tools/libxc/xc_ptrace.c | 11 tools/libxc/xc_ptrace_core.c | 15 tools/libxc/xenctrl.h | 19 tools/libxc/xg_private.h | 7 tools/libxc/xg_save_restore.h | 12 tools/python/xen/util/security.py | 9 tools/tests/test_x86_emulator.c | 131 ++-- xen/arch/ia64/linux-xen/smpboot.c | 3 xen/arch/ia64/xen/domain.c | 15 xen/arch/ia64/xen/xensetup.c | 2 xen/arch/x86/audit.c | 4 xen/arch/x86/cpu/mtrr/main.c | 2 xen/arch/x86/dom0_ops.c | 2 xen/arch/x86/domain.c | 39 - xen/arch/x86/domain_build.c | 9 xen/arch/x86/hvm/svm/svm.c | 80 +- xen/arch/x86/hvm/vmx/vmx.c | 34 - xen/arch/x86/hvm/vmx/x86_32/exits.S | 35 - xen/arch/x86/hvm/vmx/x86_64/exits.S | 71 +- xen/arch/x86/i8259.c | 2 xen/arch/x86/microcode.c | 2 xen/arch/x86/mm.c | 34 - xen/arch/x86/setup.c | 2 xen/arch/x86/shadow.c | 9 xen/arch/x86/shadow32.c | 14 xen/arch/x86/shadow_public.c | 14 xen/arch/x86/smp.c | 2 xen/arch/x86/smpboot.c | 17 xen/arch/x86/time.c | 6 xen/arch/x86/traps.c | 10 xen/arch/x86/x86_32/asm-offsets.c | 2 xen/arch/x86/x86_32/domain_page.c | 2 xen/arch/x86/x86_32/entry.S | 5 xen/arch/x86/x86_32/mm.c | 3 xen/arch/x86/x86_32/traps.c | 6 xen/arch/x86/x86_64/asm-offsets.c | 3 xen/arch/x86/x86_64/entry.S | 10 xen/arch/x86/x86_64/mm.c | 3 xen/arch/x86/x86_64/traps.c | 14 xen/arch/x86/x86_emulate.c | 19 xen/common/dom0_ops.c | 2 xen/common/domain.c | 134 +++- xen/common/kernel.c | 5 xen/common/keyhandler.c | 5 xen/common/memory.c | 20 xen/common/page_alloc.c | 4 xen/common/perfc.c | 2 xen/common/sched_bvt.c | 36 - xen/common/sched_credit.c | 30 - xen/common/sched_sedf.c | 39 - xen/common/schedule.c | 108 --- xen/common/trace.c | 12 xen/common/xmalloc.c | 2 xen/drivers/char/console.c | 6 
xen/include/asm-x86/page.h | 11 xen/include/public/arch-ia64.h | 3 xen/include/public/arch-x86_32.h | 27 xen/include/public/arch-x86_64.h | 24 xen/include/public/callback.h | 15 xen/include/public/dom0_ops.h | 56 - xen/include/public/grant_table.h | 2 xen/include/public/io/netif.h | 4 xen/include/public/io/ring.h | 16 xen/include/public/memory.h | 10 xen/include/public/xen.h | 22 xen/include/xen/console.h | 2 xen/include/xen/domain.h | 23 xen/include/xen/sched-if.h | 11 xen/include/xen/sched.h | 12 116 files changed, 1737 insertions(+), 958 deletions(-) diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Wed Jun 07 11:03:51 2006 +0100 @@ -173,7 +173,7 @@ ENTRY(cpu_gdt_table) .ascii "|pae_pgdir_above_4gb" .ascii "|supervisor_mode_kernel" #ifdef CONFIG_X86_PAE - .ascii ",PAE=yes" + .ascii ",PAE=yes[extended-cr3]" #else .ascii ",PAE=no" #endif diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/i386/kernel/vm86.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c Wed Jun 07 11:03:51 2006 +0100 @@ -132,7 +132,9 @@ struct pt_regs * fastcall save_v86_state current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, &current->thread); current->thread.saved_esp0 = 0; +#ifndef CONFIG_X86_NO_TSS put_cpu(); +#endif loadsegment(fs, current->thread.saved_fs); loadsegment(gs, current->thread.saved_gs); @@ -310,7 +312,9 @@ static void do_sys_vm86(struct kernel_vm if (cpu_has_sep) tsk->thread.sysenter_cs = 0; load_esp0(tss, &tsk->thread); +#ifndef CONFIG_X86_NO_TSS put_cpu(); +#endif tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/i386/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Wed Jun 07 11:03:15 2006
+0100 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Wed Jun 07 11:03:51 2006 +0100 @@ -558,15 +558,11 @@ void __init paging_init(void) kmap_init(); - if (!xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->shared_info >= xen_start_info->nr_pages) { - /* Switch to the real shared_info page, and clear the - * dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - HYPERVISOR_shared_info = - (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } + /* Switch to the real shared_info page, and clear the + * dummy page. */ + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); /* Setup mapping of lower 1st MB */ for (i = 0; i < NR_FIX_ISAMAPS; i++) diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Wed Jun 07 11:03:51 2006 +0100 @@ -73,7 +73,7 @@ config XEN_IA64_DOM0_VP config XEN_IA64_DOM0_NON_VP bool - depends on !(XEN && XEN_IA64_DOM0_VP) + depends on XEN && !XEN_IA64_DOM0_VP default y help dom0 P=M model @@ -496,15 +496,39 @@ source "security/Kconfig" source "crypto/Kconfig" +# # override default values of drivers/xen/Kconfig -if !XEN_IA64_DOM0_VP +# +if XEN +config XEN_UTIL + default n if XEN_IA64_DOM0_VP + config HAVE_ARCH_ALLOC_SKB - bool - default n + default n if !XEN_IA64_DOM0_VP config HAVE_ARCH_DEV_ALLOC_SKB - bool - default n + default n if !XEN_IA64_DOM0_VP + +config XEN_BALLOON + default n if !XEN_IA64_DOM0_VP + +config XEN_SKBUFF + default n if !XEN_IA64_DOM0_VP + +config XEN_NETDEV_BACKEND + default n if !XEN_IA64_DOM0_VP + +config XEN_NETDEV_FRONTEND + default n if !XEN_IA64_DOM0_VP + +config XEN_DEVMEM + default n + +config XEN_REBOOT + default n + +config XEN_SMPBOOT + default n endif 
source "drivers/xen/Kconfig" diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre --- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Wed Jun 07 11:03:51 2006 +0100 @@ -10,12 +10,6 @@ #eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/ -#ia64 drivers/xen isn't fully functional yet, workaround... -#also ignore core/evtchn.c which uses a different irq mechanism than ia64 -#(warning: there be dragons here if these files diverge) -ln -sf ../../arch/ia64/xen/drivers/Makefile drivers/xen/Makefile -ln -sf ../../../arch/ia64/xen/drivers/coreMakefile drivers/xen/core/Makefile - #not sure where these ia64-specific files will end up in the future ln -sf ../../../arch/ia64/xen/drivers/xenia64_init.c drivers/xen/core diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Wed Jun 07 11:03:51 2006 +0100 @@ -665,13 +665,6 @@ void __init setup_arch(char **cmdline_p) setup_xen_features(); - if (xen_feature(XENFEAT_auto_translated_physmap) && - xen_start_info->shared_info < xen_start_info->nr_pages) { - HYPERVISOR_shared_info = - (shared_info_t *)__va(xen_start_info->shared_info); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/smp-xen.c Wed Jun 07 11:03:51 2006 +0100 @@ -488,7 +488,7 @@ static void smp_really_stop_cpu(void *du { smp_stop_cpu(); for (;;) - asm("hlt"); + halt(); } 
void smp_send_stop(void) diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Wed Jun 07 11:03:51 2006 +0100 @@ -666,7 +666,18 @@ void __meminit init_memory_mapping(unsig set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); } - BUG_ON(!after_bootmem && start_pfn != table_end); + if (!after_bootmem) { + BUG_ON(start_pfn != table_end); + /* + * Destroy the temporary mappings created above. Prevents + * overlap with modules area (if init mapping is very big). + */ + start = __START_KERNEL_map + (table_start << PAGE_SHIFT); + end = __START_KERNEL_map + (table_end << PAGE_SHIFT); + for (; start < end; start += PAGE_SIZE) + WARN_ON(HYPERVISOR_update_va_mapping( + start, __pte_ma(0), 0)); + } __flush_tlb_all(); } @@ -752,15 +763,11 @@ void __init paging_init(void) free_area_init_node(0, NODE_DATA(0), zones, __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); - if (!xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->shared_info >= xen_start_info->nr_pages) { - /* Switch to the real shared_info page, and clear the - * dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - HYPERVISOR_shared_info = - (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } + /* Switch to the real shared_info page, and clear the + * dummy page. 
*/ + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); init_mm.context.pinned = 1; diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Wed Jun 07 11:03:51 2006 +0100 @@ -228,4 +228,30 @@ config NO_IDLE_HZ bool default y +config XEN_UTIL + bool + default y + +config XEN_BALLOON + bool + default y + +config XEN_DEVMEM + bool + default y + +config XEN_SKBUFF + bool + default y + depends on NET + +config XEN_REBOOT + bool + default y + +config XEN_SMPBOOT + bool + default y + depends on SMP + endif diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/Makefile Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile Wed Jun 07 11:03:51 2006 +0100 @@ -1,14 +1,12 @@ - -obj-y += util.o - obj-y += core/ -obj-y += char/ obj-y += console/ obj-y += evtchn/ -obj-y += balloon/ obj-y += privcmd/ obj-y += xenbus/ +obj-$(CONFIG_XEN_UTIL) += util.o +obj-$(CONFIG_XEN_BALLOON) += balloon/ +obj-$(CONFIG_XEN_DEVMEM) += char/ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ @@ -17,4 +15,3 @@ obj-$(CONFIG_XEN_BLKDEV_TAP) += blkt obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/ - diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Wed Jun 07 11:03:51 2006 +0100 @@ -452,10 +452,6 @@ int blkif_ioctl(struct inode *inode, str command, (long)argument, inode->i_rdev); switch 
(command) { - case HDIO_GETGEO: - /* return ENOSYS to use defaults */ - return -ENOSYS; - case CDROMMULTISESSION: DPRINTK("FIXME: support multisession CDs later\n"); for (i = 0; i < sizeof(struct cdrom_multisession); i++) @@ -469,6 +465,23 @@ int blkif_ioctl(struct inode *inode, str return -EINVAL; /* same return as native Linux */ } + return 0; +} + + +int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) +{ + /* We don't have real geometry info, but let's at least return + values consistent with the size of the device */ + sector_t nsect = get_capacity(bd->bd_disk); + sector_t cylinders = nsect; + + hg->heads = 0xff; + hg->sectors = 0x3f; + sector_div(cylinders, hg->heads * hg->sectors); + hg->cylinders = cylinders; + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) + hg->cylinders = 0xffff; return 0; } diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Wed Jun 07 11:03:51 2006 +0100 @@ -140,6 +140,7 @@ extern int blkif_release(struct inode *i extern int blkif_release(struct inode *inode, struct file *filep); extern int blkif_ioctl(struct inode *inode, struct file *filep, unsigned command, unsigned long argument); +extern int blkif_getgeo(struct block_device *, struct hd_geometry *); extern int blkif_check(dev_t dev); extern int blkif_revalidate(dev_t dev); extern void do_blkif_request (request_queue_t *rq); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Wed Jun 07 11:03:51 2006 +0100 @@ -91,6 +91,7 @@ static struct block_device_operations xl .open = blkif_open, .release = blkif_release, .ioctl = blkif_ioctl, + .getgeo = blkif_getgeo }; DEFINE_SPINLOCK(blkif_io_lock); diff -r 
b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/core/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile Wed Jun 07 11:03:51 2006 +0100 @@ -2,11 +2,13 @@ # Makefile for the linux kernel. # -obj-y := evtchn.o reboot.o gnttab.o features.o +obj-y := evtchn.o gnttab.o features.o -obj-$(CONFIG_PROC_FS) += xen_proc.o -obj-$(CONFIG_NET) += skbuff.o -obj-$(CONFIG_SMP) += smpboot.o -obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o -obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o +obj-$(CONFIG_PROC_FS) += xen_proc.o +obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o +obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o +obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o +obj-$(CONFIG_IA64) += xenia64_init.o +obj-$(CONFIG_XEN_SKBUFF) += skbuff.o +obj-$(CONFIG_XEN_REBOOT) += reboot.o +obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/core/smpboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c Wed Jun 07 11:03:51 2006 +0100 @@ -89,9 +89,8 @@ void __init prefill_possible_map(void) for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc == -ENOENT) - break; - cpu_set(i, cpu_possible_map); + if (rc >= 0) + cpu_set(i, cpu_possible_map); } } @@ -209,7 +208,7 @@ void cpu_initialize_context(unsigned int ctxt.failsafe_callback_cs = __KERNEL_CS; ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); #else /* __x86_64__ */ ctxt.user_regs.cs = __KERNEL_CS; ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); @@ -221,7 +220,7 @@ void cpu_initialize_context(unsigned int ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.syscall_callback_eip = 
(unsigned long)system_call; - ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT; + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); #endif diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netback/loopback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c Wed Jun 07 11:03:51 2006 +0100 @@ -146,11 +146,13 @@ static void loopback_construct(struct ne dev->hard_start_xmit = loopback_start_xmit; dev->get_stats = loopback_get_stats; dev->set_multicast_list = loopback_set_multicast_list; + dev->change_mtu = NULL; /* allow arbitrary mtu */ dev->tx_queue_len = 0; dev->features = (NETIF_F_HIGHDMA | NETIF_F_LLTX | + NETIF_F_SG | NETIF_F_IP_CSUM); SET_ETHTOOL_OPS(dev, &network_ethtool_ops); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Wed Jun 07 11:03:51 2006 +0100 @@ -458,6 +458,9 @@ inline static void net_tx_action_dealloc dc = dealloc_cons; dp = dealloc_prod; + /* Ensure we see all indexes enqueued by netif_idx_release(). 
*/ + smp_rmb(); + /* * Free up any grants we have finished using */ @@ -487,6 +490,177 @@ inline static void net_tx_action_dealloc } } +static void netbk_tx_err(netif_t *netif, RING_IDX end) +{ + RING_IDX cons = netif->tx.req_cons; + + do { + netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons); + make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + } while (++cons < end); + netif->tx.req_cons = cons; + netif_schedule_work(netif); + netif_put(netif); +} + +static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp, + int work_to_do) +{ + netif_tx_request_t *first = txp; + RING_IDX cons = netif->tx.req_cons; + int frags = 1; + + while (txp->flags & NETTXF_more_data) { + if (frags >= work_to_do) { + DPRINTK("Need more frags\n"); + return -frags; + } + + txp = RING_GET_REQUEST(&netif->tx, cons + frags); + if (txp->size > first->size) { + DPRINTK("Frags galore\n"); + return -frags; + } + + first->size -= txp->size; + frags++; + + if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { + DPRINTK("txp->offset: %x, size: %u\n", + txp->offset, txp->size); + return -frags; + } + } + + return frags; +} + +static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, + struct sk_buff *skb, + gnttab_map_grant_ref_t *mop) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; + netif_tx_request_t *txp; + unsigned long pending_idx = *((u16 *)skb->data); + RING_IDX cons = netif->tx.req_cons + 1; + int i, start; + + /* Skip first skb fragment if it is on same page as header fragment. 
*/ + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < shinfo->nr_frags; i++) { + txp = RING_GET_REQUEST(&netif->tx, cons++); + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)]; + + gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txp->gref, netif->domid); + + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); + netif_get(netif); + pending_tx_info[pending_idx].netif = netif; + frags[i].page = (void *)pending_idx; + } + + return mop; +} + +static int netbk_tx_check_mop(struct sk_buff *skb, + gnttab_map_grant_ref_t **mopp) +{ + gnttab_map_grant_ref_t *mop = *mopp; + int pending_idx = *((u16 *)skb->data); + netif_t *netif = pending_tx_info[pending_idx].netif; + netif_tx_request_t *txp; + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + int i, err, start; + + /* Check status of header. */ + err = mop->status; + if (unlikely(err)) { + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + netif_put(netif); + } else { + set_phys_to_machine( + __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); + grant_tx_handle[pending_idx] = mop->handle; + } + + /* Skip first skb fragment if it is on same page as header fragment. */ + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < nr_frags; i++) { + int j, newerr; + + pending_idx = (unsigned long)shinfo->frags[i].page; + + /* Check error status: if okay then remember grant handle. */ + newerr = (++mop)->status; + if (likely(!newerr)) { + set_phys_to_machine( + __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); + grant_tx_handle[pending_idx] = mop->handle; + /* Had a previous error? Invalidate this fragment. 
*/ + if (unlikely(err)) + netif_idx_release(pending_idx); + continue; + } + + /* Error on this fragment: respond to client with an error. */ + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + netif_put(netif); + + /* Not the first error? Preceding frags already invalidated. */ + if (err) + continue; + + /* First error: invalidate header and preceding fragments. */ + pending_idx = *((u16 *)skb->data); + netif_idx_release(pending_idx); + for (j = start; j < i; j++) { + pending_idx = (unsigned long)shinfo->frags[i].page; + netif_idx_release(pending_idx); + } + + /* Remember the error: invalidate all subsequent fragments. */ + err = newerr; + } + + *mopp = mop + 1; + return err; +} + +static void netbk_fill_frags(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + int i; + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = shinfo->frags + i; + netif_tx_request_t *txp; + unsigned long pending_idx; + + pending_idx = (unsigned long)frag->page; + txp = &pending_tx_info[pending_idx].req; + frag->page = virt_to_page(MMAP_VADDR(pending_idx)); + frag->size = txp->size; + frag->page_offset = txp->offset; + + skb->len += txp->size; + skb->data_len += txp->size; + skb->truesize += txp->size; + } +} + /* Called after netfront has transmitted */ static void net_tx_action(unsigned long unused) { @@ -504,7 +678,7 @@ static void net_tx_action(unsigned long net_tx_action_dealloc(); mop = tx_map_ops; - while ((NR_PENDING_REQS < MAX_PENDING_REQS) && + while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && !list_empty(&net_schedule_list)) { /* Get a netif from the list with work to do. 
*/ ent = net_schedule_list.next; @@ -552,38 +726,44 @@ static void net_tx_action(unsigned long } netif->remaining_credit -= txreq.size; - netif->tx.req_cons++; - - netif_schedule_work(netif); - - if (unlikely(txreq.size < ETH_HLEN) || - unlikely(txreq.size > ETH_FRAME_LEN)) { + ret = netbk_count_requests(netif, &txreq, work_to_do); + if (unlikely(ret < 0)) { + netbk_tx_err(netif, i - ret); + continue; + } + i += ret; + + if (unlikely(ret > MAX_SKB_FRAGS + 1)) { + DPRINTK("Too many frags\n"); + netbk_tx_err(netif, i); + continue; + } + + if (unlikely(txreq.size < ETH_HLEN)) { DPRINTK("Bad packet size: %d\n", txreq.size); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + netbk_tx_err(netif, i); continue; } /* No crossing a page as the payload mustn't fragment. */ - if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { + if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, (txreq.offset &~PAGE_MASK) + txreq.size); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + netbk_tx_err(netif, i); continue; } pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; - data_len = (txreq.size > PKT_PROT_LEN) ? + data_len = (txreq.size > PKT_PROT_LEN && + ret < MAX_SKB_FRAGS + 1) ? 
PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len+16, GFP_ATOMIC); if (unlikely(skb == NULL)) { DPRINTK("Can't allocate a skb in start_xmit.\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + netbk_tx_err(netif, i); break; } @@ -600,9 +780,23 @@ static void net_tx_action(unsigned long pending_tx_info[pending_idx].netif = netif; *((u16 *)skb->data) = pending_idx; + __skb_put(skb, data_len); + + skb_shinfo(skb)->nr_frags = ret - 1; + if (data_len < txreq.size) { + skb_shinfo(skb)->nr_frags++; + skb_shinfo(skb)->frags[0].page = + (void *)(unsigned long)pending_idx; + } + __skb_queue_tail(&tx_queue, skb); pending_cons++; + + mop = netbk_get_requests(netif, skb, mop); + + netif->tx.req_cons = i; + netif_schedule_work(netif); if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) break; @@ -617,75 +811,56 @@ static void net_tx_action(unsigned long mop = tx_map_ops; while ((skb = __skb_dequeue(&tx_queue)) != NULL) { + netif_tx_request_t *txp; + pending_idx = *((u16 *)skb->data); netif = pending_tx_info[pending_idx].netif; - memcpy(&txreq, &pending_tx_info[pending_idx].req, - sizeof(txreq)); + txp = &pending_tx_info[pending_idx].req; /* Check the remap error code. */ - if (unlikely(mop->status)) { + if (unlikely(netbk_tx_check_mop(skb, &mop))) { printk(KERN_ALERT "#### netback grant fails\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); - mop++; - pending_ring[MASK_PEND_IDX(pending_prod++)] = - pending_idx; continue; } - set_phys_to_machine( - __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT, - FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); - grant_tx_handle[pending_idx] = mop->handle; - - data_len = (txreq.size > PKT_PROT_LEN) ? 
- PKT_PROT_LEN : txreq.size; - - __skb_put(skb, data_len); + + data_len = skb->len; memcpy(skb->data, - (void *)(MMAP_VADDR(pending_idx)|txreq.offset), + (void *)(MMAP_VADDR(pending_idx)|txp->offset), data_len); - if (data_len < txreq.size) { + if (data_len < txp->size) { /* Append the packet payload as a fragment. */ - skb_shinfo(skb)->frags[0].page = - virt_to_page(MMAP_VADDR(pending_idx)); - skb_shinfo(skb)->frags[0].size = - txreq.size - data_len; - skb_shinfo(skb)->frags[0].page_offset = - txreq.offset + data_len; - skb_shinfo(skb)->nr_frags = 1; + txp->offset += data_len; + txp->size -= data_len; } else { /* Schedule a response immediately. */ netif_idx_release(pending_idx); } - - skb->data_len = txreq.size - data_len; - skb->len += skb->data_len; - skb->truesize += skb->data_len; - - skb->dev = netif->dev; - skb->protocol = eth_type_trans(skb, skb->dev); /* * Old frontends do not assert data_validated but we * can infer it from csum_blank so test both flags. */ - if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) { + if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->proto_data_valid = 1; } else { skb->ip_summed = CHECKSUM_NONE; skb->proto_data_valid = 0; } - skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank); - - netif->stats.rx_bytes += txreq.size; + skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank); + + netbk_fill_frags(skb); + + skb->dev = netif->dev; + skb->protocol = eth_type_trans(skb, skb->dev); + + netif->stats.rx_bytes += skb->len; netif->stats.rx_packets++; netif_rx(skb); netif->dev->last_rx = jiffies; - - mop++; } } @@ -695,7 +870,10 @@ static void netif_idx_release(u16 pendin unsigned long flags; spin_lock_irqsave(&_lock, flags); - dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; + dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx; + /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. 
*/ + smp_wmb(); + dealloc_prod++; spin_unlock_irqrestore(&_lock, flags); tasklet_schedule(&net_tx_tasklet); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Wed Jun 07 11:03:51 2006 +0100 @@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d static int netback_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { + const char *message; + xenbus_transaction_t xbt; int err; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); @@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d if (err) goto fail; + do { + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto fail; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + } while (err == -EAGAIN); + + if (err) { + xenbus_dev_fatal(dev, err, "completing transaction"); + goto fail; + } + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) { goto fail; @@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d return 0; +abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); fail: DPRINTK("failed"); netback_remove(dev); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Wed Jun 07 11:03:51 2006 +0100 @@ -45,6 +45,7 @@ #include <linux/bitops.h> #include <linux/ethtool.h> #include <linux/in.h> +#include <linux/if_ether.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/arp.h> @@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne #define 
xennet_sysfs_delif(dev) do { } while(0) #endif +static inline int xennet_can_sg(struct net_device *dev) +{ + return dev->features & NETIF_F_SG; +} + /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and @@ -307,8 +313,6 @@ again: goto destroy_ring; } - xenbus_switch_state(dev, XenbusStateConnected); - return 0; abort_transaction: @@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de goto fail; memcpy(netdev->dev_addr, info->mac, ETH_ALEN); - network_connect(netdev); info->irq = bind_evtchn_to_irqhandler( info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name, netdev); - (void)send_fake_arp(netdev); - show_device(info); return 0; @@ -391,15 +392,24 @@ static void backend_changed(struct xenbu static void backend_changed(struct xenbus_device *dev, enum xenbus_state backend_state) { + struct netfront_info *np = dev->data; + struct net_device *netdev = np->netdev; + DPRINTK("\n"); switch (backend_state) { case XenbusStateInitialising: - case XenbusStateInitWait: case XenbusStateInitialised: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: + break; + + case XenbusStateInitWait: + network_connect(netdev); + xenbus_switch_state(dev, XenbusStateConnected); + (void)send_fake_arp(netdev); + show_device(np); break; case XenbusStateClosing: @@ -452,13 +462,17 @@ static int network_open(struct net_devic return 0; } +static inline int netfront_tx_slot_available(struct netfront_info *np) +{ + return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1; +} + static inline void network_maybe_wake_tx(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); if (unlikely(netif_queue_stopped(dev)) && - !RING_FULL(&np->tx) && - !gnttab_empty_grant_references(&np->gref_tx_head) && + netfront_tx_slot_available(np) && likely(netif_running(dev))) netif_wake_queue(dev); } @@ -485,7 +499,7 @@ static void network_tx_buf_gc(struct net printk(KERN_ALERT 
"network_tx_buf_gc: warning " "-- grant still in use by backend " "domain.\n"); - break; /* bail immediately */ + BUG(); } gnttab_end_foreign_access_ref( np->grant_tx_ref[id], GNTMAP_readonly); @@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str RING_PUSH_REQUESTS(&np->rx); } +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, + struct netif_tx_request *tx) +{ + struct netfront_info *np = netdev_priv(dev); + char *data = skb->data; + unsigned long mfn; + RING_IDX prod = np->tx.req_prod_pvt; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + unsigned int id; + grant_ref_t ref; + int i; + + while (len > PAGE_SIZE - offset) { + tx->size = PAGE_SIZE - offset; + tx->flags |= NETTXF_more_data; + len -= tx->size; + data += tx->size; + offset = 0; + + id = get_id_from_freelist(np->tx_skbs); + np->tx_skbs[id] = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; + tx->flags = 0; + } + + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + + tx->flags |= NETTXF_more_data; + + id = get_id_from_freelist(np->tx_skbs); + np->tx_skbs[id] = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = pfn_to_mfn(page_to_pfn(frag->page)); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = frag->page_offset; + tx->size = frag->size; + tx->flags = 0; + } + + np->tx.req_prod_pvt = prod; +} static int network_start_xmit(struct sk_buff *skb, 
struct net_device *dev) { unsigned short id; struct netfront_info *np = netdev_priv(dev); struct netif_tx_request *tx; + char *data = skb->data; RING_IDX i; grant_ref_t ref; unsigned long mfn; int notify; - - if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= - PAGE_SIZE)) { - struct sk_buff *nskb; - nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN); - if (unlikely(nskb == NULL)) - goto drop; - skb_put(nskb, skb->len); - memcpy(nskb->data, skb->data, skb->len); - /* Copy only the header fields we use in this driver. */ - nskb->dev = skb->dev; - nskb->ip_summed = skb->ip_summed; - nskb->proto_data_valid = skb->proto_data_valid; - dev_kfree_skb(skb); - skb = nskb; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", + frags); + dump_stack(); + goto drop; } spin_lock_irq(&np->tx_lock); - if (unlikely(!netif_carrier_ok(dev))) { + if (unlikely(!netif_carrier_ok(dev) || + (frags > 1 && !xennet_can_sg(dev)))) { spin_unlock_irq(&np->tx_lock); goto drop; } @@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_ tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); - mfn = virt_to_mfn(skb->data); + mfn = virt_to_mfn(data); gnttab_grant_foreign_access_ref( ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; - tx->offset = (unsigned long)skb->data & ~PAGE_MASK; - tx->size = skb->len; + tx->offset = offset; + tx->size = len; tx->flags = 0; if (skb->ip_summed == CHECKSUM_HW) /* local packet? 
*/ @@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_ tx->flags |= NETTXF_data_validated; np->tx.req_prod_pvt = i + 1; + + xennet_make_frags(skb, dev, tx); + tx->size = skb->len; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); if (notify) notify_remote_via_irq(np->irq); network_tx_buf_gc(dev); - if (RING_FULL(&np->tx) || - gnttab_empty_grant_references(&np->gref_tx_head)) + if (!netfront_tx_slot_available(np)) netif_stop_queue(dev); spin_unlock_irq(&np->tx_lock); @@ -963,12 +1039,46 @@ static struct net_device_stats *network_ return &np->stats; } +static int xennet_change_mtu(struct net_device *dev, int mtu) +{ + int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; + dev->mtu = mtu; + return 0; +} + +static int xennet_set_sg(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg", + "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } else if (dev->mtu > ETH_DATA_LEN) + dev->mtu = ETH_DATA_LEN; + + return ethtool_op_set_sg(dev, data); +} + +static void xennet_set_features(struct net_device *dev) +{ + xennet_set_sg(dev, 1); +} + static void network_connect(struct net_device *dev) { struct netfront_info *np; int i, requeue_idx; struct netif_tx_request *tx; struct sk_buff *skb; + + xennet_set_features(dev); np = netdev_priv(dev); spin_lock_irq(&np->tx_lock); @@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo { .get_tx_csum = ethtool_op_get_tx_csum, .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = xennet_set_sg, }; #ifdef CONFIG_SYSFS @@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre netdev->poll = netif_poll; netdev->set_multicast_list = network_set_multicast_list; netdev->uninit = netif_uninit; + netdev->change_mtu = xennet_change_mtu; netdev->weight = 64; netdev->features = NETIF_F_IP_CSUM; diff -r b09dbe439169 -r 
9d86c1a70f34 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Jun 07 11:03:51 2006 +0100 @@ -61,11 +61,11 @@ static int privcmd_ioctl(struct inode *i __asm__ __volatile__ ( "pushl %%ebx; pushl %%ecx; pushl %%edx; " "pushl %%esi; pushl %%edi; " - "movl 4(%%eax),%%ebx ;" - "movl 8(%%eax),%%ecx ;" - "movl 12(%%eax),%%edx ;" - "movl 16(%%eax),%%esi ;" - "movl 20(%%eax),%%edi ;" + "movl 8(%%eax),%%ebx ;" + "movl 16(%%eax),%%ecx ;" + "movl 24(%%eax),%%edx ;" + "movl 32(%%eax),%%esi ;" + "movl 40(%%eax),%%edi ;" "movl (%%eax),%%eax ;" "shll $5,%%eax ;" "addl $hypercall_page,%%eax ;" @@ -161,7 +161,7 @@ static int privcmd_ioctl(struct inode *i case IOCTL_PRIVCMD_MMAPBATCH: { privcmd_mmapbatch_t m; struct vm_area_struct *vma = NULL; - unsigned long __user *p; + xen_pfn_t __user *p; unsigned long addr, mfn; int i; @@ -210,7 +210,7 @@ static int privcmd_ioctl(struct inode *i batch_err: printk("batch_err ret=%d vma=%p addr=%lx " "num=%d arr=%p %lx-%lx\n", - ret, vma, m.addr, m.num, m.arr, + ret, vma, (unsigned long)m.addr, m.num, m.arr, vma ? vma->vm_start : 0, vma ? 
vma->vm_end : 0); break; } diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h Wed Jun 07 11:03:51 2006 +0100 @@ -116,10 +116,12 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" __asm__ ( \ "movl %%cr3,%0\n\t" \ :"=r" (__dummy)); \ - machine_to_phys(__dummy); \ + __dummy = xen_cr3_to_pfn(__dummy); \ + mfn_to_pfn(__dummy) << PAGE_SHIFT; \ }) #define write_cr3(x) ({ \ - maddr_t __dummy = phys_to_machine(x); \ + unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \ + __dummy = xen_pfn_to_cr3(__dummy); \ __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \ }) diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Wed Jun 07 11:03:51 2006 +0100 @@ -61,13 +61,6 @@ static void __init machine_specific_arch .address = { __KERNEL_CS, (unsigned long)nmi }, }; - if (xen_feature(XENFEAT_auto_translated_physmap) && - xen_start_info->shared_info < xen_start_info->nr_pages) { - HYPERVISOR_shared_info = - (shared_info_t *)__va(xen_start_info->shared_info); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } - ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (ret == 0) ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/include/xen/public/privcmd.h --- a/linux-2.6-xen-sparse/include/xen/public/privcmd.h Wed Jun 07 11:03:15 2006 +0100 +++ b/linux-2.6-xen-sparse/include/xen/public/privcmd.h Wed Jun 07 11:03:51 2006 +0100 @@ -33,20 +33,22 @@ #ifndef __LINUX_PUBLIC_PRIVCMD_H__ #define __LINUX_PUBLIC_PRIVCMD_H__ +#include <linux/types.h> + #ifndef __user #define __user #endif 
typedef struct privcmd_hypercall { - unsigned long op; - unsigned long arg[5]; + __u64 op; + __u64 arg[5]; } privcmd_hypercall_t; typedef struct privcmd_mmap_entry { - unsigned long va; - unsigned long mfn; - unsigned long npages; + __u64 va; + __u64 mfn; + __u64 npages; } privcmd_mmap_entry_t; typedef struct privcmd_mmap { @@ -58,8 +60,8 @@ typedef struct privcmd_mmapbatch { typedef struct privcmd_mmapbatch { int num; /* number of pages to populate */ domid_t dom; /* target domain */ - unsigned long addr; /* virtual address */ - unsigned long __user *arr; /* array of mfns - top nibble set on err */ + __u64 addr; /* virtual address */ + xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ } privcmd_mmapbatch_t; /* diff -r b09dbe439169 -r 9d86c1a70f34 tools/debugger/libxendebug/xendebug.c --- a/tools/debugger/libxendebug/xendebug.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/debugger/libxendebug/xendebug.c Wed Jun 07 11:03:51 2006 +0100 @@ -57,7 +57,7 @@ typedef struct domain_context vcpu_guest_context_t context[MAX_VIRT_CPUS]; long total_pages; - unsigned long *page_array; + xen_pfn_t *page_array; unsigned long cr3_phys[MAX_VIRT_CPUS]; unsigned long *cr3_virt[MAX_VIRT_CPUS]; @@ -346,8 +346,9 @@ xendebug_memory_page (domain_context_p c ctxt->cr3_phys[vcpu] = vcpu_ctxt->ctrlreg[3]; if ( ctxt->cr3_virt[vcpu] ) munmap(ctxt->cr3_virt[vcpu], PAGE_SIZE); - ctxt->cr3_virt[vcpu] = xc_map_foreign_range(xc_handle, ctxt->domid, - PAGE_SIZE, PROT_READ, ctxt->cr3_phys[vcpu] >> PAGE_SHIFT); + ctxt->cr3_virt[vcpu] = xc_map_foreign_range( + xc_handle, ctxt->domid, PAGE_SIZE, PROT_READ, + xen_cr3_to_pfn(ctxt->cr3_phys[vcpu])); if ( ctxt->cr3_virt[vcpu] == NULL ) return 0; } diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/hvmloader/Makefile --- a/tools/firmware/hvmloader/Makefile Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/firmware/hvmloader/Makefile Wed Jun 07 11:03:51 2006 +0100 @@ -51,8 +51,8 @@ hvmloader: roms.h hvmloader.c acpi_madt. 
$(OBJCOPY) hvmloader.tmp hvmloader rm -f hvmloader.tmp -roms.h: ../rombios/BIOS-bochs-8-processors ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin - sh ./mkhex rombios ../rombios/BIOS-bochs-8-processors > roms.h +roms.h: ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin + sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h sh ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/rombios/Makefile --- a/tools/firmware/rombios/Makefile Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/firmware/rombios/Makefile Wed Jun 07 11:03:51 2006 +0100 @@ -1,7 +1,7 @@ -#BIOS_BUILDS = BIOS-bochs-latest +BIOS_BUILDS = BIOS-bochs-latest #BIOS_BUILDS += BIOS-bochs-2-processors #BIOS_BUILDS += BIOS-bochs-4-processors -BIOS_BUILDS += BIOS-bochs-8-processors +#BIOS_BUILDS += BIOS-bochs-8-processors .PHONY: all all: bios diff -r b09dbe439169 -r 9d86c1a70f34 tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/firmware/vmxassist/vm86.c Wed Jun 07 11:03:51 2006 +0100 @@ -36,6 +36,8 @@ static unsigned prev_eip = 0; enum vm86_mode mode = 0; + +static struct regs saved_rm_regs; #ifdef DEBUG int traceset = 0; @@ -795,6 +797,8 @@ protected_mode(struct regs *regs) oldctx.esp = regs->uesp; oldctx.eflags = regs->eflags; + memset(&saved_rm_regs, 0, sizeof(struct regs)); + /* reload all segment registers */ if (!load_seg(regs->cs, &oldctx.cs_base, &oldctx.cs_limit, &oldctx.cs_arbytes)) @@ -808,6 +812,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.es_base, &oldctx.es_limit, &oldctx.es_arbytes); oldctx.es_sel = 0; + saved_rm_regs.ves = regs->ves; } if (load_seg(regs->uss, &oldctx.ss_base, @@ 
-817,6 +822,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.ss_base, &oldctx.ss_limit, &oldctx.ss_arbytes); oldctx.ss_sel = 0; + saved_rm_regs.uss = regs->uss; } if (load_seg(regs->vds, &oldctx.ds_base, @@ -826,6 +832,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.ds_base, &oldctx.ds_limit, &oldctx.ds_arbytes); oldctx.ds_sel = 0; + saved_rm_regs.vds = regs->vds; } if (load_seg(regs->vfs, &oldctx.fs_base, @@ -835,6 +842,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.fs_base, &oldctx.fs_limit, &oldctx.fs_arbytes); oldctx.fs_sel = 0; + saved_rm_regs.vfs = regs->vfs; } if (load_seg(regs->vgs, &oldctx.gs_base, @@ -844,6 +852,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.gs_base, &oldctx.gs_limit, &oldctx.gs_arbytes); oldctx.gs_sel = 0; + saved_rm_regs.vgs = regs->vgs; } /* initialize jump environment to warp back to protected mode */ @@ -880,16 +889,22 @@ real_mode(struct regs *regs) if (regs->uss >= HIGHMEM) panic("%%ss 0x%lx higher than 1MB", regs->uss); regs->uss = address(regs, regs->uss, 0) >> 4; + } else { + regs->uss = saved_rm_regs.uss; } if (regs->vds != 0) { if (regs->vds >= HIGHMEM) panic("%%ds 0x%lx higher than 1MB", regs->vds); regs->vds = address(regs, regs->vds, 0) >> 4; + } else { + regs->vds = saved_rm_regs.vds; } if (regs->ves != 0) { if (regs->ves >= HIGHMEM) panic("%%es 0x%lx higher than 1MB", regs->ves); regs->ves = address(regs, regs->ves, 0) >> 4; + } else { + regs->ves = saved_rm_regs.ves; } /* this should get us into 16-bit mode */ @@ -971,6 +986,39 @@ jmpl(struct regs *regs, int prefix) } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ eip = (prefix & DATA32) ? 
fetch32(regs) : fetch16(regs); cs = fetch16(regs); + + TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + + regs->cs = cs; + regs->eip = eip; + set_mode(regs, VM86_REAL); + } else + panic("jmpl"); +} + +static void +jmpl_indirect(struct regs *regs, int prefix, unsigned modrm) +{ + unsigned n = regs->eip; + unsigned cs, eip; + unsigned addr; + + addr = operand(prefix, regs, modrm); + + if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */ + eip = (prefix & DATA32) ? read32(addr) : read16(addr); + addr += (prefix & DATA32) ? 4 : 2; + cs = read16(addr); + + TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + + regs->cs = cs; + regs->eip = eip; + set_mode(regs, VM86_PROTECTED); + } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ + eip = (prefix & DATA32) ? read32(addr) : read16(addr); + addr += (prefix & DATA32) ? 4 : 2; + cs = read16(addr); TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); @@ -1306,6 +1354,23 @@ opcode(struct regs *regs) } goto invalid; + case 0xFF: /* jmpl (indirect) */ + if ((mode == VM86_REAL_TO_PROTECTED) || + (mode == VM86_PROTECTED_TO_REAL)) { + unsigned modrm = fetch8(regs); + + switch((modrm >> 3) & 7) { + case 5: + jmpl_indirect(regs, prefix, modrm); + return OPC_INVALID; + + default: + break; + } + + } + goto invalid; + case 0xEB: /* short jump */ if ((mode == VM86_REAL_TO_PROTECTED) || (mode == VM86_PROTECTED_TO_REAL)) { diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/cirrus_vga.c --- a/tools/ioemu/hw/cirrus_vga.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/ioemu/hw/cirrus_vga.c Wed Jun 07 11:03:51 2006 +0100 @@ -2460,10 +2460,9 @@ static CPUWriteMemoryFunc *cirrus_linear }; extern FILE *logfile; -#if defined(__i386__) || defined (__x86_64__) static void * set_vram_mapping(unsigned long begin, unsigned long end) { - unsigned long * extent_start = NULL; + xen_pfn_t *extent_start = NULL; unsigned long nr_extents; void *vram_pointer = NULL; int i; @@ -2474,14 +2473,14 
@@ static void * set_vram_mapping(unsigned end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; nr_extents = (end - begin) >> TARGET_PAGE_BITS; - extent_start = malloc(sizeof(unsigned long) * nr_extents ); + extent_start = malloc(sizeof(xen_pfn_t) * nr_extents ); if (extent_start == NULL) { fprintf(stderr, "Failed malloc on set_vram_mapping\n"); return NULL; } - memset(extent_start, 0, sizeof(unsigned long) * nr_extents); + memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents); for (i = 0; i < nr_extents; i++) { @@ -2509,7 +2508,7 @@ static void * set_vram_mapping(unsigned static int unset_vram_mapping(unsigned long begin, unsigned long end) { - unsigned long * extent_start = NULL; + xen_pfn_t *extent_start = NULL; unsigned long nr_extents; int i; @@ -2520,7 +2519,7 @@ static int unset_vram_mapping(unsigned l end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; nr_extents = (end - begin) >> TARGET_PAGE_BITS; - extent_start = malloc(sizeof(unsigned long) * nr_extents ); + extent_start = malloc(sizeof(xen_pfn_t) * nr_extents ); if (extent_start == NULL) { @@ -2528,7 +2527,7 @@ static int unset_vram_mapping(unsigned l return -1; } - memset(extent_start, 0, sizeof(unsigned long) * nr_extents); + memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents); for (i = 0; i < nr_extents; i++) extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS; @@ -2540,10 +2539,6 @@ static int unset_vram_mapping(unsigned l return 0; } -#elif defined(__ia64__) -static void * set_vram_mapping(unsigned long addr, unsigned long end) {} -static int unset_vram_mapping(unsigned long addr, unsigned long end) {} -#endif extern int vga_accelerate; /* Compute the memory access functions */ diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/ioemu/hw/pc.c Wed Jun 07 11:03:51 2006 +0100 @@ -537,8 +537,11 @@ void pc_init(uint64_t ram_size, int vga_ for(i = 0; i < MAX_SERIAL_PORTS; i++) { if (serial_hds[i]) { 
sp = serial_init(serial_io[i], serial_irq[i], serial_hds[i]); - if (i == SUMMA_PORT) + if (i == serial_summa_port) { summa_init(sp, serial_hds[i]); + fprintf(stderr, "Serial port %d (COM%d) initialized for Summagraphics\n", + i, i+1); + } } } diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/hw/vga.c --- a/tools/ioemu/hw/vga.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/ioemu/hw/vga.c Wed Jun 07 11:03:51 2006 +0100 @@ -1995,6 +1995,7 @@ void vga_common_init(VGAState *s, Displa s->get_resolution = vga_get_resolution; /* XXX: currently needed for display */ vga_state = s; + vga_bios_init(s); } @@ -2082,7 +2083,6 @@ int vga_initialize(PCIBus *bus, DisplayS #endif } - vga_bios_init(s); return 0; } diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/ioemu/vl.c Wed Jun 07 11:03:51 2006 +0100 @@ -146,6 +146,7 @@ int repeat_key = 1; int repeat_key = 1; TextConsole *vga_console; CharDriverState *serial_hds[MAX_SERIAL_PORTS]; +int serial_summa_port = -1; int xc_handle; time_t timeoffset = 0; @@ -2457,7 +2458,7 @@ int unset_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err = 0; xc_dominfo_t info; @@ -2490,7 +2491,7 @@ int set_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { xc_dominfo_t info; int err = 0; @@ -2498,7 +2499,7 @@ int set_mm_mapping(int xc_handle, xc_domain_getinfo(xc_handle, domid, 1, &info); if ( xc_domain_setmaxmem(xc_handle, domid, - (info.nr_pages + nr_pages) * PAGE_SIZE/1024) != 0) + info.max_memkb + nr_pages * PAGE_SIZE/1024) !=0) { fprintf(logfile, "set maxmem returned error %d\n", errno); return -1; @@ -2556,7 +2557,8 @@ int main(int argc, char **argv) int serial_device_index; char qemu_dm_logfilename[64]; const char *loadvm = NULL; - unsigned long nr_pages, *page_array; + 
unsigned long nr_pages; + xen_pfn_t *page_array; extern void *shared_page; #if !defined(CONFIG_SOFTMMU) @@ -2588,8 +2590,8 @@ int main(int argc, char **argv) pstrcpy(monitor_device, sizeof(monitor_device), "vc"); pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc"); - pstrcpy(serial_devices[1], sizeof(serial_devices[1]), "null"); - for(i = 2; i < MAX_SERIAL_PORTS; i++) + serial_summa_port = -1; + for(i = 1; i < MAX_SERIAL_PORTS; i++) serial_devices[i][0] = '\0'; serial_device_index = 0; @@ -3022,8 +3024,8 @@ int main(int argc, char **argv) xc_handle = xc_interface_open(); - if ( (page_array = (unsigned long *) - malloc(nr_pages * sizeof(unsigned long))) == NULL) + if ( (page_array = (xen_pfn_t *) + malloc(nr_pages * sizeof(xen_pfn_t))) == NULL) { fprintf(logfile, "malloc returned error %d\n", errno); exit(-1); @@ -3078,8 +3080,8 @@ int main(int argc, char **argv) page_array[0]); #endif - fprintf(logfile, "shared page at pfn:%lx, mfn: %lx\n", (nr_pages-1), - (page_array[nr_pages - 1])); + fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", (nr_pages-1), + (uint64_t)(page_array[nr_pages - 1])); /* we always create the cdrom drive, even if no disk is there */ bdrv_init(); @@ -3173,6 +3175,20 @@ int main(int argc, char **argv) } monitor_init(monitor_hd, !nographic); + /* Find which port should be the Summagraphics port */ + /* It's the first unspecified serial line. 
Note that COM1 is set */ + /* by default, so the Summagraphics port would be COM2 or higher */ + + for(i = 0; i < MAX_SERIAL_PORTS; i++) { + if (serial_devices[i][0] != '\0') + continue; + serial_summa_port = i; + pstrcpy(serial_devices[serial_summa_port], sizeof(serial_devices[0]), "null"); + break; + } + + /* Now, open the ports */ + for(i = 0; i < MAX_SERIAL_PORTS; i++) { if (serial_devices[i][0] != '\0') { serial_hds[i] = qemu_chr_open(serial_devices[i]); diff -r b09dbe439169 -r 9d86c1a70f34 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/ioemu/vl.h Wed Jun 07 11:03:51 2006 +0100 @@ -238,9 +238,9 @@ void console_select(unsigned int index); /* serial ports */ #define MAX_SERIAL_PORTS 4 -#define SUMMA_PORT 1 extern CharDriverState *serial_hds[MAX_SERIAL_PORTS]; +extern int serial_summa_port; /* network redirectors support */ diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_core.c Wed Jun 07 11:03:51 2006 +0100 @@ -28,7 +28,7 @@ xc_domain_dumpcore_via_callback(int xc_h dumpcore_rtn_t dump_rtn) { unsigned long nr_pages; - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; xc_dominfo_t info; int i, nr_vcpus = 0; char *dump_mem, *dump_mem_start = NULL; @@ -70,7 +70,7 @@ xc_domain_dumpcore_via_callback(int xc_h sizeof(vcpu_guest_context_t)*nr_vcpus; dummy_len = (sizeof(struct xc_core_header) + (sizeof(vcpu_guest_context_t) * nr_vcpus) + - (nr_pages * sizeof(unsigned long))); + (nr_pages * sizeof(xen_pfn_t))); header.xch_pages_offset = round_pgup(dummy_len); sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header)); @@ -81,7 +81,7 @@ xc_domain_dumpcore_via_callback(int xc_h if ( sts != 0 ) goto error_out; - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) { printf("Could not allocate memory\n"); goto error_out; @@ -91,7 +91,7 @@ 
xc_domain_dumpcore_via_callback(int xc_h printf("Could not get the page frame list\n"); goto error_out; } - sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long)); + sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t)); if ( sts != 0 ) goto error_out; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_domain.c Wed Jun 07 11:03:51 2006 +0100 @@ -291,7 +291,7 @@ int xc_domain_memory_increase_reservatio unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err; struct xen_memory_reservation reservation = { @@ -324,7 +324,7 @@ int xc_domain_memory_decrease_reservatio uint32_t domid, unsigned long nr_extents, unsigned int extent_order, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err; struct xen_memory_reservation reservation = { @@ -363,7 +363,7 @@ int xc_domain_memory_populate_physmap(in unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err; struct xen_memory_reservation reservation = { @@ -392,8 +392,8 @@ int xc_domain_translate_gpfn_list(int xc int xc_domain_translate_gpfn_list(int xc_handle, uint32_t domid, unsigned long nr_gpfns, - unsigned long *gpfn_list, - unsigned long *mfn_list) + xen_pfn_t *gpfn_list, + xen_pfn_t *mfn_list) { struct xen_translate_gpfn_list op = { .domid = domid, diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_hvm_build.c Wed Jun 07 11:03:51 2006 +0100 @@ -135,7 +135,7 @@ static void set_hvm_info_checksum(struct * hvmloader will use this info to set BIOS accordingly */ static int set_hvm_info(int xc_handle, uint32_t dom, - unsigned long *pfn_list, unsigned int vcpus, + xen_pfn_t *pfn_list, unsigned int vcpus, 
unsigned int pae, unsigned int acpi, unsigned int apic) { char *va_map; @@ -178,7 +178,7 @@ static int setup_guest(int xc_handle, unsigned int store_evtchn, unsigned long *store_mfn) { - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; unsigned long count, i; unsigned long long ptr; xc_mmu_t *mmu = NULL; @@ -223,7 +223,7 @@ static int setup_guest(int xc_handle, goto error_out; } - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) { PERROR("Could not allocate memory.\n"); goto error_out; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_ia64_stubs.c Wed Jun 07 11:03:51 2006 +0100 @@ -57,7 +57,7 @@ xc_plan9_build(int xc_handle, int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + xen_pfn_t *pfn_buf, unsigned int start_page, unsigned int nr_pages) { @@ -65,7 +65,7 @@ int xc_ia64_get_pfn_list(int xc_handle, int num_pfns,ret; unsigned int __start_page, __nr_pages; unsigned long max_pfns; - unsigned long *__pfn_buf; + xen_pfn_t *__pfn_buf; __start_page = start_page; __nr_pages = nr_pages; @@ -80,7 +80,7 @@ int xc_ia64_get_pfn_list(int xc_handle, set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf); if ( (max_pfns != -1UL) - && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 ) + && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0 ) { PERROR("Could not lock pfn list buffer"); return -1; @@ -89,7 +89,7 @@ int xc_ia64_get_pfn_list(int xc_handle, ret = do_dom0_op(xc_handle, &op); if (max_pfns != -1UL) - (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long)); + (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)); if (max_pfns == -1UL) return 0; @@ -122,10 +122,10 @@ int xc_ia64_copy_to_domain_pages(int xc_ { // N.B. 
gva should be page aligned - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; int i; - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){ + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ){ PERROR("Could not allocate memory"); goto error_out; } diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux.c --- a/tools/libxc/xc_linux.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_linux.c Wed Jun 07 11:03:51 2006 +0100 @@ -28,7 +28,7 @@ int xc_interface_close(int xc_handle) } void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot, - unsigned long *arr, int num) + xen_pfn_t *arr, int num) { privcmd_mmapbatch_t ioctlx; void *addr; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_linux_build.c Wed Jun 07 11:03:51 2006 +0100 @@ -10,6 +10,7 @@ #include "xc_aout9.h" #include <stdlib.h> #include <unistd.h> +#include <inttypes.h> #include <zlib.h> #if defined(__i386__) @@ -136,7 +137,7 @@ int load_initrd(int xc_handle, domid_t d int load_initrd(int xc_handle, domid_t dom, struct initrd_info *initrd, unsigned long physbase, - unsigned long *phys_to_mach) + xen_pfn_t *phys_to_mach) { char page[PAGE_SIZE]; unsigned long pfn_start, pfn, nr_pages; @@ -189,7 +190,7 @@ static int setup_pg_tables(int xc_handle vcpu_guest_context_t *ctxt, unsigned long dsi_v_start, unsigned long v_end, - unsigned long *page_array, + xen_pfn_t *page_array, unsigned long vpt_start, unsigned long vpt_end, unsigned shadow_mode_enabled) @@ -205,9 +206,9 @@ static int setup_pg_tables(int xc_handle alloc_pt(l2tab, vl2tab, pl2tab); vl2e = &vl2tab[l2_table_offset(dsi_v_start)]; if (shadow_mode_enabled) - ctxt->ctrlreg[3] = pl2tab; + ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl2tab >> PAGE_SHIFT); else - ctxt->ctrlreg[3] = l2tab; + ctxt->ctrlreg[3] = xen_pfn_to_cr3(l2tab >> PAGE_SHIFT); for ( count = 0; count < ((v_end - dsi_v_start) >> 
PAGE_SHIFT); count++ ) { @@ -251,26 +252,42 @@ static int setup_pg_tables_pae(int xc_ha vcpu_guest_context_t *ctxt, unsigned long dsi_v_start, unsigned long v_end, - unsigned long *page_array, + xen_pfn_t *page_array, unsigned long vpt_start, unsigned long vpt_end, - unsigned shadow_mode_enabled) + unsigned shadow_mode_enabled, + unsigned pae_mode) { l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL; l2_pgentry_64_t *vl2tab = NULL, *vl2e = NULL; l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL; uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab; - unsigned long ppt_alloc, count; + unsigned long ppt_alloc, count, nmfn; /* First allocate page for page dir. */ ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT; + + if ( pae_mode == PAEKERN_extended_cr3 ) + { + ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3); + } + else if ( page_array[ppt_alloc] > 0xfffff ) + { + nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]); + if ( nmfn == 0 ) + { + fprintf(stderr, "Couldn't get a page below 4GB :-(\n"); + goto error_out; + } + page_array[ppt_alloc] = nmfn; + } alloc_pt(l3tab, vl3tab, pl3tab); vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)]; if (shadow_mode_enabled) - ctxt->ctrlreg[3] = pl3tab; + ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl3tab >> PAGE_SHIFT); else - ctxt->ctrlreg[3] = l3tab; + ctxt->ctrlreg[3] = xen_pfn_to_cr3(l3tab >> PAGE_SHIFT); for ( count = 0; count < ((v_end - dsi_v_start) >> PAGE_SHIFT); count++) { @@ -340,7 +357,7 @@ static int setup_pg_tables_64(int xc_han vcpu_guest_context_t *ctxt, unsigned long dsi_v_start, unsigned long v_end, - unsigned long *page_array, + xen_pfn_t *page_array, unsigned long vpt_start, unsigned long vpt_end, int shadow_mode_enabled) @@ -361,9 +378,9 @@ static int setup_pg_tables_64(int xc_han alloc_pt(l4tab, vl4tab, pl4tab); vl4e = &vl4tab[l4_table_offset(dsi_v_start)]; if (shadow_mode_enabled) - ctxt->ctrlreg[3] = pl4tab; + ctxt->ctrlreg[3] = xen_pfn_to_cr3(pl4tab >> PAGE_SHIFT); else - ctxt->ctrlreg[3] = l4tab; + 
ctxt->ctrlreg[3] = xen_pfn_to_cr3(l4tab >> PAGE_SHIFT); for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++) { @@ -451,7 +468,7 @@ static int setup_guest(int xc_handle, unsigned int console_evtchn, unsigned long *console_mfn, uint32_t required_features[XENFEAT_NR_SUBMAPS]) { - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; struct load_funcs load_funcs; struct domain_setup_info dsi; unsigned long vinitrd_start; @@ -478,7 +495,7 @@ static int setup_guest(int xc_handle, start_page = dsi.v_start >> PAGE_SHIFT; pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT; - if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL ) + if ( (page_array = malloc(pgnr * sizeof(xen_pfn_t))) == NULL ) { PERROR("Could not allocate memory"); goto error_out; @@ -579,11 +596,11 @@ static int compat_check(int xc_handle, s } if (strstr(xen_caps, "xen-3.0-x86_32p")) { - if (!dsi->pae_kernel) { + if (dsi->pae_kernel == PAEKERN_no) { ERROR("Non PAE-kernel on PAE host."); return 0; } - } else if (dsi->pae_kernel) { + } else if (dsi->pae_kernel != PAEKERN_no) { ERROR("PAE-kernel on non-PAE host."); return 0; } @@ -606,7 +623,7 @@ static int setup_guest(int xc_handle, unsigned int console_evtchn, unsigned long *console_mfn, uint32_t required_features[XENFEAT_NR_SUBMAPS]) { - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; unsigned long count, i, hypercall_pfn; start_info_t *start_info; shared_info_t *shared_info; @@ -617,7 +634,7 @@ static int setup_guest(int xc_handle, unsigned long nr_pt_pages; unsigned long physmap_pfn; - unsigned long *physmap, *physmap_e; + xen_pfn_t *physmap, *physmap_e; struct load_funcs load_funcs; struct domain_setup_info dsi; @@ -673,7 +690,8 @@ static int setup_guest(int xc_handle, for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ ) { - if ( (supported_features[i]&required_features[i]) != required_features[i] ) + if ( (supported_features[i] & required_features[i]) != + required_features[i] ) { ERROR("Guest kernel does not 
support a required feature."); goto error_out; @@ -719,7 +737,7 @@ static int setup_guest(int xc_handle, (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ ((_l) & ~((1UL<<(_s))-1))) >> (_s)) #if defined(__i386__) - if ( dsi.pae_kernel ) + if ( dsi.pae_kernel != PAEKERN_no ) { if ( (1 + /* # L3 */ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */ @@ -797,11 +815,11 @@ static int setup_guest(int xc_handle, /* setup page tables */ #if defined(__i386__) - if (dsi.pae_kernel) + if (dsi.pae_kernel != PAEKERN_no) rc = setup_pg_tables_pae(xc_handle, dom, ctxt, dsi.v_start, v_end, page_array, vpt_start, vpt_end, - shadow_mode_enabled); + shadow_mode_enabled, dsi.pae_kernel); else rc = setup_pg_tables(xc_handle, dom, ctxt, dsi.v_start, v_end, @@ -824,16 +842,16 @@ static int setup_guest(int xc_handle, */ if ( !shadow_mode_enabled ) { - if ( dsi.pae_kernel ) + if ( dsi.pae_kernel != PAEKERN_no ) { if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE, - ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) ) + xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) ) goto error_out; } else { if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE, - ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) ) + xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) ) goto error_out; } } @@ -845,7 +863,7 @@ static int setup_guest(int xc_handle, * correct protection for the page */ if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE, - ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) ) + xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) ) goto error_out; #endif @@ -865,8 +883,8 @@ static int setup_guest(int xc_handle, ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, count) ) { - fprintf(stderr,"m2p update failure p=%lx m=%lx\n", - count, page_array[count]); + fprintf(stderr,"m2p update failure p=%lx m=%"PRIx64"\n", + count, (uint64_t)page_array[count]); munmap(physmap, PAGE_SIZE); goto error_out; } @@ -958,7 +976,7 @@ static int setup_guest(int xc_handle, rc = xc_version(xc_handle, XENVER_version, NULL); sprintf(start_info->magic, "xen-%i.%i-x86_%d%s", rc >> 16, rc 
& (0xFFFF), (unsigned int)sizeof(long)*8, - dsi.pae_kernel ? "p" : ""); + (dsi.pae_kernel != PAEKERN_no) ? "p" : ""); start_info->nr_pages = nr_pages; start_info->shared_info = guest_shared_info_mfn << PAGE_SHIFT; start_info->flags = flags; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_linux_restore.c Wed Jun 07 11:03:51 2006 +0100 @@ -25,10 +25,10 @@ static unsigned long max_pfn; static unsigned long max_pfn; /* Live mapping of the table mapping each PFN to its current MFN. */ -static unsigned long *live_p2m = NULL; +static xen_pfn_t *live_p2m = NULL; /* A table mapping each PFN to its new MFN. */ -static unsigned long *p2m = NULL; +static xen_pfn_t *p2m = NULL; static ssize_t @@ -108,7 +108,7 @@ int xc_linux_restore(int xc_handle, int unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOM0_OP; - int rc = 1, i, n; + int rc = 1, i, n, pae_extended_cr3 = 0; unsigned long mfn, pfn; unsigned int prev_pc, this_pc; int verify = 0; @@ -126,7 +126,7 @@ int xc_linux_restore(int xc_handle, int unsigned long *pfn_type = NULL; /* A table of MFNs to map in the current region */ - unsigned long *region_mfn = NULL; + xen_pfn_t *region_mfn = NULL; /* Types of the pfns in the current region */ unsigned long region_pfn_type[MAX_BATCH_SIZE]; @@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int unsigned long *page = NULL; /* A copy of the pfn-to-mfn table frame list. */ - unsigned long *p2m_frame_list = NULL; + xen_pfn_t *p2m_frame_list = NULL; /* A temporary mapping of the guest's start_info page. 
*/ start_info_t *start_info; @@ -162,30 +162,88 @@ int xc_linux_restore(int xc_handle, int return 1; } - if (mlock(&ctxt, sizeof(ctxt))) { /* needed for build dom0 op, but might as well do early */ ERR("Unable to mlock ctxt"); return 1; } - - /* Read the saved P2M frame list */ - if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; } - if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + /* Read first entry of P2M list, or extended-info signature (~0UL). */ + if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { + ERR("read extended-info signature failed"); + goto out; + } + + if (p2m_frame_list[0] == ~0UL) { + uint32_t tot_bytes; + + /* Next 4 bytes: total size of following extended info. */ + if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) { + ERR("read extended-info size failed"); + goto out; + } + + while (tot_bytes) { + uint32_t chunk_bytes; + char chunk_sig[4]; + + /* 4-character chunk signature + 4-byte remaining chunk size. */ + if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || + !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) { + ERR("read extended-info chunk signature failed"); + goto out; + } + tot_bytes -= 8; + + /* VCPU context structure? */ + if (!strncmp(chunk_sig, "vcpu", 4)) { + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERR("read extended-info vcpu context failed"); + goto out; + } + tot_bytes -= sizeof(struct vcpu_guest_context); + chunk_bytes -= sizeof(struct vcpu_guest_context); + + if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) + pae_extended_cr3 = 1; + } + + /* Any remaining bytes of this chunk: read and discard. 
*/ + while (chunk_bytes) { + unsigned long sz = chunk_bytes; + if ( sz > P2M_FL_SIZE ) + sz = P2M_FL_SIZE; + if (!read_exact(io_fd, p2m_frame_list, sz)) { + ERR("read-and-discard extended-info chunk bytes failed"); + goto out; + } + chunk_bytes -= sz; + tot_bytes -= sz; + } + } + + /* Now read the real first entry of P2M list. */ + if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { + ERR("read first entry of p2m_frame_list failed"); + goto out; + } + } + + /* First entry is already read into the p2m array. */ + if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) { ERR("read p2m_frame_list failed"); goto out; } - /* We want zeroed memory so use calloc rather than malloc. */ - p2m = calloc(max_pfn, sizeof(unsigned long)); + p2m = calloc(max_pfn, sizeof(xen_pfn_t)); pfn_type = calloc(max_pfn, sizeof(unsigned long)); - region_mfn = calloc(MAX_BATCH_SIZE, sizeof(unsigned long)); + region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { ERR("memory alloc failed"); @@ -193,7 +251,7 @@ int xc_linux_restore(int xc_handle, int goto out; } - if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) { + if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { ERR("Could not mlock region_mfn"); goto out; } @@ -331,17 +389,27 @@ int xc_linux_restore(int xc_handle, int ** A page table page - need to 'uncanonicalize' it, i.e. ** replace all the references to pfns with the corresponding ** mfns for the new domain. + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. */ - if(!uncanonicalize_pagetable(pagetype, page)) { - /* - ** Failing to uncanonicalize a page table can be ok - ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). 
- */ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; + if ((pt_levels != 3) || + pae_extended_cr3 || + (pagetype != L1TAB)) { + + if (!uncanonicalize_pagetable(pagetype, page)) { + /* + ** Failing to uncanonicalize a page table can be ok + ** under live migration since the pages type may have + ** changed by now (and we'll get an update later). + */ + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; + } + } } else if(pagetype != NOTAB) { @@ -389,6 +457,100 @@ int xc_linux_restore(int xc_handle, int } DPRINTF("Received all pages (%d races)\n", nraces); + + if ((pt_levels == 3) && !pae_extended_cr3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This + ** is a little awkward and involves (a) finding all such PGDs and + ** replacing them with 'lowmem' versions; (b) updating the p2m[] + ** with the new info; and (c) uncanonicalizing all the L1s using the + ** (potentially updated) p2m[]. + ** + ** This is relatively slow (and currently involves two passes through + ** the pfn_type[] array), but at least seems to be correct. May wish + ** to consider more complex approaches to optimize this later.
+ */ + + int j, k; + + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ + for (i = 0; i < max_pfn; i++) { + + if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) { + + unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); + + if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { + ERR("Couldn't get a page below 4GB :-("); + goto out; + } + + p2m[i] = new_mfn; + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE, i)) { + ERR("Couldn't m2p on PAE root pgdir"); + goto out; + } + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + + } + } + + /* Second pass: find all L1TABs and uncanonicalize them */ + j = 0; + + for(i = 0; i < max_pfn; i++) { + + if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { + + if (!(region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { + ERR("map batch failed"); + goto out; + } + + for(k = 0; k < j; k++) { + if(!uncanonicalize_pagetable(L1TAB, + region_base + k*PAGE_SIZE)) { + ERR("failed uncanonicalize pt!"); + goto out; + } + } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + + } if (xc_finish_mmu_updates(xc_handle, mmu)) { @@ -536,7 +698,7 @@ int xc_linux_restore(int xc_handle, int } /* Uncanonicalise the page table base pointer. 
*/ - pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT; + pfn = xen_cr3_to_pfn(ctxt.ctrlreg[3]); if (pfn >= max_pfn) { ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", @@ -552,7 +714,7 @@ int xc_linux_restore(int xc_handle, int goto out; } - ctxt.ctrlreg[3] = p2m[pfn] << PAGE_SHIFT; + ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]); /* clear any pending events and the selector */ memset(&(shared_info->evtchn_pending[0]), 0, diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_linux_save.c Wed Jun 07 11:03:51 2006 +0100 @@ -40,10 +40,10 @@ static unsigned long max_pfn; static unsigned long max_pfn; /* Live mapping of the table mapping each PFN to its current MFN. */ -static unsigned long *live_p2m = NULL; +static xen_pfn_t *live_p2m = NULL; /* Live mapping of system MFN to PFN table. */ -static unsigned long *live_m2p = NULL; +static xen_pfn_t *live_m2p = NULL; /* grep fodder: machine_to_phys */ @@ -501,22 +501,22 @@ void canonicalize_pagetable(unsigned lon -static unsigned long *xc_map_m2p(int xc_handle, +static xen_pfn_t *xc_map_m2p(int xc_handle, unsigned long max_mfn, int prot) { struct xen_machphys_mfn_list xmml; privcmd_mmap_entry_t *entries; unsigned long m2p_chunks, m2p_size; - unsigned long *m2p; - unsigned long *extent_start; + xen_pfn_t *m2p; + xen_pfn_t *extent_start; int i, rc; m2p_size = M2P_SIZE(max_mfn); m2p_chunks = M2P_CHUNKS(max_mfn); xmml.max_extents = m2p_chunks; - if (!(extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { + if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) { ERR("failed to allocate space for m2p mfns"); return NULL; } @@ -583,11 +583,11 @@ int xc_linux_save(int xc_handle, int io_ char page[PAGE_SIZE]; /* Double and single indirect references to the live P2M table */ - unsigned long *live_p2m_frame_list_list = NULL; - unsigned long *live_p2m_frame_list = NULL; + xen_pfn_t *live_p2m_frame_list_list = NULL; + xen_pfn_t 
*live_p2m_frame_list = NULL; /* A copy of the pfn-to-mfn table frame list. */ - unsigned long *p2m_frame_list = NULL; + xen_pfn_t *p2m_frame_list = NULL; /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; @@ -712,11 +712,11 @@ int xc_linux_save(int xc_handle, int io_ memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); /* Canonicalise the pfn-to-mfn table frame-number list. */ - for (i = 0; i < max_pfn; i += ulpp) { - if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { + for (i = 0; i < max_pfn; i += fpp) { + if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) { ERR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, - p2m_frame_list[i/ulpp]); + ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, + (uint64_t)p2m_frame_list[i/fpp]); goto out; } } @@ -818,12 +818,33 @@ int xc_linux_save(int xc_handle, int io_ /* Start writing out the saved-domain record. */ - if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { ERR("write: max_pfn"); goto out; } - if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + /* + * Write an extended-info structure to inform the restore code that + * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off + * slow paths in the restore code. 
+ */ + if ((pt_levels == 3) && + (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))) { + unsigned long signature = ~0UL; + uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; + uint32_t chunk_sz = sizeof(struct vcpu_guest_context); + char chunk_sig[] = "vcpu"; + if (!write_exact(io_fd, &signature, sizeof(signature)) || + !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || + !write_exact(io_fd, &chunk_sig, 4) || + !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || + !write_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERR("write: extended info"); + goto out; + } + } + + if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { ERR("write: p2m_frame_list"); goto out; } @@ -1129,12 +1150,12 @@ int xc_linux_save(int xc_handle, int io_ } /* Canonicalise the page table base pointer. */ - if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.ctrlreg[3] >> PAGE_SHIFT) ) { + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[3])) ) { ERR("PT base is not in range of pseudophys map"); goto out; } - ctxt.ctrlreg[3] = mfn_to_pfn(ctxt.ctrlreg[3] >> PAGE_SHIFT) << - PAGE_SHIFT; + ctxt.ctrlreg[3] = + xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) || !write_exact(io_fd, live_shinfo, PAGE_SIZE)) { diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_aout9.c --- a/tools/libxc/xc_load_aout9.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_load_aout9.c Wed Jun 07 11:03:51 2006 +0100 @@ -17,7 +17,7 @@ #define KOFFSET(_p) ((_p)&~KZERO) static int parseaout9image(const char *, unsigned long, struct domain_setup_info *); -static int loadaout9image(const char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *); +static int loadaout9image(const char *, unsigned long, int, uint32_t, xen_pfn_t *, struct domain_setup_info *); static void copyout(int, uint32_t, unsigned long *, unsigned long, const char *, int); struct Exec *get_header(const char *, unsigned long, struct Exec *); @@ -79,7 +79,7 @@ loadaout9image( 
const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, + xen_pfn_t *parray, struct domain_setup_info *dsi) { struct Exec ehdr; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_bin.c --- a/tools/libxc/xc_load_bin.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_load_bin.c Wed Jun 07 11:03:51 2006 +0100 @@ -107,7 +107,7 @@ static int static int loadbinimage( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi); + xen_pfn_t *parray, struct domain_setup_info *dsi); int probe_bin(const char *image, unsigned long image_size, @@ -235,7 +235,7 @@ static int static int loadbinimage( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi) + xen_pfn_t *parray, struct domain_setup_info *dsi) { unsigned long size; char *va; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_load_elf.c Wed Jun 07 11:03:51 2006 +0100 @@ -16,10 +16,10 @@ static int static int loadelfimage( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi); + xen_pfn_t *parray, struct domain_setup_info *dsi); static int loadelfsymtab( - const char *image, int xch, uint32_t dom, unsigned long *parray, + const char *image, int xch, uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi); int probe_elf(const char *image, @@ -122,8 +122,15 @@ static int parseelfimage(const char *ima ERROR("Actually saw: '%s'", guestinfo); return -EINVAL; } - if ( (strstr(guestinfo, "PAE=yes") != NULL) ) - dsi->pae_kernel = 1; + + dsi->pae_kernel = PAEKERN_no; + p = strstr(guestinfo, "PAE=yes"); + if ( p != NULL ) + { + dsi->pae_kernel = PAEKERN_yes; + if ( !strncmp(p+7, "[extended-cr3]", 14) ) + dsi->pae_kernel = PAEKERN_extended_cr3; + } break; } @@ -204,7 +211,7 @@ static 
int static int loadelfimage( const char *image, unsigned long elfsize, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi) + xen_pfn_t *parray, struct domain_setup_info *dsi) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image; Elf_Phdr *phdr; @@ -258,7 +265,7 @@ loadelfimage( static int loadelfsymtab( - const char *image, int xch, uint32_t dom, unsigned long *parray, + const char *image, int xch, uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_pagetab.c --- a/tools/libxc/xc_pagetab.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_pagetab.c Wed Jun 07 11:03:51 2006 +0100 @@ -78,7 +78,7 @@ unsigned long xc_translate_foreign_addre fprintf(stderr, "failed to retreive vcpu context\n"); goto out; } - cr3 = ctx.ctrlreg[3]; + cr3 = ((unsigned long long)xen_cr3_to_pfn(ctx.ctrlreg[3])) << PAGE_SHIFT; /* Page Map Level 4 */ diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_private.c Wed Jun 07 11:03:51 2006 +0100 @@ -4,6 +4,7 @@ * Helper functions for the rest of the library. 
*/ +#include <inttypes.h> #include "xc_private.h" /* NB: arr must be mlock'ed */ @@ -134,9 +135,9 @@ int xc_memory_op(int xc_handle, struct xen_memory_reservation *reservation = arg; struct xen_machphys_mfn_list *xmml = arg; struct xen_translate_gpfn_list *trans = arg; - unsigned long *extent_start; - unsigned long *gpfn_list; - unsigned long *mfn_list; + xen_pfn_t *extent_start; + xen_pfn_t *gpfn_list; + xen_pfn_t *mfn_list; long ret = -EINVAL; hypercall.op = __HYPERVISOR_memory_op; @@ -156,7 +157,7 @@ int xc_memory_op(int xc_handle, get_xen_guest_handle(extent_start, reservation->extent_start); if ( (extent_start != NULL) && (mlock(extent_start, - reservation->nr_extents * sizeof(unsigned long)) != 0) ) + reservation->nr_extents * sizeof(xen_pfn_t)) != 0) ) { PERROR("Could not mlock"); safe_munlock(reservation, sizeof(*reservation)); @@ -171,7 +172,7 @@ int xc_memory_op(int xc_handle, } get_xen_guest_handle(extent_start, xmml->extent_start); if ( mlock(extent_start, - xmml->max_extents * sizeof(unsigned long)) != 0 ) + xmml->max_extents * sizeof(xen_pfn_t)) != 0 ) { PERROR("Could not mlock"); safe_munlock(xmml, sizeof(*xmml)); @@ -192,17 +193,17 @@ int xc_memory_op(int xc_handle, goto out1; } get_xen_guest_handle(gpfn_list, trans->gpfn_list); - if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(long)) != 0 ) + if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 ) { PERROR("Could not mlock"); safe_munlock(trans, sizeof(*trans)); goto out1; } get_xen_guest_handle(mfn_list, trans->mfn_list); - if ( mlock(mfn_list, trans->nr_gpfns * sizeof(long)) != 0 ) - { - PERROR("Could not mlock"); - safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long)); + if ( mlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 ) + { + PERROR("Could not mlock"); + safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)); safe_munlock(trans, sizeof(*trans)); goto out1; } @@ -220,22 +221,22 @@ int xc_memory_op(int xc_handle, get_xen_guest_handle(extent_start, 
reservation->extent_start); if ( extent_start != NULL ) safe_munlock(extent_start, - reservation->nr_extents * sizeof(unsigned long)); + reservation->nr_extents * sizeof(xen_pfn_t)); break; case XENMEM_machphys_mfn_list: safe_munlock(xmml, sizeof(*xmml)); get_xen_guest_handle(extent_start, xmml->extent_start); safe_munlock(extent_start, - xmml->max_extents * sizeof(unsigned long)); + xmml->max_extents * sizeof(xen_pfn_t)); break; case XENMEM_add_to_physmap: safe_munlock(arg, sizeof(struct xen_add_to_physmap)); break; case XENMEM_translate_gpfn_list: get_xen_guest_handle(mfn_list, trans->mfn_list); - safe_munlock(mfn_list, trans->nr_gpfns * sizeof(long)); + safe_munlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)); get_xen_guest_handle(gpfn_list, trans->gpfn_list); - safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long)); + safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)); safe_munlock(trans, sizeof(*trans)); break; } @@ -263,7 +264,7 @@ long long xc_domain_get_cpu_usage( int x int xc_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + xen_pfn_t *pfn_buf, unsigned long max_pfns) { DECLARE_DOM0_OP; @@ -274,10 +275,10 @@ int xc_get_pfn_list(int xc_handle, set_xen_guest_handle(op.u.getmemlist.buffer, pfn_buf); #ifdef VALGRIND - memset(pfn_buf, 0, max_pfns * sizeof(unsigned long)); + memset(pfn_buf, 0, max_pfns * sizeof(xen_pfn_t)); #endif - if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 ) + if ( mlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)) != 0 ) { PERROR("xc_get_pfn_list: pfn_buf mlock failed"); return -1; @@ -285,7 +286,7 @@ int xc_get_pfn_list(int xc_handle, ret = do_dom0_op(xc_handle, &op); - safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long)); + safe_munlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)); #if 0 #ifdef DEBUG @@ -364,7 +365,7 @@ unsigned long xc_get_filesz(int fd) } void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size, - int xch, uint32_t dom, unsigned long *parray, + int xch, uint32_t 
dom, xen_pfn_t *parray, unsigned long vstart) { char *va; @@ -428,6 +429,29 @@ int xc_version(int xc_handle, int cmd, v safe_munlock(arg, argsize); return rc; +} + +unsigned long xc_make_page_below_4G( + int xc_handle, uint32_t domid, unsigned long mfn) +{ + xen_pfn_t old_mfn = mfn; + xen_pfn_t new_mfn; + + if ( xc_domain_memory_decrease_reservation( + xc_handle, domid, 1, 0, &old_mfn) != 0 ) + { + fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn); + return 0; + } + + if ( xc_domain_memory_increase_reservation( + xc_handle, domid, 1, 0, 32, &new_mfn) != 0 ) + { + fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn); + return 0; + } + + return new_mfn; } /* diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_ptrace.c Wed Jun 07 11:03:51 2006 +0100 @@ -190,7 +190,8 @@ map_domain_va_32( static void *v[MAX_VIRT_CPUS]; l2 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); + xc_handle, current_domid, PAGE_SIZE, PROT_READ, + xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3])); if ( l2 == NULL ) return NULL; @@ -230,7 +231,8 @@ map_domain_va_pae( static void *v[MAX_VIRT_CPUS]; l3 = xc_map_foreign_range( - xc_handle, current_domid, PAGE_SIZE, PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); + xc_handle, current_domid, PAGE_SIZE, PROT_READ, + xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3])); if ( l3 == NULL ) return NULL; @@ -282,8 +284,9 @@ map_domain_va_64( if ((ctxt[cpu].ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */ return map_domain_va_32(xc_handle, cpu, guest_va, perm); - l4 = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE, - PROT_READ, ctxt[cpu].ctrlreg[3] >> PAGE_SHIFT); + l4 = xc_map_foreign_range( + xc_handle, current_domid, PAGE_SIZE, PROT_READ, + xen_cr3_to_pfn(ctxt[cpu].ctrlreg[3])); if ( l4 == NULL ) return NULL; diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xc_ptrace_core.c --- 
a/tools/libxc/xc_ptrace_core.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xc_ptrace_core.c Wed Jun 07 11:03:51 2006 +0100 @@ -12,8 +12,8 @@ static long nr_pages = 0; static long nr_pages = 0; static unsigned long *p2m_array = NULL; static unsigned long *m2p_array = NULL; -static unsigned long pages_offset; -static unsigned long cr3[MAX_VIRT_CPUS]; +static unsigned long pages_offset; +static unsigned long cr3[MAX_VIRT_CPUS]; /* --------------------- */ @@ -47,7 +47,7 @@ map_domain_va_core(unsigned long domfd, munmap(cr3_virt[cpu], PAGE_SIZE); v = mmap( NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd, - map_mtop_offset(cr3_phys[cpu])); + map_mtop_offset(xen_cr3_to_pfn(cr3_phys[cpu]))); if (v == MAP_FAILED) { perror("mmap failed"); @@ -127,14 +127,15 @@ xc_waitdomain_core( sizeof(vcpu_guest_context_t)*nr_vcpus) return -1; - for (i = 0; i < nr_vcpus; i++) { + for (i = 0; i < nr_vcpus; i++) cr3[i] = ctxt[i].ctrlreg[3]; - } + if ((p2m_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) { printf("Could not allocate p2m_array\n"); return -1; } + if (read(domfd, p2m_array, sizeof(unsigned long)*nr_pages) != sizeof(unsigned long)*nr_pages) return -1; @@ -146,10 +147,8 @@ xc_waitdomain_core( } bzero(m2p_array, sizeof(unsigned long)* 1 << 20); - for (i = 0; i < nr_pages; i++) { + for (i = 0; i < nr_pages; i++) m2p_array[p2m_array[i]] = i; - } - } return 0; } diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xenctrl.h Wed Jun 07 11:03:51 2006 +0100 @@ -415,26 +415,26 @@ int xc_domain_memory_increase_reservatio unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start); + xen_pfn_t *extent_start); int xc_domain_memory_decrease_reservation(int xc_handle, uint32_t domid, unsigned long nr_extents, unsigned int extent_order, - unsigned long *extent_start); + xen_pfn_t *extent_start); int xc_domain_memory_populate_physmap(int xc_handle, 
uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start); + xen_pfn_t *extent_start); int xc_domain_translate_gpfn_list(int xc_handle, uint32_t domid, unsigned long nr_gpfns, - unsigned long *gpfn_list, - unsigned long *mfn_list); + xen_pfn_t *gpfn_list, + xen_pfn_t *mfn_list); int xc_domain_ioport_permission(int xc_handle, uint32_t domid, @@ -453,6 +453,9 @@ int xc_domain_iomem_permission(int xc_ha unsigned long nr_mfns, uint8_t allow_access); +unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, + unsigned long mfn); + typedef dom0_perfc_desc_t xc_perfc_desc_t; /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */ int xc_perfc_control(int xc_handle, @@ -484,7 +487,7 @@ void *xc_map_foreign_range(int xc_handle unsigned long mfn ); void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot, - unsigned long *arr, int num ); + xen_pfn_t *arr, int num ); /** * Translates a virtual address in the context of a given domain and @@ -499,11 +502,11 @@ unsigned long xc_translate_foreign_addre unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, int vcpu, unsigned long long virt); -int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, +int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf, unsigned long max_pfns); int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + xen_pfn_t *pfn_buf, unsigned int start_page, unsigned int nr_pages); int xc_copy_to_domain_page(int xc_handle, uint32_t domid, diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xg_private.h Wed Jun 07 11:03:51 2006 +0100 @@ -156,6 +156,9 @@ struct domain_setup_info unsigned long elf_paddr_offset; +#define PAEKERN_no 0 +#define PAEKERN_yes 1 +#define PAEKERN_extended_cr3 2 unsigned int pae_kernel; unsigned int load_symtab; @@ 
-170,7 +173,7 @@ typedef int (*parseimagefunc)(const char struct domain_setup_info *dsi); typedef int (*loadimagefunc)(const char *image, unsigned long image_size, int xch, - uint32_t dom, unsigned long *parray, + uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi); struct load_funcs @@ -198,7 +201,7 @@ unsigned long xc_get_filesz(int fd); unsigned long xc_get_filesz(int fd); void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size, - int xch, uint32_t dom, unsigned long *parray, + int xch, uint32_t dom, xen_pfn_t *parray, unsigned long vstart); int pin_table(int xc_handle, unsigned int type, unsigned long mfn, diff -r b09dbe439169 -r 9d86c1a70f34 tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/libxc/xg_save_restore.h Wed Jun 07 11:03:51 2006 +0100 @@ -105,23 +105,23 @@ static int get_platform_info(int xc_hand */ #define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE #define M2P_CHUNK_SIZE (1 << M2P_SHIFT) -#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) +#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT) #define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT) /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */ -#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) +#define P2M_SIZE ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT) -/* Number of unsigned longs in a page */ -#define ulpp (PAGE_SIZE/sizeof(unsigned long)) +/* Number of xen_pfn_t in a page */ +#define fpp (PAGE_SIZE/sizeof(xen_pfn_t)) /* Number of entries in the pfn_to_mfn_frame_list */ -#define P2M_FL_ENTRIES (((max_pfn)+ulpp-1)/ulpp) +#define P2M_FL_ENTRIES (((max_pfn)+fpp-1)/fpp) /* Size in bytes of the pfn_to_mfn_frame_list */ #define P2M_FL_SIZE ((P2M_FL_ENTRIES)*sizeof(unsigned long)) /* Number of entries in the pfn_to_mfn_frame_list_list */ -#define P2M_FLL_ENTRIES (((max_pfn)+(ulpp*ulpp)-1)/(ulpp*ulpp)) +#define P2M_FLL_ENTRIES 
(((max_pfn)+(fpp*fpp)-1)/(fpp*fpp)) /* Current guests allow 8MB 'slack' in their P2M */ #define NR_SLACK_ENTRIES ((8 * 1024 * 1024) / PAGE_SIZE) diff -r b09dbe439169 -r 9d86c1a70f34 tools/python/xen/util/security.py --- a/tools/python/xen/util/security.py Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/python/xen/util/security.py Wed Jun 07 11:03:51 2006 +0100 @@ -426,6 +426,15 @@ def get_decision(arg1, arg2): err("Argument type not supported.") ssidref = label2ssidref(arg2[2][1], arg2[1][1]) arg2 = ['ssidref', str(ssidref)] + + # accept only int or string types for domid and ssidref + if isinstance(arg1[1], int): + arg1[1] = str(arg1[1]) + if isinstance(arg2[1], int): + arg2[1] = str(arg2[1]) + if not isinstance(arg1[1], str) or not isinstance(arg2[1], str): + err("Invalid id or ssidref type, string or int required") + try: decision = acm.getdecision(arg1[0], arg1[1], arg2[0], arg2[1]) except: diff -r b09dbe439169 -r 9d86c1a70f34 tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Wed Jun 07 11:03:15 2006 +0100 +++ b/tools/tests/test_x86_emulator.c Wed Jun 07 11:03:51 2006 +0100 @@ -13,6 +13,7 @@ typedef int64_t s64; typedef int64_t s64; #include <public/xen.h> #include <asm-x86/x86_emulate.h> +#include <sys/mman.h> static int read_any( unsigned long addr, @@ -85,23 +86,30 @@ int main(int argc, char **argv) struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; char instr[20] = { 0x01, 0x08 }; /* add %ecx,(%eax) */ - unsigned int res = 0x7FFFFFFF; - u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 }; + unsigned int *res; int rc; ctxt.regs = &regs; ctxt.mode = X86EMUL_MODE_PROT32; + res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if ( res == MAP_FAILED ) + { + fprintf(stderr, "mmap to low address failed\n"); + exit(1); + } + printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; -
ctxt.cr2 = (unsigned long)&res; - res = 0x7FFFFFFF; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x92345677) || + ctxt.cr2 = (unsigned long)res; + *res = 0x7FFFFFFF; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x92345677) || (regs.eflags != 0xa94) || (regs.eip != (unsigned long)&instr[2]) ) goto fail; @@ -116,11 +124,25 @@ int main(int argc, char **argv) #else regs.ecx = 0x12345678UL; #endif - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x92345677) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x92345677) || (regs.ecx != 0x8000000FUL) || + (regs.eip != (unsigned long)&instr[2]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing movl (%%eax),%%ecx..."); + instr[0] = 0x8b; instr[1] = 0x08; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = ~0UL; + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x92345677) || + (regs.ecx != 0x92345677UL) || (regs.eip != (unsigned long)&instr[2]) ) goto fail; printf("okay\n"); @@ -131,10 +153,10 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0x92345677UL; regs.ecx = 0xAA; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x923456AA) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x923456AA) || (regs.eflags != 0x244) || (regs.eax != 0x92345677UL) || (regs.eip != (unsigned long)&instr[4]) ) @@ -147,10 +169,10 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0xAABBCC77UL; regs.ecx = 0xFF; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x923456AA) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, 
&emulops); + if ( (rc != 0) || + (*res != 0x923456AA) || ((regs.eflags&0x240) != 0x200) || (regs.eax != 0xAABBCCAA) || (regs.ecx != 0xFF) || @@ -163,10 +185,10 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x12345678) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x12345678) || (regs.eflags != 0x200) || (regs.ecx != 0x923456AA) || (regs.eip != (unsigned long)&instr[2]) ) @@ -176,14 +198,14 @@ int main(int argc, char **argv) printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)..."); instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08; regs.eflags = 0x200; - res = 0x923456AA; + *res = 0x923456AA; regs.eip = (unsigned long)&instr[0]; regs.eax = 0x923456AAUL; regs.ecx = 0xDDEEFF00L; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0xDDEEFF00) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0xDDEEFF00) || (regs.eflags != 0x244) || (regs.eax != 0x923456AAUL) || (regs.eip != (unsigned long)&instr[4]) ) @@ -192,54 +214,57 @@ int main(int argc, char **argv) printf("%-40s", "Testing rep movsw..."); instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5; - res = 0x22334455; + *res = 0x22334455; regs.eflags = 0x200; regs.ecx = 23; regs.eip = (unsigned long)&instr[0]; - regs.esi = (unsigned long)&res + 0; - regs.edi = (unsigned long)&res + 2; + regs.esi = (unsigned long)res + 0; + regs.edi = (unsigned long)res + 2; regs.error_code = 0; /* read fault */ ctxt.cr2 = regs.esi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x44554455) || + (*res != 0x44554455) || (regs.eflags != 0x200) || (regs.ecx != 22) || - (regs.esi != ((unsigned long)&res + 2)) || - (regs.edi != ((unsigned long)&res + 4)) || 
+ (regs.esi != ((unsigned long)res + 2)) || + (regs.edi != ((unsigned long)res + 4)) || (regs.eip != (unsigned long)&instr[0]) ) goto fail; printf("okay\n"); printf("%-40s", "Testing btrl $0x1,(%edi)..."); instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01; - res = 0x2233445F; - regs.eflags = 0x200; - regs.eip = (unsigned long)&instr[0]; - regs.edi = (unsigned long)&res; + *res = 0x2233445F; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.edi = (unsigned long)res; ctxt.cr2 = regs.edi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x2233445D) || + (*res != 0x2233445D) || ((regs.eflags&0x201) != 0x201) || (regs.eip != (unsigned long)&instr[4]) ) goto fail; printf("okay\n"); + + res[0] = 0x12345678; + res[1] = 0x87654321; printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]..."); instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f; regs.eflags = 0x200; - regs.eax = cmpxchg8b_res[0]; - regs.edx = cmpxchg8b_res[1]; + regs.eax = res[0]; + regs.edx = res[1]; regs.ebx = 0x9999AAAA; regs.ecx = 0xCCCCFFFF; regs.eip = (unsigned long)&instr[0]; - regs.edi = (unsigned long)cmpxchg8b_res; + regs.edi = (unsigned long)res; ctxt.cr2 = regs.edi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (cmpxchg8b_res[0] != 0x9999AAAA) || - (cmpxchg8b_res[1] != 0xCCCCFFFF) || + (res[0] != 0x9999AAAA) || + (res[1] != 0xCCCCFFFF) || ((regs.eflags&0x240) != 0x240) || (regs.eip != (unsigned long)&instr[3]) ) goto fail; @@ -248,12 +273,12 @@ int main(int argc, char **argv) printf("%-40s", "Testing cmpxchg8b (%edi) [failing]..."); instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f; regs.eip = (unsigned long)&instr[0]; - regs.edi = (unsigned long)cmpxchg8b_res; + regs.edi = (unsigned long)res; ctxt.cr2 = regs.edi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (cmpxchg8b_res[0] != 0x9999AAAA) || - (cmpxchg8b_res[1] != 0xCCCCFFFF) || + (res[0] != 0x9999AAAA) || + (res[1] != 0xCCCCFFFF) || (regs.eax != 0x9999AAAA) || 
(regs.edx != 0xCCCCFFFF) || ((regs.eflags&0x240) != 0x200) || @@ -265,11 +290,11 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - res = 0x82; + ctxt.cr2 = (unsigned long)res; + *res = 0x82; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x82) || + (*res != 0x82) || (regs.ecx != 0xFFFFFF82) || ((regs.eflags&0x240) != 0x200) || (regs.eip != (unsigned long)&instr[3]) ) @@ -280,11 +305,11 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - res = 0x1234aa82; + ctxt.cr2 = (unsigned long)res; + *res = 0x1234aa82; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x1234aa82) || + (*res != 0x1234aa82) || (regs.ecx != 0xaa82) || ((regs.eflags&0x240) != 0x200) || (regs.eip != (unsigned long)&instr[3]) ) diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/linux-xen/smpboot.c --- a/xen/arch/ia64/linux-xen/smpboot.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/ia64/linux-xen/smpboot.c Wed Jun 07 11:03:51 2006 +0100 @@ -62,6 +62,7 @@ #include <asm/unistd.h> #ifdef XEN +#include <xen/domain.h> #include <asm/hw_irq.h> int ht_per_core = 1; #ifndef CONFIG_SMP @@ -487,7 +488,7 @@ do_rest: #else struct vcpu *v; - v = idle_vcpu[cpu] = alloc_vcpu(idle_vcpu[0]->domain, cpu, cpu); + v = alloc_idle_vcpu(cpu); BUG_ON(v == NULL); //printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v); diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/ia64/xen/domain.c Wed Jun 07 11:03:51 2006 +0100 @@ -42,6 +42,7 @@ #include <asm/vcpu.h> /* for function declarations */ #include <public/arch-ia64.h> +#include <xen/domain.h> #include <asm/vmx.h> #include <asm/vmx_vcpu.h> #include <asm/vmx_vpd.h> @@ -92,26 
+93,16 @@ alloc_dom_xen_and_dom_io(void) * Any Xen-heap pages that we will allow to be mapped will have * their domain field set to dom_xen. */ - dom_xen = alloc_domain(); + dom_xen = alloc_domain(DOMID_XEN); BUG_ON(dom_xen == NULL); - spin_lock_init(&dom_xen->page_alloc_lock); - INIT_LIST_HEAD(&dom_xen->page_list); - INIT_LIST_HEAD(&dom_xen->xenpage_list); - atomic_set(&dom_xen->refcnt, 1); - dom_xen->domain_id = DOMID_XEN; /* * Initialise our DOMID_IO domain. * This domain owns I/O pages that are within the range of the page_info * array. Mappings occur at the priv of the caller. */ - dom_io = alloc_domain(); + dom_io = alloc_domain(DOMID_IO); BUG_ON(dom_io == NULL); - spin_lock_init(&dom_io->page_alloc_lock); - INIT_LIST_HEAD(&dom_io->page_list); - INIT_LIST_HEAD(&dom_io->xenpage_list); - atomic_set(&dom_io->refcnt, 1); - dom_io->domain_id = DOMID_IO; } #endif diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/ia64/xen/xensetup.c Wed Jun 07 11:03:51 2006 +0100 @@ -35,8 +35,6 @@ char saved_command_line[COMMAND_LINE_SIZ char saved_command_line[COMMAND_LINE_SIZE]; char dom0_command_line[COMMAND_LINE_SIZE]; -struct vcpu *idle_vcpu[NR_CPUS]; - cpumask_t cpu_present_map; extern unsigned long domain0_ready; diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/audit.c --- a/xen/arch/x86/audit.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/audit.c Wed Jun 07 11:03:51 2006 +0100 @@ -432,10 +432,10 @@ int audit_adjust_pgtables(struct domain for_each_vcpu(d, v) { - if ( pagetable_get_paddr(v->arch.guest_table) ) + if ( !pagetable_is_null(v->arch.guest_table) ) adjust(mfn_to_page(pagetable_get_pfn(v->arch.guest_table)), !shadow_mode_refcounts(d)); - if ( pagetable_get_paddr(v->arch.shadow_table) ) + if ( !pagetable_is_null(v->arch.shadow_table) ) adjust(mfn_to_page(pagetable_get_pfn(v->arch.shadow_table)), 0); if ( v->arch.monitor_shadow_ref ) diff -r b09dbe439169 -r 
9d86c1a70f34 xen/arch/x86/cpu/mtrr/main.c --- a/xen/arch/x86/cpu/mtrr/main.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/cpu/mtrr/main.c Wed Jun 07 11:03:51 2006 +0100 @@ -43,7 +43,7 @@ #include "mtrr.h" /* No blocking mutexes in Xen. Spin instead. */ -#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED +#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m) #define down(_m) spin_lock(_m) #define up(_m) spin_unlock(_m) #define lock_cpu_hotplug() ((void)0) diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/dom0_ops.c Wed Jun 07 11:03:51 2006 +0100 @@ -467,7 +467,7 @@ void arch_getdomaininfo_ctxt( if ( hvm_guest(v) ) c->flags |= VGCF_HVM_GUEST; - c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table); + c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table)); c->vm_assist = v->domain->vm_assist; } diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/domain.c Wed Jun 07 11:03:51 2006 +0100 @@ -259,7 +259,7 @@ int arch_set_info_guest( struct vcpu *v, struct vcpu_guest_context *c) { struct domain *d = v->domain; - unsigned long phys_basetab = INVALID_MFN; + unsigned long cr3_pfn = INVALID_MFN; int i, rc; if ( !(c->flags & VGCF_HVM_GUEST) ) @@ -322,12 +322,8 @@ int arch_set_info_guest( if ( !(c->flags & VGCF_HVM_GUEST) ) { - phys_basetab = c->ctrlreg[3]; - phys_basetab = - (gmfn_to_mfn(d, phys_basetab >> PAGE_SHIFT) << PAGE_SHIFT) | - (phys_basetab & ~PAGE_MASK); - - v->arch.guest_table = mk_pagetable(phys_basetab); + cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3])); + v->arch.guest_table = pagetable_from_pfn(cr3_pfn); } if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 ) @@ -335,14 +331,14 @@ int arch_set_info_guest( if ( c->flags & VGCF_HVM_GUEST ) { - v->arch.guest_table = mk_pagetable(0); + v->arch.guest_table = pagetable_null(); if ( 
!hvm_initialize_guest_resources(v) ) return -EINVAL; } else if ( shadow_mode_refcounts(d) ) { - if ( !get_page(mfn_to_page(phys_basetab>>PAGE_SHIFT), d) ) + if ( !get_page(mfn_to_page(cr3_pfn), d) ) { destroy_gdt(v); return -EINVAL; @@ -350,7 +346,7 @@ int arch_set_info_guest( } else { - if ( !get_page_and_type(mfn_to_page(phys_basetab>>PAGE_SHIFT), d, + if ( !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_base_page_table) ) { destroy_gdt(v); @@ -528,20 +524,29 @@ static void load_segments(struct vcpu *n if ( unlikely(!all_segs_okay) ) { struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned long *rsp = + unsigned long *rsp = (n->arch.flags & TF_kernel_mode) ? (unsigned long *)regs->rsp : (unsigned long *)nctxt->kernel_sp; + unsigned long cs_and_mask, rflags; if ( !(n->arch.flags & TF_kernel_mode) ) toggle_guest_mode(n); else regs->cs &= ~3; + /* CS longword also contains full evtchn_upcall_mask. */ + cs_and_mask = (unsigned long)regs->cs | + ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32); + + /* Fold upcall mask into RFLAGS.IF. 
*/ + rflags = regs->rflags & ~X86_EFLAGS_IF; + rflags |= !n->vcpu_info->evtchn_upcall_mask << 9; + if ( put_user(regs->ss, rsp- 1) | put_user(regs->rsp, rsp- 2) | - put_user(regs->rflags, rsp- 3) | - put_user(regs->cs, rsp- 4) | + put_user(rflags, rsp- 3) | + put_user(cs_and_mask, rsp- 4) | put_user(regs->rip, rsp- 5) | put_user(nctxt->user_regs.gs, rsp- 6) | put_user(nctxt->user_regs.fs, rsp- 7) | @@ -553,6 +558,10 @@ static void load_segments(struct vcpu *n DPRINTK("Error while creating failsafe callback frame.\n"); domain_crash(n->domain); } + + if ( test_bit(_VGCF_failsafe_disables_events, + &n->arch.guest_context.flags) ) + n->vcpu_info->evtchn_upcall_mask = 1; regs->entry_vector = TRAP_syscall; regs->rflags &= 0xFFFCBEFFUL; @@ -935,7 +944,7 @@ void domain_relinquish_resources(struct put_page_type(mfn_to_page(pfn)); put_page(mfn_to_page(pfn)); - v->arch.guest_table = mk_pagetable(0); + v->arch.guest_table = pagetable_null(); } if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 ) @@ -944,7 +953,7 @@ void domain_relinquish_resources(struct put_page_type(mfn_to_page(pfn)); put_page(mfn_to_page(pfn)); - v->arch.guest_table_user = mk_pagetable(0); + v->arch.guest_table_user = pagetable_null(); } } diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/domain_build.c Wed Jun 07 11:03:51 2006 +0100 @@ -301,6 +301,9 @@ int construct_dom0(struct domain *d, xen_pae ? "yes" : "no", dom0_pae ? 
"yes" : "no"); return -EINVAL; } + + if ( xen_pae && !!strstr(dsi.xen_section_string, "PAE=yes[extended-cr3]") ) + set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL ) { @@ -443,13 +446,13 @@ int construct_dom0(struct domain *d, l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] = l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR); } - v->arch.guest_table = mk_pagetable((unsigned long)l3start); + v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start); #else l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE; memcpy(l2tab, idle_pg_table, PAGE_SIZE); l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR); - v->arch.guest_table = mk_pagetable((unsigned long)l2start); + v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start); #endif for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) @@ -577,7 +580,7 @@ int construct_dom0(struct domain *d, l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR); l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] = l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); - v->arch.guest_table = mk_pagetable(__pa(l4start)); + v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); l4tab += l4_table_offset(dsi.v_start); mfn = alloc_spfn; diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Jun 07 11:03:51 2006 +0100 @@ -84,28 +84,26 @@ struct svm_percore_globals svm_globals[N /* * Initializes the POOL of ASID used by the guests per core. 
*/ -void asidpool_init( int core ) +void asidpool_init(int core) { int i; - svm_globals[core].ASIDpool.asid_lock = SPIN_LOCK_UNLOCKED; - spin_lock(&svm_globals[core].ASIDpool.asid_lock); + + spin_lock_init(&svm_globals[core].ASIDpool.asid_lock); + /* Host ASID is always in use */ svm_globals[core].ASIDpool.asid[INITIAL_ASID] = ASID_INUSE; - for( i=1; i<ASID_MAX; i++ ) - { + for ( i = 1; i < ASID_MAX; i++ ) svm_globals[core].ASIDpool.asid[i] = ASID_AVAILABLE; - } - spin_unlock(&svm_globals[core].ASIDpool.asid_lock); } /* internal function to get the next available ASID */ -static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core ) +static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core) { int i; - for( i = 1; i < ASID_MAX; i++ ) - { - if( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE ) + for ( i = 1; i < ASID_MAX; i++ ) + { + if ( svm_globals[core].ASIDpool.asid[i] == ASID_AVAILABLE ) { vmcb->guest_asid = i; svm_globals[core].ASIDpool.asid[i] = ASID_INUSE; @@ -746,34 +744,34 @@ static void svm_ctxt_switch_to(struct vc void svm_final_setup_guest(struct vcpu *v) { + struct domain *d = v->domain; + struct vcpu *vc; + v->arch.schedule_tail = arch_svm_do_launch; v->arch.ctxt_switch_from = svm_ctxt_switch_from; v->arch.ctxt_switch_to = svm_ctxt_switch_to; - if (v == v->domain->vcpu[0]) - { - struct domain *d = v->domain; - struct vcpu *vc; - - /* Initialize monitor page table */ - for_each_vcpu(d, vc) - vc->arch.monitor_table = mk_pagetable(0); - - /* - * Required to do this once per domain - * TODO: add a seperate function to do these. - */ - memset(&d->shared_info->evtchn_mask[0], 0xff, - sizeof(d->shared_info->evtchn_mask)); - - /* - * Put the domain in shadow mode even though we're going to be using - * the shared 1:1 page table initially. 
It shouldn't hurt - */ - shadow_mode_enable(d, - SHM_enable|SHM_refcounts| - SHM_translate|SHM_external|SHM_wr_pt_pte); - } + if ( v != d->vcpu[0] ) + return; + + /* Initialize monitor page table */ + for_each_vcpu( d, vc ) + vc->arch.monitor_table = pagetable_null(); + + /* + * Required to do this once per domain + * TODO: add a seperate function to do these. + */ + memset(&d->shared_info->evtchn_mask[0], 0xff, + sizeof(d->shared_info->evtchn_mask)); + + /* + * Put the domain in shadow mode even though we're going to be using + * the shared 1:1 page table initially. It shouldn't hurt + */ + shadow_mode_enable(d, + SHM_enable|SHM_refcounts| + SHM_translate|SHM_external|SHM_wr_pt_pte); } @@ -870,7 +868,7 @@ static int svm_do_page_fault(unsigned lo /* Use 1:1 page table to identify MMIO address space */ if (mmio_space(gpa)) { - /* No support for APIC */ + /* No support for APIC */ if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) { int inst_len; @@ -1570,7 +1568,7 @@ static int svm_set_cr0(unsigned long val } /* Now arch.guest_table points to machine physical. */ - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); update_pagetables(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", @@ -1590,7 +1588,7 @@ static int svm_set_cr0(unsigned long val if ( v->arch.hvm_svm.cpu_cr3 ) { put_page(mfn_to_page(get_mfn_from_gpfn( v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))); - v->arch.guest_table = mk_pagetable(0); + v->arch.guest_table = pagetable_null(); } /* @@ -1599,7 +1597,7 @@ static int svm_set_cr0(unsigned long val * created. 
*/ if ((value & X86_CR0_PE) == 0) { - if (value & X86_CR0_PG) { + if (value & X86_CR0_PG) { svm_inject_exception(v, TRAP_gp_fault, 1, 0); return 0; } @@ -1740,7 +1738,7 @@ static int mov_to_cr(int gpreg, int cr, } old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); @@ -1797,7 +1795,7 @@ static int mov_to_cr(int gpreg, int cr, * Now arch.guest_table points to machine physical. */ - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); update_pagetables(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 07 11:03:51 2006 +0100 @@ -66,7 +66,7 @@ void vmx_final_setup_guest(struct vcpu * /* Initialize monitor page table */ for_each_vcpu(d, vc) - vc->arch.monitor_table = mk_pagetable(0); + vc->arch.monitor_table = pagetable_null(); /* * Required to do this once per domain @@ -1223,7 +1223,7 @@ vmx_world_restore(struct vcpu *v, struct if(!get_page(mfn_to_page(mfn), v->domain)) return 0; old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); /* @@ -1459,7 +1459,7 @@ static int vmx_set_cr0(unsigned long val /* * Now arch.guest_table points to machine physical. 
*/ - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); update_pagetables(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", @@ -1477,7 +1477,7 @@ static int vmx_set_cr0(unsigned long val if ( v->arch.hvm_vmx.cpu_cr3 ) { put_page(mfn_to_page(get_mfn_from_gpfn( v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT))); - v->arch.guest_table = mk_pagetable(0); + v->arch.guest_table = pagetable_null(); } /* @@ -1635,7 +1635,7 @@ static int mov_to_cr(int gp, int cr, str domain_crash_synchronous(); /* need to take a clean path */ } old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) put_page(mfn_to_page(old_base_mfn)); /* @@ -1690,7 +1690,7 @@ static int mov_to_cr(int gp, int cr, str * Now arch.guest_table points to machine physical. */ - v->arch.guest_table = mk_pagetable((u64)mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); update_pagetables(v); HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", @@ -1970,7 +1970,6 @@ static inline void vmx_vmexit_do_extint( __hvm_bug(regs); vector &= INTR_INFO_VECTOR_MASK; - local_irq_disable(); TRACE_VMEXIT(1,vector); switch(vector) { @@ -2065,30 +2064,33 @@ asmlinkage void vmx_vmexit_handler(struc struct vcpu *v = current; int error; - if ((error = __vmread(VM_EXIT_REASON, &exit_reason))) - __hvm_bug(&regs); + error = __vmread(VM_EXIT_REASON, &exit_reason); + BUG_ON(error); perfc_incra(vmexits, exit_reason); - /* don't bother H/W interrutps */ - if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT && - exit_reason != EXIT_REASON_VMCALL && - exit_reason != EXIT_REASON_IO_INSTRUCTION) + if ( (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) && + (exit_reason != EXIT_REASON_VMCALL) && + (exit_reason != EXIT_REASON_IO_INSTRUCTION) ) HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason); - if (exit_reason &
VMX_EXIT_REASONS_FAILED_VMENTRY) { + if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT ) + local_irq_enable(); + + if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) + { printk("Failed vm entry (reason 0x%x)\n", exit_reason); printk("*********** VMCS Area **************\n"); vmcs_dump_vcpu(); printk("**************************************\n"); domain_crash_synchronous(); - return; } __vmread(GUEST_RIP, &eip); TRACE_VMEXIT(0,exit_reason); - switch (exit_reason) { + switch ( exit_reason ) + { case EXIT_REASON_EXCEPTION_NMI: { /* diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Wed Jun 07 11:03:51 2006 +0100 @@ -55,29 +55,26 @@ * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. */ -#define HVM_MONITOR_EFLAGS 0x202 /* IF on */ #define NR_SKIPPED_REGS 6 /* See the above explanation */ -#define HVM_SAVE_ALL_NOSEGREGS \ - pushl $HVM_MONITOR_EFLAGS; \ - popf; \ - subl $(NR_SKIPPED_REGS*4), %esp; \ +#define HVM_SAVE_ALL_NOSEGREGS \ + subl $(NR_SKIPPED_REGS*4), %esp; \ movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? 
*/ \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ pushl %ebx; -#define HVM_RESTORE_ALL_NOSEGREGS \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ +#define HVM_RESTORE_ALL_NOSEGREGS \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ addl $(NR_SKIPPED_REGS*4), %esp ALIGN diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Wed Jun 07 11:03:51 2006 +0100 @@ -51,45 +51,42 @@ * (2/1) u32 entry_vector; * (1/1) u32 error_code; */ -#define HVM_MONITOR_RFLAGS 0x202 /* IF on */ #define NR_SKIPPED_REGS 6 /* See the above explanation */ -#define HVM_SAVE_ALL_NOSEGREGS \ - pushq $HVM_MONITOR_RFLAGS; \ - popfq; \ - subq $(NR_SKIPPED_REGS*8), %rsp; \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rdx; \ - pushq %rcx; \ - pushq %rax; \ - pushq %r8; \ - pushq %r9; \ - pushq %r10; \ - pushq %r11; \ - pushq %rbx; \ - pushq %rbp; \ - pushq %r12; \ - pushq %r13; \ - pushq %r14; \ - pushq %r15; \ +#define HVM_SAVE_ALL_NOSEGREGS \ + subq $(NR_SKIPPED_REGS*8), %rsp; \ + pushq %rdi; \ + pushq %rsi; \ + pushq %rdx; \ + pushq %rcx; \ + pushq %rax; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11; \ + pushq %rbx; \ + pushq %rbp; \ + pushq %r12; \ + pushq %r13; \ + pushq %r14; \ + pushq %r15; -#define HVM_RESTORE_ALL_NOSEGREGS \ - popq %r15; \ - popq %r14; \ - popq %r13; \ - popq %r12; \ - popq %rbp; \ - popq %rbx; \ - popq %r11; \ - popq %r10; \ - popq %r9; \ - popq %r8; \ - popq %rax; \ - popq %rcx; \ - popq %rdx; \ - popq %rsi; \ - popq %rdi; \ - addq $(NR_SKIPPED_REGS*8), %rsp; \ +#define HVM_RESTORE_ALL_NOSEGREGS \ + popq %r15; \ + popq %r14; \ + popq %r13; \ + popq %r12; \ + popq %rbp; \ + 
popq %rbx; \ + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rax; \ + popq %rcx; \ + popq %rdx; \ + popq %rsi; \ + popq %rdi; \ + addq $(NR_SKIPPED_REGS*8), %rsp; ENTRY(vmx_asm_vmexit_handler) /* selectors are restored/saved by VMX */ diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/i8259.c --- a/xen/arch/x86/i8259.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/i8259.c Wed Jun 07 11:03:51 2006 +0100 @@ -102,7 +102,7 @@ BUILD_SMP_INTERRUPT(thermal_interrupt,TH * moves to arch independent land */ -spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(i8259A_lock); static void disable_8259A_vector(unsigned int vector) { diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/microcode.c --- a/xen/arch/x86/microcode.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/microcode.c Wed Jun 07 11:03:51 2006 +0100 @@ -83,7 +83,7 @@ #include <asm/processor.h> #define pr_debug(x...) ((void)0) -#define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED +#define DECLARE_MUTEX(_m) DEFINE_SPINLOCK(_m) #define down(_m) spin_lock(_m) #define up(_m) spin_unlock(_m) #define vmalloc(_s) xmalloc_bytes(_s) diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/mm.c Wed Jun 07 11:03:51 2006 +0100 @@ -89,6 +89,7 @@ #include <xen/kernel.h> #include <xen/lib.h> #include <xen/mm.h> +#include <xen/domain.h> #include <xen/sched.h> #include <xen/errno.h> #include <xen/perfc.h> @@ -187,20 +188,16 @@ void arch_init_memory(void) * Any Xen-heap pages that we will allow to be mapped will have * their domain field set to dom_xen. */ - dom_xen = alloc_domain(); - spin_lock_init(&dom_xen->page_alloc_lock); - atomic_set(&dom_xen->refcnt, 1); - dom_xen->domain_id = DOMID_XEN; + dom_xen = alloc_domain(DOMID_XEN); + BUG_ON(dom_xen == NULL); /* * Initialise our DOMID_IO domain. * This domain owns I/O pages that are within the range of the page_info * array. Mappings occur at the priv of the caller. 
*/ - dom_io = alloc_domain(); - spin_lock_init(&dom_io->page_alloc_lock); - atomic_set(&dom_io->refcnt, 1); - dom_io->domain_id = DOMID_IO; + dom_io = alloc_domain(DOMID_IO); + BUG_ON(dom_io == NULL); /* First 1MB of RAM is historically marked as I/O. */ for ( i = 0; i < 0x100; i++ ) @@ -999,6 +996,21 @@ static int alloc_l3_table(struct page_in int i; ASSERT(!shadow_mode_refcounts(d)); + +#ifdef CONFIG_X86_PAE + /* + * PAE pgdirs above 4GB are unacceptable if the guest does not understand + * the weird 'extended cr3' format for dealing with high-order address + * bits. We cut some slack for control tools (before vcpu0 is initialised). + */ + if ( (pfn >= 0x100000) && + unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) && + d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) ) + { + MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); + return 0; + } +#endif pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) @@ -1717,7 +1729,7 @@ int new_guest_cr3(unsigned long mfn) { /* Switch to idle pagetable: this VCPU has no active p.t. now. 
*/ old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = mk_pagetable(0); + v->arch.guest_table = pagetable_null(); update_pagetables(v); write_cr3(__pa(idle_pg_table)); if ( old_base_mfn != 0 ) @@ -1739,7 +1751,7 @@ int new_guest_cr3(unsigned long mfn) invalidate_shadow_ldt(v); old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT); + v->arch.guest_table = pagetable_from_pfn(mfn); update_pagetables(v); /* update shadow_table and monitor_table */ write_ptbase(v); @@ -2006,7 +2018,7 @@ int do_mmuext_op( { unsigned long old_mfn = pagetable_get_pfn(v->arch.guest_table_user); - v->arch.guest_table_user = mk_pagetable(mfn << PAGE_SHIFT); + v->arch.guest_table_user = pagetable_from_pfn(mfn); if ( old_mfn != 0 ) put_page_and_type(mfn_to_page(old_mfn)); } diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/setup.c Wed Jun 07 11:03:51 2006 +0100 @@ -85,8 +85,6 @@ extern void early_cpu_init(void); struct tss_struct init_tss[NR_CPUS]; -struct vcpu *idle_vcpu[NR_CPUS]; - extern unsigned long cpu0_stack[]; struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/shadow.c Wed Jun 07 11:03:51 2006 +0100 @@ -2472,7 +2472,7 @@ static void shadow_update_pagetables(str if ( !get_shadow_ref(smfn) ) BUG(); old_smfn = pagetable_get_pfn(v->arch.shadow_table); - v->arch.shadow_table = mk_pagetable((u64)smfn << PAGE_SHIFT); + v->arch.shadow_table = pagetable_from_pfn(smfn); if ( old_smfn ) put_shadow_ref(old_smfn); @@ -3481,15 +3481,16 @@ static void shadow_set_l2e_64(unsigned l __shadow_get_l3e(v, va, &sl3e); if (!(l3e_get_flags(sl3e) & _PAGE_PRESENT)) { - if (create_l2_shadow) { + if (create_l2_shadow) { perfc_incrc(shadow_set_l2e_force_map); shadow_map_into_current(v, va, 
PAGING_L2, PAGING_L3); __shadow_get_l3e(v, va, &sl3e); } else { printk("For non HVM shadow, create_l1_shadow:%d\n", create_l2_shadow); } - shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va)); - + + if ( v->domain->arch.ops->guest_paging_levels == PAGING_L4 ) + shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va)); } if ( put_ref_check ) { diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/shadow32.c Wed Jun 07 11:03:51 2006 +0100 @@ -583,7 +583,7 @@ static void free_shadow_pages(struct dom if ( pagetable_get_paddr(v->arch.shadow_table) ) { put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table)); - v->arch.shadow_table = mk_pagetable(0); + v->arch.shadow_table = pagetable_null(); if ( shadow_mode_external(d) ) { @@ -765,7 +765,7 @@ static void alloc_monitor_pagetable(stru mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty(); mpl2e[l2_table_offset(RO_MPT_VIRT_START)] = l2e_empty(); - v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT); + v->arch.monitor_table = pagetable_from_pfn(mmfn); v->arch.monitor_vtable = mpl2e; if ( v->vcpu_id == 0 ) @@ -830,7 +830,7 @@ void free_monitor_pagetable(struct vcpu unmap_domain_page_global(v->arch.monitor_vtable); free_domheap_page(mfn_to_page(mfn)); - v->arch.monitor_table = mk_pagetable(0); + v->arch.monitor_table = pagetable_null(); v->arch.monitor_vtable = 0; } @@ -992,7 +992,7 @@ alloc_p2m_table(struct domain *d) l1tab = map_domain_page(page_to_mfn(page)); memset(l1tab, 0, PAGE_SIZE); - d->arch.phys_table = mk_pagetable(page_to_maddr(page)); + d->arch.phys_table = pagetable_from_page(page); } list_ent = d->page_list.next; @@ -1126,7 +1126,7 @@ int shadow_direct_map_init(struct domain memset(root, 0, PAGE_SIZE); unmap_domain_page(root); - d->arch.phys_table = mk_pagetable(page_to_maddr(page)); + d->arch.phys_table = pagetable_from_page(page); return 1; } @@ -1156,7 +1156,7 @@ void 
shadow_direct_map_clean(struct doma unmap_domain_page(l2e); - d->arch.phys_table = mk_pagetable(0); + d->arch.phys_table = pagetable_null(); } int __shadow_mode_enable(struct domain *d, unsigned int mode) @@ -3231,7 +3231,7 @@ void __update_pagetables(struct vcpu *v) if ( !get_shadow_ref(smfn) ) BUG(); old_smfn = pagetable_get_pfn(v->arch.shadow_table); - v->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT); + v->arch.shadow_table = pagetable_from_pfn(smfn); if ( old_smfn ) put_shadow_ref(old_smfn); diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/shadow_public.c Wed Jun 07 11:03:51 2006 +0100 @@ -50,7 +50,7 @@ int shadow_direct_map_init(struct domain memset(root, 0, PAGE_SIZE); root[PAE_SHADOW_SELF_ENTRY] = l3e_from_page(page, __PAGE_HYPERVISOR); - d->arch.phys_table = mk_pagetable(page_to_maddr(page)); + d->arch.phys_table = pagetable_from_page(page); unmap_domain_page(root); return 1; @@ -92,7 +92,7 @@ void shadow_direct_map_clean(struct doma unmap_domain_page(l3e); - d->arch.phys_table = mk_pagetable(0); + d->arch.phys_table = pagetable_null(); } /****************************************************************************/ @@ -338,7 +338,7 @@ static void alloc_monitor_pagetable(stru /* map the phys_to_machine map into the per domain Read-Only MPT space */ - v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT); + v->arch.monitor_table = pagetable_from_pfn(mmfn); v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e; mpl4e[l4_table_offset(RO_MPT_VIRT_START)] = l4e_empty(); @@ -380,7 +380,7 @@ void free_monitor_pagetable(struct vcpu unmap_domain_page_global(v->arch.monitor_vtable); free_domheap_page(mfn_to_page(mfn)); - v->arch.monitor_table = mk_pagetable(0); + v->arch.monitor_table = pagetable_null(); v->arch.monitor_vtable = 0; } #elif CONFIG_PAGING_LEVELS == 3 @@ -431,7 +431,7 @@ static void alloc_monitor_pagetable(stru for ( i = 0; i < (MACHPHYS_MBYTES >> 
(L2_PAGETABLE_SHIFT - 20)); i++ ) mpl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = l2e_empty(); - v->arch.monitor_table = mk_pagetable(m3mfn << PAGE_SHIFT); /* < 4GB */ + v->arch.monitor_table = pagetable_from_pfn(m3mfn); v->arch.monitor_vtable = (l2_pgentry_t *) mpl3e; if ( v->vcpu_id == 0 ) @@ -492,7 +492,7 @@ void free_monitor_pagetable(struct vcpu unmap_domain_page_global(v->arch.monitor_vtable); free_domheap_page(mfn_to_page(m3mfn)); - v->arch.monitor_table = mk_pagetable(0); + v->arch.monitor_table = pagetable_null(); v->arch.monitor_vtable = 0; } #endif @@ -924,7 +924,7 @@ void free_shadow_pages(struct domain *d) if ( pagetable_get_paddr(v->arch.shadow_table) ) { put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table)); - v->arch.shadow_table = mk_pagetable(0); + v->arch.shadow_table = pagetable_null(); if ( shadow_mode_external(d) ) { diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/smp.c --- a/xen/arch/x86/smp.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/smp.c Wed Jun 07 11:03:51 2006 +0100 @@ -161,7 +161,7 @@ void send_IPI_mask_phys(cpumask_t mask, local_irq_restore(flags); } -static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(flush_lock); static cpumask_t flush_cpumask; static unsigned long flush_va; diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/smpboot.c --- a/xen/arch/x86/smpboot.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/smpboot.c Wed Jun 07 11:03:51 2006 +0100 @@ -37,6 +37,7 @@ #include <xen/init.h> #include <xen/kernel.h> #include <xen/mm.h> +#include <xen/domain.h> #include <xen/sched.h> #include <xen/irq.h> #include <xen/delay.h> @@ -886,28 +887,16 @@ static int __devinit do_boot_cpu(int api int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - struct domain *d; struct vcpu *v; - int vcpu_id; ++cpucount; booting_cpu = cpu; - if ((vcpu_id = cpu % MAX_VIRT_CPUS) == 0) { - d = domain_create(IDLE_DOMAIN_ID, cpu); - BUG_ON(d == NULL); - v = d->vcpu[0]; - } else { - d = idle_vcpu[cpu 
- vcpu_id]->domain; - BUG_ON(d == NULL); - v = alloc_vcpu(d, vcpu_id, cpu); - } - - idle_vcpu[cpu] = v; + v = alloc_idle_vcpu(cpu); BUG_ON(v == NULL); - v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); + v->arch.monitor_table = pagetable_from_paddr(__pa(idle_pg_table)); /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/time.c Wed Jun 07 11:03:51 2006 +0100 @@ -40,10 +40,10 @@ boolean_param("hpet_force", opt_hpet_for unsigned long cpu_khz; /* CPU clock frequency in kHz. */ unsigned long hpet_address; -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(rtc_lock); unsigned long volatile jiffies; static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */ -static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(wc_lock); struct time_scale { int shift; @@ -67,7 +67,7 @@ static s_time_t stime_platform_stamp; static s_time_t stime_platform_stamp; static u64 platform_timer_stamp; static struct time_scale platform_timer_scale; -static spinlock_t platform_timer_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(platform_timer_lock); static u64 (*read_platform_count)(void); /* diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/traps.c Wed Jun 07 11:03:51 2006 +0100 @@ -876,7 +876,7 @@ static int emulate_privileged_op(struct PAGE_FAULT(regs->edi, USER_WRITE_FAULT); break; } - regs->edi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes; + regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes); break; case 0x6e: /* OUTSB */ @@ -902,7 +902,7 @@ static int emulate_privileged_op(struct outl_user((u32)data, (u16)regs->edx, v, regs); break; } - regs->esi += (regs->eflags & EF_DF) ? -(int)op_bytes : op_bytes; + regs->esi += (int)((regs->eflags & EF_DF) ? 
-op_bytes : op_bytes); break; } @@ -1034,8 +1034,8 @@ static int emulate_privileged_op(struct break; case 3: /* Read CR3 */ - *reg = pfn_to_paddr(mfn_to_gmfn(v->domain, - pagetable_get_pfn(v->arch.guest_table))); + *reg = xen_pfn_to_cr3(mfn_to_gmfn( + v->domain, pagetable_get_pfn(v->arch.guest_table))); break; case 4: /* Read CR4 */ @@ -1085,7 +1085,7 @@ static int emulate_privileged_op(struct case 3: /* Write CR3 */ LOCK_BIGLOCK(v->domain); cleanup_writable_pagetable(v->domain); - (void)new_guest_cr3(gmfn_to_mfn(v->domain, paddr_to_pfn(*reg))); + (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg))); UNLOCK_BIGLOCK(v->domain); break; diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/asm-offsets.c --- a/xen/arch/x86/x86_32/asm-offsets.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_32/asm-offsets.c Wed Jun 07 11:03:51 2006 +0100 @@ -64,11 +64,13 @@ void __dummy__(void) arch.guest_context.kernel_ss); OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt); OFFSET(VCPU_flags, struct vcpu, vcpu_flags); OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr); DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending); DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked); + DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events); BLANK(); OFFSET(TSS_ss0, struct tss_struct, ss0); diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/domain_page.c --- a/xen/arch/x86/x86_32/domain_page.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_32/domain_page.c Wed Jun 07 11:03:51 2006 +0100 @@ -183,7 +183,7 @@ static unsigned long inuse[BITS_TO_LONGS static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)]; static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)]; static unsigned int inuse_cursor; -static spinlock_t globalmap_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(globalmap_lock); void 
*map_domain_page_global(unsigned long pfn) { diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_32/entry.S Wed Jun 07 11:03:51 2006 +0100 @@ -130,7 +130,10 @@ failsafe_callback: movl VCPU_failsafe_sel(%ebx),%eax movw %ax,TRAPBOUNCE_cs(%edx) movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx) - call create_bounce_frame + bt $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%ebx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx) +1: call create_bounce_frame xorl %eax,%eax movl %eax,UREGS_ds(%esp) movl %eax,UREGS_es(%esp) diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_32/mm.c Wed Jun 07 11:03:51 2006 +0100 @@ -75,7 +75,8 @@ void __init paging_init(void) printk("PAE disabled.\n"); #endif - idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); + idle_vcpu[0]->arch.monitor_table = + pagetable_from_paddr(__pa(idle_pg_table)); if ( cpu_has_pge ) { diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_32/traps.c Wed Jun 07 11:03:51 2006 +0100 @@ -346,6 +346,12 @@ static long register_guest_callback(stru case CALLBACKTYPE_failsafe: v->arch.guest_context.failsafe_callback_cs = reg->address.cs; v->arch.guest_context.failsafe_callback_eip = reg->address.eip; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); break; #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/asm-offsets.c --- a/xen/arch/x86/x86_64/asm-offsets.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_64/asm-offsets.c Wed Jun 07 11:03:51 2006 +0100 @@ -64,11 +64,14 @@ void __dummy__(void) 
arch.guest_context.syscall_callback_eip); OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt); OFFSET(VCPU_flags, struct vcpu, vcpu_flags); OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr); DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending); DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked); + DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events); + DEFINE(_VGCF_syscall_disables_events, _VGCF_syscall_disables_events); BLANK(); OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa); diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_64/entry.S Wed Jun 07 11:03:51 2006 +0100 @@ -30,7 +30,10 @@ switch_to_kernel: movq VCPU_syscall_addr(%rbx),%rax movq %rax,TRAPBOUNCE_eip(%rdx) movw $0,TRAPBOUNCE_flags(%rdx) - call create_bounce_frame + bt $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) +1: call create_bounce_frame jmp test_all_events /* %rbx: struct vcpu, interrupts disabled */ @@ -77,7 +80,10 @@ failsafe_callback: movq VCPU_failsafe_addr(%rbx),%rax movq %rax,TRAPBOUNCE_eip(%rdx) movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx) - call create_bounce_frame + bt $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) +1: call create_bounce_frame jmp test_all_events .previous .section __pre_ex_table,"a" diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_64/mm.c Wed Jun 07 11:03:51 2006 +0100 @@ -81,7 +81,8 @@ void __init paging_init(void) l2_pgentry_t *l2_ro_mpt; struct page_info *pg; - idle_vcpu[0]->arch.monitor_table = mk_pagetable(__pa(idle_pg_table)); + idle_vcpu[0]->arch.monitor_table 
= + pagetable_from_paddr(__pa(idle_pg_table)); /* Create user-accessible L2 directory to map the MPT for guests. */ l3_ro_mpt = alloc_xenheap_page(); diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_64/traps.c Wed Jun 07 11:03:51 2006 +0100 @@ -195,7 +195,7 @@ unsigned long do_iret(void) /* Returning to user mode? */ if ( (iret_saved.cs & 3) == 3 ) { - if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) ) + if ( unlikely(pagetable_is_null(v->arch.guest_table_user)) ) { DPRINTK("Guest switching to user mode with no user page tables\n"); domain_crash_synchronous(); @@ -334,10 +334,22 @@ static long register_guest_callback(stru case CALLBACKTYPE_failsafe: v->arch.guest_context.failsafe_callback_eip = reg->address; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); break; case CALLBACKTYPE_syscall: v->arch.guest_context.syscall_callback_eip = reg->address; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_syscall_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_syscall_disables_events, + &v->arch.guest_context.flags); break; case CALLBACKTYPE_nmi: diff -r b09dbe439169 -r 9d86c1a70f34 xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/arch/x86/x86_emulate.c Wed Jun 07 11:03:51 2006 +0100 @@ -100,8 +100,8 @@ static uint8_t opcode_table[256] = { ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, /* 0x88 - 0x8F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov, + ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, 0, 0, 0, DstMem|SrcNone|ModRM|Mov, /* 0x90 - 
0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -380,11 +380,12 @@ do{ __asm__ __volatile__ ( ((reg) & ((1UL << (ad_bytes << 3)) - 1)))) #define register_address_increment(reg, inc) \ do { \ + int _inc = (inc); /* signed type ensures sign extension to long */ \ if ( ad_bytes == sizeof(unsigned long) ) \ - (reg) += (inc); \ + (reg) += _inc; \ else \ (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \ - (((reg) + (inc)) & ((1UL << (ad_bytes << 3)) - 1)); \ + (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ } while (0) void * @@ -858,7 +859,7 @@ x86_emulate_memop( &dst.val, 8, ctxt)) != 0 ) goto done; } - register_address_increment(_regs.esp, -(int)dst.bytes); + register_address_increment(_regs.esp, -dst.bytes); if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp), dst.val, dst.bytes, ctxt)) != 0 ) goto done; @@ -942,9 +943,9 @@ x86_emulate_memop( goto done; } register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); + _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); + _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; case 0xa6 ... 0xa7: /* cmps */ DPRINTF("Urk! I don't handle CMPS.\n"); @@ -955,7 +956,7 @@ x86_emulate_memop( dst.ptr = (unsigned long *)cr2; dst.val = _regs.eax; register_address_increment( - _regs.edi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); + _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; case 0xac ... 0xad: /* lods */ dst.type = OP_REG; @@ -964,7 +965,7 @@ x86_emulate_memop( if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 ) goto done; register_address_increment( - _regs.esi, (_regs.eflags & EFLG_DF) ? -(int)dst.bytes : dst.bytes); + _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes); break; case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! 
I don't handle SCAS.\n"); diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/dom0_ops.c --- a/xen/common/dom0_ops.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/dom0_ops.c Wed Jun 07 11:03:51 2006 +0100 @@ -95,7 +95,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op long ret = 0; struct dom0_op curop, *op = &curop; void *ssid = NULL; /* save security ptr between pre and post/fail hooks */ - static spinlock_t dom0_lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(dom0_lock); if ( !IS_PRIV(current->domain) ) return -EPERM; diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/domain.c --- a/xen/common/domain.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/domain.c Wed Jun 07 11:03:51 2006 +0100 @@ -32,22 +32,111 @@ struct domain *domain_list; struct domain *dom0; -struct domain *domain_create(domid_t dom_id, unsigned int cpu) -{ - struct domain *d, **pd; - struct vcpu *v; - - if ( (d = alloc_domain()) == NULL ) +struct vcpu *idle_vcpu[NR_CPUS]; + +struct domain *alloc_domain(domid_t domid) +{ + struct domain *d; + + if ( (d = xmalloc(struct domain)) == NULL ) return NULL; - d->domain_id = dom_id; - + memset(d, 0, sizeof(*d)); + d->domain_id = domid; atomic_set(&d->refcnt, 1); - spin_lock_init(&d->big_lock); spin_lock_init(&d->page_alloc_lock); INIT_LIST_HEAD(&d->page_list); INIT_LIST_HEAD(&d->xenpage_list); + + return d; +} + + +void free_domain(struct domain *d) +{ + struct vcpu *v; + int i; + + sched_destroy_domain(d); + + for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- ) + if ( (v = d->vcpu[i]) != NULL ) + free_vcpu_struct(v); + + xfree(d); +} + + +struct vcpu *alloc_vcpu( + struct domain *d, unsigned int vcpu_id, unsigned int cpu_id) +{ + struct vcpu *v; + + BUG_ON(d->vcpu[vcpu_id] != NULL); + + if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL ) + return NULL; + + v->domain = d; + v->vcpu_id = vcpu_id; + v->processor = cpu_id; + atomic_set(&v->pausecnt, 0); + v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id]; + + v->cpu_affinity = is_idle_domain(d) ? 
+ cpumask_of_cpu(cpu_id) : CPU_MASK_ALL; + + v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline; + v->runstate.state_entry_time = NOW(); + + if ( (vcpu_id != 0) && !is_idle_domain(d) ) + set_bit(_VCPUF_down, &v->vcpu_flags); + + if ( sched_init_vcpu(v) < 0 ) + { + free_vcpu_struct(v); + return NULL; + } + + d->vcpu[vcpu_id] = v; + if ( vcpu_id != 0 ) + d->vcpu[v->vcpu_id-1]->next_in_list = v; + + return v; +} + +struct vcpu *alloc_idle_vcpu(unsigned int cpu_id) +{ + struct domain *d; + struct vcpu *v; + unsigned int vcpu_id; + + if ((vcpu_id = cpu_id % MAX_VIRT_CPUS) == 0) + { + d = domain_create(IDLE_DOMAIN_ID, cpu_id); + BUG_ON(d == NULL); + v = d->vcpu[0]; + } + else + { + d = idle_vcpu[cpu_id - vcpu_id]->domain; + BUG_ON(d == NULL); + v = alloc_vcpu(d, vcpu_id, cpu_id); + } + + idle_vcpu[cpu_id] = v; + + return v; +} + +struct domain *domain_create(domid_t domid, unsigned int cpu) +{ + struct domain *d, **pd; + struct vcpu *v; + + if ( (d = alloc_domain(domid)) == NULL ) + return NULL; rangeset_domain_initialise(d); @@ -74,14 +163,14 @@ struct domain *domain_create(domid_t dom if ( !is_idle_domain(d) ) { write_lock(&domlist_lock); - pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */ + pd = &domain_list; /* NB. domain_list maintained in order of domid. 
*/ for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) if ( (*pd)->domain_id > d->domain_id ) break; d->next_in_list = *pd; *pd = d; - d->next_in_hashbucket = domain_hash[DOMAIN_HASH(dom_id)]; - domain_hash[DOMAIN_HASH(dom_id)] = d; + d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)]; + domain_hash[DOMAIN_HASH(domid)] = d; write_unlock(&domlist_lock); } @@ -126,19 +215,16 @@ struct domain *find_domain_by_id(domid_t void domain_kill(struct domain *d) { - struct vcpu *v; - domain_pause(d); - if ( !test_and_set_bit(_DOMF_dying, &d->domain_flags) ) - { - for_each_vcpu(d, v) - sched_rem_domain(v); - gnttab_release_mappings(d); - domain_relinquish_resources(d); - put_domain(d); - - send_guest_global_virq(dom0, VIRQ_DOM_EXC); - } + + if ( test_and_set_bit(_DOMF_dying, &d->domain_flags) ) + return; + + gnttab_release_mappings(d); + domain_relinquish_resources(d); + put_domain(d); + + send_guest_global_virq(dom0, VIRQ_DOM_EXC); } diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/kernel.c --- a/xen/common/kernel.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/kernel.c Wed Jun 07 11:03:51 2006 +0100 @@ -184,6 +184,7 @@ long do_xen_version(int cmd, XEN_GUEST_H case XENVER_get_features: { xen_feature_info_t fi; + struct domain *d = current->domain; if ( copy_from_guest(&fi, arg, 1) ) return -EFAULT; @@ -191,7 +192,9 @@ long do_xen_version(int cmd, XEN_GUEST_H switch ( fi.submap_idx ) { case 0: - fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb); + fi.submap = 0; + if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) ) + fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb); if ( shadow_mode_translate(current->domain) ) fi.submap |= (1U << XENFEAT_writable_page_tables) | diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/keyhandler.c Wed Jun 07 11:03:51 2006 +0100 @@ -128,11 +128,12 @@ static void dump_domains(unsigned char k d->domain_flags, atomic_read(&d->refcnt), d->tot_pages, 
d->xenheap_pages, cpuset); printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-" - "%02x%02x-%02x%02x%02x%02x%02x%02x\n", + "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n", d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3], d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7], d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11], - d->handle[12], d->handle[13], d->handle[14], d->handle[15]); + d->handle[12], d->handle[13], d->handle[14], d->handle[15], + d->vm_assist); arch_dump_domain_info(d); diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/memory.c --- a/xen/common/memory.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/memory.c Wed Jun 07 11:03:51 2006 +0100 @@ -31,14 +31,15 @@ static long static long increase_reservation( struct domain *d, - XEN_GUEST_HANDLE(ulong) extent_list, + XEN_GUEST_HANDLE(xen_pfn_t) extent_list, unsigned int nr_extents, unsigned int extent_order, unsigned int flags, int *preempted) { struct page_info *page; - unsigned long i, mfn; + unsigned long i; + xen_pfn_t mfn; if ( !guest_handle_is_null(extent_list) && !guest_handle_okay(extent_list, nr_extents) ) @@ -80,14 +81,16 @@ static long static long populate_physmap( struct domain *d, - XEN_GUEST_HANDLE(ulong) extent_list, + XEN_GUEST_HANDLE(xen_pfn_t) extent_list, unsigned int nr_extents, unsigned int extent_order, unsigned int flags, int *preempted) { struct page_info *page; - unsigned long i, j, gpfn, mfn; + unsigned long i, j; + xen_pfn_t gpfn; + xen_pfn_t mfn; if ( !guest_handle_okay(extent_list, nr_extents) ) return 0; @@ -177,13 +180,14 @@ static long static long decrease_reservation( struct domain *d, - XEN_GUEST_HANDLE(ulong) extent_list, + XEN_GUEST_HANDLE(xen_pfn_t) extent_list, unsigned int nr_extents, unsigned int extent_order, unsigned int flags, int *preempted) { - unsigned long i, j, gmfn; + unsigned long i, j; + xen_pfn_t gmfn; if ( !guest_handle_okay(extent_list, nr_extents) ) return 0; @@ -214,7 +218,9 @@ translate_gpfn_list( 
XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress) { struct xen_translate_gpfn_list op; - unsigned long i, gpfn, mfn; + unsigned long i; + xen_pfn_t gpfn; + xen_pfn_t mfn; struct domain *d; if ( copy_from_guest(&op, uop, 1) ) diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/page_alloc.c Wed Jun 07 11:03:51 2006 +0100 @@ -59,7 +59,7 @@ custom_param("lowmem_emergency_pool", pa #define round_pgdown(_p) ((_p)&PAGE_MASK) #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) -static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(page_scrub_lock); LIST_HEAD(page_scrub_list); /********************* @@ -250,7 +250,7 @@ static struct list_head heap[NR_ZONES][M static unsigned long avail[NR_ZONES]; -static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(heap_lock); void end_boot_allocator(void) { diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/perfc.c --- a/xen/common/perfc.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/perfc.c Wed Jun 07 11:03:51 2006 +0100 @@ -209,7 +209,7 @@ static int perfc_copy_info(XEN_GUEST_HAN /* Dom0 control of perf counters */ int perfc_control(dom0_perfccontrol_t *pc) { - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); u32 op = pc->op; int rc; diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_bvt.c --- a/xen/common/sched_bvt.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/sched_bvt.c Wed Jun 07 11:03:51 2006 +0100 @@ -160,15 +160,14 @@ static inline u32 calc_evt(struct vcpu * } /** - * bvt_alloc_task - allocate BVT private structures for a task - * @p: task to allocate private structures for - * + * bvt_init_vcpu - allocate BVT private structures for a VCPU. * Returns non-zero on failure. 
*/ -static int bvt_alloc_task(struct vcpu *v) +static int bvt_init_vcpu(struct vcpu *v) { struct domain *d = v->domain; struct bvt_dom_info *inf; + struct bvt_vcpu_info *einf; if ( (d->sched_priv == NULL) ) { @@ -199,15 +198,7 @@ static int bvt_alloc_task(struct vcpu *v init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor); } - return 0; -} - -/* - * Add and remove a domain - */ -static void bvt_add_task(struct vcpu *v) -{ - struct bvt_vcpu_info *einf = EBVT_INFO(v); + einf = EBVT_INFO(v); /* Allocate per-CPU context if this is the first domain to be added. */ if ( CPU_INFO(v->processor) == NULL ) @@ -223,13 +214,15 @@ static void bvt_add_task(struct vcpu *v) einf->avt = einf->evt = ~0U; BUG_ON(__task_on_runqueue(v)); __add_to_runqueue_head(v); - } + } else { /* Set avt and evt to system virtual time. */ einf->avt = CPU_SVT(v->processor); einf->evt = CPU_SVT(v->processor); } + + return 0; } static void bvt_wake(struct vcpu *v) @@ -298,10 +291,9 @@ static int bvt_set_affinity(struct vcpu /** - * bvt_free_task - free BVT private structures for a task - * @d: task - */ -static void bvt_free_task(struct domain *d) + * bvt_destroy_domain - free BVT private structures for a domain. 
+ */ +static void bvt_destroy_domain(struct domain *d) { struct bvt_dom_info *inf = BVT_INFO(d); @@ -568,10 +560,10 @@ struct scheduler sched_bvt_def = { .name = "Borrowed Virtual Time", .opt_name = "bvt", .sched_id = SCHED_BVT, - - .alloc_task = bvt_alloc_task, - .add_task = bvt_add_task, - .free_task = bvt_free_task, + + .init_vcpu = bvt_init_vcpu, + .destroy_domain = bvt_destroy_domain, + .do_schedule = bvt_do_schedule, .control = bvt_ctl, .adjdom = bvt_adjdom, diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_credit.c --- a/xen/common/sched_credit.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/sched_credit.c Wed Jun 07 11:03:51 2006 +0100 @@ -75,14 +75,13 @@ } while ( 0 ); #define CSCHED_STATS_EXPAND_SCHED(_MACRO) \ - _MACRO(vcpu_alloc) \ - _MACRO(vcpu_add) \ + _MACRO(vcpu_init) \ _MACRO(vcpu_sleep) \ _MACRO(vcpu_wake_running) \ _MACRO(vcpu_wake_onrunq) \ _MACRO(vcpu_wake_runnable) \ _MACRO(vcpu_wake_not_runnable) \ - _MACRO(dom_free) \ + _MACRO(dom_destroy) \ _MACRO(schedule) \ _MACRO(tickle_local_idler) \ _MACRO(tickle_local_over) \ @@ -429,14 +428,14 @@ __csched_vcpu_acct_idle_locked(struct cs } static int -csched_vcpu_alloc(struct vcpu *vc) +csched_vcpu_init(struct vcpu *vc) { struct domain * const dom = vc->domain; struct csched_dom *sdom; struct csched_vcpu *svc; int16_t pri; - CSCHED_STAT_CRANK(vcpu_alloc); + CSCHED_STAT_CRANK(vcpu_init); /* Allocate, if appropriate, per-domain info */ if ( is_idle_vcpu(vc) ) @@ -489,19 +488,13 @@ csched_vcpu_alloc(struct vcpu *vc) if ( likely(sdom != NULL) ) csched_vcpu_acct(svc, 0); - return 0; -} - -static void -csched_vcpu_add(struct vcpu *vc) -{ - CSCHED_STAT_CRANK(vcpu_add); - /* Allocate per-PCPU info */ if ( unlikely(!CSCHED_PCPU(vc->processor)) ) csched_pcpu_init(vc->processor); CSCHED_VCPU_CHECK(vc); + + return 0; } static void @@ -644,12 +637,12 @@ csched_dom_cntl( } static void -csched_dom_free(struct domain *dom) +csched_dom_destroy(struct domain *dom) { struct csched_dom * const sdom = 
CSCHED_DOM(dom); int i; - CSCHED_STAT_CRANK(dom_free); + CSCHED_STAT_CRANK(dom_destroy); for ( i = 0; i < MAX_VIRT_CPUS; i++ ) { @@ -1215,14 +1208,15 @@ struct scheduler sched_credit_def = { .opt_name = "credit", .sched_id = SCHED_CREDIT, - .alloc_task = csched_vcpu_alloc, - .add_task = csched_vcpu_add, + .init_vcpu = csched_vcpu_init, + .destroy_domain = csched_dom_destroy, + .sleep = csched_vcpu_sleep, .wake = csched_vcpu_wake, + .set_affinity = csched_vcpu_set_affinity, .adjdom = csched_dom_cntl, - .free_task = csched_dom_free, .tick = csched_tick, .do_schedule = csched_schedule, diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/sched_sedf.c Wed Jun 07 11:03:51 2006 +0100 @@ -328,11 +328,9 @@ static inline void __add_to_runqueue_sor } -/* Allocates memory for per domain private scheduling data*/ -static int sedf_alloc_task(struct vcpu *v) -{ - PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n", - v->domain->domain_id, v->vcpu_id); +static int sedf_init_vcpu(struct vcpu *v) +{ + struct sedf_vcpu_info *inf; if ( v->domain->sched_priv == NULL ) { @@ -344,23 +342,11 @@ static int sedf_alloc_task(struct vcpu * if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL ) return -1; - memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info)); - return 0; -} - - -/* Setup the sedf_dom_info */ -static void sedf_add_task(struct vcpu *v) -{ - struct sedf_vcpu_info *inf = EDOM_INFO(v); - + inf = EDOM_INFO(v); inf->vcpu = v; - PRINT(2,"sedf_add_task was called, domain-id %i.%i\n", - v->domain->domain_id, v->vcpu_id); - /* Allocate per-CPU context if this is the first domain to be added. 
*/ if ( unlikely(schedule_data[v->processor].sched_priv == NULL) ) { @@ -408,14 +394,13 @@ static void sedf_add_task(struct vcpu *v EDOM_INFO(v)->deadl_abs = 0; EDOM_INFO(v)->status &= ~SEDF_ASLEEP; } -} - -/* Frees memory used by domain info */ -static void sedf_free_task(struct domain *d) + + return 0; +} + +static void sedf_destroy_domain(struct domain *d) { int i; - - PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id); xfree(d->sched_priv); @@ -1452,9 +1437,9 @@ struct scheduler sched_sedf_def = { .opt_name = "sedf", .sched_id = SCHED_SEDF, - .alloc_task = sedf_alloc_task, - .add_task = sedf_add_task, - .free_task = sedf_free_task, + .init_vcpu = sedf_init_vcpu, + .destroy_domain = sedf_destroy_domain, + .do_schedule = sedf_do_schedule, .dump_cpu_state = sedf_dump_cpu_state, .sleep = sedf_sleep, diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/schedule.c --- a/xen/common/schedule.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/schedule.c Wed Jun 07 11:03:51 2006 +0100 @@ -99,74 +99,7 @@ void vcpu_runstate_get(struct vcpu *v, s } } -struct domain *alloc_domain(void) -{ - struct domain *d; - - if ( (d = xmalloc(struct domain)) != NULL ) - memset(d, 0, sizeof(*d)); - - return d; -} - -void free_domain(struct domain *d) -{ - struct vcpu *v; - int i; - - for_each_vcpu ( d, v ) - sched_rem_domain(v); - - SCHED_OP(free_task, d); - - for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- ) - if ( (v = d->vcpu[i]) != NULL ) - free_vcpu_struct(v); - - xfree(d); -} - -struct vcpu *alloc_vcpu( - struct domain *d, unsigned int vcpu_id, unsigned int cpu_id) -{ - struct vcpu *v; - - BUG_ON(d->vcpu[vcpu_id] != NULL); - - if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL ) - return NULL; - - v->domain = d; - v->vcpu_id = vcpu_id; - v->processor = cpu_id; - atomic_set(&v->pausecnt, 0); - v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id]; - - v->cpu_affinity = is_idle_domain(d) ? - cpumask_of_cpu(cpu_id) : CPU_MASK_ALL; - - v->runstate.state = is_idle_vcpu(v) ? 
RUNSTATE_running : RUNSTATE_offline; - v->runstate.state_entry_time = NOW(); - - if ( (vcpu_id != 0) && !is_idle_domain(d) ) - set_bit(_VCPUF_down, &v->vcpu_flags); - - if ( SCHED_OP(alloc_task, v) < 0 ) - { - free_vcpu_struct(v); - return NULL; - } - - d->vcpu[vcpu_id] = v; - if ( vcpu_id != 0 ) - d->vcpu[v->vcpu_id-1]->next_in_list = v; - - sched_add_domain(v); - - return v; -} - -void sched_add_domain(struct vcpu *v) +int sched_init_vcpu(struct vcpu *v) { /* Initialise the per-domain timers. */ init_timer(&v->timer, vcpu_timer_fn, v, v->processor); @@ -179,17 +112,23 @@ void sched_add_domain(struct vcpu *v) set_bit(_VCPUF_running, &v->vcpu_flags); } - SCHED_OP(add_task, v); TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id); -} - -void sched_rem_domain(struct vcpu *v) -{ - kill_timer(&v->timer); - kill_timer(&v->poll_timer); - - SCHED_OP(rem_task, v); - TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id); + + return SCHED_OP(init_vcpu, v); +} + +void sched_destroy_domain(struct domain *d) +{ + struct vcpu *v; + + for_each_vcpu ( d, v ) + { + kill_timer(&v->timer); + kill_timer(&v->poll_timer); + TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id); + } + + SCHED_OP(destroy_domain, d); } void vcpu_sleep_nosync(struct vcpu *v) @@ -663,7 +602,7 @@ static void poll_timer_fn(void *data) /* Initialise the data structures. 
*/ void __init scheduler_init(void) { - int i, rc; + int i; open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler); @@ -686,17 +625,6 @@ void __init scheduler_init(void) printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name); SCHED_OP(init); - - if ( idle_vcpu[0] != NULL ) - { - schedule_data[0].curr = idle_vcpu[0]; - schedule_data[0].idle = idle_vcpu[0]; - - rc = SCHED_OP(alloc_task, idle_vcpu[0]); - BUG_ON(rc < 0); - - sched_add_domain(idle_vcpu[0]); - } } /* diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/trace.c --- a/xen/common/trace.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/trace.c Wed Jun 07 11:03:51 2006 +0100 @@ -173,25 +173,17 @@ void init_trace_bufs(void) */ int tb_control(dom0_tbufcontrol_t *tbc) { - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); int rc = 0; spin_lock(&lock); - - if ( !tb_init_done && - (tbc->op != DOM0_TBUF_SET_SIZE) && - (tbc->op != DOM0_TBUF_ENABLE) ) - { - spin_unlock(&lock); - return -EINVAL; - } switch ( tbc->op ) { case DOM0_TBUF_GET_INFO: tbc->cpu_mask = tb_cpu_mask; tbc->evt_mask = tb_event_mask; - tbc->buffer_mfn = __pa(t_bufs[0]) >> PAGE_SHIFT; + tbc->buffer_mfn = opt_tbuf_size ? 
virt_to_mfn(t_bufs[0]) : 0UL; tbc->size = opt_tbuf_size * PAGE_SIZE; break; case DOM0_TBUF_SET_CPU_MASK: diff -r b09dbe439169 -r 9d86c1a70f34 xen/common/xmalloc.c --- a/xen/common/xmalloc.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/common/xmalloc.c Wed Jun 07 11:03:51 2006 +0100 @@ -35,7 +35,7 @@ #include <xen/prefetch.h> static LIST_HEAD(freelist); -static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(freelist_lock); struct xmalloc_hdr { diff -r b09dbe439169 -r 9d86c1a70f34 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/drivers/char/console.c Wed Jun 07 11:03:51 2006 +0100 @@ -53,7 +53,7 @@ static int sercon_handle = -1; static int sercon_handle = -1; static int vgacon_enabled = 0; -spinlock_t console_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(console_lock); /* * ******************************************************* @@ -563,7 +563,7 @@ static unsigned int debugtrace_prd; /* P static unsigned int debugtrace_prd; /* Producer index */ static unsigned int debugtrace_kilobytes = 128, debugtrace_bytes; static unsigned int debugtrace_used; -static spinlock_t debugtrace_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(debugtrace_lock); integer_param("debugtrace", debugtrace_kilobytes); void debugtrace_dump(void) @@ -675,7 +675,7 @@ void panic(const char *fmt, ...) 
va_list args; char buf[128]; unsigned long flags; - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); extern void machine_restart(char *); debugtrace_dump(); diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/asm-x86/page.h Wed Jun 07 11:03:51 2006 +0100 @@ -172,10 +172,13 @@ typedef struct { u32 pfn; } pagetable_t; /* x86_64 */ typedef struct { u64 pfn; } pagetable_t; #endif -#define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT) -#define pagetable_get_pfn(x) ((x).pfn) -#define mk_pagetable(pa) \ - ({ pagetable_t __p; __p.pfn = (pa) >> PAGE_SHIFT; __p; }) +#define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT) +#define pagetable_get_pfn(x) ((x).pfn) +#define pagetable_is_null(x) ((x).pfn == 0) +#define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) }) +#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg)) +#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT) +#define pagetable_null() pagetable_from_pfn(0) #endif #define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE) diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/arch-ia64.h Wed Jun 07 11:03:51 2006 +0100 @@ -26,6 +26,9 @@ DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* Arch specific VIRQs definition */ diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/arch-x86_32.h Wed Jun 07 11:03:51 2006 +0100 @@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long 
xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* @@ -138,9 +141,17 @@ struct vcpu_guest_context { struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_HVM_GUEST (1<<1) -#define VGCF_IN_KERNEL (1<<2) +#define VGCF_I387_VALID (1<<0) +#define VGCF_HVM_GUEST (1<<1) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_hvm_guest 1 +#define VGCF_hvm_guest (1<<_VGCF_hvm_guest) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ @@ -158,10 +169,18 @@ typedef struct vcpu_guest_context vcpu_g typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. 
*/ - unsigned long pfn_to_mfn_frame_list_list; + xen_pfn_t pfn_to_mfn_frame_list_list; unsigned long nmi_reason; }; typedef struct arch_shared_info arch_shared_info_t; diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/arch-x86_64.h Wed Jun 07 11:03:51 2006 +0100 @@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* @@ -211,9 +214,19 @@ struct vcpu_guest_context { struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_HVM_GUEST (1<<1) -#define VGCF_IN_KERNEL (1<<2) +#define VGCF_I387_VALID (1<<0) +#define VGCF_HVM_GUEST (1<<1) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_hvm_guest 1 +#define VGCF_hvm_guest (1<<_VGCF_hvm_guest) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ @@ -234,10 +247,13 @@ typedef struct vcpu_guest_context vcpu_g typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing 
list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; + xen_pfn_t pfn_to_mfn_frame_list_list; unsigned long nmi_reason; }; typedef struct arch_shared_info arch_shared_info_t; diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/callback.h --- a/xen/include/public/callback.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/callback.h Wed Jun 07 11:03:51 2006 +0100 @@ -29,12 +29,20 @@ #define CALLBACKTYPE_nmi 4 /* + * Disable event deliver during callback? This flag is ignored for event and + * NMI callbacks: event delivery is unconditionally disabled. + */ +#define _CALLBACKF_mask_events 0 +#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) + +/* * Register a callback. */ #define CALLBACKOP_register 0 struct callback_register { - int type; - xen_callback_t address; + uint16_t type; + uint16_t flags; + xen_callback_t address; }; typedef struct callback_register callback_register_t; DEFINE_XEN_GUEST_HANDLE(callback_register_t); @@ -47,7 +55,8 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe */ #define CALLBACKOP_unregister 1 struct callback_unregister { - int type; + uint16_t type; + uint16_t _unused; }; typedef struct callback_unregister callback_unregister_t; DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/dom0_ops.h Wed Jun 07 11:03:51 2006 +0100 @@ -19,7 +19,7 @@ * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define DOM0_INTERFACE_VERSION 0x03000000 +#define DOM0_INTERFACE_VERSION 0x03000001 /************************************************************************/ @@ -27,10 +27,10 @@ struct dom0_getmemlist { struct dom0_getmemlist { /* IN variables. */ domid_t domain; - unsigned long max_pfns; - XEN_GUEST_HANDLE(ulong) buffer; - /* OUT variables. 
*/ - unsigned long num_pfns; + uint64_t max_pfns; + XEN_GUEST_HANDLE(xen_pfn_t) buffer; + /* OUT variables. */ + uint64_t num_pfns; }; typedef struct dom0_getmemlist dom0_getmemlist_t; DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t); @@ -96,9 +96,9 @@ struct dom0_getdomaininfo { #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code. */ #define DOMFLAGS_SHUTDOWNSHIFT 16 uint32_t flags; - unsigned long tot_pages; - unsigned long max_pages; - unsigned long shared_info_frame; /* MFN of shared_info struct */ + uint64_t tot_pages; + uint64_t max_pages; + xen_pfn_t shared_info_frame; /* MFN of shared_info struct */ uint64_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ @@ -162,7 +162,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t); struct dom0_getpageframeinfo { /* IN variables. */ - unsigned long mfn; /* Machine page frame number to query. */ + xen_pfn_t mfn; /* Machine page frame number to query. */ domid_t domain; /* To which domain does the frame belong? */ /* OUT variables. */ /* Is the page PINNED to a type? */ @@ -213,7 +213,7 @@ struct dom0_tbufcontrol { cpumap_t cpu_mask; uint32_t evt_mask; /* OUT variables */ - unsigned long buffer_mfn; + xen_pfn_t buffer_mfn; uint32_t size; }; typedef struct dom0_tbufcontrol dom0_tbufcontrol_t; @@ -229,8 +229,8 @@ struct dom0_physinfo { uint32_t sockets_per_node; uint32_t nr_nodes; uint32_t cpu_khz; - unsigned long total_pages; - unsigned long free_pages; + uint64_t total_pages; + uint64_t free_pages; uint32_t hw_cap[8]; }; typedef struct dom0_physinfo dom0_physinfo_t; @@ -276,7 +276,7 @@ struct dom0_shadow_control { uint32_t op; XEN_GUEST_HANDLE(ulong) dirty_bitmap; /* IN/OUT variables. */ - unsigned long pages; /* size of buffer, updated with actual size */ + uint64_t pages; /* size of buffer, updated with actual size */ /* OUT variables. 
*/ struct dom0_shadow_control_stats stats; }; @@ -286,8 +286,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_shadow_cont #define DOM0_SETDOMAINMAXMEM 28 struct dom0_setdomainmaxmem { /* IN variables. */ - domid_t domain; - unsigned long max_memkb; + domid_t domain; + uint64_t max_memkb; }; typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t; DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t); @@ -295,8 +295,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_setdomainma #define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */ struct dom0_getpageframeinfo2 { /* IN variables. */ - domid_t domain; - unsigned long num; + domid_t domain; + uint64_t num; /* IN/OUT variables. */ XEN_GUEST_HANDLE(ulong) array; }; @@ -313,12 +313,12 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram #define DOM0_ADD_MEMTYPE 31 struct dom0_add_memtype { /* IN variables. */ - unsigned long mfn; - unsigned long nr_mfns; - uint32_t type; - /* OUT variables. */ - uint32_t handle; - uint32_t reg; + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables. */ + uint32_t handle; + uint32_t reg; }; typedef struct dom0_add_memtype dom0_add_memtype_t; DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t); @@ -345,8 +345,8 @@ struct dom0_read_memtype { /* IN variables. */ uint32_t reg; /* OUT variables. */ - unsigned long mfn; - unsigned long nr_mfns; + xen_pfn_t mfn; + uint64_t nr_mfns; uint32_t type; }; typedef struct dom0_read_memtype dom0_read_memtype_t; @@ -499,8 +499,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_irq_permiss #define DOM0_IOMEM_PERMISSION 47 struct dom0_iomem_permission { domid_t domain; /* domain to be affected */ - unsigned long first_mfn; /* first page (physical page number) in range */ - unsigned long nr_mfns; /* number of pages in range (>0) */ + xen_pfn_t first_mfn; /* first page (physical page number) in range */ + uint64_t nr_mfns; /* number of pages in range (>0) */ uint8_t allow_access; /* allow (!0) or deny (0) access to range? 
*/ }; typedef struct dom0_iomem_permission dom0_iomem_permission_t; @@ -509,7 +509,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permi #define DOM0_HYPERCALL_INIT 48 struct dom0_hypercall_init { domid_t domain; /* domain to be affected */ - unsigned long mfn; /* machine frame to be initialised */ + xen_pfn_t mfn; /* machine frame to be initialised */ }; typedef struct dom0_hypercall_init dom0_hypercall_init_t; DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t); diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/grant_table.h Wed Jun 07 11:03:51 2006 +0100 @@ -240,7 +240,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl #define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ - unsigned long mfn; + xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/io/netif.h Wed Jun 07 11:03:51 2006 +0100 @@ -26,6 +26,10 @@ /* Packet data has been validated against protocol checksum. */ #define _NETTXF_data_validated (1) #define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the request. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) struct netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/io/ring.h --- a/xen/include/public/io/ring.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/io/ring.h Wed Jun 07 11:03:51 2006 +0100 @@ -151,19 +151,27 @@ typedef struct __name##_back_ring __name #define RING_SIZE(_r) \ ((_r)->nr_ents) +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + /* Test if there is an empty slot available on the front ring. 
* (This is only meaningful from the front. ) */ #define RING_FULL(_r) \ - (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r)) + (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ #define RING_HAS_UNCONSUMED_RESPONSES(_r) \ - ((_r)->rsp_cons != (_r)->sring->rsp_prod) + ((_r)->sring->rsp_prod - (_r)->rsp_cons) #define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - (((_r)->req_cons != (_r)->sring->req_prod) && \ - (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r))) + ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ + }) /* Direct access to individual ring elements, by index. */ #define RING_GET_REQUEST(_r, _idx) \ diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/memory.h --- a/xen/include/public/memory.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/memory.h Wed Jun 07 11:03:51 2006 +0100 @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - XEN_GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -87,7 +87,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - XEN_GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -117,7 +117,7 @@ struct xen_add_to_physmap { unsigned long idx; /* GPFN where the source mapping page should appear. 
*/ - unsigned long gpfn; + xen_pfn_t gpfn; }; typedef struct xen_add_to_physmap xen_add_to_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); @@ -135,13 +135,13 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - XEN_GUEST_HANDLE(ulong) gpfn_list; + XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - XEN_GUEST_HANDLE(ulong) mfn_list; + XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; }; typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/public/xen.h --- a/xen/include/public/xen.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/public/xen.h Wed Jun 07 11:03:51 2006 +0100 @@ -197,7 +197,7 @@ struct mmuext_op { unsigned int cmd; union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ - unsigned long mfn; + xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; } arg1; @@ -234,10 +234,24 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); */ #define VMASST_CMD_enable 0 #define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ #define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ #define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ #define VMASST_TYPE_writable_pagetables 2 -#define MAX_VMASST_TYPE 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 #ifndef __ASSEMBLY__ @@ -443,9 +457,9 @@ struct start_info { unsigned long nr_pages; /* Total pages allocated to this domain. 
*/ unsigned long shared_info; /* MACHINE address of shared info struct. */ uint32_t flags; /* SIF_xxx flags. */ - unsigned long store_mfn; /* MACHINE page number of shared page. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ uint32_t store_evtchn; /* Event channel for store communication. */ - unsigned long console_mfn; /* MACHINE address of console page. */ + xen_pfn_t console_mfn; /* MACHINE page number of console page. */ uint32_t console_evtchn; /* Event channel for console messages. */ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ unsigned long pt_base; /* VIRTUAL address of page directory. */ diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/console.h --- a/xen/include/xen/console.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/xen/console.h Wed Jun 07 11:03:51 2006 +0100 @@ -9,8 +9,6 @@ #include <xen/spinlock.h> #include <xen/guest_access.h> - -extern spinlock_t console_lock; void set_printk_prefix(const char *prefix); diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/domain.h --- a/xen/include/xen/domain.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/xen/domain.h Wed Jun 07 11:03:51 2006 +0100 @@ -2,8 +2,14 @@ #ifndef __XEN_DOMAIN_H__ #define __XEN_DOMAIN_H__ -extern int boot_vcpu( +struct vcpu *alloc_vcpu( + struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); +int boot_vcpu( struct domain *d, int vcpuid, struct vcpu_guest_context *ctxt); +struct vcpu *alloc_idle_vcpu(unsigned int cpu_id); + +struct domain *alloc_domain(domid_t domid); +void free_domain(struct domain *d); /* * Arch-specifics. 
@@ -11,19 +17,18 @@ extern int boot_vcpu( struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id); -extern void free_vcpu_struct(struct vcpu *v); +void free_vcpu_struct(struct vcpu *v); -extern int arch_domain_create(struct domain *d); +int arch_domain_create(struct domain *d); -extern void arch_domain_destroy(struct domain *d); +void arch_domain_destroy(struct domain *d); -extern int arch_set_info_guest( - struct vcpu *v, struct vcpu_guest_context *c); +int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c); -extern void domain_relinquish_resources(struct domain *d); +void domain_relinquish_resources(struct domain *d); -extern void dump_pageframe_info(struct domain *d); +void dump_pageframe_info(struct domain *d); -extern void arch_dump_domain_info(struct domain *d); +void arch_dump_domain_info(struct domain *d); #endif /* __XEN_DOMAIN_H__ */ diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/sched-if.h --- a/xen/include/xen/sched-if.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/xen/sched-if.h Wed Jun 07 11:03:51 2006 +0100 @@ -60,14 +60,17 @@ struct scheduler { void (*init) (void); void (*tick) (unsigned int cpu); - int (*alloc_task) (struct vcpu *); - void (*add_task) (struct vcpu *); - void (*free_task) (struct domain *); - void (*rem_task) (struct vcpu *); + + int (*init_vcpu) (struct vcpu *); + void (*destroy_domain) (struct domain *); + void (*sleep) (struct vcpu *); void (*wake) (struct vcpu *); + int (*set_affinity) (struct vcpu *, cpumask_t *); + struct task_slice (*do_schedule) (s_time_t); + int (*control) (struct sched_ctl_cmd *); int (*adjdom) (struct domain *, struct sched_adjdom_cmd *); diff -r b09dbe439169 -r 9d86c1a70f34 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Wed Jun 07 11:03:15 2006 +0100 +++ b/xen/include/xen/sched.h Wed Jun 07 11:03:51 2006 +0100 @@ -186,12 +186,6 @@ extern struct vcpu *idle_vcpu[NR_CPUS]; #define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID) #define is_idle_vcpu(v) 
(is_idle_domain((v)->domain)) -struct vcpu *alloc_vcpu( - struct domain *d, unsigned int vcpu_id, unsigned int cpu_id); - -struct domain *alloc_domain(void); -void free_domain(struct domain *d); - #define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */ #define put_domain(_d) \ if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d) @@ -226,7 +220,7 @@ static inline void get_knownalive_domain } extern struct domain *domain_create( - domid_t dom_id, unsigned int cpu); + domid_t domid, unsigned int cpu); extern int construct_dom0( struct domain *d, unsigned long image_start, unsigned long image_len, @@ -269,8 +263,8 @@ void new_thread(struct vcpu *d, #define set_current_state(_s) do { current->state = (_s); } while (0) void scheduler_init(void); void schedulers_start(void); -void sched_add_domain(struct vcpu *); -void sched_rem_domain(struct vcpu *); +int sched_init_vcpu(struct vcpu *); +void sched_destroy_domain(struct domain *); long sched_ctl(struct sched_ctl_cmd *); long sched_adjdom(struct sched_adjdom_cmd *); int sched_id(void); diff -r b09dbe439169 -r 9d86c1a70f34 linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/Makefile Wed Jun 07 11:03:15 2006 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ - -ifneq ($(CONFIG_XEN_IA64_DOM0_VP),y) -obj-y += util.o -endif - -obj-y += core/ -#obj-y += char/ -obj-y += console/ -obj-y += evtchn/ -obj-$(CONFIG_XEN_IA64_DOM0_VP) += balloon/ -obj-y += privcmd/ -obj-y += xenbus/ - -obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ -obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ -obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ -obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ -obj-$(CONFIG_XEN_TPMDEV_FRONTEND) += tpmfront/ -obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ -obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/ diff -r b09dbe439169 -r 9d86c1a70f34 
linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/coreMakefile Wed Jun 07 11:03:15 2006 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-y := gnttab.o features.o -obj-$(CONFIG_PROC_FS) += xen_proc.o - -ifeq ($(ARCH),ia64) -obj-y += evtchn.o -obj-y += xenia64_init.o -ifeq ($(CONFIG_XEN_IA64_DOM0_VP),y) -obj-$(CONFIG_NET) += skbuff.o -endif -else -obj-y += reboot.o evtchn.o fixup.o -obj-$(CONFIG_SMP) += smp.o # setup_profiling_timer def'd in ia64 -obj-$(CONFIG_NET) += skbuff.o # until networking is up on ia64 -endif -obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o -obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |