[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID fdf25330e4a699c4b90aa28cc30843447cf9da61 # Parent 59d4c1863330e2023b05043d450da39cda47bd5a # Parent f91cc71173c55f18280b12e6732d9e7509d208be merge with xen-unstable.hg --- tools/blktap/Makefile | 93 tools/blktap/README | 137 - tools/blktap/README.sept05 | 33 tools/blktap/blkdump.c | 62 tools/blktap/blkif.c | 212 - tools/blktap/blktaplib.c | 453 --- tools/blktap/blktaplib.h | 171 - tools/blktap/list.h | 55 tools/blktap/parallax/Makefile | 62 tools/blktap/parallax/README | 171 - tools/blktap/parallax/block-async.c | 393 -- tools/blktap/parallax/block-async.h | 69 tools/blktap/parallax/blockstore.c | 1348 ---------- tools/blktap/parallax/blockstore.h | 134 tools/blktap/parallax/blockstored.c | 275 -- tools/blktap/parallax/bstest.c | 191 - tools/blktap/parallax/parallax.c | 608 ---- tools/blktap/parallax/radix.c | 631 ---- tools/blktap/parallax/radix.h | 45 tools/blktap/parallax/requests-async.c | 762 ----- tools/blktap/parallax/requests-async.h | 29 tools/blktap/parallax/snaplog.c | 238 - tools/blktap/parallax/snaplog.h | 61 tools/blktap/parallax/vdi.c | 367 -- tools/blktap/parallax/vdi.h | 55 tools/blktap/parallax/vdi_create.c | 52 tools/blktap/parallax/vdi_fill.c | 81 tools/blktap/parallax/vdi_list.c | 47 tools/blktap/parallax/vdi_snap.c | 43 tools/blktap/parallax/vdi_snap_delete.c | 48 tools/blktap/parallax/vdi_snap_list.c | 82 tools/blktap/parallax/vdi_tree.c | 132 tools/blktap/parallax/vdi_unittest.c | 184 - tools/blktap/parallax/vdi_validate.c | 97 tools/blktap/ublkback/Makefile | 40 tools/blktap/ublkback/ublkback.c | 18 tools/blktap/ublkback/ublkbacklib.c | 473 --- tools/blktap/ublkback/ublkbacklib.h | 16 tools/blktap/xenbus.c | 568 ---- docs/src/user.tex | 3 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 51 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 193 - linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c | 19 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c | 54 
linux-2.6-xen-sparse/drivers/xen/Kconfig | 10 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 10 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c | 3 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c | 4 linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile | 2 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c | 16 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile | 2 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 4 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 23 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 17 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 22 patches/linux-2.6.16.13/ipv6-no-autoconf.patch | 20 tools/examples/network-bridge | 36 tools/examples/vif-bridge | 12 tools/examples/xen-network-common.sh | 45 tools/examples/xmexample.hvm | 12 tools/firmware/acpi/Makefile | 7 tools/firmware/acpi/acpi2_0.h | 2 tools/firmware/acpi/acpi_dsdt.asl | 345 ++ tools/firmware/acpi/acpi_dsdt.c | 399 +- tools/firmware/acpi/acpi_fadt.h | 21 tools/ioemu/hw/pc.c | 8 tools/ioemu/hw/pci.c | 19 tools/ioemu/hw/piix4acpi.c | 481 +++ tools/ioemu/target-i386-dm/Makefile | 2 tools/libxc/xc_domain.c | 11 tools/libxc/xc_linux_restore.c | 66 tools/libxc/xc_linux_save.c | 4 tools/libxc/xenctrl.h | 4 tools/python/xen/lowlevel/xc/xc.c | 31 tools/python/xen/util/SSHTransport.py | 102 tools/python/xen/util/xmlrpclib2.py | 55 tools/python/xen/xend/XendClient.py | 13 tools/python/xen/xend/XendDomainInfo.py | 5 tools/python/xen/xm/create.py | 2 tools/python/xen/xm/main.py | 34 tools/security/secpol_tool.c | 7 tools/xm-test/grouptest/default | 2 tools/xm-test/grouptest/medium | 2 tools/xm-test/lib/XmTestLib/Console.py | 70 tools/xm-test/tests/memset/03_memset_random_pos.py | 6 xen/acm/acm_core.c | 5 xen/acm/acm_policy.c | 45 xen/arch/ia64/linux-xen/smp.c | 36 xen/arch/ia64/xen/domain.c | 4 xen/arch/ia64/xen/xensetup.c | 3 xen/arch/x86/Makefile | 2 xen/arch/x86/audit.c | 4 xen/arch/x86/hvm/vmx/vmcs.c | 17 xen/arch/x86/hvm/vmx/vmx.c 
| 13 xen/arch/x86/mm.c | 205 + xen/arch/x86/setup.c | 7 xen/arch/x86/shadow.c | 125 xen/arch/x86/shadow32.c | 12 xen/arch/x86/shadow_guest32pae.c | 2 xen/arch/x86/shadow_public.c | 40 xen/arch/x86/time.c | 2 xen/arch/x86/traps.c | 242 + xen/arch/x86/x86_32/seg_fixup.c | 2 xen/arch/x86/x86_32/traps.c | 44 xen/arch/x86/x86_64/traps.c | 37 xen/arch/x86/x86_emulate.c | 4 xen/common/acm_ops.c | 2 xen/common/dom0_ops.c | 19 xen/common/domain.c | 2 xen/common/kernel.c | 5 xen/common/keyhandler.c | 20 xen/common/memory.c | 2 xen/common/sched_credit.c | 4 xen/common/sched_sedf.c | 26 xen/common/schedule.c | 4 xen/drivers/char/console.c | 45 xen/include/acm/acm_core.h | 9 xen/include/acm/acm_hooks.h | 18 xen/include/asm-ia64/debugger.h | 8 xen/include/asm-ia64/vmx.h | 1 xen/include/asm-ia64/xenprocessor.h | 2 xen/include/asm-x86/hvm/support.h | 2 xen/include/asm-x86/mm.h | 2 xen/include/asm-x86/processor.h | 15 xen/include/asm-x86/shadow.h | 44 xen/include/asm-x86/shadow_64.h | 36 xen/include/asm-x86/shadow_ops.h | 8 xen/include/public/arch-x86_32.h | 9 xen/include/public/arch-x86_64.h | 14 xen/include/public/dom0_ops.h | 23 xen/include/public/memory.h | 14 xen/include/xen/console.h | 2 xen/include/xen/lib.h | 1 xen/include/xen/sched.h | 1 134 files changed, 2608 insertions(+), 9373 deletions(-) diff -r 59d4c1863330 -r fdf25330e4a6 docs/src/user.tex --- a/docs/src/user.tex Fri Jun 23 15:26:01 2006 -0600 +++ b/docs/src/user.tex Fri Jun 23 15:33:25 2006 -0600 @@ -1972,7 +1972,8 @@ editing \path{grub.conf}. \item [ console=$<$specifier list$>$ ] Specify the destination for Xen console I/O. This is a comma-separated list of, for example: \begin{description} - \item[ vga ] Use VGA console and allow keyboard input. + \item[ vga ] Use VGA console (only until domain 0 boots, unless {\bf + vga[keep] } is specified). \item[ com1 ] Use serial port com1. \item[ com2H ] Use serial port com2. Transmitted chars will have the MSB set. Received chars must have MSB set. 
diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Fri Jun 23 15:33:25 2006 -0600 @@ -273,6 +273,49 @@ static void dump_fault_path(unsigned lon } #endif +static int spurious_fault(struct pt_regs *regs, + unsigned long address, + unsigned long error_code) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + +#ifdef CONFIG_XEN + /* Faults in hypervisor area are never spurious. */ + if (address >= HYPERVISOR_VIRT_START) + return 0; +#endif + + /* Reserved-bit violation or user access to kernel space? */ + if (error_code & 0x0c) + return 0; + + pgd = init_mm.pgd + pgd_index(address); + if (!pgd_present(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + return 0; + if ((error_code & 0x02) && !pte_write(*pte)) + return 0; +#ifdef CONFIG_X86_PAE + if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX)) + return 0; +#endif + + return 1; +} /* * This routine handles page faults. It determines the address, @@ -327,8 +370,16 @@ fastcall void __kprobes do_page_fault(st * protection error (error_code & 1) == 0. */ if (unlikely(address >= TASK_SIZE)) { +#ifdef CONFIG_XEN + /* Faults in hypervisor area can never be patched up. */ + if (address >= HYPERVISOR_VIRT_START) + goto bad_area_nosemaphore; +#endif if (!(error_code & 5)) goto vmalloc_fault; + /* Can take a spurious fault if mapping changes R/O -> R/W. */ + if (spurious_fault(regs, address, error_code)) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. 
diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Fri Jun 23 15:33:25 2006 -0600 @@ -263,6 +263,10 @@ static void contiguous_bitmap_clear( } } +/* Protected by balloon_lock. */ +#define MAX_CONTIG_ORDER 7 +static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; + /* Ensure multi-page extents are contiguous in machine memory. */ int xen_create_contiguous_region( unsigned long vstart, unsigned int order, unsigned int address_bits) @@ -271,13 +275,23 @@ int xen_create_contiguous_region( pud_t *pud; pmd_t *pmd; pte_t *pte; + unsigned long *in_frames = discontig_frames, out_frame; unsigned long frame, i, flags; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF + long rc; + int success; + struct xen_memory_exchange exchange = { + .in = { + .nr_extents = 1UL << order, + .extent_order = 0, + .domid = DOMID_SELF + }, + .out = { + .nr_extents = 1, + .extent_order = order, + .address_bits = address_bits, + .domid = DOMID_SELF + } }; - set_xen_guest_handle(reservation.extent_start, &frame); /* * Currently an auto-translated guest will not perform I/O, nor will @@ -287,68 +301,73 @@ int xen_create_contiguous_region( if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; + if (order > MAX_CONTIG_ORDER) + return -ENOMEM; + + set_xen_guest_handle(exchange.in.extent_start, in_frames); + set_xen_guest_handle(exchange.out.extent_start, &out_frame); + scrub_pages(vstart, 1 << order); balloon_lock(flags); - /* 1. Zap current PTEs, giving away the underlying pages. */ - for (i = 0; i < (1<<order); i++) { + /* 1. Zap current PTEs, remembering MFNs. 
*/ + for (i = 0; i < (1UL<<order); i++) { pgd = pgd_offset_k(vstart + (i*PAGE_SIZE)); pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); - frame = pte_mfn(*pte); - BUG_ON(HYPERVISOR_update_va_mapping( - vstart + (i*PAGE_SIZE), __pte_ma(0), 0)); + in_frames[i] = pte_mfn(*pte); + if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), + __pte_ma(0), 0)) + BUG(); set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, INVALID_P2M_ENTRY); - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Get a new contiguous memory extent. */ - reservation.extent_order = order; - reservation.address_bits = address_bits; - frame = __pa(vstart) >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_populate_physmap, - &reservation) != 1) - goto fail; + out_frame = __pa(vstart) >> PAGE_SHIFT; + rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); + success = (exchange.nr_exchanged == (1UL << order)); + BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); + BUG_ON(success && (rc != 0)); + if (unlikely(rc == -ENOSYS)) { + /* Compatibility when XENMEM_exchange is unsupported. */ + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &exchange.in) != (1UL << order)) + BUG(); + success = (HYPERVISOR_memory_op(XENMEM_populate_physmap, + &exchange.out) == 1); + if (!success) { + /* Couldn't get special memory: fall back to normal. */ + for (i = 0; i < (1UL<<order); i++) + in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i; + if (HYPERVISOR_memory_op(XENMEM_populate_physmap, + &exchange.in) != (1UL<<order)) + BUG(); + } + } /* 3. Map the new extent in place of old pages. */ - for (i = 0; i < (1<<order); i++) { - BUG_ON(HYPERVISOR_update_va_mapping( - vstart + (i*PAGE_SIZE), - pfn_pte_ma(frame+i, PAGE_KERNEL), 0)); - set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame+i); + for (i = 0; i < (1UL<<order); i++) { + frame = success ? 
(out_frame + i) : in_frames[i]; + if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), + pfn_pte_ma(frame, + PAGE_KERNEL), + 0)) + BUG(); + set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame); } flush_tlb_all(); - contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order); + if (success) + contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, + 1UL << order); balloon_unlock(flags); - return 0; - - fail: - reservation.extent_order = 0; - reservation.address_bits = 0; - - for (i = 0; i < (1<<order); i++) { - frame = (__pa(vstart) >> PAGE_SHIFT) + i; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_populate_physmap, &reservation) != 1); - BUG_ON(HYPERVISOR_update_va_mapping( - vstart + (i*PAGE_SIZE), - pfn_pte_ma(frame, PAGE_KERNEL), 0)); - set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame); - } - - flush_tlb_all(); - - balloon_unlock(flags); - - return -ENOMEM; + return success ? 0 : -ENOMEM; } void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) @@ -357,47 +376,79 @@ void xen_destroy_contiguous_region(unsig pud_t *pud; pmd_t *pmd; pte_t *pte; + unsigned long *out_frames = discontig_frames, in_frame; unsigned long frame, i, flags; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF + long rc; + int success; + struct xen_memory_exchange exchange = { + .in = { + .nr_extents = 1, + .extent_order = order, + .domid = DOMID_SELF + }, + .out = { + .nr_extents = 1UL << order, + .extent_order = 0, + .domid = DOMID_SELF + } }; - set_xen_guest_handle(reservation.extent_start, &frame); if (xen_feature(XENFEAT_auto_translated_physmap) || !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap)) return; + if (order > MAX_CONTIG_ORDER) + return; + + set_xen_guest_handle(exchange.in.extent_start, &in_frame); + set_xen_guest_handle(exchange.out.extent_start, out_frames); + scrub_pages(vstart, 1 << order); balloon_lock(flags); contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order); - /* 
1. Zap current PTEs, giving away the underlying pages. */ - for (i = 0; i < (1<<order); i++) { - pgd = pgd_offset_k(vstart + (i*PAGE_SIZE)); - pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); - pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); - pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); - frame = pte_mfn(*pte); - BUG_ON(HYPERVISOR_update_va_mapping( - vstart + (i*PAGE_SIZE), __pte_ma(0), 0)); + /* 1. Find start MFN of contiguous extent. */ + pgd = pgd_offset_k(vstart); + pud = pud_offset(pgd, vstart); + pmd = pmd_offset(pud, vstart); + pte = pte_offset_kernel(pmd, vstart); + in_frame = pte_mfn(*pte); + + /* 2. Zap current PTEs. */ + for (i = 0; i < (1UL<<order); i++) { + if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), + __pte_ma(0), 0)); set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, INVALID_P2M_ENTRY); - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != 1); - } - - /* 2. Map new pages in place of old pages. */ - for (i = 0; i < (1<<order); i++) { - frame = (__pa(vstart) >> PAGE_SHIFT) + i; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_populate_physmap, &reservation) != 1); - BUG_ON(HYPERVISOR_update_va_mapping( - vstart + (i*PAGE_SIZE), - pfn_pte_ma(frame, PAGE_KERNEL), 0)); + out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i; + } + + /* 3. Do the exchange for non-contiguous MFNs. */ + rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); + success = (exchange.nr_exchanged == 1); + BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0))); + BUG_ON(success && (rc != 0)); + if (rc == -ENOSYS) { + /* Compatibility when XENMEM_exchange is unsupported. */ + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &exchange.in) != 1) + BUG(); + if (HYPERVISOR_memory_op(XENMEM_populate_physmap, + &exchange.out) != (1UL << order)) + BUG(); + success = 1; + } + + /* 4. Map new pages in place of old pages. */ + for (i = 0; i < (1UL<<order); i++) { + frame = success ? 
out_frames[i] : (in_frame + i); + if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), + pfn_pte_ma(frame, + PAGE_KERNEL), + 0)) + BUG(); set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame); } diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c Fri Jun 23 15:33:25 2006 -0600 @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/percpu.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/proto.h> @@ -92,8 +93,16 @@ static void __init setup_boot_cpu_data(v boot_cpu_data.x86_mask = eax & 0xf; } +#include <xen/interface/memory.h> +unsigned long *machine_to_phys_mapping; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + void __init x86_64_start_kernel(char * real_mode_data) { + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; char *s; int i; @@ -104,6 +113,16 @@ void __init x86_64_start_kernel(char * r start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) + xen_start_info->nr_pt_frames; } + + + machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START; + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } + while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) + machine_to_phys_order++; #if 0 for (i = 0; i < 256; i++) diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c Fri Jun 23 15:33:25 2006 -0600 @@ -307,6 +307,49 @@ int exception_trace = 1; #define MEM_LOG(_f, 
_a...) ((void)0) #endif +static int spurious_fault(struct pt_regs *regs, + unsigned long address, + unsigned long error_code) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + +#ifdef CONFIG_XEN + /* Faults in hypervisor area are never spurious. */ + if ((address >= HYPERVISOR_VIRT_START) && + (address < HYPERVISOR_VIRT_END)) + return 0; +#endif + + /* Reserved-bit violation or user access to kernel space? */ + if (error_code & (PF_RSVD|PF_USER)) + return 0; + + pgd = init_mm.pgd + pgd_index(address); + if (!pgd_present(*pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return 0; + + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + return 0; + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; + if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX)) + return 0; + + return 1; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -361,16 +404,19 @@ asmlinkage void __kprobes do_page_fault( */ if (unlikely(address >= TASK_SIZE64)) { /* - * Must check for the entire kernel range here: with writable - * page tables the hypervisor may temporarily clear PMD - * entries. + * Don't check for the module range here: its PML4 + * is always initialized because it's shared with the main + * kernel text. Only vmalloc may need PML4 syncups. */ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && - address >= PAGE_OFFSET) { + ((address >= VMALLOC_START && address < VMALLOC_END))) { if (vmalloc_fault(address) < 0) goto bad_area_nosemaphore; return; } + /* Can take a spurious fault if mapping changes R/O -> R/W. */ + if (spurious_fault(regs, address, error_code)) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. 
diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Fri Jun 23 15:33:25 2006 -0600 @@ -27,6 +27,11 @@ config XEN_UNPRIVILEGED_GUEST config XEN_UNPRIVILEGED_GUEST bool default !XEN_PRIVILEGED_GUEST + +config XEN_PRIVCMD + bool + depends on PROC_FS + default y config XEN_BACKEND tristate "Backend driver support" @@ -84,6 +89,11 @@ config XEN_BLKDEV_BACKEND block devices to other guests via a high-performance shared-memory interface. +config XEN_XENBUS_DEV + bool + depends on PROC_FS + default y + config XEN_NETDEV_BACKEND tristate "Network-device backend driver" depends on XEN_BACKEND && NET diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Jun 23 15:33:25 2006 -0600 @@ -58,7 +58,9 @@ #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) +#ifdef CONFIG_PROC_FS static struct proc_dir_entry *balloon_pde; +#endif static DECLARE_MUTEX(balloon_mutex); @@ -403,6 +405,7 @@ static int balloon_init_watcher(struct n return NOTIFY_DONE; } +#ifdef CONFIG_PROC_FS static int balloon_write(struct file *file, const char __user *buffer, unsigned long count, void *data) { @@ -456,6 +459,7 @@ static int balloon_read(char *page, char *eof = 1; return len; } +#endif static struct notifier_block xenstore_notifier; @@ -464,10 +468,10 @@ static int __init balloon_init(void) unsigned long pfn; struct page *page; - IPRINTK("Initialising balloon driver.\n"); - if (!is_running_on_xen()) return -ENODEV; + + IPRINTK("Initialising balloon driver.\n"); current_pages = min(xen_start_info->nr_pages, max_pfn); totalram_pages = current_pages; @@ -481,6 +485,7 @@ static int __init balloon_init(void) balloon_timer.data = 0; balloon_timer.function = balloon_alarm; +#ifdef 
CONFIG_PROC_FS if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) { WPRINTK("Unable to create /proc/xen/balloon.\n"); return -1; @@ -488,6 +493,7 @@ static int __init balloon_init(void) balloon_pde->read_proc = balloon_read; balloon_pde->write_proc = balloon_write; +#endif /* Initialise the balloon with excess memory space. */ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c --- a/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c Fri Jun 23 15:33:25 2006 -0600 @@ -109,6 +109,9 @@ static int __init setup_vcpu_hotplug_eve static struct notifier_block xsn_cpu = { .notifier_call = setup_cpu_watcher }; + if (!is_running_on_xen()) + return -ENODEV; + register_cpu_notifier(&hotplug_cpu); register_xenstore_notifier(&xsn_cpu); diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/core/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Fri Jun 23 15:33:25 2006 -0600 @@ -666,6 +666,10 @@ int irq_ignore_unhandled(unsigned int ir int irq_ignore_unhandled(unsigned int irq) { struct physdev_irq_status_query irq_status = { .irq = irq }; + + if (!is_running_on_xen()) + return 0; + (void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status); return !!(irq_status.flags & XENIRQSTAT_shared); } diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile Fri Jun 23 15:33:25 2006 -0600 @@ -1,2 +1,2 @@ -obj-y := privcmd.o +obj-$(CONFIG_XEN_PRIVCMD) := privcmd.o diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c --- 
a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Fri Jun 23 15:33:25 2006 -0600 @@ -71,8 +71,6 @@ static int packet_read_shmem(struct pack char *buffer, int isuserbuffer, u32 left); static int vtpm_queue_packet(struct packet *pak); -#define MIN(x,y) (x) < (y) ? (x) : (y) - /*************************************************************** Buffer copying fo user and kernel space buffes. ***************************************************************/ @@ -309,7 +307,7 @@ int _packet_write(struct packet *pak, return 0; } - tocopy = MIN(size - offset, PAGE_SIZE); + tocopy = min_t(size_t, size - offset, PAGE_SIZE); if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) | (tx->addr & ~PAGE_MASK)), @@ -365,7 +363,7 @@ static int packet_read(struct packet *pa u32 instance_no = htonl(pak->tpm_instance); u32 last_read = pak->last_read; - to_copy = MIN(4 - last_read, numbytes); + to_copy = min_t(size_t, 4 - last_read, numbytes); if (copy_to_buffer(&buffer[0], &(((u8 *) & instance_no)[last_read]), @@ -384,7 +382,7 @@ static int packet_read(struct packet *pa if (room_left > 0) { if (pak->data_buffer) { - u32 to_copy = MIN(pak->data_len - offset, room_left); + u32 to_copy = min_t(u32, pak->data_len - offset, room_left); u32 last_read = pak->last_read - 4; if (copy_to_buffer(&buffer[offset], @@ -424,7 +422,7 @@ static int packet_read_shmem(struct pack * and within that page at offset 'offset'. * Copy a maximum of 'room_left' bytes. 
*/ - to_copy = MIN(PAGE_SIZE - pg_offset, room_left); + to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left); while (to_copy > 0) { void *src; struct gnttab_map_grant_ref map_op; @@ -451,7 +449,7 @@ static int packet_read_shmem(struct pack /* * User requests more than what's available */ - to_copy = MIN(tx->size, to_copy); + to_copy = min_t(u32, tx->size, to_copy); } DPRINTK("Copying from mapped memory at %08lx\n", @@ -483,7 +481,7 @@ static int packet_read_shmem(struct pack last_read += to_copy; room_left -= to_copy; - to_copy = MIN(PAGE_SIZE, room_left); + to_copy = min_t(u32, PAGE_SIZE, room_left); i++; } /* while (to_copy > 0) */ /* @@ -545,7 +543,7 @@ static ssize_t vtpm_op_read(struct file DPRINTK("size given by app: %d, available: %d\n", size, left); - ret_size = MIN(size, left); + ret_size = min_t(size_t, size, left); ret_size = packet_read(pak, ret_size, data, size, 1); diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Fri Jun 23 15:33:25 2006 -0600 @@ -9,4 +9,4 @@ xenbus-objs += xenbus_comms.o xenbus-objs += xenbus_comms.o xenbus-objs += xenbus_xs.o xenbus-objs += xenbus_probe.o -xenbus-objs += xenbus_dev.o +obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Jun 23 15:33:25 2006 -0600 @@ -926,6 +926,7 @@ void xenbus_probe(void *unused) } +#ifdef CONFIG_PROC_FS static struct file_operations xsd_kva_fops; static struct proc_dir_entry *xsd_kva_intf; static struct proc_dir_entry *xsd_port_intf; @@ -964,6 +965,7 @@ static int xsd_port_read(char *page, cha *eof = 1; return len; } +#endif static int __init xenbus_probe_init(void) @@ -1008,6 +1010,7 
@@ static int __init xenbus_probe_init(void BUG_ON(err); xen_start_info->store_evtchn = alloc_unbound.port; +#ifdef CONFIG_PROC_FS /* And finally publish the above info in /proc/xen */ xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600); if (xsd_kva_intf) { @@ -1020,6 +1023,7 @@ static int __init xenbus_probe_init(void xsd_port_intf = create_xen_proc_entry("xsd_port", 0400); if (xsd_port_intf) xsd_port_intf->read_proc = xsd_port_read; +#endif } else xenstored_ready = 1; diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Fri Jun 23 15:33:25 2006 -0600 @@ -67,6 +67,10 @@ extern unsigned long *phys_to_machine_mapping; +#undef machine_to_phys_mapping +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; + static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -84,24 +88,29 @@ static inline int phys_to_machine_mappin static inline unsigned long mfn_to_pfn(unsigned long mfn) { + extern unsigned long max_mapnr; unsigned long pfn; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - /* - * The array access can fail (e.g., device space beyond end of RAM). - * In such cases it doesn't matter what we return (we return garbage), - * but we must handle the fault without crashing! - */ + if (unlikely((mfn >> machine_to_phys_order) != 0)) + return max_mapnr; + + /* The array access can fail (e.g., device space beyond end of RAM). 
*/ asm ( "1: movl %1,%0\n" "2:\n" + ".section .fixup,\"ax\"\n" + "3: movl %2,%0\n" + " jmp 2b\n" + ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" - " .long 1b,2b\n" + " .long 1b,3b\n" ".previous" - : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) ); + : "=r" (pfn) + : "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) ); return pfn; } diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Jun 23 15:33:25 2006 -0600 @@ -7,6 +7,7 @@ **/ #include <xen/interface/callback.h> +#include <xen/interface/memory.h> static char * __init machine_specific_memory_setup(void) { @@ -44,9 +45,16 @@ extern void failsafe_callback(void); extern void failsafe_callback(void); extern void nmi(void); +unsigned long *machine_to_phys_mapping; +EXPORT_SYMBOL(machine_to_phys_mapping); +unsigned int machine_to_phys_order; +EXPORT_SYMBOL(machine_to_phys_order); + static void __init machine_specific_arch_setup(void) { int ret; + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; struct xen_platform_parameters pp; struct callback_register event = { .type = CALLBACKTYPE_event, @@ -81,4 +89,13 @@ static void __init machine_specific_arch if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) set_fixaddr_top(pp.virt_start - PAGE_SIZE); + + machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START; + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { + machine_to_phys_mapping = (unsigned long *)mapping.v_start; + machine_to_phys_nr_ents = mapping.max_mfn + 1; + } + while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) + machine_to_phys_order++; } diff -r 59d4c1863330 -r fdf25330e4a6 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h --- 
a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Fri Jun 23 15:26:01 2006 -0600 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Fri Jun 23 15:33:25 2006 -0600 @@ -85,6 +85,10 @@ void copy_page(void *, void *); extern unsigned long *phys_to_machine_mapping; +#undef machine_to_phys_mapping +extern unsigned long *machine_to_phys_mapping; +extern unsigned int machine_to_phys_order; + static inline unsigned long pfn_to_mfn(unsigned long pfn) { if (xen_feature(XENFEAT_auto_translated_physmap)) @@ -107,19 +111,23 @@ static inline unsigned long mfn_to_pfn(u if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - /* - * The array access can fail (e.g., device space beyond end of RAM). - * In such cases it doesn't matter what we return (we return garbage), - * but we must handle the fault without crashing! - */ + if (unlikely((mfn >> machine_to_phys_order) != 0)) + return end_pfn; + + /* The array access can fail (e.g., device space beyond end of RAM). */ asm ( "1: movq %1,%0\n" "2:\n" + ".section .fixup,\"ax\"\n" + "3: movq %2,%0\n" + " jmp 2b\n" + ".previous\n" ".section __ex_table,\"a\"\n" " .align 8\n" - " .quad 1b,2b\n" + " .quad 1b,3b\n" ".previous" - : "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) ); + : "=r" (pfn) + : "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) ); return pfn; } diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/network-bridge --- a/tools/examples/network-bridge Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/examples/network-bridge Fri Jun 23 15:33:25 2006 -0600 @@ -151,30 +151,6 @@ link_exists() fi } - -# Usage: create_bridge bridge -create_bridge () { - local bridge=$1 - - # Don't create the bridge if it already exists. - if ! 
brctl show | grep -q ${bridge} ; then - brctl addbr ${bridge} - brctl stp ${bridge} off - brctl setfd ${bridge} 0 - fi - ip link set ${bridge} up -} - -# Usage: add_to_bridge bridge dev -add_to_bridge () { - local bridge=$1 - local dev=$2 - # Don't add $dev to $bridge if it's already on a bridge. - if ! brctl show | grep -q ${dev} ; then - brctl addif ${bridge} ${dev} - fi -} - # Set the default forwarding policy for $dev to drop. # Allow forwarding to the bridge. antispoofing () { @@ -238,14 +214,13 @@ using loopback.nloopbacks=<N> on the dom fi ip link set ${netdev} name ${pdev} ip link set ${vdev} name ${netdev} - ip link set ${pdev} down arp off - ip link set ${pdev} addr fe:ff:ff:ff:ff:ff - ip addr flush ${pdev} + + setup_bridge_port ${pdev} + setup_bridge_port ${vif0} ip link set ${netdev} addr ${mac} arp on - add_to_bridge ${bridge} ${vif0} + ip link set ${bridge} up - ip link set ${vif0} up - ip link set ${pdev} up + add_to_bridge ${bridge} ${vif0} add_to_bridge2 ${bridge} ${pdev} do_ifup ${netdev} else @@ -301,6 +276,7 @@ add_to_bridge2() { local maxtries=10 echo -n "Waiting for ${dev} to negotiate link." 
+ ip link set ${dev} up for i in `seq ${maxtries}` ; do if ifconfig ${dev} | grep -q RUNNING ; then break diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/vif-bridge --- a/tools/examples/vif-bridge Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/examples/vif-bridge Fri Jun 23 15:33:25 2006 -0600 @@ -48,16 +48,8 @@ fi case "$command" in online) - if brctl show | grep -q "$vif" - then - log debug "$vif already attached to a bridge" - exit 0 - fi - - brctl addif "$bridge" "$vif" || - fatal "brctl addif $bridge $vif failed" - - ifconfig "$vif" up || fatal "ifconfig $vif up failed" + setup_bridge_port "$vif" + add_to_bridge "$bridge" "$vif" ;; offline) diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/xen-network-common.sh --- a/tools/examples/xen-network-common.sh Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/examples/xen-network-common.sh Fri Jun 23 15:33:25 2006 -0600 @@ -104,3 +104,48 @@ find_dhcpd_init_file() { first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd} } + +# configure interfaces which act as pure bridge ports: +# - make quiet: no arp, no multicast (ipv6 autoconf) +# - set mac address to fe:ff:ff:ff:ff:ff +setup_bridge_port() { + local dev="$1" + + # take interface down ... + ip link set ${dev} down + + # ... and configure it + ip link set ${dev} arp off + ip link set ${dev} multicast off + ip link set ${dev} addr fe:ff:ff:ff:ff:ff + ip addr flush ${dev} +} + +# Usage: create_bridge bridge +create_bridge () { + local bridge=$1 + + # Don't create the bridge if it already exists. + if [ ! -e "/sys/class/net/${bridge}/bridge" ]; then + brctl addbr ${bridge} + brctl stp ${bridge} off + brctl setfd ${bridge} 0 + ip link set ${bridge} arp off + ip link set ${bridge} multicast off + fi + ip link set ${bridge} up +} + +# Usage: add_to_bridge bridge dev +add_to_bridge () { + local bridge=$1 + local dev=$2 + + # Don't add $dev to $bridge if it's already on a bridge. 
+ if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then + return + fi + brctl addif ${bridge} ${dev} + ip link set ${dev} up +} + diff -r 59d4c1863330 -r fdf25330e4a6 tools/examples/xmexample.hvm --- a/tools/examples/xmexample.hvm Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/examples/xmexample.hvm Fri Jun 23 15:33:25 2006 -0600 @@ -164,3 +164,15 @@ ne2000=0 #----------------------------------------------------------------------------- # start in full screen #full-screen=1 + + +#----------------------------------------------------------------------------- +# Enable USB support (specific devices specified at runtime through the +# monitor window) +#usb=1 + +# Enable USB mouse support (only enable one of the following, `mouse' for +# PS/2 protocol relative mouse, `tablet' for +# absolute mouse) +#usbdevice='mouse' +#usbdevice='tablet' diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/Makefile --- a/tools/firmware/acpi/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/firmware/acpi/Makefile Fri Jun 23 15:33:25 2006 -0600 @@ -33,17 +33,16 @@ IASL_URL=http://developer.intel.com/tech IASL_URL=http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz vpath iasl $(PATH) -.PHONY: all all:$(ACPI_BIN) acpi_dsdt.c:acpi_dsdt.asl $(MAKE) iasl - iasl -oa -tc acpi_dsdt.asl + iasl -tc acpi_dsdt.asl mv acpi_dsdt.hex acpi_dsdt.c echo "int DsdtLen=sizeof(AmlCode);" >> acpi_dsdt.c rm *.aml +# iasl -oa -tc acpi_dsdt.asl -.PHONY: iasl iasl: @echo @echo "ACPI ASL compiler(iasl) is needed" @@ -62,10 +61,8 @@ iasl: $(ACPI_BIN):$(ACPI_GEN) ./$(ACPI_GEN) $(ACPI_BIN) -.PHONY: clean clean: rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER) rm -rf $(IASL_VER).tar.gz -.PHONY: install install: all diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi2_0.h --- a/tools/firmware/acpi/acpi2_0.h Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/firmware/acpi/acpi2_0.h Fri Jun 23 15:33:25 2006 -0600 @@ -323,7 +323,7 @@ typedef struct { // The physical that acpi table reside in the 
guest BIOS //#define ACPI_PHYSICAL_ADDRESS 0xE2000 #define ACPI_PHYSICAL_ADDRESS 0xEA000 -#define ACPI_TABLE_SIZE (2*1024) //Currently 2K is enough +#define ACPI_TABLE_SIZE (4*1024) //Currently 4K is enough void AcpiBuildTable(uint8_t* buf); diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_dsdt.asl --- a/tools/firmware/acpi/acpi_dsdt.asl Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/firmware/acpi/acpi_dsdt.asl Fri Jun 23 15:33:25 2006 -0600 @@ -20,7 +20,7 @@ //** //** -DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL ", "XEN ", 2) +DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006) { Name (\PMBS, 0x0C00) Name (\PMLN, 0x08) @@ -29,24 +29,33 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, Name (\APCB, 0xFEC00000) Name (\APCL, 0x00010000) Name (\PUID, 0x00) + Scope (\_PR) { Processor (CPU0, 0x00, 0x00000000, 0x00) {} Processor (CPU1, 0x01, 0x00000000, 0x00) {} Processor (CPU2, 0x02, 0x00000000, 0x00) {} Processor (CPU3, 0x03, 0x00000000, 0x00) {} + } /* Poweroff support - ties in with qemu emulation */ Name (\_S5, Package (0x04) { - 0x07, - 0x07, - 0x00, + 0x07, + 0x07, + 0x00, 0x00 }) + + Name(PICD, 0) + + Method(_PIC, 1) { + + Store(Arg0, PICD) + } Scope (\_SB) { Device (PCI0) @@ -55,9 +64,20 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, Name (_UID, 0x00) Name (_ADR, 0x00) Name (_BBN, 0x00) + OperationRegion (PIRP, PCI_Config, 0x3c, 0x10) + Field(PIRP, ByteAcc, NoLock, Preserve){ + IRQ3,3, + IRQ5,5, + IRQ7,7, + IRQ9,9, + IRQA,10, + IRQB,11 + } + Method (_CRS, 0, NotSerialized) { - Name (PRT0, ResourceTemplate () + + Name (PRT0, ResourceTemplate () { /* bus number is from 0 - 255*/ WordBusNumber (ResourceConsumer, MinFixed, MaxFixed, SubDecode, @@ -79,75 +99,270 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 0x0FFF, 0x0000, 0x0300) + + /* reserve what device model consumed for IDE and acpi pci device */ + WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange, + 0x0000, + 0xc000, + 0xc01f, + 0x0000, + 0x0020) + /* reserve what device model 
consumed for Ethernet controller pci device */ + WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange, + 0x0000, + 0xc020, + 0xc03f, + 0x0000, + 0x0010) + DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadOnly, 0x00000000, - 0x000A0000, + 0x000c0000, 0x000FFFFF, 0x00000000, - 0x00060000) + 0x00030000) + + /* reserve what device model consumed for PCI VGA device */ + + DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, + 0x00000000, + 0xF0000000, + 0xF1FFFFFF, + 0x00000000, + 0x02000000) + DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, + 0x00000000, + 0xF2000000, + 0xF2000FFF, + 0x00000000, + 0x00001000) + /* reserve what device model consumed for Ethernet controller pci device */ + DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, + 0x00000000, + 0xF2001000, + 0xF200101F, + 0x00000000, + 0x00000020) }) Return (PRT0) } - - Name (AIR0, Package (0x06) - { - Package (0x04) - { - 0x001FFFFF, - 0x02, - 0x00, - 0x17 - }, - - Package (0x04) - { - 0x001FFFFF, - 0x03, - 0x00, - 0x13 - }, - - Package (0x04) - { - 0x001DFFFF, - 0x01, - 0x00, - 0x13 - }, - - Package (0x04) - { - 0x001DFFFF, - 0x00, - 0x00, - 0x10 - }, - - Package (0x04) - { - 0x001DFFFF, - 0x02, - 0x00, - 0x12 - }, - - Package (0x04) - { - 0x001DFFFF, - 0x03, - 0x00, - 0x17 - } - }) - Method (_PRT, 0, NotSerialized) - { - Return (AIR0) - } - + Name(BUFA, ResourceTemplate() { + IRQ(Level, ActiveLow, Shared) { + 3,4,5,6,7,10,11,12,14,15} + }) + + Name(BUFB, Buffer(){ + 0x23, 0x00, 0x00, 0x18, + 0x79, 0}) + + CreateWordField(BUFB, 0x01, IRQV) + + Name(BUFC, Buffer(){ + 5, 7, 10, 11 + }) + + CreateByteField(BUFC, 0x01, PIQA) + CreateByteField(BUFC, 0x01, PIQB) + CreateByteField(BUFC, 0x01, PIQC) + CreateByteField(BUFC, 0x01, PIQD) + + Device(LNKA) { + Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link + Name(_UID, 1) + Method(_STA, 0) { + And(PIRA, 0x80, Local0) + 
If(LEqual(Local0, 0x80)) { + Return(0x09) + } + Else { + Return(0x0B) + } + } + + Method(_PRS) { + + Return(BUFA) + } // Method(_PRS) + + Method(_DIS) { + Or(PIRA, 0x80, PIRA) + } + + Method(_CRS) { + And(PIRB, 0x0f, Local0) + ShiftLeft(0x1, Local0, IRQV) + Return(BUFB) + } + + Method(_SRS, 1) { + CreateWordField(ARG0, 0x01, IRQ1) + FindSetRightBit(IRQ1, Local0) + Decrement(Local0) + Store(Local0, PIRA) + } // Method(_SRS) + } + + Device(LNKB) { + Name(_HID, EISAID("PNP0C0F")) + Name(_UID, 2) + Method(_STA, 0) { + And(PIRB, 0x80, Local0) + If(LEqual(Local0, 0x80)) { + Return(0x09) + } + Else { + Return(0x0B) + } + } + + Method(_PRS) { + Return(BUFA) + } // Method(_PRS) + + Method(_DIS) { + + Or(PIRB, 0x80, PIRB) + } + + Method(_CRS) { + And(PIRB, 0x0f, Local0) + ShiftLeft(0x1, Local0, IRQV) + Return(BUFB) + } // Method(_CRS) + + Method(_SRS, 1) { + CreateWordField(ARG0, 0x01, IRQ1) + FindSetRightBit(IRQ1, Local0) + Decrement(Local0) + Store(Local0, PIRB) + } // Method(_SRS) + } + + Device(LNKC) { + Name(_HID, EISAID("PNP0C0F")) // PCI interrupt link + Name(_UID, 3) + Method(_STA, 0) { + And(PIRC, 0x80, Local0) + If(LEqual(Local0, 0x80)) { + Return(0x09) + } + Else { + Return(0x0B) + } + } + + Method(_PRS) { + Return(BUFA) + } // Method(_PRS) + + Method(_DIS) { + + Or(PIRC, 0x80, PIRC) + } + + Method(_CRS) { + And(PIRC, 0x0f, Local0) + ShiftLeft(0x1, Local0, IRQV) + Return(BUFB) + } // Method(_CRS) + + Method(_SRS, 1) { + CreateWordField(ARG0, 0x01, IRQ1) + FindSetRightBit(IRQ1, Local0) + Decrement(Local0) + Store(Local0, PIRC) + } // Method(_SRS) + } + + Device(LNKD) { + Name(_HID, EISAID("PNP0C0F")) + Name(_UID, 4) + Method(_STA, 0) { + And(PIRD, 0x80, Local0) + If(LEqual(Local0, 0x80)) { + Return(0x09) + } + Else { + Return(0x0B) + } + } + + Method(_PRS) { + Return(BUFA) + } // Method(_PRS) + + Method(_DIS) { + Or(PIRD, 0x80, PIRD) + } + + Method(_CRS) { + And(PIRD, 0x0f, Local0) + ShiftLeft(0x1, Local0, IRQV) + Return(BUFB) + } // Method(_CRS) + + Method(_SRS, 
1) { + CreateWordField(ARG0, 0x01, IRQ1) + FindSetRightBit(IRQ1, Local0) + Decrement(Local0) + Store(Local0, PIRD) + } // Method(_SRS) + } + Method(_PRT,0) { + If(PICD) {Return(PRTA)} + Return (PRTP) + } // end _PRT + + + Name(PRTP, Package(){ + Package(){0x0000ffff, 0, \_SB.PCI0.LNKA, 0}, // Slot 1, INTA + Package(){0x0000ffff, 1, \_SB.PCI0.LNKB, 0}, // Slot 1, INTB + Package(){0x0000ffff, 2, \_SB.PCI0.LNKC, 0}, // Slot 1, INTC + Package(){0x0000ffff, 3, \_SB.PCI0.LNKD, 0}, // Slot 1, INTD + + Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0}, // Slot 2, INTB + Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0}, // Slot 2, INTC + Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0}, // Slot 2, INTD + Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0}, // Slot 2, INTA + + Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0}, // Slot 3, INTC + Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0}, // Slot 3, INTD + Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0}, // Slot 3, INTA + Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0}, // Slot 3, INTB + + Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0}, // Slot 2, INTD + Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0}, // Slot 2, INTA + Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0}, // Slot 2, INTB + Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0}, // Slot 2, INTC + + } + ) + Name(PRTA, Package(){ + Package(){0x0001ffff, 0, 0, 5}, // Device 1, INTA + + Package(){0x0002ffff, 0, 0, 7}, // Device 2, INTA + + Package(){0x0003ffff, 0, 0, 10}, // Device 3, INTA + + Package(){0x0003ffff, 0, 0, 11}, // Device 4, INTA + + + } + ) + Device (ISA) { - Name (_ADR, 0x00010000) /*TODO, device id, PCI bus num, ...*/ - + Name (_ADR, 0x00000000) /* device id, PCI bus num, ... 
*/ + + OperationRegion(PIRQ, PCI_Config, 0x60, 0x4) + Scope(\) { + Field (\_SB.PCI0.ISA.PIRQ, ByteAcc, NoLock, Preserve) { + PIRA, 8, + PIRB, 8, + PIRC, 8, + PIRD, 8 + } + } Device (SYSR) { Name (_HID, EisaId ("PNP0C02")) diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_dsdt.c --- a/tools/firmware/acpi/acpi_dsdt.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/firmware/acpi/acpi_dsdt.c Fri Jun 23 15:33:25 2006 -0600 @@ -1,22 +1,22 @@ /* * * Intel ACPI Component Architecture - * ASL Optimizing Compiler / AML Disassembler version 20050624 [Aug 24 2005] + * ASL Optimizing Compiler / AML Disassembler version 20050513 [Jun 8 2005] * Copyright (C) 2000 - 2005 Intel Corporation * Supports ACPI Specification Revision 3.0 * - * Compilation of "acpi_dsdt.asl" - Thu May 4 17:42:00 2006 + * Compilation of "acpi_dsdt.asl" - Mon Jun 12 22:33:41 2006 * * C source code output * */ unsigned char AmlCode[] = { - 0x44,0x53,0x44,0x54,0x7C,0x04,0x00,0x00, /* 00000000 "DSDT|..." */ - 0x01,0x72,0x49,0x4E,0x54,0x45,0x4C,0x20, /* 00000008 ".rINTEL " */ - 0x58,0x45,0x4E,0x20,0x20,0x20,0x20,0x20, /* 00000010 "XEN " */ - 0x02,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ - 0x24,0x06,0x05,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "$.. .PMB" */ + 0x44,0x53,0x44,0x54,0xC3,0x08,0x00,0x00, /* 00000000 "DSDT...." */ + 0x01,0x0C,0x49,0x4E,0x54,0x45,0x4C,0x00, /* 00000008 "..INTEL." */ + 0x69,0x6E,0x74,0x2D,0x78,0x65,0x6E,0x00, /* 00000010 "int-xen." */ + 0xD6,0x07,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ + 0x13,0x05,0x05,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */ 0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C, /* 00000028 "S....PML" */ 0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31, /* 00000030 "N...IOB1" */ 0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08, /* 00000038 "..IOL1.." */ @@ -32,129 +32,266 @@ unsigned char AmlCode[] = 0x0B,0x43,0x50,0x55,0x33,0x03,0x00,0x00, /* 00000088 ".CPU3..." 
*/ 0x00,0x00,0x00,0x08,0x5F,0x53,0x35,0x5F, /* 00000090 "...._S5_" */ 0x12,0x08,0x04,0x0A,0x07,0x0A,0x07,0x00, /* 00000098 "........" */ - 0x00,0x10,0x4A,0x3D,0x5F,0x53,0x42,0x5F, /* 000000A0 "..J=_SB_" */ - 0x5B,0x82,0x42,0x3D,0x50,0x43,0x49,0x30, /* 000000A8 "[.B=PCI0" */ - 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000000B0 "._HID.A." */ - 0x0A,0x03,0x08,0x5F,0x55,0x49,0x44,0x00, /* 000000B8 "..._UID." */ - 0x08,0x5F,0x41,0x44,0x52,0x00,0x08,0x5F, /* 000000C0 "._ADR.._" */ - 0x42,0x42,0x4E,0x00,0x14,0x4A,0x06,0x5F, /* 000000C8 "BBN..J._" */ - 0x43,0x52,0x53,0x00,0x08,0x50,0x52,0x54, /* 000000D0 "CRS..PRT" */ - 0x30,0x11,0x48,0x05,0x0A,0x54,0x88,0x0D, /* 000000D8 "0.H..T.." */ - 0x00,0x02,0x0F,0x00,0x00,0x00,0x00,0x00, /* 000000E0 "........" */ - 0xFF,0x00,0x00,0x00,0x00,0x01,0x47,0x01, /* 000000E8 "......G." */ - 0xF8,0x0C,0xF8,0x0C,0x01,0x08,0x88,0x0D, /* 000000F0 "........" */ - 0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x00, /* 000000F8 "........" */ - 0xF7,0x0C,0x00,0x00,0xF8,0x0C,0x88,0x0D, /* 00000100 "........" */ - 0x00,0x01,0x0C,0x03,0x00,0x00,0x00,0x0D, /* 00000108 "........" */ - 0xFF,0x0F,0x00,0x00,0x00,0x03,0x87,0x17, /* 00000110 "........" */ - 0x00,0x00,0x0C,0x02,0x00,0x00,0x00,0x00, /* 00000118 "........" */ - 0x00,0x00,0x0A,0x00,0xFF,0xFF,0x0F,0x00, /* 00000120 "........" */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00, /* 00000128 "........" */ - 0x79,0x00,0xA4,0x50,0x52,0x54,0x30,0x08, /* 00000130 "y..PRT0." */ - 0x41,0x49,0x52,0x30,0x12,0x4F,0x04,0x06, /* 00000138 "AIR0.O.." */ - 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1F,0x00, /* 00000140 "........" */ - 0x0A,0x02,0x00,0x0A,0x17,0x12,0x0C,0x04, /* 00000148 "........" */ - 0x0C,0xFF,0xFF,0x1F,0x00,0x0A,0x03,0x00, /* 00000150 "........" */ - 0x0A,0x13,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000158 "........" */ - 0x1D,0x00,0x01,0x00,0x0A,0x13,0x12,0x0B, /* 00000160 "........" */ - 0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x00,0x00, /* 00000168 "........" */ - 0x0A,0x10,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000170 "........" 
*/ - 0x1D,0x00,0x0A,0x02,0x00,0x0A,0x12,0x12, /* 00000178 "........" */ - 0x0C,0x04,0x0C,0xFF,0xFF,0x1D,0x00,0x0A, /* 00000180 "........" */ - 0x03,0x00,0x0A,0x17,0x14,0x0B,0x5F,0x50, /* 00000188 "......_P" */ - 0x52,0x54,0x00,0xA4,0x41,0x49,0x52,0x30, /* 00000190 "RT..AIR0" */ - 0x5B,0x82,0x42,0x2E,0x49,0x53,0x41,0x5F, /* 00000198 "[.B.ISA_" */ - 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 000001A0 "._ADR..." */ - 0x01,0x00,0x5B,0x82,0x46,0x0B,0x53,0x59, /* 000001A8 "..[.F.SY" */ - 0x53,0x52,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 000001B0 "SR._HID." */ - 0x41,0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49, /* 000001B8 "A...._UI" */ - 0x44,0x01,0x08,0x43,0x52,0x53,0x5F,0x11, /* 000001C0 "D..CRS_." */ - 0x4E,0x08,0x0A,0x8A,0x47,0x01,0x10,0x00, /* 000001C8 "N...G..." */ - 0x10,0x00,0x00,0x10,0x47,0x01,0x22,0x00, /* 000001D0 "....G."." */ - 0x22,0x00,0x00,0x0C,0x47,0x01,0x30,0x00, /* 000001D8 ""...G.0." */ - 0x30,0x00,0x00,0x10,0x47,0x01,0x44,0x00, /* 000001E0 "0...G.D." */ - 0x44,0x00,0x00,0x1C,0x47,0x01,0x62,0x00, /* 000001E8 "D...G.b." */ - 0x62,0x00,0x00,0x02,0x47,0x01,0x65,0x00, /* 000001F0 "b...G.e." */ - 0x65,0x00,0x00,0x0B,0x47,0x01,0x72,0x00, /* 000001F8 "e...G.r." */ - 0x72,0x00,0x00,0x0E,0x47,0x01,0x80,0x00, /* 00000200 "r...G..." */ - 0x80,0x00,0x00,0x01,0x47,0x01,0x84,0x00, /* 00000208 "....G..." */ - 0x84,0x00,0x00,0x03,0x47,0x01,0x88,0x00, /* 00000210 "....G..." */ - 0x88,0x00,0x00,0x01,0x47,0x01,0x8C,0x00, /* 00000218 "....G..." */ - 0x8C,0x00,0x00,0x03,0x47,0x01,0x90,0x00, /* 00000220 "....G..." */ - 0x90,0x00,0x00,0x10,0x47,0x01,0xA2,0x00, /* 00000228 "....G..." */ - 0xA2,0x00,0x00,0x1C,0x47,0x01,0xE0,0x00, /* 00000230 "....G..." */ - 0xE0,0x00,0x00,0x10,0x47,0x01,0xA0,0x08, /* 00000238 "....G..." */ - 0xA0,0x08,0x00,0x04,0x47,0x01,0xC0,0x0C, /* 00000240 "....G..." */ - 0xC0,0x0C,0x00,0x10,0x47,0x01,0xD0,0x04, /* 00000248 "....G..." */ - 0xD0,0x04,0x00,0x02,0x79,0x00,0x14,0x0B, /* 00000250 "....y..." 
*/ - 0x5F,0x43,0x52,0x53,0x00,0xA4,0x43,0x52, /* 00000258 "_CRS..CR" */ - 0x53,0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43, /* 00000260 "S_[.+PIC" */ - 0x5F,0x08,0x5F,0x48,0x49,0x44,0x0B,0x41, /* 00000268 "_._HID.A" */ - 0xD0,0x08,0x5F,0x43,0x52,0x53,0x11,0x18, /* 00000270 ".._CRS.." */ - 0x0A,0x15,0x47,0x01,0x20,0x00,0x20,0x00, /* 00000278 "..G. . ." */ - 0x01,0x02,0x47,0x01,0xA0,0x00,0xA0,0x00, /* 00000280 "..G....." */ - 0x01,0x02,0x22,0x04,0x00,0x79,0x00,0x5B, /* 00000288 ".."..y.[" */ - 0x82,0x47,0x05,0x44,0x4D,0x41,0x30,0x08, /* 00000290 ".G.DMA0." */ - 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x02, /* 00000298 "_HID.A.." */ - 0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x41, /* 000002A0 ".._CRS.A" */ - 0x04,0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01, /* 000002A8 "..=*..G." */ - 0x00,0x00,0x00,0x00,0x00,0x10,0x47,0x01, /* 000002B0 "......G." */ - 0x81,0x00,0x81,0x00,0x00,0x03,0x47,0x01, /* 000002B8 "......G." */ - 0x87,0x00,0x87,0x00,0x00,0x01,0x47,0x01, /* 000002C0 "......G." */ - 0x89,0x00,0x89,0x00,0x00,0x03,0x47,0x01, /* 000002C8 "......G." */ - 0x8F,0x00,0x8F,0x00,0x00,0x01,0x47,0x01, /* 000002D0 "......G." */ - 0xC0,0x00,0xC0,0x00,0x00,0x20,0x47,0x01, /* 000002D8 "..... G." */ - 0x80,0x04,0x80,0x04,0x00,0x10,0x79,0x00, /* 000002E0 "......y." */ - 0x5B,0x82,0x25,0x54,0x4D,0x52,0x5F,0x08, /* 000002E8 "[.%TMR_." */ - 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x01, /* 000002F0 "_HID.A.." */ - 0x00,0x08,0x5F,0x43,0x52,0x53,0x11,0x10, /* 000002F8 ".._CRS.." */ - 0x0A,0x0D,0x47,0x01,0x40,0x00,0x40,0x00, /* 00000300 "..G.@.@." */ - 0x00,0x04,0x22,0x01,0x00,0x79,0x00,0x5B, /* 00000308 ".."..y.[" */ - 0x82,0x25,0x52,0x54,0x43,0x5F,0x08,0x5F, /* 00000310 ".%RTC_._" */ - 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00, /* 00000318 "HID.A..." */ - 0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A, /* 00000320 "._CRS..." */ - 0x0D,0x47,0x01,0x70,0x00,0x70,0x00,0x00, /* 00000328 ".G.p.p.." */ - 0x02,0x22,0x00,0x01,0x79,0x00,0x5B,0x82, /* 00000330 "."..y.[." 
*/ - 0x22,0x53,0x50,0x4B,0x52,0x08,0x5F,0x48, /* 00000338 ""SPKR._H" */ - 0x49,0x44,0x0C,0x41,0xD0,0x08,0x00,0x08, /* 00000340 "ID.A...." */ - 0x5F,0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A, /* 00000348 "_CRS...." */ - 0x47,0x01,0x61,0x00,0x61,0x00,0x00,0x01, /* 00000350 "G.a.a..." */ - 0x79,0x00,0x5B,0x82,0x31,0x50,0x53,0x32, /* 00000358 "y.[.1PS2" */ - 0x4D,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000360 "M._HID.A" */ - 0xD0,0x0F,0x13,0x08,0x5F,0x43,0x49,0x44, /* 00000368 "...._CID" */ - 0x0C,0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F, /* 00000370 ".A....._" */ - 0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08, /* 00000378 "STA....." */ - 0x5F,0x43,0x52,0x53,0x11,0x08,0x0A,0x05, /* 00000380 "_CRS...." */ - 0x22,0x00,0x10,0x79,0x00,0x5B,0x82,0x42, /* 00000388 ""..y.[.B" */ - 0x04,0x50,0x53,0x32,0x4B,0x08,0x5F,0x48, /* 00000390 ".PS2K._H" */ - 0x49,0x44,0x0C,0x41,0xD0,0x03,0x03,0x08, /* 00000398 "ID.A...." */ - 0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0,0x03, /* 000003A0 "_CID.A.." */ - 0x0B,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 000003A8 "..._STA." */ - 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 000003B0 "...._CRS" */ - 0x11,0x18,0x0A,0x15,0x47,0x01,0x60,0x00, /* 000003B8 "....G.`." */ - 0x60,0x00,0x00,0x01,0x47,0x01,0x64,0x00, /* 000003C0 "`...G.d." */ - 0x64,0x00,0x00,0x01,0x22,0x02,0x00,0x79, /* 000003C8 "d..."..y" */ - 0x00,0x5B,0x82,0x3A,0x46,0x44,0x43,0x30, /* 000003D0 ".[.:FDC0" */ - 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000003D8 "._HID.A." */ - 0x07,0x00,0x14,0x09,0x5F,0x53,0x54,0x41, /* 000003E0 "...._STA" */ - 0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52, /* 000003E8 "....._CR" */ - 0x53,0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0, /* 000003F0 "S....G.." */ - 0x03,0xF0,0x03,0x01,0x06,0x47,0x01,0xF7, /* 000003F8 ".....G.." */ - 0x03,0xF7,0x03,0x01,0x01,0x22,0x40,0x00, /* 00000400 "....."@." 
*/ - 0x2A,0x04,0x00,0x79,0x00,0x5B,0x82,0x35, /* 00000408 "*..y.[.5" */ - 0x55,0x41,0x52,0x31,0x08,0x5F,0x48,0x49, /* 00000410 "UAR1._HI" */ - 0x44,0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F, /* 00000418 "D.A...._" */ - 0x55,0x49,0x44,0x01,0x14,0x09,0x5F,0x53, /* 00000420 "UID..._S" */ - 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000428 "TA....._" */ - 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000430 "CRS....G" */ - 0x01,0xF8,0x03,0xF8,0x03,0x01,0x08,0x22, /* 00000438 "......."" */ - 0x10,0x00,0x79,0x00,0x5B,0x82,0x36,0x55, /* 00000440 "..y.[.6U" */ - 0x41,0x52,0x32,0x08,0x5F,0x48,0x49,0x44, /* 00000448 "AR2._HID" */ - 0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55, /* 00000450 ".A...._U" */ - 0x49,0x44,0x0A,0x02,0x14,0x09,0x5F,0x53, /* 00000458 "ID...._S" */ - 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000460 "TA....._" */ - 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000468 "CRS....G" */ - 0x01,0xF8,0x02,0xF8,0x02,0x01,0x08,0x22, /* 00000470 "......."" */ - 0x08,0x00,0x79,0x00, + 0x00,0x08,0x50,0x49,0x43,0x44,0x00,0x14, /* 000000A0 "..PICD.." */ + 0x0C,0x5F,0x50,0x49,0x43,0x01,0x70,0x68, /* 000000A8 "._PIC.ph" */ + 0x50,0x49,0x43,0x44,0x10,0x4E,0x80,0x5F, /* 000000B0 "PICD.N._" */ + 0x53,0x42,0x5F,0x5B,0x82,0x46,0x80,0x50, /* 000000B8 "SB_[.F.P" */ + 0x43,0x49,0x30,0x08,0x5F,0x48,0x49,0x44, /* 000000C0 "CI0._HID" */ + 0x0C,0x41,0xD0,0x0A,0x03,0x08,0x5F,0x55, /* 000000C8 ".A...._U" */ + 0x49,0x44,0x00,0x08,0x5F,0x41,0x44,0x52, /* 000000D0 "ID.._ADR" */ + 0x00,0x08,0x5F,0x42,0x42,0x4E,0x00,0x5B, /* 000000D8 ".._BBN.[" */ + 0x80,0x50,0x49,0x52,0x50,0x02,0x0A,0x3C, /* 000000E0 ".PIRP..<" */ + 0x0A,0x10,0x5B,0x81,0x24,0x50,0x49,0x52, /* 000000E8 "..[.$PIR" */ + 0x50,0x01,0x49,0x52,0x51,0x33,0x03,0x49, /* 000000F0 "P.IRQ3.I" */ + 0x52,0x51,0x35,0x05,0x49,0x52,0x51,0x37, /* 000000F8 "RQ5.IRQ7" */ + 0x07,0x49,0x52,0x51,0x39,0x09,0x49,0x52, /* 00000100 ".IRQ9.IR" */ + 0x51,0x41,0x0A,0x49,0x52,0x51,0x42,0x0B, /* 00000108 "QA.IRQB." 
*/ + 0x14,0x48,0x0D,0x5F,0x43,0x52,0x53,0x00, /* 00000110 ".H._CRS." */ + 0x08,0x50,0x52,0x54,0x30,0x11,0x46,0x0C, /* 00000118 ".PRT0.F." */ + 0x0A,0xC2,0x88,0x0D,0x00,0x02,0x0F,0x00, /* 00000120 "........" */ + 0x00,0x00,0x00,0x00,0xFF,0x00,0x00,0x00, /* 00000128 "........" */ + 0x00,0x01,0x47,0x01,0xF8,0x0C,0xF8,0x0C, /* 00000130 "..G....." */ + 0x01,0x08,0x88,0x0D,0x00,0x01,0x0C,0x03, /* 00000138 "........" */ + 0x00,0x00,0x00,0x00,0xF7,0x0C,0x00,0x00, /* 00000140 "........" */ + 0xF8,0x0C,0x88,0x0D,0x00,0x01,0x0C,0x03, /* 00000148 "........" */ + 0x00,0x00,0x00,0x0D,0xFF,0x0F,0x00,0x00, /* 00000150 "........" */ + 0x00,0x03,0x88,0x0D,0x00,0x01,0x0D,0x03, /* 00000158 "........" */ + 0x00,0x00,0x00,0xC0,0x1F,0xC0,0x00,0x00, /* 00000160 "........" */ + 0x20,0x00,0x88,0x0D,0x00,0x01,0x0D,0x03, /* 00000168 " ......." */ + 0x00,0x00,0x20,0xC0,0x3F,0xC0,0x00,0x00, /* 00000170 ".. .?..." */ + 0x10,0x00,0x87,0x17,0x00,0x00,0x0C,0x02, /* 00000178 "........" */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x0C,0x00, /* 00000180 "........" */ + 0xFF,0xFF,0x0F,0x00,0x00,0x00,0x00,0x00, /* 00000188 "........" */ + 0x00,0x00,0x03,0x00,0x87,0x17,0x00,0x00, /* 00000190 "........" */ + 0x0D,0x03,0x00,0x00,0x00,0x00,0x00,0x00, /* 00000198 "........" */ + 0x00,0xF0,0xFF,0xFF,0xFF,0xF1,0x00,0x00, /* 000001A0 "........" */ + 0x00,0x00,0x00,0x00,0x00,0x02,0x87,0x17, /* 000001A8 "........" */ + 0x00,0x00,0x0D,0x03,0x00,0x00,0x00,0x00, /* 000001B0 "........" */ + 0x00,0x00,0x00,0xF2,0xFF,0x0F,0x00,0xF2, /* 000001B8 "........" */ + 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00, /* 000001C0 "........" */ + 0x87,0x17,0x00,0x00,0x0D,0x03,0x00,0x00, /* 000001C8 "........" */ + 0x00,0x00,0x00,0x10,0x00,0xF2,0x1F,0x10, /* 000001D0 "........" */ + 0x00,0xF2,0x00,0x00,0x00,0x00,0x20,0x00, /* 000001D8 "...... ." */ + 0x00,0x00,0x79,0x00,0xA4,0x50,0x52,0x54, /* 000001E0 "..y..PRT" */ + 0x30,0x08,0x42,0x55,0x46,0x41,0x11,0x09, /* 000001E8 "0.BUFA.." 
*/ + 0x0A,0x06,0x23,0xF8,0xDC,0x18,0x79,0x00, /* 000001F0 "..#...y." */ + 0x08,0x42,0x55,0x46,0x42,0x11,0x09,0x0A, /* 000001F8 ".BUFB..." */ + 0x06,0x23,0x00,0x00,0x18,0x79,0x00,0x8B, /* 00000200 ".#...y.." */ + 0x42,0x55,0x46,0x42,0x01,0x49,0x52,0x51, /* 00000208 "BUFB.IRQ" */ + 0x56,0x08,0x42,0x55,0x46,0x43,0x11,0x07, /* 00000210 "V.BUFC.." */ + 0x0A,0x04,0x05,0x07,0x0A,0x0B,0x8C,0x42, /* 00000218 ".......B" */ + 0x55,0x46,0x43,0x01,0x50,0x49,0x51,0x41, /* 00000220 "UFC.PIQA" */ + 0x8C,0x42,0x55,0x46,0x43,0x01,0x50,0x49, /* 00000228 ".BUFC.PI" */ + 0x51,0x42,0x8C,0x42,0x55,0x46,0x43,0x01, /* 00000230 "QB.BUFC." */ + 0x50,0x49,0x51,0x43,0x8C,0x42,0x55,0x46, /* 00000238 "PIQC.BUF" */ + 0x43,0x01,0x50,0x49,0x51,0x44,0x5B,0x82, /* 00000240 "C.PIQD[." */ + 0x48,0x08,0x4C,0x4E,0x4B,0x41,0x08,0x5F, /* 00000248 "H.LNKA._" */ + 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 00000250 "HID.A..." */ + 0x08,0x5F,0x55,0x49,0x44,0x01,0x14,0x1C, /* 00000258 "._UID..." */ + 0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,0x49, /* 00000260 "_STA.{PI" */ + 0x52,0x41,0x0A,0x80,0x60,0xA0,0x08,0x93, /* 00000268 "RA..`..." */ + 0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,0x04, /* 00000270 "`......." */ + 0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,0x52, /* 00000278 "....._PR" */ + 0x53,0x00,0xA4,0x42,0x55,0x46,0x41,0x14, /* 00000280 "S..BUFA." */ + 0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,0x50, /* 00000288 "._DIS.}P" */ + 0x49,0x52,0x41,0x0A,0x80,0x50,0x49,0x52, /* 00000290 "IRA..PIR" */ + 0x41,0x14,0x1A,0x5F,0x43,0x52,0x53,0x00, /* 00000298 "A.._CRS." */ + 0x7B,0x50,0x49,0x52,0x42,0x0A,0x0F,0x60, /* 000002A0 "{PIRB..`" */ + 0x79,0x01,0x60,0x49,0x52,0x51,0x56,0xA4, /* 000002A8 "y.`IRQV." 
*/ + 0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,0x53, /* 000002B0 "BUFB.._S" */ + 0x52,0x53,0x01,0x8B,0x68,0x01,0x49,0x52, /* 000002B8 "RS..h.IR" */ + 0x51,0x31,0x82,0x49,0x52,0x51,0x31,0x60, /* 000002C0 "Q1.IRQ1`" */ + 0x76,0x60,0x70,0x60,0x50,0x49,0x52,0x41, /* 000002C8 "v`p`PIRA" */ + 0x5B,0x82,0x49,0x08,0x4C,0x4E,0x4B,0x42, /* 000002D0 "[.I.LNKB" */ + 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000002D8 "._HID.A." */ + 0x0C,0x0F,0x08,0x5F,0x55,0x49,0x44,0x0A, /* 000002E0 "..._UID." */ + 0x02,0x14,0x1C,0x5F,0x53,0x54,0x41,0x00, /* 000002E8 "..._STA." */ + 0x7B,0x50,0x49,0x52,0x42,0x0A,0x80,0x60, /* 000002F0 "{PIRB..`" */ + 0xA0,0x08,0x93,0x60,0x0A,0x80,0xA4,0x0A, /* 000002F8 "...`...." */ + 0x09,0xA1,0x04,0xA4,0x0A,0x0B,0x14,0x0B, /* 00000300 "........" */ + 0x5F,0x50,0x52,0x53,0x00,0xA4,0x42,0x55, /* 00000308 "_PRS..BU" */ + 0x46,0x41,0x14,0x11,0x5F,0x44,0x49,0x53, /* 00000310 "FA.._DIS" */ + 0x00,0x7D,0x50,0x49,0x52,0x42,0x0A,0x80, /* 00000318 ".}PIRB.." */ + 0x50,0x49,0x52,0x42,0x14,0x1A,0x5F,0x43, /* 00000320 "PIRB.._C" */ + 0x52,0x53,0x00,0x7B,0x50,0x49,0x52,0x42, /* 00000328 "RS.{PIRB" */ + 0x0A,0x0F,0x60,0x79,0x01,0x60,0x49,0x52, /* 00000330 "..`y.`IR" */ + 0x51,0x56,0xA4,0x42,0x55,0x46,0x42,0x14, /* 00000338 "QV.BUFB." */ + 0x1B,0x5F,0x53,0x52,0x53,0x01,0x8B,0x68, /* 00000340 "._SRS..h" */ + 0x01,0x49,0x52,0x51,0x31,0x82,0x49,0x52, /* 00000348 ".IRQ1.IR" */ + 0x51,0x31,0x60,0x76,0x60,0x70,0x60,0x50, /* 00000350 "Q1`v`p`P" */ + 0x49,0x52,0x42,0x5B,0x82,0x49,0x08,0x4C, /* 00000358 "IRB[.I.L" */ + 0x4E,0x4B,0x43,0x08,0x5F,0x48,0x49,0x44, /* 00000360 "NKC._HID" */ + 0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F,0x55, /* 00000368 ".A...._U" */ + 0x49,0x44,0x0A,0x03,0x14,0x1C,0x5F,0x53, /* 00000370 "ID...._S" */ + 0x54,0x41,0x00,0x7B,0x50,0x49,0x52,0x43, /* 00000378 "TA.{PIRC" */ + 0x0A,0x80,0x60,0xA0,0x08,0x93,0x60,0x0A, /* 00000380 "..`...`." */ + 0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4,0x0A, /* 00000388 "........" 
*/ + 0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53,0x00, /* 00000390 "..._PRS." */ + 0xA4,0x42,0x55,0x46,0x41,0x14,0x11,0x5F, /* 00000398 ".BUFA.._" */ + 0x44,0x49,0x53,0x00,0x7D,0x50,0x49,0x52, /* 000003A0 "DIS.}PIR" */ + 0x43,0x0A,0x80,0x50,0x49,0x52,0x43,0x14, /* 000003A8 "C..PIRC." */ + 0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B,0x50, /* 000003B0 "._CRS.{P" */ + 0x49,0x52,0x43,0x0A,0x0F,0x60,0x79,0x01, /* 000003B8 "IRC..`y." */ + 0x60,0x49,0x52,0x51,0x56,0xA4,0x42,0x55, /* 000003C0 "`IRQV.BU" */ + 0x46,0x42,0x14,0x1B,0x5F,0x53,0x52,0x53, /* 000003C8 "FB.._SRS" */ + 0x01,0x8B,0x68,0x01,0x49,0x52,0x51,0x31, /* 000003D0 "..h.IRQ1" */ + 0x82,0x49,0x52,0x51,0x31,0x60,0x76,0x60, /* 000003D8 ".IRQ1`v`" */ + 0x70,0x60,0x50,0x49,0x52,0x43,0x5B,0x82, /* 000003E0 "p`PIRC[." */ + 0x49,0x08,0x4C,0x4E,0x4B,0x44,0x08,0x5F, /* 000003E8 "I.LNKD._" */ + 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 000003F0 "HID.A..." */ + 0x08,0x5F,0x55,0x49,0x44,0x0A,0x04,0x14, /* 000003F8 "._UID..." */ + 0x1C,0x5F,0x53,0x54,0x41,0x00,0x7B,0x50, /* 00000400 "._STA.{P" */ + 0x49,0x52,0x44,0x0A,0x80,0x60,0xA0,0x08, /* 00000408 "IRD..`.." */ + 0x93,0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1, /* 00000410 ".`......" */ + 0x04,0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50, /* 00000418 "......_P" */ + 0x52,0x53,0x00,0xA4,0x42,0x55,0x46,0x41, /* 00000420 "RS..BUFA" */ + 0x14,0x11,0x5F,0x44,0x49,0x53,0x00,0x7D, /* 00000428 ".._DIS.}" */ + 0x50,0x49,0x52,0x44,0x0A,0x80,0x50,0x49, /* 00000430 "PIRD..PI" */ + 0x52,0x44,0x14,0x1A,0x5F,0x43,0x52,0x53, /* 00000438 "RD.._CRS" */ + 0x00,0x7B,0x50,0x49,0x52,0x44,0x0A,0x0F, /* 00000440 ".{PIRD.." 
*/ + 0x60,0x79,0x01,0x60,0x49,0x52,0x51,0x56, /* 00000448 "`y.`IRQV" */ + 0xA4,0x42,0x55,0x46,0x42,0x14,0x1B,0x5F, /* 00000450 ".BUFB.._" */ + 0x53,0x52,0x53,0x01,0x8B,0x68,0x01,0x49, /* 00000458 "SRS..h.I" */ + 0x52,0x51,0x31,0x82,0x49,0x52,0x51,0x31, /* 00000460 "RQ1.IRQ1" */ + 0x60,0x76,0x60,0x70,0x60,0x50,0x49,0x52, /* 00000468 "`v`p`PIR" */ + 0x44,0x14,0x16,0x5F,0x50,0x52,0x54,0x00, /* 00000470 "D.._PRT." */ + 0xA0,0x0A,0x50,0x49,0x43,0x44,0xA4,0x50, /* 00000478 "..PICD.P" */ + 0x52,0x54,0x41,0xA4,0x50,0x52,0x54,0x50, /* 00000480 "RTA.PRTP" */ + 0x08,0x50,0x52,0x54,0x50,0x12,0x43,0x0E, /* 00000488 ".PRTP.C." */ + 0x10,0x12,0x0B,0x04,0x0B,0xFF,0xFF,0x00, /* 00000490 "........" */ + 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0B,0x04, /* 00000498 "LNKA...." */ + 0x0B,0xFF,0xFF,0x01,0x4C,0x4E,0x4B,0x42, /* 000004A0 "....LNKB" */ + 0x00,0x12,0x0C,0x04,0x0B,0xFF,0xFF,0x0A, /* 000004A8 "........" */ + 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0C, /* 000004B0 ".LNKC..." */ + 0x04,0x0B,0xFF,0xFF,0x0A,0x03,0x4C,0x4E, /* 000004B8 "......LN" */ + 0x4B,0x44,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000004C0 "KD......" */ + 0xFF,0x01,0x00,0x00,0x4C,0x4E,0x4B,0x42, /* 000004C8 "....LNKB" */ + 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x01, /* 000004D0 "........" */ + 0x00,0x01,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 000004D8 "..LNKC.." */ + 0x0E,0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A, /* 000004E0 "........" */ + 0x02,0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E, /* 000004E8 ".LNKD..." */ + 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x03, /* 000004F0 "........" */ + 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0D,0x04, /* 000004F8 "LNKA...." */ + 0x0C,0xFF,0xFF,0x02,0x00,0x00,0x4C,0x4E, /* 00000500 "......LN" */ + 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000508 "KC......" */ + 0xFF,0x02,0x00,0x01,0x4C,0x4E,0x4B,0x44, /* 00000510 "....LNKD" */ + 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02, /* 00000518 "........" */ + 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x41,0x00, /* 00000520 "...LNKA." 
*/ + 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 00000528 "........" */ + 0x0A,0x03,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000530 "..LNKB.." */ + 0x0D,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x00, /* 00000538 "........" */ + 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000540 "LNKD...." */ + 0x0C,0xFF,0xFF,0x03,0x00,0x01,0x4C,0x4E, /* 00000548 "......LN" */ + 0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000550 "KA......" */ + 0xFF,0x03,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000558 ".....LNK" */ + 0x42,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000560 "B......." */ + 0x03,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x43, /* 00000568 "....LNKC" */ + 0x00,0x08,0x50,0x52,0x54,0x41,0x12,0x32, /* 00000570 "..PRTA.2" */ + 0x04,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01, /* 00000578 "........" */ + 0x00,0x00,0x00,0x0A,0x05,0x12,0x0B,0x04, /* 00000580 "........" */ + 0x0C,0xFF,0xFF,0x02,0x00,0x00,0x00,0x0A, /* 00000588 "........" */ + 0x07,0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03, /* 00000590 "........" */ + 0x00,0x00,0x00,0x0A,0x0A,0x12,0x0B,0x04, /* 00000598 "........" */ + 0x0C,0xFF,0xFF,0x03,0x00,0x00,0x00,0x0A, /* 000005A0 "........" */ + 0x0B,0x5B,0x82,0x48,0x31,0x49,0x53,0x41, /* 000005A8 ".[.H1ISA" */ + 0x5F,0x08,0x5F,0x41,0x44,0x52,0x00,0x5B, /* 000005B0 "_._ADR.[" */ + 0x80,0x50,0x49,0x52,0x51,0x02,0x0A,0x60, /* 000005B8 ".PIRQ..`" */ + 0x0A,0x04,0x10,0x2E,0x5C,0x00,0x5B,0x81, /* 000005C0 "....\.[." 
*/ + 0x29,0x5C,0x2F,0x04,0x5F,0x53,0x42,0x5F, /* 000005C8 ")\/._SB_" */ + 0x50,0x43,0x49,0x30,0x49,0x53,0x41,0x5F, /* 000005D0 "PCI0ISA_" */ + 0x50,0x49,0x52,0x51,0x01,0x50,0x49,0x52, /* 000005D8 "PIRQ.PIR" */ + 0x41,0x08,0x50,0x49,0x52,0x42,0x08,0x50, /* 000005E0 "A.PIRB.P" */ + 0x49,0x52,0x43,0x08,0x50,0x49,0x52,0x44, /* 000005E8 "IRC.PIRD" */ + 0x08,0x5B,0x82,0x46,0x0B,0x53,0x59,0x53, /* 000005F0 ".[.F.SYS" */ + 0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 000005F8 "R._HID.A" */ + 0xD0,0x0C,0x02,0x08,0x5F,0x55,0x49,0x44, /* 00000600 "...._UID" */ + 0x01,0x08,0x43,0x52,0x53,0x5F,0x11,0x4E, /* 00000608 "..CRS_.N" */ + 0x08,0x0A,0x8A,0x47,0x01,0x10,0x00,0x10, /* 00000610 "...G...." */ + 0x00,0x00,0x10,0x47,0x01,0x22,0x00,0x22, /* 00000618 "...G."."" */ + 0x00,0x00,0x0C,0x47,0x01,0x30,0x00,0x30, /* 00000620 "...G.0.0" */ + 0x00,0x00,0x10,0x47,0x01,0x44,0x00,0x44, /* 00000628 "...G.D.D" */ + 0x00,0x00,0x1C,0x47,0x01,0x62,0x00,0x62, /* 00000630 "...G.b.b" */ + 0x00,0x00,0x02,0x47,0x01,0x65,0x00,0x65, /* 00000638 "...G.e.e" */ + 0x00,0x00,0x0B,0x47,0x01,0x72,0x00,0x72, /* 00000640 "...G.r.r" */ + 0x00,0x00,0x0E,0x47,0x01,0x80,0x00,0x80, /* 00000648 "...G...." */ + 0x00,0x00,0x01,0x47,0x01,0x84,0x00,0x84, /* 00000650 "...G...." */ + 0x00,0x00,0x03,0x47,0x01,0x88,0x00,0x88, /* 00000658 "...G...." */ + 0x00,0x00,0x01,0x47,0x01,0x8C,0x00,0x8C, /* 00000660 "...G...." */ + 0x00,0x00,0x03,0x47,0x01,0x90,0x00,0x90, /* 00000668 "...G...." */ + 0x00,0x00,0x10,0x47,0x01,0xA2,0x00,0xA2, /* 00000670 "...G...." */ + 0x00,0x00,0x1C,0x47,0x01,0xE0,0x00,0xE0, /* 00000678 "...G...." */ + 0x00,0x00,0x10,0x47,0x01,0xA0,0x08,0xA0, /* 00000680 "...G...." */ + 0x08,0x00,0x04,0x47,0x01,0xC0,0x0C,0xC0, /* 00000688 "...G...." */ + 0x0C,0x00,0x10,0x47,0x01,0xD0,0x04,0xD0, /* 00000690 "...G...." 
*/ + 0x04,0x00,0x02,0x79,0x00,0x14,0x0B,0x5F, /* 00000698 "...y..._" */ + 0x43,0x52,0x53,0x00,0xA4,0x43,0x52,0x53, /* 000006A0 "CRS..CRS" */ + 0x5F,0x5B,0x82,0x2B,0x50,0x49,0x43,0x5F, /* 000006A8 "_[.+PIC_" */ + 0x08,0x5F,0x48,0x49,0x44,0x0B,0x41,0xD0, /* 000006B0 "._HID.A." */ + 0x08,0x5F,0x43,0x52,0x53,0x11,0x18,0x0A, /* 000006B8 "._CRS..." */ + 0x15,0x47,0x01,0x20,0x00,0x20,0x00,0x01, /* 000006C0 ".G. . .." */ + 0x02,0x47,0x01,0xA0,0x00,0xA0,0x00,0x01, /* 000006C8 ".G......" */ + 0x02,0x22,0x04,0x00,0x79,0x00,0x5B,0x82, /* 000006D0 "."..y.[." */ + 0x47,0x05,0x44,0x4D,0x41,0x30,0x08,0x5F, /* 000006D8 "G.DMA0._" */ + 0x48,0x49,0x44,0x0C,0x41,0xD0,0x02,0x00, /* 000006E0 "HID.A..." */ + 0x08,0x5F,0x43,0x52,0x53,0x11,0x41,0x04, /* 000006E8 "._CRS.A." */ + 0x0A,0x3D,0x2A,0x10,0x04,0x47,0x01,0x00, /* 000006F0 ".=*..G.." */ + 0x00,0x00,0x00,0x00,0x10,0x47,0x01,0x81, /* 000006F8 ".....G.." */ + 0x00,0x81,0x00,0x00,0x03,0x47,0x01,0x87, /* 00000700 ".....G.." */ + 0x00,0x87,0x00,0x00,0x01,0x47,0x01,0x89, /* 00000708 ".....G.." */ + 0x00,0x89,0x00,0x00,0x03,0x47,0x01,0x8F, /* 00000710 ".....G.." */ + 0x00,0x8F,0x00,0x00,0x01,0x47,0x01,0xC0, /* 00000718 ".....G.." */ + 0x00,0xC0,0x00,0x00,0x20,0x47,0x01,0x80, /* 00000720 ".... G.." */ + 0x04,0x80,0x04,0x00,0x10,0x79,0x00,0x5B, /* 00000728 ".....y.[" */ + 0x82,0x25,0x54,0x4D,0x52,0x5F,0x08,0x5F, /* 00000730 ".%TMR_._" */ + 0x48,0x49,0x44,0x0C,0x41,0xD0,0x01,0x00, /* 00000738 "HID.A..." */ + 0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A, /* 00000740 "._CRS..." */ + 0x0D,0x47,0x01,0x40,0x00,0x40,0x00,0x00, /* 00000748 ".G.@.@.." */ + 0x04,0x22,0x01,0x00,0x79,0x00,0x5B,0x82, /* 00000750 "."..y.[." */ + 0x25,0x52,0x54,0x43,0x5F,0x08,0x5F,0x48, /* 00000758 "%RTC_._H" */ + 0x49,0x44,0x0C,0x41,0xD0,0x0B,0x00,0x08, /* 00000760 "ID.A...." */ + 0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D, /* 00000768 "_CRS...." */ + 0x47,0x01,0x70,0x00,0x70,0x00,0x00,0x02, /* 00000770 "G.p.p..." 
*/ + 0x22,0x00,0x01,0x79,0x00,0x5B,0x82,0x22, /* 00000778 ""..y.[."" */ + 0x53,0x50,0x4B,0x52,0x08,0x5F,0x48,0x49, /* 00000780 "SPKR._HI" */ + 0x44,0x0C,0x41,0xD0,0x08,0x00,0x08,0x5F, /* 00000788 "D.A...._" */ + 0x43,0x52,0x53,0x11,0x0D,0x0A,0x0A,0x47, /* 00000790 "CRS....G" */ + 0x01,0x61,0x00,0x61,0x00,0x00,0x01,0x79, /* 00000798 ".a.a...y" */ + 0x00,0x5B,0x82,0x31,0x50,0x53,0x32,0x4D, /* 000007A0 ".[.1PS2M" */ + 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000007A8 "._HID.A." */ + 0x0F,0x13,0x08,0x5F,0x43,0x49,0x44,0x0C, /* 000007B0 "..._CID." */ + 0x41,0xD0,0x0F,0x13,0x14,0x09,0x5F,0x53, /* 000007B8 "A....._S" */ + 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 000007C0 "TA....._" */ + 0x43,0x52,0x53,0x11,0x08,0x0A,0x05,0x22, /* 000007C8 "CRS...."" */ + 0x00,0x10,0x79,0x00,0x5B,0x82,0x42,0x04, /* 000007D0 "..y.[.B." */ + 0x50,0x53,0x32,0x4B,0x08,0x5F,0x48,0x49, /* 000007D8 "PS2K._HI" */ + 0x44,0x0C,0x41,0xD0,0x03,0x03,0x08,0x5F, /* 000007E0 "D.A...._" */ + 0x43,0x49,0x44,0x0C,0x41,0xD0,0x03,0x0B, /* 000007E8 "CID.A..." */ + 0x14,0x09,0x5F,0x53,0x54,0x41,0x00,0xA4, /* 000007F0 ".._STA.." */ + 0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,0x11, /* 000007F8 "..._CRS." */ + 0x18,0x0A,0x15,0x47,0x01,0x60,0x00,0x60, /* 00000800 "...G.`.`" */ + 0x00,0x00,0x01,0x47,0x01,0x64,0x00,0x64, /* 00000808 "...G.d.d" */ + 0x00,0x00,0x01,0x22,0x02,0x00,0x79,0x00, /* 00000810 "..."..y." */ + 0x5B,0x82,0x3A,0x46,0x44,0x43,0x30,0x08, /* 00000818 "[.:FDC0." */ + 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x07, /* 00000820 "_HID.A.." */ + 0x00,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 00000828 "..._STA." */ + 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 00000830 "...._CRS" */ + 0x11,0x1B,0x0A,0x18,0x47,0x01,0xF0,0x03, /* 00000838 "....G..." */ + 0xF0,0x03,0x01,0x06,0x47,0x01,0xF7,0x03, /* 00000840 "....G..." 
*/ + 0xF7,0x03,0x01,0x01,0x22,0x40,0x00,0x2A, /* 00000848 "...."@.*" */ + 0x04,0x00,0x79,0x00,0x5B,0x82,0x35,0x55, /* 00000850 "..y.[.5U" */ + 0x41,0x52,0x31,0x08,0x5F,0x48,0x49,0x44, /* 00000858 "AR1._HID" */ + 0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55, /* 00000860 ".A...._U" */ + 0x49,0x44,0x01,0x14,0x09,0x5F,0x53,0x54, /* 00000868 "ID..._ST" */ + 0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 00000870 "A....._C" */ + 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 00000878 "RS....G." */ + 0xF8,0x03,0xF8,0x03,0x01,0x08,0x22,0x10, /* 00000880 "......"." */ + 0x00,0x79,0x00,0x5B,0x82,0x36,0x55,0x41, /* 00000888 ".y.[.6UA" */ + 0x52,0x32,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 00000890 "R2._HID." */ + 0x41,0xD0,0x05,0x01,0x08,0x5F,0x55,0x49, /* 00000898 "A...._UI" */ + 0x44,0x0A,0x02,0x14,0x09,0x5F,0x53,0x54, /* 000008A0 "D...._ST" */ + 0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 000008A8 "A....._C" */ + 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 000008B0 "RS....G." */ + 0xF8,0x02,0xF8,0x02,0x01,0x08,0x22,0x08, /* 000008B8 "......"." */ + 0x00,0x79,0x00, }; int DsdtLen=sizeof(AmlCode); diff -r 59d4c1863330 -r fdf25330e4a6 tools/firmware/acpi/acpi_fadt.h --- a/tools/firmware/acpi/acpi_fadt.h Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/firmware/acpi/acpi_fadt.h Fri Jun 23 15:33:25 2006 -0600 @@ -22,19 +22,19 @@ // FADT Definitions, see ACPI 2.0 specification for details. 
// -#define ACPI_OEM_FADT_REVISION 0x00000000 // TBD +#define ACPI_OEM_FADT_REVISION 0x00000001 // TBD -#define ACPI_PREFERRED_PM_PROFILE 0x04 +#define ACPI_PREFERRED_PM_PROFILE 0x00 #define ACPI_SCI_INT 0x0009 -#define ACPI_SMI_CMD 0x000000B2 +#define ACPI_SMI_CMD 0x00000000 #define ACPI_ACPI_ENABLE 0x00 #define ACPI_ACPI_DISABLE 0x00 #define ACPI_S4_BIOS_REQ 0x00 #define ACPI_PSTATE_CNT 0x00 -#define ACPI_GPE1_BASE 0x20 +#define ACPI_GPE1_BASE 0x00 #define ACPI_CST_CNT 0x00 -#define ACPI_P_LVL2_LAT 0x0065 -#define ACPI_P_LVL3_LAT 0X03E9 +#define ACPI_P_LVL2_LAT 0x0064 +#define ACPI_P_LVL3_LAT 0X03E8 #define ACPI_FLUSH_SIZE 0x00 #define ACPI_FLUSH_STRIDE 0x00 #define ACPI_DUTY_OFFSET 0x01 @@ -51,15 +51,16 @@ // // Fixed Feature Flags // -#define ACPI_FIXED_FEATURE_FLAGS (ACPI_SLP_BUTTON| ACPI_WBINVD ) +#define ACPI_FIXED_FEATURE_FLAGS (ACPI_PROC_C1|ACPI_SLP_BUTTON|ACPI_WBINVD|ACPI_PWR_BUTTON|ACPI_FIX_RTC) // // PM1A Event Register Block Generic Address Information // #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO -#define ACPI_PM1A_EVT_BLK_BIT_WIDTH 0x00 +#define ACPI_PM1A_EVT_BLK_BIT_WIDTH 0x20 #define ACPI_PM1A_EVT_BLK_BIT_OFFSET 0x00 -#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000008000 +//#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c010 +#define ACPI_PM1A_EVT_BLK_ADDRESS 0x000000000000c040 // // PM1B Event Register Block Generic Address Information @@ -73,7 +74,7 @@ // PM1A Control Register Block Generic Address Information // #define ACPI_PM1A_CNT_BLK_ADDRESS_SPACE_ID ACPI_SYSTEM_IO -#define ACPI_PM1A_CNT_BLK_BIT_WIDTH 0x08 +#define ACPI_PM1A_CNT_BLK_BIT_WIDTH 0x10 #define ACPI_PM1A_CNT_BLK_BIT_OFFSET 0x00 #define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/ioemu/hw/pc.c Fri Jun 23 15:33:25 2006 -0600 @@ -375,7 +375,9 @@ static int serial_io[MAX_SERIAL_PORTS] = static int serial_io[MAX_SERIAL_PORTS] = { 
0x3f8, 0x2f8, 0x3e8, 0x2e8 }; static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 }; -extern int acpi_init(unsigned int base); +//extern int acpi_init(unsigned int base); +/* PIIX4 acpi pci configuration space, func 3 */ +extern void pci_piix4_acpi_init(PCIBus *bus); #define NOBIOS 1 @@ -583,7 +585,9 @@ void pc_init(uint64_t ram_size, int vga_ floppy_controller = fdctrl_init(6, 2, 0, 0x3f0, fd_table); cmos_init(ram_size, boot_device, bs_table, timeoffset); - acpi_init(0x8000); +// using PIIX4 acpi model +// acpi_init(0x8000); + pci_piix4_acpi_init(pci_bus); if (pci_enabled && usb_enabled) { usb_uhci_init(pci_bus, usb_root_ports); diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/pci.c --- a/tools/ioemu/hw/pci.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/ioemu/hw/pci.c Fri Jun 23 15:33:25 2006 -0600 @@ -1394,7 +1394,7 @@ static uint32_t pci_bios_io_addr; static uint32_t pci_bios_io_addr; static uint32_t pci_bios_mem_addr; /* host irqs corresponding to PCI irqs A-D */ -static uint8_t pci_irqs[4] = { 11, 9, 11, 9 }; +static uint8_t pci_irqs[4] = { 10, 11, 10, 11 }; static void pci_set_io_region_addr(PCIDevice *d, int region_num, uint32_t addr) { @@ -1447,12 +1447,22 @@ static void pci_bios_init_device(PCIDevi pci_set_io_region_addr(d, 3, 0x374); } break; + case 0x0680: + if (vendor_id == 0x8086 && device_id == 0x7113) { + // PIIX4 ACPI PM + pci_config_writew(d, 0x20, 0x0000); // NO smb bus IO enable in PIIX4 + pci_config_writew(d, 0x22, 0x0000); + goto default_map; + } + break; + case 0x0300: if (vendor_id != 0x1234) goto default_map; /* VGA: map frame buffer to default Bochs VBE address */ pci_set_io_region_addr(d, 0, 0xE0000000); break; + case 0x0800: /* PIC */ vendor_id = pci_config_readw(d, PCI_VENDOR_ID); @@ -1497,6 +1507,13 @@ static void pci_bios_init_device(PCIDevi pic_irq = pci_irqs[pin]; pci_config_writeb(d, PCI_INTERRUPT_LINE, pic_irq); } + if (class== 0x0680&& vendor_id == 0x8086 && device_id == 0x7113) { + // PIIX4 ACPI PM + pci_config_writew(d, 
0x20, 0x0000); // NO smb bus IO enable in PIIX4 + pci_config_writew(d, 0x22, 0x0000); + pci_config_writew(d, 0x3c, 0x0009); // Hardcodeed IRQ9 + pci_config_writew(d, 0x3d, 0x0001); + } } /* diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/target-i386-dm/Makefile --- a/tools/ioemu/target-i386-dm/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/ioemu/target-i386-dm/Makefile Fri Jun 23 15:33:25 2006 -0600 @@ -281,7 +281,7 @@ VL_OBJS+= usb.o usb-hub.o usb-uhci.o usb # Hardware support VL_OBJS+= ide.o ne2000.o pckbd.o vga.o dma.o VL_OBJS+= fdc.o mc146818rtc.o serial.o i8259_stub.o pc.o port-e9.o -VL_OBJS+= cirrus_vga.o pcnet.o acpi.o +VL_OBJS+= cirrus_vga.o pcnet.o piix4acpi.o VL_OBJS+= $(SOUND_HW) $(AUDIODRV) mixeng.o ifeq ($(TARGET_ARCH), ppc) diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/libxc/xc_domain.c Fri Jun 23 15:33:25 2006 -0600 @@ -283,6 +283,17 @@ int xc_domain_setmaxmem(int xc_handle, op.cmd = DOM0_SETDOMAINMAXMEM; op.u.setdomainmaxmem.domain = (domid_t)domid; op.u.setdomainmaxmem.max_memkb = max_memkb; + return do_dom0_op(xc_handle, &op); +} + +int xc_domain_set_time_offset(int xc_handle, + uint32_t domid, + int32_t time_offset_seconds) +{ + DECLARE_DOM0_OP; + op.cmd = DOM0_SETTIMEOFFSET; + op.u.settimeoffset.domain = (domid_t)domid; + op.u.settimeoffset.time_offset_seconds = time_offset_seconds; return do_dom0_op(xc_handle, &op); } diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/libxc/xc_linux_restore.c Fri Jun 23 15:33:25 2006 -0600 @@ -572,42 +572,48 @@ int xc_linux_restore(int xc_handle, int nr_pins = 0; for (i = 0; i < max_pfn; i++) { - if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) { + if ( (pfn_type[i] & LPINTAB) == 0 ) + continue; + + switch (pfn_type[i]) { + + case (L1TAB|LPINTAB): + pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; + break; + + case (L2TAB|LPINTAB): 
+ pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; + break; + + case (L3TAB|LPINTAB): + pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; + break; + + case (L4TAB|LPINTAB): + pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; + break; + + default: + continue; + } + + pin[nr_pins].arg1.mfn = p2m[i]; + nr_pins++; + + /* Batch full? Then flush. */ + if (nr_pins == MAX_PIN_BATCH) { if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) { ERR("Failed to pin batch of %d page tables", nr_pins); goto out; } nr_pins = 0; } - - if ( (pfn_type[i] & LPINTAB) == 0 ) - continue; - - switch(pfn_type[i]) { - - case (L1TAB|LPINTAB): - pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; - break; - - case (L2TAB|LPINTAB): - pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; - break; - - case (L3TAB|LPINTAB): - pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; - break; - - case (L4TAB|LPINTAB): - pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; - break; - - default: - continue; - } - - pin[nr_pins].arg1.mfn = p2m[i]; - nr_pins++; - + } + + /* Flush final partial batch. */ + if ((nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0)) { + ERR("Failed to pin batch of %d page tables", nr_pins); + goto out; } DPRINTF("\b\b\b\b100%%\n"); diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/libxc/xc_linux_save.c Fri Jun 23 15:33:25 2006 -0600 @@ -91,12 +91,12 @@ static inline int test_bit (int nr, vola static inline void clear_bit (int nr, volatile void * addr) { - BITMAP_ENTRY(nr, addr) &= ~(1 << BITMAP_SHIFT(nr)); + BITMAP_ENTRY(nr, addr) &= ~(1UL << BITMAP_SHIFT(nr)); } static inline void set_bit ( int nr, volatile void * addr) { - BITMAP_ENTRY(nr, addr) |= (1 << BITMAP_SHIFT(nr)); + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); } /* Returns the hamming weight (i.e. 
the number of bits set) in a N-bit word */ diff -r 59d4c1863330 -r fdf25330e4a6 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/libxc/xenctrl.h Fri Jun 23 15:33:25 2006 -0600 @@ -410,6 +410,10 @@ int xc_domain_setmaxmem(int xc_handle, uint32_t domid, unsigned int max_memkb); +int xc_domain_set_time_offset(int xc_handle, + uint32_t domid, + int32_t time_offset_seconds); + int xc_domain_memory_increase_reservation(int xc_handle, uint32_t domid, unsigned long nr_extents, diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/python/xen/lowlevel/xc/xc.c Fri Jun 23 15:33:25 2006 -0600 @@ -869,6 +869,30 @@ static PyObject *pyxc_domain_iomem_permi return zero; } +static PyObject *pyxc_domain_set_time_offset(XcObject *self, PyObject *args) +{ + uint32_t dom; + int32_t time_offset_seconds; + time_t calendar_time; + struct tm local_time; + struct tm utc_time; + + if (!PyArg_ParseTuple(args, "i", &dom)) + return NULL; + + calendar_time = time(NULL); + localtime_r(&calendar_time, &local_time); + gmtime_r(&calendar_time, &utc_time); + /* set up to get calendar time based on utc_time, with local dst setting */ + utc_time.tm_isdst = local_time.tm_isdst; + time_offset_seconds = (int32_t)difftime(calendar_time, mktime(&utc_time)); + + if (xc_domain_set_time_offset(self->xc_handle, dom, time_offset_seconds) != 0) + return NULL; + + Py_INCREF(zero); + return zero; +} static PyObject *dom_op(XcObject *self, PyObject *args, int (*fn)(int, uint32_t)) @@ -1207,6 +1231,13 @@ static PyMethodDef pyxc_methods[] = { METH_VARARGS, "\n" "Returns: [int]: The size in KiB of memory spanning the given number " "of pages.\n" }, + + { "domain_set_time_offset", + (PyCFunction)pyxc_domain_set_time_offset, + METH_VARARGS, "\n" + "Set a domain's time offset to Dom0's localtime\n" + " dom [int]: Domain whose time offset is being set.\n" + "Returns: [int] 0 on 
success; -1 on error.\n" }, { NULL, NULL, 0, NULL } }; diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/util/xmlrpclib2.py --- a/tools/python/xen/util/xmlrpclib2.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/python/xen/util/xmlrpclib2.py Fri Jun 23 15:33:25 2006 -0600 @@ -13,7 +13,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx> -# Copyright (C) 2006 XenSource Ltd. +# Copyright (C) 2006 XenSource Inc. #============================================================================ """ @@ -26,11 +26,18 @@ from httplib import HTTPConnection, HTTP from httplib import HTTPConnection, HTTP from xmlrpclib import Transport from SimpleXMLRPCServer import SimpleXMLRPCServer, SimpleXMLRPCRequestHandler +import SocketServer import xmlrpclib, socket, os, stat -import SocketServer -import xen.xend.XendClient from xen.xend.XendLogging import log + +try: + import SSHTransport + ssh_enabled = True +except ImportError: + # SSHTransport is disabled on Python <2.4, because it uses the subprocess + # package. + ssh_enabled = False # A new ServerProxy that also supports httpu urls. An http URL comes in the @@ -39,6 +46,31 @@ from xen.xend.XendLogging import log # httpu:///absolute/path/to/socket.sock # # It assumes that the RPC handler is /RPC2. This probably needs to be improved + +# We're forced to subclass the RequestHandler class so that we can work around +# some bugs in Keep-Alive handling and also enabled it by default +class XMLRPCRequestHandler(SimpleXMLRPCRequestHandler): + protocol_version = "HTTP/1.1" + + # this is inspired by SimpleXMLRPCRequestHandler's do_POST but differs + # in a few non-trivial ways + # 1) we never generate internal server errors. 
We let the exception + # propagate so that it shows up in the Xend debug logs + # 2) we don't bother checking for a _dispatch function since we don't + # use one + def do_POST(self): + data = self.rfile.read(int(self.headers["content-length"])) + rsp = self.server._marshaled_dispatch(data) + + self.send_response(200) + self.send_header("Content-Type", "text/xml") + self.send_header("Content-Length", str(len(rsp))) + self.end_headers() + + self.wfile.write(rsp) + self.wfile.flush() + if self.close_connection == 1: + self.connection.shutdown(1) class HTTPUnixConnection(HTTPConnection): def connect(self): @@ -75,9 +107,15 @@ class ServerProxy(xmlrpclib.ServerProxy) if protocol == 'httpu': uri = 'http:' + rest transport = UnixTransport() + elif protocol == 'ssh': + global ssh_enabled + if ssh_enabled: + (transport, uri) = SSHTransport.getHTTPURI(uri) + else: + raise ValueError( + "SSH transport not supported on Python <2.4.") xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding, verbose, allow_none) - def __request(self, methodname, params): response = xmlrpclib.ServerProxy.__request(self, methodname, params) @@ -93,6 +131,10 @@ class ServerProxy(xmlrpclib.ServerProxy) class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer): allow_reuse_address = True + + def __init__(self, addr, requestHandler=XMLRPCRequestHandler, + logRequests=1): + SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests) def _marshaled_dispatch(self, data, dispatch_method = None): params, method = xmlrpclib.loads(data) @@ -121,6 +163,7 @@ class TCPXMLRPCServer(SocketServer.Threa except xmlrpclib.Fault, fault: response = xmlrpclib.dumps(fault) except Exception, exn: + import xen.xend.XendClient log.exception(exn) response = xmlrpclib.dumps( xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn))) @@ -131,10 +174,10 @@ class TCPXMLRPCServer(SocketServer.Threa # It implements proper support for allow_reuse_address by # unlink()'ing an existing socket. 
-class UnixXMLRPCRequestHandler(SimpleXMLRPCRequestHandler): +class UnixXMLRPCRequestHandler(XMLRPCRequestHandler): def address_string(self): try: - return SimpleXMLRPCRequestHandler.address_string(self) + return XMLRPCRequestHandler.address_string(self) except ValueError, e: return self.client_address[:2] diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xend/XendClient.py --- a/tools/python/xen/xend/XendClient.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/python/xen/xend/XendClient.py Fri Jun 23 15:33:25 2006 -0600 @@ -18,6 +18,8 @@ #============================================================================ from xen.util.xmlrpclib2 import ServerProxy +import os +import sys XML_RPC_SOCKET = "/var/run/xend/xmlrpc.sock" @@ -25,4 +27,13 @@ ERROR_GENERIC = 2 ERROR_GENERIC = 2 ERROR_INVALID_DOMAIN = 3 -server = ServerProxy('httpu:///var/run/xend/xmlrpc.sock') +uri = 'httpu:///var/run/xend/xmlrpc.sock' +if os.environ.has_key('XM_SERVER'): + uri = os.environ['XM_SERVER'] + +try: + server = ServerProxy(uri) +except ValueError, exn: + print >>sys.stderr, exn + sys.exit(1) + diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Jun 23 15:33:25 2006 -0600 @@ -135,6 +135,7 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [ ('bootloader', str), ('bootloader_args', str), ('features', str), + ('localtime', int), ] ROUNDTRIPPING_CONFIG_ENTRIES += VM_CONFIG_PARAMS @@ -1259,6 +1260,10 @@ class XendDomainInfo: self.image = image.create(self, self.info['image'], self.info['device']) + + localtime = self.info['localtime'] + if localtime is not None and localtime == 1: + xc.domain_set_time_offset(self.domid) xc.domain_setcpuweight(self.domid, self.info['cpu_weight']) diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/python/xen/xm/create.py Fri Jun 23 
15:33:25 2006 -0600 @@ -672,6 +672,8 @@ def make_config(vals): config.append(['backend', ['netif']]) if vals.tpmif: config.append(['backend', ['tpmif']]) + if vals.localtime: + config.append(['localtime', vals.localtime]) config_image = configure_image(vals) if vals.bootloader: diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/python/xen/xm/main.py Fri Jun 23 15:33:25 2006 -0600 @@ -41,6 +41,7 @@ import xen.xend.XendClient import xen.xend.XendClient from xen.xend.XendClient import server from xen.util import security +from select import select # getopt.gnu_getopt is better, but only exists in Python 2.3+. Use # getopt.getopt if gnu_getopt is not available. This will mean that options @@ -124,6 +125,7 @@ loadpolicy_help = "loadpolicy <policy> loadpolicy_help = "loadpolicy <policy> Load binary policy into hypervisor" makepolicy_help = "makepolicy <policy> Build policy and create .bin/.map files" labels_help = "labels [policy] [type=DOM|..] List <type> labels for (active) policy." 
+serve_help = "serve Proxy Xend XML-RPC over stdio" short_command_list = [ "console", @@ -171,7 +173,8 @@ host_commands = [ host_commands = [ "dmesg", "info", - "log" + "log", + "serve", ] scheduler_commands = [ @@ -273,7 +276,7 @@ for command in all_commands: #################################################################### def arg_check(args, name, lo, hi = -1): - n = len(args) + n = len([i for i in args if i != '--']) if hi == -1: if n != lo: @@ -833,6 +836,32 @@ def xm_log(args): arg_check(args, "log", 0) print server.xend.node.log() + +def xm_serve(args): + arg_check(args, "serve", 0) + + from fcntl import fcntl, F_SETFL + + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.connect(xen.xend.XendClient.XML_RPC_SOCKET) + fcntl(sys.stdin, F_SETFL, os.O_NONBLOCK) + + while True: + iwtd, owtd, ewtd = select([sys.stdin, s], [], []) + if s in iwtd: + data = s.recv(4096) + if len(data) > 0: + sys.stdout.write(data) + sys.stdout.flush() + else: + break + if sys.stdin in iwtd: + data = sys.stdin.read(4096) + if len(data) > 0: + s.sendall(data) + else: + break + s.close() def parse_dev_info(info): def get_info(n, t, d): @@ -1072,6 +1101,7 @@ commands = { "dmesg": xm_dmesg, "info": xm_info, "log": xm_log, + "serve": xm_serve, # scheduler "sched-bvt": xm_sched_bvt, "sched-bvt-ctxallow": xm_sched_bvt_ctxallow, diff -r 59d4c1863330 -r fdf25330e4a6 tools/security/secpol_tool.c --- a/tools/security/secpol_tool.c Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/security/secpol_tool.c Fri Jun 23 15:33:25 2006 -0600 @@ -229,6 +229,7 @@ void acm_dump_policy_buffer(void *buf, i #define PULL_CACHE_SIZE 8192 uint8_t pull_buffer[PULL_CACHE_SIZE]; + int acm_domain_getpolicy(int xc_handle) { struct acm_getpolicy getpolicy; @@ -236,7 +237,7 @@ int acm_domain_getpolicy(int xc_handle) memset(pull_buffer, 0x00, sizeof(pull_buffer)); getpolicy.interface_version = ACM_INTERFACE_VERSION; - getpolicy.pullcache = (void *) pull_buffer; + set_xen_guest_handle(getpolicy.pullcache, 
pull_buffer); getpolicy.pullcache_size = sizeof(pull_buffer); ret = xc_acm_op(xc_handle, ACMOP_getpolicy, &getpolicy, sizeof(getpolicy)); @@ -281,7 +282,7 @@ int acm_domain_loadpolicy(int xc_handle, /* dump it and then push it down into xen/acm */ acm_dump_policy_buffer(buffer, len); setpolicy.interface_version = ACM_INTERFACE_VERSION; - setpolicy.pushcache = (void *) buffer; + set_xen_guest_handle(setpolicy.pushcache, buffer); setpolicy.pushcache_size = len; ret = xc_acm_op(xc_handle, ACMOP_setpolicy, &setpolicy, sizeof(setpolicy)); @@ -330,7 +331,7 @@ int acm_domain_dumpstats(int xc_handle) memset(stats_buffer, 0x00, sizeof(stats_buffer)); dumpstats.interface_version = ACM_INTERFACE_VERSION; - dumpstats.pullcache = (void *) stats_buffer; + set_xen_guest_handle(dumpstats.pullcache, stats_buffer); dumpstats.pullcache_size = sizeof(stats_buffer); ret = xc_acm_op(xc_handle, ACMOP_dumpstats, &dumpstats, sizeof(dumpstats)); diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/grouptest/default --- a/tools/xm-test/grouptest/default Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/xm-test/grouptest/default Fri Jun 23 15:33:25 2006 -0600 @@ -21,7 +21,7 @@ reboot reboot restore save -sedf +sched-credit shutdown sysrq unpause diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/grouptest/medium --- a/tools/xm-test/grouptest/medium Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/xm-test/grouptest/medium Fri Jun 23 15:33:25 2006 -0600 @@ -16,7 +16,7 @@ reboot reboot restore 02_restore_badparm_neg.test 03_restore_badfilename_neg.test 04_restore_withdevices_pos.test save -sedf +sched-credit shutdown sysrq 01_sysrq_basic_neg.test 02_sysrq_sync_pos.test unpause diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/lib/XmTestLib/Console.py --- a/tools/xm-test/lib/XmTestLib/Console.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/xm-test/lib/XmTestLib/Console.py Fri Jun 23 15:33:25 2006 -0600 @@ -82,9 +82,6 @@ class XmConsole: tty.setraw(self.consoleFd, termios.TCSANOW) - self.__chewall(self.consoleFd) 
- - def __addToHistory(self, line): self.historyBuffer.append(line) self.historyLines += 1 @@ -120,34 +117,47 @@ class XmConsole: output""" self.PROMPT = prompt - - def __chewall(self, fd): + def __getprompt(self, fd): timeout = 0 - bytes = 0 - - while timeout < 3: - i, o, e = select.select([fd], [], [], 1) - if fd in i: - try: - foo = os.read(fd, 1) - if self.debugMe: - sys.stdout.write(foo) - bytes += 1 - except Exception, exn: - raise ConsoleError(str(exn)) - - else: - timeout += 1 - - if self.limit and bytes >= self.limit: + bytes = 0 + while timeout < 180: + # eat anything while total bytes less than limit else raise RUNAWAY + while (not self.limit) or (bytes < self.limit): + i, o, e = select.select([fd], [], [], 1) + if fd in i: + try: + foo = os.read(fd, 1) + if self.debugMe: + sys.stdout.write(foo) + bytes += 1 + except Exception, exn: + raise ConsoleError(str(exn)) + else: + break + else: raise ConsoleError("Console run-away (exceeded %i bytes)" % self.limit, RUNAWAY) - - if self.debugMe: - print "Ignored %i bytes of miscellaneous console output" % bytes - - return bytes - + # press enter + os.write(self.consoleFd, "\n") + # look for prompt + for prompt_char in "\r\n" + self.PROMPT: + i, o, e = select.select([fd], [], [], 1) + if fd in i: + try: + foo = os.read(fd, 1) + if self.debugMe: + sys.stdout.write(foo) + if foo != prompt_char: + break + except Exception, exn: + raise ConsoleError(str(exn)) + else: + timeout += 1 + break + else: + break + else: + raise ConsoleError("Timed out waiting for console prompt") def __runCmd(self, command, saveHistory=True): output = "" @@ -155,7 +165,7 @@ class XmConsole: lines = 0 bytes = 0 - self.__chewall(self.consoleFd) + self.__getprompt(self.consoleFd) if verbose: print "[%s] Sending `%s'" % (self.domain, command) @@ -176,7 +186,7 @@ class XmConsole: "Failed to read from console (fd=%i): %s" % (self.consoleFd, exn)) else: - raise ConsoleError("Timed out waiting for console") + raise ConsoleError("Timed out waiting 
for console command") if self.limit and bytes >= self.limit: raise ConsoleError("Console run-away (exceeded %i bytes)" diff -r 59d4c1863330 -r fdf25330e4a6 tools/xm-test/tests/memset/03_memset_random_pos.py --- a/tools/xm-test/tests/memset/03_memset_random_pos.py Fri Jun 23 15:26:01 2006 -0600 +++ b/tools/xm-test/tests/memset/03_memset_random_pos.py Fri Jun 23 15:33:25 2006 -0600 @@ -22,12 +22,6 @@ except DomainError, e: FAIL(str(e)) times = random.randint(10,50) - -try: - console = XmConsole(domain.getName()) - console.sendInput("input") -except ConsoleError, e: - FAIL(str(e)) try: run = console.runCmd("cat /proc/xen/balloon | grep Current"); diff -r 59d4c1863330 -r fdf25330e4a6 xen/acm/acm_core.c --- a/xen/acm/acm_core.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/acm/acm_core.c Fri Jun 23 15:33:25 2006 -0600 @@ -222,9 +222,8 @@ acm_setup(unsigned int *initrdidx, pol = (struct acm_policy_buffer *)_policy_start; if (ntohl(pol->magic) == ACM_MAGIC) { - rc = acm_set_policy((void *)_policy_start, - (u32)_policy_len, - 0); + rc = do_acm_set_policy((void *)_policy_start, + (u32)_policy_len); if (rc == ACM_OK) { printkd("Policy len 0x%lx, start at %p.\n",_policy_len,_policy_start); diff -r 59d4c1863330 -r fdf25330e4a6 xen/acm/acm_policy.c --- a/xen/acm/acm_policy.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/acm/acm_policy.c Fri Jun 23 15:33:25 2006 -0600 @@ -26,36 +26,43 @@ #include <xen/lib.h> #include <xen/delay.h> #include <xen/sched.h> +#include <xen/guest_access.h> #include <acm/acm_core.h> #include <public/acm_ops.h> #include <acm/acm_hooks.h> #include <acm/acm_endian.h> int -acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size, int isuserbuffer) +acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size) { u8 *policy_buffer = NULL; - struct acm_policy_buffer *pol; + int ret = -EFAULT; if (buf_size < sizeof(struct acm_policy_buffer)) return -EFAULT; - /* 1. 
copy buffer from domain */ + /* copy buffer from guest domain */ if ((policy_buffer = xmalloc_array(u8, buf_size)) == NULL) return -ENOMEM; - if (isuserbuffer) { - if (copy_from_guest(policy_buffer, buf, buf_size)) - { - printk("%s: Error copying!\n",__func__); - goto error_free; - } - } else - memcpy(policy_buffer, buf, buf_size); - - /* 2. some sanity checking */ - pol = (struct acm_policy_buffer *)policy_buffer; - + if (copy_from_guest(policy_buffer, buf, buf_size)) + { + printk("%s: Error copying!\n",__func__); + goto error_free; + } + ret = do_acm_set_policy(policy_buffer, buf_size); + + error_free: + xfree(policy_buffer); + return ret; +} + + +int +do_acm_set_policy(void *buf, u32 buf_size) +{ + struct acm_policy_buffer *pol = (struct acm_policy_buffer *)buf; + /* some sanity checking */ if ((ntohl(pol->magic) != ACM_MAGIC) || (buf_size != ntohl(pol->len)) || (ntohl(pol->policy_version) != ACM_POLICY_VERSION)) @@ -85,33 +92,31 @@ acm_set_policy(XEN_GUEST_HANDLE(void) bu /* get bin_policy lock and rewrite policy (release old one) */ write_lock(&acm_bin_pol_rwlock); - /* 3. set label reference name */ + /* set label reference name */ if (acm_set_policy_reference(buf + ntohl(pol->policy_reference_offset), ntohl(pol->primary_buffer_offset) - ntohl(pol->policy_reference_offset))) goto error_lock_free; - /* 4. set primary policy data */ + /* set primary policy data */ if (acm_primary_ops->set_binary_policy(buf + ntohl(pol->primary_buffer_offset), ntohl(pol->secondary_buffer_offset) - ntohl(pol->primary_buffer_offset))) goto error_lock_free; - /* 5. 
set secondary policy data */ + /* set secondary policy data */ if (acm_secondary_ops->set_binary_policy(buf + ntohl(pol->secondary_buffer_offset), ntohl(pol->len) - ntohl(pol->secondary_buffer_offset))) goto error_lock_free; write_unlock(&acm_bin_pol_rwlock); - xfree(policy_buffer); return ACM_OK; error_lock_free: write_unlock(&acm_bin_pol_rwlock); error_free: printk("%s: Error setting policy.\n", __func__); - xfree(policy_buffer); return -EFAULT; } diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/linux-xen/smp.c --- a/xen/arch/ia64/linux-xen/smp.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/ia64/linux-xen/smp.c Fri Jun 23 15:33:25 2006 -0600 @@ -421,6 +421,42 @@ smp_call_function (void (*func) (void *i } EXPORT_SYMBOL(smp_call_function); +#ifdef XEN +int +on_selected_cpus(cpumask_t selected, void (*func) (void *info), void *info, + int retry, int wait) +{ + struct call_data_struct data; + unsigned int cpu, nr_cpus = cpus_weight(selected); + + ASSERT(local_irq_is_enabled()); + + if (!nr_cpus) + return 0; + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.started, 0); + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + + call_data = &data; + wmb(); + + for_each_cpu_mask(cpu, selected) + send_IPI_single(cpu, IPI_CALL_FUNC); + + while (atomic_read(wait ? &data.finished : &data.started) != nr_cpus) + cpu_relax(); + + spin_unlock(&call_lock); + + return 0; +} +#endif + /* * this function calls the 'stop' function on all other CPUs in the system. */ diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/ia64/xen/domain.c Fri Jun 23 15:33:25 2006 -0600 @@ -895,9 +895,7 @@ int construct_dom0(struct domain *d, sprintf(si->magic, "xen-%i.%i-ia64", XEN_VERSION, XEN_SUBVERSION); si->nr_pages = max_pages; - /* Give up the VGA console if DOM0 is configured to grab it. 
*/ - if (cmdline != NULL) - console_endboot(strstr(cmdline, "tty0") != NULL); + console_endboot(); printk("Dom0: 0x%lx\n", (u64)dom0); diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/ia64/xen/xensetup.c Fri Jun 23 15:33:25 2006 -0600 @@ -532,9 +532,8 @@ printk("num_online_cpus=%d, max_cpus=%d\ init_trace_bufs(); - /* Give up the VGA console if DOM0 is configured to grab it. */ if (opt_xencons) - console_endboot(cmdline && strstr(cmdline, "tty0")); + console_endboot(); domain0_ready = 1; diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/Makefile --- a/xen/arch/x86/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/Makefile Fri Jun 23 15:33:25 2006 -0600 @@ -41,7 +41,7 @@ obj-y += x86_emulate.o obj-y += x86_emulate.o ifneq ($(pae),n) -obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o +obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o else obj-$(x86_32) += shadow32.o endif diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/audit.c --- a/xen/arch/x86/audit.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/audit.c Fri Jun 23 15:33:25 2006 -0600 @@ -923,8 +923,8 @@ void _audit_domain(struct domain *d, int d->domain_id, page_to_mfn(page), page->u.inuse.type_info, page->count_info); - printk("a->gpfn_and_flags=%p\n", - (void *)a->gpfn_and_flags); + printk("a->gpfn_and_flags=%"PRIx64"\n", + (u64)a->gpfn_and_flags); errors++; } break; diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Fri Jun 23 15:33:25 2006 -0600 @@ -74,12 +74,15 @@ static void __vmx_clear_vmcs(void *info) static void vmx_clear_vmcs(struct vcpu *v) { - unsigned int cpu = v->arch.hvm_vmx.active_cpu; - - if ( (cpu == -1) || (cpu == smp_processor_id()) ) - __vmx_clear_vmcs(v); - else - on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1); + 
int cpu = v->arch.hvm_vmx.active_cpu; + + if ( cpu == -1 ) + return; + + if ( cpu == smp_processor_id() ) + return __vmx_clear_vmcs(v); + + on_selected_cpus(cpumask_of_cpu(cpu), __vmx_clear_vmcs, v, 1, 1); } static void vmx_load_vmcs(struct vcpu *v) @@ -97,6 +100,8 @@ void vmx_vmcs_enter(struct vcpu *v) * context initialisation. * 2. VMPTRLD as soon as we context-switch to a HVM VCPU. * 3. VMCS destruction needs to happen later (from domain_destroy()). + * We can relax this a bit if a paused VCPU always commits its + * architectural state to a software structure. */ if ( v == current ) return; diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Fri Jun 23 15:33:25 2006 -0600 @@ -1623,7 +1623,7 @@ static int mov_to_cr(int gp, int cr, str if ( vmx_pgbit_test(v) ) { /* The guest is a 32-bit PAE guest. */ -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 unsigned long mfn, old_base_mfn; if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) ) @@ -1667,7 +1667,7 @@ static int mov_to_cr(int gp, int cr, str else { /* The guest is a 64 bit or 32-bit PAE guest. */ -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 if ( (v->domain->arch.ops != NULL) && v->domain->arch.ops->guest_paging_levels == PAGING_L2) { @@ -1680,15 +1680,6 @@ static int mov_to_cr(int gp, int cr, str { printk("Unsupported guest paging levels\n"); /* need to take a clean path */ - domain_crash_synchronous(); - } - } - else - { - if ( !shadow_set_guest_paging_levels(v->domain, - PAGING_L4) ) - { - printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); } } diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/mm.c Fri Jun 23 15:33:25 2006 -0600 @@ -108,11 +108,20 @@ #include <public/memory.h> #ifdef VERBOSE -#define MEM_LOG(_f, _a...) 
\ - printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \ +#define MEM_LOG(_f, _a...) \ + printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \ current->domain->domain_id , __LINE__ , ## _a ) #else #define MEM_LOG(_f, _a...) ((void)0) +#endif + +/* + * PTE updates can be done with ordinary writes except: + * 1. Debug builds get extra checking by using CMPXCHG[8B]. + * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B. + */ +#if !defined(NDEBUG) || defined(CONFIG_X86_PAE) +#define PTE_UPDATE_WITH_CMPXCHG #endif /* @@ -261,17 +270,19 @@ void share_xen_page_with_privileged_gues #ifdef NDEBUG /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */ -#define l3tab_needs_shadow(mfn) (mfn >= 0x100000) +#define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000) #else /* - * In debug builds we aggressively shadow PDPTs to exercise code paths. + * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths. * We cannot safely shadow the idle page table, nor shadow-mode page tables - * (detected by lack of an owning domain). Always shadow PDPTs above 4GB. + * (detected by lack of an owning domain). As required for correctness, we + * always shadow PDPTs aboive 4GB. */ #define l3tab_needs_shadow(mfn) \ - ((((mfn << PAGE_SHIFT) != __pa(idle_pg_table)) && \ - (page_get_owner(mfn_to_page(mfn)) != NULL)) || \ - (mfn >= 0x100000)) + (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \ + (page_get_owner(mfn_to_page(mfn)) != NULL) && \ + ((mfn) & 1)) || /* odd MFNs are shadowed */ \ + ((mfn) >= 0x100000)) #endif static l1_pgentry_t *fix_pae_highmem_pl1e; @@ -296,6 +307,8 @@ static void __write_ptbase(unsigned long if ( !l3tab_needs_shadow(mfn) ) { write_cr3(mfn << PAGE_SHIFT); + /* Cache is no longer in use or valid (/after/ write to %cr3). 
*/ + cache->high_mfn = 0; return; } @@ -1167,20 +1180,35 @@ static inline int update_l1e(l1_pgentry_ l1_pgentry_t ol1e, l1_pgentry_t nl1e) { +#ifndef PTE_UPDATE_WITH_CMPXCHG + return !__copy_to_user(pl1e, &nl1e, sizeof(nl1e)); +#else intpte_t o = l1e_get_intpte(ol1e); intpte_t n = l1e_get_intpte(nl1e); - if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) || - unlikely(o != l1e_get_intpte(ol1e)) ) - { - MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte - ": saw %" PRIpte, - l1e_get_intpte(ol1e), - l1e_get_intpte(nl1e), - o); - return 0; - } + for ( ; ; ) + { + if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) + { + MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte + ": saw %" PRIpte, + l1e_get_intpte(ol1e), + l1e_get_intpte(nl1e), + o); + return 0; + } + + if ( o == l1e_get_intpte(ol1e) ) + break; + + /* Allowed to change in Accessed/Dirty flags only. */ + BUG_ON((o ^ l1e_get_intpte(ol1e)) & + ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); + ol1e = l1e_from_intpte(o); + } + return 1; +#endif } @@ -1228,17 +1256,24 @@ static int mod_l1_entry(l1_pgentry_t *pl return 1; } -#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \ - intpte_t __o = cmpxchg((intpte_t *)(_p), \ - _t ## e_get_intpte(_o), \ - _t ## e_get_intpte(_n)); \ - if ( __o != _t ## e_get_intpte(_o) ) \ - MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte \ - ": saw %" PRIpte "", \ - (_t ## e_get_intpte(_o)), \ - (_t ## e_get_intpte(_n)), \ - (__o)); \ - (__o == _t ## e_get_intpte(_o)); }) +#ifndef PTE_UPDATE_WITH_CMPXCHG +#define UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) +#else +#define UPDATE_ENTRY(_t,_p,_o,_n) ({ \ + for ( ; ; ) \ + { \ + intpte_t __o = cmpxchg((intpte_t *)(_p), \ + _t ## e_get_intpte(_o), \ + _t ## e_get_intpte(_n)); \ + if ( __o == _t ## e_get_intpte(_o) ) \ + break; \ + /* Allowed to change in Accessed/Dirty flags only. 
*/ \ + BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \ + ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \ + _o = _t ## e_from_intpte(__o); \ + } \ + 1; }) +#endif /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, @@ -2408,8 +2443,8 @@ static int create_grant_pte_mapping( goto failed; } - if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) || - !update_l1e(va, ol1e, _nl1e) ) + ol1e = *(l1_pgentry_t *)va; + if ( !update_l1e(va, ol1e, _nl1e) ) { put_page_type(page); rc = GNTST_general_error; @@ -2486,7 +2521,7 @@ static int destroy_grant_pte_mapping( } /* Delete pagetable entry. */ - if ( unlikely(__put_user(0, (intpte_t *)va))) + if ( unlikely(!update_l1e((l1_pgentry_t *)va, ol1e, l1e_empty())) ) { MEM_LOG("Cannot delete PTE entry at %p", va); put_page_type(page); @@ -2566,7 +2601,7 @@ static int destroy_grant_va_mapping( } /* Delete pagetable entry. */ - if ( unlikely(__put_user(0, &pl1e->l1)) ) + if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty())) ) { MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e); return GNTST_general_error; @@ -3020,6 +3055,20 @@ long arch_memory_op(int op, XEN_GUEST_HA return 0; } + case XENMEM_machphys_mapping: + { + struct xen_machphys_mapping mapping = { + .v_start = MACH2PHYS_VIRT_START, + .v_end = MACH2PHYS_VIRT_END, + .max_mfn = MACH2PHYS_NR_ENTRIES - 1 + }; + + if ( copy_to_guest(arg, &mapping, 1) ) + return -EFAULT; + + return 0; + } + default: return subarch_memory_op(op, arg); } @@ -3343,7 +3392,7 @@ static int ptwr_emulated_update( addr &= ~(sizeof(paddr_t)-1); if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) ) { - propagate_page_fault(addr, 4); /* user mode, read fault */ + propagate_page_fault(addr, 0); /* read fault */ return X86EMUL_PROPAGATE_FAULT; } /* Mask out bits provided by caller. */ @@ -3358,6 +3407,7 @@ static int ptwr_emulated_update( old |= full; } +#if 0 /* XXX KAF: I don't think this can happen. 
*/ /* * We must not emulate an update to a PTE that is temporarily marked * writable by the batched ptwr logic, else we can corrupt page refcnts! @@ -3368,6 +3418,12 @@ static int ptwr_emulated_update( if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) && (l1_linear_offset(l1va) == l1_linear_offset(addr)) ) ptwr_flush(d, PTWR_PT_INACTIVE); +#else + BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) && + (l1_linear_offset(l1va) == l1_linear_offset(addr))); + BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) && + (l1_linear_offset(l1va) == l1_linear_offset(addr))); +#endif /* Read the PTE that maps the page being updated. */ if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], @@ -3409,8 +3465,9 @@ static int ptwr_emulated_update( } else { - ol1e = *pl1e; - *pl1e = nl1e; + ol1e = *pl1e; + if ( !update_l1e(pl1e, ol1e, nl1e) ) + BUG(); } unmap_domain_page(pl1e); @@ -3475,16 +3532,18 @@ int ptwr_do_page_fault(struct domain *d, unsigned long l2_idx; struct x86_emulate_ctxt emul_ctxt; - if ( unlikely(shadow_mode_enabled(d)) ) - return 0; + ASSERT(!shadow_mode_enabled(d)); /* * Attempt to read the PTE that maps the VA being accessed. By checking for * PDE validity in the L2 we avoid many expensive fixups in __get_user(). + * NB. The L2 entry cannot be detached due to existing ptwr work: the + * caller already checked that. */ - if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) & - _PAGE_PRESENT) || - __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)], + pl2e = &__linear_l2_table[l2_linear_offset(addr)]; + if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) || + !(l2e_get_flags(l2e) & _PAGE_PRESENT) || + __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], sizeof(pte)) ) { return 0; @@ -3557,21 +3616,31 @@ int ptwr_do_page_fault(struct domain *d, } /* - * If this is a multi-processor guest then ensure that the page is hooked - * into at most one L2 table, which must be the one running on this VCPU. 
+ * Multi-processor guest? Then ensure that the page table is hooked into + * at most one L2, and also ensure that there is only one mapping of the + * page table itself (or there can be conflicting writable mappings from + * other VCPUs). */ - if ( (d->vcpu[0]->next_in_list != NULL) && - ((page->u.inuse.type_info & PGT_count_mask) != - (!!(page->u.inuse.type_info & PGT_pinned) + - (which == PTWR_PT_ACTIVE))) ) - { - /* Could be conflicting writable mappings from other VCPUs. */ - cleanup_writable_pagetable(d); - goto emulate; + if ( d->vcpu[0]->next_in_list != NULL ) + { + if ( /* Hooked into at most one L2 table (which this VCPU maps)? */ + ((page->u.inuse.type_info & PGT_count_mask) != + (!!(page->u.inuse.type_info & PGT_pinned) + + (which == PTWR_PT_ACTIVE))) || + /* PTEs are mapped read-only in only one place? */ + ((page->count_info & PGC_count_mask) != + (!!(page->count_info & PGC_allocated) + /* alloc count */ + (page->u.inuse.type_info & PGT_count_mask) + /* type count */ + 1)) ) /* map count */ + { + /* Could be conflicting writable mappings from other VCPUs. */ + cleanup_writable_pagetable(d); + goto emulate; + } } /* - * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at + * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a * time. If there is already one, we must flush it out. */ if ( d->arch.ptwr[which].l1va ) @@ -3592,18 +3661,16 @@ int ptwr_do_page_fault(struct domain *d, "pfn %lx\n", PTWR_PRINT_WHICH, addr, l2_idx << L2_PAGETABLE_SHIFT, pfn); - d->arch.ptwr[which].l1va = addr | 1; - d->arch.ptwr[which].l2_idx = l2_idx; - d->arch.ptwr[which].vcpu = current; - -#ifdef PERF_ARRAYS - d->arch.ptwr[which].eip = regs->eip; -#endif - /* For safety, disconnect the L1 p.t. page from current space. 
*/ if ( which == PTWR_PT_ACTIVE ) { - l2e_remove_flags(*pl2e, _PAGE_PRESENT); + l2e_remove_flags(l2e, _PAGE_PRESENT); + if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) ) + { + MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e); + domain_crash(d); + return 0; + } flush_tlb_mask(d->domain_dirty_cpumask); } @@ -3617,14 +3684,24 @@ int ptwr_do_page_fault(struct domain *d, if ( unlikely(__put_user(pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1)) ) { - MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *) + MEM_LOG("ptwr: Could not update pte at %p", &linear_pg_table[l1_linear_offset(addr)]); - /* Toss the writable pagetable state and crash. */ - d->arch.ptwr[which].l1va = 0; domain_crash(d); return 0; } + /* + * Now record the writable pagetable state *after* any accesses that can + * cause a recursive page fault (i.e., those via the *_user() accessors). + * Otherwise we can enter ptwr_flush() with half-done ptwr state. + */ + d->arch.ptwr[which].l1va = addr | 1; + d->arch.ptwr[which].l2_idx = l2_idx; + d->arch.ptwr[which].vcpu = current; +#ifdef PERF_ARRAYS + d->arch.ptwr[which].eip = regs->eip; +#endif + return EXCRET_fault_fixed; emulate: diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/setup.c Fri Jun 23 15:33:25 2006 -0600 @@ -396,11 +396,13 @@ void __init __start_xen(multiboot_info_t BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE); BUILD_BUG_ON(sizeof(vcpu_info_t) != 64); - /* __foo are defined in public headers. Check they match internal defs. */ + /* Check definitions in public headers match internal defs. 
*/ BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START); #ifdef HYPERVISOR_VIRT_END BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END); #endif + BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START); + BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END); init_frametable(); @@ -596,8 +598,7 @@ void __init __start_xen(multiboot_info_t init_trace_bufs(); - /* Give up the VGA console if DOM0 is configured to grab it. */ - console_endboot(cmdline && strstr(cmdline, "tty0")); + console_endboot(); /* Hide UART from DOM0 if we're using it */ serial_endboot(); diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/shadow.c Fri Jun 23 15:33:25 2006 -0600 @@ -222,6 +222,7 @@ alloc_shadow_page(struct domain *d, unsigned long smfn, real_gpfn; int pin = 0; void *l1, *lp; + u64 index = 0; // Currently, we only keep pre-zero'ed pages around for use as L1's... // This will change. Soon. @@ -354,9 +355,19 @@ alloc_shadow_page(struct domain *d, if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) pin = 1; #endif + +#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE ) + /* + * We use PGT_l4_shadow for 2-level paging guests on PAE + */ + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + pin = 1; +#endif + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + index = get_cr3_idxval(current); break; -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 case PGT_fl1_shadow: perfc_incr(shadow_l1_pages); d->arch.shadow_page_count++; @@ -393,7 +404,7 @@ alloc_shadow_page(struct domain *d, // ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) ); - set_shadow_status(d, gpfn, gmfn, smfn, psh_type); + set_shadow_status(d, gpfn, gmfn, smfn, psh_type, index); if ( pin ) shadow_pin(smfn); @@ -1324,7 +1335,7 @@ increase_writable_pte_prediction(struct prediction = (prediction & PGT_mfn_mask) | score; //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, 
create); - set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred); + set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0); if ( create ) perfc_incr(writable_pte_predictions); @@ -1345,10 +1356,10 @@ decrease_writable_pte_prediction(struct //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score); if ( score ) - set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred); + set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0); else { - delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred); + delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 0); perfc_decr(writable_pte_predictions); } } @@ -1385,7 +1396,7 @@ static u32 remove_all_write_access_in_pt int is_l1_shadow = ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) == PGT_l1_shadow); -#if CONFIG_PAGING_LEVELS == 4 +#if CONFIG_PAGING_LEVELS >= 3 is_l1_shadow |= ((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) == PGT_fl1_shadow); @@ -1494,7 +1505,7 @@ static int remove_all_write_access( while ( a && a->gpfn_and_flags ) { if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 || (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow #endif ) @@ -1538,8 +1549,8 @@ static void resync_pae_guest_l3(struct d continue; idx = get_cr3_idxval(v); - smfn = __shadow_status( - d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow); + + smfn = __shadow_status(d, entry->gpfn, PGT_l4_shadow); if ( !smfn ) continue; @@ -1706,7 +1717,7 @@ static int resync_all(struct domain *d, { int error; -#if CONFIG_PAGING_LEVELS == 4 +#if CONFIG_PAGING_LEVELS >= 3 unsigned long gpfn; gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT; @@ -2420,17 +2431,6 @@ static void shadow_update_pagetables(str v->arch.guest_vtable = map_domain_page_global(gmfn); } -#if CONFIG_PAGING_LEVELS >= 3 - /* - * 
Handle 32-bit PAE enabled guest - */ - if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) - { - u32 index = get_cr3_idxval(v); - gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn; - } -#endif - /* * arch.shadow_table */ @@ -2443,6 +2443,23 @@ static void shadow_update_pagetables(str if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) ) smfn = shadow_l3_table(v, gpfn, gmfn); } + else +#endif + +#if CONFIG_PAGING_LEVELS == 3 & defined ( GUEST_32PAE ) + /* + * We use PGT_l4_shadow for 2-level paging guests on PAE + */ + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + { + if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) ) + smfn = shadow_l3_table(v, gpfn, gmfn); + else + { + update_top_level_shadow(v, smfn); + need_sync = 1; + } + } else #endif if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) @@ -3093,6 +3110,36 @@ static inline unsigned long init_bl2( return smfn; } + +static inline unsigned long init_l3( + struct vcpu *v, unsigned long gpfn, unsigned long gmfn) +{ + unsigned long smfn; + l4_pgentry_t *spl4e; + unsigned long index; + + if ( unlikely(!(smfn = alloc_shadow_page(v->domain, gpfn, gmfn, PGT_l4_shadow))) ) + { + printk("Couldn't alloc an L4 shadow for pfn= %lx mfn= %lx\n", gpfn, gmfn); + BUG(); /* XXX Deal gracefully wiht failure. */ + } + + /* Map the self entry, L4&L3 share the same page */ + spl4e = (l4_pgentry_t *)map_domain_page(smfn); + + /* + * Shadow L4's pfn_info->tlbflush_timestamp + * should also save it's own index. 
+ */ + + index = get_cr3_idxval(v); + frame_table[smfn].tlbflush_timestamp = index; + + memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t)); + spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR); + unmap_domain_page(spl4e); + return smfn; +} #endif #if CONFIG_PAGING_LEVELS == 3 @@ -3111,6 +3158,12 @@ static unsigned long shadow_l3_table( d->arch.ops->guest_paging_levels == PAGING_L2 ) { return init_bl2(d, gpfn, gmfn); + } + + if ( SH_GUEST_32PAE && + d->arch.ops->guest_paging_levels == PAGING_L3 ) + { + return init_l3(v, gpfn, gmfn); } if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) ) @@ -3223,6 +3276,11 @@ static unsigned long shadow_l4_table( return init_bl2(d, gpfn, gmfn); } + if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) + { + return init_l3(v, gpfn, gmfn); + } + if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) ) { printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn); @@ -3230,24 +3288,6 @@ static unsigned long shadow_l4_table( } spl4e = (l4_pgentry_t *)map_domain_page(smfn); - - /* For 32-bit PAE guest on 64-bit host */ - if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 ) - { - unsigned long index; - /* - * Shadow L4's pfn_info->tlbflush_timestamp - * should also save it's own index. - */ - index = get_cr3_idxval(v); - frame_table[smfn].tlbflush_timestamp = index; - - memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t)); - /* Map the self entry */ - spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR); - unmap_domain_page(spl4e); - return smfn; - } /* Install hypervisor and 4x linear p.t. mapings. 
*/ if ( (PGT_base_page_table == PGT_l4_page_table) && @@ -3378,7 +3418,7 @@ validate_bl2e_change( * This shadow_mark_va_out_of_sync() is for 2M page shadow */ static void shadow_mark_va_out_of_sync_2mp( - struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long writable_pl1e) + struct vcpu *v, unsigned long gpfn, unsigned long mfn, paddr_t writable_pl1e) { struct out_of_sync_entry *entry = shadow_mark_mfn_out_of_sync(v, gpfn, mfn); @@ -3647,6 +3687,7 @@ static inline int l2e_rw_fault( } unmap_domain_page(l1_p); + *gl2e_p = gl2e; return 1; } @@ -3720,7 +3761,7 @@ static inline int guest_page_fault( ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 ); -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) ) return 1; #endif @@ -4056,7 +4097,7 @@ struct shadow_ops MODE_32_2_HANDLER = { }; #endif -#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) || \ +#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) && !defined (GUEST_32PAE) ) || \ ( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) ) diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/shadow32.c Fri Jun 23 15:33:25 2006 -0600 @@ -306,7 +306,7 @@ alloc_shadow_page(struct domain *d, // ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) ); - set_shadow_status(d, gpfn, gmfn, smfn, psh_type); + set_shadow_status(d, gpfn, gmfn, smfn, psh_type, 0); if ( pin ) shadow_pin(smfn); @@ -395,7 +395,7 @@ void free_shadow_page(unsigned long smfn ASSERT( ! 
IS_INVALID_M2P_ENTRY(gpfn) ); - delete_shadow_status(d, gpfn, gmfn, type); + delete_shadow_status(d, gpfn, gmfn, type, 0); switch ( type ) { @@ -2319,7 +2319,7 @@ increase_writable_pte_prediction(struct prediction = (prediction & PGT_mfn_mask) | score; //printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create); - set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred); + set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0); if ( create ) perfc_incr(writable_pte_predictions); @@ -2340,10 +2340,10 @@ decrease_writable_pte_prediction(struct //printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score); if ( score ) - set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred); + set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0); else { - delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred); + delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 0); perfc_decr(writable_pte_predictions); } } @@ -2381,7 +2381,7 @@ free_writable_pte_predictions(struct dom * keep an accurate count of writable_pte_predictions to keep it * happy. 
*/ - delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred); + delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0); perfc_decr(writable_pte_predictions); } diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow_guest32pae.c --- a/xen/arch/x86/shadow_guest32pae.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/shadow_guest32pae.c Fri Jun 23 15:33:25 2006 -0600 @@ -1,5 +1,4 @@ #define GUEST_32PAE -#if defined (__x86_64__) #include "shadow.c" struct shadow_ops MODE_64_PAE_HANDLER = { @@ -15,4 +14,3 @@ struct shadow_ops MODE_64_PAE_HANDLER = .gva_to_gpa = gva_to_gpa_64, }; -#endif diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/shadow_public.c Fri Jun 23 15:33:25 2006 -0600 @@ -123,8 +123,19 @@ int shadow_set_guest_paging_levels(struc #endif #if CONFIG_PAGING_LEVELS == 3 case 3: - if ( d->arch.ops != &MODE_64_3_HANDLER ) - d->arch.ops = &MODE_64_3_HANDLER; + if ( d->arch.ops == NULL || + shadow_mode_log_dirty(d) ) + { + if ( d->arch.ops != &MODE_64_3_HANDLER ) + d->arch.ops = &MODE_64_3_HANDLER; + } + else + { + if ( d->arch.ops == &MODE_64_2_HANDLER ) + free_shadow_pages(d); + if ( d->arch.ops != &MODE_64_PAE_HANDLER ) + d->arch.ops = &MODE_64_PAE_HANDLER; + } shadow_unlock(d); return 1; #endif @@ -268,10 +279,8 @@ free_shadow_tables(struct domain *d, uns put_shadow_ref(entry_get_pfn(ple[i])); if (d->arch.ops->guest_paging_levels == PAGING_L3) { -#if CONFIG_PAGING_LEVELS == 4 +#if CONFIG_PAGING_LEVELS >= 3 if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 ) -#elif CONFIG_PAGING_LEVELS == 3 - if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L3 ) #endif break; } @@ -710,6 +719,7 @@ void free_shadow_page(unsigned long smfn struct domain *d = page_get_owner(mfn_to_page(gmfn)); unsigned long gpfn = mfn_to_gmfn(d, gmfn); unsigned long type = page->u.inuse.type_info & PGT_type_mask; + u64 index = 0; SH_VVLOG("%s: free'ing smfn=%lx", 
__func__, smfn); @@ -722,12 +732,16 @@ void free_shadow_page(unsigned long smfn if ( !mfn ) gpfn |= (1UL << 63); } +#endif +#if CONFIG_PAGING_LEVELS >= 3 if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) - if ( type == PGT_l4_shadow ) - gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn; -#endif - - delete_shadow_status(d, gpfn, gmfn, type); + { + if ( type == PGT_l4_shadow ) + index = page->tlbflush_timestamp; + } +#endif + + delete_shadow_status(d, gpfn, gmfn, type, index); switch ( type ) { @@ -835,7 +849,7 @@ free_writable_pte_predictions(struct dom while ( count ) { count--; - delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred); + delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0); } xfree(gpfn_list); @@ -1050,8 +1064,8 @@ void __shadow_mode_disable(struct domain { if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 ) { - printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n", - __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags); + printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%"PRIx64"\n", + __FILE__, i, (u64)d->arch.shadow_ht[i].gpfn_and_flags); BUG(); } } diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/time.c Fri Jun 23 15:33:25 2006 -0600 @@ -699,7 +699,7 @@ void update_domain_wallclock_time(struct { spin_lock(&wc_lock); version_update_begin(&d->shared_info->wc_version); - d->shared_info->wc_sec = wc_sec; + d->shared_info->wc_sec = wc_sec + d->time_offset_seconds; d->shared_info->wc_nsec = wc_nsec; version_update_end(&d->shared_info->wc_version); spin_unlock(&wc_lock); diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/traps.c Fri Jun 23 15:33:25 2006 -0600 @@ -276,6 +276,42 @@ void show_stack(struct cpu_user_regs *re show_trace(regs); } +void show_stack_overflow(unsigned long esp) +{ +#ifdef MEMORY_GUARD + unsigned long esp_top = 
get_stack_bottom() & PAGE_MASK; + unsigned long *stack, addr; + + /* Trigger overflow trace if %esp is within 100 bytes of the guard page. */ + if ( ((esp - esp_top) > 100) && ((esp_top - esp) > 100) ) + return; + + if ( esp < esp_top ) + esp = esp_top; + + printk("Xen stack overflow:\n "); + + stack = (unsigned long *)esp; + while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 ) + { + addr = *stack++; + if ( is_kernel_text(addr) ) + { + printk("%p: [<%p>]", stack, _p(addr)); + print_symbol(" %s\n ", addr); + } + } + + printk("\n"); +#endif +} + +void show_execution_state(struct cpu_user_regs *regs) +{ + show_registers(regs); + show_stack(regs); +} + /* * This is called for faults at very unexpected times (e.g., when interrupts * are disabled). In such situations we can't do much that is safe. We try to @@ -297,7 +333,7 @@ asmlinkage void fatal_trap(int trapnr, s watchdog_disable(); console_start_sync(); - show_registers(regs); + show_execution_state(regs); if ( trapnr == TRAP_page_fault ) { @@ -360,7 +396,7 @@ static inline int do_trap(int trapnr, ch DEBUGGER_trap_fatal(trapnr, regs); - show_registers(regs); + show_execution_state(regs); panic("CPU%d FATAL TRAP: vector = %d (%s)\n" "[error_code=%04x]\n", smp_processor_id(), trapnr, str, regs->error_code); @@ -451,8 +487,23 @@ asmlinkage int do_invalid_op(struct cpu_ if ( unlikely(!guest_mode(regs)) ) { + char sig[5]; + /* Signature (ud2; .ascii "dbg") indicates dump state and continue. 
*/ + if ( (__copy_from_user(sig, (char *)regs->eip, sizeof(sig)) == 0) && + (memcmp(sig, "\xf\xb""dbg", sizeof(sig)) == 0) ) + { + show_execution_state(regs); + regs->eip += sizeof(sig); + return EXCRET_fault_fixed; + } + printk("%02x %02x %02x %02x %02x\n", + (unsigned char)sig[0], + (unsigned char)sig[1], + (unsigned char)sig[2], + (unsigned char)sig[3], + (unsigned char)sig[4]); DEBUGGER_trap_fatal(TRAP_invalid_op, regs); - show_registers(regs); + show_execution_state(regs); panic("CPU%d FATAL TRAP: vector = %d (invalid opcode)\n", smp_processor_id(), TRAP_invalid_op); } @@ -481,7 +532,7 @@ asmlinkage int do_int3(struct cpu_user_r if ( !guest_mode(regs) ) { DEBUGGER_trap_fatal(TRAP_int3, regs); - show_registers(regs); + show_execution_state(regs); panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id()); } @@ -511,9 +562,9 @@ void propagate_page_fault(unsigned long v->vcpu_info->arch.cr2 = addr; /* Re-set error_code.user flag appropriately for the guest. */ - error_code &= ~4; + error_code &= ~PGERR_user_mode; if ( !guest_kernel_mode(v, guest_cpu_user_regs()) ) - error_code |= 4; + error_code |= PGERR_user_mode; ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault]; tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE; @@ -547,6 +598,7 @@ static int handle_gdt_ldt_mapping_fault( { /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. 
*/ LOCK_BIGLOCK(d); + cleanup_writable_pagetable(d); ret = map_ldt_shadow_page(offset >> PAGE_SHIFT); UNLOCK_BIGLOCK(d); @@ -578,6 +630,98 @@ static int handle_gdt_ldt_mapping_fault( (((va) >= HYPERVISOR_VIRT_START)) #endif +static int __spurious_page_fault( + unsigned long addr, struct cpu_user_regs *regs) +{ + unsigned long mfn, cr3 = read_cr3(); +#if CONFIG_PAGING_LEVELS >= 4 + l4_pgentry_t l4e, *l4t; +#endif +#if CONFIG_PAGING_LEVELS >= 3 + l3_pgentry_t l3e, *l3t; +#endif + l2_pgentry_t l2e, *l2t; + l1_pgentry_t l1e, *l1t; + unsigned int required_flags, disallowed_flags; + + /* Reserved bit violations are never spurious faults. */ + if ( regs->error_code & PGERR_reserved_bit ) + return 0; + + required_flags = _PAGE_PRESENT; + if ( regs->error_code & PGERR_write_access ) + required_flags |= _PAGE_RW; + if ( regs->error_code & PGERR_user_mode ) + required_flags |= _PAGE_USER; + + disallowed_flags = 0; + if ( regs->error_code & PGERR_instr_fetch ) + disallowed_flags |= _PAGE_NX; + + mfn = cr3 >> PAGE_SHIFT; + +#if CONFIG_PAGING_LEVELS >= 4 + l4t = map_domain_page(mfn); + l4e = l4t[l4_table_offset(addr)]; + mfn = l4e_get_pfn(l4e); + unmap_domain_page(l4t); + if ( !(l4e_get_flags(l4e) & required_flags) || + (l4e_get_flags(l4e) & disallowed_flags) ) + return 0; +#endif + +#if CONFIG_PAGING_LEVELS >= 3 + l3t = map_domain_page(mfn); +#ifdef CONFIG_X86_PAE + l3t += (cr3 & 0xFE0UL) >> 3; +#endif + l3e = l3t[l3_table_offset(addr)]; + mfn = l3e_get_pfn(l3e); + unmap_domain_page(l3t); +#ifdef CONFIG_X86_PAE + if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) + return 0; +#else + if ( !(l3e_get_flags(l3e) & required_flags) || + (l3e_get_flags(l3e) & disallowed_flags) ) + return 0; +#endif +#endif + + l2t = map_domain_page(mfn); + l2e = l2t[l2_table_offset(addr)]; + mfn = l2e_get_pfn(l2e); + unmap_domain_page(l2t); + if ( !(l2e_get_flags(l2e) & required_flags) || + (l2e_get_flags(l2e) & disallowed_flags) ) + return 0; + if ( l2e_get_flags(l2e) & _PAGE_PSE ) + return 1; + + l1t = 
map_domain_page(mfn); + l1e = l1t[l1_table_offset(addr)]; + mfn = l1e_get_pfn(l1e); + unmap_domain_page(l1t); + if ( !(l1e_get_flags(l1e) & required_flags) || + (l1e_get_flags(l1e) & disallowed_flags) ) + return 0; + return 1; +} + +static int spurious_page_fault( + unsigned long addr, struct cpu_user_regs *regs) +{ + struct domain *d = current->domain; + int is_spurious; + + LOCK_BIGLOCK(d); + cleanup_writable_pagetable(d); + is_spurious = __spurious_page_fault(addr, regs); + UNLOCK_BIGLOCK(d); + + return is_spurious; +} + static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs) { struct vcpu *v = current; @@ -590,12 +734,17 @@ static int fixup_page_fault(unsigned lon if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( addr - GDT_LDT_VIRT_START, regs); - } - else if ( unlikely(shadow_mode_enabled(d)) ) - { + /* + * Do not propagate spurious faults in the hypervisor area to the + * guest. It cannot fix them up. + */ + return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0); + } + + if ( unlikely(shadow_mode_enabled(d)) ) return shadow_fault(addr, regs); - } - else if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) + + if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) { LOCK_BIGLOCK(d); if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) && @@ -607,9 +756,14 @@ static int fixup_page_fault(unsigned lon return EXCRET_fault_fixed; } + /* + * Note it is *not* safe to check PGERR_page_present here. It can be + * clear, due to unhooked page table, when we would otherwise expect + * it to be set. We have an aversion to trusting that flag in Xen, and + * guests ought to be leery too. + */ if ( guest_kernel_mode(v, regs) && - /* Protection violation on write? No reserved-bit violation? 
*/ - ((regs->error_code & 0xb) == 0x3) && + (regs->error_code & PGERR_write_access) && ptwr_do_page_fault(d, addr, regs) ) { UNLOCK_BIGLOCK(d); @@ -619,46 +773,6 @@ static int fixup_page_fault(unsigned lon } return 0; -} - -static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - int rc; - - /* - * The only possible reason for a spurious page fault not to be picked - * up already is that a page directory was unhooked by writable page table - * logic and then reattached before the faulting VCPU could detect it. - */ - if ( is_idle_domain(d) || /* no ptwr in idle domain */ - IN_HYPERVISOR_RANGE(addr) || /* no ptwr on hypervisor addrs */ - shadow_mode_enabled(d) || /* no ptwr logic in shadow mode */ - ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault? */ - return 0; - - LOCK_BIGLOCK(d); - - /* - * The page directory could have been detached again while we weren't - * holding the per-domain lock. Detect that and fix up if it's the case. - */ - if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) && - unlikely(l2_linear_offset(addr) == - d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) ) - { - ptwr_flush(d, PTWR_PT_ACTIVE); - rc = 1; - } - else - { - /* Okay, walk the page tables. Only check for not-present faults.*/ - rc = __spurious_page_fault(addr); - } - - UNLOCK_BIGLOCK(d); - return rc; } /* @@ -703,7 +817,7 @@ asmlinkage int do_page_fault(struct cpu_ DEBUGGER_trap_fatal(TRAP_page_fault, regs); - show_registers(regs); + show_execution_state(regs); show_page_walk(addr); panic("CPU%d FATAL PAGE FAULT\n" "[error_code=%04x]\n" @@ -784,8 +898,6 @@ static inline int admin_io_okay( (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0)) /* Propagate a fault back to the guest kernel. 
*/ -#define USER_READ_FAULT 4 /* user mode, read fault */ -#define USER_WRITE_FAULT 6 /* user mode, write fault */ #define PAGE_FAULT(_faultaddr, _errcode) \ ({ propagate_page_fault(_faultaddr, _errcode); \ return EXCRET_fault_fixed; \ @@ -795,7 +907,7 @@ static inline int admin_io_okay( #define insn_fetch(_type, _size, _ptr) \ ({ unsigned long _x; \ if ( get_user(_x, (_type *)eip) ) \ - PAGE_FAULT(eip, USER_READ_FAULT); \ + PAGE_FAULT(eip, 0); /* read fault */ \ eip += _size; (_type)_x; }) static int emulate_privileged_op(struct cpu_user_regs *regs) @@ -864,17 +976,17 @@ static int emulate_privileged_op(struct case 1: data = (u8)inb_user((u16)regs->edx, v, regs); if ( put_user((u8)data, (u8 *)regs->edi) ) - PAGE_FAULT(regs->edi, USER_WRITE_FAULT); + PAGE_FAULT(regs->edi, PGERR_write_access); break; case 2: data = (u16)inw_user((u16)regs->edx, v, regs); if ( put_user((u16)data, (u16 *)regs->edi) ) - PAGE_FAULT(regs->edi, USER_WRITE_FAULT); + PAGE_FAULT(regs->edi, PGERR_write_access); break; case 4: data = (u32)inl_user((u16)regs->edx, v, regs); if ( put_user((u32)data, (u32 *)regs->edi) ) - PAGE_FAULT(regs->edi, USER_WRITE_FAULT); + PAGE_FAULT(regs->edi, PGERR_write_access); break; } regs->edi += (int)((regs->eflags & EF_DF) ? 
-op_bytes : op_bytes); @@ -889,17 +1001,17 @@ static int emulate_privileged_op(struct { case 1: if ( get_user(data, (u8 *)regs->esi) ) - PAGE_FAULT(regs->esi, USER_READ_FAULT); + PAGE_FAULT(regs->esi, 0); /* read fault */ outb_user((u8)data, (u16)regs->edx, v, regs); break; case 2: if ( get_user(data, (u16 *)regs->esi) ) - PAGE_FAULT(regs->esi, USER_READ_FAULT); + PAGE_FAULT(regs->esi, 0); /* read fault */ outw_user((u16)data, (u16)regs->edx, v, regs); break; case 4: if ( get_user(data, (u32 *)regs->esi) ) - PAGE_FAULT(regs->esi, USER_READ_FAULT); + PAGE_FAULT(regs->esi, 0); /* read fault */ outl_user((u32)data, (u16)regs->edx, v, regs); break; } @@ -1082,7 +1194,7 @@ static int emulate_privileged_op(struct v->arch.guest_context.ctrlreg[2] = *reg; v->vcpu_info->arch.cr2 = *reg; break; - + case 3: /* Write CR3 */ LOCK_BIGLOCK(v->domain); cleanup_writable_pagetable(v->domain); @@ -1270,7 +1382,7 @@ asmlinkage int do_general_protection(str DEBUGGER_trap_fatal(TRAP_gp_fault, regs); hardware_gp: - show_registers(regs); + show_execution_state(regs); panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n", smp_processor_id(), regs->error_code); return 0; diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_32/seg_fixup.c --- a/xen/arch/x86/x86_32/seg_fixup.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/x86_32/seg_fixup.c Fri Jun 23 15:33:25 2006 -0600 @@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs return 0; page_fault: - propagate_page_fault((unsigned long)pb, 4); + propagate_page_fault((unsigned long)pb, 0); /* read fault */ return EXCRET_fault_fixed; } diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/x86_32/traps.c Fri Jun 23 15:33:25 2006 -0600 @@ -68,13 +68,11 @@ void show_registers(struct cpu_user_regs "ss: %04x cs: %04x\n", fault_regs.ds, fault_regs.es, fault_regs.fs, fault_regs.gs, fault_regs.ss, fault_regs.cs); - - show_stack(regs); } void 
show_page_walk(unsigned long addr) { - unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT; + unsigned long pfn, mfn, cr3 = read_cr3(); #ifdef CONFIG_X86_PAE l3_pgentry_t l3e, *l3t; #endif @@ -83,8 +81,11 @@ void show_page_walk(unsigned long addr) printk("Pagetable walk from %08lx:\n", addr); + mfn = cr3 >> PAGE_SHIFT; + #ifdef CONFIG_X86_PAE - l3t = map_domain_page(mfn); + l3t = map_domain_page(mfn); + l3t += (cr3 & 0xFE0UL) >> 3; l3e = l3t[l3_table_offset(addr)]; mfn = l3e_get_pfn(l3e); pfn = get_gpfn_from_mfn(mfn); @@ -111,40 +112,6 @@ void show_page_walk(unsigned long addr) pfn = get_gpfn_from_mfn(mfn); printk(" L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn); unmap_domain_page(l1t); -} - -int __spurious_page_fault(unsigned long addr) -{ - unsigned long mfn = read_cr3() >> PAGE_SHIFT; -#ifdef CONFIG_X86_PAE - l3_pgentry_t l3e, *l3t; -#endif - l2_pgentry_t l2e, *l2t; - l1_pgentry_t l1e, *l1t; - -#ifdef CONFIG_X86_PAE - l3t = map_domain_page(mfn); - l3e = l3t[l3_table_offset(addr)]; - mfn = l3e_get_pfn(l3e); - unmap_domain_page(l3t); - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) - return 0; -#endif - - l2t = map_domain_page(mfn); - l2e = l2t[l2_table_offset(addr)]; - mfn = l2e_get_pfn(l2e); - unmap_domain_page(l2t); - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - return 0; - if ( l2e_get_flags(l2e) & _PAGE_PSE ) - return 1; - - l1t = map_domain_page(mfn); - l1e = l1t[l1_table_offset(addr)]; - mfn = l1e_get_pfn(l1e); - unmap_domain_page(l1t); - return !!(l1e_get_flags(l1e) & _PAGE_PRESENT); } #define DOUBLEFAULT_STACK_SIZE 1024 @@ -173,6 +140,7 @@ asmlinkage void do_double_fault(void) tss->esi, tss->edi, tss->ebp, tss->esp); printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", tss->ds, tss->es, tss->fs, tss->gs, tss->ss); + show_stack_overflow(tss->esp); printk("************************************\n"); printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu); printk("System needs manual reset.\n"); diff -r 59d4c1863330 -r fdf25330e4a6 
xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/x86_64/traps.c Fri Jun 23 15:33:25 2006 -0600 @@ -68,8 +68,6 @@ void show_registers(struct cpu_user_regs "ss: %04x cs: %04x\n", fault_regs.ds, fault_regs.es, fault_regs.fs, fault_regs.gs, fault_regs.ss, fault_regs.cs); - - show_stack(regs); } void show_page_walk(unsigned long addr) @@ -115,40 +113,6 @@ void show_page_walk(unsigned long addr) printk(" L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn); } -int __spurious_page_fault(unsigned long addr) -{ - unsigned long mfn = read_cr3() >> PAGE_SHIFT; - l4_pgentry_t l4e, *l4t; - l3_pgentry_t l3e, *l3t; - l2_pgentry_t l2e, *l2t; - l1_pgentry_t l1e, *l1t; - - l4t = mfn_to_virt(mfn); - l4e = l4t[l4_table_offset(addr)]; - mfn = l4e_get_pfn(l4e); - if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) - return 0; - - l3t = mfn_to_virt(mfn); - l3e = l3t[l3_table_offset(addr)]; - mfn = l3e_get_pfn(l3e); - if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) - return 0; - - l2t = mfn_to_virt(mfn); - l2e = l2t[l2_table_offset(addr)]; - mfn = l2e_get_pfn(l2e); - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - return 0; - if ( l2e_get_flags(l2e) & _PAGE_PSE ) - return 1; - - l1t = mfn_to_virt(mfn); - l1e = l1t[l1_table_offset(addr)]; - mfn = l1e_get_pfn(l1e); - return !!(l1e_get_flags(l1e) & _PAGE_PRESENT); -} - asmlinkage void double_fault(void); asmlinkage void do_double_fault(struct cpu_user_regs *regs) { @@ -159,6 +123,7 @@ asmlinkage void do_double_fault(struct c /* Find information saved during fault and dump it to the console. 
*/ printk("************************************\n"); show_registers(regs); + show_stack_overflow(regs->rsp); printk("************************************\n"); printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id()); printk("System needs manual reset.\n"); diff -r 59d4c1863330 -r fdf25330e4a6 xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/arch/x86/x86_emulate.c Fri Jun 23 15:33:25 2006 -0600 @@ -1146,7 +1146,7 @@ x86_emulate_read_std( *val = 0; if ( copy_from_user((void *)val, (void *)addr, bytes) ) { - propagate_page_fault(addr, 4); /* user mode, read fault */ + propagate_page_fault(addr, 0); /* read fault */ return X86EMUL_PROPAGATE_FAULT; } return X86EMUL_CONTINUE; @@ -1161,7 +1161,7 @@ x86_emulate_write_std( { if ( copy_to_user((void *)addr, (void *)&val, bytes) ) { - propagate_page_fault(addr, 6); /* user mode, write fault */ + propagate_page_fault(addr, PGERR_write_access); /* write fault */ return X86EMUL_PROPAGATE_FAULT; } return X86EMUL_CONTINUE; diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/acm_ops.c --- a/xen/common/acm_ops.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/acm_ops.c Fri Jun 23 15:33:25 2006 -0600 @@ -69,7 +69,7 @@ long do_acm_op(int cmd, XEN_GUEST_HANDLE return -EACCES; rc = acm_set_policy(setpolicy.pushcache, - setpolicy.pushcache_size, 1); + setpolicy.pushcache_size); break; } diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/dom0_ops.c --- a/xen/common/dom0_ops.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/dom0_ops.c Fri Jun 23 15:33:25 2006 -0600 @@ -693,6 +693,21 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op break; #endif + case DOM0_SETTIMEOFFSET: + { + struct domain *d; + + ret = -ESRCH; + d = find_domain_by_id(op->u.settimeoffset.domain); + if ( d != NULL ) + { + d->time_offset_seconds = op->u.settimeoffset.time_offset_seconds; + put_domain(d); + ret = 0; + } + } + break; + default: ret = arch_do_dom0_op(op, u_dom0_op); break; @@ -701,9 +716,9 @@ long 
do_dom0_op(XEN_GUEST_HANDLE(dom0_op spin_unlock(&dom0_lock); if (!ret) - acm_post_dom0_op(op, ssid); + acm_post_dom0_op(op, &ssid); else - acm_fail_dom0_op(op, ssid); + acm_fail_dom0_op(op, &ssid); return ret; } diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/domain.c --- a/xen/common/domain.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/domain.c Fri Jun 23 15:33:25 2006 -0600 @@ -234,7 +234,7 @@ void __domain_crash(struct domain *d) { printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n", d->domain_id, current->vcpu_id, smp_processor_id()); - show_registers(guest_cpu_user_regs()); + show_execution_state(guest_cpu_user_regs()); } else { diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/kernel.c --- a/xen/common/kernel.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/kernel.c Fri Jun 23 15:33:25 2006 -0600 @@ -96,10 +96,11 @@ char *print_tainted(char *str) { if ( tainted ) { - snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c", + snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c%c", tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', - tainted & TAINT_BAD_PAGE ? 'B' : ' '); + tainted & TAINT_BAD_PAGE ? 'B' : ' ', + tainted & TAINT_SYNC_CONSOLE ? 'C' : ' '); } else { diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/keyhandler.c --- a/xen/common/keyhandler.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/keyhandler.c Fri Jun 23 15:33:25 2006 -0600 @@ -87,10 +87,28 @@ static void show_handlers(unsigned char key_table[i].desc); } +static void __dump_execstate(void *unused) +{ + dump_execution_state(); +} + static void dump_registers(unsigned char key, struct cpu_user_regs *regs) { + unsigned int cpu; + printk("'%c' pressed -> dumping registers\n", key); - show_registers(regs); + + /* Get local execution state out immediately, in case we get stuck. 
*/ + printk("\n*** Dumping CPU%d state: ***\n", smp_processor_id()); + show_execution_state(regs); + + for_each_online_cpu ( cpu ) + { + if ( cpu == smp_processor_id() ) + continue; + printk("\n*** Dumping CPU%d state: ***\n", cpu); + on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1); + } } static void halt_machine(unsigned char key, struct cpu_user_regs *regs) diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/memory.c --- a/xen/common/memory.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/memory.c Fri Jun 23 15:33:25 2006 -0600 @@ -282,7 +282,7 @@ memory_exchange(XEN_GUEST_HANDLE(xen_mem LIST_HEAD(in_chunk_list); LIST_HEAD(out_chunk_list); unsigned long in_chunk_order, out_chunk_order; - unsigned long gpfn, gmfn, mfn; + xen_pfn_t gpfn, gmfn, mfn; unsigned long i, j, k; unsigned int memflags = 0; long rc = 0; diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/sched_credit.c --- a/xen/common/sched_credit.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/sched_credit.c Fri Jun 23 15:33:25 2006 -0600 @@ -967,9 +967,6 @@ csched_load_balance(int cpu, struct csch if ( peer_cpu == cpu ) break; - BUG_ON( peer_cpu >= csched_priv.ncpus ); - BUG_ON( peer_cpu == cpu ); - /* * Get ahold of the scheduler lock for this peer CPU. * @@ -1072,7 +1069,6 @@ csched_schedule(s_time_t now) ret.task = snext->vcpu; CSCHED_VCPU_CHECK(ret.task); - BUG_ON( !vcpu_runnable(ret.task) ); return ret; } diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/sched_sedf.c Fri Jun 23 15:33:25 2006 -0600 @@ -360,24 +360,23 @@ static int sedf_init_vcpu(struct vcpu *v INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q)); } + /* Every VCPU gets an equal share of extratime by default. 
*/ + inf->deadl_abs = 0; + inf->latency = 0; + inf->status = EXTRA_AWARE | SEDF_ASLEEP; + inf->extraweight = 1; + if ( v->domain->domain_id == 0 ) { - /*set dom0 to something useful to boot the machine*/ + /* Domain0 gets 75% guaranteed (15ms every 20ms). */ inf->period = MILLISECS(20); inf->slice = MILLISECS(15); - inf->latency = 0; - inf->deadl_abs = 0; - inf->status = EXTRA_AWARE | SEDF_ASLEEP; } else { - /*other domains run in best effort mode*/ + /* Best-effort extratime only. */ inf->period = WEIGHT_PERIOD; inf->slice = 0; - inf->deadl_abs = 0; - inf->latency = 0; - inf->status = EXTRA_AWARE | SEDF_ASLEEP; - inf->extraweight = 1; } inf->period_orig = inf->period; inf->slice_orig = inf->slice; @@ -609,7 +608,16 @@ static void desched_extra_dom(s_time_t n PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id, inf->short_block_lost_tot); +#if 0 + /* + * KAF: If we don't exit short-blocking state at this point + * domain0 can steal all CPU for up to 10 seconds before + * scheduling settles down (when competing against another + * CPU-bound domain). Doing this seems to make things behave + * nicely. Noone gets starved by default. 
+ */ if ( inf->short_block_lost_tot <= 0 ) +#endif { PRINT(4,"Domain %i.%i compensated short block loss!\n", inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id); diff -r 59d4c1863330 -r fdf25330e4a6 xen/common/schedule.c --- a/xen/common/schedule.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/common/schedule.c Fri Jun 23 15:33:25 2006 -0600 @@ -33,8 +33,8 @@ extern void arch_getdomaininfo_ctxt(struct vcpu *, struct vcpu_guest_context *); -/* opt_sched: scheduler - default to SEDF */ -static char opt_sched[10] = "sedf"; +/* opt_sched: scheduler - default to credit */ +static char opt_sched[10] = "credit"; string_param("sched", opt_sched); #define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ diff -r 59d4c1863330 -r fdf25330e4a6 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/drivers/char/console.c Fri Jun 23 15:33:25 2006 -0600 @@ -476,7 +476,11 @@ void init_console(void) if ( strncmp(p, "com", 3) == 0 ) sercon_handle = serial_parse_handle(p); else if ( strncmp(p, "vga", 3) == 0 ) + { vgacon_enabled = 1; + if ( strncmp(p+3, "[keep]", 6) == 0 ) + vgacon_enabled++; + } } init_vga(); @@ -497,14 +501,47 @@ void init_console(void) if ( opt_sync_console ) { serial_start_sync(sercon_handle); + add_taint(TAINT_SYNC_CONSOLE); printk("Console output is synchronous.\n"); } } -void console_endboot(int disable_vga) -{ - if ( disable_vga ) - vgacon_enabled = 0; +void console_endboot(void) +{ + int i, j; + + if ( opt_sync_console ) + { + printk("**********************************************\n"); + printk("******* WARNING: CONSOLE OUTPUT IS SYCHRONOUS\n"); + printk("******* This option is intended to aid debugging " + "of Xen by ensuring\n"); + printk("******* that all output is synchronously delivered " + "on the serial line.\n"); + printk("******* However it can introduce SIGNIFICANT latencies " + "and affect\n"); + printk("******* timekeeping. 
It is NOT recommended for " + "production use!\n"); + printk("**********************************************\n"); + for ( i = 0; i < 3; i++ ) + { + printk("%d... ", 3-i); + for ( j = 0; j < 100; j++ ) + { + if ( softirq_pending(smp_processor_id()) ) + do_softirq(); + mdelay(10); + } + } + printk("\n"); + } + + if ( vgacon_enabled ) + { + vgacon_enabled--; + printk("Xen is %s VGA console.\n", + vgacon_enabled ? "keeping" : "relinquishing"); + } /* * If user specifies so, we fool the switch routine to redirect input diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/acm/acm_core.h --- a/xen/include/acm/acm_core.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/acm/acm_core.h Fri Jun 23 15:33:25 2006 -0600 @@ -121,10 +121,11 @@ int acm_init_domain_ssid(domid_t id, ssi int acm_init_domain_ssid(domid_t id, ssidref_t ssidref); void acm_free_domain_ssid(struct acm_ssid_domain *ssid); int acm_init_binary_policy(u32 policy_code); -int acm_set_policy(void *buf, u32 buf_size, int isuserbuffer); -int acm_get_policy(void *buf, u32 buf_size); -int acm_dump_statistics(void *buf, u16 buf_size); -int acm_get_ssid(ssidref_t ssidref, u8 *buf, u16 buf_size); +int acm_set_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size); +int do_acm_set_policy(void *buf, u32 buf_size); +int acm_get_policy(XEN_GUEST_HANDLE(void) buf, u32 buf_size); +int acm_dump_statistics(XEN_GUEST_HANDLE(void) buf, u16 buf_size); +int acm_get_ssid(ssidref_t ssidref, XEN_GUEST_HANDLE(void) buf, u16 buf_size); int acm_get_decision(ssidref_t ssidref1, ssidref_t ssidref2, u32 hook); int acm_set_policy_reference(u8 * buf, u32 buf_size); int acm_dump_policy_reference(u8 *buf, u32 buf_size); diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/acm/acm_hooks.h --- a/xen/include/acm/acm_hooks.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/acm/acm_hooks.h Fri Jun 23 15:33:25 2006 -0600 @@ -273,7 +273,12 @@ static inline void acm_post_dom0_op(stru op->u.createdomain.domain, op->u.createdomain.ssidref); break; case 
DOM0_DESTROYDOMAIN: - acm_post_domain_destroy(ssid, op->u.destroydomain.domain); + if (*ssid == NULL) { + printkd("%s: ERROR. SSID unset.\n", + __func__); + break; + } + acm_post_domain_destroy(*ssid, op->u.destroydomain.domain); /* free security ssid for the destroyed domain (also if null policy */ acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid)); *ssid = NULL; @@ -281,13 +286,22 @@ static inline void acm_post_dom0_op(stru } } -static inline void acm_fail_dom0_op(struct dom0_op *op, void *ssid) +static inline void acm_fail_dom0_op(struct dom0_op *op, void **ssid) { switch(op->cmd) { case DOM0_CREATEDOMAIN: acm_fail_domain_create( current->domain->ssid, op->u.createdomain.ssidref); break; + case DOM0_DESTROYDOMAIN: + /* we don't handle domain destroy failure but at least free the ssid */ + if (*ssid == NULL) { + printkd("%s: ERROR. SSID unset.\n", + __func__); + break; + } + acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid)); + *ssid = NULL; } } diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/debugger.h --- a/xen/include/asm-ia64/debugger.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-ia64/debugger.h Fri Jun 23 15:33:25 2006 -0600 @@ -41,6 +41,14 @@ #include <xen/gdbstub.h> void show_registers(struct cpu_user_regs *regs); +void dump_stack(void); + +static inline void +show_execution_state(struct cpu_user_regs *regs) +{ + show_registers(regs); + dump_stack(); +} // NOTE: on xen struct pt_regs = struct cpu_user_regs // see include/asm-ia64/linux-xen/asm/ptrace.h diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/vmx.h --- a/xen/include/asm-ia64/vmx.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-ia64/vmx.h Fri Jun 23 15:33:25 2006 -0600 @@ -42,6 +42,7 @@ extern void vmx_save_state(struct vcpu * extern void vmx_save_state(struct vcpu *v); extern void vmx_load_state(struct vcpu *v); extern void show_registers(struct pt_regs *regs); +#define show_execution_state show_registers extern int vmx_build_physmap_table(struct domain 
*d); extern unsigned long __gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); extern void sync_split_caches(void); diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-ia64/xenprocessor.h --- a/xen/include/asm-ia64/xenprocessor.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-ia64/xenprocessor.h Fri Jun 23 15:33:25 2006 -0600 @@ -237,4 +237,6 @@ typedef union { u64 itir; } ia64_itir_t; +#define dump_execution_state() printk("FIXME: implement ia64 dump_execution_state()\n"); + #endif // _ASM_IA64_XENPROCESSOR_H diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-x86/hvm/support.h Fri Jun 23 15:33:25 2006 -0600 @@ -132,7 +132,7 @@ extern unsigned int opt_hvm_debug_level; #define __hvm_bug(regs) \ do { \ printk("__hvm_bug at %s:%d\n", __FILE__, __LINE__); \ - show_registers(regs); \ + show_execution_state(regs); \ domain_crash_synchronous(); \ } while (0) diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-x86/mm.h Fri Jun 23 15:33:25 2006 -0600 @@ -103,13 +103,11 @@ struct page_info #define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift) #define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask) #define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift) -#define PGT_pae_idx_shift PGT_high_mfn_shift #else /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. 
*/ #define PGT_mfn_mask ((1U<<23)-1) /* NX for PAE xen is not supported yet */ #define PGT_high_mfn_nx (1ULL << 63) -#define PGT_pae_idx_shift 23 #endif #define PGT_score_shift 23 diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/processor.h --- a/xen/include/asm-x86/processor.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-x86/processor.h Fri Jun 23 15:33:25 2006 -0600 @@ -128,6 +128,13 @@ /* 'arch_vcpu' flags values */ #define _TF_kernel_mode 0 #define TF_kernel_mode (1<<_TF_kernel_mode) + +/* #PF error code values. */ +#define PGERR_page_present (1U<<0) +#define PGERR_write_access (1U<<1) +#define PGERR_user_mode (1U<<2) +#define PGERR_reserved_bit (1U<<3) +#define PGERR_instr_fetch (1U<<4) #ifndef __ASSEMBLY__ @@ -522,10 +529,16 @@ extern always_inline void prefetchw(cons #endif void show_stack(struct cpu_user_regs *regs); +void show_stack_overflow(unsigned long esp); void show_registers(struct cpu_user_regs *regs); +void show_execution_state(struct cpu_user_regs *regs); void show_page_walk(unsigned long addr); -int __spurious_page_fault(unsigned long addr); asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs); + +/* Dumps current register and stack state. */ +#define dump_execution_state() \ + /* NB. Needs interrupts enabled else we end up in fatal_trap(). 
*/ \ + __asm__ __volatile__ ( "pushf ; sti ; ud2 ; .ascii \"dbg\" ; popf" ) extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-x86/shadow.h Fri Jun 23 15:33:25 2006 -0600 @@ -112,6 +112,30 @@ do { } while (0) #endif +#if CONFIG_PAGING_LEVELS >= 3 +static inline u64 get_cr3_idxval(struct vcpu *v) +{ + u64 pae_cr3; + + if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 && + !shadow_mode_log_dirty(v->domain) ) + { + pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */ + return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK; + } + else + return 0; +} + +#define shadow_key_t u64 +#define index_to_key(x) ((x) << 32) +#else +#define get_cr3_idxval(v) (0) +#define shadow_key_t unsigned long +#define index_to_key(x) (0) +#endif + + #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min)) #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1)) #define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16)) @@ -309,7 +333,7 @@ extern unsigned long get_mfn_from_gpfn_f struct shadow_status { struct shadow_status *next; /* Pull-to-front list per hash bucket. */ - unsigned long gpfn_and_flags; /* Guest pfn plus flags. */ + shadow_key_t gpfn_and_flags; /* Guest pfn plus flags. */ unsigned long smfn; /* Shadow mfn. 
*/ }; @@ -1180,7 +1204,13 @@ static inline unsigned long __shadow_sta struct domain *d, unsigned long gpfn, unsigned long stype) { struct shadow_status *p, *x, *head; - unsigned long key = gpfn | stype; + shadow_key_t key; +#if CONFIG_PAGING_LEVELS >= 3 + if ( d->arch.ops->guest_paging_levels == PAGING_L3 && stype == PGT_l4_shadow ) + key = gpfn | stype | index_to_key(get_cr3_idxval(current)); + else +#endif + key = gpfn | stype; ASSERT(shadow_lock_is_acquired(d)); ASSERT(gpfn == (gpfn & PGT_mfn_mask)); @@ -1295,10 +1325,11 @@ shadow_max_pgtable_type(struct domain *d } static inline void delete_shadow_status( - struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype) + struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype, u64 index) { struct shadow_status *p, *x, *n, *head; - unsigned long key = gpfn | stype; + + shadow_key_t key = gpfn | stype | index_to_key(index); ASSERT(shadow_lock_is_acquired(d)); ASSERT(!(gpfn & ~PGT_mfn_mask)); @@ -1374,11 +1405,12 @@ static inline void delete_shadow_status( static inline void set_shadow_status( struct domain *d, unsigned long gpfn, unsigned long gmfn, - unsigned long smfn, unsigned long stype) + unsigned long smfn, unsigned long stype, u64 index) { struct shadow_status *x, *head, *extra; int i; - unsigned long key = gpfn | stype; + + shadow_key_t key = gpfn | stype | index_to_key(index); SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype); diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/asm-x86/shadow_64.h Fri Jun 23 15:33:25 2006 -0600 @@ -36,9 +36,9 @@ */ extern struct shadow_ops MODE_64_2_HANDLER; extern struct shadow_ops MODE_64_3_HANDLER; +extern struct shadow_ops MODE_64_PAE_HANDLER; #if CONFIG_PAGING_LEVELS == 4 extern struct shadow_ops MODE_64_4_HANDLER; -extern struct shadow_ops MODE_64_PAE_HANDLER; #endif #if CONFIG_PAGING_LEVELS == 3 @@ 
-65,10 +65,6 @@ typedef struct { intpte_t l4; } l4_pgent #define ESH_LOG(_f, _a...) ((void)0) #endif -#define PAGING_L4 4UL -#define PAGING_L3 3UL -#define PAGING_L2 2UL -#define PAGING_L1 1UL #define L_MASK 0xff #define PAE_PAGING_LEVELS 3 @@ -108,18 +104,14 @@ typedef struct { intpte_t lo; } pgentry_ #define entry_has_changed(x,y,flags) \ ( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) +/******************************************************************************/ +/* + * The macro and inlines are for 32-bit PAE guest + */ +#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */ + #define PAE_SHADOW_SELF_ENTRY 259 #define PAE_L3_PAGETABLE_ENTRIES 4 - -/******************************************************************************/ -/* - * The macro and inlines are for 32-bit PAE guest on 64-bit host - */ -#define PAE_CR3_ALIGN 5 -#define PAE_CR3_IDX_MASK 0x7f -#define PAE_CR3_IDX_NO 128 - -#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2,1] */ /******************************************************************************/ static inline int table_offset_64(unsigned long va, int level) @@ -186,19 +178,10 @@ static inline int guest_table_offset_64( } } -static inline unsigned long get_cr3_idxval(struct vcpu *v) -{ - unsigned long pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */ - - return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK; -} - - #define SH_GUEST_32PAE 1 #else #define guest_table_offset_64(va, level, index) \ table_offset_64((va),(level)) -#define get_cr3_idxval(v) 0 #define SH_GUEST_32PAE 0 #endif @@ -514,7 +497,10 @@ static inline void entry_general( l1_p =(pgentry_64_t *)map_domain_page(smfn); for (i = 0; i < L1_PAGETABLE_ENTRIES; i++) - entry_remove_flags(l1_p[i], _PAGE_RW); + { + if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) ) + entry_remove_flags(l1_p[i], _PAGE_RW); + } unmap_domain_page(l1_p); } diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/asm-x86/shadow_ops.h --- a/xen/include/asm-x86/shadow_ops.h Fri Jun 23 15:26:01 
2006 -0600 +++ b/xen/include/asm-x86/shadow_ops.h Fri Jun 23 15:33:25 2006 -0600 @@ -21,6 +21,14 @@ #ifndef _XEN_SHADOW_OPS_H #define _XEN_SHADOW_OPS_H + +#define PAGING_L4 4UL +#define PAGING_L3 3UL +#define PAGING_L2 2UL +#define PAGING_L1 1UL + +#define PAE_CR3_ALIGN 5 +#define PAE_CR3_IDX_MASK 0x7f #if defined( GUEST_PGENTRY_32 ) diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/public/arch-x86_32.h Fri Jun 23 15:33:25 2006 -0600 @@ -74,16 +74,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); */ #ifdef CONFIG_X86_PAE #define __HYPERVISOR_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_START 0xF5800000 +#define __MACH2PHYS_VIRT_END 0xF6800000 #else #define __HYPERVISOR_VIRT_START 0xFC000000 +#define __MACH2PHYS_VIRT_START 0xFC000000 +#define __MACH2PHYS_VIRT_END 0xFC400000 #endif #ifndef HYPERVISOR_VIRT_START #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) #ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) #endif /* Maximum number of virtual CPUs in multi-processor guests. 
*/ diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/public/arch-x86_64.h Fri Jun 23 15:33:25 2006 -0600 @@ -85,21 +85,25 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #define __HYPERVISOR_VIRT_START 0xFFFF800000000000 #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 #ifndef HYPERVISOR_VIRT_START #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) #define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) #endif +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + /* Maximum number of virtual CPUs in multi-processor guests. */ #define MAX_VIRT_CPUS 32 #ifndef __ASSEMBLY__ - -/* The machine->physical mapping table starts at this address, read-only. 
*/ -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif /* * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/public/dom0_ops.h Fri Jun 23 15:33:25 2006 -0600 @@ -513,6 +513,27 @@ struct dom0_hypercall_init { }; typedef struct dom0_hypercall_init dom0_hypercall_init_t; DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t); + +#define DOM0_DOMAIN_SETUP 49 +#define _XEN_DOMAINSETUP_hvm_guest 0 +#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) +typedef struct dom0_domain_setup { + domid_t domain; /* domain to be affected */ + unsigned long flags; /* XEN_DOMAINSETUP_* */ +#ifdef __ia64__ + unsigned long bp; /* mpaddr of boot param area */ + unsigned long maxmem; /* Highest memory address for MDT. */ +#endif +} dom0_domain_setup_t; +DEFINE_XEN_GUEST_HANDLE(dom0_domain_setup_t); + +#define DOM0_SETTIMEOFFSET 50 +struct dom0_settimeoffset { + domid_t domain; + int32_t time_offset_seconds; /* applied to domain wallclock time */ +}; +typedef struct dom0_settimeoffset dom0_settimeoffset_t; +DEFINE_XEN_GUEST_HANDLE(dom0_settimeoffset_t); struct dom0_op { uint32_t cmd; @@ -555,6 +576,8 @@ struct dom0_op { struct dom0_irq_permission irq_permission; struct dom0_iomem_permission iomem_permission; struct dom0_hypercall_init hypercall_init; + struct dom0_domain_setup domain_setup; + struct dom0_settimeoffset settimeoffset; uint8_t pad[128]; } u; }; diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/public/memory.h --- a/xen/include/public/memory.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/public/memory.h Fri Jun 23 15:33:25 2006 -0600 @@ -141,6 +141,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); /* + * Returns the location in virtual address space of the machine_to_phys + * mapping 
table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + unsigned long v_start, v_end; /* Start and end virtual addresses. */ + unsigned long max_mfn; /* Maximum MFN that can be looked up. */ +}; +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); + +/* * Sets the GPFN at which a particular page appears in the specified guest's * pseudophysical address space. * arg == addr of xen_add_to_physmap_t. diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/console.h --- a/xen/include/xen/console.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/xen/console.h Fri Jun 23 15:33:25 2006 -0600 @@ -15,7 +15,7 @@ long read_console_ring(XEN_GUEST_HANDLE( long read_console_ring(XEN_GUEST_HANDLE(char), u32 *, int); void init_console(void); -void console_endboot(int disable_vga); +void console_endboot(void); void console_force_unlock(void); void console_force_lock(void); diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/lib.h --- a/xen/include/xen/lib.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/xen/lib.h Fri Jun 23 15:33:25 2006 -0600 @@ -82,6 +82,7 @@ unsigned long long parse_size_and_unit(c #define TAINT_UNSAFE_SMP (1<<0) #define TAINT_MACHINE_CHECK (1<<1) #define TAINT_BAD_PAGE (1<<2) +#define TAINT_SYNC_CONSOLE (1<<3) extern int tainted; #define TAINT_STRING_MAX_LEN 20 extern char *print_tainted(char *str); diff -r 59d4c1863330 -r fdf25330e4a6 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Fri Jun 23 15:26:01 2006 -0600 +++ b/xen/include/xen/sched.h Fri Jun 23 15:33:25 2006 -0600 @@ -159,6 +159,7 @@ struct domain /* OProfile support. 
*/ struct xenoprof *xenoprof; + int32_t time_offset_seconds; }; struct domain_setup_info diff -r 59d4c1863330 -r fdf25330e4a6 patches/linux-2.6.16.13/ipv6-no-autoconf.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patches/linux-2.6.16.13/ipv6-no-autoconf.patch Fri Jun 23 15:33:25 2006 -0600 @@ -0,0 +1,23 @@ + net/ipv6/addrconf.c | 2 ++ + 1 files changed, 2 insertions(+) + +Index: build/net/ipv6/addrconf.c +=================================================================== +--- build.orig/net/ipv6/addrconf.c ++++ build/net/ipv6/addrconf.c +@@ -2462,6 +2462,7 @@ static void addrconf_dad_start(struct in + spin_lock_bh(&ifp->lock); + + if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || ++ !(dev->flags&IFF_MULTICAST) || + !(ifp->flags&IFA_F_TENTATIVE)) { + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); +@@ -2546,6 +2547,7 @@ static void addrconf_dad_completed(struc + if (ifp->idev->cnf.forwarding == 0 && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0 && ++ (dev->flags & IFF_MULTICAST) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + struct in6_addr all_routers; + diff -r 59d4c1863330 -r fdf25330e4a6 tools/ioemu/hw/piix4acpi.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/ioemu/hw/piix4acpi.c Fri Jun 23 15:33:25 2006 -0600 @@ -0,0 +1,481 @@ +/* + * PIIX4 ACPI controller emulation + * + * Winston liwen Wang, winston.l.wang@xxxxxxxxx + * Copyright (c) 2006 , Intel Corporation. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include "vl.h"
+#define FREQUENCE_PMTIMER 3579545 /* ACPI PM timer clock: 3.579545 MHz */
+/* ACPI register bit definitions */
+
+/* PM1_STS */
+#define TMROF_STS (1 << 0)
+#define BM_STS (1 << 4)
+#define GBL_STS (1 << 5)
+#define PWRBTN_STS (1 << 8)
+#define RTC_STS (1 << 10)
+#define PRBTNOR_STS (1 << 11)
+#define WAK_STS (1 << 15)
+/* PM1_EN */
+#define TMROF_EN (1 << 0)
+#define GBL_EN (1 << 5)
+#define PWRBTN_EN (1 << 8)
+#define RTC_EN (1 << 10)
+/* PM1_CNT */
+#define SCI_EN (1 << 0)
+#define GBL_RLS (1 << 2)
+#define SLP_EN (1 << 13)
+
+/* PM1 control sleep-type field, and the value treated below as a power-off request */
+#define SLP_TYP_MASK 0x1C00
+#define SLP_VAL 0x1C00
+
+typedef struct AcpiDeviceState AcpiDeviceState;
+AcpiDeviceState *acpi_device_table;
+
+/* PM timer state; saved and restored by pmtimer_save()/pmtimer_load() */
+typedef struct PMTState {
+    uint32_t count;
+    int irq;
+    uint64_t next_pm_time;
+    QEMUTimer *pm_timer;
+}PMTState;
+
+typedef struct PM1Event_BLK {
+    uint16_t pm1_status; /* pm1a_EVT_BLK */
+    uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+}PM1Event_BLK;
+
+typedef struct PCIAcpiState {
+    PCIDevice dev;
+    uint16_t irq;
+    uint16_t pm1_status; /* pm1a_EVT_BLK */
+    uint16_t pm1_enable; /* pm1a_EVT_BLK+2 */
+    uint16_t pm1_control; /* pm1a_ECNT_BLK */
+    uint32_t pm1_timer; /* pmtmr_BLK */
+} PCIAcpiState;
+
+static PMTState *pmtimer_state;
+static PCIAcpiState *acpi_state;
+
+static void pmtimer_save(QEMUFile *f, void *opaque)
+{
+    PMTState *s = opaque;
+
+    qemu_put_be32s(f, &s->count);
+    qemu_put_be32s(f, &s->irq);
+    qemu_put_be64s(f, &s->next_pm_time);
+    qemu_put_timer(f, s->pm_timer);
+}
+
+static int pmtimer_load(QEMUFile *f, void *opaque, int version_id)
+{
+    PMTState *s = opaque;
+
+    if (version_id != 1) /* only format version 1 is accepted */
+        return -EINVAL;
+    qemu_get_be32s(f, &s->count);
+    qemu_get_be32s(f, &s->irq);
+    qemu_get_be64s(f, &s->next_pm_time);
+    qemu_get_timer(f, s->pm_timer);
+    return 0;
+
+}
+
+static inline void acpi_set_irq(PCIAcpiState *s)
+{
+/* no real SCI event is needed for now, so the following line is commented out */
+/* pic_set_irq(s->irq, 1); */
+    printf("acpi_set_irq: s->irq %x \n",s->irq);
+}
+
+static void pm_timer_update(void *opaque)
+{
+    PMTState *s = opaque;
+    s->next_pm_time += muldiv64(1, ticks_per_sec,FREQUENCE_PMTIMER); /* re-arm one PM timer tick ahead */
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+    acpi_state->pm1_timer ++;
+
+    /* When the 24-bit counter overflows, wrap it to zero and latch TMROF_STS. */
+    if (acpi_state->pm1_timer >= 0x1000000) {
+/* printf("pm_timerupdate: timer overflow: %x \n", acpi_state->pm1_timer); */
+
+        acpi_state->pm1_timer = 0;
+        acpi_state->pm1_status = acpi_state->pm1_status | TMROF_STS;
+        /* If TMROF_EN is set then send the irq. */
+        if ((acpi_state->pm1_enable & TMROF_EN) == TMROF_EN) {
+            acpi_set_irq(acpi_state);
+            acpi_state->pm1_enable = 0x00; /* only need one time...*/
+        }
+    }
+    s->count = acpi_state->pm1_timer;
+}
+
+static PMTState *pmtimer_init(void)
+{
+    PMTState *s;
+
+    s = qemu_mallocz(sizeof(PMTState));
+    if (!s)
+        return NULL;
+
+    /* s->irq = irq; */
+
+    s->pm_timer = qemu_new_timer(vm_clock, pm_timer_update, s);
+
+    s->count = 0;
+    s->next_pm_time = qemu_get_clock(vm_clock) + muldiv64(1, ticks_per_sec,FREQUENCE_PMTIMER) + 1;
+    qemu_mod_timer(s->pm_timer, s->next_pm_time);
+
+    register_savevm("pm timer", 1, 1, pmtimer_save, pmtimer_load, s);
+    return s;
+}
+
+static void acpi_reset(PCIAcpiState *s)
+{
+    uint8_t *pci_conf;
+    pci_conf = s->dev.config;
+
+    pci_conf[0x42] = 0x00;
+    pci_conf[0x43] = 0x00;
+    s->irq = 9;
+    s->pm1_status = 0;
+    s->pm1_enable = 0x00; /* TMROF_EN should be cleared */
+    s->pm1_control = SCI_EN; /* SCI_EN */
+    s->pm1_timer = 0;
+}
+
+/*byte access */
+static void acpiPm1Status_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    if ((val&TMROF_STS)==TMROF_STS)
+        s->pm1_status = s->pm1_status&~TMROF_STS; /* writing 1 clears only this bit */
+
+    if ((val&GBL_STS)==GBL_STS)
+        s->pm1_status = s->pm1_status&~GBL_STS; /* writing 1 clears only this bit */
+
+/* printf("acpiPm1Status_writeb \n addr %x val:%x pm1_status:%x \n", addr, val,s->pm1_status); */
+}
+
+static uint32_t acpiPm1Status_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status;
+/* printf("acpiPm1Status_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1StatusP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_status = (val<<8)|(s->pm1_status); /* bitwise merge into bits 15:8; NOTE(review): never clears bits - confirm */
+/* printf("acpiPm1StatusP1_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1StatusP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_status)>>8;
+    printf("acpiPm1StatusP1_readb \n addr %x val:%x\n", addr, val);
+
+    return val;
+}
+
+static void acpiPm1Enable_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = val;
+/* printf("acpiPm1Enable_writeb \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1Enable_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_enable)|0x1; /* bitwise OR; NOTE(review): bit 0 (TMROF_EN) always reads as set - confirm intent */
+/* printf("acpiPm1Enable_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1EnableP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = (val<<8)|(s->pm1_enable); /* bitwise merge into bits 15:8; NOTE(review): never clears bits - confirm */
+/* printf("acpiPm1EnableP1_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1EnableP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_enable)>>8;
+/* printf("acpiPm1EnableP1_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val;
+/* printf("acpiPm1Control_writeb \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_control;
+/* printf("acpiPm1Control_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = (val<<8)|(s->pm1_control); /* bitwise merge into bits 15:8 */
+/* printf("acpiPm1ControlP1_writeb \n addr %x val:%x\n", addr, val); */
+
+    // Check for power off request (val is the high byte, so test it in register position)
+
+    if ((((val<<8) & SLP_EN) != 0) &&
+        (((val<<8) & SLP_TYP_MASK) == SLP_VAL)) {
+        s->pm1_timer=0x0; //clear ACPI timer
+        qemu_system_shutdown_request();
+    }
+}
+
+static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = (s->pm1_control)>>8;
+/* printf("acpiPm1ControlP1_readb \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+
+/* word access */
+
+static void acpiPm1Status_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    if ((val&TMROF_STS)==TMROF_STS)
+        s->pm1_status = s->pm1_status&~TMROF_STS; /* writing 1 clears only this bit */
+
+    if ((val&GBL_STS)==GBL_STS)
+        s->pm1_status = s->pm1_status&~GBL_STS; /* writing 1 clears only this bit */
+
+/* printf("acpiPm1Status_writew \n addr %x val:%x pm1_status:%x \n", addr, val,s->pm1_status); */
+}
+
+static uint32_t acpiPm1Status_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status;
+/* printf("acpiPm1Status_readw \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Enable_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_enable = val;
+/* printf("acpiPm1Enable_writew \n addr %x val:%x\n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Enable_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_enable;
+/* printf("acpiPm1Enable_readw \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_control = val;
+/* printf("acpiPm1Control_writew \n addr %x val:%x\n", addr, val); */
+
+    // Check for power off request
+
+    if (((val & SLP_EN) != 0) &&
+        ((val & SLP_TYP_MASK) == SLP_VAL)) {
+        qemu_system_shutdown_request();
+    }
+
+}
+
+static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_control;
+/* printf("acpiPm1Control_readw \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+/* dword access */
+
+static void acpiPm1Event_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_status = val;
+    s->pm1_enable = val>>16;
+/* printf("acpiPm1Event_writel \n addr %x val:%x \n", addr, val); */
+
+}
+
+static uint32_t acpiPm1Event_readl(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_status|((uint32_t)s->pm1_enable<<16); /* widen first: shifting the promoted int into the sign bit is undefined */
+/* printf("acpiPm1Event_readl \n addr %x val:%x\n", addr, val); */
+
+    return val;
+}
+
+static void acpiPm1Timer_writel(void *opaque, uint32_t addr, uint32_t val)
+{
+    PCIAcpiState *s = opaque;
+
+    s->pm1_timer = val;
+/* printf("acpiPm1Timer_writel \n addr %x val:%x\n", addr, val); */
+}
+
+static uint32_t acpiPm1Timer_readl(void *opaque, uint32_t addr)
+{
+    PCIAcpiState *s = opaque;
+    uint32_t val;
+
+    val = s->pm1_timer;
+/* printf("acpiPm1Timer_readl \n addr %x val:%x\n", addr, val); */
+    return val;
+}
+
+static void acpi_map(PCIDevice *pci_dev, int region_num,
+                     uint32_t addr, uint32_t size, int type)
+{
+    PCIAcpiState *d = (PCIAcpiState *)pci_dev;
+
+    printf("register acpi io\n");
+
+    /* Byte access */
+    register_ioport_write(addr, 1, 1, acpiPm1Status_writeb, d);
+    register_ioport_read(addr, 1, 1, acpiPm1Status_readb, d);
+    register_ioport_write(addr+1, 1, 1, acpiPm1StatusP1_writeb, d);
+    register_ioport_read(addr+1, 1, 1, acpiPm1StatusP1_readb, d);
+
+    register_ioport_write(addr + 2, 1, 1, acpiPm1Enable_writeb, d);
+    register_ioport_read(addr + 2, 1, 1, acpiPm1Enable_readb, d);
+    register_ioport_write(addr + 2 +1, 1, 1, acpiPm1EnableP1_writeb, d);
+    register_ioport_read(addr + 2 +1, 1, 1, acpiPm1EnableP1_readb, d);
+
+    register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d);
+    register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d);
+    register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d);
+    register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d);
+
+    /* Word access */
+    register_ioport_write(addr, 2, 2, acpiPm1Status_writew, d);
+    register_ioport_read(addr, 2, 2, acpiPm1Status_readw, d);
+
+    register_ioport_write(addr + 2, 2, 2, acpiPm1Enable_writew, d);
+    register_ioport_read(addr + 2, 2, 2, acpiPm1Enable_readw, d);
+
+    register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d);
+    register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d);
+
+    /* DWord access */
+    register_ioport_write(addr, 4, 4, acpiPm1Event_writel, d);
+    register_ioport_read(addr, 4, 4, acpiPm1Event_readl, d);
+
+    register_ioport_write(addr + 8, 4, 4, acpiPm1Timer_writel, d);
+    register_ioport_read(addr + 8, 4, 4, acpiPm1Timer_readl, d);
+}
+
+/* PIIX4 acpi pci configuration space, func 3 */
+void pci_piix4_acpi_init(PCIBus *bus)
+{
+    PCIAcpiState *d;
+    uint8_t *pci_conf;
+
+    /* register the ACPI device at piix3_state's devfn + 3 */
+    d = (PCIAcpiState *)pci_register_device(
+        bus, "PIIX4 ACPI", sizeof(PCIAcpiState),
+        ((PCIDevice *)piix3_state)->devfn + 3, NULL, NULL);
+
+    acpi_state = d;
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0x86; /* Intel */
+    pci_conf[0x01] = 0x80;
+    pci_conf[0x02] = 0x13;
+    pci_conf[0x03] = 0x71;
+    pci_conf[0x08] = 0x01; /* B0 stepping */
+    pci_conf[0x09] = 0x00; /* base class */
+    pci_conf[0x0a] = 0x80; /* Sub class */
+    pci_conf[0x0b] = 0x06;
+    pci_conf[0x0e] = 0x00;
+    pci_conf[0x3d] = 0x01; /* hardwired to use PIRQA */
+
+    pci_register_io_region((PCIDevice *)d, 4, 0x10,
+                           PCI_ADDRESS_SPACE_IO, acpi_map);
+    pmtimer_state = pmtimer_init();
+    acpi_reset (d);
+}
diff -r 59d4c1863330 -r fdf25330e4a6 tools/python/xen/util/SSHTransport.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/SSHTransport.py Fri Jun 23 15:33:25 2006 -0600
@@ -0,0 +1,102 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2006 Anthony Liguori <aliguori@xxxxxxxxxx>
+# Copyright (C) 2006 XenSource Inc.
+#============================================================================
+
+"""
+XML-RPC SSH transport.
+"""
+
+from xmlrpclib import getparser, Fault
+from subprocess import Popen, PIPE
+from getpass import getuser
+from fcntl import ioctl
+import errno
+import os
+import termios
+
+
+def getHTTPURI(uri):
+    (protocol, rest) = uri.split(':', 1)
+    if not rest.startswith('//'):
+        raise ValueError("Invalid ssh URL '%s'" % uri)
+    rest = rest[2:]
+    user = getuser()  # default to the local user name unless the URL carries user@
+    path = 'RPC2'  # default path when the URL has none
+    if rest.find('@') != -1:
+        (user, rest) = rest.split('@', 1)
+    if rest.find('/') != -1:
+        (host, rest) = rest.split('/', 1)
+        if len(rest) > 0:
+            path = rest
+    else:
+        host = rest
+    transport = SSHTransport(host, user)
+    uri = 'http://%s/%s' % (host, path)
+    return transport, uri
+
+
+class SSHTransport(object):
+    def __init__(self, host, user, askpass=None):
+        self.host = host
+        self.user = user
+        self.askpass = askpass
+        self.ssh = None  # ssh child process, started lazily by getssh()
+
+    def getssh(self):
+        if self.ssh is None:
+            if self.askpass:
+                f = open('/dev/tty', 'w')
+                try:
+                    os.environ['SSH_ASKPASS'] = self.askpass
+                    ioctl(f.fileno(), termios.TIOCNOTTY)
+                finally:
+                    f.close()
+
+            cmd = ['ssh', '%s@%s' % (self.user, self.host), 'xm serve']
+            try:
+                self.ssh = Popen(cmd, bufsize=0, stdin=PIPE, stdout=PIPE)
+            except OSError, (err, msg):
+                if err == errno.ENOENT:
+                    raise Fault(0, "ssh executable not found!")
+                raise
+        return self.ssh
+
+    def request(self, host, handler, request_body, verbose=0):
+        p, u = getparser()
+        ssh = self.getssh()
+        ssh.stdin.write("""POST /%s HTTP/1.1
+User-Agent: Xen
+Host: %s
+Content-Type: text/xml
+Content-Length: %d
+
+%s""" % (handler, host, len(request_body), request_body))
+        ssh.stdin.flush()
+
+        content_length = 0
+        line = ssh.stdout.readline()
+        if len(line.split()) < 2 or line.split()[1] != '200':  # EOF or short status line is an error too
+            raise Fault(0, 'Server returned %s' % (' '.join(line.split()[1:])))  # report status fields, not single characters
+
+        while line not in ['', '\r\n', '\n']:  # consume headers up to the blank line (or EOF)
+            if line.lower().startswith('content-length:'):
+                content_length = int(line[15:].strip())
+            line = ssh.stdout.readline()
+        content = ssh.stdout.read(content_length)
+        p.feed(content)
+        p.close()
+        return u.close()
diff -r
59d4c1863330 -r fdf25330e4a6 tools/blktap/Makefile --- a/tools/blktap/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -MAJOR = 3.0 -MINOR = 0 -SONAME = libblktap.so.$(MAJOR) - -XEN_ROOT = ../.. -include $(XEN_ROOT)/tools/Rules.mk - -SUBDIRS := -SUBDIRS += ublkback -#SUBDIRS += parallax - -BLKTAP_INSTALL_DIR = /usr/sbin - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 - -INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE) - -LIBS := -lpthread -lz - -SRCS := -SRCS += blktaplib.c xenbus.c blkif.c - -CFLAGS += -Werror -CFLAGS += -Wno-unused -CFLAGS += -fno-strict-aliasing -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -# get asprintf(): -CFLAGS += -D _GNU_SOURCE -# Get gcc to generate the dependencies for us. -CFLAGS += -Wp,-MD,.$(@F).d -CFLAGS += $(INCLUDES) -DEPS = .*.d - -OBJS = $(patsubst %.c,%.o,$(SRCS)) -IBINS := -#IBINS += blkdump - -LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) - -.PHONY: all -all: mk-symlinks libblktap.so #blkdump - @set -e; for subdir in $(SUBDIRS); do \ - $(MAKE) -C $$subdir $@; \ - done - -.PHONY: install -install: all - $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) - $(INSTALL_DIR) -p $(DESTDIR)/usr/include - $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR) - $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include - #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR) - @set -e; for subdir in $(SUBDIRS); do \ - $(MAKE) -C $$subdir $@; \ - done - -.PHONY: clean -clean: - rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump - @set -e; for subdir in $(SUBDIRS); do \ - $(MAKE) -C $$subdir $@; \ - done - -.PHONY: rpm -rpm: all - rm -rf staging - mkdir staging - mkdir staging/i386 - rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \ - --define "_rpmdir$$PWD/staging" -bb rpm.spec - mv staging/i386/*.rpm . 
- rm -rf staging - -libblktap.so: $(OBJS) - $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \ - -L$(XEN_XENSTORE) -l xenstore \ - -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) - ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) - ln -sf libblktap.so.$(MAJOR) $@ - -blkdump: libblktap.so - $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \ - -l blktap blkdump.c - -.PHONY: TAGS clean install mk-symlinks rpm - -.PHONY: TAGS -TAGS: - etags -t $(SRCS) *.h - --include $(DEPS) - diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/README --- a/tools/blktap/README Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,149 +0,0 @@ -Block Tap User-level Interfaces -Andrew Warfield -andrew.warfield@xxxxxxxxxxxx -February 8, 2005 - -NOTE #1: The blktap is _experimental_ code. It works for me. Your -mileage may vary. Don't use it for anything important. Please. ;) - -NOTE #2: All of the interfaces here are likely to change. This is all -early code, and I am checking it in because others want to play with -it. If you use it for anything, please let me know! - -Overview: ---------- - -This directory contains a library and set of example applications for -the block tap device. The block tap hooks into the split block device -interfaces above Xen allowing them to be extended. This extension can -be done in userspace with the help of a library. - -The tap can be installed either as an interposition domain in between -a frontend and backend driver pair, or as a terminating backend, in -which case it is responsible for serving all requests itself. - -There are two reasons that you might want to use the tap, -corresponding to these configurations: - - 1. To examine or modify a stream of block requests while they are - in-flight (e.g. to encrypt data, or add data-driven watchpoints) - - 2. To prototype a new backend driver, serving requests from the tap - rather than passing them along to the XenLinux blkback driver. - (e.g. 
to forward block requests to a remote host) - - -Interface: ----------- - -At the moment, the tap interface is similar in spirit to that of the -Linux netfilter. Requests are messages from a client (frontend) -domain to a disk (backend) domain. Responses are messages travelling -back, acknowledging the completion of a request. The library allows -chains of functions to be attached to these events. In addition, -hooks may be attached to handle control messages, which signify things -like connections from new domains. - -At present the control messages especially expose a lot of the -underlying driver interfaces. This may change in the future in order -to simplify writing hooks. - -Here are the public interfaces: - -These allow hook functions to be chained: - - void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)); - void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)); - void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); - -This allows a response to be injected, in the case where a request has -been removed using BLKTAP_STOLEN. - - void blktap_inject_response(blkif_response_t *); - -These let you add file descriptors and handlers to the main poll loop: - - int blktap_attach_poll(int fd, short events, int (*func)(int)); - void blktap_detach_poll(int fd); - -This starts the main poll loop: - - int blktap_listen(void); - -Example: --------- - -blkimg.c uses an image on the local file system to serve requests to -a domain. Here's what it looks like: - ----[blkimg.c]--- - -/* blkimg.c - * - * file-backed disk. - */ - -#include "blktaplib.h" -#include "blkimglib.h" - - -int main(int argc, char *argv[]) -{ - image_init(); - - blktap_register_ctrl_hook("image_control", image_control); - blktap_register_request_hook("image_request", image_request); - blktap_listen(); - - return 0; -} - ----------------- - -All of the real work is in blkimglib.c, but this illustrates the -actual tap interface well enough. 
image_control() will be called with -all control messages. image_request() handles requests. As it reads -from an on-disk image file, no requests are ever passed on to a -backend, and so there will be no responses to process -- so there is -nothing registered as a response hook. - -Other examples: ---------------- - -Here is a list of other examples in the directory: - -Things that terminate a block request stream: - - blkimg - Use an image file/device to serve requests - blkgnbd - Use a remote gnbd server to serve requests - blkaio - Use libaio... (DOES NOT WORK) - -Things that don't: - - blkdump - Print in-flight requests. - blkcow - Really inefficient copy-on-write disks using libdb to store - writes. - -There are examples of plugging these things together, for instance -blkcowgnbd is a read-only gnbd device with copy-on-write to a local -file. - -TODO: ------ - -- Make session tracking work. At the moment these generally just handle a - single front-end client at a time. - -- Integrate with Xend. Need to cleanly pass an image identifier in the connect - message. - -- Make an asynchronous file-io terminator. The libaio attempt is - tragically stalled because mapped foreign pages make pfn_valid fail - (they are VM_IO), and so cannot be passed to aio as targets. A - better solution may be to tear the disk interfaces out of the real - backend and expose them somehow. - -- Make CoW suck less. - -- Do something more along the lines of dynamic linking for the - plugins, so that they don't all need a new main(). diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/README.sept05 --- a/tools/blktap/README.sept05 Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -The blktap has been rewritten substantially based on the current -blkback driver. I've removed passthrough support, as this is broken -by the move to grant tables and the lack of transitive grants. A -blktap VM is now only capable of terminating block requests in -userspace. 
- -ublkback/ contains a _very_ initial cut at a user-level version of the block -backend driver. It gives a working example of how the current tap -interfaces are used, in particular w.r.t. the vbd directories in -xenstore. - -parallax/ contains fairly recent parallax code. This does not run on -the changed blktap interface, but should only be a couple of hours -work to get going again. - -All of the tricky bits are done, but there is plenty of cleaning to -do, and the top-level functionality is not here yet. At the moment, -the daemon ignores the pdev requested by the tools and opens the file -or device specified by TMP_IMAGE_FILE_NAME in ublkback.c. - -TODO: -1. Fix to allow pdev in the store to specify the device to open. -2. Add support (to tools as well) to mount arbitrary files... - just write the filename to mount into the store, instead of pdev. -3. Reexamine blkif refcounting; it is almost certainly broken at the moment. - - creating a blkif should take a reference. - - each inflight request should take a reference on dequeue in blktaplib - - sending responses should drop refs. - - blkif should be implicitly freed when refcounts fall to 0. -4. Modify the parallax req/rsp code as per ublkback to use the new tap - interfaces. -5. Write a front end that allows parallax and normal mounts to coexist -6. Allow blkback and blktap to run at the same time. diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blkdump.c --- a/tools/blktap/blkdump.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -/* blkdump.c - * - * show a running trace of block requests as they fly by. - * - * (c) 2004 Andrew Warfield. 
- */ - -#include <stdio.h> -#include "blktaplib.h" - -int request_print(blkif_request_t *req) -{ - int i; - - if ( (req->operation == BLKIF_OP_READ) || - (req->operation == BLKIF_OP_WRITE) ) - { - printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", - ID_TO_DOM(req->id), ID_TO_IDX(req->id), - blkif_op_name[req->operation], - req->nr_segments, req->handle, - req->sector_number); - - - for (i=0; i < req->nr_segments; i++) { - printf(" (gref: 0x%8x start: %u stop: %u)\n", - req->seg[i].gref, - req->seg[i].first_sect, - req->seg[i].last_sect); - } - - } else { - printf("Unknown request message type.\n"); - } - - return BLKTAP_PASS; -} - -int response_print(blkif_response_t *rsp) -{ - if ( (rsp->operation == BLKIF_OP_READ) || - (rsp->operation == BLKIF_OP_WRITE) ) - { - printf("[%2u:%2u>%5s] (status: %d)\n", - ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), - blkif_op_name[rsp->operation], - rsp->status); - - } else { - printf("Unknown request message type.\n"); - } - return BLKTAP_PASS; -} - -int main(int argc, char *argv[]) -{ - blktap_register_request_hook("request_print", request_print); - blktap_register_response_hook("response_print", response_print); - blktap_listen(); - - return 0; -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blkif.c --- a/tools/blktap/blkif.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,212 +0,0 @@ -/* - * blkif.c - * - * The blkif interface for blktap. A blkif describes an in-use virtual disk. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#include <err.h> - -#include "blktaplib.h" - -#if 1 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) -#else -#define DPRINTF(_f, _a...) 
((void)0) -#endif - -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) - -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - ((blkif->domid != domid) || (blkif->handle != handle)) ) - blkif = blkif->hash_next; - return blkif; -} - -blkif_t *alloc_blkif(domid_t domid) -{ - blkif_t *blkif; - - blkif = (blkif_t *)malloc(sizeof(blkif_t)); - if (!blkif) - return NULL; - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - - return blkif; -} - -static int (*new_blkif_hook)(blkif_t *blkif) = NULL; -void register_new_blkif_hook(int (*fn)(blkif_t *blkif)) -{ - new_blkif_hook = fn; -} - -int blkif_init(blkif_t *blkif, long int handle, long int pdev, - long int readonly) -{ - domid_t domid; - blkif_t **pblkif; - - if (blkif == NULL) - return -EINVAL; - - domid = blkif->domid; - blkif->handle = handle; - blkif->pdev = pdev; - blkif->readonly = readonly; - - /* - * Call out to the new_blkif_hook. The tap application should define this, - * and it should return having set blkif->ops - * - */ - if (new_blkif_hook == NULL) - { - warn("Probe detected a new blkif, but no new_blkif_hook!"); - return -1; - } - new_blkif_hook(blkif); - - /* Now wire it in. 
*/ - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif != NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - { - DPRINTF("Could not create blkif: already exists\n"); - return -1; - } - pblkif = &(*pblkif)->hash_next; - } - blkif->hash_next = NULL; - *pblkif = blkif; - - return 0; -} - -void free_blkif(blkif_t *blkif) -{ - blkif_t **pblkif, *curs; - - pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)]; - while ( (curs = *pblkif) != NULL ) - { - if ( blkif == curs ) - { - *pblkif = curs->hash_next; - } - pblkif = &curs->hash_next; - } - free(blkif); -} - -void blkif_register_request_hook(blkif_t *blkif, char *name, - int (*rh)(blkif_t *, blkif_request_t *, int)) -{ - request_hook_t *rh_ent, **c; - - rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); - if (!rh_ent) - { - warn("couldn't allocate a new hook"); - return; - } - - rh_ent->func = rh; - rh_ent->next = NULL; - if (asprintf(&rh_ent->name, "%s", name) == -1) - { - free(rh_ent); - warn("couldn't allocate a new hook name"); - return; - } - - c = &blkif->request_hook_chain; - while (*c != NULL) { - c = &(*c)->next; - } - *c = rh_ent; -} - -void blkif_register_response_hook(blkif_t *blkif, char *name, - int (*rh)(blkif_t *, blkif_response_t *, int)) -{ - response_hook_t *rh_ent, **c; - - rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); - if (!rh_ent) - { - warn("couldn't allocate a new hook"); - return; - } - - rh_ent->func = rh; - rh_ent->next = NULL; - if (asprintf(&rh_ent->name, "%s", name) == -1) - { - free(rh_ent); - warn("couldn't allocate a new hook name"); - return; - } - - c = &blkif->response_hook_chain; - while (*c != NULL) { - c = &(*c)->next; - } - *c = rh_ent; -} - -void blkif_print_hooks(blkif_t *blkif) -{ - request_hook_t *req_hook; - response_hook_t *rsp_hook; - - DPRINTF("Request Hooks:\n"); - req_hook = blkif->request_hook_chain; - while (req_hook != NULL) - { - DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name); 
- req_hook = req_hook->next; - } - - DPRINTF("Response Hooks:\n"); - rsp_hook = blkif->response_hook_chain; - while (rsp_hook != NULL) - { - DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); - rsp_hook = rsp_hook->next; - } -} - - -long int vbd_size(blkif_t *blkif) -{ - return 1000000000; -} - -long int vbd_secsize(blkif_t *blkif) -{ - return 512; -} - -unsigned vbd_info(blkif_t *blkif) -{ - return 0; -} - - -void __init_blkif(void) -{ - memset(blkif_hash, 0, sizeof(blkif_hash)); -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blktaplib.c --- a/tools/blktap/blktaplib.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,453 +0,0 @@ -/* - * blktaplib.c - * - * userspace interface routines for the blktap driver. - * - * (threadsafe(r) version) - * - * (c) 2004 Andrew Warfield. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <sys/user.h> -#include <err.h> -#include <errno.h> -#include <sys/types.h> -#include <linux/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <signal.h> -#include <sys/poll.h> -#include <sys/ioctl.h> -#include <string.h> -#include <unistd.h> -#include <pthread.h> -#include <xs.h> - -#define __COMPILING_BLKTAP_LIB -#include "blktaplib.h" - -#if 0 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) -#else -#define DPRINTF(_f, _a...) ((void)0) -#endif -#define DEBUG_RING_IDXS 0 - -#define POLLRDNORM 0x040 - -#define BLKTAP_IOCTL_KICK 1 - - -void got_sig_bus(); -void got_sig_int(); - -/* in kernel these are opposite, but we are a consumer now. 
*/ -blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */ -blkif_front_ring_t be_ring; - -unsigned long mmap_vstart = 0; -char *blktap_mem; -int fd = 0; - -#define BLKTAP_RING_PAGES 1 /* Front */ -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES) - -int bad_count = 0; -void bad(void) -{ - bad_count ++; - if (bad_count > 50) exit(0); -} -/*-----[ ID Manipulation from tap driver code ]--------------------------*/ - -#define ACTIVE_RING_IDX unsigned short - -inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) -{ - return ( (fe_dom << 16) | idx ); -} - -inline unsigned int ID_TO_IDX(unsigned long id) -{ - return ( id & 0x0000ffff ); -} - -inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } - -static int (*request_hook)(blkif_request_t *req) = NULL; -static int (*response_hook)(blkif_response_t *req) = NULL; - -/*-----[ Data to/from Backend (server) VM ]------------------------------*/ - -/* - -inline int write_req_to_be_ring(blkif_request_t *req) -{ - blkif_request_t *req_d; - static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER; - - pthread_mutex_lock(&be_prod_mutex); - req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt); - memcpy(req_d, req, sizeof(blkif_request_t)); - wmb(); - be_ring.req_prod_pvt++; - pthread_mutex_unlock(&be_prod_mutex); - - return 0; -} -*/ - -inline int write_rsp_to_fe_ring(blkif_response_t *rsp) -{ - blkif_response_t *rsp_d; - static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER; - - pthread_mutex_lock(&fe_prod_mutex); - rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt); - memcpy(rsp_d, rsp, sizeof(blkif_response_t)); - wmb(); - fe_ring.rsp_prod_pvt++; - pthread_mutex_unlock(&fe_prod_mutex); - - return 0; -} - -static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp) -{ - response_hook_t *rsp_hook; - - rsp_hook = blkif->response_hook_chain; - while (rsp_hook != NULL) - { - switch(rsp_hook->func(blkif, rsp, 1)) - { - case BLKTAP_PASS: - break; - 
default: - printf("Only PASS is supported for resp hooks!\n"); - } - rsp_hook = rsp_hook->next; - } -} - - -static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER; - -void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp) -{ - - apply_rsp_hooks(blkif, rsp); - - write_rsp_to_fe_ring(rsp); -} - -void blktap_kick_responses(void) -{ - pthread_mutex_lock(&push_mutex); - - RING_PUSH_RESPONSES(&fe_ring); - ioctl(fd, BLKTAP_IOCTL_KICK_FE); - - pthread_mutex_unlock(&push_mutex); -} - -/*-----[ Polling fd listeners ]------------------------------------------*/ - -#define MAX_POLLFDS 64 - -typedef struct { - int (*func)(int fd); - struct pollfd *pfd; - int fd; - short events; - int active; -} pollhook_t; - -static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */ -static pollhook_t pollhooks[MAX_POLLFDS]; -static unsigned int ph_freelist[MAX_POLLFDS]; -static unsigned int ph_cons, ph_prod; -#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons)) -#define PH_IDX(x) (x % MAX_POLLFDS) - -int blktap_attach_poll(int fd, short events, int (*func)(int fd)) -{ - pollhook_t *ph; - - if (nr_pollhooks() == MAX_POLLFDS) { - printf("Too many pollhooks!\n"); - return -1; - } - - ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]]; - - ph->func = func; - ph->fd = fd; - ph->events = events; - ph->active = 1; - - DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, - nr_pollhooks()); - - return 0; -} - -void blktap_detach_poll(int fd) -{ - int i; - - for (i=0; i<MAX_POLLFDS; i++) - if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) { - ph_freelist[PH_IDX(ph_prod++)] = i; - pollhooks[i].pfd->fd = -1; - pollhooks[i].active = 0; - break; - } - - DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, - nr_pollhooks()); -} - -void pollhook_init(void) -{ - int i; - - for (i=0; i < MAX_POLLFDS; i++) { - ph_freelist[i] = (i+1) % MAX_POLLFDS; - pollhooks[i].active = 0; - } - - ph_cons = 0; - ph_prod = MAX_POLLFDS; -} - -void __attribute__ 
((constructor)) blktaplib_init(void) -{ - pollhook_init(); -} - -/*-----[ The main listen loop ]------------------------------------------*/ - -int blktap_listen(void) -{ - int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret; - struct xs_handle *h; - blkif_t *blkif; - - /* comms rings: */ - blkif_request_t *req; - blkif_response_t *rsp; - blkif_sring_t *sring; - RING_IDX rp, i, pfd_count; - - /* pending rings */ - blkif_request_t req_pending[BLK_RING_SIZE]; - /* blkif_response_t rsp_pending[BLK_RING_SIZE] */; - - /* handler hooks: */ - request_hook_t *req_hook; - response_hook_t *rsp_hook; - - signal (SIGBUS, got_sig_bus); - signal (SIGINT, got_sig_int); - - __init_blkif(); - - fd = open("/dev/blktap", O_RDWR); - if (fd == -1) - err(-1, "open failed!"); - - blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if ((int)blktap_mem == -1) - err(-1, "mmap failed!"); - - /* assign the rings to the mapped memory */ -/* - sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE); - FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE); -*/ - sring = (blkif_sring_t *)((unsigned long)blktap_mem); - BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE); - - mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT); - - - /* Set up store connection and watch. 
*/ - h = xs_daemon_open(); - if (h == NULL) - err(-1, "xs_daemon_open"); - - ret = add_blockdevice_probe_watch(h, "Domain-0"); - if (ret != 0) - err(0, "adding device probewatch"); - - ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); - - while(1) { - int ret; - - /* build the poll list */ - pfd_count = 0; - for ( i=0; i < MAX_POLLFDS; i++ ) { - pollhook_t *ph = &pollhooks[i]; - - if (ph->active) { - pfd[pfd_count].fd = ph->fd; - pfd[pfd_count].events = ph->events; - ph->pfd = &pfd[pfd_count]; - pfd_count++; - } - } - - tap_pfd = pfd_count++; - pfd[tap_pfd].fd = fd; - pfd[tap_pfd].events = POLLIN; - - store_pfd = pfd_count++; - pfd[store_pfd].fd = xs_fileno(h); - pfd[store_pfd].events = POLLIN; - - if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) { - if (DEBUG_RING_IDXS) - ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS); - continue; - } - - for (i=0; i < MAX_POLLFDS; i++) { - if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) ) - pollhooks[i].func(pollhooks[i].pfd->fd); - } - - if (pfd[store_pfd].revents) { - ret = xs_fire_next_watch(h); - } - - if (pfd[tap_pfd].revents) - { - /* empty the fe_ring */ - notify_fe = 0; - notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring); - rp = fe_ring.sring->req_prod; - rmb(); - for (i = fe_ring.req_cons; i != rp; i++) - { - int done = 0; - - req = RING_GET_REQUEST(&fe_ring, i); - memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req)); - req = &req_pending[ID_TO_IDX(req->id)]; - - blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle); - - if (blkif != NULL) - { - req_hook = blkif->request_hook_chain; - while (req_hook != NULL) - { - switch(req_hook->func(blkif, req, ((i+1) == rp))) - { - case BLKTAP_RESPOND: - apply_rsp_hooks(blkif, (blkif_response_t *)req); - write_rsp_to_fe_ring((blkif_response_t *)req); - notify_fe = 1; - done = 1; - break; - case BLKTAP_STOLEN: - done = 1; - break; - case BLKTAP_PASS: - break; - default: - printf("Unknown request hook return value!\n"); - } - if (done) break; - req_hook = 
req_hook->next; - } - } - - if (done == 0) - { - /* this was: */ - /* write_req_to_be_ring(req); */ - - unsigned long id = req->id; - unsigned short operation = req->operation; - printf("Unterminated request!\n"); - rsp = (blkif_response_t *)req; - rsp->id = id; - rsp->operation = operation; - rsp->status = BLKIF_RSP_ERROR; - write_rsp_to_fe_ring(rsp); - notify_fe = 1; - done = 1; - } - - } - fe_ring.req_cons = i; - - /* empty the be_ring */ -/* - notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring); - rp = be_ring.sring->rsp_prod; - rmb(); - for (i = be_ring.rsp_cons; i != rp; i++) - { - - rsp = RING_GET_RESPONSE(&be_ring, i); - memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp)); - rsp = &rsp_pending[ID_TO_IDX(rsp->id)]; - - DPRINTF("copying a be request\n"); - - apply_rsp_hooks(rsp); - write_rsp_to_fe_ring(rsp); - } - be_ring.rsp_cons = i; -*/ - /* notify the domains */ -/* - if (notify_be) { - DPRINTF("notifying be\n"); -pthread_mutex_lock(&push_mutex); - RING_PUSH_REQUESTS(&be_ring); - ioctl(fd, BLKTAP_IOCTL_KICK_BE); -pthread_mutex_unlock(&push_mutex); - } -*/ - if (notify_fe) { - DPRINTF("notifying fe\n"); - pthread_mutex_lock(&push_mutex); - RING_PUSH_RESPONSES(&fe_ring); - ioctl(fd, BLKTAP_IOCTL_KICK_FE); - pthread_mutex_unlock(&push_mutex); - } - } - } - - - munmap(blktap_mem, PAGE_SIZE); - - mmap_failed: - close(fd); - - open_failed: - return 0; -} - -void got_sig_bus() { - printf("Attempted to access a page that isn't.\n"); - exit(-1); -} - -void got_sig_int() { - DPRINTF("quitting -- returning to passthrough mode.\n"); - if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH ); - close(fd); - fd = 0; - exit(0); -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/blktaplib.h --- a/tools/blktap/blktaplib.h Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,171 +0,0 @@ -/* blktaplib.h - * - * userland accessors to the block tap. 
- * - * Sept 2/05 -- I'm scaling this back to only support block remappings - * to user in a backend domain. Passthrough and interposition can be readded - * once transitive grants are available. - */ - -#ifndef __BLKTAPLIB_H__ -#define __BLKTAPLIB_H__ - -#include <xenctrl.h> -#include <sys/user.h> -#include <xen/xen.h> -#include <xen/io/blkif.h> -#include <xen/io/ring.h> -#include <xen/io/domain_controller.h> -#include <xs.h> - -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) - -/* /dev/xen/blktap resides at device number major=10, minor=202 */ -#define BLKTAP_MINOR 202 - -/* size of the extra VMA area to map in attached pages. */ -#define BLKTAP_VMA_PAGES BLK_RING_SIZE - -/* blktap IOCTLs: */ -#define BLKTAP_IOCTL_KICK_FE 1 -#define BLKTAP_IOCTL_KICK_BE 2 -#define BLKTAP_IOCTL_SETMODE 3 -#define BLKTAP_IOCTL_PRINT_IDXS 100 - -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ -#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ -#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 -#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 -#define BLKTAP_MODE_COPY_FE 0x00000004 -#define BLKTAP_MODE_COPY_BE 0x00000008 -#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 -#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 - -#define BLKTAP_MODE_INTERPOSE \ - (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) - -#define BLKTAP_MODE_COPY_BOTH \ - (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) - -#define BLKTAP_MODE_COPY_BOTH_PAGES \ - (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) - -static inline int BLKTAP_MODE_VALID(unsigned long arg) -{ - return ( - ( arg == BLKTAP_MODE_PASSTHROUGH ) || - ( arg == BLKTAP_MODE_INTERCEPT_FE ) || - ( arg == BLKTAP_MODE_INTERPOSE ) ); -/* - return ( - ( arg == BLKTAP_MODE_PASSTHROUGH ) || - ( arg == BLKTAP_MODE_INTERCEPT_FE ) || - ( arg == BLKTAP_MODE_INTERCEPT_BE ) || - ( arg == BLKTAP_MODE_INTERPOSE ) || - ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || - ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == 
BLKTAP_MODE_COPY_BE ) || - ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) - ); -*/ -} - -/* Return values for handling messages in hooks. */ -#define BLKTAP_PASS 0 /* Keep passing this request as normal. */ -#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */ -#define BLKTAP_STOLEN 2 /* Hook has stolen request. */ - -//#define domid_t unsigned short - -inline unsigned int ID_TO_IDX(unsigned long id); -inline domid_t ID_TO_DOM(unsigned long id); - -int blktap_attach_poll(int fd, short events, int (*func)(int)); -void blktap_detach_poll(int fd); -int blktap_listen(void); - -struct blkif; - -typedef struct request_hook_st { - char *name; - int (*func)(struct blkif *, blkif_request_t *, int); - struct request_hook_st *next; -} request_hook_t; - -typedef struct response_hook_st { - char *name; - int (*func)(struct blkif *, blkif_response_t *, int); - struct response_hook_st *next; -} response_hook_t; - -struct blkif_ops { - long int (*get_size)(struct blkif *blkif); - long int (*get_secsize)(struct blkif *blkif); - unsigned (*get_info)(struct blkif *blkif); -}; - -typedef struct blkif { - domid_t domid; - long int handle; - - long int pdev; - long int readonly; - - enum { DISCONNECTED, CONNECTED } state; - - struct blkif_ops *ops; - request_hook_t *request_hook_chain; - response_hook_t *response_hook_chain; - - struct blkif *hash_next; - - void *prv; /* device-specific data */ -} blkif_t; - -void register_new_blkif_hook(int (*fn)(blkif_t *blkif)); -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); -blkif_t *alloc_blkif(domid_t domid); -int blkif_init(blkif_t *blkif, long int handle, long int pdev, - long int readonly); -void free_blkif(blkif_t *blkif); -void __init_blkif(void); - - -/* xenstore/xenbus: */ -extern int add_blockdevice_probe_watch(struct xs_handle *h, - const char *domname); -int xs_fire_next_watch(struct xs_handle *h); - - -void blkif_print_hooks(blkif_t *blkif); -void blkif_register_request_hook(blkif_t 
*blkif, char *name, - int (*rh)(blkif_t *, blkif_request_t *, int)); -void blkif_register_response_hook(blkif_t *blkif, char *name, - int (*rh)(blkif_t *, blkif_response_t *, int)); -void blkif_inject_response(blkif_t *blkif, blkif_response_t *); -void blktap_kick_responses(void); - -/* this must match the underlying driver... */ -#define MAX_PENDING_REQS 64 - -/* Accessing attached data page mappings */ -#define MMAP_PAGES \ - (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) -#define MMAP_VADDR(_req,_seg) \ - (mmap_vstart + \ - ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ - ((_seg) * PAGE_SIZE)) - -extern unsigned long mmap_vstart; - -/* Defines that are only used by library clients */ - -#ifndef __COMPILING_BLKTAP_LIB - -static char *blkif_op_name[] = { - [BLKIF_OP_READ] = "READ", - [BLKIF_OP_WRITE] = "WRITE", -}; - -#endif /* __COMPILING_BLKTAP_LIB */ - -#endif /* __BLKTAPLIB_H__ */ diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/list.h --- a/tools/blktap/list.h Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -/* - * list.h - * - * This is a subset of linux's list.h intended to be used in user-space. 
- * - */ - -#ifndef __LIST_H__ -#define __LIST_H__ - -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} -#define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -#endif /* __LIST_H__ */ diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/Makefile --- a/tools/blktap/parallax/Makefile Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -XEN_ROOT = ../../.. -include $(XEN_ROOT)/tools/Rules.mk - -PARALLAX_INSTALL_DIR = /usr/sbin - -INSTALL = install -INSTALL_PROG = $(INSTALL) -m0755 -INSTALL_DIR = $(INSTALL) -d -m0755 - -INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC) - -LDFLAGS = -L.. 
-lpthread -lz -lblktap - -#PLX_SRCS := -PLX_SRCS := vdi.c -PLX_SRCS += radix.c -PLX_SRCS += snaplog.c -PLX_SRCS += blockstore.c -PLX_SRCS += block-async.c -PLX_SRCS += requests-async.c -VDI_SRCS := $(PLX_SRCS) -PLX_SRCS += parallax.c - -#VDI_TOOLS := -VDI_TOOLS := vdi_create -VDI_TOOLS += vdi_list -VDI_TOOLS += vdi_snap -VDI_TOOLS += vdi_snap_list -VDI_TOOLS += vdi_snap_delete -VDI_TOOLS += vdi_fill -VDI_TOOLS += vdi_tree -VDI_TOOLS += vdi_validate - -CFLAGS += -Werror -CFLAGS += -Wno-unused -CFLAGS += -fno-strict-aliasing -CFLAGS += $(INCLUDES) -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -# Get gcc to generate the dependencies for us. -CFLAGS += -Wp,-MD,.$(@F).d -DEPS = .*.d - -OBJS = $(patsubst %.c,%.o,$(SRCS)) -IBINS = parallax $(VDI_TOOLS) - -.PHONY: all -all: $(VDI_TOOLS) parallax blockstored - -.PHONY: install -install: all - $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR) - -.PHONY: clean -clean: - rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest - -parallax: $(PLX_SRCS) - $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS) - -${VDI_TOOLS}: %: %.c $(VDI_SRCS) - $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS) - --include $(DEPS) diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/README --- a/tools/blktap/parallax/README Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,177 +0,0 @@ -Parallax Quick Overview -March 3, 2005 - -This is intended to provide a quick set of instructions to let you -guys play with the current parallax source. In it's current form, the -code will let you run an arbitrary number of VMs off of a single disk -image, doing copy-on-write as they make updates. Each domain is -assigned a virtual disk image (VDI), which may be based on a snapshot -of an existing image. All of the VDI and snapshot management should -currently work. 
- -The current implementation uses a single file as a blockstore for -_everything_ this will soon be replaced by the fancier backend code -and the local cache. As it stands, Parallax will create -"blockstore.dat" in the directory that you run it from, and use -largefile support to make this grow to unfathomable girth. So, you -probably want to run the daemon off of a local disk, with a lot of -free space. - -Here's how to get going: - -0. Setup: ---------- - -Pick a local directory on a disk with lots of room. You should be -running from a privileged domain (e.g. dom0) with the blocktap -configured in and block backend NOT. - -For convenience (for the moment) copy all of the vdi tools (vdi_*) and -the parallax daemon from tools/blktap into this directory. - -1. Populate the blockstore: ---------------------------- - -First you need to put at least one image into the blockstore. You -will need a disk image, either as a file or local partition. My -general approach has been to - -(a) make a really big sparse file with - - dd if=/dev/zero of=./image bs=4K count=1 seek=[big value] - -(b) put a filesystem into it - - mkfs.ext3 ./image - -(c) mount it using loopback - - mkdir ./mnt - mount -o loop ./image - -(d) cd into it and untar one of the image files from srg-roots. - - cd mnt - tar ... - -NOTE: Beware if your system is FC3. mkfs is not compatible with old -versions of fedora, and so you don't have much choice but to install -further fc3 images if you have used the fc3 version of mkfs. - -(e) unmount the image - - cd .. - umount mnt - -(f) now, create a new VDI to hold the image - - ./vdi_create "My new FC3 VDI" - -(g) get the id of the new VDI. - - ./vdi_list - - | 0 My new FC3 VDI - -(0 is the VDI id... create a few more if you want.) - -(h) hoover your image into the new VDI. - - ./vdi_fill 0 ./image - -This will pull the entire image into the blockstore and set up a -mapping tree for it for VDI 0. Passing a device (i.e. 
/dev/sda3) -should also work, but vdi_fill has NO notion of sparseness yet, so you -are going to pump a block into the store for each block you read. - -vdi_fill will count up until it is done, and you should be ready to -go. If you want to be anal, you can use vdi_validate to test the VDI -against the original image. - -2. Create some extra VDIs -------------------------- - -VDIs are actually a list of snapshots, and each snapshot is a full -image of mappings. So, to preserve an immutable copy of a current -VDI, do this: - -(a) Snapshot your new VDI. - - ./vdi_snap 0 - -Snapshotting writes the current radix root to the VDI's snapshot log, -and assigns it a new writable root. - -(b) look at the VDI's snapshot log. - - ./vdi_snap_list 0 - - | 16 0 Thu Mar 3 19:27:48 2005 565111 31 - -The first two columns constitute a snapshot id and represent the -(block, offset) of the snapshot record. The Date tells you when the -snapshot was made, and 31 is the radix root node of the snapshot. - -(c) Create a new VDI, based on that snapshot, and look at the list. - - ./vdi_create "FC3 - Copy 1" 16 0 - ./vdi_list - - | 0 My new FC3 VDI - | 1 FC3 - Copy 1 - -NOTE: If you have Graphviz installed on your system, you can use -vdi_tree to generate a postscript of your current set of VDIs and -snapshots. - - -Create as many VDIs as you need for the VMs that you want to run. - -3. Boot some VMs: ------------------ - -Parallax currently uses a hack in xend to pass the VDI id, you need to -modify the disk line of the VM config that is going to mount it. - -(a) set up your vm config, by using the following disk line: - - disk = ['parallax:1,sda1,w,0' ] - -This example uses VDI 1 (from vdi_list above), presents it as sda1 -(writable), and uses dom 0 as the backend. If you were running the -daemon (and tap driver) in some domain other than 0, you would change -this last parameter. 
- -NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:". - -(b) Run parallax in the backend domain. - - ./parallax - -(c) create your new domain. - - xm create ... - ---- - -That's pretty much all there is to it at the moment. Hope this is -clear enough to get you going. Now, a few serious caveats that will -be sorted out in the almost immediate future: - -WARNINGS: ---------- - -1. There is NO locking in the VDI tools at the moment, so I'd avoid -running them in parallel, or more importantly, running them while the -daemon is running. - -2. I doubt that xend will be very happy about restarting if you have -parallax-using domains. So if it dies while there are active parallax -doms, you may need to reboot. - -3. I've turned off write-in-place. So at the moment, EVERY block -write is a log append on the blockstore. I've been having some probs -with the radix tree's marking of writable blocks after snapshots and -will sort this out very soon. - - diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/block-async.c --- a/tools/blktap/parallax/block-async.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,393 +0,0 @@ -/* block-async.c - * - * Asynchronous block wrappers for parallax. - */ - - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <pthread.h> -#include "block-async.h" -#include "blockstore.h" -#include "vdi.h" - - -#if 0 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) -#else -#define DPRINTF(_f, _a...) ((void)0) -#endif - -/* We have a queue of outstanding I/O requests implemented as a - * circular producer-consumer ring with free-running buffers. - * to allow reordering, this ring indirects to indexes in an - * ring of io_structs. - * - * the block_* calls may either add an entry to this ring and return, - * or satisfy the request immediately and call the callback directly. 
- * None of the io calls in parallax should be nested enough to worry - * about stack problems with this approach. - */ - -struct read_args { - uint64_t addr; -}; - -struct write_args { - uint64_t addr; - char *block; -}; - -struct alloc_args { - char *block; -}; - -struct pending_io_req { - enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op; - union { - struct read_args r; - struct write_args w; - struct alloc_args a; - } u; - io_cb_t cb; - void *param; -}; - -void radix_lock_init(struct radix_lock *r) -{ - int i; - - pthread_mutex_init(&r->lock, NULL); - for (i=0; i < 1024; i++) { - r->lines[i] = 0; - r->waiters[i] = NULL; - r->state[i] = ANY; - } -} - -/* maximum outstanding I/O requests issued asynchronously */ -/* must be a power of 2.*/ -#define MAX_PENDING_IO 1024 - -/* how many threads to concurrently issue I/O to the disk. */ -#define IO_POOL_SIZE 10 - -static struct pending_io_req pending_io_reqs[MAX_PENDING_IO]; -static int pending_io_list[MAX_PENDING_IO]; -static unsigned long io_prod = 0, io_cons = 0, io_free = 0; -#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1)) -#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs) -#define PENDING_IO_ENT(_x) \ - (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]]) -#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod) -#define CAN_CONSUME_PENDING_IO (io_cons != io_prod) -static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER; - -static void init_pending_io(void) -{ - int i; - - for (i=0; i<MAX_PENDING_IO; i++) - pending_io_list[i] = i; - -} - -void block_read(uint64_t addr, io_cb_t cb, void *param) -{ - struct pending_io_req *req; - - pthread_mutex_lock(&pending_io_lock); - assert(CAN_PRODUCE_PENDING_IO); - - req = PENDING_IO_ENT(io_prod++); - DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req); - req->op = IO_READ; - req->u.r.addr = addr; - req->cb = cb; - req->param = param; - - 
pthread_cond_signal(&pending_io_cond); - pthread_mutex_unlock(&pending_io_lock); -} - - -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param) -{ - struct pending_io_req *req; - - pthread_mutex_lock(&pending_io_lock); - assert(CAN_PRODUCE_PENDING_IO); - - req = PENDING_IO_ENT(io_prod++); - DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req); - req->op = IO_WRITE; - req->u.w.addr = addr; - req->u.w.block = block; - req->cb = cb; - req->param = param; - - pthread_cond_signal(&pending_io_cond); - pthread_mutex_unlock(&pending_io_lock); -} - - -void block_alloc(char *block, io_cb_t cb, void *param) -{ - struct pending_io_req *req; - - pthread_mutex_lock(&pending_io_lock); - assert(CAN_PRODUCE_PENDING_IO); - - req = PENDING_IO_ENT(io_prod++); - req->op = IO_ALLOC; - req->u.a.block = block; - req->cb = cb; - req->param = param; - - pthread_cond_signal(&pending_io_cond); - pthread_mutex_unlock(&pending_io_lock); -} - -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param) -{ - struct io_ret ret; - pthread_mutex_lock(&r->lock); - - if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) { - r->lines[row]++; - r->state[row] = READ; - DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row); - pthread_mutex_unlock(&r->lock); - ret.type = IO_INT_T; - ret.u.i = 0; - cb(ret, param); - } else { - struct radix_wait **rwc; - struct radix_wait *rw = - (struct radix_wait *) malloc (sizeof(struct radix_wait)); - DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row); - rw->type = RLOCK; - rw->param = param; - rw->cb = cb; - rw->next = NULL; - /* append to waiters list. */ - rwc = &r->waiters[row]; - while (*rwc != NULL) rwc = &(*rwc)->next; - *rwc = rw; - pthread_mutex_unlock(&r->lock); - return; - } -} - - -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param) -{ - struct io_ret ret; - pthread_mutex_lock(&r->lock); - - /* the second check here is redundant -- just here for debugging now. 
*/ - if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) { - r->state[row] = STOP; - r->lines[row] = -1; - DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row); - pthread_mutex_unlock(&r->lock); - ret.type = IO_INT_T; - ret.u.i = 0; - cb(ret, param); - } else { - struct radix_wait **rwc; - struct radix_wait *rw = - (struct radix_wait *) malloc (sizeof(struct radix_wait)); - DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row); - rw->type = WLOCK; - rw->param = param; - rw->cb = cb; - rw->next = NULL; - /* append to waiters list. */ - rwc = &r->waiters[row]; - while (*rwc != NULL) rwc = &(*rwc)->next; - *rwc = rw; - pthread_mutex_unlock(&r->lock); - return; - } - -} - -/* called with radix_lock locked and lock count of zero. */ -static void wake_waiters(struct radix_lock *r, int row) -{ - struct pending_io_req *req; - struct radix_wait *rw; - - if (r->lines[row] != 0) return; - if (r->waiters[row] == NULL) return; - - if (r->waiters[row]->type == WLOCK) { - - rw = r->waiters[row]; - pthread_mutex_lock(&pending_io_lock); - assert(CAN_PRODUCE_PENDING_IO); - - req = PENDING_IO_ENT(io_prod++); - req->op = IO_WWAKE; - req->cb = rw->cb; - req->param = rw->param; - r->lines[row] = -1; /* write lock the row. */ - r->state[row] = STOP; - r->waiters[row] = rw->next; - free(rw); - pthread_mutex_unlock(&pending_io_lock); - - } else /* RLOCK */ { - - while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) { - rw = r->waiters[row]; - pthread_mutex_lock(&pending_io_lock); - assert(CAN_PRODUCE_PENDING_IO); - - req = PENDING_IO_ENT(io_prod++); - req->op = IO_RWAKE; - req->cb = rw->cb; - req->param = rw->param; - r->lines[row]++; /* read lock the row. 
*/ - r->state[row] = READ; - r->waiters[row] = rw->next; - free(rw); - pthread_mutex_unlock(&pending_io_lock); - } - - if (r->waiters[row] != NULL) /* There is a write queued still */ - r->state[row] = STOP; - } - - pthread_mutex_lock(&pending_io_lock); - pthread_cond_signal(&pending_io_cond); - pthread_mutex_unlock(&pending_io_lock); -} - -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param) -{ - struct io_ret ret; - - pthread_mutex_lock(&r->lock); - assert(r->lines[row] > 0); /* try to catch misuse. */ - r->lines[row]--; - if (r->lines[row] == 0) { - r->state[row] = ANY; - wake_waiters(r, row); - } - pthread_mutex_unlock(&r->lock); - cb(ret, param); -} - -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param) -{ - struct io_ret ret; - - pthread_mutex_lock(&r->lock); - assert(r->lines[row] == -1); /* try to catch misuse. */ - r->lines[row] = 0; - r->state[row] = ANY; - wake_waiters(r, row); - pthread_mutex_unlock(&r->lock); - cb(ret, param); -} - -/* consumer calls */ -static void do_next_io_req(struct pending_io_req *req) -{ - struct io_ret ret; - void *param; - - switch (req->op) { - case IO_READ: - ret.type = IO_BLOCK_T; - ret.u.b = readblock(req->u.r.addr); - break; - case IO_WRITE: - ret.type = IO_INT_T; - ret.u.i = writeblock(req->u.w.addr, req->u.w.block); - DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr); - break; - case IO_ALLOC: - ret.type = IO_ADDR_T; - ret.u.a = allocblock(req->u.a.block); - break; - case IO_RWAKE: - DPRINTF("WAKE DEFERRED RLOCK!\n"); - ret.type = IO_INT_T; - ret.u.i = 0; - break; - case IO_WWAKE: - DPRINTF("WAKE DEFERRED WLOCK!\n"); - ret.type = IO_INT_T; - ret.u.i = 0; - break; - default: - DPRINTF("Unknown IO operation on pending list!\n"); - return; - } - - param = req->param; - pthread_mutex_lock(&pending_io_lock); - pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req); - pthread_mutex_unlock(&pending_io_lock); - - assert(req->cb != NULL); - req->cb(ret, 
param); - -} - -void *io_thread(void *param) -{ - int tid; - struct pending_io_req *req; - - /* Set this thread's tid. */ - tid = *(int *)param; - free(param); - -start: - pthread_mutex_lock(&pending_io_lock); - while (io_prod == io_cons) { - pthread_cond_wait(&pending_io_cond, &pending_io_lock); - } - - if (io_prod == io_cons) { - /* unnecessary wakeup. */ - pthread_mutex_unlock(&pending_io_lock); - goto start; - } - - req = PENDING_IO_ENT(io_cons++); - pthread_mutex_unlock(&pending_io_lock); - - do_next_io_req(req); - - goto start; - -} - -static pthread_t io_pool[IO_POOL_SIZE]; -void start_io_threads(void) - -{ - int i, tid=0; - - for (i=0; i < IO_POOL_SIZE; i++) { - int ret, *t; - t = (int *)malloc(sizeof(int)); - *t = tid++; - ret = pthread_create(&io_pool[i], NULL, io_thread, t); - if (ret != 0) printf("Error starting thread %d\n", i); - } - -} - -void init_block_async(void) -{ - init_pending_io(); - start_io_threads(); -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/block-async.h --- a/tools/blktap/parallax/block-async.h Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -/* block-async.h - * - * Asynchronous block wrappers for parallax. - */ - -#ifndef _BLOCKASYNC_H_ -#define _BLOCKASYNC_H_ - -#include <assert.h> -#include <xenctrl.h> -#include "vdi.h" - -struct io_ret -{ - enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type; - union { - uint64_t a; - char *b; - int i; - } u; -}; - -typedef void (*io_cb_t)(struct io_ret r, void *param); - -/* per-vdi lock structures to make sure requests run in a safe order. 
*/ -struct radix_wait { - enum {RLOCK, WLOCK} type; - io_cb_t cb; - void *param; - struct radix_wait *next; -}; - -struct radix_lock { - pthread_mutex_t lock; - int lines[1024]; - struct radix_wait *waiters[1024]; - enum {ANY, READ, STOP} state[1024]; -}; -void radix_lock_init(struct radix_lock *r); - -void block_read(uint64_t addr, io_cb_t cb, void *param); -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param); -void block_alloc(char *block, io_cb_t cb, void *param); -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param); -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param); -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param); -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param); -void init_block_async(void); - -static inline uint64_t IO_ADDR(struct io_ret r) -{ - assert(r.type == IO_ADDR_T); - return r.u.a; -} - -static inline char *IO_BLOCK(struct io_ret r) -{ - assert(r.type == IO_BLOCK_T); - return r.u.b; -} - -static inline int IO_INT(struct io_ret r) -{ - assert(r.type == IO_INT_T); - return r.u.i; -} - - -#endif //_BLOCKASYNC_H_ diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstore.c --- a/tools/blktap/parallax/blockstore.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1348 +0,0 @@ -/************************************************************************** - * - * blockstore.c - * - * Simple block store interface - * - */ - -#include <fcntl.h> -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <stdarg.h> -#include "blockstore.h" -#include <pthread.h> - -//#define BLOCKSTORE_REMOTE -//#define BSDEBUG - -#define RETRY_TIMEOUT 1000000 /* microseconds */ - -/***************************************************************************** - * Debugging - */ -#ifdef BSDEBUG -void DB(char *format, ...) 
-{ - va_list args; - fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key)); - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); -} -#else -#define DB(format, ...) (void)0 -#endif - -#ifdef BLOCKSTORE_REMOTE - -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <netinet/in.h> -#include <netdb.h> - -/***************************************************************************** - * Network state * - *****************************************************************************/ - -/* The individual disk servers we talks to. These will be referenced by - * an integer index into bsservers[]. - */ -bsserver_t bsservers[MAX_SERVERS]; - -/* The cluster map. This is indexed by an integer cluster number. - */ -bscluster_t bsclusters[MAX_CLUSTERS]; - -/* Local socket. - */ -struct sockaddr_in sin_local; -int bssock = 0; - -/***************************************************************************** - * Notification * - *****************************************************************************/ - -typedef struct pool_thread_t_struct { - pthread_mutex_t ptmutex; - pthread_cond_t ptcv; - int newdata; -} pool_thread_t; - -pool_thread_t pool_thread[READ_POOL_SIZE+1]; - -#define RECV_NOTIFY(tid) { \ - pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \ - pool_thread[tid].newdata = 1; \ - DB("CV Waking %u", tid); \ - pthread_cond_signal(&(pool_thread[tid].ptcv)); \ - pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); } -#define RECV_AWAIT(tid) { \ - pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \ - if (pool_thread[tid].newdata) { \ - pool_thread[tid].newdata = 0; \ - DB("CV Woken %u", tid); \ - } \ - else { \ - DB("CV Waiting %u", tid); \ - pthread_cond_wait(&(pool_thread[tid].ptcv), \ - &(pool_thread[tid].ptmutex)); \ - } \ - pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); } - -/***************************************************************************** - * Message queue management * - 
*****************************************************************************/ - -/* Protects the queue manipulation critcal regions. - */ -pthread_mutex_t ptmutex_queue; -#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue) -#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue) - -pthread_mutex_t ptmutex_recv; -#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv) -#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv) - -/* A message queue entry. We allocate one of these for every request we send. - * Asynchronous reply reception also used one of these. - */ -typedef struct bsq_t_struct { - struct bsq_t_struct *prev; - struct bsq_t_struct *next; - int status; - int server; - int length; - struct msghdr msghdr; - struct iovec iov[2]; - int tid; - struct timeval tv_sent; - bshdr_t message; - void *block; -} bsq_t; - -#define BSQ_STATUS_MATCHED 1 - -pthread_mutex_t ptmutex_luid; -#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid) -#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid) - -static uint64_t luid_cnt = 0x1000ULL; -uint64_t new_luid(void) { - uint64_t luid; - ENTER_LUID_CR; - luid = luid_cnt++; - LEAVE_LUID_CR; - return luid; -} - -/* Queue of outstanding requests. 
- */ -bsq_t *bs_head = NULL; -bsq_t *bs_tail = NULL; -int bs_qlen = 0; - -/* - */ -void queuedebug(char *msg) { - bsq_t *q; - ENTER_QUEUE_CR; - fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen); - for (q = bs_head; q; q = q->next) { - fprintf(stderr, " luid=%016llx server=%u\n", - q->message.luid, q->server); - } - LEAVE_QUEUE_CR; -} - -int enqueue(bsq_t *qe) { - ENTER_QUEUE_CR; - qe->next = NULL; - qe->prev = bs_tail; - if (!bs_head) - bs_head = qe; - else - bs_tail->next = qe; - bs_tail = qe; - bs_qlen++; - LEAVE_QUEUE_CR; -#ifdef BSDEBUG - queuedebug("enqueue"); -#endif - return 0; -} - -int dequeue(bsq_t *qe) { - bsq_t *q; - ENTER_QUEUE_CR; - for (q = bs_head; q; q = q->next) { - if (q == qe) { - if (q->prev) - q->prev->next = q->next; - else - bs_head = q->next; - if (q->next) - q->next->prev = q->prev; - else - bs_tail = q->prev; - bs_qlen--; - goto found; - } - } - - LEAVE_QUEUE_CR; -#ifdef BSDEBUG - queuedebug("dequeue not found"); -#endif - return 0; - - found: - LEAVE_QUEUE_CR; -#ifdef BSDEBUG - queuedebug("dequeue not found"); -#endif - return 1; -} - -bsq_t *queuesearch(bsq_t *qe) { - bsq_t *q; - ENTER_QUEUE_CR; - for (q = bs_head; q; q = q->next) { - if ((qe->server == q->server) && - (qe->message.operation == q->message.operation) && - (qe->message.luid == q->message.luid)) { - - if ((q->message.operation == BSOP_READBLOCK) && - ((q->message.flags & BSOP_FLAG_ERROR) == 0)) { - q->block = qe->block; - qe->block = NULL; - } - q->length = qe->length; - q->message.flags = qe->message.flags; - q->message.id = qe->message.id; - q->status |= BSQ_STATUS_MATCHED; - - if (q->prev) - q->prev->next = q->next; - else - bs_head = q->next; - if (q->next) - q->next->prev = q->prev; - else - bs_tail = q->prev; - q->next = NULL; - q->prev = NULL; - bs_qlen--; - goto found; - } - } - - LEAVE_QUEUE_CR; -#ifdef BSDEBUG - queuedebug("queuesearch not found"); -#endif - return NULL; - - found: - LEAVE_QUEUE_CR; -#ifdef BSDEBUG - queuedebug("queuesearch found"); -#endif - 
return q; -} - -/***************************************************************************** - * Network communication * - *****************************************************************************/ - -int send_message(bsq_t *qe) { - int rc; - - qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin); - qe->msghdr.msg_namelen = sizeof(struct sockaddr_in); - qe->msghdr.msg_iov = qe->iov; - if (qe->block) - qe->msghdr.msg_iovlen = 2; - else - qe->msghdr.msg_iovlen = 1; - qe->msghdr.msg_control = NULL; - qe->msghdr.msg_controllen = 0; - qe->msghdr.msg_flags = 0; - - qe->iov[0].iov_base = (void *)&(qe->message); - qe->iov[0].iov_len = MSGBUFSIZE_ID; - - if (qe->block) { - qe->iov[1].iov_base = qe->block; - qe->iov[1].iov_len = BLOCK_SIZE; - } - - qe->message.luid = new_luid(); - - qe->status = 0; - qe->tid = (int)pthread_getspecific(tid_key); - if (enqueue(qe) < 0) { - fprintf(stderr, "Error enqueuing request.\n"); - return -1; - } - - gettimeofday(&(qe->tv_sent), NULL); - DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid); - rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT); - //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0, - // (struct sockaddr *)&(bsservers[qe->server].sin), - // sizeof(struct sockaddr_in)); - if (rc < 0) - return rc; - - return rc; -} - -int recv_message(bsq_t *qe) { - struct sockaddr_in from; - //int flen = sizeof(from); - int rc; - - qe->msghdr.msg_name = &from; - qe->msghdr.msg_namelen = sizeof(struct sockaddr_in); - qe->msghdr.msg_iov = qe->iov; - if (qe->block) - qe->msghdr.msg_iovlen = 2; - else - qe->msghdr.msg_iovlen = 1; - qe->msghdr.msg_control = NULL; - qe->msghdr.msg_controllen = 0; - qe->msghdr.msg_flags = 0; - - qe->iov[0].iov_base = (void *)&(qe->message); - qe->iov[0].iov_len = MSGBUFSIZE_ID; - if (qe->block) { - qe->iov[1].iov_base = qe->block; - qe->iov[1].iov_len = BLOCK_SIZE; - } - - rc = recvmsg(bssock, &(qe->msghdr), 0); - - //return recvfrom(bssock, (void *)&(qe->message), 
sizeof(bsmsg_t), 0, - // (struct sockaddr *)&from, &flen); - return rc; -} - -int get_server_number(struct sockaddr_in *sin) { - int i; - -#ifdef BSDEBUG2 - fprintf(stderr, - "get_server_number(%u.%u.%u.%u/%u)\n", - (unsigned int)sin->sin_addr.s_addr & 0xff, - ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff, - ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff, - ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff, - (unsigned int)sin->sin_port); -#endif - - for (i = 0; i < MAX_SERVERS; i++) { - if (bsservers[i].hostname) { -#ifdef BSDEBUG2 - fprintf(stderr, - "get_server_number check %u.%u.%u.%u/%u\n", - (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff, - ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff, - ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff, - ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff, - (unsigned int)bsservers[i].sin.sin_port); -#endif - if ((sin->sin_family == bsservers[i].sin.sin_family) && - (sin->sin_port == bsservers[i].sin.sin_port) && - (memcmp((void *)&(sin->sin_addr), - (void *)&(bsservers[i].sin.sin_addr), - sizeof(struct in_addr)) == 0)) { - return i; - } - } - } - - return -1; -} - -void *rx_buffer = NULL; -bsq_t rx_qe; -bsq_t *recv_any(void) { - struct sockaddr_in from; - int rc; - - DB("ENTER recv_any\n"); - - rx_qe.msghdr.msg_name = &from; - rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in); - rx_qe.msghdr.msg_iov = rx_qe.iov; - if (!rx_buffer) { - rx_buffer = malloc(BLOCK_SIZE); - if (!rx_buffer) { - perror("recv_any malloc"); - return NULL; - } - } - rx_qe.block = rx_buffer; - rx_buffer = NULL; - rx_qe.msghdr.msg_iovlen = 2; - rx_qe.msghdr.msg_control = NULL; - rx_qe.msghdr.msg_controllen = 0; - rx_qe.msghdr.msg_flags = 0; - - rx_qe.iov[0].iov_base = (void *)&(rx_qe.message); - rx_qe.iov[0].iov_len = MSGBUFSIZE_ID; - rx_qe.iov[1].iov_base = rx_qe.block; - rx_qe.iov[1].iov_len = BLOCK_SIZE; - - rc = recvmsg(bssock, &(rx_qe.msghdr), 0); - if (rc < 0) { - perror("recv_any"); - return NULL; - 
} - - rx_qe.length = rc; - rx_qe.server = get_server_number(&from); - - DB("recv_any from %d luid=%016llx len=%u\n", - rx_qe.server, rx_qe.message.luid, rx_qe.length); - - return &rx_qe; -} - -void recv_recycle_buffer(bsq_t *q) { - if (q->block) { - rx_buffer = q->block; - q->block = NULL; - } -} - -// cycle through reading any incoming, searching for a match in the -// queue, until we have all we need. -int wait_recv(bsq_t **reqs, int numreqs) { - bsq_t *q, *m; - unsigned int x, i; - int tid = (int)pthread_getspecific(tid_key); - - DB("ENTER wait_recv %u\n", numreqs); - - checkmatch: - x = 0xffffffff; - for (i = 0; i < numreqs; i++) { - x &= reqs[i]->status; - } - if ((x & BSQ_STATUS_MATCHED)) { - DB("LEAVE wait_recv\n"); - return numreqs; - } - - RECV_AWAIT(tid); - - /* - rxagain: - ENTER_RECV_CR; - q = recv_any(); - LEAVE_RECV_CR; - if (!q) - return -1; - - m = queuesearch(q); - recv_recycle_buffer(q); - if (!m) { - fprintf(stderr, "Unmatched RX\n"); - goto rxagain; - } - */ - - goto checkmatch; - -} - -/* retry - */ -static int retry_count = 0; -int retry(bsq_t *qe) -{ - int rc; - gettimeofday(&(qe->tv_sent), NULL); - DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid); - retry_count++; - rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT); - if (rc < 0) - return rc; - return 0; -} - -/* queue runner - */ -void *queue_runner(void *arg) -{ - for (;;) { - struct timeval now; - long long nowus, sus; - bsq_t *q; - int r; - - sleep(1); - - gettimeofday(&now, NULL); - nowus = now.tv_usec + now.tv_sec * 1000000; - ENTER_QUEUE_CR; - r = retry_count; - for (q = bs_head; q; q = q->next) { - sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000; - if ((nowus - sus) > RETRY_TIMEOUT) { - if (retry(q) < 0) { - fprintf(stderr, "Error on sendmsg retry.\n"); - } - } - } - if (r != retry_count) { - fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count); - } - LEAVE_QUEUE_CR; - } -} - -/* receive loop - */ -void *receive_loop(void *arg) -{ - bsq_t *q, *m; - - 
for(;;) { - q = recv_any(); - if (!q) { - fprintf(stderr, "recv_any error\n"); - } - else { - m = queuesearch(q); - recv_recycle_buffer(q); - if (!m) { - fprintf(stderr, "Unmatched RX\n"); - } - else { - DB("RX MATCH"); - RECV_NOTIFY(m->tid); - } - } - } -} -pthread_t pthread_recv; - -/***************************************************************************** - * Reading * - *****************************************************************************/ - -void *readblock_indiv(int server, uint64_t id) { - void *block; - bsq_t *qe; - int len, rc; - - qe = (bsq_t *)malloc(sizeof(bsq_t)); - if (!qe) { - perror("readblock qe malloc"); - return NULL; - } - qe->block = NULL; - - /* - qe->block = malloc(BLOCK_SIZE); - if (!qe->block) { - perror("readblock qe malloc"); - free((void *)qe); - return NULL; - } - */ - - qe->server = server; - - qe->message.operation = BSOP_READBLOCK; - qe->message.flags = 0; - qe->message.id = id; - qe->length = MSGBUFSIZE_ID; - - if (send_message(qe) < 0) { - perror("readblock sendto"); - goto err; - } - - /*len = recv_message(qe); - if (len < 0) { - perror("readblock recv"); - goto err; - }*/ - - rc = wait_recv(&qe, 1); - if (rc < 0) { - perror("readblock recv"); - goto err; - } - - if ((qe->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "readblock server error\n"); - goto err; - } - if (qe->length < MSGBUFSIZE_BLOCK) { - fprintf(stderr, "readblock recv short (%u)\n", len); - goto err; - } - /* if ((block = malloc(BLOCK_SIZE)) == NULL) { - perror("readblock malloc"); - goto err; - } - memcpy(block, qe->message.block, BLOCK_SIZE); - */ - block = qe->block; - - free((void *)qe); - return block; - - err: - free(qe->block); - free((void *)qe); - return NULL; -} - -/** - * readblock: read a block from disk - * @id: block id to read - * - * @return: pointer to block, NULL on error - */ -void *readblock(uint64_t id) { - int map = (int)BSID_MAP(id); - uint64_t xid; - static int i = CLUSTER_MAX_REPLICAS - 1; - void *block = NULL; - - /* 
special case for the "superblock" just use the first block on the - * first replica. (extend to blocks < 6 for vdi bug) - */ - if (id < 6) { - block = readblock_indiv(bsclusters[map].servers[0], id); - goto out; - } - - i++; - if (i >= CLUSTER_MAX_REPLICAS) - i = 0; - switch (i) { - case 0: - xid = BSID_REPLICA0(id); - break; - case 1: - xid = BSID_REPLICA1(id); - break; - case 2: - xid = BSID_REPLICA2(id); - break; - } - - block = readblock_indiv(bsclusters[map].servers[i], xid); - - out: -#ifdef BSDEBUG - if (block) - fprintf(stderr, "READ: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", - id, - (unsigned int)((unsigned char *)block)[0], - (unsigned int)((unsigned char *)block)[1], - (unsigned int)((unsigned char *)block)[2], - (unsigned int)((unsigned char *)block)[3], - (unsigned int)((unsigned char *)block)[4], - (unsigned int)((unsigned char *)block)[5], - (unsigned int)((unsigned char *)block)[6], - (unsigned int)((unsigned char *)block)[7]); - else - fprintf(stderr, "READ: %016llx NULL\n", id); -#endif - return block; -} - -/***************************************************************************** - * Writing * - *****************************************************************************/ - -bsq_t *writeblock_indiv(int server, uint64_t id, void *block) { - - bsq_t *qe; - int len; - - qe = (bsq_t *)malloc(sizeof(bsq_t)); - if (!qe) { - perror("writeblock qe malloc"); - goto err; - } - qe->server = server; - - qe->message.operation = BSOP_WRITEBLOCK; - qe->message.flags = 0; - qe->message.id = id; - //memcpy(qe->message.block, block, BLOCK_SIZE); - qe->block = block; - qe->length = MSGBUFSIZE_BLOCK; - - if (send_message(qe) < 0) { - perror("writeblock sendto"); - goto err; - } - - return qe; - - err: - free((void *)qe); - return NULL; -} - - -/** - * writeblock: write an existing block to disk - * @id: block id - * @block: pointer to block - * - * @return: zero on success, -1 on failure - */ -int writeblock(uint64_t id, void *block) { - - int map = 
(int)BSID_MAP(id); - int rep0 = bsclusters[map].servers[0]; - int rep1 = bsclusters[map].servers[1]; - int rep2 = bsclusters[map].servers[2]; - bsq_t *reqs[3]; - int rc; - - reqs[0] = reqs[1] = reqs[2] = NULL; - -#ifdef BSDEBUG - fprintf(stderr, - "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", - id, - (unsigned int)((unsigned char *)block)[0], - (unsigned int)((unsigned char *)block)[1], - (unsigned int)((unsigned char *)block)[2], - (unsigned int)((unsigned char *)block)[3], - (unsigned int)((unsigned char *)block)[4], - (unsigned int)((unsigned char *)block)[5], - (unsigned int)((unsigned char *)block)[6], - (unsigned int)((unsigned char *)block)[7]); -#endif - - /* special case for the "superblock" just use the first block on the - * first replica. (extend to blocks < 6 for vdi bug) - */ - if (id < 6) { - reqs[0] = writeblock_indiv(rep0, id, block); - if (!reqs[0]) - return -1; - rc = wait_recv(reqs, 1); - return rc; - } - - reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block); - if (!reqs[0]) - goto err; - reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block); - if (!reqs[1]) - goto err; - reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block); - if (!reqs[2]) - goto err; - - rc = wait_recv(reqs, 3); - if (rc < 0) { - perror("writeblock recv"); - goto err; - } - if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "writeblock server0 error\n"); - goto err; - } - if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "writeblock server1 error\n"); - goto err; - } - if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "writeblock server2 error\n"); - goto err; - } - - - free((void *)reqs[0]); - free((void *)reqs[1]); - free((void *)reqs[2]); - return 0; - - err: - if (reqs[0]) { - dequeue(reqs[0]); - free((void *)reqs[0]); - } - if (reqs[1]) { - dequeue(reqs[1]); - free((void *)reqs[1]); - } - if (reqs[2]) { - dequeue(reqs[2]); - free((void *)reqs[2]); - } - return -1; -} - 
-/***************************************************************************** - * Allocation * - *****************************************************************************/ - -/** - * allocblock: write a new block to disk - * @block: pointer to block - * - * @return: new id of block on disk - */ -uint64_t allocblock(void *block) { - return allocblock_hint(block, 0); -} - -bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) { - bsq_t *qe; - int len; - - qe = (bsq_t *)malloc(sizeof(bsq_t)); - if (!qe) { - perror("allocblock_hint qe malloc"); - goto err; - } - qe->server = server; - - qe->message.operation = BSOP_ALLOCBLOCK; - qe->message.flags = 0; - qe->message.id = hint; - //memcpy(qe->message.block, block, BLOCK_SIZE); - qe->block = block; - qe->length = MSGBUFSIZE_BLOCK; - - if (send_message(qe) < 0) { - perror("allocblock_hint sendto"); - goto err; - } - - return qe; - - err: - free((void *)qe); - return NULL; -} - -/** - * allocblock_hint: write a new block to disk - * @block: pointer to block - * @hint: allocation hint - * - * @return: new id of block on disk - */ -uint64_t allocblock_hint(void *block, uint64_t hint) { - int map = (int)hint; - int rep0 = bsclusters[map].servers[0]; - int rep1 = bsclusters[map].servers[1]; - int rep2 = bsclusters[map].servers[2]; - bsq_t *reqs[3]; - int rc; - uint64_t id0, id1, id2; - - reqs[0] = reqs[1] = reqs[2] = NULL; - - DB("ENTER allocblock\n"); - - reqs[0] = allocblock_hint_indiv(rep0, block, hint); - if (!reqs[0]) - goto err; - reqs[1] = allocblock_hint_indiv(rep1, block, hint); - if (!reqs[1]) - goto err; - reqs[2] = allocblock_hint_indiv(rep2, block, hint); - if (!reqs[2]) - goto err; - - rc = wait_recv(reqs, 3); - if (rc < 0) { - perror("allocblock recv"); - goto err; - } - if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "allocblock server0 error\n"); - goto err; - } - if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "allocblock server1 error\n"); - goto 
err; - } - if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) { - fprintf(stderr, "allocblock server2 error\n"); - goto err; - } - - id0 = reqs[0]->message.id; - id1 = reqs[1]->message.id; - id2 = reqs[2]->message.id; - -#ifdef BSDEBUG - fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", - BSID(map, id0, id1, id2), - (unsigned int)((unsigned char *)block)[0], - (unsigned int)((unsigned char *)block)[1], - (unsigned int)((unsigned char *)block)[2], - (unsigned int)((unsigned char *)block)[3], - (unsigned int)((unsigned char *)block)[4], - (unsigned int)((unsigned char *)block)[5], - (unsigned int)((unsigned char *)block)[6], - (unsigned int)((unsigned char *)block)[7]); -#endif - - free((void *)reqs[0]); - free((void *)reqs[1]); - free((void *)reqs[2]); - return BSID(map, id0, id1, id2); - - err: - if (reqs[0]) { - dequeue(reqs[0]); - free((void *)reqs[0]); - } - if (reqs[1]) { - dequeue(reqs[1]); - free((void *)reqs[1]); - } - if (reqs[2]) { - dequeue(reqs[2]); - free((void *)reqs[2]); - } - return 0; -} - -#else /* /BLOCKSTORE_REMOTE */ - -/***************************************************************************** - * Local storage version * - *****************************************************************************/ - -/** - * readblock: read a block from disk - * @id: block id to read - * - * @return: pointer to block, NULL on error - */ - -void *readblock(uint64_t id) { - void *block; - int block_fp; - -//printf("readblock(%llu)\n", id); - block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644); - - if (block_fp < 0) { - perror("open"); - return NULL; - } - - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { - printf ("%Ld ", id); - printf ("%Ld\n", (id - 1) * BLOCK_SIZE); - perror("readblock lseek"); - goto err; - } - if ((block = malloc(BLOCK_SIZE)) == NULL) { - perror("readblock malloc"); - goto err; - } - if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { - perror("readblock read"); - 
free(block); - goto err; - } - close(block_fp); - return block; - -err: - close(block_fp); - return NULL; -} - -/** - * writeblock: write an existing block to disk - * @id: block id - * @block: pointer to block - * - * @return: zero on success, -1 on failure - */ -int writeblock(uint64_t id, void *block) { - - int block_fp; - - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); - - if (block_fp < 0) { - perror("open"); - return -1; - } - - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { - perror("writeblock lseek"); - goto err; - } - if (write(block_fp, block, BLOCK_SIZE) < 0) { - perror("writeblock write"); - goto err; - } - close(block_fp); - return 0; - -err: - close(block_fp); - return -1; -} - -/** - * allocblock: write a new block to disk - * @block: pointer to block - * - * @return: new id of block on disk - */ - -uint64_t allocblock(void *block) { - uint64_t lb; - off64_t pos; - int block_fp; - - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); - - if (block_fp < 0) { - perror("open"); - return 0; - } - - pos = lseek64(block_fp, 0, SEEK_END); - if (pos == (off64_t)-1) { - perror("allocblock lseek"); - goto err; - } - if (pos % BLOCK_SIZE != 0) { - fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); - goto err; - } - if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { - perror("allocblock write"); - goto err; - } - lb = pos / BLOCK_SIZE + 1; -//printf("alloc(%Ld)\n", lb); - close(block_fp); - return lb; - -err: - close(block_fp); - return 0; - -} - -/** - * allocblock_hint: write a new block to disk - * @block: pointer to block - * @hint: allocation hint - * - * @return: new id of block on disk - */ -uint64_t allocblock_hint(void *block, uint64_t hint) { - return allocblock(block); -} - -#endif /* BLOCKSTORE_REMOTE */ - -/***************************************************************************** - * Memory management * - 
*****************************************************************************/ - -/** - * newblock: get a new in-memory block set to zeros - * - * @return: pointer to new block, NULL on error - */ -void *newblock(void) { - void *block = malloc(BLOCK_SIZE); - if (block == NULL) { - perror("newblock"); - return NULL; - } - memset(block, 0, BLOCK_SIZE); - return block; -} - - -/** - * freeblock: unallocate an in-memory block - * @id: block id (zero if this is only in-memory) - * @block: block to be freed - */ -void freeblock(void *block) { - free(block); -} - -static freeblock_t *new_freeblock(void) -{ - freeblock_t *fb; - - fb = newblock(); - - if (fb == NULL) return NULL; - - fb->magic = FREEBLOCK_MAGIC; - fb->next = 0ULL; - fb->count = 0ULL; - memset(fb->list, 0, sizeof fb->list); - - return fb; -} - -void releaseblock(uint64_t id) -{ - blockstore_super_t *bs_super; - freeblock_t *fl_current; - - /* get superblock */ - bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER); - - /* get freeblock_current */ - if (bs_super->freelist_current == 0ULL) - { - fl_current = new_freeblock(); - bs_super->freelist_current = allocblock(fl_current); - writeblock(BLOCKSTORE_SUPER, bs_super); - } else { - fl_current = readblock(bs_super->freelist_current); - } - - /* if full, chain to superblock and allocate new current */ - - if (fl_current->count == FREEBLOCK_SIZE) { - fl_current->next = bs_super->freelist_full; - writeblock(bs_super->freelist_current, fl_current); - bs_super->freelist_full = bs_super->freelist_current; - freeblock(fl_current); - fl_current = new_freeblock(); - bs_super->freelist_current = allocblock(fl_current); - writeblock(BLOCKSTORE_SUPER, bs_super); - } - - /* append id to current */ - fl_current->list[fl_current->count++] = id; - writeblock(bs_super->freelist_current, fl_current); - - freeblock(fl_current); - freeblock(bs_super); - - -} - -/* freelist debug functions: */ -void freelist_count(int print_each) -{ - blockstore_super_t *bs_super; - 
freeblock_t *fb; - uint64_t total = 0, next; - - bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER); - - if (bs_super->freelist_current == 0ULL) { - printf("freelist is empty!\n"); - return; - } - - fb = readblock(bs_super->freelist_current); - printf("%Ld entires on current.\n", fb->count); - total += fb->count; - if (print_each == 1) - { - int i; - for (i=0; i< fb->count; i++) - printf(" %Ld\n", fb->list[i]); - } - - freeblock(fb); - - if (bs_super->freelist_full == 0ULL) { - printf("freelist_full is empty!\n"); - return; - } - - next = bs_super->freelist_full; - for (;;) { - fb = readblock(next); - total += fb->count; - if (print_each == 1) - { - int i; - for (i=0; i< fb->count; i++) - printf(" %Ld\n", fb->list[i]); - } - next = fb->next; - freeblock(fb); - if (next == 0ULL) break; - } - printf("Total of %Ld ids on freelist.\n", total); -} - -/***************************************************************************** - * Initialisation * - *****************************************************************************/ - -int __init_blockstore(void) -{ - int i; - blockstore_super_t *bs_super; - uint64_t ret; - int block_fp; - -#ifdef BLOCKSTORE_REMOTE - struct hostent *addr; - - pthread_mutex_init(&ptmutex_queue, NULL); - pthread_mutex_init(&ptmutex_luid, NULL); - pthread_mutex_init(&ptmutex_recv, NULL); - /*pthread_mutex_init(&ptmutex_notify, NULL);*/ - for (i = 0; i <= READ_POOL_SIZE; i++) { - pool_thread[i].newdata = 0; - pthread_mutex_init(&(pool_thread[i].ptmutex), NULL); - pthread_cond_init(&(pool_thread[i].ptcv), NULL); - } - - bsservers[0].hostname = "firebug.cl.cam.ac.uk"; - bsservers[1].hostname = "planb.cl.cam.ac.uk"; - bsservers[2].hostname = "simcity.cl.cam.ac.uk"; - bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/; - bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/; - bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/; - bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/; - bsservers[7].hostname = 
NULL/*"felix.cl.cam.ac.uk"*/; - bsservers[8].hostname = NULL; - bsservers[9].hostname = NULL; - bsservers[10].hostname = NULL; - bsservers[11].hostname = NULL; - bsservers[12].hostname = NULL; - bsservers[13].hostname = NULL; - bsservers[14].hostname = NULL; - bsservers[15].hostname = NULL; - - for (i = 0; i < MAX_SERVERS; i++) { - if (!bsservers[i].hostname) - continue; - addr = gethostbyname(bsservers[i].hostname); - if (!addr) { - perror("bad hostname"); - return -1; - } - bsservers[i].sin.sin_family = addr->h_addrtype; - bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT); - bsservers[i].sin.sin_addr.s_addr = - ((struct in_addr *)(addr->h_addr))->s_addr; - } - - /* Cluster map - */ - bsclusters[0].servers[0] = 0; - bsclusters[0].servers[1] = 1; - bsclusters[0].servers[2] = 2; - bsclusters[1].servers[0] = 1; - bsclusters[1].servers[1] = 2; - bsclusters[1].servers[2] = 3; - bsclusters[2].servers[0] = 2; - bsclusters[2].servers[1] = 3; - bsclusters[2].servers[2] = 4; - bsclusters[3].servers[0] = 3; - bsclusters[3].servers[1] = 4; - bsclusters[3].servers[2] = 5; - bsclusters[4].servers[0] = 4; - bsclusters[4].servers[1] = 5; - bsclusters[4].servers[2] = 6; - bsclusters[5].servers[0] = 5; - bsclusters[5].servers[1] = 6; - bsclusters[5].servers[2] = 7; - bsclusters[6].servers[0] = 6; - bsclusters[6].servers[1] = 7; - bsclusters[6].servers[2] = 0; - bsclusters[7].servers[0] = 7; - bsclusters[7].servers[1] = 0; - bsclusters[7].servers[2] = 1; - - /* Local socket set up - */ - bssock = socket(AF_INET, SOCK_DGRAM, 0); - if (bssock < 0) { - perror("Bad socket"); - return -1; - } - memset(&sin_local, 0, sizeof(sin_local)); - sin_local.sin_family = AF_INET; - sin_local.sin_port = htons(BLOCKSTORED_PORT); - sin_local.sin_addr.s_addr = htonl(INADDR_ANY); - if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) { - perror("bind"); - close(bssock); - return -1; - } - - pthread_create(&pthread_recv, NULL, receive_loop, NULL); - pthread_create(&pthread_recv, 
NULL, queue_runner, NULL); - -#else /* /BLOCKSTORE_REMOTE */ - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); - - if (block_fp < 0) { - perror("open"); - return -1; - exit(-1); - } - - if (lseek(block_fp, 0, SEEK_END) == 0) { - bs_super = newblock(); - bs_super->magic = BLOCKSTORE_MAGIC; - bs_super->freelist_full = 0LL; - bs_super->freelist_current = 0LL; - - ret = allocblock(bs_super); - - freeblock(bs_super); - } else { - bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER); - if (bs_super->magic != BLOCKSTORE_MAGIC) - { - printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n"); - exit(-1); - } - freeblock(bs_super); - } - - close(block_fp); - -#endif /* BLOCKSTORE_REMOTE */ - return 0; -} - -void __exit_blockstore(void) -{ - int i; -#ifdef BLOCKSTORE_REMOTE - pthread_mutex_destroy(&ptmutex_recv); - pthread_mutex_destroy(&ptmutex_luid); - pthread_mutex_destroy(&ptmutex_queue); - /*pthread_mutex_destroy(&ptmutex_notify); - pthread_cond_destroy(&ptcv_notify);*/ - for (i = 0; i <= READ_POOL_SIZE; i++) { - pthread_mutex_destroy(&(pool_thread[i].ptmutex)); - pthread_cond_destroy(&(pool_thread[i].ptcv)); - } -#endif -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstore.h --- a/tools/blktap/parallax/blockstore.h Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -/************************************************************************** - * - * blockstore.h - * - * Simple block store interface - * - */ - -#ifndef __BLOCKSTORE_H__ -#define __BLOCKSTORE_H__ - -#include <netinet/in.h> -#include <xenctrl.h> - -#define BLOCK_SIZE 4096 -#define BLOCK_SHIFT 12 -#define BLOCK_MASK 0xfffffffffffff000LL - -/* XXX SMH: where is the below supposed to be defined???? 
*/ -#ifndef SECTOR_SHIFT -#define SECTOR_SHIFT 9 -#endif - -#define FREEBLOCK_SIZE (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t)) -#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL - -typedef struct { - uint64_t magic; - uint64_t next; - uint64_t count; - uint64_t list[FREEBLOCK_SIZE]; -} freeblock_t; - -#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL -#define BLOCKSTORE_SUPER 1ULL - -typedef struct { - uint64_t magic; - uint64_t freelist_full; - uint64_t freelist_current; -} blockstore_super_t; - -extern void *newblock(); -extern void *readblock(uint64_t id); -extern uint64_t allocblock(void *block); -extern uint64_t allocblock_hint(void *block, uint64_t hint); -extern int writeblock(uint64_t id, void *block); - -/* Add this blockid to a freelist, to be recycled by the allocator. */ -extern void releaseblock(uint64_t id); - -/* this is a memory free() operation for block-sized allocations */ -extern void freeblock(void *block); -extern int __init_blockstore(void); - -/* debug for freelist. 
*/ -void freelist_count(int print_each); -#define ALLOCFAIL (((uint64_t)(-1))) - -/* Distribution - */ -#define BLOCKSTORED_PORT 9346 - -struct bshdr_t_struct { - uint32_t operation; - uint32_t flags; - uint64_t id; - uint64_t luid; -} __attribute__ ((packed)); -typedef struct bshdr_t_struct bshdr_t; - -struct bsmsg_t_struct { - bshdr_t hdr; - unsigned char block[BLOCK_SIZE]; -} __attribute__ ((packed)); - -typedef struct bsmsg_t_struct bsmsg_t; - -#define MSGBUFSIZE_OP sizeof(uint32_t) -#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t)) -#define MSGBUFSIZE_ID (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t)) -#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t) - -#define BSOP_READBLOCK 0x01 -#define BSOP_WRITEBLOCK 0x02 -#define BSOP_ALLOCBLOCK 0x03 -#define BSOP_FREEBLOCK 0x04 - -#define BSOP_FLAG_ERROR 0x01 - -#define BS_ALLOC_SKIP 10 -#define BS_ALLOC_HACK - -/* Remote hosts and cluster map - XXX need to generalise - */ - -/* - - Interim ID format is - - 63 60 59 40 39 20 19 0 - +----+--------------------+--------------------+--------------------+ - |map | replica 2 | replica 1 | replica 0 | - +----+--------------------+--------------------+--------------------+ - - The map is an index into a table detailing which machines form the - cluster. 
- - */ - -#define BSID_REPLICA0(_id) ((_id)&0xfffffULL) -#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL) -#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL) -#define BSID_MAP(_id) (((_id)>>60)&0xfULL) - -#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \ - (((uint64_t)(_rep2))<<40) | \ - (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0))) - -typedef struct bsserver_t_struct { - char *hostname; - struct sockaddr_in sin; -} bsserver_t; - -#define MAX_SERVERS 16 - -#define CLUSTER_MAX_REPLICAS 3 -typedef struct bscluster_t_struct { - int servers[CLUSTER_MAX_REPLICAS]; -} bscluster_t; - -#define MAX_CLUSTERS 16 - -#endif /* __BLOCKSTORE_H__ */ diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/blockstored.c --- a/tools/blktap/parallax/blockstored.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,275 +0,0 @@ -/************************************************************************** - * - * blockstored.c - * - * Block store daemon. 
- * - */ - -#include <fcntl.h> -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <netinet/in.h> -#include <errno.h> -#include "blockstore.h" - -//#define BSDEBUG - -int readblock_into(uint64_t id, void *block); - -int open_socket(uint16_t port) { - - struct sockaddr_in sn; - int sock; - - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - perror("Bad socket"); - return -1; - } - memset(&sn, 0, sizeof(sn)); - sn.sin_family = AF_INET; - sn.sin_port = htons(port); - sn.sin_addr.s_addr = htonl(INADDR_ANY); - if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) { - perror("bind"); - close(sock); - return -1; - } - - return sock; -} - -static int block_fp = -1; -static int bssock = -1; - -int send_reply(struct sockaddr_in *peer, void *buffer, int len) { - - int rc; - -#ifdef BSDEBUG - fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n", - len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id); -#endif - rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer)); - if (rc < 0) { - perror("send_reply"); - return 1; - } - - - return 0; -} - -static bsmsg_t msgbuf; - -void service_loop(void) { - - for (;;) { - int rc, len; - struct sockaddr_in from; - size_t slen = sizeof(from); - uint64_t bid; - - len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0, - (struct sockaddr *)&from, &slen); - - if (len < 0) { - perror("recvfrom"); - continue; - } - - if (len < MSGBUFSIZE_OP) { - fprintf(stderr, "Short packet.\n"); - continue; - } - -#ifdef BSDEBUG - fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n", - len, msgbuf.hdr.operation, msgbuf.hdr.id); -#endif - - switch (msgbuf.hdr.operation) { - case BSOP_READBLOCK: - if (len < MSGBUFSIZE_ID) { - fprintf(stderr, "Short packet (readblock %u).\n", len); - continue; - } - rc = readblock_into(msgbuf.hdr.id, msgbuf.block); - if (rc < 0) { - fprintf(stderr, 
"readblock error\n"); - msgbuf.hdr.flags = BSOP_FLAG_ERROR; - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); - continue; - } - msgbuf.hdr.flags = 0; - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK); - break; - case BSOP_WRITEBLOCK: - if (len < MSGBUFSIZE_BLOCK) { - fprintf(stderr, "Short packet (writeblock %u).\n", len); - continue; - } - rc = writeblock(msgbuf.hdr.id, msgbuf.block); - if (rc < 0) { - fprintf(stderr, "writeblock error\n"); - msgbuf.hdr.flags = BSOP_FLAG_ERROR; - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); - continue; - } - msgbuf.hdr.flags = 0; - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); - break; - case BSOP_ALLOCBLOCK: - if (len < MSGBUFSIZE_BLOCK) { - fprintf(stderr, "Short packet (allocblock %u).\n", len); - continue; - } - bid = allocblock(msgbuf.block); - if (bid == ALLOCFAIL) { - fprintf(stderr, "allocblock error\n"); - msgbuf.hdr.flags = BSOP_FLAG_ERROR; - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); - continue; - } - msgbuf.hdr.id = bid; - msgbuf.hdr.flags = 0; - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); - break; - } - - } -} - -/** - * readblock: read a block from disk - * @id: block id to read - * @block: pointer to buffer to receive block - * - * @return: 0 if OK, other on error - */ - -int readblock_into(uint64_t id, void *block) { - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { - printf ("%Ld\n", (id - 1) * BLOCK_SIZE); - perror("readblock lseek"); - return -1; - } - if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { - perror("readblock read"); - return -1; - } - return 0; -} - -/** - * writeblock: write an existing block to disk - * @id: block id - * @block: pointer to block - * - * @return: zero on success, -1 on failure - */ -int writeblock(uint64_t id, void *block) { - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { - perror("writeblock lseek"); - return -1; - } - if (write(block_fp, block, BLOCK_SIZE) < 0) { - perror("writeblock 
write"); - return -1; - } - return 0; -} - -/** - * allocblock: write a new block to disk - * @block: pointer to block - * - * @return: new id of block on disk - */ -static uint64_t lastblock = 0; - -uint64_t allocblock(void *block) { - uint64_t lb; - off64_t pos; - - retry: - pos = lseek64(block_fp, 0, SEEK_END); - if (pos == (off64_t)-1) { - perror("allocblock lseek"); - return ALLOCFAIL; - } - if (pos % BLOCK_SIZE != 0) { - fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); - return ALLOCFAIL; - } - if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { - perror("allocblock write"); - return ALLOCFAIL; - } - lb = pos / BLOCK_SIZE + 1; - -#ifdef BS_ALLOC_HACK - if (lb < BS_ALLOC_SKIP) - goto retry; -#endif - - if (lb <= lastblock) - printf("[*** %Ld alredy allocated! ***]\n", lb); - - lastblock = lb; - return lb; -} - -/** - * newblock: get a new in-memory block set to zeros - * - * @return: pointer to new block, NULL on error - */ -void *newblock(void) { - void *block = malloc(BLOCK_SIZE); - if (block == NULL) { - perror("newblock"); - return NULL; - } - memset(block, 0, BLOCK_SIZE); - return block; -} - - -/** - * freeblock: unallocate an in-memory block - * @id: block id (zero if this is only in-memory) - * @block: block to be freed - */ -void freeblock(void *block) { - free(block); -} - - -int main(int argc, char **argv) -{ - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); - - if (block_fp < 0) { - perror("open"); - return -1; - } - - bssock = open_socket(BLOCKSTORED_PORT); - if (bssock < 0) { - return -1; - } - - service_loop(); - - close(bssock); - - return 0; -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/bstest.c --- a/tools/blktap/parallax/bstest.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,191 +0,0 @@ -/************************************************************************** - * - * bstest.c - * - * Block store daemon test program. 
- * - * usage: bstest <host>|X {r|w|a} ID - * - */ - -#include <fcntl.h> -#include <unistd.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <netinet/in.h> -#include <netdb.h> -#include <errno.h> -#include "blockstore.h" - -int direct(char *host, uint32_t op, uint64_t id, int len) { - struct sockaddr_in sn, peer; - int sock; - bsmsg_t msgbuf; - int rc, slen; - struct hostent *addr; - - addr = gethostbyname(host); - if (!addr) { - perror("bad hostname"); - exit(1); - } - peer.sin_family = addr->h_addrtype; - peer.sin_port = htons(BLOCKSTORED_PORT); - peer.sin_addr.s_addr = ((struct in_addr *)(addr->h_addr))->s_addr; - fprintf(stderr, "Sending to: %u.%u.%u.%u\n", - (unsigned int)(unsigned char)addr->h_addr[0], - (unsigned int)(unsigned char)addr->h_addr[1], - (unsigned int)(unsigned char)addr->h_addr[2], - (unsigned int)(unsigned char)addr->h_addr[3]); - - sock = socket(AF_INET, SOCK_DGRAM, 0); - if (sock < 0) { - perror("Bad socket"); - exit(1); - } - memset(&sn, 0, sizeof(sn)); - sn.sin_family = AF_INET; - sn.sin_port = htons(BLOCKSTORED_PORT); - sn.sin_addr.s_addr = htonl(INADDR_ANY); - if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) { - perror("bind"); - close(sock); - exit(1); - } - - memset((void *)&msgbuf, 0, sizeof(msgbuf)); - msgbuf.operation = op; - msgbuf.id = id; - - rc = sendto(sock, (void *)&msgbuf, len, 0, - (struct sockaddr *)&peer, sizeof(peer)); - if (rc < 0) { - perror("sendto"); - exit(1); - } - - slen = sizeof(peer); - len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0, - (struct sockaddr *)&peer, &slen); - if (len < 0) { - perror("recvfrom"); - exit(1); - } - - printf("Reply %u bytes:\n", len); - if (len >= MSGBUFSIZE_OP) - printf(" operation: %u\n", msgbuf.operation); - if (len >= MSGBUFSIZE_FLAGS) - printf(" flags: 0x%x\n", msgbuf.flags); - if (len >= MSGBUFSIZE_ID) - printf(" id: %llu\n", msgbuf.id); - if (len 
>= (MSGBUFSIZE_ID + 4)) - printf(" data: %02x %02x %02x %02x...\n", - (unsigned int)msgbuf.block[0], - (unsigned int)msgbuf.block[1], - (unsigned int)msgbuf.block[2], - (unsigned int)msgbuf.block[3]); - - if (sock > 0) - close(sock); - - return 0; -} - -int main (int argc, char **argv) { - - uint32_t op = 0; - uint64_t id = 0; - int len = 0, rc; - void *block; - - if (argc < 3) { - fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n"); - return 1; - } - - switch (argv[2][0]) { - case 'r': - case 'R': - op = BSOP_READBLOCK; - len = MSGBUFSIZE_ID; - break; - case 'w': - case 'W': - op = BSOP_WRITEBLOCK; - len = MSGBUFSIZE_BLOCK; - break; - case 'a': - case 'A': - op = BSOP_ALLOCBLOCK; - len = MSGBUFSIZE_BLOCK; - break; - default: - fprintf(stderr, "Unknown action '%s'.\n", argv[2]); - return 1; - } - - if (argc >= 4) - id = atoll(argv[3]); - - if (strcmp(argv[1], "X") == 0) { - rc = __init_blockstore(); - if (rc < 0) { - fprintf(stderr, "blockstore init failed.\n"); - return 1; - } - switch(op) { - case BSOP_READBLOCK: - block = readblock(id); - if (block) { - printf("data: %02x %02x %02x %02x...\n", - (unsigned int)((unsigned char*)block)[0], - (unsigned int)((unsigned char*)block)[1], - (unsigned int)((unsigned char*)block)[2], - (unsigned int)((unsigned char*)block)[3]); - } - break; - case BSOP_WRITEBLOCK: - block = malloc(BLOCK_SIZE); - if (!block) { - perror("bstest malloc"); - return 1; - } - memset(block, 0, BLOCK_SIZE); - rc = writeblock(id, block); - if (rc != 0) { - printf("error\n"); - } - else { - printf("OK\n"); - } - break; - case BSOP_ALLOCBLOCK: - block = malloc(BLOCK_SIZE); - if (!block) { - perror("bstest malloc"); - return 1; - } - memset(block, 0, BLOCK_SIZE); - id = allocblock_hint(block, id); - if (id == 0) { - printf("error\n"); - } - else { - printf("ID: %llu\n", id); - } - break; - } - } - else { - direct(argv[1], op, id, len); - } - - - return 0; -} diff -r 59d4c1863330 -r fdf25330e4a6 tools/blktap/parallax/parallax.c --- 
a/tools/blktap/parallax/parallax.c Fri Jun 23 15:26:01 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,608 +0,0 @@ -/************************************************************************** - * - * parallax.c - * - * The Parallax Storage Server - * - */ - - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <pthread.h> -#include "blktaplib.h" -#include "blockstore.h" -#include "vdi.h" -#include "block-async.h" -#include "requests-async.h" - -#define PARALLAX_DEV 61440 -#define SECTS_PER_NODE 8 - - -#if 0 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) -#else -#define DPRINTF(_f, _a...) ((void)0) -#endif - -/* ------[ session records ]----------------------------------------------- */ - -#define BLKIF_HASHSZ 1024 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) - -#define VDI_HASHSZ 16 -#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1)) - -typedef struct blkif { - domid_t domid; - unsigned int handle; - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; - vdi_t *vdi_hash[VDI_HASHSZ]; - struct blkif *hash_next; -} blkif_t; - -static blkif_t *blkif_hash[BLKIF_HASHSZ]; - -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) -{ - if ( handle != 0 ) - printf("blktap/parallax don't currently support non-0 dev handles!\n"); - - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif != NULL) && - ((blkif->domid != domid) || (blkif->handle != handle)) ) - blkif = blkif->hash_next; - return blkif; -} - -vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device) -{ - vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)]; - - while ((vdi != NULL) && (vdi->vdevice != device)) - vdi = vdi->next; - - return vdi; -} - -/* ------[ control message handling ]-------------------------------------- */ - -void blkif_create(blkif_be_create_t *create) -{ - domid_t domid = create->domid; - unsigned int handle = create->blkif_handle; - blkif_t **pblkif, *blkif; - - DPRINTF("parallax (blkif_create): 
create is %p\n", create); - - if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL ) - { - DPRINTF("Could not create blkif: out of memory\n"); - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; - return; - } - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - blkif->handle = handle; - blkif->status = DISCONNECTED; - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( *pblkif != NULL ) - { - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) - { - DPRINTF("Could not create blkif: already exists (%d,%d)\n", - domid, handle); - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; - free(blkif); - return; - } - pblkif = &(*pblkif)->hash_next; - } - - blkif->hash_next = *pblkif; - *pblkif = blkif; - - DPRINTF("Successfully created blkif\n"); - create->status = BLKIF_BE_STATUS_OKAY; -} - -void blkif_destroy(blkif_be_destroy_t *destroy) -{ - domid_t domid = destroy->domid; - unsigned int handle = destroy->blkif_handle; - blkif_t **pblkif, *blkif; - - DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); - - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; - while ( (blkif = *pblkif) != NULL ) - { - if ( (blkif->domid == domid) && (blkif->handle == handle) ) - { - if ( blkif->status != DISCONNECTED ) - goto still_connected; - goto destroy; - } - pblkif = &blkif->hash_next; - } - - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - - still_connected: - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; - return; - - destroy: - *pblkif = blkif->hash_next; - free(blkif); - destroy->status = BLKIF_BE_STATUS_OKAY; -} - -void vbd_create(blkif_be_vbd_create_t *create) -{ - blkif_t *blkif; - vdi_t *vdi, **vdip; - blkif_vdev_t vdevice = create->vdevice; - - DPRINTF("parallax (vbd_create): create=%p\n", create); - - blkif = blkif_find_by_handle(create->domid, create->blkif_handle); - if ( blkif == NULL ) - { - DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", - create->domid, create->blkif_handle); - 
create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } - - /* VDI identifier is in grow->extent.sector_start */ - DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", - (unsigned long)create->dev_handle); - - vdi = vdi_get(create->dev_handle); - if (vdi == NULL) - { - printf("parallax (vbd_create): VDI %lx not found.\n", - (unsigned long)create->dev_handle); - create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; - return; - } - - vdi->next = NULL; - vdi->vdevice = vdevice; - vdip = &blkif->vdi_hash[VDI_HASH(vdevice)]; - while (*vdip != NULL) - vdip = &(*vdip)->next; - *vdip = vdi; - - DPRINTF("blkif_create succeeded\n"); - create->status = BLKIF_BE_STATUS_OKAY; -} - -void vbd_destroy(blkif_be_vbd_destroy_t *destroy) -{ - blkif_t *blkif; - vdi_t *vdi, **vdip; - blkif_vdev_t vdevice = destroy->vdevice; - - blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); - if ( blkif == NULL ) - { - DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", - destroy->domid, destroy->blkif_handle); - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; - return; - } _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |