[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] merge
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID 0ed4a312765bf6160a36dfe1548fdb289cfae52f # Parent dc50cdd66c5ca20aba3bcc57aac22fd7a82d01f2 # Parent c445d4a0dd76b4859d058368ffab7c65f732acab merge diff -r dc50cdd66c5c -r 0ed4a312765b Config.mk --- a/Config.mk Tue Mar 14 20:10:21 2006 +++ b/Config.mk Tue Mar 14 20:50:35 2006 @@ -1,4 +1,7 @@ # -*- mode: Makefile; -*- + +# A debug build of Xen and tools? +debug ?= n # Currently supported architectures: x86_32, x86_64 XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/) @@ -27,6 +30,22 @@ INSTALL_DATA = $(INSTALL) -m0644 INSTALL_PROG = $(INSTALL) -m0755 +ifneq ($(debug),y) +# Optimisation flags are overridable +CFLAGS ?= -O2 -fomit-frame-pointer +CFLAGS += -DNDEBUG +else +CFLAGS += -g +endif + +ifeq ($(XEN_TARGET_ARCH),x86_32) +CFLAGS += -m32 -march=i686 +endif + +ifeq ($(XEN_TARGET_ARCH),x86_64) +CFLAGS += -m64 +endif + ifeq ($(XEN_TARGET_ARCH),x86_64) LIBDIR = lib64 else @@ -39,6 +58,8 @@ endif test-gcc-flag = $(shell $(1) -v --help 2>&1 | grep -q " $(2) " && echo $(2)) + +CFLAGS += -Wall -Wstrict-prototypes HOSTCFLAGS += $(call test-gcc-flag,$(HOSTCC),-Wdeclaration-after-statement) CFLAGS += $(call test-gcc-flag,$(CC),-Wdeclaration-after-statement) diff -r dc50cdd66c5c -r 0ed4a312765b buildconfigs/linux-defconfig_xen0_x86_64 --- a/buildconfigs/linux-defconfig_xen0_x86_64 Tue Mar 14 20:10:21 2006 +++ b/buildconfigs/linux-defconfig_xen0_x86_64 Tue Mar 14 20:50:35 2006 @@ -108,6 +108,7 @@ CONFIG_X86_IO_APIC=y CONFIG_X86_XEN_GENAPIC=y CONFIG_X86_LOCAL_APIC=y +CONFIG_MTRR=y # CONFIG_SMP is not set CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set diff -r dc50cdd66c5c -r 0ed4a312765b buildconfigs/linux-defconfig_xen_x86_64 --- a/buildconfigs/linux-defconfig_xen_x86_64 Tue Mar 14 20:10:21 2006 +++ b/buildconfigs/linux-defconfig_xen_x86_64 Tue Mar 14 20:50:35 2006 @@ -111,6 +111,7 @@ CONFIG_X86_IO_APIC=y CONFIG_X86_XEN_GENAPIC=y CONFIG_X86_LOCAL_APIC=y +CONFIG_MTRR=y CONFIG_SMP=y 
CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/Makefile --- a/extras/mini-os/Makefile Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/Makefile Tue Mar 14 20:50:35 2006 @@ -32,6 +32,7 @@ OBJS += $(patsubst %.c,%.o,$(wildcard *.c)) OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c)) +#OBJS += $(patsubst %.c,%.o,$(wildcard console/*.c)) HDRS := $(wildcard include/*.h) HDRS += $(wildcard include/xen/*.h) diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/domain_config --- a/extras/mini-os/domain_config Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/domain_config Tue Mar 14 20:50:35 2006 @@ -15,3 +15,5 @@ # A name for your domain. All domains must have different names. name = "Mini-OS" + +on_crash = 'destroy' diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/include/lib.h Tue Mar 14 20:50:35 2006 @@ -57,6 +57,7 @@ #include <stdarg.h> + /* printing */ #define printk printf #define kprintf printf diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/include/mm.h Tue Mar 14 20:50:35 2006 @@ -25,18 +25,34 @@ #ifndef _MM_H_ #define _MM_H_ -#ifdef __i386__ +#if defined(__i386__) #include <xen/arch-x86_32.h> +#elif defined(__x86_64__) +#include <xen/arch-x86_64.h> +#else +#error "Unsupported architecture" #endif -#ifdef __x86_64__ -#include <xen/arch-x86_64.h> -#endif +#include <lib.h> - -#ifdef __x86_64__ +#define L1_FRAME 1 +#define L2_FRAME 2 +#define L3_FRAME 3 #define L1_PAGETABLE_SHIFT 12 + +#if defined(__i386__) + +#define L2_PAGETABLE_SHIFT 22 + +#define L1_PAGETABLE_ENTRIES 1024 +#define L2_PAGETABLE_ENTRIES 1024 + +#define PADDR_BITS 32 +#define PADDR_MASK (~0UL) + +#elif defined(__x86_64__) + #define L2_PAGETABLE_SHIFT 21 #define L3_PAGETABLE_SHIFT 30 #define 
L4_PAGETABLE_SHIFT 39 @@ -52,29 +68,29 @@ #define PADDR_MASK ((1UL << PADDR_BITS)-1) #define VADDR_MASK ((1UL << VADDR_BITS)-1) -#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +/* Get physical address of page mapped by pte (paddr_t). */ +#define l1e_get_paddr(x) \ + ((unsigned long)(((x) & (PADDR_MASK&PAGE_MASK)))) +#define l2e_get_paddr(x) \ + ((unsigned long)(((x) & (PADDR_MASK&PAGE_MASK)))) +#define l3e_get_paddr(x) \ + ((unsigned long)(((x) & (PADDR_MASK&PAGE_MASK)))) +#define l4e_get_paddr(x) \ + ((unsigned long)(((x) & (PADDR_MASK&PAGE_MASK)))) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) +#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) #endif - - -#ifdef __i386__ - -#define L1_PAGETABLE_SHIFT 12 -#define L2_PAGETABLE_SHIFT 22 - -#define L1_PAGETABLE_ENTRIES 1024 -#define L2_PAGETABLE_ENTRIES 1024 - -#elif defined(__x86_64__) -#endif +#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) /* Given a virtual address, get an entry offset into a page table. 
*/ #define l1_table_offset(_a) \ (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) #define l2_table_offset(_a) \ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) -#ifdef __x86_64__ +#if defined(__x86_64__) #define l3_table_offset(_a) \ (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) #define l4_table_offset(_a) \ @@ -92,8 +108,15 @@ #define _PAGE_PSE 0x080UL #define _PAGE_GLOBAL 0x100UL -#define L1_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED) -#define L2_PROT (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_USER) +#if defined(__i386__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#elif defined(__x86_64__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif #define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT) #define PAGE_SHIFT L1_PAGETABLE_SHIFT @@ -124,9 +147,9 @@ return phys; } -#ifdef __x86_64__ +#if defined(__x86_64__) #define VIRT_START 0xFFFFFFFF00000000UL -#else +#elif defined(__i386__) #define VIRT_START 0xC0000000UL #endif @@ -136,6 +159,11 @@ #define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) #define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) #define mfn_to_virt(_mfn) (mach_to_virt(_mfn << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt(_pfn << PAGE_SHIFT)) + +/* Pagetable walking. 
*/ +#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) void init_mm(void); unsigned long alloc_pages(int order); diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/include/os.h --- a/extras/mini-os/include/os.h Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/include/os.h Tue Mar 14 20:50:35 2006 @@ -59,6 +59,8 @@ void trap_init(void); + + /* * The use of 'barrier' in the following reflects their use as local-lock * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/mm.c --- a/extras/mini-os/mm.c Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/mm.c Tue Mar 14 20:50:35 2006 @@ -51,7 +51,8 @@ unsigned long *phys_to_machine_mapping; extern char *stack; extern char _text, _etext, _edata, _end; - +extern void do_exit(void); +extern void page_walk(unsigned long virt_addr); /********************* * ALLOCATION BITMAP @@ -63,7 +64,6 @@ #define allocated_in_map(_pn) \ (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1)))) - /* * Hint regarding bitwise arithmetic in map_{alloc,free}: @@ -208,7 +208,6 @@ unsigned long range, bitmap_size; chunk_head_t *ch; chunk_tail_t *ct; - for ( i = 0; i < FREELIST_SIZE; i++ ) { free_head[i] = &free_tail[i]; @@ -366,106 +365,181 @@ free_head[order] = freed_ch; } + + +void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn, + unsigned long offset, unsigned long level) +{ + unsigned long *tab = (unsigned long *)start_info.pt_base; + unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); + unsigned long prot_e, prot_t, pincmd; + mmu_update_t mmu_updates[0]; + struct mmuext_op pin_request; + + DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, " + "prev_l_mfn=%lx, offset=%lx\n", + level, *pt_pfn, prev_l_mfn, offset); + + if (level == L1_FRAME) + { + prot_e = L1_PROT; + prot_t = L2_PROT; + pincmd = MMUEXT_PIN_L1_TABLE; + } +#if 
(defined __x86_64__) + else if (level == L2_FRAME) + { + prot_e = L2_PROT; + prot_t = L3_PROT; + pincmd = MMUEXT_PIN_L2_TABLE; + } + else if (level == L3_FRAME) + { + prot_e = L3_PROT; + prot_t = L4_PROT; + pincmd = MMUEXT_PIN_L3_TABLE; + } +#endif + else + { + printk("new_pt_frame() called with invalid level number %d\n", level); + do_exit(); + } + + /* Update the entry */ +#if (defined __x86_64__) + tab = pte_to_virt(tab[l4_table_offset(pt_page)]); + tab = pte_to_virt(tab[l3_table_offset(pt_page)]); +#endif + mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) + + sizeof(void *)* l1_table_offset(pt_page); + mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | + (prot_e & ~_PAGE_RW); + if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) + { + printk("PTE for new page table page could not be updated\n"); + do_exit(); + } + + /* Pin the page to provide correct protection */ + pin_request.cmd = pincmd; + pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn); + if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0) + { + printk("ERROR: pinning failed\n"); + do_exit(); + } + + /* Now fill the new page table page with entries. + Update the page directory as well. */ + mmu_updates[0].ptr = (prev_l_mfn << PAGE_SHIFT) + sizeof(void *) * offset; + mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t; + if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) + { + printk("ERROR: mmu_update failed\n"); + do_exit(); + } + + *pt_pfn += 1; +} + void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn) { - unsigned long pfn_to_map, pt_frame; - unsigned long mach_ptd, max_mach_ptd; - int count; - unsigned long mach_pte, virt_pte; - unsigned long *ptd = (unsigned long *)start_info.pt_base; - mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; - struct mmuext_op pin_request; - - /* Firstly work out what is the first pfn that is not yet in page tables - NB. 
Assuming that builder fills whole pt_frames (which it does at the - moment) - */ + unsigned long start_address, end_address; + unsigned long pfn_to_map, pt_pfn = *start_pfn; + static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1]; + unsigned long *tab = (unsigned long *)start_info.pt_base; + unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); + unsigned long page, offset; + int count = 0; + +#if defined(__x86_64__) + pfn_to_map = (start_info.nr_pt_frames - 3) * L1_PAGETABLE_ENTRIES; +#else pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES; - DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld", - *start_pfn, pfn_to_map, *max_pfn); - - /* Machine address of page table directory */ - mach_ptd = phys_to_machine(to_phys(start_info.pt_base)); - mach_ptd += sizeof(void *) * - l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map))); - - max_mach_ptd = sizeof(void *) * - l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn))); - - /* Check that we are not trying to access Xen region */ - if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START)) - { - printk("WARNING: mini-os will not use all the memory supplied\n"); - max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START); - *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE); - } - max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base)); - DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd); - - pt_frame = *start_pfn; - /* Should not happen - no empty, mapped pages */ - if(pt_frame >= pfn_to_map) - { - printk("ERROR: Not even a single empty, mapped page\n"); - *(int*)0=0; - } - - while(mach_ptd < max_mach_ptd) - { - /* Correct protection needs to be set for the new page table frame */ - virt_pte = (unsigned long)to_virt(PFN_PHYS(pt_frame)); - mach_pte = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1); - mach_pte += sizeof(void *) * l1_table_offset(virt_pte); - DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, " - 
"mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame), - virt_pte, mach_pte); +#endif + start_address = (unsigned long)pfn_to_virt(pfn_to_map); + end_address = (unsigned long)pfn_to_virt(*max_pfn); + + /* We worked out the virtual memory range to map, now mapping loop */ + printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address); + + while(start_address < end_address) + { + tab = (unsigned long *)start_info.pt_base; + mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base)); + +#if defined(__x86_64__) + offset = l4_table_offset(start_address); + /* Need new L3 pt frame */ + if(!(start_address & L3_MASK)) + new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME); - /* Update the entry */ - mmu_updates[0].ptr = mach_pte; - mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT | - (L1_PROT & ~_PAGE_RW); - if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0) + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); + offset = l3_table_offset(start_address); + /* Need new L2 pt frame */ + if(!(start_address & L2_MASK)) + new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME); + + page = tab[offset]; + mfn = pte_to_mfn(page); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); +#endif + offset = l2_table_offset(start_address); + /* Need new L1 pt frame */ + if(!(start_address & L1_MASK)) + new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME); + + page = tab[offset]; + mfn = pte_to_mfn(page); + offset = l1_table_offset(start_address); + + mmu_updates[count].ptr = (mfn << PAGE_SHIFT) + sizeof(void *) * offset; + mmu_updates[count].val = + pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT; + count++; + if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn) { - printk("PTE for new page table page could not be updated\n"); - *(int*)0=0; + if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0) + { + printk("PTE could not be updated\n"); + do_exit(); + } + count = 0; } - - /* Pin the page to provide correct protection */ - 
pin_request.cmd = MMUEXT_PIN_L1_TABLE; - pin_request.arg1.mfn = pfn_to_mfn(pt_frame); - if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0) + start_address += PAGE_SIZE; + } + + *start_pfn = pt_pfn; +} + + +void mem_test(unsigned long *start_add, unsigned long *end_add) +{ + unsigned long mask = 0x10000; + unsigned long *pointer; + + for(pointer = start_add; pointer < end_add; pointer++) + { + if(!(((unsigned long)pointer) & 0xfffff)) { - printk("ERROR: pinning failed\n"); - *(int*)0=0; + printk("Writing to %lx\n", pointer); + page_walk((unsigned long)pointer); } - - /* Now fill the new page table page with entries. - Update the page directory as well. */ - count = 0; - mmu_updates[count].ptr = mach_ptd; - mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT | - L2_PROT; - count++; - mach_ptd += sizeof(void *); - mach_pte = phys_to_machine(PFN_PHYS(pt_frame++)); - - for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++) - { - mmu_updates[count].ptr = mach_pte; - mmu_updates[count].val = - pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT; - if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte); - mach_pte += sizeof(void *); - } - if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0) - { - printk("ERROR: mmu_update failed\n"); - *(int*)0=0; - } - (*start_pfn)++; - } - - *start_pfn = pt_frame; + *pointer = (unsigned long)pointer & ~mask; + } + + for(pointer = start_add; pointer < end_add; pointer++) + { + if(((unsigned long)pointer & ~mask) != *pointer) + printk("Read error at 0x%lx. 
Read: 0x%lx, should read 0x%lx\n", + (unsigned long)pointer, + *pointer, + ((unsigned long)pointer & ~mask)); + } + } void init_mm(void) @@ -485,23 +559,21 @@ phys_to_machine_mapping = (unsigned long *)start_info.mfn_list; /* First page follows page table pages and 3 more pages (store page etc) */ - start_pfn = PFN_UP(to_phys(start_info.pt_base)) + start_info.nr_pt_frames + 3; + start_pfn = PFN_UP(to_phys(start_info.pt_base)) + + start_info.nr_pt_frames + 3; max_pfn = start_info.nr_pages; - + printk(" start_pfn: %lx\n", start_pfn); printk(" max_pfn: %lx\n", max_pfn); - -#ifdef __i386__ build_pagetable(&start_pfn, &max_pfn); -#endif - + /* * now we can initialise the page allocator */ printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n", (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn), (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn)); - init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn)); + init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn)); printk("MM: done\n"); } diff -r dc50cdd66c5c -r 0ed4a312765b extras/mini-os/traps.c --- a/extras/mini-os/traps.c Tue Mar 14 20:10:21 2006 +++ b/extras/mini-os/traps.c Tue Mar 14 20:50:35 2006 @@ -69,6 +69,30 @@ DO_ERROR(12, "stack segment", stack_segment) DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR(18, "machine check", machine_check) + +void page_walk(unsigned long virt_address) +{ + unsigned long *tab = (unsigned long *)start_info.pt_base; + unsigned long addr = virt_address, page; + printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, start_info.pt_base); + +#if defined(__x86_64__) + page = tab[l4_table_offset(addr)]; + tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); + printk(" L4 = %p (%p) [offset = %lx]\n", page, tab, l4_table_offset(addr)); + + page = tab[l3_table_offset(addr)]; + tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); + printk(" L3 = %p (%p) [offset = %lx]\n", page, tab, l3_table_offset(addr)); +#endif + 
page = tab[l2_table_offset(addr)]; + tab = to_virt(mfn_to_pfn(pte_to_mfn(page)) << PAGE_SHIFT); + printk(" L2 = %p (%p) [offset = %lx]\n", page, tab, l2_table_offset(addr)); + + page = tab[l1_table_offset(addr)]; + printk(" L1 = %p (%p) [offset = %lx]\n", page, tab, l1_table_offset(addr)); + +} void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long addr) diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c Tue Mar 14 20:50:35 2006 @@ -7,6 +7,8 @@ #include <asm/mtrr.h> #include "mtrr.h" + +static DECLARE_MUTEX(mtrr_sem); void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned int *size, mtrr_type * type) @@ -63,18 +65,23 @@ int error; dom0_op_t op; + down(&mtrr_sem); + op.cmd = DOM0_ADD_MEMTYPE; op.u.add_memtype.mfn = base; op.u.add_memtype.nr_mfns = size; op.u.add_memtype.type = type; error = HYPERVISOR_dom0_op(&op); if (error) { + up(&mtrr_sem); BUG_ON(error > 0); return error; } if (increment) ++usage_table[op.u.add_memtype.reg]; + + up(&mtrr_sem); return op.u.add_memtype.reg; } @@ -104,17 +111,18 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) { - int i, max; + unsigned i; mtrr_type ltype; unsigned long lbase; unsigned int lsize; int error = -EINVAL; dom0_op_t op; - max = num_var_ranges; + down(&mtrr_sem); + if (reg < 0) { /* Search for existing MTRR */ - for (i = 0; i < max; ++i) { + for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, &ltype); if (lbase == base && lsize == size) { reg = i; @@ -143,6 +151,7 @@ } error = reg; out: + up(&mtrr_sem); return error; } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Tue Mar 14 
20:50:35 2006 @@ -79,6 +79,10 @@ /* Pseudo-eflags. */ NMI_MASK = 0x80000000 +#ifndef CONFIG_XEN +#define DISABLE_INTERRUPTS cli +#define ENABLE_INTERRUPTS sti +#else /* Offsets into shared_info_t. */ #define evtchn_upcall_pending /* 0 */ #define evtchn_upcall_mask 1 @@ -86,33 +90,24 @@ #define sizeof_vcpu_shift 6 #ifdef CONFIG_SMP -#define preempt_disable(reg) incl TI_preempt_count(reg) -#define preempt_enable(reg) decl TI_preempt_count(reg) -#define XEN_GET_VCPU_INFO(reg) preempt_disable(%ebp) ; \ - movl TI_cpu(%ebp),reg ; \ - shl $sizeof_vcpu_shift,reg ; \ - addl HYPERVISOR_shared_info,reg -#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%ebp) -#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff +#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ + shl $sizeof_vcpu_shift,%esi ; \ + addl HYPERVISOR_shared_info,%esi #else -#define XEN_GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg -#define XEN_PUT_VCPU_INFO(reg) -#define XEN_PUT_VCPU_INFO_fixup -#endif - -#define XEN_LOCKED_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg) -#define XEN_LOCKED_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg) -#define XEN_BLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ - XEN_LOCKED_BLOCK_EVENTS(reg) ; \ - XEN_PUT_VCPU_INFO(reg) -#define XEN_UNBLOCK_EVENTS(reg) XEN_GET_VCPU_INFO(reg) ; \ - XEN_LOCKED_UNBLOCK_EVENTS(reg) ; \ - XEN_PUT_VCPU_INFO(reg) -#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) +#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi +#endif + +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) +#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ + __DISABLE_INTERRUPTS +#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ + __ENABLE_INTERRUPTS +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) +#endif #ifdef CONFIG_PREEMPT -#define preempt_stop GET_THREAD_INFO(%ebp) ; \ - XEN_BLOCK_EVENTS(%esi) +#define preempt_stop cli #else #define preempt_stop #define resume_kernel 
restore_nocheck @@ -159,21 +154,6 @@ .previous -#define RESTORE_ALL \ - RESTORE_REGS \ - addl $4, %esp; \ -1: iret; \ -.section .fixup,"ax"; \ -2: pushl $0; \ - pushl $do_iret_error; \ - jmp error_code; \ -.previous; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,2b; \ -.previous - - ENTRY(ret_from_fork) pushl %eax call schedule_tail @@ -199,7 +179,7 @@ testl $(VM_MASK | 2), %eax jz resume_kernel ENTRY(resume_userspace) - XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -210,15 +190,15 @@ #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - XEN_BLOCK_EVENTS(%esi) + cli cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all - testb $0xFF,EVENT_MASK(%esp) # interrupts off (exception path) ? - jnz restore_all + testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + jz restore_all call preempt_schedule_irq jmp need_resched #endif @@ -289,7 +269,7 @@ call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) # store the return value syscall_exit: - XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -297,7 +277,7 @@ jne syscall_exit_work restore_all: -#if 0 /* XEN */ +#ifndef CONFIG_XEN movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: OLDSS(%esp) contains the wrong/random values if we # are returning to the kernel. 
@@ -307,22 +287,26 @@ andl $(VM_MASK | (4 << 8) | 3), %eax cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS -#endif /* XEN */ +restore_nocheck: +#else restore_nocheck: testl $(VM_MASK|NMI_MASK), EFLAGS(%esp) jnz hypervisor_iret movb EVENT_MASK(%esp), %al notb %al # %al == ~saved_mask - XEN_GET_VCPU_INFO(%esi) + GET_VCPU_INFO andb evtchn_upcall_mask(%esi),%al andb $1,%al # %al == mask & ~saved_mask jnz restore_all_enable_events # != 0 => reenable event delivery - XEN_PUT_VCPU_INFO(%esi) +#endif RESTORE_REGS addl $4, %esp 1: iret .section .fixup,"ax" iret_exc: +#ifndef CONFIG_XEN + sti +#endif pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -332,13 +316,7 @@ .long 1b,iret_exc .previous -hypervisor_iret: - andl $~NMI_MASK, EFLAGS(%esp) - RESTORE_REGS - addl $4, %esp - jmp hypercall_page + (__HYPERVISOR_iret * 32) - -#if 0 /* XEN */ +#ifndef CONFIG_XEN ldt_ss: larl OLDSS(%esp), %eax jnz restore_nocheck @@ -363,7 +341,13 @@ .align 4 .long 1b,iret_exc .previous -#endif /* XEN */ +#else +hypervisor_iret: + andl $~NMI_MASK, EFLAGS(%esp) + RESTORE_REGS + addl $4, %esp + jmp hypercall_page + (__HYPERVISOR_iret * 32) +#endif # perform work that needs to be done immediately before resumption ALIGN @@ -372,7 +356,7 @@ jz work_notifysig work_resched: call schedule - XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -424,7 +408,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending - XEN_UNBLOCK_EVENTS(%esi) # could let do_syscall_trace() call + ENABLE_INTERRUPTS # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -444,7 +428,7 @@ movl $-ENOSYS,EAX(%esp) jmp resume_userspace -#if 0 /* XEN */ +#ifndef CONFIG_XEN #define FIXUP_ESPFIX_STACK \ movl %esp, %eax; \ /* switch to 32bit stack 
using the pointer on top of 16bit stack */ \ @@ -503,7 +487,9 @@ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -#endif /* XEN */ +#else +#define UNWIND_ESPFIX_STACK +#endif ENTRY(divide_error) pushl $0 # no error code @@ -522,7 +508,7 @@ pushl %ebx cld pushl %es -# UNWIND_ESPFIX_STACK + UNWIND_ESPFIX_STACK popl %ecx movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code @@ -535,6 +521,7 @@ call *%edi jmp ret_from_exception +#ifdef CONFIG_XEN # A note on the "critical region" in our callback handler. # We want to avoid stacking callback handlers due to events occurring # during handling of the last event. To do this, we keep events disabled @@ -560,15 +547,24 @@ jmp ret_from_intr ALIGN -restore_all_enable_events: - XEN_LOCKED_UNBLOCK_EVENTS(%esi) +restore_all_enable_events: + __ENABLE_INTERRUPTS scrit: /**** START OF CRITICAL REGION ****/ - XEN_TEST_PENDING(%esi) + __TEST_PENDING jnz 14f # process more events if necessary... - XEN_PUT_VCPU_INFO(%esi) - RESTORE_ALL -14: XEN_LOCKED_BLOCK_EVENTS(%esi) - XEN_PUT_VCPU_INFO(%esi) + RESTORE_REGS + addl $4, %esp +1: iret +.section .fixup,"ax" +2: pushl $0 + pushl $do_iret_error + jmp error_code +.previous +.section __ex_table,"a" + .align 4 + .long 1b,2b +.previous +14: __DISABLE_INTERRUPTS jmp 11b ecrit: /**** END OF CRITICAL REGION ****/ # [How we do the fixup]. We want to merge the current stack frame with the @@ -577,14 +573,13 @@ # registers are in each frame. We do this quickly using the lookup table # 'critical_fixup_table'. For each byte offset in the critical region, it # provides the number of bytes which have already been popped from the -# interrupted stack frame. +# interrupted stack frame. 
critical_region_fixup: addl $critical_fixup_table-scrit,%eax movzbl (%eax),%eax # %eax contains num bytes popped cmpb $0xff,%al # 0xff => vcpu_info critical region jne 15f GET_THREAD_INFO(%ebp) - XEN_PUT_VCPU_INFO(%esi) # abort vcpu_info critical region xorl %eax,%eax 15: mov %esp,%esi add %eax,%esi # %esi points at end of src region @@ -602,9 +597,8 @@ jmp 11b critical_fixup_table: - .byte 0xff,0xff,0xff # testb $0xff,(%esi) = XEN_TEST_PENDING + .byte 0xff,0xff,0xff # testb $0xff,(%esi) = __TEST_PENDING .byte 0xff,0xff # jnz 14f - XEN_PUT_VCPU_INFO_fixup .byte 0x00 # pop %ebx .byte 0x04 # pop %ecx .byte 0x08 # pop %edx @@ -617,7 +611,6 @@ .byte 0x24,0x24,0x24 # add $4,%esp .byte 0x28 # iret .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) - XEN_PUT_VCPU_INFO_fixup .byte 0x00,0x00 # jmp 11b # Hypervisor uses this for application faults while it executes. @@ -646,6 +639,7 @@ .long 3b,8b; \ .long 4b,9b; \ .previous +#endif ENTRY(coprocessor_error) pushl $0 @@ -660,7 +654,17 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int SAVE_ALL - #preempt_stop /* This is already an interrupt gate on Xen. */ +#ifndef CONFIG_XEN + movl %cr0, %eax + testl $0x4, %eax # EM (math emulation bit) + je device_available_emulate + pushl $0 # temporary storage for ORIG_EIP + call math_emulate + addl $4, %esp + jmp ret_from_exception +device_available_emulate: +#endif + preempt_stop call math_state_restore jmp ret_from_exception @@ -703,16 +707,7 @@ jmp ret_from_exception .previous .text -ENTRY(nmi) - pushl %eax - SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - orl $NMI_MASK, EFLAGS(%esp) - jmp restore_all - -#if 0 /* XEN */ +#ifndef CONFIG_XEN /* * NMI is doubly nasty. 
It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -783,7 +778,16 @@ .align 4 .long 1b,iret_exc .previous -#endif /* XEN */ +#else +ENTRY(nmi) + pushl %eax + SAVE_ALL + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi + orl $NMI_MASK, EFLAGS(%esp) + jmp restore_all +#endif KPROBE_ENTRY(int3) pushl $-1 # mark this as an int diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Tue Mar 14 20:50:35 2006 @@ -29,8 +29,6 @@ /* Set up the stack pointer */ movl $(init_thread_union+THREAD_SIZE),%esp - -checkCPUtype: /* get vendor info */ xorl %eax,%eax # call CPUID with 0 -> return vendor ID diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Tue Mar 14 20:50:35 2006 @@ -107,9 +107,9 @@ { local_irq_disable(); - if (need_resched()) { + if (need_resched()) local_irq_enable(); - } else { + else { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); stop_hz_timer(); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Tue Mar 14 20:50:35 2006 @@ -157,18 +157,26 @@ } __setup("independent_wallclock", __independent_wallclock); +/* Permitted clock jitter, in usecs, beyond which a warning will be printed. 
*/ +static unsigned long permitted_clock_jitter = 10000UL; +static int __init __permitted_clock_jitter(char *str) +{ + permitted_clock_jitter = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("permitted_clock_jitter=", __permitted_clock_jitter); + int tsc_disable __devinitdata = 0; static void delay_tsc(unsigned long loops) { unsigned long bclock, now; - + rdtscl(bclock); - do - { + do { rep_nop(); rdtscl(now); - } while ((now-bclock) < loops); + } while ((now - bclock) < loops); } struct timer_opts timer_tsc = { @@ -187,7 +195,7 @@ u32 tmp1, tmp2; #endif - if ( shift < 0 ) + if (shift < 0) delta >>= -shift; else delta <<= shift; @@ -226,7 +234,7 @@ struct vcpu_time_info *info; info = &HYPERVISOR_shared_info->vcpu_info[0].time; do_div(__cpu_khz, info->tsc_to_system_mul); - if ( info->tsc_shift < 0 ) + if (info->tsc_shift < 0) cpu_khz = __cpu_khz << -info->tsc_shift; else cpu_khz = __cpu_khz >> info->tsc_shift; @@ -284,8 +292,7 @@ shadow_tv.tv_sec = s->wc_sec; shadow_tv.tv_nsec = s->wc_nsec; rmb(); - } - while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version)); + } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version)); if (!independent_wallclock) __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec); @@ -312,8 +319,7 @@ dst->tsc_to_nsec_mul = src->tsc_to_system_mul; dst->tsc_shift = src->tsc_shift; rmb(); - } - while ((src->version & 1) | (dst->version ^ src->version)); + } while ((src->version & 1) | (dst->version ^ src->version)); dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; } @@ -324,8 +330,9 @@ struct shadow_time_info *dst; src = &HYPERVISOR_shared_info->vcpu_info[cpu].time; - dst = &per_cpu(shadow_time, cpu); - + dst = &per_cpu(shadow_time, cpu); + + rmb(); return (dst->version == src->version); } @@ -454,7 +461,7 @@ * overflows. If that were to happen then our shadow time values would * be stale, so we can retry with fresh ones. 
*/ - for ( ; ; ) { + for (;;) { nsec = tv->tv_nsec - get_nsec_offset(shadow); if (time_values_up_to_date(cpu)) break; @@ -552,11 +559,11 @@ do { local_time_version = shadow->version; - smp_rmb(); + barrier(); time = shadow->system_timestamp + get_nsec_offset(shadow); if (!time_values_up_to_date(cpu)) get_time_values_from_xen(); - smp_rmb(); + barrier(); } while (local_time_version != shadow->version); put_cpu(); @@ -614,7 +621,7 @@ get_time_values_from_xen(); /* Obtain a consistent snapshot of elapsed wallclock cycles. */ - delta = delta_cpu = + delta = delta_cpu = shadow->system_timestamp + get_nsec_offset(shadow); delta -= processed_system_time; delta_cpu -= per_cpu(processed_system_time, cpu); @@ -633,13 +640,13 @@ per_cpu(processed_blocked_time, cpu); barrier(); } while (sched_time != runstate->state_entry_time); - } - while (!time_values_up_to_date(cpu)); - - if ((unlikely(delta < -1000000LL) || unlikely(delta_cpu < 0)) + } while (!time_values_up_to_date(cpu)); + + if ((unlikely(delta < -(s64)permitted_clock_jitter) || + unlikely(delta_cpu < -(s64)permitted_clock_jitter)) && printk_ratelimit()) { printk("Timer ISR/%d: Time went backwards: " - "delta=%lld cpu_delta=%lld shadow=%lld " + "delta=%lld delta_cpu=%lld shadow=%lld " "off=%lld processed=%lld cpu_processed=%lld\n", cpu, delta, delta_cpu, shadow->system_timestamp, (s64)get_nsec_offset(shadow), @@ -669,8 +676,10 @@ * HACK: Passing NULL to account_steal_time() * ensures that the ticks are accounted as stolen. */ - if (stolen > 0) { + if ((stolen > 0) && (delta_cpu > 0)) { delta_cpu -= stolen; + if (unlikely(delta_cpu < 0)) + stolen += delta_cpu; /* clamp local-time progress */ do_div(stolen, NS_PER_TICK); per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK; per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK; @@ -682,8 +691,10 @@ * HACK: Passing idle_task to account_steal_time() * ensures that the ticks are accounted as idle/wait. 
*/ - if (blocked > 0) { + if ((blocked > 0) && (delta_cpu > 0)) { delta_cpu -= blocked; + if (unlikely(delta_cpu < 0)) + blocked += delta_cpu; /* clamp local-time progress */ do_div(blocked, NS_PER_TICK); per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK; per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK; @@ -938,7 +949,7 @@ } /* Convert jiffies to system time. */ -static inline u64 jiffies_to_st(unsigned long j) +u64 jiffies_to_st(unsigned long j) { unsigned long seq; long delta; @@ -949,13 +960,14 @@ delta = j - jiffies; /* NB. The next check can trigger in some wrap-around cases, * but that's ok: we'll just end up with a shorter timeout. */ - if (delta < 1) + if (delta < 1) delta = 1; st = processed_system_time + (delta * (u64)NS_PER_TICK); } while (read_seqretry(&xtime_lock, seq)); return st; } +EXPORT_SYMBOL(jiffies_to_st); /* * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu @@ -965,7 +977,7 @@ { unsigned int cpu = smp_processor_id(); unsigned long j; - + /* We must do this /before/ checking rcu_pending(). */ cpu_set(cpu, nohz_cpu_mask); smp_mb(); @@ -1012,7 +1024,7 @@ do { seq = read_seqbegin(&xtime_lock); /* Use cpu0 timestamp: cpu's shadow is not initialised yet. */ - per_cpu(processed_system_time, cpu) = + per_cpu(processed_system_time, cpu) = per_cpu(shadow_time, 0).system_timestamp; init_missing_ticks_accounting(cpu); } while (read_seqretry(&xtime_lock, seq)); @@ -1041,13 +1053,31 @@ * now however. 
*/ static ctl_table xen_subtable[] = { - {1, "independent_wallclock", &independent_wallclock, - sizeof(independent_wallclock), 0644, NULL, proc_dointvec}, - {0} + { + .ctl_name = 1, + .procname = "independent_wallclock", + .data = &independent_wallclock, + .maxlen = sizeof(independent_wallclock), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = 2, + .procname = "permitted_clock_jitter", + .data = &permitted_clock_jitter, + .maxlen = sizeof(permitted_clock_jitter), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, + { 0 } }; static ctl_table xen_table[] = { - {123, "xen", NULL, 0, 0555, xen_subtable}, - {0} + { + .ctl_name = 123, + .procname = "xen", + .mode = 0555, + .child = xen_subtable}, + { 0 } }; static int __init xen_sysctl_init(void) { diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Tue Mar 14 20:50:35 2006 @@ -201,56 +201,6 @@ BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } -void xen_pte_pin(unsigned long ptr) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L1_TABLE; - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); - BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void xen_pte_unpin(unsigned long ptr) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); - BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -#ifdef CONFIG_X86_64 -void xen_pud_pin(unsigned long ptr) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L3_TABLE; - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); - BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void xen_pud_unpin(unsigned long ptr) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); - BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void xen_pmd_pin(unsigned 
long ptr) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L2_TABLE; - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); - BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void xen_pmd_unpin(unsigned long ptr) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); - BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} -#endif /* CONFIG_X86_64 */ - void xen_set_ldt(unsigned long ptr, unsigned long len) { struct mmuext_op op; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/i386/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue Mar 14 20:50:35 2006 @@ -556,10 +556,15 @@ kmap_init(); - /* Switch to the real shared_info page, and clear the dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); + if (!xen_feature(XENFEAT_auto_translated_physmap) || + xen_start_info->shared_info >= xen_start_info->nr_pages) { + /* Switch to the real shared_info page, and clear the + * dummy page. 
*/ + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); + HYPERVISOR_shared_info = + (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); + } /* Setup mapping of lower 1st MB */ for (i = 0; i < NR_FIX_ISAMAPS; i++) diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/x86_64/Kconfig --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Tue Mar 14 20:50:35 2006 @@ -222,7 +222,7 @@ config MTRR bool "MTRR (Memory Type Range Register) support" - depends on !X86_64_XEN + depends on !XEN_UNPRIVILEGED_GUEST ---help--- On Intel P6 family processors (Pentium Pro, Pentium II and later) the Memory Type Range Registers (MTRRs) may be used to control diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c Tue Mar 14 20:50:35 2006 @@ -124,9 +124,9 @@ { local_irq_disable(); - if (need_resched()) { + if (need_resched()) local_irq_enable(); - } else { + else { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); stop_hz_timer(); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue Mar 14 20:50:35 2006 @@ -664,6 +664,13 @@ setup_xen_features(); + if (xen_feature(XENFEAT_auto_translated_physmap) && + xen_start_info->shared_info < xen_start_info->nr_pages) { + HYPERVISOR_shared_info = + (shared_info_t *)__va(xen_start_info->shared_info); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); + } + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- 
a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Mar 14 20:50:35 2006 @@ -244,7 +244,6 @@ if (pud_none(*pud)) { pmd = (pmd_t *) spp_getpage(); make_page_readonly(pmd, XENFEAT_writable_page_tables); - xen_pmd_pin(__pa(pmd)); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); if (pmd != pmd_offset(pud, 0)) { printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); @@ -255,7 +254,6 @@ if (pmd_none(*pmd)) { pte = (pte_t *) spp_getpage(); make_page_readonly(pte, XENFEAT_writable_page_tables); - xen_pte_pin(__pa(pte)); set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); if (pte != pte_offset_kernel(pmd, 0)) { printk("PAGETABLE BUG #02!\n"); @@ -297,7 +295,6 @@ pmd = (pmd_t *) spp_getpage(); make_page_readonly(pmd, XENFEAT_writable_page_tables); - xen_pmd_pin(__pa(pmd)); set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); @@ -311,7 +308,6 @@ if (pmd_none(*pmd)) { pte = (pte_t *) spp_getpage(); make_page_readonly(pte, XENFEAT_writable_page_tables); - xen_pte_pin(__pa(pte)); set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); if (pte != pte_offset_kernel(pmd, 0)) { @@ -461,7 +457,6 @@ } pte = pte_save; early_make_page_readonly(pte, XENFEAT_writable_page_tables); - xen_pte_pin(pte_phys); set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE)); } } @@ -500,7 +495,6 @@ pmd = alloc_static_page(&pmd_phys); early_make_page_readonly(pmd, XENFEAT_writable_page_tables); - xen_pmd_pin(pmd_phys); spin_lock(&init_mm.page_table_lock); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); phys_pmd_init(pmd, paddr, end); @@ -545,9 +539,6 @@ xen_pgd_pin(__pa_symbol(init_level4_pgt)); xen_pgd_pin(__pa_symbol(init_level4_user_pgt)); - xen_pud_pin(__pa_symbol(level3_kernel_pgt)); - xen_pud_pin(__pa_symbol(level3_user_pgt)); - xen_pmd_pin(__pa_symbol(level2_kernel_pgt)); set_pgd((pgd_t *)(init_level4_user_pgt + 511), mk_kernel_pgd(__pa_symbol(level3_user_pgt))); @@ -581,7 +572,6 @@ 
pte_page = alloc_static_page(&phys); early_make_page_readonly( pte_page, XENFEAT_writable_page_tables); - xen_pte_pin(phys); set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER)); } else { addr = page[pmd_index(va)]; @@ -662,7 +652,6 @@ pud = alloc_static_page(&pud_phys); early_make_page_readonly(pud, XENFEAT_writable_page_tables); } - xen_pud_pin(pud_phys); next = start + PGDIR_SIZE; if (next > end) next = end; @@ -757,10 +746,16 @@ free_area_init_node(0, NODE_DATA(0), zones, __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - - memset(empty_zero_page, 0, sizeof(empty_zero_page)); + if (!xen_feature(XENFEAT_auto_translated_physmap) || + xen_start_info->shared_info >= xen_start_info->nr_pages) { + /* Switch to the real shared_info page, and clear the + * dummy page. */ + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); + HYPERVISOR_shared_info = + (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); + } + init_mm.context.pinned = 1; /* Setup mapping of lower 1st MB */ @@ -937,7 +932,6 @@ ClearPageReserved(virt_to_page(addr)); set_page_count(virt_to_page(addr), 1); memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); - xen_pte_unpin(__pa(addr)); make_page_writable( __va(__pa(addr)), XENFEAT_writable_page_tables); /* diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/Kconfig --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig Tue Mar 14 20:50:35 2006 @@ -30,12 +30,14 @@ default !XEN_PRIVILEGED_GUEST config XEN_PCIDEV_BACKEND - bool "PCI device backend driver" - select PCI - default y if XEN_PRIVILEGED_GUEST + tristate "PCI device backend driver" + depends PCI + default XEN_PRIVILEGED_GUEST help The PCI device backend driver allows the kernel to export arbitrary - PCI devices to other guests. 
+ PCI devices to other guests. If you select this to be a module, you + will need to make sure no other driver has bound to the device(s) + you want to make visible to other guests. choice prompt "PCI Backend Mode" diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Tue Mar 14 20:50:35 2006 @@ -468,6 +468,7 @@ return -1; current_pages = min(xen_start_info->nr_pages, max_pfn); + totalram_pages = current_pages; target_pages = current_pages; balloon_low = 0; balloon_high = 0; @@ -547,6 +548,7 @@ &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL); BUG_ON(ret); current_pages -= 1UL << order; + totalram_pages = current_pages; balloon_unlock(flags); schedule_work(&balloon_worker); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Tue Mar 14 20:50:35 2006 @@ -95,20 +95,16 @@ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); return -ENOMEM; } + + memset(info, 0, sizeof(*info)); info->xbdev = dev; info->vdevice = vdevice; info->connected = BLKIF_STATE_DISCONNECTED; - info->mi = NULL; - info->gd = NULL; INIT_WORK(&info->work, blkif_restart_queue, (void *)info); - info->shadow_free = 0; - memset(info->shadow, 0, sizeof(info->shadow)); for (i = 0; i < BLK_RING_SIZE; i++) info->shadow[i].req.id = i+1; info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - - info->users = 0; /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); @@ -182,9 +178,8 @@ } err = xenbus_switch_state(dev, xbt, XenbusStateInitialised); - if (err) { + if (err) goto abort_transaction; - } err = xenbus_transaction_end(xbt, 0); if (err) { @@ -295,17 +290,17 @@ /* ** Connection ** */ -/* -** Invoked when the backend is finally 'ready' (and has told produced -** the details about the physical device - #sectors, size, etc). -*/ +/* + * Invoked when the backend is finally 'ready' (and has told produced + * the details about the physical device - #sectors, size, etc). + */ static void connect(struct blkfront_info *info) { unsigned long sectors, sector_size; unsigned int binfo; int err; - if( (info->connected == BLKIF_STATE_CONNECTED) || + if ((info->connected == BLKIF_STATE_CONNECTED) || (info->connected == BLKIF_STATE_SUSPENDED) ) return; @@ -330,7 +325,7 @@ return; } - (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); + (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); /* Kick pending requests. */ spin_lock_irq(&blkif_io_lock); @@ -353,11 +348,7 @@ DPRINTK("blkfront_closing: %s removed\n", dev->nodename); - if (info->mi) { - DPRINTK("Calling xlvbd_del\n"); - xlvbd_del(info); - info->mi = NULL; - } + xlvbd_del(info); xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed); } @@ -463,8 +454,7 @@ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", command, (long)argument, inode->i_rdev); - switch ( command ) - { + switch (command) { case HDIO_GETGEO: /* return ENOSYS to use defaults */ return -ENOSYS; @@ -490,7 +480,7 @@ * blkif_queue_request * * request block io - * + * * id: for guest use only. * operation: BLKIF_OP_{READ,WRITE,PROBE} * buffer: buffer to read/write into. 
this should be a @@ -557,7 +547,7 @@ ring_req->seg[ring_req->nr_segments] = (struct blkif_request_segment) { .gref = ref, - .first_sect = fsect, + .first_sect = fsect, .last_sect = lsect }; ring_req->nr_segments++; @@ -679,9 +669,8 @@ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); if (more_to_do) goto again; - } else { + } else info->ring.sring->rsp_event = i + 1; - } kick_pending_request_queues(info); @@ -694,8 +683,8 @@ { /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&blkif_io_lock); - info->connected = suspend ? - BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; + info->connected = suspend ? + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; spin_unlock_irq(&blkif_io_lock); /* Free resources associated with old device channel. */ @@ -706,7 +695,7 @@ info->ring.sring = NULL; } if (info->irq) - unbind_from_irqhandler(info->irq, info); + unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; } @@ -767,11 +756,11 @@ kfree(copy); - (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); - + (void)xenbus_switch_state(info->xbdev, XBT_NULL, XenbusStateConnected); + /* Now safe for us to use the shared ring */ spin_lock_irq(&blkif_io_lock); - info->connected = BLKIF_STATE_CONNECTED; + info->connected = BLKIF_STATE_CONNECTED; spin_unlock_irq(&blkif_io_lock); /* Send off requeued requests */ diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/blkfront/block.h --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Tue Mar 14 20:50:35 2006 @@ -55,24 +55,24 @@ #include <asm/atomic.h> #include <asm/uaccess.h> -#if 1 +#if 1 #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_blk: " fmt, ##args) #else #define IPRINTK(fmt, args...) ((void)0) #endif -#if 1 +#if 1 #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_blk: " fmt, ##args) #else #define WPRINTK(fmt, args...) 
((void)0) #endif - -#define DPRINTK(_f, _a...) pr_debug ( _f , ## _a ) + +#define DPRINTK(_f, _a...) pr_debug(_f, ## _a) #if 0 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) +#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a) #else #define DPRINTK_IOCTL(_f, _a...) ((void)0) #endif @@ -139,7 +139,7 @@ unsigned command, unsigned long argument); extern int blkif_check(dev_t dev); extern int blkif_revalidate(dev_t dev); -extern void do_blkif_request (request_queue_t *rq); +extern void do_blkif_request (request_queue_t *rq); /* Virtual block-device subsystem. */ /* Note that xlvbd_add doesn't call add_disk for you: you're expected diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Tue Mar 14 20:50:35 2006 @@ -216,6 +216,10 @@ int nr_minors = 1; int err = -ENODEV; + BUG_ON(info->gd != NULL); + BUG_ON(info->mi != NULL); + BUG_ON(info->rq != NULL); + mi = xlbd_get_major_info(vdevice); if (mi == NULL) goto out; @@ -268,6 +272,7 @@ out: if (mi) xlbd_put_major_info(mi); + info->mi = NULL; return err; } @@ -294,22 +299,20 @@ void xlvbd_del(struct blkfront_info *info) { - struct block_device *bd; - - bd = bdget(info->dev); - if (bd == NULL) + if (info->mi == NULL) return; - if (info->gd == NULL) - return; - + BUG_ON(info->gd == NULL); del_gendisk(info->gd); put_disk(info->gd); + info->gd = NULL; + xlbd_put_major_info(info->mi); info->mi = NULL; + + BUG_ON(info->rq == NULL); blk_cleanup_queue(info->rq); - - bdput(bd); + info->rq = NULL; } /* diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Tue Mar 14 20:50:35 2006 @@ -85,8 +85,7 @@ else if (!strncmp(str, "off", 3)) xc_mode = XC_OFF; - switch ( xc_mode ) - { + 
switch (xc_mode) { case XC_SERIAL: n = simple_strtol(str+4, &q, 10); if (q > (str + 4)) @@ -227,7 +226,7 @@ va_list args; int printk_len; static char printk_buf[1024]; - + /* Emit the output into the temporary buffer */ va_start(args, fmt); printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args); @@ -485,7 +484,7 @@ spin_lock_irqsave(&xencons_lock, flags); __xencons_tx_flush(); - spin_unlock_irqrestore(&xencons_lock, flags); + spin_unlock_irqrestore(&xencons_lock, flags); } static void xencons_wait_until_sent(struct tty_struct *tty, int timeout) @@ -495,17 +494,15 @@ if (TTY_INDEX(tty) != 0) return; - while (DRV(tty->driver)->chars_in_buffer(tty)) - { + while (DRV(tty->driver)->chars_in_buffer(tty)) { set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); if (signal_pending(current)) break; - if ( (timeout != 0) && - time_after(jiffies, orig_jiffies + timeout) ) + if (timeout && time_after(jiffies, orig_jiffies + timeout)) break; } - + set_current_state(TASK_RUNNING); } @@ -521,7 +518,7 @@ if (xencons_tty == NULL) xencons_tty = tty; __xencons_tx_flush(); - spin_unlock_irqrestore(&xencons_lock, flags); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -543,7 +540,7 @@ tty->closing = 0; spin_lock_irqsave(&xencons_lock, flags); xencons_tty = NULL; - spin_unlock_irqrestore(&xencons_lock, flags); + spin_unlock_irqrestore(&xencons_lock, flags); } } @@ -574,7 +571,7 @@ xencons_ring_init(); - xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? + xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 
1 : MAX_NR_CONSOLES); if (xencons_driver == NULL) return -ENOMEM; @@ -584,15 +581,14 @@ DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL; DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL; DRV(xencons_driver)->init_termios = tty_std_termios; - DRV(xencons_driver)->flags = + DRV(xencons_driver)->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS; DRV(xencons_driver)->termios = xencons_termios; DRV(xencons_driver)->termios_locked = xencons_termios_locked; - if (xc_mode == XC_SERIAL) - { + if (xc_mode == XC_SERIAL) { DRV(xencons_driver)->name = "ttyS"; DRV(xencons_driver)->minor_start = 64 + xc_num; DRV(xencons_driver)->name_base = 0 + xc_num; @@ -630,7 +626,7 @@ printk("Xen virtual console successfully installed as %s%d\n", DRV(xencons_driver)->name, DRV(xencons_driver)->name_base ); - + return 0; } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c --- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c Tue Mar 14 20:50:35 2006 @@ -55,7 +55,7 @@ notify_daemon(); return sent; -} +} static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs) { diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/core/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c Tue Mar 14 20:50:35 2006 @@ -72,7 +72,7 @@ /* IRQ <-> IPI mapping. */ #ifndef NR_IPIS -#define NR_IPIS 1 +#define NR_IPIS 1 #endif DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]); @@ -209,7 +209,7 @@ irq_bindcount[irq]++; spin_unlock(&irq_mapping_update_lock); - + return irq; } @@ -238,7 +238,7 @@ irq_bindcount[irq]++; spin_unlock(&irq_mapping_update_lock); - + return irq; } @@ -535,9 +535,9 @@ /* NB. We are happy to share unless we are probing. */ op.u.bind_pirq.flags = probing_irq(irq) ? 
0 : BIND_PIRQ__WILL_SHARE; if (HYPERVISOR_event_channel_op(&op) != 0) { - if ( !probing_irq(irq) ) - printk(KERN_INFO "Failed to obtain physical " - "IRQ %d\n", irq); + if (!probing_irq(irq)) + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", + irq); return 0; } evtchn = op.u.bind_pirq.port; @@ -669,7 +669,7 @@ * like a real IO-APIC we 'lose the interrupt edge' if the channel is * masked. */ - if (synch_test_bit(port, &s->evtchn_pending[0]) && + if (synch_test_bit(port, &s->evtchn_pending[0]) && !synch_test_and_set_bit(port / BITS_PER_LONG, &vcpu_info->evtchn_pending_sel)) { vcpu_info->evtchn_upcall_pending = 1; @@ -722,7 +722,7 @@ op.u.bind_virq.vcpu = 0; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); evtchn = op.u.bind_virq.port; - + /* Record the new mapping. */ evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); @@ -744,7 +744,7 @@ op.u.bind_ipi.vcpu = 0; BUG_ON(HYPERVISOR_event_channel_op(&op) != 0); evtchn = op.u.bind_ipi.port; - + /* Record the new mapping. */ evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); @@ -794,8 +794,7 @@ } /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. 
*/ - for (i = 0; i < NR_PIRQS; i++) - { + for (i = 0; i < NR_PIRQS; i++) { irq_bindcount[pirq_to_irq(i)] = 1; #ifdef RTC_IRQ diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/core/features.c --- a/linux-2.6-xen-sparse/drivers/xen/core/features.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c Tue Mar 14 20:50:35 2006 @@ -19,7 +19,7 @@ xen_feature_info_t fi; int i, j; - for (i=0; i<XENFEAT_NR_SUBMAPS; i++) { + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { fi.submap_idx = i; if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/core/gnttab.c --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Tue Mar 14 20:50:35 2006 @@ -31,6 +31,8 @@ #include <linux/config.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/pgtable.h> #include <xen/interface/xen.h> #include <asm/fixmap.h> @@ -40,7 +42,7 @@ #if 1 #define ASSERT(_p) \ - if ( !(_p) ) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \ + if (!(_p)) { printk(KERN_ALERT"Assertion '%s': line %d, file %s\n", \ #_p , __LINE__, __FILE__); *(int*)0=0; } #else #define ASSERT(_p) ((void)0) @@ -77,7 +79,7 @@ static grant_ref_t gnttab_free_head; static spinlock_t gnttab_list_lock = SPIN_LOCK_UNLOCKED; -static grant_entry_t *shared; +static grant_entry_t *shared = NULL; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; @@ -152,7 +154,7 @@ gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly) { int ref; - + if (unlikely((ref = get_free_entry()) == -1)) return -ENOSPC; @@ -192,13 +194,12 @@ nflags = shared[ref].flags; do { - if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { + if ((flags = nflags) & (GTF_reading|GTF_writing)) { printk(KERN_ALERT "WARNING: g.e. 
still in use!\n"); return 0; } - } - while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != - flags); + } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != + flags); return 1; } @@ -211,8 +212,7 @@ if (page != 0) { free_page(page); } - } - else { + } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printk(KERN_WARNING @@ -253,7 +253,7 @@ * reference and return failure (== 0). */ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) + if (synch_cmpxchg(&shared[ref].flags, flags, 0) == flags) return 0; cpu_relax(); } @@ -356,12 +356,35 @@ spin_unlock_irqrestore(&gnttab_list_lock, flags); } +#ifndef __ia64__ +static int map_pte_fn(pte_t *pte, struct page *pte_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int unmap_pte_fn(pte_t *pte, struct page *pte_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} +#endif + int gnttab_resume(void) { gnttab_setup_table_t setup; - unsigned long frames[NR_GRANT_FRAMES]; - int i; + unsigned long frames[NR_GRANT_FRAMES]; +#ifndef __ia64__ + void *pframes = frames; + struct vm_struct *area; +#endif setup.dom = DOMID_SELF; setup.nr_frames = NR_GRANT_FRAMES; @@ -370,12 +393,18 @@ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1)); BUG_ON(setup.status != 0); -#ifdef __ia64__ +#ifndef __ia64__ + if (shared == NULL) { + area = get_vm_area(PAGE_SIZE * NR_GRANT_FRAMES, VM_IOREMAP); + BUG_ON(area == NULL); + shared = area->addr; + } + BUG_ON(generic_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * NR_GRANT_FRAMES, + map_pte_fn, &pframes)); +#else shared = __va(frames[0] << PAGE_SHIFT); printk("grant table at %p\n", shared); -#else - for (i = 0; i < 
NR_GRANT_FRAMES; i++) - set_fixmap(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT); #endif return 0; @@ -384,10 +413,12 @@ int gnttab_suspend(void) { - int i; - - for (i = 0; i < NR_GRANT_FRAMES; i++) - clear_fixmap(FIX_GNTTAB_END - i); + +#ifndef __ia64__ + generic_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * NR_GRANT_FRAMES, + unmap_pte_fn, NULL); +#endif return 0; } @@ -401,10 +432,6 @@ return -ENODEV; BUG_ON(gnttab_resume()); - -#ifndef __ia64__ - shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END); -#endif for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) gnttab_list[i] = i + 1; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Mar 14 20:50:35 2006 @@ -29,10 +29,11 @@ #define SHUTDOWN_POWEROFF 0 #define SHUTDOWN_REBOOT 1 #define SHUTDOWN_SUSPEND 2 -// Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only -// report a crash, not be instructed to crash! -// HALT is the same as POWEROFF, as far as we're concerned. The tools use -// the distinction when we return the reason code to them. +/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + * report a crash, not be instructed to crash! + * HALT is the same as POWEROFF, as far as we're concerned. The tools use + * the distinction when we return the reason code to them. 
+ */ #define SHUTDOWN_HALT 4 void machine_emergency_restart(void) @@ -84,13 +85,13 @@ { int i, j, k, fpp; - extern int gnttab_suspend(void); - extern int gnttab_resume(void); - - extern void time_resume(void); extern unsigned long max_pfn; extern unsigned long *pfn_to_mfn_frame_list_list; extern unsigned long *pfn_to_mfn_frame_list[]; + + extern int gnttab_suspend(void); + extern int gnttab_resume(void); + extern void time_resume(void); #ifdef CONFIG_SMP cpumask_t prev_online_cpus; @@ -167,26 +168,26 @@ */ HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); - shutting_down = SHUTDOWN_INVALID; + shutting_down = SHUTDOWN_INVALID; set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); memset(empty_zero_page, 0, PAGE_SIZE); - + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = virt_to_mfn(pfn_to_mfn_frame_list_list); - + fpp = PAGE_SIZE/sizeof(unsigned long); for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { if ((j % fpp) == 0) { k++; - pfn_to_mfn_frame_list_list[k] = + pfn_to_mfn_frame_list_list[k] = virt_to_mfn(pfn_to_mfn_frame_list[k]); j = 0; } - pfn_to_mfn_frame_list[k][j] = + pfn_to_mfn_frame_list[k][j] = virt_to_mfn(&phys_to_machine_mapping[i]); } HYPERVISOR_shared_info->arch.max_pfn = max_pfn; @@ -207,7 +208,7 @@ #endif - /* + /* * Only resume xenbus /after/ we've prepared our VCPUs; otherwise * the VCPU hotplug callback can race with our vcpu_prepare */ @@ -231,7 +232,7 @@ static int shutdown_process(void *__unused) { - static char *envp[] = { "HOME=/", "TERM=linux", + static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; static char *restart_argv[] = { "/sbin/reboot", NULL }; static char *poweroff_argv[] = { "/sbin/poweroff", NULL }; @@ -291,7 +292,7 @@ else err = kthread_create_on_cpu(__do_suspend, NULL, "suspend", 0); - if ( err < 0 ) { + if (err < 0) { printk(KERN_WARNING "Error creating shutdown process (%d): " "retrying...\n", 
-err); schedule_delayed_work(&shutdown_work, HZ/2); @@ -406,14 +407,12 @@ err2 = register_xenbus_watch(&sysrq_watch); #endif - if (err1) { + if (err1) printk(KERN_ERR "Failed to set shutdown watcher\n"); - } - + #ifdef CONFIG_MAGIC_SYSRQ - if (err2) { + if (err2) printk(KERN_ERR "Failed to set sysrq watcher\n"); - } #endif return NOTIFY_DONE; @@ -421,11 +420,11 @@ static int __init setup_shutdown_event(void) { - + xenstore_notifier.notifier_call = setup_shutdown_watcher; register_xenstore_notifier(&xenstore_notifier); - + return 0; } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/net_driver_util.c --- a/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/net_driver_util.c Tue Mar 14 20:50:35 2006 @@ -40,9 +40,8 @@ int i; char *e; char *macstr = xenbus_read(XBT_NULL, dev->nodename, "mac", NULL); - if (IS_ERR(macstr)) { + if (IS_ERR(macstr)) return PTR_ERR(macstr); - } s = macstr; for (i = 0; i < ETH_ALEN; i++) { mac[i] = simple_strtoul(s, &e, 16); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Mar 14 20:50:35 2006 @@ -91,7 +91,7 @@ struct net_device_stats stats; unsigned int tx_full; - + netif_tx_front_ring_t tx; netif_rx_front_ring_t rx; @@ -129,9 +129,9 @@ struct sk_buff *rx_skbs[NET_RX_RING_SIZE+1]; grant_ref_t gref_tx_head; - grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; - grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; struct xenbus_device *xbdev; int tx_ring_ref; @@ -433,7 +433,7 @@ skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dst_ip, dev, src_ip, - /*dst_hw*/ NULL, /*src_hw*/ NULL, + /*dst_hw*/ NULL, /*src_hw*/ NULL, /*target_hw*/ dev->dev_addr); if (skb == NULL) 
return -ENOMEM; @@ -480,7 +480,7 @@ printk(KERN_ALERT "network_tx_buf_gc: warning " "-- grant still in use by backend " "domain.\n"); - goto out; + goto out; } gnttab_end_foreign_access_ref( np->grant_tx_ref[id], GNTMAP_readonly); @@ -490,9 +490,9 @@ ADD_ID_TO_FREELIST(np->tx_skbs, id); dev_kfree_skb_irq(skb); } - + np->tx.rsp_cons = prod; - + /* * Set a new event, then check for race with update of tx_cons. * Note that it is essential to schedule a callback, no matter @@ -506,7 +506,7 @@ mb(); } while (prod != np->tx.sring->rsp_prod); - out: + out: if (np->tx_full && ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { np->tx_full = 0; @@ -582,7 +582,7 @@ id = GET_ID_FROM_FREELIST(np->rx_skbs); np->rx_skbs[id] = skb; - + RING_GET_REQUEST(&np->rx, req_prod + i)->id = id; ref = gnttab_claim_grant_reference(&np->gref_rx_head); BUG_ON((signed short)ref < 0); @@ -628,11 +628,10 @@ /* Check return status of HYPERVISOR_memory_op(). */ if (unlikely(rx_mcl[i].result != i)) panic("Unable to reduce memory reservation\n"); - } else { + } else if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != i) panic("Unable to reduce memory reservation\n"); - } /* Above is a suitable barrier to ensure backend will see requests. */ np->rx.req_prod_pvt = req_prod + i; @@ -668,7 +667,7 @@ dev_kfree_skb(skb); skb = nskb; } - + spin_lock_irq(&np->tx_lock); if (np->backend_state != BEST_CONNECTED) { @@ -765,7 +764,7 @@ rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ - for (i = np->rx.rsp_cons, work_done = 0; + for (i = np->rx.rsp_cons, work_done = 0; (i != rp) && (work_done < budget); i++, work_done++) { rx = RING_GET_RESPONSE(&np->rx, i); @@ -807,7 +806,7 @@ skb->len = rx->status; skb->tail = skb->data + skb->len; - if ( rx->flags & NETRXF_csum_valid ) + if (rx->flags & NETRXF_csum_valid) skb->ip_summed = CHECKSUM_UNNECESSARY; np->stats.rx_packets++; @@ -862,7 +861,7 @@ * Enough room in skbuff for the data we were passed? 
Also, * Linux expects at least 16 bytes headroom in each rx buffer. */ - if (unlikely(skb->tail > skb->end) || + if (unlikely(skb->tail > skb->end) || unlikely((skb->data - skb->head) < 16)) { if (net_ratelimit()) { if (skb->tail > skb->end) @@ -894,7 +893,7 @@ if ((skb = nskb) == NULL) continue; } - + /* Set the shinfo area, which is hidden behind the data. */ init_skb_shinfo(skb); /* Ethernet work: Delayed to here as it peeks the header. */ @@ -995,9 +994,9 @@ tx->id = i; gnttab_grant_foreign_access_ref( - np->grant_tx_ref[i], np->xbdev->otherend_id, + np->grant_tx_ref[i], np->xbdev->otherend_id, virt_to_mfn(np->tx_skbs[i]->data), - GNTMAP_readonly); + GNTMAP_readonly); tx->gref = np->grant_tx_ref[i]; tx->offset = (unsigned long)skb->data & ~PAGE_MASK; tx->size = skb->len; @@ -1012,7 +1011,7 @@ RING_PUSH_REQUESTS(&np->tx); /* Rebuild the RX buffer freelist and the RX ring itself. */ - for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) { + for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) { if ((unsigned long)np->rx_skbs[i] < __PAGE_OFFSET) continue; gnttab_grant_foreign_transfer_ref( @@ -1021,7 +1020,7 @@ RING_GET_REQUEST(&np->rx, requeue_idx)->gref = np->grant_rx_ref[i]; RING_GET_REQUEST(&np->rx, requeue_idx)->id = i; - requeue_idx++; + requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; @@ -1055,9 +1054,8 @@ np->evtchn, np->tx, np->rx); - } else { + } else IPRINTK("<vif NULL>\n"); - } #endif } @@ -1150,7 +1148,7 @@ SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &dev->dev); - + if ((err = register_netdev(netdev)) != 0) { printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err); @@ -1181,16 +1179,16 @@ * We use this notifier to send out a fake ARP reply to reset switches and * router ARP caches when an IP interface is brought up on a VIF. 
*/ -static int +static int inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; /* UP event and is it one of our devices? */ if (event == NETDEV_UP && dev->open == network_open) (void)send_fake_arp(dev); - + return NOTIFY_DONE; } @@ -1336,8 +1334,8 @@ module_exit(netif_exit); MODULE_LICENSE("Dual BSD/GPL"); - - + + /* ** /proc **/ @@ -1354,9 +1352,8 @@ (struct net_device *)((unsigned long)data & ~3UL); struct netfront_info *np = netdev_priv(dev); int len = 0, which_target = (long)data & 3; - - switch (which_target) - { + + switch (which_target) { case TARGET_MIN: len = sprintf(page, "%d\n", np->rx_min_target); break; @@ -1403,8 +1400,7 @@ spin_lock(&np->rx_lock); - switch (which_target) - { + switch (which_target) { case TARGET_MIN: if (target > np->rx_max_target) np->rx_max_target = target; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/Makefile --- a/linux-2.6-xen-sparse/drivers/xen/pciback/Makefile Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/Makefile Tue Mar 14 20:50:35 2006 @@ -1,9 +1,9 @@ -obj-y += pciback.o +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o pciback-y := pci_stub.o pciback_ops.o xenbus.o pciback-y += conf_space.o conf_space_header.o -pciback-${CONFIG_XEN_PCIDEV_BACKEND_VPCI} += vpci.o -pciback-${CONFIG_XEN_PCIDEV_BACKEND_PASS} += passthrough.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o +pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/conf_space.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/conf_space.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/conf_space.c Tue Mar 14 20:50:35 2006 @@ -13,6 +13,9 @@ #include <linux/pci.h> #include 
"pciback.h" #include "conf_space.h" + +static int permissive = 0; +module_param(permissive, bool, 0644); #define DEFINE_PCI_CONFIG(op,size,type) \ int pciback_##op##_config_##size \ @@ -198,7 +201,7 @@ int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value) { - int err = 0; + int err = 0, handled = 0; struct pciback_dev_data *dev_data = pci_get_drvdata(dev); struct config_field_entry *cfg_entry; struct config_field *field; @@ -233,6 +236,21 @@ field_start - req_start); err = conf_space_write(dev, cfg_entry, offset, tmp_val); + handled = 1; + } + } + + if (!handled && !err && permissive) { + switch (size) { + case 1: + err = pci_write_config_byte(dev, offset, (u8)value); + break; + case 2: + err = pci_write_config_word(dev, offset, (u16)value); + break; + case 4: + err = pci_write_config_dword(dev, offset, (u32)value); + break; } } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/conf_space_header.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/conf_space_header.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/conf_space_header.c Tue Mar 14 20:50:35 2006 @@ -24,21 +24,19 @@ if (unlikely(verbose_request)) printk(KERN_DEBUG "pciback: %s: enable\n", pci_name(dev)); - dev->is_enabled = 1; - pcibios_enable_device(dev, (1 << PCI_NUM_RESOURCES) - 1); + pci_enable_device(dev); } else if (dev->is_enabled && !is_enable_cmd(value)) { if (unlikely(verbose_request)) printk(KERN_DEBUG "pciback: %s: disable\n", pci_name(dev)); - pciback_disable_device(dev); + pci_disable_device(dev); } if (!dev->is_busmaster && is_master_cmd(value)) { if (unlikely(verbose_request)) printk(KERN_DEBUG "pciback: %s: set bus master\n", pci_name(dev)); - dev->is_busmaster = 1; - pcibios_set_master(dev); + pci_set_master(dev); } if (value & PCI_COMMAND_INVALIDATE) { diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/pci_stub.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/pci_stub.c Tue Mar 14 
20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/pci_stub.c Tue Mar 14 20:50:35 2006 @@ -207,8 +207,6 @@ return 0; } - -device_initcall(pcistub_init_devices_late); static int __devinit pcistub_seize(struct pci_dev *dev) { @@ -367,6 +365,7 @@ return -EINVAL; } +#ifndef MODULE /* * fs_initcall happens before device_initcall * so pciback *should* get called first (b/c we @@ -375,3 +374,34 @@ * driver to register) */ fs_initcall(pcistub_init); +#endif + +static int __init pciback_init(void) +{ +#ifdef MODULE + int err; + + err = pcistub_init(); + if (err < 0) + return err; +#endif + + if (list_empty(&pci_stub_device_ids)) + return -ENODEV; + pcistub_init_devices_late(); + pciback_xenbus_register(); + + __unsafe(THIS_MODULE); + + return 0; +} + +static void pciback_cleanup(void) +{ + BUG(); +} + +module_init(pciback_init); +module_exit(pciback_cleanup); + +MODULE_LICENSE("Dual BSD/GPL"); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/pciback.h --- a/linux-2.6-xen-sparse/drivers/xen/pciback/pciback.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/pciback.h Tue Mar 14 20:50:35 2006 @@ -43,7 +43,6 @@ void pcistub_put_pci_dev(struct pci_dev *dev); /* Ensure a device is turned off or reset */ -void pciback_disable_device(struct pci_dev *dev); void pciback_reset_device(struct pci_dev *pdev); /* Access a virtual configuration space for a PCI device */ @@ -69,5 +68,7 @@ /* Handles events from front-end */ irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs); +int pciback_xenbus_register(void); + extern int verbose_request; #endif diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/pciback_ops.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/pciback_ops.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/pciback_ops.c Tue Mar 14 20:50:35 2006 @@ -5,21 +5,11 @@ */ #include <linux/module.h> #include <asm/bitops.h> +#include <xen/evtchn.h> 
#include "pciback.h" int verbose_request = 0; module_param(verbose_request, int, 0644); - -/* For those architectures without a pcibios_disable_device */ -void __attribute__ ((weak)) pcibios_disable_device(struct pci_dev *dev) { } - -void pciback_disable_device(struct pci_dev *dev) -{ - if (dev->is_enabled) { - dev->is_enabled = 0; - pcibios_disable_device(dev); - } -} /* Ensure a device is "turned off" and ready to be exported. * This also sets up the device's private data to keep track of what should @@ -32,7 +22,7 @@ /* Disable devices (but not bridges) */ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { - pciback_disable_device(dev); + pci_disable_device(dev); pci_write_config_word(dev, PCI_COMMAND, 0); @@ -78,6 +68,7 @@ wmb(); clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_irq(pdev->evtchn_irq); out: return IRQ_HANDLED; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c Tue Mar 14 20:50:35 2006 @@ -430,10 +430,7 @@ .otherend_changed = pciback_frontend_changed, }; -static __init int pciback_xenbus_register(void) +int __init pciback_xenbus_register(void) { return xenbus_register_backend(&xenbus_pciback_driver); } - -/* Must only initialize our xenbus driver after the pcistub driver */ -device_initcall(pciback_xenbus_register); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pcifront/pci.c --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci.c Tue Mar 14 20:50:35 2006 @@ -18,8 +18,10 @@ spin_lock(&pcifront_dev_lock); - if (!pcifront_dev) + if (!pcifront_dev) { dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); + pcifront_dev = pdev; + } else { dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); err = -EEXIST; diff -r dc50cdd66c5c -r 
0ed4a312765b linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c Tue Mar 14 20:50:35 2006 @@ -40,9 +40,8 @@ { int err = 0; struct xen_pci_op *active_op = &pdev->sh_info->op; - unsigned long irq_flags; - - unsigned int volatile ttl = (1U << 29); + unsigned long irq_flags, poll_end; + evtchn_port_t port = pdev->evtchn; spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); @@ -51,14 +50,17 @@ /* Go */ wmb(); set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); - notify_remote_via_evtchn(pdev->evtchn); - - /* IRQs are disabled for the pci config. space reads/writes, - * which means no event channel to notify us that the backend - * is done so spin while waiting for the answer */ - while (test_bit - (_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)) { - if (!ttl) { + notify_remote_via_evtchn(port); + + poll_end = jiffies + 5*HZ; + clear_evtchn(port); + + while (test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags)) { + if (HYPERVISOR_poll(&port, 1, poll_end)) + BUG(); + clear_evtchn(port); + if (time_after(jiffies, poll_end)) { dev_err(&pdev->xdev->dev, "pciback not responding!!!\n"); clear_bit(_XEN_PCIF_active, @@ -66,7 +68,6 @@ err = XEN_PCI_ERR_dev_not_found; goto out; } - ttl--; } memcpy(op, active_op, sizeof(struct xen_pci_op)); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c Tue Mar 14 20:50:35 2006 @@ -310,7 +310,8 @@ goto abort_transaction; } - err = xenbus_switch_state(dev, xbt, XenbusStateInitialised); + err = xenbus_printf(xbt, dev->nodename, + "state", "%d", XenbusStateInitialised); if (err) { goto abort_transaction; } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile --- 
a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile Tue Mar 14 20:50:35 2006 @@ -1,8 +1,8 @@ obj-y += xenbus.o xenbus-objs = -xenbus-objs += xenbus_client.o +xenbus-objs += xenbus_client.o xenbus-objs += xenbus_comms.o xenbus-objs += xenbus_xs.o -xenbus-objs += xenbus_probe.o -xenbus-objs += xenbus_dev.o +xenbus-objs += xenbus_probe.o +xenbus-objs += xenbus_dev.o diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c Tue Mar 14 20:50:35 2006 @@ -39,7 +39,7 @@ pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) int xenbus_watch_path(struct xenbus_device *dev, const char *path, - struct xenbus_watch *watch, + struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)) { @@ -62,7 +62,7 @@ int xenbus_watch_path2(struct xenbus_device *dev, const char *path, - const char *path2, struct xenbus_watch *watch, + const char *path2, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)) { @@ -74,9 +74,8 @@ } err = xenbus_watch_path(dev, state, watch, callback); - if (err) { + if (err) kfree(state); - } return err; } EXPORT_SYMBOL(xenbus_watch_path2); @@ -190,7 +189,7 @@ va_start(ap, fmt); _dev_error(dev, err, fmt, ap); va_end(ap); - + xenbus_switch_state(dev, XBT_NULL, XenbusStateClosing); } EXPORT_SYMBOL(xenbus_dev_fatal); @@ -213,7 +212,6 @@ .u.alloc_unbound.dom = DOMID_SELF, .u.alloc_unbound.remote_dom = dev->otherend_id }; - int err = HYPERVISOR_event_channel_op(&op); if (err) xenbus_dev_fatal(dev, err, "allocating event channel"); @@ -231,7 +229,6 @@ .u.bind_interdomain.remote_dom = dev->otherend_id, .u.bind_interdomain.remote_port = remote_port, }; - int err = HYPERVISOR_event_channel_op(&op); if (err) 
xenbus_dev_fatal(dev, err, @@ -388,7 +385,6 @@ XenbusState xenbus_read_driver_state(const char *path) { XenbusState result; - int err = xenbus_gather(XBT_NULL, path, "state", "%d", &result, NULL); if (err) result = XenbusStateClosed; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Tue Mar 14 20:50:35 2006 @@ -38,8 +38,8 @@ static int xenbus_irq; -extern void xenbus_probe(void *); -extern int xenstored_ready; +extern void xenbus_probe(void *); +extern int xenstored_ready; static DECLARE_WORK(probe_work, xenbus_probe, NULL); DECLARE_WAIT_QUEUE_HEAD(xb_waitq); @@ -52,9 +52,9 @@ static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs) { if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); - } + xenstored_ready = 1; + schedule_work(&probe_work); + } wake_up(&xb_waitq); return IRQ_HANDLED; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Mar 14 20:50:35 2006 @@ -366,7 +366,6 @@ xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); xenbus_switch_state(dev, XBT_NULL, XenbusStateClosed); return -ENODEV; - } static int xenbus_dev_remove(struct device *_dev) @@ -495,9 +494,8 @@ static void xenbus_dev_release(struct device *dev) { - if (dev) { + if (dev) xenbus_dev_free(to_xenbus_device(dev)); - } } /* Simplified asprintf. 
*/ @@ -600,7 +598,7 @@ nodename = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name); if (!nodename) return -ENOMEM; - + DPRINTK("%s", nodename); err = xenbus_probe_node(&xenbus_frontend, type, nodename); @@ -959,9 +957,8 @@ if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) return -EINVAL; - vma->vm_pgoff = mfn_to_pfn(xen_start_info->store_mfn); - - if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + if (remap_pfn_range(vma, vma->vm_start, + mfn_to_pfn(xen_start_info->store_mfn), size, vma->vm_page_prot)) return -EAGAIN; @@ -972,6 +969,7 @@ int count, int *eof, void *data) { int len; + len = sprintf(page, "0x%p", mfn_to_virt(xen_start_info->store_mfn)); *eof = 1; return len; @@ -1006,8 +1004,8 @@ device_register(&xenbus_backend.dev); /* - ** Domain0 doesn't have a store_evtchn or store_mfn yet. - */ + * Domain0 doesn't have a store_evtchn or store_mfn yet. + */ dom0 = (xen_start_info->store_evtchn == 0); if (dom0) { @@ -1029,7 +1027,7 @@ xen_start_info->store_mfn = pfn_to_mfn(virt_to_phys((void *)page) >> PAGE_SHIFT); - + /* Next allocate a local port which xenstored can bind to */ op.cmd = EVTCHNOP_alloc_unbound; op.u.alloc_unbound.dom = DOMID_SELF; @@ -1040,14 +1038,16 @@ xen_start_info->store_evtchn = op.u.alloc_unbound.port; /* And finally publish the above info in /proc/xen */ - if ((xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0400))) { + xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600); + if (xsd_kva_intf) { memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops, sizeof(xsd_kva_fops)); xsd_kva_fops.mmap = xsd_kva_mmap; xsd_kva_intf->proc_fops = &xsd_kva_fops; xsd_kva_intf->read_proc = xsd_kva_read; } - if ((xsd_port_intf = create_xen_proc_entry("xsd_port", 0400))) + xsd_port_intf = create_xen_proc_entry("xsd_port", 0400); + if (xsd_port_intf) xsd_port_intf->read_proc = xsd_port_read; } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Mar 14 20:10:21 
2006 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Mar 14 20:50:35 2006 @@ -176,9 +176,8 @@ if (err) { msg->type = XS_ERROR; ret = ERR_PTR(err); - } else { + } else ret = read_reply(&msg->type, &msg->len); - } up(&xs_state.request_mutex); @@ -275,7 +274,7 @@ return num; } -/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ static char *join(const char *dir, const char *name) { char *buffer; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h Tue Mar 14 20:50:35 2006 @@ -84,8 +84,6 @@ FIX_PCIE_MCFG, #endif FIX_SHARED_INFO, - FIX_GNTTAB_BEGIN, - FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1, #define NR_FIX_ISAMAPS 256 FIX_ISAMAP_END, FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h Tue Mar 14 20:50:35 2006 @@ -33,6 +33,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> #include <xen/interface/nmi.h> +#include <linux/errno.h> #define __STR(x) #x #define STR(x) __STR(x) @@ -167,6 +168,31 @@ int cmd, unsigned long arg) { return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int +HYPERVISOR_sched_op_new( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op_new, cmd, arg); +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) +{ + struct sched_poll sched_poll = { + .ports = ports, + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, 
&sched_poll); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0); + + return rc; } static inline long @@ -304,8 +330,7 @@ static inline int HYPERVISOR_nmi_op( - unsigned long op, - unsigned long arg) + unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h Tue Mar 14 20:50:35 2006 @@ -79,12 +79,6 @@ void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */ void xen_pgd_pin(unsigned long ptr); void xen_pgd_unpin(unsigned long ptr); -void xen_pud_pin(unsigned long ptr); /* x86_64 only */ -void xen_pud_unpin(unsigned long ptr); /* x86_64 only */ -void xen_pmd_pin(unsigned long ptr); /* x86_64 only */ -void xen_pmd_unpin(unsigned long ptr); /* x86_64 only */ -void xen_pte_pin(unsigned long ptr); -void xen_pte_unpin(unsigned long ptr); void xen_set_ldt(unsigned long ptr, unsigned long bytes); void xen_machphys_update(unsigned long mfn, unsigned long pfn); @@ -102,6 +96,9 @@ unsigned long vstart, unsigned int order, unsigned int address_bits); void xen_destroy_contiguous_region( unsigned long vstart, unsigned int order); + +/* Turn jiffies into Xen system time. 
*/ +u64 jiffies_to_st(unsigned long jiffies); #include <asm/hypercall.h> diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Tue Mar 14 20:50:35 2006 @@ -71,7 +71,8 @@ { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & ~FOREIGN_FRAME_BIT; + return phys_to_machine_mapping[(unsigned int)(pfn)] & + ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h Tue Mar 14 20:50:35 2006 @@ -131,8 +131,8 @@ return !pte.pte_low && !pte.pte_high; } -#define pte_mfn(_pte) ( ((_pte).pte_low >> PAGE_SHIFT) |\ - (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)) ) +#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\ + (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) #define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte)) extern unsigned long long __supported_pte_mask; diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h Tue Mar 14 20:50:35 2006 @@ -272,7 +272,16 @@ pte_t pte; if (full) { pte = *ptep; +#ifdef CONFIG_X86_PAE + /* Cannot do this in a single step, as the compiler may + issue the two stores in either order, but the hypervisor + must not see the high part before the low one. 
*/ + ptep->pte_low = 0; + barrier(); + ptep->pte_high = 0; +#else *ptep = __pte(0); +#endif } else { pte = ptep_get_and_clear(mm, addr, ptep); } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h Tue Mar 14 20:50:35 2006 @@ -596,7 +596,7 @@ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \ _vcpu->evtchn_upcall_mask = 0; \ barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + if (unlikely(_vcpu->evtchn_upcall_pending)) \ force_evtchn_callback(); \ preempt_enable(); \ } while (0) @@ -618,7 +618,7 @@ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + if (unlikely(_vcpu->evtchn_upcall_pending)) \ force_evtchn_callback(); \ preempt_enable(); \ } else \ diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue Mar 14 20:50:35 2006 @@ -35,12 +35,21 @@ static void __init machine_specific_arch_setup(void) { struct xen_platform_parameters pp; + struct xennmi_callback cb; + + if (xen_feature(XENFEAT_auto_translated_physmap) && + xen_start_info->shared_info < xen_start_info->nr_pages) { + HYPERVISOR_shared_info = + (shared_info_t *)__va(xen_start_info->shared_info); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); + } HYPERVISOR_set_callbacks( __KERNEL_CS, (unsigned long)hypervisor_callback, __KERNEL_CS, (unsigned long)failsafe_callback); - HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi); + cb.handler_address = (unsigned long)&nmi; + 
HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); machine_specific_modify_cpu_capabilities(&boot_cpu_data); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue Mar 14 20:50:35 2006 @@ -33,6 +33,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> #include <xen/interface/dom0_ops.h> +#include <linux/errno.h> /* FIXME: temp place to hold these page related macros */ #include <asm/page.h> @@ -166,6 +167,31 @@ return _hypercall2(int, sched_op, cmd, arg); } +static inline int +HYPERVISOR_sched_op_new( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op_new, cmd, arg); +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, unsigned long timeout) +{ + struct sched_poll sched_poll = { + .ports = ports, + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0); + + return rc; +} + static inline long HYPERVISOR_set_timer_op( u64 timeout) diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h Tue Mar 14 20:50:35 2006 @@ -44,6 +44,9 @@ void force_evtchn_callback(void); +/* Turn jiffies into Xen system time. XXX Implement me. 
*/ +#define jiffies_to_st(j) 0 + #include <asm/hypercall.h> // for drivers/xen/privcmd/privcmd.c diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h Tue Mar 14 20:50:35 2006 @@ -52,8 +52,6 @@ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, #endif FIX_SHARED_INFO, - FIX_GNTTAB_BEGIN, - FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1, #define NR_FIX_ISAMAPS 256 FIX_ISAMAP_END, FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h Tue Mar 14 20:50:35 2006 @@ -36,6 +36,8 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> +#include <xen/interface/nmi.h> +#include <linux/errno.h> #define __STR(x) #x #define STR(x) __STR(x) @@ -173,6 +175,31 @@ return _hypercall2(int, sched_op, cmd, arg); } +static inline int +HYPERVISOR_sched_op_new( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op_new, cmd, arg); +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) +{ + struct sched_poll sched_poll = { + .ports = ports, + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0); + + return rc; +} + static inline long HYPERVISOR_set_timer_op( u64 timeout) @@ -304,8 +331,7 @@ static inline int HYPERVISOR_nmi_op( - unsigned long op, - unsigned long arg) + unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); } diff -r dc50cdd66c5c -r 0ed4a312765b 
linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Tue Mar 14 20:50:35 2006 @@ -89,7 +89,8 @@ { if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; - return phys_to_machine_mapping[(unsigned int)(pfn)] & ~FOREIGN_FRAME_BIT; + return phys_to_machine_mapping[(unsigned int)(pfn)] & + ~FOREIGN_FRAME_BIT; } static inline int phys_to_machine_mapping_valid(unsigned long pfn) diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgalloc.h Tue Mar 14 20:50:35 2006 @@ -181,7 +181,6 @@ static inline void pte_free_kernel(pte_t *pte) { BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - xen_pte_unpin(__pa(pte)); make_page_writable(pte, XENFEAT_writable_page_tables); free_page((unsigned long)pte); } diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h Tue Mar 14 20:50:35 2006 @@ -23,13 +23,18 @@ static void __init machine_specific_arch_setup(void) { +#ifdef CONFIG_X86_LOCAL_APIC + struct xennmi_callback cb; +#endif + HYPERVISOR_set_callbacks( (unsigned long) hypervisor_callback, (unsigned long) failsafe_callback, (unsigned long) system_call); #ifdef CONFIG_X86_LOCAL_APIC - HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi); + cb.handler_address = (unsigned long)&nmi; + HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); #endif machine_specific_modify_cpu_capabilities(&boot_cpu_data); diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/linux/mm.h --- 
a/linux-2.6-xen-sparse/include/linux/mm.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/linux/mm.h Tue Mar 14 20:50:35 2006 @@ -1020,9 +1020,9 @@ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ #ifdef CONFIG_XEN -typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr, +typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr, void *data); -extern int generic_page_range(struct mm_struct *mm, unsigned long address, +extern int generic_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); #endif diff -r dc50cdd66c5c -r 0ed4a312765b linux-2.6-xen-sparse/include/xen/xenbus.h --- a/linux-2.6-xen-sparse/include/xen/xenbus.h Tue Mar 14 20:10:21 2006 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h Tue Mar 14 20:50:35 2006 @@ -28,8 +28,8 @@ * IN THE SOFTWARE. */ -#ifndef _ASM_XEN_XENBUS_H -#define _ASM_XEN_XENBUS_H +#ifndef _XEN_XENBUS_H +#define _XEN_XENBUS_H #include <linux/device.h> #include <linux/notifier.h> @@ -170,7 +170,7 @@ * be saved in the store. */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, - struct xenbus_watch *watch, + struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); @@ -185,7 +185,7 @@ * saved in the store. */ int xenbus_watch_path2(struct xenbus_device *dev, const char *path, - const char *path2, struct xenbus_watch *watch, + const char *path2, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); @@ -216,8 +216,8 @@ * page to that address, and sets *vaddr to that address. * xenbus_map_ring does not allocate the virtual address space (you must do * this yourself!). It only maps in the page to the specified address. - * Returns 0 on success, and GNTST_* (see xen/include/public/grant_table.h) or - * -ENOMEM on error. 
If an error is returned, device will switch to + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to * XenbusStateClosing and the error message will be saved in XenStore. */ int xenbus_map_ring_valloc(struct xenbus_device *dev, @@ -231,7 +231,7 @@ * Use xenbus_unmap_ring_vfree if you mapped in your memory with * xenbus_map_ring_valloc (it will free the virtual address space). * Returns 0 on success and returns GNTST_* on error - * (see xen/include/public/grant_table.h). + * (see xen/include/interface/grant_table.h). */ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); int xenbus_unmap_ring(struct xenbus_device *dev, @@ -285,7 +285,7 @@ ...); -#endif /* _ASM_XEN_XENBUS_H */ +#endif /* _XEN_XENBUS_H */ /* * Local variables: diff -r dc50cdd66c5c -r 0ed4a312765b tools/Rules.mk --- a/tools/Rules.mk Tue Mar 14 20:10:21 2006 +++ b/tools/Rules.mk Tue Mar 14 20:50:35 2006 @@ -9,16 +9,6 @@ XEN_LIBXC = $(XEN_ROOT)/tools/libxc XEN_XENSTORE = $(XEN_ROOT)/tools/xenstore XEN_LIBXENSTAT = $(XEN_ROOT)/tools/xenstat/libxenstat/src - -ifeq ($(XEN_TARGET_ARCH),x86_32) -CFLAGS += -m32 -march=i686 -LDFLAGS += -m32 -endif - -ifeq ($(XEN_TARGET_ARCH),x86_64) -CFLAGS += -m64 -LDFLAGS += -m64 -endif X11_LDPATH = -L/usr/X11R6/$(LIBDIR) diff -r dc50cdd66c5c -r 0ed4a312765b tools/blktap/Makefile --- a/tools/blktap/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/blktap/Makefile Tue Mar 14 20:50:35 2006 @@ -22,11 +22,8 @@ SRCS := SRCS += blktaplib.c xenbus.c blkif.c -CFLAGS += -Wall CFLAGS += -Werror CFLAGS += -Wno-unused -#CFLAGS += -O3 -CFLAGS += -g3 CFLAGS += -fno-strict-aliasing CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE # get asprintf(): diff -r dc50cdd66c5c -r 0ed4a312765b tools/blktap/parallax/Makefile --- a/tools/blktap/parallax/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/blktap/parallax/Makefile Tue Mar 14 20:50:35 2006 @@ -31,11 +31,8 @@ 
VDI_TOOLS += vdi_tree VDI_TOOLS += vdi_validate -CFLAGS += -Wall CFLAGS += -Werror CFLAGS += -Wno-unused -#CFLAGS += -O3 -CFLAGS += -g3 CFLAGS += -fno-strict-aliasing CFLAGS += $(INCLUDES) CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE @@ -58,7 +55,7 @@ $(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS) ${VDI_TOOLS}: %: %.c $(VDI_SRCS) - $(CC) $(CFLAGS) -g3 -o $@ $@.c $(LDFLAGS) $(VDI_SRCS) + $(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS) .PHONY: TAGS clean install rpm --include $(DEPS) \ No newline at end of file +-include $(DEPS) diff -r dc50cdd66c5c -r 0ed4a312765b tools/blktap/ublkback/Makefile --- a/tools/blktap/ublkback/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/blktap/ublkback/Makefile Tue Mar 14 20:50:35 2006 @@ -9,11 +9,8 @@ IBIN = ublkback INSTALL_DIR = /usr/sbin -CFLAGS += -Wall CFLAGS += -Werror CFLAGS += -Wno-unused -#CFLAGS += -O3 -CFLAGS += -g3 CFLAGS += -fno-strict-aliasing CFLAGS += -I $(XEN_LIBXC) CFLAGS += $(INCLUDES) -I. diff -r dc50cdd66c5c -r 0ed4a312765b tools/console/Makefile --- a/tools/console/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/console/Makefile Tue Mar 14 20:50:35 2006 @@ -9,7 +9,7 @@ INSTALL_PROG = $(INSTALL) -m0755 INSTALL_DIR = $(INSTALL) -d -m0755 -CFLAGS += -Wall -Werror -g3 +CFLAGS += -Werror -g CFLAGS += -I $(XEN_LIBXC) CFLAGS += -I $(XEN_XENSTORE) diff -r dc50cdd66c5c -r 0ed4a312765b tools/console/testsuite/Makefile --- a/tools/console/testsuite/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/console/testsuite/Makefile Tue Mar 14 20:50:35 2006 @@ -1,5 +1,6 @@ -CFLAGS=-g -Wall -CC=gcc +XEN_ROOT = ../../.. 
+include $(XEN_ROOT)/tools/Rules.mk + LDFLAGS=-static all: console-dom0 console-domU procpipe diff -r dc50cdd66c5c -r 0ed4a312765b tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c --- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c Tue Mar 14 20:10:21 2006 +++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c Tue Mar 14 20:50:35 2006 @@ -229,8 +229,6 @@ if (xc_waitdomain(xc_handle, current_domid, &w, 0)) return -1; - linux_set_inferior(); - *status = 'T'; if (expect_signal) return expect_signal; diff -r dc50cdd66c5c -r 0ed4a312765b tools/debugger/libxendebug/Makefile --- a/tools/debugger/libxendebug/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/debugger/libxendebug/Makefile Tue Mar 14 20:50:35 2006 @@ -7,14 +7,12 @@ MAJOR = 3.0 MINOR = 0 -CC = gcc - XEN_ROOT = ../../.. include $(XEN_ROOT)/tools/Rules.mk SRCS := xendebug.c -CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing +CFLAGS += -Werror -fno-strict-aliasing CFLAGS += $(INCLUDES) -I. -I$(XEN_ROOT)/tools/libxc # Get gcc to generate the dependencies for us. CFLAGS += -Wp,-MD,.$(@F).d diff -r dc50cdd66c5c -r 0ed4a312765b tools/debugger/pdb/Makefile --- a/tools/debugger/pdb/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/debugger/pdb/Makefile Tue Mar 14 20:50:35 2006 @@ -20,7 +20,6 @@ INCLUDES += -I $(OCAML_ROOT)/lib/ocaml CFLAGS += $(INCLUDES) -CFLAGS += -Wall CFLAGS += -Werror CFLAGS += -g diff -r dc50cdd66c5c -r 0ed4a312765b tools/firmware/acpi/Makefile --- a/tools/firmware/acpi/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/firmware/acpi/Makefile Tue Mar 14 20:50:35 2006 @@ -16,11 +16,12 @@ # * # */ # + +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Rules.mk + # Compiler flag -CFLAG=-I. -I../../libxc - -# Compiler tool -CC=gcc +HOSTCFLAGS += -I. 
-I../../libxc # TARGET C_SRC=$(shell ls *.c) @@ -33,7 +34,7 @@ vpath iasl $(PATH) all:$(ACPI_BIN) - + acpi_dsdt.c:acpi_dsdt.asl $(MAKE) iasl iasl -oa -tc acpi_dsdt.asl @@ -54,15 +55,13 @@ install $(IASL_VER)/compiler/iasl /usr/bin/iasl $(ACPI_GEN):$(C_SRC) $(H_SRC) acpi_dsdt.c - $(CC) -o $(ACPI_GEN) $(CFLAG) $(shell ls *.c) + $(HOSTCC) -o $(ACPI_GEN) $(HOSTCFLAGS) $(shell ls *.c) $(ACPI_BIN):$(ACPI_GEN) ./$(ACPI_GEN) $(ACPI_BIN) - + clean: rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER) -# rm -f acpi_dsdt.c rm -rf $(IASL_VER).tar.gz -install:all - - + +install: all diff -r dc50cdd66c5c -r 0ed4a312765b tools/ioemu/Makefile --- a/tools/ioemu/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/ioemu/Makefile Tue Mar 14 20:50:35 2006 @@ -3,7 +3,7 @@ -include config-host.mak -CFLAGS+=-Wall -O2 -g -fno-strict-aliasing +CFLAGS+=-g -fno-strict-aliasing ifdef CONFIG_DARWIN CFLAGS+= -mdynamic-no-pic endif diff -r dc50cdd66c5c -r 0ed4a312765b tools/ioemu/configure --- a/tools/ioemu/configure Tue Mar 14 20:10:21 2006 +++ b/tools/ioemu/configure Tue Mar 14 20:50:35 2006 @@ -392,17 +392,17 @@ echo "configdir=$configdir" >> $config_mak echo "LIBDIR=$libdir" >> $config_mak echo "#define CONFIG_QEMU_SHAREDIR \"$datadir\"" >> $config_h -echo "MAKE=$make" >> $config_mak -echo "CC=$cc" >> $config_mak -if test "$have_gcc3_options" = "yes" ; then - echo "HAVE_GCC3_OPTIONS=yes" >> $config_mak -fi -echo "HOST_CC=$host_cc" >> $config_mak -echo "AR=$ar" >> $config_mak -echo "STRIP=$strip -s -R .comment -R .note" >> $config_mak -echo "CFLAGS=$CFLAGS" >> $config_mak -echo "LDFLAGS=$LDFLAGS" >> $config_mak -echo "EXESUF=$EXESUF" >> $config_mak +#echo "MAKE=$make" >> $config_mak +#echo "CC=$cc" >> $config_mak +#if test "$have_gcc3_options" = "yes" ; then +# echo "HAVE_GCC3_OPTIONS=yes" >> $config_mak +#fi +#echo "HOST_CC=$host_cc" >> $config_mak +#echo "AR=$ar" >> $config_mak +#echo "STRIP=$strip -s -R .comment -R .note" >> $config_mak +#echo "CFLAGS=$CFLAGS" >> $config_mak +#echo "LDFLAGS=$LDFLAGS" 
>> $config_mak +#echo "EXESUF=$EXESUF" >> $config_mak if test "$bigendian" = "yes" ; then echo "WORDS_BIGENDIAN=yes" >> $config_mak diff -r dc50cdd66c5c -r 0ed4a312765b tools/ioemu/monitor.c --- a/tools/ioemu/monitor.c Tue Mar 14 20:10:21 2006 +++ b/tools/ioemu/monitor.c Tue Mar 14 20:50:35 2006 @@ -407,6 +407,7 @@ static void do_eject(int force, const char *filename) { + char cmd[1024]; BlockDriverState *bs; bs = bdrv_find(filename); @@ -415,6 +416,9 @@ return; } eject_device(bs, force); + sprintf(cmd, "eject %s", filename); + system(cmd); + } static void do_change(const char *device, const char *filename) diff -r dc50cdd66c5c -r 0ed4a312765b tools/ioemu/target-i386-dm/Makefile --- a/tools/ioemu/target-i386-dm/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/ioemu/target-i386-dm/Makefile Tue Mar 14 20:50:35 2006 @@ -13,7 +13,7 @@ VPATH+=:$(SRC_PATH)/linux-user DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH) endif -CFLAGS+=-Wall -O2 -g -fno-strict-aliasing +CFLAGS+=-g -fno-strict-aliasing LDFLAGS=-g LIBS= HELPER_CFLAGS=$(CFLAGS) @@ -99,7 +99,6 @@ endif ifeq ($(ARCH),i386) -CFLAGS+=-fomit-frame-pointer OP_CFLAGS=$(CFLAGS) -mpreferred-stack-boundary=2 ifeq ($(HAVE_GCC3_OPTIONS),yes) OP_CFLAGS+= -falign-functions=0 -fno-gcse @@ -236,7 +235,7 @@ all: $(PROGS) $(QEMU_USER): $(OBJS) - $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) ifeq ($(ARCH),alpha) # Mark as 32 bit binary, i. e. 
it will be mapped into the low 31 bit of # the address space (31 bit so sign extending doesn't matter) @@ -312,7 +311,7 @@ endif $(QEMU_SYSTEM): $(VL_OBJS) libqemu.a - $(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(VNC_LIBS) $(VL_LIBS) -lpthread + $(CC) $(CFLAGS) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(VNC_LIBS) $(VL_LIBS) -lpthread vnc.o: vnc.c keyboard_rdesktop.c $(CC) $(CFLAGS) $(DEFINES) $(VNC_CFLAGS) -c -o $@ $< diff -r dc50cdd66c5c -r 0ed4a312765b tools/ioemu/vl.h --- a/tools/ioemu/vl.h Tue Mar 14 20:10:21 2006 +++ b/tools/ioemu/vl.h Tue Mar 14 20:50:35 2006 @@ -697,7 +697,7 @@ void tcx_init(DisplayState *ds, uint32_t addr); /* sched.c */ -void sched_init(); +void sched_init(uint32_t, uint32_t); /* magic-load.c */ void magic_init(const char *kfn, int kloadaddr, uint32_t addr); @@ -799,7 +799,7 @@ int gdbserver_start(int port); void update_select_wakeup_events(void); -void tun_receive_handler(); +void tun_receive_handler(fd_set *); extern char domain_name[]; #endif /* VL_H */ diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/Makefile --- a/tools/libxc/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/Makefile Tue Mar 14 20:50:35 2006 @@ -6,8 +6,6 @@ MAJOR = 3.0 MINOR = 0 - -CC = gcc XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk @@ -48,9 +46,7 @@ BUILD_SRCS += xc_hvm_build.c endif -CFLAGS += -Wall CFLAGS += -Werror -CFLAGS += -O3 CFLAGS += -fno-strict-aliasing CFLAGS += $(INCLUDES) -I. 
diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_core.c Tue Mar 14 20:50:35 2006 @@ -6,18 +6,17 @@ #include <zlib.h> /* number of pages to write at a time */ -#define DUMP_INCREMENT 4 * 1024 +#define DUMP_INCREMENT (4 * 1024) #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) static int copy_from_domain_page(int xc_handle, uint32_t domid, - unsigned long *page_array, - unsigned long src_pfn, + unsigned long mfn, void *dst_page) { void *vaddr = xc_map_foreign_range( - xc_handle, domid, PAGE_SIZE, PROT_READ, page_array[src_pfn]); + xc_handle, domid, PAGE_SIZE, PROT_READ, mfn); if ( vaddr == NULL ) return -1; memcpy(dst_page, vaddr, PAGE_SIZE); @@ -26,93 +25,152 @@ } int -xc_domain_dumpcore(int xc_handle, - uint32_t domid, - const char *corename) +xc_domain_dumpcore_via_callback(int xc_handle, + uint32_t domid, + void *args, + dumpcore_rtn_t dump_rtn) { unsigned long nr_pages; - unsigned long *page_array; + unsigned long *page_array = NULL; xc_dominfo_t info; - int i, nr_vcpus = 0, dump_fd; + int i, nr_vcpus = 0; char *dump_mem, *dump_mem_start = NULL; struct xc_core_header header; vcpu_guest_context_t ctxt[MAX_VIRT_CPUS]; + char dummy[PAGE_SIZE]; + int dummy_len; + int sts; - - if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) { - PERROR("Could not open corefile %s: %s", corename, strerror(errno)); - goto error_out; - } - - if ((dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL) { + if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL ) + { PERROR("Could not allocate dump_mem"); goto error_out; } - if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1) { + if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 ) + { PERROR("Could not get info for domain"); goto error_out; } - for (i = 0; i < info.max_vcpu_id; i++) - if (xc_vcpu_getcontext(xc_handle, domid, - i, &ctxt[nr_vcpus]) == 0) + if ( domid != info.domid ) + { + PERROR("Domain %d 
does not exist", domid); + goto error_out; + } + + for ( i = 0; i <= info.max_vcpu_id; i++ ) + if ( xc_vcpu_getcontext(xc_handle, domid, i, &ctxt[nr_vcpus]) == 0) nr_vcpus++; nr_pages = info.nr_pages; - header.xch_magic = XC_CORE_MAGIC; + header.xch_magic = XC_CORE_MAGIC; header.xch_nr_vcpus = nr_vcpus; header.xch_nr_pages = nr_pages; header.xch_ctxt_offset = sizeof(struct xc_core_header); header.xch_index_offset = sizeof(struct xc_core_header) + sizeof(vcpu_guest_context_t)*nr_vcpus; - header.xch_pages_offset = round_pgup(sizeof(struct xc_core_header) + - (sizeof(vcpu_guest_context_t) * nr_vcpus) + - (nr_pages * sizeof(unsigned long))); + dummy_len = (sizeof(struct xc_core_header) + + (sizeof(vcpu_guest_context_t) * nr_vcpus) + + (nr_pages * sizeof(unsigned long))); + header.xch_pages_offset = round_pgup(dummy_len); + + sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header)); + if ( sts != 0 ) + goto error_out; - if (write(dump_fd, &header, sizeof(struct xc_core_header)) < 0 || - write(dump_fd, &ctxt, sizeof(ctxt[0]) * nr_vcpus) < 0) + sts = dump_rtn(args, (char *)&ctxt, sizeof(ctxt[0]) * nr_vcpus); + if ( sts != 0 ) + goto error_out; + + if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) { - PERROR("write failed"); - goto error_out; - } - - if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) { printf("Could not allocate memory\n"); goto error_out; } - if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) { + if ( xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages ) + { printf("Could not get the page frame list\n"); goto error_out; } - if (write(dump_fd, page_array, nr_pages * sizeof(unsigned long)) < 0) + sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long)); + if ( sts != 0 ) + goto error_out; + + /* Pad the output data to page alignment. 
*/ + memset(dummy, 0, PAGE_SIZE); + sts = dump_rtn(args, dummy, header.xch_pages_offset - dummy_len); + if ( sts != 0 ) + goto error_out; + + for ( dump_mem = dump_mem_start, i = 0; i < nr_pages; i++ ) { - PERROR("write failed"); - goto error_out; - } - lseek(dump_fd, header.xch_pages_offset, SEEK_SET); - for (dump_mem = dump_mem_start, i = 0; i < nr_pages; i++) { - copy_from_domain_page(xc_handle, domid, page_array, i, dump_mem); + copy_from_domain_page(xc_handle, domid, page_array[i], dump_mem); dump_mem += PAGE_SIZE; - if (((i + 1) % DUMP_INCREMENT == 0) || (i + 1) == nr_pages) { - if (write(dump_fd, dump_mem_start, dump_mem - dump_mem_start) < - dump_mem - dump_mem_start) { - PERROR("Partial write, file system full?"); + if ( ((i + 1) % DUMP_INCREMENT == 0) || ((i + 1) == nr_pages) ) + { + sts = dump_rtn(args, dump_mem_start, dump_mem - dump_mem_start); + if ( sts != 0 ) goto error_out; - } dump_mem = dump_mem_start; } } - close(dump_fd); free(dump_mem_start); + free(page_array); return 0; + error_out: - if (dump_fd != -1) - close(dump_fd); free(dump_mem_start); + free(page_array); return -1; +} + +/* Callback args for writing to a local dump file. */ +struct dump_args { + int fd; +}; + +/* Callback routine for writing to a local dump file. 
*/ +static int local_file_dump(void *args, char *buffer, unsigned int length) +{ + struct dump_args *da = args; + int bytes, offset; + + for ( offset = 0; offset < length; offset += bytes ) + { + bytes = write(da->fd, &buffer[offset], length-offset); + if ( bytes <= 0 ) + { + PERROR("Failed to write buffer: %s", strerror(errno)); + return -errno; + } + } + + return 0; +} + +int +xc_domain_dumpcore(int xc_handle, + uint32_t domid, + const char *corename) +{ + struct dump_args da; + int sts; + + if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 ) + { + PERROR("Could not open corefile %s: %s", corename, strerror(errno)); + return -errno; + } + + sts = xc_domain_dumpcore_via_callback( + xc_handle, domid, &da, &local_file_dump); + + close(da.fd); + + return sts; } /* diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_hvm_build.c Tue Mar 14 20:50:35 2006 @@ -132,7 +132,7 @@ } /* - * Use E820 reserved memory 0x9F800 to pass HVM info to vmxloader + * Use E820 reserved memory 0x9F800 to pass HVM info to hvmloader * hvmloader will use this info to set BIOS accordingly */ static int set_hvm_info(int xc_handle, uint32_t dom, @@ -338,24 +338,29 @@ return -1; } -int xc_hvm_build(int xc_handle, - uint32_t domid, - int memsize, - const char *image_name, - unsigned int vcpus, - unsigned int pae, - unsigned int acpi, - unsigned int apic, - unsigned int store_evtchn, - unsigned long *store_mfn) +static int xc_hvm_build_internal(int xc_handle, + uint32_t domid, + int memsize, + char *image, + unsigned long image_size, + unsigned int vcpus, + unsigned int pae, + unsigned int acpi, + unsigned int apic, + unsigned int store_evtchn, + unsigned long *store_mfn) { dom0_op_t launch_op, op; int rc, i; vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt; unsigned long nr_pages; - char *image = NULL; - unsigned long image_size; xen_capabilities_info_t xen_caps; + + if ( (image == NULL) || 
(image_size == 0) ) + { + ERROR("Image required"); + goto error_out; + } if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 ) { @@ -375,9 +380,6 @@ PERROR("Could not find total pages for domain"); goto error_out; } - - if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL ) - goto error_out; if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ) { @@ -404,8 +406,6 @@ ERROR("Error constructing guest OS"); goto error_out; } - - free(image); /* FPU is set up to default initial state. */ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); @@ -450,7 +450,6 @@ return rc; error_out: - free(image); return -1; } @@ -580,6 +579,92 @@ return 0; } +/* xc_hvm_build + * + * Create a domain for a virtualized Linux, using files/filenames + * + */ + +int xc_hvm_build(int xc_handle, + uint32_t domid, + int memsize, + const char *image_name, + unsigned int vcpus, + unsigned int pae, + unsigned int acpi, + unsigned int apic, + unsigned int store_evtchn, + unsigned long *store_mfn) +{ + char *image; + int sts; + unsigned long image_size; + + if ( (image_name == NULL) || + ((image = xc_read_image(image_name, &image_size)) == NULL) ) + return -1; + + sts = xc_hvm_build_internal(xc_handle, domid, memsize, + image, image_size, + vcpus, pae, acpi, apic, + store_evtchn, store_mfn); + + free(image); + + return sts; +} + +/* xc_hvm_build_mem + * + * Create a domain for a virtualized Linux, using buffers + * + */ + +int xc_hvm_build_mem(int xc_handle, + uint32_t domid, + int memsize, + const char *image_buffer, + unsigned long image_size, + unsigned int vcpus, + unsigned int pae, + unsigned int acpi, + unsigned int apic, + unsigned int store_evtchn, + unsigned long *store_mfn) +{ + int sts; + unsigned long img_len; + char *img; + + /* Validate that there is a kernel buffer */ + + if ( (image_buffer == NULL) || (image_size == 0) ) + { + ERROR("kernel image buffer not present"); + return -1; + } + + img = xc_inflate_buffer(image_buffer, image_size, &img_len); + if (img == 
NULL) + { + ERROR("unable to inflate ram disk buffer"); + return -1; + } + + sts = xc_hvm_build_internal(xc_handle, domid, memsize, + img, img_len, + vcpus, pae, acpi, apic, + store_evtchn, store_mfn); + + /* xc_inflate_buffer may return the original buffer pointer (for + for already inflated buffers), so exercise some care in freeing */ + + if ( (img != NULL) && (img != image_buffer) ) + free(img); + + return sts; +} + /* * Local variables: * mode: C diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_ia64_stubs.c Tue Mar 14 20:50:35 2006 @@ -658,7 +658,7 @@ goto error_out; } - if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL ){ + if ( (image = xc_read_image(image_name, &image_size)) == NULL ){ PERROR("Could not read guest firmware image %s",image_name); goto error_out; } diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_linux_build.c Tue Mar 14 20:50:35 2006 @@ -45,6 +45,15 @@ #ifdef __ia64__ #define probe_aout9(image,image_size,load_funcs) 1 #endif + +struct initrd_info { + enum { INITRD_none, INITRD_file, INITRD_mem } type; + unsigned long len; + union { + gzFile file_handle; + char *mem_addr; + } u; +}; static const char *feature_names[XENFEAT_NR_SUBMAPS*32] = { [XENFEAT_writable_page_tables] = "writable_page_tables", @@ -117,7 +126,7 @@ return -EINVAL; } -static int probeimageformat(char *image, +static int probeimageformat(const char *image, unsigned long image_size, struct load_funcs *load_funcs) { @@ -127,6 +136,42 @@ { ERROR( "Unrecognized image format" ); return -EINVAL; + } + + return 0; +} + +int load_initrd(int xc_handle, domid_t dom, + struct initrd_info *initrd, + unsigned long physbase, + unsigned long *phys_to_mach) +{ + char page[PAGE_SIZE]; + unsigned long pfn_start, pfn, nr_pages; + + if ( initrd->type == INITRD_none ) + return 0; + + 
pfn_start = physbase >> PAGE_SHIFT; + nr_pages = (initrd->len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + for ( pfn = pfn_start; pfn < (pfn_start + nr_pages); pfn++ ) + { + if ( initrd->type == INITRD_mem ) + { + xc_copy_to_domain_page( + xc_handle, dom, phys_to_mach[pfn], + &initrd->u.mem_addr[(pfn - pfn_start) << PAGE_SHIFT]); + } + else + { + if ( gzread(initrd->u.file_handle, page, PAGE_SIZE) == -1 ) + { + PERROR("Error reading initrd image, could not"); + return -EINVAL; + } + xc_copy_to_domain_page(xc_handle, dom, phys_to_mach[pfn], page); + } } return 0; @@ -406,8 +451,8 @@ static int setup_guest(int xc_handle, uint32_t dom, - char *image, unsigned long image_size, - gzFile initrd_gfd, unsigned long initrd_len, + const char *image, unsigned long image_size, + struct initrd_info *initrd, unsigned long nr_pages, unsigned long *pvsi, unsigned long *pvke, unsigned long *pvss, vcpu_guest_context_t *ctxt, @@ -427,7 +472,6 @@ unsigned long start_page, pgnr; start_info_t *start_info; int rc; - unsigned long i; rc = probeimageformat(image, image_size, &load_funcs); if ( rc != 0 ) @@ -441,7 +485,7 @@ dsi.v_start = round_pgdown(dsi.v_start); vinitrd_start = round_pgup(dsi.v_end); - vinitrd_end = vinitrd_start + initrd_len; + vinitrd_end = vinitrd_start + initrd->len; v_end = round_pgup(vinitrd_end); start_page = dsi.v_start >> PAGE_SHIFT; @@ -452,7 +496,8 @@ goto error_out; } - if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, pgnr) != pgnr ) + if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, + start_page, pgnr) != pgnr ) { PERROR("Could not get the page frame list"); goto error_out; @@ -472,23 +517,9 @@ (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array, &dsi); - /* Load the initial ramdisk image. 
*/ - if ( initrd_len != 0 ) - { - for ( i = (vinitrd_start - dsi.v_start); - i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE ) - { - char page[PAGE_SIZE]; - if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 ) - { - PERROR("Error reading initrd image, could not"); - goto error_out; - } - xc_copy_to_domain_page(xc_handle, dom, - page_array[i>>PAGE_SHIFT], page); - } - } - + if ( load_initrd(xc_handle, dom, initrd, + vinitrd_start - dsi.v_start, page_array) ) + goto error_out; *pvke = dsi.v_kernentry; @@ -522,11 +553,11 @@ start_info->store_evtchn = store_evtchn; start_info->console_mfn = nr_pages - 1; start_info->console_evtchn = console_evtchn; - start_info->nr_pages = nr_pages; // FIXME?: nr_pages - 2 ???? - if ( initrd_len != 0 ) + start_info->nr_pages = nr_pages; // FIXME?: nr_pages - 2 ???? + if ( initrd->len != 0 ) { ctxt->initrd.start = vinitrd_start; - ctxt->initrd.size = initrd_len; + ctxt->initrd.size = initrd->len; } else { @@ -550,8 +581,8 @@ #else /* x86 */ static int setup_guest(int xc_handle, uint32_t dom, - char *image, unsigned long image_size, - gzFile initrd_gfd, unsigned long initrd_len, + const char *image, unsigned long image_size, + struct initrd_info *initrd, unsigned long nr_pages, unsigned long *pvsi, unsigned long *pvke, unsigned long *pvss, vcpu_guest_context_t *ctxt, @@ -578,15 +609,11 @@ struct load_funcs load_funcs; struct domain_setup_info dsi; unsigned long vinitrd_start; - unsigned long vinitrd_end; unsigned long vphysmap_start; - unsigned long vphysmap_end; unsigned long vstartinfo_start; - unsigned long vstartinfo_end; unsigned long vstoreinfo_start; - unsigned long vstoreinfo_end; unsigned long vconsole_start; - unsigned long vconsole_end; + unsigned long vsharedinfo_start = 0; /* XXX gcc */ unsigned long vstack_start; unsigned long vstack_end; unsigned long vpt_start; @@ -612,6 +639,34 @@ goto error_out; } + /* Parse and validate kernel features. 
*/ + p = strstr(dsi.xen_guest_string, "FEATURES="); + if ( p != NULL ) + { + if ( !parse_features(p + strlen("FEATURES="), + supported_features, + required_features) ) + { + ERROR("Failed to parse guest kernel features.\n"); + goto error_out; + } + + printf("Supported features = { %08x }.\n", supported_features[0]); + printf("Required features = { %08x }.\n", required_features[0]); + } + + for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ ) + { + if ( (supported_features[i]&required_features[i]) != required_features[i] ) + { + ERROR("Guest kernel does not support a required feature.\n"); + goto error_out; + } + } + + shadow_mode_enabled = test_feature_bit(XENFEAT_auto_translated_physmap, + required_features); + /* * Why do we need this? The number of page-table frames depends on the * size of the bootstrap address space. But the size of the address space @@ -619,17 +674,22 @@ * read-only). We have a pair of simultaneous equations in two unknowns, * which we solve by exhaustive search. */ - vinitrd_start = round_pgup(dsi.v_end); - vinitrd_end = vinitrd_start + initrd_len; - vphysmap_start = round_pgup(vinitrd_end); - vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long)); - vstartinfo_start = round_pgup(vphysmap_end); - vstartinfo_end = vstartinfo_start + PAGE_SIZE; - vstoreinfo_start = vstartinfo_end; - vstoreinfo_end = vstoreinfo_start + PAGE_SIZE; - vconsole_start = vstoreinfo_end; - vconsole_end = vconsole_start + PAGE_SIZE; - vpt_start = vconsole_end; + v_end = round_pgup(dsi.v_end); + vinitrd_start = v_end; + v_end += round_pgup(initrd->len); + vphysmap_start = v_end; + v_end += round_pgup(nr_pages * sizeof(unsigned long)); + vstartinfo_start = v_end; + v_end += PAGE_SIZE; + vstoreinfo_start = v_end; + v_end += PAGE_SIZE; + vconsole_start = v_end; + v_end += PAGE_SIZE; + if ( shadow_mode_enabled ) { + vsharedinfo_start = v_end; + v_end += PAGE_SIZE; + } + vpt_start = v_end; for ( nr_pt_pages = 2; ; nr_pt_pages++ ) { @@ -669,26 +729,22 @@ #define _p(a) 
((void *) (a)) - printf("VIRTUAL MEMORY ARRANGEMENT:\n" - " Loaded kernel: %p->%p\n" - " Init. ramdisk: %p->%p\n" - " Phys-Mach map: %p->%p\n" - " Start info: %p->%p\n" - " Store page: %p->%p\n" - " Console page: %p->%p\n" - " Page tables: %p->%p\n" - " Boot stack: %p->%p\n" - " TOTAL: %p->%p\n", - _p(dsi.v_kernstart), _p(dsi.v_kernend), - _p(vinitrd_start), _p(vinitrd_end), - _p(vphysmap_start), _p(vphysmap_end), - _p(vstartinfo_start), _p(vstartinfo_end), - _p(vstoreinfo_start), _p(vstoreinfo_end), - _p(vconsole_start), _p(vconsole_end), - _p(vpt_start), _p(vpt_end), - _p(vstack_start), _p(vstack_end), - _p(dsi.v_start), _p(v_end)); - printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); + printf("VIRTUAL MEMORY ARRANGEMENT:\n"); + printf(" Loaded kernel: %p->%p\n", _p(dsi.v_kernstart), + _p(dsi.v_kernend)); + if ( initrd->len ) + printf(" Initial ramdisk: %p->%p\n", _p(vinitrd_start), + _p(vinitrd_start + initrd->len)); + printf(" Phys-Mach map: %p\n", _p(vphysmap_start)); + printf(" Start info: %p\n", _p(vstartinfo_start)); + printf(" Store page: %p\n", _p(vstoreinfo_start)); + printf(" Console page: %p\n", _p(vconsole_start)); + if ( shadow_mode_enabled ) + printf(" Shared Info page: %p\n", _p(vsharedinfo_start)); + printf(" Page tables: %p\n", _p(vpt_start)); + printf(" Boot stack: %p\n", _p(vstack_start)); + printf(" TOTAL: %p->%p\n", _p(dsi.v_start), _p(v_end)); + printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry)); if ( ((v_end - dsi.v_start)>>PAGE_SHIFT) > nr_pages ) { @@ -710,54 +766,13 @@ goto error_out; } - (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array, + (load_funcs.loadimage)(image, image_size, + xc_handle, dom, page_array, &dsi); - /* Parse and validate kernel features. 
*/ - p = strstr(dsi.xen_guest_string, "FEATURES="); - if ( p != NULL ) - { - if ( !parse_features(p + strlen("FEATURES="), - supported_features, - required_features) ) - { - ERROR("Failed to parse guest kernel features.\n"); - goto error_out; - } - - fprintf(stderr, "Supported features = { %08x }.\n", - supported_features[0]); - fprintf(stderr, "Required features = { %08x }.\n", - required_features[0]); - } - - for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ ) - { - if ( (supported_features[i]&required_features[i]) != required_features[i] ) - { - ERROR("Guest kernel does not support a required feature.\n"); - goto error_out; - } - } - - shadow_mode_enabled = test_feature_bit(XENFEAT_auto_translated_physmap, required_features); - - /* Load the initial ramdisk image. */ - if ( initrd_len != 0 ) - { - for ( i = (vinitrd_start - dsi.v_start); - i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE ) - { - char page[PAGE_SIZE]; - if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 ) - { - PERROR("Error reading initrd image, could not"); - goto error_out; - } - xc_copy_to_domain_page(xc_handle, dom, - page_array[i>>PAGE_SHIFT], page); - } - } + if ( load_initrd(xc_handle, dom, initrd, + vinitrd_start - dsi.v_start, page_array) ) + goto error_out; /* setup page tables */ #if defined(__i386__) @@ -851,7 +866,7 @@ if ( shadow_mode_enabled ) { - struct xen_reserved_phys_area xrpa; + struct xen_add_to_physmap xatp; /* Enable shadow translate mode */ if ( xc_shadow_control(xc_handle, dom, @@ -862,17 +877,36 @@ goto error_out; } - /* Find the shared info frame. It's guaranteed to be at the - start of the PFN hole. */ - xrpa.domid = dom; - xrpa.idx = 0; - rc = xc_memory_op(xc_handle, XENMEM_reserved_phys_area, &xrpa); + guest_shared_info_mfn = (vsharedinfo_start-dsi.v_start) >> PAGE_SHIFT; + + /* Map shared info frame into guest physmap. 
*/ + xatp.domid = dom; + xatp.space = XENMAPSPACE_shared_info; + xatp.idx = 0; + xatp.gpfn = guest_shared_info_mfn; + rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp); if ( rc != 0 ) { - PERROR("Cannot find shared info pfn"); + PERROR("Cannot map shared info pfn"); goto error_out; } - guest_shared_info_mfn = xrpa.first_gpfn; + + /* Map grant table frames into guest physmap. */ + for ( i = 0; ; i++ ) + { + xatp.domid = dom; + xatp.space = XENMAPSPACE_grant_table; + xatp.idx = i; + xatp.gpfn = nr_pages + i; + rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp); + if ( rc != 0 ) + { + if ( errno == EINVAL ) + break; /* done all grant tables */ + PERROR("Cannot map grant table pfn"); + goto error_out; + } + } } else { @@ -914,10 +948,10 @@ start_info->store_evtchn = store_evtchn; start_info->console_mfn = guest_console_mfn; start_info->console_evtchn = console_evtchn; - if ( initrd_len != 0 ) + if ( initrd->len != 0 ) { start_info->mod_start = vinitrd_start; - start_info->mod_len = initrd_len; + start_info->mod_len = initrd->len; } if ( cmdline != NULL ) { @@ -970,27 +1004,24 @@ } #endif -int xc_linux_build(int xc_handle, - uint32_t domid, - const char *image_name, - const char *ramdisk_name, - const char *cmdline, - const char *features, - unsigned long flags, - unsigned int store_evtchn, - unsigned long *store_mfn, - unsigned int console_evtchn, - unsigned long *console_mfn) +static int xc_linux_build_internal(int xc_handle, + uint32_t domid, + char *image, + unsigned long image_size, + struct initrd_info *initrd, + const char *cmdline, + const char *features, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) { dom0_op_t launch_op; DECLARE_DOM0_OP; - int initrd_fd = -1; - gzFile initrd_gfd = NULL; int rc, i; vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt; unsigned long nr_pages; - char *image = NULL; - unsigned long image_size, initrd_size=0; unsigned long 
vstartinfo_start, vkern_entry, vstack_start; uint32_t features_bitmap[XENFEAT_NR_SUBMAPS] = { 0, }; @@ -1007,26 +1038,6 @@ { PERROR("Could not find total pages for domain"); goto error_out; - } - - if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL ) - goto error_out; - - if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) ) - { - if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 ) - { - PERROR("Could not open the initial ramdisk image"); - goto error_out; - } - - initrd_size = xc_get_filesz(initrd_fd); - - if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL ) - { - PERROR("Could not allocate decompression state for initrd"); - goto error_out; - } } #ifdef VALGRIND @@ -1051,7 +1062,8 @@ memset(ctxt, 0, sizeof(*ctxt)); if ( setup_guest(xc_handle, domid, image, image_size, - initrd_gfd, initrd_size, nr_pages, + initrd, + nr_pages, &vstartinfo_start, &vkern_entry, &vstack_start, ctxt, cmdline, op.u.getdomaininfo.shared_info_frame, @@ -1062,12 +1074,6 @@ ERROR("Error constructing guest OS"); goto error_out; } - - if ( initrd_fd >= 0 ) - close(initrd_fd); - if ( initrd_gfd ) - gzclose(initrd_gfd); - free(image); #ifdef __ia64__ /* based on new_thread in xen/arch/ia64/domain.c */ @@ -1154,12 +1160,129 @@ return rc; error_out: - if ( initrd_gfd != NULL ) - gzclose(initrd_gfd); - else if ( initrd_fd >= 0 ) - close(initrd_fd); + return -1; +} + +int xc_linux_build_mem(int xc_handle, + uint32_t domid, + const char *image_buffer, + unsigned long image_size, + const char *initrd, + unsigned long initrd_len, + const char *cmdline, + const char *features, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + int sts; + char *img_buf; + unsigned long img_len; + struct initrd_info initrd_info = { .type = INITRD_none }; + + /* A kernel buffer is required */ + if ( (image_buffer == NULL) || (image_size == 0) ) + { + ERROR("kernel image buffer not present"); + 
return -1; + } + + /* If it's gzipped, inflate it; otherwise, use as is */ + /* xc_inflate_buffer may return the same buffer pointer if */ + /* the buffer is already inflated */ + img_buf = xc_inflate_buffer(image_buffer, image_size, &img_len); + if ( img_buf == NULL ) + { + ERROR("unable to inflate kernel image buffer"); + return -1; + } + + /* RAM disks are optional; if we get one, inflate it */ + if ( initrd != NULL ) + { + initrd_info.type = INITRD_mem; + initrd_info.u.mem_addr = xc_inflate_buffer( + initrd, initrd_len, &initrd_info.len); + if ( initrd_info.u.mem_addr == NULL ) + { + ERROR("unable to inflate ram disk buffer"); + sts = -1; + goto out; + } + } + + sts = xc_linux_build_internal(xc_handle, domid, img_buf, img_len, + &initrd_info, cmdline, features, flags, + store_evtchn, store_mfn, + console_evtchn, console_mfn); + + out: + /* The inflation routines may pass back the same buffer so be */ + /* sure that we have a buffer and that it's not the one passed in. */ + /* Don't unnecessarily annoy/surprise/confound the caller */ + if ( (img_buf != NULL) && (img_buf != image_buffer) ) + free(img_buf); + if ( (initrd_info.u.mem_addr != NULL) && + (initrd_info.u.mem_addr != initrd) ) + free(initrd_info.u.mem_addr); + + return sts; +} + +int xc_linux_build(int xc_handle, + uint32_t domid, + const char *image_name, + const char *initrd_name, + const char *cmdline, + const char *features, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + char *image = NULL; + unsigned long image_size; + struct initrd_info initrd_info = { .type = INITRD_none }; + int fd = -1, sts = -1; + + if ( (image_name == NULL) || + ((image = xc_read_image(image_name, &image_size)) == NULL )) + return -1; + + if ( (initrd_name != NULL) && (strlen(initrd_name) != 0) ) + { + initrd_info.type = INITRD_file; + + if ( (fd = open(initrd_name, O_RDONLY)) < 0 ) + { + PERROR("Could not open the initial 
ramdisk image"); + goto error_out; + } + + initrd_info.len = xc_get_filesz(fd); + if ( (initrd_info.u.file_handle = gzdopen(fd, "rb")) == NULL ) + { + PERROR("Could not allocate decompression state for initrd"); + goto error_out; + } + } + + sts = xc_linux_build_internal(xc_handle, domid, image, image_size, + &initrd_info, cmdline, features, flags, + store_evtchn, store_mfn, + console_evtchn, console_mfn); + + error_out: free(image); - return -1; + if ( fd >= 0 ) + close(fd); + if ( initrd_info.u.file_handle ) + gzclose(initrd_info.u.file_handle); + + return sts; } /* diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_load_aout9.c --- a/tools/libxc/xc_load_aout9.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_load_aout9.c Tue Mar 14 20:50:35 2006 @@ -12,20 +12,19 @@ #error "Unsupported architecture" #endif - #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) #define KZERO 0x80000000 #define KOFFSET(_p) ((_p)&~KZERO) -static int parseaout9image(char *, unsigned long, struct domain_setup_info *); -static int loadaout9image(char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *); -static void copyout(int, uint32_t, unsigned long *, unsigned long, void *, int); -struct Exec *get_header(char *, unsigned long, struct Exec *); +static int parseaout9image(const char *, unsigned long, struct domain_setup_info *); +static int loadaout9image(const char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *); +static void copyout(int, uint32_t, unsigned long *, unsigned long, const char *, int); +struct Exec *get_header(const char *, unsigned long, struct Exec *); int probe_aout9( - char *image, + const char *image, unsigned long image_size, struct load_funcs *load_funcs) { @@ -43,7 +42,7 @@ static int parseaout9image( - char *image, + const char *image, unsigned long image_size, struct domain_setup_info *dsi) { @@ -77,7 +76,7 @@ static int loadaout9image( - char *image, + const char *image, unsigned long image_size, int 
xch, uint32_t dom, unsigned long *parray, @@ -111,7 +110,7 @@ int xch, uint32_t dom, unsigned long *parray, unsigned long addr, - void *buf, + const char *buf, int sz) { unsigned long pgoff, chunksz, off; @@ -143,7 +142,7 @@ */ struct Exec * get_header( - char *image, + const char *image, unsigned long image_size, struct Exec *ehdr) { diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_load_bin.c --- a/tools/libxc/xc_load_bin.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_load_bin.c Tue Mar 14 20:50:35 2006 @@ -99,23 +99,22 @@ #define FLAGS_REQUIRED XEN_REACTOS_FLAG_ADDRSVALID static struct xen_bin_image_table * -findtable(char *image, unsigned long image_size); +findtable(const char *image, unsigned long image_size); static int parsebinimage( - char *image, unsigned long image_size, struct domain_setup_info *dsi); + const char *image, unsigned long image_size, + struct domain_setup_info *dsi); static int loadbinimage( - char *image, unsigned long image_size, int xch, uint32_t dom, + const char *image, unsigned long image_size, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi); -int probe_bin(char *image, +int probe_bin(const char *image, unsigned long image_size, struct load_funcs *load_funcs) { - if ( NULL == findtable(image, image_size) ) - { - return -EINVAL; - } + if ( findtable(image, image_size) == NULL ) + return -EINVAL; load_funcs->parseimage = parsebinimage; load_funcs->loadimage = loadbinimage; @@ -124,7 +123,7 @@ } static struct xen_bin_image_table * -findtable(char *image, unsigned long image_size) +findtable(const char *image, unsigned long image_size) { struct xen_bin_image_table *table; unsigned long *probe_ptr; @@ -133,15 +132,12 @@ /* Don't go outside the image */ if ( image_size < sizeof(struct xen_bin_image_table) ) - { return NULL; - } + probe_count = image_size; /* Restrict to first 8k */ - if ( 8192 < probe_count ) - { + if ( probe_count > 8192 ) probe_count = 8192; - } probe_count = (probe_count - sizeof(struct 
xen_bin_image_table)) / sizeof(unsigned long); @@ -165,7 +161,7 @@ return NULL; } -static int parsebinimage(char *image, +static int parsebinimage(const char *image, unsigned long image_size, struct domain_setup_info *dsi) { @@ -238,7 +234,7 @@ static int loadbinimage( - char *image, unsigned long image_size, int xch, uint32_t dom, + const char *image, unsigned long image_size, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi) { unsigned long size; diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_load_elf.c Tue Mar 14 20:50:35 2006 @@ -19,26 +19,25 @@ static int parseelfimage( - char *image, unsigned long image_size, struct domain_setup_info *dsi); + const char *image, unsigned long image_size, + struct domain_setup_info *dsi); static int loadelfimage( - char *image, unsigned long image_size, int xch, uint32_t dom, + const char *image, unsigned long image_size, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi); static int loadelfsymtab( - char *image, int xch, uint32_t dom, unsigned long *parray, + const char *image, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi); -int probe_elf(char *image, +int probe_elf(const char *image, unsigned long image_size, struct load_funcs *load_funcs) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image; if ( !IS_ELF(*ehdr) ) - { - return -EINVAL; - } + return -EINVAL; load_funcs->parseimage = parseelfimage; load_funcs->loadimage = loadelfimage; @@ -52,7 +51,7 @@ ((phdr->p_flags & (PF_W|PF_X)) != 0)); } -static int parseelfimage(char *image, +static int parseelfimage(const char *image, unsigned long elfsize, struct domain_setup_info *dsi) { @@ -60,7 +59,8 @@ Elf_Phdr *phdr; Elf_Shdr *shdr; unsigned long kernstart = ~0UL, kernend=0UL; - char *shstrtab, *guestinfo=NULL, *p; + const char *shstrtab; + char *guestinfo=NULL, *p; int h; if ( !IS_ELF(*ehdr) ) @@ -98,7 +98,7 @@ if ( 
strcmp(&shstrtab[shdr->sh_name], "__xen_guest") != 0 ) continue; - guestinfo = image + shdr->sh_offset; + guestinfo = (char *)image + shdr->sh_offset; if ( (strstr(guestinfo, "LOADER=generic") == NULL) && (strstr(guestinfo, "GUEST_OS=linux") == NULL) ) @@ -171,7 +171,7 @@ static int loadelfimage( - char *image, unsigned long elfsize, int xch, uint32_t dom, + const char *image, unsigned long elfsize, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image; @@ -222,7 +222,7 @@ static int loadelfsymtab( - char *image, int xch, uint32_t dom, unsigned long *parray, + const char *image, int xch, uint32_t dom, unsigned long *parray, struct domain_setup_info *dsi) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr; @@ -271,8 +271,9 @@ (shdr[h].sh_type == SHT_SYMTAB) ) { if ( parray != NULL ) - xc_map_memcpy(maxva, image + shdr[h].sh_offset, shdr[h].sh_size, - xch, dom, parray, dsi->v_start); + xc_map_memcpy(maxva, image + shdr[h].sh_offset, + shdr[h].sh_size, + xch, dom, parray, dsi->v_start); /* Mangled to be based on ELF header location. 
*/ shdr[h].sh_offset = maxva - dsi->symtab_addr; diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_private.c Tue Mar 14 20:50:35 2006 @@ -231,8 +231,8 @@ goto out1; } break; - case XENMEM_reserved_phys_area: - if ( mlock(arg, sizeof(struct xen_reserved_phys_area)) ) + case XENMEM_add_to_physmap: + if ( mlock(arg, sizeof(struct xen_add_to_physmap)) ) { PERROR("Could not mlock"); goto out1; @@ -277,8 +277,8 @@ safe_munlock(xmml->extent_start, xmml->max_extents * sizeof(unsigned long)); break; - case XENMEM_reserved_phys_area: - safe_munlock(arg, sizeof(struct xen_reserved_phys_area)); + case XENMEM_add_to_physmap: + safe_munlock(arg, sizeof(struct xen_add_to_physmap)); break; case XENMEM_translate_gpfn_list: safe_munlock(trans->mfn_list, trans->nr_gpfns * sizeof(long)); @@ -364,7 +364,7 @@ int xc_copy_to_domain_page(int xc_handle, uint32_t domid, unsigned long dst_pfn, - void *src_page) + const char *src_page) { void *vaddr = xc_map_foreign_range( xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn); @@ -410,7 +410,7 @@ return sz; } -void xc_map_memcpy(unsigned long dst, char *src, unsigned long size, +void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size, int xch, uint32_t dom, unsigned long *parray, unsigned long vstart) { diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_ptrace.c --- a/tools/libxc/xc_ptrace.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_ptrace.c Tue Mar 14 20:50:35 2006 @@ -1,5 +1,4 @@ #define XC_PTRACE_PRIVATE - #include <sys/ptrace.h> #include <sys/wait.h> @@ -8,6 +7,36 @@ #include "xc_private.h" #include "xg_private.h" #include "xc_ptrace.h" + +#ifdef DEBUG +static char *ptrace_names[] = { + "PTRACE_TRACEME", + "PTRACE_PEEKTEXT", + "PTRACE_PEEKDATA", + "PTRACE_PEEKUSER", + "PTRACE_POKETEXT", + "PTRACE_POKEDATA", + "PTRACE_POKEUSER", + "PTRACE_CONT", + "PTRACE_KILL", + "PTRACE_SINGLESTEP", + "PTRACE_INVALID", + "PTRACE_INVALID", + 
"PTRACE_GETREGS", + "PTRACE_SETREGS", + "PTRACE_GETFPREGS", + "PTRACE_SETFPREGS", + "PTRACE_ATTACH", + "PTRACE_DETACH", + "PTRACE_GETFPXREGS", + "PTRACE_SETFPXREGS", + "PTRACE_INVALID", + "PTRACE_INVALID", + "PTRACE_INVALID", + "PTRACE_INVALID", + "PTRACE_SYSCALL", +}; +#endif /* XXX application state */ static long nr_pages = 0; diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xc_ptrace.h --- a/tools/libxc/xc_ptrace.h Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xc_ptrace.h Tue Mar 14 20:50:35 2006 @@ -9,8 +9,6 @@ #define BSD_PAGE_MASK (PAGE_SIZE-1) #define PDRSHIFT 22 #define PSL_T 0x00000100 /* trace enable bit */ - -extern const char const * ptrace_names[]; struct gdb_regs { long ebx; /* 0 */ diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xenctrl.h Tue Mar 14 20:50:35 2006 @@ -139,9 +139,27 @@ uint32_t *pdomid); +/* Functions to produce a dump of a given domain + * xc_domain_dumpcore - produces a dump to a specified file + * xc_domain_dumpcore_via_callback - produces a dump, using a specified + * callback function + */ int xc_domain_dumpcore(int xc_handle, uint32_t domid, const char *corename); + +/* Define the callback function type for xc_domain_dumpcore_via_callback. + * + * This function is called by the coredump code for every "write", + * and passes an opaque object for the use of the function and + * created by the caller of xc_domain_dumpcore_via_callback. + */ +typedef int (dumpcore_rtn_t)(void *arg, char *buffer, unsigned int length); + +int xc_domain_dumpcore_via_callback(int xc_handle, + uint32_t domid, + void *arg, + dumpcore_rtn_t dump_rtn); /* * This function sets the maximum number of vcpus that a domain may create. 
@@ -372,13 +390,13 @@ unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start); + unsigned long *extent_start); int xc_domain_memory_decrease_reservation(int xc_handle, uint32_t domid, unsigned long nr_extents, unsigned int extent_order, - unsigned long *extent_start); + unsigned long *extent_start); int xc_domain_memory_populate_physmap(int xc_handle, uint32_t domid, @@ -411,7 +429,7 @@ uint8_t allow_access); unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, - unsigned long mfn); + unsigned long mfn); typedef dom0_perfc_desc_t xc_perfc_desc_t; /* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */ @@ -457,7 +475,7 @@ * @parm virt the virtual address to translate */ unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, - int vcpu, unsigned long long virt); + int vcpu, unsigned long long virt); int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, unsigned long max_pfns); @@ -467,7 +485,7 @@ unsigned int start_page, unsigned int nr_pages); int xc_copy_to_domain_page(int xc_handle, uint32_t domid, - unsigned long dst_pfn, void *src_page); + unsigned long dst_pfn, const char *src_page); int xc_clear_domain_page(int xc_handle, uint32_t domid, unsigned long dst_pfn); @@ -478,7 +496,7 @@ long xc_get_max_pages(int xc_handle, uint32_t domid); int xc_mmuext_op(int xc_handle, struct mmuext_op *op, unsigned int nr_ops, - domid_t dom); + domid_t dom); int xc_memory_op(int xc_handle, int cmd, void *arg); diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xenguest.h Tue Mar 14 20:50:35 2006 @@ -42,6 +42,22 @@ unsigned long *store_mfn, unsigned int console_evtchn, unsigned long *console_mfn); +/** + * This function will create a domain for a paravirtualized Linux + * using file names pointing to kernel and ramdisk + * + * @parm xc_handle a handle to an open hypervisor 
interface + * @parm domid the id of the domain + * @param image_name name of the kernel image file + * @param ramdisk_name name of the ramdisk image file + * @parm cmdline command line string + * @parm flags domain creation flags + * @parm store_evtchn the store event channel for this domain to use + * @parm store_mfn returned with the mfn of the store page + * @parm console_evtchn the console event channel for this domain to use + * @parm conole_mfn returned with the mfn of the console page + * @return 0 on success, -1 on failure + */ int xc_linux_build(int xc_handle, uint32_t domid, const char *image_name, @@ -54,6 +70,38 @@ unsigned int console_evtchn, unsigned long *console_mfn); +/** + * This function will create a domain for a paravirtualized Linux + * using buffers for kernel and initrd + * + * @param xc_handle a handle to an open hypervisor interface + * @param domid the id of the domain + * @param image_buffer buffer containing kernel image + * @param image_size size of the kernel image buffer + * @param initrd_buffer name of the ramdisk image file + * @param initrd_size size of the ramdisk buffer + * @param cmdline command line string + * @param flags domain creation flags + * @param store_evtchn the store event channel for this domain to use + * @param store_mfn returned with the mfn of the store page + * @param console_evtchn the console event channel for this domain to use + * @param conole_mfn returned with the mfn of the console page + * @return 0 on success, -1 on failure + */ +int xc_linux_build_mem(int xc_handle, + uint32_t domid, + const char *image_buffer, + unsigned long image_size, + const char *initrd_buffer, + unsigned long initrd_size, + const char *cmdline, + const char *features, + unsigned long flags, + unsigned int store_evtchn, + unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn); + int xc_hvm_build(int xc_handle, uint32_t domid, int memsize, @@ -65,4 +113,16 @@ unsigned int store_evtchn, unsigned 
long *store_mfn); -#endif // XENGUEST_H +int xc_hvm_build_mem(int xc_handle, + uint32_t domid, + int memsize, + const char *image_buffer, + unsigned long image_size, + unsigned int vcpus, + unsigned int pae, + unsigned int acpi, + unsigned int apic, + unsigned int store_evtchn, + unsigned long *store_mfn); + +#endif /* XENGUEST_H */ diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xg_private.c --- a/tools/libxc/xg_private.c Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xg_private.c Tue Mar 14 20:50:35 2006 @@ -10,15 +10,15 @@ #include "xg_private.h" -char *xc_read_kernel_image(const char *filename, unsigned long *size) +char *xc_read_image(const char *filename, unsigned long *size) { int kernel_fd = -1; gzFile kernel_gfd = NULL; char *image = NULL; unsigned int bytes; - if ( filename == NULL ) - goto out; + if ( (filename == NULL) || (size == NULL) ) + return NULL; if ( (kernel_fd = open(filename, O_RDONLY)) < 0 ) { @@ -60,6 +60,62 @@ return image; } +char *xc_inflate_buffer(const char *in_buf, unsigned long in_size, + unsigned long *out_size) +{ + int sts; + z_stream zStream; + unsigned long out_len; + char *out_buf; + + /* Not compressed? Then return the original buffer. 
*/ + if ( ((unsigned char)in_buf[0] != 0x1F) || + ((unsigned char)in_buf[1] != 0x8B) ) + { + if ( out_size != NULL ) + *out_size = in_size; + return (char *)in_buf; + } + + out_len = in_buf[in_size-4] + + (256 * (in_buf[in_size-3] + + (256 * (in_buf[in_size-2] + + (256 * in_buf[in_size-1]))))); + bzero(&zStream, sizeof(zStream)); + out_buf = malloc(out_len + 16); /* Leave a little extra space */ + if ( out_buf == NULL ) + { + ERROR("Error mallocing buffer\n"); + return NULL; + } + + zStream.next_in = (unsigned char *)in_buf; + zStream.avail_in = in_size; + zStream.next_out = (unsigned char *)out_buf; + zStream.avail_out = out_len+16; + sts = inflateInit2(&zStream, (MAX_WBITS+32)); /* +32 means "handle gzip" */ + if ( sts != Z_OK ) + { + ERROR("inflateInit failed, sts %d\n", sts); + free(out_buf); + return NULL; + } + + /* Inflate in one pass/call */ + sts = inflate(&zStream, Z_FINISH); + if ( sts != Z_STREAM_END ) + { + ERROR("inflate failed, sts %d\n", sts); + free(out_buf); + return NULL; + } + + if ( out_size != NULL ) + *out_size = out_len; + + return out_buf; +} + /*******************/ int pin_table( @@ -77,7 +133,7 @@ } /* This is shared between save and restore, and may generally be useful. 
*/ -unsigned long csum_page (void * page) +unsigned long csum_page(void *page) { int i; unsigned long *p = page; diff -r dc50cdd66c5c -r 0ed4a312765b tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Tue Mar 14 20:10:21 2006 +++ b/tools/libxc/xg_private.h Tue Mar 14 20:50:35 2006 @@ -26,7 +26,11 @@ #endif -char *xc_read_kernel_image(const char *filename, unsigned long *size); +char *xc_read_image(const char *filename, unsigned long *size); +char *xc_inflate_buffer(const char *in_buf, + unsigned long in_size, + unsigned long *out_size); + unsigned long csum_page (void * page); #define _PAGE_PRESENT 0x001 @@ -89,7 +93,7 @@ #define l2_table_offset_pae(_a) \ (((_a) >> L2_PAGETABLE_SHIFT_PAE) & (L2_PAGETABLE_ENTRIES_PAE - 1)) #define l3_table_offset_pae(_a) \ - (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1)) + (((_a) >> L3_PAGETABLE_SHIFT_PAE) & (L3_PAGETABLE_ENTRIES_PAE - 1)) #if defined(__i386__) #define l1_table_offset(_a) \ @@ -102,9 +106,9 @@ #define l2_table_offset(_a) \ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) #define l3_table_offset(_a) \ - (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) + (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) #define l4_table_offset(_a) \ - (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) + (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) #endif #define ERROR(_m, _a...) 
\ @@ -141,11 +145,12 @@ char *xen_guest_string; }; -typedef int (*parseimagefunc)(char *image, unsigned long image_size, - struct domain_setup_info *dsi); -typedef int (*loadimagefunc)(char *image, unsigned long image_size, int xch, - uint32_t dom, unsigned long *parray, - struct domain_setup_info *dsi); +typedef int (*parseimagefunc)(const char *image, unsigned long image_size, + struct domain_setup_info *dsi); +typedef int (*loadimagefunc)(const char *image, unsigned long image_size, + int xch, + uint32_t dom, unsigned long *parray, + struct domain_setup_info *dsi); struct load_funcs { @@ -167,21 +172,24 @@ } mfn_mapper_t; int xc_copy_to_domain_page(int xc_handle, uint32_t domid, - unsigned long dst_pfn, void *src_page); + unsigned long dst_pfn, const char *src_page); unsigned long xc_get_filesz(int fd); -void xc_map_memcpy(unsigned long dst, char *src, unsigned long size, +void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size, int xch, uint32_t dom, unsigned long *parray, unsigned long vstart); int pin_table(int xc_handle, unsigned int type, unsigned long mfn, - domid_t dom); + domid_t dom); /* image loading */ -int probe_elf(char *image, unsigned long image_size, struct load_funcs *funcs); -int probe_bin(char *image, unsigned long image_size, struct load_funcs *funcs); -int probe_aout9(char *image, unsigned long image_size, struct load_funcs *funcs); +int probe_elf(const char *image, unsigned long image_size, + struct load_funcs *funcs); +int probe_bin(const char *image, unsigned long image_size, + struct load_funcs *funcs); +int probe_aout9(const char *image, unsigned long image_size, + struct load_funcs *funcs); #endif diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/Makefile --- a/tools/misc/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/misc/Makefile Tue Mar 14 20:50:35 2006 @@ -5,7 +5,7 @@ XEN_ROOT=../.. 
include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -Wall -Werror -O3 +CFLAGS += -Werror INCLUDES += -I $(XEN_XC) INCLUDES += -I $(XEN_LIBXC) diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/cpuperf/Makefile --- a/tools/misc/cpuperf/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/misc/cpuperf/Makefile Tue Mar 14 20:50:35 2006 @@ -16,8 +16,6 @@ # these are for Xen XEN_ROOT=../../.. include $(XEN_ROOT)/tools/Rules.mk - -CFLAGS += -Wall -O3 HDRS = $(wildcard *.h) SRCS = $(wildcard *.c) diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/cpuperf/cpuperf_xeno.h --- a/tools/misc/cpuperf/cpuperf_xeno.h Tue Mar 14 20:10:21 2006 +++ b/tools/misc/cpuperf/cpuperf_xeno.h Tue Mar 14 20:50:35 2006 @@ -13,7 +13,7 @@ static int xc_handle; -void xen_init() +void xen_init(void) { if ( (xc_handle = xc_interface_open()) == -1 ) { @@ -24,12 +24,12 @@ } -void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high ) +void dom0_wrmsr(int cpu_mask, int msr, unsigned int low, unsigned int high) { xc_msr_write (xc_handle, cpu_mask, msr, low, high); } -unsigned long long dom0_rdmsr( int cpu_mask, int msr ) +unsigned long long dom0_rdmsr(int cpu_mask, int msr) { return xc_msr_read(xc_handle, cpu_mask, msr); } diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/lomount/Makefile --- a/tools/misc/lomount/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/misc/lomount/Makefile Tue Mar 14 20:50:35 2006 @@ -6,7 +6,7 @@ XEN_ROOT=../../.. 
include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -Wall -Werror -O3 +CFLAGS += -Werror HDRS = $(wildcard *.h) OBJS = $(patsubst %.c,%.o,$(wildcard *.c)) diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/lomount/lomount.c --- a/tools/misc/lomount/lomount.c Tue Mar 14 20:10:21 2006 +++ b/tools/misc/lomount/lomount.c Tue Mar 14 20:50:35 2006 @@ -195,7 +195,7 @@ return fail; } -void usage() +void usage(void) { fprintf(stderr, "You must specify at least -diskimage and -partition.\n"); fprintf(stderr, "All other arguments are passed through to 'mount'.\n"); diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/mbootpack/Makefile --- a/tools/misc/mbootpack/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/misc/mbootpack/Makefile Tue Mar 14 20:50:35 2006 @@ -20,10 +20,8 @@ INCS := -I. -I- DEFS := LDFLAGS := -CFLAGS := -Wall -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format -CFLAGS += -Wmissing-prototypes -#CFLAGS += -pipe -g -O0 -Wcast-align -CFLAGS += -pipe -O3 +CFLAGS += -Wpointer-arith -Wcast-qual -Wno-unused -Wno-format +CFLAGS += -Wmissing-prototypes -pipe # What object files need building for the program OBJS := mbootpack.o buildimage.o @@ -33,7 +31,7 @@ DEPS = .*.d mbootpack: $(OBJS) - $(HOSTCC) -o $@ $(filter-out %.a, $^) $(LDFLAGS) + $(HOSTCC) -o $@ $(filter-out %.a, $^) clean: $(RM) mbootpack *.o $(DEPS) bootsect setup bzimage_header.c bin2c diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/miniterm/Makefile --- a/tools/misc/miniterm/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/misc/miniterm/Makefile Tue Mar 14 20:50:35 2006 @@ -1,9 +1,10 @@ +XEN_ROOT:=../../.. 
+include $(XEN_ROOT)/tools/Rules.mk + INSTALL = install INSTALL_PROG = $(INSTALL) -m0755 INSTALL_DIR = $(INSTALL) -d -m0755 -CC = gcc -CFLAGS = -Wall -O3 TARGET = miniterm all: $(TARGET) @@ -16,4 +17,4 @@ $(RM) *.o $(TARGET) *~ $(TARGET): $(TARGET).c - $(CC) $(CFLAGS) -o $@ $< + $(HOSTCC) $(HOSTCFLAGS) -o $@ $< diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/nsplitd/Makefile --- a/tools/misc/nsplitd/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/misc/nsplitd/Makefile Tue Mar 14 20:50:35 2006 @@ -1,6 +1,6 @@ +XEN_ROOT := ../../.. +include $(XEN_ROOT)/tools/Rules.mk -CC = gcc -CFLAGS = -Wall -O3 CFILES = $(wildcard *.c) HDRS = $(wildcard *.h) @@ -16,7 +16,7 @@ $(RM) *.o $(TARGET) *~ $(TARGET): $(OBJS) - $(CC) $(CFLAGS) -o $@ $^ + $(HOSTCC) $(HOSTCFLAGS) -o $@ $^ %.o: %.c $(HDRS) Makefile - $(CC) $(CFLAGS) -c -o $@ $< + $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $< diff -r dc50cdd66c5c -r 0ed4a312765b tools/misc/xc_shadow.c --- a/tools/misc/xc_shadow.c Tue Mar 14 20:10:21 2006 +++ b/tools/misc/xc_shadow.c Tue Mar 14 20:50:35 2006 @@ -18,7 +18,7 @@ #include <errno.h> #include <string.h> -void usage() +void usage(void) { printf("xc_shadow: -[0|1|2]\n"); printf(" set shadow mode\n"); @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { int xc_handle; - int mode; + int mode = 0; if ( argc > 1 ) { diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/Makefile --- a/tools/python/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/python/Makefile Tue Mar 14 20:50:35 2006 @@ -9,10 +9,10 @@ ifndef XEN_PYTHON_NATIVE_INSTALL install: all - CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" + CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --force else install: all - CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" + CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force endif test: diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/web/httpserver.py --- a/tools/python/xen/web/httpserver.py Tue Mar 14 20:10:21 2006 +++ 
b/tools/python/xen/web/httpserver.py Tue Mar 14 20:50:35 2006 @@ -13,7 +13,9 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2006 XenSource Ltd. #============================================================================ + import threading import string @@ -28,6 +30,7 @@ from xen.xend.XendError import XendError import http +import unix from resource import Resource, ErrorPage from SrvDir import SrvDir @@ -267,30 +270,27 @@ closed = False - def __init__(self, interface='', port=8080, root=None): - if root is None: - root = SrvDir() + def __init__(self, root, interface, port=8080): + self.root = root self.interface = interface self.port = port - self.root = root # ready indicates when we are ready to begin accept connections # it should be set after a successful bind self.ready = False - - def getRoot(self): - return self.root - - def getPort(self): - return self.port def run(self): self.bind() self.listen() self.ready = True - self.requestLoop() + + while not self.closed: + (sock, addr) = self.accept() + self.processRequest(sock, addr) + def stop(self): self.close() + def bind(self): self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -303,23 +303,12 @@ def accept(self): return self.socket.accept() - def requestLoop(self): - while not self.closed: - self.acceptRequest() - def close(self): self.closed = True try: self.socket.close() except: pass - - def acceptRequest(self): - try: - (sock, addr) = self.accept() - self.processRequest(sock, addr) - except socket.error: - return def processRequest(self, sock, addr): try: @@ -340,23 +329,12 @@ def getResource(self, req): return self.root.getRequestResource(req) + class UnixHttpServer(HttpServer): - def __init__(self, path=None, root=None): - HttpServer.__init__(self, interface='localhost', root=root) + def __init__(self, root, path): 
+ HttpServer.__init__(self, root, 'localhost') self.path = path def bind(self): - pathdir = os.path.dirname(self.path) - if not os.path.exists(pathdir): - os.makedirs(pathdir) - else: - try: - os.unlink(self.path) - except SystemExit: - raise - except Exception, ex: - pass - self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - #self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.socket.bind(self.path) + self.socket = unix.bind(self.path) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/web/unix.py --- a/tools/python/xen/web/unix.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/web/unix.py Tue Mar 14 20:50:35 2006 @@ -13,15 +13,34 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2005 Mike Wray <mike.wray@xxxxxx> -# Copyright (C) 2005 XenSource Ltd. +# Copyright (C) 2005-2006 XenSource Ltd. #============================================================================ -import socket import os import os.path +import socket +import stat import connection + + +def bind(path): + """Create a Unix socket, and bind it to the given path. 
The socket is +created such that only the current user may access it.""" + + parent = os.path.dirname(path) + if os.path.exists(parent): + os.chown(parent, os.geteuid(), os.getegid()) + os.chmod(parent, stat.S_IRWXU) + if os.path.exists(path): + os.unlink(path) + else: + os.makedirs(parent, stat.S_IRWXU) + + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.bind(path) + return sock class UnixListener(connection.SocketListener): @@ -31,19 +50,7 @@ def createSocket(self): - pathdir = os.path.dirname(self.path) - if not os.path.exists(pathdir): - os.makedirs(pathdir) - else: - try: - os.unlink(self.path) - except SystemExit: - raise - except Exception, ex: - pass - sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - sock.bind(self.path) - return sock + return bind(self.path) def acceptConnection(self, sock, _): diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/XendDomain.py Tue Mar 14 20:50:35 2006 @@ -487,7 +487,17 @@ """ dominfo = self.domain_lookup(domid) try: - return xc.sedf_domain_get(dominfo.getDomid()) + + sedf_info = xc.sedf_domain_get(dominfo.getDomid()) + # return sxpr + return ['sedf', + ['domain', sedf_info['domain']], + ['period', sedf_info['period']], + ['slice', sedf_info['slice']], + ['latency', sedf_info['latency']], + ['extratime', sedf_info['extratime']], + ['weight', sedf_info['weight']]] + except Exception, ex: raise XendError(str(ex)) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Mar 14 20:50:35 2006 @@ -1580,10 +1580,11 @@ controllerClasses[device_class] = cls -from xen.xend.server import blkif, netif, tpmif, pciif, iopif, usbif +from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, usbif addControllerClass('vbd', blkif.BlkifController) 
addControllerClass('vif', netif.NetifController) addControllerClass('vtpm', tpmif.TPMifController) addControllerClass('pci', pciif.PciController) addControllerClass('ioports', iopif.IOPortsController) +addControllerClass('irq', irqif.IRQController) addControllerClass('usb', usbif.UsbifController) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/XendLogging.py --- a/tools/python/xen/xend/XendLogging.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/XendLogging.py Tue Mar 14 20:50:35 2006 @@ -13,7 +13,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> -# Copyright (C) 2005 XenSource Ltd +# Copyright (C) 2005, 2006 XenSource Ltd. #============================================================================ @@ -22,9 +22,10 @@ import logging import logging.handlers +from xen.xend.server import params -__all__ = [ 'log', 'init', 'getLogFilename', 'addLogStderr', - 'removeLogStderr' ] + +__all__ = [ 'log', 'init', 'getLogFilename' ] if not 'TRACE' in logging.__dict__: @@ -38,37 +39,28 @@ log = logging.getLogger("xend") -DEFAULT_MAX_BYTES = 1 << 20 # 1MB -DEFAULT_BACKUP_COUNT = 5 +MAX_BYTES = 1 << 20 # 1MB +BACKUP_COUNT = 5 STDERR_FORMAT = "[%(name)s] %(levelname)s (%(module)s:%(lineno)d) %(message)s" LOGFILE_FORMAT = "[%(asctime)s %(name)s] %(levelname)s (%(module)s:%(lineno)d) %(message)s" DATE_FORMAT = "%Y-%m-%d %H:%M:%S" -stderrHandler = logging.StreamHandler() -stderrHandler.setFormatter(logging.Formatter(STDERR_FORMAT, DATE_FORMAT)) - logfilename = None -def init(filename, level=logging.INFO, maxBytes=None, backupCount=None): - """Initialise logging. Logs to 'filename' by default, but does not log to - stderr unless addLogStderr() is called. +def init(filename, level): + """Initialise logging. Logs to the given filename, and logs to stderr if + XEND_DEBUG is set. 
""" global logfilename def openFileHandler(fname): - return logging.handlers.RotatingFileHandler(fname, - mode='a', - maxBytes=maxBytes, - backupCount=backupCount) - - if not maxBytes: - maxBytes = DEFAULT_MAX_BYTES - if not backupCount: - backupCount = DEFAULT_BACKUP_COUNT + return logging.handlers.RotatingFileHandler(fname, mode = 'a', + maxBytes = MAX_BYTES, + backupCount = BACKUP_COUNT) # Rather unintuitively, getLevelName will get the number corresponding to # a level name, as well as getting the name corresponding to a level @@ -89,16 +81,12 @@ fileHandler.setFormatter(logging.Formatter(LOGFILE_FORMAT, DATE_FORMAT)) log.addHandler(fileHandler) + if params.XEND_DEBUG: + stderrHandler = logging.StreamHandler() + stderrHandler.setFormatter(logging.Formatter(STDERR_FORMAT, + DATE_FORMAT)) + log.addHandler(stderrHandler) + def getLogFilename(): return logfilename - - -def addLogStderr(): - """Add logging to stderr.""" - log.addHandler(stderrHandler) - - -def removeLogStderr(): - """Remove logging to stderr.""" - log.removeHandler(stderrHandler) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/XendRoot.py --- a/tools/python/xen/xend/XendRoot.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/XendRoot.py Tue Mar 14 20:50:35 2006 @@ -102,15 +102,14 @@ """ print >>sys.stderr, "xend [ERROR]", fmt % args + def configure(self): self.set_config() - logfile = self.get_config_value("logfile", self.logfile_default) - loglevel = self.get_config_value("loglevel", self.loglevel_default) - XendLogging.init(logfile, level = loglevel) - - from xen.xend.server import params - if params.XEND_DEBUG: - XendLogging.addLogStderr() + XendLogging.init(self.get_config_value("logfile", + self.logfile_default), + self.get_config_value("loglevel", + self.loglevel_default)) + def set_config(self): """If the config file exists, read it. If not, ignore it. 
diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/balloon.py --- a/tools/python/xen/xend/balloon.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/balloon.py Tue Mar 14 20:50:35 2006 @@ -27,7 +27,8 @@ from XendError import VmError -PROC_XEN_BALLOON = "/proc/xen/balloon" +PROC_XEN_BALLOON = '/proc/xen/balloon' + BALLOON_OUT_SLACK = 1 # MiB. We need this because the physinfo details are # rounded. RETRY_LIMIT = 10 @@ -39,6 +40,47 @@ # such requirements. SLEEP_TIME_GROWTH = 0.1 +# A mapping between easy-to-remember labels and the more verbose +# label actually shown in the PROC_XEN_BALLOON file. +labels = { 'current' : 'Current allocation', + 'target' : 'Requested target', + 'low-balloon' : 'Low-mem balloon', + 'high-balloon' : 'High-mem balloon', + 'limit' : 'Xen hard limit' } + +def _get_proc_balloon(label): + """Returns the value for the named label. Returns None if the label was + not found or the value was non-numeric.""" + + f = file(PROC_XEN_BALLOON, 'r') + try: + for line in f: + keyvalue = line.split(':') + if keyvalue[0] == label: + values = keyvalue[1].split() + if values[0].isdigit(): + return int(values[0]) + else: + return None + return None + finally: + f.close() + +def get_dom0_current_alloc(): + """Returns the current memory allocation (in MiB) of dom0.""" + + kb = _get_proc_balloon(labels['current']) + if kb == None: + raise VmError('Failed to query current memory allocation of dom0.') + return kb / 1024 + +def get_dom0_target_alloc(): + """Returns the target memory allocation (in MiB) of dom0.""" + + kb = _get_proc_balloon(labels['target']) + if kb == None: + raise VmError('Failed to query target memory allocation of dom0.') + return kb / 1024 def free(required): """Balloon out memory from the privileged domain so that there is the @@ -88,7 +130,7 @@ log.debug("Balloon: free %d; need %d.", free_mem, need_mem) if dom0_min_mem > 0: - dom0_alloc = _get_dom0_alloc() + dom0_alloc = get_dom0_current_alloc() new_alloc = dom0_alloc - 
(need_mem - free_mem) if (new_alloc >= dom0_min_mem and @@ -121,20 +163,3 @@ finally: del xc - - -def _get_dom0_alloc(): - """Return current allocation memory of dom0 (in MiB). Return 0 on error""" - - f = file(PROC_XEN_BALLOON, 'r') - try: - line = f.readline() - for x in line.split(): - for n in x: - if not n.isdigit(): - break - else: - return int(x) / 1024 - return 0 - finally: - f.close() diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/server/SrvServer.py --- a/tools/python/xen/xend/server/SrvServer.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/server/SrvServer.py Tue Mar 14 20:50:35 2006 @@ -13,6 +13,7 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #============================================================================ # Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2006 XenSource Ltd. #============================================================================ """Example xend HTTP @@ -106,11 +107,11 @@ root.putChild('xend', SrvRoot()) servers = XendServers() if xroot.get_xend_http_server(): - port = xroot.get_xend_port() - interface = xroot.get_xend_address() - servers.add(HttpServer(root=root, interface=interface, port=port)) + servers.add(HttpServer(root, + xroot.get_xend_address(), + xroot.get_xend_port())) if xroot.get_xend_unix_server(): path = xroot.get_xend_unix_path() log.info('unix path=' + path) - servers.add(UnixHttpServer(path=path, root=root)) + servers.add(UnixHttpServer(root, path)) return servers diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/server/iopif.py --- a/tools/python/xen/xend/server/iopif.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/server/iopif.py Tue Mar 14 20:50:35 2006 @@ -83,4 +83,4 @@ 'ioports: Failed to configure legacy i/o range: %s - %s' % (io_from, io_to)) - return (dev, {}, {}) + return (None, {}, {}) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/server/netif.py --- a/tools/python/xen/xend/server/netif.py 
Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/server/netif.py Tue Mar 14 20:50:35 2006 @@ -72,8 +72,6 @@ sxp.child_value(config, 'script', xroot.get_vif_script())) typ = sxp.child_value(config, 'type') - if typ == 'ioemu': - return (None,{},{}) bridge = sxp.child_value(config, 'bridge') mac = sxp.child_value(config, 'mac') vifname = sxp.child_value(config, 'vifname') @@ -87,15 +85,19 @@ back = { 'script' : script, 'mac' : mac, 'handle' : "%i" % devid } + + if typ == 'ioemu': + front = {} + back['type'] = 'ioemu' + else: + front = { 'handle' : "%i" % devid, + 'mac' : mac } if ipaddr: back['ip'] = ' '.join(ipaddr) if bridge: back['bridge'] = bridge if vifname: back['vifname'] = vifname - - front = { 'handle' : "%i" % devid, - 'mac' : mac } return (devid, back, front) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xm/create.py Tue Mar 14 20:50:35 2006 @@ -252,14 +252,20 @@ gopts.var('pci', val='BUS:DEV.FUNC', fn=append_value, default=[], use="""Add a PCI device to a domain, using given params (in hex). - For example '-pci c0:02.1a'. + For example 'pci=c0:02.1a'. The option may be repeated to add more than one pci device.""") gopts.var('ioports', val='FROM[-TO]', fn=append_value, default=[], use="""Add a legacy I/O range to a domain, using given params (in hex). - For example '-ioports 02f8-02ff'. + For example 'ioports=02f8-02ff'. The option may be repeated to add more than one i/o range.""") + +gopts.var('irq', val='IRQ', + fn=append_value, default=[], + use="""Add an IRQ (interrupt line) to a domain. + For example 'irq=7'. 
+ This option may be repeated to add more than one IRQ.""") gopts.var('usb', val='PATH', fn=append_value, default=[], @@ -487,6 +493,13 @@ for (io_from, io_to) in vals.ioports: config_ioports = ['ioports', ['from', io_from], ['to', io_to]] config_devs.append(['device', config_ioports]) + +def configure_irq(config_devs, vals): + """Create the config for irqs. + """ + for irq in vals.irq: + config_irq = ['irq', ['irq', irq]] + config_devs.append(['device', config_irq]) def configure_usb(config_devs, vals): for path in vals.usb: @@ -615,6 +628,7 @@ configure_disks(config_devs, vals) configure_pci(config_devs, vals) configure_ioports(config_devs, vals) + configure_irq(config_devs, vals) configure_vifs(config_devs, vals) configure_usb(config_devs, vals) configure_vtpm(config_devs, vals) diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xm/main.py Tue Mar 14 20:50:35 2006 @@ -83,7 +83,18 @@ parameters""" sched_bvt_ctxallow_help = """sched-bvt-ctxallow <Allow> Set the BVT scheduler context switch allowance""" -sched_sedf_help = "sched-sedf <Parameters> Set simple EDF parameters" +sched_sedf_help = "sched-sedf [DOM] [OPTIONS] Show|Set simple EDF parameters\n" + \ +" -p, --period Relative deadline(ms).\n\ + -s, --slice Worst-case execution time(ms)\n\ + (slice < period).\n\ + -l, --latency scaled period(ms) in case the domain\n\ + is doing heavy I/O.\n\ + -e, --extra flag (0/1) which controls whether the\n\ + domain can run in extra-time\n\ + -w, --weight mutually exclusive with period/slice and\n\ + specifies another way of setting a domain's\n\ + cpu period/slice." 
+ block_attach_help = """block-attach <DomId> <BackDev> <FrontDev> <Mode> [BackDomId] Create a new virtual block device""" block_detach_help = """block-detach <DomId> <DevId> Destroy a domain's virtual block device, @@ -148,8 +159,7 @@ host_commands = [ "dmesg", "info", - "log", - "top", + "log" ] scheduler_commands = [ @@ -374,6 +384,20 @@ 'state' : get_info('state', str, '??'), 'cpu_time' : get_info('cpu_time', float, 0), 'ssidref' : get_info('ssidref', int, 0), + } + + +def parse_sedf_info(info): + def get_info(n, t, d): + return t(sxp.child_value(info, n, d)) + + return { + 'dom' : get_info('domain', int, -1), + 'period' : get_info('period', int, -1), + 'slice' : get_info('slice', int, -1), + 'latency' : get_info('latency', int, -1), + 'extratime': get_info('extratime', int, -1), + 'weight' : get_info('weight', int, -1), } @@ -617,12 +641,86 @@ server.xend_node_cpu_bvt_slice_set(slice) def xm_sched_sedf(args): - arg_check(args, "sched-sedf", 6) - - dom = args[0] - v = map(int, args[1:6]) - from xen.xend.XendClient import server - server.xend_domain_cpu_sedf_set(dom, *v) + def ns_to_ms(val): + return float(val) * 0.000001 + + def ms_to_ns(val): + return (float(val) / 0.000001) + + def print_sedf(info): + info['period'] = ns_to_ms(info['period']) + info['slice'] = ns_to_ms(info['slice']) + info['latency'] = ns_to_ms(info['latency']) + print( ("%(name)-32s %(dom)3d %(period)9.1f %(slice)9.1f" + + " %(latency)7.1f %(extratime)6d %(weight)6d") % info) + + def domid_match(domid, info): + return domid is None or domid == info['name'] or domid == str(info['dom']) + + # we want to just display current info if no parameters are passed + if len(args) == 0: + domid = None + else: + # we expect at least a domain id (name or number) + # and at most a domid up to 5 options with values + arg_check(args, "sched-sedf", 1, 11) + domid = args[0] + # drop domid from args since get_opt doesn't recognize it + args = args[1:] + + opts = {} + try: + (options, params) = 
getopt.gnu_getopt(args, 'p:s:l:e:w:', + ['period=', 'slice=', 'latency=', 'extratime=', 'weight=']) + except getopt.GetoptError, opterr: + err(opterr) + sys.exit(1) + + # convert to nanoseconds if needed + for (k, v) in options: + if k in ['-p', '--period']: + opts['period'] = ms_to_ns(v) + elif k in ['-s', '--slice']: + opts['slice'] = ms_to_ns(v) + elif k in ['-l', '--latency']: + opts['latency'] = ms_to_ns(v) + elif k in ['-e', '--extratime']: + opts['extratime'] = v + elif k in ['-w', '--weight']: + opts['weight'] = v + + # print header if we aren't setting any parameters + if len(opts.keys()) == 0: + print '%-33s %-2s %-4s %-4s %-7s %-5s %-6s'%('Name','ID','Period(ms)', + 'Slice(ms)', 'Lat(ms)', + 'Extra','Weight') + + from xen.xend.XendClient import server + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) for dom in getDomains("")]) + for d in doms: + # fetch current values so as not to clobber them + sedf_info = \ + parse_sedf_info(server.xend_domain_cpu_sedf_get(d['dom'])) + sedf_info['name'] = d['name'] + + # update values in case of call to set + if len(opts.keys()) > 0: + for k in opts.keys(): + sedf_info[k]=opts[k] + + # send the update, converting user input + v = map(int, [sedf_info['period'], sedf_info['slice'], + sedf_info['latency'],sedf_info['extratime'], + sedf_info['weight']]) + rv = server.xend_domain_cpu_sedf_set(d['dom'], *v) + if int(rv) != 0: + err("Failed to set sedf parameters (rv=%d)."%(rv)) + + # not setting values, display info + else: + print_sedf(sedf_info) + def xm_info(args): arg_check(args, "info", 0) diff -r dc50cdd66c5c -r 0ed4a312765b tools/security/Makefile --- a/tools/security/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/security/Makefile Tue Mar 14 20:50:35 2006 @@ -1,9 +1,7 @@ XEN_ROOT = ../.. include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -Wall CFLAGS += -Werror -CFLAGS += -O3 CFLAGS += -fno-strict-aliasing CFLAGS += -I. 
diff -r dc50cdd66c5c -r 0ed4a312765b tools/vnet/libxutil/Makefile --- a/tools/vnet/libxutil/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/vnet/libxutil/Makefile Tue Mar 14 20:50:35 2006 @@ -29,8 +29,7 @@ LIB_OBJS := $(LIB_SRCS:.c=.o) PIC_OBJS := $(LIB_SRCS:.c=.opic) -CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing -CFLAGS += -g +CFLAGS += -Werror -fno-strict-aliasing # Get gcc to generate the dependencies for us. CFLAGS += -Wp,-MD,.$(@F).d diff -r dc50cdd66c5c -r 0ed4a312765b tools/vnet/vnetd/Makefile --- a/tools/vnet/vnetd/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/vnet/vnetd/Makefile Tue Mar 14 20:50:35 2006 @@ -42,9 +42,6 @@ CPPFLAGS += -D __ARCH_I386_ATOMIC__ #---------------------------------------------------------------------------- -CFLAGS += -g -CFLAGS += -O2 -CFLAGS += -Wall CFLAGS += $(INCLUDES) $(LIBS) LDFLAGS += $(LIBS) diff -r dc50cdd66c5c -r 0ed4a312765b tools/vtpm/Rules.mk --- a/tools/vtpm/Rules.mk Tue Mar 14 20:10:21 2006 +++ b/tools/vtpm/Rules.mk Tue Mar 14 20:50:35 2006 @@ -14,7 +14,7 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin # General compiler flags -CFLAGS = -Wall -Werror -g3 -I. +CFLAGS = -Werror -g3 -I. # For generating dependencies CFLAGS += -Wp,-MD,.$(@F).d diff -r dc50cdd66c5c -r 0ed4a312765b tools/vtpm_manager/Rules.mk --- a/tools/vtpm_manager/Rules.mk Tue Mar 14 20:10:21 2006 +++ b/tools/vtpm_manager/Rules.mk Tue Mar 14 20:50:35 2006 @@ -14,7 +14,7 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin # General compiler flags -CFLAGS = -Wall -Werror -g3 -I. +CFLAGS = -Werror -g3 -I. 
# For generating dependencies CFLAGS += -Wp,-MD,.$(@F).d diff -r dc50cdd66c5c -r 0ed4a312765b tools/vtpm_manager/manager/Makefile --- a/tools/vtpm_manager/manager/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/vtpm_manager/manager/Makefile Tue Mar 14 20:50:35 2006 @@ -20,7 +20,7 @@ rm -f $(BIN) *~ $(BIN): $(OBJS) - $(CC) $(LDFLAGS) $^ $(LIBS) -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LIBS) -o $@ # libraries LIBS += ../tcs/libTCS.a ../util/libTCGUtils.a ../crypto/libtcpaCrypto.a diff -r dc50cdd66c5c -r 0ed4a312765b tools/xcutils/Makefile --- a/tools/xcutils/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/xcutils/Makefile Tue Mar 14 20:50:35 2006 @@ -19,7 +19,7 @@ INCLUDES += -I $(XEN_LIBXC) -CFLAGS += -Wall -Werror -O3 -fno-strict-aliasing +CFLAGS += -Werror -fno-strict-aliasing CFLAGS += $(INCLUDES) # Make gcc generate dependencies. diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenmon/Makefile --- a/tools/xenmon/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/xenmon/Makefile Tue Mar 14 20:50:35 2006 @@ -20,7 +20,7 @@ XEN_ROOT=../.. 
include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -Wall -Werror -g +CFLAGS += -Werror -g CFLAGS += -I $(XEN_XC) CFLAGS += -I $(XEN_LIBXC) LDFLAGS += -L $(XEN_LIBXC) diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenmon/xenbaked.c --- a/tools/xenmon/xenbaked.c Tue Mar 14 20:10:21 2006 +++ b/tools/xenmon/xenbaked.c Tue Mar 14 20:50:35 2006 @@ -299,7 +299,7 @@ exit(EXIT_FAILURE); } - tbufs_mapped = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */, + tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN, size * num, PROT_READ | PROT_WRITE, tbufs_mfn); @@ -379,7 +379,7 @@ /** * get_num_cpus - get the number of logical CPUs */ -unsigned int get_num_cpus() +unsigned int get_num_cpus(void) { dom0_op_t op; int xc_handle = xc_interface_open(); @@ -409,7 +409,7 @@ /** * monitor_tbufs - monitor the contents of tbufs */ -int monitor_tbufs() +int monitor_tbufs(void) { int i; extern void process_record(int, struct t_rec *); diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenmon/xenmon.py --- a/tools/xenmon/xenmon.py Tue Mar 14 20:10:21 2006 +++ b/tools/xenmon/xenmon.py Tue Mar 14 20:50:35 2006 @@ -452,6 +452,12 @@ if c == ord('c'): cpu = (cpu + 1) % ncpu + # n/p = cycle to the next/previous CPU + if c == ord('n'): + cpu = (cpu + 1) % ncpu + if c == ord('p'): + cpu = (cpu - 1) % ncpu + stdscr.erase() _c.nocbreak() @@ -502,6 +508,7 @@ shm = mmap.mmap(shmf.fileno(), QOS_DATA_SIZE) interval = 0 + curr = last = time.time() outfiles = {} for dom in range(0, NDOMAINS): outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w') @@ -561,9 +568,10 @@ h1[dom][4], h1[dom][5][0], h1[dom][5][1])) outfiles[dom].flush() - - interval += options.interval - time.sleep(1) + curr = time.time() + interval += (curr - last) * 1000 + last = curr + time.sleep(options.interval / 1000.0) for dom in range(0, NDOMAINS): outfiles[dom].close() diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstat/libxenstat/Makefile --- a/tools/xenstat/libxenstat/Makefile Tue Mar 14 20:10:21 2006 +++ 
b/tools/xenstat/libxenstat/Makefile Tue Mar 14 20:50:35 2006 @@ -48,7 +48,7 @@ $(RANLIB) $@ $(SHLIB): $(OBJECTS) - $(CC) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS) + $(CC) $(CFLAGS) $(LDFLAGS) $(SONAME_FLAGS) -shared -o $@ $(OBJECTS) src/xenstat.o: src/xenstat.c src/xenstat.h src/xen-interface.h $(CC) $(CFLAGS) $(WARN_FLAGS) -c -o $@ $< diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstat/libxenstat/src/xen-interface.c --- a/tools/xenstat/libxenstat/src/xen-interface.c Tue Mar 14 20:10:21 2006 +++ b/tools/xenstat/libxenstat/src/xen-interface.c Tue Mar 14 20:50:35 2006 @@ -31,7 +31,7 @@ /* Initialize for xen-interface. Returns a handle to be used with subsequent * calls to the xen-interface functions or NULL if an error occurs. */ -xi_handle *xi_init() +xi_handle *xi_init(void) { xi_handle *handle; diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstat/libxenstat/src/xen-interface.h --- a/tools/xenstat/libxenstat/src/xen-interface.h Tue Mar 14 20:10:21 2006 +++ b/tools/xenstat/libxenstat/src/xen-interface.h Tue Mar 14 20:50:35 2006 @@ -26,7 +26,7 @@ /* Initialize for xen-interface. Returns a handle to be used with subsequent * calls to the xen-interface functions or NULL if an error occurs. */ -xi_handle *xi_init(); +xi_handle *xi_init(void); /* Release the handle to libxc, free resources, etc. */ void xi_uninit(xi_handle *handle); diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstat/libxenstat/src/xenstat.c --- a/tools/xenstat/libxenstat/src/xenstat.c Tue Mar 14 20:10:21 2006 +++ b/tools/xenstat/libxenstat/src/xenstat.c Tue Mar 14 20:50:35 2006 @@ -129,7 +129,7 @@ /* * libxenstat API */ -xenstat_handle *xenstat_init() +xenstat_handle *xenstat_init(void) { xenstat_handle *handle; diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstat/libxenstat/src/xenstat.h --- a/tools/xenstat/libxenstat/src/xenstat.h Tue Mar 14 20:10:21 2006 +++ b/tools/xenstat/libxenstat/src/xenstat.h Tue Mar 14 20:50:35 2006 @@ -26,7 +26,7 @@ /* Initialize the xenstat library. 
Returns a handle to be used with * subsequent calls to the xenstat library, or NULL if an error occurs. */ -xenstat_handle *xenstat_init(); +xenstat_handle *xenstat_init(void); /* Release the handle to libxc, free resources, etc. */ void xenstat_uninit(xenstat_handle * handle); diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstore/Makefile --- a/tools/xenstore/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/xenstore/Makefile Tue Mar 14 20:50:35 2006 @@ -12,7 +12,7 @@ # Make gcc generate dependencies. BASECFLAGS += -Wp,-MD,.$(@F).d PROG_DEP = .*.d -BASECFLAGS+= -O3 $(PROFILE) +BASECFLAGS+= $(PROFILE) #BASECFLAGS+= -I$(XEN_ROOT)/tools BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc BASECFLAGS+= -I. diff -r dc50cdd66c5c -r 0ed4a312765b tools/xenstore/xenstored_core.c --- a/tools/xenstore/xenstored_core.c Tue Mar 14 20:10:21 2006 +++ b/tools/xenstore/xenstored_core.c Tue Mar 14 20:50:35 2006 @@ -66,7 +66,7 @@ static TDB_CONTEXT *tdb_ctx; static void corrupt(struct connection *conn, const char *fmt, ...); -static void check_store(); +static void check_store(void); #define log(...) \ do { \ @@ -238,7 +238,7 @@ } -static void reopen_log() +static void reopen_log(void) { if (tracefile) { if (tracefd > 0) @@ -1612,7 +1612,7 @@ } -static void check_store() +static void check_store(void) { char * root = talloc_strdup(NULL, "/"); struct hashtable * reachable = diff -r dc50cdd66c5c -r 0ed4a312765b tools/xentrace/Makefile --- a/tools/xentrace/Makefile Tue Mar 14 20:10:21 2006 +++ b/tools/xentrace/Makefile Tue Mar 14 20:50:35 2006 @@ -6,7 +6,7 @@ XEN_ROOT=../.. 
include $(XEN_ROOT)/tools/Rules.mk -CFLAGS += -Wall -Werror -O3 +CFLAGS += -Werror CFLAGS += -I $(XEN_XC) CFLAGS += -I $(XEN_LIBXC) diff -r dc50cdd66c5c -r 0ed4a312765b tools/xentrace/xentrace.c --- a/tools/xentrace/xentrace.c Tue Mar 14 20:10:21 2006 +++ b/tools/xentrace/xentrace.c Tue Mar 14 20:50:35 2006 @@ -144,7 +144,7 @@ exit(EXIT_FAILURE); } - tbufs_mapped = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */, + tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN, size * num, PROT_READ | PROT_WRITE, tbufs_mfn); @@ -258,7 +258,7 @@ /** * get_num_cpus - get the number of logical CPUs */ -unsigned int get_num_cpus() +unsigned int get_num_cpus(void) { dom0_op_t op; int xc_handle = xc_interface_open(); diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/README --- a/tools/xm-test/README Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/README Tue Mar 14 20:50:35 2006 @@ -120,8 +120,9 @@ # ./runtest.sh <logfile> This will run all tests, as well as generate and submit a report at -the end. All output files will begin with "<logfile>." If you wish to -prevent submission of a report, add "-d" to the command line like this: +the end. All output files will begin with "<logfile>." +If you wish to prevent submission of a report, add "-d" to the +command line like this: # ./runtest.sh -d <logfile> @@ -131,15 +132,19 @@ # ./runtest.sh -s <logfile> -For people needing a quick test run instead the full suite, a quick -mode has been added that will attempt to run a representative subset -of tests. This is not a substitute for the whole suite, but will -verify that some of the major functions of xen and xm are working: - - # ./runtest.sh -q <logfile> - -Because of the current structure of the reporting software, submission -of quick test run results is not supported. +Group test sets are supported in xm-test. This is form of layering of +tests groups/cases/tests. In the framework directory "grouptest", +files exist for group processing. 
The user can add groups, casenames +and test lists as required. Default group run is "grouptest/default". + + # ./runtest.sh -g <groupname> <logfile> + +* NOTE: There is a quick set of tests in group mode, that was added to +run certain casenames and tests. It is not a substitute for the full +xm-test test suite. + # ./runtest.sh -g quick <logfile> + + It may be desirable to run a specific test group. This can be accomplished by doing the following: diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/lib/XmTestLib/Test.py --- a/tools/xm-test/lib/XmTestLib/Test.py Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/lib/XmTestLib/Test.py Tue Mar 14 20:50:35 2006 @@ -32,6 +32,7 @@ import select import signal import re +import glob TEST_PASS = 0 TEST_FAIL = 255 @@ -184,7 +185,16 @@ domain.destroy() return False - + +# +# We currently can only load as many concurrent HVM domains as loop +# devices, need to find how many devices the system has. +def getMaxHVMDomains(): + nodes = glob.glob("/dev/loop*") + maxd = len(nodes) + + return maxd + if __name__ == "__main__": diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/runtest.sh --- a/tools/xm-test/runtest.sh Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/runtest.sh Tue Mar 14 20:50:35 2006 @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/sh ## ## Test driver script @@ -11,7 +11,7 @@ echo " Where opts are:" echo " -d : do not submit a report for this run" echo " -b : do not ask any questions (batch mode)" - echo " -q : run a quick test set" + echo " -g : run a group test set" echo " -e <email> : set email address for report" echo " -s <report> : just submit report <report>" echo " -h | --help : show this help" @@ -92,11 +92,12 @@ echo "Running sanity checks..." make -C tests/_sanity check 2>&1 | grep REASON if [ $? 
-eq 0 ]; then - echo "Sanity checks failed" - exit 1 - fi - -} + echo "Sanity checks failed" + exit 1 + fi + +} + # Get contact info if needed get_contact_info() { @@ -128,26 +129,21 @@ # Run the tests run_tests() { - output=$1 - echo Running real tests... - TEST_VERBOSE=1 make -k check > $output 2>&1 -} - -run_tests_quick() { - - output=$1 - - create_tests="01_create_basic_pos.test 07_create_mem64_pos.test 10_create_fastdestroy.test 14_create_blockroot_pos.test" - unpause_tests="01_unpause_basic_pos.test" - memset_tests="01_memset_basic_pos.test 03_memset_random_pos.test" - help_tests="06_help_allcmds.test" - testgroups="create unpause memset help" - - echo "*** Quick test" > $output - for group in $testgroups; do - eval $(echo list=\$${group}_tests) - echo "*** Running tests [$list] from $group" - (cd tests/$group && TEST_VERBOSE=1 make -k check TESTS="$list") >> $output 2>&1 + groupentered=$1 + output=$2 + + exec < grouptest/$groupentered + while read casename testlist; do + echo Running $casename tests... + echo "*** case $casename from group $groupentered" >> $output + if [ -z "$testlist" ]; then + echo "*** Running tests for case $casename" >> $output + (cd tests/$casename && TEST_VERBOSE=1 make -k check) >> $output 2>&1 + else + echo "*** Running tests $testlist from case $casename" >> $output + (cd tests/$casename && TEST_VERBOSE=1 make -k check TESTS="$testlist") >> $output 2>&1 + fi + done } @@ -195,6 +191,7 @@ report=yes batch=no run=yes +GROUPENTERED=default # Resolve options while [ $# -gt 0 ] @@ -213,8 +210,13 @@ echo $1 > contact_info echo "(Email set to $1)" ;; - -q) - run=quick + -g) + shift + GROUPENTERED=$1 + if [ ! 
-f grouptest/$GROUPENTERED ]; then + echo "No file for group $GROUPENTERED" + exit 1 + fi ;; -s) run=no @@ -265,18 +267,15 @@ if [ "$run" != "no" ]; then runnable_tests make_environment_report $OSREPORTTEMP $PROGREPORTTEMP - if [ "$run" = "yes" ]; then - run_tests $OUTPUT - else - run_tests_quick $OUTPUT - fi + run_tests $GROUPENTERED $OUTPUT make_text_reports $PASSFAIL $FAILURES $OUTPUT $TXTREPORT make_result_report $OUTPUT $RESULTREPORTTEMP cat $OSREPORTTEMP $PROGREPORTTEMP $RESULTREPORTTEMP > $XMLREPORT rm $OSREPORTTEMP $PROGREPORTTEMP $RESULTREPORTTEMP -fi - -if [ "$report" = "yes" ] && [ "$run" = "yes" ]; then + +fi + +if [ "$report" = "yes" ]; then if [ ! -f "$XMLREPORT" ]; then echo "No such file: $XMLREPORT" exit 1 diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/create/11_create_concurrent_pos.py --- a/tools/xm-test/tests/create/11_create_concurrent_pos.py Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/create/11_create_concurrent_pos.py Tue Mar 14 20:50:35 2006 @@ -8,8 +8,14 @@ import time import random -MIN_DOMS = 10 -MAX_DOMS = 50 +if ENABLE_HVM_SUPPORT: + MAX_DOMS = getMaxHVMDomains() + if MAX_DOMS > 50: + MAX_DOMS = 50 +else: + MAX_DOMS = 50 + +MIN_DOMS = 5 MEM_PER_DOM = 24 domains = [] diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/create/13_create_multinic_pos.py --- a/tools/xm-test/tests/create/13_create_multinic_pos.py Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/create/13_create_multinic_pos.py Tue Mar 14 20:50:35 2006 @@ -5,8 +5,16 @@ from XmTestLib import * -for i in range(0,10): - config = {"vif": ['' for _ in range(0, i)]} +# The current device model, qemu-dm, only supports 8 MAX_NICS currently. 
+if ENABLE_HVM_SUPPORT: + MAX_NICS = 8 + nic = "type=ioemu, bridge=xenbr0" +else: + MAX_NICS = 10 + nic = '' + +for i in range(0,MAX_NICS): + config = {"vif": [ nic ] * i} domain = XmTestDomain(extraConfig=config) try: diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/list/06_list_nonroot.py --- a/tools/xm-test/tests/list/06_list_nonroot.py Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/list/06_list_nonroot.py Tue Mar 14 20:50:35 2006 @@ -12,4 +12,4 @@ eyecatcher = "Error: Most commands need root access" where = output.find(eyecatcher) if where == -1: - FAIL("xm help: didn't see the root hint, saw %s" % output) + FAIL("xm list: didn't see the root hint, saw %s" % output) diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/Makefile.am --- a/tools/xm-test/tests/network/Makefile.am Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/network/Makefile.am Tue Mar 14 20:50:35 2006 @@ -1,10 +1,15 @@ SUBDIRS = - TESTS = \ 02_network_local_ping_pos.test \ + 03_network_local_tcp_pos.test \ + 04_network_local_udp_pos.test \ 05_network_dom0_ping_pos.test \ - 11_network_domU_ping_pos.test + 06_network_dom0_tcp_pos.test \ + 07_network_dom0_udp_pos.test \ + 11_network_domU_ping_pos.test \ + 12_network_domU_tcp_pos.test \ + 13_network_domU_udp_pos.test diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/Makefile.am --- a/tools/xm-test/tests/sedf/Makefile.am Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/sedf/Makefile.am Tue Mar 14 20:50:35 2006 @@ -1,7 +1,11 @@ - SUBDIRS = -TESTS = 01_sedf_multi_pos.test +TESTS = 01_sedf_period_slice_pos.test \ + 02_sedf_period_lower_neg.test \ + 03_sedf_slice_lower_neg.test \ + 04_sedf_slice_upper_neg.test \ + 05_sedf_extratime_pos.test \ + 06_sedf_extratime_disable_neg.test XFAIL_TESTS = diff -r dc50cdd66c5c -r 0ed4a312765b xen/Makefile --- a/xen/Makefile Tue Mar 14 20:10:21 2006 +++ b/xen/Makefile Tue Mar 14 20:50:35 2006 @@ -6,7 +6,7 @@ # All other places this is stored (eg. 
compile.h) should be autogenerated. export XEN_VERSION = 3 export XEN_SUBVERSION = 0 -export XEN_EXTRAVERSION = .0 +export XEN_EXTRAVERSION = -unstable export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) export BASEDIR := $(CURDIR) diff -r dc50cdd66c5c -r 0ed4a312765b xen/Rules.mk --- a/xen/Rules.mk Tue Mar 14 20:10:21 2006 +++ b/xen/Rules.mk Tue Mar 14 20:50:35 2006 @@ -4,7 +4,6 @@ # 'make clean' before rebuilding. # verbose ?= n -debug ?= n perfc ?= n perfc_arrays?= n crash_debug ?= n @@ -47,12 +46,7 @@ CFLAGS += -g -D__XEN__ -ifneq ($(debug),y) -CFLAGS += -DNDEBUG -ifeq ($(verbose),y) -CFLAGS += -DVERBOSE -endif -else +ifneq ($(debug)$(verbose),nn) CFLAGS += -DVERBOSE endif diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Tue Mar 14 20:10:21 2006 +++ b/xen/arch/ia64/Rules.mk Tue Mar 14 20:50:35 2006 @@ -12,7 +12,7 @@ -I$(BASEDIR)/include/asm-ia64/linux-null \ -I$(BASEDIR)/arch/ia64/linux -I$(BASEDIR)/arch/ia64/linux-xen -CFLAGS := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing +CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing #CFLAGS += -O3 # -O3 over-inlines making debugging tough! CFLAGS += -O2 # but no optimization causes compile errors! 
#CFLAGS += -iwithprefix include -Wall -DMONITOR_BASE=$(MONITOR_BASE) diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Tue Mar 14 20:50:35 2006 @@ -22,7 +22,7 @@ #include <xen/config.h> #include <xen/errno.h> #include <asm/vmx_vcpu.h> -//#include <public/xen.h> +#include <xen/guest_access.h> #include <public/event_channel.h> #include <asm/vmmu.h> #include <asm/tlb.h> @@ -100,7 +100,7 @@ VCPU *vcpu=current; u64 r32,ret; vcpu_get_gr_nat(vcpu,16,&r32); - ret=do_dom0_op((dom0_op_t *)r32); + ret=do_dom0_op(guest_handle_from_ptr(r32, dom0_op_t)); vcpu_set_gr(vcpu, 8, ret, 0); vmx_vcpu_increment_iip(vcpu); @@ -111,7 +111,7 @@ VCPU *vcpu=current; u64 r32,ret; vcpu_get_gr_nat(vcpu,16,&r32); - ret=do_event_channel_op((evtchn_op_t *)r32); + ret=do_event_channel_op(guest_handle_from_ptr(r32, evtchn_op_t)); vcpu_set_gr(vcpu, 8, ret, 0); vmx_vcpu_increment_iip(vcpu); } @@ -122,7 +122,7 @@ u64 r32,r33,ret; vcpu_get_gr_nat(vcpu,16,&r32); vcpu_get_gr_nat(vcpu,17,&r33); - ret=do_xen_version((int )r32,(void *)r33); + ret=do_xen_version((int )r32,guest_handle_from_ptr(r33, void)); vcpu_set_gr(vcpu, 8, ret, 0); vmx_vcpu_increment_iip(vcpu); } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/ia64/xen/hypercall.c Tue Mar 14 20:50:35 2006 @@ -10,6 +10,7 @@ #include <xen/sched.h> #include <xen/hypercall.h> #include <xen/multicall.h> +#include <xen/guest_access.h> #include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ #include <asm/sal.h> /* FOR struct ia64_sal_retval */ @@ -175,7 +176,8 @@ (int) vcpu_get_gr(v,33)); break; case __HYPERVISOR_dom0_op: - regs->r8 = do_dom0_op((struct dom0_op *) regs->r14); + regs->r8 = do_dom0_op(guest_handle_from_ptr(regs->r14, + dom0_op_t)); break; case __HYPERVISOR_memory_op: @@ -194,30 +196,30 @@ regs->r8 = reservation.nr_extents; break; 
default: - regs->r8 = do_memory_op((int) regs->r14, (void *)regs->r15); + regs->r8 = do_memory_op((int) regs->r14, guest_handle_from_ptr(regs->r15, void)); break; } } break; case __HYPERVISOR_event_channel_op: - regs->r8 = do_event_channel_op((struct evtchn_op *) regs->r14); + regs->r8 = do_event_channel_op(guest_handle_from_ptr(regs->r14, evtchn_op_t)); break; case __HYPERVISOR_grant_table_op: - regs->r8 = do_grant_table_op((unsigned int) regs->r14, (void *) regs->r15, (unsigned int) regs->r16); + regs->r8 = do_grant_table_op((unsigned int) regs->r14, guest_handle_from_ptr(regs->r15, void), (unsigned int) regs->r16); break; case __HYPERVISOR_console_io: - regs->r8 = do_console_io((int) regs->r14, (int) regs->r15, (char *) regs->r16); + regs->r8 = do_console_io((int) regs->r14, (int) regs->r15, guest_handle_from_ptr(regs->r16, char)); break; case __HYPERVISOR_xen_version: - regs->r8 = do_xen_version((int) regs->r14, (void *) regs->r15); + regs->r8 = do_xen_version((int) regs->r14, guest_handle_from_ptr(regs->r15, void)); break; case __HYPERVISOR_multicall: - regs->r8 = do_multicall((struct multicall_entry *) regs->r14, (unsigned int) regs->r15); + regs->r8 = do_multicall(guest_handle_from_ptr(regs->r14, multicall_entry_t), (unsigned int) regs->r15); break; default: diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/ia64/xen/process.c --- a/xen/arch/ia64/xen/process.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/ia64/xen/process.c Tue Mar 14 20:50:35 2006 @@ -810,7 +810,6 @@ case 'l': arg = (unsigned long)va_arg(args, unsigned long); break; - case 'p': case 'h': arg = (unsigned long)va_arg(args, void *); break; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/Rules.mk --- a/xen/arch/x86/Rules.mk Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/Rules.mk Tue Mar 14 20:50:35 2006 @@ -9,14 +9,10 @@ supervisor_mode_kernel ?= n CFLAGS += -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -CFLAGS += -iwithprefix include -Wall -Werror -Wno-pointer-arith -pipe +CFLAGS += -iwithprefix 
include -Werror -Wno-pointer-arith -pipe CFLAGS += -I$(BASEDIR)/include CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-generic CFLAGS += -I$(BASEDIR)/include/asm-x86/mach-default - -ifneq ($(debug),y) -CFLAGS += -O3 -fomit-frame-pointer -endif # Prevent floating-point variables from creeping into Xen. CFLAGS += -msoft-float @@ -26,19 +22,20 @@ CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector) CFLAGS += $(call test-gcc-flag,$(CC),-fno-stack-protector-all) -ifeq ($(TARGET_SUBARCH),x86_32) -CFLAGS += -m32 -march=i686 -LDFLAGS += -m elf_i386 -ifeq ($(pae),y) +ifeq ($(TARGET_SUBARCH)$(pae),x86_32y) CFLAGS += -DCONFIG_X86_PAE=1 endif -endif + ifeq ($(supervisor_mode_kernel),y) CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1 endif +ifeq ($(XEN_TARGET_ARCH),x86_32) +LDFLAGS += -m elf_i386 +endif + ifeq ($(TARGET_SUBARCH),x86_64) -CFLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks +CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks CFLAGS += -fno-asynchronous-unwind-tables LDFLAGS += -m elf_x86_64 endif diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/apic.c Tue Mar 14 20:50:35 2006 @@ -892,9 +892,9 @@ return result; } -unsigned int get_apic_bus_scale(void) -{ - return bus_scale; +u32 get_apic_bus_cycle(void) +{ + return bus_cycle; } static unsigned int calibration_result; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/audit.c --- a/xen/arch/x86/audit.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/audit.c Tue Mar 14 20:50:35 2006 @@ -350,7 +350,7 @@ unmap_domain_page(pt); } - void adjust_shadow_tables() + void adjust_shadow_tables(void) { struct shadow_status *a; unsigned long smfn, gmfn; @@ -402,7 +402,7 @@ } } - void adjust_oos_list() + void adjust_oos_list(void) { struct out_of_sync_entry *oos; @@ -426,7 +426,7 @@ } } - void adjust_for_pgtbase() + void adjust_for_pgtbase(void) { struct vcpu *v; @@ -443,7 +443,7 @@ } } - void adjust_guest_pages() + void adjust_guest_pages(void) { 
struct list_head *list_ent = d->page_list.next; struct page_info *page; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/dom0_ops.c --- a/xen/arch/x86/dom0_ops.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/dom0_ops.c Tue Mar 14 20:50:35 2006 @@ -458,7 +458,7 @@ { memcpy(c, &v->arch.guest_context, sizeof(*c)); - if ( HVM_DOMAIN(v) ) + if ( hvm_guest(v) ) { hvm_store_cpu_guest_regs(v, &c->user_regs); hvm_store_cpu_guest_ctrl_regs(v, c->ctrlreg); @@ -473,9 +473,9 @@ c->flags = 0; if ( test_bit(_VCPUF_fpu_initialised, &v->vcpu_flags) ) c->flags |= VGCF_I387_VALID; - if ( KERNEL_MODE(v, &v->arch.guest_context.user_regs) ) + if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) ) c->flags |= VGCF_IN_KERNEL; - if ( HVM_DOMAIN(v) ) + if ( hvm_guest(v) ) c->flags |= VGCF_HVM_GUEST; c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table); diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/domain.c Tue Mar 14 20:50:35 2006 @@ -312,7 +312,8 @@ goto fail_nomem; memset(d->shared_info, 0, PAGE_SIZE); - SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); + share_xen_page_with_guest( + virt_to_page(d->shared_info), d, XENSHARE_writable); } return 0; @@ -451,6 +452,43 @@ return 0; } +long +arch_do_vcpu_op( + int cmd, struct vcpu *v, GUEST_HANDLE(void) arg) +{ + long rc = 0; + + switch ( cmd ) + { + case VCPUOP_register_runstate_memory_area: + { + struct vcpu_register_runstate_memory_area area; + + rc = -EINVAL; + if ( v != current ) + break; + + rc = -EFAULT; + if ( copy_from_guest(&area, arg, 1) ) + break; + + if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) ) + break; + + rc = 0; + v->runstate_guest = area.addr.v; + __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate)); + + break; + } + + default: + rc = -ENOSYS; + break; + } + + return rc; +} void new_thread(struct vcpu *d, unsigned long start_pc, @@ -682,7 +720,7 @@ stack_regs, CTXT_SWITCH_STACK_BYTES); unlazy_fpu(p); - 
if ( !HVM_DOMAIN(p) ) + if ( !hvm_guest(p) ) { save_segments(p); } @@ -711,7 +749,7 @@ loaddebug(&n->arch.guest_context, 7); } - if ( !HVM_DOMAIN(n) ) + if ( !hvm_guest(n) ) { set_int80_direct_trap(n); switch_kernel_stack(n, cpu); @@ -775,7 +813,7 @@ /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); - if ( !HVM_DOMAIN(next) ) + if ( !hvm_guest(next) ) { load_LDT(next); load_segments(next); @@ -831,7 +869,6 @@ { \ case 'i': __arg = (unsigned long)va_arg(args, unsigned int); break; \ case 'l': __arg = (unsigned long)va_arg(args, unsigned long); break; \ - case 'p': __arg = (unsigned long)va_arg(args, void *); break; \ case 'h': __arg = (unsigned long)va_arg(args, void *); break; \ default: __arg = 0; BUG(); \ } \ @@ -994,7 +1031,7 @@ v->arch.guest_table_user = mk_pagetable(0); } - if ( HVM_DOMAIN(v) ) + if ( hvm_guest(v) ) hvm_relinquish_guest_resources(v); } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/hvm.c Tue Mar 14 20:50:35 2006 @@ -186,7 +186,7 @@ { struct hvm_domain *platform; - if ( !HVM_DOMAIN(current) || (current->vcpu_id != 0) ) + if ( !hvm_guest(current) || (current->vcpu_id != 0) ) return; shadow_direct_map_init(d); @@ -205,12 +205,10 @@ } } -void pic_irq_request(int *interrupt_request, int level) -{ - if (level) - *interrupt_request = 1; - else - *interrupt_request = 0; +void pic_irq_request(void *data, int level) +{ + int *interrupt_request = data; + *interrupt_request = level; } void hvm_pic_assist(struct vcpu *v) @@ -324,7 +322,7 @@ int rc = 0; /* current must be HVM domain BSP */ - if ( !(HVM_DOMAIN(bsp) && bsp->vcpu_id == 0) ) { + if ( !(hvm_guest(bsp) && bsp->vcpu_id == 0) ) { printk("Not calling hvm_bringup_ap from BSP context.\n"); domain_crash_synchronous(); } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/i8259.c --- a/xen/arch/x86/hvm/i8259.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/i8259.c Tue Mar 14 
20:50:35 2006 @@ -407,7 +407,7 @@ pic_reset(s); } -void pic_init(struct hvm_virpic *s, void (*irq_request)(), +void pic_init(struct hvm_virpic *s, void (*irq_request)(void *, int), void *irq_request_opaque) { memset(s, 0, sizeof(*s)); @@ -422,7 +422,8 @@ return; } -void pic_set_alt_irq_func(struct hvm_virpic *s, void (*alt_irq_func)(), +void pic_set_alt_irq_func(struct hvm_virpic *s, + void (*alt_irq_func)(void *, int, int), void *alt_irq_opaque) { s->alt_irq_func = alt_irq_func; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/svm/intr.c --- a/xen/arch/x86/hvm/svm/intr.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/svm/intr.c Tue Mar 14 20:50:35 2006 @@ -187,7 +187,7 @@ } /* let's inject this interrupt */ TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0); - svm_inject_extint(v, intr_vector, VMX_INVALID_ERROR_CODE); + svm_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE); interrupt_post_injection(v, intr_vector, intr_type); break; case VLAPIC_DELIV_MODE_SMI: diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/svm/x86_64/exits.S --- a/xen/arch/x86/hvm/svm/x86_64/exits.S Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Tue Mar 14 20:50:35 2006 @@ -157,6 +157,7 @@ jnz svm_process_softirqs svm_restore_all_guest: call svm_intr_assist + call svm_asid call svm_load_cr2 sti /* diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/vlapic.c Tue Mar 14 20:50:35 2006 @@ -37,7 +37,9 @@ /* XXX remove this definition after GFW enabled */ #define VLAPIC_NO_BIOS -extern unsigned int get_apic_bus_scale(void); +extern u32 get_apic_bus_cycle(void); + +#define APIC_BUS_CYCLE_NS (((s_time_t)get_apic_bus_cycle()) / 1000) static unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] = { @@ -50,7 +52,8 @@ result = find_highest_bit((uint32_t *)&vlapic->irr[0], INTR_LEN_32); - if (result != -1 && result < 16) { + if ( result != -1 && result < 16 ) + { printk("VLAPIC: 
irr on reserved bits %d\n ", result); domain_crash_synchronous(); } @@ -78,10 +81,11 @@ result = find_highest_bit((uint32_t *)&vlapic->isr[0], INTR_LEN_32); - if (result != -1 && result < 16) { + if ( result != -1 && result < 16 ) + { int i = 0; printk("VLAPIC: isr on reserved bits %d, isr is\n ", result); - for (i = 0; i < INTR_LEN_32; i += 2) + for ( i = 0; i < INTR_LEN_32; i += 2 ) printk("%d: 0x%08x%08x\n", i, vlapic->isr[i], vlapic->isr[i+1]); return -1; } @@ -97,12 +101,12 @@ tpr = (vlapic->task_priority >> 4) & 0xf; /* we want 7:4 */ isr = vlapic_find_highest_isr(vlapic); - if (isr != -1) + if ( isr != -1 ) isrv = (isr >> 4) & 0xf; /* ditto */ else isrv = 0; - if (tpr >= isrv) + if ( tpr >= isrv ) ppr = vlapic->task_priority & 0xff; else ppr = isrv << 4; /* low 4 bits of PPR have to be cleared */ @@ -110,7 +114,7 @@ vlapic->processor_priority = ppr; HVM_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT, - "vlapic_update_ppr: vlapic %p ppr %x isr %x isrv %x", + "vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x.", vlapic, ppr, isr, isrv); return ppr; @@ -124,46 +128,52 @@ int result = 0; struct vlapic *target = VLAPIC(v); - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest: " - "target %p source %p dest %x dest_mode %x short_hand %x " - "delivery_mode %x", + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "target %p, source %p, dest 0x%x, " + "dest_mode 0x%x, short_hand 0x%x, delivery_mode 0x%x.", target, source, dest, dest_mode, short_hand, delivery_mode); - if ( unlikely(!target) && - ( (delivery_mode != VLAPIC_DELIV_MODE_INIT) && - (delivery_mode != VLAPIC_DELIV_MODE_STARTUP) && - (delivery_mode != VLAPIC_DELIV_MODE_NMI) )) { - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest " - "uninitialized target v %p delivery_mode %x dest %x\n", - v, delivery_mode, dest); - return result; - } - - switch (short_hand) { + if ( unlikely(target == NULL) && + ((delivery_mode != VLAPIC_DELIV_MODE_INIT) && + (delivery_mode != VLAPIC_DELIV_MODE_STARTUP) && + (delivery_mode != VLAPIC_DELIV_MODE_NMI)) ) + { + 
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "uninitialized target vcpu %p, " + "delivery_mode 0x%x, dest 0x%x.\n", v, delivery_mode, dest); + return result; + } + + switch ( short_hand ) { case VLAPIC_NO_SHORTHAND: - if (!dest_mode) { /* Physical */ - result = ((target ? target->id : v->vcpu_id ) == dest); - } else { /* Logical */ - if (!target) + if ( !dest_mode ) /* Physical */ + { + result = (target != NULL ? target->id : v->vcpu_id) == dest; + } + else /* Logical */ + { + if ( target == NULL ) break; - if (((target->dest_format >> 28) & 0xf) == 0xf) { /* Flat mode */ + if ( ((target->dest_format >> 28) & 0xf) == 0xf ) /* Flat mode */ + { result = (target->logical_dest >> 24) & dest; - } else { - if ((delivery_mode == VLAPIC_DELIV_MODE_LPRI) && - (dest == 0xff)) { + } + else + { + if ( (delivery_mode == VLAPIC_DELIV_MODE_LPRI) && + (dest == 0xff) ) + { /* What shall we do now? */ printk("Broadcast IPI with lowest priority " "delivery mode\n"); domain_crash_synchronous(); } result = (target->logical_dest == (dest & 0xf)) ? 
- ((target->logical_dest >> 4) & (dest >> 4)) : 0; + ((target->logical_dest >> 4) & (dest >> 4)) : 0; } } break; case VLAPIC_SHORTHAND_SELF: - if (target == source) + if ( target == source ) result = 1; break; @@ -172,7 +182,7 @@ break; case VLAPIC_SHORTHAND_EXCLUDE_SELF: - if (target != source) + if ( target != source ) result = 1; break; @@ -190,27 +200,30 @@ static int vlapic_accept_irq(struct vcpu *v, int delivery_mode, int vector, int level, int trig_mode) { - int result = 0; + int result = 0; struct vlapic *vlapic = VLAPIC(v); - switch (delivery_mode) { + switch ( delivery_mode ) { case VLAPIC_DELIV_MODE_FIXED: case VLAPIC_DELIV_MODE_LPRI: /* FIXME add logic for vcpu on reset */ - if (unlikely(!vlapic || !vlapic_enabled(vlapic))) - return result; - - if (test_and_set_bit(vector, &vlapic->irr[0])) { + if ( unlikely(vlapic == NULL || !vlapic_enabled(vlapic)) ) + break; + + if ( test_and_set_bit(vector, &vlapic->irr[0]) ) + { printk("<vlapic_accept_irq>" - "level trig mode repeatedly for vector %d\n", vector); - result = 0; - } else { - if (level) { - printk("<vlapic_accept_irq> level trig mode for vector %d\n", vector); - set_bit(vector, &vlapic->tmr[0]); - } - } - evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->vcpu)); + "level trig mode repeatedly for vector %d\n", vector); + break; + } + + if ( level ) + { + printk("<vlapic_accept_irq> level trig mode for vector %d\n", + vector); + set_bit(vector, &vlapic->tmr[0]); + } + evtchn_set_pending(v, iopacket_port(v)); result = 1; break; @@ -225,11 +238,13 @@ break; case VLAPIC_DELIV_MODE_INIT: - if ( !level && trig_mode == 1 ) { //Deassert + if ( !level && trig_mode == 1 ) //Deassert printk("This hvm_vlapic is for P4, no work for De-assert init\n"); - } else { + else + { /* FIXME How to check the situation after vcpu reset? 
*/ - if ( test_and_clear_bit(_VCPUF_initialised, &v->vcpu_flags) ) { + if ( test_and_clear_bit(_VCPUF_initialised, &v->vcpu_flags) ) + { printk("Reset hvm vcpu not supported yet\n"); domain_crash_synchronous(); } @@ -247,7 +262,8 @@ v->arch.hvm_vcpu.init_sipi_sipi_state = HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM; - if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) { + if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + { printk("SIPI for initialized vcpu vcpuid %x\n", v->vcpu_id); domain_crash_synchronous(); } @@ -257,7 +273,7 @@ break; default: - printk("TODO: not support interrup type %x\n", delivery_mode); + printk("TODO: not support interrupt type %x\n", delivery_mode); domain_crash_synchronous(); break; } @@ -276,13 +292,15 @@ int next, old; struct vlapic* target = NULL; - if (dest_mode == 0) { //Physical mode - printk("<apic_round_robin> lowest priority for physical mode\n"); + if ( dest_mode == 0 ) //Physical mode + { + printk("<apic_round_robin> lowest priority for physical mode.\n"); return NULL; } - if (!bitmap) { - printk("<apic_round_robin> no bit on bitmap\n"); + if ( !bitmap ) + { + printk("<apic_round_robin> no bit set in bitmap.\n"); return NULL; } @@ -292,10 +310,12 @@ do { /* the vcpu array is arranged according to vcpu_id */ - if (test_bit(next, &bitmap)) { + if ( test_bit(next, &bitmap) ) + { target = d->vcpu[next]->arch.hvm_vcpu.vlapic; - if (!target || !vlapic_enabled(target)) { + if ( target == NULL || !vlapic_enabled(target) ) + { printk("warning: targe round robin local apic disabled\n"); /* XXX should we domain crash?? 
Or should we return NULL */ } @@ -303,48 +323,47 @@ } next ++; - if (!d->vcpu[next] || - !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags) || - next == MAX_VIRT_CPUS) + if ( !d->vcpu[next] || + !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags) || + next == MAX_VIRT_CPUS ) next = 0; - }while(next != old); + } while ( next != old ); d->arch.hvm_domain.round_info[vector] = next; spin_unlock(&d->arch.hvm_domain.round_robin_lock); + return target; } -void -vlapic_EOI_set(struct vlapic *vlapic) +void vlapic_EOI_set(struct vlapic *vlapic) { int vector = vlapic_find_highest_isr(vlapic); /* Not every write EOI will has correpsoning ISR, one example is when Kernel check timer on setup_IO_APIC */ - if (vector == -1) { + if ( vector == -1 ) return ; - } - - vlapic_clear_isr(vlapic, vector); + + clear_bit(vector, &vlapic->isr[0]); vlapic_update_ppr(vlapic); - if (test_and_clear_bit(vector, &vlapic->tmr[0])) + if ( test_and_clear_bit(vector, &vlapic->tmr[0]) ) ioapic_update_EOI(vlapic->domain, vector); } int vlapic_check_vector(struct vlapic *vlapic, unsigned char dm, int vector) { - if ((dm == VLAPIC_DELIV_MODE_FIXED) && (vector < 16)) { + if ( (dm == VLAPIC_DELIV_MODE_FIXED) && (vector < 16) ) + { vlapic->err_status |= 0x40; vlapic_accept_irq(vlapic->vcpu, VLAPIC_DELIV_MODE_FIXED, - vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), 0, 0); - printk("<vlapic_check_vector>: check fail\n"); + vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), 0, 0); + printk("<vlapic_check_vector>: check failed.\n"); return 0; } return 1; } - void vlapic_ipi(struct vlapic *vlapic) { @@ -360,68 +379,47 @@ struct vcpu *v = NULL; uint32_t lpr_map; - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_ipi: " - "icr_high %x icr_low %x " - "short_hand %x dest %x trig_mode %x level %x " - "dest_mode %x delivery_mode %x vector %x", - vlapic->icr_high, vlapic->icr_low, - short_hand, dest, trig_mode, level, dest_mode, - delivery_mode, vector); - - for_each_vcpu ( vlapic->domain, v ) { - if (vlapic_match_dest(v, 
vlapic, short_hand, - dest, dest_mode, delivery_mode)) { - if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) { + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr_high 0x%x, icr_low 0x%x, " + "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " + "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x.", + vlapic->icr_high, vlapic->icr_low, short_hand, dest, + trig_mode, level, dest_mode, delivery_mode, vector); + + for_each_vcpu ( vlapic->domain, v ) + { + if ( vlapic_match_dest(v, vlapic, short_hand, + dest, dest_mode, delivery_mode) ) + { + if ( delivery_mode == VLAPIC_DELIV_MODE_LPRI ) set_bit(v->vcpu_id, &lpr_map); - } else + else vlapic_accept_irq(v, delivery_mode, vector, level, trig_mode); } } - if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) { + if ( delivery_mode == VLAPIC_DELIV_MODE_LPRI ) + { v = vlapic->vcpu; target = apic_round_robin(v->domain, dest_mode, vector, lpr_map); - if (target) + if ( target ) vlapic_accept_irq(target->vcpu, delivery_mode, vector, level, trig_mode); } } -static void vlapic_begin_timer(struct vlapic *vlapic) -{ - s_time_t cur = NOW(), offset; - - offset = vlapic->timer_current * - (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter; - vlapic->vlapic_timer.expires = cur + offset; - - set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires ); - - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: " - "bus_scale %x now %08x%08x expire %08x%08x " - "offset %08x%08x current %x", - get_apic_bus_scale(), (uint32_t)(cur >> 32), (uint32_t)cur, - (uint32_t)(vlapic->vlapic_timer.expires >> 32), - (uint32_t) vlapic->vlapic_timer.expires, - (uint32_t)(offset >> 32), (uint32_t)offset, - vlapic->timer_current); -} - -void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset, +static void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset, unsigned int len, unsigned int *result) { - if (len != 4) { - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "local apic read with len=%d (should be 4)", len); - } + if ( len != 4 ) + 
printk("<vlapic_read_aligned> read with len=%d (should be 4).\n", len); *result = 0; - switch (offset) { + switch ( offset ) { case APIC_ID: - *result = (vlapic->id) << 24; + *result = vlapic->id << 24; break; case APIC_LVR: @@ -433,7 +431,7 @@ break; case APIC_ARBPRI: - printk("Access local APIC ARBPRI register which is for P6\n"); + printk("access local APIC ARBPRI register which is for P6\n"); break; case APIC_PROCPRI: @@ -489,7 +487,7 @@ break; case APIC_ESR: - if (vlapic->err_write_count) + if ( vlapic->err_write_count ) *result = vlapic->err_status; break; @@ -501,51 +499,58 @@ *result = vlapic->icr_high; break; - case APIC_LVTT: /* LVT Timer Reg */ - case APIC_LVTTHMR: /* LVT Thermal Monitor */ - case APIC_LVTPC: /* LVT Performance Counter */ - case APIC_LVT0: /* LVT LINT0 Reg */ - case APIC_LVT1: /* LVT Lint1 Reg */ - case APIC_LVTERR: /* LVT Error Reg */ + case APIC_LVTT: /* LVT Timer Reg */ + case APIC_LVTTHMR: /* LVT Thermal Monitor */ + case APIC_LVTPC: /* LVT Performance Counter */ + case APIC_LVT0: /* LVT LINT0 Reg */ + case APIC_LVT1: /* LVT Lint1 Reg */ + case APIC_LVTERR: /* LVT Error Reg */ *result = vlapic->lvt[(offset - APIC_LVTT) >> 4]; break; case APIC_TMICT: - *result = vlapic->timer_initial; - break; - - case APIC_TMCCT: //Timer CCR - { - uint32_t counter; - s_time_t passed, cur = NOW(); - - if (cur <= vlapic->timer_current_update) { - passed = ~0x0LL - vlapic->timer_current_update + cur; - HVM_DBG_LOG(DBG_LEVEL_VLAPIC,"time elapsed"); - }else - passed = cur - vlapic->timer_current_update; - - counter = (passed * get_apic_bus_scale()) / (262144* vlapic->timer_divide_counter); - if (vlapic->timer_current > counter) - *result = vlapic->timer_current - counter; - else { - if (!vlapic_lvt_timer_period(vlapic)) - *result = 0; - //FIXME should we add interrupt here? 
+ *result = vlapic->timer_initial_count; + break; + + case APIC_TMCCT: //Timer CCR + { + uint32_t counter_passed; + s_time_t passed, now = NOW(); + + if ( unlikely(now <= vlapic->timer_current_update) ) + { + passed = ~0x0LL - vlapic->timer_current_update + now; + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "time elapsed."); + } + else + passed = now - vlapic->timer_current_update; + + counter_passed = passed / + (APIC_BUS_CYCLE_NS * vlapic->timer_divide_count); + vlapic->timer_current_count -= counter_passed; + if ( vlapic->timer_current_count <= 0 ) + { + if ( unlikely(!vlapic_lvt_timer_period(vlapic)) ) + { + vlapic->timer_current_count = 0; + // FIXME: should we add interrupt here? + } else - //*result = counter % vlapic->timer_initial; - *result = vlapic->timer_initial - (counter - vlapic->timer_current); + { + do { + vlapic->timer_current_count += vlapic->timer_initial_count; + } while ( vlapic->timer_current_count < 0 ); + } } - vlapic->timer_current = *result; - vlapic->timer_current_update = NOW(); + + *result = vlapic->timer_current_count; + vlapic->timer_current_update = now; HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, - "initial %x timer current %x " - "update %08x%08x cur %08x%08x offset %d", - vlapic->timer_initial, vlapic->timer_current, - (uint32_t)(vlapic->timer_current_update >> 32), - (uint32_t)vlapic->timer_current_update , - (uint32_t)(cur >> 32), (uint32_t)cur, counter); + "timer initial count 0x%x, timer current count 0x%x, " + "update 0x%016"PRIx64", now 0x%016"PRIx64", offset 0x%x.", + vlapic->timer_initial_count, vlapic->timer_current_count, + vlapic->timer_current_update, now, counter_passed); } break; @@ -554,7 +559,7 @@ break; default: - printk("Read local APIC address %x not implemented\n",offset); + printk("Read local APIC address 0x%x not implemented\n", offset); *result = 0; break; } @@ -569,17 +574,16 @@ struct vlapic *vlapic = VLAPIC(v); unsigned int offset = address - vlapic->base_address; - if ( len != 4) { - /* some bugs on kernel cause read this 
with byte*/ + /* some bugs on kernel cause read this with byte*/ + if ( len != 4 ) HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "Local APIC read with len = %lx, should be 4 instead\n", + "read with len=0x%lx, should be 4 instead.\n", len); - } alignment = offset & 0x3; vlapic_read_aligned(vlapic, offset & ~0x3, 4, &tmp); - switch (len) { + switch ( len ) { case 1: result = *((unsigned char *)&tmp + alignment); break; @@ -593,14 +597,14 @@ break; default: - printk("Local APIC read with len = %lx, should be 4 instead\n", len); + printk("Local APIC read with len=0x%lx, should be 4 instead.\n", len); domain_crash_synchronous(); break; } - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "vlapic_read offset %x with length %lx and the result is %lx", - offset, len, result); + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "offset 0x%x with length 0x%lx, " + "and the result is 0x%lx.", offset, len, result); + return result; } @@ -610,40 +614,42 @@ struct vlapic *vlapic = VLAPIC(v); unsigned int offset = address - vlapic->base_address; - if (offset != 0xb0) + if ( offset != 0xb0 ) HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "vlapic_write offset %x with length %lx source %lx", - offset, len, val); + "offset 0x%x with length 0x%lx, and value is 0x%lx.", + offset, len, val); /* * According to IA 32 Manual, all resgiters should be accessed with * 32 bits alignment. 
*/ - if (len != 4) { + if ( len != 4 ) + { unsigned int tmp; unsigned char alignment; /* Some kernel do will access with byte/word alignment*/ printk("Notice: Local APIC write with len = %lx\n",len); alignment = offset & 0x3; - tmp = vlapic_read(v, offset & (~0x3), 4); - switch (len) { + tmp = vlapic_read(v, offset & ~0x3, 4); + switch ( len ) { case 1: /* XXX the saddr is a tmp variable from caller, so should be ok - But we should still change the following ref to val to + But we should still change the following ref to val to local variable later */ - val = (tmp & ~(0xff << alignment)) | - ((val & 0xff) << alignment); + val = (tmp & ~(0xff << alignment)) | + ((val & 0xff) << alignment); break; case 2: - if (alignment != 0x0 && alignment != 0x2) { + if ( alignment != 0x0 && alignment != 0x2 ) + { printk("alignment error for vlapic with len == 2\n"); - domain_crash_synchronous(); + domain_crash_synchronous(); } val = (tmp & ~(0xffff << alignment)) | - ((val & 0xffff) << alignment); + ((val & 0xffff) << alignment); break; case 3: @@ -661,7 +667,7 @@ offset &= 0xff0; - switch (offset) { + switch ( offset ) { case APIC_ID: /* Local APIC ID */ vlapic->id = ((val) >> 24) & VAPIC_ID_MASK; break; @@ -685,9 +691,10 @@ case APIC_SPIV: vlapic->spurious_vec = val & 0x1ff; - if (!(vlapic->spurious_vec & 0x100)) { - int i = 0; - for (i = 0; i < VLAPIC_LVT_NUM; i++) + if ( !(vlapic->spurious_vec & 0x100) ) + { + int i; + for ( i = 0; i < VLAPIC_LVT_NUM; i++ ) vlapic->lvt[i] |= 0x10000; vlapic->status |= VLAPIC_SOFTWARE_DISABLE_MASK; } @@ -697,7 +704,7 @@ case APIC_ESR: vlapic->err_write_count = !vlapic->err_write_count; - if (!vlapic->err_write_count) + if ( !vlapic->err_write_count ) vlapic->err_status = 0; break; @@ -711,30 +718,32 @@ vlapic->icr_high = val & 0xff000000; break; - case APIC_LVTT: // LVT Timer Reg - case APIC_LVTTHMR: // LVT Thermal Monitor - case APIC_LVTPC: // LVT Performance Counter - case APIC_LVT0: // LVT LINT0 Reg - case APIC_LVT1: // LVT Lint1 Reg - case 
APIC_LVTERR: // LVT Error Reg + case APIC_LVTT: // LVT Timer Reg + case APIC_LVTTHMR: // LVT Thermal Monitor + case APIC_LVTPC: // LVT Performance Counter + case APIC_LVT0: // LVT LINT0 Reg + case APIC_LVT1: // LVT Lint1 Reg + case APIC_LVTERR: // LVT Error Reg { int vt = (offset - APIC_LVTT) >> 4; vlapic->lvt[vt] = val & vlapic_lvt_mask[vt]; - if (vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK) + if ( vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK ) vlapic->lvt[vt] |= VLAPIC_LVT_BIT_MASK; /* On hardware, when write vector less than 0x20 will error */ vlapic_check_vector(vlapic, vlapic_lvt_dm(vlapic->lvt[vt]), - vlapic_lvt_vector(vlapic, vt)); - - if (!vlapic->vcpu_id && (offset == APIC_LVT0)) { - if ((vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_DELIMOD) - == 0x700) { - if (!(vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_MASK)) { + vlapic_lvt_vector(vlapic, vt)); + + if ( !vlapic->vcpu_id && (offset == APIC_LVT0) ) + { + if ( (vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_DELIMOD) + == 0x700 ) + { + if ( vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_MASK ) + clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + else set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); - }else - clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); } else clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); @@ -744,33 +753,41 @@ break; case APIC_TMICT: - if (vlapic_timer_active(vlapic)) - stop_timer(&(vlapic->vlapic_timer)); - - vlapic->timer_initial = val; - vlapic->timer_current = val; - vlapic->timer_current_update = NOW(); - - vlapic_begin_timer(vlapic); - - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "timer_init %x timer_current %x" - "timer_current_update %08x%08x", - vlapic->timer_initial, vlapic->timer_current, - (uint32_t)(vlapic->timer_current_update >> 32), - (uint32_t)vlapic->timer_current_update); + { + s_time_t now = NOW(), offset; + + if ( vlapic_timer_active(vlapic) ) + stop_timer(&vlapic->vlapic_timer); + + vlapic->timer_initial_count = val; + vlapic->timer_current_count = val; + 
vlapic->timer_current_update = now; + + offset = APIC_BUS_CYCLE_NS * + vlapic->timer_divide_count * + vlapic->timer_initial_count; + + set_timer(&vlapic->vlapic_timer, now + offset); + + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, + "bus cycle is %"PRId64"ns, now 0x%016"PRIx64", " + "timer initial count 0x%x, offset 0x%016"PRIx64", " + "expire @ 0x%016"PRIx64".", + APIC_BUS_CYCLE_NS, now, vlapic->timer_initial_count, + offset, now + offset); + } break; case APIC_TDCR: { - //FIXME clean this code - unsigned char tmp1,tmp2; - tmp1 = (val & 0xf); - tmp2 = ((tmp1 & 0x3 )|((tmp1 & 0x8) >>1)) + 1; - vlapic->timer_divide_counter = 0x1<<tmp2; - - HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, - "timer divider is 0x%x", - vlapic->timer_divide_counter); + unsigned int tmp1, tmp2; + + tmp1 = val & 0xf; + tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; + vlapic->timer_divide_count = 0x1 << (tmp2 & 0x7); + + HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, "timer divide count is 0x%x", + vlapic->timer_divide_count); } break; @@ -784,9 +801,9 @@ { struct vlapic *vlapic = VLAPIC(v); - if (vlapic_global_enabled(vlapic) && - (addr >= vlapic->base_address) && - (addr <= (vlapic->base_address + VLOCAL_APIC_MEM_LENGTH))) + if ( vlapic_global_enabled(vlapic) && + (addr >= vlapic->base_address) && + (addr <= vlapic->base_address + VLOCAL_APIC_MEM_LENGTH) ) return 1; return 0; @@ -801,23 +818,21 @@ void vlapic_msr_set(struct vlapic *vlapic, uint64_t value) { /* When apic disabled */ - if (!vlapic) + if ( vlapic == NULL ) return; - if (vlapic->vcpu_id) + if ( vlapic->vcpu_id ) value &= ~MSR_IA32_APICBASE_BSP; vlapic->apic_base_msr = value; vlapic->base_address = vlapic_get_base_address(vlapic); - if (!(value & 0x800)) + if ( !(value & 0x800) ) set_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status ); HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "apic base msr = 0x%08x%08x,\nbase address = 0x%lx", - (uint32_t)(vlapic->apic_base_msr >> 32), - (uint32_t)vlapic->apic_base_msr, - vlapic->base_address); + "apic base msr is 0x%016"PRIx64", and base 
address is 0x%lx.", + vlapic->apic_base_msr, vlapic->base_address); } static inline int vlapic_get_init_id(struct vcpu *v) @@ -827,42 +842,51 @@ void vlapic_timer_fn(void *data) { - struct vlapic *vlapic; - - vlapic = data; - if (!vlapic_enabled(vlapic)) return; - - vlapic->timer_current_update = NOW(); - - if (vlapic_lvt_timer_enabled(vlapic)) { - if (!vlapic_irr_status(vlapic, - vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER))) { - test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER), - &vlapic->irr[0]); - } - else - vlapic->intr_pending_count[vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++; - evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->vcpu)); - } - - vlapic->timer_current_update = NOW(); - if (vlapic_lvt_timer_period(vlapic)) { + struct vlapic *vlapic = data; + struct vcpu *v; + uint32_t timer_vector; + s_time_t now; + + if ( unlikely(!vlapic_enabled(vlapic) || + !vlapic_lvt_timer_enabled(vlapic)) ) + return; + + v = vlapic->vcpu; + timer_vector = vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER); + now = NOW(); + + vlapic->timer_current_update = now; + + if ( test_and_set_bit(timer_vector, &vlapic->irr[0]) ) + vlapic->intr_pending_count[timer_vector]++; + + if ( vlapic_lvt_timer_period(vlapic) ) + { s_time_t offset; - vlapic->timer_current = vlapic->timer_initial; - offset = vlapic->timer_current * (262144/get_apic_bus_scale()) * vlapic->timer_divide_counter; - vlapic->vlapic_timer.expires = NOW() + offset; - set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires); - }else { - vlapic->timer_current = 0; - } + vlapic->timer_current_count = vlapic->timer_initial_count; + + offset = APIC_BUS_CYCLE_NS * + vlapic->timer_divide_count * + vlapic->timer_initial_count; + set_timer(&vlapic->vlapic_timer, now + offset); + } + else + vlapic->timer_current_count = 0; + +#if 0 + if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) + { + /* TODO: add guest time handling here */ + } +#endif HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, - "vlapic_timer_fn: now: %08x%08x 
expire %08x%08x init %x current %x", - (uint32_t)(NOW() >> 32),(uint32_t)NOW(), - (uint32_t)(vlapic->vlapic_timer.expires >> 32), - (uint32_t)vlapic->vlapic_timer.expires, - vlapic->timer_initial,vlapic->timer_current); + "now 0x%016"PRIx64", expire @ 0x%016"PRIx64", " + "timer initial count 0x%x, timer current count 0x%x.", + now, vlapic->vlapic_timer.expires, + vlapic->timer_initial_count, + vlapic->timer_current_count); } #if 0 @@ -873,7 +897,7 @@ int type; type = __fls(vlapic->direct_intr.deliver_mode); - if (type == -1) + if ( type == -1 ) return -1; *mode = type; @@ -881,15 +905,45 @@ } #endif -int -vlapic_accept_pic_intr(struct vcpu *v) +int vlapic_accept_pic_intr(struct vcpu *v) { struct vlapic *vlapic = VLAPIC(v); return vlapic ? test_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status) : 1; } -int cpu_get_apic_interrupt(struct vcpu* v, int *mode) +int cpu_get_apic_interrupt(struct vcpu *v, int *mode) +{ + struct vlapic *vlapic = VLAPIC(v); + + if ( vlapic && vlapic_enabled(vlapic) ) + { + int highest_irr = vlapic_find_highest_irr(vlapic); + + if ( highest_irr != -1 && highest_irr >= vlapic->processor_priority ) + { + if ( highest_irr < 0x10 ) + { + uint32_t err_vector; + + vlapic->err_status |= 0x20; + err_vector = vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR); + + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, + "Sending an illegal vector 0x%x.", highest_irr); + + set_bit(err_vector, &vlapic->irr[0]); + highest_irr = err_vector; + } + + *mode = VLAPIC_DELIV_MODE_FIXED; + return highest_irr; + } + } + return -1; +} + +int cpu_has_apic_interrupt(struct vcpu* v) { struct vlapic *vlapic = VLAPIC(v); @@ -897,47 +951,35 @@ int highest_irr = vlapic_find_highest_irr(vlapic); if (highest_irr != -1 && highest_irr >= vlapic->processor_priority) { - if (highest_irr < 0x10) { - vlapic->err_status |= 0x20; - /* XXX What will happen if this vector illegal stil */ - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "hvm_intr_assist: illegal vector number %x err_status %x", - highest_irr, vlapic_lvt_vector(vlapic, 
VLAPIC_LVT_ERROR)); - - set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), &vlapic->irr[0]); - highest_irr = vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR); - } - - *mode = VLAPIC_DELIV_MODE_FIXED; - return highest_irr; - } - } - return -1; -} - -void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) { - struct vlapic *vlapic = VLAPIC(v); - - if (!vlapic) + return 1; + } + } + return 0; +} + +void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) +{ + struct vlapic *vlapic = VLAPIC(v); + + if ( unlikely(vlapic == NULL) ) return; - switch (deliver_mode) { + switch ( deliver_mode ) { case VLAPIC_DELIV_MODE_FIXED: case VLAPIC_DELIV_MODE_LPRI: - vlapic_set_isr(vlapic, vector); - vlapic_clear_irr(vlapic, vector); + set_bit(vector, &vlapic->isr[0]); + clear_bit(vector, &vlapic->irr[0]); vlapic_update_ppr(vlapic); - if (vector == vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)) { + if ( vector == vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER) ) + { vlapic->intr_pending_count[vector]--; - if (vlapic->intr_pending_count[vector] > 0) - test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER), - &vlapic->irr[0]); - } - - break; - /*XXX deal with these later */ - + if ( vlapic->intr_pending_count[vector] > 0 ) + test_and_set_bit(vector, &vlapic->irr[0]); + } + break; + + /*XXX deal with these later */ case VLAPIC_DELIV_MODE_RESERVED: printk("Ignore deliver mode 3 in vlapic_post_injection\n"); break; @@ -950,7 +992,7 @@ break; default: - printk("<vlapic_post_injection> error deliver mode\n"); + printk("<vlapic_post_injection> invalid deliver mode\n"); break; } } @@ -978,12 +1020,12 @@ vlapic->apic_base_msr = VLAPIC_BASE_MSR_INIT_VALUE; - if (apic_id == 0) + if ( apic_id == 0 ) vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP; vlapic->base_address = vlapic_get_base_address(vlapic); - for (i = 0; i < VLAPIC_LVT_NUM; i++) + for ( i = 0; i < VLAPIC_LVT_NUM; i++ ) vlapic->lvt[i] = VLAPIC_LVT_BIT_MASK; vlapic->dest_format = 0xffffffffU; @@ -1000,18 
+1042,18 @@ * XXX According to mp sepcific, BIOS will enable LVT0/1, * remove it after BIOS enabled */ - if (!v->vcpu_id) { + if ( !v->vcpu_id ) + { vlapic->lvt[VLAPIC_LVT_LINT0] = 0x700; vlapic->lvt[VLAPIC_LVT_LINT1] = 0x500; set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); } #endif - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_reset: " - "vcpu=%p id=%d vlapic_apic_base_msr=%08x%08x " - "vlapic_base_address=%0lx", - v, vlapic->id, (uint32_t)(vlapic->apic_base_msr >> 32), - (uint32_t)vlapic->apic_base_msr, vlapic->base_address); + HVM_DBG_LOG(DBG_LEVEL_VLAPIC, + "vcpu=%p, id=%d, vlapic_apic_base_msr=0x%016"PRIx64", " + "base_address=0x%0lx.", + v, vlapic->id, vlapic->apic_base_msr, vlapic->base_address); return 1; } @@ -1025,7 +1067,8 @@ HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_init %d", v->vcpu_id); vlapic = xmalloc_bytes(sizeof(struct vlapic)); - if (!vlapic) { + if ( vlapic == NULL ) + { printk("malloc vlapic error for vcpu %x\n", v->vcpu_id); return -ENOMEM; } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/vmx/io.c --- a/xen/arch/x86/hvm/vmx/io.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/vmx/io.c Tue Mar 14 20:50:35 2006 @@ -86,28 +86,53 @@ } static inline void -enable_irq_window(unsigned long cpu_exec_control) -{ - if (!(cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING)) { - cpu_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control); +enable_irq_window(struct vcpu *v) +{ + u32 *cpu_exec_control = &v->arch.hvm_vcpu.u.vmx.exec_control; + + if (!(*cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING)) { + *cpu_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, *cpu_exec_control); } } static inline void -disable_irq_window(unsigned long cpu_exec_control) -{ - if ( cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING ) { - cpu_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control); - } +disable_irq_window(struct vcpu *v) +{ + u32 
*cpu_exec_control = &v->arch.hvm_vcpu.u.vmx.exec_control; + + if ( *cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING ) { + *cpu_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, *cpu_exec_control); + } +} + +static inline int is_interruptibility_state(void) +{ + int interruptibility; + __vmread(GUEST_INTERRUPTIBILITY_INFO, &interruptibility); + return interruptibility; +} + +/* check to see if there is pending interrupt */ +int cpu_has_pending_irq(struct vcpu *v) +{ + struct hvm_domain *plat = &v->domain->arch.hvm_domain; + + /* APIC */ + if ( cpu_has_apic_interrupt(v) ) return 1; + + /* PIC */ + if ( !vlapic_accept_pic_intr(v) ) return 0; + + return plat->interrupt_request; } asmlinkage void vmx_intr_assist(void) { int intr_type = 0; int highest_vector; - unsigned long intr_fields, eflags, interruptibility, cpu_exec_control; + unsigned long eflags; struct vcpu *v = current; struct hvm_domain *plat=&v->domain->arch.hvm_domain; struct hvm_virpit *vpit = &plat->vpit; @@ -121,42 +146,26 @@ pic_set_irq(pic, 0, 1); } - __vmread_vcpu(v, CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control); - __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields); - - if (intr_fields & INTR_INFO_VALID_MASK) { - enable_irq_window(cpu_exec_control); - HVM_DBG_LOG(DBG_LEVEL_1, "vmx_intr_assist: intr_fields: %lx", - intr_fields); - return; - } - - __vmread(GUEST_INTERRUPTIBILITY_INFO, &interruptibility); - - if (interruptibility) { - enable_irq_window(cpu_exec_control); - HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility: %lx",interruptibility); + if ( !cpu_has_pending_irq(v) ) return; + + if ( is_interruptibility_state() ) { /* pre-cleared for emulated instruction */ + enable_irq_window(v); + HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility"); return; } __vmread(GUEST_RFLAGS, &eflags); if (irq_masked(eflags)) { - enable_irq_window(cpu_exec_control); + enable_irq_window(v); return; } highest_vector = cpu_get_interrupt(v, &intr_type); - - if (highest_vector == -1) { - 
disable_irq_window(cpu_exec_control); - return; - } - switch (intr_type) { case VLAPIC_DELIV_MODE_EXT: case VLAPIC_DELIV_MODE_FIXED: case VLAPIC_DELIV_MODE_LPRI: - vmx_inject_extint(v, highest_vector, VMX_INVALID_ERROR_CODE); + vmx_inject_extint(v, highest_vector, VMX_DELIVER_NO_ERROR_CODE); TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0); break; case VLAPIC_DELIV_MODE_SMI: diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Tue Mar 14 20:50:35 2006 @@ -79,7 +79,7 @@ { struct vcpu *v = (struct vcpu *)info; - ASSERT(HVM_DOMAIN(v)); + ASSERT(hvm_guest(v)); if (v->arch.hvm_vmx.launch_cpu == smp_processor_id()) __vmpclear(virt_to_maddr(v->arch.hvm_vmx.vmcs)); @@ -87,7 +87,7 @@ void vmx_request_clear_vmcs(struct vcpu *v) { - ASSERT(HVM_DOMAIN(v)); + ASSERT(hvm_guest(v)); if (v->arch.hvm_vmx.launch_cpu == smp_processor_id()) __vmpclear(virt_to_maddr(v->arch.hvm_vmx.vmcs)); @@ -219,6 +219,7 @@ error |= __vmwrite(CR0_READ_SHADOW, cr0); error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL, MONITOR_CPU_BASED_EXEC_CONTROLS); + v->arch.hvm_vcpu.u.vmx.exec_control = MONITOR_CPU_BASED_EXEC_CONTROLS; __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : ); @@ -362,7 +363,7 @@ return error; } -static inline int construct_vmcs_host() +static inline int construct_vmcs_host(void) { int error = 0; #ifdef __x86_64__ diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Mar 14 20:50:35 2006 @@ -223,6 +223,11 @@ switch (regs->ecx){ case MSR_EFER: + /* offending reserved bit will cause #GP */ + if ( msr_content & + ~( EFER_LME | EFER_LMA | EFER_NX | EFER_SCE ) ) + vmx_inject_exception(vc, TRAP_gp_fault, 0); + if ((msr_content & EFER_LME) ^ test_bit(VMX_CPU_STATE_LME_ENABLED, &vc->arch.hvm_vmx.cpu_state)){ @@ -236,18 +241,9 @@ if (msr_content & EFER_LME) 
set_bit(VMX_CPU_STATE_LME_ENABLED, &vc->arch.hvm_vmx.cpu_state); - /* No update for LME/LMA since it have no effect */ + msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content; - if (msr_content & ~(EFER_LME | EFER_LMA)){ - msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content; - if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){ - rdmsrl(MSR_EFER, - host_state->msr_items[VMX_INDEX_MSR_EFER]); - set_bit(VMX_INDEX_MSR_EFER, &host_state->flags); - set_bit(VMX_INDEX_MSR_EFER, &msr->flags); - } - } break; case MSR_FS_BASE: @@ -483,12 +479,13 @@ void do_nmi(struct cpu_user_regs *); -static int check_vmx_controls(ctrls, msr) +static int check_vmx_controls(u32 ctrls, u32 msr) { u32 vmx_msr_low, vmx_msr_high; rdmsr(msr, vmx_msr_low, vmx_msr_high); - if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) { + if ( (ctrls < vmx_msr_low) || (ctrls > vmx_msr_high) ) + { printk("Insufficient VMX capability 0x%x, " "msr=0x%x,low=0x%8x,high=0x%x\n", ctrls, msr, vmx_msr_low, vmx_msr_high); @@ -599,6 +596,7 @@ __vmread(GUEST_RIP, &current_eip); __vmwrite(GUEST_RIP, current_eip + inst_len); + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); } @@ -1919,7 +1917,7 @@ /* don't bother H/W interrutps */ if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT && exit_reason != EXIT_REASON_VMCALL && - exit_reason != EXIT_REASON_IO_INSTRUCTION) + exit_reason != EXIT_REASON_IO_INSTRUCTION) HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason); if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { @@ -2029,6 +2027,7 @@ case EXIT_REASON_PENDING_INTERRUPT: __vmwrite(CPU_BASED_VM_EXEC_CONTROL, MONITOR_CPU_BASED_EXEC_CONTROLS); + v->arch.hvm_vcpu.u.vmx.exec_control = MONITOR_CPU_BASED_EXEC_CONTROLS; break; case EXIT_REASON_TASK_SWITCH: __hvm_bug(&regs); @@ -2053,6 +2052,7 @@ __update_guest_eip(inst_len); break; } +#if 0 /* keep this for debugging */ case EXIT_REASON_VMCALL: __get_instruction_length(inst_len); __vmread(GUEST_RIP, &eip); @@ -2061,6 +2061,7 @@ hvm_print_line(v, regs.eax); /* provides the current domain */ 
__update_guest_eip(inst_len); break; +#endif case EXIT_REASON_CR_ACCESS: { __vmread(GUEST_RIP, &eip); @@ -2101,6 +2102,21 @@ case EXIT_REASON_MWAIT_INSTRUCTION: __hvm_bug(&regs); break; + case EXIT_REASON_VMCALL: + case EXIT_REASON_VMCLEAR: + case EXIT_REASON_VMLAUNCH: + case EXIT_REASON_VMPTRLD: + case EXIT_REASON_VMPTRST: + case EXIT_REASON_VMREAD: + case EXIT_REASON_VMRESUME: + case EXIT_REASON_VMWRITE: + case EXIT_REASON_VMOFF: + case EXIT_REASON_VMON: + /* Report invalid opcode exception when a VMX guest tries to execute + any of the VMX instructions */ + vmx_inject_exception(v, TRAP_invalid_op, VMX_DELIVER_NO_ERROR_CODE); + break; + default: __hvm_bug(&regs); /* should not happen */ } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/mm.c Tue Mar 14 20:50:35 2006 @@ -176,10 +176,9 @@ void arch_init_memory(void) { - extern void subarch_init_memory(struct domain *); + extern void subarch_init_memory(void); unsigned long i, pfn, rstart_pfn, rend_pfn; - struct page_info *page; memset(percpu_info, 0, sizeof(percpu_info)); @@ -189,6 +188,7 @@ * their domain field set to dom_xen. */ dom_xen = alloc_domain(); + spin_lock_init(&dom_xen->page_alloc_lock); atomic_set(&dom_xen->refcnt, 1); dom_xen->domain_id = DOMID_XEN; @@ -198,17 +198,13 @@ * array. Mappings occur at the priv of the caller. */ dom_io = alloc_domain(); + spin_lock_init(&dom_io->page_alloc_lock); atomic_set(&dom_io->refcnt, 1); dom_io->domain_id = DOMID_IO; /* First 1MB of RAM is historically marked as I/O. */ for ( i = 0; i < 0x100; i++ ) - { - page = mfn_to_page(i); - page->count_info = PGC_allocated | 1; - page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; - page_set_owner(page, dom_io); - } + share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable); /* Any areas not specified as RAM by the e820 map are considered I/O. 
*/ for ( i = 0, pfn = 0; i < e820.nr_map; i++ ) @@ -221,17 +217,47 @@ for ( ; pfn < rstart_pfn; pfn++ ) { BUG_ON(!mfn_valid(pfn)); - page = mfn_to_page(pfn); - page->count_info = PGC_allocated | 1; - page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; - page_set_owner(page, dom_io); + share_xen_page_with_guest( + mfn_to_page(pfn), dom_io, XENSHARE_writable); } /* Skip the RAM region. */ pfn = rend_pfn; } BUG_ON(pfn != max_page); - subarch_init_memory(dom_xen); + subarch_init_memory(); +} + +void share_xen_page_with_guest( + struct page_info *page, struct domain *d, int readonly) +{ + if ( page_get_owner(page) == d ) + return; + + set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY); + + spin_lock(&d->page_alloc_lock); + + /* The incremented type count pins as writable or read-only. */ + page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page); + page->u.inuse.type_info |= PGT_validated | 1; + + page_set_owner(page, d); + wmb(); /* install valid domain ptr before updating refcnt. 
*/ + ASSERT(page->count_info == 0); + page->count_info |= PGC_allocated | 1; + + if ( unlikely(d->xenheap_pages++ == 0) ) + get_knownalive_domain(d); + list_add_tail(&page->list, &d->xenpage_list); + + spin_unlock(&d->page_alloc_lock); +} + +void share_xen_page_with_privileged_guests( + struct page_info *page, int readonly) +{ + share_xen_page_with_guest(page, dom_xen, readonly); } void write_ptbase(struct vcpu *v) @@ -751,9 +777,8 @@ unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) { unsigned long l2_backptr = l2_type & PGT_va_mask; - BUG_ON(l2_backptr == PGT_va_unknown); - if ( l2_backptr == PGT_va_mutable ) - return 0; + ASSERT(l2_backptr != PGT_va_unknown); + ASSERT(l2_backptr != PGT_va_mutable); *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | (offset_in_l2 << L2_PAGETABLE_SHIFT); @@ -767,8 +792,8 @@ unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) { unsigned long l2_backptr = l2_type & PGT_va_mask; - BUG_ON(l2_backptr == PGT_va_unknown); - + ASSERT(l2_backptr != PGT_va_unknown); + ASSERT(l2_backptr != PGT_va_mutable); *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | (offset_in_l2 << L2_PAGETABLE_SHIFT); return 1; @@ -778,8 +803,8 @@ unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type) { unsigned long l3_backptr = l3_type & PGT_va_mask; - BUG_ON(l3_backptr == PGT_va_unknown); - + ASSERT(l3_backptr != PGT_va_unknown); + ASSERT(l3_backptr != PGT_va_mutable); *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) | (offset_in_l3 << L3_PAGETABLE_SHIFT); return 1; @@ -788,9 +813,6 @@ static inline int l3_backptr( unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type) { - unsigned long l4_backptr = l4_type & PGT_va_mask; - BUG_ON(l4_backptr == PGT_va_unknown); - *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT); return 1; } @@ -1412,9 +1434,8 @@ nx &= ~PGT_validated; } } - else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == - 
(PGT_pinned | 1)) && - ((nx & PGT_type_mask) != PGT_writable_page)) ) + else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == + (PGT_pinned|PGT_l1_page_table|1)) ) { /* Page is now only pinned. Make the back pointer mutable again. */ nx |= PGT_va_mutable; @@ -1507,14 +1528,19 @@ nx &= ~PGT_va_mask; nx |= type; /* we know the actual type is correct */ } - else if ( ((type & PGT_va_mask) != PGT_va_mutable) && - ((type & PGT_va_mask) != (x & PGT_va_mask)) ) + else { + ASSERT((type & PGT_va_mask) != (x & PGT_va_mask)); + ASSERT((type & PGT_va_mask) != PGT_va_mutable); #ifdef CONFIG_X86_PAE /* We use backptr as extra typing. Cannot be unknown. */ if ( (type & PGT_type_mask) == PGT_l2_page_table ) return 0; #endif + /* Fixme: add code to propagate va_unknown to subtables. */ + if ( ((type & PGT_type_mask) >= PGT_l2_page_table) && + !shadow_mode_refcounts(page_get_owner(page)) ) + return 0; /* This table is possibly mapped at multiple locations. */ nx &= ~PGT_va_mask; nx |= PGT_va_unknown; @@ -1732,14 +1758,15 @@ } int do_mmuext_op( - struct mmuext_op *uops, + GUEST_HANDLE(mmuext_op_t) uops, unsigned int count, - unsigned int *pdone, + GUEST_HANDLE(uint) pdone, unsigned int foreigndom) { struct mmuext_op op; int rc = 0, i = 0, okay, cpu = smp_processor_id(); - unsigned long mfn, type, done = 0; + unsigned long mfn, type; + unsigned int done = 0; struct page_info *page; struct vcpu *v = current; struct domain *d = v->domain; @@ -1751,8 +1778,8 @@ if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { count &= ~MMU_UPDATE_PREEMPTED; - if ( unlikely(pdone != NULL) ) - (void)get_user(done, pdone); + if ( unlikely(!guest_handle_is_null(pdone)) ) + (void)copy_from_guest(&done, pdone, 1); } if ( !set_foreigndom(cpu, foreigndom) ) @@ -1761,7 +1788,7 @@ goto out; } - if ( unlikely(!array_access_ok(uops, count, sizeof(op))) ) + if ( unlikely(!guest_handle_okay(uops, count)) ) { rc = -EFAULT; goto out; @@ -1772,14 +1799,14 @@ if ( hypercall_preempt_check() ) { rc = 
hypercall_create_continuation( - __HYPERVISOR_mmuext_op, "pipi", + __HYPERVISOR_mmuext_op, "hihi", uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); break; } - if ( unlikely(__copy_from_user(&op, uops, sizeof(op)) != 0) ) - { - MEM_LOG("Bad __copy_from_user"); + if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) ) + { + MEM_LOG("Bad __copy_from_guest"); rc = -EFAULT; break; } @@ -1792,6 +1819,16 @@ { case MMUEXT_PIN_L1_TABLE: type = PGT_l1_page_table | PGT_va_mutable; + goto pin_page; + + case MMUEXT_PIN_L2_TABLE: + case MMUEXT_PIN_L3_TABLE: + case MMUEXT_PIN_L4_TABLE: + /* Ignore pinning of subdirectories. */ + if ( (op.cmd - MMUEXT_PIN_L1_TABLE) != (CONFIG_PAGING_LEVELS - 1) ) + break; + + type = PGT_root_page_table; pin_page: if ( shadow_mode_refcounts(FOREIGNDOM) ) @@ -1814,20 +1851,6 @@ } break; - -#ifndef CONFIG_X86_PAE /* Unsafe on PAE because of Xen-private mappings. */ - case MMUEXT_PIN_L2_TABLE: - type = PGT_l2_page_table | PGT_va_mutable; - goto pin_page; -#endif - - case MMUEXT_PIN_L3_TABLE: - type = PGT_l3_page_table | PGT_va_mutable; - goto pin_page; - - case MMUEXT_PIN_L4_TABLE: - type = PGT_l4_page_table | PGT_va_mutable; - goto pin_page; case MMUEXT_UNPIN_TABLE: if ( shadow_mode_refcounts(d) ) @@ -1969,24 +1992,25 @@ break; } - uops++; + guest_handle_add_offset(uops, 1); } out: process_deferred_ops(cpu); /* Add incremental work we have done to the @done output parameter. 
*/ - if ( unlikely(pdone != NULL) ) - __put_user(done + i, pdone); + done += i; + if ( unlikely(!guest_handle_is_null(pdone)) ) + copy_to_guest(pdone, &done, 1); UNLOCK_BIGLOCK(d); return rc; } int do_mmu_update( - struct mmu_update *ureqs, + GUEST_HANDLE(mmu_update_t) ureqs, unsigned int count, - unsigned int *pdone, + GUEST_HANDLE(uint) pdone, unsigned int foreigndom) { struct mmu_update req; @@ -2010,8 +2034,8 @@ if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { count &= ~MMU_UPDATE_PREEMPTED; - if ( unlikely(pdone != NULL) ) - (void)get_user(done, pdone); + if ( unlikely(!guest_handle_is_null(pdone)) ) + (void)copy_from_guest(&done, pdone, 1); } domain_mmap_cache_init(&mapcache); @@ -2027,7 +2051,7 @@ perfc_addc(num_page_updates, count); perfc_incr_histo(bpt_updates, count, PT_UPDATES); - if ( unlikely(!array_access_ok(ureqs, count, sizeof(req))) ) + if ( unlikely(!guest_handle_okay(ureqs, count)) ) { rc = -EFAULT; goto out; @@ -2038,14 +2062,14 @@ if ( hypercall_preempt_check() ) { rc = hypercall_create_continuation( - __HYPERVISOR_mmu_update, "pipi", + __HYPERVISOR_mmu_update, "hihi", ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); break; } - if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) ) - { - MEM_LOG("Bad __copy_from_user"); + if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) ) + { + MEM_LOG("Bad __copy_from_guest"); rc = -EFAULT; break; } @@ -2212,7 +2236,7 @@ break; } - ureqs++; + guest_handle_add_offset(ureqs, 1); } out: @@ -2222,8 +2246,9 @@ process_deferred_ops(cpu); /* Add incremental work we have done to the @done output parameter. 
*/ - if ( unlikely(pdone != NULL) ) - __put_user(done + i, pdone); + done += i; + if ( unlikely(!guest_handle_is_null(pdone)) ) + copy_to_guest(pdone, &done, 1); if ( unlikely(shadow_mode_enabled(d)) ) check_pagetable(v, "post-mmu"); /* debug */ @@ -2684,7 +2709,7 @@ } -long do_set_gdt(unsigned long *frame_list, unsigned int entries) +long do_set_gdt(GUEST_HANDLE(ulong) frame_list, unsigned int entries) { int nr_pages = (entries + 511) / 512; unsigned long frames[16]; @@ -2694,7 +2719,7 @@ if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; - if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) ) + if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) ) return -EFAULT; LOCK_BIGLOCK(current->domain); @@ -2790,52 +2815,60 @@ long arch_memory_op(int op, GUEST_HANDLE(void) arg) { - struct xen_reserved_phys_area xrpa; - unsigned long pfn; - struct domain *d; - unsigned int i; - switch ( op ) { - case XENMEM_reserved_phys_area: - if ( copy_from_guest(&xrpa, arg, 1) ) + case XENMEM_add_to_physmap: + { + struct xen_add_to_physmap xatp; + unsigned long mfn = 0, gpfn; + struct domain *d; + + if ( copy_from_guest(&xatp, arg, 1) ) return -EFAULT; - /* No guest has more than one reserved area. */ - if ( xrpa.idx != 0 ) + if ( (d = find_domain_by_id(xatp.domid)) == NULL ) return -ESRCH; - if ( (d = find_domain_by_id(xrpa.domid)) == NULL ) - return -ESRCH; - - /* Only initialised translated guests have a reserved area. 
*/ - if ( !shadow_mode_translate(d) || (d->max_pages == 0) ) + switch ( xatp.space ) + { + case XENMAPSPACE_shared_info: + if ( xatp.idx == 0 ) + mfn = virt_to_mfn(d->shared_info); + break; + case XENMAPSPACE_grant_table: + if ( xatp.idx < NR_GRANT_FRAMES ) + mfn = virt_to_mfn(d->grant_table->shared) + xatp.idx; + break; + default: + break; + } + + if ( !shadow_mode_translate(d) || (mfn == 0) ) { put_domain(d); - return -ESRCH; + return -EINVAL; } LOCK_BIGLOCK(d); - if ( d->arch.first_reserved_pfn == 0 ) - { - d->arch.first_reserved_pfn = pfn = d->max_pages; - guest_physmap_add_page( - d, pfn + 0, virt_to_maddr(d->shared_info) >> PAGE_SHIFT); - for ( i = 0; i < NR_GRANT_FRAMES; i++ ) - guest_physmap_add_page( - d, pfn + 1 + i, gnttab_shared_mfn(d, d->grant_table, i)); - } + + /* Remove previously mapped page if it was present. */ + if ( mfn_valid(gmfn_to_mfn(d, xatp.gpfn)) ) + guest_remove_page(d, xatp.gpfn); + + /* Unmap from old location, if any. */ + gpfn = get_gpfn_from_mfn(mfn); + if ( gpfn != INVALID_M2P_ENTRY ) + guest_physmap_remove_page(d, gpfn, mfn); + + /* Map at new location. */ + guest_physmap_add_page(d, xatp.gpfn, mfn); + UNLOCK_BIGLOCK(d); - xrpa.first_gpfn = d->arch.first_reserved_pfn; - xrpa.nr_gpfns = 32; - put_domain(d); - if ( copy_to_guest(arg, &xrpa, 1) ) - return -EFAULT; - break; + } default: return subarch_memory_op(op, arg); diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/physdev.c Tue Mar 14 20:50:35 2006 @@ -6,6 +6,7 @@ #include <xen/sched.h> #include <xen/irq.h> #include <xen/event.h> +#include <xen/guest_access.h> #include <asm/current.h> #include <asm/smpboot.h> #include <public/xen.h> @@ -21,13 +22,13 @@ /* * Demuxing hypercall. 
*/ -long do_physdev_op(struct physdev_op *uop) +long do_physdev_op(GUEST_HANDLE(physdev_op_t) uop) { struct physdev_op op; long ret; int irq; - if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) return -EFAULT; switch ( op.cmd ) @@ -101,7 +102,7 @@ break; } - if ( copy_to_user(uop, &op, sizeof(op)) ) + if ( copy_to_guest(uop, &op, 1) ) ret = -EFAULT; return ret; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/shadow.c Tue Mar 14 20:50:35 2006 @@ -202,6 +202,16 @@ * tlbflush_timestamp holds a min & max index of valid page table entries * within the shadow page. */ +static inline void +shadow_page_info_init(struct page_info *page, + unsigned long gmfn, + u32 psh_type) +{ + ASSERT( (gmfn & ~PGT_mfn_mask) == 0 ); + page->u.inuse.type_info = psh_type | gmfn; + page->count_info = 0; + page->tlbflush_timestamp = 0; +} static inline unsigned long alloc_shadow_page(struct domain *d, @@ -249,6 +259,11 @@ l1 = map_domain_page(page_to_mfn(page + 1)); memset(l1, 0, PAGE_SIZE); unmap_domain_page(l1); + + /* we'd like to initialize the second continuous page here + * and leave the first page initialization later */ + + shadow_page_info_init(page+1, gmfn, psh_type); #else page = alloc_domheap_page(NULL); if (!page) @@ -294,10 +309,7 @@ smfn = page_to_mfn(page); - ASSERT( (gmfn & ~PGT_mfn_mask) == 0 ); - page->u.inuse.type_info = psh_type | gmfn; - page->count_info = 0; - page->tlbflush_timestamp = 0; + shadow_page_info_init(page, gmfn, psh_type); switch ( psh_type ) { @@ -3401,7 +3413,9 @@ l1_pgentry_t sl1e; l1_pgentry_t old_sl1e; l2_pgentry_t sl2e; +#ifdef __x86_64__ u64 nx = 0; +#endif int put_ref_check = 0; /* Check if gpfn is 2M aligned */ @@ -3416,7 +3430,9 @@ l2e_remove_flags(tmp_l2e, _PAGE_PSE); if (l2e_get_flags(gl2e) & _PAGE_NX) { l2e_remove_flags(tmp_l2e, _PAGE_NX); - nx = 1ULL << 63; +#ifdef __x86_64__ + nx = PGT_high_mfn_nx; 
+#endif } @@ -3424,7 +3440,11 @@ if ( !__shadow_get_l2e(v, va, &sl2e) ) sl2e = l2e_empty(); +#ifdef __x86_64__ l1_mfn = __shadow_status(d, start_gpfn | nx, PGT_fl1_shadow); +#else + l1_mfn = __shadow_status(d, start_gpfn, PGT_fl1_shadow); +#endif /* Check the corresponding l2e */ if (l1_mfn) { @@ -3442,7 +3462,11 @@ } else { /* Allocate a new page as shadow page table if need */ gmfn = gmfn_to_mfn(d, start_gpfn); +#ifdef __x86_64__ l1_mfn = alloc_shadow_page(d, start_gpfn | nx, gmfn, PGT_fl1_shadow); +#else + l1_mfn = alloc_shadow_page(d, start_gpfn, gmfn, PGT_fl1_shadow); +#endif if (unlikely(!l1_mfn)) { BUG(); } @@ -3582,6 +3606,11 @@ base_idx = get_cr3_idxval(v); ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 ); + +#if CONFIG_PAGING_LEVELS >= 4 + if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) ) + return 1; +#endif #if CONFIG_PAGING_LEVELS == 4 if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/traps.c Tue Mar 14 20:50:35 2006 @@ -132,10 +132,10 @@ int i; unsigned long *stack, addr; - if ( HVM_DOMAIN(current) ) + if ( hvm_guest(current) ) return; - if ( VM86_MODE(regs) ) + if ( vm86_mode(regs) ) { stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff)); printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ", @@ -254,7 +254,7 @@ unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr; int i; - if ( GUEST_MODE(regs) ) + if ( guest_mode(regs) ) return show_guest_stack(regs); printk("Xen stack trace from "__OP"sp=%p:\n ", stack); @@ -333,7 +333,7 @@ DEBUGGER_trap_entry(trapnr, regs); - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) goto xen_fault; ti = ¤t->arch.guest_context.trap_ctxt[trapnr]; @@ -399,7 +399,7 @@ DEBUGGER_trap_entry(TRAP_int3, regs); - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) { DEBUGGER_trap_fatal(TRAP_int3, regs); show_registers(regs); @@ -430,6 +430,11 @@ 
v->arch.guest_context.ctrlreg[2] = addr; v->vcpu_info->arch.cr2 = addr; + + /* Re-set error_code.user flag appropriately for the guest. */ + error_code &= ~4; + if ( !guest_kernel_mode(v, guest_cpu_user_regs()) ) + error_code |= 4; ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault]; tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE; @@ -469,7 +474,7 @@ if ( unlikely(ret == 0) ) { /* In hypervisor mode? Leave it to the #PF handler to fix up. */ - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) return 0; /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */ propagate_page_fault( @@ -501,7 +506,7 @@ if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) { - if ( shadow_mode_external(d) && GUEST_MODE(regs) ) + if ( shadow_mode_external(d) && guest_mode(regs) ) return shadow_fault(addr, regs); if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( @@ -523,7 +528,7 @@ return EXCRET_fault_fixed; } - if ( KERNEL_MODE(v, regs) && + if ( guest_kernel_mode(v, regs) && /* Protection violation on write? No reserved-bit violation? */ ((regs->error_code & 0xb) == 0x3) && ptwr_do_page_fault(d, addr, regs) ) @@ -541,7 +546,7 @@ * #PF error code: * Bit 0: Protection violation (=1) ; Page not present (=0) * Bit 1: Write access - * Bit 2: Supervisor mode + * Bit 2: User mode (=1) ; Supervisor mode (=0) * Bit 3: Reserved bit violation * Bit 4: Instruction fetch */ @@ -559,7 +564,7 @@ if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) ) return rc; - if ( unlikely(!GUEST_MODE(regs)) ) + if ( unlikely(!guest_mode(regs)) ) { if ( likely((fixup = search_exception_table(regs->eip)) != 0) ) { @@ -615,7 +620,7 @@ #define TOGGLE_MODE() ((void)0) #endif - if ( v->arch.iopl >= (KERNEL_MODE(v, regs) ? 1 : 3) ) + if ( v->arch.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3) ) return 1; if ( v->arch.iobmp_limit > (port + bytes) ) @@ -844,7 +849,7 @@ case 0xfa: /* CLI */ case 0xfb: /* STI */ - if ( v->arch.iopl < (KERNEL_MODE(v, regs) ? 
1 : 3) ) + if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) ) goto fail; /* * This is just too dangerous to allow, in my opinion. Consider if the @@ -863,7 +868,7 @@ } /* Remaining instructions only emulated from guest kernel. */ - if ( !KERNEL_MODE(v, regs) ) + if ( !guest_kernel_mode(v, regs) ) goto fail; /* Privileged (ring 0) instructions. */ @@ -1065,7 +1070,7 @@ if ( regs->error_code & 1 ) goto hardware_gp; - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) goto gp_in_kernel; /* @@ -1092,7 +1097,7 @@ { /* This fault must be due to <INT n> instruction. */ ti = ¤t->arch.guest_context.trap_ctxt[regs->error_code>>3]; - if ( PERMIT_SOFTINT(TI_GET_DPL(ti), v, regs) ) + if ( permit_softint(TI_GET_DPL(ti), v, regs) ) { tb->flags = TBF_EXCEPTION; regs->eip += 2; @@ -1300,7 +1305,7 @@ DEBUGGER_trap_entry(TRAP_debug, regs); - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) { /* Clear TF just for absolute sanity. */ regs->eflags &= ~EF_TF; @@ -1404,14 +1409,14 @@ } -long do_set_trap_table(struct trap_info *traps) +long do_set_trap_table(GUEST_HANDLE(trap_info_t) traps) { struct trap_info cur; struct trap_info *dst = current->arch.guest_context.trap_ctxt; long rc = 0; /* If no table is presented then clear the entire virtual IDT. 
*/ - if ( traps == NULL ) + if ( guest_handle_is_null(traps) ) { memset(dst, 0, 256 * sizeof(*dst)); init_int80_direct_trap(current); @@ -1423,11 +1428,11 @@ if ( hypercall_preempt_check() ) { rc = hypercall_create_continuation( - __HYPERVISOR_set_trap_table, "p", traps); - break; - } - - if ( copy_from_user(&cur, traps, sizeof(cur)) ) + __HYPERVISOR_set_trap_table, "h", traps); + break; + } + + if ( copy_from_guest(&cur, traps, 1) ) { rc = -EFAULT; break; @@ -1443,7 +1448,7 @@ if ( cur.vector == 0x80 ) init_int80_direct_trap(current); - traps++; + guest_handle_add_offset(traps, 1); } return rc; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_32/domain_page.c --- a/xen/arch/x86/x86_32/domain_page.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_32/domain_page.c Tue Mar 14 20:50:35 2006 @@ -28,7 +28,7 @@ * then it means we are running on the idle domain's page table and must * therefore use its mapcache. */ - if ( unlikely(!pagetable_get_pfn(v->arch.guest_table)) && !HVM_DOMAIN(v) ) + if ( unlikely(!pagetable_get_pfn(v->arch.guest_table)) && !hvm_guest(v) ) { /* If we really are idling, perform lazy context switch now. 
*/ if ( (v = idle_vcpu[smp_processor_id()]) == current ) diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_32/entry.S Tue Mar 14 20:50:35 2006 @@ -586,6 +586,13 @@ movl %eax,UREGS_eax(%ecx) jmp do_sched_op +do_arch_sched_op_new: + # Ensure we return success even if we return via schedule_tail() + xorl %eax,%eax + GET_GUEST_REGS(%ecx) + movl %eax,UREGS_eax(%ecx) + jmp do_sched_op_new + .data ENTRY(exception_table) @@ -640,6 +647,7 @@ .long do_mmuext_op .long do_acm_op .long do_nmi_op + .long do_arch_sched_op_new .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr @@ -674,6 +682,7 @@ .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ .byte 2 /* do_nmi_op */ + .byte 2 /* do_arch_sched_op_new */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_32/mm.c Tue Mar 14 20:50:35 2006 @@ -144,7 +144,7 @@ flush_tlb_all_pge(); } -void subarch_init_memory(struct domain *dom_xen) +void subarch_init_memory(void) { unsigned long m2p_start_mfn; unsigned int i, j; @@ -175,10 +175,7 @@ for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ ) { struct page_info *page = mfn_to_page(m2p_start_mfn + j); - page->count_info = PGC_allocated | 1; - /* Ensure it's only mapped read-only by domains. */ - page->u.inuse.type_info = PGT_gdt_page | 1; - page_set_owner(page, dom_xen); + share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_32/seg_fixup.c --- a/xen/arch/x86/x86_32/seg_fixup.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_32/seg_fixup.c Tue Mar 14 20:50:35 2006 @@ -280,7 +280,7 @@ int gs_override = 0; /* WARNING: We only work for ring-3 segments. 
*/ - if ( unlikely(VM86_MODE(regs)) || unlikely(!RING_3(regs)) ) + if ( unlikely(vm86_mode(regs)) || unlikely(!ring_3(regs)) ) { DPRINTK("Taken fault at bad CS %04x\n", regs->cs); goto fail; diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_32/traps.c Tue Mar 14 20:50:35 2006 @@ -24,7 +24,7 @@ char taint_str[TAINT_STRING_MAX_LEN]; const char *context; - if ( HVM_DOMAIN(current) && GUEST_MODE(regs) ) + if ( hvm_guest(current) && guest_mode(regs) ) { context = "hvm"; hvm_store_cpu_guest_regs(current, &fault_regs); @@ -32,9 +32,9 @@ } else { - context = GUEST_MODE(regs) ? "guest" : "hypervisor"; - - if ( !GUEST_MODE(regs) ) + context = guest_mode(regs) ? "guest" : "hypervisor"; + + if ( !guest_mode(regs) ) { fault_regs.esp = (unsigned long)&regs->esp; fault_regs.ss = read_segment_register(ss); @@ -53,7 +53,7 @@ print_tainted(taint_str)); printk("CPU: %d\nEIP: %04x:[<%08x>]", smp_processor_id(), fault_regs.cs, fault_regs.eip); - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) print_symbol(" %s", fault_regs.eip); printk("\nEFLAGS: %08x CONTEXT: %s\n", fault_regs.eflags, context); printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n", @@ -172,17 +172,17 @@ regs->esp += 4; regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF; - if ( VM86_MODE(regs) ) + if ( vm86_mode(regs) ) { /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */ if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 24) ) domain_crash_synchronous(); } - else if ( unlikely(RING_0(regs)) ) - { - domain_crash_synchronous(); - } - else if ( !RING_1(regs) ) + else if ( unlikely(ring_0(regs)) ) + { + domain_crash_synchronous(); + } + else if ( !ring_1(regs) ) { /* Return to ring 2/3: pop and restore ESP and SS. 
*/ if ( __copy_from_user(&regs->esp, (void __user *)regs->esp, 8) ) diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_64/entry.S Tue Mar 14 20:50:35 2006 @@ -495,6 +495,13 @@ movq %rax,UREGS_rax(%r10) jmp do_sched_op +do_arch_sched_op_new: + # Ensure we return success even if we return via schedule_tail() + xorl %eax,%eax + GET_GUEST_REGS(%r10) + movq %rax,UREGS_rax(%r10) + jmp do_sched_op_new + .data ENTRY(exception_table) @@ -549,6 +556,7 @@ .quad do_mmuext_op .quad do_acm_op .quad do_nmi_op + .quad do_arch_sched_op_new .rept NR_hypercalls-((.-hypercall_table)/8) .quad do_ni_hypercall .endr @@ -583,6 +591,7 @@ .byte 4 /* do_mmuext_op */ .byte 1 /* do_acm_op */ .byte 2 /* do_nmi_op */ + .byte 2 /* do_arch_sched_op_new */ .rept NR_hypercalls-(.-hypercall_args_table) .byte 0 /* do_ni_hypercall */ .endr diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_64/mm.c --- a/xen/arch/x86/x86_64/mm.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_64/mm.c Tue Mar 14 20:50:35 2006 @@ -134,7 +134,7 @@ flush_tlb_all_pge(); } -void subarch_init_memory(struct domain *dom_xen) +void subarch_init_memory(void) { unsigned long i, v, m2p_start_mfn; l3_pgentry_t l3e; @@ -174,11 +174,7 @@ for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) { struct page_info *page = mfn_to_page(m2p_start_mfn + i); - page->count_info = PGC_allocated | 1; - /* gdt to make sure it's only mapped read-only by non-privileged - domains. 
*/ - page->u.inuse.type_info = PGT_gdt_page | 1; - page_set_owner(page, dom_xen); + share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } } diff -r dc50cdd66c5c -r 0ed4a312765b xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Tue Mar 14 20:10:21 2006 +++ b/xen/arch/x86/x86_64/traps.c Tue Mar 14 20:50:35 2006 @@ -24,7 +24,7 @@ char taint_str[TAINT_STRING_MAX_LEN]; const char *context; - if ( HVM_DOMAIN(current) && GUEST_MODE(regs) ) + if ( hvm_guest(current) && guest_mode(regs) ) { context = "hvm"; hvm_store_cpu_guest_regs(current, &fault_regs); @@ -32,7 +32,7 @@ } else { - context = GUEST_MODE(regs) ? "guest" : "hypervisor"; + context = guest_mode(regs) ? "guest" : "hypervisor"; fault_crs[0] = read_cr0(); fault_crs[3] = read_cr3(); fault_regs.ds = read_segment_register(ds); @@ -46,7 +46,7 @@ print_tainted(taint_str)); printk("CPU: %d\nRIP: %04x:[<%016lx>]", smp_processor_id(), fault_regs.cs, fault_regs.rip); - if ( !GUEST_MODE(regs) ) + if ( !guest_mode(regs) ) print_symbol(" %s", fault_regs.rip); printk("\nRFLAGS: %016lx CONTEXT: %s\n", fault_regs.rflags, context); printk("rax: %016lx rbx: %016lx rcx: %016lx\n", diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/acm_ops.c --- a/xen/common/acm_ops.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/acm_ops.c Tue Mar 14 20:50:35 2006 @@ -25,13 +25,14 @@ #include <xen/event.h> #include <xen/trace.h> #include <xen/console.h> +#include <xen/guest_access.h> #include <asm/shadow.h> #include <public/sched_ctl.h> #include <acm/acm_hooks.h> #ifndef ACM_SECURITY -long do_acm_op(struct acm_op * u_acm_op) +long do_acm_op(GUEST_HANDLE(acm_op_t) u_acm_op) { return -ENOSYS; } @@ -56,7 +57,7 @@ return 0; } -long do_acm_op(struct acm_op * u_acm_op) +long do_acm_op(GUEST_HANDLE(acm_op_t) u_acm_op) { long ret = 0; struct acm_op curop, *op = &curop; @@ -64,7 +65,7 @@ if (acm_authorize_acm_ops(current->domain, POLICY)) return -EPERM; - if (copy_from_user(op, u_acm_op, sizeof(*op))) + if (copy_from_guest(op, u_acm_op, 
1)) return -EFAULT; if (op->interface_version != ACM_INTERFACE_VERSION) @@ -88,7 +89,7 @@ ret = acm_get_policy(op->u.getpolicy.pullcache, op->u.getpolicy.pullcache_size); if (!ret) - copy_to_user(u_acm_op, op, sizeof(*op)); + copy_to_guest(u_acm_op, op, 1); } break; @@ -99,7 +100,7 @@ ret = acm_dump_statistics(op->u.dumpstats.pullcache, op->u.dumpstats.pullcache_size); if (!ret) - copy_to_user(u_acm_op, op, sizeof(*op)); + copy_to_guest(u_acm_op, op, 1); } break; @@ -139,7 +140,7 @@ op->u.getssid.ssidbuf, op->u.getssid.ssidbuf_size); if (!ret) - copy_to_user(u_acm_op, op, sizeof(*op)); + copy_to_guest(u_acm_op, op, 1); } break; @@ -215,7 +216,7 @@ ret = -ESRCH; if (!ret) - copy_to_user(u_acm_op, op, sizeof(*op)); + copy_to_guest(u_acm_op, op, 1); } break; diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/bitmap.c --- a/xen/common/bitmap.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/bitmap.c Tue Mar 14 20:50:35 2006 @@ -10,7 +10,6 @@ #include <xen/errno.h> #include <xen/bitmap.h> #include <xen/bitops.h> -#include <asm/uaccess.h> /* * bitmaps provide an array of bits, implemented using an an diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/domain.c --- a/xen/common/domain.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/domain.c Tue Mar 14 20:50:35 2006 @@ -18,6 +18,7 @@ #include <xen/domain_page.h> #include <xen/rangeset.h> #include <xen/guest_access.h> +#include <xen/hypercall.h> #include <asm/debugger.h> #include <public/dom0_ops.h> #include <public/sched.h> @@ -399,7 +400,7 @@ return arch_set_info_guest(v, ctxt); } -long do_vcpu_op(int cmd, int vcpuid, void *arg) +long do_vcpu_op(int cmd, int vcpuid, GUEST_HANDLE(void) arg) { struct domain *d = current->domain; struct vcpu *v; @@ -421,7 +422,7 @@ break; } - if ( copy_from_user(ctxt, arg, sizeof(*ctxt)) ) + if ( copy_from_guest(ctxt, arg, 1) ) { xfree(ctxt); rc = -EFAULT; @@ -457,35 +458,13 @@ { struct vcpu_runstate_info runstate; vcpu_runstate_get(v, &runstate); - if ( copy_to_user(arg, &runstate, sizeof(runstate)) ) + if 
( copy_to_guest(arg, &runstate, 1) ) rc = -EFAULT; break; } - case VCPUOP_register_runstate_memory_area: - { - struct vcpu_register_runstate_memory_area area; - - rc = -EINVAL; - if ( v != current ) - break; - - rc = -EFAULT; - if ( copy_from_user(&area, arg, sizeof(area)) ) - break; - - if ( !access_ok(area.addr.v, sizeof(*area.addr.v)) ) - break; - - rc = 0; - v->runstate_guest = area.addr.v; - __copy_to_user(v->runstate_guest, &v->runstate, sizeof(v->runstate)); - - break; - } - default: - rc = -ENOSYS; + rc = arch_do_vcpu_op(cmd, v, arg); break; } diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/event_channel.c --- a/xen/common/event_channel.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/event_channel.c Tue Mar 14 20:50:35 2006 @@ -23,6 +23,7 @@ #include <xen/event.h> #include <xen/irq.h> #include <xen/iocap.h> +#include <xen/guest_access.h> #include <asm/current.h> #include <public/xen.h> @@ -437,6 +438,47 @@ return ret; } +void evtchn_set_pending(struct vcpu *v, int port) +{ + struct domain *d = v->domain; + shared_info_t *s = d->shared_info; + + /* + * The following bit operations must happen in strict order. + * NB. On x86, the atomic bit operations also act as memory barriers. + * There is therefore sufficiently strict ordering for this architecture -- + * others may require explicit memory barriers. + */ + + if ( test_and_set_bit(port, &s->evtchn_pending[0]) ) + return; + + if ( !test_bit (port, &s->evtchn_mask[0]) && + !test_and_set_bit(port / BITS_PER_LONG, + &v->vcpu_info->evtchn_pending_sel) && + !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) ) + { + evtchn_notify(v); + } + else if ( unlikely(test_bit(_VCPUF_blocked, &v->vcpu_flags) && + v->vcpu_info->evtchn_upcall_mask) ) + { + /* + * Blocked and masked will usually mean that the VCPU executed + * SCHEDOP_poll. Kick the VCPU in case this port is in its poll list. 
+ */ + vcpu_unblock(v); + } +} + +void send_guest_virq(struct vcpu *v, int virq) +{ + int port = v->virq_to_evtchn[virq]; + + if ( likely(port != 0) ) + evtchn_set_pending(v, port); +} + void send_guest_pirq(struct domain *d, int pirq) { int port = d->pirq_to_evtchn[pirq]; @@ -578,12 +620,12 @@ return 0; } -long do_event_channel_op(struct evtchn_op *uop) +long do_event_channel_op(GUEST_HANDLE(evtchn_op_t) uop) { long rc; struct evtchn_op op; - if ( copy_from_user(&op, uop, sizeof(op)) != 0 ) + if ( copy_from_guest(&op, uop, 1) != 0 ) return -EFAULT; if (acm_pre_event_channel(&op)) @@ -593,31 +635,31 @@ { case EVTCHNOP_alloc_unbound: rc = evtchn_alloc_unbound(&op.u.alloc_unbound); - if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) ) + if ( (rc == 0) && (copy_to_guest(uop, &op, 1) != 0) ) rc = -EFAULT; /* Cleaning up here would be a mess! */ break; case EVTCHNOP_bind_interdomain: rc = evtchn_bind_interdomain(&op.u.bind_interdomain); - if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) ) + if ( (rc == 0) && (copy_to_guest(uop, &op, 1) != 0) ) rc = -EFAULT; /* Cleaning up here would be a mess! */ break; case EVTCHNOP_bind_virq: rc = evtchn_bind_virq(&op.u.bind_virq); - if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) ) + if ( (rc == 0) && (copy_to_guest(uop, &op, 1) != 0) ) rc = -EFAULT; /* Cleaning up here would be a mess! */ break; case EVTCHNOP_bind_ipi: rc = evtchn_bind_ipi(&op.u.bind_ipi); - if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) ) + if ( (rc == 0) && (copy_to_guest(uop, &op, 1) != 0) ) rc = -EFAULT; /* Cleaning up here would be a mess! */ break; case EVTCHNOP_bind_pirq: rc = evtchn_bind_pirq(&op.u.bind_pirq); - if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) ) + if ( (rc == 0) && (copy_to_guest(uop, &op, 1) != 0) ) rc = -EFAULT; /* Cleaning up here would be a mess! 
*/ break; @@ -631,7 +673,7 @@ case EVTCHNOP_status: rc = evtchn_status(&op.u.status); - if ( (rc == 0) && (copy_to_user(uop, &op, sizeof(op)) != 0) ) + if ( (rc == 0) && (copy_to_guest(uop, &op, 1) != 0) ) rc = -EFAULT; break; diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/gdbstub.c --- a/xen/common/gdbstub.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/gdbstub.c Tue Mar 14 20:50:35 2006 @@ -35,7 +35,6 @@ than any actual intention. It doesn't at the moment. */ #include <xen/lib.h> -#include <asm/uaccess.h> #include <xen/spinlock.h> #include <xen/serial.h> #include <xen/irq.h> @@ -348,7 +347,10 @@ } } - gdb_write_to_packet_str((x != length) ? "OK" : "E11", ctx); + if (x == length) + gdb_write_to_packet_str("OK", ctx); + else + gdb_write_to_packet_str("E11", ctx); dbg_printk("Write done.\n"); @@ -397,13 +399,18 @@ break; case 'M': /* Write memory */ addr = simple_strtoul(ctx->in_buf + 1, &ptr, 16); - if ( (ptr == (ctx->in_buf + 1)) || (ptr[0] != ':') ) + if ( (ptr == (ctx->in_buf + 1)) || (ptr[0] != ',') ) { gdb_send_reply("E03", ctx); return 0; } length = simple_strtoul(ptr + 1, &ptr, 16); - gdb_cmd_write_mem(addr, length, ptr, ctx); + if ( ptr[0] != ':') + { + gdb_send_reply("E04", ctx); + return 0; + } + gdb_cmd_write_mem(addr, length, ptr + 1, ctx); break; case 'p': /* read register */ addr = simple_strtoul(ctx->in_buf + 1, &ptr, 16); @@ -419,12 +426,6 @@ } gdb_arch_read_reg(addr, regs, ctx); break; - case 'Z': /* We need to claim to support these or gdb - won't let you continue the process. 
*/ - case 'z': - gdb_send_reply("OK", ctx); - break; - case 'D': ctx->currently_attached = 0; gdb_send_reply("OK", ctx); diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/grant_table.c --- a/xen/common/grant_table.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/grant_table.c Tue Mar 14 20:50:35 2006 @@ -28,8 +28,9 @@ #include <xen/sched.h> #include <xen/shadow.h> #include <xen/mm.h> +#include <xen/trace.h> +#include <xen/guest_access.h> #include <acm/acm_hooks.h> -#include <xen/trace.h> #define PIN_FAIL(_lbl, _rc, _f, _a...) \ do { \ @@ -187,7 +188,7 @@ /* Merge two 16-bit values into a 32-bit combined update. */ /* NB. Endianness! */ - prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + scombo = ((u32)sdom << 16) | (u32)sflags; new_scombo = scombo | GTF_reading; if ( !(op->flags & GNTMAP_readonly) ) @@ -198,12 +199,7 @@ "Attempt to write-pin a r/o grant entry.\n"); } - /* NB. prev_scombo is updated in place to seen value. */ - if ( unlikely(cmpxchg_user((u32 *)&sha->flags, - prev_scombo, - new_scombo)) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Fault while modifying shared flags and domid.\n"); + prev_scombo = cmpxchg((u32 *)&sha->flags, scombo, new_scombo); /* Did the combined update work (did we see what we expected?). 
*/ if ( likely(prev_scombo == scombo) ) @@ -306,17 +302,17 @@ static long gnttab_map_grant_ref( - struct gnttab_map_grant_ref *uop, unsigned int count) + GUEST_HANDLE(gnttab_map_grant_ref_t) uop, unsigned int count) { int i; struct gnttab_map_grant_ref op; for ( i = 0; i < count; i++ ) { - if ( unlikely(__copy_from_user(&op, &uop[i], sizeof(op))) ) + if ( unlikely(__copy_from_guest_offset(&op, uop, i, 1)) ) return -EFAULT; __gnttab_map_grant_ref(&op); - if ( unlikely(__copy_to_user(&uop[i], &op, sizeof(op))) ) + if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) ) return -EFAULT; } @@ -443,17 +439,17 @@ static long gnttab_unmap_grant_ref( - struct gnttab_unmap_grant_ref *uop, unsigned int count) + GUEST_HANDLE(gnttab_unmap_grant_ref_t) uop, unsigned int count) { int i; struct gnttab_unmap_grant_ref op; for ( i = 0; i < count; i++ ) { - if ( unlikely(__copy_from_user(&op, &uop[i], sizeof(op))) ) + if ( unlikely(__copy_from_guest_offset(&op, uop, i, 1)) ) goto fault; __gnttab_unmap_grant_ref(&op); - if ( unlikely(__copy_to_user(&uop[i], &op, sizeof(op))) ) + if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) ) goto fault; } @@ -467,7 +463,7 @@ static long gnttab_setup_table( - struct gnttab_setup_table *uop, unsigned int count) + GUEST_HANDLE(gnttab_setup_table_t) uop, unsigned int count) { struct gnttab_setup_table op; struct domain *d; @@ -478,7 +474,7 @@ if ( count != 1 ) return -EINVAL; - if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) { DPRINTK("Fault while reading gnttab_setup_table_t.\n"); return -EFAULT; @@ -517,14 +513,14 @@ for ( i = 0; i < op.nr_frames; i++ ) { gmfn = gnttab_shared_gmfn(d, d->grant_table, i); - (void)copy_to_user(&op.frame_list[i], &gmfn, sizeof(gmfn)); + (void)copy_to_guest_offset(op.frame_list, i, &gmfn, 1); } } put_domain(d); out: - if ( unlikely(copy_to_user(uop, &op, sizeof(op))) ) + if ( unlikely(copy_to_guest(uop, &op, 1)) ) return -EFAULT; return 0; @@ -572,15 
+568,10 @@ /* Merge two 16-bit values into a 32-bit combined update. */ /* NB. Endianness! */ - prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; - - /* NB. prev_scombo is updated in place to seen value. */ - if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, - prev_scombo | GTF_transfer_committed)) ) - { - DPRINTK("Fault while modifying shared flags and domid.\n"); - goto fail; - } + scombo = ((u32)sdom << 16) | (u32)sflags; + + prev_scombo = cmpxchg((u32 *)&sha->flags, scombo, + scombo | GTF_transfer_committed); /* Did the combined update work (did we see what we expected?). */ if ( likely(prev_scombo == scombo) ) @@ -608,7 +599,7 @@ static long gnttab_transfer( - struct gnttab_transfer *uop, unsigned int count) + GUEST_HANDLE(gnttab_transfer_t) uop, unsigned int count) { struct domain *d = current->domain; struct domain *e; @@ -621,7 +612,7 @@ for ( i = 0; i < count; i++ ) { /* Read from caller address space. */ - if ( unlikely(__copy_from_user(&gop, &uop[i], sizeof(gop))) ) + if ( unlikely(__copy_from_guest_offset(&gop, uop, i, 1)) ) { DPRINTK("gnttab_transfer: error reading req %d/%d\n", i, count); return -EFAULT; @@ -708,7 +699,7 @@ gop.status = GNTST_okay; copyback: - if ( unlikely(__copy_from_user(&uop[i], &gop, sizeof(gop))) ) + if ( unlikely(__copy_to_guest_offset(uop, i, &gop, 1)) ) { DPRINTK("gnttab_transfer: error writing resp %d/%d\n", i, count); return -EFAULT; @@ -718,9 +709,9 @@ return 0; } -long +long do_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) + unsigned int cmd, GUEST_HANDLE(void) uop, unsigned int count) { long rc; struct domain *d = current->domain; @@ -736,27 +727,38 @@ switch ( cmd ) { case GNTTABOP_map_grant_ref: - if ( unlikely(!array_access_ok( - uop, count, sizeof(gnttab_map_grant_ref_t))) ) + { + GUEST_HANDLE(gnttab_map_grant_ref_t) map = + guest_handle_cast(uop, gnttab_map_grant_ref_t); + if ( unlikely(!guest_handle_okay(map, count)) ) goto out; - rc = 
gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); + rc = gnttab_map_grant_ref(map, count); break; + } case GNTTABOP_unmap_grant_ref: - if ( unlikely(!array_access_ok( - uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) + { + GUEST_HANDLE(gnttab_unmap_grant_ref_t) unmap = + guest_handle_cast(uop, gnttab_unmap_grant_ref_t); + if ( unlikely(!guest_handle_okay(unmap, count)) ) goto out; - rc = gnttab_unmap_grant_ref( - (gnttab_unmap_grant_ref_t *)uop, count); + rc = gnttab_unmap_grant_ref(unmap, count); break; + } case GNTTABOP_setup_table: - rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count); + { + rc = gnttab_setup_table( + guest_handle_cast(uop, gnttab_setup_table_t), count); break; + } case GNTTABOP_transfer: - if (unlikely(!array_access_ok( - uop, count, sizeof(gnttab_transfer_t)))) + { + GUEST_HANDLE(gnttab_transfer_t) transfer = + guest_handle_cast(uop, gnttab_transfer_t); + if ( unlikely(!guest_handle_okay(transfer, count)) ) goto out; - rc = gnttab_transfer(uop, count); + rc = gnttab_transfer(transfer, count); break; + } default: rc = -ENOSYS; break; diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/kernel.c --- a/xen/common/kernel.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/kernel.c Tue Mar 14 20:50:35 2006 @@ -11,6 +11,7 @@ #include <xen/compile.h> #include <xen/sched.h> #include <xen/shadow.h> +#include <xen/guest_access.h> #include <asm/current.h> #include <public/nmi.h> #include <public/version.h> @@ -116,7 +117,7 @@ * Simple hypercalls. 
*/ -long do_xen_version(int cmd, void *arg) +long do_xen_version(int cmd, GUEST_HANDLE(void) arg) { switch ( cmd ) { @@ -129,7 +130,7 @@ { xen_extraversion_t extraversion; safe_strcpy(extraversion, XEN_EXTRAVERSION); - if ( copy_to_user(arg, extraversion, sizeof(extraversion)) ) + if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) ) return -EFAULT; return 0; } @@ -141,7 +142,7 @@ safe_strcpy(info.compile_by, XEN_COMPILE_BY); safe_strcpy(info.compile_domain, XEN_COMPILE_DOMAIN); safe_strcpy(info.compile_date, XEN_COMPILE_DATE); - if ( copy_to_user(arg, &info, sizeof(info)) ) + if ( copy_to_guest(arg, &info, 1) ) return -EFAULT; return 0; } @@ -154,7 +155,7 @@ memset(info, 0, sizeof(info)); arch_get_xen_caps(info); - if ( copy_to_user(arg, info, sizeof(info)) ) + if ( copy_to_guest(arg, (char *)info, sizeof(info)) ) return -EFAULT; return 0; } @@ -164,7 +165,7 @@ xen_platform_parameters_t params = { .virt_start = HYPERVISOR_VIRT_START }; - if ( copy_to_user(arg, &params, sizeof(params)) ) + if ( copy_to_guest(arg, &params, 1) ) return -EFAULT; return 0; @@ -174,7 +175,7 @@ { xen_changeset_info_t chgset; safe_strcpy(chgset, XEN_CHANGESET); - if ( copy_to_user(arg, chgset, sizeof(chgset)) ) + if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) ) return -EFAULT; return 0; } @@ -183,7 +184,7 @@ { xen_feature_info_t fi; - if ( copy_from_user(&fi, arg, sizeof(fi)) ) + if ( copy_from_guest(&fi, arg, 1) ) return -EFAULT; switch ( fi.submap_idx ) @@ -202,7 +203,7 @@ return -EINVAL; } - if ( copy_to_user(arg, &fi, sizeof(fi)) ) + if ( copy_to_guest(arg, &fi, 1) ) return -EFAULT; return 0; } @@ -212,31 +213,34 @@ return -ENOSYS; } -long do_nmi_op(unsigned int cmd, void *arg) +long do_nmi_op(unsigned int cmd, GUEST_HANDLE(void) arg) { struct vcpu *v = current; struct domain *d = current->domain; + struct xennmi_callback cb; long rc = 0; switch ( cmd ) { case XENNMI_register_callback: + rc = -EINVAL; if ( (d->domain_id != 0) || (v->vcpu_id != 0) ) - { - rc = -EINVAL; 
- } - else - { - v->nmi_addr = (unsigned long)arg; + break; + + rc = -EFAULT; + if ( copy_from_guest(&cb, arg, 1) ) + break; + + v->nmi_addr = cb.handler_address; #ifdef CONFIG_X86 - /* - * If no handler was registered we can 'lose the NMI edge'. - * Re-assert it now. - */ - if ( d->shared_info->arch.nmi_reason != 0 ) - set_bit(_VCPUF_nmi_pending, &v->vcpu_flags); + /* + * If no handler was registered we can 'lose the NMI edge'. Re-assert + * it now. + */ + if ( d->shared_info->arch.nmi_reason != 0 ) + set_bit(_VCPUF_nmi_pending, &v->vcpu_flags); #endif - } + rc = 0; break; case XENNMI_unregister_callback: v->nmi_addr = 0; diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/keyhandler.c --- a/xen/common/keyhandler.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/keyhandler.c Tue Mar 14 20:50:35 2006 @@ -14,6 +14,7 @@ #include <xen/rangeset.h> #include <asm/debugger.h> #include <asm/shadow.h> +#include <asm/div64.h> #define KEY_MAX 256 #define STR_MAX 64 @@ -168,6 +169,58 @@ read_unlock(&domlist_lock); } +static cpumask_t read_clocks_cpumask = CPU_MASK_NONE; +static s_time_t read_clocks_time[NR_CPUS]; + +static void read_clocks_slave(void *unused) +{ + unsigned int cpu = smp_processor_id(); + while ( !cpu_isset(cpu, read_clocks_cpumask) ) + cpu_relax(); + read_clocks_time[cpu] = NOW(); + cpu_clear(cpu, read_clocks_cpumask); +} + +static void read_clocks(unsigned char key) +{ + unsigned int cpu = smp_processor_id(), min_cpu, max_cpu; + u64 min, max, dif, difus; + static DEFINE_SPINLOCK(lock); + + spin_lock(&lock); + + smp_call_function(read_clocks_slave, NULL, 0, 0); + + local_irq_disable(); + read_clocks_cpumask = cpu_online_map; + read_clocks_time[cpu] = NOW(); + cpu_clear(cpu, read_clocks_cpumask); + local_irq_enable(); + + while ( !cpus_empty(read_clocks_cpumask) ) + cpu_relax(); + + min_cpu = max_cpu = cpu; + for_each_online_cpu ( cpu ) + { + if ( read_clocks_time[cpu] < read_clocks_time[min_cpu] ) + min_cpu = cpu; + if ( read_clocks_time[cpu] > read_clocks_time[max_cpu] 
) + max_cpu = cpu; + } + + min = read_clocks_time[min_cpu]; + max = read_clocks_time[max_cpu]; + + spin_unlock(&lock); + + dif = difus = max - min; + do_div(difus, 1000); + printk("Min = %"PRIu64" ; Max = %"PRIu64" ; Diff = %"PRIu64 + " (%"PRIu64" microseconds)\n", + min, max, dif, difus); +} + extern void dump_runq(unsigned char key); #ifndef NDEBUG extern void audit_domains_key(unsigned char key); @@ -178,7 +231,7 @@ extern void perfc_reset(unsigned char key); #endif -void do_debug_key(unsigned char key, struct cpu_user_regs *regs) +static void do_debug_key(unsigned char key, struct cpu_user_regs *regs) { (void)debugger_trap_fatal(0xf001, regs); nop(); /* Prevent the compiler doing tail call @@ -187,7 +240,7 @@ } #ifndef NDEBUG -void debugtrace_key(unsigned char key) +static void debugtrace_key(unsigned char key) { debugtrace_send_to_console = !debugtrace_send_to_console; debugtrace_dump(); @@ -210,6 +263,9 @@ 'r', dump_runq, "dump run queues"); register_irq_keyhandler( 'R', halt_machine, "reboot machine"); + + register_keyhandler( + 't', read_clocks, "display multi-cpu clock info"); #ifndef NDEBUG register_keyhandler( diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/lib.c --- a/xen/common/lib.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/lib.c Tue Mar 14 20:50:35 2006 @@ -158,8 +158,7 @@ * leading zeros). */ u64 -__qdivrem(uq, vq, arq) - u64 uq, vq, *arq; +__qdivrem(u64 uq, u64 vq, u64 *arq) { union uu tmp; digit *u, *v, *q; @@ -382,8 +381,7 @@ * Divide two unsigned quads. 
*/ u64 -__udivdi3(a, b) - u64 a, b; +__udivdi3(u64 a, u64 b) { return (__qdivrem(a, b, (u64 *)0)); diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/memory.c --- a/xen/common/memory.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/memory.c Tue Mar 14 20:50:35 2006 @@ -137,7 +137,43 @@ out: return i; } - + +int +guest_remove_page( + struct domain *d, + unsigned long gmfn) +{ + struct page_info *page; + unsigned long mfn; + + mfn = gmfn_to_mfn(d, gmfn); + if ( unlikely(!mfn_valid(mfn)) ) + { + DPRINTK("Domain %u page number %lx invalid\n", + d->domain_id, mfn); + return 0; + } + + page = mfn_to_page(mfn); + if ( unlikely(!get_page(page, d)) ) + { + DPRINTK("Bad page free for domain %u\n", d->domain_id); + return 0; + } + + if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) + put_page_and_type(page); + + if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) + put_page(page); + + guest_physmap_remove_page(d, gmfn, mfn); + + put_page(page); + + return 1; +} + static long decrease_reservation( struct domain *d, @@ -147,8 +183,7 @@ unsigned int flags, int *preempted) { - struct page_info *page; - unsigned long i, j, gmfn, mfn; + unsigned long i, j, gmfn; if ( !guest_handle_okay(extent_list, nr_extents) ) return 0; @@ -166,30 +201,8 @@ for ( j = 0; j < (1 << extent_order); j++ ) { - mfn = gmfn_to_mfn(d, gmfn + j); - if ( unlikely(!mfn_valid(mfn)) ) - { - DPRINTK("Domain %u page number %lx invalid\n", - d->domain_id, mfn); + if ( !guest_remove_page(d, gmfn + j) ) return i; - } - - page = mfn_to_page(mfn); - if ( unlikely(!get_page(page, d)) ) - { - DPRINTK("Bad page free for domain %u\n", d->domain_id); - return i; - } - - if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) - put_page_and_type(page); - - if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) - put_page(page); - - guest_physmap_remove_page(d, gmfn + j, mfn); - - put_page(page); } } diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/multicall.c --- a/xen/common/multicall.c Tue 
Mar 14 20:10:21 2006 +++ b/xen/common/multicall.c Tue Mar 14 20:50:35 2006 @@ -10,12 +10,15 @@ #include <xen/sched.h> #include <xen/event.h> #include <xen/multicall.h> +#include <xen/guest_access.h> #include <asm/current.h> #include <asm/hardirq.h> struct mc_state mc_state[NR_CPUS]; -long do_multicall(struct multicall_entry *call_list, unsigned int nr_calls) +long +do_multicall( + GUEST_HANDLE(multicall_entry_t) call_list, unsigned int nr_calls) { struct mc_state *mcs = &mc_state[smp_processor_id()]; unsigned int i; @@ -26,22 +29,16 @@ return -EINVAL; } - if ( unlikely(!array_access_ok(call_list, nr_calls, sizeof(*call_list))) ) - { - DPRINTK("Bad memory range %p for %u*%u bytes.\n", - call_list, nr_calls, (unsigned int)sizeof(*call_list)); + if ( unlikely(!guest_handle_okay(call_list, nr_calls)) ) goto fault; - } for ( i = 0; i < nr_calls; i++ ) { - if ( unlikely(__copy_from_user(&mcs->call, &call_list[i], - sizeof(*call_list))) ) - { - DPRINTK("Error copying from user range %p for %u bytes.\n", - &call_list[i], (unsigned int)sizeof(*call_list)); + if ( hypercall_preempt_check() ) + goto preempted; + + if ( unlikely(__copy_from_guest(&mcs->call, call_list, 1)) ) goto fault; - } do_multicall_call(&mcs->call); @@ -53,38 +50,21 @@ */ struct multicall_entry corrupt; memset(&corrupt, 0xAA, sizeof(corrupt)); - (void)__copy_to_user(&call_list[i], &corrupt, sizeof(corrupt)); + (void)__copy_to_guest(call_list, &corrupt, 1); } #endif - if ( unlikely(__copy_to_user(&call_list[i].result, - &mcs->call.result, - sizeof(mcs->call.result))) ) + if ( unlikely(__copy_field_to_guest(call_list, &mcs->call, result)) ) + goto fault; + + if ( test_bit(_MCSF_call_preempted, &mcs->flags) ) { - DPRINTK("Error writing result back to multicall block.\n"); - goto fault; + /* Copy the sub-call continuation. */ + (void)__copy_to_guest(call_list, &mcs->call, 1); + goto preempted; } - if ( hypercall_preempt_check() ) - { - /* - * Copy the sub-call continuation if it was preempted. 
- * Otherwise skip over the sub-call entirely. - */ - if ( !test_bit(_MCSF_call_preempted, &mcs->flags) ) - i++; - else - (void)__copy_to_user(&call_list[i], &mcs->call, - sizeof(*call_list)); - - /* Only create a continuation if there is work left to be done. */ - if ( i < nr_calls ) - { - mcs->flags = 0; - return hypercall_create_continuation( - __HYPERVISOR_multicall, "pi", &call_list[i], nr_calls-i); - } - } + guest_handle_add_offset(call_list, 1); } mcs->flags = 0; @@ -93,6 +73,11 @@ fault: mcs->flags = 0; return -EFAULT; + + preempted: + mcs->flags = 0; + return hypercall_create_continuation( + __HYPERVISOR_multicall, "hi", call_list, nr_calls-i); } /* diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/perfc.c --- a/xen/common/perfc.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/perfc.c Tue Mar 14 20:50:35 2006 @@ -8,7 +8,6 @@ #include <xen/mm.h> #include <xen/guest_access.h> #include <public/dom0_ops.h> -#include <asm/uaccess.h> #undef PERFCOUNTER #undef PERFCOUNTER_CPU diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/sched_sedf.c Tue Mar 14 20:50:35 2006 @@ -56,6 +56,12 @@ #define EXTRA_QUANTUM (MICROSECS(500)) #define WEIGHT_PERIOD (MILLISECS(100)) #define WEIGHT_SAFETY (MILLISECS(5)) + +/* FIXME: need to validate that these are sane */ +#define PERIOD_MAX ULONG_MAX +#define PERIOD_MIN (MICROSECS(10)) +#define SLICE_MAX ULONG_MAX +#define SLICE_MIN (MICROSECS(5)) #define IMPLY(a, b) (!(a) || (b)) #define EQ(a, b) ((!!(a)) == (!!(b))) @@ -579,7 +585,7 @@ curinf->deadl_abs += DIV_UP(now - curinf->deadl_abs, curinf->period) * curinf->period; - ASSERT(curinf->deadl_abs > now); + ASSERT(curinf->deadl_abs >= now); /*give a fresh slice*/ curinf->cputime = 0; if (PERIOD_BEGIN(curinf) > now) @@ -1609,7 +1615,10 @@ * Sanity checking: note that disabling extra weight requires * that we set a non-zero slice. 
*/ - if ( (cmd->u.sedf.slice == 0) || + if ( (cmd->u.sedf.period > PERIOD_MAX) || + (cmd->u.sedf.period < PERIOD_MIN) || + (cmd->u.sedf.slice > SLICE_MAX) || + (cmd->u.sedf.slice < SLICE_MIN) || (cmd->u.sedf.slice > cmd->u.sedf.period) ) return -EINVAL; EDOM_INFO(v)->weight = 0; diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/schedule.c --- a/xen/common/schedule.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/schedule.c Tue Mar 14 20:50:35 2006 @@ -27,6 +27,7 @@ #include <xen/softirq.h> #include <xen/trace.h> #include <xen/mm.h> +#include <xen/guest_access.h> #include <public/sched.h> #include <public/sched_ctl.h> @@ -42,6 +43,7 @@ static void s_timer_fn(void *unused); static void t_timer_fn(void *unused); static void dom_timer_fn(void *data); +static void poll_timer_fn(void *data); /* This is global for now so that private implementations can reach it */ struct schedule_data schedule_data[NR_CPUS]; @@ -164,8 +166,9 @@ void sched_add_domain(struct vcpu *v) { - /* Initialise the per-domain timer. */ + /* Initialise the per-domain timers. */ init_timer(&v->timer, dom_timer_fn, v, v->processor); + init_timer(&v->poll_timer, poll_timer_fn, v, v->processor); if ( is_idle_vcpu(v) ) { @@ -181,6 +184,8 @@ void sched_rem_domain(struct vcpu *v) { kill_timer(&v->timer); + kill_timer(&v->poll_timer); + SCHED_OP(rem_task, v); TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id); } @@ -270,6 +275,55 @@ return 0; } +static long do_poll(struct sched_poll *sched_poll) +{ + struct vcpu *v = current; + evtchn_port_t port; + long rc = 0; + unsigned int i; + + /* Fairly arbitrary limit. */ + if ( sched_poll->nr_ports > 128 ) + return -EINVAL; + + if ( !guest_handle_okay(sched_poll->ports, sched_poll->nr_ports) ) + return -EFAULT; + + /* Ensure that upcalls are disabled: tested by evtchn_set_pending(). */ + if ( !v->vcpu_info->evtchn_upcall_mask ) + return -EINVAL; + + set_bit(_VCPUF_blocked, &v->vcpu_flags); + + /* Check for events /after/ blocking: avoids wakeup waiting race. 
*/ + for ( i = 0; i < sched_poll->nr_ports; i++ ) + { + rc = -EFAULT; + if ( __copy_from_guest_offset(&port, sched_poll->ports, i, 1) ) + goto out; + + rc = -EINVAL; + if ( port >= MAX_EVTCHNS ) + goto out; + + rc = 0; + if ( evtchn_pending(v->domain, port) ) + goto out; + } + + if ( sched_poll->timeout != 0 ) + set_timer(&v->poll_timer, sched_poll->timeout); + + TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id); + __enter_scheduler(); + + stop_timer(&v->poll_timer); + + out: + clear_bit(_VCPUF_blocked, &v->vcpu_flags); + return rc; +} + /* Voluntarily yield the processor for this allocation. */ static long do_yield(void) { @@ -301,6 +355,61 @@ TRACE_3D(TRC_SCHED_SHUTDOWN, current->domain->domain_id, current->vcpu_id, arg); domain_shutdown(current->domain, (u8)arg); + break; + } + + default: + ret = -ENOSYS; + } + + return ret; +} + +long do_sched_op_new(int cmd, GUEST_HANDLE(void) arg) +{ + long ret = 0; + + switch ( cmd ) + { + case SCHEDOP_yield: + { + ret = do_yield(); + break; + } + + case SCHEDOP_block: + { + ret = do_block(); + break; + } + + case SCHEDOP_shutdown: + { + struct sched_shutdown sched_shutdown; + + ret = -EFAULT; + if ( copy_from_guest(&sched_shutdown, arg, 1) ) + break; + + ret = 0; + TRACE_3D(TRC_SCHED_SHUTDOWN, + current->domain->domain_id, current->vcpu_id, + sched_shutdown.reason); + domain_shutdown(current->domain, (u8)sched_shutdown.reason); + + break; + } + + case SCHEDOP_poll: + { + struct sched_poll sched_poll; + + ret = -EFAULT; + if ( copy_from_guest(&sched_poll, arg, 1) ) + break; + + ret = do_poll(&sched_poll); + break; } @@ -518,6 +627,13 @@ send_guest_virq(v, VIRQ_TIMER); } +/* SCHEDOP_poll timeout callback. */ +static void poll_timer_fn(void *data) +{ + struct vcpu *v = data; + vcpu_unblock(v); +} + /* Initialise the data structures. 
*/ void __init scheduler_init(void) { diff -r dc50cdd66c5c -r 0ed4a312765b xen/common/trace.c --- a/xen/common/trace.c Tue Mar 14 20:10:21 2006 +++ b/xen/common/trace.c Tue Mar 14 20:50:35 2006 @@ -83,8 +83,9 @@ /* Share pages so that xentrace can map them. */ for ( i = 0; i < nr_pages; i++ ) - SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf + i * PAGE_SIZE), dom0); - + share_xen_page_with_privileged_guests( + virt_to_page(rawbuf) + i, XENSHARE_writable); + for_each_online_cpu ( i ) { buf = t_bufs[i] = (struct t_buf *)&rawbuf[i*opt_tbuf_size*PAGE_SIZE]; diff -r dc50cdd66c5c -r 0ed4a312765b xen/drivers/char/console.c --- a/xen/drivers/char/console.c Tue Mar 14 20:10:21 2006 +++ b/xen/drivers/char/console.c Tue Mar 14 20:50:35 2006 @@ -22,7 +22,6 @@ #include <xen/delay.h> #include <xen/guest_access.h> #include <asm/current.h> -#include <asm/uaccess.h> #include <asm/debugger.h> #include <asm/io.h> @@ -320,7 +319,7 @@ __serial_rx(c, regs); } -long guest_console_write(char *buffer, int count) +static long guest_console_write(GUEST_HANDLE(char) buffer, int count) { char kbuf[128], *kptr; int kcount; @@ -336,11 +335,11 @@ if ( hypercall_preempt_check() ) return hypercall_create_continuation( - __HYPERVISOR_console_io, "iip", + __HYPERVISOR_console_io, "iih", CONSOLEIO_write, count, buffer); kcount = min_t(int, count, sizeof(kbuf)-1); - if ( copy_from_user(kbuf, buffer, kcount) ) + if ( copy_from_guest((char *)kbuf, buffer, kcount) ) return -EFAULT; kbuf[kcount] = '\0'; @@ -349,14 +348,14 @@ for ( kptr = kbuf; *kptr != '\0'; kptr++ ) putchar_console(*kptr); - buffer += kcount; - count -= kcount; + guest_handle_add_offset(buffer, kcount); + count -= kcount; } return 0; } -long do_console_io(int cmd, int count, char *buffer) +long do_console_io(int cmd, int count, GUEST_HANDLE(char) buffer) { long rc; unsigned int idx, len; @@ -382,7 +381,7 @@ len = SERIAL_RX_SIZE - idx; if ( (rc + len) > count ) len = count - rc; - if ( copy_to_user(&buffer[rc], &serial_rx_ring[idx], len) ) + if 
( copy_to_guest_offset(buffer, rc, &serial_rx_ring[idx], len) ) { rc = -EFAULT; break; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-ia64/guest_access.h --- a/xen/include/asm-ia64/guest_access.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-ia64/guest_access.h Tue Mar 14 20:50:35 2006 @@ -21,6 +21,8 @@ (GUEST_HANDLE(type)) { _x }; \ }) +#define guest_handle_from_ptr(ptr, type) ((GUEST_HANDLE(type)) { (type *)ptr }) + /* * Copy an array of objects to guest context via a guest handle, * specifying an offset into the guest array. @@ -39,6 +41,20 @@ const typeof(ptr) _x = (hnd).p; \ const typeof(ptr) _y = (ptr); \ copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ +}) + +/* Copy sub-field of a structure to guest context via a guest handle. */ +#define copy_field_to_guest(hnd, ptr, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + copy_to_user(_x, _y, sizeof(*_x)); \ +}) + +/* Copy sub-field of a structure from guest context via a guest handle. 
*/ +#define copy_field_from_guest(ptr, hnd, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + copy_from_user(_y, _x, sizeof(*_x)); \ }) /* @@ -60,4 +76,16 @@ __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ }) +#define __copy_field_to_guest(hnd, ptr, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + __copy_to_user(_x, _y, sizeof(*_x)); \ +}) + +#define __copy_field_from_guest(ptr, hnd, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + __copy_from_user(_y, _x, sizeof(*_x)); \ +}) + #endif /* __ASM_IA64_GUEST_ACCESS_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-ia64/hypercall.h --- a/xen/include/asm-ia64/hypercall.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-ia64/hypercall.h Tue Mar 14 20:50:35 2006 @@ -16,4 +16,6 @@ u64 *pdone, u64 foreigndom); +#define arch_do_vcpu_op(cmd, vcpu, arg) (-ENOSYS) + #endif /* __ASM_IA64_HYPERCALL_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-ia64/mm.h Tue Mar 14 20:50:35 2006 @@ -131,7 +131,8 @@ #define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d)) /* Dummy now */ -#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) do { } while (0) +#define share_xen_page_with_guest(p, d, r) do { } while (0) +#define share_xen_page_with_privileged_guests(p, r) do { } while (0) extern struct page_info *frame_table; extern unsigned long frame_table_size; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-ia64/xensystem.h --- a/xen/include/asm-ia64/xensystem.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-ia64/xensystem.h Tue Mar 14 20:50:35 2006 @@ -75,35 +75,7 @@ // FIXME SMP... see system.h, does this need to be different? 
#define switch_to(prev,next,last) __switch_to(prev, next, last) -#define __cmpxchg_user(ptr, new, old, _size) \ -({ \ - register long __gu_r8 asm ("r8"); \ - asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ - asm volatile ("mov %1=r0;;\n" \ - "[1:]\tcmpxchg"_size".acq %0=[%2],%3,ar.ccv\n" \ - "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ - "[1:]" \ - : "=r"(old), "=r"(__gu_r8) : \ - "r"(ptr), "r"(new) : "memory"); \ - __gu_r8; \ -}) - - -// NOTE: Xen defines args as pointer,old,new whereas ia64 uses pointer,new,old -// so reverse them here -#define cmpxchg_user(_p,_o,_n) \ -({ \ - register long _rc; \ - ia64_mf(); \ - switch ( sizeof(*(_p)) ) { \ - case 1: _rc = __cmpxchg_user(_p,_n,_o,"1"); break; \ - case 2: _rc = __cmpxchg_user(_p,_n,_o,"2"); break; \ - case 4: _rc = __cmpxchg_user(_p,_n,_o,"4"); break; \ - case 8: _rc = __cmpxchg_user(_p,_n,_o,"8"); break; \ - } \ - ia64_mf(); \ - _rc; \ -}) +#define local_irq_is_enabled() (!irqs_disabled()) #endif // __ASSEMBLY__ #endif // _ASM_IA64_XENSYSTEM_H diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/debugger.h --- a/xen/include/asm-x86/debugger.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/debugger.h Tue Mar 14 20:50:35 2006 @@ -88,7 +88,7 @@ { struct vcpu *v = current; - if ( KERNEL_MODE(v, regs) && + if ( guest_kernel_mode(v, regs) && test_bit(_DOMF_debugging, &v->domain->domain_flags) && ((vector == TRAP_int3) || (vector == TRAP_debug)) ) { diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/grant_table.h --- a/xen/include/asm-x86/grant_table.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/grant_table.h Tue Mar 14 20:50:35 2006 @@ -23,11 +23,9 @@ #define gnttab_create_shared_page(d, t, i) \ do { \ - SHARE_PFN_WITH_DOMAIN( \ - virt_to_page((char *)(t)->shared + ((i) * PAGE_SIZE)), (d)); \ - set_gpfn_from_mfn( \ - (virt_to_maddr((t)->shared) >> PAGE_SHIFT) + (i), \ - INVALID_M2P_ENTRY); \ + share_xen_page_with_guest( \ + virt_to_page((char *)(t)->shared + ((i) * PAGE_SIZE)), \ + (d), 
XENSHARE_writable); \ } while ( 0 ) #define gnttab_shared_mfn(d, t, i) \ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/guest_access.h --- a/xen/include/asm-x86/guest_access.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/guest_access.h Tue Mar 14 20:50:35 2006 @@ -41,6 +41,20 @@ copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ }) +/* Copy sub-field of a structure to guest context via a guest handle. */ +#define copy_field_to_guest(hnd, ptr, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + copy_to_user(_x, _y, sizeof(*_x)); \ +}) + +/* Copy sub-field of a structure from guest context via a guest handle. */ +#define copy_field_from_guest(ptr, hnd, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + copy_from_user(_y, _x, sizeof(*_x)); \ +}) + /* * Pre-validate a guest handle. * Allows use of faster __copy_* functions. @@ -60,4 +74,16 @@ __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ }) +#define __copy_field_to_guest(hnd, ptr, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + __copy_to_user(_x, _y, sizeof(*_x)); \ +}) + +#define __copy_field_from_guest(ptr, hnd, field) ({ \ + const typeof(&(ptr)->field) _x = &(hnd).p->field; \ + const typeof(&(ptr)->field) _y = &(ptr)->field; \ + __copy_from_user(_y, _x, sizeof(*_x)); \ +}) + #endif /* __ASM_X86_GUEST_ACCESS_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/io.h --- a/xen/include/asm-x86/hvm/io.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/io.h Tue Mar 14 20:50:35 2006 @@ -151,9 +151,10 @@ extern void hvm_wait_io(void); extern void hvm_safe_block(void); extern void hvm_io_assist(struct vcpu *v); -extern void pic_irq_request(int *interrupt_request, int level); +extern void pic_irq_request(void *data, int level); extern void hvm_pic_assist(struct vcpu *v); extern 
int cpu_get_interrupt(struct vcpu *v, int *type); +extern int cpu_has_pending_irq(struct vcpu *v); // XXX - think about this, maybe use bit 30 of the mfn to signify an MMIO frame. #define mmio_space(gpa) (!VALID_MFN(get_mfn_from_gpfn((gpa) >> PAGE_SHIFT))) diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/support.h Tue Mar 14 20:50:35 2006 @@ -32,7 +32,7 @@ #define HVM_DEBUG 0 #endif -#define HVM_DOMAIN(v) ((v)->arch.guest_context.flags & VGCF_HVM_GUEST) +#define hvm_guest(v) ((v)->arch.guest_context.flags & VGCF_HVM_GUEST) static inline shared_iopage_t *get_sp(struct domain *d) { @@ -99,7 +99,7 @@ #define PC_DEBUG_PORT 0x80 -#define VMX_INVALID_ERROR_CODE -1 +#define VMX_DELIVER_NO_ERROR_CODE -1 /* * This works for both 32bit & 64bit eflags filteration @@ -122,9 +122,9 @@ extern unsigned int opt_hvm_debug_level; #define HVM_DBG_LOG(level, _f, _a...) \ - if ((level) & opt_hvm_debug_level) \ - printk("[HVM:%d.%d] " _f "\n", \ - current->domain->domain_id, current->vcpu_id, ## _a) + if ( (level) & opt_hvm_debug_level ) \ + printk("[HVM:%d.%d] <%s> " _f "\n", \ + current->domain->domain_id, current->vcpu_id, __func__, ## _a) #else #define HVM_DBG_LOG(level, _f, _a...) 
#endif diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/vlapic.h --- a/xen/include/asm-x86/hvm/vlapic.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/vlapic.h Tue Mar 14 20:50:35 2006 @@ -101,15 +101,14 @@ #define VLAPIC_LVT_BIT_TRIG (1 << 15) #define VLAPIC_LVT_TIMERMODE (1 << 17) -#define VLAPIC_DELIV_MODE_FIXED 0x0 -#define VLAPIC_DELIV_MODE_LPRI 0x1 -#define VLAPIC_DELIV_MODE_SMI 0x2 -#define VLAPIC_DELIV_MODE_RESERVED 0x3 -#define VLAPIC_DELIV_MODE_NMI 0x4 -#define VLAPIC_DELIV_MODE_INIT 0x5 -#define VLAPIC_DELIV_MODE_STARTUP 0x6 -#define VLAPIC_DELIV_MODE_EXT 0x7 - +#define VLAPIC_DELIV_MODE_FIXED 0x0 +#define VLAPIC_DELIV_MODE_LPRI 0x1 +#define VLAPIC_DELIV_MODE_SMI 0x2 +#define VLAPIC_DELIV_MODE_RESERVED 0x3 +#define VLAPIC_DELIV_MODE_NMI 0x4 +#define VLAPIC_DELIV_MODE_INIT 0x5 +#define VLAPIC_DELIV_MODE_STARTUP 0x6 +#define VLAPIC_DELIV_MODE_EXT 0x7 #define VLAPIC_NO_SHORTHAND 0x0 @@ -118,41 +117,29 @@ #define VLAPIC_SHORTHAND_EXCLUDE_SELF 0x3 #define vlapic_lvt_timer_enabled(vlapic) \ - (!(vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_BIT_MASK)) - -#define vlapic_lvt_vector(vlapic, type) \ - (vlapic->lvt[type] & VLAPIC_LVT_BIT_VECTOR) - -#define vlapic_lvt_dm(value) ((value >> 8) && 7) -#define vlapic_lvt_timer_period(vlapic) \ - (vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_TIMERMODE) - -#define vlapic_isr_status(vlapic,vector) \ - test_bit(vector, &vlapic->isr[0]) - -#define vlapic_irr_status(vlapic,vector) \ - test_bit(vector, &vlapic->irr[0]) - -#define vlapic_set_isr(vlapic,vector) \ - test_and_set_bit(vector, &vlapic->isr[0]) - -#define vlapic_set_irr(vlapic,vector) \ - test_and_set_bit(vector, &vlapic->irr[0]) - -#define vlapic_clear_irr(vlapic,vector) \ - clear_bit(vector, &vlapic->irr[0]) -#define vlapic_clear_isr(vlapic,vector) \ - clear_bit(vector, &vlapic->isr[0]) - -#define vlapic_enabled(vlapic) \ - (!(vlapic->status & \ - (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK))) - -#define vlapic_global_enabled(vlapic) \ - 
!(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status)) - -#define VLAPIC_IRR(t) ((t)->irr[0]) -#define VLAPIC_ID(t) ((t)->id) + (!((vlapic)->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_BIT_MASK)) + +#define vlapic_lvt_vector(vlapic, type) \ + ((vlapic)->lvt[(type)] & VLAPIC_LVT_BIT_VECTOR) + +#define vlapic_lvt_dm(value) (((value) >> 8) && 7) +#define vlapic_lvt_timer_period(vlapic) \ + ((vlapic)->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_TIMERMODE) + +#define _VLAPIC_GLOB_DISABLE 0x0 +#define VLAPIC_GLOB_DISABLE_MASK 0x1 +#define VLAPIC_SOFTWARE_DISABLE_MASK 0x2 +#define _VLAPIC_BSP_ACCEPT_PIC 0x3 + +#define vlapic_enabled(vlapic) \ + (!((vlapic)->status & \ + (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK))) + +#define vlapic_global_enabled(vlapic) \ + (!(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status))) + +#define VLAPIC_IRR(t) ((t)->irr[0]) +#define VLAPIC_ID(t) ((t)->id) typedef struct direct_intr_info { int deliver_mode; @@ -163,10 +150,6 @@ { //FIXME check what would be 64 bit on EM64T uint32_t version; -#define _VLAPIC_GLOB_DISABLE 0x0 -#define VLAPIC_GLOB_DISABLE_MASK 0x1 -#define VLAPIC_SOFTWARE_DISABLE_MASK 0x2 -#define _VLAPIC_BSP_ACCEPT_PIC 0x3 uint32_t status; uint32_t id; uint32_t vcpu_id; @@ -180,10 +163,10 @@ uint32_t dest_format; uint32_t spurious_vec; uint32_t lvt[6]; - uint32_t timer_initial; - uint32_t timer_current; + uint32_t timer_initial_count; + uint32_t timer_current_count; uint32_t timer_divconf; - uint32_t timer_divide_counter; + uint32_t timer_divide_count; struct timer vlapic_timer; int intr_pending_count[MAX_VECTOR]; s_time_t timer_current_update; @@ -203,16 +186,16 @@ int ret; ret = test_and_set_bit(vec, &t->irr[0]); - if (trig) - test_and_set_bit(vec, &t->tmr[0]); + if ( trig ) + set_bit(vec, &t->tmr[0]); /* We may need to wake up target vcpu, besides set pending bit here */ return ret; } -static inline int vlapic_timer_active(struct vlapic *vlapic) -{ - return active_timer(&(vlapic->vlapic_timer)); +static inline int 
vlapic_timer_active(struct vlapic *vlapic) +{ + return active_timer(&vlapic->vlapic_timer); } int vlapic_find_highest_irr(struct vlapic *vlapic); @@ -226,6 +209,7 @@ void vlapic_post_injection(struct vcpu* v, int vector, int deliver_mode); +int cpu_has_apic_interrupt(struct vcpu* v); int cpu_get_apic_interrupt(struct vcpu* v, int *mode); extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); @@ -244,6 +228,7 @@ uint32_t bitmap); s_time_t get_apictime_scheduled(struct vcpu *v); + int hvm_apic_support(struct domain *d); #endif /* __ASM_X86_HVM_VLAPIC_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Tue Mar 14 20:50:35 2006 @@ -69,6 +69,7 @@ struct arch_vmx_struct { struct vmcs_struct *vmcs; /* VMCS pointer in virtual. */ unsigned int launch_cpu; /* VMCS is valid on this CPU. */ + u32 exec_control; /* cache of cpu execution control */ unsigned long flags; /* VMCS flags */ unsigned long cpu_cr0; /* copy of guest CR0 */ unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue Mar 14 20:50:35 2006 @@ -119,7 +119,15 @@ #define EXIT_REASON_RDPMC 15 #define EXIT_REASON_RDTSC 16 #define EXIT_REASON_VMCALL 18 - +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 #define EXIT_REASON_CR_ACCESS 28 #define EXIT_REASON_DR_ACCESS 29 #define EXIT_REASON_IO_INSTRUCTION 30 @@ -425,7 +433,7 @@ /* Reflect it back into the guest */ intr_fields = (INTR_INFO_VALID_MASK | type | trap); - if (error_code != VMX_INVALID_ERROR_CODE) { + if 
(error_code != VMX_DELIVER_NO_ERROR_CODE) { __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_fields |= INTR_INFO_DELIEVER_CODE_MASK; } @@ -455,7 +463,7 @@ if (vector & INTR_INFO_DELIEVER_CODE_MASK) __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code); else - error_code = VMX_INVALID_ERROR_CODE; + error_code = VMX_DELIVER_NO_ERROR_CODE; vector &= 0xff; #ifndef NDEBUG diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/vpic.h --- a/xen/include/asm-x86/hvm/vpic.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/vpic.h Tue Mar 14 20:50:35 2006 @@ -55,7 +55,7 @@ /* 0 is master pic, 1 is slave pic */ /* XXX: better separation between the two pics */ PicState pics[2]; - void (*irq_request)(int *opaque, int level); + void (*irq_request)(void *opaque, int level); void *irq_request_opaque; /* IOAPIC callback support */ void (*alt_irq_func)(void *opaque, int irq_num, int level); @@ -66,10 +66,10 @@ void pic_set_irq(struct hvm_virpic *s, int irq, int level); void pic_set_irq_new(void *opaque, int irq, int level); void pic_init(struct hvm_virpic *s, - void (*irq_request)(), + void (*irq_request)(void *, int), void *irq_request_opaque); void pic_set_alt_irq_func(struct hvm_virpic *s, - void(*alt_irq_func)(), + void (*alt_irq_func)(void *, int, int), void *alt_irq_opaque); int pic_read_irq(struct hvm_virpic *s); void pic_update_irq(struct hvm_virpic *s); diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hvm/vpit.h --- a/xen/include/asm-x86/hvm/vpit.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hvm/vpit.h Tue Mar 14 20:50:35 2006 @@ -38,8 +38,8 @@ struct hvm_virpit { /* for simulation of counter 0 in mode 2 */ u64 period_cycles; /* pit frequency in cpu cycles */ - u64 inject_point; /* the time inject virt intr */ u64 shift; /* save the value of offset - drift */ + s_time_t inject_point; /* the time inject virt intr */ s_time_t scheduled; /* scheduled timer interrupt */ struct timer pit_timer; /* periodic timer for mode 2*/ unsigned int channel; /* 
the pit channel, counter 0~2 */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/hypercall.h --- a/xen/include/asm-x86/hypercall.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/hypercall.h Tue Mar 14 20:50:35 2006 @@ -5,22 +5,22 @@ #ifndef __ASM_X86_HYPERCALL_H__ #define __ASM_X86_HYPERCALL_H__ -struct trap_info; +#include <public/physdev.h> + extern long do_set_trap_table( - struct trap_info *traps); + GUEST_HANDLE(trap_info_t) traps); -struct mmu_update; extern int do_mmu_update( - struct mmu_update *ureqs, + GUEST_HANDLE(mmu_update_t) ureqs, unsigned int count, - unsigned int *pdone, + GUEST_HANDLE(uint) pdone, unsigned int foreigndom); extern long do_set_gdt( - unsigned long *frame_list, + GUEST_HANDLE(ulong) frame_list, unsigned int entries); extern long @@ -52,10 +52,9 @@ u64 val64, unsigned long flags); -struct physdev_op; extern long do_physdev_op( - struct physdev_op *uop); + GUEST_HANDLE(physdev_op_t) uop); extern int do_update_va_mapping_otherdomain( @@ -66,14 +65,19 @@ extern int do_mmuext_op( - struct mmuext_op *uops, + GUEST_HANDLE(mmuext_op_t) uops, unsigned int count, - unsigned int *pdone, + GUEST_HANDLE(uint) pdone, unsigned int foreigndom); extern unsigned long do_iret( void); + +struct vcpu; +extern long +arch_do_vcpu_op( + int cmd, struct vcpu *v, GUEST_HANDLE(void) arg); #ifdef __x86_64__ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/mm.h --- a/xen/include/asm-x86/mm.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/mm.h Tue Mar 14 20:50:35 2006 @@ -98,8 +98,17 @@ /* 16-bit count of uses of this frame as its current type. */ #define PGT_count_mask ((1U<<16)-1) +#ifdef __x86_64__ +#define PGT_high_mfn_shift 52 +#define PGT_high_mfn_mask (0x7ffUL << PGT_high_mfn_shift) +#define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask) +#define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift) +#else /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. 
*/ #define PGT_mfn_mask ((1U<<23)-1) + /* NX for PAE xen is not supported yet */ +#define PGT_high_mfn_nx (1ULL << 63) +#endif #define PGT_score_shift 23 #define PGT_score_mask (((1U<<4)-1)<<PGT_score_shift) @@ -138,21 +147,12 @@ #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d)) -#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \ - do { \ - page_set_owner((_pfn), (_dom)); \ - /* The incremented type count is intended to pin to 'writable'. */ \ - (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \ - wmb(); /* install valid domain ptr before updating refcnt. */ \ - spin_lock(&(_dom)->page_alloc_lock); \ - /* _dom holds an allocation reference */ \ - ASSERT((_pfn)->count_info == 0); \ - (_pfn)->count_info |= PGC_allocated | 1; \ - if ( unlikely((_dom)->xenheap_pages++ == 0) ) \ - get_knownalive_domain(_dom); \ - list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list); \ - spin_unlock(&(_dom)->page_alloc_lock); \ - } while ( 0 ) +#define XENSHARE_writable 0 +#define XENSHARE_readonly 1 +extern void share_xen_page_with_guest( + struct page_info *page, struct domain *d, int readonly); +extern void share_xen_page_with_privileged_guests( + struct page_info *page, int readonly); extern struct page_info *frame_table; extern unsigned long max_page; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/page.h Tue Mar 14 20:50:35 2006 @@ -190,6 +190,10 @@ /* Shorthand versions of the above functions. */ #define __pa(x) (virt_to_maddr(x)) #define __va(x) (maddr_to_virt(x)) + +/* Convert between Xen-heap virtual addresses and machine frame numbers. */ +#define virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT) +#define mfn_to_virt(mfn) (maddr_to_virt(mfn << PAGE_SHIFT)) /* Convert between machine frame numbers and page-info structures. 
*/ #define mfn_to_page(mfn) (frame_table + (mfn)) diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/regs.h --- a/xen/include/asm-x86/regs.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/regs.h Tue Mar 14 20:50:35 2006 @@ -31,15 +31,17 @@ EF_ID = 0x00200000, /* id */ }; -#define GUEST_MODE(r) \ +#define guest_mode(r) \ ({ \ unsigned long diff = (char *)guest_cpu_user_regs() - (char *)(r); \ /* Frame pointer must point into current CPU stack. */ \ ASSERT(diff < STACK_SIZE); \ - /* If a guest frame, it must not be a ring 0 frame (unless HVM guest). */ \ - ASSERT((diff != 0) || VM86_MODE(r) || !RING_0(r) || HVM_DOMAIN(current)); \ - /* If not a guest frame, it must be a ring 0 frame. */ \ - ASSERT((diff == 0) || (!VM86_MODE(r) && RING_0(r))); \ + /* If a guest frame, it must be have guest privs (unless HVM guest). */ \ + /* We permit CS==0 which can come from an uninitialised trap entry. */ \ + ASSERT((diff != 0) || vm86_mode(r) || ((r->cs&3) >= GUEST_KERNEL_RPL) || \ + (r->cs == 0) || hvm_guest(current)); \ + /* If not a guest frame, it must be a hypervisor frame. */ \ + ASSERT((diff == 0) || (!vm86_mode(r) && (r->cs == __HYPERVISOR_CS))); \ /* Return TRUE if it's a guest frame. 
*/ \ (diff == 0); \ }) diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/shadow.h Tue Mar 14 20:50:35 2006 @@ -1646,7 +1646,7 @@ || (va >= HYPERVISOR_VIRT_END) #endif ) && - KERNEL_MODE(v, regs) ) + guest_kernel_mode(v, regs) ) return 1; return 0; @@ -1700,7 +1700,7 @@ struct domain *d = v->domain; int paging_enabled; - if ( HVM_DOMAIN(v) ) + if ( hvm_guest(v) ) paging_enabled = hvm_paging_enabled(v); else // HACK ALERT: there's currently no easy way to figure out if a domU diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/shadow_64.h Tue Mar 14 20:50:35 2006 @@ -51,8 +51,11 @@ #define READ_FAULT 0 #define WRITE_FAULT 1 -#define ERROR_W 2 +#define ERROR_P 1 +#define ERROR_W 2 #define ERROR_U 4 +#define ERROR_I (1 << 4) + #define X86_64_SHADOW_DEBUG 0 #if X86_64_SHADOW_DEBUG diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/x86_32/regs.h --- a/xen/include/asm-x86/x86_32/regs.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/x86_32/regs.h Tue Mar 14 20:50:35 2006 @@ -4,16 +4,17 @@ #include <xen/types.h> #include <public/xen.h> -#define VM86_MODE(_r) ((_r)->eflags & EF_VM) -#define RING_0(_r) (((_r)->cs & 3) == 0) -#define RING_1(_r) (((_r)->cs & 3) == 1) -#define RING_2(_r) (((_r)->cs & 3) == 2) -#define RING_3(_r) (((_r)->cs & 3) == 3) +#define vm86_mode(r) ((r)->eflags & EF_VM) +#define ring_0(r) (((r)->cs & 3) == 0) +#define ring_1(r) (((r)->cs & 3) == 1) +#define ring_2(r) (((r)->cs & 3) == 2) +#define ring_3(r) (((r)->cs & 3) == 3) -#define KERNEL_MODE(_e, _r) (!VM86_MODE(_r) && RING_1(_r)) +#define guest_kernel_mode(v, r) \ + (!vm86_mode(r) && ring_1(r)) -#define PERMIT_SOFTINT(_dpl, _e, _r) \ - ((_dpl) >= (VM86_MODE(_r) ? 3 : ((_r)->cs & 3))) +#define permit_softint(dpl, v, r) \ + ((dpl) >= (vm86_mode(r) ? 
3 : ((r)->cs & 3))) /* Number of bytes of on-stack execution state to be context-switched. */ #define CTXT_SWITCH_STACK_BYTES (sizeof(struct cpu_user_regs)) diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/asm-x86/x86_64/regs.h --- a/xen/include/asm-x86/x86_64/regs.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/asm-x86/x86_64/regs.h Tue Mar 14 20:50:35 2006 @@ -4,16 +4,17 @@ #include <xen/types.h> #include <public/xen.h> -#define VM86_MODE(_r) (0) /* No VM86 support in long mode. */ -#define RING_0(_r) (((_r)->cs & 3) == 0) -#define RING_1(_r) (((_r)->cs & 3) == 1) -#define RING_2(_r) (((_r)->cs & 3) == 2) -#define RING_3(_r) (((_r)->cs & 3) == 3) +#define vm86_mode(r) (0) /* No VM86 support in long mode. */ +#define ring_0(r) (((r)->cs & 3) == 0) +#define ring_1(r) (((r)->cs & 3) == 1) +#define ring_2(r) (((r)->cs & 3) == 2) +#define ring_3(r) (((r)->cs & 3) == 3) -#define KERNEL_MODE(_e, _r) ((_e)->arch.flags & TF_kernel_mode) +#define guest_kernel_mode(v, r) \ + (ring_3(r) && ((v)->arch.flags & TF_kernel_mode)) -#define PERMIT_SOFTINT(_dpl, _e, _r) \ - ((_dpl) >= (KERNEL_MODE(_e, _r) ? 1 : 3)) +#define permit_softint(dpl, v, r) \ + ((dpl) >= (guest_kernel_mode(v, r) ? 1 : 3)) /* Number of bytes of on-stack execution state to be context-switched. */ /* NB. Segment registers and bases are not saved/restored on x86/64 stack. 
*/ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/acm.h --- a/xen/include/public/acm.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/acm.h Tue Mar 14 20:50:35 2006 @@ -152,7 +152,7 @@ uint32_t ec_eval_count; uint32_t gt_eval_count; uint32_t ec_denied_count; - uint32_t gt_denied_count; + uint32_t gt_denied_count; uint32_t ec_cachehit_count; uint32_t gt_cachehit_count; }; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/acm_ops.h --- a/xen/include/public/acm_ops.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/acm_ops.h Tue Mar 14 20:50:35 2006 @@ -10,6 +10,7 @@ #include "xen.h" #include "sched_ctl.h" +#include "acm.h" /* * Make sure you increment the interface version whenever you modify this file! @@ -71,7 +72,7 @@ int acm_decision; /* out */ }; -struct acm_op { +typedef struct acm_op { uint32_t cmd; uint32_t interface_version; /* ACM_INTERFACE_VERSION */ union { @@ -81,7 +82,8 @@ struct acm_getssid getssid; struct acm_getdecision getdecision; } u; -}; +} acm_op_t; +DEFINE_GUEST_HANDLE(acm_op_t); #endif /* __XEN_PUBLIC_ACM_OPS_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/arch-ia64.h Tue Mar 14 20:50:35 2006 @@ -60,8 +60,8 @@ #define INVALID_MFN (~0UL) -#define MEM_G (1UL << 30) -#define MEM_M (1UL << 20) +#define MEM_G (1UL << 30) +#define MEM_M (1UL << 20) #define MMIO_START (3 * MEM_G) #define MMIO_SIZE (512 * MEM_M) @@ -70,7 +70,7 @@ #define VGA_IO_SIZE 0x20000 #define LEGACY_IO_START (MMIO_START + MMIO_SIZE) -#define LEGACY_IO_SIZE (64*MEM_M) +#define LEGACY_IO_SIZE (64*MEM_M) #define IO_PAGE_START (LEGACY_IO_START + LEGACY_IO_SIZE) #define IO_PAGE_SIZE PAGE_SIZE @@ -82,7 +82,7 @@ #define IO_SAPIC_SIZE 0x100000 #define PIB_START 0xfee00000UL -#define PIB_SIZE 0x100000 +#define PIB_SIZE 0x100000 #define GFW_START (4*MEM_G -16*MEM_M) #define GFW_SIZE (16*MEM_M) @@ -300,7 +300,7 @@ typedef struct { unsigned long start; - 
unsigned long size; + unsigned long size; } arch_initrd_info_t; #define IA64_COMMAND_LINE_SIZE 512 diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/arch-x86_32.h Tue Mar 14 20:50:35 2006 @@ -102,6 +102,7 @@ uint16_t cs; /* code selector */ unsigned long address; /* code offset */ } trap_info_t; +DEFINE_GUEST_HANDLE(trap_info_t); typedef struct cpu_user_regs { uint32_t ebx; @@ -125,6 +126,7 @@ uint16_t fs, _pad4; uint16_t gs, _pad5; } cpu_user_regs_t; +DEFINE_GUEST_HANDLE(cpu_user_regs_t); typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ @@ -157,7 +159,7 @@ typedef struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; + unsigned long pfn_to_mfn_frame_list_list; unsigned long nmi_reason; } arch_shared_info_t; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/arch-x86_64.h Tue Mar 14 20:50:35 2006 @@ -136,15 +136,6 @@ uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; /* Bottom of iret stack frame. */ }; -/* - * For compatibility with HYPERVISOR_switch_to_user which is the old - * name for HYPERVISOR_iret. - */ -struct switch_to_user { - /* Top of stack (%rsp at point of hypercall). */ - uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; - /* Bottom of iret stack frame. */ -}; /* * Send an array of these to HYPERVISOR_set_trap_table(). @@ -166,6 +157,7 @@ uint16_t cs; /* code selector */ unsigned long address; /* code offset */ } trap_info_t; +DEFINE_GUEST_HANDLE(trap_info_t); #ifdef __GNUC__ /* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ @@ -205,6 +197,7 @@ uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. 
*/ uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ } cpu_user_regs_t; +DEFINE_GUEST_HANDLE(cpu_user_regs_t); #undef __DECL_REG @@ -242,7 +235,7 @@ typedef struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; + unsigned long pfn_to_mfn_frame_list_list; unsigned long nmi_reason; } arch_shared_info_t; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/dom0_ops.h Tue Mar 14 20:50:35 2006 @@ -173,7 +173,7 @@ } dom0_readconsole_t; DEFINE_GUEST_HANDLE(dom0_readconsole_t); -/* +/* * Set which physical cpus a vcpu can execute on. */ #define DOM0_SETVCPUAFFINITY 20 @@ -231,7 +231,7 @@ } dom0_sched_id_t; DEFINE_GUEST_HANDLE(dom0_sched_id_t); -/* +/* * Control shadow pagetables operation */ #define DOM0_SHADOW_CONTROL 25 @@ -248,8 +248,8 @@ typedef struct dom0_shadow_control_stats { uint32_t fault_count; uint32_t dirty_count; - uint32_t dirty_net_count; - uint32_t dirty_block_count; + uint32_t dirty_net_count; + uint32_t dirty_block_count; } dom0_shadow_control_stats_t; DEFINE_GUEST_HANDLE(dom0_shadow_control_stats_t); @@ -402,7 +402,7 @@ } dom0_getdomaininfolist_t; DEFINE_GUEST_HANDLE(dom0_getdomaininfolist_t); -#define DOM0_PLATFORM_QUIRK 39 +#define DOM0_PLATFORM_QUIRK 39 #define QUIRK_NOIRQBALANCING 1 typedef struct dom0_platform_quirk { /* IN variables. */ @@ -463,14 +463,14 @@ uint8_t allow_access; /* allow (!0) or deny (0) access to range? 
*/ } dom0_iomem_permission_t; DEFINE_GUEST_HANDLE(dom0_iomem_permission_t); - + #define DOM0_HYPERCALL_INIT 48 typedef struct dom0_hypercall_init { domid_t domain; /* domain to be affected */ unsigned long mfn; /* machine frame to be initialised */ } dom0_hypercall_init_t; DEFINE_GUEST_HANDLE(dom0_hypercall_init_t); - + typedef struct dom0_op { uint32_t cmd; uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ @@ -507,7 +507,7 @@ struct dom0_platform_quirk platform_quirk; struct dom0_physical_memory_map physical_memory_map; struct dom0_max_vcpus max_vcpus; - struct dom0_setdomainhandle setdomainhandle; + struct dom0_setdomainhandle setdomainhandle; struct dom0_setdebugging setdebugging; struct dom0_irq_permission irq_permission; struct dom0_iomem_permission iomem_permission; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/event_channel.h --- a/xen/include/public/event_channel.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/event_channel.h Tue Mar 14 20:50:35 2006 @@ -10,6 +10,7 @@ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ typedef uint32_t evtchn_port_t; +DEFINE_GUEST_HANDLE(evtchn_port_t); /* * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as @@ -189,6 +190,7 @@ evtchn_unmask_t unmask; } u; } evtchn_op_t; +DEFINE_GUEST_HANDLE(evtchn_op_t); #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/grant_table.h Tue Mar 14 20:50:35 2006 @@ -167,6 +167,7 @@ grant_handle_t handle; uint64_t dev_bus_addr; } gnttab_map_grant_ref_t; +DEFINE_GUEST_HANDLE(gnttab_map_grant_ref_t); /* * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings @@ -188,6 +189,7 @@ /* OUT parameters. 
*/ int16_t status; /* GNTST_* */ } gnttab_unmap_grant_ref_t; +DEFINE_GUEST_HANDLE(gnttab_unmap_grant_ref_t); /* * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least @@ -205,8 +207,9 @@ uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* GNTST_* */ - unsigned long *frame_list; + GUEST_HANDLE(ulong) frame_list; } gnttab_setup_table_t; +DEFINE_GUEST_HANDLE(gnttab_setup_table_t); /* * GNTTABOP_dump_table: Dump the contents of the grant table to the @@ -219,6 +222,7 @@ /* OUT parameters. */ int16_t status; /* GNTST_* */ } gnttab_dump_table_t; +DEFINE_GUEST_HANDLE(gnttab_dump_table_t); /* * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The @@ -237,6 +241,7 @@ /* OUT parameters. */ int16_t status; } gnttab_transfer_t; +DEFINE_GUEST_HANDLE(gnttab_transfer_t); /* * Bitfield values for update_pin_status.flags. @@ -262,7 +267,7 @@ * GNTMAP_contains_pte subflag: * 0 => This map request contains a host virtual address. * 1 => This map request contains the machine addess of the PTE to update. - */ + */ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/hvm/vmx_assist.h --- a/xen/include/public/hvm/vmx_assist.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/hvm/vmx_assist.h Tue Mar 14 20:50:35 2006 @@ -22,13 +22,13 @@ unsigned int seg_type : 4, s : 1, dpl : 2, - p : 1, + p : 1, reserved0 : 4, avl : 1, - reserved1 : 1, + reserved1 : 1, default_ops_size: 1, g : 1, - null_bit : 1, + null_bit : 1, reserved2 : 15; } fields; unsigned int bytes; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/io/ring.h --- a/xen/include/public/io/ring.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/io/ring.h Tue Mar 14 20:50:35 2006 @@ -39,7 +39,7 @@ * * These expand out to give you a set of types, as you can see below. * The most important of these are: - * + * * mytag_sring_t - The shared ring. 
* mytag_front_ring_t - The 'front' half of the ring. * mytag_back_ring_t - The 'back' half of the ring. @@ -58,7 +58,7 @@ * mytag_back_ring_t back_ring; * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ - + #define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ \ /* Shared ring entry */ \ @@ -97,7 +97,7 @@ typedef struct __name##_back_ring __name##_back_ring_t /* - * Macros for manipulating rings. + * Macros for manipulating rings. * * FRONT_RING_whatever works on the "front end" of a ring: here * requests are pushed on to the ring and responses taken off it. @@ -105,7 +105,7 @@ * BACK_RING_whatever works on the "back end" of a ring: here * requests are taken off the ring and responses put on. * - * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. * This is OK in 1-for-1 request-response situations where the * requestor (front end) never has more than RING_SIZE()-1 * outstanding requests. @@ -151,7 +151,7 @@ #define RING_SIZE(_r) \ ((_r)->nr_ents) -/* Test if there is an empty slot available on the front ring. +/* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ #define RING_FULL(_r) \ @@ -159,24 +159,19 @@ /* Test if there are outstanding messages to be processed on a ring. */ #define RING_HAS_UNCONSUMED_RESPONSES(_r) \ - ( (_r)->rsp_cons != (_r)->sring->rsp_prod ) - + ((_r)->rsp_cons != (_r)->sring->rsp_prod) + #define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ( ((_r)->req_cons != (_r)->sring->req_prod ) && \ - (((_r)->req_cons - (_r)->rsp_prod_pvt) != \ - RING_SIZE(_r)) ) - + (((_r)->req_cons != (_r)->sring->req_prod) && \ + (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r))) + /* Direct access to individual ring elements, by index. 
*/ #define RING_GET_REQUEST(_r, _idx) \ - (&((_r)->sring->ring[ \ - ((_idx) & (RING_SIZE(_r) - 1)) \ - ].req)) + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) #define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[ \ - ((_idx) & (RING_SIZE(_r) - 1)) \ - ].rsp)) - + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + /* Loop termination condition: Would the specified index overflow the ring? */ #define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) @@ -212,7 +207,7 @@ * The second argument is a boolean return value. True indicates that there * are pending messages on the ring (i.e., the connection should not be put * to sleep). - * + * * These macros will set the req_event/rsp_event field to trigger a * notification on the very next message that is enqueued. If you want to * create batches of work (i.e., only receive a notification after several diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/io/xenbus.h --- a/xen/include/public/io/xenbus.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/io/xenbus.h Tue Mar 14 20:50:35 2006 @@ -6,9 +6,8 @@ * Copyright (C) 2005 XenSource Ltd. */ -#ifndef _XEN_XENBUS_H -#define _XEN_XENBUS_H - +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H /* The state of either end of the Xenbus, i.e. the current communication status of initialisation across the bus. States here imply nothing about @@ -30,8 +29,7 @@ } XenbusState; - -#endif /* _XEN_XENBUS_H */ +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ /* * Local variables: diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/memory.h --- a/xen/include/public/memory.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/memory.h Tue Mar 14 20:50:35 2006 @@ -80,7 +80,7 @@ * machphys table is smaller than max_extents * 2MB. */ unsigned int max_extents; - + /* * Pointer to buffer to fill with list of extent starts. 
If there are * any large discontiguities in the machine address space, 2MB gaps in @@ -97,25 +97,27 @@ DEFINE_GUEST_HANDLE(xen_machphys_mfn_list_t); /* - * Returns the base and size of the specified reserved 'RAM hole' in the - * specified guest's pseudophysical address space. - * arg == addr of xen_reserved_phys_area_t. + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudophysical address space. + * arg == addr of xen_add_to_physmap_t. */ -#define XENMEM_reserved_phys_area 7 -typedef struct xen_reserved_phys_area { - /* Which domain to report about? */ +#define XENMEM_add_to_physmap 7 +typedef struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ domid_t domid; - /* - * Which reserved area to report? Out-of-range request reports - * -ESRCH. Currently no architecture will have more than one reserved area. - */ - unsigned int idx; + /* Source mapping space. */ +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ + unsigned int space; - /* Base and size of the specified reserved area. */ - unsigned long first_gpfn, nr_gpfns; -} xen_reserved_phys_area_t; -DEFINE_GUEST_HANDLE(xen_reserved_phys_area_t); + /* Index into source mapping space. */ + unsigned long idx; + + /* GPFN where the source mapping page should appear. */ + unsigned long gpfn; +} xen_add_to_physmap_t; +DEFINE_GUEST_HANDLE(xen_add_to_physmap_t); /* * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/nmi.h --- a/xen/include/public/nmi.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/nmi.h Tue Mar 14 20:50:35 2006 @@ -31,9 +31,14 @@ /* * Register NMI callback for this (calling) VCPU. Currently this only makes * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. - * arg == address of callback function. + * arg == pointer to xennmi_callback structure. 
*/ #define XENNMI_register_callback 0 +typedef struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +} xennmi_callback_t; +DEFINE_GUEST_HANDLE(xennmi_callback_t); /* * Deregister NMI callback for this (calling) VCPU. diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/physdev.h --- a/xen/include/public/physdev.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/physdev.h Tue Mar 14 20:50:35 2006 @@ -37,14 +37,14 @@ uint32_t reg; /* IN or OUT */ uint32_t value; -} physdevop_apic_t; +} physdevop_apic_t; typedef struct physdevop_irq { /* IN */ uint32_t irq; /* OUT */ uint32_t vector; -} physdevop_irq_t; +} physdevop_irq_t; typedef struct physdev_op { uint32_t cmd; @@ -56,6 +56,7 @@ physdevop_irq_t irq_op; } u; } physdev_op_t; +DEFINE_GUEST_HANDLE(physdev_op_t); #endif /* __XEN_PUBLIC_PHYSDEV_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/sched.h --- a/xen/include/public/sched.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/sched.h Tue Mar 14 20:50:35 2006 @@ -9,16 +9,32 @@ #ifndef __XEN_PUBLIC_SCHED_H__ #define __XEN_PUBLIC_SCHED_H__ +#include "event_channel.h" + /* - * Prototype for this hypercall is: - * int sched_op(int cmd, unsigned long arg) + * There are two forms of this hypercall. + * + * The first and preferred version is only available from Xen 3.0.2. + * The prototype for this hypercall is: + * long sched_op_new(int cmd, void *arg) * @cmd == SCHEDOP_??? (scheduler operation). - * @arg == Operation-specific extra argument(s). + * @arg == Operation-specific extra argument(s), as described below. + * + * The legacy version of this hypercall supports only the following commands: + * SCHEDOP_yield, SCHEDOP_block, and SCHEDOP_shutdown. The prototype for the + * legacy hypercall is: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). 
+ * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * The sub-command descriptions below describe extra arguments for the + * sched_op_new() hypercall. */ /* * Voluntarily yield the CPU. - * @arg == 0. + * @arg == NULL. */ #define SCHEDOP_yield 0 @@ -27,18 +43,35 @@ * If called with event upcalls masked, this operation will atomically * reenable event delivery and check for pending events before blocking the * VCPU. This avoids a "wakeup waiting" race. - * @arg == 0. + * @arg == NULL. */ #define SCHEDOP_block 1 /* * Halt execution of this domain (all VCPUs) and notify the system controller. - * @arg == SHUTDOWN_??? (reason for shutdown). + * @arg == pointer to sched_shutdown structure. */ #define SCHEDOP_shutdown 2 +typedef struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* */ +} sched_shutdown_t; +DEFINE_GUEST_HANDLE(sched_shutdown_t); /* - * Reason codes for SCHEDOP_shutdown. These may be interpreted by controller + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll structure. + */ +#define SCHEDOP_poll 3 +typedef struct sched_poll { + GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +} sched_poll_t; +DEFINE_GUEST_HANDLE(sched_poll_t); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/sched_ctl.h --- a/xen/include/public/sched_ctl.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/sched_ctl.h Tue Mar 14 20:50:35 2006 @@ -34,24 +34,20 @@ uint32_t direction; domid_t domain; union { - struct bvt_adjdom - { + struct bvt_adjdom { uint32_t mcu_adv; /* mcu advance: inverse of weight */ uint32_t warpback; /* warp? 
*/ int32_t warpvalue; /* warp value */ int64_t warpl; /* warp limit */ int64_t warpu; /* unwarp time requirement */ } bvt; - - struct sedf_adjdom - { + struct sedf_adjdom { uint64_t period; uint64_t slice; uint64_t latency; uint32_t extratime; uint32_t weight; } sedf; - } u; }; diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/public/xen.h --- a/xen/include/public/xen.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/public/xen.h Tue Mar 14 20:50:35 2006 @@ -54,13 +54,12 @@ #define __HYPERVISOR_vm_assist 21 #define __HYPERVISOR_update_va_mapping_otherdomain 22 #define __HYPERVISOR_iret 23 /* x86 only */ -#define __HYPERVISOR_switch_vm86 23 /* x86/32 only (obsolete name) */ -#define __HYPERVISOR_switch_to_user 23 /* x86/64 only (obsolete name) */ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_acm_op 27 #define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op_new 29 /* * VIRTUAL INTERRUPTS @@ -162,7 +161,7 @@ #define MMUEXT_NEW_USER_BASEPTR 15 #ifndef __ASSEMBLY__ -struct mmuext_op { +typedef struct mmuext_op { unsigned int cmd; union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ @@ -176,7 +175,8 @@ /* TLB_FLUSH_MULTI, INVLPG_MULTI */ void *vcpumask; } arg2; -}; +} mmuext_op_t; +DEFINE_GUEST_HANDLE(mmuext_op_t); #endif /* These are passed as 'flags' to update_va_mapping. They can be ORed. */ @@ -243,6 +243,7 @@ uint64_t ptr; /* Machine address of PTE. */ uint64_t val; /* New contents of PTE. */ } mmu_update_t; +DEFINE_GUEST_HANDLE(mmu_update_t); /* * Send an array of these to HYPERVISOR_multicall(). 
@@ -252,6 +253,7 @@ unsigned long op, result; unsigned long args[6]; } multicall_entry_t; +DEFINE_GUEST_HANDLE(multicall_entry_t); /* * Event channel endpoints per domain: diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/xen/event.h --- a/xen/include/xen/event.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/xen/event.h Tue Mar 14 20:50:35 2006 @@ -15,41 +15,14 @@ #include <asm/bitops.h> #include <asm/event.h> -/* - * EVENT-CHANNEL NOTIFICATIONS - * NB. On x86, the atomic bit operations also act as memory barriers. There - * is therefore sufficiently strict ordering for this architecture -- others - * may require explicit memory barriers. - */ - -static inline void evtchn_set_pending(struct vcpu *v, int port) -{ - struct domain *d = v->domain; - shared_info_t *s = d->shared_info; - - /* These four operations must happen in strict order. */ - if ( !test_and_set_bit(port, &s->evtchn_pending[0]) && - !test_bit (port, &s->evtchn_mask[0]) && - !test_and_set_bit(port / BITS_PER_LONG, - &v->vcpu_info->evtchn_pending_sel) && - !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) ) - { - evtchn_notify(v); - } -} +extern void evtchn_set_pending(struct vcpu *v, int port); /* * send_guest_virq: * @v: VCPU to which virtual IRQ should be sent * @virq: Virtual IRQ number (VIRQ_*) */ -static inline void send_guest_virq(struct vcpu *v, int virq) -{ - int port = v->virq_to_evtchn[virq]; - - if ( likely(port != 0) ) - evtchn_set_pending(v, port); -} +extern void send_guest_virq(struct vcpu *v, int virq); /* * send_guest_pirq: @@ -63,6 +36,9 @@ (!!(v)->vcpu_info->evtchn_upcall_pending & \ !(v)->vcpu_info->evtchn_upcall_mask) +#define evtchn_pending(d, p) \ + (test_bit((p), &(d)->shared_info->evtchn_pending[0])) + /* Send a notification from a local event-channel port. 
*/ extern long evtchn_send(unsigned int lport); diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/xen/gdbstub.h --- a/xen/include/xen/gdbstub.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/xen/gdbstub.h Tue Mar 14 20:50:35 2006 @@ -20,6 +20,9 @@ #ifndef __XEN_GDBSTUB_H__ #define __XEN_GDBSTUB_H__ + +#include <asm/atomic.h> +#include <asm/page.h> #ifdef CRASH_DEBUG diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/xen/hypercall.h --- a/xen/include/xen/hypercall.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/xen/hypercall.h Tue Mar 14 20:50:35 2006 @@ -9,6 +9,8 @@ #include <xen/types.h> #include <xen/time.h> #include <public/xen.h> +#include <public/acm_ops.h> +#include <public/event_channel.h> #include <asm/hypercall.h> extern long @@ -20,46 +22,43 @@ int cmd, unsigned long arg); -struct dom0_op; extern long do_dom0_op( - struct dom0_op *u_dom0_op); + GUEST_HANDLE(dom0_op_t) u_dom0_op); extern long do_memory_op( int cmd, - void *arg); + GUEST_HANDLE(void) arg); -struct multicall_entry; extern long do_multicall( - struct multicall_entry *call_list, + GUEST_HANDLE(multicall_entry_t) call_list, unsigned int nr_calls); extern long do_set_timer_op( s_time_t timeout); -struct evtchn_op; extern long do_event_channel_op( - struct evtchn_op *uop); + GUEST_HANDLE(evtchn_op_t) uop); extern long do_xen_version( int cmd, - void *arg); + GUEST_HANDLE(void) arg); extern long do_console_io( int cmd, int count, - char *buffer); + GUEST_HANDLE(char) buffer); extern long do_grant_table_op( unsigned int cmd, - void *uop, + GUEST_HANDLE(void) uop, unsigned int count); extern long @@ -71,16 +70,15 @@ do_vcpu_op( int cmd, int vcpuid, - void *arg); + GUEST_HANDLE(void) arg); -struct acm_op; extern long do_acm_op( - struct acm_op *u_acm_op); + GUEST_HANDLE(acm_op_t) u_acm_op); extern long do_nmi_op( unsigned int cmd, - void *arg); + GUEST_HANDLE(void) arg); #endif /* __XEN_HYPERCALL_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/xen/mm.h --- a/xen/include/xen/mm.h Tue Mar 14 20:10:21 
2006 +++ b/xen/include/xen/mm.h Tue Mar 14 20:50:35 2006 @@ -82,4 +82,6 @@ #define sync_pagetable_state(d) ((void)0) #endif +int guest_remove_page(struct domain *d, unsigned long gmfn); + #endif /* __XEN_MM_H__ */ diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/xen/perfc_defn.h --- a/xen/include/xen/perfc_defn.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/xen/perfc_defn.h Tue Mar 14 20:50:35 2006 @@ -26,7 +26,7 @@ PERFCOUNTER_ARRAY(hypercalls, "hypercalls", NR_hypercalls) PERFCOUNTER_ARRAY(exceptions, "exceptions", 32) -#define VMX_PERF_EXIT_REASON_SIZE 37 +#define VMX_PERF_EXIT_REASON_SIZE 44 #define VMX_PERF_VECTOR_SIZE 0x20 PERFCOUNTER_ARRAY(vmexits, "vmexits", VMX_PERF_EXIT_REASON_SIZE) PERFCOUNTER_ARRAY(cause_vector, "cause vector", VMX_PERF_VECTOR_SIZE) diff -r dc50cdd66c5c -r 0ed4a312765b xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Mar 14 20:10:21 2006 +++ b/xen/include/xen/sched.h Tue Mar 14 20:50:35 2006 @@ -67,6 +67,8 @@ struct timer timer; /* one-shot timer for timeout values */ unsigned long sleep_tick; /* tick at which this vcpu started sleep */ + struct timer poll_timer; /* timeout for SCHEDOP_poll */ + void *sched_priv; /* scheduler-specific data */ struct vcpu_runstate_info runstate; @@ -266,7 +268,7 @@ void sched_rem_domain(struct vcpu *); long sched_ctl(struct sched_ctl_cmd *); long sched_adjdom(struct sched_adjdom_cmd *); -int sched_id(); +int sched_id(void); void vcpu_wake(struct vcpu *d); void vcpu_sleep_nosync(struct vcpu *d); void vcpu_sleep_sync(struct vcpu *d); @@ -310,7 +312,6 @@ * It contains one character per argument as follows: * 'i' [unsigned] {char, int} * 'l' [unsigned] long - * 'p' pointer (foo *) * 'h' guest handle (GUEST_HANDLE(foo)) */ unsigned long hypercall_create_continuation( diff -r dc50cdd66c5c -r 0ed4a312765b tools/python/xen/xend/server/irqif.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/python/xen/xend/server/irqif.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,73 @@ 
+#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx> +# Copyright (C) 2005 XenSource Ltd +# Copyright (C) 2005 Jody Belka +#============================================================================ +# This code based on tools/python/xen/xend/server/iopif.py and modified +# to handle interrupts +#============================================================================ + + +import types + +import xen.lowlevel.xc; + +from xen.xend import sxp +from xen.xend.XendError import VmError + +from xen.xend.server.DevController import DevController + + +xc = xen.lowlevel.xc.xc() + + +class IRQController(DevController): + + def __init__(self, vm): + DevController.__init__(self, vm) + + + def getDeviceDetails(self, config): + """@see DevController.getDeviceDetails""" + + def get_param(field): + try: + val = sxp.child_value(config, field) + + if not val: + raise VmError('irq: Missing %s config setting' % field) + + if isinstance(val, types.StringType): + return int(val,10) + radix = 10 + else: + return val + except: + raise VmError('irq: Invalid config setting %s: %s' % + (field, val)) + + pirq = get_param('irq') + + rc = xc.domain_irq_permission(dom = 
self.getDomid(), + pirq = pirq, + allow_access = True) + + if rc < 0: + #todo non-fatal + raise VmError( + 'irq: Failed to configure irq: %d' % (pirq)) + + return (None, {}, {}) diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/grouptest/create --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/grouptest/create Tue Mar 14 20:50:35 2006 @@ -0,0 +1,1 @@ +create diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/grouptest/default --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/grouptest/default Tue Mar 14 20:50:35 2006 @@ -0,0 +1,29 @@ +block-create +block-destroy +block-list +console +create +destroy +dmesg +domid +domname +enforce_dom0_cpus +help +info +list +memmax +memset +migrate +network +network-attach +pause +reboot +restore +save +sedf +shutdown +sysrq +unpause +vcpu-disable +vcpu-pin +vtpm diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/grouptest/quick --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/grouptest/quick Tue Mar 14 20:50:35 2006 @@ -0,0 +1,4 @@ +create 01_create_basic_pos.test 07_create_mem64_pos.test 10_create_fastdestroy.test 14_create_blockroot_pos.test +unpause 01_unpause_basic_pos.test +memset 01_memset_basic_pos.test 03_memset_random_pos.test +help 06_help_allcmds.test diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/03_network_local_tcp_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/network/03_network_local_tcp_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,95 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: <dykman@xxxxxxxxxx> + +# TCP tests on local interfaces. 
+# - creates a single guest domain +# - sets up a single NIC +# - conducts hping tcp tests to the local loopback and IP address + +# hping2 127.0.0.1 -c 1 -d $size +# hping2 $local_IP -c 1 -d $size +# where $size = 1, 48, 64, 512, 1440, 1448, 1500, 1505, +# 4096, 4192, 32767, 65507, 65508 + + +trysizes = [ 1, 48, 64, 512, 1440, 1448, 1500, 1505, 4096, 4192, + 32767, 65495 ] + + +from XmTestLib import * +rc = 0 + +Net = XmNetwork() + +try: + # read an IP address from the config + ip = Net.ip("dom1", "eth0") + mask = Net.mask("dom1", "eth0") +except NetworkError, e: + FAIL(str(e)) + +# Fire up a guest domain w/1 nic +if ENABLE_HVM_SUPPORT: + brg = "xenbr0" + config = {"vif" : ['type=ioemu, bridge=%s' % brg]} +else: + brg = None + config = {"vif" : ['ip=%s' % ip]} + +domain = XmTestDomain(extraConfig=config) +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + + +# Attach a console +try: + console = XmConsole(domain.getName(), historySaveCmds=True) +except ConsoleError, e: + FAIL(str(e)) + +try: + # Activate the console + console.sendInput("bhs") + + # Bring up the "lo" interface. + console.runCmd("ifconfig lo 127.0.0.1") + + console.runCmd("ifconfig eth0 inet "+ip+" netmask "+mask+" up") + + # First do loopback + lofails="" + for size in trysizes: + out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -q -c 20 " + + "--fast -d " + str(size)) + if out["return"]: + lofails += " " + str(size) + + # Next comes eth0 + eth0fails="" + for size in trysizes: + out = console.runCmd("hping2 " + ip + " -E /dev/urandom -q -c 20 " + + "--fast -d "+ str(size)) + if out["return"]: + eth0fails += " " + str(size) +except ConsoleError, e: + FAIL(str(e)) +except NetworkError, e: + FAIL(str(e)) + + +# Tally up failures +failures="" +if len(lofails): + failures += "TCP hping2 over loopback failed for size" + lofails + ". 
" +if len(eth0fails): + failures += "TCP hping2 over eth0 failed for size" + eth0fails + "." +if len(failures): + FAIL(failures) + diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/04_network_local_udp_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/network/04_network_local_udp_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,96 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: <dykman@xxxxxxxxxx> + +# UDP tests on local interfaces. +# - creates a single guest domain +# - sets up a single NIC +# - conducts hping udp tests to the local loopback and IP address + +# hping2 127.0.0.1 -2 -c 1 -d $size +# hping2 $local_IP -2 -c 1 -d $size +# where $size = 1, 48, 64, 512, 1440, 1448, 1500, 1505, +# 4096, 4192, 32767, 65507, 65508 + + +trysizes = [ 1, 48, 64, 512, 1440, 1448, 1500, 1505, 4096, 4192, + 32767, 65495 ] + +from XmTestLib import * +rc = 0 + +Net = XmNetwork() + +try: + # read an IP address from the config + ip = Net.ip("dom1", "eth0") + mask = Net.mask("dom1", "eth0") +except NetworkError, e: + FAIL(str(e)) + +# Fire up a guest domain w/1 nic +if ENABLE_HVM_SUPPORT: + brg = "xenbr0" + config = {"vif" : ['type=ioemu, bridge=%s' % brg]} +else: + brg = None + config = {"vif" : ['ip=%s' % ip]} + +domain = XmTestDomain(extraConfig=config) +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + + +# Attach a console +try: + console = XmConsole(domain.getName(), historySaveCmds=True) +except ConsoleError, e: + FAIL(str(e)) + +try: + # Activate the console + console.sendInput("bhs") + + # Bring up the "lo" interface. 
+ console.runCmd("ifconfig lo 127.0.0.1") + + console.runCmd("ifconfig eth0 inet "+ip+" netmask "+mask+" up") + + # First do loopback + lofails="" + for size in trysizes: + out = console.runCmd("hping2 127.0.0.1 -E /dev/urandom -2 -q -c 20 " + + "--fast -d " + str(size)) + if out["return"]: + lofails += " " + str(size) + print out["output"] + + # Next comes eth0 + eth0fails="" + for size in trysizes: + out = console.runCmd("hping2 " + ip + " -E /dev/urandom -2 -q -c 20 " + + "--fast -d " + str(size)) + if out["return"]: + eth0fails += " " + str(size) + print out["output"] +except ConsoleError, e: + FAIL(str(e)) +except NetworkError, e: + FAIL(str(e)) + + +# Tally up failures +failures="" +if len(lofails): + failures += "UDP hping2 over loopback failed for size" + lofails + ". " +if len(eth0fails): + failures += "UDP hping2 over eth0 failed for size" + eth0fails + "." +if len(failures): + FAIL(failures) + diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/06_network_dom0_tcp_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/network/06_network_dom0_tcp_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,81 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: <dykman@xxxxxxxxxx> + +# TCP tests to dom0. 
+# - determines dom0 network +# - creates a single guest domain +# - sets up a single NIC on same subnet as dom0 +# - conducts hping2 tcp tests to the dom0 IP address + +# hping2 $dom0_IP -c 1 -d $size +# where $size = 1, 48, 64, 512, 1440, 1448, 1500, 1505, +# 4096, 4192, 32767, 65507, 65508 + +trysizes = [ 1, 48, 64, 512, 1440, 1500, 1505, 4096, 4192, + 32767, 65495 ] + + + +from XmTestLib import * +rc = 0 + +Net = XmNetwork() + +try: + # read an IP address from the config + ip = Net.ip("dom1", "eth0") + mask = Net.mask("dom1", "eth0") +except NetworkError, e: + FAIL(str(e)) + +# Fire up a guest domain w/1 nic +if ENABLE_HVM_SUPPORT: + brg = "xenbr0" + config = {"vif" : ['type=ioemu, bridge=%s' % brg]} +else: + brg = None + config = {"vif" : ["ip=%s" % ip]} + +domain = XmTestDomain(extraConfig=config) +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + + +# Attach a console +try: + console = XmConsole(domain.getName(), historySaveCmds=True) + # Activate the console + console.sendInput("bhs") +except ConsoleError, e: + FAIL(str(e)) + +try: + # Add a suitable dom0 IP address + dom0ip = Net.ip("dom0", "eth0", todomname=domain.getName(), toeth="eth0", bridge=brg) +except NetworkError, e: + FAIL(str(e)) + +try: + console.runCmd("ifconfig eth0 inet "+ip+" netmask "+mask+" up") + + # Ping dom0 + fails="" + for size in trysizes: + out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -q -c 20 " + + "--fast -d " + str(size)) + if out["return"]: + fails += " " + str(size) + print out["output"] +except ConsoleError, e: + FAIL(str(e)) + +if len(fails): + FAIL("TCP hping2 to dom0 failed for size" + fails + ".") + diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/07_network_dom0_udp_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/network/07_network_dom0_udp_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,81 @@ +#!/usr/bin/python + +# Copyright (C) 
International Business Machines Corp., 2006 +# Author: <dykman@xxxxxxxxxx> + +# UDP tests to dom0. +# - determines dom0 network +# - creates a single guest domain +# - sets up a single NIC on same subnet as dom0 +# - conducts hping2 udp tests to the dom0 IP address + +# hping2 $dom0_IP -2 -c 1 -d $size +# where $size = 1, 48, 64, 512, 1440, 1448, 1500, 1505, +# 4096, 4192, 32767, 65507, 65508 + +trysizes = [ 1, 48, 64, 512, 1440, 1500, 1505, 4096, 4192, + 32767, 65495 ] + + + +from XmTestLib import * +rc = 0 + +Net = XmNetwork() + +try: + # read an IP address from the config + ip = Net.ip("dom1", "eth0") + mask = Net.mask("dom1", "eth0") +except NetworkError, e: + FAIL(str(e)) + +# Fire up a guest domain w/1 nic +if ENABLE_HVM_SUPPORT: + brg = "xenbr0" + config = {"vif" : ['type=ioemu, bridge=%s' % brg]} +else: + brg = None + config = {"vif" : ["ip=%s" % ip]} + +domain = XmTestDomain(extraConfig=config) +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + + +# Attach a console +try: + console = XmConsole(domain.getName(), historySaveCmds=True) + # Activate the console + console.sendInput("bhs") +except ConsoleError, e: + FAIL(str(e)) + +try: + # Add a suitable dom0 IP address + dom0ip = Net.ip("dom0", "eth0", todomname=domain.getName(), toeth="eth0", bridge=brg) +except NetworkError, e: + FAIL(str(e)) + +try: + console.runCmd("ifconfig eth0 inet "+ip+" netmask "+mask+" up") + + # Ping dom0 + fails="" + for size in trysizes: + out = console.runCmd("hping2 " + dom0ip + " -E /dev/urandom -2 -q -c 20" + + " --fast -d " + str(size)) + if out["return"]: + fails += " " + str(size) + print out["output"] +except ConsoleError, e: + FAIL(str(e)) + +if len(fails): + FAIL("UDP hping2 to dom0 failed for size" + fails + ".") + diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/12_network_domU_tcp_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ 
b/tools/xm-test/tests/network/12_network_domU_tcp_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,82 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: <dykman@xxxxxxxxxx> + +# TCP tests to domU interface +# - creates two guest domains +# - sets up a single NIC on each on same subnet +# - conducts tcp tests to the domU IP address. + +# hping2 $domU_IP -c 1 -d $size +# where $size = 1, 48, 64, 512, 1440, 1500, 1505, +# 4096, 4192, 32767, 65507, 65508 + +pingsizes = [ 1, 48, 64, 512, 1440, 1500, 1505, 4096, 4192, 16384, 24567, + 32767, 65495 ] + +from XmTestLib import * + +def netDomain(ip): + if ENABLE_HVM_SUPPORT: + config = {"vif" : ['type=ioemu']} + else: + config = {"vif" : ["ip=%s" % ip]} + + dom = XmTestDomain(extraConfig=config) + try: + dom.start() + except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + try: + # Attach a console + console = XmConsole(dom.getName(), historySaveCmds=True) + # Activate the console + console.sendInput("bhs") + except ConsoleError, e: + FAIL(str(e)) + return console + +rc = 0 + +Net = XmNetwork() + +try: + # pick an IP address + ip1 = Net.ip("dom1", "eth2") + mask1 = Net.mask("dom1", "eth2") +except NetworkError, e: + FAIL(str(e)) + +try: + # pick another IP address + ip2 = Net.ip("dom2", "eth2") + mask2 = Net.mask("dom2", "eth2") +except NetworkError, e: + FAIL(str(e)) + +# Fire up a pair of guest domains w/1 nic each +src_console = netDomain(ip1) +dst_console = netDomain(ip2) + +try: + src_console.runCmd("ifconfig eth0 inet "+ip1+" netmask "+mask1+" up") + dst_console.runCmd("ifconfig eth0 inet "+ip2+" netmask "+mask2+" up") + + # Ping the victim over eth0 + fails="" + for size in pingsizes: + out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom -q -c 20 " + + "--fast -d " + str(size)) + if out["return"]: + fails += " " + str(size) + print out["output"] +except ConsoleError, e: + FAIL(str(e)) + +if len(fails): + 
FAIL("TCP hping2 failed for size" + fails + ".") + diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/network/13_network_domU_udp_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/network/13_network_domU_udp_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,82 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2006 +# Author: <dykman@xxxxxxxxxx> + +# UDP tests to domU interface +# - creates two guest domains +# - sets up a single NIC on each on same subnet +# - conducts udp tests to the domU IP address. + +# hping2 $domU_IP -2 -c 1 -d $size +# where $size = 1, 48, 64, 512, 1440, 1500, 1505, +# 4096, 4192, 32767, 65507, 65508 + +pingsizes = [ 1, 48, 64, 512, 1440, 1500, 1505, 4096, 4192, + 32767, 65495 ] + +from XmTestLib import * + +def netDomain(ip): + if ENABLE_HVM_SUPPORT: + config = {"vif" : ['type=ioemu']} + else: + config = {"vif" : ["ip=%s" % ip]} + + dom = XmTestDomain(extraConfig=config) + try: + dom.start() + except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + try: + # Attach a console + console = XmConsole(dom.getName(), historySaveCmds=True) + # Activate the console + console.sendInput("bhs") + except ConsoleError, e: + FAIL(str(e)) + return console + +rc = 0 + +Net = XmNetwork() + +try: + # pick an IP address + ip1 = Net.ip("dom1", "eth2") + mask1 = Net.mask("dom1", "eth2") +except NetworkError, e: + FAIL(str(e)) + +try: + # pick another IP address + ip2 = Net.ip("dom2", "eth2") + mask2 = Net.mask("dom2", "eth2") +except NetworkError, e: + FAIL(str(e)) + +# Fire up a pair of guest domains w/1 nic each +src_console = netDomain(ip1) +dst_console = netDomain(ip2) + +try: + src_console.runCmd("ifconfig eth0 inet "+ip1+" netmask "+mask1+" up") + dst_console.runCmd("ifconfig eth0 inet "+ip2+" netmask "+mask2+" up") + + # Ping the victim over eth0 + fails="" + for size in pingsizes: + out = src_console.runCmd("hping2 " + ip2 + " -E /dev/urandom 
-2 -q " + + "-c 20 --fast -d " + str(size)) + if out["return"]: + fails += " " + str(size) + print out["output"] +except ConsoleError, e: + FAIL(str(e)) + +if len(fails): + FAIL("UDP hping2 failed for size" + fails + ".") + diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/01_sedf_period_slice_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/sedf/01_sedf_period_slice_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,62 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2005 +# Author: Dan Smith <danms@xxxxxxxxxx> +# Author: Ryan Harper <ryanh@xxxxxxxxxx> + +from XmTestLib import * + +def get_sedf_params(domain): + status, output = traceCommand("xm sched-sedf %s" %(domain.getName())) + return (status, output.split('\n')[1].split()) + + +domain = XmTestDomain(extraConfig = {"sched":"sedf"}) + +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + +# get current param values as baseline +(status, params) = get_sedf_params(domain) + +# check rv +if status != 0: + FAIL("Getting sedf parameters return non-zero rv (%d)", status) + +# parse out current params +(name, domid, p, s, l, e, w) = params + +# NB: setting period requires non-zero slice +# scale current period in half +period = str(float(p) / 2) +slice = str(float(p) / 4) + +opts = "%s -p %s -s %s" %(domain.getName(), period, slice) +(status, output) = traceCommand("xm sched-sedf %s" %(opts)) + +# check rv +if status != 0: + FAIL("Setting sedf parameters return non-zero rv (%d)" % status) + +# validate +(s,params) = get_sedf_params(domain) + +# check rv +if s != 0: + FAIL("Getting sedf parameters return non-zero rv (%d)" % s) + +(name,domid,p1,s1,l1,e1,w1) = params + +if p1 != period: + FAIL("Failed to change domain period from %f to %f" %(p, period)) + +if s1 != slice: + FAIL("Failed to change domain slice from %f to %f" %(s, slice)) + +# Stop the domain (nice shutdown) 
+domain.stop() diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/02_sedf_period_lower_neg.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/sedf/02_sedf_period_lower_neg.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,44 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2005 +# Author: Dan Smith <danms@xxxxxxxxxx> +# Author: Ryan Harper <ryanh@xxxxxxxxxx> +# +# Test if sched-sedf <dom> -p <period> handles lower bound + +from XmTestLib import * + +def get_sedf_params(domain): + status, output = traceCommand("xm sched-sedf %s" %(domain.getName())) + return (status, output.split('\n')[1].split()) + + +domain = XmTestDomain(extraConfig = {"sched":"sedf"}) + +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + +# pick bogus period +period = "-1" + +# NB: setting period requires non-zero slice +# scale current period in half +slice = "5" + +opts = "%s -p %s -s %s" %(domain.getName(), period, slice) +(status, output) = traceCommand("xm sched-sedf %s" %(opts)) + +# we should see this output from xm +eyecatcher = "Failed to set sedf parameters" + +# check for failure +if output.find(eyecatcher) >= 0: + FAIL("sched-sedf let me set bogus period (%s)" %(period)) + +# Stop the domain (nice shutdown) +domain.stop() diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/03_sedf_slice_lower_neg.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/sedf/03_sedf_slice_lower_neg.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,40 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2005 +# Author: Dan Smith <danms@xxxxxxxxxx> +# Author: Ryan Harper <ryanh@xxxxxxxxxx> +# +# Test if sched-sedf <dom> -p <period> handles lower bound + +from XmTestLib import * + +def get_sedf_params(domain): + status, output = traceCommand("xm sched-sedf %s" %(domain.getName())) + return (status, output.split('\n')[1].split()) + + 
+domain = XmTestDomain(extraConfig = {"sched":"sedf"}) + +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + +# pick bogus slice +slice = "0" + +opts = "%s -s %s" %(domain.getName(), slice) +(status, output) = traceCommand("xm sched-sedf %s" %(opts)) + +# we should see this output from xm +eyecatcher = "Failed to set sedf parameters" + +# check for failure +if output.find(eyecatcher) >= 0: + FAIL("sched-sedf let me set bogus slice (%s)" %(slice)) + +# Stop the domain (nice shutdown) +domain.stop() diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/04_sedf_slice_upper_neg.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/sedf/04_sedf_slice_upper_neg.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,48 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2005 +# Author: Dan Smith <danms@xxxxxxxxxx> +# Author: Ryan Harper <ryanh@xxxxxxxxxx> + +from XmTestLib import * + +def get_sedf_params(domain): + status, output = traceCommand("xm sched-sedf %s" %(domain.getName())) + return (status, output.split('\n')[1].split()) + + +domain = XmTestDomain(extraConfig = {"sched":"sedf"}) + +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + +# get current param values as baseline +(status, params) = get_sedf_params(domain) + +# check rv +if status != 0: + FAIL("Getting sedf parameters return non-zero rv (%d)", status) + +# parse out current params +(name, domid, p, s, l, e, w) = params + +# set slice > than current period +slice = str(float(p)+1) + +opts = "%s -s %s" %(domain.getName(), slice) +(status, output) = traceCommand("xm sched-sedf %s" %(opts)) + +# we should see this output from xm +eyecatcher = "Failed to set sedf parameters" + +# check for failure +if output.find(eyecatcher) >= 0: + FAIL("sched-sedf let me set a slice bigger than my period.") + +# 
Stop the domain (nice shutdown) +domain.stop() diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/05_sedf_extratime_pos.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ b/tools/xm-test/tests/sedf/05_sedf_extratime_pos.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,63 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2005 +# Author: Dan Smith <danms@xxxxxxxxxx> +# Author: Ryan Harper <ryanh@xxxxxxxxxx> + +from XmTestLib import * + +def get_sedf_params(domain): + status, output = traceCommand("xm sched-sedf %s" %(domain.getName())) + return (status, output.split('\n')[1].split()) + + +domain = XmTestDomain(extraConfig = {"sched":"sedf"}) + +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + +# get current param values as baseline +(status, params) = get_sedf_params(domain) + +# check rv +if status != 0: + FAIL("Getting sedf parameters return non-zero rv (%d)", status) + +# parse out current params +(name, domid, p, s, l, e, w) = params + +# toggle extratime value +extratime = str((int(e)+1)%2) + +direction = "disable" +# NB: when disabling extratime(=0), must pass in a slice +opts = "%s -e %s" %(domain.getName(), extratime) +if extratime == "0": + opts += " -s %s" %( str( (float(p)/2)+1 ) ) + direction = "enable" + +(status, output) = traceCommand("xm sched-sedf %s" %(opts)) + +# check rv +if status != 0: + FAIL("Setting sedf parameters return non-zero rv (%d)" % status) + +# validate +(s,params) = get_sedf_params(domain) + +# check rv +if s != 0: + FAIL("Getting sedf parameters return non-zero rv (%d)" % s) + +(name,domid,p1,s1,l1,e1,w1) = params + +if e1 != extratime: + FAIL("Failed to %s extratime" %(direction)) + +# Stop the domain (nice shutdown) +domain.stop() diff -r dc50cdd66c5c -r 0ed4a312765b tools/xm-test/tests/sedf/06_sedf_extratime_disable_neg.py --- /dev/null Tue Mar 14 20:10:21 2006 +++ 
b/tools/xm-test/tests/sedf/06_sedf_extratime_disable_neg.py Tue Mar 14 20:50:35 2006 @@ -0,0 +1,71 @@ +#!/usr/bin/python + +# Copyright (C) International Business Machines Corp., 2005 +# Author: Dan Smith <danms@xxxxxxxxxx> +# Author: Ryan Harper <ryanh@xxxxxxxxxx> + +from XmTestLib import * + +def get_sedf_params(domain): + status, output = traceCommand("xm sched-sedf %s" %(domain.getName())) + return (status, output.split('\n')[1].split()) + + +domain = XmTestDomain(extraConfig = {"sched":"sedf"}) + +try: + domain.start() +except DomainError, e: + if verbose: + print "Failed to create test domain because:" + print e.extra + FAIL(str(e)) + +# get current param values as baseline +(status, params) = get_sedf_params(domain) + +# check rv +if status != 0: + FAIL("Getting sedf parameters return non-zero rv (%d)", status) + +# parse out current params +(name, domid, p, s, l, e, w) = params + +# if extratime is off, turn it on and drop slice to 0 +if str(e) == "0": + extratime = 1 + opts = "%s -e %s" %(domain.getName(), extratime) + (status, output) = traceCommand("xm sched-sedf %s" %(opts)) + + # check rv + if status != 0: + FAIL("Failed to force extratime on (%d)" % status) + + # drop slice to 0 now that we are in extratime mode + slice = 0 + + opts = "%s -s %s" %(domain.getName(), slice) + (status, output) = traceCommand("xm sched-sedf %s" %(opts)) + + # check rv + if status != 0: + FAIL("Failed to force slice to 0 (%d)" % status) + + +# ASSERT(extratime=1, slice=0) + +# attempt to disable extratime without setting slice +extratime = "0" + +opts = "%s -e %s " %(domain.getName(), extratime) +(status, output) = traceCommand("xm sched-sedf %s" %(opts)) + +# we should see this output from xm +eyecatcher = "Failed to set sedf parameters" + +# check for failure +if output.find(eyecatcher) >= 0: + FAIL("sched-sedf let me disable extratime without a non-zero slice") + +# Stop the domain (nice shutdown) +domain.stop() diff -r dc50cdd66c5c -r 0ed4a312765b 
tools/xm-test/tests/sedf/01_sedf_multi_pos.py --- a/tools/xm-test/tests/sedf/01_sedf_multi_pos.py Tue Mar 14 20:10:21 2006 +++ /dev/null Tue Mar 14 20:50:35 2006 @@ -1,28 +0,0 @@ -#!/usr/bin/python - -# Copyright (C) International Business Machines Corp., 2005 -# Author: Dan Smith <danms@xxxxxxxxxx> - -from XmTestLib import * - -sedf_opts = "20000000 5000000 0 0 0" - -domain = XmTestDomain(extraConfig = {"sched":"sedf"}) - -try: - domain.start() -except DomainError, e: - if verbose: - print "Failed to create test domain because:" - print e.extra - FAIL(str(e)) - -for i in range(5): - status, output = traceCommand("xm sched-sedf %s %s" % (domain.getName(), - sedf_opts)) - if status != 0: - FAIL("[%i] xm sedf returned invalid %i != 0" % (i, status)) - - - - _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |