[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID f78e499dd669c5736abb8c3b8c6bf16cf3ec7f3f # Parent c10d4c6df48286edd885ee3b8553f2fdbf68d849 # Parent f026d4091322be717f66ce87e7c0dc372968a8f2 merge with xen-unstable.hg --- linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 2 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 32 + linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h | 14 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 21 - linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h | 8 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 21 - linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 19 - tools/firmware/vmxassist/vm86.c | 2 tools/ioemu/vl.c | 6 tools/misc/xenperf.c | 130 ++++++- tools/python/xen/xend/XendAPI.py | 3 tools/python/xen/xend/XendDomainInfo.py | 2 tools/python/xen/xend/XendStorageRepository.py | 27 - xen/arch/x86/hvm/svm/svm.c | 2 xen/arch/x86/hvm/vlapic.c | 166 ++++------ xen/arch/x86/hvm/vmx/io.c | 19 - xen/arch/x86/hvm/vmx/vmx.c | 2 xen/arch/x86/mm/shadow/common.c | 4 xen/arch/x86/mm/shadow/multi.c | 15 xen/arch/x86/traps.c | 119 ++++--- xen/arch/x86/x86_32/Makefile | 1 xen/arch/x86/x86_32/gpr_switch.S | 43 ++ xen/arch/x86/x86_64/Makefile | 1 xen/arch/x86/x86_64/gpr_switch.S | 63 +++ xen/include/asm-x86/hvm/vlapic.h | 27 - xen/include/public/arch-powerpc.h | 26 - xen/include/public/xencomm.h | 32 + 27 files changed, 544 insertions(+), 263 deletions(-) diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Tue Nov 14 14:59:37 2006 -0700 @@ -249,7 +249,7 @@ void __iomem * __ioremap(unsigned long p return NULL; area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; - flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; + flags |= _KERNPG_TABLE; if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr, phys_addr>>PAGE_SHIFT, size, __pgprot(flags), domid)) { diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Nov 14 14:59:37 2006 -0700 @@ -56,6 +56,11 @@ struct dma_mapping_ops* dma_ops; struct dma_mapping_ops* dma_ops; EXPORT_SYMBOL(dma_ops); +#ifdef CONFIG_XEN_COMPAT_030002 +unsigned int __kernel_page_user; +EXPORT_SYMBOL(__kernel_page_user); +#endif + extern unsigned long *contiguous_bitmap; static unsigned long dma_reserve __initdata; @@ -526,6 +531,33 @@ void __init xen_init_pt(void) addr_to_page(addr, page); addr = page[pud_index(__START_KERNEL_map)]; addr_to_page(addr, page); + +#ifdef CONFIG_XEN_COMPAT_030002 + /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER + in kernel PTEs. We check that here. */ + if (HYPERVISOR_xen_version(XENVER_version, NULL) <= 0x30000) { + unsigned long *pg; + pte_t pte; + + /* Mess with the initial mapping of page 0. It's not needed. */ + BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map); + addr = page[pmd_index(__START_KERNEL_map)]; + addr_to_page(addr, pg); + pte.pte = pg[pte_index(__START_KERNEL_map)]; + BUG_ON(!(pte.pte & _PAGE_PRESENT)); + + /* If _PAGE_USER isn't set, we obviously do not need it. */ + if (pte.pte & _PAGE_USER) { + /* _PAGE_USER is needed, but is it set implicitly? 
*/ + pte.pte &= ~_PAGE_USER; + if ((HYPERVISOR_update_va_mapping(__START_KERNEL_map, + pte, 0) != 0) || + !(pg[pte_index(__START_KERNEL_map)] & _PAGE_USER)) + /* We need to explicitly specify _PAGE_USER. */ + __kernel_page_user = _PAGE_USER; + } + } +#endif /* Construct mapping of initial pte page in our own directories. */ init_level4_pgt[pgd_index(__START_KERNEL_map)] = diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h Tue Nov 14 14:59:37 2006 -0700 @@ -127,10 +127,24 @@ static inline maddr_t phys_to_machine(pa machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); return machine; } + static inline paddr_t machine_to_phys(maddr_t machine) { paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +static inline paddr_t pte_machine_to_phys(maddr_t machine) +{ + /* + * In PAE mode, the NX bit needs to be dealt with in the value + * passed to mfn_to_pfn(). On x86_64, we need to mask it off, + * but for i386 the conversion to ulong for the argument will + * clip it off. + */ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK); return phys; } diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Tue Nov 14 14:59:37 2006 -0700 @@ -5,6 +5,16 @@ #define PAGE_SHIFT 12 #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifdef CONFIG_X86_PAE +#define __PHYSICAL_MASK_SHIFT 36 +#define __PHYSICAL_MASK ((1ULL << __PHYSICAL_MASK_SHIFT) - 1) +#define PHYSICAL_PAGE_MASK (~((1ULL << PAGE_SHIFT) - 1) & __PHYSICAL_MASK) +#else +#define __PHYSICAL_MASK_SHIFT 32 +#define __PHYSICAL_MASK (~0UL) +#define PHYSICAL_PAGE_MASK (PAGE_MASK & __PHYSICAL_MASK) +#endif #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) @@ -85,7 +95,7 @@ static inline unsigned long long pte_val if (x.pte_low) { ret = x.pte_low | (unsigned long long)x.pte_high << 32; - ret = machine_to_phys(ret) | 1; + ret = pte_machine_to_phys(ret) | 1; } else { ret = 0; } @@ -94,13 +104,13 @@ static inline unsigned long long pmd_val static inline unsigned long long pmd_val(pmd_t x) { unsigned long long ret = x.pmd; - if (ret) ret = machine_to_phys(ret) | 1; + if (ret) ret = pte_machine_to_phys(ret) | 1; return ret; } static inline unsigned long long pgd_val(pgd_t x) { unsigned long long ret = x.pgd; - if (ret) ret = machine_to_phys(ret) | 1; + if (ret) ret = pte_machine_to_phys(ret) | 1; return ret; } static inline unsigned long long pte_val_ma(pte_t x) @@ -115,7 +125,8 @@ typedef struct { unsigned long pgprot; } #define pgprot_val(x) ((x).pgprot) #include <asm/maddr.h> #define boot_pte_t pte_t /* or would you rather have a typedef */ -#define pte_val(x) (((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \ +#define pte_val(x) (((x).pte_low & 1) ? 
\ + pte_machine_to_phys((x).pte_low) : \ (x).pte_low) #define pte_val_ma(x) ((x).pte_low) #define __pte(x) ({ unsigned long _x = (x); \ @@ -125,7 +136,7 @@ static inline unsigned long pgd_val(pgd_ static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = machine_to_phys(ret) | 1; + if (ret) ret = pte_machine_to_phys(ret) | 1; return ret; } #define HPAGE_SHIFT 22 diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h Tue Nov 14 14:59:37 2006 -0700 @@ -127,6 +127,14 @@ static inline paddr_t machine_to_phys(ma return phys; } +static inline paddr_t pte_machine_to_phys(maddr_t machine) +{ + paddr_t phys; + phys = mfn_to_pfn((machine & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK); + return phys; +} + /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(__pa(v))) #define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT)) diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h Tue Nov 14 14:59:37 2006 -0700 @@ -33,6 +33,13 @@ #define PAGE_SIZE (1UL << PAGE_SHIFT) #endif #define PAGE_MASK (~(PAGE_SIZE-1)) + +/* See Documentation/x86_64/mm.txt for a description of the memory map. */ +#define __PHYSICAL_MASK_SHIFT 46 +#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) +#define __VIRTUAL_MASK_SHIFT 48 +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) + #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) #define THREAD_ORDER 1 @@ -90,28 +97,28 @@ typedef struct { unsigned long pgd; } pg typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) (((x).pte & 1) ? machine_to_phys((x).pte) : \ +#define pte_val(x) (((x).pte & 1) ? pte_machine_to_phys((x).pte) : \ (x).pte) #define pte_val_ma(x) ((x).pte) static inline unsigned long pmd_val(pmd_t x) { unsigned long ret = x.pmd; - if (ret) ret = machine_to_phys(ret); + if (ret) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long pud_val(pud_t x) { unsigned long ret = x.pud; - if (ret) ret = machine_to_phys(ret); + if (ret) ret = pte_machine_to_phys(ret); return ret; } static inline unsigned long pgd_val(pgd_t x) { unsigned long ret = x.pgd; - if (ret) ret = machine_to_phys(ret); + if (ret) ret = pte_machine_to_phys(ret); return ret; } @@ -162,12 +169,6 @@ static inline pgd_t __pgd(unsigned long /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) - -/* See Documentation/x86_64/mm.txt for a description of the memory map. 
*/ -#define __PHYSICAL_MASK_SHIFT 46 -#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) -#define __VIRTUAL_MASK_SHIFT 48 -#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) #define KERNEL_TEXT_SIZE (40UL*1024*1024) #define KERNEL_TEXT_START 0xffffffff80000000UL diff -r c10d4c6df482 -r f78e499dd669 linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Tue Nov 14 12:46:33 2006 -0700 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Tue Nov 14 14:59:37 2006 -0700 @@ -205,8 +205,14 @@ static inline pte_t ptep_get_and_clear_f #define _PAGE_PROTNONE 0x080 /* If not present */ #define _PAGE_NX (1UL<<_PAGE_BIT_NX) +#ifdef CONFIG_XEN_COMPAT_030002 +extern unsigned int __kernel_page_user; +#else +#define __kernel_page_user 0 +#endif + #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user) #define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) @@ -219,13 +225,13 @@ static inline pte_t ptep_get_and_clear_f #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX) #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define __PAGE_KERNEL \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user) #define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user) #define __PAGE_KERNEL_NOCACHE \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user) #define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX) + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user) #define __PAGE_KERNEL_VSYSCALL \ (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define __PAGE_KERNEL_VSYSCALL_NOCACHE \ @@ -422,7 +428,8 @@ static inline pud_t *pud_offset_k(pgd_t can temporarily clear it. */ #define pmd_present(x) (pmd_val(x)) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT)) +#define pmd_bad(x) ((pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \ + != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT))) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) diff -r c10d4c6df482 -r f78e499dd669 tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Tue Nov 14 12:46:33 2006 -0700 +++ b/tools/firmware/vmxassist/vm86.c Tue Nov 14 14:59:37 2006 -0700 @@ -816,9 +816,7 @@ static int static int mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc) { - unsigned eip = regs->eip - 1; unsigned modrm = fetch8(regs); - unsigned addr = operand(prefix, regs, modrm); /* Only need to emulate segment loads in real->protected mode. 
*/ if (mode != VM86_REAL_TO_PROTECTED) diff -r c10d4c6df482 -r f78e499dd669 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Tue Nov 14 12:46:33 2006 -0700 +++ b/tools/ioemu/vl.c Tue Nov 14 14:59:37 2006 -0700 @@ -6489,9 +6489,9 @@ int main(int argc, char **argv) } if (ram_size > MMIO_START) { - for (i = 0 ; i < MEM_G >> PAGE_SHIFT; i++) - page_array[MMIO_START >> PAGE_SHIFT + i] = - page_array[IO_PAGE_START >> PAGE_SHIFT + 1]; + for (i = 0 ; i < (MEM_G >> PAGE_SHIFT); i++) + page_array[(MMIO_START >> PAGE_SHIFT) + i] = + page_array[(IO_PAGE_START >> PAGE_SHIFT) + 1]; } phys_ram_base = xc_map_foreign_batch(xc_handle, domid, diff -r c10d4c6df482 -r f78e499dd669 tools/misc/xenperf.c --- a/tools/misc/xenperf.c Tue Nov 14 12:46:33 2006 -0700 +++ b/tools/misc/xenperf.c Tue Nov 14 14:59:37 2006 -0700 @@ -10,7 +10,6 @@ * Description: */ - #include <xenctrl.h> #include <stdio.h> #include <stdlib.h> @@ -18,6 +17,57 @@ #include <errno.h> #include <string.h> +#define X(name) [__HYPERVISOR_##name] = #name +const char *hypercall_name_table[64] = +{ + X(set_trap_table), + X(mmu_update), + X(set_gdt), + X(stack_switch), + X(set_callbacks), + X(fpu_taskswitch), + X(sched_op_compat), + X(platform_op), + X(set_debugreg), + X(get_debugreg), + X(update_descriptor), + X(memory_op), + X(multicall), + X(update_va_mapping), + X(set_timer_op), + X(event_channel_op_compat), + X(xen_version), + X(console_io), + X(physdev_op_compat), + X(grant_table_op), + X(vm_assist), + X(update_va_mapping_otherdomain), + X(iret), + X(vcpu_op), + X(set_segment_base), + X(mmuext_op), + X(acm_op), + X(nmi_op), + X(sched_op), + X(callback_op), + X(xenoprof_op), + X(event_channel_op), + X(physdev_op), + X(hvm_op), + X(sysctl), + X(domctl), + X(kexec_op), + X(arch_0), + X(arch_1), + X(arch_2), + X(arch_3), + X(arch_4), + X(arch_5), + X(arch_6), + X(arch_7), +}; +#undef X + int lock_pages(void *addr, size_t len) { int e = 0; @@ -30,7 +80,7 @@ void unlock_pages(void *addr, size_t len void unlock_pages(void *addr, size_t len) { #ifndef __sun__ - munlock(addr, len); + munlock(addr, len); #endif } @@ -38,10 +88,11 @@ int main(int argc, char *argv[]) { int i, j, xc_handle; xc_perfc_desc_t *pcd; - xc_perfc_val_t *pcv; - xc_perfc_val_t *val; - int num_desc, num_val; - unsigned int sum, reset = 0, full = 0; + xc_perfc_val_t *pcv; + xc_perfc_val_t *val; + int num_desc, num_val; + unsigned int sum, reset = 0, full = 0, pretty = 0; + char hypercall_name[36]; if ( argc > 1 ) { @@ -52,6 +103,10 @@ int main(int argc, char *argv[]) { case 'f': full = 1; + break; + case 'p': + full = 1; + pretty = 1; break; case 'r': reset = 1; @@ -66,6 +121,7 @@ int main(int argc, char *argv[]) printf("%s: [-r]\n", argv[0]); printf("no args: print digested counters\n"); printf(" -f : print full arrays/histograms\n"); + printf(" -p : print full arrays/histograms in pretty format\n"); printf(" -r : reset counters\n"); return 0; } @@ -91,21 +147,21 @@ int main(int argc, char *argv[]) return 0; } - if ( xc_perfc_control(xc_handle, XEN_SYSCTL_PERFCOP_query, - NULL, NULL, &num_desc, &num_val) != 0 ) - { - fprintf(stderr, "Error getting number of perf counters: %d (%s)\n", - errno, strerror(errno)); - return 1; - } + if ( xc_perfc_control(xc_handle, XEN_SYSCTL_PERFCOP_query, + NULL, NULL, &num_desc, &num_val) != 0 ) + { + fprintf(stderr, "Error getting number of perf counters: %d (%s)\n", + errno, strerror(errno)); + return 1; + } pcd = malloc(sizeof(*pcd) * num_desc); - pcv = malloc(sizeof(*pcv) * num_val); + pcv = malloc(sizeof(*pcv) * num_val); if ( pcd == NULL - || lock_pages(pcd, 
sizeof(*pcd) * num_desc) != 0 - || pcv == NULL - || lock_pages(pcd, sizeof(*pcv) * num_val) != 0) + || lock_pages(pcd, sizeof(*pcd) * num_desc) != 0 + || pcv == NULL + || lock_pages(pcd, sizeof(*pcv) * num_val) != 0) { fprintf(stderr, "Could not alloc or lock buffers: %d (%s)\n", errno, strerror(errno)); @@ -113,7 +169,7 @@ int main(int argc, char *argv[]) } if ( xc_perfc_control(xc_handle, XEN_SYSCTL_PERFCOP_query, - pcd, pcv, NULL, NULL) != 0 ) + pcd, pcv, NULL, NULL) != 0 ) { fprintf(stderr, "Error getting perf counter: %d (%s)\n", errno, strerror(errno)); @@ -123,7 +179,7 @@ int main(int argc, char *argv[]) unlock_pages(pcd, sizeof(*pcd) * num_desc); unlock_pages(pcv, sizeof(*pcv) * num_val); - val = pcv; + val = pcv; for ( i = 0; i < num_desc; i++ ) { printf ("%-35s ", pcd[i].name); @@ -134,11 +190,37 @@ int main(int argc, char *argv[]) printf ("T=%10u ", (unsigned int)sum); if ( full || (pcd[i].nr_vals <= 4) ) - for ( j = 0; j < pcd[i].nr_vals; j++ ) - printf(" %10u", (unsigned int)val[j]); - - printf("\n"); - val += pcd[i].nr_vals; + { + if ( pretty && (strcmp(pcd[i].name, "hypercalls") == 0) ) + { + printf("\n"); + for( j = 0; j < pcd[i].nr_vals; j++ ) + { + if ( val[j] == 0 ) + continue; + if ( (j < 64) && hypercall_name_table[j] ) + strncpy(hypercall_name, hypercall_name_table[j], + sizeof(hypercall_name)); + else + sprintf(hypercall_name, "[%d]", j); + hypercall_name[sizeof(hypercall_name)-1]='\0'; + printf("%-35s ", hypercall_name); + printf("%12u\n", (unsigned int)val[j]); + } + } + else + { + for ( j = 0; j < pcd[i].nr_vals; j++ ) + printf(" %10u", (unsigned int)val[j]); + printf("\n"); + } + } + else + { + printf("\n"); + } + + val += pcd[i].nr_vals; } return 0; diff -r c10d4c6df482 -r f78e499dd669 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Tue Nov 14 12:46:33 2006 -0700 +++ b/tools/python/xen/xend/XendAPI.py Tue Nov 14 14:59:37 2006 -0700 @@ -481,7 +481,8 @@ class XendAPI: def host_get_record(self, session, host_ref): node = XendNode.instance() dom = XendDomain.instance() - record = {'name_label': node.name, + record = {'uuid': node.uuid, + 'name_label': node.name, 'name_description': '', 'software_version': node.xen_version(), 'resident_VMs': dom.get_domain_refs(), diff -r c10d4c6df482 -r f78e499dd669 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Tue Nov 14 12:46:33 2006 -0700 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Nov 14 14:59:37 2006 -0700 @@ -520,7 +520,7 @@ class XendDomainInfo: self._waitForDevice(dev_type, devid) return self.getDeviceController(dev_type).sxpr(devid) - def device_configure(self, dev_config, devid): + def device_configure(self, dev_config, devid = None): """Configure an existing device. 
@param dev_config: device configuration diff -r c10d4c6df482 -r f78e499dd669 tools/python/xen/xend/XendStorageRepository.py --- a/tools/python/xen/xend/XendStorageRepository.py Tue Nov 14 12:46:33 2006 -0700 +++ b/tools/python/xen/xend/XendStorageRepository.py Tue Nov 14 14:59:37 2006 -0700 @@ -31,10 +31,8 @@ XEND_STORAGE_DIR = "/var/lib/xend/storag XEND_STORAGE_DIR = "/var/lib/xend/storage/" XEND_STORAGE_QCOW_FILENAME = "%s.qcow" XEND_STORAGE_VDICFG_FILENAME = "%s.vdi.xml" -DF_COMMAND = "df -lPk" QCOW_CREATE_COMMAND = "/usr/sbin/qcow-create %d %s" -KB = 1024 MB = 1024 *1024 class DeviceInvalidError(Exception): @@ -151,23 +149,6 @@ class XendStorageRepository: finally: self.lock.release() - def _get_df(self): - """Returns the output of 'df' in a dictionary where the keys - are the Linux device numbers, and the values are it's corresponding - free space in bytes - - @rtype: dictionary - """ - df = commands.getoutput(DF_COMMAND) - devnum_free = {} - for line in df.split('\n')[1:]: - words = line.split() - mount_point = words[-1] - dev_no = os.stat(mount_point).st_dev - free_kb = int(words[3]) - devnum_free[dev_no] = free_kb * KB - return devnum_free - def _get_free_space(self): """Returns the amount of free space in bytes available in the storage partition. Note that this may not be used if the storage repository @@ -175,12 +156,8 @@ class XendStorageRepository: @rtype: int """ - df = self._get_df() - devnum = os.stat(self.storage_dir).st_dev - if df.has_key(devnum): - return df[devnum] - raise DeviceInvalidError("Device not found for storage path: %s" % - self.storage_dir) + stfs = os.statvfs(self.storage_dir) + return stfs.f_bavail * stfs.f_frsize def _has_space_available_for(self, size_bytes): """Returns whether there is enough space for an image in the diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/hvm/svm/svm.c Tue Nov 14 14:59:37 2006 -0700 @@ -990,7 +990,7 @@ static void svm_vmexit_do_cpuid(struct v cpuid(input, &eax, &ebx, &ecx, &edx); if (input == 0x00000001 || input == 0x80000001 ) { - if ( !vlapic_global_enabled(vcpu_vlapic(v)) ) + if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) { /* Since the apic is disabled, avoid any confusion about SMP cpus being available */ diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/hvm/vlapic.c --- a/xen/arch/x86/hvm/vlapic.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/hvm/vlapic.c Tue Nov 14 14:59:37 2006 -0700 @@ -71,17 +71,22 @@ static unsigned int vlapic_lvt_mask[VLAP #define APIC_DEST_NOSHORT 0x0 #define APIC_DEST_MASK 0x800 -#define vlapic_lvt_enabled(vlapic, lvt_type) \ +#define vlapic_lvt_enabled(vlapic, lvt_type) \ (!(vlapic_get_reg(vlapic, lvt_type) & APIC_LVT_MASKED)) -#define vlapic_lvt_vector(vlapic, lvt_type) \ +#define vlapic_lvt_vector(vlapic, lvt_type) \ (vlapic_get_reg(vlapic, lvt_type) & APIC_VECTOR_MASK) -#define vlapic_lvt_dm(vlapic, lvt_type) \ +#define vlapic_lvt_dm(vlapic, lvt_type) \ (vlapic_get_reg(vlapic, lvt_type) & APIC_MODE_MASK) -#define vlapic_lvtt_period(vlapic) \ +#define vlapic_lvtt_period(vlapic) \ (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC) + +#define vlapic_base_address(vlapic) \ + (vlapic->apic_base_msr & MSR_IA32_APICBASE_BASE) + +static int vlapic_reset(struct vlapic *vlapic); /* * Generic APIC bitmap vector update & search routines. @@ -238,8 +243,7 @@ static int vlapic_match_dest(struct vcpu if ( dest_mode == 0 ) { /* Physical mode. */ - if ( (dest == 0xFF) || /* broadcast? 
*/ - (GET_APIC_ID(vlapic_get_reg(target, APIC_ID)) == dest) ) + if ( (dest == 0xFF) || (dest == v->vcpu_id) ) result = 1; } else @@ -283,7 +287,7 @@ static int vlapic_accept_irq(struct vcpu case APIC_DM_FIXED: case APIC_DM_LOWEST: /* FIXME add logic for vcpu on reset */ - if ( unlikely(vlapic == NULL || !vlapic_enabled(vlapic)) ) + if ( unlikely(!vlapic_enabled(vlapic)) ) break; if ( vlapic_test_and_set_irr(vector, vlapic) && trig_mode ) @@ -319,7 +323,7 @@ static int vlapic_accept_irq(struct vcpu if ( trig_mode && !(level & APIC_INT_ASSERT) ) break; /* FIXME How to check the situation after vcpu reset? */ - if ( test_and_clear_bit(_VCPUF_initialised, &v->vcpu_flags) ) + if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) { gdprintk(XENLOG_ERR, "Reset hvm vcpu not supported yet\n"); goto exit_and_crash; @@ -371,21 +375,15 @@ struct vlapic *apic_round_robin( old = next = d->arch.hvm_domain.round_info[vector]; - /* the vcpu array is arranged according to vcpu_id */ do { if ( ++next == MAX_VIRT_CPUS ) next = 0; - if ( (d->vcpu[next] == NULL) || - !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags) ) + if ( (d->vcpu[next] == NULL) || !test_bit(next, &bitmap) ) continue; - - if ( test_bit(next, &bitmap) ) - { - target = vcpu_vlapic(d->vcpu[next]); - if ( vlapic_enabled(target) ) - break; - target = NULL; - } + target = vcpu_vlapic(d->vcpu[next]); + if ( vlapic_enabled(target) ) + break; + target = NULL; } while ( next != old ); d->arch.hvm_domain.round_info[vector] = next; @@ -398,10 +396,9 @@ void vlapic_EOI_set(struct vlapic *vlapi { int vector = vlapic_find_highest_isr(vlapic); - /* Not every write EOI will has correpsoning ISR, - one example is when Kernel check timer on setup_IO_APIC */ + /* Some EOI writes may not have a matching to an in-service interrupt. 
*/ if ( vector == -1 ) - return ; + return; vlapic_clear_vector(vector, vlapic->regs + APIC_ISR); @@ -538,7 +535,7 @@ static unsigned long vlapic_read(struct unsigned int tmp; unsigned long result; struct vlapic *vlapic = vcpu_vlapic(v); - unsigned int offset = address - vlapic->base_address; + unsigned int offset = address - vlapic_base_address(vlapic); if ( offset > APIC_TDCR ) return 0; @@ -588,7 +585,7 @@ static void vlapic_write(struct vcpu *v, unsigned long len, unsigned long val) { struct vlapic *vlapic = vcpu_vlapic(v); - unsigned int offset = address - vlapic->base_address; + unsigned int offset = address - vlapic_base_address(vlapic); if ( offset != 0xb0 ) HVM_DBG_LOG(DBG_LEVEL_VLAPIC, @@ -641,10 +638,6 @@ static void vlapic_write(struct vcpu *v, switch ( offset ) { - case APIC_ID: /* Local APIC ID */ - vlapic_set_reg(vlapic, APIC_ID, val); - break; - case APIC_TASKPRI: vlapic_set_reg(vlapic, APIC_TASKPRI, val & 0xff); vlapic->flush_tpr_threshold = 1; @@ -670,7 +663,7 @@ static void vlapic_write(struct vcpu *v, int i; uint32_t lvt_val; - vlapic->status |= VLAPIC_SOFTWARE_DISABLE_MASK; + vlapic->disabled |= VLAPIC_SW_DISABLED; for ( i = 0; i < VLAPIC_LVT_NUM; i++ ) { @@ -678,17 +671,11 @@ static void vlapic_write(struct vcpu *v, vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i, lvt_val | APIC_LVT_MASKED); } - - if ( (vlapic_get_reg(vlapic, APIC_LVT0) & APIC_MODE_MASK) - == APIC_DM_EXTINT ) - clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); } else { - vlapic->status &= ~VLAPIC_SOFTWARE_DISABLE_MASK; - if ( (vlapic_get_reg(vlapic, APIC_LVT0) & APIC_MODE_MASK) - == APIC_DM_EXTINT ) - set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + vlapic->disabled &= ~VLAPIC_SW_DISABLED; + vlapic->flush_tpr_threshold = 1; } break; @@ -712,26 +699,11 @@ static void vlapic_write(struct vcpu *v, case APIC_LVT0: /* LVT LINT0 Reg */ case APIC_LVT1: /* LVT Lint1 Reg */ case APIC_LVTERR: /* LVT Error Reg */ - { - if ( vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK ) + if ( vlapic_sw_disabled(vlapic) ) val |= APIC_LVT_MASKED; - val &= vlapic_lvt_mask[(offset - APIC_LVTT) >> 4]; - vlapic_set_reg(vlapic, offset, val); - - if ( (vlapic_vcpu(vlapic)->vcpu_id == 0) && (offset == APIC_LVT0) ) - { - if ( (val & APIC_MODE_MASK) == APIC_DM_EXTINT ) - if ( val & APIC_LVT_MASKED) - clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); - else - set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); - else - clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); - } - } - break; + break; case APIC_TMICT: { @@ -773,10 +745,8 @@ static int vlapic_range(struct vcpu *v, static int vlapic_range(struct vcpu *v, unsigned long addr) { struct vlapic *vlapic = vcpu_vlapic(v); - - return (vlapic_global_enabled(vlapic) && - (addr >= vlapic->base_address) && - (addr < vlapic->base_address + PAGE_SIZE)); + unsigned long offset = addr - vlapic_base_address(vlapic); + return (!vlapic_hw_disabled(vlapic) && (offset < PAGE_SIZE)); } struct hvm_mmio_handler vlapic_mmio_handler = { @@ -787,17 +757,23 @@ struct hvm_mmio_handler vlapic_mmio_hand void vlapic_msr_set(struct vlapic *vlapic, uint64_t value) { + if ( (vlapic->apic_base_msr ^ value) & MSR_IA32_APICBASE_ENABLE ) + { + if ( value & MSR_IA32_APICBASE_ENABLE ) + { + vlapic_reset(vlapic); + vlapic->disabled &= ~VLAPIC_HW_DISABLED; + } + else + { + vlapic->disabled |= VLAPIC_HW_DISABLED; + } + } + vlapic->apic_base_msr = value; - vlapic->base_address = vlapic->apic_base_msr & MSR_IA32_APICBASE_BASE; - - if ( !(value & MSR_IA32_APICBASE_ENABLE) ) - set_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status ); 
- else - clear_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status); HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "apic base msr is 0x%016"PRIx64", and base address is 0x%lx.", - vlapic->apic_base_msr, vlapic->base_address); + "apic base msr is 0x%016"PRIx64".", vlapic->apic_base_msr); } void vlapic_timer_fn(void *data) @@ -845,8 +821,15 @@ int vlapic_accept_pic_intr(struct vcpu * int vlapic_accept_pic_intr(struct vcpu *v) { struct vlapic *vlapic = vcpu_vlapic(v); - - return vlapic ? test_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status) : 1; + uint32_t lvt0 = vlapic_get_reg(vlapic, APIC_LVT0); + + /* + * Only CPU0 is wired to the 8259A. INTA cycles occur if LINT0 is set up + * accept ExtInts, or if the LAPIC is disabled (so LINT0 behaves as INTR). + */ + return ((v->vcpu_id == 0) && + (((lvt0 & (APIC_MODE_MASK|APIC_LVT_MASKED)) == APIC_DM_EXTINT) || + vlapic_hw_disabled(vlapic))); } int cpu_get_apic_interrupt(struct vcpu *v, int *mode) @@ -854,7 +837,7 @@ int cpu_get_apic_interrupt(struct vcpu * struct vlapic *vlapic = vcpu_vlapic(v); int highest_irr; - if ( !vlapic || !vlapic_enabled(vlapic) ) + if ( !vlapic_enabled(vlapic) ) return -1; highest_irr = vlapic_find_highest_irr(vlapic); @@ -886,9 +869,6 @@ void vlapic_post_injection(struct vcpu * void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) { struct vlapic *vlapic = vcpu_vlapic(v); - - if ( unlikely(vlapic == NULL) ) - return; switch ( deliver_mode ) { @@ -920,36 +900,38 @@ void vlapic_post_injection(struct vcpu * } } +/* Reset the VLPAIC back to its power-on/reset state. */ static int vlapic_reset(struct vlapic *vlapic) { struct vcpu *v = vlapic_vcpu(vlapic); int i; - vlapic_set_reg(vlapic, APIC_ID, v->vcpu_id << 24); - + vlapic_set_reg(vlapic, APIC_ID, v->vcpu_id << 24); vlapic_set_reg(vlapic, APIC_LVR, VLAPIC_VERSION); + + for ( i = 0; i < 8; i++ ) + { + vlapic_set_reg(vlapic, APIC_IRR + 0x10 * i, 0); + vlapic_set_reg(vlapic, APIC_ISR + 0x10 * i, 0); + vlapic_set_reg(vlapic, APIC_TMR + 0x10 * i, 0); + } + vlapic_set_reg(vlapic, APIC_ICR, 0); + vlapic_set_reg(vlapic, APIC_ICR2, 0); + vlapic_set_reg(vlapic, APIC_LDR, 0); + vlapic_set_reg(vlapic, APIC_TASKPRI, 0); + vlapic_set_reg(vlapic, APIC_TMICT, 0); + vlapic_set_reg(vlapic, APIC_TMCCT, 0); + vlapic_set_tdcr(vlapic, 0); + + vlapic_set_reg(vlapic, APIC_DFR, 0xffffffffU); for ( i = 0; i < VLAPIC_LVT_NUM; i++ ) vlapic_set_reg(vlapic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); - vlapic_set_reg(vlapic, APIC_DFR, 0xffffffffU); - vlapic_set_reg(vlapic, APIC_SPIV, 0xff); - - vlapic->apic_base_msr = MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - - vlapic->flush_tpr_threshold = 0; - - vlapic_set_tdcr(vlapic, 0); - - vlapic->base_address = vlapic->apic_base_msr & - MSR_IA32_APICBASE_BASE; - - HVM_DBG_LOG(DBG_LEVEL_VLAPIC, - "vcpu=%p, id=%d, vlapic_apic_base_msr=0x%016"PRIx64", " - "base_address=0x%0lx.", - v, GET_APIC_ID(vlapic_get_reg(vlapic, APIC_ID)), - vlapic->apic_base_msr, vlapic->base_address); + vlapic->disabled |= VLAPIC_SW_DISABLED; + + vlapic->flush_tpr_threshold = 1; return 1; } @@ -974,6 +956,7 @@ int vlapic_init(struct vcpu *v) vlapic_reset(vlapic); + vlapic->apic_base_msr = MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; if ( v->vcpu_id == 0 ) vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP; @@ -986,7 +969,6 @@ int vlapic_init(struct vcpu *v) { vlapic_set_reg(vlapic, APIC_LVT0, APIC_MODE_EXTINT << 8); vlapic_set_reg(vlapic, APIC_LVT1, APIC_MODE_NMI << 8); - set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); } #endif diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/hvm/vmx/io.c 
--- a/xen/arch/x86/hvm/vmx/io.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/hvm/vmx/io.c Tue Nov 14 14:59:37 2006 -0700 @@ -69,20 +69,21 @@ static inline int is_interruptibility_st #ifdef __x86_64__ static void update_tpr_threshold(struct vlapic *vlapic) { - int highest_irr, tpr; + int max_irr, tpr; /* Clear the work-to-do flag /then/ do the work. */ vlapic->flush_tpr_threshold = 0; mb(); - highest_irr = vlapic_find_highest_irr(vlapic); + if ( !vlapic_enabled(vlapic) || + ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) ) + { + __vmwrite(TPR_THRESHOLD, 0); + return; + } + tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0; - - if ( highest_irr == -1 ) - __vmwrite(TPR_THRESHOLD, 0); - else - __vmwrite(TPR_THRESHOLD, - (highest_irr > tpr) ? (tpr >> 4) : (highest_irr >> 4)); + __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4)); } #else #define update_tpr_threshold(v) ((void)0) @@ -115,7 +116,7 @@ asmlinkage void vmx_intr_assist(void) pic_set_xen_irq(pic, callback_irq, local_events_need_delivery()); } - if ( vlapic_enabled(vlapic) && vlapic->flush_tpr_threshold ) + if ( vlapic->flush_tpr_threshold ) update_tpr_threshold(vlapic); has_ext_irq = cpu_has_pending_irq(v); diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Nov 14 14:59:37 2006 -0700 @@ -853,7 +853,7 @@ static void vmx_do_cpuid(struct cpu_user /* Mask off reserved bits. */ ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED; - if ( !vlapic_global_enabled(vcpu_vlapic(v)) ) + if ( vlapic_hw_disabled(vcpu_vlapic(v)) ) clear_bit(X86_FEATURE_APIC, &edx); #if CONFIG_PAGING_LEVELS >= 3 diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/mm/shadow/common.c Tue Nov 14 14:59:37 2006 -0700 @@ -1953,9 +1953,11 @@ int shadow_remove_write_access(struct vc case 3: GUESS(0x70381C00000UL + (fault_addr >> 27), 3); break; } - /* Linux direct map at 0xffff810000000000 */ + /* 64bit Linux direct map at 0xffff810000000000; older kernels + * had it at 0x0000010000000000UL */ gfn = sh_mfn_to_gfn(v->domain, gmfn); GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4); + GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4); } #endif /* CONFIG_PAGING_LEVELS >= 4 */ #endif /* CONFIG_PAGING_LEVELS >= 3 */ diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/mm/shadow/multi.c Tue Nov 14 14:59:37 2006 -0700 @@ -2623,6 +2623,10 @@ static int sh_page_fault(struct vcpu *v, * Fall through to the normal fault handing logic */ perfc_incrc(shadow_fault_fast_fail); SHADOW_PRINTK("fast path false alarm!\n"); + /* Don't pass the reserved-bit bit: if we look at the fault + * below and decide to pass it to the guest, the reserved-bit + * bit won't make sense there. */ + regs->error_code &= ~PFEC_reserved_bit; } } #endif /* SHOPT_FAST_FAULT_PATH */ @@ -3266,8 +3270,9 @@ sh_set_toplevel_shadow(struct vcpu *v, } else { - /* This guest MFN is a pagetable. Must revoke write access. */ - if ( shadow_remove_write_access(v, gmfn, GUEST_PAGING_LEVELS, 0) != 0 ) + /* This guest MFN is a pagetable. Must revoke write access + * (and can't use heuristics because we have no linear map here). */ + if ( shadow_remove_write_access(v, gmfn, 0, 0) != 0 ) flush_tlb_mask(v->domain->domain_dirty_cpumask); /* Make sure there's enough free shadow memory. 
*/ shadow_prealloc(d, SHADOW_MAX_ORDER); @@ -3773,7 +3778,7 @@ sh_x86_emulate_write(struct vcpu *v, uns shadow_validate_guest_pt_write(v, mfn, addr, bytes_on_page); bytes -= bytes_on_page; /* If we are writing zeros to this page, might want to unshadow */ - if ( *(u8 *)addr == 0 ) + if ( likely(bytes_on_page >= 4) && (*(u32 *)addr == 0) ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); } @@ -3818,7 +3823,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u vaddr, prev, old, new, *(unsigned long *)addr, bytes); /* If we are writing zeros to this page, might want to unshadow */ - if ( *(u8 *)addr == 0 ) + if ( likely(bytes >= 4) && (*(u32 *)addr == 0) ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); @@ -3853,7 +3858,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, rv = X86EMUL_CMPXCHG_FAILED; /* If we are writing zeros to this page, might want to unshadow */ - if ( *(u8 *)addr == 0 ) + if ( *(u32 *)addr == 0 ) check_for_early_unshadow(v, mfn); sh_unmap_domain_page(addr); diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/traps.c Tue Nov 14 14:59:37 2006 -0700 @@ -985,8 +985,7 @@ static inline int admin_io_okay( return ioports_access_permitted(v->domain, port, port + bytes - 1); } -/* Check admin limits. Silently fail the access if it is disallowed. */ -static inline unsigned char inb_user( +static inline int guest_inb_okay( unsigned int port, struct vcpu *v, struct cpu_user_regs *regs) { /* @@ -996,19 +995,21 @@ static inline unsigned char inb_user( * Note that we could emulate bit 4 instead of directly reading port 0x61, * but there's not really a good reason to do so. */ - if ( admin_io_okay(port, 1, v, regs) || (port == 0x61) ) - return inb(port); - return ~0; -} -//#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0) -#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0) -#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0) -#define outb_user(_v, _p, _d, _r) \ - (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0)) -#define outw_user(_v, _p, _d, _r) \ - (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0)) -#define outl_user(_v, _p, _d, _r) \ - (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0)) + return (admin_io_okay(port, 1, v, regs) || (port == 0x61)); +} +#define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r) +#define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r) +#define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r) +#define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r) +#define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r) + +/* I/O emulation support. Helper routines for, and type of, the stack stub.*/ +void host_to_guest_gpr_switch(struct cpu_user_regs *) + __attribute__((__regparm__(1))); +unsigned long guest_to_host_gpr_switch(unsigned long) + __attribute__((__regparm__(1))); +typedef unsigned long (*io_emul_stub_t)(struct cpu_user_regs *) + __attribute__((__regparm__(1))); /* Instruction fetch with error handling. */ #define insn_fetch(_type, _size, cs, eip) \ @@ -1028,6 +1029,7 @@ static int emulate_privileged_op(struct unsigned long *reg, eip = regs->eip, cs = regs->cs, res; u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0; unsigned int port, i, op_bytes = 4, data, rc; + char io_emul_stub[16]; u32 l, h; /* Legacy prefixes. 
*/ @@ -1068,6 +1070,9 @@ static int emulate_privileged_op(struct opcode = insn_fetch(u8, 1, cs, eip); } #endif + + if ( opcode == 0x0f ) + goto twobyte_opcode; /* Input/Output String instructions. */ if ( (opcode >= 0x6c) && (opcode <= 0x6f) ) @@ -1083,16 +1088,17 @@ static int emulate_privileged_op(struct case 0x6d: /* INSW/INSL */ if ( !guest_io_okay((u16)regs->edx, op_bytes, v, regs) ) goto fail; + port = (u16)regs->edx; switch ( op_bytes ) { case 1: - data = (u8)inb_user((u16)regs->edx, v, regs); + data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) : ~0); break; case 2: - data = (u16)inw_user((u16)regs->edx, v, regs); + data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0); break; case 4: - data = (u32)inl_user((u16)regs->edx, v, regs); + data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0); break; } if ( (rc = copy_to_user((void *)regs->edi, &data, op_bytes)) != 0 ) @@ -1115,16 +1121,20 @@ static int emulate_privileged_op(struct propagate_page_fault(regs->esi + op_bytes - rc, 0); return EXCRET_fault_fixed; } + port = (u16)regs->edx; switch ( op_bytes ) { case 1: - outb_user((u8)data, (u16)regs->edx, v, regs); + if ( guest_outb_okay(port, v, regs) ) + outb((u8)data, port); break; case 2: - outw_user((u16)data, (u16)regs->edx, v, regs); + if ( guest_outw_okay(port, v, regs) ) + outw((u16)data, port); break; case 4: - outl_user((u32)data, (u16)regs->edx, v, regs); + if ( guest_outl_okay(port, v, regs) ) + outl((u32)data, port); break; } regs->esi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes); @@ -1140,6 +1150,27 @@ static int emulate_privileged_op(struct goto done; } + + /* + * Very likely to be an I/O instruction (IN/OUT). + * Build an on-stack stub to execute the instruction with full guest + * GPR context. This is needed for some systems which (ab)use IN/OUT + * to communicate with BIOS code in system-management mode. + */ + /* call host_to_guest_gpr_switch */ + io_emul_stub[0] = 0xe8; + *(s32 *)&io_emul_stub[1] = + (char *)host_to_guest_gpr_switch - &io_emul_stub[5]; + /* data16 or nop */ + io_emul_stub[5] = (op_bytes != 2) ? 0x90 : 0x66; + /* <io-access opcode> */ + io_emul_stub[6] = opcode; + /* imm8 or nop */ + io_emul_stub[7] = 0x90; + /* jmp guest_to_host_gpr_switch */ + io_emul_stub[8] = 0xe9; + *(s32 *)&io_emul_stub[9] = + (char *)guest_to_host_gpr_switch - &io_emul_stub[13]; /* I/O Port and Interrupt Flag instructions. 
*/ switch ( opcode ) @@ -1148,21 +1179,31 @@ static int emulate_privileged_op(struct op_bytes = 1; case 0xe5: /* IN imm8,%eax */ port = insn_fetch(u8, 1, cs, eip); + io_emul_stub[7] = port; /* imm8 */ exec_in: if ( !guest_io_okay(port, op_bytes, v, regs) ) goto fail; switch ( op_bytes ) { case 1: - regs->eax &= ~0xffUL; - regs->eax |= (u8)inb_user(port, v, regs); + res = regs->eax & ~0xffUL; + if ( guest_inb_okay(port, v, regs) ) + regs->eax = res | (u8)((io_emul_stub_t)io_emul_stub)(regs); + else + regs->eax = res | (u8)~0; break; case 2: - regs->eax &= ~0xffffUL; - regs->eax |= (u16)inw_user(port, v, regs); + res = regs->eax & ~0xffffUL; + if ( guest_inw_okay(port, v, regs) ) + regs->eax = res | (u16)((io_emul_stub_t)io_emul_stub)(regs); + else + regs->eax = res | (u16)~0; break; case 4: - regs->eax = (u32)inl_user(port, v, regs); + if ( guest_inl_okay(port, v, regs) ) + regs->eax = (u32)((io_emul_stub_t)io_emul_stub)(regs); + else + regs->eax = (u32)~0; break; } goto done; @@ -1177,19 +1218,23 @@ static int emulate_privileged_op(struct op_bytes = 1; case 0xe7: /* OUT %eax,imm8 */ port = insn_fetch(u8, 1, cs, eip); + io_emul_stub[7] = port; /* imm8 */ exec_out: if ( !guest_io_okay(port, op_bytes, v, regs) ) goto fail; switch ( op_bytes ) { case 1: - outb_user((u8)regs->eax, port, v, regs); + if ( guest_outb_okay(port, v, regs) ) + ((io_emul_stub_t)io_emul_stub)(regs); break; case 2: - outw_user((u16)regs->eax, port, v, regs); + if ( guest_outw_okay(port, v, regs) ) + ((io_emul_stub_t)io_emul_stub)(regs); break; case 4: - outl_user((u32)regs->eax, port, v, regs); + if ( guest_outl_okay(port, v, regs) ) + ((io_emul_stub_t)io_emul_stub)(regs); break; } goto done; @@ -1212,15 +1257,13 @@ static int emulate_privileged_op(struct */ /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/ goto done; - - case 0x0f: /* Two-byte opcode */ - break; - - default: - goto fail; - } - - /* Remaining instructions only emulated from guest kernel. */ + } + + /* No decode of this single-byte opcode. */ + goto fail; + + twobyte_opcode: + /* Two-byte opcodes only emulated from guest kernel. */ if ( !guest_kernel_mode(v, regs) ) goto fail; diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/x86_32/Makefile --- a/xen/arch/x86/x86_32/Makefile Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/x86_32/Makefile Tue Nov 14 14:59:37 2006 -0700 @@ -1,5 +1,6 @@ obj-y += domain_page.o obj-y += domain_page.o obj-y += entry.o +obj-y += gpr_switch.o obj-y += mm.o obj-y += seg_fixup.o obj-y += traps.o diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/x86_64/Makefile --- a/xen/arch/x86/x86_64/Makefile Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/arch/x86/x86_64/Makefile Tue Nov 14 14:59:37 2006 -0700 @@ -1,3 +1,4 @@ obj-y += entry.o obj-y += entry.o +obj-y += gpr_switch.o obj-y += mm.o obj-y += traps.o diff -r c10d4c6df482 -r f78e499dd669 xen/include/asm-x86/hvm/vlapic.h --- a/xen/include/asm-x86/hvm/vlapic.h Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/include/asm-x86/hvm/vlapic.h Tue Nov 14 14:59:37 2006 -0700 @@ -33,22 +33,23 @@ #define VLAPIC_ID(vlapic) \ (GET_APIC_ID(vlapic_get_reg(vlapic, APIC_ID))) -#define _VLAPIC_GLOB_DISABLE 0x0 -#define VLAPIC_GLOB_DISABLE_MASK 0x1 -#define VLAPIC_SOFTWARE_DISABLE_MASK 0x2 -#define _VLAPIC_BSP_ACCEPT_PIC 0x3 - -#define vlapic_enabled(vlapic) \ - (!((vlapic)->status & \ - (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK))) - -#define vlapic_global_enabled(vlapic) \ - (!(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status))) +/* + * APIC can be disabled in two ways: + * 1. 
'Hardware disable': via IA32_APIC_BASE_MSR[11] + * CPU should behave as if it does not have an APIC. + * 2. 'Software disable': via APIC_SPIV[8]. + * APIC is visible but does not respond to interrupt messages. + */ +#define VLAPIC_HW_DISABLED 0x1 +#define VLAPIC_SW_DISABLED 0x2 +#define vlapic_sw_disabled(vlapic) ((vlapic)->disabled & VLAPIC_SW_DISABLED) +#define vlapic_hw_disabled(vlapic) ((vlapic)->disabled & VLAPIC_HW_DISABLED) +#define vlapic_disabled(vlapic) ((vlapic)->disabled) +#define vlapic_enabled(vlapic) (!vlapic_disabled(vlapic)) struct vlapic { - uint32_t status; uint64_t apic_base_msr; - unsigned long base_address; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ uint32_t timer_divisor; struct timer vlapic_timer; int timer_pending_count; diff -r c10d4c6df482 -r f78e499dd669 xen/include/public/arch-powerpc.h --- a/xen/include/public/arch-powerpc.h Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/include/public/arch-powerpc.h Tue Nov 14 14:59:37 2006 -0700 @@ -1,17 +1,21 @@ /* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. * * Copyright (C) IBM Corp. 
2005, 2006 * diff -r c10d4c6df482 -r f78e499dd669 xen/include/public/xencomm.h --- a/xen/include/public/xencomm.h Tue Nov 14 12:46:33 2006 -0700 +++ b/xen/include/public/xencomm.h Tue Nov 14 14:59:37 2006 -0700 @@ -1,19 +1,23 @@ /* - * Copyright (C) 2006 Hollis Blanchard <hollisb@xxxxxxxxxx>, IBM Corporation + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2006 */ #ifndef _XEN_XENCOMM_H_ diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/x86_32/gpr_switch.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/x86_32/gpr_switch.S Tue Nov 14 14:59:37 2006 -0700 @@ -0,0 +1,43 @@ +/* + * GPR context switch between host and guest. + * Used by IO-port-access emulation stub. + * + * Copyright (c) 2006, Novell, Inc. + */ + +#include <xen/config.h> +#include <asm/asm_defns.h> + +ENTRY(host_to_guest_gpr_switch) + movl (%esp), %ecx + movl %eax, (%esp) + movl UREGS_edx(%eax), %edx + pushl %ebx + movl UREGS_ebx(%eax), %ebx + pushl %ebp + movl UREGS_ebp(%eax), %ebp + pushl %esi + movl UREGS_esi(%eax), %esi + pushl %edi + movl UREGS_edi(%eax), %edi + pushl %ecx + movl UREGS_ecx(%eax), %ecx + movl UREGS_eax(%eax), %eax + ret + +ENTRY(guest_to_host_gpr_switch) + pushl %edx + movl 5*4(%esp), %edx + movl %eax, UREGS_eax(%edx) + popl UREGS_edx(%edx) + movl %edi, UREGS_edi(%edx) + popl %edi + movl %esi, UREGS_esi(%edx) + popl %esi + movl %ebp, UREGS_ebp(%edx) + popl %ebp + movl %ebx, UREGS_ebx(%edx) + popl %ebx + movl %ecx, UREGS_ecx(%edx) + popl %ecx + ret diff -r c10d4c6df482 -r f78e499dd669 xen/arch/x86/x86_64/gpr_switch.S --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/x86_64/gpr_switch.S Tue Nov 14 14:59:37 2006 -0700 @@ -0,0 +1,63 @@ +/* + * GPR context switch between host and guest. 
+ * Used by IO-port-access emulation stub.
+ *
+ * Copyright (c) 2006, Novell, Inc.
+ */
+
+#include <xen/config.h>
+#include <asm/asm_defns.h>
+
+ENTRY(host_to_guest_gpr_switch)
+        movq (%rsp), %rcx
+        movq %rdi, (%rsp)
+        movq UREGS_rdx(%rdi), %rdx
+        pushq %rbx
+        movq UREGS_rax(%rdi), %rax
+        movq UREGS_rbx(%rdi), %rbx
+        pushq %rbp
+        movq UREGS_rsi(%rdi), %rsi
+        movq UREGS_rbp(%rdi), %rbp
+        pushq %r12
+        movq UREGS_r8(%rdi), %r8
+        movq UREGS_r12(%rdi), %r12
+        pushq %r13
+        movq UREGS_r9(%rdi), %r9
+        movq UREGS_r13(%rdi), %r13
+        pushq %r14
+        movq UREGS_r10(%rdi), %r10
+        movq UREGS_r14(%rdi), %r14
+        pushq %r15
+        movq UREGS_r11(%rdi), %r11
+        movq UREGS_r15(%rdi), %r15
+        pushq %rcx
+        movq UREGS_rcx(%rdi), %rcx
+        movq UREGS_rdi(%rdi), %rdi
+        ret
+
+ENTRY(guest_to_host_gpr_switch)
+        pushq %rdi
+        movq 7*8(%rsp), %rdi
+        movq %rax, UREGS_rax(%rdi)
+        popq UREGS_rdi(%rdi)
+        movq %r15, UREGS_r15(%rdi)
+        movq %r11, UREGS_r11(%rdi)
+        popq %r15
+        movq %r14, UREGS_r14(%rdi)
+        movq %r10, UREGS_r10(%rdi)
+        popq %r14
+        movq %r13, UREGS_r13(%rdi)
+        movq %r9, UREGS_r9(%rdi)
+        popq %r13
+        movq %r12, UREGS_r12(%rdi)
+        movq %r8, UREGS_r8(%rdi)
+        popq %r12
+        movq %rbp, UREGS_rbp(%rdi)
+        movq %rsi, UREGS_rsi(%rdi)
+        popq %rbp
+        movq %rbx, UREGS_rbx(%rdi)
+        movq %rdx, UREGS_rdx(%rdi)
+        popq %rbx
+        movq %rcx, UREGS_rcx(%rdi)
+        popq %rcx
+        ret
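
Note on the maddr.h/page.h hunks above: the new pte_machine_to_phys() masks a PTE's machine address with PHYSICAL_PAGE_MASK before handing the frame number to mfn_to_pfn(), so that the NX bit (bit 63) cannot leak into the lookup. A minimal standalone sketch of that masking follows; it is not Xen code, the mask constants are copied from the x86_64 page.h hunk and the sample PTE value is made up.

#include <stdio.h>

#define PAGE_SHIFT            12
#define PAGE_MASK             (~((1ULL << PAGE_SHIFT) - 1))
#define __PHYSICAL_MASK_SHIFT 46                      /* value from the x86_64 page.h hunk */
#define __PHYSICAL_MASK       ((1ULL << __PHYSICAL_MASK_SHIFT) - 1)
#define PHYSICAL_PAGE_MASK    (PAGE_MASK & __PHYSICAL_MASK)
#define _PAGE_NX              (1ULL << 63)

int main(void)
{
    /* A made-up PTE: frame 0x12345, present/rw/accessed/dirty bits, NX set. */
    unsigned long long pte = 0x12345000ULL | 0x63 | _PAGE_NX;

    /* Plain PAGE_MASK keeps the NX bit, so the derived frame number is garbage. */
    printf("frame via PAGE_MASK:          %#llx\n", (pte & PAGE_MASK) >> PAGE_SHIFT);

    /* PHYSICAL_PAGE_MASK strips it first, which is what pte_machine_to_phys() relies on. */
    printf("frame via PHYSICAL_PAGE_MASK: %#llx\n", (pte & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT);
    return 0;
}

On i386 PAE the NX bit sits in the high half of the 64-bit PTE; as the new comment in the i386 maddr.h hunk notes, the conversion of the mfn_to_pfn() argument to unsigned long clips it off there, so only x86_64 needs the explicit mask.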
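
Note on the xenperf.c hunk: hypercall_name_table is filled with the X() macro and C99 designated initializers, so each name lands at the array index of its __HYPERVISOR_* number and untouched slots stay NULL (which the new -p pretty printer tests before falling back to "[n]"). A small standalone illustration of that idiom, with a hypothetical three-entry enum standing in for the real hypercall numbers:

#include <stdio.h>

/* Hypothetical stand-ins for the __HYPERVISOR_* constants. */
enum { HC_set_trap_table, HC_mmu_update, HC_memory_op, HC_MAX = 16 };

#define X(name) [HC_##name] = #name
static const char *hypercall_name[HC_MAX] = {
    X(set_trap_table),
    X(mmu_update),
    X(memory_op),
};
#undef X

int main(void)
{
    for (int i = 0; i < HC_MAX; i++)
        printf("%2d -> %s\n", i, hypercall_name[i] ? hypercall_name[i] : "[unnamed]");
    return 0;
}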
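
Note on the XendStorageRepository.py hunk: the free-space check drops the parsing of "df -lPk" output in favour of a direct os.statvfs() call, computing free space as f_bavail * f_frsize. A rough C analogue of that calculation using the POSIX statvfs() interface (a sketch only; the path is the XEND_STORAGE_DIR constant from that file):

#include <stdio.h>
#include <sys/statvfs.h>

int main(void)
{
    const char *storage_dir = "/var/lib/xend/storage/";  /* XEND_STORAGE_DIR in the patch */
    struct statvfs st;

    if (statvfs(storage_dir, &st) != 0) {
        perror("statvfs");
        return 1;
    }

    /* f_bavail = blocks available to unprivileged callers, f_frsize = fragment size. */
    printf("free bytes: %llu\n", (unsigned long long)st.f_bavail * st.f_frsize);
    return 0;
}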