diff -r f2cf898c7ff8 tools/debugger/gdbsx/xg/xg_main.c --- a/tools/debugger/gdbsx/xg/xg_main.c Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/debugger/gdbsx/xg/xg_main.c Thu Nov 17 15:37:30 2011 -0800 @@ -80,6 +80,7 @@ int xgtrc_on = 0; struct xen_domctl domctl; /* just use a global domctl */ static int _hvm_guest; /* hvm guest? 32bit HVMs have 64bit context */ +static int _hybrid_guest; static domid_t _dom_id; /* guest domid */ static int _max_vcpu_id; /* thus max_vcpu_id+1 VCPUs */ static int _dom0_fd; /* fd of /dev/privcmd */ @@ -308,6 +309,8 @@ xg_attach(int domid, int guest_bitness) _max_vcpu_id = domctl.u.getdomaininfo.max_vcpu_id; _hvm_guest = (domctl.u.getdomaininfo.flags & XEN_DOMINF_hvm_guest); + _hybrid_guest = (domctl.u.getdomaininfo.flags & XEN_DOMINF_hybrid_guest); + return _max_vcpu_id; } @@ -368,7 +371,7 @@ _change_TF(vcpuid_t which_vcpu, int gues int sz = sizeof(anyc); /* first try the MTF for hvm guest. otherwise do manually */ - if (_hvm_guest) { + if (_hvm_guest || _hybrid_guest) { domctl.u.debug_op.vcpu = which_vcpu; domctl.u.debug_op.op = setit ? XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON : XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF; diff -r f2cf898c7ff8 tools/libxc/Makefile --- a/tools/libxc/Makefile Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxc/Makefile Thu Nov 17 15:37:30 2011 -0800 @@ -58,7 +58,7 @@ GUEST_SRCS-$(CONFIG_IA64) += xc_dom_i -include $(XEN_TARGET_ARCH)/Makefile CFLAGS += -Werror -Wmissing-prototypes -CFLAGS += $(INCLUDES) -I. -I../xenstore -I../include +CFLAGS += $(INCLUDES) -I. -I../xenstore -I../include -g -O0 # Needed for posix_fadvise64() in xc_linux.c CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE diff -r f2cf898c7ff8 tools/libxc/xc_dom.h --- a/tools/libxc/xc_dom.h Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxc/xc_dom.h Thu Nov 17 15:37:30 2011 -0800 @@ -110,6 +110,9 @@ struct xc_dom_image { struct xc_dom_arch *arch_hooks; /* allocate up to virt_alloc_end */ int (*allocate) (struct xc_dom_image * dom, xen_vaddr_t up_to); + + /* hybrid flags */ + char hybrid_hap; }; /* --- pluggable kernel loader ------------------------------------- */ @@ -241,7 +244,8 @@ static inline void *xc_dom_vaddr_to_ptr( static inline int xc_dom_feature_translated(struct xc_dom_image *dom) { - return elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active); + return(dom->hybrid_hap || + elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active)); } static inline xen_pfn_t xc_dom_p2m_host(struct xc_dom_image *dom, xen_pfn_t pfn) diff -r f2cf898c7ff8 tools/libxc/xc_dom_x86.c --- a/tools/libxc/xc_dom_x86.c Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxc/xc_dom_x86.c Thu Nov 17 15:37:30 2011 -0800 @@ -372,7 +372,8 @@ static int setup_pgtables_x86_64(struct pgpfn = (addr - dom->parms.virt_base) >> PAGE_SHIFT_X86; l1tab[l1off] = pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT; - if ( (addr >= dom->pgtables_seg.vstart) && + if ( !dom->hybrid_hap && + (addr >= dom->pgtables_seg.vstart) && (addr < dom->pgtables_seg.vend) ) l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */ if ( l1off == (L1_PAGETABLE_ENTRIES_X86_64 - 1) ) @@ -819,7 +820,7 @@ int arch_setup_bootlate(struct xc_dom_im } /* Map grant table frames into guest physmap. 
*/ - for ( i = 0; ; i++ ) + for ( i = 0; !dom->hybrid_hap ; i++ ) { xatp.domid = dom->guest_domid; xatp.space = XENMAPSPACE_grant_table; diff -r f2cf898c7ff8 tools/libxl/Makefile --- a/tools/libxl/Makefile Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxl/Makefile Thu Nov 17 15:37:30 2011 -0800 @@ -12,7 +12,7 @@ XLUMAJOR = 1.0 XLUMINOR = 0 CFLAGS += -Werror -CFLAGS += -I. -fPIC +CFLAGS += -I. -fPIC -g -O0 CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore) LIBS = $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore) diff -r f2cf898c7ff8 tools/libxl/libxl.c --- a/tools/libxl/libxl.c Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxl/libxl.c Thu Nov 17 15:37:30 2011 -0800 @@ -99,6 +99,7 @@ int libxl_domain_make(struct libxl_ctx * if (!uuid_string) return ERROR_NOMEM; flags = info->hvm ? XEN_DOMCTL_CDF_hvm_guest : 0; + flags |= info->hybrid ? XEN_DOMCTL_CDF_hybrid_guest : 0; flags |= info->hap ? XEN_DOMCTL_CDF_hap : 0; flags |= info->oos ? 0 : XEN_DOMCTL_CDF_oos_off; *domid = -1; diff -r f2cf898c7ff8 tools/libxl/libxl.h --- a/tools/libxl/libxl.h Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxl/libxl.h Thu Nov 17 15:37:30 2011 -0800 @@ -78,6 +78,7 @@ const libxl_version_info* libxl_get_vers typedef struct { bool hvm; + bool hybrid; bool hap; bool oos; int ssidref; @@ -97,6 +98,8 @@ typedef struct { uint32_t shadow_memkb; const char *kernel; int hvm; + int hybrid; + int hybrid_hap; union { struct { bool pae; diff -r f2cf898c7ff8 tools/libxl/libxl_dom.c --- a/tools/libxl/libxl_dom.c Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxl/libxl_dom.c Thu Nov 17 15:37:30 2011 -0800 @@ -69,7 +69,7 @@ int build_pre(struct libxl_ctx *ctx, uin (info->max_memkb + info->u.pv.slack_memkb)); xc_domain_set_tsc_info(ctx->xch, domid, info->tsc_mode, 0, 0, 0); - if (info->hvm) { + if (info->hvm || info->hybrid) { unsigned long shadow; shadow = (info->shadow_memkb + 1023) / 1024; xc_shadow_control(ctx->xch, domid, XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION, NULL, 0, &shadow, 0, NULL); @@ -139,13 +139,16 @@ int build_pv(struct libxl_ctx *ctx, uint { struct xc_dom_image *dom; int ret; - int flags = 0; + int flags; /* start info flags: start_info_x86_64() */ + + flags = info->hybrid ? SIF_IS_HYBRID : 0; dom = xc_dom_allocate(info->u.pv.cmdline, info->u.pv.features); if (!dom) { XL_LOG_ERRNO(ctx, XL_LOG_ERROR, "xc_dom_allocate failed"); return -1; } + dom->hybrid_hap = info->hybrid_hap ? 
1 : 0; ret = xc_dom_linux_build(ctx->xch, dom, domid, info->target_memkb / 1024, info->kernel, info->u.pv.ramdisk, flags, state->store_port, &state->store_mfn, diff -r f2cf898c7ff8 tools/libxl/xl_cmdimpl.c --- a/tools/libxl/xl_cmdimpl.c Fri Jul 15 23:21:24 2011 +0000 +++ b/tools/libxl/xl_cmdimpl.c Thu Nov 17 15:37:30 2011 -0800 @@ -197,6 +197,11 @@ static void init_build_info(libxl_domain } else { b_info->u.pv.slack_memkb = 8 * 1024; } + if (c_info->hybrid) { + b_info->hybrid = 1; + if (c_info->hap) + b_info->hybrid_hap = 1; + } } static void init_dm_info(libxl_device_model_info *dm_info, @@ -469,6 +474,11 @@ static void parse_config_data(const char !strncmp(buf, "hvm", strlen(buf))) c_info->hvm = 1; + c_info->hybrid = 0; + if (!xlu_cfg_get_long (config, "hybrid", &l)) + c_info->hybrid = 1; + + c_info->hap = 0; if (!xlu_cfg_get_long (config, "hap", &l)) c_info->hap = l; diff -r f2cf898c7ff8 xen/arch/x86/debug.c --- a/xen/arch/x86/debug.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/debug.c Thu Nov 17 15:37:30 2011 -0800 @@ -43,7 +43,6 @@ extern void kdbp(const char *fmt, ...); typedef unsigned long dbgva_t; typedef unsigned char dbgbyte_t; - /* Returns: mfn for the given (hvm guest) vaddr */ static unsigned long dbg_hvm_va2mfn(dbgva_t vaddr, struct domain *dp, int toaddr) @@ -55,6 +54,7 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct dom DBGP2("vaddr:%lx domid:%d\n", vaddr, dp->domain_id); gfn = paging_gva_to_gfn(dp->vcpu[0], vaddr, &pfec); + if ( gfn == INVALID_GFN ) { DBGP2("kdb:bad gfn from gva_to_gfn\n"); @@ -200,7 +200,7 @@ dbg_rw_guest_mem(dbgva_t addr, dbgbyte_t pagecnt = min_t(long, PAGE_SIZE - (addr & ~PAGE_MASK), len); - mfn = (dp->is_hvm + mfn = ( (is_hvm_domain(dp) || is_hyb_hap_domain(dp)) ? dbg_hvm_va2mfn(addr, dp, toaddr) : dbg_pv_va2mfn(addr, dp, pgd3)); if ( mfn == INVALID_MFN ) @@ -225,7 +225,6 @@ dbg_rw_guest_mem(dbgva_t addr, dbgbyte_t buf += pagecnt; len -= pagecnt; } - return len; } diff -r f2cf898c7ff8 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/domain.c Thu Nov 17 15:37:30 2011 -0800 @@ -160,6 +160,7 @@ void dump_pageframe_info(struct domain * spin_unlock(&d->page_alloc_lock); } + KASSERT(!is_hybrid_domain(d) ); if ( is_hvm_domain(d) ) { p2m_pod_dump_data(d); @@ -354,7 +355,7 @@ int vcpu_initialise(struct vcpu *v) paging_vcpu_init(v); - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) { if ( (rc = hvm_vcpu_initialise(v)) != 0 ) return rc; @@ -410,7 +411,7 @@ int arch_domain_create(struct domain *d, int rc = -ENOMEM; d->arch.hvm_domain.hap_enabled = - is_hvm_domain(d) && + (is_hvm_or_hyb_domain(d)) && hvm_funcs.hap_supported && (domcr_flags & DOMCRF_hap); d->arch.hvm_domain.mem_sharing_enabled = 0; @@ -508,7 +509,7 @@ int arch_domain_create(struct domain *d, mce_init_msr(d); } - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) { if ( (rc = hvm_domain_initialise(d)) != 0 ) { @@ -562,7 +563,7 @@ void arch_domain_destroy(struct domain * unsigned int i; #endif - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) hvm_domain_destroy(d); pci_release_devices(d); @@ -608,6 +609,8 @@ unsigned long pv_guest_cr4_fixup(unsigne return (hv_cr4 & hv_cr4_mask) | (guest_cr4 & ~hv_cr4_mask); } +extern void hybrid_update_cr3(struct vcpu *); + /* This is called by arch_final_setup_guest and do_boot_vcpu */ int arch_set_info_guest( struct vcpu *v, vcpu_guest_context_u c) @@ -628,7 +631,7 @@ int arch_set_info_guest( #endif flags = c(flags); - if ( !is_hvm_vcpu(v) ) + if ( !is_hvm_or_hyb_domain(d) ) { if ( !compat ) 
{ @@ -677,7 +680,7 @@ int arch_set_info_guest( v->fpu_initialised = !!(flags & VGCF_I387_VALID); v->arch.flags &= ~TF_kernel_mode; - if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ ) + if ( (flags & VGCF_in_kernel) || is_hvm_or_hyb_vcpu(v) ) v->arch.flags |= TF_kernel_mode; if ( !compat ) @@ -689,18 +692,13 @@ int arch_set_info_guest( v->arch.guest_context.user_regs.eflags |= 2; - if ( is_hvm_vcpu(v) ) + if ( is_hvm_or_hyb_vcpu(v) ) { hvm_set_info_guest(v); - goto out; + if ( !is_hybrid_vcpu(v) ) + goto out; } - /* Only CR0.TS is modifiable by guest or admin. */ - v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS; - v->arch.guest_context.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS; - - init_int80_direct_trap(v); - /* IOPL privileges are virtualised. */ v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3; v->arch.guest_context.user_regs.eflags &= ~X86_EFLAGS_IOPL; @@ -708,38 +706,50 @@ int arch_set_info_guest( /* Ensure real hardware interrupts are enabled. */ v->arch.guest_context.user_regs.eflags |= X86_EFLAGS_IF; - cr4 = v->arch.guest_context.ctrlreg[4]; - v->arch.guest_context.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(cr4) : - real_cr4_to_pv_guest_cr4(mmu_cr4_features); - memset(v->arch.guest_context.debugreg, 0, sizeof(v->arch.guest_context.debugreg)); for ( i = 0; i < 8; i++ ) (void)set_debugreg(v, i, c(debugreg[i])); + if ( !is_hybrid_vcpu(v) ) + { + /* Only CR0.TS is modifiable by guest or admin. */ + v->arch.guest_context.ctrlreg[0] &= X86_CR0_TS; + v->arch.guest_context.ctrlreg[0] |= read_cr0() & ~X86_CR0_TS; + + init_int80_direct_trap(v); + + cr4 = v->arch.guest_context.ctrlreg[4]; + v->arch.guest_context.ctrlreg[4] = cr4 ? pv_guest_cr4_fixup(cr4) : + real_cr4_to_pv_guest_cr4(mmu_cr4_features); + } + if ( v->is_initialised ) goto out; if ( v->vcpu_id == 0 ) d->vm_assist = c(vm_assist); - if ( !compat ) - rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents); + if ( !is_hybrid_vcpu(v) ) + { + if ( !compat ) + rc = (int)set_gdt(v, c.nat->gdt_frames, c.nat->gdt_ents); #ifdef CONFIG_COMPAT - else - { - unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)]; - unsigned int i, n = (c.cmp->gdt_ents + 511) / 512; + else + { + unsigned long gdt_frames[ARRAY_SIZE(c.cmp->gdt_frames)]; + unsigned int i, n = (c.cmp->gdt_ents + 511) / 512; - if ( n > ARRAY_SIZE(c.cmp->gdt_frames) ) - return -EINVAL; - for ( i = 0; i < n; ++i ) - gdt_frames[i] = c.cmp->gdt_frames[i]; - rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents); + if ( n > ARRAY_SIZE(c.cmp->gdt_frames) ) + return -EINVAL; + for ( i = 0; i < n; ++i ) + gdt_frames[i] = c.cmp->gdt_frames[i]; + rc = (int)set_gdt(v, gdt_frames, c.cmp->gdt_ents); + } +#endif + if ( rc != 0 ) + return rc; } -#endif - if ( rc != 0 ) - return rc; if ( !compat ) { @@ -751,10 +761,17 @@ int arch_set_info_guest( : !get_page_and_type(mfn_to_page(cr3_pfn), d, PGT_base_page_table)) ) { - destroy_gdt(v); + if ( !is_hybrid_vcpu(v) ) + destroy_gdt(v); return -EINVAL; } + if (is_hybrid_vcpu(v) && paging_mode_enabled(d)) + { + v->arch.cr3 = cr3_pfn; + v->arch.hvm_vcpu.guest_cr[3] = c.nat->ctrlreg[3]; + } + v->arch.guest_table = pagetable_from_pfn(cr3_pfn); #ifdef __x86_64__ @@ -782,7 +799,8 @@ int arch_set_info_guest( } else if ( !(flags & VGCF_in_kernel) ) { - destroy_gdt(v); + if ( !is_hybrid_vcpu(v) ) + destroy_gdt(v); return -EINVAL; } } @@ -818,6 +836,13 @@ int arch_set_info_guest( paging_update_paging_modes(v); update_cr3(v); + if (is_hybrid_vcpu(v)) + { + if (paging_mode_enabled(d)) /* HAP is enabled */ + hvm_update_host_cr3(v); /* GUEST_CR3 updated in 
update_cr3() */ + else + hybrid_update_cr3(v); + } out: if ( flags & VGCF_online ) @@ -1347,10 +1372,10 @@ static void update_runstate_area(struct static inline int need_full_gdt(struct vcpu *v) { - return (!is_hvm_vcpu(v) && !is_idle_vcpu(v)); + return (!is_hvm_vcpu(v) && !is_idle_vcpu(v) && !is_hybrid_vcpu(v)); } -static void __context_switch(void) +static noinline void __context_switch(void) { struct cpu_user_regs *stack_regs = guest_cpu_user_regs(); unsigned int cpu = smp_processor_id(); @@ -1475,18 +1500,30 @@ void context_switch(struct vcpu *prev, s /* Re-enable interrupts before restoring state which may fault. */ local_irq_enable(); - if ( !is_hvm_vcpu(next) ) + if ( !is_hvm_or_hyb_vcpu(next) ) { load_LDT(next); load_segments(next); } } - context_saved(prev); if (prev != next) update_runstate_area(next); +#if 0 +{ +struct vcpu_runstate_info rst; +struct vcpu_runstate_info *tp = + (struct vcpu_runstate_info *)(runstate_guest(next)).p; +if (tp) + copy_from_guest(&rst, runstate_guest(next), 1); + +kdbtrc(0xeeffee, rst.state, (ulong)next, (ulong)tp, + (ulong)next->runstate.state); +} +#endif + schedule_tail(next); BUG(); } @@ -2034,7 +2071,7 @@ int domain_relinquish_resources(struct d BUG(); } - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) hvm_domain_relinquish_resources(d); return 0; @@ -2115,7 +2152,7 @@ void vcpu_mark_events_pending(struct vcp if ( already_pending ) return; - if ( is_hvm_vcpu(v) ) + if ( is_hvm_or_hyb_vcpu(v) ) hvm_assert_evtchn_irq(v); else vcpu_kick(v); diff -r f2cf898c7ff8 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/domctl.c Thu Nov 17 15:37:30 2011 -0800 @@ -466,6 +466,7 @@ long arch_do_domctl( goto sethvmcontext_out; ret = -EINVAL; + KASSERT(!is_hybrid_domain(d)); if ( !is_hvm_domain(d) ) goto sethvmcontext_out; @@ -503,6 +504,7 @@ long arch_do_domctl( goto gethvmcontext_out; ret = -EINVAL; + KASSERT(!is_hybrid_domain(d)); if ( !is_hvm_domain(d) ) goto gethvmcontext_out; @@ -558,6 +560,7 @@ long arch_do_domctl( goto gethvmcontext_partial_out; ret = -EINVAL; + KASSERT(!is_hybrid_domain(d)); if ( !is_hvm_domain(d) ) goto gethvmcontext_partial_out; @@ -719,6 +722,7 @@ long arch_do_domctl( case XEN_DOMCTL_SENDTRIGGER_POWER: { ret = -EINVAL; + KASSERT(!is_hybrid_domain(d)); if ( is_hvm_domain(d) ) { ret = 0; @@ -731,6 +735,7 @@ long arch_do_domctl( { extern void hvm_acpi_sleep_button(struct domain *d); + KASSERT(!is_hybrid_domain(d)); ret = -EINVAL; if ( is_hvm_domain(d) ) { @@ -1285,7 +1290,7 @@ long arch_do_domctl( goto debug_op_out; ret = -EINVAL; - if ( !is_hvm_domain(d)) + if ( !is_hvm_or_hyb_domain(d)) goto debug_op_out; ret = hvm_debug_op(v, domctl->u.debug_op.op); @@ -1465,8 +1470,9 @@ void arch_get_info_guest(struct vcpu *v, c(flags |= VGCF_i387_valid); if ( !test_bit(_VPF_down, &v->pause_flags) ) c(flags |= VGCF_online); - - if ( is_hvm_vcpu(v) ) + + /* HYBRID TDB: debugregs? Verify this again */ + if ( is_hvm_or_hyb_vcpu(v) ) { struct segment_register sreg; memset(c.nat->ctrlreg, 0, sizeof(c.nat->ctrlreg)); diff -r f2cf898c7ff8 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/hvm.c Thu Nov 17 15:37:30 2011 -0800 @@ -227,6 +227,9 @@ void hvm_do_resume(struct vcpu *v) { ioreq_t *p; + if (is_hybrid_vcpu(v)) + return; + pt_restore_timer(v); /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). 
*/ @@ -367,16 +370,21 @@ int hvm_domain_initialise(struct domain return -EINVAL; } - spin_lock_init(&d->arch.hvm_domain.pbuf_lock); spin_lock_init(&d->arch.hvm_domain.irq_lock); - spin_lock_init(&d->arch.hvm_domain.uc_lock); - - INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list); - spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock); - hvm_init_guest_time(d); - - d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1; + + if (is_hybrid_domain(d)) { + if (!d->arch.hvm_domain.hap_enabled) + return 0; + } else { + spin_lock_init(&d->arch.hvm_domain.pbuf_lock); + spin_lock_init(&d->arch.hvm_domain.uc_lock); + + INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list); + spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock); + + d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1; + } hvm_init_cacheattr_region_list(d); @@ -384,20 +392,22 @@ int hvm_domain_initialise(struct domain if ( rc != 0 ) goto fail1; - vpic_init(d); - - rc = vioapic_init(d); - if ( rc != 0 ) - goto fail1; - - stdvga_init(d); - - rtc_init(d); - - hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq); - hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); - - register_portio_handler(d, 0xe9, 1, hvm_print_line); + if (!is_hybrid_domain(d)) { + vpic_init(d); + + rc = vioapic_init(d); + if ( rc != 0 ) + goto fail1; + + stdvga_init(d); + + rtc_init(d); + + hvm_init_ioreq_page(d, &d->arch.hvm_domain.ioreq); + hvm_init_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); + + register_portio_handler(d, 0xe9, 1, hvm_print_line); + } rc = hvm_funcs.domain_initialise(d); if ( rc != 0 ) @@ -406,9 +416,11 @@ int hvm_domain_initialise(struct domain return 0; fail2: - rtc_deinit(d); - stdvga_deinit(d); - vioapic_deinit(d); + if (!is_hybrid_domain(d)) { + rtc_deinit(d); + stdvga_deinit(d); + vioapic_deinit(d); + } fail1: hvm_destroy_cacheattr_region_list(d); return rc; @@ -418,6 +430,10 @@ extern void msixtbl_pt_cleanup(struct do void hvm_domain_relinquish_resources(struct domain *d) { + if (is_hybrid_domain(d)) { + printk("MUK: WARN: Hybrid ignoring pit/pmtimer/hpet cleanup\n"); + return; + } hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq); hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq); @@ -436,10 +452,14 @@ void hvm_domain_relinquish_resources(str void hvm_domain_destroy(struct domain *d) { hvm_funcs.domain_destroy(d); + + if (is_hybrid_domain(d)) + return; + + hvm_destroy_cacheattr_region_list(d); rtc_deinit(d); stdvga_deinit(d); vioapic_deinit(d); - hvm_destroy_cacheattr_region_list(d); } static int hvm_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) @@ -737,13 +757,25 @@ static int hvm_load_cpu_ctxt(struct doma HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, 1, HVMSR_PER_VCPU); +static noinline int hybrid_vcpu_finish_init(struct vcpu *v) +{ + if ( v->vcpu_id == 0 ) + hvm_set_guest_tsc(v, 0); + + /* PV guests by default have a 100Hz ticker. */ + v->periodic_period = MILLISECS(10); /* ???? */ + + return 0; +} + int hvm_vcpu_initialise(struct vcpu *v) { int rc; hvm_asid_flush_vcpu(v); - if ( cpu_has_xsave ) + /* HYBRID TBD: investigate xsave/xrestore for hybrid ??? */ + if ( cpu_has_xsave && !is_hybrid_vcpu(v) ) { /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. 
*/ void *xsave_area = _xmalloc(xsave_cntxt_size, 64); @@ -755,12 +787,21 @@ int hvm_vcpu_initialise(struct vcpu *v) v->arch.hvm_vcpu.xfeature_mask = XSTATE_FP_SSE; } - if ( (rc = vlapic_init(v)) != 0 ) + if ( !is_hybrid_vcpu(v) && ((rc = vlapic_init(v)) != 0) ) goto fail1; if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 ) goto fail2; + tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet, + (void(*)(unsigned long))hvm_assert_evtchn_irq, + (unsigned long)v); + + v->arch.guest_context.user_regs.eflags = 2; + + if (is_hybrid_vcpu(v)) + return hybrid_vcpu_finish_init(v); + /* Create ioreq event channel. */ rc = alloc_unbound_xen_event_channel(v, 0); if ( rc < 0 ) @@ -780,12 +821,6 @@ int hvm_vcpu_initialise(struct vcpu *v) if ( rc != 0 ) goto fail3; - tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet, - (void(*)(unsigned long))hvm_assert_evtchn_irq, - (unsigned long)v); - - v->arch.guest_context.user_regs.eflags = 2; - if ( v->vcpu_id == 0 ) { /* NB. All these really belong in hvm_domain_initialise(). */ @@ -828,6 +863,8 @@ void hvm_vcpu_down(struct vcpu *v) struct domain *d = v->domain; int online_count = 0; +printk("MUK: hvm_vcpu_down(): kdb trap\n"); + /* Doesn't halt us immediately, but we'll never return to guest context. */ set_bit(_VPF_down, &v->pause_flags); vcpu_sleep_nosync(v); @@ -2222,6 +2259,14 @@ static long hvm_vcpu_op( case VCPUOP_stop_singleshot_timer: rc = do_vcpu_op(cmd, vcpuid, arg); break; + + case VCPUOP_is_up: + case VCPUOP_up: + if (is_hybrid_vcpu(current)) { + rc = do_vcpu_op(cmd, vcpuid, arg); + break; + } + default: rc = -ENOSYS; break; @@ -2292,11 +2337,17 @@ static long hvm_vcpu_op_compat32( return rc; } -static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = { +hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = { [ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op, [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op, [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op, + HYPERCALL(set_debugreg), + HYPERCALL(multicall), + HYPERCALL(update_va_mapping), HYPERCALL(xen_version), + HYPERCALL(console_io), + HYPERCALL(vm_assist), + HYPERCALL(mmuext_op), HYPERCALL(event_channel_op), HYPERCALL(sched_op), HYPERCALL(set_timer_op), @@ -2321,6 +2372,31 @@ static hvm_hypercall_t *hvm_hypercall32_ #endif /* defined(__x86_64__) */ +/* Returns: 1 if hcall is valid, 0 otherwise. 
*/ +static int hcall_valid(uint32_t eax) +{ +#ifndef __x86_64__ + if ( unlikely(eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] ) +#else + if ( unlikely(eax >= NR_hypercalls) || !hvm_hypercall64_table[eax] || + (!is_hybrid_vcpu(current) && + ( (eax==__HYPERVISOR_set_trap_table) || + (eax==__HYPERVISOR_set_debugreg) || + (eax==__HYPERVISOR_update_descriptor) || + (eax==__HYPERVISOR_multicall) || + (eax==__HYPERVISOR_update_va_mapping) || + (eax==__HYPERVISOR_console_io) || + (eax==__HYPERVISOR_set_segment_base) || + (eax==__HYPERVISOR_vm_assist) || + (eax==__HYPERVISOR_mmuext_op) ) ) || + ((is_hybrid_vcpu(current) && hap_enabled(current->domain)) && + (eax==__HYPERVISOR_update_va_mapping)) ) +#endif + return 0; + + return 1; +} + int hvm_do_hypercall(struct cpu_user_regs *regs) { struct vcpu *curr = current; @@ -2349,8 +2425,7 @@ int hvm_do_hypercall(struct cpu_user_reg if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) ) return viridian_hypercall(regs); - if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] ) - { + if ( !hcall_valid(eax)) { regs->eax = -ENOSYS; return HVM_HCALL_completed; } @@ -2734,12 +2809,46 @@ static int hvmop_flush_tlb_all(void) return 0; } +static noinline long _do_hybrid_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) +{ + long rc = -EINVAL; + struct xen_hvm_param a; + struct domain *d; + + if (op == HVMOP_set_param) { + if ( copy_from_guest(&a, arg, 1) ) + return -EFAULT; + + rc = rcu_lock_target_domain_by_id(a.domid, &d); + if ( rc != 0 ) + return rc; + + if (a.index == HVM_PARAM_CALLBACK_IRQ) { + struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq; + uint64_t via = a.value; + uint8_t via_type = (uint8_t)(via >> 56) + 1; + + if (via_type == HVMIRQ_callback_vector) { + hvm_irq->callback_via_type = HVMIRQ_callback_vector; + hvm_irq->callback_via.vector = (uint8_t)via; + rc = 0; + } + } + } + KASSERT(rc == 0); + return rc; +} + long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) { struct domain *curr_d = current->domain; long rc = 0; + if (is_hybrid_domain(curr_d)) { + return (_do_hybrid_op(op, arg)); + } + switch ( op ) { case HVMOP_set_param: diff -r f2cf898c7ff8 xen/arch/x86/hvm/irq.c --- a/xen/arch/x86/hvm/irq.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/irq.c Thu Nov 17 15:37:30 2011 -0800 @@ -333,6 +333,9 @@ struct hvm_intack hvm_vcpu_has_pending_i && vcpu_info(v, evtchn_upcall_pending) ) return hvm_intack_vector(plat->irq.callback_via.vector); + if (is_hybrid_vcpu(v)) /* Hybrid TBD: See NMI / MCE below */ + return hvm_intack_none; + if ( unlikely(v->nmi_pending) ) return hvm_intack_nmi; diff -r f2cf898c7ff8 xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/mtrr.c Thu Nov 17 15:37:30 2011 -0800 @@ -573,6 +573,7 @@ int32_t hvm_get_mem_pinned_cacheattr( uint32_t *type) { struct hvm_mem_pinned_cacheattr_range *range; + KASSERT(!is_hybrid_domain(d)); *type = 0; @@ -601,6 +602,8 @@ int32_t hvm_set_mem_pinned_cacheattr( { struct hvm_mem_pinned_cacheattr_range *range; + KASSERT(!is_hybrid_domain(d)); + if ( !((type == PAT_TYPE_UNCACHABLE) || (type == PAT_TYPE_WRCOMB) || (type == PAT_TYPE_WRTHROUGH) || diff -r f2cf898c7ff8 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Nov 17 15:37:30 2011 -0800 @@ -419,7 +419,7 @@ void kdb_dump_vmcb(domid_t did, int vid) rcu_read_lock(&domlist_read_lock); for_each_domain (dp) { - if (!is_hvm_domain(dp) || dp->is_dying) + if (!is_hvm_or_hyb_domain(dp) || 
dp->is_dying) continue; if (did != 0 && did != dp->domain_id) continue; diff -r f2cf898c7ff8 xen/arch/x86/hvm/vmx/Makefile --- a/xen/arch/x86/hvm/vmx/Makefile Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/vmx/Makefile Thu Nov 17 15:37:30 2011 -0800 @@ -5,3 +5,4 @@ obj-y += vmcs.o obj-y += vmx.o obj-y += vpmu.o obj-y += vpmu_core2.o +obj-y += hybrid.o diff -r f2cf898c7ff8 xen/arch/x86/hvm/vmx/hybrid.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/hvm/vmx/hybrid.c Thu Nov 17 15:37:30 2011 -0800 @@ -0,0 +1,576 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void vmx_do_extint(struct cpu_user_regs *regs); +extern void vmx_do_cpuid(struct cpu_user_regs *regs); +enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised }; +extern enum handler_return long_mode_do_msr_read(struct cpu_user_regs *); +extern enum handler_return long_mode_do_msr_write(struct cpu_user_regs *); + + +volatile int mukprint=0, mukspin=1; +#define dbgp0(...) dprintk(XENLOG_ERR, __VA_ARGS__); +#define dbgp1(...) {(mukprint==1) ? kdbp(__VA_ARGS__):0;} +#define dbgp2(...) {(mukprint==2) ? kdbp(__VA_ARGS__):0;} + + +/* returns : 0 success */ +static noinline int vmxit_msr_read(struct cpu_user_regs *regs) +{ + uint inst_len = __get_instruction_length(); + int rc=1; + + u64 msr_content = 0; + switch (regs->ecx) + { + case MSR_IA32_MISC_ENABLE: + { + rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); + msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL | + MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL; + break; + } + default: + { + rdmsrl(regs->ecx, msr_content); + break; + } + } + regs->eax = (uint32_t)msr_content; + regs->edx = (uint32_t)(msr_content >> 32); + __update_guest_eip(inst_len); + rc = 0; + +#if 0 + rc = (long_mode_do_msr_read(regs) == HNDL_done) ? 0 : 1; + + if ( hvm_msr_read_intercept(regs) == X86EMUL_OKAY ) { + __update_guest_eip(inst_len); + rc = 0; + } +#endif + + dbgp1("msr read c:%lx a:%lx d:%lx RIP:%lx RSP:%lx\n", regs->ecx, regs->eax, + regs->edx, vmr(GUEST_RIP), vmr(GUEST_RSP)); + return rc; +} + +/* for now just scratch the cpu since nothing else will run on it. eventually + * we need to save and restore these MSRs + * returns : 0 success */ +static noinline int vmxit_msr_write(struct cpu_user_regs *regs) +{ + uint inst_len = __get_instruction_length(); + int rc=1; +#if 0 + wrmsr(regs->ecx, regs->eax, regs->edx); + + rc = (long_mode_do_msr_write(regs) == HNDL_done) ? 0 : 1; + return rc; +#endif + + dbgp1("MUK: msr write:0x%lx. 
eax:0x%lx edx:0x%lx\n", regs->ecx,
+          regs->eax,regs->edx);
+    if ( hvm_msr_write_intercept(regs) == X86EMUL_OKAY ) {
+        __update_guest_eip(inst_len);
+        rc = 0;
+    }
+    return rc;
+}
+
+/* rc == 0: handled the MTF vmexit */
+static noinline int vmxit_mtf(struct cpu_user_regs *regs)
+{
+    struct vcpu *vp = current;
+    int rc=1, ss=vp->arch.hvm_vcpu.single_step;
+
+    dbgp2("\n");
+    vp->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+    __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vp->arch.hvm_vmx.exec_control);
+    vp->arch.hvm_vcpu.single_step = 0;
+
+    /* kdb will set hvm_vcpu.single_step again if ss command */
+    if (kdb_handle_trap_entry(TRAP_debug, regs)) {      /* TBD: ifdef KDB */
+        rc = 0;
+    } else if ( vp->domain->debugger_attached && ss ) {
+        domain_pause_for_debugger();
+        rc = 0;
+    }
+    return rc;
+}
+
+volatile int mukprintpf;
+/* rc == 0: handled the exception or NMI */
+static noinline int vmxit_exception(struct cpu_user_regs *regs)
+{
+    unsigned int vector = (__vmread(VM_EXIT_INTR_INFO)) & INTR_INFO_VECTOR_MASK;
+    int rc=1;
+
+    dbgp2(" exception. vec:%d cs:%x\n", vector, vmr(GUEST_CS_SELECTOR));
+    if (vector == TRAP_debug) {
+        if (kdb_handle_trap_entry(vector, regs))        /* TBD: ifdef KDB */
+            rc = 0;
+        else {
+            domain_pause_for_debugger();
+            rc = 0;
+        }
+    }
+    if (vector == TRAP_int3) {
+        int inst_len = __get_instruction_length();
+        __update_guest_eip(inst_len);
+
+        if (kdb_handle_trap_entry(vector, regs))
+            rc = 0;
+        else {
+            kdbp("[%d]MUK: domain pause for debugger\n", smp_processor_id());
+            current->arch.gdbsx_vcpu_event = TRAP_int3;
+            domain_pause_for_debugger();
+            rc = 0;
+        }
+    }
+
+    if (vector == TRAP_no_device) {
+        vmx_fpu_dirty_intercept();
+        rc = 0;
+    }
+
+    if (vector == TRAP_gp_fault) {
+        regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
+        kdbp("MUK: inject GP: errcode:0x%04x RIP:%016lx RSP:%016lx\n",
+             regs->error_code, (ulong)vmr(GUEST_RIP),
+             (ulong)vmr(GUEST_RSP));
+
+        kdb_trap_immed(KDB_TRAP_NONFATAL);
+        /* vmx_inject_hw_exception(TRAP_gp_fault, regs->error_code); */
+        rc = 1;
+    }
+
+    if (vector == TRAP_page_fault) {
+        extern int fixup_page_fault(unsigned long , struct cpu_user_regs *);
+        ulong eflags_sav = regs->eflags;
+        unsigned long va = __vmread(EXIT_QUALIFICATION);
+
+        regs->error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
+
+        if (mukprintpf)
+            kdbp("MUK:PF va:%016lx errcode:0x%04x RIP:%016lx RSP:%016lx",
+                 va, regs->error_code, (ulong)vmr(GUEST_RIP),
+                 (ulong)vmr(GUEST_RSP));
+
+        regs->eflags |= X86_EFLAGS_IF;
+        if (fixup_page_fault(va, regs) == 0) {
+            if (mukprintpf)
+                kdbp(" NOT ");
+            current->arch.hvm_vcpu.guest_cr[2] = va;
+            vmx_inject_hw_exception(TRAP_page_fault, regs->error_code);
+        }
+        regs->eflags = eflags_sav;
+        if (mukprintpf)
+            kdbp(" fixedup\n");
+        rc = 0;
+    }
+
+    /* TBD: call do_guest_trap() here */
+    if (rc)
+        kdbp("MUK: Unhandled trap vector:%d\n", vector);
+    return rc;
+}
+
+int vmxit_invlpg(void)
+{
+    int inst_len = __get_instruction_length();
+    ulong vaddr = __vmread(EXIT_QUALIFICATION);
+
+    KASSERT(hap_enabled(current->domain));
+    __update_guest_eip(inst_len);
+    vpid_sync_vcpu_gva(current, vaddr);
+    return 0;
+}
+
+/* rc == 0: success */
+static noinline int vmxit_vmcall(struct cpu_user_regs *regs)
+{
+    extern void *hvm_hypercall64_table[NR_hypercalls];
+    int rc, inst_len=__get_instruction_length();
+
+    if (regs->eax >= NR_hypercalls || hvm_hypercall64_table[regs->eax] ==NULL) {
+        kdbp("MUK: UnImplemented HCALL:%d\n", regs->eax);
+        return 1;
+    }
+    dbgp2("vmxit_vmcall: hcall eax:$%ld\n", regs->eax);
+    if (regs->eax == __HYPERVISOR_sched_op && regs->rdi == SCHEDOP_shutdown) {
+        kdbp("MUK: SCHEDOP_shutdown\n");
+        return 1;
+    }
+
+    rc = hvm_do_hypercall(regs);
+#if 0
+    extern int hybrid_do_hypercall(struct cpu_user_regs *regs);
+    rc = hybrid_do_hypercall(regs);
+#endif
+
+    if (rc != HVM_HCALL_preempted)
+        __update_guest_eip(inst_len);
+
+    if (rc != HVM_HCALL_completed) {
+        printk("hvm_do_hypercall rc:%d\n", rc);
+        rc = 1;
+    } else
+        rc = 0;
+
+    return rc;
+}
+
+static noinline uint64_t *get_gpr_ptr(struct cpu_user_regs *regs, uint gpr)
+{
+    switch (gpr)
+    {
+        case VMX_CONTROL_REG_ACCESS_GPR_EAX:
+            return &regs->eax;
+        case VMX_CONTROL_REG_ACCESS_GPR_ECX:
+            return &regs->ecx;
+        case VMX_CONTROL_REG_ACCESS_GPR_EDX:
+            return &regs->edx;
+        case VMX_CONTROL_REG_ACCESS_GPR_EBX:
+            return &regs->ebx;
+        case VMX_CONTROL_REG_ACCESS_GPR_ESP:
+            return &regs->esp;
+        case VMX_CONTROL_REG_ACCESS_GPR_EBP:
+            return &regs->ebp;
+        case VMX_CONTROL_REG_ACCESS_GPR_ESI:
+            return &regs->esi;
+        case VMX_CONTROL_REG_ACCESS_GPR_EDI:
+            return &regs->edi;
+        case VMX_CONTROL_REG_ACCESS_GPR_R8:
+            return &regs->r8;
+        case VMX_CONTROL_REG_ACCESS_GPR_R9:
+            return &regs->r9;
+        case VMX_CONTROL_REG_ACCESS_GPR_R10:
+            return &regs->r10;
+        case VMX_CONTROL_REG_ACCESS_GPR_R11:
+            return &regs->r11;
+        case VMX_CONTROL_REG_ACCESS_GPR_R12:
+            return &regs->r12;
+        case VMX_CONTROL_REG_ACCESS_GPR_R13:
+            return &regs->r13;
+        case VMX_CONTROL_REG_ACCESS_GPR_R14:
+            return &regs->r14;
+        case VMX_CONTROL_REG_ACCESS_GPR_R15:
+            return &regs->r15;
+        default:
+            return NULL;
+    }
+}
+/* rc == 0: success */
+static noinline int access_cr0(struct cpu_user_regs *regs, uint acc_typ,
+                               uint64_t *regp)
+{
+    struct vcpu *vp = current;
+
+    if (acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR )
+    {
+        unsigned long new_cr0 = *regp;
+        unsigned long old_cr0 = __vmread(GUEST_CR0);
+
+        dbgp2("MUK:writing to CR0. RIP:%lx val:0x%lx\n", vmr(GUEST_RIP),*regp);
+        if ( (u32)new_cr0 != new_cr0 )
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Guest setting upper 32 bits in CR0: %lx", new_cr0);
+            return 1;
+        }
+
+        new_cr0 &= ~HVM_CR0_GUEST_RESERVED_BITS;
+        /* ET is reserved and should be always be 1. */
+        new_cr0 |= X86_CR0_ET;
+
+        /* hybrid cannot change to real mode */
+        if ( (new_cr0 & (X86_CR0_PE|X86_CR0_PG)) != (X86_CR0_PG|X86_CR0_PE) ) {
+            kdbp("Guest attempting to turn off PE/PG. CR0:%lx\n", new_cr0);
+            return 1;
+        }
+        /* TS going from 1 to 0 */
+        if ( (old_cr0 & X86_CR0_TS) && ((new_cr0 & X86_CR0_TS)==0) )
+            vmx_fpu_enter(vp);
+
+        vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = new_cr0;
+        __vmwrite(GUEST_CR0, new_cr0);
+        __vmwrite(CR0_READ_SHADOW, new_cr0);
+    } else {
+        *regp = __vmread(GUEST_CR0);
+    }
+    return 0;
+}
+
+/* rc == 0: success */
+static noinline int access_cr4(struct cpu_user_regs *regs, uint acc_typ,
+                               uint64_t *regp)
+{
+    if (acc_typ == VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR )
+    {
+        u64 old_cr4 = __vmread(GUEST_CR4);
+        /* kdbp("MUK:writing to CR4. val:0x%lx\n", *regp); */
+
+        if ( (old_cr4 ^ (*regp)) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
+            vpid_sync_all();
+
+        /* hybrid_verify_cr4_wr(*regp)); */
+        __vmwrite(GUEST_CR4, *regp);
+    } else {
+        *regp = __vmread(GUEST_CR4);
+        kdbp("MUK: read cr4. val:0x%lx\n", *regp);
+    }
+    return 0;
+}
+
+/* rc == 0: success */
+static noinline int vmxit_cr_access(struct cpu_user_regs *regs)
+{
+    unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION);
+    uint inst_len = __get_instruction_length();
+    uint acc_typ = exit_qualification & VMX_CONTROL_REG_ACCESS_TYPE;
+    int cr, rc = 1;
+
+    switch ( acc_typ )
+    {
+        case VMX_CONTROL_REG_ACCESS_TYPE_MOV_TO_CR:
+        case VMX_CONTROL_REG_ACCESS_TYPE_MOV_FROM_CR:
+        {
+            uint gpr = exit_qualification & VMX_CONTROL_REG_ACCESS_GPR;
+            uint64_t *regp = get_gpr_ptr(regs, gpr);
+            cr = exit_qualification & VMX_CONTROL_REG_ACCESS_NUM;
+
+            if (regp == NULL)
+                break;
+
+            /* pl don't embed switch statements */
+            if (cr == 0)
+                rc = access_cr0(regs, acc_typ, regp);
+            else if (cr == 4)
+                rc = access_cr4(regs, acc_typ, regp);
+
+            if (rc == 0)
+                __update_guest_eip(inst_len);
+            break;
+        }
+        case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
+        {
+#if 0
+            unsigned long cr0 = __vmread(GUEST_CR0);
+            cr0 &= ~X86_CR0_TS;
+#endif
+            struct vcpu *vp = current;
+            unsigned long cr0 = vp->arch.hvm_vcpu.guest_cr[0] & ~X86_CR0_TS;
+            vp->arch.hvm_vcpu.hw_cr[0] = vp->arch.hvm_vcpu.guest_cr[0] = cr0;
+            vmx_fpu_enter(vp);
+            __vmwrite(GUEST_CR0, cr0);
+            __vmwrite(CR0_READ_SHADOW, cr0);
+            __update_guest_eip(inst_len);
+            rc = 0;
+        }
+    }
+    return rc;
+}
+
+#if 0
+/* emulate write_cr3(read_cr3()) in guest. */
+static noinline int vmxit_invvpid(void)
+{
+    hvm_asid_flush_vcpu(current);
+    return 0;
+}
+#endif
+
+volatile int mukprtsc=1;
+void hybrid_vmx_vmexit_handler(struct cpu_user_regs *regs)
+{
+    unsigned int vector, exit_reason = __vmread(VM_EXIT_REASON);
+    int rc=0, ccpu = smp_processor_id();
+    struct vcpu *vp = current;
+
+    dbgp1("MUK:[%d]left VMCS exitreas:%d RIP:%lx RSP:%lx EFLAGS:%lx CR0:%lx\n",
+          ccpu, exit_reason, vmr(GUEST_RIP), vmr(GUEST_RSP), regs->rflags,
+          vmr(GUEST_CR0));
+
+    KASSERT( (vmr(GUEST_CR0)) != 0x8);
+    switch ( (uint16_t)exit_reason )
+    {
+        case EXIT_REASON_EXCEPTION_NMI:              /* 0 */
+            rc = vmxit_exception(regs);
+            break;
+
+        case EXIT_REASON_EXTERNAL_INTERRUPT:         /* 1 */
+        {
+            vector = __vmread(VM_EXIT_INTR_INFO);
+            vector &= INTR_INFO_VECTOR_MASK;
+            dbgp2("MUK: [%d] exit vmcs reas:%d vec:%d cr0:0x%016lx\n", ccpu,
+                  exit_reason, vector, vmr(GUEST_CR0));
+            vmx_do_extint(regs);
+            break;
+        }
+
+        case EXIT_REASON_TRIPLE_FAULT:               /* 2 */
+        {
+#if 0
+            static int once;
+            if (!once)
+                kdbp("MUK:[%d]left VMCS exitreas:%d RIP:%lx RSP:%lx EFLAGS:%lx CR0:%lx\n",
+                     ccpu, exit_reason, vmr(GUEST_RIP), vmr(GUEST_RSP), regs->rflags,
+                     vmr(GUEST_CR0));
+            once = 1;
+            vmx_inject_hw_exception(TRAP_gp_fault, regs->error_code);
+            rc = 0;
+#endif
+            kdbp("MUK:[%d]left VMCS exitreas:%d RIP:%lx RSP:%lx EFLAGS:%lx CR3:%lx\n",
+                 ccpu, exit_reason, vmr(GUEST_RIP), vmr(GUEST_RSP), regs->rflags,
+                 vmr(GUEST_CR3));
+
+            __vmwrite(GUEST_CR3, 0x1803000);
+            if ( paging_mode_hap(vp->domain) && hvm_paging_enabled(vp) )
+                vp->arch.hvm_vcpu.guest_cr[3] = vp->arch.hvm_vcpu.hw_cr[3] =
+                    __vmread(GUEST_CR3);
+            kdb_trap_immed(KDB_TRAP_NONFATAL);
+            rc = 1;
+            break;
+        }
+        case EXIT_REASON_PENDING_VIRT_INTR:          /* 7 */
+        {
+            struct vcpu *v = current;
+            /* Disable the interrupt window.
*/ + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); + break; + } + + case EXIT_REASON_CPUID: /* 10 */ + { + int ilen=__get_instruction_length(); + __update_guest_eip(ilen); + dbgp2("cpuid:%d RIP:%lx\n", regs->eax, vmr(GUEST_RIP)); + vmx_do_cpuid(regs); + break; + } + +#if 0 + case EXIT_REASON_INVLPG: /* 14 */ + rc = vmxit_invlpg(); + break; +#endif + case EXIT_REASON_RDTSC: /* 16 */ + { +#if 0 + uint64_t tsc; + int ilen=__get_instruction_length(); + rdtscll(tsc); + regs->eax = (uint32_t)tsc; + regs->edx = (uint32_t)(tsc >> 32); +#endif + rdtsc(regs->eax, regs->edx); +if (mukprtsc) + kdbp(" RDTSC: eax:%lx edx:%lx\n", regs->eax, regs->edx); + __update_guest_eip(__get_instruction_length()); + rc = 0; + break; + } + + case EXIT_REASON_VMCALL: /* 18 */ + rc = vmxit_vmcall(regs); + break; + + case EXIT_REASON_CR_ACCESS: /* 28 */ + rc = vmxit_cr_access(regs); + break; + + case EXIT_REASON_DR_ACCESS: /* 29 */ + { + unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION); + vmx_dr_access(exit_qualification, regs); + break; + } + case EXIT_REASON_MSR_READ: /* 31 */ + rc = vmxit_msr_read(regs); + break; + + case EXIT_REASON_MSR_WRITE: /* 32 */ + rc = vmxit_msr_write(regs); + break; + + case EXIT_REASON_MONITOR_TRAP_FLAG: /* 37 */ + rc = vmxit_mtf(regs); + break; +#if 0 + case EXIT_REASON_INVVPID: /* 53 */ + rc = vmxit_invvpid(); + break; +#endif + default: + rc = 1; + } + if (rc) { + unsigned long exit_qualification = __vmread(EXIT_QUALIFICATION); + local_irq_enable(); + kdbp("MUK: [%d] exit_reas:%d 0x%lx qual:%ld 0x%lx cr0:0x%016lx\n", + ccpu, exit_reason, exit_reason, exit_qualification, + exit_qualification, vmr(GUEST_CR0)); + kdbp("MUK: [%d] RIP:%lx RSP:%lx\n", ccpu, + vmr(GUEST_RIP), vmr(GUEST_RSP)); + domain_crash_synchronous(); + } + + /*dbgp("MUK: will enter vmcs: cs:%x ss:%x\n", vmr(GUEST_CS_SELECTOR), + vmr(GUEST_SS_SELECTOR)); */ + + dbgp1("MUK: will enter vmcs:RIP:%lx RSP:%lx cr0:%lx eflags:%lx\n", + vmr(GUEST_RIP), vmr(GUEST_RSP), vmr(GUEST_CR0), regs->rflags); + + local_irq_enable(); + KASSERT( (vmr(GUEST_CR0)) != 0x8); +} + +void hybrid_flush_tlb(void) +{ + vpid_sync_all(); +} + +void hybrid_do_invlpg(ulong addr) +{ + /* vpid_sync_all(); */ + vpid_sync_vcpu_gva(current, addr); +} + + diff -r f2cf898c7ff8 xen/arch/x86/hvm/vmx/intr.c --- a/xen/arch/x86/hvm/vmx/intr.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/vmx/intr.c Thu Nov 17 15:37:30 2011 -0800 @@ -125,8 +125,9 @@ asmlinkage void vmx_intr_assist(void) return; } - /* Crank the handle on interrupt state. */ - pt_update_irq(v); + if (!is_hybrid_vcpu(v)) + /* Crank the handle on interrupt state. */ + pt_update_irq(v); do { intack = hvm_vcpu_has_pending_irq(v); diff -r f2cf898c7ff8 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Nov 17 15:37:30 2011 -0800 @@ -593,6 +593,311 @@ void vmx_disable_intercept_for_msr(struc } } + +void hybrid_update_cr3(struct vcpu *v) +{ + vmx_vmcs_enter(v); + __vmwrite(GUEST_CR3, v->arch.cr3); + __vmwrite(HOST_CR3, v->arch.cr3); + + vpid_sync_all(); + /* hvm_asid_flush_vcpu(v); */ + vmx_vmcs_exit(v); +} + +static int hybrid_construct_vmcs(struct vcpu *v) +{ + struct domain *d = v->domain; + uint16_t sysenter_cs; + unsigned long sysenter_eip; + u32 vmexit_ctl = vmx_vmexit_control; + u32 vmentry_ctl = vmx_vmentry_control; + u64 u64val; + + vmx_vmcs_enter(v); + + /* VMCS controls. 
*/ + vmx_pin_based_exec_control &= ~PIN_BASED_VIRTUAL_NMIS; + __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); + + v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control; + + if ( v->domain->arch.vtsc ) + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING; + + if ( paging_mode_hap(d) ) + { + v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING | + CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + } + v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_ACTIVATE_IO_BITMAP; /* ??? */ + v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW; + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + + kdbp("MUK: writing proc based exec controls:%x\n", + v->arch.hvm_vmx.exec_control); + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); + + /* MSR access bitmap. */ + if ( cpu_has_vmx_msr_bitmap ) + { + unsigned long *msr_bitmap = alloc_xenheap_page(); + + if ( msr_bitmap == NULL ) + return -ENOMEM; + + memset(msr_bitmap, ~0, PAGE_SIZE); + v->arch.hvm_vmx.msr_bitmap = msr_bitmap; + __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap)); + + vmx_disable_intercept_for_msr(v, MSR_FS_BASE); + vmx_disable_intercept_for_msr(v, MSR_GS_BASE); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP); + + /* pure hvm doesn't do this. safe? see: long_mode_do_msr_write() */ +#if 0 + vmx_disable_intercept_for_msr(v, MSR_STAR); + vmx_disable_intercept_for_msr(v, MSR_LSTAR); + vmx_disable_intercept_for_msr(v, MSR_CSTAR); + vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK); +#endif + vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE); + + kdbp("MUK: disabled intercepts for few msrs\n"); + + } else { + kdbp("MUK: CPU does NOT have msr bitmap\n"); + for (;;) cpu_relax(); + } + + if ( !cpu_has_vmx_vpid ) { + printk("ERROR: VPID support is required to run PV in HVM container\n"); + return -ESRCH; + } + + v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control; + + if ( cpu_has_vmx_secondary_exec_control ) { + v->arch.hvm_vmx.secondary_exec_control &= ~0x4FF; /* turn off all */ +#if 0 + v->arch.hvm_vmx.secondary_exec_control &= + ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_RDTSCP; + + v->arch.hvm_vmx.secondary_exec_control &= + ~SECONDARY_EXEC_UNRESTRICTED_GUEST; +#endif + v->arch.hvm_vmx.secondary_exec_control |= + SECONDARY_EXEC_PAUSE_LOOP_EXITING; + v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID; + + if ( paging_mode_hap(d) ) + v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_EPT; + + kdbp("MUK: muk_construct_vmcs: sec exec:0x%x\n", + v->arch.hvm_vmx.secondary_exec_control); + __vmwrite(SECONDARY_VM_EXEC_CONTROL, + v->arch.hvm_vmx.secondary_exec_control); + } else { + printk("ERROR: NO Secondary Exec control\n"); + return -ESRCH; + } + + __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vcpu.asid); + + if ( !paging_mode_hap(d) ) + vmexit_ctl &= ~(VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT); + __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl); + + #define VM_ENTRY_LOAD_DEBUG_CTLS 0x4 + #define VM_ENTRY_LOAD_EFER 0x8000 + #define GUEST_EFER 0x2806 /* see page 23-20 */ + #define GUEST_EFER_HIGH 0x2807 /* see page 23-20 */ + vmentry_ctl &= 
~VM_ENTRY_LOAD_DEBUG_CTLS; + vmentry_ctl &= ~VM_ENTRY_LOAD_EFER; + vmentry_ctl &= ~VM_ENTRY_SMM; + vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR; + vmentry_ctl |= VM_ENTRY_IA32E_MODE; + if ( !paging_mode_hap(d) ) + vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT; + kdbp("MUK:muk_construct_vmcs(). vmentry_ctl:0x%x\n", vmentry_ctl); + __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl); + + /* MSR intercepts. */ + __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + + /* Host data selectors. */ + __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS); + __vmwrite(HOST_FS_SELECTOR, 0); + __vmwrite(HOST_GS_SELECTOR, 0); + __vmwrite(HOST_FS_BASE, 0); + __vmwrite(HOST_GS_BASE, 0); + + vmx_set_host_env(v); + + /* Host control registers. */ + v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS; + __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); + __vmwrite(HOST_CR4, mmu_cr4_features|(cpu_has_xsave ? X86_CR4_OSXSAVE : 0)); + + /* Host CS:RIP. */ + __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS); + __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler); + + /* Host SYSENTER CS:RIP. */ + rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs); + __vmwrite(HOST_SYSENTER_CS, sysenter_cs); + rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip); + __vmwrite(HOST_SYSENTER_EIP, sysenter_eip); + + __vmwrite(VM_ENTRY_INTR_INFO, 0); + + __vmwrite(CR3_TARGET_COUNT, 0); + + __vmwrite(GUEST_ACTIVITY_STATE, 0); + + __vmwrite(GUEST_CS_BASE, 0); + __vmwrite(GUEST_CS_LIMIT, ~0u); + __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* CS.L == 1 */ + __vmwrite(GUEST_CS_SELECTOR, 0x10); + + __vmwrite(GUEST_DS_BASE, 0); + __vmwrite(GUEST_DS_LIMIT, ~0u); + __vmwrite(GUEST_DS_AR_BYTES, 0xc093); + __vmwrite(GUEST_DS_SELECTOR, 0x18); + + __vmwrite(GUEST_SS_BASE, 0); /* use same seg as DS */ + __vmwrite(GUEST_SS_LIMIT, ~0u); + __vmwrite(GUEST_SS_AR_BYTES, 0xc093); + __vmwrite(GUEST_SS_SELECTOR, 0x18); + + __vmwrite(GUEST_ES_SELECTOR, 0); + __vmwrite(GUEST_FS_SELECTOR, 0); + __vmwrite(GUEST_GS_SELECTOR, 0); + + /* Guest segment bases. */ + __vmwrite(GUEST_ES_BASE, 0); + __vmwrite(GUEST_FS_BASE, 0); + __vmwrite(GUEST_GS_BASE, 0); + + /* Guest segment limits. */ + __vmwrite(GUEST_ES_LIMIT, ~0u); + __vmwrite(GUEST_FS_LIMIT, ~0u); + __vmwrite(GUEST_GS_LIMIT, ~0u); + + /* Guest segment AR bytes. */ + __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */ + __vmwrite(GUEST_FS_AR_BYTES, 0xc093); + __vmwrite(GUEST_GS_AR_BYTES, 0xc093); + + /* Guest IDT. */ + __vmwrite(GUEST_GDTR_BASE, 0); + __vmwrite(GUEST_GDTR_LIMIT, 0); + + /* Guest LDT. */ + __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */ + __vmwrite(GUEST_LDTR_SELECTOR, 0); + __vmwrite(GUEST_LDTR_BASE, 0); + __vmwrite(GUEST_LDTR_LIMIT, 0); + + /* Guest TSS. 
*/ + __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */ + __vmwrite(GUEST_TR_BASE, 0); + __vmwrite(GUEST_TR_LIMIT, 0xff); + + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + __vmwrite(GUEST_DR7, 0); + __vmwrite(VMCS_LINK_POINTER, ~0UL); + + if (paging_mode_hap(d)) { + __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0); + __vmwrite(EXCEPTION_BITMAP, + HVM_TRAP_MASK | TRAP_debug | + (1U<arch.hvm_vcpu.guest_cr[0] = X86_CR0_PG | X86_CR0_PE | X86_CR0_ET; + hvm_update_guest_cr(v, 0); + + v->arch.hvm_vcpu.guest_cr[4] = 0; + hvm_update_guest_cr(v, 4); +#endif + +#if 0 + u64val = X86_CR0_PG | X86_CR0_PE | X86_CR0_ET | X86_CR0_TS | + X86_CR0_NE | X86_CR0_WP; +#endif + /* make sure to set WP bit so rdonly pages are not written from CPL 0 */ + u64val = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP; + __vmwrite(GUEST_CR0, u64val); + __vmwrite(CR0_READ_SHADOW, u64val); + v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = u64val; + + u64val = X86_CR4_PAE | X86_CR4_VMXE; + __vmwrite(GUEST_CR4, u64val); + __vmwrite(CR4_READ_SHADOW, u64val); + v->arch.hvm_vcpu.guest_cr[4] = u64val; + + __vmwrite(CR0_GUEST_HOST_MASK, ~0UL); + __vmwrite(CR4_GUEST_HOST_MASK, ~0UL); + + v->arch.hvm_vmx.vmx_realmode = 0; + + if ( paging_mode_hap(d) ) + { + __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp); +#ifdef __i386__ + __vmwrite(EPT_POINTER_HIGH, + d->arch.hvm_domain.vmx.ept_control.eptp >> 32); +#endif + } + + if ( cpu_has_vmx_pat && paging_mode_hap(d) ) + { + u64 host_pat, guest_pat; + + rdmsrl(MSR_IA32_CR_PAT, host_pat); + guest_pat = MSR_IA32_CR_PAT_RESET; + + __vmwrite(HOST_PAT, host_pat); + __vmwrite(GUEST_PAT, guest_pat); +#ifdef __i386__ +JUNK + __vmwrite(HOST_PAT_HIGH, host_pat >> 32); + __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32); +#endif + } + vmx_vmcs_exit(v); +#if 0 + paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ +#endif + return 0; +} + static int construct_vmcs(struct vcpu *v) { struct domain *d = v->domain; @@ -601,6 +906,9 @@ static int construct_vmcs(struct vcpu *v u32 vmexit_ctl = vmx_vmexit_control; u32 vmentry_ctl = vmx_vmentry_control; + if (is_hybrid_domain(d)) + return hybrid_construct_vmcs(v); + vmx_vmcs_enter(v); /* VMCS controls. */ @@ -1001,8 +1309,10 @@ void vmx_do_resume(struct vcpu *v) vmx_clear_vmcs(v); vmx_load_vmcs(v); - hvm_migrate_timers(v); - hvm_migrate_pirqs(v); + if (!is_hybrid_vcpu(v)) { + hvm_migrate_timers(v); + hvm_migrate_pirqs(v); + } vmx_set_host_env(v); hvm_asid_flush_vcpu(v); } @@ -1018,14 +1328,6 @@ void vmx_do_resume(struct vcpu *v) reset_stack_and_jump(vmx_asm_do_vmentry); } -static unsigned long vmr(unsigned long field) -{ - int rc; - unsigned long val; - val = __vmread_safe(field, &rc); - return rc ? 
0 : val; -} - static void vmx_dump_sel(char *name, uint32_t selector) { uint32_t sel, attr, limit; @@ -1263,6 +1565,8 @@ static void noinline kdb_print_vmcs(stru vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR); vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT); vmx_dump_sel("TR", GUEST_TR_SELECTOR); + kdbp("Guest EFER = 0x%08x%08x\n", + (uint32_t)vmr(GUEST_EFER_HIGH), (uint32_t)vmr(GUEST_EFER)); kdbp("Guest PAT = 0x%08x%08x\n", (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT)); x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32; @@ -1276,6 +1580,10 @@ static void noinline kdb_print_vmcs(stru (int)vmr(GUEST_INTERRUPTIBILITY_INFO), (int)vmr(GUEST_ACTIVITY_STATE)); + kdbp("MSRs: entry_load:$%d exit_load:$%d exit_store:$%d\n", + vmr(VM_ENTRY_MSR_LOAD_COUNT), vmr(VM_EXIT_MSR_LOAD_COUNT), + vmr(VM_EXIT_MSR_STORE_COUNT)); + kdbp("\n*** Host State ***\n"); kdbp("RSP = 0x%016llx RIP = 0x%016llx\n", (unsigned long long)vmr(HOST_RSP), @@ -1316,6 +1624,9 @@ static void noinline kdb_print_vmcs(stru (uint32_t)vmr(VM_EXIT_CONTROLS)); kdbp("ExceptionBitmap=%08x\n", (uint32_t)vmr(EXCEPTION_BITMAP)); + kdbp("PAGE_FAULT_ERROR_CODE MASK:0x%lx MATCH:0x%lx\n", + (unsigned long)vmr(PAGE_FAULT_ERROR_CODE_MASK), + (unsigned long)vmr(PAGE_FAULT_ERROR_CODE_MATCH)); kdbp("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n", (uint32_t)vmr(VM_ENTRY_INTR_INFO), (uint32_t)vmr(VM_ENTRY_EXCEPTION_ERROR_CODE), @@ -1344,8 +1655,7 @@ static void noinline kdb_print_vmcs(stru * do __vmreads. So, the VMCS pointer can't be left cleared. * - Doing __vmpclear will set the vmx state to 'clear', so to resume a * vmlaunch must be done and not vmresume. This means, we must clear - * arch_vmx->launched. Just call __vmx_clear_vmcs(), hopefully it won't keep - * changing... + * arch_vmx->launched. */ void kdb_curr_cpu_flush_vmcs(void) { @@ -1358,12 +1668,14 @@ void kdb_curr_cpu_flush_vmcs(void) /* looks like we got one. unfortunately, current_vmcs points to vmcs * and not VCPU, so we gotta search the entire list... 
*/ for_each_domain (dp) { - if ( !is_hvm_domain(dp) || dp->is_dying) + if ( !(is_hvm_or_hyb_domain(dp)) || dp->is_dying) continue; for_each_vcpu (dp, vp) { if (vp->arch.hvm_vmx.active_cpu == smp_processor_id()) { - __vmx_clear_vmcs(vp); + __vmpclear(virt_to_maddr(vp->arch.hvm_vmx.vmcs)); __vmptrld(virt_to_maddr(vp->arch.hvm_vmx.vmcs)); + vp->arch.hvm_vmx.launched = 0; + kdbp("KDB:[%d] vmcs flushed\n", smp_processor_id()); } } } @@ -1382,7 +1694,7 @@ void kdb_dump_vmcs(domid_t did, int vid) ASSERT(!local_irq_is_enabled()); /* kdb should always run disabled */ for_each_domain (dp) { - if ( !is_hvm_domain(dp) || dp->is_dying) + if ( !(is_hvm_or_hyb_domain(dp)) || dp->is_dying) continue; if (did != 0 && did != dp->domain_id) continue; @@ -1400,7 +1712,7 @@ void kdb_dump_vmcs(domid_t did, int vid) kdbp("\n"); } /* restore orig vmcs pointer for __vmreads in vmx_vmexit_handler() */ - if (is_hvm_vcpu(current)) + if (is_hvm_or_hyb_vcpu(current)) __vmptrld(virt_to_maddr(current->arch.hvm_vmx.vmcs)); } #endif diff -r f2cf898c7ff8 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Nov 17 15:37:30 2011 -0800 @@ -68,7 +68,6 @@ static void vmx_cpuid_intercept( unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); static void vmx_wbinvd_intercept(void); -static void vmx_fpu_dirty_intercept(void); static int vmx_msr_read_intercept(struct cpu_user_regs *regs); static int vmx_msr_write_intercept(struct cpu_user_regs *regs); static void vmx_invlpg_intercept(unsigned long vaddr); @@ -87,6 +86,8 @@ static int vmx_domain_initialise(struct d->arch.hvm_domain.vmx.ept_control.asr = pagetable_get_pfn(d->arch.phys_table); + if (is_hybrid_domain(d)) + return 0; if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 ) return rc; @@ -98,6 +99,10 @@ static void vmx_domain_destroy(struct do { if ( d->arch.hvm_domain.hap_enabled ) on_each_cpu(__ept_sync_domain, d, 1); + + if (is_hybrid_domain(d)) + return; + vmx_free_vlapic_mapping(d); } @@ -119,13 +124,19 @@ static int vmx_vcpu_initialise(struct vc return rc; } - vpmu_initialise(v); - - vmx_install_vlapic_mapping(v); - - /* %eax == 1 signals full real-mode support to the guest loader. */ - if ( v->vcpu_id == 0 ) - v->arch.guest_context.user_regs.eax = 1; + /* Hybrid TBD: pmu */ + if ( !is_hybrid_vcpu(v)) { + vpmu_initialise(v); + + vmx_install_vlapic_mapping(v); + + /* %eax == 1 signals full real-mode support to the guest loader. 
*/ + if ( v->vcpu_id == 0 ) + v->arch.guest_context.user_regs.eax = 1; + } else { + /* for hvm_long_mode_enabled(v) */ + v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME; + } return 0; } @@ -398,6 +409,9 @@ static int vmx_guest_x86_mode(struct vcp { unsigned int cs_ar_bytes; +if (is_hybrid_vcpu(v)) + return 8; + if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) ) return 0; if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) ) @@ -628,7 +642,7 @@ static int vmx_load_vmcs_ctxt(struct vcp return 0; } -static void vmx_fpu_enter(struct vcpu *v) +void vmx_fpu_enter(struct vcpu *v) { setup_fpu(v); __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device); @@ -657,6 +671,7 @@ static void vmx_fpu_leave(struct vcpu *v { v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); +KASSERT( (vmr(GUEST_CR0)) != 0x8); __vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device); } } @@ -1155,6 +1170,7 @@ static void vmx_update_guest_cr(struct v v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); +KASSERT( (vmr(GUEST_CR0)) != 0x8); __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); break; } @@ -1299,6 +1315,7 @@ void vmx_inject_hw_exception(int trap, i if ( unlikely(intr_info & INTR_INFO_VALID_MASK) && (((intr_info >> 8) & 7) == X86_EVENTTYPE_HW_EXCEPTION) ) { + KASSERT(!is_hybrid_vcpu(curr)); trap = hvm_combine_hw_exceptions((uint8_t)intr_info, trap); if ( trap == TRAP_double_fault ) error_code = 0; @@ -1459,38 +1476,7 @@ void start_vmx(void) hvm_enable(&vmx_function_table); } -/* - * Not all cases receive valid value in the VM-exit instruction length field. - * Callers must know what they're doing! - */ -static int __get_instruction_length(void) -{ - int len; - len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */ - BUG_ON((len < 1) || (len > 15)); - return len; -} - -static void __update_guest_eip(unsigned long inst_len) -{ - struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned long x; - - regs->eip += inst_len; - regs->eflags &= ~X86_EFLAGS_RF; - - x = __vmread(GUEST_INTERRUPTIBILITY_INFO); - if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) ) - { - x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS); - __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x); - } - - if ( regs->eflags & X86_EFLAGS_TF ) - vmx_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE); -} - -static void vmx_fpu_dirty_intercept(void) +void vmx_fpu_dirty_intercept(void) { struct vcpu *curr = current; @@ -1500,6 +1486,7 @@ static void vmx_fpu_dirty_intercept(void if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { curr->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; +KASSERT( (vmr(GUEST_CR0)) != 0x8); __vmwrite(GUEST_CR0, curr->arch.hvm_vcpu.hw_cr[0]); } } @@ -1531,7 +1518,7 @@ static void vmx_cpuid_intercept( HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx); } -static void vmx_do_cpuid(struct cpu_user_regs *regs) +void vmx_do_cpuid(struct cpu_user_regs *regs) { unsigned int eax, ebx, ecx, edx; @@ -1548,7 +1535,7 @@ static void vmx_do_cpuid(struct cpu_user regs->edx = edx; } -static void vmx_dr_access(unsigned long exit_qualification, +void vmx_dr_access(unsigned long exit_qualification, struct cpu_user_regs *regs) { struct vcpu *v = current; @@ -2037,7 +2024,7 @@ gp_fault: return X86EMUL_EXCEPTION; } -static void vmx_do_extint(struct cpu_user_regs *regs) +void vmx_do_extint(struct cpu_user_regs *regs) { unsigned int vector; @@ -2182,9 +2169,16 @@ static void vmx_failed_vmentry(unsigned break; 
} +#if defined(XEN_KDB_CONFIG) + { extern void kdb_dump_vmcs(domid_t did, int vid); + printk("\n************* VMCS Area **************\n"); + kdb_dump_vmcs(curr->domain->domain_id, (curr)->vcpu_id); + } +#else printk("************* VMCS Area **************\n"); vmcs_dump_vcpu(curr); printk("**************************************\n"); +#endif domain_crash(curr->domain); } @@ -2268,6 +2262,8 @@ err: return -1; } +extern void hybrid_vmx_vmexit_handler(struct cpu_user_regs *regs); + asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) { unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0; @@ -2278,6 +2274,11 @@ asmlinkage void vmx_vmexit_handler(struc v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = __vmread(GUEST_CR3); + if ( is_hybrid_vcpu(v)) { + hybrid_vmx_vmexit_handler(regs); + return; + } + exit_reason = __vmread(VM_EXIT_REASON); if ( hvm_long_mode_enabled(v) ) @@ -2632,13 +2633,13 @@ asmlinkage void vmx_vmexit_handler(struc case EXIT_REASON_MONITOR_TRAP_FLAG: v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); - v->arch.hvm_vcpu.single_step = 0; #if defined(XEN_KDB_CONFIG) if (kdb_handle_trap_entry(TRAP_debug, regs)) break; #endif if ( v->domain->debugger_attached && v->arch.hvm_vcpu.single_step ) domain_pause_for_debugger(); + v->arch.hvm_vcpu.single_step = 0; break; case EXIT_REASON_PAUSE_INSTRUCTION: diff -r f2cf898c7ff8 xen/arch/x86/hvm/vpt.c --- a/xen/arch/x86/hvm/vpt.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/hvm/vpt.c Thu Nov 17 15:37:30 2011 -0800 @@ -289,6 +289,7 @@ void pt_intr_post(struct vcpu *v, struct if ( intack.source == hvm_intsrc_vector ) return; + KASSERT(!is_hybrid_vcpu(current)); spin_lock(&v->arch.hvm_vcpu.tm_lock); pt = is_pt_irq(v, intack); diff -r f2cf898c7ff8 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/mm.c Thu Nov 17 15:37:30 2011 -0800 @@ -490,9 +490,12 @@ void make_cr3(struct vcpu *v, unsigned l #endif /* !defined(__i386__) */ +/* calling hybrid_update_cr3 doesnt work because during context switch + * vmcs is not completely setup? */ void write_ptbase(struct vcpu *v) { - write_cr3(v->arch.cr3); + if (!is_hybrid_vcpu(v)) + write_cr3(v->arch.cr3); } /* @@ -2482,6 +2485,7 @@ int get_page_type_preemptible(struct pag } +extern void hybrid_update_cr3(struct vcpu *v); int new_guest_cr3(unsigned long mfn) { struct vcpu *curr = current; @@ -2530,6 +2534,9 @@ int new_guest_cr3(unsigned long mfn) write_ptbase(curr); + if (is_hybrid_vcpu(curr)) + hybrid_update_cr3(curr); + if ( likely(old_base_mfn != 0) ) { if ( paging_mode_refcounts(d) ) @@ -2863,10 +2870,23 @@ int do_mmuext_op( #endif case MMUEXT_TLB_FLUSH_LOCAL: + /* do this for both, flush_tlb_user and flush_tlb_kernel, for now. 
+ * To debug: hvm_asid_flush_vcpu for flush_tlb_user, and + * vpid_sync_all for flush_tlb_kernel */ + if (is_hybrid_domain(d)) { + extern void hybrid_flush_tlb(void); + hybrid_flush_tlb(); + break; + } flush_tlb_local(); break; case MMUEXT_INVLPG_LOCAL: + if (is_hybrid_domain(d)) { + extern void hybrid_do_invlpg(ulong); + hybrid_do_invlpg(op.arg1.linear_addr); + break; + } if ( !paging_mode_enabled(d) || paging_invlpg(curr, op.arg1.linear_addr) != 0 ) flush_tlb_one_local(op.arg1.linear_addr); @@ -2877,6 +2897,10 @@ int do_mmuext_op( { cpumask_t pmask; + if (is_hybrid_domain(d)) { + printk("MUK:FIX: MMUEXT_TLB_FLUSH_MULTI/MMUEXT_INVLPG_MULTI\n"); + break; + } if ( unlikely(vcpumask_to_pcpumask(d, op.arg2.vcpumask, &pmask)) ) { okay = 0; @@ -4181,7 +4205,7 @@ long do_update_descriptor(u64 pa, u64 de mfn = gmfn_to_mfn(dom, gmfn); if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) || !mfn_valid(mfn) || - !check_descriptor(dom, &d) ) + (!is_hybrid_domain(dom) && !check_descriptor(dom, &d)) ) return -EINVAL; page = mfn_to_page(mfn); diff -r f2cf898c7ff8 xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/mm/hap/hap.c Thu Nov 17 15:37:30 2011 -0800 @@ -705,8 +705,10 @@ void hap_vcpu_init(struct vcpu *v) static int hap_page_fault(struct vcpu *v, unsigned long va, struct cpu_user_regs *regs) { - HAP_ERROR("Intercepted a guest #PF (%u:%u) with HAP enabled.\n", - v->domain->domain_id, v->vcpu_id); + HAP_ERROR("Intercepted a guest #PF (%u:%u:VA %016lx IP:%016lx) with " + "HAP enabled.\n", v->domain->domain_id, v->vcpu_id,va, regs->rip); + + kdb_trap_immed(KDB_TRAP_NONFATAL); domain_crash(v->domain); return 0; } diff -r f2cf898c7ff8 xen/arch/x86/mm/mem_event.c --- a/xen/arch/x86/mm/mem_event.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/mm/mem_event.c Thu Nov 17 15:37:30 2011 -0800 @@ -216,7 +216,7 @@ int mem_event_domctl(struct domain *d, x /* Currently only EPT is supported */ rc = -ENODEV; - if ( !(is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled && + if ( !(is_hvm_or_hyb_domain(d) && d->arch.hvm_domain.hap_enabled && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) ) break; diff -r f2cf898c7ff8 xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/mm/mem_sharing.c Thu Nov 17 15:37:30 2011 -0800 @@ -42,9 +42,6 @@ static void mem_sharing_audit(void); # define mem_sharing_audit() do {} while(0) #endif /* MEM_SHARING_AUDIT */ - -#define hap_enabled(d) \ - (is_hvm_domain(d) && (d)->arch.hvm_domain.hap_enabled) #define mem_sharing_enabled(d) \ (is_hvm_domain(d) && (d)->arch.hvm_domain.mem_sharing_enabled) diff -r f2cf898c7ff8 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/mm/p2m.c Thu Nov 17 15:37:30 2011 -0800 @@ -1569,7 +1569,7 @@ int p2m_init(struct domain *d) p2m->get_entry_current = p2m_gfn_to_mfn_current; p2m->change_entry_type_global = p2m_change_type_global; - if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled && + if ( is_hvm_or_hyb_domain(d) && d->arch.hvm_domain.hap_enabled && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ) ept_p2m_init(d); @@ -1596,7 +1596,7 @@ int set_p2m_entry(struct domain *d, unsi while ( todo ) { - if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled ) + if ( is_hvm_or_hyb_domain(d) && d->arch.hvm_domain.hap_enabled ) order = ((((gfn | mfn_x(mfn) | todo) & (SUPERPAGE_PAGES - 1)) == 0) && hvm_hap_has_2mb(d)) ? 
9 : 0; else diff -r f2cf898c7ff8 xen/arch/x86/mm/paging.c --- a/xen/arch/x86/mm/paging.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/mm/paging.c Thu Nov 17 15:37:30 2011 -0800 @@ -29,8 +29,6 @@ #include #include -#define hap_enabled(d) (is_hvm_domain(d) && (d)->arch.hvm_domain.hap_enabled) - /* Printouts */ #define PAGING_PRINTK(_f, _a...) \ debugtrace_printk("pg: %s(): " _f, __func__, ##_a) diff -r f2cf898c7ff8 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/time.c Thu Nov 17 15:37:30 2011 -0800 @@ -879,7 +879,7 @@ static void __update_vcpu_system_time(st _u.tsc_to_system_mul = t->tsc_scale.mul_frac; _u.tsc_shift = (s8)t->tsc_scale.shift; } - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) _u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset; /* Don't bother unless timestamp record has changed or we are forced. */ @@ -947,7 +947,7 @@ static void update_domain_rtc(void) rcu_read_lock(&domlist_read_lock); for_each_domain ( d ) - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) rtc_update_clock(d); rcu_read_unlock(&domlist_read_lock); @@ -956,7 +956,7 @@ static void update_domain_rtc(void) void domain_set_time_offset(struct domain *d, int32_t time_offset_seconds) { d->time_offset_seconds = time_offset_seconds; - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) rtc_update_clock(d); } @@ -1856,7 +1856,6 @@ void tsc_set_info(struct domain *d, d->arch.vtsc = 0; return; } - switch ( d->arch.tsc_mode = tsc_mode ) { case TSC_MODE_NEVER_EMULATE: @@ -1901,7 +1900,7 @@ void tsc_set_info(struct domain *d, break; } d->arch.incarnation = incarnation + 1; - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) hvm_set_rdtsc_exiting(d, d->arch.vtsc); } diff -r f2cf898c7ff8 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/traps.c Thu Nov 17 15:37:30 2011 -0800 @@ -1217,7 +1217,7 @@ static int spurious_page_fault( return is_spurious; } -static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs) +int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs) { struct vcpu *v = current; struct domain *d = v->domain; @@ -3228,7 +3228,6 @@ void load_TR(void) .base = (long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY), .limit = LAST_RESERVED_GDT_BYTE }; - _set_tssldt_desc( this_cpu(gdt_table) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY, (unsigned long)tss, diff -r f2cf898c7ff8 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/arch/x86/x86_64/traps.c Thu Nov 17 15:37:30 2011 -0800 @@ -617,7 +617,7 @@ static void hypercall_page_initialise_ri void hypercall_page_initialise(struct domain *d, void *hypercall_page) { memset(hypercall_page, 0xCC, PAGE_SIZE); - if ( is_hvm_domain(d) ) + if ( is_hvm_or_hyb_domain(d) ) hvm_hypercall_page_initialise(d, hypercall_page); else if ( !is_pv_32bit_domain(d) ) hypercall_page_initialise_ring3_kernel(hypercall_page); diff -r f2cf898c7ff8 xen/common/domain.c --- a/xen/common/domain.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/common/domain.c Thu Nov 17 15:37:30 2011 -0800 @@ -238,8 +238,13 @@ struct domain *domain_create( spin_lock_init(&d->shutdown_lock); d->shutdown_code = -1; - if ( domcr_flags & DOMCRF_hvm ) + if ( domcr_flags & DOMCRF_hybrid ) { + d->is_hybrid = 1; + printk("Hybrid guest with%s ept. Domid:%d\n", + (domcr_flags&DOMCRF_hap) ? 
"" : " no", domid); + } else if ( domcr_flags & DOMCRF_hvm ) { d->is_hvm = 1; + } if ( domid == 0 ) { @@ -588,7 +593,8 @@ void domain_pause_for_debugger(void) for_each_vcpu ( d, v ) vcpu_sleep_nosync(v); - send_guest_global_virq(dom0, VIRQ_DEBUGGER); + if (current->arch.gdbsx_vcpu_event == 0) + send_guest_global_virq(dom0, VIRQ_DEBUGGER); } /* Complete domain destroy after RCU readers are not holding old references. */ diff -r f2cf898c7ff8 xen/common/domctl.c --- a/xen/common/domctl.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/common/domctl.c Thu Nov 17 15:37:30 2011 -0800 @@ -132,6 +132,8 @@ void getdomaininfo(struct domain *d, str if ( is_hvm_domain(d) ) info->flags |= XEN_DOMINF_hvm_guest; + else if ( is_hybrid_domain(d) ) + info->flags |= XEN_DOMINF_hybrid_guest; xsm_security_domaininfo(d, info); @@ -394,7 +396,8 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc if ( supervisor_mode_kernel || (op->u.createdomain.flags & ~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap | - XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off)) ) + XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off | + XEN_DOMCTL_CDF_hybrid_guest)) ) break; dom = op->domain; @@ -430,6 +433,8 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc domcr_flags |= DOMCRF_s3_integrity; if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_oos_off ) domcr_flags |= DOMCRF_oos_off; + if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hybrid_guest ) + domcr_flags |= DOMCRF_hybrid; ret = -ENOMEM; d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref); diff -r f2cf898c7ff8 xen/common/kernel.c --- a/xen/common/kernel.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/common/kernel.c Thu Nov 17 15:37:30 2011 -0800 @@ -239,13 +239,16 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL if ( supervisor_mode_kernel ) fi.submap |= 1U << XENFEAT_supervisor_mode_kernel; #ifdef CONFIG_X86 - if ( !is_hvm_vcpu(current) ) + if ( !is_hvm_vcpu(current) && + !paging_mode_translate(current->domain) ) /* hybrid */ fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) | (1U << XENFEAT_highmem_assist) | (1U << XENFEAT_gnttab_map_avail_bits); else fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) | (1U << XENFEAT_hvm_callback_vector); + if ( is_hybrid_vcpu(current) ) + fi.submap |= (1U << XENFEAT_hvm_callback_vector); #endif break; default: diff -r f2cf898c7ff8 xen/common/memory.c --- a/xen/common/memory.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/common/memory.c Thu Nov 17 15:37:30 2011 -0800 @@ -89,7 +89,7 @@ static void increase_reservation(struct a->nr_done = i; } -static void populate_physmap(struct memop_args *a) +static noinline void populate_physmap(struct memop_args *a) { struct page_info *page; unsigned long i, j; @@ -134,6 +134,7 @@ static void populate_physmap(struct memo } mfn = page_to_mfn(page); + guest_physmap_add_page(d, gpfn, mfn, a->extent_order); if ( !paging_mode_translate(d) ) diff -r f2cf898c7ff8 xen/common/timer.c --- a/xen/common/timer.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/common/timer.c Thu Nov 17 15:37:30 2011 -0800 @@ -546,13 +546,20 @@ void kdb_dump_timer_queues(void) struct timers *ts; unsigned long sz, offs; char buf[KSYM_NAME_LEN+1]; - int cpu, j; - s_time_t now = NOW(); + int cpu, j; + u64 tsc; for_each_online_cpu( cpu ) { ts = &per_cpu(timers, cpu); - kdbp("CPU[%02d]: NOW:0x%08x%08x\n", cpu, (u32)(now>>32), (u32)now); + kdbp("CPU[%02d]:", cpu); + + if (cpu == smp_processor_id()) { + s_time_t now = NOW(); + rdtscll(tsc); + kdbp("NOW:0x%08x%08x TSC:0x%016lx\n", (u32)(now>>32),(u32)now, tsc); + } else + kdbp("\n"); /* timers in the heap */ for ( j = 1; j 
<= GET_HEAP_SIZE(ts->heap); j++ ) { diff -r f2cf898c7ff8 xen/include/asm-x86/desc.h --- a/xen/include/asm-x86/desc.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/desc.h Thu Nov 17 15:37:30 2011 -0800 @@ -58,7 +58,8 @@ #ifndef __ASSEMBLY__ #if defined(__x86_64__) -#define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3) +#define GUEST_KERNEL_RPL(d) (is_hybrid_domain(d) ? 0 : \ + is_pv_32bit_domain(d) ? 1 : 3) #elif defined(__i386__) #define GUEST_KERNEL_RPL(d) ((void)(d), 1) #endif @@ -67,6 +68,9 @@ #define __fixup_guest_selector(d, sel) \ ({ \ uint16_t _rpl = GUEST_KERNEL_RPL(d); \ + if (d->is_hybrid) { \ + printk("MUK: hybrid domain fixing up selector\n"); \ + } \ (sel) = (((sel) & 3) >= _rpl) ? (sel) : (((sel) & ~3) | _rpl); \ }) diff -r f2cf898c7ff8 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/domain.h Thu Nov 17 15:37:30 2011 -0800 @@ -18,7 +18,7 @@ #endif #define is_pv_32on64_vcpu(v) (is_pv_32on64_domain((v)->domain)) -#define is_hvm_pv_evtchn_domain(d) (is_hvm_domain(d) && \ +#define is_hvm_pv_evtchn_domain(d) (is_hvm_or_hyb_domain(d) && \ d->arch.hvm_domain.irq.callback_via_type == HVMIRQ_callback_vector) #define is_hvm_pv_evtchn_vcpu(v) (is_hvm_pv_evtchn_domain(v->domain)) diff -r f2cf898c7ff8 xen/include/asm-x86/event.h --- a/xen/include/asm-x86/event.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/event.h Thu Nov 17 15:37:30 2011 -0800 @@ -18,7 +18,7 @@ int hvm_local_events_need_delivery(struc static inline int local_events_need_delivery(void) { struct vcpu *v = current; - return (is_hvm_vcpu(v) ? hvm_local_events_need_delivery(v) : + return ( is_hvm_or_hyb_vcpu(v) ? hvm_local_events_need_delivery(v) : (vcpu_info(v, evtchn_upcall_pending) && !vcpu_info(v, evtchn_upcall_mask))); } diff -r f2cf898c7ff8 xen/include/asm-x86/guest_access.h --- a/xen/include/asm-x86/guest_access.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/guest_access.h Thu Nov 17 15:37:30 2011 -0800 @@ -14,19 +14,19 @@ /* Raw access functions: no type checking. */ #define raw_copy_to_guest(dst, src, len) \ - (is_hvm_vcpu(current) ? \ + ((is_hvm_vcpu(current) || is_hyb_hap_vcpu(current)) ? \ copy_to_user_hvm((dst), (src), (len)) : \ copy_to_user((dst), (src), (len))) #define raw_copy_from_guest(dst, src, len) \ - (is_hvm_vcpu(current) ? \ + ((is_hvm_vcpu(current) || is_hyb_hap_vcpu(current)) ? \ copy_from_user_hvm((dst), (src), (len)) : \ copy_from_user((dst), (src), (len))) #define __raw_copy_to_guest(dst, src, len) \ - (is_hvm_vcpu(current) ? \ + ((is_hvm_vcpu(current) || is_hyb_hap_vcpu(current)) ? \ copy_to_user_hvm((dst), (src), (len)) : \ __copy_to_user((dst), (src), (len))) #define __raw_copy_from_guest(dst, src, len) \ - (is_hvm_vcpu(current) ? \ + ((is_hvm_vcpu(current) || is_hyb_hap_vcpu(current)) ? 
\ copy_from_user_hvm((dst), (src), (len)) : \ __copy_from_user((dst), (src), (len))) diff -r f2cf898c7ff8 xen/include/asm-x86/hvm/domain.h --- a/xen/include/asm-x86/hvm/domain.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/hvm/domain.h Thu Nov 17 15:37:30 2011 -0800 @@ -98,5 +98,8 @@ struct hvm_domain { }; }; +#define hap_enabled(d) \ + (is_hvm_or_hyb_domain(d) && (d)->arch.hvm_domain.hap_enabled) + #endif /* __ASM_X86_HVM_DOMAIN_H__ */ diff -r f2cf898c7ff8 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Nov 17 15:37:30 2011 -0800 @@ -144,10 +144,12 @@ struct hvm_function_table { extern struct hvm_function_table hvm_funcs; extern int hvm_enabled; +int hybrid_domain_initialise(struct domain *d); int hvm_domain_initialise(struct domain *d); void hvm_domain_relinquish_resources(struct domain *d); void hvm_domain_destroy(struct domain *d); +int hybrid_vcpu_initialise(struct vcpu *v); int hvm_vcpu_initialise(struct vcpu *v); void hvm_vcpu_destroy(struct vcpu *v); void hvm_vcpu_down(struct vcpu *v); diff -r f2cf898c7ff8 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Thu Nov 17 15:37:30 2011 -0800 @@ -110,6 +110,7 @@ void vmx_update_debug_state(struct vcpu #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 @@ -284,6 +285,14 @@ static inline unsigned long __vmread_saf return ecx; } +static inline unsigned long vmr(unsigned long field) +{ + int rc; + unsigned long val; + val = __vmread_safe(field, &rc); + return rc ? 0 : val; +} + static inline void __vm_set_bit(unsigned long field, unsigned int bit) { __vmwrite(field, __vmread(field) | (1UL << bit)); @@ -410,6 +419,8 @@ void vmx_inject_nmi(void); void ept_p2m_init(struct domain *d); void ept_walk_table(struct domain *d, unsigned long gfn); +void hybrid_vmx_vmexit_handler(struct cpu_user_regs *regs); + /* EPT violation qualifications definitions */ #define _EPT_READ_VIOLATION 0 #define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION) @@ -430,4 +441,39 @@ void ept_walk_table(struct domain *d, un #define EPT_PAGETABLE_ENTRIES 512 +/* + * Not all cases receive valid value in the VM-exit instruction length field. + * Callers must know what they're doing! 
+ */ +static inline int __get_instruction_length(void) +{ + int len; + len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe: callers audited */ + BUG_ON((len < 1) || (len > 15)); + return len; +} + +static inline void __update_guest_eip(unsigned long inst_len) +{ + struct cpu_user_regs *regs = guest_cpu_user_regs(); + unsigned long x; + + regs->eip += inst_len; + regs->eflags &= ~X86_EFLAGS_RF; + + x = __vmread(GUEST_INTERRUPTIBILITY_INFO); + if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) ) + { + x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS); + __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x); + } + + if ( regs->eflags & X86_EFLAGS_TF ) + vmx_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE); +} + +extern void vmx_dr_access(unsigned long, struct cpu_user_regs *); +extern void vmx_fpu_enter(struct vcpu *v); +extern void vmx_fpu_dirty_intercept(void); + #endif /* __ASM_X86_HVM_VMX_VMX_H__ */ diff -r f2cf898c7ff8 xen/include/public/domctl.h --- a/xen/include/public/domctl.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/public/domctl.h Thu Nov 17 15:37:30 2011 -0800 @@ -64,6 +64,9 @@ struct xen_domctl_createdomain { /* Disable out-of-sync shadow page tables? */ #define _XEN_DOMCTL_CDF_oos_off 3 #define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off) + /* Is this a hybrid guest? */ +#define _XEN_DOMCTL_CDF_hybrid_guest 4 +#define XEN_DOMCTL_CDF_hybrid_guest (1U<<_XEN_DOMCTL_CDF_hybrid_guest) }; typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); @@ -93,6 +96,9 @@ struct xen_domctl_getdomaininfo { /* Being debugged. */ #define _XEN_DOMINF_debugged 6 #define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) + /* domain is hybrid */ +#define _XEN_DOMINF_hybrid_guest 7 +#define XEN_DOMINF_hybrid_guest (1U<<_XEN_DOMINF_hybrid_guest) /* XEN_DOMINF_shutdown guest-supplied code. */ #define XEN_DOMINF_shutdownmask 255 #define XEN_DOMINF_shutdownshift 16 diff -r f2cf898c7ff8 xen/include/public/xen.h --- a/xen/include/public/xen.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/public/xen.h Thu Nov 17 15:37:30 2011 -0800 @@ -594,6 +594,7 @@ typedef struct start_info start_info_t; #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ #define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_IS_HYBRID (1<<3) /* Is it a PV running in HVM container? */ #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ /* diff -r f2cf898c7ff8 xen/include/xen/lib.h --- a/xen/include/xen/lib.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/xen/lib.h Thu Nov 17 15:37:30 2011 -0800 @@ -39,6 +39,17 @@ do { #else #define ASSERT(p) ((void)0) #endif +#ifdef XEN_KDB_CONFIG + #define KASSERT(p) \ + do { if (!(p)) { \ + kdbp("KASSERT in %s at %d\n", __FUNCTION__, __LINE__); \ + kdb_trap_immed(KDB_TRAP_NONFATAL); \ + } \ + } while (0) +#else +#define KASSERT(p) \ + do { if ( unlikely(!(p)) ) assert_failed(#p); } while (0) +#endif #define ABS(_x) ({ \ typeof(_x) __x = (_x); \ @@ -126,6 +137,8 @@ extern void add_taint(unsigned); extern void kdb_trap_immed(int); extern void kdbtrc(unsigned int, unsigned int, uint64_t, uint64_t, uint64_t); extern void kdbp(const char *fmt, ...); +extern volatile int mukkdbdbg; +#define mukkdbp(...) {(mukkdbdbg) ? 
kdbp(__VA_ARGS__):0;} #endif #endif /* __LIB_H__ */ diff -r f2cf898c7ff8 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/xen/sched.h Thu Nov 17 15:37:30 2011 -0800 @@ -228,6 +228,7 @@ struct domain /* Is this an HVM guest? */ bool_t is_hvm; + bool_t is_hybrid; /* Does this guest need iommu mappings? */ bool_t need_iommu; /* Is this guest fully privileged (aka dom0)? */ @@ -388,6 +389,9 @@ struct domain *domain_create( /* DOMCRF_oos_off: dont use out-of-sync optimization for shadow page tables */ #define _DOMCRF_oos_off 4 #define DOMCRF_oos_off (1U<<_DOMCRF_oos_off) + /* DOMCRF_hybrid: Create PV domain in HVM container */ +#define _DOMCRF_hybrid 5 +#define DOMCRF_hybrid (1U<<_DOMCRF_hybrid) /* * rcu_lock_domain_by_id() is more efficient than get_domain_by_id(). @@ -590,10 +594,17 @@ uint64_t get_cpu_idle_time(unsigned int #define is_hvm_domain(d) ((d)->is_hvm) #define is_hvm_vcpu(v) (is_hvm_domain(v->domain)) +#define is_hybrid_domain(d) ((d)->is_hybrid) +#define is_hybrid_vcpu(v) (is_hybrid_domain(v->domain)) +#define is_hvm_or_hyb_domain(d) (is_hvm_domain(d) || is_hybrid_domain(d)) +#define is_hvm_or_hyb_vcpu(v) (is_hvm_or_hyb_domain(v->domain)) #define is_pinned_vcpu(v) ((v)->domain->is_pinned || \ cpus_weight((v)->cpu_affinity) == 1) #define need_iommu(d) ((d)->need_iommu) +#define is_hyb_hap_domain(d) (is_hybrid_domain(d) && hap_enabled(d)) +#define is_hyb_hap_vcpu(v) (is_hyb_hap_domain(v->domain)) + void set_vcpu_migration_delay(unsigned int delay); unsigned int get_vcpu_migration_delay(void); diff -r f2cf898c7ff8 xen/include/xen/xencomm.h --- a/xen/include/xen/xencomm.h Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/include/xen/xencomm.h Thu Nov 17 15:37:30 2011 -0800 @@ -79,7 +79,7 @@ static inline unsigned long xencomm_inli * Copy an array of objects from guest context via a guest handle. * Optionally specify an offset into the guest array. */ -#define copy_from_guest_offset(ptr, hnd, idx, nr) \ +#define copy_from_guest_offset(ptr, hnd, idx, nr) \ JUNK __copy_from_guest_offset(ptr, hnd, idx, nr) /* Copy sub-field of a structure from guest context via a guest handle. */ diff -r f2cf898c7ff8 xen/kdb/kdb_cmds.c --- a/xen/kdb/kdb_cmds.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/kdb/kdb_cmds.c Thu Nov 17 15:37:30 2011 -0800 @@ -173,7 +173,7 @@ kdb_do_cmds(struct cpu_user_regs *regs) /* ===================== Util functions ==================================== */ -static int +int kdb_vcpu_valid(struct vcpu *in_vp) { struct domain *dp; @@ -298,15 +298,13 @@ kdb_str2domid(const char *domstr, domid_ } static struct domain * -kdb_strdomid2ptr(const char *domstr) +kdb_strdomid2ptr(const char *domstr, int perror) { - ulong l; - struct domain *dp; - if (!kdb_str2ulong(domstr, &l) || !(dp=kdb_domid2ptr((domid_t)l))) { - kdbp("Invalid domid:%s\n", domstr); - return NULL; - } else - return dp; + domid_t domid; + if (kdb_str2domid(domstr, &domid, perror)) { + return(kdb_domid2ptr(domid)); + } + return NULL; } /* return a guest bitness: 32 or 64 */ @@ -319,7 +317,7 @@ kdb_guest_bitness(domid_t domid) if (is_idle_domain(dp)) retval = HYPSZ; - else if (is_hvm_domain(dp)) + else if (is_hvm_or_hyb_domain(dp)) retval = (hvm_long_mode_enabled(dp->vcpu[0])) ? HYPSZ : 32; else retval = is_pv_32bit_domain(dp) ? 
32 : HYPSZ; @@ -825,7 +823,7 @@ struct Xgt_desc_struct { unsigned long address __attribute__((packed)); }; -static void +void kdb_show_special_regs(struct cpu_user_regs *regs) { struct Xgt_desc_struct desc; @@ -958,7 +956,8 @@ kdb_cmdf_ss(int argc, const char **argv, #define KDB_HALT_INSTR 0xf4 kdbbyt_t byte; - domid_t id = guest_mode(regs) ? current->domain->domain_id : DOMID_IDLE; + struct domain *dp = current->domain; + domid_t id = guest_mode(regs) ? dp->domain_id : DOMID_IDLE; if (argc > 1 && *argv[1] == '?') return kdb_usgf_ss(); @@ -977,16 +976,12 @@ kdb_cmdf_ss(int argc, const char **argv, kdbp("kdb: Failed to read byte at: %lx\n", regs->KDBIP); return KDB_CPU_MAIN_KDB; } - if (guest_mode(regs) && is_hvm_vcpu(current)) + if (guest_mode(regs) && is_hvm_or_hyb_vcpu(current)) { + dp->debugger_attached = 1; /* see svm_do_resume/vmx_do_ */ current->arch.hvm_vcpu.single_step = 1; - else + } else regs->eflags |= X86_EFLAGS_TF; -#if 0 - if (guest_mode(regs) && is_hvm_vcpu(current)) { - struct domain *dp = current->domain; - dp->debugger_attached = 1; /* see svm_do_resume/vmx_do_ */ - } -#endif + return KDB_CPU_SS; } @@ -1052,7 +1047,7 @@ kdb_cmdf_ssb(int argc, const char **argv kdbp("%s: regs not available\n", __FUNCTION__); return KDB_CPU_MAIN_KDB; } - if (is_hvm_vcpu(current)) + if (is_hvm_or_hyb_vcpu(current)) current->domain->debugger_attached = 1; /* vmx/svm_do_resume()*/ regs->eflags |= X86_EFLAGS_TF; @@ -1640,7 +1635,7 @@ kdb_set_bp(domid_t domid, kdbva_t addr, return KDBMAXSBP; } /* make sure swbp reporting is enabled in the vmcb/vmcs */ - if (is_hvm_domain(kdb_domid2ptr(domid))) { + if (is_hvm_or_hyb_domain(kdb_domid2ptr(domid))) { struct domain *dp = kdb_domid2ptr(domid); dp->debugger_attached = 1; /* see svm_do_resume/vmx_do_ */ KDBGP("debugger_attached set. 
domid:%d\n", domid); @@ -1693,7 +1688,7 @@ kdb_cmdf_bp(int argc, const char **argv, if (domidstrp && !kdb_str2domid(domidstrp, &domid, 1)) { return kdb_usgf_bp(); } - if (argc > 3 && is_hvm_domain(kdb_domid2ptr(domid))) { + if (argc > 3 && is_hvm_or_hyb_domain(kdb_domid2ptr(domid))) { kdbp("HVM domain not supported yet for conditional bp\n"); return KDB_CPU_MAIN_KDB; } @@ -1741,7 +1736,7 @@ kdb_cmdf_btp(int argc, const char **argv argsidx = 2; /* assume 3rd arg is not domid */ if (argc > 3 && kdb_str2domid(argv[2], &domid, 0)) { - if (is_hvm_domain(kdb_domid2ptr(domid))) { + if (is_hvm_or_hyb_domain(kdb_domid2ptr(domid))) { kdbp("HVM domains are not currently supprted\n"); return KDB_CPU_MAIN_KDB; } else @@ -1893,7 +1888,7 @@ kdb_cmdf_vcpuh(int argc, const char **ar return kdb_usgf_vcpuh(); if (!kdb_str2ulong(argv[1], (ulong *)&vp) || !kdb_vcpu_valid(vp) || - !is_hvm_vcpu(vp)) { + !is_hvm_or_hyb_vcpu(vp)) { kdbp("kdb: Bad VCPU: %s\n", argv[1]); return KDB_CPU_MAIN_KDB; @@ -2042,11 +2037,12 @@ kdb_display_vcpu(struct vcpu *vp) kdbp(" cpu_affinity:0x%lx vcpu_dirty_cpumask:0x%lx sched_priv:0x%p\n", vp->cpu_affinity.bits[0], vp->vcpu_dirty_cpumask.bits[0], vp->sched_priv); - kdbp(" &runstate: %p state: %x\n", &vp->runstate, vp->runstate.state); + kdbp(" &runstate: %p state: %x guestptr:%p\n", &vp->runstate, + vp->runstate.state, runstate_guest(vp)); kdbp("\n"); kdbp(" arch info: (%p)\n", &vp->arch); kdbp(" guest_context: VGCF_ flags:%lx", gp->flags); /* VGCF_in_kernel */ - if (is_hvm_vcpu(vp)) + if (is_hvm_or_hyb_vcpu(vp)) kdbp(" (HVM guest: IP, SP, EFLAGS may be stale)"); kdbp("\n"); kdb_print_uregs(&gp->user_regs); @@ -2146,7 +2142,7 @@ static void kdb_pr_dom_pg_modes(struct d if ( paging_mode_external(d) ) kdbp(" external(PG_external) "); } else - kdbp("disabled"); + kdbp(" disabled"); kdbp("\n"); } @@ -2198,6 +2194,19 @@ static void noinline kdb_print_dom_event #endif } +static void kdb_prnt_hvm_dom_info(struct domain *dp) +{ + kdbp(" HVM info: Hap is%s enabled\n", + dp->arch.hvm_domain.hap_enabled ? 
"" : " not"); + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + struct vmx_domain *vdp = &dp->arch.hvm_domain.vmx; + kdbp(" EPT: ept_mt:%x ept_wl:%x asr:%013lx\n", + vdp->ept_control.ept_mt, vdp->ept_control.ept_wl, + vdp->ept_control.asr); + } +} + /* display one domain info */ static void kdb_display_dom(struct domain *dp) @@ -2240,9 +2249,9 @@ kdb_display_dom(struct domain *dp) kdbp(" mapcnt:"); kdb_print_spin_lock("mapcnt: lk:", &gp->lock, "\n"); } - kdbp(" hvm:%d priv:%d dbg:%d dying:%d paused:%d\n", - dp->is_hvm, dp->is_privileged, dp->debugger_attached, - dp->is_dying, dp->is_paused_by_controller); + kdbp(" hvm:%d hybrid:%d priv:%d dbg:%d dying:%d paused:%d\n", + dp->is_hvm, dp->is_hybrid, dp->is_privileged, + dp->debugger_attached, dp->is_dying, dp->is_paused_by_controller); kdb_print_spin_lock(" shutdown: lk:", &dp->shutdown_lock, "\n"); kdbp(" shutn:%d shut:%d code:%d \n", dp->is_shutting_down, dp->is_shut_down, dp->shutdown_code); @@ -2266,7 +2275,10 @@ kdb_display_dom(struct domain *dp) kdbp(" &mapchache:0x%xp\n", &ap->mapcache); #endif kdbp(" ioport:0x%p &hvm_dom:0x%p\n", ap->ioport_caps, &ap->hvm_domain); - kdbp(" &pging_dom:%p mode:%lx", &ap->paging, ap->paging.mode); + if (is_hvm_or_hyb_domain(dp)) + kdb_prnt_hvm_dom_info(dp); + + kdbp(" &pging_dom:%p mode: %lx", &ap->paging, ap->paging.mode); kdb_pr_dom_pg_modes(dp); kdbp(" p2m ptr:%p pages:{%p, %p}\n", ap->p2m, ap->p2m->pages.next, KDB_PGLLE(ap->p2m->pages)); @@ -2556,7 +2568,7 @@ kdb_cmdf_didt(int argc, const char **arg } #endif -struct gdte { +struct gdte { /* same for TSS and LDT */ ulong limit0:16; ulong base0:24; /* linear address base, not pa */ ulong acctype:4; /* Type: access rights */ @@ -2576,15 +2588,23 @@ union gdte_u { u64 gval; }; -struct sgdte { /* system gdte */ +struct call_gdte { unsigned short offs0:16; unsigned short sel:16; unsigned short misc0:16; unsigned short offs1:16; }; +struct idt_gdte { + unsigned long offs0:16; + unsigned long sel:16; + unsigned long ist:3; + unsigned long unused0:13; + unsigned long offs1:16; +}; union sgdte_u { - struct sgdte sgdte; + struct call_gdte cgdte; + struct idt_gdte igdte; u64 sgval; }; @@ -2611,7 +2631,7 @@ static char *kdb_ret_acctype(uint acctyp static kdb_cpu_cmd_t kdb_usgf_dgdt(void) { - kdbp("dgdt [gdt-ptr hex-gdt-size] dump GDT table on current cpu or for " + kdbp("dgdt [gdt-ptr decimal-byte-size] dump GDT table on current cpu or for" "given vcpu\n"); return KDB_CPU_MAIN_KDB; } @@ -2620,9 +2640,9 @@ kdb_cmdf_dgdt(int argc, const char **arg { struct Xgt_desc_struct desc; union gdte_u u1; - ulong addr, end; + ulong start_addr, end_addr, taddr=0; domid_t domid = DOMID_IDLE; - int i; + int idx; if (argc > 1 && *argv[1] == '?') return kdb_usgf_dgdt(); @@ -2631,62 +2651,62 @@ kdb_cmdf_dgdt(int argc, const char **arg if (argc != 3) return kdb_usgf_dgdt(); - if (kdb_str2ulong(argv[1], (ulong *)&addr) && - kdb_str2ulong(argv[2], (ulong *)&end)) { - end += addr; + if (kdb_str2ulong(argv[1], (ulong *)&start_addr) && + kdb_str2deci(argv[2], (int *)&taddr)) { + end_addr = start_addr + taddr; } else { kdbp("dgdt: Bad arg:%s or %s\n", argv[1], argv[2]); return kdb_usgf_dgdt(); } } else { __asm__ __volatile__ ("sgdt (%0) \n" :: "a"(&desc) : "memory"); - addr = (ulong)desc.address; - end = (ulong)desc.address + desc.size; + start_addr = (ulong)desc.address; + end_addr = (ulong)desc.address + desc.size; } - kdbp("GDT: Will skip null desc at 0, addr:%lx end:%lx\n", addr, end); - addr += 8; /* skip null descriptor */ - + kdbp("GDT: Will skip null desc at 0, start:%lx 
end:%lx\n", start_addr, + end_addr); kdbp("[idx] sel --- val -------- Accs DPL P AVL L DB G " "--Base Addr ---- Limit\n"); kdbp(" Type\n"); - for (i=1; addr < end; i++, addr += sizeof(ulong)) { + /* skip first 8 null bytes */ + /* the cpu multiplies the index by 8 and adds to GDT.base */ + for (taddr = start_addr+8; taddr < end_addr; taddr += sizeof(ulong)) { /* not all entries are mapped. do this to avoid GP even if hyp */ - if (!kdb_read_mem(addr, (kdbbyt_t *)&u1, sizeof(u1),domid) || !u1.gval) + if (!kdb_read_mem(taddr, (kdbbyt_t *)&u1, sizeof(u1),domid) || !u1.gval) continue; if (u1.gval == 0xffffffffffffffff || u1.gval == 0x5555555555555555) continue; /* what an effin x86 mess */ + idx = (taddr - start_addr) / 8; if (u1.gdte.S == 0) { /* System Desc are 16 bytes in 64bit mode */ - addr += sizeof(ulong); - i++; + taddr += sizeof(ulong); continue; } +kdbp("ADDR: %lx\n", taddr); kdbp("[%04x] %04x %016lx %4s %x %d %d %d %d %d %016lx %05x\n", - i, (i<<3), u1.gval, kdb_ret_acctype(u1.gdte.acctype), u1.gdte.DPL, + idx, (idx<<3), u1.gval, kdb_ret_acctype(u1.gdte.acctype), + u1.gdte.DPL, u1.gdte.P, u1.gdte.AVL, u1.gdte.L, u1.gdte.DB, u1.gdte.G, (u64)((u64)u1.gdte.base0 | (u64)((u64)u1.gdte.base1<<24)), u1.gdte.limit0 | (u1.gdte.limit1<<16)); } - kdbp("\nSystem descriptors (S=0) :\n"); - addr = (ulong)desc.address + 8; /* skip null descriptor */ - - for (i=1; addr < end; i++, addr += sizeof(ulong)) { - union sgdte_u u2; + kdbp("\nSystem descriptors (S=0) : (skipping 0th entry)\n"); + for (taddr=start_addr+8; taddr < end_addr; taddr += sizeof(ulong)) { uint acctype; - u64 upper, offs0_64=0, offs32_63=0; + u64 upper, addr64=0; /* not all entries are mapped. do this to avoid GP even if hyp */ - if (kdb_read_mem(addr, (kdbbyt_t *)&u1, sizeof(u1),domid)==0 || + if (kdb_read_mem(taddr, (kdbbyt_t *)&u1, sizeof(u1), domid)==0 || u1.gval == 0 || u1.gdte.S == 1) { continue; } - - addr += sizeof(ulong); - if (kdb_read_mem(addr, (kdbbyt_t *)&upper, 8, domid) == 0) { + idx = (taddr - start_addr) / 8; + taddr += sizeof(ulong); + if (kdb_read_mem(taddr, (kdbbyt_t *)&upper, 8, domid) == 0) { kdbp("Could not read upper 8 bytes of system desc\n"); upper = 0; } @@ -2695,11 +2715,11 @@ kdb_cmdf_dgdt(int argc, const char **arg acctype != 14 && acctype != 15) continue; - kdbp("[%04x] %04x val:%016lx DPL:%x P:%d acctype:%x ", - i, (i<<3), u1.gval, u1.gdte.DPL, u1.gdte.P, acctype); - - u2.sgval = u1.gval; - offs32_63 = (u64)((upper & 0xFFFFFFFF)) << 32; +kdbp("ADDR: %lx\n", taddr); + kdbp("[%04x] %04x val:%016lx DPL:%x P:%d type:%x ", + idx, (idx<<3), u1.gval, u1.gdte.DPL, u1.gdte.P, acctype); + + upper = (u64)((u64)(upper & 0xFFFFFFFF) << 32); /* Vol 3A: table: 3-2 page: 3-19 */ if (acctype == 2) { @@ -2722,17 +2742,28 @@ kdb_cmdf_dgdt(int argc, const char **arg } if (acctype == 2 || acctype == 9 || acctype == 11) { - kdbp(" AVL:%d L:%d D/B:%d G:%d Base Addr:%016lx Limit:%x\n", - u1.gdte.AVL, u1.gdte.L, u1.gdte.DB, u1.gdte.G, - u1.gdte.base0 | (u1.gdte.base1<<24) | offs32_63, - u1.gdte.limit0 | (u1.gdte.limit1<<16)); - - } else if (acctype == 12 || acctype == 14 || acctype == 15) { - offs0_64 = u2.sgdte.offs0 | (u64)u2.sgdte.offs1<<48 | offs32_63; - kdbp(" Entry: %04x:%016lx\n", u2.sgdte.sel, offs0_64); - } - - i++; + kdbp(" AVL:%d G:%d Base Addr:%016lx Limit:%x\n", + u1.gdte.AVL, u1.gdte.G, + (u64)((u64)u1.gdte.base0 | ((u64)u1.gdte.base1<<24)| upper), + (u32)u1.gdte.limit0 | (u32)((u32)u1.gdte.limit1<<16)); + + } else if (acctype == 12) { + union sgdte_u u2; + u2.sgval = u1.gval; + + addr64 = 
(u64)((u64)u2.cgdte.offs0 | + (u64)((u64)u2.cgdte.offs1<<16) | upper); + kdbp(" Entry: %04x:%016lx\n", u2.cgdte.sel, addr64); + } else if (acctype == 14 || acctype == 15) { + union sgdte_u u2; + u2.sgval = u1.gval; + + addr64 = (u64)((u64)u2.igdte.offs0 | + (u64)((u64)u2.igdte.offs1<<16) | upper); + kdbp(" Entry: %04x:%016lx ist:%03x\n", u2.igdte.sel, addr64, + u2.igdte.ist); + } else + kdbp(" Error: Unrecongized type:%lx\n", acctype); } return KDB_CPU_MAIN_KDB; } @@ -2781,6 +2812,7 @@ kdb_cmdf_mmu(int argc, const char **argv kdbp("CONFIG_PAGING_LEVELS:%d\n", CONFIG_PAGING_LEVELS); kdbp("__HYPERVISOR_COMPAT_VIRT_START: %lx\n", (ulong)__HYPERVISOR_COMPAT_VIRT_START); + kdbp("&MPT[0] == %016lx\n", &machine_to_phys_mapping[0]); kdbp("\nFIRST_RESERVED_GDT_PAGE: %x\n", FIRST_RESERVED_GDT_PAGE); kdbp("FIRST_RESERVED_GDT_ENTRY: %lx\n", (ulong)FIRST_RESERVED_GDT_ENTRY); @@ -2794,11 +2826,17 @@ kdb_cmdf_mmu(int argc, const char **argv kdbp("\tcpu:%d gdt_table:%p\n", cpu, per_cpu(compat_gdt_table, cpu)); } kdbp("\n"); + kdbp(" Per cpu tss:\n"); + for_each_online_cpu(cpu) { + struct tss_struct *tssp = &per_cpu(init_tss, cpu); + kdbp("\tcpu:%d tss:%p (rsp0:%016lx)\n", cpu, tssp, tssp->rsp0); + } #ifdef USER_MAPPINGS_ARE_GLOBAL kdbp("USER_MAPPINGS_ARE_GLOBAL is defined\n"); #else kdbp("USER_MAPPINGS_ARE_GLOBAL is NOT defined\n"); #endif + kdbp("\n"); return KDB_CPU_MAIN_KDB; } @@ -2873,8 +2911,8 @@ kdb_cmdf_p2m(int argc, const char **argv struct domain *dp; ulong gpfn; - if (argc < 3 || - (dp=kdb_strdomid2ptr(argv[1])) == NULL || + if (argc < 3 || + (dp=kdb_strdomid2ptr(argv[1], 1)) == NULL || !kdb_str2ulong(argv[2], &gpfn)) { return kdb_usgf_p2m(); @@ -3408,6 +3446,7 @@ kdb_cmdf_info(int argc, const char **arg kdbp(" CONFIG_PAGING_ASSISTANCE"); #endif kdbp("\n"); + kdbp("MAX_VIRT_CPUS:$%d MAX_HVM_VCPUS:$%d\n", MAX_VIRT_CPUS,MAX_HVM_VCPUS); kdbp("NR_EVENT_CHANNELS: $%d\n", NR_EVENT_CHANNELS); kdbp("NR_EVTCHN_BUCKETS: $%d\n", NR_EVTCHN_BUCKETS); diff -r f2cf898c7ff8 xen/kdb/kdb_io.c --- a/xen/kdb/kdb_io.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/kdb/kdb_io.c Thu Nov 17 15:37:30 2011 -0800 @@ -39,7 +39,7 @@ kdb_key_valid(int key) { /* note: isspace() is more than ' ', hence we don't use it here */ if (isalnum(key) || key == ' ' || key == K_BACKSPACE || key == '\n' || - key == '?' || key == K_UNDERSCORE || key == '=') + key == '?' 
|| key == K_UNDERSCORE || key == '=' || key == '!') return 1; return 0; } diff -r f2cf898c7ff8 xen/kdb/kdbmain.c --- a/xen/kdb/kdbmain.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/kdb/kdbmain.c Thu Nov 17 15:37:30 2011 -0800 @@ -258,7 +258,7 @@ kdb_check_dbtrap(kdb_reason_t *reasp, in KDBGP("ccpu:%d trapcpu:%d\n", ccpu, a_trap_cpu); kdb_cpu_cmd[a_trap_cpu] = KDB_CPU_QUIT; *reasp = KDB_REASON_PAUSE_IPI; - regs->eflags &= ~X86_EFLAGS_TF; + regs->eflags &= ~X86_EFLAGS_TF; /* hvm: exit handler ss = 0 */ kdb_init_cpu = -1; } else { kdb_end_session(ccpu, regs); @@ -364,7 +364,10 @@ kdbmain(kdb_reason_t reason, struct cpu_ } } else if (rc == 2) { /* one of ours but condition not met */ kdb_begin_session(); - regs->eflags |= X86_EFLAGS_TF; + if (guest_mode(regs) && is_hvm_or_hyb_vcpu(current)) + current->arch.hvm_vcpu.single_step = 1; + else + regs->eflags |= X86_EFLAGS_TF; kdb_cpu_cmd[ccpu] = KDB_CPU_INSTALL_BP; goto out; } @@ -401,7 +404,10 @@ kdbmain(kdb_reason_t reason, struct cpu_ if (!cpus_empty(kdb_cpu_traps)) { /* execute current instruction without 0xcc */ kdb_dbg_prnt_ctrps("nempty:", ccpu); - regs->eflags |= X86_EFLAGS_TF; + if (guest_mode(regs) && is_hvm_or_hyb_vcpu(current)) + current->arch.hvm_vcpu.single_step = 1; + else + regs->eflags |= X86_EFLAGS_TF; kdb_cpu_cmd[ccpu] = KDB_CPU_INSTALL_BP; goto out; } @@ -415,7 +421,10 @@ kdbmain(kdb_reason_t reason, struct cpu_ if (kdb_swbp_exists()) { if (reason == KDB_REASON_BPEXCP) { /* do delayed install */ - regs->eflags |= X86_EFLAGS_TF; + if (guest_mode(regs) && is_hvm_or_hyb_vcpu(current)) + current->arch.hvm_vcpu.single_step = 1; + else + regs->eflags |= X86_EFLAGS_TF; kdb_cpu_cmd[ccpu] = KDB_CPU_INSTALL_BP; goto out; } @@ -518,9 +527,7 @@ kdb_handle_trap_entry(int vector, struct * depending on the hardware. Also, for now assume it's fatal */ KDBGP("kdbtrp:ccpu:%d vec:%d\n", ccpu, vector); rc = kdbmain_fatal(regs, TRAP_nmi); - } else { - KDBGP("kdbtrp: unhandled trap:ccpu:%d vec:%d\n", ccpu, vector); - } + } return rc; } @@ -659,7 +666,7 @@ kdb_gettrapname(int trapno) #define KDBTRCMAX 1 /* set this to max number of recs to trace. each rec * is 32 bytes */ -volatile int kdb_trcon=0; /* turn tracing ON: set here or via the trcon cmd */ +volatile int kdb_trcon=1; /* turn tracing ON: set here or via the trcon cmd */ typedef struct { union { diff -r f2cf898c7ff8 xen/kdb/x86/udis86-1.7/kdb_dis.c --- a/xen/kdb/x86/udis86-1.7/kdb_dis.c Fri Jul 15 23:21:24 2011 +0000 +++ b/xen/kdb/x86/udis86-1.7/kdb_dis.c Thu Nov 17 15:37:30 2011 -0800 @@ -65,11 +65,13 @@ kdb_prnt_addr2sym(domid_t domid, kdbva_t p = kdb_guest_addr2sym(addr, domid, &offs); } else symbols_lookup(addr, &sz, &offs, buf); + snprintf(pbuf, 150, "%s%s+%lx", prefix, p, offs); if (*nl != '\n') kdbp("%-30s%s", pbuf, nl); /* prints more than 30 if needed */ else kdbp("%s%s", pbuf, nl); + } static int
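
[Editorial note, not part of the patch above: a minimal standalone sketch of how the new domain-type predicates introduced by the sched.h and asm-x86/hvm/domain.h hunks compose. The macro bodies mirror the patch; the stub structs and the stdio test harness are simplified assumptions added purely for illustration.]

/* Illustrative sketch only -- NOT part of the patch.
 * Simplified stand-ins for the Xen structures touched by the
 * xen/include/xen/sched.h and xen/include/asm-x86/hvm/domain.h hunks,
 * showing how the hybrid-guest predicates combine. */
#include <stdio.h>
#include <stdbool.h>

struct hvm_domain  { bool hap_enabled; };
struct arch_domain { struct hvm_domain hvm_domain; };
struct domain {
    bool is_hvm;     /* existing flag */
    bool is_hybrid;  /* new flag, set when domain_create() sees DOMCRF_hybrid */
    struct arch_domain arch;
};

/* Predicates as introduced/extended by the patch. */
#define is_hvm_domain(d)         ((d)->is_hvm)
#define is_hybrid_domain(d)      ((d)->is_hybrid)
#define is_hvm_or_hyb_domain(d)  (is_hvm_domain(d) || is_hybrid_domain(d))
#define hap_enabled(d) \
    (is_hvm_or_hyb_domain(d) && (d)->arch.hvm_domain.hap_enabled)
#define is_hyb_hap_domain(d)     (is_hybrid_domain(d) && hap_enabled(d))

int main(void)
{
    struct domain pv  = { .is_hvm = false, .is_hybrid = false };
    struct domain hyb = { .is_hvm = false, .is_hybrid = true,
                          .arch.hvm_domain.hap_enabled = true };

    /* A hybrid guest is not an HVM guest, but it takes the HVM code paths
     * that the patch guards with is_hvm_or_hyb_domain() (vpt, time,
     * hypercall page setup, guest access copies, kdb, ...). */
    printf("pv : hvm_or_hyb=%d hyb_hap=%d\n",
           is_hvm_or_hyb_domain(&pv), is_hyb_hap_domain(&pv));
    printf("hyb: hvm_or_hyb=%d hyb_hap=%d\n",
           is_hvm_or_hyb_domain(&hyb), is_hyb_hap_domain(&hyb));
    return 0;
}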