
[Xen-changelog] Big merge of the HVM full-virtualisation abstractions.



# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID f1b361b05bf39e8b5d09520eee74deaa5d897d1d
# Parent  e4eb12a6e003e94bae704998326baef689ea1c30
Big merge of the HVM full-virtualisation abstractions.

This changeset contains all differences between
xen-unstable.hg and xen-unstable-hvm.hg.

All information and comments for changesets unique to
xen-unstable-hvm.hg are included below.

Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>

> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Tue Jan 31 04:29:51 2006 -0400
> files:       xen/include/asm-ia64/mm.h
> description:
> Fix ia64 compile.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Tue Jan 31 02:21:49 2006 -0400
> files:       xen/include/xen/domain_page.h
> description:
> Eliminate unused fields (pfn,va) warnings.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Fri Jan 27 04:31:44 2006 -0400
> files:       tools/python/xen/xend/image.py
> description:
> Name cleanup: vmx refers to VT-x-specific code; HVM is used for everything else.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        root@xxxxxxxxxxxxxxxx
> date:        Sun Jan 22 18:39:58 2006 -0500
> files:       xen/arch/x86/hvm/platform.c
> description:
> Support the 0x67 (address-size) prefix in the HVM decode logic; the gfxboot
> patch generates instructions carrying this prefix.
> 
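For context: 0x67 is the x86 address-size override prefix, and gfxboot emits
instructions carrying it. A minimal sketch of how a decoder skips legacy
prefixes ahead of the opcode (illustrative only, not the actual platform.c
code):

    #include <stdint.h>

    /* Scan legacy x86 prefixes preceding the opcode. Returns the number
     * of prefix bytes consumed and flags an address-size override (0x67). */
    static int skip_prefixes(const uint8_t *insn, int *addr_size_override)
    {
        int i = 0;

        *addr_size_override = 0;
        for (;;) {
            switch (insn[i]) {
            case 0x67:                        /* address-size override */
                *addr_size_override = 1;
                i++;
                break;
            case 0x66:                        /* operand-size override */
            case 0xF0: case 0xF2: case 0xF3:  /* lock/rep prefixes */
            case 0x26: case 0x2E: case 0x36:  /* segment overrides */
            case 0x3E: case 0x64: case 0x65:
                i++;
                break;
            default:
                return i;                     /* first non-prefix byte */
            }
        }
    }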
> 
> user:        twoller@xxxxxxxxxxxxxxxx
> date:        Sun Jan 22 18:35:59 2006 -0500
> files:       xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c 
> xen/include/asm-x86/hvm/svm/svm.h
> description:
> check for valid shared_page_va/vmcb before deallocating in SVM.
> 
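The fix is the usual guard-before-free pattern; a minimal sketch with
illustrative names (not the real SVM structures):

    #include <stdlib.h>

    struct svm_vcpu_state {
        void          *vmcb;            /* vendor control block */
        unsigned long  shared_page_va;  /* mapped I/O request page */
    };

    /* Only release what was actually allocated, and clear each field
     * so a second teardown call is harmless. */
    static void svm_relinquish(struct svm_vcpu_state *s)
    {
        if (s->vmcb != NULL) {
            free(s->vmcb);
            s->vmcb = NULL;
        }
        if (s->shared_page_va != 0) {
            /* in the hypervisor this would be an unmap, not free() */
            s->shared_page_va = 0;
        }
    }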
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Jan 22 02:56:20 2006 -0400
> files:       xen/include/asm-x86/hvm/svm/svm.h 
> xen/include/asm-x86/hvm/svm/vmmcall.h
> description:
> Some SVM header file cleanups.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Fri Jan 20 11:50:23 2006 -0400
> files:       xen/arch/x86/domain.c
> description:
> Need to be more diligent about when to call into the HVM abstraction layer
> and when not.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Wed Jan 18 15:14:56 2006 -0400
> files:       xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/x86_32/exits.S 
> xen/arch/x86/hvm/svm/x86_64/exits.S xen/arch/x86/hvm/vmx/x86_32/exits.S 
> xen/arch/x86/hvm/vmx/x86_64/exits.S
> description:
> Added missing copyright statements and updated svm.c's copyright to properly
> reflect that it was derived from vmx.c.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Tue Jan 17 09:42:11 2006 -0400
> files:       xen/arch/ia64/Makefile xen/arch/ia64/vmx/vlsapic.c 
> xen/arch/ia64/vmx/vmx_init.c xen/include/asm-ia64/vmx_platform.h
> description:
> Make sure ia64 builds again with the new directory structure.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxx
> date:        Tue Jan 17 08:28:51 2006 -0400
> files:       xen/arch/x86/Makefile xen/arch/x86/hvm/i8259.c 
> xen/arch/x86/hvm/vioapic.c
> description:
> The device models now belong in hvm.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Jan 15 16:40:58 2006 -0400
> files:       xen/arch/x86/Makefile xen/arch/x86/cpu/amd.c 
> xen/arch/x86/cpu/intel.c xen/arch/x86/dm/hvm_vioapic.c 
> xen/arch/x86/dm/i8259.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c 
> xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/intercept.c xen/arch/x86/hvm/io.c 
> xen/arch/x86/hvm/platform.c xen/arch/x86/hvm/svm/emulate.c 
> xen/arch/x86/hvm/svm/instrlen.c xen/arch/x86/hvm/svm/intr.c 
> xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c 
> xen/arch/x86/hvm/svm/x86_32/exits.S xen/arch/x86/hvm/svm/x86_64/exits.S 
> xen/arch/x86/hvm/vlapic.c xen/arch/x86/hvm/vmx/io.c 
> xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c 
> xen/arch/x86/hvm/vmx/x86_32/exits.S xen/arch/x86/hvm/vmx/x86_64/exits.S 
> xen/arch/x86/i387.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/traps.c 
> xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/traps.c 
> xen/include/asm-x86/domain.h xen/include/asm-x86/hvm/domain.h 
> xen/include/asm-x86/hvm/hvm.h xen/include/asm-x86/hvm/io.h 
> xen/include/asm-x86/hvm/support.h xen/include/asm-x86/hvm/svm/emulate.h 
> xen/include/asm-x86/hvm/svm/intr.h xen/include/asm-x86/hvm/svm/svm.h 
> xen/include/asm-x86/hvm/svm/vmcb.h xen/include/asm-x86/hvm/svm/vmmcall.h 
> xen/include/asm-x86/hvm/vcpu.h xen/include/asm-x86/hvm/vioapic.h 
> xen/include/asm-x86/hvm/vlapic.h xen/include/asm-x86/hvm/vmx/cpu.h 
> xen/include/asm-x86/hvm/vmx/vmcs.h xen/include/asm-x86/hvm/vmx/vmx.h 
> xen/include/asm-x86/hvm/vpic.h xen/include/asm-x86/hvm/vpit.h 
> xen/include/asm-x86/shadow.h
> description:
> As suggested by Keir, I restructured the hvm/vmx/svm tree. The new
> directory structure looks like:
> 
>       xen/arch/hvm/
>       xen/arch/hvm/vmx
>       xen/arch/hvm/vmx/x86_32
>       xen/arch/hvm/vmx/x86_64
>       xen/arch/hvm/svm
>       xen/arch/hvm/svm/x86_32
>       xen/arch/hvm/svm/x86_64
> 
>       xen/include/hvm/
>       xen/include/hvm/vmx
>       xen/include/hvm/svm
> 
> Many files have been renamed and had their hvm_/vmx_/svm_ prefix removed
> because this is now clear from the directory where the file resides.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
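In practice the restructuring mostly shows up as renamed include paths, for
example (paths illustrative, following the tree layout above):

    /* before: flat names with a vendor prefix */
    #include <asm/svm_vmcb.h>

    /* after: the prefix becomes part of the directory */
    #include <asm/hvm/svm/vmcb.h>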
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sat Jan 14 17:03:28 2006 -0400
> files:       xen/arch/ia64/vmx/vlsapic.c xen/include/asm-ia64/vmx_platform.h
> description:
> Name change fix for ia64.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sat Jan 14 15:12:59 2006 -0400
> files:       xen/arch/x86/dm/hvm_vioapic.c xen/arch/x86/dm/i8259.c 
> xen/arch/x86/hvm.c xen/arch/x86/hvm_intercept.c xen/arch/x86/svm.c 
> xen/arch/x86/svm_intr.c xen/arch/x86/svm_vmcb.c xen/arch/x86/vmx.c 
> xen/arch/x86/vmx_io.c xen/arch/x86/vmx_vmcs.c 
> xen/include/asm-x86/hvm_domain.h xen/include/asm-x86/hvm_vcpu.h
> description:
> Move VMX/SVM print buffer to hvm_domain.
> 
> Clean up variable names. The prefix hvm_ is redundant in hvm_domain.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Fri Jan 13 17:03:18 2006 -0500
> files:       xen/arch/x86/svm.c
> description:
> remove unnecessary spin_unlock in asid_fetch code for svm.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Wed Jan 11 20:00:36 2006 -0500
> files:       xen/arch/x86/svm.c
> description:
> remove unneeded update_pagetables() during svm PF handling (resolved with 
> ASID code rework).
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Tue Jan 10 02:45:32 2006 -0400
> files:       xen/arch/x86/hvm.c xen/arch/x86/vmx_io.c
> description:
> Factor out cpu_get_interrupt(). It is used by VMX and SVM.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Mon Jan  9 00:03:30 2006 -0400
> files:       xen/arch/x86/hvm.c xen/arch/x86/svm.c xen/arch/x86/vmx.c 
> xen/include/asm-x86/hvm_support.h xen/include/asm-x86/hvm_vcpu.h
> description:
> Introduce small print buffer per domain rather than a single global one.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
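Conceptually, each domain now owns a small line buffer for guest debug output
instead of sharing one global buffer, so output from different guests cannot
interleave. A sketch (field names and size are illustrative, not the real
hvm_domain layout):

    #define HVM_PBUF_SIZE 80

    struct hvm_print_buf {
        char pbuf[HVM_PBUF_SIZE];
        int  pbuf_idx;
    };

    /* Accumulate characters; emit a complete line via log_line(). */
    static void hvm_print_char(struct hvm_print_buf *b,
                               void (*log_line)(const char *), char c)
    {
        if (c == '\n' || b->pbuf_idx == HVM_PBUF_SIZE - 1) {
            b->pbuf[b->pbuf_idx] = '\0';
            log_line(b->pbuf);
            b->pbuf_idx = 0;
            return;
        }
        b->pbuf[b->pbuf_idx++] = c;
    }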
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Jan  8 23:07:12 2006 -0400
> files:       xen/arch/x86/dom0_ops.c xen/include/asm-x86/hvm_support.h 
> xen/include/public/arch-x86_32.h xen/include/public/arch-x86_64.h
> description:
> More cleanup. There is no point in distinguishing between SVM and VMX;
> a single HVM flag bit suffices.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
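Callers then test a single flag rather than one per vendor. Roughly (the
VGCF_HVM_GUEST name appears in the diff below; the bit value and helper here
are illustrative):

    #define VGCF_HVM_GUEST (1UL << 1)   /* illustrative bit position */

    static inline int is_hvm_guest(unsigned long vgc_flags)
    {
        return (vgc_flags & VGCF_HVM_GUEST) != 0;
    }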
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Jan  8 12:05:59 2006 -0400
> files:       xen/arch/x86/svm.c xen/arch/x86/vmx.c
> description:
> Both VMX & SVM now print writes to the debug port (0xE9) on the console.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
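Port 0xE9 is the Bochs-style debug port: each byte a guest writes to it is one
character of console output. A sketch of the guest side (GCC inline assembly,
illustrative):

    /* Send one character to the emulated debug port. */
    static inline void debug_putc(char c)
    {
        __asm__ __volatile__("outb %0, %1"
                             : : "a"(c), "Nd"((unsigned short)0xE9));
    }

    static void debug_puts(const char *s)
    {
        while (*s)
            debug_putc(*s++);
    }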
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sat Jan  7 13:55:27 2006 -0400
> files:       xen/arch/x86/svm.c xen/arch/x86/svm_intr.c 
> xen/arch/x86/svm_vmcb.c xen/arch/x86/vmx.c xen/arch/x86/vmx_io.c 
> xen/arch/x86/vmx_vmcs.c xen/arch/x86/x86_32/asm-offsets.c 
> xen/arch/x86/x86_64/asm-offsets.c xen/include/asm-x86/domain.h 
> xen/include/asm-x86/vmx.h
> description:
> Introduce shorthands to improve code legibility.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Wed Jan  4 06:12:10 2006 -0400
> files:       tools/examples/xmexample.hvm
> description:
> Minor spelling mistakes.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Wed Jan  4 03:37:55 2006 -0400
> files:       xen/include/public/hvm/hvm_info_table.h 
> xen/include/public/hvm/ioreq.h xen/include/public/hvm/vmx_assist.h
> description:
> Missed adding new files.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Mon Jan  2 01:23:54 2006 -0400
> files:       tools/debugger/libxendebug/xendebug.c tools/libxc/xc_hvm_build.c
> description:
> Finalize Xin Li's patch: vmx/svm_identify is no longer necessary.
> Xen debug should test for HVM instead of VMX.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Sun Jan  1 03:22:39 2006 -0500
> files:       tools/ioemu/vl.c
> description:
> change hardcoded VTXEN to HVMXEN.
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Jan  1 02:22:47 2006 -0400
> files:       tools/libxc/xc_hvm_build.c tools/python/xen/xend/image.py 
> xen/arch/x86/domain.c
> description:
> Avoid a Xen crash if there is no VMX support: if a platform
> doesn't support VMX, creating a VMX domain would crash the Xen HV.
> 
> Signed-off-by: Xin Li <xin.b.li@xxxxxxxxx>
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> [ vmx/svm_identify are still needed in hvm builder, but this may not
> really be necessary. I need to check this. - lvd ]
> 
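The builder-side check amounts to a CPUID feature test; a sketch (bit
positions per the Intel and AMD manuals, helper names illustrative):

    #include <stdint.h>

    static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b,
                      uint32_t *c, uint32_t *d)
    {
        __asm__ __volatile__("cpuid"
                             : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
                             : "a"(leaf), "c"(0));
    }

    /* CPUID.1:ECX bit 5 advertises Intel VT-x (VMX);
     * CPUID.0x80000001:ECX bit 2 advertises AMD SVM. */
    static int hvm_capable(void)
    {
        uint32_t a, b, c, d;

        cpuid(1, &a, &b, &c, &d);
        if (c & (1u << 5))
            return 1;
        cpuid(0x80000001u, &a, &b, &c, &d);
        return (c & (1u << 2)) != 0;
    }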
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Fri Dec 30 21:38:56 2005 -0500
> files:       xen/arch/x86/svm.c xen/arch/x86/svm_vmcb.c 
> xen/arch/x86/x86_32/entry.S xen/include/asm-x86/svm.h 
> xen/include/asm-x86/svm_vmcb.h
> description:
> add multi-core support for guest ASIDs for SVM partitions.
> 
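The idea behind per-core guest ASIDs: each physical core hands out its own TLB
tags, so vcpus migrating between cores always hold a tag valid for the core
they run on. A simplified sketch (rollover handling, i.e. the TLB flush and
generation bump, is elided):

    #define MAX_CORES 64

    struct core_asid {
        unsigned int next_asid;   /* ASID 0 is reserved for the host */
    };

    static struct core_asid core_asid[MAX_CORES];

    /* Fetch a fresh ASID for a guest vcpu on the given core. */
    static unsigned int asid_fetch(int core)
    {
        struct core_asid *ca = &core_asid[core];

        if (ca->next_asid == 0)
            ca->next_asid = 1;    /* skip the reserved host ASID */
        return ca->next_asid++;
    }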
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Dec 25 20:44:43 2005 -0400
> files:       tools/firmware/vmxassist/head.S
> description:
> Make sure vmxassist still works in its debug environment.
> 
> Signed-Off-By: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Fri Dec 23 18:27:57 2005 -0400
> files:       tools/libxc/xc_ia64_stubs.c
> description:
> Fixed libxc for ia64: xend uses xc_hvm_build instead of xc_vmx_build.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Wed Dec 21 21:39:17 2005 -0500
> files:       xen/arch/x86/svm_intr.c xen/arch/x86/svm_vmcb.c
> description:
> cleanup of svm specific code.
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Wed Dec 21 17:18:25 2005 -0400
> files:       tools/python/xen/xm/tests/test_create.py tools/xm-test/README
> description:
> Minor name cleanups. xm-test isn't VMX-specific, so use HVM instead.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 20 20:01:38 2005 -0500
> files:       xen/arch/x86/svm_vmcb.c
> description:
> reword comments.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 20 20:01:07 2005 -0500
> files:       xen/arch/x86/svm_intr.c
> description:
> add additional VLAPIC delivery modes.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 20 20:00:22 2005 -0500
> files:       xen/arch/x86/svm.c
> description:
> reformat misaligned code.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 20 19:59:37 2005 -0500
> files:       xen/arch/x86/svm.c
> description:
> Add additional AMD SVM specific CPUID logic.
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Thu Dec 15 03:06:15 2005 -0400
> files:       tools/xm-test/tests/block-create/01_block_attach_device_pos.py 
> tools/xm-test/tests/block-create/02_block_attach_file_device_pos.py 
> tools/xm-test/tests/block-create/04_block_attach_device_repeatedly_pos.py 
> tools/xm-test/tests/block-create/05_block_attach_and_dettach_device_repeatedly_pos.py
>  tools/xm-test/tests/block-create/06_block_attach_baddomain_neg.py 
> tools/xm-test/tests/block-create/07_block_attach_baddevice_neg.py 
> tools/xm-test/tests/block-create/08_block_attach_bad_filedevice_neg.py 
> tools/xm-test/tests/block-create/09_block_attach_and_dettach_device_check_data_pos.py
>  tools/xm-test/tests/block-create/10_block_attach_dettach_multiple_devices.py 
> tools/xm-test/tests/block-create/11_block_attach_shared_dom0.py 
> tools/xm-test/tests/block-create/12_block_attach_shared_domU.py 
> tools/xm-test/tests/block-destroy/01_block-destroy_btblock_pos.py 
> tools/xm-test/tests/block-destroy/02_block-destroy_rtblock_pos.py 
> tools/xm-test/tests/block-destroy/04_block-destroy_nonattached_neg.py 
> tools/xm-test/tests/block-destroy/05_block-destroy_byname_pos.py 
> tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py 
> tools/xm-test/tests/block-list/01_block-list_pos.py 
> tools/xm-test/tests/block-list/02_block-list_attachbd_pos.py 
> tools/xm-test/tests/block-list/03_block-list_anotherbd_pos.py 
> tools/xm-test/tests/block-list/04_block-list_nodb_pos.py 
> tools/xm-test/tests/block-list/06_block-list_checkremove_pos.py 
> tools/xm-test/tests/memmax/01_memmax_badparm_neg.py 
> tools/xm-test/tests/memset/01_memset_basic_pos.py 
> tools/xm-test/tests/memset/02_memset_badparm_neg.py 
> tools/xm-test/tests/memset/03_memset_random_pos.py 
> tools/xm-test/tests/memset/04_memset_smallmem_pos.py 
> tools/xm-test/tests/migrate/01_migrate_localhost_pos.py 
> tools/xm-test/tests/network-attach/01_network_attach_pos.py 
> tools/xm-test/tests/network-attach/02_network_attach_detach_pos.py 
> tools/xm-test/tests/network-attach/03_network_attach_detach_multiple_pos.py 
> tools/xm-test/tests/restore/01_restore_basic_pos.py 
> tools/xm-test/tests/restore/02_restore_badparm_neg.py 
> tools/xm-test/tests/restore/03_restore_badfilename_neg.py 
> tools/xm-test/tests/restore/04_restore_withdevices_pos.py 
> tools/xm-test/tests/save/01_save_basic_pos.py 
> tools/xm-test/tests/save/02_save_badparm_neg.py 
> tools/xm-test/tests/save/03_save_bogusfile_neg.py 
> tools/xm-test/tests/sysrq/01_sysrq_basic_neg.py 
> tools/xm-test/tests/sysrq/02_sysrq_sync_pos.py 
> tools/xm-test/tests/sysrq/03_sysrq_withreboot_pos.py
> description:
> Adding SKIP() to tests that aren't supported for VMX domains.
> 
> Signed-off-by: Dan Stekloff <dsteklof@xxxxxxxxxx>
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Tue Dec 13 22:43:47 2005 -0400
> files:       xen/arch/x86/svm.c xen/arch/x86/vmx.c xen/arch/x86/vmx_io.c 
> xen/include/asm-x86/hvm_vpit.h xen/include/asm-x86/msr.h
> description:
> Support VMX guest accesses to IA32_TIME_STAMP_COUNTER MSR.
> 
> Signed-off-by: Haifeng Xue <haifeng.xue@xxxxxxxxx>
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxx>
> 
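Guest TSC accesses are typically virtualized by keeping a per-vcpu offset
against the host counter, so a guest WRMSR to the TSC adjusts the offset
rather than the hardware. A sketch of the idea (MSR 0x10 is
IA32_TIME_STAMP_COUNTER; structure and names illustrative):

    #include <stdint.h>

    static inline uint64_t host_rdtsc(void)
    {
        uint32_t lo, hi;
        __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
        return ((uint64_t)hi << 32) | lo;
    }

    struct vcpu_time {
        uint64_t tsc_offset;      /* guest TSC = host TSC + offset */
    };

    static uint64_t guest_read_tsc(const struct vcpu_time *vt)
    {
        return host_rdtsc() + vt->tsc_offset;
    }

    static void guest_write_tsc(struct vcpu_time *vt, uint64_t guest_val)
    {
        vt->tsc_offset = guest_val - host_rdtsc();
    }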
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 13 19:49:53 2005 -0500
> files:       xen/arch/x86/svm.c xen/arch/x86/svm_emulate.c 
> xen/arch/x86/svm_instrlen.c xen/arch/x86/svm_intr.c xen/arch/x86/svm_vmcb.c 
> xen/include/asm-x86/svm.h xen/include/asm-x86/svm_emulate.h 
> xen/include/asm-x86/svm_intr.h xen/include/asm-x86/svm_vmcb.h 
> xen/include/asm-x86/svm_vmmcall.h
> description:
> Add SVM base files to repository.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 13 19:49:02 2005 -0500
> files:       xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/entry.S
> description:
> Add SVM entry points for launch/resume.
> 
> 
> user:        twoller@xxxxxxxxxxxxx
> date:        Tue Dec 13 19:47:38 2005 -0500
> files:       .hgignore
> description:
> Add hvmloader files to ignore list.
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Mon Dec 12 22:58:54 2005 -0400
> files:       docs/src/user.tex tools/firmware/README xen/include/asm-x86/hvm.h
> description:
> Removed dirty words (by request).
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Mon Dec 12 05:48:22 2005 -0400
> files:       tools/firmware/hvmloader/mkhex
> description:
> Fix file mode.
> 
> Signed-off-by: Leendert van Doorn <leendert@xxxxxxxxxxxxxx>
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Mon Dec 12 04:50:42 2005 -0400
> files:       xen/Rules.mk xen/arch/x86/cpu/amd.c xen/arch/x86/cpu/intel.c 
> xen/arch/x86/dm/hvm_vioapic.c xen/arch/x86/dm/i8259.c xen/arch/x86/dom0_ops.c 
> xen/arch/x86/domain.c xen/arch/x86/hvm.c xen/arch/x86/hvm_intercept.c 
> xen/arch/x86/hvm_io.c xen/arch/x86/hvm_platform.c xen/arch/x86/hvm_vlapic.c 
> xen/arch/x86/mpparse.c xen/arch/x86/shadow.c xen/arch/x86/vmx.c 
> xen/arch/x86/vmx_io.c xen/arch/x86/vmx_vmcs.c 
> xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/entry.S 
> xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/entry.S 
> xen/arch/x86/x86_64/mm.c xen/include/asm-x86/config.h 
> xen/include/asm-x86/cpufeature.h xen/include/asm-x86/domain.h 
> xen/include/asm-x86/hvm_domain.h xen/include/asm-x86/hvm_io.h 
> xen/include/asm-x86/hvm_support.h xen/include/asm-x86/hvm_vcpu.h 
> xen/include/asm-x86/hvm_vioapic.h xen/include/asm-x86/hvm_vlapic.h 
> xen/include/asm-x86/hvm_vpic.h xen/include/asm-x86/hvm_vpit.h 
> xen/include/asm-x86/mpspec.h xen/include/asm-x86/msr.h 
> xen/include/asm-x86/processor.h xen/include/asm-x86/vmx.h 
> xen/include/asm-x86/vmx_cpu.h xen/include/asm-x86/vmx_vmcs.h
> description:
> Phase 3 of HVM integration: this patchset factors the
> virtualization-architecture-independent functions out of the
> architecture-specific (VMX/SVM) ones.
> 
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Dec 11 07:02:51 2005 -0400
> files:       xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c 
> xen/arch/x86/i387.c xen/arch/x86/shadow.c xen/arch/x86/vmx.c 
> xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/traps.c 
> xen/include/asm-x86/hvm.h xen/include/asm-x86/hvm_support.h 
> xen/include/asm-x86/regs.h xen/include/asm-x86/shadow.h 
> xen/include/asm-x86/vmx_vmcs.h
> description:
> Phase 2 of HVM integration: this patchset introduces the hvm interface
> to the hypervisor and modifies all the non-VMX-specific files to use it.
> 
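The hvm interface is essentially a table of function pointers that the VMX and
SVM implementations each fill in, so arch-independent code never names a
vendor. A minimal sketch of the shape (member names illustrative, not the
exact Xen declarations):

    struct vcpu;

    struct hvm_function_table {
        void (*store_cpu_guest_regs)(struct vcpu *v);
        void (*load_cpu_guest_regs)(struct vcpu *v);
        int  (*realmode)(struct vcpu *v);
        int  (*paging_enabled)(struct vcpu *v);
    };

    extern struct hvm_function_table hvm_funcs;

    /* Callers outside vmx/svm dispatch through the table: */
    static inline int hvm_paging_enabled(struct vcpu *v)
    {
        return hvm_funcs.paging_enabled(v);
    }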
> 
> user:        leendert@xxxxxxxxxxxxxxxxxxxxxxx
> date:        Sun Dec 11 01:10:00 2005 -0400
> files:       tools/examples/Makefile tools/examples/README 
> tools/examples/xmexample.hvm tools/ioemu/exec-all.h tools/ioemu/hw/i8254.c 
> tools/ioemu/hw/i8259.c tools/ioemu/monitor.c 
> tools/ioemu/target-i386-dm/helper2.c tools/ioemu/vl.c tools/libxc/Makefile 
> tools/libxc/xc_hvm_build.c tools/libxc/xc_ptrace.c 
> tools/libxc/xc_ptrace_core.c tools/libxc/xenguest.h 
> tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/image.py 
> tools/python/xen/xm/create.py tools/xm-test/configure.ac 
> tools/xm-test/lib/XmTestLib/XenDomain.py 
> tools/xm-test/lib/XmTestLib/config.py.in tools/xm-test/ramdisk/Makefile.am 
> tools/xm-test/ramdisk/bin/create_disk_image 
> tools/xm-test/tests/block-list/04_block-list_nodb_pos.py 
> xen/include/public/arch-x86_32.h xen/include/public/arch-x86_64.h
> description:
> Phase 1 of HVM integration: this patchset updates the tools. Most of the
> tools are machine-independent, except for some detection code in the hvm
> domain builder.

diff -r e4eb12a6e003 -r f1b361b05bf3 .hgignore
--- a/.hgignore Mon Jan 30 17:51:35 2006
+++ b/.hgignore Tue Jan 31 10:49:51 2006
@@ -108,6 +108,8 @@
 ^tools/firmware/.*\.sym$
 ^tools/firmware/.*bios/.*bios.*\.txt$
 ^tools/firmware/acpi/acpigen$
+^tools/firmware/hvmloader/hvmloader$
+^tools/firmware/hvmloader/roms\.h$
 ^tools/firmware/rombios/BIOS-bochs-latest$
 ^tools/firmware/rombios/_rombios_\.c$
 ^tools/firmware/rombios/rombios\.s$
diff -r e4eb12a6e003 -r f1b361b05bf3 docs/src/user.tex
--- a/docs/src/user.tex Mon Jan 30 17:51:35 2006
+++ b/docs/src/user.tex Tue Jan 31 10:49:51 2006
@@ -137,7 +137,7 @@
 libraries \emph{do not} require modification.
 
 With hardware CPU virtualization as provided by Intel VT and AMD
-Pacifica technology, the ability to run an unmodified guest OS kernel
+SVM technology, the ability to run an unmodified guest OS kernel
 is available.  No porting of the OS is required, although some
 additional driver support is necessary within Xen itself.  Unlike
 traditional full virtualization hypervisors, which suffer a tremendous
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/debugger/libxendebug/xendebug.c
--- a/tools/debugger/libxendebug/xendebug.c     Mon Jan 30 17:51:35 2006
+++ b/tools/debugger/libxendebug/xendebug.c     Tue Jan 31 10:49:51 2006
@@ -355,7 +355,7 @@
 
     if ( (pde = ctxt->cr3_virt[vcpu][vtopdi(va)]) == 0) /* logical address */
         return 0;
-    if (ctxt->context[vcpu].flags & VGCF_VMX_GUEST)
+    if (ctxt->context[vcpu].flags & VGCF_HVM_GUEST)
         pde = ctxt->page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
     if (pde != ctxt->pde_phys[vcpu]) 
     {
@@ -370,7 +370,7 @@
 
     if ((page = ctxt->pde_virt[vcpu][vtopti(va)]) == 0) /* logical address */
         return 0;
-    if (ctxt->context[vcpu].flags & VGCF_VMX_GUEST)
+    if (ctxt->context[vcpu].flags & VGCF_HVM_GUEST)
         page = ctxt->page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
     if (page != ctxt->page_phys[vcpu] || protection != ctxt->page_perm[vcpu]) 
     {
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/examples/Makefile
--- a/tools/examples/Makefile   Mon Jan 30 17:51:35 2006
+++ b/tools/examples/Makefile   Tue Jan 31 10:49:51 2006
@@ -16,7 +16,7 @@
 XEN_CONFIGS = xend-config.sxp
 XEN_CONFIGS += xmexample1 
 XEN_CONFIGS += xmexample2
-XEN_CONFIGS += xmexample.vmx
+XEN_CONFIGS += xmexample.hvm
 XEN_CONFIGS += xmexample.vti
 
 # Xen script dir and scripts to go there.
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/examples/README
--- a/tools/examples/README     Mon Jan 30 17:51:35 2006
+++ b/tools/examples/README     Tue Jan 31 10:49:51 2006
@@ -32,6 +32,6 @@
 xmexample3          - an advanced configuration script for 'xm create' 
                       that utilizes the vmid
 xmexample.nbd       - configuration script that uses NBD filesystems
-xmexample.vmx       - a configuration script for creating a vmx domain with
+xmexample.hvm       - a configuration script for creating a hvm domain with
                       'xm create'
 xmexample.vti       - a configuration script for creating a domain on vti
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/Makefile
--- a/tools/firmware/Makefile   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/Makefile   Tue Jan 31 10:49:51 2006
@@ -1,9 +1,9 @@
 XEN_ROOT = ../..
 include $(XEN_ROOT)/tools/Rules.mk
 
-# vmxloader is a 32-bit protected mode binary.
+# hvmloader is a 32-bit protected mode binary.
 # It belongs in /usr/lib, not /usr/lib64.
-TARGET      := vmxassist/vmxloader
+TARGET      := hvmloader/hvmloader
 INSTALL_DIR := $(DESTDIR)/usr/lib/xen/boot
 
 SUBDIRS :=
@@ -11,6 +11,7 @@
 SUBDIRS += vgabios
 SUBDIRS += acpi
 SUBDIRS += vmxassist
+SUBDIRS += hvmloader
 
 .PHONY: all install clean
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/README
--- a/tools/firmware/README     Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/README     Tue Jan 31 10:49:51 2006
@@ -1,11 +1,11 @@
-Domain FirmWare support
+Domain firmware support
 -----------------------
 
 One of the key advantages of full virtualization hardware support (such
-as Intel's VT or AMD's Pacifica) is the ability to run unmodified guest
-operating systems.  However, since most OSes rely on BIOS support during
-their early bringup, we need to provide a surrogate ROMBIOS and VGABIOS
-firmware layer.
+as Intel's VT or AMD's SVM extensions) is the ability to run unmodified
+guest operating systems.  However, since most OSes rely on BIOS support
+during their early bringup, we need to provide a surrogate ROMBIOS and
+VGABIOS firmware layer.
 
 What's more, we need to support real-mode which is required by
 the firmware and bootstrap loaders. Real-mode support is especially
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/rombios/rombios.c  Tue Jan 31 10:49:51 2006
@@ -26,8 +26,8 @@
 
 // ROM BIOS for use with Bochs/Plex x86 emulation environment
 
-#define VMXASSIST
-#undef VMXTEST
+#define HVMASSIST
+#undef HVMTEST
 
 // Xen full virtualization does not handle unaligned IO with page crossing.
 // Disable 32-bit PIO as a workaround.
@@ -177,8 +177,8 @@
 #define BASE_MEM_IN_K   (640 - EBDA_SIZE)
 
   // Define the application NAME
-#ifdef VMXASSIST
-#  define BX_APPNAME "VMXAssist"
+#ifdef HVMASSIST
+#  define BX_APPNAME "HVMAssist"
 #elif PLEX86
 #  define BX_APPNAME "Plex86"
 #else
@@ -1430,7 +1430,7 @@
 ASM_END
 }
 
-#ifdef VMXASSIST
+#ifdef HVMASSIST
 void
 copy_e820_table()
 {
@@ -1440,7 +1440,7 @@
   write_word(0xe000, 0x8, nr_entries);
   memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
 }
-#endif /* VMXASSIST */
+#endif /* HVMASSIST */
 
 #if BX_DEBUG_SERIAL
 /* serial debug port*/
@@ -1520,7 +1520,7 @@
   if (c == '\n') uart_tx_byte(BX_DEBUG_PORT, '\r');
   uart_tx_byte(BX_DEBUG_PORT, c);
 #endif
-#ifdef VMXASSIST
+#ifdef HVMASSIST
   outb(0xE9, c);
 #endif
 #if BX_VIRTUAL_PORTS
@@ -4089,7 +4089,7 @@
          case 0x20: // coded by osmaker aka K.J.
             if(regs.u.r32.edx == 0x534D4150) /* SMAP */
             {
-#ifdef VMXASSIST
+#ifdef HVMASSIST
                if ((regs.u.r16.bx / 0x14) * 0x14 == regs.u.r16.bx) {
                    Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
 
@@ -9595,7 +9595,7 @@
   ;; int 1C already points at dummy_iret_handler (above)
   mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
   out 0x43, al
-#ifdef VMXASSIST
+#ifdef HVMASSIST
   mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
   out 0x40, al ; lsb
   mov al, #0xe9
@@ -9702,22 +9702,10 @@
   mov al, #0x11 ; send initialisation commands
   out 0x20, al
   out 0xa0, al
-#ifdef VMXASSIST
-  ;; The vm86 emulator expects interrupts to be mapped beyond the reserved
-  ;; vectors (0 through 31). Since rombios fully controls the hardware, we
-  ;; map it the way the emulator needs it and expect that it will do the
-  ;; proper 8086 interrupt translation (that is, master pic base is at 0x8
-  ;; and slave pic base is at 0x70).
-  mov al, #0x20
-  out 0x21, al
-  mov al, #0x28
-  out 0xa1, al
-#else
   mov al, #0x08
   out 0x21, al
   mov al, #0x70
   out 0xa1, al
-#endif
   mov al, #0x04
   out 0x21, al
   mov al, #0x02
@@ -9734,7 +9722,7 @@
 #endif
   out  0xa1, AL ;slave  pic: unmask IRQ 12, 13, 14
 
-#ifdef VMXASSIST
+#ifdef HVMASSIST
   call _copy_e820_table
 #endif
 
@@ -10368,13 +10356,13 @@
   HALT(__LINE__)
   iret
 
-#ifdef VMXTEST
+#ifdef HVMTEST
 .org 0xffe0
   jmp 0xf000:post;
 #endif
 
 .org 0xfff0 ; Power-up Entry Point
-#ifdef VMXTEST
+#ifdef HVMTEST
   jmp 0xd000:0x0003;
 #else
   jmp 0xf000:post
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/Makefile
--- a/tools/firmware/vmxassist/Makefile Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/vmxassist/Makefile Tue Jan 31 10:49:51 2006
@@ -37,13 +37,7 @@
 
 OBJECTS = head.o trap.o vm86.o setup.o util.o
 
-all: vmxloader
-
-vmxloader: roms.h vmxloader.c acpi.h acpi_madt.c
-       $(CC) $(CFLAGS) $(DEFINES) -c vmxloader.c -c acpi_madt.c
-       $(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 vmxloader.o acpi_madt.o
-       objcopy vmxloader.tmp vmxloader
-       rm -f vmxloader.tmp
+all: vmxassist.bin
 
 vmxassist.bin: vmxassist.ld $(OBJECTS)
        $(CPP) $(DEFINES) vmxassist.ld > vmxassist.tmp
@@ -68,15 +62,6 @@
 util.o: machine.h util.c
        $(CC) $(CFLAGS) -c util.c
 
-roms.h:        ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin vmxassist.bin
-       ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
-       ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
-       ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
-       ./mkhex vmxassist vmxassist.bin >> roms.h
-
-acpi.h: ../acpi/acpi.bin
-       ./mkhex acpi ../acpi/acpi.bin > acpi.h
-
 offsets.h: gen
        ./gen > offsets.h
 
@@ -84,7 +69,7 @@
        $(HOSTCC) $(HOSTCFLAGS) -I. $(XENINC) -o gen gen.c
 
 clean:
-       rm -f vmxassist vmxassist.tmp vmxassist.bin vmxassist.run vmxassist.sym head.s roms.h acpi.h
-       rm -f vmxloader vmxloader.tmp vmxloader.o $(OBJECTS)
+       rm -f vmxassist vmxassist.tmp vmxassist.bin vmxassist.run vmxassist.sym head.s
+       rm -f $(OBJECTS)
        rm -f gen gen.o offsets.h
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/head.S
--- a/tools/firmware/vmxassist/head.S   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/vmxassist/head.S   Tue Jan 31 10:49:51 2006
@@ -111,6 +111,9 @@
        cli
 
        /* save register parameters to C land */
+#ifdef TEST
+       xorl    %edx, %edx
+#endif
        movl    %edx, booting_cpu
        movl    %ebx, booting_vector
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/setup.c
--- a/tools/firmware/vmxassist/setup.c  Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/vmxassist/setup.c  Tue Jan 31 10:49:51 2006
@@ -194,6 +194,12 @@
 }
 
 void
+setiomap(int port)
+{
+       tss.iomap[port >> 3] |= 1 << (port & 7);
+}
+
+void
 enter_real_mode(struct regs *regs)
 {
        /* mask off TSS busy bit */
@@ -217,6 +223,13 @@
                }
                regs->uesp = 0;
                regs->uss = 0;
+
+               /* intercept accesses to the PIC */
+               setiomap(PIC_MASTER+PIC_CMD);
+               setiomap(PIC_MASTER+PIC_IMR);
+               setiomap(PIC_SLAVE+PIC_CMD);
+               setiomap(PIC_SLAVE+PIC_IMR);
+
                printf("Starting emulated 16-bit real-mode: ip=%04x:%04x\n",
                        regs->cs, regs->eip);
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/vmxassist/vm86.c   Tue Jan 31 10:49:51 2006
@@ -736,6 +736,83 @@
        regs->eflags &= ~EFLAGS_IF;
        regs->eip = read16(address(regs, 0, n * 4));
        regs->cs = read16(address(regs, 0, n * 4 + 2));
+}
+
+/*
+ * Most port I/O operations are passed unmodified. We do have to be
+ * careful and make sure the emulated program isn't remapping the
+ * interrupt vectors. The following simple state machine catches
+ * these attempts and rewrites them.
+ */
+int
+outbyte(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       static char icw2[2] = { 0 };
+       int al, port;
+
+       switch (opc) {
+       case 0xE6: /* outb port, al */
+               port = fetch8(regs);
+               break;
+       case 0xEE: /* outb (%dx), al */
+               port = MASK16(regs->edx);
+               break;
+       default:
+               return 0;
+       }
+
+       al = regs->eax & 0xFF;
+
+       switch (port) {
+       case PIC_MASTER + PIC_CMD:
+               if (al & (1 << 4)) /* A0=0,D4=1 -> ICW1 */
+                       icw2[0] = 1;
+               break;
+       case PIC_MASTER + PIC_IMR:
+               if (icw2[0]) {
+                       icw2[0] = 0;
+                       printf("Remapping master: ICW2 0x%x -> 0x%x\n",
+                               al, NR_EXCEPTION_HANDLER);
+                       al = NR_EXCEPTION_HANDLER;
+               }
+               break;
+
+       case PIC_SLAVE  + PIC_CMD:
+               if (al & (1 << 4)) /* A0=0,D4=1 -> ICW1 */
+                       icw2[1] = 1;
+               break;
+       case PIC_SLAVE  + PIC_IMR:
+               if (icw2[1]) {
+                       icw2[1] = 0;
+                       printf("Remapping slave: ICW2 0x%x -> 0x%x\n",
+                               al, NR_EXCEPTION_HANDLER+8);
+                       al = NR_EXCEPTION_HANDLER+8;
+               }
+               break;
+       }
+
+       outb(port, al);
+       return 1;
+}
+
+int
+inbyte(struct regs *regs, unsigned prefix, unsigned opc)
+{
+       int port;
+
+       switch (opc) {
+       case 0xE4: /* inb al, port */
+               port = fetch8(regs);
+               break;
+       case 0xEC: /* inb al, (%dx) */
+               port = MASK16(regs->edx);
+               break;
+       default:
+               return 0;
+       }
+
+       regs->eax = (regs->eax & ~0xFF) | inb(port);
+       return 1;
 }
 
 enum { OPC_INVALID, OPC_EMULATED };
@@ -885,6 +962,16 @@
                        }
                        return OPC_EMULATED;
 
+               case 0xE4:      /* inb al, port */
+                       if (!inbyte(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0xE6:      /* outb port, al */
+                       if (!outbyte(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
                case 0xEA:      /* jmpl */
                        if ((mode == VM86_REAL_TO_PROTECTED) ||
                            (mode == VM86_PROTECTED_TO_REAL)) {
@@ -902,6 +989,16 @@
                                return OPC_EMULATED;
                        }
                        goto invalid;
+
+               case 0xEC:      /* inb al, (%dx) */
+                       if (!inbyte(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0xEE:      /* outb (%dx), al */
+                       if (!outbyte(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
 
                case 0xF0:      /* lock */
                        TRACE((regs, regs->eip - eip, "lock"));
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/vmxassist.ld
--- a/tools/firmware/vmxassist/vmxassist.ld     Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/vmxassist/vmxassist.ld     Tue Jan 31 10:49:51 2006
@@ -2,7 +2,6 @@
  * vmxassist.ld
  */
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-/*OUTPUT_ARCH(i386)*/
 ENTRY(_start)
 
 SECTIONS
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/ioemu/exec-all.h
--- a/tools/ioemu/exec-all.h    Mon Jan 30 17:51:35 2006
+++ b/tools/ioemu/exec-all.h    Tue Jan 31 10:49:51 2006
@@ -584,5 +584,5 @@
 
 //#define DEBUG_UNUSED_IOPORT
 //#define DEBUG_IOPORT
-#define TARGET_VMX
-
+#define TARGET_HVM
+
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/ioemu/hw/i8254.c
--- a/tools/ioemu/hw/i8254.c    Mon Jan 30 17:51:35 2006
+++ b/tools/ioemu/hw/i8254.c    Tue Jan 31 10:49:51 2006
@@ -50,7 +50,7 @@
     int64_t next_transition_time;
     QEMUTimer *irq_timer;
     int irq;
-    int vmx_channel; /* Is this accelerated by VMX ? */
+    int hvm_channel; /* Is this accelerated by HVM ? */
 } PITChannelState;
 
 struct PITState {
@@ -61,8 +61,8 @@
 
 static void pit_irq_timer_update(PITChannelState *s, int64_t current_time);
 
-/* currently operate which channel for vmx use */
-int vmx_channel = -1;
+/* currently operate which channel for hvm use */
+int hvm_channel = -1;
 extern FILE *logfile;
 static int pit_get_count(PITChannelState *s)
 {
@@ -215,7 +215,7 @@
     return s->gate;
 }
 
-void pit_reset_vmx_vectors()
+void pit_reset_hvm_vectors()
 {
     extern shared_iopage_t *shared_page;
     ioreq_t *req; 
@@ -225,18 +225,18 @@
     irq = 0;
 
     for(i = 0; i < 3; i++) {
-        if (pit_state.channels[i].vmx_channel)
+        if (pit_state.channels[i].hvm_channel)
              break;
     }
     
     if (i == 3)
         return;
 
-    /* Assumes just one VMX accelerated channel */
-    vmx_channel = i;
-    s = &pit_state.channels[vmx_channel];
+    /* Assumes just one HVM accelerated channel */
+    hvm_channel = i;
+    s = &pit_state.channels[hvm_channel];
     fprintf(logfile,
-       "VMX_PIT:guest init pit channel %d!\n", vmx_channel);
+       "HVM_PIT:guest init pit channel %d!\n", hvm_channel);
     req = &shared_page->vcpu_iodata[0].vp_ioreq;
 
     req->state = STATE_IORESP_HOOK;
@@ -247,9 +247,9 @@
      */
     req->u.data = s->count;
     req->u.data |= (irq << 16);
-    req->u.data |= (vmx_channel << 24);
+    req->u.data |= (hvm_channel << 24);
     req->u.data |= ((s->rw_mode) << 26);
-    fprintf(logfile, "VMX_PIT:pass info 0x%llx to HV!\n", req->u.data);
+    fprintf(logfile, "HVM_PIT:pass info 0x%llx to HV!\n", req->u.data);
 }
 
 static inline void pit_load_count(PITChannelState *s, int val)
@@ -261,9 +261,9 @@
 
     /* guest init this pit channel for periodic mode. we do not update related
      * timer so the channel never send intr from device model*/
-    if (vmx_channel != -1 && s->mode == 2) {
-        pit_reset_vmx_vectors();
-        vmx_channel = -1;
+    if (hvm_channel != -1 && s->mode == 2) {
+        pit_reset_hvm_vectors();
+        hvm_channel = -1;
     }
 
 /*    pit_irq_timer_update(s, s->count_load_time);*/
@@ -323,8 +323,8 @@
         }
     } else {
         s = &pit->channels[addr];
-        s->vmx_channel = 1;
-        vmx_channel = addr;
+        s->hvm_channel = 1;
+        hvm_channel = addr;
         switch(s->write_state) {
         default:
         case RW_STATE_LSB:
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/ioemu/hw/i8259.c
--- a/tools/ioemu/hw/i8259.c    Mon Jan 30 17:51:35 2006
+++ b/tools/ioemu/hw/i8259.c    Tue Jan 31 10:49:51 2006
@@ -31,7 +31,7 @@
 //#define DEBUG_IRQ_LATENCY
 //#define DEBUG_IRQ_COUNT
 
-extern void pit_reset_vmx_vectors();
+extern void pit_reset_hvm_vectors();
 
 typedef struct PicState {
     uint8_t last_irr; /* edge detection */
@@ -368,7 +368,7 @@
         case 1:
             s->irq_base = val & 0xf8;
             s->init_state = 2;
-            pit_reset_vmx_vectors();
+            pit_reset_hvm_vectors();
             break;
         case 2:
             if (s->init4) {
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/ioemu/monitor.c
--- a/tools/ioemu/monitor.c     Mon Jan 30 17:51:35 2006
+++ b/tools/ioemu/monitor.c     Tue Jan 31 10:49:51 2006
@@ -225,10 +225,10 @@
     }
 }
 
-extern void destroy_vmx_domain(void);
+extern void destroy_hvm_domain(void);
 static void do_quit(void)
 {
-    destroy_vmx_domain();
+    destroy_hvm_domain();
     exit(0);
 }
 
@@ -506,8 +506,8 @@
       "", "show i8259 (PIC) state", },
     { "pci", "", pci_info,
       "", "show PCI info", },
-    { "vmxiopage", "", sp_info,
-      "", "show VMX device model shared page info", },
+    { "hvmiopage", "", sp_info,
+      "", "show HVM device model shared page info", },
     { NULL, NULL, },
 };
 
@@ -700,7 +700,7 @@
                 args[nb_args++] = (void *)has_option;
             }
             break;
-        /* TODO: add more commands we need here to support vmx device model */
+        /* TODO: add more commands we need here to support hvm device model */
         case '/':
         case 'i':
         default:
@@ -772,14 +772,14 @@
 
 static void monitor_start_input(void)
 {
-    readline_start("(VTXen) ", 0, monitor_handle_command1, NULL);
+    readline_start("(HVMXen) ", 0, monitor_handle_command1, NULL);
 }
 
 void monitor_init(CharDriverState *hd, int show_banner)
 {
     monitor_hd = hd;
     if (show_banner) {
-        term_printf("VMX device model. type 'q' to exit\n");
+        term_printf("HVM device model. type 'q' to exit\n");
     }
     qemu_chr_add_read_handler(hd, term_can_read, term_read, NULL);
     monitor_start_input();
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/ioemu/target-i386-dm/helper2.c
--- a/tools/ioemu/target-i386-dm/helper2.c      Mon Jan 30 17:51:35 2006
+++ b/tools/ioemu/target-i386-dm/helper2.c      Tue Jan 31 10:49:51 2006
@@ -20,7 +20,9 @@
 
 /*
  * Main cpu loop for handling I/O requests coming from a virtual machine
+ *
  * Copyright © 2004, Intel Corporation.
+ * Copyright © 2005, International Business Machines Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU Lesser General Public License,
@@ -394,7 +396,7 @@
 int xc_handle;
 
 void
-destroy_vmx_domain(void)
+destroy_hvm_domain(void)
 {
     extern FILE* logfile;
     char destroy_cmd[32];
@@ -467,11 +469,11 @@
             (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
         }
     }
-    destroy_vmx_domain();
+    destroy_hvm_domain();
     return 0;
 }
 
-static void qemu_vmx_reset(void *unused)
+static void qemu_hvm_reset(void *unused)
 {
     char cmd[64];
 
@@ -489,7 +491,7 @@
     int rc;
 
     cpu_exec_init();
-    qemu_register_reset(qemu_vmx_reset, NULL);
+    qemu_register_reset(qemu_hvm_reset, NULL);
     env = malloc(sizeof(CPUX86State));
     if (!env)
         return NULL;
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c  Mon Jan 30 17:51:35 2006
+++ b/tools/ioemu/vl.c  Tue Jan 31 10:49:51 2006
@@ -125,7 +125,7 @@
 int pit_min_timer_count = 0;
 int nb_nics;
 char bridge[16];
-char domain_name[1024] = { 'V', 'T', 'X', 'E', 'N', '-'};
+char domain_name[1024] = { 'H','V', 'M', 'X', 'E', 'N', '-'};
 NetDriverState nd_table[MAX_NICS];
 QEMUTimer *gui_timer;
 QEMUTimer *polling_timer;
@@ -826,8 +826,8 @@
     {
         /* get times() syscall frequency */
         timer_freq = sysconf(_SC_CLK_TCK);
-
-#ifndef TARGET_VMX
+      
+#ifndef TARGET_HVM
         /* timer signal */
         sigfillset(&act.sa_mask);
         act.sa_flags = 0;
@@ -869,7 +869,7 @@
             pit_min_timer_count = ((uint64_t)itv.it_interval.tv_usec *
                                    PIT_FREQ) / 1000000;
         }
-#endif /* TARGET_VMX */
+#endif /* TARGET_HVM */
     }
 #endif
 }
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/Makefile
--- a/tools/libxc/Makefile      Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/Makefile      Tue Jan 31 10:49:51 2006
@@ -45,7 +45,7 @@
 BUILD_SRCS += xc_load_aout9.c
 BUILD_SRCS += xc_linux_restore.c
 BUILD_SRCS += xc_linux_save.c
-BUILD_SRCS += xc_vmx_build.c
+BUILD_SRCS += xc_hvm_build.c
 endif
 
 CFLAGS   += -Wall
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xc_ia64_stubs.c
--- a/tools/libxc/xc_ia64_stubs.c       Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/xc_ia64_stubs.c       Tue Jan 31 10:49:51 2006
@@ -621,7 +621,7 @@
     return -1;
 }
 
-int xc_vmx_build(int xc_handle,
+int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
                  const char *image_name,
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xc_linux_build.c
--- a/tools/libxc/xc_linux_build.c      Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/xc_linux_build.c      Tue Jan 31 10:49:51 2006
@@ -815,7 +815,7 @@
         start_info->mod_len      = initrd_len;
     }
     if ( cmdline != NULL )
-    {
+    { 
         strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
         start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
     }
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/xc_ptrace.c   Tue Jan 31 10:49:51 2006
@@ -240,7 +240,7 @@
     }
     if ( (pde = cr3_virt[cpu][vtopdi(va)]) == 0 )
         goto error_out;
-    if ( (ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]) )
+    if ( (ctxt[cpu].flags & VGCF_HVM_GUEST) && paging_enabled(&ctxt[cpu]) )
         pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
     if ( pde != pde_phys[cpu] )
     {
@@ -255,7 +255,7 @@
     }
     if ( (page = pde_virt[cpu][vtopti(va)]) == 0 )
         goto error_out;
-    if ( (ctxt[cpu].flags & VGCF_VMX_GUEST) && paging_enabled(&ctxt[cpu]) )
+    if ( (ctxt[cpu].flags & VGCF_HVM_GUEST) && paging_enabled(&ctxt[cpu]) )
         page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
     if ( (page != page_phys[cpu]) || (perm != prev_perm[cpu]) )
     {
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xc_ptrace_core.c
--- a/tools/libxc/xc_ptrace_core.c      Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/xc_ptrace_core.c      Tue Jan 31 10:49:51 2006
@@ -126,7 +126,7 @@
     } 
     if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
         goto error_out;
-    if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+    if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         pde = p2m_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
     if (pde != pde_phys[cpu]) 
     {
@@ -142,7 +142,7 @@
     }
     if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
         goto error_out;
-    if (ctxt[cpu].flags & VGCF_VMX_GUEST)
+    if (ctxt[cpu].flags & VGCF_HVM_GUEST)
         page = p2m_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
     if (page != page_phys[cpu]) 
     {
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/xenguest.h    Tue Jan 31 10:49:51 2006
@@ -53,7 +53,7 @@
                    unsigned int console_evtchn,
                    unsigned long *console_mfn);
 
-int xc_vmx_build(int xc_handle,
+int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
                  const char *image_name,
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jan 30 17:51:35 2006
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue Jan 31 10:49:51 2006
@@ -355,7 +355,7 @@
                         "console_mfn", console_mfn);
 }
 
-static PyObject *pyxc_vmx_build(XcObject *self,
+static PyObject *pyxc_hvm_build(XcObject *self,
                                 PyObject *args,
                                 PyObject *kwds)
 {
@@ -369,16 +369,15 @@
     unsigned long store_mfn = 0;
 
     static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn",
-                                "memsize", "image", "vcpus", "acpi", "apic",
-                                NULL };
-
+                               "memsize", "image", "vcpus", "acpi", "apic",
+                               NULL };
     if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisiii", kwd_list,
                                       &dom, &control_evtchn, &store_evtchn,
-                                      &memsize, &image, &vcpus, &acpi, &apic) )
-        return NULL;
-
-    if ( xc_vmx_build(self->xc_handle, dom, memsize, image, control_evtchn,
-                      vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
+                                     &memsize, &image, &vcpus, &acpi, &apic) )
+        return NULL;
+
+    if ( xc_hvm_build(self->xc_handle, dom, memsize, image, control_evtchn,
+                     vcpus, acpi, apic, store_evtchn, &store_mfn) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
     return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
@@ -965,12 +964,12 @@
       " vcpus   [int, 1]:   Number of Virtual CPUS in domain.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
-    { "vmx_build", 
-      (PyCFunction)pyxc_vmx_build, 
-      METH_VARARGS | METH_KEYWORDS, "\n"
-      "Build a new VMX guest OS.\n"
+    { "hvm_build", 
+      (PyCFunction)pyxc_hvm_build, 
+      METH_VARARGS | METH_KEYWORDS, "\n"
+      "Build a new HVM guest OS.\n"
       " dom     [int]:      Identifier of domain to build into.\n"
-      " image   [str]:      Name of VMX loader image file.\n"
+      " image   [str]:      Name of HVM loader image file.\n"
       " vcpus   [int, 1]:   Number of Virtual CPUS in domain.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Mon Jan 30 17:51:35 2006
+++ b/tools/python/xen/xend/image.py    Tue Jan 31 10:49:51 2006
@@ -143,7 +143,7 @@
 
     def getDomainMemory(self, mem):
         """@return The memory required, in KiB, by the domain to store the
-        given amount, also in KiB.  This is normally just mem, but VMX domains
+        given amount, also in KiB.  This is normally just mem, but HVM domains
         have overheads to account for."""
         return mem
 
@@ -183,21 +183,21 @@
                               cmdline        = self.cmdline,
                               ramdisk        = self.ramdisk)
 
-class VmxImageHandler(ImageHandler):
-
-    ostype = "vmx"
+class HVMImageHandler(ImageHandler):
+
+    ostype = "hvm"
 
     def configure(self, imageConfig, deviceConfig):
         ImageHandler.configure(self, imageConfig, deviceConfig)
 
         info = xc.xeninfo()
-        if not 'hvm' in info['xen_caps']:
-            raise VmError("vmx: not an Intel VT platform, we stop creating!")
+       if not 'hvm' in info['xen_caps']:
+           raise VmError("Not an HVM capable platform, we stop creating!")
 
         self.dmargs = self.parseDeviceModelArgs(imageConfig, deviceConfig)
         self.device_model = sxp.child_value(imageConfig, 'device_model')
         if not self.device_model:
-            raise VmError("vmx: missing device model")
+            raise VmError("hvm: missing device model")
         self.display = sxp.child_value(imageConfig, 'display')
         self.xauthority = sxp.child_value(imageConfig, 'xauthority')
 
@@ -217,7 +217,7 @@
         # Create an event channel
         self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(),
                                                       remote_dom=0)
-        log.info("VMX device model port: %d", self.device_channel)
+        log.info("HVM device model port: %d", self.device_channel)
 
         store_evtchn = self.vm.getStorePort()
 
@@ -232,7 +232,7 @@
 
         self.register_shutdown_watch()
 
-        return xc.vmx_build(dom            = self.vm.getDomid(),
+        return xc.hvm_build(dom            = self.vm.getDomid(),
                             image          = self.kernel,
                             control_evtchn = self.device_channel,
                             store_evtchn   = store_evtchn,
@@ -283,7 +283,7 @@
                     continue;
                 vbddev_list = ['hda', 'hdb', 'hdc', 'hdd']
                 if vbddev not in vbddev_list:
-                    raise VmError("vmx: for qemu vbd type=file&dev=hda~hdd")
+                    raise VmError("hvm: for qemu vbd type=file&dev=hda~hdd")
                 ret.append("-%s" % vbddev)
                 ret.append("%s" % vbdparam)
             if name == 'vif':
@@ -405,8 +405,8 @@
     def register_shutdown_watch(self):
         """ add xen store watch on control/shutdown """
         self.shutdownWatch = xswatch(self.vm.dompath + "/control/shutdown", \
-                                    self.vmx_shutdown)
-        log.debug("vmx shutdown watch registered")
+                                    self.hvm_shutdown)
+        log.debug("hvm shutdown watch registered")
 
     def unregister_shutdown_watch(self):
         """Remove the watch on the control/shutdown, if any. Nothrow
@@ -416,11 +416,11 @@
             if self.shutdownWatch:
                 self.shutdownWatch.unwatch()
         except:
-            log.exception("Unwatching vmx shutdown watch failed.")
+            log.exception("Unwatching hvm shutdown watch failed.")
         self.shutdownWatch = None
-        log.debug("vmx shutdown watch unregistered")
-
-    def vmx_shutdown(self, _):
+        log.debug("hvm shutdown watch unregistered")
+
+    def hvm_shutdown(self, _):
         """ watch call back on node control/shutdown,
             if node changed, this function will be called
         """
@@ -429,7 +429,7 @@
         vm = xd.domain_lookup( self.vm.getDomid() )
 
         reason = vm.readDom('control/shutdown')
-        log.debug("vmx_shutdown fired, shutdown reason=%s", reason)
+        log.debug("hvm_shutdown fired, shutdown reason=%s", reason)
         for x in shutdown_reasons.keys():
             if shutdown_reasons[x] == reason:
                 vm.info['shutdown'] = 1
@@ -444,7 +444,7 @@
 imageHandlerClasses = {}
 
 
-for h in LinuxImageHandler, VmxImageHandler:
+for h in LinuxImageHandler, HVMImageHandler:
     imageHandlerClasses[h.ostype] = h
 
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Mon Jan 30 17:51:35 2006
+++ b/tools/python/xen/xm/create.py     Tue Jan 31 10:49:51 2006
@@ -162,11 +162,11 @@
 
 gopts.var('acpi', val='ACPI',
           fn=set_int, default=0,
-          use="Disable or enable ACPI of VMX domain.")
+          use="Disable or enable ACPI of HVM domain.")
 
 gopts.var('apic', val='APIC',
           fn=set_int, default=0,
-          use="Disable or enable APIC of VMX domain.")
+          use="Disable or enable APIC of HVM domain.")
 
 gopts.var('vcpus', val='VCPUS',
           fn=set_int, default=1,
@@ -441,8 +441,8 @@
     if vals.extra:
         config_image.append(['args', vals.extra])
 
-    if vals.builder == 'vmx':
-        configure_vmx(config_image, vals)
+    if vals.builder == 'hvm':
+        configure_hvm(config_image, vals)
         
     return config_image
     
@@ -536,8 +536,8 @@
         config_devs.append(['device', config_vif])
 
 
-def configure_vmx(config_image, vals):
-    """Create the config for VMX devices.
+def configure_hvm(config_image, vals):
+    """Create the config for HVM devices.
     """
     args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb',
              'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'audio',
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/python/xen/xm/tests/test_create.py
--- a/tools/python/xen/xm/tests/test_create.py  Mon Jan 30 17:51:35 2006
+++ b/tools/python/xen/xm/tests/test_create.py  Tue Jan 31 10:49:51 2006
@@ -141,15 +141,15 @@
                  })
             
 
-    def testVMXConfigFile(self):
+    def testHVMConfigFile(self):
         (fd, fname) = tempfile.mkstemp()
         try:
             os.write(fd,
                      '''
-kernel = "/usr/lib/xen/boot/vmxloader"
-builder='vmx'
+kernel = "/usr/lib/xen/boot/hvmloader"
+builder='hvm'
 memory = 128
-name = "ExampleVMXDomain"
+name = "ExampleHVMDomain"
 vcpus=1
 vif = [ 'type=ioemu, bridge=xenbr0' ]
 disk = [ 'file:/var/images/min-el3-i386.img,ioemu:hda,w' ]
@@ -163,10 +163,10 @@
             os.close(fd)
 
         self.t('-f %s display=fakedisplay' % fname,
-               { 'kernel'      : '/usr/lib/xen/boot/vmxloader',
-                 'builder'     : 'vmx',
+               { 'kernel'      : '/usr/lib/xen/boot/hvmloader',
+                 'builder'     : 'hvm',
                  'memory'      : 128,
-                 'name'        : 'ExampleVMXDomain',
+                 'name'        : 'ExampleHVMDomain',
                  'vcpus'       : 1,
                  'nics'        : -1,
                  'vif'         : ['type=ioemu, bridge=xenbr0'],
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/README
--- a/tools/xm-test/README      Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/README      Tue Jan 31 10:49:51 2006
@@ -60,26 +60,26 @@
 =========================
 
 If you'd like to build and run this with hardware virtual machine assist
-(HVM) support to test fully virtualized disk images on VMX hardware, 
-please add the --enable-vmx-support option to configure:
+(HVM) support to test fully virtualized disk images on VMX/SVM hardware, 
+please add the --enable-hvm-support option to configure:
 
   # ./autogen
-  # ./configure --enable-vmx-support
+  # ./configure --enable-hvm-support
   # make
 
 The ramdisk/bin/create_disk_image script, which builds the full virt
 disk.img, requires Lilo 22.7+ to be installed on the system. Lilo is 
 used to install the bootloader on the disk.img.
 
-If HVM / VMX support is enabled, the ramdisk/bin/create_disk_image script
+If HVM support is enabled, the ramdisk/bin/create_disk_image script
 will be run to create a full virt disk.img in the ramdisk directory. The
 script, by default, will look in /boot for the first non-Xen kernel it
 runs across. If you'd like to set xm-test to use a specific kernel,
 rather than the first one it finds in /boot, you can configure it in
-with the "--with-vmx-kernel=KERNEL" option:
-
+with the "--with-hvm-kernel=KERNEL" option:
+ 
 # ./autogen
-# ./configure --enable-vmx-support --with-vmx-kernel=KERNEL
+# ./configure --enable-hvm-support --with-hvm-kernel=KERNEL
 # make
 
 Otherwise, you can always rerun the create script using the -k option
@@ -92,14 +92,14 @@
 want to tell the script that the driver is built into the kernel, please
 use the "--with-driver-dir=DRVDIR" configure option. If built into
 the kernel, please use the key word "builtin" with the option:
-
+ 
 # ./autogen
-# ./configure --enable-vmx-support --with-driver-dir=builtin
+# ./configure --enable-hvm-support --with-driver-dir=builtin
 - or -
-# ./configure --enable-vmx-support --with-driver-dir=/driver/directory
+# ./configure --enable-hvm-support --with-driver-dir=/driver/directory
 # make
-
-Xm-test will look for disk.img in the ramdisk directory when run by 
+ 
+Xm-test will look for disk.img in the ramdisk directory when run by
 default.
 
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/configure.ac
--- a/tools/xm-test/configure.ac        Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/configure.ac        Tue Jan 31 10:49:51 2006
@@ -15,18 +15,18 @@
 RD_PATH=../../ramdisk
 TENV="PYTHONPATH=$PYTHONPATH:$TESTLIB:/usr/lib/python RD_PATH=$RD_PATH"
 
-AC_ARG_ENABLE(vmx-support,
-       [[  --enable-vmx-support           enable hardware virtual machine assist]],
+AC_ARG_ENABLE(hvm-support,
+       [[  --enable-hvm-support           enable hardware virtual machine assist]],
        [
-               ENABLE_VMX=True
+               ENABLE_HVM=True
        ],[
-               ENABLE_VMX=False
+               ENABLE_HVM=False
        ])
 
-if test "x$ENABLE_VMX" = "xTrue"; then
+if test "x$ENABLE_HVM" = "xTrue"; then
        if test "$LILO" = "no"; then 
                AC_MSG_ERROR([lilo not found
-lilo version 22.7 or greater must be installed for testing with vmx enabled.])
+lilo version 22.7 or greater must be installed for testing with hvm enabled.])
        else
		pass=`$LILO -V | sed -e "s/LILO version //" | awk -F "." '{if ($1 >=22 && $2 >= 7) print "true"; else print "false"}'`
                if test "$pass" != "true"; then
@@ -35,16 +35,16 @@
        fi
 fi
 
-AM_CONDITIONAL(VMX, test x$ENABLE_VMX = xTrue)
-AC_SUBST(ENABLE_VMX)
+AM_CONDITIONAL(HVM, test x$ENABLE_HVM = xTrue)
+AC_SUBST(ENABLE_HVM)
 
-AC_ARG_WITH(vmx-kernel,
-       [[  --with-vmx-kernel=kernel       Use this kernel for vmx disk.img testing]],
-       VMXKERNEL=$withval,
-       VMXKERNEL="no")
+AC_ARG_WITH(hvm-kernel,
+      [[  --with-hvm-kernel=kernel       Use this kernel for hvm disk.img testing]],
+      HVMKERNEL=$withval,
+      HVMKERNEL="no")
 
-dnl substitute @VMXKERNEL@ in all Makefiles
-AC_SUBST(VMXKERNEL)
+dnl substitute @HVMKERNEL@ in all Makefiles
+AC_SUBST(HVMKERNEL)
 
 AC_ARG_WITH(driver-dir,
        [[  --with-driver-dir=drvdir       Look in this directory for the pcnet32 driver for the vmx disk.img. drvdir can equal key word "builtin" if driver is built into the kernel]],
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/lib/XmTestLib/XenDomain.py
--- a/tools/xm-test/lib/XmTestLib/XenDomain.py  Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/lib/XmTestLib/XenDomain.py  Tue Jan 31 10:49:51 2006
@@ -70,14 +70,14 @@
                     "root"         : "/dev/ram0",
                     "ramdisk"      : getRdPath() + "/initrd.img"
                     }
-VmxDefaults =      {"memory"       : 64,
+HVMDefaults =      {"memory"       : 64,
                     "vcpus"        : 1,
                     "acpi"         : 0,
                     "apic"         : 0,
                     "disk"         : ["file:%s/disk.img,ioemu:%s,w" %
                                    (getRdPath(), BLOCK_ROOT_DEV)],
-                    "kernel"       : "/usr/lib/xen/boot/vmxloader",
-                    "builder"      : "vmx",
+                    "kernel"       : "/usr/lib/xen/boot/hvmloader",
+                    "builder"      : "hvm",
                     "sdl"          : 0,
                     "vnc"          : 0,
                     "vncviewer"    : 0,
@@ -86,8 +86,8 @@
                     "device_model" : getDeviceModel()
                     }
 
-if ENABLE_VMX_SUPPORT:
-    configDefaults = VmxDefaults
+if ENABLE_HVM_SUPPORT:
+    configDefaults = HVMDefaults
 else:
     configDefaults = ParavirtDefaults
 
@@ -247,7 +247,7 @@
 
     def start(self):
         XenDomain.start(self)
-        if ENABLE_VMX_SUPPORT:
+        if ENABLE_HVM_SUPPORT:
             waitForBoot()
 
     def startNow(self):
@@ -271,7 +271,7 @@
 
     print str(c)
 
-    
+
 
 #    c.write("/tmp/foo.conf")
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/lib/XmTestLib/config.py.in
--- a/tools/xm-test/lib/XmTestLib/config.py.in  Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/lib/XmTestLib/config.py.in  Tue Jan 31 10:49:51 2006
@@ -1,4 +1,4 @@
 #!/usr/bin/python
 
-ENABLE_VMX_SUPPORT = @ENABLE_VMX@
+ENABLE_HVM_SUPPORT = @ENABLE_HVM@
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/ramdisk/Makefile.am
--- a/tools/xm-test/ramdisk/Makefile.am Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/ramdisk/Makefile.am Tue Jan 31 10:49:51 2006
@@ -9,12 +9,12 @@
 
 BR_ROOT = build_i386/root
 
-VMX_SCRIPT = bin/create_disk_image
+HVM_SCRIPT = bin/create_disk_image
 
 XMTEST_MAJ_VER = $(shell echo @PACKAGE_VERSION@ | perl -pe 's/(\d+)\.(\d+)\.\d+/\1.\2/')
 XMTEST_VER_IMG = initrd-$(XMTEST_MAJ_VER).img
 
-if VMX
+if HVM
 all: initrd.img disk.img
 else
 all: initrd.img
@@ -44,16 +44,16 @@
        ln -sf $(XMTEST_VER_IMG) initrd.img
 
 disk.img: $(XMTEST_VER_IMG)
-       chmod a+x $(VMX_SCRIPT)
-       @if test ! "$(VMXKERNEL)" = "no" -a ! "$(DRVDIR)" = "no"; then \
-               $(VMX_SCRIPT) -r $(XMTEST_VER_IMG) -k $(VMXKERNEL) \
+       chmod a+x $(HVM_SCRIPT)
+       @if test ! "$(HVMKERNEL)" = "no" -a ! "$(DRVDIR)" = "no"; then \
+               $(HVM_SCRIPT) -r $(XMTEST_VER_IMG) -k $(HVMKERNEL) \
                        -d $(DRVDIR); \
-       elif test "$(VMXKERNEL)" = "no" -a ! "$(DRVDIR)" = "no"; then \
-               $(VMX_SCRIPT) -r $(XMTEST_VER_IMG) -d $(DRVDIR); \
-       elif test ! "$(VMXKERNEL)" = "no" -a "$(DRVDIR)" = "no"; then \
-               $(VMX_SCRIPT) -r $(XMTEST_VER_IMG) -k $(VMXKERNEL); \
+       elif test "$(HVMKERNEL)" = "no" -a ! "$(DRVDIR)" = "no"; then \
+               $(HVM_SCRIPT) -r $(XMTEST_VER_IMG) -d $(DRVDIR); \
+       elif test ! "$(HVMKERNEL)" = "no" -a "$(DRVDIR)" = "no"; then \
+               $(HVM_SCRIPT) -r $(XMTEST_VER_IMG) -k $(HVMKERNEL); \
        else \
-               $(VMX_SCRIPT) -r $(XMTEST_VER_IMG); \
+               $(HVM_SCRIPT) -r $(XMTEST_VER_IMG); \
        fi
 
 existing:
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/ramdisk/bin/create_disk_image
--- a/tools/xm-test/ramdisk/bin/create_disk_image       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/ramdisk/bin/create_disk_image       Tue Jan 31 10:49:51 2006
@@ -46,7 +46,7 @@
 function usage()
 {
        cat << EOU
-Command creates a vmx guest disk image for xm-test. 
+Command creates an HVM guest disk image for xm-test.
 
 Usage: $0 [OPTIONS]
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/01_block_attach_device_pos.py
--- a/tools/xm-test/tests/block-create/01_block_attach_device_pos.py    Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/01_block_attach_device_pos.py    Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/02_block_attach_file_device_pos.py
--- a/tools/xm-test/tests/block-create/02_block_attach_file_device_pos.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/02_block_attach_file_device_pos.py       Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/04_block_attach_device_repeatedly_pos.py
--- a/tools/xm-test/tests/block-create/04_block_attach_device_repeatedly_pos.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/04_block_attach_device_repeatedly_pos.py Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/05_block_attach_and_dettach_device_repeatedly_pos.py
--- a/tools/xm-test/tests/block-create/05_block_attach_and_dettach_device_repeatedly_pos.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/05_block_attach_and_dettach_device_repeatedly_pos.py     Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/06_block_attach_baddomain_neg.py
--- a/tools/xm-test/tests/block-create/06_block_attach_baddomain_neg.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/06_block_attach_baddomain_neg.py Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 status, output = traceCommand("xm block-attach NOT-EXIST phy:ram1 sdb1 w")
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/07_block_attach_baddevice_neg.py
--- a/tools/xm-test/tests/block-create/07_block_attach_baddevice_neg.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/07_block_attach_baddevice_neg.py Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/08_block_attach_bad_filedevice_neg.py
--- a/tools/xm-test/tests/block-create/08_block_attach_bad_filedevice_neg.py    Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/08_block_attach_bad_filedevice_neg.py    Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/09_block_attach_and_dettach_device_check_data_pos.py
--- a/tools/xm-test/tests/block-create/09_block_attach_and_dettach_device_check_data_pos.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/09_block_attach_and_dettach_device_check_data_pos.py     Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/10_block_attach_dettach_multiple_devices.py
--- a/tools/xm-test/tests/block-create/10_block_attach_dettach_multiple_devices.py      Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/10_block_attach_dettach_multiple_devices.py      Tue Jan 31 10:49:51 2006
@@ -46,8 +46,8 @@
 
     return 0, None
        
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/11_block_attach_shared_dom0.py
--- a/tools/xm-test/tests/block-create/11_block_attach_shared_dom0.py   Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/11_block_attach_shared_dom0.py   Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 # Mount /dev/ram0
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-create/12_block_attach_shared_domU.py
--- a/tools/xm-test/tests/block-create/12_block_attach_shared_domU.py   Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-create/12_block_attach_shared_domU.py   Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-attach not supported for HVM domains")
 
 config = {"disk":"phy:/dev/ram0,hda1,w"}
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-destroy/01_block-destroy_btblock_pos.py
--- a/tools/xm-test/tests/block-destroy/01_block-destroy_btblock_pos.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-destroy/01_block-destroy_btblock_pos.py Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-detach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-detach not supported for HVM domains")
 
 config = {"disk":"phy:/dev/ram0,hda1,w"}
 domain = XmTestDomain(extraConfig=config)
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-destroy/02_block-destroy_rtblock_pos.py
--- a/tools/xm-test/tests/block-destroy/02_block-destroy_rtblock_pos.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-destroy/02_block-destroy_rtblock_pos.py Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-detach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-detach not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-destroy/04_block-destroy_nonattached_neg.py
--- a/tools/xm-test/tests/block-destroy/04_block-destroy_nonattached_neg.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-destroy/04_block-destroy_nonattached_neg.py     Tue Jan 31 10:49:51 2006
@@ -7,8 +7,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-detach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-detach not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-destroy/05_block-destroy_byname_pos.py
--- a/tools/xm-test/tests/block-destroy/05_block-destroy_byname_pos.py  Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-destroy/05_block-destroy_byname_pos.py  Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-detach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-detach not supported for HVM domains")
 
 config = {"disk":"phy:/dev/ram0,hda1,w"}
 domain = XmTestDomain(extraConfig=config)
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py
--- a/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py      Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-destroy/06_block-destroy_check_list_pos.py      Tue Jan 31 10:49:51 2006
@@ -26,8 +26,8 @@
     else:
         return False
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-detach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-detach not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-list/01_block-list_pos.py
--- a/tools/xm-test/tests/block-list/01_block-list_pos.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-list/01_block-list_pos.py       Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-list not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-list not supported for HVM domains")
 
 config = {"disk":"phy:/dev/ram0,hda1,w"}
 domain = XmTestDomain(extraConfig=config)
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-list/02_block-list_attachbd_pos.py
--- a/tools/xm-test/tests/block-list/02_block-list_attachbd_pos.py      Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-list/02_block-list_attachbd_pos.py      Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-list not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-list not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-list/03_block-list_anotherbd_pos.py
--- a/tools/xm-test/tests/block-list/03_block-list_anotherbd_pos.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-list/03_block-list_anotherbd_pos.py     Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-list not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-list not supported for HVM domains")
 
 config = {"disk":"phy:/dev/ram0,hda1,w"}
 domain = XmTestDomain(extraConfig=config)
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-list/04_block-list_nodb_pos.py
--- a/tools/xm-test/tests/block-list/04_block-list_nodb_pos.py  Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-list/04_block-list_nodb_pos.py  Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-list not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-list not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/block-list/06_block-list_checkremove_pos.py
--- a/tools/xm-test/tests/block-list/06_block-list_checkremove_pos.py   Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/block-list/06_block-list_checkremove_pos.py   Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Block-list not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Block-list not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/create/14_create_blockroot_pos.py
--- a/tools/xm-test/tests/create/14_create_blockroot_pos.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/create/14_create_blockroot_pos.py     Tue Jan 31 10:49:51 2006
@@ -17,7 +17,7 @@
 # if verbose:
 #     print "Using %s" % output
 
-if ENABLE_VMX_SUPPORT:
+if ENABLE_HVM_SUPPORT:
     domain = XmTestDomain(name="14_create_blockroot")
 else:
     config = {"memory" : "64",
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/memmax/01_memmax_badparm_neg.py
--- a/tools/xm-test/tests/memmax/01_memmax_badparm_neg.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/memmax/01_memmax_badparm_neg.py       Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Mem-max not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Mem-max not supported for HVM domains")
 
 status, output = traceCommand("xm mem-max")
 eyecatcher = "Error:"
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/memset/01_memset_basic_pos.py
--- a/tools/xm-test/tests/memset/01_memset_basic_pos.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/memset/01_memset_basic_pos.py Tue Jan 31 10:49:51 2006
@@ -20,8 +20,8 @@
 import time 
 from XmTestLib import * 
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Mem-set not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Mem-set not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain() 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/memset/02_memset_badparm_neg.py
--- a/tools/xm-test/tests/memset/02_memset_badparm_neg.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/memset/02_memset_badparm_neg.py       Tue Jan 31 10:49:51 2006
@@ -18,8 +18,8 @@
 
 from XmTestLib import * 
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Mem-set not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Mem-set not supported for HVM domains")
 
 # destroy no parm input - negative test
 status, output = traceCommand("xm mem-set")
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/memset/03_memset_random_pos.py
--- a/tools/xm-test/tests/memset/03_memset_random_pos.py        Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/memset/03_memset_random_pos.py        Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Mem-set not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Mem-set not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/memset/04_memset_smallmem_pos.py
--- a/tools/xm-test/tests/memset/04_memset_smallmem_pos.py      Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/memset/04_memset_smallmem_pos.py      Tue Jan 31 10:49:51 2006
@@ -5,8 +5,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Mem-set not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Mem-set not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/migrate/01_migrate_localhost_pos.py
--- a/tools/xm-test/tests/migrate/01_migrate_localhost_pos.py   Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/migrate/01_migrate_localhost_pos.py   Tue Jan 31 10:49:51 2006
@@ -17,8 +17,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Migrate currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Migrate currently not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/network-attach/01_network_attach_pos.py
--- a/tools/xm-test/tests/network-attach/01_network_attach_pos.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/network-attach/01_network_attach_pos.py       Tue Jan 31 10:49:51 2006
@@ -8,8 +8,8 @@
 from XmTestLib import *
 from network_utils import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Network-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Network-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/network-attach/02_network_attach_detach_pos.py
--- a/tools/xm-test/tests/network-attach/02_network_attach_detach_pos.py        Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/network-attach/02_network_attach_detach_pos.py        Tue Jan 31 10:49:51 2006
@@ -10,8 +10,8 @@
 from XmTestLib import *
 from network_utils import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Network-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Network-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/network-attach/03_network_attach_detach_multiple_pos.py
--- a/tools/xm-test/tests/network-attach/03_network_attach_detach_multiple_pos.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/network-attach/03_network_attach_detach_multiple_pos.py       Tue Jan 31 10:49:51 2006
@@ -10,8 +10,8 @@
 from XmTestLib import *
 from network_utils import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Network-attach not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Network-attach not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/restore/01_restore_basic_pos.py
--- a/tools/xm-test/tests/restore/01_restore_basic_pos.py       Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/restore/01_restore_basic_pos.py       Tue Jan 31 10:49:51 2006
@@ -12,8 +12,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Restore currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Restore currently not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/restore/02_restore_badparm_neg.py
--- a/tools/xm-test/tests/restore/02_restore_badparm_neg.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/restore/02_restore_badparm_neg.py     Tue Jan 31 10:49:51 2006
@@ -12,8 +12,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Restore currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Restore currently not supported for HVM domains")
 
 status, output = traceCommand("xm restore -x")
 eyecatcher1 = "Error:"
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/restore/03_restore_badfilename_neg.py
--- a/tools/xm-test/tests/restore/03_restore_badfilename_neg.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/restore/03_restore_badfilename_neg.py Tue Jan 31 10:49:51 2006
@@ -12,8 +12,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Restore currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Restore currently not supported for HVM domains")
 
 status, output = traceCommand("xm restore /tmp/NON_EXIST")
 eyecatcher1 = "Error:"
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/restore/04_restore_withdevices_pos.py
--- a/tools/xm-test/tests/restore/04_restore_withdevices_pos.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/restore/04_restore_withdevices_pos.py Tue Jan 31 10:49:51 2006
@@ -7,8 +7,8 @@
 
 import re
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Restore currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Restore currently not supported for HVM domains")
 
 config = {"disk": ["phy:/dev/ram0,hda1,w", "phy:/dev/ram1,hdb2,w"],
           "vif":  ['', '']}
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/save/01_save_basic_pos.py
--- a/tools/xm-test/tests/save/01_save_basic_pos.py     Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/save/01_save_basic_pos.py     Tue Jan 31 10:49:51 2006
@@ -7,8 +7,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Save currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Save currently not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/save/02_save_badparm_neg.py
--- a/tools/xm-test/tests/save/02_save_badparm_neg.py   Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/save/02_save_badparm_neg.py   Tue Jan 31 10:49:51 2006
@@ -12,8 +12,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Save currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Save currently not supported for HVM domains")
 
 status, output = traceCommand("xm save -x")
 eyecatcher1 = "Error:"
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/save/03_save_bogusfile_neg.py
--- a/tools/xm-test/tests/save/03_save_bogusfile_neg.py Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/save/03_save_bogusfile_neg.py Tue Jan 31 10:49:51 2006
@@ -10,8 +10,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Save currently not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Save currently not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/sysrq/01_sysrq_basic_neg.py
--- a/tools/xm-test/tests/sysrq/01_sysrq_basic_neg.py   Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/sysrq/01_sysrq_basic_neg.py   Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Sysrq not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Sysrq not supported for HVM domains")
 
 status, output = traceCommand("xm sysrq does_not_exist s");
 
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/sysrq/02_sysrq_sync_pos.py
--- a/tools/xm-test/tests/sysrq/02_sysrq_sync_pos.py    Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/sysrq/02_sysrq_sync_pos.py    Tue Jan 31 10:49:51 2006
@@ -9,8 +9,8 @@
 
 from XmTestLib import *
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Sysrq not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Sysrq not supported for HVM domains")
 
 # Create a domain (default XmTestDomain, with our ramdisk)
 domain = XmTestDomain()
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/xm-test/tests/sysrq/03_sysrq_withreboot_pos.py
--- a/tools/xm-test/tests/sysrq/03_sysrq_withreboot_pos.py      Mon Jan 30 17:51:35 2006
+++ b/tools/xm-test/tests/sysrq/03_sysrq_withreboot_pos.py      Tue Jan 31 10:49:51 2006
@@ -7,8 +7,8 @@
 
 import time
 
-if ENABLE_VMX_SUPPORT:
-    SKIP("Sysrq not supported for VMX domains")
+if ENABLE_HVM_SUPPORT:
+    SKIP("Sysrq not supported for HVM domains")
 
 domain = XmTestDomain()
 
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/Rules.mk
--- a/xen/Rules.mk      Mon Jan 30 17:51:35 2006
+++ b/xen/Rules.mk      Tue Jan 31 10:49:51 2006
@@ -53,6 +53,9 @@
 CFLAGS += -g -DVERBOSE
 endif
 
+# There is no real reason to compile without it
+CFLAGS += -g
+
 ifeq ($(crash_debug),y)
 CFLAGS += -g -DCRASH_DEBUG
 endif
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/ia64/Makefile
--- a/xen/arch/ia64/Makefile    Mon Jan 30 17:51:35 2006
+++ b/xen/arch/ia64/Makefile    Tue Jan 31 10:49:51 2006
@@ -15,7 +15,7 @@
 OBJS += vmx_init.o vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o\
        vmx_phy_mode.o vmx_utility.o vmx_interrupt.o vmx_entry.o vmmu.o \
        vtlb.o mmio.o vlsapic.o vmx_hypercall.o mm.o vmx_support.o \
-       pal_emul.o vmx_irq_ia64.o vmx_vioapic.o
+       pal_emul.o vmx_irq_ia64.o hvm_vioapic.o
 
 # lib files from xen/arch/ia64/linux/ (linux/arch/ia64/lib)
 OBJS +=        bitop.o clear_page.o flush.o copy_page_mck.o \
@@ -94,13 +94,19 @@
         || ln -s $(BASEDIR)/include/xen $(BASEDIR)/include/linux
        [ -e $(BASEDIR)/include/asm-ia64/xen ] \
         || ln -s $(BASEDIR)/include/asm-ia64/linux 
$(BASEDIR)/include/asm-ia64/xen
-# Link to DM file in Xen for ia64/vti
-       [ -e $(BASEDIR)/include/asm-ia64/vmx_vpic.h ] \
-        || ln -s ../../include/asm-x86/vmx_vpic.h 
$(BASEDIR)/include/asm-ia64/vmx_vpic.h
-       [ -e $(BASEDIR)/include/asm-ia64/vmx_vioapic.h ] \
-        || ln -s ../../include/asm-x86/vmx_vioapic.h 
$(BASEDIR)/include/asm-ia64/vmx_vioapic.h
-       [ -e $(BASEDIR)/arch/ia64/vmx/vmx_vioapic.c ] \
-        || ln -s ../../../arch/x86/dm/vmx_vioapic.c 
$(BASEDIR)/arch/ia64/vmx/vmx_vioapic.c
+# Link to HVM files in Xen for ia64/vti
+       [ -e $(BASEDIR)/include/asm-ia64/hvm ] \
+        || mkdir $(BASEDIR)/include/asm-ia64/hvm
+       [ -e $(BASEDIR)/include/asm-ia64/hvm/support.h ] \
+        || ln -s ../../../include/asm-x86/hvm/support.h $(BASEDIR)/include/asm-ia64/hvm/support.h
+       [ -e $(BASEDIR)/include/asm-ia64/hvm/io.h ] \
+        || ln -s ../../../include/asm-x86/hvm/io.h $(BASEDIR)/include/asm-ia64/hvm/io.h
+       [ -e $(BASEDIR)/include/asm-ia64/hvm/vpic.h ] \
+        || ln -s ../../../include/asm-x86/hvm/vpic.h $(BASEDIR)/include/asm-ia64/hvm/vpic.h
+       [ -e $(BASEDIR)/include/asm-ia64/hvm/vioapic.h ] \
+        || ln -s ../../../include/asm-x86/hvm/vioapic.h $(BASEDIR)/include/asm-ia64/hvm/vioapic.h
+       [ -e $(BASEDIR)/arch/ia64/vmx/hvm_vioapic.c ] \
+        || ln -s ../../../arch/x86/hvm/vioapic.c $(BASEDIR)/arch/ia64/vmx/hvm_vioapic.c
 # Solve circular reference on asm-offsets.h
        [ -f $(BASEDIR)/include/asm-ia64/asm-offsets.h ] \
          || echo "#define IA64_TASK_SIZE 0" > $(BASEDIR)/include/asm-ia64/asm-offsets.h
@@ -136,6 +142,8 @@
 clean:
        rm -f *.o *~ core  xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s map.out
        rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
+       rm -f $(BASEDIR)/arch/ia64/vmx/hvm_*.c
+       rm -rf $(BASEDIR)/include/asm-ia64/hvm
        rm -f linux/lib/*.o
 
 .PHONY: default clean
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/ia64/vmx/mmio.c
--- a/xen/arch/ia64/vmx/mmio.c  Mon Jan 30 17:51:35 2006
+++ b/xen/arch/ia64/vmx/mmio.c  Tue Jan 31 10:49:51 2006
@@ -207,7 +207,7 @@
 extern struct vmx_mmio_handler vioapic_mmio_handler;
 static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, int dir)
 {
-    struct virutal_platform_def *v_plat;
+    struct virtual_platform_def *v_plat;
     //mmio_type_t iot;
     unsigned long iot;
     struct vmx_mmio_handler *vioapic_handler = &vioapic_mmio_handler;
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/ia64/vmx/vlsapic.c
--- a/xen/arch/ia64/vmx/vlsapic.c       Mon Jan 30 17:51:35 2006
+++ b/xen/arch/ia64/vmx/vlsapic.c       Tue Jan 31 10:49:51 2006
@@ -38,7 +38,7 @@
 #include <asm/vmx_pal_vsa.h>
 #include <asm/kregs.h>
 #include <asm/vmx_platform.h>
-#include <asm/vmx_vioapic.h>
+#include <asm/hvm/vioapic.h>
 
 //u64  fire_itc;
 //u64  fire_itc2;
@@ -278,7 +278,7 @@
        do {
            irqs = *(volatile uint16_t*)virq_line;
        } while ((uint16_t)cmpxchg(virq_line, irqs, 0) != irqs);
-       vmx_vioapic_do_irqs(v->domain, irqs);
+       hvm_vioapic_do_irqs(v->domain, irqs);
     }
 
     virq_line = &spg->pic_clear_irr;
@@ -286,7 +286,7 @@
        do {
            irqs = *(volatile uint16_t*)virq_line;
        } while ((uint16_t)cmpxchg(virq_line, irqs, 0) != irqs);
-       vmx_vioapic_do_irqs_clear(v->domain, irqs);
+       hvm_vioapic_do_irqs_clear(v->domain, irqs);
     }
 }
 
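Both loops above use the same lock-free idiom. A minimal sketch of it, assuming
Xen's cmpxchg() macro (the helper name here is hypothetical, not in the patch):

    /* Atomically snapshot-and-clear a pending-IRQ mask that other CPUs may
     * still be setting: the cmpxchg succeeds, and zeroes the line, only if
     * no new bits arrived between the read and the exchange. */
    static inline uint16_t virq_line_drain(volatile uint16_t *line)
    {
        uint16_t irqs;
        do {
            irqs = *line;
        } while ((uint16_t)cmpxchg(line, irqs, 0) != irqs);
        return irqs;
    }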
@@ -300,7 +300,7 @@
     spg->pic_clear_irr = 0;
 }
 
-int ioapic_match_logical_addr(vmx_vioapic_t *s, int number, uint16_t dest)
+int ioapic_match_logical_addr(hvm_vioapic_t *s, int number, uint16_t dest)
 {
     return (VLAPIC_ID(s->lapic_info[number]) == dest);
 }
@@ -311,14 +311,14 @@
                                uint32_t bitmap)
 {
     uint8_t bit;
-    vmx_vioapic_t *s;
+    hvm_vioapic_t *s;
     
     if (!bitmap) {
        printk("<apic_round_robin> no bit on bitmap\n");
        return NULL;
     }
 
-    s = &d->arch.vmx_platform.vmx_vioapic;
+    s = &d->arch.vmx_platform.vioapic;
     for (bit = 0; bit < s->lapic_count; bit++) {
        if (bitmap & (1 << bit))
            return s->lapic_info[bit];
@@ -351,7 +351,7 @@
 
 #ifdef V_IOSAPIC_READY
     vcpu->arch.arch_vmx.vlapic.vcpu = vcpu;
-    vmx_vioapic_add_lapic(&vcpu->arch.arch_vmx.vlapic, vcpu);
+    hvm_vioapic_add_lapic(&vcpu->arch.arch_vmx.vlapic, vcpu);
 #endif
     DPRINTK("VLSAPIC inservice base=%lp\n", &VLSAPIC_INSVC(vcpu,0) );
 }
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/ia64/vmx/vmx_init.c
--- a/xen/arch/ia64/vmx/vmx_init.c      Mon Jan 30 17:51:35 2006
+++ b/xen/arch/ia64/vmx/vmx_init.c      Tue Jan 31 10:49:51 2006
@@ -48,7 +48,7 @@
 #include <asm/vmx.h>
 #include <xen/mm.h>
 #include <public/arch-ia64.h>
-#include <asm/vmx_vioapic.h>
+#include <asm/hvm/vioapic.h>
 
 /* Global flag to identify whether Intel vmx feature is on */
 u32 vmx_enabled = 0;
@@ -394,7 +394,7 @@
        vmx_virq_line_init(d);
 
        /* Initialize iosapic model within hypervisor */
-       vmx_vioapic_init(d);
-}
-
-
+       hvm_vioapic_init(d);
+}
+
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/ia64/vmx/vmx_vcpu.c
--- a/xen/arch/ia64/vmx/vmx_vcpu.c      Mon Jan 30 17:51:35 2006
+++ b/xen/arch/ia64/vmx/vmx_vcpu.c      Tue Jan 31 10:49:51 2006
@@ -198,7 +198,7 @@
 }
 
 
-struct virutal_platform_def *
+struct virtual_platform_def *
 vmx_vcpu_get_plat(VCPU *vcpu)
 {
     return &(vcpu->domain->arch.vmx_platform);
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/Makefile     Tue Jan 31 10:49:51 2006
@@ -4,10 +4,14 @@
 OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
 OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c))
-OBJS += $(patsubst %.c,%.o,$(wildcard dm/*.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard genapic/*.c))
 OBJS += $(patsubst %.c,%.o,$(wildcard cpu/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard hvm/*.c))
+OBJS += $(patsubst %.c,%.o,$(wildcard hvm/vmx/*.c))
+OBJS += $(patsubst %.S,%.o,$(wildcard hvm/vmx/$(TARGET_SUBARCH)/*.S))
+OBJS += $(patsubst %.c,%.o,$(wildcard hvm/svm/*.c))
+OBJS += $(patsubst %.S,%.o,$(wildcard hvm/svm/$(TARGET_SUBARCH)/*.S))
 
 ifeq ($(TARGET_SUBARCH),x86_64) 
 OBJS := $(subst cpu/centaur.o,,$(OBJS))
@@ -74,9 +78,15 @@
        rm -f x86_64/*.o x86_64/*~ x86_64/core
        rm -f mtrr/*.o mtrr/*~ mtrr/core
        rm -f acpi/*.o acpi/*~ acpi/core
-       rm -f dm/*.o dm/*~ dm/core
        rm -f genapic/*.o genapic/*~ genapic/core
        rm -f cpu/*.o cpu/*~ cpu/core
+       rm -f hvm/*.o hvm/*~ hvm/core
+       rm -f hvm/vmx/*.o hvm/vmx/*~ hvm/vmx/core
+       rm -f hvm/vmx/x86_32/*.o hvm/vmx/x86_32/*~ hvm/vmx/x86_32/core
+       rm -f hvm/vmx/x86_64/*.o hvm/vmx/x86_64/*~ hvm/vmx/x86_64/core
+       rm -f hvm/svm/*.o hvm/svm/*~ hvm/svm/core
+       rm -f hvm/svm/x86_32/*.o hvm/svm/x86_32/*~ hvm/svm/x86_32/core
+       rm -f hvm/svm/x86_64/*.o hvm/svm/x86_64/*~ hvm/svm/x86_64/core
        rm -f xen.lds
 
 .PHONY: default clean
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/cpu/amd.c    Tue Jan 31 10:49:51 2006
@@ -3,11 +3,19 @@
 #include <xen/bitops.h>
 #include <xen/mm.h>
 #include <xen/smp.h>
+#include <xen/sched.h>
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
+#include <asm/hvm/vcpu.h>
+#include <asm/hvm/support.h>
+
 
 #include "cpu.h"
+
+
+#define                AMD_C1_CLOCK_RAMP                       0x80000084
+#define                AMD_ADVPM_TSC_INVARIANT         0x80000007
 
 /*
  * amd_flush_filter={on,off}. Forcibly Enable or disable the TLB flush
@@ -40,6 +48,99 @@
  
 extern void vide(void);
 __asm__(".text\n.align 4\nvide: ret");
+
+
+/*
+ *     Check if C1-Clock ramping is enabled in PMM7.CpuLowPwrEnh.
+ *     On 8th-generation cores only. Assume the BIOS has set up
+ *     all Northbridges equivalently.
+ */
+
+static int c1_ramp_8gen(void) 
+{
+       u32 l;
+
+       /*      Read dev=0x18, function = 3, offset=0x87  */
+       l = AMD_C1_CLOCK_RAMP;
+       /*      fill in dev (18) + function (3) */
+       /*      direct cfc/cf8 should be safe here */
+       l += (((0x18) << 3) + 0x3) << 8; 
+       outl(l, 0xcf8);
+       return (1 & (inl(0xcfc) >> 24));
+}
+
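The address arithmetic in c1_ramp_8gen() is the standard type-1 PCI
configuration-space access. A sketch of the encoding it relies on (the helper
name is hypothetical; field layout per the PCI spec):

    /* Type-1 config cycle: write this to port 0xcf8, then read port 0xcfc
     * to get the dword at 'offset' of the given bus/device/function. */
    static inline u32 pci_cf8_addr(int bus, int dev, int func, int offset)
    {
        return 0x80000000u                       /* enable bit           */
             | (bus << 16) | (dev << 11)         /* geographic address   */
             | (func << 8) | (offset & 0xfc);    /* dword-aligned offset */
    }

    /* c1_ramp_8gen() thus reads pci_cf8_addr(0, 0x18, 3, 0x84) and tests
     * bit 24 of the dword, i.e. bit 0 of the byte at offset 0x87 (PMM7). */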
+/*
+ * Returns TRUE if it is OK to use the TSC.
+ */
+
+static int use_amd_tsc(struct cpuinfo_x86 *c) 
+{ 
+       if (c->x86 < 0xf) {
+               /*
+                *      TSC drift doesn't exist on 7th Gen or less
+                *      However, OS still needs to consider effects
+                *      of P-state changes on TSC
+               */
+               return 1;
+       } else if ( cpuid_edx(AMD_ADVPM_TSC_INVARIANT) & 0x100 ) {
+               /*
+                *      CPUID.AdvPowerMgmtInfo.TscInvariant
+                *      EDX bit 8, 8000_0007
+                *      Invariant TSC on 8th Gen or newer, use it
+                *      (assume all cores have invariant TSC)
+               */
+               return 1;
+       } else if ((mp_get_num_processors() == 1) && (c->x86_num_cores == 1)) {
+               /*
+                *      OK to use TSC on uni-processor-uni-core
+                *      However, OS still needs to consider effects
+                *      of P-state changes on TSC
+               */
+               return 1;
+       } else if ( (mp_get_num_processors() == 1) && (c->x86 == 0x0f) 
+                               && !c1_ramp_8gen()) {
+               /*
+                *      Use TSC on 8th Gen uni-proc with C1_ramp off 
+                *      However, OS still needs to consider effects
+                *      of P-state changes on TSC
+               */
+               return 1;
+       } else { 
+               return 0;
+       }
+}
+
+/*
+ *     Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh
+ *     On 8th-generation cores only. Assume the BIOS has set up
+ *     all Northbridges equivalently.
+ */
+
+static void amd_disable_c1_ramping(void) 
+{
+       u32 l, h;
+       int i;
+
+       for (i=0; i < NR_CPUS;i++) {
+               /* Read from the Northbridge for node x until we get invalid data */
+               /* fill in dev (18 + cpu#) + function (3) */
+               l = AMD_C1_CLOCK_RAMP + ((((0x18 + i) << 3) + 0x3) << 8);
+               /*      direct cfc/cf8 should be safe here */
+               outl(l, 0xcf8);
+               h = inl(0xcfc);
+               if (h != 0xFFFFFFFF) {
+                       h &= 0xFCFFFFFF; /* clears pmm7[1:0]  */
+                       outl(l, 0xcf8);
+                       outl(h, 0xcfc);
+                       printk ("AMD: Disabling C1 Clock Ramping Node #%x\n",i);
+               }
+               else {
+                       i = NR_CPUS;
+               }
+                       
+       }
+       return;
+}
 
 static void __init init_amd(struct cpuinfo_x86 *c)
 {
@@ -245,6 +346,18 @@
                printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
                       cpu, c->x86_num_cores, cpu_core_id[cpu]);
        }
+#endif
+       /*
+        * Prevent TSC drift on platforms that are not single-processor, single-core
+        */
+       if ( !use_amd_tsc(c) && (c->x86 == 0x0f) && c1_ramp_8gen() && 
+                       (smp_processor_id() == 0)) {
+               /* Disable c1 Clock Ramping on all cores */
+               amd_disable_c1_ramping();
+       }
+
+#ifdef CONFIG_SVM
+       start_svm();
 #endif
 }
 
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/cpu/intel.c  Tue Jan 31 10:49:51 2006
@@ -10,7 +10,9 @@
 #include <asm/mpspec.h>
 #include <asm/apic.h>
 #include <mach_apic.h>
-#include <asm/vmx_vmcs.h>
+#include <asm/hvm/vpit.h>
+#include <asm/hvm/vcpu.h>
+#include <asm/hvm/support.h>
 
 #include "cpu.h"
 
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/dom0_ops.c   Tue Jan 31 10:49:51 2006
@@ -20,6 +20,8 @@
 #include <xen/iocap.h>
 #include <asm/shadow.h>
 #include <asm/irq.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
 #include <asm/processor.h>
 #include <public/sched_ctl.h>
 
@@ -448,15 +450,12 @@
 void arch_getdomaininfo_ctxt(
     struct vcpu *v, struct vcpu_guest_context *c)
 {
-    extern void save_vmx_cpu_user_regs(struct cpu_user_regs *);
-
     memcpy(c, &v->arch.guest_context, sizeof(*c));
 
-    if ( VMX_DOMAIN(v) )
-    {
-        save_vmx_cpu_user_regs(&c->user_regs);
-        __vmread(CR0_READ_SHADOW, &c->ctrlreg[0]);
-        __vmread(CR4_READ_SHADOW, &c->ctrlreg[4]);
+    if ( HVM_DOMAIN(v) )
+    {
+       hvm_store_cpu_guest_regs(v, &c->user_regs);
+       hvm_store_cpu_guest_ctrl_regs(v, c->ctrlreg);
     }
     else
     {
@@ -470,8 +469,8 @@
         c->flags |= VGCF_I387_VALID;
     if ( KERNEL_MODE(v, &v->arch.guest_context.user_regs) )
         c->flags |= VGCF_IN_KERNEL;
-    if (VMX_DOMAIN(v))
-        c->flags |= VGCF_VMX_GUEST;
+    if ( HVM_DOMAIN(v) )
+        c->flags |= VGCF_HVM_GUEST;
 
     c->ctrlreg[3] = pagetable_get_paddr(v->arch.guest_table);
 
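The hvm_store_cpu_guest_regs()/hvm_store_cpu_guest_ctrl_regs() calls above are
the abstraction layer at work: dom0_ops no longer knows whether the guest runs
under VT-x or SVM. A minimal sketch of the dispatch, with member names inferred
from the call sites (the real table lives in xen/include/asm-x86/hvm/hvm.h):

    struct hvm_function_table {
        void (*store_cpu_guest_regs)(struct vcpu *v, struct cpu_user_regs *r);
        void (*store_cpu_guest_ctrl_regs)(struct vcpu *v, unsigned long crs[8]);
        /* ... disable, initialize/relinquish guest resources, MSR save/load ... */
    };

    /* Populated at boot by the VT-x or SVM startup code (cf. start_svm() below). */
    extern struct hvm_function_table hvm_funcs;

    static inline void
    hvm_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *r)
    {
        hvm_funcs.store_cpu_guest_regs(v, r);
    }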
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/domain.c     Tue Jan 31 10:49:51 2006
@@ -35,7 +35,8 @@
 #include <asm/shadow.h>
 #include <xen/console.h>
 #include <xen/elf.h>
-#include <asm/vmx.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
 #include <asm/msr.h>
 #include <xen/kernel.h>
 #include <xen/multicall.h>
@@ -153,8 +154,7 @@
      */
     smp_send_stop();
     disable_IO_APIC();
-
-    stop_vmx();
+    hvm_disable();
 
     /* Rebooting needs to touch the page at absolute address 0. */
     *((unsigned short *)__va(0x472)) = reboot_mode;
@@ -354,26 +354,26 @@
      * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
      * If SS RPL or DPL differs from CS RPL then we'll #GP.
      */
-    if ( !(c->flags & VGCF_VMX_GUEST) )
+    if ( !(c->flags & VGCF_HVM_GUEST) )
     {
         if ( ((c->user_regs.cs & 3) == 0) ||
              ((c->user_regs.ss & 3) == 0) )
             return -EINVAL;
     }
     else if ( !hvm_enabled )
-        return -EINVAL;
+      return -EINVAL;
 
     clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
     if ( c->flags & VGCF_I387_VALID )
         set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
 
     v->arch.flags &= ~TF_kernel_mode;
-    if ( (c->flags & VGCF_IN_KERNEL) || (c->flags & VGCF_VMX_GUEST) )
+    if ( (c->flags & VGCF_IN_KERNEL) || (c->flags & VGCF_HVM_GUEST) )
         v->arch.flags |= TF_kernel_mode;
 
     memcpy(&v->arch.guest_context, c, sizeof(*c));
 
-    if ( !(c->flags & VGCF_VMX_GUEST) )
+    if ( !(c->flags & VGCF_HVM_GUEST) )
     {
         /* IOPL privileges are virtualised. */
         v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3;
@@ -384,9 +384,7 @@
     }
     else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
     {
-        return modify_vmcs(
-            &v->arch.arch_vmx,
-            &v->arch.guest_context.user_regs);
+       hvm_modify_guest_state(v);
     }
 
     if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
@@ -418,7 +416,7 @@
             return -EINVAL;
         }
     }
-    else if ( !(c->flags & VGCF_VMX_GUEST) )
+    else if ( !(c->flags & VGCF_HVM_GUEST) )
     {
         if ( !get_page_and_type(pfn_to_page(phys_basetab>>PAGE_SHIFT), d,
                                 PGT_base_page_table) )
@@ -428,14 +426,17 @@
         }
     }
 
-    if ( c->flags & VGCF_VMX_GUEST )
-    {
-        /* VMX uses the initially provided page tables as the P2M map. */
+    if ( c->flags & VGCF_HVM_GUEST )
+    {
+        /* HVM uses the initially provided page tables as the P2M map. */
         if ( !pagetable_get_paddr(d->arch.phys_table) )
             d->arch.phys_table = v->arch.guest_table;
         v->arch.guest_table = mk_pagetable(0);
 
-        vmx_final_setup_guest(v);
+       if (!hvm_initialize_guest_resources(v))
+            return -EINVAL;
+          
+       hvm_switch_on = 1;
     }
 
     update_pagetables(v);
@@ -611,8 +612,8 @@
     struct cpu_user_regs      *regs = &ctxt->user_regs;
     unsigned int dirty_segment_mask = 0;
 
-    if ( VMX_DOMAIN(v) )
-        rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
+    if ( HVM_DOMAIN(v) )
+       hvm_save_segments(v);
 
     __asm__ __volatile__ ( "mov %%ds,%0" : "=m" (regs->ds) );
     __asm__ __volatile__ ( "mov %%es,%0" : "=m" (regs->es) );
@@ -704,7 +705,7 @@
             loaddebug(&n->arch.guest_context, 7);
         }
 
-        if ( !VMX_DOMAIN(n) )
+        if ( !HVM_DOMAIN(n) )
         {
             set_int80_direct_trap(n);
             switch_kernel_stack(n, cpu);
@@ -764,15 +765,16 @@
         /* Re-enable interrupts before restoring state which may fault. */
         local_irq_enable();
 
-        if ( VMX_DOMAIN(next) )
-        {
-            vmx_restore_msrs(next);
+        if ( HVM_DOMAIN(next) )
+        {
+            hvm_restore_msrs(next);
         }
         else
         {
             load_LDT(next);
             load_segments(next);
-            vmx_load_msrs(next);
+           if ( HVM_DOMAIN(next) )
+                hvm_load_msrs(next);
         }
     }
 
@@ -962,7 +964,8 @@
             v->arch.guest_table_user = mk_pagetable(0);
         }
 
-        vmx_relinquish_resources(v);
+       if ( HVM_DOMAIN(v) )
+            hvm_relinquish_guest_resources(v);
     }
 
     shadow_mode_disable(d);
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/i387.c
--- a/xen/arch/x86/i387.c       Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/i387.c       Tue Jan 31 10:49:51 2006
@@ -12,6 +12,7 @@
 #include <xen/sched.h>
 #include <asm/current.h>
 #include <asm/processor.h>
+#include <asm/hvm/support.h>
 #include <asm/i387.h>
 
 void init_fpu(void)
@@ -29,7 +30,7 @@
      * This causes us to set the real flag, so we'll need
      * to temporarily clear it while saving f-p state.
      */
-    if ( VMX_DOMAIN(tsk) || (tsk->arch.guest_context.ctrlreg[0] & X86_CR0_TS) )
+    if ( HVM_DOMAIN(tsk) || (tsk->arch.guest_context.ctrlreg[0] & X86_CR0_TS) )
         clts();
 
     if ( cpu_has_fxsr )
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/mpparse.c
--- a/xen/arch/x86/mpparse.c    Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/mpparse.c    Tue Jan 31 10:49:51 2006
@@ -730,6 +730,11 @@
         */
 }
 
+int __init mp_get_num_processors(void)
+{
+    return num_processors;
+}
+
 static int __init smp_scan_config (unsigned long base, unsigned long length)
 {
        unsigned int *bp = phys_to_virt(base);
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/shadow.c
--- a/xen/arch/x86/shadow.c     Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/shadow.c     Tue Jan 31 10:49:51 2006
@@ -226,7 +226,7 @@
             if (d->arch.ops->guest_paging_levels == PAGING_L2)
             {
 #if CONFIG_PAGING_LEVELS >= 3
-                /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1
+                /* For 32-bit HVM guest, 2 shadow L1s to simulate 1 guest L1
                  * So we need to allocate 2 contiguous shadow L1s each time.
                  */
                 page = alloc_domheap_pages(NULL, SL1_ORDER, 0);
@@ -602,7 +602,7 @@
 #if CONFIG_PAGING_LEVELS >=3
     if (d->arch.ops->guest_paging_levels == PAGING_L2)
     {
-        /* for 32-bit VMX guest on 64-bit or PAE host,
+        /* for 32-bit HVM guest on 64-bit or PAE host,
          * we need to update two L2 entries each time
          */
         if ( !get_shadow_ref(sl1mfn))
@@ -2838,7 +2838,7 @@
 /* 64-bit shadow-mode code testing */
 /****************************************************************************/
 /*
- * init_bl2() is for 32-bit VMX guest on 64-bit host
+ * init_bl2() is for 32-bit HVM guest on 64-bit host
  * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2
  */
 static inline unsigned long init_bl2(l4_pgentry_t *spl4e, unsigned long smfn)
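For reference, the arithmetic behind these shadow comments (32-bit non-PAE
guest shadowed on a 64-bit or PAE host):

    guest L2: 1024 entries x 4MB reach = 4GB -> 4 shadow L2s (512 x 2MB = 1GB each)
    guest L1: 1024 entries x 4KB reach = 4MB -> 2 shadow L1s (512 x 4KB = 2MB each)

hence init_bl2()'s single top-level shadow L4(L3) holding four shadow L2s, and
the two contiguous shadow L1s allocated per guest L1 earlier in this file.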
@@ -2938,7 +2938,7 @@
 #if CONFIG_PAGING_LEVELS >= 3
 /*
  * validate_bl2e_change()
- * The code is for 32-bit VMX gues on 64-bit host.
+ * The code is for 32-bit HVM guest on 64-bit host.
  * To sync guest L2.
  */
 
@@ -3078,7 +3078,7 @@
             shadow_map_into_current(v, va, PAGING_L3, PAGING_L4);
             __shadow_get_l4e(v, va, &sl4e);
         } else {
-            printk("For non VMX shadow, create_l1_shadow:%d\n", create_l2_shadow);
+            printk("For non HVM shadow, create_l1_shadow:%d\n", create_l2_shadow);
         }
     }
 
@@ -3089,7 +3089,7 @@
             shadow_map_into_current(v, va, PAGING_L2, PAGING_L3);
             __shadow_get_l3e(v, va, &sl3e);
         } else {
-            printk("For non VMX shadow, create_l1_shadow:%d\n", create_l2_shadow);
+            printk("For non HVM shadow, create_l1_shadow:%d\n", create_l2_shadow);
         }
          shadow_update_min_max(l4e_get_pfn(sl4e), l3_table_offset(va));
 
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_32/asm-offsets.c Tue Jan 31 10:49:51 2006
@@ -65,11 +65,24 @@
            arch.guest_context.kernel_ss);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();
+
+#ifdef CONFIG_SVM
+    OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
+    OFFSET(VCPU_svm_hsa_pa,  struct vcpu, arch.hvm_svm.host_save_pa);
+    OFFSET(VCPU_svm_vmcb, struct vcpu, arch.hvm_svm.vmcb);
+    OFFSET(VCPU_svm_vmexit_tsc, struct vcpu, arch.hvm_svm.vmexit_tsc);
+    BLANK();
+
+    OFFSET(VMCB_rax, struct vmcb_struct, rax);
+    OFFSET(VMCB_tsc_offset, struct vmcb_struct, tsc_offset);
+    BLANK();
+#endif
 
     OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
     OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
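These OFFSET() entries feed the usual asm-offsets mechanism: asm-offsets.c is
compiled, the markers are scraped out of the generated assembly into
asm-offsets.h, and the new SVM exit paths in exits.S can then use symbolic
constants such as VCPU_svm_vmcb_pa instead of hard-coded offsets. The macros
behind it are essentially:

    #define DEFINE(_sym, _val) \
        __asm__ __volatile__ ( "\n->" #_sym " %0 " #_val : : "i" (_val) )
    #define OFFSET(_sym, _str, _mem) \
        DEFINE(_sym, offsetof(_str, _mem))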
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_32/entry.S       Tue Jan 31 10:49:51 2006
@@ -72,124 +72,6 @@
         andl $~3,reg;            \
         movl (reg),reg;
 
-#ifdef CONFIG_VMX
-/*
- * At VMExit time the processor saves the guest selectors, esp, eip, 
- * and eflags. Therefore we don't save them, but simply decrement 
- * the kernel stack pointer to make it consistent with the stack frame 
- * at usual interruption time. The eflags of the host is not saved by VMX, 
- * and we set it to the fixed value.
- *
- * We also need the room, especially because orig_eax field is used 
- * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following:
- *   (10) u32 gs;                 
- *   (9)  u32 fs;
- *   (8)  u32 ds;
- *   (7)  u32 es;
- *               <- get_stack_bottom() (= HOST_ESP)
- *   (6)  u32 ss;
- *   (5)  u32 esp;
- *   (4)  u32 eflags;
- *   (3)  u32 cs;
- *   (2)  u32 eip;
- * (2/1)  u16 entry_vector;
- * (1/1)  u16 error_code;
- * However, get_stack_bottom() actually returns 20 bytes before the real
- * bottom of the stack to allow space for:
- * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
- */
-#define VMX_MONITOR_EFLAGS     0x202 /* IF on */
-#define NR_SKIPPED_REGS        6       /* See the above explanation */
-#define VMX_SAVE_ALL_NOSEGREGS \
-        pushl $VMX_MONITOR_EFLAGS; \
-        popf; \
-        subl $(NR_SKIPPED_REGS*4), %esp; \
-        movl $0, 0xc(%esp); /* eflags==0 identifies cpu_user_regs as VMX guest */ \
-        pushl %eax; \
-        pushl %ebp; \
-        pushl %edi; \
-        pushl %esi; \
-        pushl %edx; \
-        pushl %ecx; \
-        pushl %ebx;
-
-#define VMX_RESTORE_ALL_NOSEGREGS   \
-        popl %ebx;  \
-        popl %ecx;  \
-        popl %edx;  \
-        popl %esi;  \
-        popl %edi;  \
-        popl %ebp;  \
-        popl %eax;  \
-        addl $(NR_SKIPPED_REGS*4), %esp
-
-ENTRY(vmx_asm_vmexit_handler)
-        /* selectors are restored/saved by VMX */
-        VMX_SAVE_ALL_NOSEGREGS
-        call trace_vmexit
-        call vmx_vmexit_handler
-        jmp vmx_asm_do_resume
-
-.macro vmx_asm_common launch initialized
-1:
-/* vmx_test_all_events */
-        .if \initialized
-        GET_CURRENT(%ebx)
-/*test_all_events:*/
-        xorl %ecx,%ecx
-        notl %ecx
-        cli                             # tests must not race interrupts
-/*test_softirqs:*/  
-        movl VCPU_processor(%ebx),%eax
-        shl  $IRQSTAT_shift,%eax
-        test %ecx,irq_stat(%eax,1)
-        jnz 2f
-
-/* vmx_restore_all_guest */
-        call vmx_intr_assist
-        call load_cr2
-        call trace_vmentry
-        .endif
-        VMX_RESTORE_ALL_NOSEGREGS
-        /* 
-         * Check if we are going back to VMX-based VM
-         * By this time, all the setups in the VMCS must be complete.
-         */
-        .if \launch
-        /* VMLAUNCH */
-        .byte 0x0f,0x01,0xc2
-        pushf
-        call vm_launch_fail
-        .else
-        /* VMRESUME */
-        .byte 0x0f,0x01,0xc3
-        pushf
-        call vm_resume_fail
-        .endif
-        /* Should never reach here */
-        hlt
-
-        ALIGN
-        .if \initialized
-2:
-/* vmx_process_softirqs */
-        sti       
-        call do_softirq
-        jmp 1b
-        ALIGN
-        .endif
-.endm
-
-ENTRY(vmx_asm_do_launch)
-    vmx_asm_common 1 0
-
-ENTRY(vmx_asm_do_resume)
-    vmx_asm_common 0 1
-
-ENTRY(vmx_asm_do_relaunch)
-    vmx_asm_common 1 1
-
-#endif
 
         ALIGN
 restore_all_guest:
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_32/traps.c       Tue Jan 31 10:49:51 2006
@@ -10,74 +10,65 @@
 #include <xen/symbols.h>
 #include <asm/current.h>
 #include <asm/flushtlb.h>
-#include <asm/vmx.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
 
 /* All CPUs have their own IDT to allow int80 direct trap. */
 idt_entry_t *idt_tables[NR_CPUS] = { 0 };
 
 void show_registers(struct cpu_user_regs *regs)
 {
-    unsigned long ss, ds, es, fs, gs, cs;
-    unsigned long eip, esp, eflags, cr0, cr3;
+    struct cpu_user_regs faultregs;
+    unsigned long faultcrs[8];
     const char *context;
 
-    if ( VMX_DOMAIN(current) && (regs->eflags == 0) )
-    {
-        __vmread(GUEST_RIP, &eip);
-        __vmread(GUEST_RSP, &esp);
-        __vmread(GUEST_RFLAGS, &eflags);
-        __vmread(GUEST_SS_SELECTOR, &ss);
-        __vmread(GUEST_DS_SELECTOR, &ds);
-        __vmread(GUEST_ES_SELECTOR, &es);
-        __vmread(GUEST_FS_SELECTOR, &fs);
-        __vmread(GUEST_GS_SELECTOR, &gs);
-        __vmread(GUEST_CS_SELECTOR, &cs);
-        __vmread(CR0_READ_SHADOW, &cr0);
-        __vmread(GUEST_CR3, &cr3);
-        context = "vmx guest";
+    if ( HVM_DOMAIN(current) && regs->eflags == 0 )
+    {
+       context = "hvm";
+       hvm_load_cpu_guest_regs(current, &faultregs);
+       hvm_store_cpu_guest_ctrl_regs(current, faultcrs);
     }
     else
     {
-        eip    = regs->eip;
-        eflags = regs->eflags;
-        cr0    = read_cr0();
-        cr3    = read_cr3();
-
-        __asm__ ( "movl %%fs,%0 ; movl %%gs,%1" : "=r" (fs), "=r" (gs) );
-
+       faultregs = *regs;
         if ( GUEST_MODE(regs) )
         {
-            esp = regs->esp;
-            ss  = regs->ss & 0xffff;
-            ds  = regs->ds & 0xffff;
-            es  = regs->es & 0xffff;
-            cs  = regs->cs & 0xffff;
             context = "guest";
-        }
-        else
-        {
-            esp = (unsigned long)&regs->esp;
-            ss  = __HYPERVISOR_DS;
-            ds  = __HYPERVISOR_DS;
-            es  = __HYPERVISOR_DS;
-            cs  = __HYPERVISOR_CS;
+            faultregs.ss &= 0xFFFF;
+            faultregs.ds &= 0xFFFF;
+            faultregs.es &= 0xFFFF;
+            faultregs.cs &= 0xFFFF;
+       }
+       else 
+       {
             context = "hypervisor";
-        }
-    }
-
-    printk("CPU:    %d\nEIP:    %04lx:[<%08lx>]",
-           smp_processor_id(), (unsigned long)0xffff & regs->cs, eip);
-    if ( !GUEST_MODE(regs) )
-        print_symbol(" %s", eip);
-    printk("\nEFLAGS: %08lx   CONTEXT: %s\n", eflags, context);
+            faultregs.esp = (unsigned long)&regs->esp;
+            faultregs.ss = __HYPERVISOR_DS;
+            faultregs.ds = __HYPERVISOR_DS;
+            faultregs.es = __HYPERVISOR_DS;
+            faultregs.cs = __HYPERVISOR_CS;
+       }
+        __asm__ ("movw %%fs,%0 ; movw %%gs,%1"
+                : "=r" (faultregs.fs), "=r" (faultregs.gs) );
+
+       faultcrs[0] = read_cr0();
+       faultcrs[3] = read_cr3();
+    }
+
+    printk("CPU:    %d\nEIP:    %04x:[<%08x>]",
+           smp_processor_id(), faultregs.cs, faultregs.eip);
+    if ( !HVM_DOMAIN(current) && !GUEST_MODE(regs) )
+        print_symbol(" %s", faultregs.eip);
+    printk("\nEFLAGS: %08x   CONTEXT: %s\n", faultregs.eflags, context);
     printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx);
-    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
-           regs->esi, regs->edi, regs->ebp, esp);
-    printk("cr0: %08lx   cr3: %08lx\n", cr0, cr3);
-    printk("ds: %04lx   es: %04lx   fs: %04lx   gs: %04lx   "
-           "ss: %04lx   cs: %04lx\n",
-           ds, es, fs, gs, ss, cs);
+    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
+           regs->esi, regs->edi, regs->ebp, faultregs.esp);
+    printk("cr0: %08lx   cr3: %08lx\n", faultcrs[0], faultcrs[3]);
+    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
+           "ss: %04x   cs: %04x\n",
+           faultregs.ds, faultregs.es, faultregs.fs,
+          faultregs.gs, faultregs.ss, faultregs.cs);
 
     show_stack(regs);
 }
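
show_registers() now fetches guest state through the HVM abstraction layer
rather than reading VMCS fields directly, so the same code serves VMX and SVM.
The hvm_load_cpu_guest_regs()/hvm_store_cpu_guest_ctrl_regs() calls are assumed
to be thin wrappers dispatching through a function table that each platform
fills in, roughly:

    /* Sketch only; the assumed shape of the table in asm/hvm/hvm.h. */
    struct hvm_function_table {
        void (*load_cpu_guest_regs)(struct vcpu *v, struct cpu_user_regs *r);
        void (*store_cpu_guest_ctrl_regs)(struct vcpu *v, unsigned long crs[8]);
        /* ... further VMX-/SVM-specific entry points ... */
    };

    extern struct hvm_function_table hvm_funcs; /* installed by vmx.c or svm.c */

    static inline void
    hvm_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
    {
        hvm_funcs.load_cpu_guest_regs(v, regs);
    }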
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_64/asm-offsets.c Tue Jan 31 10:49:51 2006
@@ -65,11 +65,24 @@
            arch.guest_context.syscall_callback_eip);
     OFFSET(VCPU_kernel_sp, struct vcpu,
            arch.guest_context.kernel_sp);
+    OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
     OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
     DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
     BLANK();
+
+#ifdef CONFIG_SVM
+    OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa);
+    OFFSET(VCPU_svm_hsa_pa,  struct vcpu, arch.hvm_svm.host_save_pa);
+    OFFSET(VCPU_svm_vmcb, struct vcpu, arch.hvm_svm.vmcb);
+    OFFSET(VCPU_svm_vmexit_tsc, struct vcpu, arch.hvm_svm.vmexit_tsc);
+    BLANK();
+
+    OFFSET(VMCB_rax, struct vmcb_struct, rax);
+    OFFSET(VMCB_tsc_offset, struct vmcb_struct, tsc_offset);
+    BLANK();
+#endif
 
     OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
     OFFSET(VCPUINFO_upcall_mask, vcpu_info_t, evtchn_upcall_mask);
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_64/entry.S       Tue Jan 31 10:49:51 2006
@@ -187,135 +187,6 @@
         movw  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
         call  create_bounce_frame
         jmp   test_all_events
-
-#ifdef CONFIG_VMX
-/*
- * At VMExit time the processor saves the guest selectors, rsp, rip, 
- * and rflags. Therefore we don't save them, but simply decrement 
- * the kernel stack pointer to make it consistent with the stack frame 
- * at usual interruption time. The rflags of the host is not saved by VMX, 
- * and we set it to the fixed value.
- *
- * We also need the room, especially because orig_eax field is used 
- * by do_IRQ(). Compared the cpu_user_regs, we skip pushing for the following:
- *   (10) u64 gs;                 
- *   (9)  u64 fs;
- *   (8)  u64 ds;
- *   (7)  u64 es;
- *               <- get_stack_bottom() (= HOST_ESP)
- *   (6)  u64 ss;
- *   (5)  u64 rsp;
- *   (4)  u64 rflags;
- *   (3)  u64 cs;
- *   (2)  u64 rip;
- * (2/1)  u32 entry_vector;
- * (1/1)  u32 error_code;
- */
-#define VMX_MONITOR_RFLAGS     0x202 /* IF on */
-#define NR_SKIPPED_REGS        6       /* See the above explanation */
-#define VMX_SAVE_ALL_NOSEGREGS \
-        pushq $VMX_MONITOR_RFLAGS; \
-        popfq; \
-        subq $(NR_SKIPPED_REGS*8), %rsp; \
-        pushq %rdi; \
-        pushq %rsi; \
-        pushq %rdx; \
-        pushq %rcx; \
-        pushq %rax; \
-        pushq %r8;  \
-        pushq %r9;  \
-        pushq %r10; \
-        pushq %r11; \
-        pushq %rbx; \
-        pushq %rbp; \
-        pushq %r12; \
-        pushq %r13; \
-        pushq %r14; \
-        pushq %r15; \
-
-#define VMX_RESTORE_ALL_NOSEGREGS \
-        popq %r15; \
-        popq %r14; \
-        popq %r13; \
-        popq %r12; \
-        popq %rbp; \
-        popq %rbx; \
-        popq %r11; \
-        popq %r10; \
-        popq %r9;  \
-        popq %r8;  \
-        popq %rax; \
-        popq %rcx; \
-        popq %rdx; \
-        popq %rsi; \
-        popq %rdi; \
-        addq $(NR_SKIPPED_REGS*8), %rsp; \
-
-ENTRY(vmx_asm_vmexit_handler)
-        /* selectors are restored/saved by VMX */
-        VMX_SAVE_ALL_NOSEGREGS
-        call vmx_vmexit_handler
-        jmp vmx_asm_do_resume
-
-.macro vmx_asm_common launch initialized 
-1:
-        .if \initialized
-/* vmx_test_all_events */
-        GET_CURRENT(%rbx)
-/* test_all_events: */
-        cli                             # tests must not race interrupts
-/*test_softirqs:*/  
-        movl  VCPU_processor(%rbx),%eax
-        shl   $IRQSTAT_shift,%rax
-        leaq  irq_stat(%rip), %rdx
-        testl $~0,(%rdx,%rax,1)
-        jnz  2f 
-
-/* vmx_restore_all_guest */
-        call vmx_intr_assist
-        call load_cr2
-        .endif
-        /* 
-         * Check if we are going back to VMX-based VM
-         * By this time, all the setups in the VMCS must be complete.
-         */
-        VMX_RESTORE_ALL_NOSEGREGS
-        .if \launch
-        /* VMLUANCH */
-        .byte 0x0f,0x01,0xc2
-        pushfq
-        call vm_launch_fail
-        .else
-        /* VMRESUME */
-        .byte 0x0f,0x01,0xc3
-        pushfq
-        call vm_resume_fail
-        .endif
-        /* Should never reach here */
-        hlt
-
-        ALIGN
-
-        .if \initialized
-2:
-/* vmx_process_softirqs */
-        sti       
-        call do_softirq
-        jmp 1b
-        ALIGN
-        .endif
-.endm
-
-ENTRY(vmx_asm_do_launch)
-      vmx_asm_common 1 0
-
-ENTRY(vmx_asm_do_resume)
-      vmx_asm_common 0 1
-
-ENTRY(vmx_asm_do_relaunch)
-      vmx_asm_common 1 1
-
-#endif
 
         ALIGN
 /* %rbx: struct vcpu */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_64/mm.c  Tue Jan 31 10:49:51 2006
@@ -188,7 +188,7 @@
     l3_pgentry_t l3e;
     l2_pgentry_t l2e;
     unsigned long mfn, v;
-    unsigned int i;
+    unsigned int i = 0;
     long rc = 0;
 
     switch ( op )
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/x86_64/traps.c       Tue Jan 31 10:49:51 2006
@@ -13,51 +13,57 @@
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
 #include <asm/shadow.h>
-#include <asm/vmx.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
 
 void show_registers(struct cpu_user_regs *regs)
 {
-    unsigned long rip, rsp, rflags, cs, cr0, cr3;
+    struct cpu_user_regs faultregs;
+    unsigned long faultcrs[8];
     const char *context;
 
-    if ( VMX_DOMAIN(current) && (regs->eflags == 0) )
-    {
-        __vmread(GUEST_RIP, &rip);
-        __vmread(GUEST_RSP, &rsp);
-        __vmread(GUEST_RFLAGS, &rflags);
-        __vmread(GUEST_CS_SELECTOR, &cs);
-        __vmread(CR0_READ_SHADOW, &cr0);
-        __vmread(GUEST_CR3, &cr3);
-        context = "vmx guest";
+    if ( HVM_DOMAIN(current) && regs->eflags == 0 )
+    {
+       context = "hvm";
+       hvm_load_cpu_guest_regs(current, &faultregs);
+       hvm_store_cpu_guest_ctrl_regs(current, faultcrs);
     }
     else
     {
-        rip     = regs->rip;
-        rflags  = regs->rflags;
-        cr0     = read_cr0();
-        cr3     = read_cr3();
-        rsp     = regs->rsp;
-        cs      = regs->cs & 0xffff;
-        context = GUEST_MODE(regs) ? "guest" : "hypervisor";
-    }
-
-    printk("CPU:    %d\nRIP:    %04lx:[<%016lx>]",
-           smp_processor_id(), cs, rip);
-    if ( !GUEST_MODE(regs) )
-        print_symbol(" %s", rip);
-    printk("\nRFLAGS: %016lx   CONTEXT: %s\n", rflags, context);
+       faultregs = *regs;
+
+        if ( GUEST_MODE(regs) )
+        {
+            context = "guest";
+       }
+       else 
+       {
+            context = "hypervisor";
+            faultregs.esp = (unsigned long)&regs->esp;
+       }
+
+       faultcrs[0] = read_cr0();
+       faultcrs[3] = read_cr3();
+    }
+
+    printk("CPU:    %d\nRIP:    %04x:[<%016lx>]",
+           smp_processor_id(), faultregs.cs, faultregs.rip);
+    if ( !HVM_DOMAIN(current) && !GUEST_MODE(regs) )
+        print_symbol(" %s", faultregs.rip);
+
+    printk("\nRFLAGS: %016lx   CONTEXT: %s\n", faultregs.rflags, context);
     printk("rax: %016lx   rbx: %016lx   rcx: %016lx\n",
            regs->rax, regs->rbx, regs->rcx);
     printk("rdx: %016lx   rsi: %016lx   rdi: %016lx\n",
            regs->rdx, regs->rsi, regs->rdi);
     printk("rbp: %016lx   rsp: %016lx   r8:  %016lx\n",
-           regs->rbp, rsp, regs->r8);
+           regs->rbp, faultregs.rsp, regs->r8);
     printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
            regs->r9,  regs->r10, regs->r11);
     printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
            regs->r12, regs->r13, regs->r14);
     printk("r15: %016lx   cr0: %016lx   cr3: %016lx\n",
-           regs->r15, cr0, cr3);
+           regs->r15, faultcrs[0], faultcrs[3]);
 
     show_stack(regs);
 }
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h     Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-ia64/domain.h     Tue Jan 31 10:49:51 2006
@@ -26,7 +26,8 @@
     /* System pages out of guest memory, like for xenstore/console */
     unsigned long sys_pgnr;
     unsigned long max_pfn; /* Max pfn including I/O holes */
-    struct virutal_platform_def     vmx_platform;
+    struct virtual_platform_def     vmx_platform;
+#define        hvm_domain vmx_platform /* platform defs are not vmx specific */
 
     u64 xen_vastart;
     u64 xen_vaend;
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-ia64/mm.h Tue Jan 31 10:49:51 2006
@@ -414,6 +414,10 @@
 #define INVALID_M2P_ENTRY        (~0U)
 #define VALID_M2P(_e)            (!((_e) & (1U<<63)))
 #define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
+
+#define set_pfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
+#define get_pfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
+
 /* If pmt table is provided by control panel later, we need __get_user
 * here. However if it's allocated by HV, we should access it directly
 */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-ia64/vmx_platform.h
--- a/xen/include/asm-ia64/vmx_platform.h       Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-ia64/vmx_platform.h       Tue Jan 31 10:49:51 2006
@@ -21,16 +21,16 @@
 
 #include <public/xen.h>
 #include <public/arch-ia64.h>
-#include <asm/vmx_vioapic.h>
+#include <asm/hvm/vioapic.h>
 
 struct mmio_list;
-typedef struct virutal_platform_def {
+typedef struct virtual_platform_def {
     unsigned long       shared_page_va;
     unsigned long       pib_base;
     unsigned char       xtp;
     struct mmio_list    *mmio;
     /* One IOSAPIC now... */
-    struct vmx_vioapic   vmx_vioapic;
+    struct hvm_vioapic  vioapic;
 } vir_plat_t;
 
 static inline int __fls(uint32_t word)
@@ -63,7 +63,7 @@
 
 /* As long as we register vlsapic to ioapic controller, it's said enabled */
 #define vlapic_enabled(l) 1
-#define vmx_apic_support(d) 1
+#define hvm_apic_support(d) 1
 
 #define VLAPIC_DELIV_MODE_FIXED                0x0
 #define VLAPIC_DELIV_MODE_REDIR                0x1
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-ia64/vmx_vcpu.h
--- a/xen/include/asm-ia64/vmx_vcpu.h   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-ia64/vmx_vcpu.h   Tue Jan 31 10:49:51 2006
@@ -113,7 +113,7 @@
 extern uint64_t guest_read_vivr(VCPU *vcpu);
 extern void vmx_inject_vhpi(VCPU *vcpu, u8 vec);
 extern int vmx_vcpu_pend_interrupt(VCPU *vcpu, uint8_t vector);
-extern struct virutal_platform_def *vmx_vcpu_get_plat(VCPU *vcpu);
+extern struct virtual_platform_def *vmx_vcpu_get_plat(VCPU *vcpu);
 extern void memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s);
 extern void memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s);
 extern void memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s);
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h      Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/config.h      Tue Jan 31 10:49:51 2006
@@ -19,6 +19,7 @@
 #define CONFIG_X86_HT 1
 #define CONFIG_SHADOW 1
 #define CONFIG_VMX 1
+#define CONFIG_SVM 1
 #define CONFIG_SMP 1
 #define CONFIG_X86_LOCAL_APIC 1
 #define CONFIG_X86_GOOD_APIC 1
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h  Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/cpufeature.h  Tue Jan 31 10:49:51 2006
@@ -90,6 +90,7 @@
 /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
 #define X86_FEATURE_LAHF_LM    (5*32+ 0) /* LAHF/SAHF in long mode */
 #define X86_FEATURE_CMP_LEGACY (5*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVME        (5*32+ 2) /* Secure Virtual Machine */
 
 #define cpu_has(c, bit)                test_bit(bit, (c)->x86_capability)
 #define boot_cpu_has(bit)      test_bit(bit, boot_cpu_data.x86_capability)
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h      Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/domain.h      Tue Jan 31 10:49:51 2006
@@ -1,10 +1,10 @@
-
 #ifndef __ASM_DOMAIN_H__
 #define __ASM_DOMAIN_H__
 
 #include <xen/config.h>
 #include <xen/mm.h>
-#include <asm/vmx_vmcs.h>
+#include <asm/hvm/vcpu.h>
+#include <asm/hvm/domain.h>
 
 struct trap_bounce {
     unsigned long  error_code;
@@ -108,7 +108,7 @@
     struct list_head free_shadow_frames;
 
     pagetable_t         phys_table;         /* guest 1:1 pagetable */
-    struct vmx_platform vmx_platform;
+    struct hvm_domain   hvm_domain;
 
     /* Shadow-translated guest: Pseudophys base address of reserved area. */
     unsigned long first_reserved_pfn;
@@ -137,7 +137,7 @@
 #endif
 
     /* Virtual Machine Extensions */
-    struct arch_vmx_struct arch_vmx;
+    struct hvm_vcpu hvm_vcpu;
 
     /*
      * Every domain has a L1 pagetable of its own. Per-domain mappings
@@ -166,6 +166,10 @@
     unsigned long shadow_ldt_mapcnt;
 } __cacheline_aligned;
 
+/* shorthands to improve code legibility */
+#define hvm_vmx         hvm_vcpu.u.vmx
+#define hvm_svm         hvm_vcpu.u.svm
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
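
The hvm_vmx/hvm_svm shorthands presume that struct hvm_vcpu keeps the two
vendor-specific state blocks in a union (only one arm is ever live for a given
domain); a sketch of the assumed layout, other members elided:

    struct hvm_vcpu {
        /* ... common HVM per-VCPU state ... */
        union {
            struct arch_vmx_struct vmx;   /* VT-x: VMCS and friends */
            struct arch_svm_struct svm;   /* SVM:  VMCB and friends */
        } u;
    };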
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/mpspec.h
--- a/xen/include/asm-x86/mpspec.h      Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/mpspec.h      Tue Jan 31 10:49:51 2006
@@ -35,6 +35,7 @@
 extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi);
 extern void mp_config_acpi_legacy_irqs (void);
 extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low);
+extern int mp_get_num_processors(void);
 #endif /*CONFIG_ACPI_BOOT*/
 
 #define PHYSID_ARRAY_SIZE      BITS_TO_LONGS(MAX_APICS)
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/msr.h
--- a/xen/include/asm-x86/msr.h Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/msr.h Tue Jan 31 10:49:51 2006
@@ -88,7 +88,7 @@
 /* Intel defined MSRs. */
 #define MSR_IA32_P5_MC_ADDR            0
 #define MSR_IA32_P5_MC_TYPE            1
-#define MSR_IA32_TIME_STAMP_COUNTER    0x10
+#define MSR_IA32_TIME_STAMP_COUNTER     0x10
 #define MSR_IA32_PLATFORM_ID           0x17
 #define MSR_IA32_EBL_CR_POWERON                0x2a
 
@@ -128,11 +128,13 @@
 #define _EFER_LME 8  /* Long mode enable */
 #define _EFER_LMA 10 /* Long mode active (read-only) */
 #define _EFER_NX 11  /* No execute enable */
+#define _EFER_SVME 12
 
 #define EFER_SCE (1<<_EFER_SCE)
 #define EFER_LME (1<<_EFER_LME)
 #define EFER_LMA (1<<_EFER_LMA)
 #define EFER_NX (1<<_EFER_NX)
+#define EFER_SVME (1<<_EFER_SVME)
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PLATFORM_ID   0x17
@@ -271,7 +273,6 @@
 #define MSR_P4_U2L_ESCR0               0x3b0
 #define MSR_P4_U2L_ESCR1               0x3b1
 
-/* AMD Defined MSRs */
 #define MSR_K6_EFER                    0xC0000080
 #define MSR_K6_STAR                    0xC0000081
 #define MSR_K6_WHCR                    0xC0000082
@@ -293,6 +294,11 @@
 #define MSR_K7_FID_VID_CTL             0xC0010041
 #define MSR_K7_FID_VID_STATUS          0xC0010042
 
+#define MSR_K8_TOP_MEM1                        0xC001001A
+#define MSR_K8_TOP_MEM2                        0xC001001D
+#define MSR_K8_VM_HSAVE_PA             0xC0010117
+#define MSR_K8_SYSCFG                  0xC0000010      
+
 /* Centaur-Hauls/IDT defined MSRs. */
 #define MSR_IDT_FCR1                   0x107
 #define MSR_IDT_FCR2                   0x108
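
EFER bit 12 (SVME) gates the SVM instruction set, and MSR_K8_VM_HSAVE_PA tells
the processor where to stash host state across VMRUN. A hedged sketch of how
the new constants would be used on bring-up, assuming the usual
rdmsrl()/wrmsrl() helpers:

    static void svm_host_enable(unsigned long hsave_pa)
    {
        unsigned long efer;

        rdmsrl(MSR_K6_EFER, efer);
        wrmsrl(MSR_K6_EFER, efer | EFER_SVME); /* set EFER.SVME */
        wrmsrl(MSR_K8_VM_HSAVE_PA, hsave_pa);  /* per-CPU host save area */
    }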
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/processor.h   Tue Jan 31 10:49:51 2006
@@ -57,6 +57,7 @@
 #define X86_CR0_MP              0x00000002 /* Monitor Coprocessor      (RW) */
 #define X86_CR0_EM              0x00000004 /* Require FPU Emulation    (RO) */
 #define X86_CR0_TS              0x00000008 /* Task Switched            (RW) */
+#define X86_CR0_ET              0x00000010 /* Extension type           (RO) */
 #define X86_CR0_NE              0x00000020 /* Numeric Error Reporting  (RW) */
 #define X86_CR0_WP              0x00010000 /* Supervisor Write Protect (RW) */
 #define X86_CR0_AM              0x00040000 /* Alignment Checking       (RW) */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/regs.h
--- a/xen/include/asm-x86/regs.h        Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/regs.h        Tue Jan 31 10:49:51 2006
@@ -33,10 +33,6 @@
 
 #define GUEST_MODE(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))
 
-#ifdef CONFIG_VMX
-#define GUEST_CONTEXT(_ed, _r) ( (VMX_DOMAIN(_ed) && ((_r)->eflags == 0)) || GUEST_MODE(_r) )
-#else
-#define GUEST_CONTEXT(_ed, _r) GUEST_MODE(_r)
-#endif
+#define GUEST_CONTEXT(_ed, _r) ((HVM_DOMAIN(_ed) && ((_r)->eflags == 0)) || GUEST_MODE(_r))
 
 #endif /* __X86_REGS_H__ */
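
The unified GUEST_CONTEXT() relies on a convention set up by the HVM exit
stubs: they store 0 into the eflags slot of the frame, and since bit 1 of a
genuine EFLAGS image always reads as 1, eflags==0 can never be real guest flags
and so unambiguously marks an HVM frame. For illustration:

    /* Illustration only: classify a trap frame under the unified scheme. */
    static inline int hvm_guest_frame(struct vcpu *v, struct cpu_user_regs *r)
    {
        /* Real flags always have bit 1 set; 0 is the HVM marker. */
        return HVM_DOMAIN(v) && (r->eflags == 0);
    }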
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h      Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/shadow.h      Tue Jan 31 10:49:51 2006
@@ -31,7 +31,9 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/processor.h>
-#include <asm/vmx.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/regs.h>
 #include <public/dom0_ops.h>
 #include <asm/shadow_public.h>
 #include <asm/page-guest32.h>
@@ -168,8 +170,6 @@
 static inline void update_hl2e(struct vcpu *v, unsigned long va);
 #endif
 
-extern void vmx_shadow_clear_state(struct domain *);
-
 static inline int page_is_page_table(struct pfn_info *page)
 {
     struct domain *owner = page_get_owner(page);
@@ -1707,21 +1707,18 @@
     struct domain *d = v->domain;
     int paging_enabled;
 
-#ifdef CONFIG_VMX
-    if ( VMX_DOMAIN(v) )
-        paging_enabled = vmx_paging_enabled(v);
-
+    if ( HVM_DOMAIN(v) )
+        paging_enabled = hvm_paging_enabled(v);
     else
-#endif
         // HACK ALERT: there's currently no easy way to figure out if a domU
         // has set its arch.guest_table to zero, vs not yet initialized it.
         //
         paging_enabled = !!pagetable_get_paddr(v->arch.guest_table);
 
     /*
-     * We don't call __update_pagetables() when vmx guest paging is
+     * We don't call __update_pagetables() when hvm guest paging is
      * disabled as we want the linear_pg_table to be inaccessible so that
-     * we bail out early of shadow_fault() if the vmx guest tries illegal
+     * we bail out early of shadow_fault() if the hvm guest tries illegal
      * accesses while it thinks paging is turned off.
      */
     if ( unlikely(shadow_mode_enabled(d)) && paging_enabled )
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Mon Jan 30 17:51:35 2006
+++ b/xen/include/public/arch-x86_32.h  Tue Jan 31 10:49:51 2006
@@ -114,7 +114,7 @@
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
 #define VGCF_I387_VALID (1<<0)
-#define VGCF_VMX_GUEST  (1<<1)
+#define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
     unsigned long flags;                    /* VGCF_* flags                 */
     cpu_user_regs_t user_regs;              /* User-level CPU registers     */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Mon Jan 30 17:51:35 2006
+++ b/xen/include/public/arch-x86_64.h  Tue Jan 31 10:49:51 2006
@@ -196,7 +196,7 @@
     /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
     struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
 #define VGCF_I387_VALID (1<<0)
-#define VGCF_VMX_GUEST  (1<<1)
+#define VGCF_HVM_GUEST  (1<<1)
 #define VGCF_IN_KERNEL  (1<<2)
     unsigned long flags;                    /* VGCF_* flags                 */
     cpu_user_regs_t user_regs;              /* User-level CPU registers     */
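
Only the flag's name changes here (the bit value stays 1<<1), so saved guest
contexts remain compatible. A hedged builder-side example; hvm_init_boot_context()
is a hypothetical helper, and HVM_LOADER_ENTR_ADDR comes from xc_hvm_build.c
below:

    /* Sketch: mark the boot VCPU as an HVM guest entering at the loader. */
    static void hvm_init_boot_context(vcpu_guest_context_t *ctxt)
    {
        memset(ctxt, 0, sizeof(*ctxt));
        ctxt->flags = VGCF_HVM_GUEST;               /* was VGCF_VMX_GUEST */
        ctxt->user_regs.eip = HVM_LOADER_ENTR_ADDR; /* 0x00100000 */
    }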
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/xen/domain_page.h
--- a/xen/include/xen/domain_page.h     Mon Jan 30 17:51:35 2006
+++ b/xen/include/xen/domain_page.h     Tue Jan 31 10:49:51 2006
@@ -48,6 +48,8 @@
 {
     ASSERT(cache != NULL);
     cache->flags = 0;
+    cache->pfn = 0;
+    cache->va = NULL;
 }
 
 static inline void *
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/examples/xmexample.hvm
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/tools/examples/xmexample.hvm      Tue Jan 31 10:49:51 2006
@@ -0,0 +1,152 @@
+#  -*- mode: python; -*-
+#============================================================================
+# Python configuration setup for 'xm create'.
+# This script sets the parameters used when a domain is created using 'xm create'.
+# You use a separate script for each domain you want to create, or 
+# you can set the parameters for the domain on the xm command line.
+#============================================================================
+
+import os, re
+arch = os.uname()[4]
+if re.search('64', arch):
+    arch_libdir = 'lib64'
+else:
+    arch_libdir = 'lib'
+
+#----------------------------------------------------------------------------
+# Kernel image file.
+kernel = "/usr/lib/xen/boot/hvmloader"
+
+# The domain build function. HVM domain uses 'hvm'.
+builder='hvm'
+
+# Initial memory allocation (in megabytes) for the new domain.
+memory = 128
+
+# A name for your domain. All domains must have different names.
+name = "ExampleHVMDomain"
+
+#-----------------------------------------------------------------------------
+# the number of cpus guest platform has, default=1
+#vcpus=1
+
+# enable/disable HVM guest ACPI, default=0 (disabled)
+#acpi=0
+
+# enable/disable HVM guest APIC, default=0 (disabled)
+#apic=0
+
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = ""         # leave to Xen to pick
+#cpus = "0"        # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
+
+# Optionally define mac and/or bridge for the network interfaces.
+# Random MACs are assigned if not given.
+#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0' ]
+# type=ioemu specifies that the NIC is an ioemu device, not netfront
+vif = [ 'type=ioemu, bridge=xenbr0' ]
+
+#----------------------------------------------------------------------------
+# Define the disk devices you want the domain to have access to, and
+# what you want them accessible as.
+# Each disk entry is of the form phy:UNAME,DEV,MODE
+# where UNAME is the device, DEV is the device name the domain will see,
+# and MODE is r for read-only, w for read-write.
+
+#disk = [ 'phy:hda1,hda1,r' ]
+disk = [ 'file:/var/images/min-el3-i386.img,ioemu:hda,w' ]
+
+#----------------------------------------------------------------------------
+# Configure the behaviour when a domain exits.  There are three 'reasons'
+# for a domain to stop: poweroff, reboot, and crash.  For each of these you
+# may specify:
+#
+#   "destroy",        meaning that the domain is cleaned up as normal;
+#   "restart",        meaning that a new domain is started in place of the old
+#                     one;
+#   "preserve",       meaning that no clean-up is done until the domain is
+#                     manually destroyed (using xm destroy, for example); or
+#   "rename-restart", meaning that the old domain is not cleaned up, but is
+#                     renamed and a new domain started in its place.
+#
+# The default is
+#
+#   on_poweroff = 'destroy'
+#   on_reboot   = 'restart'
+#   on_crash    = 'restart'
+#
+# For backwards compatibility we also support the deprecated option restart
+#
+# restart = 'onreboot' means on_poweroff = 'destroy'
+#                            on_reboot   = 'restart'
+#                            on_crash    = 'destroy'
+#
+# restart = 'always'   means on_poweroff = 'restart'
+#                            on_reboot   = 'restart'
+#                            on_crash    = 'restart'
+#
+# restart = 'never'    means on_poweroff = 'destroy'
+#                            on_reboot   = 'destroy'
+#                            on_crash    = 'destroy'
+
+#on_poweroff = 'destroy'
+#on_reboot   = 'restart'
+#on_crash    = 'restart'
+
+#============================================================================
+
+# New stuff
+device_model = '/usr/' + arch_libdir + '/xen/bin/qemu-dm'
+
+#-----------------------------------------------------------------------------
+# Disk image for 
+#cdrom=
+
+#-----------------------------------------------------------------------------
+# boot on floppy (a), hard disk (c) or CD-ROM (d) 
+#boot=[a|c|d]
+#-----------------------------------------------------------------------------
+#  write to temporary files instead of disk image files
+#snapshot=1
+
+#----------------------------------------------------------------------------
+# enable SDL library for graphics, default = 0
+sdl=0
+
+#----------------------------------------------------------------------------
+# enable VNC library for graphics, default = 1
+vnc=1
+
+#----------------------------------------------------------------------------
+# enable spawning vncviewer(only valid when vnc=1), default = 1
+vncviewer=1
+
+#----------------------------------------------------------------------------
+# no graphics, use serial port
+#nographic=0
+
+
+#-----------------------------------------------------------------------------
+#   serial port re-direct to pty device, /dev/pts/n
+#   then xm console or minicom can connect
+#serial='pty'
+
+#----------------------------------------------------------------------------
+# enable ne2000, default = 0(use pcnet)
+ne2000=0
+
+
+#-----------------------------------------------------------------------------
+#   enable audio support
+#audio=1
+
+
+#-----------------------------------------------------------------------------
+#    set the real time clock to local time [default=0 i.e. set to utc]
+#localtime=1
+
+
+#-----------------------------------------------------------------------------
+#    start in full screen
+#full-screen=1   
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/hvmloader/Makefile
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/hvmloader/Makefile Tue Jan 31 10:49:51 2006
@@ -0,0 +1,56 @@
+#
+# Makefile
+#
+# Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+# Copyright (c) 2005, International Business Machines Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place - Suite 330, Boston, MA 02111-1307 USA.
+#
+
+XEN_ROOT = ../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+# The HVM loader is started in 32-bit mode at the address below:
+LOADADDR = 0x100000
+
+DEFINES  =-DDEBUG
+XENINC   =-I$(XEN_ROOT)/tools/libxc
+
+OBJECTS         = hvmloader.o acpi_madt.o 
+
+CC       = gcc
+OBJCOPY  = objcopy
+CFLAGS   = $(DEFINES) -I. $(XENINC) -Wall -fno-builtin -O2 -msoft-float
+CFLAGS  += -m32 -march=i686
+LDFLAGS  = -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,$(LOADADDR)
+
+all: hvmloader
+
+hvmloader: roms.h hvmloader.c acpi_madt.c
+       $(CC) $(CFLAGS) -c hvmloader.c acpi_madt.c
+       $(CC) $(LDFLAGS) -o hvmloader.tmp hvmloader.o acpi_madt.o
+       $(OBJCOPY) hvmloader.tmp hvmloader
+       rm -f hvmloader.tmp
+
+roms.h:        ../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin ../acpi/acpi.bin
+       ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
+       ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
+       ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
+       ./mkhex vmxassist ../vmxassist/vmxassist.bin >> roms.h
+       ./mkhex acpi ../acpi/acpi.bin >> roms.h
+
+clean:
+       rm -f roms.h acpi.h
+       rm -f hvmloader hvmloader.tmp hvmloader.o $(OBJECTS)
+
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/hvmloader/acpi_madt.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/hvmloader/acpi_madt.c      Tue Jan 31 10:49:51 2006
@@ -0,0 +1,190 @@
+/*
+ * acpi_madt.c: Update ACPI MADT table for multiple processor guest.
+ *
+ * Yu Ke, ke.yu@xxxxxxxxx
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include "../acpi/acpi2_0.h"
+#include "../acpi/acpi_madt.h"
+
+#include <xen/hvm/hvm_info_table.h>
+
+#define NULL ((void*)0)
+
+extern int puts(const char *s);
+
+static struct hvm_info_table *table = NULL;
+
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+       char signature[] = "HVM INFO";
+       uint8_t *ptr = (uint8_t *)t;
+       uint8_t sum = 0;
+       int i;
+
+       /* strncmp(t->signature, "HVM INFO", 8) */
+       for (i = 0; i < 8; i++) {
+               if (signature[i] != t->signature[i]) {
+                       puts("Bad hvm info signature\n");
+                       return 0;
+               }
+       }
+
+       for (i = 0; i < t->length; i++)
+               sum += ptr[i];
+
+       return (sum == 0);
+}
+
+/* xc_hvm_build wrote hvm info at 0x9F800. Return it. */
+static struct hvm_info_table *
+get_hvm_info_table(void)
+{
+       struct hvm_info_table *t;
+
+       if (table != NULL)
+               return table;
+
+       t = (struct hvm_info_table *)HVM_INFO_PADDR;
+
+       if (!validate_hvm_info(t)) {
+               puts("Bad hvm info table\n");
+               return NULL;
+       }
+
+       table = t;
+
+       return table;
+}
+
+int
+get_vcpu_nr(void)
+{
+       struct hvm_info_table *t = get_hvm_info_table();
+       return (t ? t->nr_vcpus : 1); /* default 1 vcpu */
+}
+
+int
+get_acpi_enabled(void)
+{
+       struct hvm_info_table *t = get_hvm_info_table();
+       return (t ? t->acpi_enabled : 0); /* default no acpi */
+}
+
+
+static void *
+acpi_madt_get_madt(unsigned char *acpi_start)
+{
+       ACPI_2_0_RSDP *rsdp=NULL;
+       ACPI_2_0_RSDT *rsdt=NULL;
+       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
+
+       rsdp = (ACPI_2_0_RSDP *)(acpi_start + sizeof(ACPI_2_0_FACS));
+       if (rsdp->Signature != ACPI_2_0_RSDP_SIGNATURE) {
+               puts("Bad RSDP signature\n");
+               return NULL;
+       }
+
+       rsdt= (ACPI_2_0_RSDT *)
+               (acpi_start + rsdp->RsdtAddress - ACPI_PHYSICAL_ADDRESS);
+       if (rsdt->Header.Signature != ACPI_2_0_RSDT_SIGNATURE) {
+               puts("Bad RSDT signature\n");
+               return NULL;
+       }
+
+       madt = (ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *)
+               (acpi_start + rsdt->Entry[1] - ACPI_PHYSICAL_ADDRESS);
+       if (madt->Header.Header.Signature !=
+           ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE_SIGNATURE) {
+               puts("Bad MADT signature \n");
+               return NULL;
+       }
+
+       return madt;
+}
+
+static void
+set_checksum(void *start, int checksum_offset, int len)
+{
+       unsigned char sum = 0;
+       unsigned char *ptr;
+
+       ptr = start;
+       ptr[checksum_offset] = 0;
+       while (len--)
+               sum += *ptr++;
+
+       ptr = start;
+       ptr[checksum_offset] = -sum;
+}
+
+static int
+acpi_madt_set_local_apics(
+       int nr_vcpu,
+       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
+{
+       int i;
+
+       if ((nr_vcpu > MAX_VIRT_CPUS) || (nr_vcpu < 0) || !madt)
+               return -1;
+
+       for (i = 0; i < nr_vcpu; i++) {
+               madt->LocalApic[i].Type            = ACPI_PROCESSOR_LOCAL_APIC;
+               madt->LocalApic[i].Length          = sizeof(ACPI_LOCAL_APIC_STRUCTURE);
+               madt->LocalApic[i].AcpiProcessorId = i;
+               madt->LocalApic[i].ApicId          = i;
+               madt->LocalApic[i].Flags           = 1;
+       }
+
+       madt->Header.Header.Length =
+               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
+               (MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
+
+       return 0;
+}
+
+#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
+
+int acpi_madt_update(unsigned char *acpi_start)
+{
+       int rc;
+       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
+
+       madt = acpi_madt_get_madt(acpi_start);
+       if (!madt)
+               return -1;
+
+       rc = acpi_madt_set_local_apics(get_vcpu_nr(), madt);
+       if (rc != 0)
+               return rc;
+
+       set_checksum(
+               madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
+               madt->Header.Header.Length);
+
+       return 0;
+}
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
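
set_checksum() applies the standard ACPI rule: all bytes of a table, checksum
field included, must sum to zero mod 256, so it stores the negated byte sum.
The matching validation (the same rule validate_hvm_info() applies to the HVM
info table) is, for illustration:

    /* Returns nonzero iff the table's bytes sum to 0 (mod 256). */
    static int checksum_ok(const void *table, int len)
    {
        const unsigned char *p = table;
        unsigned char sum = 0;

        while (len--)
            sum += *p++;
        return sum == 0;
    }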
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/hvmloader/hvmloader.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/hvmloader/hvmloader.c      Tue Jan 31 10:49:51 2006
@@ -0,0 +1,211 @@
+/*
+ * hvmloader.c: HVM ROMBIOS/VGABIOS/ACPI/VMXAssist image loader.
+ *
+ * A quick hack so that we can boot rom images as if they were a Linux kernel.
+ * This code will copy the rom images (ROMBIOS/VGABIOS/VM86) into their
+ * respective spaces and transfer control to VM86 to execute the BIOSes.
+ *
+ * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include "roms.h"
+#include "../acpi/acpi2_0.h"  /* for ACPI_PHYSICAL_ADDRESS */
+
+/* memory map */
+#define VGABIOS_PHYSICAL_ADDRESS       0x000C0000
+#define        VMXASSIST_PHYSICAL_ADDRESS      0x000D0000
+#define        ROMBIOS_PHYSICAL_ADDRESS        0x000F0000
+
+/* invoke SVM's paged realmode support */
+#define SVM_VMMCALL_RESET_TO_REALMODE  0x00000001
+
+/*
+ * C runtime start off
+ */
+asm(
+"      .text                           \n"
+"      .globl  _start                  \n"
+"_start:                               \n"
+"      cld                             \n"
+"      cli                             \n"
+"      lgdt    gdt_desr                \n"
+"      movl    $stack_top, %esp        \n"
+"      movl    %esp, %ebp              \n"
+"      call    main                    \n"
+"      jmp     halt                    \n"
+"                                      \n"
+"gdt_desr:                             \n"
+"      .word   gdt_end - gdt - 1       \n"
+"      .long   gdt                     \n"
+"                                      \n"
+"      .align  8                       \n"
+"gdt:                                  \n"
+"      .quad   0x0000000000000000      \n"
+"      .quad   0x00CF92000000FFFF      \n"
+"      .quad   0x00CF9A000000FFFF      \n"
+"gdt_end:                              \n"
+"                                      \n"
+"halt:                                 \n"
+"      sti                             \n"
+"      jmp     .                       \n"
+"                                      \n"
+"      .bss                            \n"
+"      .align  8                       \n"
+"stack:                                        \n"
+"      .skip   0x4000                  \n"
+"stack_top:                            \n"
+);
+
+extern int get_acpi_enabled(void);
+extern int acpi_madt_update(unsigned char* acpi_start);
+
+static inline void
+outw(unsigned short addr, unsigned short val)
+{
+        __asm__ __volatile__ ("outw %%ax, %%dx" :: "d"(addr), "a"(val));
+}
+
+static inline void
+outb(unsigned short addr, unsigned char val)
+{
+        __asm__ __volatile__ ("outb %%al, %%dx" :: "d"(addr), "a"(val));
+}
+
+static inline unsigned char
+inb(unsigned short addr)
+{
+        unsigned char val;
+
+        __asm__ __volatile__ ("inb %w1,%0" : "=a" (val) : "Nd" (addr));
+        return val;
+}
+
+void *
+memcpy(void *dest, const void *src, unsigned n)
+{
+       int t0, t1, t2;
+
+       __asm__ __volatile__(
+               "cld\n"
+               "rep; movsl\n"
+               "testb $2,%b4\n"
+               "je 1f\n"
+               "movsw\n"
+               "1: testb $1,%b4\n"
+               "je 2f\n"
+               "movsb\n"
+               "2:"
+               : "=&c" (t0), "=&D" (t1), "=&S" (t2)
+               : "0" (n/4), "q" (n), "1" ((long) dest), "2" ((long) src)
+               : "memory"
+       );
+       return dest;
+}
+
+int
+puts(const char *s)
+{
+       while (*s)
+               outb(0xE9, *s++);
+       return 0;
+}
+
+int
+cirrus_check(void)
+{
+       outw(0x3C4, 0x9206);
+       return inb(0x3C5) == 0x12;
+}
+
+int 
+vmmcall(int edi, int esi, int edx, int ecx, int ebx)
+{
+        int eax;
+
+        __asm__ __volatile__(
+               ".byte 0x0F,0x01,0xD9"
+                : "=a" (eax)
+               : "a"(0x58454E00), /* XEN\0 key */
+                 "b"(ebx), "c"(ecx), "d"(edx), "D"(edi), "S"(esi)
+       );
+        return eax;
+}
+
+int
+check_amd(void)
+{
+       char id[12];
+
+        __asm__ __volatile__(
+               "cpuid" 
+               : "=b" (*(int *)(&id[0])),
+                 "=c" (*(int *)(&id[8])),
+                 "=d" (*(int *)(&id[4]))
+               : "a" (0)
+       );
+       return __builtin_memcmp(id, "AuthenticAMD", 12) == 0;
+}
+
+int
+main(void)
+{
+       puts("HVM Loader\n");
+
+       puts("Loading ROMBIOS ...\n");
+       memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, sizeof(rombios));
+       if (cirrus_check()) {
+               puts("Loading Cirrus VGABIOS ...\n");
+               memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
+                       vgabios_cirrusvga, sizeof(vgabios_cirrusvga));
+       } else {
+               puts("Loading Standard VGABIOS ...\n");
+               memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
+                       vgabios_stdvga, sizeof(vgabios_stdvga));
+       }
+
+       if (get_acpi_enabled() != 0) {
+               puts("Loading ACPI ...\n");
+               acpi_madt_update((unsigned char *) acpi);
+               if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
+                       /*
+                        * Make sure the ACPI table does not overlap the
+                        * ROMBIOS; currently an ACPI image under 8K is OK.
+                        */
+                        memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi,
+                                                               sizeof(acpi));
+               }
+       }
+
+       if (check_amd()) {
+               /* AMD implies this is SVM */
+                puts("SVM go ...\n");
+                vmmcall(SVM_VMMCALL_RESET_TO_REALMODE, 0, 0, 0, 0);
+       } else {
+               puts("Loading VMXAssist ...\n");
+               memcpy((void *)VMXASSIST_PHYSICAL_ADDRESS,
+                               vmxassist, sizeof(vmxassist));
+
+               puts("VMX go ...\n");
+               __asm__ __volatile__(
+                       "jmp *%%eax"
+                       : : "a" (VMXASSIST_PHYSICAL_ADDRESS), "d" (0)
+               );
+       }
+
+       puts("Failed to invoke ROMBIOS\n");
+       return 0;
+}
+
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/hvmloader/mkhex
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/tools/firmware/hvmloader/mkhex    Tue Jan 31 10:49:51 2006
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+#
+# mkhex: Generate C embeddable hexdumps
+#
+# Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+# Copyright (c) 2005, International Business Machines Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place - Suite 330, Boston, MA 02111-1307 USA.
+#
+
+echo "unsigned $1[] = {"
+od -v -t x $2 | sed 's/^[0-9]* /0x/' | sed 's/ /, 0x/g' | sed 's/$/,/'
+echo "};"
+
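mkhex wraps 'od -v -t x' output in a C array named by its first argument, so
the roms.h generated by the hvmloader Makefile above contains one array per
firmware image for hvmloader to memcpy() into place. Assumed shape of the
output (word values illustrative only):

    /* Produced by: ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h */
    unsigned rombios[] = {
        0x000000eb, 0x0000c031, /* ... one 32-bit word per od(1) column ... */
    };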
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xc_hvm_build.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/tools/libxc/xc_hvm_build.c        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,849 @@
+/******************************************************************************
+ * xc_hvm_build.c
+ */
+
+#include <stddef.h>
+#include "xg_private.h"
+#define ELFSIZE 32
+#include "xc_elf.h"
+#include <stdlib.h>
+#include <unistd.h>
+#include <zlib.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/ioreq.h>
+
+#define HVM_LOADER_ENTR_ADDR  0x00100000
+
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#ifdef __x86_64__
+#define L3_PROT (_PAGE_PRESENT)
+#endif
+
+#define E820MAX        128
+
+#define E820_RAM          1
+#define E820_RESERVED     2
+#define E820_ACPI         3
+#define E820_NVS          4
+#define E820_IO          16
+#define E820_SHARED_PAGE 17
+#define E820_XENSTORE    18
+
+#define E820_MAP_PAGE       0x00090000
+#define E820_MAP_NR_OFFSET  0x000001E8
+#define E820_MAP_OFFSET     0x000002D0
+
+struct e820entry {
+    uint64_t addr;
+    uint64_t size;
+    uint32_t type;
+} __attribute__((packed));
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+
+static int
+parseelfimage(
+    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
+static int
+loadelfimage(
+    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
+    struct domain_setup_info *dsi);
+
+static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
+{
+    struct e820entry *e820entry =
+        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
+    unsigned char nr_map = 0;
+
+    /* XXX: Doesn't work for > 4GB yet */
+    e820entry[nr_map].addr = 0x0;
+    e820entry[nr_map].size = 0x9F800;
+    e820entry[nr_map].type = E820_RAM;
+    nr_map++;
+
+    e820entry[nr_map].addr = 0x9F800;
+    e820entry[nr_map].size = 0x800;
+    e820entry[nr_map].type = E820_RESERVED;
+    nr_map++;
+
+    e820entry[nr_map].addr = 0xA0000;
+    e820entry[nr_map].size = 0x20000;
+    e820entry[nr_map].type = E820_IO;
+    nr_map++;
+
+    e820entry[nr_map].addr = 0xF0000;
+    e820entry[nr_map].size = 0x10000;
+    e820entry[nr_map].type = E820_RESERVED;
+    nr_map++;
+
+#define STATIC_PAGES    2       /* for ioreq_t and store_mfn */
+    /* Most of the ram goes here */
+    e820entry[nr_map].addr = 0x100000;
+    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
+    e820entry[nr_map].type = E820_RAM;
+    nr_map++;
+
+    /* Statically allocated special pages */
+
+    /* Shared ioreq_t page */
+    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
+    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].type = E820_SHARED_PAGE;
+    nr_map++;
+
+    /* For xenstore */
+    e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE;
+    e820entry[nr_map].size = PAGE_SIZE;
+    e820entry[nr_map].type = E820_XENSTORE;
+    nr_map++;
+
+    e820entry[nr_map].addr = mem_size;
+    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
+    e820entry[nr_map].type = E820_NVS;
+    nr_map++;
+
+    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
+    e820entry[nr_map].size = 0xA * PAGE_SIZE;
+    e820entry[nr_map].type = E820_ACPI;
+    nr_map++;
+
+    e820entry[nr_map].addr = 0xFEC00000;
+    e820entry[nr_map].size = 0x1400000;
+    e820entry[nr_map].type = E820_IO;
+    nr_map++;
+
+    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
+}
+
+static void
+set_hvm_info_checksum(struct hvm_info_table *t)
+{
+    uint8_t *ptr = (uint8_t *)t, sum = 0;
+    unsigned int i;
+
+    t->checksum = 0;
+
+    for (i = 0; i < t->length; i++)
+        sum += *ptr++;
+
+    t->checksum = -sum;
+}
+
+/*
+ * Use E820 reserved memory at 0x9F800 to pass HVM info to hvmloader;
+ * hvmloader will use this info to set up the BIOS accordingly.
+ */
+static int set_hvm_info(int xc_handle, uint32_t dom,
+                        unsigned long *pfn_list, unsigned int vcpus,
+                        unsigned int acpi, unsigned int apic)
+{
+    char *va_map;
+    struct hvm_info_table *va_hvm;
+
+
+    va_map = xc_map_foreign_range(
+        xc_handle,
+        dom,
+        PAGE_SIZE,
+        PROT_READ|PROT_WRITE,
+        pfn_list[HVM_INFO_PFN]);
+    
+    if ( va_map == NULL )
+        return -1;
+
+    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
+    memset(va_hvm, 0, sizeof(*va_hvm));
+    strncpy(va_hvm->signature, "HVM INFO", 8);
+    va_hvm->length       = sizeof(struct hvm_info_table);
+    va_hvm->acpi_enabled = acpi;
+    va_hvm->apic_enabled = apic;
+    va_hvm->nr_vcpus     = vcpus;
+
+    set_hvm_info_checksum(va_hvm);
+
+    munmap(va_map, PAGE_SIZE);
+
+    return 0;
+}
+
+#ifdef __i386__
+static int zap_mmio_range(int xc_handle, uint32_t dom,
+                          l2_pgentry_32_t *vl2tab,
+                          unsigned long mmio_range_start,
+                          unsigned long mmio_range_size)
+{
+    unsigned long mmio_addr;
+    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+    unsigned long vl2e;
+    l1_pgentry_32_t *vl1tab;
+
+    mmio_addr = mmio_range_start & PAGE_MASK;
+    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+        vl2e = vl2tab[l2_table_offset(mmio_addr)];
+        if (vl2e == 0)
+            continue;
+        vl1tab = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE,
+            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+        if ( vl1tab == 0 )
+        {
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
+        vl1tab[l1_table_offset(mmio_addr)] = 0;
+        munmap(vl1tab, PAGE_SIZE);
+    }
+    return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l2tab,
+                           unsigned char e820_map_nr, unsigned char *e820map)
+{
+    unsigned int i;
+    struct e820entry *e820entry = (struct e820entry *)e820map;
+
+    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                   PROT_READ|PROT_WRITE,
+                                                   l2tab >> PAGE_SHIFT);
+    if ( vl2tab == 0 )
+        return -1;
+
+    for ( i = 0; i < e820_map_nr; i++ )
+    {
+        if ( (e820entry[i].type == E820_IO) &&
+             (zap_mmio_range(xc_handle, dom, vl2tab,
+                             e820entry[i].addr, e820entry[i].size) == -1))
+            return -1;
+    }
+
+    munmap(vl2tab, PAGE_SIZE);
+    return 0;
+}
+#else
+static int zap_mmio_range(int xc_handle, uint32_t dom,
+                          l3_pgentry_t *vl3tab,
+                          unsigned long mmio_range_start,
+                          unsigned long mmio_range_size)
+{
+    unsigned long mmio_addr;
+    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+    unsigned long vl2e = 0;
+    unsigned long vl3e;
+    l1_pgentry_t *vl1tab;
+    l2_pgentry_t *vl2tab;
+
+    mmio_addr = mmio_range_start & PAGE_MASK;
+    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
+    {
+        vl3e = vl3tab[l3_table_offset(mmio_addr)];
+        if ( vl3e == 0 )
+            continue;
+
+        vl2tab = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
+        if ( vl2tab == NULL )
+        {
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
+
+        vl2e = vl2tab[l2_table_offset(mmio_addr)];
+        if ( vl2e == 0 )
+        {
+            munmap(vl2tab, PAGE_SIZE);
+            continue;
+        }
+
+        vl1tab = xc_map_foreign_range(
+            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
+        if ( vl1tab == NULL )
+        {
+            PERROR("Failed zap MMIO range");
+            munmap(vl2tab, PAGE_SIZE);
+            return -1;
+        }
+
+        vl1tab[l1_table_offset(mmio_addr)] = 0;
+        munmap(vl2tab, PAGE_SIZE);
+        munmap(vl1tab, PAGE_SIZE);
+    }
+    return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l3tab,
+                           unsigned char e820_map_nr, unsigned char *e820map)
+{
+    unsigned int i;
+    struct e820entry *e820entry = (struct e820entry *)e820map;
+
+    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l3tab >> PAGE_SHIFT);
+    if (vl3tab == 0)
+        return -1;
+    for ( i = 0; i < e820_map_nr; i++ ) {
+        if ( (e820entry[i].type == E820_IO) &&
+             (zap_mmio_range(xc_handle, dom, vl3tab,
+                             e820entry[i].addr, e820entry[i].size) == -1) )
+            return -1;
+    }
+    munmap(vl3tab, PAGE_SIZE);
+    return 0;
+}
+
+#endif
+
+static int setup_guest(int xc_handle,
+                       uint32_t dom, int memsize,
+                       char *image, unsigned long image_size,
+                       unsigned long nr_pages,
+                       vcpu_guest_context_t *ctxt,
+                       unsigned long shared_info_frame,
+                       unsigned int control_evtchn,
+                       unsigned int vcpus,
+                       unsigned int acpi,
+                       unsigned int apic,
+                       unsigned int store_evtchn,
+                       unsigned long *store_mfn)
+{
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
+    unsigned long *page_array = NULL;
+#ifdef __x86_64__
+    l3_pgentry_t *vl3tab=NULL;
+    unsigned long l3tab;
+#endif
+    unsigned long l2tab = 0;
+    unsigned long l1tab = 0;
+    unsigned long count, i;
+    shared_info_t *shared_info;
+    void *e820_page;
+    unsigned char e820_map_nr;
+    xc_mmu_t *mmu = NULL;
+    int rc;
+
+    unsigned long nr_pt_pages;
+    unsigned long ppt_alloc;
+
+    struct domain_setup_info dsi;
+    unsigned long vpt_start;
+    unsigned long vpt_end;
+    unsigned long v_end;
+
+    unsigned long shared_page_frame = 0;
+    shared_iopage_t *sp;
+
+    memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
+        goto error_out;
+
+    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
+    {
+        PERROR("Guest OS must load to a page boundary.\n");
+        goto error_out;
+    }
+
+    /* memsize is in megabytes */
+    v_end              = (unsigned long)memsize << 20;
+
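+    /*
+     * Page-table pages needed to cover the guest: on i386 (non-PAE), one
+     * L2 page plus one L1 page per 4MB; on x86_64 (PAE-format tables),
+     * one PDPT page, four L2 pages, plus one L1 page per 2MB.
+     */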
+#ifdef __i386__
+    nr_pt_pages = 1 + ((memsize + 3) >> 2);
+#else
+    nr_pt_pages = 5 + ((memsize + 1) >> 1);
+#endif
+    vpt_start   = v_end;
+    vpt_end     = vpt_start + (nr_pt_pages * PAGE_SIZE);
+
+    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
+           " Loaded HVM loader: %08lx->%08lx\n"
+           " Page tables:   %08lx->%08lx\n"
+           " TOTAL:         %08lx->%08lx\n",
+           dsi.v_kernstart, dsi.v_kernend,
+           vpt_start, vpt_end,
+           dsi.v_start, v_end);
+    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+
+    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+    {
+        ERROR("Initial guest OS requires too much space\n"
+               "(%luMB is greater than %luMB limit)\n",
+               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+        goto error_out;
+    }
+
+    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
+    {
+        PERROR("Could not allocate memory");
+        goto error_out;
+    }
+
+    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
+    {
+        PERROR("Could not get the page frame list");
+        goto error_out;
+    }
+
+    loadelfimage(image, xc_handle, dom, page_array, &dsi);
+
+    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
+        goto error_out;
+
+    /* First allocate page for page dir or pdpt */
+    ppt_alloc = vpt_start >> PAGE_SHIFT;
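+    /* An MFN above 0xfffff is a machine address at or above 4GB; the
+     * page-directory base must sit below 4GB, so exchange the frame
+     * for a low one if necessary. */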
+    if ( page_array[ppt_alloc] > 0xfffff )
+    {
+        unsigned long nmfn;
+        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
+        if ( nmfn == 0 )
+        {
+            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
+            goto error_out;
+        }
+        page_array[ppt_alloc] = nmfn;
+    }
+
+#ifdef __i386__
+    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->ctrlreg[3] = l2tab;
+
+    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                        PROT_READ|PROT_WRITE,
+                                        l2tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl2tab, 0, PAGE_SIZE);
+    vl2e = &vl2tab[l2_table_offset(0)];
+    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
+    {
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        vl1e++;
+    }
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+#else
+    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->ctrlreg[3] = l3tab;
+
+    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                        PROT_READ|PROT_WRITE,
+                                        l3tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl3tab, 0, PAGE_SIZE);
+
+    /* Fill in every PDPT entry. */
+    for ( i = 0; i < L3_PAGETABLE_ENTRIES_PAE; i++ )
+    {
+        l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+        if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                            PROT_READ|PROT_WRITE,
+                                            l2tab >> PAGE_SHIFT)) == NULL )
+            goto error_out;
+        memset(vl2tab, 0, PAGE_SIZE);
+        munmap(vl2tab, PAGE_SIZE);
+        vl2tab = NULL;
+        vl3tab[i] = l2tab | L3_PROT;
+    }
+
+    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
+    {
+        if ( !(count & ((1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)) - 1)) )
+        {
+            l2tab = vl3tab[count >> (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]
+                    & PAGE_MASK;
+
+            if (vl2tab != NULL)
+                munmap(vl2tab, PAGE_SIZE);
+
+            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l2tab >> PAGE_SHIFT)) == NULL )
+                goto error_out;
+
+            vl2e = &vl2tab[l2_table_offset(count << PAGE_SHIFT)];
+        }
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        vl1e++;
+    }
+
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+    munmap(vl3tab, PAGE_SIZE);
+#endif
+    /* Write the machine->phys table entries. */
+    for ( count = 0; count < nr_pages; count++ )
+    {
+        if ( xc_add_mmu_update(xc_handle, mmu,
+                               (page_array[count] << PAGE_SHIFT) |
+                               MMU_MACHPHYS_UPDATE, count) )
+            goto error_out;
+    }
+
+    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
+        fprintf(stderr, "Couldn't set hvm info for HVM guest.\n");
+        goto error_out;
+    }
+
+    if ( (e820_page = xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
+        goto error_out;
+    memset(e820_page, 0, PAGE_SIZE);
+    e820_map_nr = build_e820map(e820_page, v_end);
+#if defined (__i386__)
+    if (zap_mmio_ranges(xc_handle, dom, l2tab, e820_map_nr,
+                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
+#else
+    if (zap_mmio_ranges(xc_handle, dom, l3tab, e820_map_nr,
+                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
+#endif
+        goto error_out;
+    munmap(e820_page, PAGE_SIZE);
+
+    /* shared_info page starts its life empty. */
+    if ( (shared_info = xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         shared_info_frame)) == 0 )
+        goto error_out;
+    memset(shared_info, 0, sizeof(shared_info_t));
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
+    munmap(shared_info, PAGE_SIZE);
+
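+    /* The last page of guest memory holds the shared I/O request page;
+     * the page below it backs the xenstore ring. */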
+    /* Populate the event channel port in the shared page */
+    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
+    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
+         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
+         shared_page_frame)) == 0 )
+        goto error_out;
+    memset(sp, 0, PAGE_SIZE);
+    sp->sp_global.eport = control_evtchn;
+    munmap(sp, PAGE_SIZE);
+
+    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
+    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
+        goto error_out;
+
+    /* Send the page update requests down to the hypervisor. */
+    if ( xc_finish_mmu_updates(xc_handle, mmu) )
+        goto error_out;
+
+    free(mmu);
+    free(page_array);
+
+    /*
+     * Initial register values:
+     */
+    ctxt->user_regs.ds = 0;
+    ctxt->user_regs.es = 0;
+    ctxt->user_regs.fs = 0;
+    ctxt->user_regs.gs = 0;
+    ctxt->user_regs.ss = 0;
+    ctxt->user_regs.cs = 0;
+    ctxt->user_regs.eip = dsi.v_kernentry;
+    ctxt->user_regs.edx = 0;
+    ctxt->user_regs.eax = 0;
+    ctxt->user_regs.esp = 0;
+    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
+    ctxt->user_regs.ecx = 0;
+    ctxt->user_regs.esi = 0;
+    ctxt->user_regs.edi = 0;
+    ctxt->user_regs.ebp = 0;
+
+    ctxt->user_regs.eflags = 0;
+
+    return 0;
+
+ error_out:
+    free(mmu);
+    free(page_array);
+    return -1;
+}
+
+int xc_hvm_build(int xc_handle,
+                 uint32_t domid,
+                 int memsize,
+                 const char *image_name,
+                 unsigned int control_evtchn,
+                 unsigned int vcpus,
+                 unsigned int acpi,
+                 unsigned int apic,
+                 unsigned int store_evtchn,
+                 unsigned long *store_mfn)
+{
+    dom0_op_t launch_op, op;
+    int rc, i;
+    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
+    unsigned long nr_pages;
+    char         *image = NULL;
+    unsigned long image_size;
+    xen_capabilities_info_t xen_caps;
+
+    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
+    {
+        PERROR("Failed to get xen version info");
+        goto error_out;
+    }
+
+    if ( !strstr(xen_caps, "hvm") )
+    {
+       PERROR("CPU doesn't support HVM extensions or "
+              "the extensions are not enabled");
+        goto error_out;
+    }
+
+    if ( (long)(nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
+    {
+        PERROR("Could not find total pages for domain");
+        goto error_out;
+    }
+
+    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
+        goto error_out;
+
+    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
+    {
+        PERROR("%s: ctxt mlock failed", __func__);
+        goto error_out;
+    }
+
+    op.cmd = DOM0_GETDOMAININFO;
+    op.u.getdomaininfo.domain = (domid_t)domid;
+    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
+         ((uint16_t)op.u.getdomaininfo.domain != domid) )
+    {
+        PERROR("Could not get info on domain");
+        goto error_out;
+    }
+
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->flags = VGCF_HVM_GUEST;
+    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
+                     ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn,
+                     vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
+    {
+        ERROR("Error constructing guest OS");
+        goto error_out;
+    }
+
+    free(image);
+
+    /* FPU is set up to default initial state. */
+    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+
+    /* Virtual IDT is empty at start-of-day. */
+    for ( i = 0; i < 256; i++ )
+    {
+        ctxt->trap_ctxt[i].vector = i;
+        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+    }
+
+    /* No LDT. */
+    ctxt->ldt_ents = 0;
+
+    /* Use the default Xen-provided GDT. */
+    ctxt->gdt_ents = 0;
+
+    /* No debugging. */
+    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
+
+    /* No callback handlers. */
+#if defined(__i386__)
+    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
+    ctxt->failsafe_callback_eip = 0;
+#elif defined(__x86_64__)
+    ctxt->event_callback_eip    = 0;
+    ctxt->failsafe_callback_eip = 0;
+    ctxt->syscall_callback_eip  = 0;
+#endif
+
+    memset( &launch_op, 0, sizeof(launch_op) );
+
+    launch_op.u.setvcpucontext.domain = (domid_t)domid;
+    launch_op.u.setvcpucontext.vcpu   = 0;
+    launch_op.u.setvcpucontext.ctxt   = ctxt;
+
+    launch_op.cmd = DOM0_SETVCPUCONTEXT;
+    rc = xc_dom0_op(xc_handle, &launch_op);
+
+    return rc;
+
+ error_out:
+    free(image);
+    return -1;
+}
+
+static inline int is_loadable_phdr(Elf32_Phdr *phdr)
+{
+    return ((phdr->p_type == PT_LOAD) &&
+            ((phdr->p_flags & (PF_W|PF_X)) != 0));
+}
+
+static int parseelfimage(char *elfbase,
+                         unsigned long elfsize,
+                         struct domain_setup_info *dsi)
+{
+    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
+    Elf32_Phdr *phdr;
+    Elf32_Shdr *shdr;
+    unsigned long kernstart = ~0UL, kernend=0UL;
+    char *shstrtab;
+    int h;
+
+    if ( !IS_ELF(*ehdr) )
+    {
+        ERROR("Kernel image does not have an ELF header.");
+        return -EINVAL;
+    }
+
+    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
+    {
+        ERROR("ELF program headers extend beyond end of image.");
+        return -EINVAL;
+    }
+
+    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
+    {
+        ERROR("ELF section headers extend beyond end of image.");
+        return -EINVAL;
+    }
+
+    /* Find the section-header strings table. */
+    if ( ehdr->e_shstrndx == SHN_UNDEF )
+    {
+        ERROR("ELF image has no section-header strings table (shstrtab).");
+        return -EINVAL;
+    }
+    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
+                          (ehdr->e_shstrndx*ehdr->e_shentsize));
+    shstrtab = elfbase + shdr->sh_offset;
+
+    for ( h = 0; h < ehdr->e_phnum; h++ )
+    {
+        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+        if ( !is_loadable_phdr(phdr) )
+            continue;
+        if ( phdr->p_paddr < kernstart )
+            kernstart = phdr->p_paddr;
+        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
+            kernend = phdr->p_paddr + phdr->p_memsz;
+    }
+
+    if ( (kernstart > kernend) ||
+         (ehdr->e_entry < kernstart) ||
+         (ehdr->e_entry > kernend) )
+    {
+        ERROR("Malformed ELF image.");
+        return -EINVAL;
+    }
+
+    dsi->v_start = 0x00000000;
+
+    dsi->v_kernstart = kernstart;
+    dsi->v_kernend   = kernend;
+    dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;
+
+    dsi->v_end       = dsi->v_kernend;
+
+    return 0;
+}
+
+static int
+loadelfimage(
+    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
+    struct domain_setup_info *dsi)
+{
+    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
+    Elf32_Phdr *phdr;
+    int h;
+
+    char         *va;
+    unsigned long pa, done, chunksz;
+
+    for ( h = 0; h < ehdr->e_phnum; h++ )
+    {
+        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
+        if ( !is_loadable_phdr(phdr) )
+            continue;
+
+        for ( done = 0; done < phdr->p_filesz; done += chunksz )
+        {
+            pa = (phdr->p_paddr + done) - dsi->v_start;
+            if ((va = xc_map_foreign_range(
+                xch, dom, PAGE_SIZE, PROT_WRITE,
+                parray[pa >> PAGE_SHIFT])) == 0)
+                return -1;
+            chunksz = phdr->p_filesz - done;
+            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+            memcpy(va + (pa & (PAGE_SIZE-1)),
+                   elfbase + phdr->p_offset + done, chunksz);
+            munmap(va, PAGE_SIZE);
+        }
+
+        for ( ; done < phdr->p_memsz; done += chunksz )
+        {
+            pa = (phdr->p_paddr + done) - dsi->v_start;
+            if ((va = xc_map_foreign_range(
+                xch, dom, PAGE_SIZE, PROT_WRITE,
+                parray[pa >> PAGE_SHIFT])) == 0)
+                return -1;
+            chunksz = phdr->p_memsz - done;
+            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
+                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
+            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
+            munmap(va, PAGE_SIZE);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/hvm.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/hvm.c    Tue Jan 31 10:49:51 2006
@@ -0,0 +1,308 @@
+/*
+ * hvm.c: Common hardware virtual machine abstractions.
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/softirq.h>
+#include <xen/domain_page.h>
+#include <asm/current.h>
+#include <asm/io.h>
+#include <asm/shadow.h>
+#include <asm/regs.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/spinlock.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/shadow.h>
+#if CONFIG_PAGING_LEVELS >= 3
+#include <asm/shadow_64.h>
+#endif
+#include <public/sched.h>
+#include <public/hvm/ioreq.h>
+#include <public/hvm/hvm_info_table.h>
+
+int hvm_enabled = 0;
+int hvm_switch_on = 0;
+
+unsigned int opt_hvm_debug_level = 0;
+integer_param("hvm_debug", opt_hvm_debug_level);
+
+struct hvm_function_table hvm_funcs;
+
+static void hvm_map_io_shared_page(struct domain *d)
+{
+    int i;
+    unsigned char e820_map_nr;
+    struct e820entry *e820entry;
+    unsigned char *p;
+    unsigned long mpfn;
+    unsigned long gpfn = 0;
+
+    local_flush_tlb_pge();
+
+    mpfn = get_mfn_from_pfn(E820_MAP_PAGE >> PAGE_SHIFT);
+    if (mpfn == INVALID_MFN) {
+        printk("Can not find E820 memory map page for HVM domain.\n");
+        domain_crash_synchronous();
+    }
+
+    p = map_domain_page(mpfn);
+    if (p == NULL) {
+        printk("Can not map E820 memory map page for HVM domain.\n");
+        domain_crash_synchronous();
+    }
+
+    e820_map_nr = *(p + E820_MAP_NR_OFFSET);
+    e820entry = (struct e820entry *)(p + E820_MAP_OFFSET);
+
+    for ( i = 0; i < e820_map_nr; i++ )
+    {
+        if (e820entry[i].type == E820_SHARED_PAGE)
+        {
+            gpfn = (e820entry[i].addr >> PAGE_SHIFT);
+            break;
+        }
+    }
+
+    if ( gpfn == 0 ) {
+        printk("Can not get io request shared page"
+               " from E820 memory map for HVM domain.\n");
+        unmap_domain_page(p);
+        domain_crash_synchronous();
+    }
+    unmap_domain_page(p);
+
+    /* Initialise shared page */
+    mpfn = get_mfn_from_pfn(gpfn);
+    if (mpfn == INVALID_MFN) {
+        printk("Can not find io request shared page for HVM domain.\n");
+        domain_crash_synchronous();
+    }
+
+    p = map_domain_page_global(mpfn);
+    if (p == NULL) {
+        printk("Can not map io request shared page for HVM domain.\n");
+        domain_crash_synchronous();
+    }
+    d->arch.hvm_domain.shared_page_va = (unsigned long)p;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d));
+
+    clear_bit(iopacket_port(d),
+              &d->shared_info->evtchn_mask[0]);
+}
+
+static int validate_hvm_info(struct hvm_info_table *t)
+{
+    char signature[] = "HVM INFO";
+    uint8_t *ptr = (uint8_t *)t;
+    uint8_t sum = 0;
+    int i;
+
+    /* strncmp(t->signature, "HVM INFO", 8) */
+    for ( i = 0; i < 8; i++ ) {
+        if ( signature[i] != t->signature[i] ) {
+            printk("Bad hvm info signature\n");
+            return 0;
+        }
+    }
+
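+    /* The table checksums to zero: summing every byte, including the
+     * checksum field itself, must yield 0 (mod 256). */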
+    for ( i = 0; i < t->length; i++ )
+        sum += ptr[i];
+
+    return (sum == 0);
+}
+
+static void hvm_get_info(struct domain *d)
+{
+    unsigned char *p;
+    unsigned long mpfn;
+    struct hvm_info_table *t;
+
+    mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
+    if ( mpfn == INVALID_MFN ) {
+        printk("Can not get info page mfn for HVM domain.\n");
+        domain_crash_synchronous();
+    }
+
+    p = map_domain_page(mpfn);
+    if ( p == NULL ) {
+        printk("Can not map info page for HVM domain.\n");
+        domain_crash_synchronous();
+    }
+
+    t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
+
+    if ( validate_hvm_info(t) ) {
+        d->arch.hvm_domain.nr_vcpus = t->nr_vcpus;
+        d->arch.hvm_domain.apic_enabled = t->apic_enabled;
+    } else {
+        printk("Bad hvm info table\n");
+        d->arch.hvm_domain.nr_vcpus = 1;
+        d->arch.hvm_domain.apic_enabled = 0;
+    }
+
+    unmap_domain_page(p);
+}
+
+void hvm_setup_platform(struct domain* d)
+{
+    struct hvm_domain *platform;
+
+    if (!(HVM_DOMAIN(current) && (current->vcpu_id == 0)))
+        return;
+
+    hvm_map_io_shared_page(d);
+    hvm_get_info(d);
+
+    platform = &d->arch.hvm_domain;
+    pic_init(&platform->vpic, pic_irq_request, &platform->interrupt_request);
+    register_pic_io_hook();
+
+    if ( hvm_apic_support(d) ) {
+        spin_lock_init(&d->arch.hvm_domain.round_robin_lock);
+        hvm_vioapic_init(d);
+    }
+}
+
+void pic_irq_request(int *interrupt_request, int level)
+{
+    if (level)
+        *interrupt_request = 1;
+    else
+        *interrupt_request = 0;
+}
+
+void hvm_pic_assist(struct vcpu *v)
+{
+    global_iodata_t *spg;
+    u16   *virq_line, irqs;
+    struct hvm_virpic *pic = &v->domain->arch.hvm_domain.vpic;
+    
+    spg = &get_sp(v->domain)->sp_global;
+    virq_line  = &spg->pic_clear_irr;
+    if ( *virq_line ) {
+        do {
+            irqs = *(volatile u16*)virq_line;
+        } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
+        do_pic_irqs_clear(pic, irqs);
+    }
+    virq_line  = &spg->pic_irr;
+    if ( *virq_line ) {
+        do {
+            irqs = *(volatile u16*)virq_line;
+        } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
+        do_pic_irqs(pic, irqs);
+    }
+}
+
+int cpu_get_interrupt(struct vcpu *v, int *type)
+{
+    int intno;
+    struct hvm_virpic *s = &v->domain->arch.hvm_domain.vpic;
+
+    if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) {
+        /* set irq request if a PIC irq is still pending */
+        /* XXX: improve that */
+        pic_update_irq(s);
+        return intno;
+    }
+    /* read the irq from the PIC */
+    if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
+        return intno;
+
+    return -1;
+}
+
+/*
+ * Copy to/from a guest virtual address, walking the guest's page tables
+ * when paging is enabled. Returns 1 on success, 0 on failure.
+ */
+int
+hvm_copy(void *buf, unsigned long vaddr, int size, int dir)
+{
+    unsigned long gpa, mfn;
+    char *addr;
+    int count;
+
+    while (size > 0) {
+        count = PAGE_SIZE - (vaddr & ~PAGE_MASK);
+        if (count > size)
+            count = size;
+
+        if (hvm_paging_enabled(current)) {
+            gpa = gva_to_gpa(vaddr);
+            mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT);
+        } else
+            mfn = get_mfn_from_pfn(vaddr >> PAGE_SHIFT);
+        if (mfn == INVALID_MFN)
+            return 0;
+
+        addr = (char *)map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
+
+        if (dir == HVM_COPY_IN)
+            memcpy(buf, addr, count);
+        else
+            memcpy(addr, buf, count);
+
+        unmap_domain_page(addr);
+
+        vaddr += count;
+        buf += count;
+        size -= count;
+    }
+
+    return 1;
+}
+
+/*
+ * HVM specific printbuf. Mostly used for hvmloader chit-chat.
+ */
+void hvm_print_line(struct vcpu *v, const char c)
+{
+    int *index = &v->domain->arch.hvm_domain.pbuf_index;
+    char *pbuf = v->domain->arch.hvm_domain.pbuf;
+
+    if (*index == HVM_PBUF_SIZE-2 || c == '\n') {
+        if (*index == HVM_PBUF_SIZE-2)
+            pbuf[(*index)++] = c;
+        pbuf[*index] = '\0';
+        printk("(GUEST: %u) %s\n", v->domain->domain_id, pbuf);
+        *index = 0;
+    } else
+        pbuf[(*index)++] = c;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/i8259.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/i8259.c  Tue Jan 31 10:49:51 2006
@@ -0,0 +1,548 @@
+/*
+ * QEMU 8259 interrupt controller emulation
+ * 
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2005 Intel Corporation
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+#include <asm/current.h>
+
+/* set irq level. If an edge is detected, then the IRR is set to 1 */
+static inline void pic_set_irq1(PicState *s, int irq, int level)
+{
+    int mask;
+    mask = 1 << irq;
+    if (s->elcr & mask) {
+        /* level triggered */
+        if (level) {
+            s->irr |= mask;
+            s->last_irr |= mask;
+        } else {
+            s->irr &= ~mask;
+            s->last_irr &= ~mask;
+        }
+    } else {
+        /* edge triggered */
+        if (level) {
+            if ((s->last_irr & mask) == 0) {
+                s->irr |= mask;
+            }
+            s->last_irr |= mask;
+        } else {
+            s->last_irr &= ~mask;
+        }
+    }
+}
+
+/* return the highest priority found in mask (highest = smallest
+   number). Return 8 if no irq */
+static inline int get_priority(PicState *s, int mask)
+{
+    int priority;
+    if (mask == 0)
+        return 8;
+    priority = 0;
+    while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
+        priority++;
+    return priority;
+}
+
+/* return the pic wanted interrupt. return -1 if none */
+static int pic_get_irq(PicState *s)
+{
+    int mask, cur_priority, priority;
+
+    mask = s->irr & ~s->imr;
+    priority = get_priority(s, mask);
+    if (priority == 8)
+        return -1;
+    /* compute current priority. If special fully nested mode on the
+       master, the IRQ coming from the slave is not taken into account
+       for the priority computation. */
+    mask = s->isr;
+    if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
+        mask &= ~(1 << 2);
+    cur_priority = get_priority(s, mask);
+    if (priority < cur_priority) {
+        /* higher priority found: an irq should be generated */
+        return (priority + s->priority_add) & 7;
+    } else {
+        return -1;
+    }
+}
+
+/* raise irq to CPU if necessary. must be called every time the active
+   irq may change */
+/* XXX: should not export it, but it is needed for an APIC kludge */
+void pic_update_irq(struct hvm_virpic *s)
+{
+    int irq2, irq;
+
+    /* first look at slave pic */
+    irq2 = pic_get_irq(&s->pics[1]);
+    if (irq2 >= 0) {
+        /* if irq request by slave pic, signal master PIC */
+        pic_set_irq1(&s->pics[0], 2, 1);
+        pic_set_irq1(&s->pics[0], 2, 0);
+    }
+    /* look at requested irq */
+    irq = pic_get_irq(&s->pics[0]);
+    if (irq >= 0) {
+        s->irq_request(s->irq_request_opaque, 1);
+    }
+}
+
+void pic_set_irq_new(void *opaque, int irq, int level)
+{
+    struct hvm_virpic *s = opaque;
+
+    hvm_vioapic_set_irq(current->domain, irq, level);
+    pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+    /* used for IOAPIC irqs */
+    if (s->alt_irq_func)
+        s->alt_irq_func(s->alt_irq_opaque, irq, level);
+    pic_update_irq(s);
+}
+
+void do_pic_irqs (struct hvm_virpic *s, uint16_t irqs)
+{
+    s->pics[1].irr |= (uint8_t)(irqs >> 8);
+    s->pics[0].irr |= (uint8_t) irqs;
+    hvm_vioapic_do_irqs(current->domain, irqs);
+    pic_update_irq(s);
+}
+
+void do_pic_irqs_clear (struct hvm_virpic *s, uint16_t irqs)
+{
+    s->pics[1].irr &= ~(uint8_t)(irqs >> 8);
+    s->pics[0].irr &= ~(uint8_t) irqs;
+    hvm_vioapic_do_irqs_clear(current->domain, irqs);
+    pic_update_irq(s);
+}
+
+/* obsolete function */
+void pic_set_irq(struct hvm_virpic *isa_pic, int irq, int level)
+{
+    pic_set_irq_new(isa_pic, irq, level);
+}
+
+/* acknowledge interrupt 'irq' */
+static inline void pic_intack(PicState *s, int irq)
+{
+    if (s->auto_eoi) {
+        if (s->rotate_on_auto_eoi)
+            s->priority_add = (irq + 1) & 7;
+    } else {
+        s->isr |= (1 << irq);
+    }
+    /* We don't clear a level sensitive interrupt here */
+    if (!(s->elcr & (1 << irq)))
+        s->irr &= ~(1 << irq);
+}
+
+int pic_read_irq(struct hvm_virpic *s)
+{
+    int irq, irq2, intno;
+
+    irq = pic_get_irq(&s->pics[0]);
+    if (irq >= 0) {
+        pic_intack(&s->pics[0], irq);
+        if (irq == 2) {
+            irq2 = pic_get_irq(&s->pics[1]);
+            if (irq2 >= 0) {
+                pic_intack(&s->pics[1], irq2);
+            } else {
+                /* spurious IRQ on slave controller */
+                irq2 = 7;
+            }
+            intno = s->pics[1].irq_base + irq2;
+            irq = irq2 + 8;
+        } else {
+            intno = s->pics[0].irq_base + irq;
+        }
+    } else {
+        /* spurious IRQ on host controller */
+        printk("spurious IRQ irq got=%d\n",irq);
+        irq = 7;
+        intno = s->pics[0].irq_base + irq;
+    }
+    pic_update_irq(s);
+        
+    return intno;
+}
+
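+/* Mirror one PIC's last_irr and elcr into the shared I/O page so the
+ * device model sees the same edge/level state as the hypervisor. */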
+static void update_shared_irr(struct hvm_virpic *s, PicState *c)
+{
+    uint8_t *pl, *pe;
+
+    get_sp(current->domain)->sp_global.pic_elcr =
+        s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
+    pl = (uint8_t *)&get_sp(current->domain)->sp_global.pic_last_irr;
+    pe = (uint8_t *)&get_sp(current->domain)->sp_global.pic_elcr;
+    if ( c == &s->pics[0] ) {
+         *pl = c->last_irr;
+         *pe = c->elcr;
+    }
+    else {
+         *(pl+1) = c->last_irr;
+         *(pe+1) = c->elcr;
+    }
+}
+
+static void pic_reset(void *opaque)
+{
+    PicState *s = opaque;
+
+    s->last_irr = 0;
+    s->irr = 0;
+    s->imr = 0;
+    s->isr = 0;
+    s->priority_add = 0;
+    s->irq_base = 0;
+    s->read_reg_select = 0;
+    s->poll = 0;
+    s->special_mask = 0;
+    s->init_state = 0;
+    s->auto_eoi = 0;
+    s->rotate_on_auto_eoi = 0;
+    s->special_fully_nested_mode = 0;
+    s->init4 = 0;
+    s->elcr = 0;
+}
+
+static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PicState *s = opaque;
+    int priority, cmd, irq;
+
+    addr &= 1;
+    if (addr == 0) {
+        if (val & 0x10) {
+            /* init */
+            pic_reset(s);
+            update_shared_irr(s->pics_state, s);
+            /* deassert a pending interrupt */
+            s->pics_state->irq_request(s->pics_state->irq_request_opaque, 0);
+            s->init_state = 1;
+            s->init4 = val & 1;
+            if (val & 0x02)
+                hw_error("single mode not supported");
+            if (val & 0x08)
+                hw_error("level sensitive irq not supported");
+        } else if (val & 0x08) {
+            if (val & 0x04)
+                s->poll = 1;
+            if (val & 0x02)
+                s->read_reg_select = val & 1;
+            if (val & 0x40)
+                s->special_mask = (val >> 5) & 1;
+        } else {
+            cmd = val >> 5;
+            switch(cmd) {
+            case 0:
+            case 4:
+                s->rotate_on_auto_eoi = cmd >> 2;
+                break;
+            case 1: /* end of interrupt */
+            case 5:
+                priority = get_priority(s, s->isr);
+                if (priority != 8) {
+                    irq = (priority + s->priority_add) & 7;
+                    s->isr &= ~(1 << irq);
+                    if (cmd == 5)
+                        s->priority_add = (irq + 1) & 7;
+                    pic_update_irq(s->pics_state);
+                }
+                break;
+            case 3:
+                irq = val & 7;
+                s->isr &= ~(1 << irq);
+                pic_update_irq(s->pics_state);
+                break;
+            case 6:
+                s->priority_add = (val + 1) & 7;
+                pic_update_irq(s->pics_state);
+                break;
+            case 7:
+                irq = val & 7;
+                s->isr &= ~(1 << irq);
+                s->priority_add = (irq + 1) & 7;
+                pic_update_irq(s->pics_state);
+                break;
+            default:
+                /* no operation */
+                break;
+            }
+        }
+    } else {
+        switch(s->init_state) {
+        case 0:
+            /* normal mode */
+            s->imr = val;
+            pic_update_irq(s->pics_state);
+            break;
+        case 1:
+            s->irq_base = val & 0xf8;
+            s->init_state = 2;
+            break;
+        case 2:
+            if (s->init4) {
+                s->init_state = 3;
+            } else {
+                s->init_state = 0;
+            }
+            break;
+        case 3:
+            s->special_fully_nested_mode = (val >> 4) & 1;
+            s->auto_eoi = (val >> 1) & 1;
+            s->init_state = 0;
+            break;
+        }
+    }
+}
+
+static uint32_t pic_poll_read (PicState *s, uint32_t addr1)
+{
+    int ret;
+
+    ret = pic_get_irq(s);
+    if (ret >= 0) {
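+        /* Bit 7 of the port address distinguishes the slave PIC (0xa0);
+         * a poll on the slave also clears the cascade IRQ2 latched in
+         * the master. */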
+        if (addr1 >> 7) {
+            s->pics_state->pics[0].isr &= ~(1 << 2);
+            s->pics_state->pics[0].irr &= ~(1 << 2);
+        }
+        s->irr &= ~(1 << ret);
+        s->isr &= ~(1 << ret);
+        if (addr1 >> 7 || ret != 2)
+            pic_update_irq(s->pics_state);
+    } else {
+        ret = 0x07;
+        pic_update_irq(s->pics_state);
+    }
+
+    return ret;
+}
+
+static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
+{
+    PicState *s = opaque;
+    unsigned int addr;
+    int ret;
+
+    addr = addr1;
+    addr &= 1;
+    if (s->poll) {
+        ret = pic_poll_read(s, addr1);
+        s->poll = 0;
+    } else {
+        if (addr == 0) {
+            if (s->read_reg_select)
+                ret = s->isr;
+            else
+                ret = s->irr;
+        } else {
+            ret = s->imr;
+        }
+    }
+    return ret;
+}
+
+/* memory mapped interrupt status */
+/* XXX: may be the same as pic_read_irq() */
+uint32_t pic_intack_read(struct hvm_virpic *s)
+{
+    int ret;
+
+    ret = pic_poll_read(&s->pics[0], 0x00);
+    if (ret == 2)
+        ret = pic_poll_read(&s->pics[1], 0x80) + 8;
+    /* Prepare for ISR read */
+    s->pics[0].read_reg_select = 1;
+    
+    return ret;
+}
+
+static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PicState *s = opaque;
+    s->elcr = val & s->elcr_mask;
+}
+
+static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1)
+{
+    PicState *s = opaque;
+    return s->elcr;
+}
+
+/* XXX: add generic master/slave system */
+static void pic_init1(int io_addr, int elcr_addr, PicState *s)
+{
+    pic_reset(s);
+}
+
+void pic_init(struct hvm_virpic *s, void (*irq_request)(), 
+              void *irq_request_opaque)
+{
+    memset(s, 0, sizeof(*s));
+    pic_init1(0x20, 0x4d0, &s->pics[0]);
+    pic_init1(0xa0, 0x4d1, &s->pics[1]);
+    s->pics[0].elcr_mask = 0xf8;
+    s->pics[1].elcr_mask = 0xde;
+    s->irq_request = irq_request;
+    s->irq_request_opaque = irq_request_opaque;
+    s->pics[0].pics_state = s;
+    s->pics[1].pics_state = s;
+    return; 
+}
+
+void pic_set_alt_irq_func(struct hvm_virpic *s, void (*alt_irq_func)(),
+                          void *alt_irq_opaque)
+{
+    s->alt_irq_func = alt_irq_func;
+    s->alt_irq_opaque = alt_irq_opaque;
+}
+
+static int intercept_pic_io(ioreq_t *p)
+{
+    struct hvm_virpic  *pic;
+    struct vcpu *v = current;
+    uint32_t data;
+    
+    if ( p->size != 1 || p->count != 1) {
+        printk("PIC_IO wrong access size %d!\n", (int)p->size);
+        return 1;
+    }
+    pic = &v->domain->arch.hvm_domain.vpic;
+    if ( p->dir == 0 ) {
+        if(p->pdata_valid) 
+            hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
+        else
+            data = p->u.data;
+        pic_ioport_write((void*)&pic->pics[p->addr>>7],
+                (uint32_t) p->addr, (uint32_t) (data & 0xff));
+    }
+    else {
+        data = pic_ioport_read(
+            (void*)&pic->pics[p->addr>>7], (uint32_t) p->addr);
+        if(p->pdata_valid) 
+            hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_OUT);
+        else 
+            p->u.data = (u64)data;
+    }
+    return 1;
+}
+
+static int intercept_elcr_io(ioreq_t *p)
+{
+    struct hvm_virpic  *s;
+    struct vcpu *v = current;
+    uint32_t data;
+    
+    if ( p->size != 1 || p->count != 1 ) {
+        printk("PIC_IO wrong access size %d!\n", (int)p->size);
+        return 1;
+    }
+
+    s = &v->domain->arch.hvm_domain.vpic;
+    if ( p->dir == 0 ) {
+        if(p->pdata_valid) 
+            hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
+        else
+            data = p->u.data;
+        elcr_ioport_write((void*)&s->pics[p->addr&1],
+                (uint32_t) p->addr, (uint32_t)( data & 0xff));
+        get_sp(current->domain)->sp_global.pic_elcr =
+            s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
+    }
+    else {
+        data = (u64) elcr_ioport_read(
+                (void*)&s->pics[p->addr&1], (uint32_t) p->addr);
+        if(p->pdata_valid) 
+            hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_OUT);
+        else 
+            p->u.data = (u64)data;
+
+    }
+    return 1;
+}
+
+void register_pic_io_hook(void)
+{
+    register_portio_handler(0x20, 2, intercept_pic_io); 
+    register_portio_handler(0x4d0, 1, intercept_elcr_io); 
+    register_portio_handler(0xa0, 2, intercept_pic_io); 
+    register_portio_handler(0x4d1, 1, intercept_elcr_io); 
+}
+
+
+/* IRQ handling */
+int cpu_get_pic_interrupt(struct vcpu *v, int *type)
+{
+    int intno;
+    struct hvm_virpic *s = &v->domain->arch.hvm_domain.vpic;
+    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
+
+    if ( !vlapic_accept_pic_intr(v) )
+        return -1;
+
+    if ( !plat->interrupt_request )
+        return -1;
+
+    plat->interrupt_request = 0;
+    /* read the irq from the PIC */
+    intno = pic_read_irq(s);
+    *type = VLAPIC_DELIV_MODE_EXT;
+    return intno;
+}
+
+int is_pit_irq(struct vcpu *v, int irq, int type)
+{
+    int pit_vec;
+
+    if (type == VLAPIC_DELIV_MODE_EXT)
+        pit_vec = v->domain->arch.hvm_domain.vpic.pics[0].irq_base;
+    else
+        pit_vec =
+          v->domain->arch.hvm_domain.vioapic.redirtbl[0].RedirForm.vector;
+
+    return (irq == pit_vec);
+}
+
+int is_irq_enabled(struct vcpu *v, int irq)
+{
+    struct hvm_virpic *vpic=&v->domain->arch.hvm_domain.vpic;
+        
+    if ( irq & 8 ) {
+        return !( (1 << (irq&7)) & vpic->pics[1].imr);
+    }
+    else {
+        return !( (1 << irq) & vpic->pics[0].imr);
+    }
+}
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/intercept.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/intercept.c      Tue Jan 31 10:49:51 2006
@@ -0,0 +1,457 @@
+/*
+ * intercept.c: Handle performance critical I/O packets in hypervisor space
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/domain.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <io_ports.h>
+#include <xen/event.h>
+
+
+extern struct hvm_mmio_handler vlapic_mmio_handler;
+extern struct hvm_mmio_handler vioapic_mmio_handler;
+
+#define HVM_MMIO_HANDLER_NR 2
+
+struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
+{
+    &vlapic_mmio_handler,
+    &vioapic_mmio_handler
+};
+
+static inline void hvm_mmio_access(struct vcpu *v,
+                                   ioreq_t *p,
+                                   hvm_mmio_read_t read_handler,
+                                   hvm_mmio_write_t write_handler)
+{
+    ioreq_t *req;
+    vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id);
+    unsigned int tmp1, tmp2;
+    unsigned long data;
+
+    if (vio == NULL) {
+        printk("vlapic_access: bad shared page\n");
+        domain_crash_synchronous();
+    }
+
+    req = &vio->vp_ioreq;
+
+    switch (req->type) {
+    case IOREQ_TYPE_COPY:
+    {
+        int sign = (req->df) ? -1 : 1, i;
+
+        if (!req->pdata_valid) {
+            if (req->dir == IOREQ_READ){
+                req->u.data = read_handler(v, req->addr, req->size);
+            } else {                 /* req->dir != IOREQ_READ */
+                write_handler(v, req->addr, req->size, req->u.data);
+            }
+        } else {                     /* req->pdata_valid */
+            if (req->dir == IOREQ_READ) {
+                for (i = 0; i < req->count; i++) {
+                    data = read_handler(v,
+                      req->addr + (sign * i * req->size),
+                      req->size);
+                    hvm_copy(&data,
+                      (unsigned long)p->u.pdata + (sign * i * req->size),
+                      p->size,
+                      HVM_COPY_OUT);
+                }
+            } else {                  /* req->dir != IOREQ_READ */
+                for (i = 0; i < req->count; i++) {
+                    hvm_copy(&data,
+                      (unsigned long)p->u.pdata + (sign * i * req->size),
+                      p->size,
+                      HVM_COPY_IN);
+                    write_handler(v,
+                      req->addr + (sign * i * req->size),
+                      req->size, data);
+                }
+            }
+        }
+        break;
+    }
+
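+    /* AND/OR/XOR are read-modify-write cycles: the old value is returned
+     * in req->u.data and, for writes, the combined result is stored. */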
+    case IOREQ_TYPE_AND:
+        tmp1 = read_handler(v, req->addr, req->size);
+        if (req->dir == IOREQ_WRITE) {
+            tmp2 = tmp1 & (unsigned long) req->u.data;
+            write_handler(v, req->addr, req->size, tmp2);
+        }
+        req->u.data = tmp1;
+        break;
+
+    case IOREQ_TYPE_OR:
+        tmp1 = read_handler(v, req->addr, req->size);
+        if (req->dir == IOREQ_WRITE) {
+            tmp2 = tmp1 | (unsigned long) req->u.data;
+            write_handler(v, req->addr, req->size, tmp2);
+        }
+        req->u.data = tmp1;
+        break;
+
+    case IOREQ_TYPE_XOR:
+        tmp1 = read_handler(v, req->addr, req->size);
+        if (req->dir == IOREQ_WRITE) {
+            tmp2 = tmp1 ^ (unsigned long) req->u.data;
+            write_handler(v, req->addr, req->size, tmp2);
+        }
+        req->u.data = tmp1;
+        break;
+
+    default:
+        printk("error ioreq type for local APIC %x\n", req->type);
+        domain_crash_synchronous();
+        break;
+    }
+}
+
+int hvm_mmio_intercept(ioreq_t *p)
+{
+    struct vcpu *v = current;
+    int i;
+
+    /* XXX: currently only the APIC uses the MMIO intercept */
+    if ( !hvm_apic_support(v->domain) )
+        return 0;
+
+    for ( i = 0; i < HVM_MMIO_HANDLER_NR; i++ ) {
+        if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) ) {
+            hvm_mmio_access(v, p,
+                            hvm_mmio_handlers[i]->read_handler,
+                           hvm_mmio_handlers[i]->write_handler);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Check if the request is handled inside xen
+ * return value: 0 --not handled; 1 --handled
+ */
+int hvm_io_intercept(ioreq_t *p, int type)
+{
+    struct vcpu *v = current;
+    struct hvm_io_handler *handler =
+                           &(v->domain->arch.hvm_domain.io_handler);
+    int i;
+    unsigned long addr, size;
+
+    for (i = 0; i < handler->num_slot; i++) {
+        if( type != handler->hdl_list[i].type)
+            continue;
+        addr = handler->hdl_list[i].addr;
+        size = handler->hdl_list[i].size;
+        if (p->addr >= addr &&
+            p->addr <  addr + size)
+            return handler->hdl_list[i].action(p);
+    }
+    return 0;
+}
+
+int register_io_handler(unsigned long addr, unsigned long size,
+                        intercept_action_t action, int type)
+{
+    struct vcpu *v = current;
+    struct hvm_io_handler *handler =
+                             &(v->domain->arch.hvm_domain.io_handler);
+    int num = handler->num_slot;
+
+    if (num >= MAX_IO_HANDLER) {
+        printk("no extra space, register io interceptor failed!\n");
+        domain_crash_synchronous();
+    }
+
+    handler->hdl_list[num].addr = addr;
+    handler->hdl_list[num].size = size;
+    handler->hdl_list[num].action = action;
+    handler->hdl_list[num].type = type;
+    handler->num_slot++;
+
+    return 1;
+}
+
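+/* Derive the current PIT count from the time elapsed since the last
+ * injected tick, rather than decrementing a live counter. */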
+static void pit_cal_count(struct hvm_virpit *vpit)
+{
+    u64 nsec_delta = (unsigned int)((NOW() - vpit->inject_point));
+
+    if (nsec_delta > vpit->period)
+        HVM_DBG_LOG(DBG_LEVEL_1,
+                    "HVM_PIT: a long time has passed since the last injection!");
+
+    if(vpit->init_val == 0)
+    {
+        printk("PIT init value == 0!\n");
+        domain_crash_synchronous();
+    }
+
+    vpit->count = vpit->init_val
+                  - ((nsec_delta * PIT_FREQ / 1000000000ULL) % vpit->init_val);
+}
+
+static void pit_latch_io(struct hvm_virpit *vpit)
+{
+    pit_cal_count(vpit);
+
+    switch(vpit->read_state) {
+    case MSByte:
+        vpit->count_MSB_latched=1;
+        break;
+    case LSByte:
+        vpit->count_LSB_latched=1;
+        break;
+    case LSByte_multiple:
+        vpit->count_LSB_latched=1;
+        vpit->count_MSB_latched=1;
+        break;
+    case MSByte_multiple:
+        HVM_DBG_LOG(DBG_LEVEL_1,
+                   "HVM_PIT: latch PIT counter before MSB_multiple!");
+        vpit->read_state=LSByte_multiple;
+        vpit->count_LSB_latched=1;
+        vpit->count_MSB_latched=1;
+        break;
+    default:
+        domain_crash_synchronous();
+    }
+}
+
+static int pit_read_io(struct hvm_virpit *vpit)
+{
+    if(vpit->count_LSB_latched) {
+        /* Read Least Significant Byte */
+        if(vpit->read_state==LSByte_multiple) {
+            vpit->read_state=MSByte_multiple;
+        }
+        vpit->count_LSB_latched=0;
+        return (vpit->count & 0xFF);
+    } else if(vpit->count_MSB_latched) {
+        /* Read Most Significant Byte */
+        if(vpit->read_state==MSByte_multiple) {
+            vpit->read_state=LSByte_multiple;
+        }
+        vpit->count_MSB_latched=0;
+        return ((vpit->count>>8) & 0xFF);
+    } else {
+        /* Unlatched Count Read */
+        HVM_DBG_LOG(DBG_LEVEL_1, "HVM_PIT: unlatched read");
+        pit_cal_count(vpit);
+        if(!(vpit->read_state & 0x1)) {
+            /* Read Least Significant Byte */
+            if(vpit->read_state==LSByte_multiple) {
+                vpit->read_state=MSByte_multiple;
+            }
+            return (vpit->count & 0xFF);
+        } else {
+            /* Read Most Significant Byte */
+            if(vpit->read_state==MSByte_multiple) {
+                vpit->read_state=LSByte_multiple;
+            }
+            return ((vpit->count>>8) & 0xFF);
+        }
+    }
+}
+
+/* hvm_io_assist light-weight version, specific to PIT DM */ 
+static void resume_pit_io(ioreq_t *p)
+{
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    unsigned long old_eax = regs->eax;
+    p->state = STATE_INVALID;
+
+    switch(p->size) {
+    case 1:
+        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
+        break;
+    case 2:
+        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
+        break;
+    case 4:
+        regs->eax = (p->u.data & 0xffffffff);
+        break;
+    default:
+        BUG();
+    }
+}
+
+/* the intercept action for PIT DM retval:0--not handled; 1--handled */
+int intercept_pit_io(ioreq_t *p)
+{
+    struct vcpu *v = current;
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+
+    if (p->size != 1 ||
+        p->pdata_valid ||
+        p->type != IOREQ_TYPE_PIO)
+        return 0;
+    
+    if (p->addr == PIT_MODE &&
+        p->dir == 0 &&    /* write */
+        ((p->u.data >> 4) & 0x3) == 0 && /* latch command */
+        ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
+        pit_latch_io(vpit);
+        return 1;
+    }
+
+    if (p->addr == (PIT_CH0 + vpit->channel) &&
+        p->dir == 1) { /* read */
+        p->u.data = pit_read_io(vpit);
+        resume_pit_io(p);
+        return 1;
+    }
+
+    return 0;
+}
+
+/* hooks function for the HLT instruction emulation wakeup */
+void hlt_timer_fn(void *data)
+{
+    struct vcpu *v = data;
+    
+    evtchn_set_pending(v, iopacket_port(v->domain));
+}
+
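+/* Fold ticks that expired while the timer was not running into
+ * pending_intr_nr and advance the next scheduled expiry to match. */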
+static __inline__ void missed_ticks(struct hvm_virpit *vpit)
+{
+    int missed_ticks;
+
+    missed_ticks = (NOW() - vpit->scheduled)/(s_time_t) vpit->period;
+    if ( missed_ticks > 0 ) {
+        vpit->pending_intr_nr += missed_ticks;
+        vpit->scheduled += missed_ticks * vpit->period;
+    }
+}
+
+/* hooks function for the PIT when the guest is active */
+static void pit_timer_fn(void *data)
+{
+    struct vcpu *v = data;
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+
+    /* pick up missed timer tick */
+    missed_ticks(vpit);
+
+    vpit->pending_intr_nr++;
+    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
+        vpit->scheduled += vpit->period;
+        set_timer(&vpit->pit_timer, vpit->scheduled);
+    }
+}
+
+void pickup_deactive_ticks(struct hvm_virpit *vpit)
+{
+    if ( !active_timer(&(vpit->pit_timer)) ) {
+        /* pick up missed timer tick */
+        missed_ticks(vpit);
+
+        vpit->scheduled += vpit->period;
+        set_timer(&vpit->pit_timer, vpit->scheduled);
+    }
+}
+
+/* Only some PIT operations, such as loading the init counter, need a
+ * hypervisor hook; all other operations are left to the userspace DM.
+ */
+void hvm_hooks_assist(struct vcpu *v)
+{
+    vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id);
+    ioreq_t *p = &vio->vp_ioreq;
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    int rw_mode, reinit = 0;
+
+    /* load init count */
+    if (p->state == STATE_IORESP_HOOK) {
+        /* set up actimer, handle re-init */
+        if ( active_timer(&(vpit->pit_timer)) ) {
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "HVM_PIT: guest reset PIT with channel %lx!\n",
+                        (unsigned long)((p->u.data >> 24) & 0x3));
+            stop_timer(&(vpit->pit_timer));
+            reinit = 1;
+        }
+        else {
+            init_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
+        }
+
+        /* init count for this channel */
+        vpit->init_val = (p->u.data & 0xFFFF);
+        /* period (ns) of the PIT */
+        vpit->period = DIV_ROUND(((vpit->init_val) * 1000000000ULL), PIT_FREQ);
+        HVM_DBG_LOG(DBG_LEVEL_1,
+                    "HVM_PIT: guest set init pit period:%u ns, initval:0x%x\n",
+                    vpit->period, vpit->init_val);
+        if (vpit->period < 900000) { /* < 0.9 ms */
+            printk("HVM_PIT: guest programmed too small an init_val: %x\n",
+                   vpit->init_val);
+            vpit->period = 1000000;
+        }
+        vpit->period_cycles = (u64)vpit->period * cpu_khz / 1000000L;
+        printk("HVM_PIT: guest period in cycles=%lld\n",
+               (long long)vpit->period_cycles);
+
+        vpit->channel = ((p->u.data >> 24) & 0x3);
+        vpit->first_injected = 0;
+
+        vpit->count_LSB_latched = 0;
+        vpit->count_MSB_latched = 0;
+
+        rw_mode = ((p->u.data >> 26) & 0x3);
+        switch(rw_mode) {
+        case 0x1:
+            vpit->read_state=LSByte;
+            break;
+        case 0x2:
+            vpit->read_state=MSByte;
+            break;
+        case 0x3:
+            vpit->read_state=LSByte_multiple;
+            break;
+        default:
+            printk("HVM_PIT:wrong PIT rw_mode!\n");
+            break;
+        }
+
+        vpit->scheduled = NOW() + vpit->period;
+        set_timer(&vpit->pit_timer, vpit->scheduled);
+
+        /* restore the state */
+        p->state = STATE_IORESP_READY;
+
+        /* register handler to intercept the PIT io when vm_exit */
+        if (!reinit) {
+            register_portio_handler(0x40, 4, intercept_pit_io); 
+        }
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/io.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/io.c     Tue Jan 31 10:49:51 2006
@@ -0,0 +1,759 @@
+/*
+ * io.c: Handling I/O and interrupts.
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/trace.h>
+#include <xen/event.h>
+
+#include <xen/hypercall.h>
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/shadow.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vpit.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vlapic.h>
+
+#include <public/sched.h>
+#include <public/hvm/ioreq.h>
+
+#if defined (__i386__)
+static void set_reg_value(int size, int index, int seg,
+                          struct cpu_user_regs *regs, long value)
+{
+    switch (size) {
+    case BYTE:
+        switch (index) {
+        case 0:
+            regs->eax &= 0xFFFFFF00;
+            regs->eax |= (value & 0xFF);
+            break;
+        case 1:
+            regs->ecx &= 0xFFFFFF00;
+            regs->ecx |= (value & 0xFF);
+            break;
+        case 2:
+            regs->edx &= 0xFFFFFF00;
+            regs->edx |= (value & 0xFF);
+            break;
+        case 3:
+            regs->ebx &= 0xFFFFFF00;
+            regs->ebx |= (value & 0xFF);
+            break;
+        case 4:
+            regs->eax &= 0xFFFF00FF;
+            regs->eax |= ((value & 0xFF) << 8);
+            break;
+        case 5:
+            regs->ecx &= 0xFFFF00FF;
+            regs->ecx |= ((value & 0xFF) << 8);
+            break;
+        case 6:
+            regs->edx &= 0xFFFF00FF;
+            regs->edx |= ((value & 0xFF) << 8);
+            break;
+        case 7:
+            regs->ebx &= 0xFFFF00FF;
+            regs->ebx |= ((value & 0xFF) << 8);
+            break;
+        default:
+            printk("Error: size:%x, index:%x are invalid!\n", size, index);
+            domain_crash_synchronous();
+            break;
+        }
+        break;
+    case WORD:
+        switch (index) {
+        case 0:
+            regs->eax &= 0xFFFF0000;
+            regs->eax |= (value & 0xFFFF);
+            break;
+        case 1:
+            regs->ecx &= 0xFFFF0000;
+            regs->ecx |= (value & 0xFFFF);
+            break;
+        case 2:
+            regs->edx &= 0xFFFF0000;
+            regs->edx |= (value & 0xFFFF);
+            break;
+        case 3:
+            regs->ebx &= 0xFFFF0000;
+            regs->ebx |= (value & 0xFFFF);
+            break;
+        case 4:
+            regs->esp &= 0xFFFF0000;
+            regs->esp |= (value & 0xFFFF);
+            break;
+        case 5:
+            regs->ebp &= 0xFFFF0000;
+            regs->ebp |= (value & 0xFFFF);
+            break;
+        case 6:
+            regs->esi &= 0xFFFF0000;
+            regs->esi |= (value & 0xFFFF);
+            break;
+        case 7:
+            regs->edi &= 0xFFFF0000;
+            regs->edi |= (value & 0xFFFF);
+            break;
+        default:
+            printk("Error: size:%x, index:%x are invalid!\n", size, index);
+            domain_crash_synchronous();
+            break;
+        }
+        break;
+    case LONG:
+        switch (index) {
+        case 0:
+            regs->eax = value;
+            break;
+        case 1:
+            regs->ecx = value;
+            break;
+        case 2:
+            regs->edx = value;
+            break;
+        case 3:
+            regs->ebx = value;
+            break;
+        case 4:
+            regs->esp = value;
+            break;
+        case 5:
+            regs->ebp = value;
+            break;
+        case 6:
+            regs->esi = value;
+            break;
+        case 7:
+            regs->edi = value;
+            break;
+        default:
+            printk("Error: size:%x, index:%x are invalid!\n", size, index);
+            domain_crash_synchronous();
+            break;
+        }
+        break;
+    default:
+        printk("Error: size:%x, index:%x are invalid!\n", size, index);
+        domain_crash_synchronous();
+        break;
+    }
+}
+#else
+static inline void __set_reg_value(unsigned long *reg, int size, long value)
+{
+    switch (size) {
+    case BYTE_64:
+        *reg &= ~0xFF;
+        *reg |= (value & 0xFF);
+        break;
+    case WORD:
+        *reg &= ~0xFFFF;
+        *reg |= (value & 0xFFFF);
+        break;
+    case LONG:
+        *reg &= ~0xFFFFFFFF;
+        *reg |= (value & 0xFFFFFFFF);
+        break;
+    case QUAD:
+        *reg = value;
+        break;
+    default:
+        printk("Error: <__set_reg_value>: size:%x is invalid\n", size);
+        domain_crash_synchronous();
+    }
+}
+
+static void set_reg_value(int size, int index, int seg,
+                          struct cpu_user_regs *regs, long value)
+{
+    if (size == BYTE) {
+        switch (index) {
+        case 0:
+            regs->rax &= ~0xFF;
+            regs->rax |= (value & 0xFF);
+            break;
+        case 1:
+            regs->rcx &= ~0xFF;
+            regs->rcx |= (value & 0xFF);
+            break;
+        case 2:
+            regs->rdx &= ~0xFF;
+            regs->rdx |= (value & 0xFF);
+            break;
+        case 3:
+            regs->rbx &= ~0xFF;
+            regs->rbx |= (value & 0xFF);
+            break;
+        case 4:
+            regs->rax &= 0xFFFFFFFFFFFF00FF;
+            regs->rax |= ((value & 0xFF) << 8);
+            break;
+        case 5:
+            regs->rcx &= 0xFFFFFFFFFFFF00FF;
+            regs->rcx |= ((value & 0xFF) << 8);
+            break;
+        case 6:
+            regs->rdx &= 0xFFFFFFFFFFFF00FF;
+            regs->rdx |= ((value & 0xFF) << 8);
+            break;
+        case 7:
+            regs->rbx &= 0xFFFFFFFFFFFF00FF;
+            regs->rbx |= ((value & 0xFF) << 8);
+            break;
+        default:
+            printk("Error: size:%x, index:%x are invalid!\n", size, index);
+            domain_crash_synchronous();
+            break;
+        }
+        return;
+    }
+
+    switch (index) {
+    case 0:
+        __set_reg_value(&regs->rax, size, value);
+        break;
+    case 1:
+        __set_reg_value(&regs->rcx, size, value);
+        break;
+    case 2:
+        __set_reg_value(&regs->rdx, size, value);
+        break;
+    case 3:
+        __set_reg_value(&regs->rbx, size, value);
+        break;
+    case 4:
+        __set_reg_value(&regs->rsp, size, value);
+        break;
+    case 5:
+        __set_reg_value(&regs->rbp, size, value);
+        break;
+    case 6:
+        __set_reg_value(&regs->rsi, size, value);
+        break;
+    case 7:
+        __set_reg_value(&regs->rdi, size, value);
+        break;
+    case 8:
+        __set_reg_value(&regs->r8, size, value);
+        break;
+    case 9:
+        __set_reg_value(&regs->r9, size, value);
+        break;
+    case 10:
+        __set_reg_value(&regs->r10, size, value);
+        break;
+    case 11:
+        __set_reg_value(&regs->r11, size, value);
+        break;
+    case 12:
+        __set_reg_value(&regs->r12, size, value);
+        break;
+    case 13:
+        __set_reg_value(&regs->r13, size, value);
+        break;
+    case 14:
+        __set_reg_value(&regs->r14, size, value);
+        break;
+    case 15:
+        __set_reg_value(&regs->r15, size, value);
+        break;
+    default:
+        printk("Error: <set_reg_value> Invalid index\n");
+        domain_crash_synchronous();
+    }
+    return;
+}
+#endif
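
Both set_reg_value() variants above lean on the same quirk of the x86
register encoding: for a byte operand without a REX prefix, register
numbers 0-3 select the low bytes %al/%cl/%dl/%bl while 4-7 select the high
bytes %ah/%ch/%dh/%bh of the same four GPRs, which is why the byte cases
split at index 4. A compact sketch of that masking, using a plain
four-element array as a hypothetical stand-in for cpu_user_regs:

    #include <stdio.h>
    #include <stdint.h>

    static void set_byte_reg(uint32_t gpr[4], int index, uint8_t value)
    {
        if (index < 4)                   /* %al, %cl, %dl, %bl */
            gpr[index] = (gpr[index] & ~0xFFu) | value;
        else                             /* %ah, %ch, %dh, %bh */
            gpr[index - 4] = (gpr[index - 4] & ~0xFF00u) |
                             ((uint32_t)value << 8);
    }

    int main(void)
    {
        uint32_t gpr[4] = { 0x11223344, 0, 0, 0 }; /* eax, ecx, edx, ebx */

        set_byte_reg(gpr, 0, 0xAA);  /* %al -> eax = 0x112233AA */
        set_byte_reg(gpr, 4, 0xBB);  /* %ah -> eax = 0x1122BBAA */
        printf("eax = 0x%08X\n", (unsigned)gpr[0]);
        return 0;
    }
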
+
+extern long get_reg_value(int size, int index, int seg,
+                          struct cpu_user_regs *regs);
+
+static inline void set_eflags_CF(int size, unsigned long v1,
+                                 unsigned long v2, struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) > (v2 & mask))
+        regs->eflags |= X86_EFLAGS_CF;
+    else
+        regs->eflags &= ~X86_EFLAGS_CF;
+}
+
+static inline void set_eflags_OF(int size, unsigned long v1,
+                                 unsigned long v2, unsigned long v3,
+                                 struct cpu_user_regs *regs)
+{
+    if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1)))
+        regs->eflags |= X86_EFLAGS_OF;
+}
+
+static inline void set_eflags_AF(int size, unsigned long v1,
+                                 unsigned long v2, unsigned long v3,
+                                 struct cpu_user_regs *regs)
+{
+    if ((v1 ^ v2 ^ v3) & 0x10)
+        regs->eflags |= X86_EFLAGS_AF;
+}
+
+static inline void set_eflags_ZF(int size, unsigned long v1,
+                                 struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) == 0)
+        regs->eflags |= X86_EFLAGS_ZF;
+}
+
+static inline void set_eflags_SF(int size, unsigned long v1,
+                                 struct cpu_user_regs *regs)
+{
+    if (v1 & (1 << ((8 * size) - 1)))
+        regs->eflags |= X86_EFLAGS_SF;
+}
+
+static char parity_table[256] = {
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+static inline void set_eflags_PF(int size, unsigned long v1,
+                                 struct cpu_user_regs *regs)
+{
+    if (parity_table[v1 & 0xFF])
+        regs->eflags |= X86_EFLAGS_PF;
+}
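
The 256-entry parity_table above is a precomputed answer to "does the low
byte of the result contain an even number of set bits?", which is exactly
what the hardware PF flag reports. A quick standalone check that recomputes
a few entries the slow way:

    #include <stdio.h>

    /* PF is set when the low byte has an even number of 1 bits */
    static int even_parity(unsigned char v)
    {
        int bits = 0;
        while (v) {
            bits += v & 1;
            v >>= 1;
        }
        return (bits & 1) == 0;
    }

    int main(void)
    {
        /* matches the table: entry 0x00 -> 1, 0x01 -> 0, 0x03 -> 1 */
        printf("PF(0x00)=%d PF(0x01)=%d PF(0x03)=%d\n",
               even_parity(0x00), even_parity(0x01), even_parity(0x03));
        return 0;
    }
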
+
+static void hvm_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                           struct mmio_op *mmio_opp)
+{
+    unsigned long old_eax;
+    int sign = p->df ? -1 : 1;
+
+    if (p->dir == IOREQ_WRITE) {
+        if (p->pdata_valid) {
+            regs->esi += sign * p->count * p->size;
+            if (mmio_opp->flags & REPZ)
+                regs->ecx -= p->count;
+        }
+    } else {
+        if (mmio_opp->flags & OVERLAP) {
+            unsigned long addr;
+
+            regs->edi += sign * p->count * p->size;
+            if (mmio_opp->flags & REPZ)
+                regs->ecx -= p->count;
+
+            addr = regs->edi;
+            if (sign > 0)
+                addr -= p->size;
+            hvm_copy(&p->u.data, addr, p->size, HVM_COPY_OUT);
+        } else if (p->pdata_valid) {
+            regs->edi += sign * p->count * p->size;
+            if (mmio_opp->flags & REPZ)
+                regs->ecx -= p->count;
+        } else {
+            old_eax = regs->eax;
+            switch (p->size) {
+            case 1:
+                regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
+                break;
+            case 2:
+                regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
+                break;
+            case 4:
+                regs->eax = (p->u.data & 0xffffffff);
+                break;
+            default:
+                printk("Error: %s unknown port size\n", __FUNCTION__);
+                domain_crash_synchronous();
+            }
+        }
+    }
+}
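
The register fixups in hvm_pio_assist() mirror what the hardware would have
done had the ins/outs executed natively: the index register moves by
count*size in the direction selected by EFLAGS.DF, and a REP prefix
consumes its count from %ecx. A tiny sketch of that arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned long edi = 0x1000, ecx = 4;
        int df = 0, count = 4, size = 2;     /* rep insw with DF clear */
        int sign = df ? -1 : 1;

        edi += sign * count * size;          /* 4 words copied forwards  */
        ecx -= count;                        /* REP consumed whole count */
        printf("edi=0x%lx ecx=%lu\n", edi, ecx);  /* edi=0x1008 ecx=0 */
        return 0;
    }
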
+
+static void hvm_mmio_assist(struct vcpu *v, struct cpu_user_regs *regs,
+                            ioreq_t *p, struct mmio_op *mmio_opp)
+{
+    int sign = p->df ? -1 : 1;
+    int size = -1, index = -1;
+    unsigned long value = 0, diff = 0;
+    unsigned long src, dst;
+
+    src = mmio_opp->operand[0];
+    dst = mmio_opp->operand[1];
+    size = operand_size(src);
+
+    switch (mmio_opp->instr) {
+    case INSTR_MOV:
+        if (dst & REGISTER) {
+            index = operand_index(dst);
+            set_reg_value(size, index, 0, regs, p->u.data);
+        }
+        break;
+
+    case INSTR_MOVZX:
+        if (dst & REGISTER) {
+            switch (size) {
+            case BYTE:
+                p->u.data &= 0xFFULL;
+                break;
+
+            case WORD:
+                p->u.data &= 0xFFFFULL;
+                break;
+
+            case LONG:
+                p->u.data &= 0xFFFFFFFFULL;
+                break;
+
+            default:
+                printk("Impossible source operand size of movzx instr: %d\n",
+                       size);
+                domain_crash_synchronous();
+            }
+            index = operand_index(dst);
+            set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
+        }
+        break;
+
+    case INSTR_MOVSX:
+        if (dst & REGISTER) {
+            switch (size) {
+            case BYTE:
+                p->u.data &= 0xFFULL;
+                if ( p->u.data & 0x80ULL )
+                    p->u.data |= 0xFFFFFFFFFFFFFF00ULL;
+                break;
+
+            case WORD:
+                p->u.data &= 0xFFFFULL;
+                if ( p->u.data & 0x8000ULL )
+                    p->u.data |= 0xFFFFFFFFFFFF0000ULL;
+                break;
+
+            case LONG:
+                p->u.data &= 0xFFFFFFFFULL;
+                if ( p->u.data & 0x80000000ULL )
+                    p->u.data |= 0xFFFFFFFF00000000ULL;
+                break;
+
+            default:
+                printk("Impossible source operand size of movsx instr: %d\n",
+                       size);
+                domain_crash_synchronous();
+            }
+            index = operand_index(dst);
+            set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
+        }
+        break;
+
+    case INSTR_MOVS:
+        sign = p->df ? -1 : 1;
+        regs->esi += sign * p->count * p->size;
+        regs->edi += sign * p->count * p->size;
+
+        if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) {
+            unsigned long addr = regs->edi;
+
+            if (sign > 0)
+                addr -= p->size;
+            hvm_copy(&p->u.data, addr, p->size, HVM_COPY_OUT);
+        }
+
+        if (mmio_opp->flags & REPZ)
+            regs->ecx -= p->count;
+        break;
+
+    case INSTR_STOS:
+        sign = p->df ? -1 : 1;
+        regs->edi += sign * p->count * p->size;
+        if (mmio_opp->flags & REPZ)
+            regs->ecx -= p->count;
+        break;
+
+    case INSTR_AND:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data & value;
+        } else if (src & IMMEDIATE) {
+            value = mmio_opp->immediate;
+            diff = (unsigned long) p->u.data & value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data & value;
+            set_reg_value(size, index, 0, regs, diff);
+        }
+
+        /*
+         * The OF and CF flags are cleared; the SF, ZF, and PF
+         * flags are set according to the result. The state of
+         * the AF flag is undefined.
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_OR:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data | value;
+        } else if (src & IMMEDIATE) {
+            value = mmio_opp->immediate;
+            diff = (unsigned long) p->u.data | value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data | value;
+            set_reg_value(size, index, 0, regs, diff);
+        }
+
+        /*
+         * The OF and CF flags are cleared; the SF, ZF, and PF
+         * flags are set according to the result. The state of
+         * the AF flag is undefined.
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_XOR:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data ^ value;
+        } else if (src & IMMEDIATE) {
+            value = mmio_opp->immediate;
+            diff = (unsigned long) p->u.data ^ value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data ^ value;
+            set_reg_value(size, index, 0, regs, diff);
+        }
+
+        /*
+         * The OF and CF flags are cleared; the SF, ZF, and PF
+         * flags are set according to the result. The state of
+         * the AF flag is undefined.
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_CMP:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data - value;
+        } else if (src & IMMEDIATE) {
+            value = mmio_opp->immediate;
+            diff = (unsigned long) p->u.data - value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = value - (unsigned long) p->u.data;
+        }
+
+        /*
+         * The CF, OF, SF, ZF, AF, and PF flags are set according
+         * to the result
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_CF(size, value, (unsigned long) p->u.data, regs);
+        set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs);
+        set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_TEST:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+        } else if (src & IMMEDIATE) {
+            value = mmio_opp->immediate;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+        }
+        diff = (unsigned long) p->u.data & value;
+
+        /*
+         * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_BT:
+        index = operand_index(src);
+        value = get_reg_value(size, index, 0, regs);
+
+        if (p->u.data & (1 << (value & ((1 << 5) - 1))))
+            regs->eflags |= X86_EFLAGS_CF;
+        else
+            regs->eflags &= ~X86_EFLAGS_CF;
+
+        break;
+    }
+
+    hvm_load_cpu_guest_regs(v, regs);
+}
+
+void hvm_io_assist(struct vcpu *v)
+{
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    struct mmio_op *mmio_opp;
+    struct cpu_user_regs *inst_decoder_regs;
+
+    mmio_opp = &v->arch.hvm_vcpu.mmio_op;
+    inst_decoder_regs = mmio_opp->inst_decoder_regs;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+
+    if (vio == NULL) {
+        HVM_DBG_LOG(DBG_LEVEL_1,
+                    "bad shared page: %lx", (unsigned long) vio);
+        printf("bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    p = &vio->vp_ioreq;
+    if (p->state == STATE_IORESP_HOOK)
+        hvm_hooks_assist(v);
+
+    /* clear IO wait HVM flag */
+    if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) {
+        if (p->state == STATE_IORESP_READY) {
+            p->state = STATE_INVALID;
+            clear_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags);
+
+            if (p->type == IOREQ_TYPE_PIO)
+                hvm_pio_assist(regs, p, mmio_opp);
+            else
+                hvm_mmio_assist(v, regs, p, mmio_opp);
+        }
+        /* else an interrupt send event raced us */
+    }
+}
+
+int hvm_clear_pending_io_event(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    int port = iopacket_port(d);
+
+    /* evtchn_pending_sel bit is shared by other event channels. */
+    if (!d->shared_info->evtchn_pending[port/BITS_PER_LONG])
+        clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel);
+
+    /* Note: HVM domains may need upcalls as well. */
+    if (!v->vcpu_info->evtchn_pending_sel)
+        clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
+
+    /* Clear the pending bit for port. */
+    return test_and_clear_bit(port, &d->shared_info->evtchn_pending[0]);
+}
+
+/*
+ * Because we've cleared the pending events first, we need to guarantee that
+ * all events to be handled by xen for HVM domains are taken care of here.
+ *
+ * Interrupts are guaranteed to be checked before resuming the guest.
+ * HVM upcalls have already been arranged for if necessary.
+ */
+void hvm_check_events(struct vcpu *v)
+{
+    /*
+     * Clear the event *before* checking for work. This should
+     * avoid the set-and-check races
+     */
+    if (hvm_clear_pending_io_event(current))
+        hvm_io_assist(v);
+}
+
+/*
+ * On exit from hvm_wait_io, we're guaranteed to have an I/O response
+ * from the device model.
+ */
+void hvm_wait_io(void)
+{
+    int port = iopacket_port(current->domain);
+
+    do {
+        if (!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
+            do_sched_op(SCHEDOP_block, 0);
+
+        hvm_check_events(current);
+        if (!test_bit(ARCH_HVM_IO_WAIT, &current->arch.hvm_vcpu.ioflags))
+            break;
+        /*
+         * Events other than IOPACKET_PORT might have woken us up.
+         * In that case, safely go back to sleep.
+         */
+        clear_bit(port/BITS_PER_LONG, &current->vcpu_info->evtchn_pending_sel);
+        clear_bit(0, &current->vcpu_info->evtchn_upcall_pending);
+    } while (1);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
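
hvm_clear_pending_io_event() above walks Xen's two-level pending-event
bitmap: one bit per port in shared_info->evtchn_pending[], a per-word
summary bit in the vcpu's evtchn_pending_sel, and a single upcall-pending
flag on top. The sketch below reproduces that structure with local
variables; the clearing order is simplified relative to the hypervisor
code, so read it as a model of the data structure, not a drop-in.

    #include <stdio.h>

    #define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

    int main(void)
    {
        unsigned long pending[2] = { 0 }, sel = 0, upcall = 0;
        int port = 3;

        /* raising an event sets all three levels... */
        pending[port / BITS_PER_LONG] |= 1UL << (port % BITS_PER_LONG);
        sel |= 1UL << (port / BITS_PER_LONG);
        upcall = 1;

        /* ...and clearing walks back up, dropping each summary bit only
         * when everything below it is gone */
        pending[port / BITS_PER_LONG] &= ~(1UL << (port % BITS_PER_LONG));
        if (!pending[port / BITS_PER_LONG])
            sel &= ~(1UL << (port / BITS_PER_LONG));
        if (!sel)
            upcall = 0;

        printf("pending[0]=%lx sel=%lx upcall=%lu\n",
               pending[0], sel, upcall);
        return 0;
    }
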
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/platform.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/platform.c       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,951 @@
+/*
+ * platform.c: handling x86 platform related MMIO instructions
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <asm/shadow.h>
+#include <xen/domain_page.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <public/hvm/ioreq.h>
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#if CONFIG_PAGING_LEVELS >= 3
+#include <asm/shadow_64.h>
+#endif
+
+#define DECODE_success  1
+#define DECODE_failure  0
+
+extern long evtchn_send(int lport);
+
+#if defined (__x86_64__)
+static inline long __get_reg_value(unsigned long reg, int size)
+{
+    switch(size) {
+    case BYTE_64:
+        return (char)(reg & 0xFF);
+    case WORD:
+        return (short)(reg & 0xFFFF);
+    case LONG:
+        return (int)(reg & 0xFFFFFFFF);
+    case QUAD:
+        return (long)(reg);
+    default:
+        printf("Error: (__get_reg_value) Invalid reg size\n");
+        domain_crash_synchronous();
+    }
+}
+
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{
+    if (size == BYTE) {
+        switch (index) {
+        case 0: /* %al */
+            return (char)(regs->rax & 0xFF);
+        case 1: /* %cl */
+            return (char)(regs->rcx & 0xFF);
+        case 2: /* %dl */
+            return (char)(regs->rdx & 0xFF);
+        case 3: /* %bl */
+            return (char)(regs->rbx & 0xFF);
+        case 4: /* %ah */
+            return (char)((regs->rax & 0xFF00) >> 8);
+        case 5: /* %ch */
+            return (char)((regs->rcx & 0xFF00) >> 8);
+        case 6: /* %dh */
+            return (char)((regs->rdx & 0xFF00) >> 8);
+        case 7: /* %bh */
+            return (char)((regs->rbx & 0xFF00) >> 8);
+        default:
+            printf("Error: (get_reg_value) Invalid index value\n");
+            domain_crash_synchronous();
+        }
+        /* NOTREACHED */
+    }
+
+    switch (index) {
+    case 0: return __get_reg_value(regs->rax, size);
+    case 1: return __get_reg_value(regs->rcx, size);
+    case 2: return __get_reg_value(regs->rdx, size);
+    case 3: return __get_reg_value(regs->rbx, size);
+    case 4: return __get_reg_value(regs->rsp, size);
+    case 5: return __get_reg_value(regs->rbp, size);
+    case 6: return __get_reg_value(regs->rsi, size);
+    case 7: return __get_reg_value(regs->rdi, size);
+    case 8: return __get_reg_value(regs->r8, size);
+    case 9: return __get_reg_value(regs->r9, size);
+    case 10: return __get_reg_value(regs->r10, size);
+    case 11: return __get_reg_value(regs->r11, size);
+    case 12: return __get_reg_value(regs->r12, size);
+    case 13: return __get_reg_value(regs->r13, size);
+    case 14: return __get_reg_value(regs->r14, size);
+    case 15: return __get_reg_value(regs->r15, size);
+    default:
+        printf("Error: (get_reg_value) Invalid index value\n");
+        domain_crash_synchronous();
+    }
+}
+#elif defined (__i386__)
+static inline long __get_reg_value(unsigned long reg, int size)
+{
+    switch(size) {
+    case WORD:
+        return (short)(reg & 0xFFFF);
+    case LONG:
+        return (int)(reg & 0xFFFFFFFF);
+    default:
+        printf("Error: (__get_reg_value) Invalid reg size\n");
+        domain_crash_synchronous();
+    }
+}
+
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{
+    if (size == BYTE) {
+        switch (index) {
+        case 0: /* %al */
+            return (char)(regs->eax & 0xFF);
+        case 1: /* %cl */
+            return (char)(regs->ecx & 0xFF);
+        case 2: /* %dl */
+            return (char)(regs->edx & 0xFF);
+        case 3: /* %bl */
+            return (char)(regs->ebx & 0xFF);
+        case 4: /* %ah */
+            return (char)((regs->eax & 0xFF00) >> 8);
+        case 5: /* %ch */
+            return (char)((regs->ecx & 0xFF00) >> 8);
+        case 6: /* %dh */
+            return (char)((regs->edx & 0xFF00) >> 8);
+        case 7: /* %bh */
+            return (char)((regs->ebx & 0xFF00) >> 8);
+        default:
+            printf("Error: (get_reg_value) Invalid index value\n");
+            domain_crash_synchronous();
+        }
+    }
+
+    switch (index) {
+    case 0: return __get_reg_value(regs->eax, size);
+    case 1: return __get_reg_value(regs->ecx, size);
+    case 2: return __get_reg_value(regs->edx, size);
+    case 3: return __get_reg_value(regs->ebx, size);
+    case 4: return __get_reg_value(regs->esp, size);
+    case 5: return __get_reg_value(regs->ebp, size);
+    case 6: return __get_reg_value(regs->esi, size);
+    case 7: return __get_reg_value(regs->edi, size);
+    default:
+        printf("Error: (get_reg_value) Invalid index value\n");
+        domain_crash_synchronous();
+    }
+}
+#endif
+
+static inline unsigned char *check_prefix(unsigned char *inst,
+                                          struct instruction *thread_inst,
+                                          unsigned char *rex_p)
+{
+    while (1) {
+        switch (*inst) {
+            /* rex prefix for em64t instructions */
+        case 0x40 ... 0x4f:
+            *rex_p = *inst;
+            break;
+        case 0xf3: /* REPZ */
+            thread_inst->flags = REPZ;
+            break;
+        case 0xf2: /* REPNZ */
+            thread_inst->flags = REPNZ;
+            break;
+        case 0xf0: /* LOCK */
+            break;
+        case 0x2e: /* CS */
+        case 0x36: /* SS */
+        case 0x3e: /* DS */
+        case 0x26: /* ES */
+        case 0x64: /* FS */
+        case 0x65: /* GS */
+            thread_inst->seg_sel = *inst;
+            break;
+        case 0x66: /* 32bit->16bit */
+            thread_inst->op_size = WORD;
+            break;
+        case 0x67:
+            break;
+        default:
+            return inst;
+        }
+        inst++;
+    }
+}
+
+static inline unsigned long get_immediate(int op16, const unsigned char *inst,
+                                          int op_size)
+{
+    int mod, reg, rm;
+    unsigned long val = 0;
+    int i;
+
+    mod = (*inst >> 6) & 3;
+    reg = (*inst >> 3) & 7;
+    rm = *inst & 7;
+
+    inst++; //skip ModR/M byte
+    if (mod != 3 && rm == 4) {
+        inst++; //skip SIB byte
+    }
+
+    switch(mod) {
+    case 0:
+        if (rm == 5 || rm == 4) {
+            if (op16)
+                inst = inst + 2; //disp16, skip 2 bytes
+            else
+                inst = inst + 4; //disp32, skip 4 bytes
+        }
+        break;
+    case 1:
+        inst++; //disp8, skip 1 byte
+        break;
+    case 2:
+        if (op16)
+            inst = inst + 2; //disp16, skip 2 bytes
+        else
+            inst = inst + 4; //disp32, skip 4 bytes
+        break;
+    }
+
+    if (op_size == QUAD)
+        op_size = LONG;
+
+    for (i = 0; i < op_size; i++) {
+        val |= (*inst++ & 0xff) << (8 * i);
+    }
+
+    return val;
+}
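
get_immediate() above has to step past the ModR/M byte, an optional SIB
byte and any displacement before the immediate begins; the fields it
inspects are mod (bits 7:6), reg (bits 5:3) and rm (bits 2:0) of the ModR/M
byte. A worked example for the bytes following the 0x81 opcode of
"orl $0x10, 0x8(%eax)" (ModR/M 0x48, then a disp8 of 0x08, then imm32):

    #include <stdio.h>

    int main(void)
    {
        const unsigned char inst[] = { 0x48, 0x08, 0x10, 0x00, 0x00, 0x00 };
        int mod = (inst[0] >> 6) & 3;   /* 1 -> a disp8 follows        */
        int reg = (inst[0] >> 3) & 7;   /* 1 -> the /1 (or) subopcode  */
        int rm  = inst[0] & 7;          /* 0 -> base register %eax     */
        unsigned long imm = 0;
        int i;

        /* skip ModR/M + disp8, then read the imm32 little-endian */
        for (i = 0; i < 4; i++)
            imm |= (unsigned long)inst[2 + i] << (8 * i);

        printf("mod=%d reg=%d rm=%d imm=0x%lx\n", mod, reg, rm, imm);
        return 0;
    }
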
+
+static inline int get_index(const unsigned char *inst, unsigned char rex)
+{
+    int mod, reg, rm;
+    int rex_r, rex_b;
+
+    mod = (*inst >> 6) & 3;
+    reg = (*inst >> 3) & 7;
+    rm = *inst & 7;
+
+    rex_r = (rex >> 2) & 1;
+    rex_b = rex & 1;
+
+    //Only one operand in the instruction is a register
+    if (mod == 3) {
+        return (rm + (rex_b << 3));
+    } else {
+        return (reg + (rex_r << 3));
+    }
+    return 0;
+}
+
+static void init_instruction(struct instruction *mmio_inst)
+{
+    mmio_inst->instr = 0;
+    mmio_inst->op_size = 0;
+    mmio_inst->immediate = 0;
+    mmio_inst->seg_sel = 0;
+
+    mmio_inst->operand[0] = 0;
+    mmio_inst->operand[1] = 0;
+
+    mmio_inst->flags = 0;
+}
+
+#define GET_OP_SIZE_FOR_BYTE(op_size)       \
+    do {                                    \
+        if (rex)                            \
+            op_size = BYTE_64;              \
+        else                                \
+            op_size = BYTE;                 \
+    } while(0)
+
+#define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
+    do {                                    \
+        if (rex & 0x8)                      \
+            op_size = QUAD;                 \
+        else if (op_size != WORD)           \
+            op_size = LONG;                 \
+    } while(0)
+
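
The two macros above select the operand size after prefix decoding: any
REX prefix turns a byte operand into BYTE_64 (making %spl/%bpl/%sil/%dil
encodable), and REX.W (bit 3 of the prefix byte) widens a non-byte operand
to 64 bits, while an earlier 0x66 prefix leaves it at WORD and everything
else defaults to LONG. The same logic as a plain function; the size token
values are assumptions re-created here for illustration:

    #include <stdio.h>

    enum { BYTE = 1, WORD = 2, LONG = 4, QUAD = 8, BYTE_64 = 16 };

    static int op_size_nonbyte(unsigned char rex, int op_size)
    {
        if (rex & 0x8)           /* REX.W set: 64-bit operand       */
            return QUAD;
        if (op_size == WORD)     /* a 0x66 prefix was already seen  */
            return WORD;
        return LONG;
    }

    int main(void)
    {
        /* 0x48 is a REX prefix with W set; 0x40-0x47 leave W clear */
        printf("rex=0x48 -> %d, no rex -> %d, no rex + 0x66 -> %d\n",
               op_size_nonbyte(0x48, 0), op_size_nonbyte(0x00, 0),
               op_size_nonbyte(0x00, WORD));   /* prints 8, 4, 2 */
        return 0;
    }
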
+
+/*
+ * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
+ */
+static int mem_acc(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, 0, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
+ */
+static int acc_mem(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+/*
+ * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
+ */
+static int mem_reg(unsigned char size, unsigned char *opcode,
+                   struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, index, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
+ */
+static int reg_mem(unsigned char size, unsigned char *opcode,
+                   struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, index, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+static int hvm_decode(int realmode, unsigned char *opcode,
+                      struct instruction *instr)
+{
+    unsigned char size_reg = 0;
+    unsigned char rex = 0;
+    int index;
+
+    init_instruction(instr);
+
+    opcode = check_prefix(opcode, instr, &rex);
+
+    if (realmode) { /* meaning is reversed */
+        if (instr->op_size == WORD)
+            instr->op_size = LONG;
+        else if (instr->op_size == LONG)
+            instr->op_size = WORD;
+        else if (instr->op_size == 0)
+            instr->op_size = WORD;
+    }
+
+    switch (*opcode) {
+    case 0x0B: /* or m32/16, r32/16 */
+        instr->instr = INSTR_OR;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x20: /* and r8, m8 */
+        instr->instr = INSTR_AND;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return reg_mem(size_reg, opcode, instr, rex);
+
+    case 0x21: /* and r32/16, m32/16 */
+        instr->instr = INSTR_AND;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x23: /* and m32/16, r32/16 */
+        instr->instr = INSTR_AND;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x30: /* xor r8, m8 */
+        instr->instr = INSTR_XOR;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return reg_mem(size_reg, opcode, instr, rex);
+
+    case 0x31: /* xor r32/16, m32/16 */
+        instr->instr = INSTR_XOR;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x39: /* cmp r32/16, m32/16 */
+        instr->instr = INSTR_CMP;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x80:
+    case 0x81:
+        {
+            unsigned char ins_subtype = (opcode[1] >> 3) & 7;
+
+            if (opcode[0] == 0x80) {
+                GET_OP_SIZE_FOR_BYTE(size_reg);
+                instr->op_size = BYTE;
+            } else {
+                GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+                size_reg = instr->op_size;
+            }
+
+            instr->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
+            instr->immediate = get_immediate(realmode, opcode+1, instr->op_size);
+            instr->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
+
+            switch (ins_subtype) {
+                case 7: /* cmp $imm, m32/16 */
+                    instr->instr = INSTR_CMP;
+                    return DECODE_success;
+
+                case 1: /* or $imm, m32/16 */
+                    instr->instr = INSTR_OR;
+                    return DECODE_success;
+
+                default:
+                    printf("%x, this opcode isn't handled yet!\n", *opcode);
+                    return DECODE_failure;
+            }
+        }
+
+    case 0x84:  /* test m8, r8 */
+        instr->instr = INSTR_TEST;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return mem_reg(size_reg, opcode, instr, rex);
+
+    case 0x88: /* mov r8, m8 */
+        instr->instr = INSTR_MOV;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return reg_mem(size_reg, opcode, instr, rex);
+
+    case 0x89: /* mov r32/16, m32/16 */
+        instr->instr = INSTR_MOV;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x8A: /* mov m8, r8 */
+        instr->instr = INSTR_MOV;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return mem_reg(size_reg, opcode, instr, rex);
+
+    case 0x8B: /* mov m32/16, r32/16 */
+        instr->instr = INSTR_MOV;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0xA0: /* mov <addr>, al */
+        instr->instr = INSTR_MOV;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return mem_acc(size_reg, instr);
+
+    case 0xA1: /* mov <addr>, ax/eax */
+        instr->instr = INSTR_MOV;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return mem_acc(instr->op_size, instr);
+
+    case 0xA2: /* mov al, <addr> */
+        instr->instr = INSTR_MOV;
+        instr->op_size = BYTE;
+        GET_OP_SIZE_FOR_BYTE(size_reg);
+        return acc_mem(size_reg, instr);
+
+    case 0xA3: /* mov ax/eax, <addr> */
+        instr->instr = INSTR_MOV;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return acc_mem(instr->op_size, instr);
+
+    case 0xA4: /* movsb */
+        instr->instr = INSTR_MOVS;
+        instr->op_size = BYTE;
+        return DECODE_success;
+
+    case 0xA5: /* movsw/movsl */
+        instr->instr = INSTR_MOVS;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return DECODE_success;
+
+    case 0xAA: /* stosb */
+        instr->instr = INSTR_STOS;
+        instr->op_size = BYTE;
+        return DECODE_success;
+
+    case 0xAB: /* stosw/stosl */
+        instr->instr = INSTR_STOS;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return DECODE_success;
+
+    case 0xC6:
+        if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */
+            instr->instr = INSTR_MOV;
+            instr->op_size = BYTE;
+
+            instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+            instr->immediate = get_immediate(realmode, opcode+1, instr->op_size);
+            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+
+            return DECODE_success;
+        } else
+            return DECODE_failure;
+
+    case 0xC7:
+        if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */
+            instr->instr = INSTR_MOV;
+            GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+
+            instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
+            instr->immediate = get_immediate(realmode, opcode+1, instr->op_size);
+            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+
+            return DECODE_success;
+        } else
+            return DECODE_failure;
+
+    case 0xF6:
+    case 0xF7:
+        if (((opcode[1] >> 3) & 7) == 0) { /* test $imm8/16/32, m8/16/32 */
+            instr->instr = INSTR_TEST;
+
+            if (opcode[0] == 0xF6) {
+                GET_OP_SIZE_FOR_BYTE(size_reg);
+                instr->op_size = BYTE;
+            } else {
+                GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+                size_reg = instr->op_size;
+            }
+
+            instr->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
+            instr->immediate = get_immediate(realmode, opcode+1, instr->op_size);
+            instr->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
+
+            return DECODE_success;
+        } else
+            return DECODE_failure;
+
+    case 0x0F:
+        break;
+
+    default:
+        printf("%x, this opcode isn't handled yet!\n", *opcode);
+        return DECODE_failure;
+    }
+
+    switch (*++opcode) {
+    case 0xB6: /* movzx m8, r16/r32/r64 */
+        instr->instr = INSTR_MOVZX;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        index = get_index(opcode + 1, rex);
+        instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
+        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
+        return DECODE_success;
+
+    case 0xB7: /* movzx m16/m32, r32/r64 */
+        instr->instr = INSTR_MOVZX;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        index = get_index(opcode + 1, rex);
+        if (rex & 0x8)
+            instr->operand[0] = mk_operand(LONG, 0, 0, MEMORY);
+        else
+            instr->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
+        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
+        return DECODE_success;
+
+    case 0xBE: /* movsx m8, r16/r32/r64 */
+        instr->instr = INSTR_MOVSX;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        index = get_index(opcode + 1, rex);
+        instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
+        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
+        return DECODE_success;
+
+    case 0xBF: /* movsx m16, r32/r64 */
+        instr->instr = INSTR_MOVSX;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        index = get_index(opcode + 1, rex);
+        instr->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
+        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
+        return DECODE_success;
+
+    case 0xA3: /* bt r32, m32 */
+        instr->instr = INSTR_BT;
+        index = get_index(opcode + 1, rex);
+        instr->op_size = LONG;
+        instr->operand[0] = mk_operand(instr->op_size, index, 0, REGISTER);
+        instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
+        return DECODE_success;
+
+    default:
+        printf("0f %x, this opcode isn't handled yet\n", *opcode);
+        return DECODE_failure;
+    }
+}
+
+int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
+{
+    if (inst_len > MAX_INST_LEN || inst_len <= 0)
+        return 0;
+    if (!hvm_copy(buf, guest_eip, inst_len, HVM_COPY_IN))
+        return 0;
+    return inst_len;
+}
+
+void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
+                  unsigned long count, int size, long value, int dir,
+                  int pvalid)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == NULL) {
+        printk("bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) {
+        printf("HVM I/O has not yet completed\n");
+        domain_crash_synchronous();
+    }
+    set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags);
+
+    p = &vio->vp_ioreq;
+    p->dir = dir;
+    p->pdata_valid = pvalid;
+
+    p->type = IOREQ_TYPE_PIO;
+    p->size = size;
+    p->addr = port;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+        if (hvm_paging_enabled(current))
+            p->u.pdata = (void *) gva_to_gpa(value);
+        else
+            p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+        p->u.data = value;
+
+    if (hvm_portio_intercept(p)) {
+        p->state = STATE_IORESP_READY;
+        hvm_io_assist(v);
+        return;
+    }
+
+    p->state = STATE_IOREQ_READY;
+
+    evtchn_send(iopacket_port(v->domain));
+    hvm_wait_io();
+}
+
+void send_mmio_req(unsigned char type, unsigned long gpa,
+                   unsigned long count, int size, long value, int dir,
+                   int pvalid)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+    struct cpu_user_regs *regs;
+    extern long evtchn_send(int lport);
+
+    regs = current->arch.hvm_vcpu.mmio_op.inst_decoder_regs;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == NULL) {
+        printf("bad shared page\n");
+        domain_crash_synchronous();
+    }
+
+    p = &vio->vp_ioreq;
+
+    if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags)) {
+        printf("HVM I/O has not yet completed\n");
+        domain_crash_synchronous();
+    }
+
+    set_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags);
+    p->dir = dir;
+    p->pdata_valid = pvalid;
+
+    p->type = type;
+    p->size = size;
+    p->addr = gpa;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+        if (hvm_paging_enabled(v))
+            p->u.pdata = (void *) gva_to_gpa(value);
+        else
+            p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+        p->u.data = value;
+
+    if (hvm_mmio_intercept(p)){
+        p->state = STATE_IORESP_READY;
+        hvm_io_assist(v);
+        return;
+    }
+
+    p->state = STATE_IOREQ_READY;
+
+    evtchn_send(iopacket_port(v->domain));
+    hvm_wait_io();
+}
+
+static void mmio_operands(int type, unsigned long gpa,
+                          struct instruction *inst,
+                          struct mmio_op *mmio_opp, struct cpu_user_regs *regs)
+{
+    unsigned long value = 0;
+    int index, size_reg;
+
+    size_reg = operand_size(inst->operand[0]);
+
+    mmio_opp->flags = inst->flags;
+    mmio_opp->instr = inst->instr;
+    mmio_opp->operand[0] = inst->operand[0]; /* source */
+    mmio_opp->operand[1] = inst->operand[1]; /* destination */
+    mmio_opp->immediate = inst->immediate;
+
+    if (inst->operand[0] & REGISTER) { /* dest is memory */
+        index = operand_index(inst->operand[0]);
+        value = get_reg_value(size_reg, index, 0, regs);
+        send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
+    } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
+        value = inst->immediate;
+        send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
+    } else if (inst->operand[0] & MEMORY) { /* dest is register */
+        /* send the request and wait for the value */
+        if ( (inst->instr == INSTR_MOVZX) || (inst->instr == INSTR_MOVSX) )
+            send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, 0);
+        else
+            send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0);
+    } else {
+        printf("mmio_operands: invalid operand\n");
+        domain_crash_synchronous();
+    }
+}
+
+#define GET_REPEAT_COUNT() \
+     (mmio_inst.flags & REPZ ? (realmode ? regs->ecx & 0xFFFF : regs->ecx) : 1)
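
GET_REPEAT_COUNT() masks %ecx to 16 bits in real mode because a REP prefix
there repeats on %cx rather than %ecx; without the mask, stale upper bits
would wildly inflate the element count. A one-line check of the expression:

    #include <stdio.h>

    int main(void)
    {
        unsigned long ecx = 0x12340005;  /* stale upper bits, cx = 5 */
        int realmode = 1, repz = 1;
        unsigned long count = repz ? (realmode ? ecx & 0xFFFF : ecx) : 1;

        printf("count=%lu\n", count);    /* 5, not 0x12340005 */
        return 0;
    }
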
+
+void handle_mmio(unsigned long va, unsigned long gpa)
+{
+    unsigned long inst_len, inst_addr;
+    struct mmio_op *mmio_opp;
+    struct cpu_user_regs *regs;
+    struct instruction mmio_inst;
+    unsigned char inst[MAX_INST_LEN];
+    int i, realmode, ret;
+    struct vcpu *v = current;
+
+    mmio_opp = &v->arch.hvm_vcpu.mmio_op;
+
+    regs = mmio_opp->inst_decoder_regs;
+    hvm_store_cpu_guest_regs(v, regs);
+
+    if ((inst_len = hvm_instruction_length(v)) <= 0) {
+        printf("handle_mmio: failed to get instruction length\n");
+        domain_crash_synchronous();
+    }
+
+    realmode = hvm_realmode(v);
+    if (realmode)
+        inst_addr = (regs->cs << 4) + regs->eip;
+    else
+        inst_addr = regs->eip;
+
+    memset(inst, 0, MAX_INST_LEN);
+    ret = inst_copy_from_guest(inst, inst_addr, inst_len);
+    if (ret != inst_len) {
+        printf("handle_mmio: failed to copy instruction\n");
+        domain_crash_synchronous();
+    }
+
+    init_instruction(&mmio_inst);
+
+    if (hvm_decode(realmode, inst, &mmio_inst) == DECODE_failure) {
+        printf("handle_mmio: failed to decode instruction\n");
+        printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:",
+               va, gpa, inst_len);
+        for (i = 0; i < inst_len; i++)
+            printf(" %02x", inst[i] & 0xFF);
+        printf("\n");
+        domain_crash_synchronous();
+    }
+
+    regs->eip += inst_len; /* advance %eip */
+
+    switch (mmio_inst.instr) {
+    case INSTR_MOV:
+        mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs);
+        break;
+
+    case INSTR_MOVS:
+    {
+        unsigned long count = GET_REPEAT_COUNT();
+        unsigned long size = mmio_inst.op_size;
+        int sign = regs->eflags & EF_DF ? -1 : 1;
+        unsigned long addr = 0;
+        int dir;
+
+        /* determine non-MMIO address */
+        if (realmode) {
+            if (((regs->es << 4) + (regs->edi & 0xFFFF)) == va) {
+                dir = IOREQ_WRITE;
+                addr = (regs->ds << 4) + (regs->esi & 0xFFFF);
+            } else {
+                dir = IOREQ_READ;
+                addr = (regs->es << 4) + (regs->edi & 0xFFFF);
+            }
+        } else {
+            if (va == regs->edi) {
+                dir = IOREQ_WRITE;
+                addr = regs->esi;
+            } else {
+                dir = IOREQ_READ;
+                addr = regs->edi;
+            }
+        }
+
+        mmio_opp->flags = mmio_inst.flags;
+        mmio_opp->instr = mmio_inst.instr;
+
+        /*
+         * In case of a movs spanning multiple pages, we break the accesses
+         * up into multiple pages (the device model works with non-contiguous
+         * physical guest pages). To copy just one page, we adjust %ecx and
+         * do not advance %eip so that the next "rep movs" copies the next
+         * page.
+         * Unaligned accesses, for example movsl starting at PGSZ-2, are
+         * turned into a single copy where we handle the overlapping memory
+         * copy ourself. After this copy succeeds, "rep movs" is executed
+         * again.
+         */
+        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+            unsigned long value = 0;
+
+            mmio_opp->flags |= OVERLAP;
+
+            regs->eip -= inst_len; /* do not advance %eip */
+
+            if (dir == IOREQ_WRITE)
+                hvm_copy(&value, addr, size, HVM_COPY_IN);
+            send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0);
+        } else {
+            if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+                regs->eip -= inst_len; /* do not advance %eip */
+
+                if (sign > 0)
+                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+                else
+                    count = (addr & ~PAGE_MASK) / size;
+            }
+
+            send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1);
+        }
+        break;
+    }
+
+    case INSTR_MOVZX:
+    case INSTR_MOVSX:
+        mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs);
+        break;
+
+    case INSTR_STOS:
+        /*
+         * Since the destination is always in (contiguous) mmio space we don't
+         * need to break it up into pages.
+         */
+        mmio_opp->flags = mmio_inst.flags;
+        mmio_opp->instr = mmio_inst.instr;
+        send_mmio_req(IOREQ_TYPE_COPY, gpa,
+                      GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax,
+                      IOREQ_WRITE, 0);
+        break;
+
+    case INSTR_OR:
+        mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mmio_opp, regs);
+        break;
+
+    case INSTR_AND:
+        mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mmio_opp, regs);
+        break;
+
+    case INSTR_XOR:
+        mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mmio_opp, regs);
+        break;
+
+    case INSTR_CMP:        /* Pass through */
+    case INSTR_TEST:
+        mmio_opp->flags = mmio_inst.flags;
+        mmio_opp->instr = mmio_inst.instr;
+        mmio_opp->operand[0] = mmio_inst.operand[0]; /* source */
+        mmio_opp->operand[1] = mmio_inst.operand[1]; /* destination */
+        mmio_opp->immediate = mmio_inst.immediate;
+
+        /* send the request and wait for the value */
+        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1,
+                      mmio_inst.op_size, 0, IOREQ_READ, 0);
+        break;
+
+    case INSTR_BT:
+        {
+            unsigned long value = 0;
+            int index, size;
+
+            mmio_opp->instr = mmio_inst.instr;
+            mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */
+            mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */
+
+            index = operand_index(mmio_inst.operand[0]);
+            size = operand_size(mmio_inst.operand[0]);
+            value = get_reg_value(size, index, 0, regs);
+
+            send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
+                          mmio_inst.op_size, 0, IOREQ_READ, 0);
+            break;
+        }
+
+    default:
+        printf("Unhandled MMIO instruction\n");
+        domain_crash_synchronous();
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
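
The INSTR_MOVS handling in handle_mmio() above never lets a single request
cross a page boundary on the non-MMIO side, since the device model cannot
assume guest-physically contiguous pages. The count clamp for a forward
copy, as a standalone sketch (4 KiB pages assumed):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        /* 16 dwords starting 8 bytes before the 0x2000 page boundary */
        unsigned long addr = 0x1FF8, size = 4, count = 16;

        if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
            count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;

        printf("clamped count=%lu\n", count);  /* 2 fit before 0x2000 */
        return 0;
    }
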
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/emulate.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/emulate.c    Tue Jan 31 10:49:51 2006
@@ -0,0 +1,517 @@
+/*
+ * emulate.c: helper routines for emulating SVM instructions.
+ * Copyright (c) 2005 AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <asm/msr.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/hvm/svm/emulate.h>
+
+#ifdef CONFIG_SVM
+
+extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
+        int inst_len);
+
+#define REX_PREFIX_BASE 0x40
+#define REX_X           0x02
+#define REX_W           0x08
+#define REX_R           0x04
+#define REX_B           0x01
+
+#define IS_REX_PREFIX(prefix) ((prefix & 0xf0) == REX_PREFIX_BASE)
+
+#define DECODE_MODRM_MOD(modrm) ((modrm & 0xC0) >> 6)
+
+#define DECODE_MODRM_REG(prefix, modrm)                             \
+    ((prefix & REX_R) && IS_REX_PREFIX(prefix))                     \
+        ? (0x08 | ((modrm >> 3) & 0x07)) : ((modrm >> 3) & 0x07)
+
+#define DECODE_MODRM_RM(prefix, modrm)                              \
+    ((prefix & REX_B) && IS_REX_PREFIX(prefix))                     \
+        ? (0x08 | (modrm & 0x07)) : (modrm & 0x07)
+
+#define DECODE_SIB_SCALE(sib) DECODE_MODRM_MOD(sib)
+
+#define DECODE_SIB_INDEX(prefix, sib)                               \
+    ((prefix & REX_X) && IS_REX_PREFIX(prefix))                     \
+        ? (0x08 | ((sib >> 3) & 0x07)) : ((sib >> 3) & 0x07)
+
+#define DECODE_SIB_BASE(prefix, sib) DECODE_MODRM_RM(prefix, sib)
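
The decode macros above implement the REX extension scheme: REX.R, REX.X
and REX.B each supply a fourth, high-order bit to the ModR/M reg field, the
SIB index and the ModR/M rm / SIB base respectively, which is how 64-bit
mode reaches %r8-%r15. A short sketch of the reg/rm case:

    #include <stdio.h>

    #define REX_B 0x01
    #define REX_R 0x04

    int main(void)
    {
        unsigned char rex = 0x44;          /* REX with R set, B clear */
        unsigned char modrm = 0xC1;        /* mod=3, reg=0, rm=1      */
        int reg = ((rex & REX_R) ? 8 : 0) | ((modrm >> 3) & 7);
        int rm  = ((rex & REX_B) ? 8 : 0) | (modrm & 7);

        printf("reg=%d rm=%d\n", reg, rm); /* reg=8 (%r8), rm=1 (%rcx) */
        return 0;
    }
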
+
+
+static inline unsigned long DECODE_GPR_VALUE(struct vmcb_struct *vmcb, 
+        struct cpu_user_regs *regs, u8 gpr_rm)
+{
+    unsigned long value;
+    switch (gpr_rm) 
+    { 
+    case 0x0: 
+        value = regs->eax;
+        break;
+    case 0x1:
+        value = regs->ecx;
+        break;
+    case 0x2:
+        value = regs->edx;
+        break;
+    case 0x3:
+        value = regs->ebx;
+        break;
+    case 0x4:
+        value = (unsigned long)vmcb->rsp;
+        break;
+    case 0x5:
+        value = regs->ebp; 
+        break;
+    case 0x6:
+        value = regs->esi;
+        break;
+    case 0x7:
+        value = regs->edi;
+        break;
+#if defined(__x86_64__)
+    case 0x8:
+        value = regs->r8;
+        break;
+    case 0x9:
+        value = regs->r9;
+        break;
+    case 0xA:
+        value = regs->r10;
+        break;
+    case 0xB:
+        value = regs->r11;
+        break;
+    case 0xC:
+        value = regs->r12;
+        break;
+    case 0xD:
+        value = regs->r13;
+        break;
+    case 0xE:
+        value = regs->r14;
+        break;
+    case 0xF:
+        value = regs->r15;
+        break;
+#endif
+    default:
+        printk("Invlaid gpr_rm = %d\n", gpr_rm);
+        ASSERT(0);
+        value = (unsigned long)-1; /* error retrun */
+    }
+    return value;
+}
+
+
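+/*
+ * Bail out of the decode when it needs more bytes than the caller
+ * supplied: zero *size and return -1 from the enclosing function.
+ */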
+#define CHECK_LENGTH64(num) \
+    if (num > length) \
+    { \
+        *size = 0; \
+        return (unsigned long) -1; \
+    }
+
+#if 0
+/*
+ * hv_is_canonical - checks if the given address is canonical
+ */
+static inline u64 hv_is_canonical(u64 addr)
+{
+    u64 bits = addr & (u64)0xffff800000000000;
+    return (u64)((bits == (u64)0xffff800000000000) || (bits == (u64)0x0));
+}
+#endif
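+
+/*
+ * (For reference: an address is canonical when bits 63:47 are all equal
+ * -- all zero or all one -- which is what the masked comparison in the
+ * disabled helper above tests.)
+ */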
+
+#define modrm operand [0]
+
+#define sib operand [1]
+
+
+unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 
+        struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
+        u8 *size)
+{
+    unsigned long effective_addr = (unsigned long) -1;
+    u8 length, modrm_mod, modrm_rm;
+    u32 disp = 0;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "get_effective_addr_modrm64(): prefix = %x, "
+            "length = %d, operand[0,1] = %x %x.\n", prefix, *size, operand [0],
+            operand [1]);
+
+    if ((NULL == size) || (NULL == operand) || (1 > *size))
+    {
+        *size = 0;
+        return effective_addr;
+    }
+
+    modrm_mod = DECODE_MODRM_MOD(modrm);
+    modrm_rm = DECODE_MODRM_RM(prefix, modrm);
+
+    length = *size;
+    *size = 1;
+    switch (modrm_rm)
+    {
+    case 0x4:
+#if __x86_64__
+    case 0xC:
+#endif
+        if (modrm_mod < 3)
+        {
+            *size = length;
+            effective_addr = get_effective_addr_sib(vmcb, regs, prefix, operand, size);
+        }
+        else
+        {
+            effective_addr = DECODE_GPR_VALUE(vmcb, regs, modrm_rm);
+        }
+        break;
+
+    case 0x5:
+        if (0 < modrm_mod)
+        {
+            effective_addr = regs->ebp;
+            *size = 1;
+            break;
+        }
+
+        CHECK_LENGTH64(*size + (u8)sizeof(u32));
+
+        memcpy (&disp, operand + 1, sizeof (u32));
+        *size += sizeof (u32);
+        if (vmcb->cs.attributes.fields.l) // 64-bit mode
+            return vmcb->rip + disp;
+        else
+            return disp;
+
+#if __x86_64__
+    case 0xD:
+        if (0 < modrm_mod)
+        {
+            *size = 1;
+            effective_addr = regs->r13;
+            break;
+        }
+
+        CHECK_LENGTH64(*size + (u8)sizeof(u32));
+
+        memcpy (&disp, operand + 1, sizeof (u32));
+        *size += sizeof (u32);
+
+        /* 64-bit mode */
+        if (vmcb->cs.attributes.fields.l)
+            return vmcb->rip + disp;
+        else
+            return disp;
+
+#endif
+    default:
+        effective_addr = DECODE_GPR_VALUE(vmcb, regs, modrm_rm);
+
+    }
+
+    if (3 > modrm_mod)
+    {
+        if (1 == modrm_mod )
+        {
+            CHECK_LENGTH64(*size + (u8)sizeof(u8));
+            disp = sib;
+            *size += sizeof (u8);
+        }
+        else if (2 == modrm_mod )
+        {
+            CHECK_LENGTH64(*size + sizeof (u32));
+            memcpy (&disp, operand + 1, sizeof (u32));
+            *size += sizeof (u32);
+        }
+
+        effective_addr += disp;
+    }
+
+    return effective_addr;
+}
+
+
+unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 
+        struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
+        u8 *size)
+{
+    unsigned long base, effective_addr = (unsigned long)-1;
+    u8 sib_scale, sib_idx, sib_base, length;
+    u32 disp = 0;
+
+    if (NULL == size || NULL == operand || 2 > *size)
+    {
+        *size = 0;
+        return effective_addr;
+    }
+
+    sib_scale = DECODE_SIB_SCALE(sib);
+    sib_idx = DECODE_SIB_INDEX(prefix, sib);
+    sib_base = DECODE_SIB_BASE(prefix, sib);
+
+    base = DECODE_GPR_VALUE(vmcb, regs, sib_base);
+
+    if ((unsigned long)-1 == base)
+    {
+        /* 
+         * Surely this is wrong. base should be allowed to be -1, even if
+         * it's not the usual case...
+         */
+        *size = 0;
+        return base;
+    }
+
+    length = *size;
+    *size = 2;
+    if (0x5 == (sib_base & 0x5))
+    {
+        switch (DECODE_MODRM_MOD(modrm))
+        {
+        case 0:
+            CHECK_LENGTH64(*size + (u8)sizeof(u32));
+            memcpy (&disp, operand + 2, sizeof(u32));
+            *size += sizeof(u32);
+            base = disp;
+            break;
+
+        case 1:
+            CHECK_LENGTH64(*size + (u8)sizeof (u8));
+            *size += sizeof(u8);
+            base += operand [2];
+            break;
+
+        case 2:
+            CHECK_LENGTH64(*size + (u8)sizeof (u32));
+            memcpy(&disp, operand + 2, sizeof(u32));
+            *size += sizeof(u32);
+            base += disp;
+        }
+    }
+
+    if (4 == sib_idx)
+        return base;
+
+    effective_addr = DECODE_GPR_VALUE(vmcb, regs, sib_idx);
+
+    effective_addr <<= sib_scale;
+
+    return (effective_addr + base);
+}
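+
+/*
+ * SIB worked example (illustration only): SIB byte 0x58 decodes as
+ * scale=1, index=3 (%ebx), base=0 (%eax) with no REX bits set, so the
+ * effective address computed above is (%ebx << 1) + %eax.
+ */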
+
+
+/* Get the register number of the dest (reg field) register in the ModRM byte. */
+unsigned int decode_dest_reg(u8 m)
+{
+#if __x86_64__
+    ASSERT(0); /* Need to adjust for REX prefix if applicable */
+#endif
+    return (m >> 3) & 7;
+}
+
+unsigned int decode_src_reg(u8 m)
+{
+#if __x86_64__
+    ASSERT(0); /* Need to adjust for REX prefix if applicable */
+#endif
+    return m & 7;
+}
+
+
+unsigned long svm_rip2pointer(struct vmcb_struct *vmcb)
+{
+    /*
+     * The following is subtle. Intuitively this code would be something like:
+     *
+     *  if (16bit) addr = (cs << 4) + rip; else addr = rip;
+     *
+     * However, this code doesn't work for code executing after CR0.PE=0,
+     * but before the %cs has been updated. We don't get signalled when
+     * %cs is updated but, fortunately, cs.base contains the valid base address
+     * no matter what kind of addressing is used.
+     */
+    return vmcb->cs.base + vmcb->rip;
+}
+
+
+#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
+
+/* 
+ * Here's how it works:
+ * First byte: Length. 
+ * Following bytes: Opcode bytes. 
+ * Special case: Last byte, if zero, doesn't need to match. 
+ */
+MAKE_INSTR(INVD,   2, 0x0f, 0x08);
+MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
+MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
+MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
+MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
+MAKE_INSTR(RDTSCP, 3, 0x0f, 0x01, 0xf9);
+MAKE_INSTR(CLI,    1, 0xfa);
+MAKE_INSTR(STI,    1, 0xfb);
+MAKE_INSTR(RDPMC,  2, 0x0f, 0x33);
+MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
+MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
+MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
+MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
+MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
+MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
+MAKE_INSTR(PAUSE,  2, 0xf3, 0x90);
+MAKE_INSTR(SKINIT, 3, 0x0f, 0x01, 0xde);
+MAKE_INSTR(MOV2CR, 3, 0x0f, 0x22, 0x00);
+MAKE_INSTR(MOVCR2, 3, 0x0f, 0x20, 0x00);
+MAKE_INSTR(MOV2DR, 3, 0x0f, 0x23, 0x00);
+MAKE_INSTR(MOVDR2, 3, 0x0f, 0x21, 0x00);
+MAKE_INSTR(PUSHF,  1, 0x9c);
+MAKE_INSTR(POPF,   1, 0x9d);
+MAKE_INSTR(RSM,    2, 0x0f, 0xaa);
+MAKE_INSTR(INVLPG, 3, 0x0f, 0x01, 0x00);
+MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
+MAKE_INSTR(HLT,    1, 0xf4);
+MAKE_INSTR(CLTS,   2, 0x0f, 0x06);
+MAKE_INSTR(LMSW,   3, 0x0f, 0x01, 0x00);
+MAKE_INSTR(SMSW,   3, 0x0f, 0x01, 0x00);
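+
+/*
+ * For illustration, MAKE_INSTR(CPUID, 2, 0x0f, 0xa2) above expands to
+ *
+ *     static const u8 OPCODE_CPUID[] = { 2, 0x0f, 0xa2 };
+ *
+ * i.e. a length byte followed by the opcode bytes. Entries whose last
+ * byte is zero (MOV2CR, INVLPG, LMSW, SMSW, ...) leave that final byte
+ * -- the ModRM byte -- unchecked, as described above.
+ */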
+
+static const u8 *opc_bytes[INSTR_MAX_COUNT] = 
+{
+    [INSTR_INVD]   = OPCODE_INVD,
+    [INSTR_CPUID]  = OPCODE_CPUID,
+    [INSTR_RDMSR]  = OPCODE_RDMSR,
+    [INSTR_WRMSR]  = OPCODE_WRMSR,
+    [INSTR_RDTSC]  = OPCODE_RDTSC,
+    [INSTR_RDTSCP] = OPCODE_RDTSCP,
+    [INSTR_CLI]    = OPCODE_CLI,
+    [INSTR_STI]    = OPCODE_STI,
+    [INSTR_RDPMC]  = OPCODE_RDPMC,
+    [INSTR_CLGI]   = OPCODE_CLGI,
+    [INSTR_STGI]   = OPCODE_STGI,
+    [INSTR_VMRUN]  = OPCODE_VMRUN,
+    [INSTR_VMLOAD] = OPCODE_VMLOAD,
+    [INSTR_VMSAVE] = OPCODE_VMSAVE,
+    [INSTR_VMCALL] = OPCODE_VMCALL,
+    [INSTR_PAUSE]  = OPCODE_PAUSE,
+    [INSTR_SKINIT] = OPCODE_SKINIT,
+    [INSTR_MOV2CR] = OPCODE_MOV2CR,
+    [INSTR_MOVCR2] = OPCODE_MOVCR2,
+    [INSTR_MOV2DR] = OPCODE_MOV2DR,
+    [INSTR_MOVDR2] = OPCODE_MOVDR2,
+    [INSTR_PUSHF]  = OPCODE_PUSHF,
+    [INSTR_POPF]   = OPCODE_POPF,
+    [INSTR_RSM]    = OPCODE_RSM,
+    [INSTR_INVLPG] = OPCODE_INVLPG,
+    [INSTR_INVLPGA]= OPCODE_INVLPGA,
+    [INSTR_CLTS]   = OPCODE_CLTS,
+    [INSTR_HLT]    = OPCODE_HLT,
+    [INSTR_LMSW]   = OPCODE_LMSW,
+    [INSTR_SMSW]   = OPCODE_SMSW
+};
+
+/* 
+ * Intel has a vmcs entry to give the instruction length. AMD doesn't.  So we
+ * have to do a little bit of work to find out... 
+ *
+ * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
+ * to enough bytes to satisfy the instruction including prefix bytes.
+ */
+unsigned int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+        enum instruction_index *list, unsigned int list_count, 
+        u8 *guest_eip_buf, enum instruction_index *match)
+{
+    unsigned int inst_len = 0; 
+    unsigned int i;
+    unsigned int j;
+    int found = 0;
+    enum instruction_index instr = 0;
+    u8 buffer[MAX_INST_LEN];
+    u8 *buf;
+    const u8 *opcode = NULL;
+
+    if (guest_eip_buf)
+    {
+        buf = guest_eip_buf;
+    }
+    else
+    {
+        inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), MAX_INST_LEN);
+        buf = buffer;
+    }
+
+    for (j = 0; j < list_count; j++)
+    {
+        instr = list[j];
+        opcode = opc_bytes[instr];
+        ASSERT(opcode);
+
+        while (inst_len < MAX_INST_LEN && 
+                is_prefix(buf[inst_len]) && 
+                !is_prefix(opcode[1]))
+            inst_len++;
+
+        ASSERT(opcode[0] <= 15);    /* Make sure the table is correct. */
+        found = 1;
+
+        for (i = 0; i < opcode[0]; i++)
+        {
+            /* If the last byte is zero, we just accept it without checking */
+            if (i == opcode[0]-1 && opcode[i+1] == 0)
+                break;
+
+            if (buf[inst_len+i] != opcode[i+1])
+            {
+                found = 0;
+                break;
+            }
+        }
+
+        if (found)
+            break;
+    }
+
+    /* It's a match */
+    if (found)
+    {
+        inst_len += opcode[0];
+
+        ASSERT(inst_len <= MAX_INST_LEN);
+
+        if (match)
+            *match = instr;
+
+        return inst_len;
+    }
+
+    printk("%s: Mismatch between expected and actual instruction bytes: "
+            "eip = %lx\n",  __func__, (unsigned long)vmcb->rip);
+    return 0;
+}
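+
+/*
+ * Illustrative usage (the real call sites live elsewhere): a VMEXIT_CPUID
+ * handler could compute how many bytes to skip with
+ *
+ *     enum instruction_index list[] = { INSTR_CPUID };
+ *     inst_len = __get_instruction_length_from_list(vmcb, list, 1,
+ *                                                   NULL, NULL);
+ *
+ * passing a NULL buffer so the helper fetches the bytes at the guest rip
+ * itself.
+ */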
+#endif /* CONFIG_SVM */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/instrlen.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/instrlen.c   Tue Jan 31 10:49:51 2006
@@ -0,0 +1,435 @@
+/*
+ * instrlen.c - calculates the instruction length for all operating modes
+ * 
+ * Travis Betak, travis.betak@xxxxxxx
+ * Copyright (c) 2005 AMD
+ *
+ * Essentially a very, very stripped version of Keir Fraser's work in 
+ * x86_emulate.c.  Used primarily for MMIO.
+ */
+
+/*
+ * TODO: the way in which we use svm_instrlen is very inefficient as it now
+ * stands.  It would be worthwhile to return the actual instruction buffer
+ * along with the instruction length, since we already fetch the instruction
+ * bytes to compute the length and so know how much of the buffer we need.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <asm/regs.h>
+#define DPRINTF DPRINTK
+#include <asm-x86/x86_emulate.h>
+
+#ifdef CONFIG_SVM
+/*
+ * Opcode effective-address decode tables.
+ * Note that we only emulate instructions that have at least one memory
+ * operand (excluding implicit stack references). We assume that stack
+ * references and instruction fetches will never occur in special memory
+ * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
+ * not be handled.
+ */
+
+/* Operand sizes: 8-bit operands or specified/overridden size. */
+#define BYTE_OP      (1<<0)  /* 8-bit operands. */
+/* Destination operand type. */
+#define IMPLICIT_OPS (1<<1)  /* Implicit in opcode. No generic decode. */
+#define DST_REG      (2<<1)  /* Register operand. */
+#define DST_MEM      (3<<1)  /* Memory operand. */
+#define DST_MASK     (3<<1)
+/* Source operand type. */
+#define SRC_NONE     (0<<3)  /* No source operand. */
+#define SRC_IMPLICIT (0<<3)  /* Source operand is implicit in the opcode. */
+#define SRC_REG      (1<<3)  /* Register operand. */
+#define SRC_MEM      (2<<3)  /* Memory operand. */
+#define SRC_IMM      (3<<3)  /* Immediate operand. */
+#define SRC_IMMBYTE  (4<<3)  /* 8-bit sign-extended immediate operand. */
+#define SRC_MASK     (7<<3)
+/* Generic MODRM decode. */
+#define MODRM       (1<<6)
+/* Destination is only written; never read. */
+#define Mov         (1<<7)
+
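+/*
+ * Reading the table (illustration): the entry for opcode 0x88
+ * (mov r/m8,reg8) is BYTE_OP | DST_MEM | SRC_REG | MODRM, i.e. an 8-bit
+ * operation with a memory destination, a register source, and a ModRM
+ * byte for the generic decoder to parse.
+ */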
+static u8 opcode_table[256] = {
+    /* 0x00 - 0x07 */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x08 - 0x0F */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x10 - 0x17 */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x18 - 0x1F */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x20 - 0x27 */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x28 - 0x2F */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x30 - 0x37 */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x38 - 0x3F */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, 0,
+    /* 0x40 - 0x4F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x50 - 0x5F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x60 - 0x6F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x70 - 0x7F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x80 - 0x87 */
+    BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMM | MODRM,
+    BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMMBYTE | MODRM,
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    /* 0x88 - 0x8F */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM,
+    BYTE_OP | DST_REG | SRC_MEM | MODRM, DST_REG | SRC_MEM | MODRM,
+    0, 0, 0, DST_MEM | SRC_NONE | MODRM | Mov,
+    /* 0x90 - 0x9F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xA0 - 0xA7 */
+    BYTE_OP | DST_REG | SRC_MEM | Mov, DST_REG | SRC_MEM | Mov,
+    BYTE_OP | DST_MEM | SRC_REG | Mov, DST_MEM | SRC_REG | Mov,
+    BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov,
+    BYTE_OP | IMPLICIT_OPS, IMPLICIT_OPS,
+    /* 0xA8 - 0xAF */
+    0, 0, BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov,
+    BYTE_OP | IMPLICIT_OPS | Mov, IMPLICIT_OPS | Mov,
+    BYTE_OP | IMPLICIT_OPS, IMPLICIT_OPS,
+    /* 0xB0 - 0xBF */
+    SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, 
+    SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE, SRC_IMMBYTE,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xC0 - 0xC7 */
+    BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMMBYTE | MODRM, 0, 0,
+    0, 0, BYTE_OP | DST_MEM | SRC_IMM | MODRM, DST_MEM | SRC_IMM | MODRM,
+    /* 0xC8 - 0xCF */
+    0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xD0 - 0xD7 */
+    BYTE_OP | DST_MEM | SRC_IMPLICIT | MODRM, DST_MEM | SRC_IMPLICIT | MODRM,
+    BYTE_OP | DST_MEM | SRC_IMPLICIT | MODRM, DST_MEM | SRC_IMPLICIT | MODRM,
+    0, 0, 0, 0,
+    /* 0xD8 - 0xDF */
+    0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xE0 - 0xEF */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xF0 - 0xF7 */
+    0, 0, 0, 0,
+    0, 0, BYTE_OP | DST_MEM | SRC_NONE | MODRM, DST_MEM | SRC_NONE | MODRM,
+    /* 0xF8 - 0xFF */
+    0, 0, 0, 0,
+    0, 0, BYTE_OP | DST_MEM | SRC_NONE | MODRM, DST_MEM | SRC_NONE | MODRM
+};
+
+static u8 twobyte_table[256] = {
+    /* 0x00 - 0x0F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0,
+    /* 0x10 - 0x1F */
+    0, 0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x20 - 0x2F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x30 - 0x3F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x40 - 0x47 */
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    /* 0x48 - 0x4F */
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    DST_REG | SRC_MEM | MODRM | Mov, DST_REG | SRC_MEM | MODRM | Mov,
+    /* 0x50 - 0x5F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x60 - 0x6F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x70 - 0x7F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x80 - 0x8F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0x90 - 0x9F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xA0 - 0xA7 */
+    0, 0, 0, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0,
+    /* 0xA8 - 0xAF */
+    0, 0, 0, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0,
+    /* 0xB0 - 0xB7 */
+    BYTE_OP | DST_MEM | SRC_REG | MODRM, DST_MEM | SRC_REG | MODRM, 0,
+    DST_MEM | SRC_REG | MODRM,
+    0, 0,
+    DST_REG | SRC_MEM | MODRM,
+    DST_REG | SRC_REG | MODRM,
+
+    /* 0xB8 - 0xBF */
+    0, 0, DST_MEM | SRC_IMMBYTE | MODRM, DST_MEM | SRC_REG | MODRM, 0, 0, 0, 0,
+    /* 0xC0 - 0xCF */
+    0, 0, 0, 0, 0, 0, 0, IMPLICIT_OPS | MODRM, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xD0 - 0xDF */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xE0 - 0xEF */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0xF0 - 0xFF */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* read from guest memory */
+extern int inst_copy_from_guest(unsigned char *buf, unsigned long eip,
+        int length);
+extern void svm_dump_inst(unsigned long eip);
+
+/* 
+ * insn_fetch - fetch the next 1 to 4 bytes from instruction stream 
+ * 
+ * @_type:   u8, u16, u32, s8, s16, or s32
+ * @_size:   1, 2, or 4 bytes
+ * @_eip:    address to fetch from guest memory
+ * @_length: updated! increments the current instruction length counter by _size
+ *
+ * INTERNAL this is used internally by svm_instrlen to fetch the next byte,
+ * word, or dword from guest memory at location _eip.  We currently use a
+ * local unsigned long as the storage buffer, since we fetch at most 4 bytes
+ * at a time.
+ */
+#define insn_fetch(_type, _size, _eip, _length) \
+({  unsigned long _x; \
+        if ((rc = inst_copy_from_guest((unsigned char *)(&(_x)), \
+                (unsigned long)(_eip), _size)) \
+                    != _size) \
+        goto done; \
+    (_eip) += (_size); \
+    (_length) += (_size); \
+    (_type)_x; \
+})
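+
+/*
+ * For illustration, the common pattern below,
+ *
+ *     b = insn_fetch(u8, 1, _regs.eip, length);
+ *
+ * copies one byte from the guest at _regs.eip, advances both _regs.eip
+ * and length by 1, and jumps to the 'done' label if the copy fails.
+ */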
+
+/**
+ * svm_instrlen - returns the current instruction's length
+ *
+ * @regs: guest register state
+ * @mode: guest operating mode (2 = real mode, 4 = 32-bit, 8 = long mode)
+ *
+ * EXTERNAL this routine calculates the length of the current instruction
+ * pointed to by eip.  The guest state is _not_ changed by this routine.
+ */
+unsigned long svm_instrlen(struct cpu_user_regs *regs, int mode)
+{
+    u8 b, d, twobyte = 0;
+    u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
+    unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
+    unsigned int i;
+    int rc = 0;
+    u32 length = 0;
+    u8 tmp;
+
+    /* Copy the registers so we don't alter the guest's present state */
+    volatile struct cpu_user_regs _regs = *regs;
+
+    /* Check for real mode */
+    if (mode == 2)
+        _regs.eip += (_regs.cs << 4); 
+
+    /* Legacy prefix check */
+    for (i = 0; i < 8; i++) {
+        switch (b = insn_fetch(u8, 1, _regs.eip, length)) {
+        case 0x66:  /* operand-size override */
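+            /*
+             * XOR trick: 4 ^ 6 == 2 and 2 ^ 6 == 4, so each 0x66 prefix
+             * toggles between the two legacy operand sizes.
+             */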
+            op_bytes ^= 6;  /* switch between 2/4 bytes */
+            break;
+        case 0x67:  /* address-size override */
+            ad_bytes ^= (mode == 8) ? 12 : 6; /* 2/4/8 bytes */
+            break;
+        case 0x2e:  /* CS override */
+        case 0x3e:  /* DS override */
+        case 0x26:  /* ES override */
+        case 0x64:  /* FS override */
+        case 0x65:  /* GS override */
+        case 0x36:  /* SS override */
+        case 0xf0:  /* LOCK */
+        case 0xf3:  /* REP/REPE/REPZ */
+        case 0xf2:  /* REPNE/REPNZ */
+            break;
+        default:
+            goto done_prefixes;
+        }
+    }
+
+done_prefixes:
+
+    /* REX prefix check */
+    if ((mode == 8) && ((b & 0xf0) == 0x40))
+    {
+        if (b & 8)
+            op_bytes = 8;   /* REX.W */
+        modrm_reg = (b & 4) << 1;   /* REX.R */
+        /* REX.B and REX.X do not need to be decoded. */
+        b = insn_fetch(u8, 1, _regs.eip, length);
+    }
+
+    /* Opcode byte(s). */
+    d = opcode_table[b];
+    if (d == 0) 
+    {
+        /* Two-byte opcode? */
+        if (b == 0x0f) {
+            twobyte = 1;
+            b = insn_fetch(u8, 1, _regs.eip, length);
+            d = twobyte_table[b];
+        }
+
+        /* Unrecognised? */
+        if (d == 0)
+            goto cannot_emulate;
+    }
+
+    /* MODRM and SIB bytes. */
+    if (d & MODRM) 
+    {
+        modrm = insn_fetch(u8, 1, _regs.eip, length);
+        modrm_mod |= (modrm & 0xc0) >> 6;
+        modrm_reg |= (modrm & 0x38) >> 3;
+        modrm_rm |= (modrm & 0x07);
+        switch (modrm_mod) 
+        {
+        case 0:
+            if ((modrm_rm == 4) &&
+                (((insn_fetch(u8, 1, _regs.eip,
+                      length)) & 7) == 5)) 
+            {
+                length += 4;
+                _regs.eip += 4; /* skip SIB.base disp32 */
+            } 
+            else if (modrm_rm == 5) 
+            {
+                length += 4;
+                _regs.eip += 4; /* skip disp32 */
+            }
+            break;
+        case 1:
+            if (modrm_rm == 4) 
+            {
+                insn_fetch(u8, 1, _regs.eip, length);
+            }
+            length += 1;
+            _regs.eip += 1; /* skip disp8 */
+            break;
+        case 2:
+            if (modrm_rm == 4)
+            {
+                insn_fetch(u8, 1, _regs.eip, length);
+            }
+            length += 4;
+            _regs.eip += 4; /* skip disp32 */
+            break;
+        case 3:
+            DPRINTF("Cannot parse ModRM.mod == 3.\n");
+            goto cannot_emulate;
+        }
+    }
+
+    /* Decode and fetch the destination operand: register or memory. */
+    switch (d & DST_MASK) 
+    {
+    case IMPLICIT_OPS:
+        /* Special instructions do their own operand decoding. */
+        goto done;
+    }
+
+    /* Decode and fetch the source operand: register, memory or immediate */
+    switch (d & SRC_MASK) 
+    {
+    case SRC_IMM:
+        tmp = (d & BYTE_OP) ? 1 : op_bytes;
+        if (tmp == 8)
+            tmp = 4;
+        /* NB. Immediates are sign-extended as necessary. */
+        switch (tmp) {
+        case 1:
+            insn_fetch(s8, 1, _regs.eip, length);
+            break;
+        case 2:
+            insn_fetch(s16, 2, _regs.eip, length);
+            break;
+        case 4:
+            insn_fetch(s32, 4, _regs.eip, length);
+            break;
+        }
+        break;
+    case SRC_IMMBYTE:
+        insn_fetch(s8, 1, _regs.eip, length);
+        break;
+    }
+
+    if (twobyte)
+        goto done;
+
+    switch (b) 
+    {
+    case 0xa0:
+    case 0xa1:      /* mov */
+        length += ad_bytes;
+        _regs.eip += ad_bytes;  /* skip src displacement */
+        break;
+    case 0xa2:
+    case 0xa3:      /* mov */
+        length += ad_bytes;
+        _regs.eip += ad_bytes;  /* skip dst displacement */
+        break;
+    case 0xf6:
+    case 0xf7:      /* Grp3 */
+        switch (modrm_reg) 
+        {
+        case 0:
+        case 1: /* test */
+            /* 
+             * Special case in Grp3: test has an 
+             * immediate source operand. 
+             */
+            tmp = (d & BYTE_OP) ? 1 : op_bytes;
+            if (tmp == 8)
+                tmp = 4;
+            switch (tmp) 
+            {
+            case 1:
+                insn_fetch(s8, 1, _regs.eip, length);
+                break;
+            case 2:
+                insn_fetch(s16, 2, _regs.eip, length);
+                break;
+            case 4:
+                insn_fetch(s32, 4, _regs.eip, length);
+                break;
+            }
+            goto done;
+        }
+        break;
+    }
+
+done:
+    return length;
+
+cannot_emulate:
+    DPRINTF("Cannot emulate %02x at address %x (eip %x, mode %d)\n",
+        b, _regs.eip, regs->eip, mode);
+        svm_dump_inst(_regs.eip);
+    return (unsigned long)-1;
+}
+#endif /* CONFIG_SVM */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/intr.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/intr.c       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,219 @@
+/*
+ * intr.c: Interrupt handling for SVM.
+ * Copyright (c) 2005, AMD Inc. 
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <xen/errno.h>
+#include <xen/shadow.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/svm/intr.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <public/hvm/ioreq.h>
+#include <xen/domain_page.h>
+
+#ifdef CONFIG_SVM
+
+/*
+ * Most of this code is copied from vmx_io.c and modified 
+ * to be suitable for SVM.
+ */
+#define BSP_CPU(v)    (!(v->vcpu_id))
+
+static inline int svm_inject_extint(struct vcpu *v, int trap, int error_code)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    vintr_t intr;
+
+    ASSERT(vmcb);
+
+    /* Save all fields */
+    intr = vmcb->vintr;
+    /* Update only relevant fields */    
+    intr.fields.irq = 1;
+    intr.fields.intr_masking = 1;
+    intr.fields.vector = trap;
+    intr.fields.prio = 0xF;
+    vmcb->vintr = intr;
+//  printf( "IRQ = %d\n", trap );
+    return 0;
+}
+
+void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    u64    drift;
+
+    if ( vpit->first_injected )
+        drift = vpit->period_cycles * vpit->pending_intr_nr;
+    else
+        drift = 0;
+    vmcb->tsc_offset = ( 0 - drift );
+}
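+
+/*
+ * (On SVM a guest RDTSC observes host TSC + vmcb->tsc_offset, so the
+ * negative offset above hides the cycles accumulated while PIT
+ * interrupts were still pending injection.)
+ */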
+
+static inline void
+interrupt_post_injection(struct vcpu * v, int vector, int type)
+{
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+
+    switch(type)
+    {
+    case VLAPIC_DELIV_MODE_EXT:
+    case VLAPIC_DELIV_MODE_FIXED:
+    case VLAPIC_DELIV_MODE_LPRI:
+        if ( is_pit_irq(v, vector, type) ) {
+            if ( !vpit->first_injected ) {
+                vpit->first_injected = 1;
+                vpit->pending_intr_nr = 0;
+            }
+            else if (vpit->pending_intr_nr) {
+                --vpit->pending_intr_nr;
+            }
+            vpit->inject_point = NOW();
+            svm_set_tsc_shift (v, vpit);
+        }
+        break;
+
+    default:
+        printk("Not support interrupt type: %d\n", type);
+        break;
+    }
+}
+
+asmlinkage void svm_intr_assist(void) 
+{
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    struct hvm_domain *plat=&v->domain->arch.hvm_domain; 
+    struct hvm_virpit *vpit = &plat->vpit;
+    struct hvm_virpic *pic= &plat->vpic;
+    int intr_type = VLAPIC_DELIV_MODE_EXT;
+    int intr_vector = -1;
+    int re_injecting = 0;
+    unsigned long rflags;
+
+    ASSERT(vmcb);
+
+    /* Check if an Injection is active */
+    if (v->arch.hvm_svm.injecting_event) {
+       /* Previous Interrupt delivery caused this Intercept? */
+       if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) 
{
+           v->arch.hvm_svm.saved_irq_vector = vmcb->exitintinfo.fields.vector;
+//           printk("Injecting PF#: saving IRQ from ExitInfo\n");
+           vmcb->exitintinfo.bytes = 0;
+
+           /* bail out, we won't be injecting an interrupt this time */
+           return;
+       }
+    }
+
+    /* Guest's interrupts masked? */
+    rflags = vmcb->rflags;
+    if (irq_masked(rflags)) {
+        HVM_DBG_LOG(DBG_LEVEL_1, "Guest IRQs masked: rflags: %lx", rflags);
+       /* bail out, we won't be injecting an interrupt this time */
+       return;
+    }
+
+    /* Interrupt delivery caused an Intercept? */
+    if (vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0)) {
+//        printk("Re-injecting IRQ from ExitInfo\n");
+        intr_vector = vmcb->exitintinfo.fields.vector;
+        vmcb->exitintinfo.bytes = 0;
+        re_injecting = 1;
+    }
+    /* Previous interrupt still pending? */
+    else if (vmcb->vintr.fields.irq) {
+//        printk("Re-injecting IRQ from Vintr\n");
+        intr_vector = vmcb->vintr.fields.vector;
+        vmcb->vintr.bytes = 0;
+        re_injecting = 1;
+    }
+    /* Pending IRQ saved at last VMExit? */
+    else if ( v->arch.hvm_svm.saved_irq_vector >= 0) {
+//        printk("Re-Injecting saved IRQ\n");
+        intr_vector = v->arch.hvm_svm.saved_irq_vector;
+        v->arch.hvm_svm.saved_irq_vector = -1;
+        re_injecting = 1;
+    }
+    /* Now let's check for new interrupts */
+    else {
+        /* Interrupt pending at the PIC? */
+        hvm_pic_assist(v);
+
+        if (vpit->pending_intr_nr) {
+            pic_set_irq(pic, 0, 0);
+            pic_set_irq(pic, 0, 1);
+        }
+
+        if (plat->interrupt_request) {
+            intr_vector = cpu_get_interrupt(v, &intr_type);
+            plat->interrupt_request = 0;
+        }
+    }
+
+    /* have we got an interrupt to inject? */
+    if (intr_vector >= 0) {
+        switch (intr_type) {
+        case VLAPIC_DELIV_MODE_EXT:
+        case VLAPIC_DELIV_MODE_FIXED:
+        case VLAPIC_DELIV_MODE_LPRI:
+            /* Re-injecting a PIT interrupt? */
+            if (re_injecting && 
+                is_pit_irq(v, intr_vector, intr_type)) {
+                    ++vpit->pending_intr_nr;
+            }
+            /* let's inject this interrupt */
+            TRACE_3D(TRC_VMX_INT, v->domain->domain_id, intr_vector, 0);
+            svm_inject_extint(v, intr_vector, VMX_INVALID_ERROR_CODE);
+            interrupt_post_injection(v, intr_vector, intr_type);
+            break;
+        case VLAPIC_DELIV_MODE_SMI:
+        case VLAPIC_DELIV_MODE_NMI:
+        case VLAPIC_DELIV_MODE_INIT:
+        case VLAPIC_DELIV_MODE_STARTUP:
+        default:
+            printk("Unsupported interrupt type: %d\n", intr_type);
+            BUG();
+            break;
+        }
+    }
+}
+
+#endif /* CONFIG_SVM */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/svm.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/svm.c        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,2707 @@
+/*
+ * svm.c: handling SVM architecture-related VM exits
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/softirq.h>
+#include <xen/hypercall.h>
+#include <asm/current.h>
+#include <asm/io.h>
+#include <asm/shadow.h>
+#include <asm/regs.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/spinlock.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/hvm/svm/emulate.h>
+#include <asm/hvm/svm/vmmcall.h>
+#include <asm/hvm/svm/intr.h>
+#include <asm/shadow.h>
+#if CONFIG_PAGING_LEVELS >= 3
+#include <asm/shadow_64.h>
+#endif
+#include <public/sched.h>
+#include <public/hvm/ioreq.h>
+
+#ifdef CONFIG_SVM
+
+#define SVM_EXTRA_DEBUG
+
+#ifdef TRACE_BUFFER
+static unsigned long trace_values[NR_CPUS][4];
+#define TRACE_VMEXIT(index,value) trace_values[current->processor][index]=value
+#else
+#define TRACE_VMEXIT(index,value) ((void)0)
+#endif
+
+/* Useful define */
+#define MAX_INST_SIZE  15
+
+/* 
+ * External functions, etc. We should move these to some suitable header
+ * file(s).
+ */
+
+extern long evtchn_send(int lport);
+extern void do_nmi(struct cpu_user_regs *, unsigned long);
+extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
+                                int inst_len);
+extern asmlinkage void do_IRQ(struct cpu_user_regs *);
+extern void smp_apic_timer_interrupt(struct cpu_user_regs *);
+extern void timer_interrupt(int, void *, struct cpu_user_regs *);
+extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
+       unsigned long count, int size, long value, int dir, int pvalid);
+extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
+extern void svm_dump_inst(unsigned long eip);
+extern int svm_dbg_on;
+void svm_manual_event_injection32(struct vcpu *v, struct cpu_user_regs *regs, 
+        int vector, int has_code);
+void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
+
+static struct asid_pool ASIDpool[NR_CPUS];
+
+/*
+ * Initializes the per-core pool of ASIDs used by guests.
+ */
+void asidpool_init( int core )
+{
+    int i;
+    ASIDpool[core].asid_lock = SPIN_LOCK_UNLOCKED;
+    spin_lock(&ASIDpool[core].asid_lock);
+    /* Host ASID is always in use */
+    ASIDpool[core].asid[INITIAL_ASID] = ASID_INUSE;
+    for( i=1; i<ASID_MAX; i++ )
+    {
+       ASIDpool[core].asid[i] = ASID_AVAILABLE;
+    }
+    spin_unlock(&ASIDpool[core].asid_lock);
+}
+
+
+/* internal function to get the next available ASID */
+static int asidpool_fetch_next( struct vmcb_struct *vmcb, int core )
+{
+    int i;   
+    for( i = 1; i < ASID_MAX; i++ )
+    {
+        if( ASIDpool[core].asid[i] == ASID_AVAILABLE )
+        {
+            vmcb->guest_asid = i;
+            ASIDpool[core].asid[i] = ASID_INUSE;
+            return i;
+        }
+    }
+    return -1;
+}
+
+
+/*
+ * This function assigns the next available ASID number
+ * to the passed VMCB. If none are available, the
+ * TLB flush flag is set, and all retired ASIDs
+ * are made available.
+ *
+ *  Returns: 1 -- success;
+ *           0 -- failure -- no more ASID numbers 
+ *                           available.
+ */
+int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
+                             int oldcore, int newcore )
+{
+    int i; 
+    int res = 1;
+    static unsigned long cnt=0;
+
+    spin_lock(&ASIDpool[oldcore].asid_lock);
+    if( retire_current && vmcb->guest_asid ) {
+       ASIDpool[oldcore].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
+    }
+    spin_unlock(&ASIDpool[oldcore].asid_lock);
+    spin_lock(&ASIDpool[newcore].asid_lock);
+    if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
+        if (svm_dbg_on)
+            printk( "SVM: tlb(%ld)\n", cnt++ );
+        /* FLUSH the TLB and all retired slots are made available */ 
+        vmcb->tlb_control = 1;
+        for( i = 1; i < ASID_MAX; i++ ) {
+            if( ASIDpool[newcore].asid[i] == ASID_RETIRED ) {
+                ASIDpool[newcore].asid[i] = ASID_AVAILABLE;
+            }
+        }
+        /* Get the First slot available */ 
+        res = asidpool_fetch_next( vmcb, newcore ) > 0;
+    }
+    spin_unlock(&ASIDpool[newcore].asid_lock);
+    return res;
+}
+
+void asidpool_retire( struct vmcb_struct *vmcb, int core )
+{
+   spin_lock(&ASIDpool[core].asid_lock);
+   if( vmcb->guest_asid ) {
+       ASIDpool[core].asid[ vmcb->guest_asid & (ASID_MAX-1) ] = ASID_RETIRED;
+   }
+   spin_unlock(&ASIDpool[core].asid_lock);
+}
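+
+/*
+ * Lifecycle sketch (an assumption -- not all call sites appear in this
+ * patch): asidpool_init() runs once per core from start_svm() below;
+ * asidpool_assign_next() runs when a VMCB is (re)bound to a core and may
+ * force a TLB flush; asidpool_retire() releases the ASID when the VMCB
+ * is torn down.
+ */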
+
+static inline int svm_inject_exception(struct vcpu *v, int trap, int error_code)
+{
+    void save_svm_cpu_user_regs(struct vcpu *, struct cpu_user_regs *);
+    struct cpu_user_regs regs;
+
+    printf("svm_inject_exception(trap %d, error_code 0x%x)\n",
+           trap, error_code);
+    save_svm_cpu_user_regs(v, &regs);
+    __hvm_bug(&regs);
+}
+
+void stop_svm(void)
+{
+    u32 eax, edx;    
+
+    /* We turn off the EFER_SVME bit. */
+    rdmsr(MSR_EFER, eax, edx);
+    eax &= ~EFER_SVME;
+    wrmsr(MSR_EFER, eax, edx);
+
+    printk("AMD SVM Extension is disabled.\n");
+}
+
+int svm_initialize_guest_resources(struct vcpu *v)
+{
+    svm_final_setup_guest(v);
+    return 1;
+}
+
+int svm_relinquish_guest_resources(struct vcpu *v)
+{
+    svm_relinquish_resources(v);
+    return 1;
+}
+
+void svm_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+#if defined (__x86_64__)
+    regs->rip    = vmcb->rip;
+    regs->rsp    = vmcb->rsp;
+    regs->rflags = vmcb->rflags;
+    regs->cs     = vmcb->cs.sel;
+    regs->ds     = vmcb->ds.sel;
+    regs->es     = vmcb->es.sel;
+    regs->ss     = vmcb->ss.sel;
+    regs->gs     = vmcb->gs.sel;
+    regs->fs     = vmcb->fs.sel;
+#elif defined (__i386__)
+    regs->eip    = vmcb->rip;
+    regs->esp    = vmcb->rsp;
+    regs->eflags = vmcb->rflags;
+    regs->cs     = vmcb->cs.sel;
+    regs->ds     = vmcb->ds.sel;
+    regs->es     = vmcb->es.sel;
+    regs->ss     = vmcb->ss.sel;
+    regs->gs     = vmcb->gs.sel;
+    regs->fs     = vmcb->fs.sel;
+#endif
+}
+
+void svm_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    svm_load_cpu_user_regs(v, regs);
+}
+
+#ifdef __x86_64__
+static struct svm_msr_state percpu_msr[NR_CPUS];
+
+static u32 msr_data_index[VMX_MSR_COUNT] =
+{
+    MSR_LSTAR, MSR_STAR, MSR_CSTAR,
+    MSR_SYSCALL_MASK, MSR_EFER,
+};
+
+void svm_save_segments(struct vcpu *v)
+{
+    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_svm.msr_content.shadow_gs);
+}
+
+/*
+ * To avoid MSR save/restore at every VM exit/entry time, we restore
+ * the x86_64 specific MSRs at domain switch time. Since those MSRs
+ * are not modified once set for generic domains, we don't save them,
+ * but simply reset them to the values set at percpu_traps_init().
+ */
+void svm_load_msrs(struct vcpu *n)
+{
+    struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
+    int i;
+
+    if ( !hvm_switch_on )
+        return;
+
+    while ( host_state->flags )
+    {
+        i = find_first_set_bit(host_state->flags);
+        wrmsrl(msr_data_index[i], host_state->msr_items[i]);
+        clear_bit(i, &host_state->flags);
+    }
+}
+
+static void svm_save_init_msrs(void)
+{
+    struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
+    int i;
+
+    for ( i = 0; i < SVM_MSR_COUNT; i++ )
+        rdmsrl(msr_data_index[i], host_state->msr_items[i]);
+}
+
+#define CASE_READ_MSR(address)                               \
+    case MSR_ ## address:                                    \
+    msr_content = msr->msr_items[SVM_INDEX_MSR_ ## address]; \
+    break
+
+#define CASE_WRITE_MSR(address)                              \
+    case MSR_ ## address:                                    \
+    msr->msr_items[SVM_INDEX_MSR_ ## address] = msr_content; \
+    if (!test_bit(SVM_INDEX_MSR_ ## address, &msr->flags))   \
+    {                                                        \
+        set_bit(SVM_INDEX_MSR_ ## address, &msr->flags);     \
+    }                                                        \
+    break
+
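+/*
+ * For illustration, CASE_READ_MSR(STAR) expands to
+ *
+ *     case MSR_STAR:
+ *         msr_content = msr->msr_items[SVM_INDEX_MSR_STAR];
+ *         break;
+ */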
+
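+/* Stub: currently treats every address as canonical. */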
+#define IS_CANO_ADDRESS(add) 1
+
+static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
+    struct vcpu *vc = current;
+    struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
+    struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
+
+    switch (regs->ecx)
+    {
+    case MSR_EFER:
+        msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
+        HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", 
+                (unsigned long long)msr_content);
+
+        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
+            msr_content |= 1 << _EFER_LME;
+
+        if (SVM_LONG_GUEST(vc))
+            msr_content |= 1 << _EFER_LMA;
+
+        break;
+
+    case MSR_FS_BASE:
+        if (!(SVM_LONG_GUEST(vc)))
+            /* XXX should this be a #GP fault? */
+            domain_crash_synchronous();
+        
+        msr_content = vmcb->fs.base;
+        break;
+
+    case MSR_GS_BASE:
+        if (!(SVM_LONG_GUEST(vc)))
+            domain_crash_synchronous();
+
+        msr_content = vmcb->gs.base;
+        break;
+
+    case MSR_SHADOW_GS_BASE:
+        msr_content = msr->shadow_gs;
+        break;
+
+    CASE_READ_MSR(STAR);
+    CASE_READ_MSR(LSTAR);
+    CASE_READ_MSR(CSTAR);
+    CASE_READ_MSR(SYSCALL_MASK);
+    default:
+        return 0;
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", 
+            msr_content);
+
+    regs->eax = msr_content & 0xffffffff;
+    regs->edx = msr_content >> 32;
+    return 1;
+}
+
+static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
+{
+    u64 msr_content = regs->eax | ((u64)regs->edx << 32); 
+    struct vcpu *vc = current;
+    struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
+    struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
+    struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx msr_content %lx\n", 
+                regs->ecx, msr_content);
+
+    switch (regs->ecx)
+    {
+    case MSR_EFER:
+        if ((msr_content & EFER_LME) ^ test_bit(SVM_CPU_STATE_LME_ENABLED,
+                                                &vc->arch.hvm_svm.cpu_state))
+        {
+            if (test_bit(SVM_CPU_STATE_PG_ENABLED, &vc->arch.hvm_svm.cpu_state)
+                    || !test_bit(SVM_CPU_STATE_PAE_ENABLED,
+                                 &vc->arch.hvm_svm.cpu_state))
+            {
+                svm_inject_exception(vc, TRAP_gp_fault, 0);
+            }
+        }
+
+        if (msr_content & EFER_LME)
+            set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
+
+        /* No update for LME/LMA since they have no effect */
+        msr->msr_items[SVM_INDEX_MSR_EFER] = msr_content;
+        if (msr_content & ~(EFER_LME | EFER_LMA))
+        {
+            msr->msr_items[SVM_INDEX_MSR_EFER] = msr_content;
+            if (!test_bit(SVM_INDEX_MSR_EFER, &msr->flags))
+            { 
+                rdmsrl(MSR_EFER, host_state->msr_items[SVM_INDEX_MSR_EFER]);
+                set_bit(SVM_INDEX_MSR_EFER, &host_state->flags);
+                set_bit(SVM_INDEX_MSR_EFER, &msr->flags);  
+                wrmsrl(MSR_EFER, msr_content);
+            }
+        }
+        break;
+
+    case MSR_FS_BASE:
+    case MSR_GS_BASE:
+        if (!(SVM_LONG_GUEST(vc)))
+            domain_crash_synchronous();
+
+        if (!IS_CANO_ADDRESS(msr_content))
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
+            svm_inject_exception(vc, TRAP_gp_fault, 0);
+        }
+
+        if (regs->ecx == MSR_FS_BASE)
+           vmcb->fs.base = msr_content;
+        else 
+           vmcb->gs.base = msr_content;
+        break;
+
+    case MSR_SHADOW_GS_BASE:
+        if (!(SVM_LONG_GUEST(vc)))
+            domain_crash_synchronous();
+
+        vc->arch.hvm_svm.msr_content.shadow_gs = msr_content;
+        wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
+        break;
+
+    CASE_WRITE_MSR(STAR);
+    CASE_WRITE_MSR(LSTAR);
+    CASE_WRITE_MSR(CSTAR);
+    CASE_WRITE_MSR(SYSCALL_MASK);
+    default:
+        return 0;
+    }
+    return 1;
+}
+
+void
+svm_restore_msrs(struct vcpu *v)
+{
+    int i = 0;
+    struct svm_msr_state *guest_state;
+    struct svm_msr_state *host_state;
+    unsigned long guest_flags;
+
+    guest_state = &v->arch.hvm_svm.msr_content;;
+    host_state = &percpu_msr[smp_processor_id()];
+
+    wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
+    guest_flags = guest_state->flags;
+    if (!guest_flags)
+        return;
+
+    while (guest_flags){
+        i = find_first_set_bit(guest_flags);
+
+        HVM_DBG_LOG(DBG_LEVEL_2,
+                    "restore guest's index %d msr %lx with %lx\n",
+                    i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
+        set_bit(i, &host_state->flags);
+        wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
+        clear_bit(i, &guest_flags);
+    }
+}
+#else
+#define        svm_save_init_msrs()    ((void)0)
+
+static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+
+static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+#endif
+
+void svm_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8])
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    crs[0] = vmcb->cr0;
+    crs[3] = vmcb->cr3;
+    crs[4] = vmcb->cr4;
+}
+
+void svm_modify_guest_state(struct vcpu *v)
+{
+    svm_modify_vmcb(v, &v->arch.guest_context.user_regs);
+}
+
+int svm_realmode(struct vcpu *v)
+{
+    unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
+
+    return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
+}
+
+int svm_instruction_length(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
+
+    mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
+    return svm_instrlen(guest_cpu_user_regs(), mode);
+}
+
+int start_svm(void)
+{
+    u32 eax, ecx, edx;
+    
+    /* Xen does not fill x86_capability words except 0. */
+    ecx = cpuid_ecx(0x80000001);
+    boot_cpu_data.x86_capability[5] = ecx;
+    
+    if (!(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)))
+        return 0;
+    
+    rdmsr(MSR_EFER, eax, edx);
+    eax |= EFER_SVME;
+    wrmsr(MSR_EFER, eax, edx);
+    asidpool_init(smp_processor_id());    
+    printk("AMD SVM Extension is enabled for cpu %d.\n", smp_processor_id());
+    
+    svm_save_init_msrs();
+
+    /* Setup HVM interfaces */
+    hvm_funcs.disable = stop_svm;
+
+    hvm_funcs.initialize_guest_resources = svm_initialize_guest_resources;
+    hvm_funcs.relinquish_guest_resources = svm_relinquish_guest_resources;
+
+    hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
+    hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
+
+#ifdef __x86_64__
+    hvm_funcs.save_segments = svm_save_segments;
+    hvm_funcs.load_msrs = svm_load_msrs;
+    hvm_funcs.restore_msrs = svm_restore_msrs;
+#endif
+
+    hvm_funcs.store_cpu_guest_ctrl_regs = svm_store_cpu_guest_ctrl_regs;
+    hvm_funcs.modify_guest_state = svm_modify_guest_state;
+
+    hvm_funcs.realmode = svm_realmode;
+    hvm_funcs.paging_enabled = svm_paging_enabled;
+    hvm_funcs.instruction_length = svm_instruction_length;
+
+    hvm_enabled = 1;    
+
+    return 1;
+}
+
+int svm_dbg_on = 0;
+
+static inline int svm_do_debugout(unsigned long exit_code)
+{
+    int i;
+
+    static unsigned long counter = 0;
+    static unsigned long works[] =
+    {
+        VMEXIT_IOIO,
+        VMEXIT_HLT,
+        VMEXIT_CPUID,
+        VMEXIT_DR0_READ,
+        VMEXIT_DR1_READ,
+        VMEXIT_DR2_READ,
+        VMEXIT_DR3_READ,
+        VMEXIT_DR6_READ,
+        VMEXIT_DR7_READ,
+        VMEXIT_DR0_WRITE,
+        VMEXIT_DR1_WRITE,
+        VMEXIT_DR2_WRITE,
+        VMEXIT_DR3_WRITE,
+        VMEXIT_CR0_READ,
+        VMEXIT_CR0_WRITE,
+        VMEXIT_CR3_READ,
+        VMEXIT_CR4_READ, 
+        VMEXIT_MSR,
+        VMEXIT_CR0_WRITE,
+        VMEXIT_CR3_WRITE,
+        VMEXIT_CR4_WRITE,
+        VMEXIT_EXCEPTION_PF,
+        VMEXIT_INTR,
+        VMEXIT_INVLPG,
+        VMEXIT_EXCEPTION_NM
+    };
+
+
+#if 0
+    if (svm_dbg_on && exit_code != 0x7B)
+        return 1;
+#endif
+
+    counter++;
+
+#if 0
+    if ((exit_code == 0x4E 
+                || exit_code == VMEXIT_CR0_READ 
+                || exit_code == VMEXIT_CR0_WRITE) 
+            && counter < 200000)
+        return 0;
+
+    if ((exit_code == 0x4E) && counter < 500000)
+        return 0;
+#endif
+
+    for (i = 0; i < sizeof(works) / sizeof(works[0]); i++)
+        if (exit_code == works[i])
+            return 0;
+
+    return 1;
+}
+
+
+void save_svm_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *ctxt)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+
+    ctxt->eax = vmcb->rax;
+    ctxt->ss = vmcb->ss.sel;
+    ctxt->esp = vmcb->rsp;
+    ctxt->eflags = vmcb->rflags;
+    ctxt->cs = vmcb->cs.sel;
+    ctxt->eip = vmcb->rip;
+    
+    ctxt->gs = vmcb->gs.sel;
+    ctxt->fs = vmcb->fs.sel;
+    ctxt->es = vmcb->es.sel;
+    ctxt->ds = vmcb->ds.sel;
+}
+
+#if defined (__x86_64__)
+void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *c )
+{
+}
+#elif defined (__i386__)
+void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    regs->eip    = vmcb->rip;
+    regs->esp    = vmcb->rsp;
+    regs->eflags = vmcb->rflags;
+    regs->cs     = vmcb->cs.sel;
+    regs->ds     = vmcb->ds.sel;
+    regs->es     = vmcb->es.sel;
+    regs->ss     = vmcb->ss.sel;
+}
+#endif
+
+/* XXX Use svm_load_cpu_guest_regs instead */
+#if defined (__i386__)
+void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
+{ 
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
+    
+    /* Write the guest register value into VMCB */
+    vmcb->rax      = regs->eax;
+    vmcb->ss.sel   = regs->ss;
+    vmcb->rsp      = regs->esp;   
+    vmcb->rflags   = regs->eflags;
+    vmcb->cs.sel   = regs->cs;
+    vmcb->rip      = regs->eip;
+    if (regs->eflags & EF_TF)
+        *intercepts |= EXCEPTION_BITMAP_DB;
+    else
+        *intercepts &= ~EXCEPTION_BITMAP_DB;
+}
+#else /* (__i386__) */
+void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    u32 *intercepts = &v->arch.hvm_svm.vmcb->exception_intercepts;
+    
+    /* Write the guest register value into VMCB */
+    vmcb->rax      = regs->rax;
+    vmcb->ss.sel   = regs->ss;
+    vmcb->rsp      = regs->rsp;   
+    vmcb->rflags   = regs->rflags;
+    vmcb->cs.sel   = regs->cs;
+    vmcb->rip      = regs->rip;
+    if (regs->rflags & EF_TF)
+        *intercepts |= EXCEPTION_BITMAP_DB;
+    else
+        *intercepts &= ~EXCEPTION_BITMAP_DB;
+}
+#endif /* !(__i386__) */
+
+int svm_paging_enabled(struct vcpu *v)
+{
+    unsigned long cr0;
+
+    cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
+
+    return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
+}
+
+
+/* Make sure that xen intercepts any FP accesses from current */
+void svm_stts(struct vcpu *v) 
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);    
+
+    vmcb->cr0 |= X86_CR0_TS;
+
+    if (!(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS))
+        v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
+}
+
+static void arch_svm_do_launch(struct vcpu *v) 
+{
+    cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
+    int error;
+
+#if 0
+    if (svm_dbg_on)
+        printk("Do launch\n");
+#endif
+    error = construct_vmcb(&v->arch.hvm_svm, regs);
+    if ( error < 0 )
+    {
+        if (v->vcpu_id == 0) {
+            printk("Failed to construct a new VMCB for BSP.\n");
+        } else {
+            printk("Failed to construct a new VMCB for AP %d\n", v->vcpu_id);
+        }
+        domain_crash_synchronous();
+    }
+
+    svm_do_launch(v);
+#if 0
+    if (svm_dbg_on)
+        svm_dump_host_regs(__func__);
+#endif
+    reset_stack_and_jump(svm_asm_do_launch);
+}
+
+void svm_final_setup_guest(struct vcpu *v)
+{
+    v->arch.schedule_tail = arch_svm_do_launch;
+
+    if (v == v->domain->vcpu[0]) 
+    {
+       struct domain *d = v->domain;
+       struct vcpu *vc;
+
+       /* Initialize monitor page table */
+       for_each_vcpu(d, vc)
+           vc->arch.monitor_table = mk_pagetable(0);
+
+        /* 
+         * Required to do this once per domain
+         * TODO: add a separate function to do these.
+         */
+        memset(&d->shared_info->evtchn_mask[0], 0xff, 
+               sizeof(d->shared_info->evtchn_mask));       
+
+        /* 
+         * Put the domain in shadow mode even though we're going to be using
+         * the shared 1:1 page table initially. It shouldn't hurt 
+         */
+        shadow_mode_enable(d, 
+                SHM_enable|SHM_refcounts|
+               SHM_translate|SHM_external|SHM_wr_pt_pte);
+    }
+}
+
+
+void svm_relinquish_resources(struct vcpu *v)
+{
+    struct hvm_virpit *vpit;
+    extern void destroy_vmcb(struct arch_svm_struct *); /* XXX */
+
+#if 0
+    /* 
+     * This is not stored at the moment. We need to keep it somewhere and free
+     * it. Or maybe not, as it's a per-cpu-core item, and I guess we don't
+     * normally remove CPUs other than for hot-plug capable systems, where I
+     * guess we have to allocate and free host-save area in this case. Let's
+     * not worry about it at the moment, as losing one page per CPU hot-plug
+     * event doesn't seem that excessive. But I may be wrong.
+     */
+    free_host_save_area(v->arch.hvm_svm.host_save_area);
+#endif
+
+    if (v->vcpu_id == 0) {
+        /* unmap IO shared page */
+        struct domain *d = v->domain;
+        if (d->arch.hvm_domain.shared_page_va)
+            unmap_domain_page((void *)d->arch.hvm_domain.shared_page_va);
+    }
+
+    destroy_vmcb(&v->arch.hvm_svm);
+    free_monitor_pagetable(v);
+    vpit = &v->domain->arch.hvm_domain.vpit;
+    kill_timer(&vpit->pit_timer);
+    kill_timer(&v->arch.hvm_svm.hlt_timer);
+    if ( hvm_apic_support(v->domain) ) {
+        kill_timer( &(VLAPIC(v)->vlapic_timer) );
+        xfree( VLAPIC(v) );
+    }
+}
+
+
+void arch_svm_do_resume(struct vcpu *v) 
+{
+    svm_do_resume(v);
+    reset_stack_and_jump(svm_asm_do_resume);
+}
+
+
+static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 
+{
+    struct vcpu *v = current;
+    unsigned long eip;
+    unsigned long gpa; /* FIXME: PAE */
+    int result;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+
+//#if HVM_DEBUG
+    eip = vmcb->rip;
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, 
+            "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
+            va, eip, (unsigned long)regs->error_code);
+//#endif
+
+    if (!svm_paging_enabled(v)) 
+    {
+        handle_mmio(va, va);
+        TRACE_VMEXIT(2,2);
+        return 1;
+    }
+
+    update_pagetables(v);
+
+    gpa = gva_to_gpa(va);
+
+    /* Use 1:1 page table to identify MMIO address space */
+    if (mmio_space(gpa))
+    {
+       /* No support for APIC */
+        if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
+        { 
+            unsigned long inst_len;
+           inst_len = svm_instruction_length(v);
+            if (inst_len == (unsigned long)-1)
+            {
+                printf("%s: INST_LEN - Unable to decode properly.\n", 
__func__);
+                domain_crash_synchronous();
+            }
+
+            __update_guest_eip(vmcb, inst_len);
+
+            return 1;
+        }
+
+        TRACE_VMEXIT (2,2);
+        handle_mmio(va, gpa);
+
+        return 1;
+    }
+    
+    result = shadow_fault(va, regs);
+
+    if( result ) {
+        /* Let's make sure that the Guest TLB is flushed */
+        set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+    }
+
+    TRACE_VMEXIT (2,result);
+
+    return result;
+}
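+/*
+ * The decision tree above, in brief: with guest paging disabled we are
+ * still on the shared 1:1 map, so every fault is treated as MMIO; with
+ * paging enabled we translate gva->gpa, route MMIO-space addresses to the
+ * device model, and hand everything else to shadow_fault(), requesting an
+ * ASID reassignment when the shadow was updated.
+ */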
+
+
+static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
+{
+    struct vcpu *v = current;
+
+    clts();
+
+    setup_fpu(v);    
+
+    if (!(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS))
+        vmcb->cr0 &= ~X86_CR0_TS;
+    
+    vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+}
+
+
+static void svm_do_general_protection_fault(struct vcpu *v, 
+        struct cpu_user_regs *regs) 
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned long eip, error_code;
+    eventinj_t event;
+
+    ASSERT(vmcb);
+
+    eip = vmcb->rip;
+    error_code = vmcb->exitinfo1;
+
+    HVM_DBG_LOG(DBG_LEVEL_1,
+                "svm_general_protection_fault: eip = %lx, erro_code = %lx",
+                eip, error_code);
+
+    HVM_DBG_LOG(DBG_LEVEL_1, 
+            "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+            (unsigned long)regs->eax, (unsigned long)regs->ebx,
+            (unsigned long)regs->ecx, (unsigned long)regs->edx,
+            (unsigned long)regs->esi, (unsigned long)regs->edi);
+
+    
+    /* Reflect it back into the guest */
+    event.bytes = 0;
+    event.fields.v = 1;
+    event.fields.type = EVENTTYPE_EXCEPTION;
+    event.fields.vector = 13;
+    event.fields.ev = 1;
+    event.fields.errorcode = error_code;
+
+    vmcb->eventinj = event;
+}
+
+/* Reserved bits: [31:14], [12:1] */
+#define SVM_VCPU_CPUID_L1_RESERVED 0xffffdffe
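+/*
+ * A quick check of the mask: bits [31:14] give 0xffffc000 and bits [12:1]
+ * give 0x00001ffe; OR'ing them yields 0xffffdffe, so only bits 13 and 0
+ * of ECX remain visible to the guest for CPUID leaf 1.
+ */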
+
+static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb, unsigned long input, 
+        struct cpu_user_regs *regs) 
+{
+    unsigned int eax, ebx, ecx, edx;
+    unsigned long eip;
+    struct vcpu *v = current;
+    unsigned int inst_len;
+
+    ASSERT(vmcb);
+
+    eip = vmcb->rip;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, 
+            "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
+            " (esi) %lx, (edi) %lx",
+            (unsigned long)regs->eax, (unsigned long)regs->ebx,
+            (unsigned long)regs->ecx, (unsigned long)regs->edx,
+            (unsigned long)regs->esi, (unsigned long)regs->edi);
+
+    cpuid(input, &eax, &ebx, &ecx, &edx);
+
+    if (input == 1)
+    {
+        if ( hvm_apic_support(v->domain) &&
+                !vlapic_global_enabled((VLAPIC(v))) )
+            clear_bit(X86_FEATURE_APIC, &edx);
+           
+#ifdef __x86_64__
+        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
+#endif
+        {
+            clear_bit(X86_FEATURE_PSE, &edx);
+            clear_bit(X86_FEATURE_PAE, &edx);
+            clear_bit(X86_FEATURE_PSE36, &edx);
+        }
+       
+        /* Clear out reserved bits. */
+        ecx &= ~SVM_VCPU_CPUID_L1_RESERVED;
+    }
+#ifdef __i386__
+    else if ( input == 0x80000001 )
+    {
+        /* Mask feature for Intel ia32e or AMD long mode. */
+        clear_bit(X86_FEATURE_LM & 31, &edx);
+    }
+#endif
+
+    regs->eax = (unsigned long)eax;
+    regs->ebx = (unsigned long)ebx;
+    regs->ecx = (unsigned long)ecx;
+    regs->edx = (unsigned long)edx;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, 
+            "svm_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, "
+            "ebx=%x, ecx=%x, edx=%x",
+            eip, input, eax, ebx, ecx, edx);
+
+    inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
+    __update_guest_eip(vmcb, inst_len);
+}
+
+
+static inline unsigned long *get_reg_p(unsigned int gpreg, 
+        struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
+{
+    unsigned long *reg_p = NULL;
+    switch (gpreg)
+    {
+    case SVM_REG_EAX:
+        reg_p = (unsigned long *)&regs->eax;
+        break;
+    case SVM_REG_EBX:
+        reg_p = (unsigned long *)&regs->ebx;
+        break;
+    case SVM_REG_ECX:
+        reg_p = (unsigned long *)&regs->ecx;
+        break;
+    case SVM_REG_EDX:
+        reg_p = (unsigned long *)&regs->edx;
+        break;
+    case SVM_REG_EDI:
+        reg_p = (unsigned long *)&regs->edi;
+        break;
+    case SVM_REG_ESI:
+        reg_p = (unsigned long *)&regs->esi;
+        break;
+    case SVM_REG_EBP:
+        reg_p = (unsigned long *)&regs->ebp;
+        break;
+    case SVM_REG_ESP:
+        reg_p = (unsigned long *)&vmcb->rsp;
+        break;
+#ifdef __x86_64__
+    case SVM_REG_R8:
+        reg_p = (unsigned long *)&regs->r8;
+        break;
+    case SVM_REG_R9:
+        reg_p = (unsigned long *)&regs->r9;
+        break;
+    case SVM_REG_R10:
+        reg_p = (unsigned long *)&regs->r10;
+        break;
+    case SVM_REG_R11:
+        reg_p = (unsigned long *)&regs->r11;
+        break;
+    case SVM_REG_R12:
+        reg_p = (unsigned long *)&regs->r12;
+        break;
+    case SVM_REG_R13:
+        reg_p = (unsigned long *)&regs->r13;
+        break;
+    case SVM_REG_R14:
+        reg_p = (unsigned long *)&regs->r14;
+        break;
+    case SVM_REG_R15:
+        reg_p = (unsigned long *)&regs->r15;
+        break;
+#endif
+    default:
+        BUG();
+    } 
+    
+    return reg_p;
+}
+
+
+static inline unsigned long get_reg(unsigned int gpreg, 
+        struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
+{
+    unsigned long *gp;
+    gp = get_reg_p(gpreg, regs, vmcb);
+    return *gp;
+}
+
+
+static inline void set_reg(unsigned int gpreg, unsigned long value, 
+        struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
+{
+    unsigned long *gp;
+    gp = get_reg_p(gpreg, regs, vmcb);
+    *gp = value;
+}
+                           
+
+static void svm_dr_access (struct vcpu *v, unsigned int reg, unsigned int type,
+        struct cpu_user_regs *regs)
+{
+    unsigned long *reg_p = 0;
+    unsigned int gpreg = 0;
+    unsigned long eip;
+    unsigned int inst_len; 
+    struct vmcb_struct *vmcb;
+    u8 buffer[MAX_INST_LEN];
+
+    vmcb = v->arch.hvm_svm.vmcb;
+    
+    ASSERT(vmcb);
+
+    eip = vmcb->rip;
+    inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
+
+    ASSERT(buffer[0] == 0x0f && (buffer[1] & 0xFD) == 0x21);
+
+    gpreg = decode_src_reg(buffer[2]);
+#if DEBUG
+    ASSERT(reg == decode_dest_reg(buffer[2]));
+#endif
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
+            eip, reg, gpreg);
+
+    reg_p = get_reg_p(gpreg, regs, vmcb);
+        
+    switch (type) 
+    {
+    case TYPE_MOV_TO_DR: 
+        inst_len = __get_instruction_length(vmcb, INSTR_MOV2DR, buffer);
+        v->arch.guest_context.debugreg[reg] = *reg_p; 
+        break;
+    case TYPE_MOV_FROM_DR:
+        inst_len = __get_instruction_length(vmcb, INSTR_MOVDR2, buffer);
+        *reg_p = v->arch.guest_context.debugreg[reg];
+        break;
+    default:
+        __hvm_bug(regs);
+        break;
+    }
+    __update_guest_eip(vmcb, inst_len);
+}
+
+
+static unsigned int check_for_null_selector(struct vmcb_struct *vmcb, 
+        unsigned int dir, unsigned long *base, unsigned int real)
+
+{
+    unsigned char inst[MAX_INST_LEN];
+    segment_selector_t seg;
+    int i;
+
+    memset(inst, 0, MAX_INST_LEN);
+    if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst)) 
+            != MAX_INST_LEN) 
+    {
+        printk("check_for_null_selector: get guest instruction failed\n");
+        domain_crash_synchronous();
+    }
+
+    for (i = 0; i < MAX_INST_LEN; i++)
+    {
+        switch (inst[i])
+        {
+        case 0xf3: /* REPZ */
+        case 0xf2: /* REPNZ */
+        case 0xf0: /* LOCK */
+        case 0x66: /* data32 */
+        case 0x67: /* addr32 */
+#ifdef __x86_64__
+            /* REX prefixes */
+        case 0x40:
+        case 0x41:
+        case 0x42:
+        case 0x43:
+        case 0x44:
+        case 0x45:
+        case 0x46:
+        case 0x47:
+
+        case 0x48:
+        case 0x49:
+        case 0x4a:
+        case 0x4b:
+        case 0x4c:
+        case 0x4d:
+        case 0x4e:
+        case 0x4f:
+#endif
+            continue;
+        case 0x2e: /* CS */
+            seg = vmcb->cs;
+            break;
+        case 0x36: /* SS */
+            seg = vmcb->ss;
+            break;
+        case 0x26: /* ES */
+            seg = vmcb->es;
+            break;
+        case 0x64: /* FS */
+            seg = vmcb->fs;
+            break;
+        case 0x65: /* GS */
+            seg = vmcb->gs;
+            break;
+        case 0x3e: /* DS */
+            seg = vmcb->ds;
+            break;
+        default:
+            if (dir == IOREQ_READ)
+                seg = vmcb->es;
+            else
+                seg = vmcb->ds;
+        }
+        
+        /* In real mode, the segment base is selector << 4 */
+        if (real)
+            seg.base = seg.sel << 4;
+
+        if (base)
+            *base = seg.base;
+
+        return seg.attributes.fields.p;
+    }
+
+    ASSERT(0);
+    return 0;
+}
+
+
+/* Get the address of INS/OUTS instruction */
+static inline unsigned long svm_get_io_address(struct vmcb_struct *vmcb, 
+        struct cpu_user_regs *regs, unsigned int dir, unsigned int real)
+{
+    unsigned long addr = 0;
+    unsigned long base = 0;
+
+    check_for_null_selector(vmcb, dir, &base, real);
+
+    if (dir == IOREQ_WRITE)
+    {
+        if (real)
+            addr = (regs->esi & 0xFFFF) + base;
+        else
+            addr = regs->esi + base;
+    }
+    else
+    {
+        if (real)
+            addr = (regs->edi & 0xFFFF) + base;
+        else
+            addr = regs->edi + base;
+    }
+
+    return addr;
+}
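+/*
+ * A worked example of the real-mode case (illustrative values): for OUTS
+ * with DS=0x1234 and SI=0x0010 the base is 0x1234 << 4 = 0x12340, giving
+ * linear address 0x12340 + 0x0010 = 0x12350.  Per the x86 string-I/O
+ * convention, OUTS reads from DS:(E)SI and INS writes to ES:(E)DI.
+ */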
+
+
+static void svm_io_instruction(struct vcpu *v, struct cpu_user_regs *regs) 
+{
+    struct mmio_op *mmio_opp;
+    unsigned long eip, cs, eflags, cr0;
+    unsigned long port;
+    unsigned int real, size, dir;
+    ioio_info_t info;
+
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+    mmio_opp = &current->arch.hvm_vcpu.mmio_op;
+    mmio_opp->instr = INSTR_PIO;
+    mmio_opp->flags = 0;
+
+    eip = vmcb->rip;
+    cs =  vmcb->cs.sel;
+    eflags = vmcb->rflags;
+
+    info.bytes = vmcb->exitinfo1;
+
+    port = info.fields.port; /* port used to be addr */
+    dir = info.fields.type; /* direction */ 
+    if (info.fields.sz32) 
+        size = 4;
+    else if (info.fields.sz16)
+        size = 2;
+    else 
+        size = 1;
+
+    cr0 = vmcb->cr0;
+    real = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
+
+    HVM_DBG_LOG(DBG_LEVEL_IO, 
+                "svm_io_instruction: port 0x%lx real %d, eip=%lx:%lx, "
+                "exit_qualification = %lx",
+                (unsigned long) port, real, cs, eip, (unsigned long)info.bytes);
+
+    /* 
+     * On SVM, the RIP of the instruction following the IN/OUT is saved in
+     * ExitInfo2.
+     */
+    vmcb->rip = vmcb->exitinfo2;
+
+    /* string instruction */
+    if (info.fields.str)
+    { 
+        unsigned long addr, count = 1;
+        int sign = regs->eflags & EF_DF ? -1 : 1;
+
+        addr = svm_get_io_address(vmcb, regs, dir, real);
+
+        /* "rep" prefix */
+        if (info.fields.rep) 
+        {
+            mmio_opp->flags |= REPZ;
+            count = real ? regs->ecx & 0xFFFF : regs->ecx;
+        }
+
+        /*
+         * Handle string pio instructions that cross pages or that
+         * are unaligned. See the comments in hvm_platform.c/handle_mmio()
+         */
+        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
+        {
+            unsigned long value = 0;
+
+            mmio_opp->flags |= OVERLAP;
+
+            if (dir == IOREQ_WRITE)
+                hvm_copy(&value, addr, size, HVM_COPY_IN);
+
+            send_pio_req(regs, port, 1, size, value, dir, 0);
+        } 
+        else 
+        {
+            if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
+            {
+                if (sign > 0)
+                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+                else
+                    count = (addr & ~PAGE_MASK) / size;
+            }
+
+            send_pio_req(regs, port, count, size, addr, dir, 1);
+        }
+    } 
+    else 
+    {
+        if (port == 0xe9 && dir == IOREQ_WRITE && size == 1) 
+            hvm_print_line(v, regs->eax); /* guest debug output */
+    
+        send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
+    }
+}
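+/*
+ * Worked example of the count clamping above (illustrative values): a
+ * forward "rep outsl" (size 4, sign > 0) starting at offset 0xffc in a
+ * 4K page with count 16 would cross the page, so count is clamped to
+ * (0x1000 - 0xffc) / 4 = 1; the remaining repetitions simply cause
+ * further IOIO exits and are handled the same way.
+ */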
+
+
+static int svm_set_cr0(unsigned long value)
+{
+    struct vcpu *v = current;
+    unsigned long mfn;
+    int paging_enabled;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+
+    /* We don't want to lose PG. ET is reserved and should always be 1. */
+    paging_enabled = svm_paging_enabled(v);
+    value |= X86_CR0_ET;
+    vmcb->cr0 = value | X86_CR0_PG;
+    v->arch.hvm_svm.cpu_shadow_cr0 = value;
+
+    /* Check if the FP unit trap needs to be on */
+    if (value & X86_CR0_TS)
+    { 
+       vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
+
+    if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) 
+    {
+        /* The guest CR3 must point to a valid guest physical address. */
+        if (!VALID_MFN(mfn = 
+                    get_mfn_from_pfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))
+                || !get_page(pfn_to_page(mfn), v->domain))
+        {
+            printk("Invalid CR3 value = %lx\n", v->arch.hvm_svm.cpu_cr3);
+            domain_crash_synchronous(); /* need to take a clean path */
+        }
+
+#if defined(__x86_64__)
+        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state) 
+                && !test_bit(SVM_CPU_STATE_PAE_ENABLED, 
+                    &v->arch.hvm_svm.cpu_state))
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
+            svm_inject_exception(v, TRAP_gp_fault, 0);
+        }
+
+        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
+        {
+            /* PAE should already be enabled at this point. */
+            HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
+            set_bit(SVM_CPU_STATE_LMA_ENABLED,
+                    &v->arch.hvm_svm.cpu_state);
+#if 0
+            __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
+            vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
+            __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+#else
+           printk("Cannot yet set SVM_CPU_STATE_LMA_ENABLED\n");
+           domain_crash_synchronous();
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4 
+            if (!shadow_set_guest_paging_levels(v->domain, 4)) 
+            {
+                printk("Unsupported guest paging levels\n");
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+#endif
+        }
+        else
+        {
+#if CONFIG_PAGING_LEVELS >= 4
+            if (!shadow_set_guest_paging_levels(v->domain, 2))
+            {
+                printk("Unsupported guest paging levels\n");
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+#endif
+        }
+
+#if 0
+        unsigned long crn;
+
+        /* update CR4's PAE if needed */
+        __vmread(GUEST_CR4, &crn);
+        if ((!(crn & X86_CR4_PAE)) 
+                && test_bit(SVM_CPU_STATE_PAE_ENABLED, 
+                    &v->arch.hvm_svm.cpu_state))
+        {
+            HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
+            __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
+        }
+#else
+       printk("Cannot yet set SVM_CPU_STATE_PAE_ENABLED\n");
+       domain_crash_synchronous(); 
+#endif
+#elif defined(__i386__)
+       {
+            unsigned long old_base_mfn;
+            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+            if (old_base_mfn)
+                put_page(pfn_to_page(old_base_mfn));
+       }
+#endif
+        /* Now arch.guest_table points to machine physical. */
+        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        update_pagetables(v);
+
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
+                (unsigned long) (mfn << PAGE_SHIFT));
+
+        set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+        vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
+
+        /* arch->shadow_table should hold the next CR3 for shadow */
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx\n", 
+                    v->arch.hvm_svm.cpu_cr3, mfn);
+    }
+
+    /*
+     * SVM implements paged real mode, and when we return to real mode
+     * we revert to the physical mappings that the domain builder
+     * created.
+     */
+    if ((value & X86_CR0_PE) == 0) {
+       if (value & X86_CR0_PG) {
+            svm_inject_exception(v, TRAP_gp_fault, 0);
+           return 0;
+       }
+
+        set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+    }
+
+    return 1;
+}
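+/*
+ * Note on the PE-clear path above: SVM keeps PG set in vmcb->cr0 at all
+ * times, so "real mode" is really paged real mode -- CR3 is pointed back
+ * at the domain builder's 1:1 phys_table while the guest believes paging
+ * and protection are off.
+ */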
+
+
+/*
+ * Read from control registers. CR0 and CR4 are read from the shadow.
+ */
+static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+    unsigned long value = 0;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb;
+
+    vmcb = v->arch.hvm_svm.vmcb;
+    ASSERT(vmcb);
+
+    switch (cr)
+    {
+    case 0:
+        value = v->arch.hvm_svm.cpu_shadow_cr0;
+        break;
+    case 2:
+        value = vmcb->cr2;
+        break;
+    case 3:
+        value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
+        break;
+    case 4:
+        value = vmcb->cr4;
+        break;
+    case 8:
+#if 0
+        value = vmcb->m_cr8;
+#else
+        ASSERT(0);
+#endif
+        break;
+        
+    default:
+        __hvm_bug(regs);
+    }
+
+    set_reg(gp, value, regs, vmcb);
+
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
+}
+
+
+/*
+ * Write to control registers
+ */
+static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
+{
+    unsigned long value;
+    unsigned long old_cr;
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+
+    value = get_reg(gpreg, regs, vmcb);
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
+    HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
+
+    switch (cr) 
+    {
+    case 0: 
+        return svm_set_cr0(value);
+
+    case 3: 
+    {
+        unsigned long old_base_mfn, mfn;
+
+        /* If paging is not enabled yet, simply copy the value to CR3. */
+        if (!svm_paging_enabled(v)) {
+            v->arch.hvm_svm.cpu_cr3 = value;
+            break;
+        }
+        set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+
+        /* We make a new one if the shadow does not exist. */
+        if (value == v->arch.hvm_svm.cpu_cr3) 
+        {
+            /* 
+             * This is a simple TLB flush, implying the guest has 
+             * removed some translation or changed page attributes.
+             * We simply invalidate the shadow.
+             */
+            mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
+            if (mfn != pagetable_get_pfn(v->arch.guest_table))
+                __hvm_bug(regs);
+            shadow_sync_all(v->domain);
+        }
+        else 
+        {
+            /*
+             * If different, make a shadow. Check if the PDBR is valid
+             * first.
+             */
+            HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
+            if (((value >> PAGE_SHIFT) > v->domain->max_pages) 
+                    || !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT))
+                    || !get_page(pfn_to_page(mfn), v->domain))
+            {
+                printk("Invalid CR3 value=%lx\n", value);
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+
+            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+            v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+
+            if (old_base_mfn)
+                put_page(pfn_to_page(old_base_mfn));
+
+            update_pagetables(v);
+            
+            /* arch.shadow_table should now hold the next CR3 for shadow*/
+            v->arch.hvm_svm.cpu_cr3 = value;
+            HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
+            vmcb->cr3 = pagetable_get_paddr(v->arch.shadow_table);
+        }
+        break;
+    }
+
+    case 4:         
+        /* CR4 */
+        if (value & X86_CR4_PAE)
+            __hvm_bug(regs);    /* not implemented */
+
+        old_cr = vmcb->cr4;
+        
+        vmcb->cr4 = value;
+  
+        /*
+         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
+         * all TLB entries except global entries.
+         */
+        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE))
+        {
+            set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+            shadow_sync_all(v->domain);
+        }
+        break;
+
+    default:
+        printk("invalid cr: %d\n", cr);
+        __hvm_bug(regs);
+    }
+
+    return 1;
+}
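+/*
+ * CR3 write semantics above, in brief: writing the current CR3 value is
+ * the guest's idiom for a TLB flush, so the whole shadow is resynced;
+ * writing a new value validates the frame, takes a page reference, and
+ * switches guest_table so update_pagetables() can build a fresh shadow.
+ */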
+
+
+#define ARR_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+
+static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type,
+        struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned int inst_len = 0;
+    unsigned int gpreg;
+    unsigned long value;
+    u8 buffer[6];   
+    int result = 1;
+    enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
+    enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
+    enum instruction_index match;
+
+    ASSERT(vmcb);
+
+    inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
+    
+    if (type == TYPE_MOV_TO_CR) 
+    {
+        inst_len = __get_instruction_length_from_list(vmcb, list_a, 
+                ARR_SIZE(list_a), buffer, &match);
+    }
+    else
+    {
+        inst_len = __get_instruction_length_from_list(vmcb, list_b, 
+                ARR_SIZE(list_b), buffer, &match);
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
+
+    switch (match) 
+    {
+    case INSTR_MOV2CR:
+        gpreg = decode_src_reg(buffer[2]);
+        result = mov_to_cr(gpreg, cr, regs);
+        break;
+
+    case INSTR_MOVCR2:
+        gpreg = decode_src_reg(buffer[2]);
+        mov_from_cr(cr, gpreg, regs);
+        break;
+
+    case INSTR_CLTS:
+        clts();
+        setup_fpu(current);
+        vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
+        v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */
+        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
+        break;
+
+    case INSTR_LMSW:
+        if (svm_dbg_on)
+            svm_dump_inst(svm_rip2pointer(vmcb));
+        
+        gpreg = decode_src_reg(buffer[2]);
+        value = get_reg(gpreg, regs, vmcb) & 0xF;
+
+        if (svm_dbg_on)
+            printk("CR0-LMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg, 
+                    inst_len);
+
+        value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
+
+        if (svm_dbg_on)
+            printk("CR0-LMSW CR0 - New value=%lx\n", value);
+
+        result = svm_set_cr0(value);
+        break;
+
+    case INSTR_SMSW:
+        svm_dump_inst(svm_rip2pointer(vmcb));
+        value = v->arch.hvm_svm.cpu_shadow_cr0;
+        gpreg = decode_src_reg(buffer[2]);
+        set_reg(gpreg, value, regs, vmcb);
+
+        if (svm_dbg_on)
+            printk("CR0-SMSW value=%lx, reg=%d, inst_len=%d\n", value, gpreg, 
+                    inst_len);
+        break;
+
+    default:
+        __hvm_bug(regs);
+        break;
+    }
+
+    ASSERT(inst_len);
+
+    __update_guest_eip(vmcb, inst_len);
+    
+    return result;
+}
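+/*
+ * LMSW example (illustrative values): LMSW can only load the low four
+ * bits of CR0 (PE, MP, EM, TS), hence the "& 0xF" above.  Executing
+ * "lmsw ax" with AX=0x1 against a shadow CR0 of 0x10 produces
+ * (0x10 & ~0xF) | 0x1 = 0x11, which then goes through svm_set_cr0().
+ */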
+
+static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned int  inst_len;
+    int64_t tsc_sum;
+
+    ASSERT(vmcb);
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access: ecx=%lx, eax=%lx, edx=%lx, "
+            "exitinfo = %lx", (unsigned long)regs->ecx, 
+            (unsigned long)regs->eax, (unsigned long)regs->edx, 
+            (unsigned long)vmcb->exitinfo1);
+
+    /* is it a read? */
+    if (vmcb->exitinfo1 == 0)
+    {
+        inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
+
+        regs->edx = 0;
+        switch (regs->ecx)
+        {
+        case MSR_IA32_SYSENTER_CS:
+            regs->eax = vmcb->sysenter_cs;
+            break;
+        case MSR_IA32_SYSENTER_ESP: 
+            regs->eax = vmcb->sysenter_esp;
+            break;
+        case MSR_IA32_SYSENTER_EIP:     
+            regs->eax = vmcb->sysenter_eip;
+            break;
+        case MSR_IA32_TIME_STAMP_COUNTER:
+            __asm__ __volatile__("rdtsc" : "=a" (regs->eax), "=d" (regs->edx));
+            tsc_sum = regs->edx;
+            tsc_sum = (tsc_sum << 32) + regs->eax;
+            tsc_sum += (int64_t) vmcb->tsc_offset;
+            regs->eax = tsc_sum & 0xFFFFFFFF;
+            regs->edx = (tsc_sum >> 32) & 0xFFFFFFFF;
+            break;
+        default:
+            if (long_mode_do_msr_read(regs))
+                goto done;
+            rdmsr_user(regs->ecx, regs->eax, regs->edx);
+            break;
+        }
+    }
+    else
+    {
+        inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
+
+        switch (regs->ecx)
+        {
+        case MSR_IA32_SYSENTER_CS:
+            vmcb->sysenter_cs = regs->eax;
+            break;
+        case MSR_IA32_SYSENTER_ESP: 
+            vmcb->sysenter_esp = regs->eax;
+            break;
+        case MSR_IA32_SYSENTER_EIP:     
+            vmcb->sysenter_eip = regs->eax;
+            break;
+        default:
+            long_mode_do_msr_write(regs);
+            break;
+        }
+    }
+
+done:
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "svm_do_msr_access returns: "
+                "ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+
+    __update_guest_eip(vmcb, inst_len);
+}
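+/*
+ * TSC virtualization above, in brief: guest_tsc = host_tsc + tsc_offset,
+ * with tsc_offset held in the VMCB (typically negative).  For example, a
+ * host rdtsc of 5000 with tsc_offset -2000 yields a guest reading of
+ * 3000, split back into EDX:EAX as the high and low 32 bits.
+ */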
+
+
+/*
+ * Need to use this exit to reschedule
+ */
+static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
+{
+    struct vcpu *v = current;
+    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+    s_time_t  next_pit = -1, next_wakeup;
+    unsigned int inst_len;
+
+    svm_stts(v);
+    inst_len = __get_instruction_length(vmcb, INSTR_HLT, NULL);
+    __update_guest_eip(vmcb, inst_len);
+
+    if ( !v->vcpu_id ) {
+        next_pit = get_pit_scheduled(v, vpit);
+    }
+    next_wakeup = get_apictime_scheduled(v);
+    if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) {
+        next_wakeup = next_pit;
+    }
+    if ( next_wakeup != -1 )
+        set_timer(&current->arch.hvm_svm.hlt_timer, next_wakeup);
+    do_sched_op(SCHEDOP_block, 0);
+}
+
+
+static inline void svm_vmexit_do_mwait(void)
+{
+    return;
+}
+
+
+#ifdef XEN_DEBUGGER
+static void svm_debug_save_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = current->arch.hvm_svm.vmcb;
+
+    regs->eip = vmcb->rip;
+    regs->esp = vmcb->rsp;
+    regs->eflags = vmcb->rflags;
+
+    regs->xcs = vmcb->cs.sel;
+    regs->xds = vmcb->ds.sel;
+    regs->xes = vmcb->es.sel;
+    regs->xfs = vmcb->fs.sel;
+    regs->xgs = vmcb->gs.sel;
+    regs->xss = vmcb->ss.sel;
+}
+
+
+static void svm_debug_restore_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = current->arch.hvm_svm.vmcb;
+
+    vmcb->ss.sel   = regs->xss;
+    vmcb->rsp      = regs->esp;
+    vmcb->rflags   = regs->eflags;
+    vmcb->cs.sel   = regs->xcs;
+    vmcb->rip      = regs->eip;
+
+    vmcb->gs.sel = regs->xgs;
+    vmcb->fs.sel = regs->xfs;
+    vmcb->es.sel = regs->xes;
+    vmcb->ds.sel = regs->xds;
+}
+#endif
+
+
+void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    u8 opcode[MAX_INST_SIZE], prefix, length = MAX_INST_SIZE;
+    unsigned long g_vaddr;
+    unsigned int inst_len;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+    /* 
+     * Unknown how many bytes the invlpg instruction will take. Use the
+     * maximum instruction length here.
+     */
+    if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
+    {
+        printk("svm_handle_invlpg (): Error reading memory %d bytes\n", 
length);
+       __hvm_bug(regs);
+    }
+
+    if (invlpga)
+    {
+        inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
+        __update_guest_eip(vmcb, inst_len);
+
+        /* 
+         * The address is implicit on this instruction. At the moment, we
+         * don't use ecx (ASID) to identify individual guest pages.
+         */
+        g_vaddr = regs->eax;
+    }
+    else
+    {
+        /* What about multiple prefix codes? */
+        prefix = (is_prefix(opcode[0])?opcode[0]:0);
+        inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
+
+        inst_len--;
+        length -= inst_len;
+
+        /* 
+         * Decode the memory operand of the instruction, including ModRM, SIB,
+         * and displacement, to get the effective address and length in bytes.
+         * Assume the system is in either 32- or 64-bit mode.
+         */
+        g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, 
+                            &opcode[inst_len], &length);
+
+        inst_len += length;
+        __update_guest_eip (vmcb, inst_len);
+    }
+
+    /* Overkill; we may not need this. */
+    set_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+    shadow_invlpg(v, g_vaddr);
+}
+
+
+/*
+ * Reset to realmode causes execution to start at 0xF000:0xFFF0 in
+ * 16-bit realmode.  Basically, this mimics a processor reset.
+ *
+ * returns 0 on success, non-zero otherwise
+ */
+static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v, 
+        struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb;
+
+    ASSERT(v);
+    ASSERT(regs);
+
+    vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+    
+    /* clear the vmcb and user regs */
+    memset(regs, 0, sizeof(struct cpu_user_regs));
+   
+    /* VMCB Control */
+    vmcb->tsc_offset = 0;
+
+    /* VMCB State */
+    vmcb->cr0 = X86_CR0_ET | X86_CR0_PG;
+    v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
+
+    vmcb->cr2 = 0;
+    vmcb->cr4 = 0;
+
+    /* This will jump to ROMBIOS */
+    vmcb->rip = 0xFFF0;
+
+    /* setup the segment registers and all their hidden states */
+    vmcb->cs.sel = 0xF000;
+    vmcb->cs.attributes.bytes = 0x089b;
+    vmcb->cs.limit = 0xffff;
+    vmcb->cs.base = 0x000F0000;
+
+    vmcb->ss.sel = 0x00;
+    vmcb->ss.attributes.bytes = 0x0893;
+    vmcb->ss.limit = 0xffff;
+    vmcb->ss.base = 0x00;
+
+    vmcb->ds.sel = 0x00;
+    vmcb->ds.attributes.bytes = 0x0893;
+    vmcb->ds.limit = 0xffff;
+    vmcb->ds.base = 0x00;
+    
+    vmcb->es.sel = 0x00;
+    vmcb->es.attributes.bytes = 0x0893;
+    vmcb->es.limit = 0xffff;
+    vmcb->es.base = 0x00;
+    
+    vmcb->fs.sel = 0x00;
+    vmcb->fs.attributes.bytes = 0x0893;
+    vmcb->fs.limit = 0xffff;
+    vmcb->fs.base = 0x00;
+    
+    vmcb->gs.sel = 0x00;
+    vmcb->gs.attributes.bytes = 0x0893;
+    vmcb->gs.limit = 0xffff;
+    vmcb->gs.base = 0x00;
+
+    vmcb->ldtr.sel = 0x00;
+    vmcb->ldtr.attributes.bytes = 0x0000;
+    vmcb->ldtr.limit = 0x0;
+    vmcb->ldtr.base = 0x00;
+
+    vmcb->gdtr.sel = 0x00;
+    vmcb->gdtr.attributes.bytes = 0x0000;
+    vmcb->gdtr.limit = 0x0;
+    vmcb->gdtr.base = 0x00;
+    
+    vmcb->tr.sel = 0;
+    vmcb->tr.attributes.bytes = 0;
+    vmcb->tr.limit = 0x0;
+    vmcb->tr.base = 0;
+
+    vmcb->idtr.sel = 0x00;
+    vmcb->idtr.attributes.bytes = 0x0000;
+    vmcb->idtr.limit = 0x3ff;
+    vmcb->idtr.base = 0x00;
+
+    vmcb->rax = 0;
+
+    return 0;
+}
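+/*
+ * Address check for the reset state above: CS.base 0x000F0000 plus RIP
+ * 0xFFF0 gives linear 0x000FFFF0, the ROMBIOS entry just below 1MB.  A
+ * hardware reset instead starts at CS.base 0xFFFF0000 (linear
+ * 0xFFFFFFF0) and relies on chipset aliasing of the BIOS ROM.
+ */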
+
+
+/*
+ * svm_do_vmmcall - SVM VMMCALL handler
+ *
+ * returns 0 on success, non-zero otherwise
+ */
+static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned int inst_len;
+
+    ASSERT(vmcb);
+    ASSERT(regs);
+
+    inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
+
+    /* VMMCALL sanity check */
+    if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
+    {
+        printf("VMMCALL CPL check failed\n");
+        return -1;
+    }
+
+    /* handle the request */
+    switch (regs->edi) 
+    {
+    case VMMCALL_RESET_TO_REALMODE:
+        if (svm_do_vmmcall_reset_to_realmode(v, regs)) 
+        {
+            printf("svm_do_vmmcall_reset_to_realmode() failed\n");
+            return -1;
+        }
+    
+        /* since we just reset the VMCB, return without adjusting the eip */
+        return 0;
+    case VMMCALL_DEBUG:
+        printf("DEBUG features not implemented yet\n");
+        break;
+    default:
+        break;
+    }
+
+    hvm_print_line(v, regs->eax); /* provides the current domain */
+
+    __update_guest_eip(vmcb, inst_len);
+    return 0;
+}
+
+
+void svm_dump_inst(unsigned long eip)
+{
+    u8 opcode[256];
+    unsigned long ptr;
+    int len;
+    int i;
+
+    ptr = eip & ~0xff;
+    len = 0;
+
+    if (hvm_copy(opcode, ptr, sizeof(opcode), HVM_COPY_IN))
+        len = sizeof(opcode);
+
+    printf("Code bytes around(len=%d) %lx:", len, eip);
+    for (i = 0; i < len; i++)
+    {
+        if ((i & 0x0f) == 0)
+            printf("\n%08lx:", ptr+i);
+
+        printf("%02x ", opcode[i]);
+    }
+
+    printf("\n");
+}
+
+
+void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned long pt = pagetable_get_paddr(v->arch.shadow_table);
+
+    printf("%s: guest registers from %s:\n", __func__, from);
+#if defined (__x86_64__)
+    printk("rax: %016lx   rbx: %016lx   rcx: %016lx\n",
+           regs->rax, regs->rbx, regs->rcx);
+    printk("rdx: %016lx   rsi: %016lx   rdi: %016lx\n",
+           regs->rdx, regs->rsi, regs->rdi);
+    printk("rbp: %016lx   rsp: %016lx   r8:  %016lx\n",
+           regs->rbp, regs->rsp, regs->r8);
+    printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
+           regs->r9,  regs->r10, regs->r11);
+    printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
+           regs->r12, regs->r13, regs->r14);
+    printk("r15: %016lx   cr0: %016lx   cr3: %016lx\n",
+           regs->r15, v->arch.hvm_svm.cpu_shadow_cr0, vmcb->cr3);
+#else
+    printf("eax: %08x, ebx: %08x, ecx: %08x, edx: %08x\n", 
+           regs->eax, regs->ebx, regs->ecx, regs->edx);
+    printf("edi: %08x, esi: %08x, ebp: %08x, esp: %08x\n", 
+           regs->edi, regs->esi, regs->ebp, regs->esp);
+    printf("%s: guest cr0: %lx\n", __func__, 
+           v->arch.hvm_svm.cpu_shadow_cr0);
+    printf("guest CR3 = %llx\n", vmcb->cr3);
+#endif
+    printf("%s: pt = %lx\n", __func__, pt);
+}
+
+
+void svm_dump_host_regs(const char *from)
+{
+    struct vcpu *v = current;
+    unsigned long pt = pagetable_get_paddr(v->arch.monitor_table);
+    unsigned long cr3, cr0;
+    printf("Host registers at %s\n", from);
+
+    __asm__ __volatile__ ("\tmov %%cr0,%0\n"
+                          "\tmov %%cr3,%1\n"
+                          : "=r" (cr0), "=r"(cr3)); 
+    printf("%s: pt = %lx, cr3 = %lx, cr0 = %lx\n", __func__, pt, cr3, cr0);
+}
+
+#ifdef SVM_EXTRA_DEBUG
+static char *exit_reasons[] = {
+    [VMEXIT_CR0_READ] = "CR0_READ",
+    [VMEXIT_CR1_READ] = "CR1_READ",
+    [VMEXIT_CR2_READ] = "CR2_READ",
+    [VMEXIT_CR3_READ] = "CR3_READ",
+    [VMEXIT_CR4_READ] = "CR4_READ",
+    [VMEXIT_CR5_READ] = "CR5_READ",
+    [VMEXIT_CR6_READ] = "CR6_READ",
+    [VMEXIT_CR7_READ] = "CR7_READ",
+    [VMEXIT_CR8_READ] = "CR8_READ",
+    [VMEXIT_CR9_READ] = "CR9_READ",
+    [VMEXIT_CR10_READ] = "CR10_READ",
+    [VMEXIT_CR11_READ] = "CR11_READ",
+    [VMEXIT_CR12_READ] = "CR12_READ",
+    [VMEXIT_CR13_READ] = "CR13_READ",
+    [VMEXIT_CR14_READ] = "CR14_READ",
+    [VMEXIT_CR15_READ] = "CR15_READ",
+    [VMEXIT_CR0_WRITE] = "CR0_WRITE",
+    [VMEXIT_CR1_WRITE] = "CR1_WRITE",
+    [VMEXIT_CR2_WRITE] = "CR2_WRITE",
+    [VMEXIT_CR3_WRITE] = "CR3_WRITE",
+    [VMEXIT_CR4_WRITE] = "CR4_WRITE",
+    [VMEXIT_CR5_WRITE] = "CR5_WRITE",
+    [VMEXIT_CR6_WRITE] = "CR6_WRITE",
+    [VMEXIT_CR7_WRITE] = "CR7_WRITE",
+    [VMEXIT_CR8_WRITE] = "CR8_WRITE",
+    [VMEXIT_CR9_WRITE] = "CR9_WRITE",
+    [VMEXIT_CR10_WRITE] = "CR10_WRITE",
+    [VMEXIT_CR11_WRITE] = "CR11_WRITE",
+    [VMEXIT_CR12_WRITE] = "CR12_WRITE",
+    [VMEXIT_CR13_WRITE] = "CR13_WRITE",
+    [VMEXIT_CR14_WRITE] = "CR14_WRITE",
+    [VMEXIT_CR15_WRITE] = "CR15_WRITE",
+    [VMEXIT_DR0_READ] = "DR0_READ",
+    [VMEXIT_DR1_READ] = "DR1_READ",
+    [VMEXIT_DR2_READ] = "DR2_READ",
+    [VMEXIT_DR3_READ] = "DR3_READ",
+    [VMEXIT_DR4_READ] = "DR4_READ",
+    [VMEXIT_DR5_READ] = "DR5_READ",
+    [VMEXIT_DR6_READ] = "DR6_READ",
+    [VMEXIT_DR7_READ] = "DR7_READ",
+    [VMEXIT_DR8_READ] = "DR8_READ",
+    [VMEXIT_DR9_READ] = "DR9_READ",
+    [VMEXIT_DR10_READ] = "DR10_READ",
+    [VMEXIT_DR11_READ] = "DR11_READ",
+    [VMEXIT_DR12_READ] = "DR12_READ",
+    [VMEXIT_DR13_READ] = "DR13_READ",
+    [VMEXIT_DR14_READ] = "DR14_READ",
+    [VMEXIT_DR15_READ] = "DR15_READ",
+    [VMEXIT_DR0_WRITE] = "DR0_WRITE",
+    [VMEXIT_DR1_WRITE] = "DR1_WRITE",
+    [VMEXIT_DR2_WRITE] = "DR2_WRITE",
+    [VMEXIT_DR3_WRITE] = "DR3_WRITE",
+    [VMEXIT_DR4_WRITE] = "DR4_WRITE",
+    [VMEXIT_DR5_WRITE] = "DR5_WRITE",
+    [VMEXIT_DR6_WRITE] = "DR6_WRITE",
+    [VMEXIT_DR7_WRITE] = "DR7_WRITE",
+    [VMEXIT_DR8_WRITE] = "DR8_WRITE",
+    [VMEXIT_DR9_WRITE] = "DR9_WRITE",
+    [VMEXIT_DR10_WRITE] = "DR10_WRITE",
+    [VMEXIT_DR11_WRITE] = "DR11_WRITE",
+    [VMEXIT_DR12_WRITE] = "DR12_WRITE",
+    [VMEXIT_DR13_WRITE] = "DR13_WRITE",
+    [VMEXIT_DR14_WRITE] = "DR14_WRITE",
+    [VMEXIT_DR15_WRITE] = "DR15_WRITE",
+    [VMEXIT_EXCEPTION_DE] = "EXCEPTION_DE",
+    [VMEXIT_EXCEPTION_DB] = "EXCEPTION_DB",
+    [VMEXIT_EXCEPTION_NMI] = "EXCEPTION_NMI",
+    [VMEXIT_EXCEPTION_BP] = "EXCEPTION_BP",
+    [VMEXIT_EXCEPTION_OF] = "EXCEPTION_OF",
+    [VMEXIT_EXCEPTION_BR] = "EXCEPTION_BR",
+    [VMEXIT_EXCEPTION_UD] = "EXCEPTION_UD",
+    [VMEXIT_EXCEPTION_NM] = "EXCEPTION_NM",
+    [VMEXIT_EXCEPTION_DF] = "EXCEPTION_DF",
+    [VMEXIT_EXCEPTION_09] = "EXCEPTION_09",
+    [VMEXIT_EXCEPTION_TS] = "EXCEPTION_TS",
+    [VMEXIT_EXCEPTION_NP] = "EXCEPTION_NP",
+    [VMEXIT_EXCEPTION_SS] = "EXCEPTION_SS",
+    [VMEXIT_EXCEPTION_GP] = "EXCEPTION_GP",
+    [VMEXIT_EXCEPTION_PF] = "EXCEPTION_PF",
+    [VMEXIT_EXCEPTION_15] = "EXCEPTION_15",
+    [VMEXIT_EXCEPTION_MF] = "EXCEPTION_MF",
+    [VMEXIT_EXCEPTION_AC] = "EXCEPTION_AC",
+    [VMEXIT_EXCEPTION_MC] = "EXCEPTION_MC",
+    [VMEXIT_EXCEPTION_XF] = "EXCEPTION_XF",
+    [VMEXIT_INTR] = "INTR",
+    [VMEXIT_NMI] = "NMI",
+    [VMEXIT_SMI] = "SMI",
+    [VMEXIT_INIT] = "INIT",
+    [VMEXIT_VINTR] = "VINTR",
+    [VMEXIT_CR0_SEL_WRITE] = "CR0_SEL_WRITE",
+    [VMEXIT_IDTR_READ] = "IDTR_READ",
+    [VMEXIT_GDTR_READ] = "GDTR_READ",
+    [VMEXIT_LDTR_READ] = "LDTR_READ",
+    [VMEXIT_TR_READ] = "TR_READ",
+    [VMEXIT_IDTR_WRITE] = "IDTR_WRITE",
+    [VMEXIT_GDTR_WRITE] = "GDTR_WRITE",
+    [VMEXIT_LDTR_WRITE] = "LDTR_WRITE",
+    [VMEXIT_TR_WRITE] = "TR_WRITE",
+    [VMEXIT_RDTSC] = "RDTSC",
+    [VMEXIT_RDPMC] = "RDPMC",
+    [VMEXIT_PUSHF] = "PUSHF",
+    [VMEXIT_POPF] = "POPF",
+    [VMEXIT_CPUID] = "CPUID",
+    [VMEXIT_RSM] = "RSM",
+    [VMEXIT_IRET] = "IRET",
+    [VMEXIT_SWINT] = "SWINT",
+    [VMEXIT_INVD] = "INVD",
+    [VMEXIT_PAUSE] = "PAUSE",
+    [VMEXIT_HLT] = "HLT",
+    [VMEXIT_INVLPG] = "INVLPG",
+    [VMEXIT_INVLPGA] = "INVLPGA",
+    [VMEXIT_IOIO] = "IOIO",
+    [VMEXIT_MSR] = "MSR",
+    [VMEXIT_TASK_SWITCH] = "TASK_SWITCH",
+    [VMEXIT_FERR_FREEZE] = "FERR_FREEZE",
+    [VMEXIT_SHUTDOWN] = "SHUTDOWN",
+    [VMEXIT_VMRUN] = "VMRUN",
+    [VMEXIT_VMMCALL] = "VMMCALL",
+    [VMEXIT_VMLOAD] = "VMLOAD",
+    [VMEXIT_VMSAVE] = "VMSAVE",
+    [VMEXIT_STGI] = "STGI",
+    [VMEXIT_CLGI] = "CLGI",
+    [VMEXIT_SKINIT] = "SKINIT",
+    [VMEXIT_RDTSCP] = "RDTSCP",
+    [VMEXIT_ICEBP] = "ICEBP",
+    [VMEXIT_NPF] = "NPF"
+};
+#endif /* SVM_EXTRA_DEBUG */
+
+#ifdef SVM_WALK_GUEST_PAGES
+void walk_shadow_and_guest_pt(unsigned long gva)
+{
+    l2_pgentry_t gpde;
+    l2_pgentry_t spde;
+    l1_pgentry_t gpte;
+    l1_pgentry_t spte;
+    struct vcpu        *v    = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    unsigned long gpa;
+
+    gpa = gva_to_gpa( gva );
+    printk( "gva = %lx, gpa=%lx, gCR3=%x\n", gva, gpa, (u32)vmcb->cr3 );
+    if( !svm_paging_enabled(v) || mmio_space( gpa ) )
+    {
+       return;
+    }
+
+    /* let's dump the guest and shadow page info */
+
+    __guest_get_l2e(v, gva, &gpde);
+    printk( "G-PDE = %x, flags=%x\n", gpde.l2, l2e_get_flags(gpde) );
+    __shadow_get_l2e( v, gva, &spde );
+    printk( "S-PDE = %x, flags=%x\n", spde.l2, l2e_get_flags(spde) );
+
+    if ( unlikely(!(l2e_get_flags(gpde) & _PAGE_PRESENT)) )
+        return;
+
+    spte = l1e_empty();
+
+    // This is actually overkill - we only need to make sure the hl2 is in-sync.
+    shadow_sync_va(v, gva);
+
+    gpte.l1 = 0;
+    __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], 
sizeof(gpte) );
+    printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) );
+    __copy_from_user( &spte, &phys_to_machine_mapping[ l1e_get_pfn( gpte ) ], 
sizeof(spte) );
+    printk( "S-PTE = %x, flags=%x\n", spte.l1, l1e_get_flags(spte));
+}
+#endif /* SVM_WALK_GUEST_PAGES */
+
+asmlinkage void svm_vmexit_handler(struct cpu_user_regs regs)
+{
+    unsigned int exit_reason;
+    unsigned long eip;
+    struct vcpu *v = current;
+    int error;
+    int do_debug = 0;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+    ASSERT(vmcb);
+
+    exit_reason = vmcb->exitcode;
+    save_svm_cpu_user_regs(v, &regs);
+    v->arch.hvm_svm.injecting_event = 0;
+
+    vmcb->tlb_control = 0;
+
+#ifdef SVM_EXTRA_DEBUG
+{
+#if defined(__i386__)
+#define        rip     eip
+#endif
+
+    static unsigned long intercepts_counter = 0;
+
+    if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 
+    {
+        if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
+        {
+            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", 
+                    intercepts_counter,
+                    exit_reasons[exit_reason], exit_reason, regs.cs,
+                   (unsigned long long) regs.rip,
+                   (unsigned long long) vmcb->exitinfo1,
+                   (unsigned long long) vmcb->exitinfo2,
+                   (unsigned long long) vmcb->exitintinfo.bytes);
+        }
+    } 
+    else if (svm_dbg_on 
+            && exit_reason != VMEXIT_IOIO 
+            && exit_reason != VMEXIT_INTR) 
+    {
+
+        if (exit_reasons[exit_reason])
+        {
+            printk("I%08ld,ExC=%s(%d),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", 
+                    intercepts_counter,
+                    exit_reasons[exit_reason], exit_reason, regs.cs,
+                   (unsigned long long) regs.rip,
+                   (unsigned long long) vmcb->exitinfo1,
+                   (unsigned long long) vmcb->exitinfo2,
+                   (unsigned long long) vmcb->exitintinfo.bytes);
+        } 
+        else 
+        {
+            printk("I%08ld,ExC=%d(0x%x),IP=%x:%llx,I1=%llx,I2=%llx,INT=%llx\n", 
+                    intercepts_counter, exit_reason, exit_reason, regs.cs, 
+                   (unsigned long long) regs.rip,
+                   (unsigned long long) vmcb->exitinfo1,
+                   (unsigned long long) vmcb->exitinfo2,
+                   (unsigned long long) vmcb->exitintinfo.bytes);
+        }
+    }
+
+#ifdef SVM_WALK_GUEST_PAGES
+    if ( exit_reason == VMEXIT_EXCEPTION_PF &&
+         ( vmcb->exitinfo2 == vmcb->rip || vmcb->exitintinfo.bytes ) )
+    {
+        if (svm_paging_enabled(v) && !mmio_space(gva_to_gpa(vmcb->exitinfo2)))
+            walk_shadow_and_guest_pt( vmcb->exitinfo2 );
+    }
+#endif
+
+    intercepts_counter++;
+
+#if 0
+    if (svm_dbg_on)
+        do_debug = svm_do_debugout(exit_reason);
+#endif
+
+    if (do_debug)
+    {
+        printk("%s:+ guest_table = 0x%08x, monitor_table = 0x%08x, "
+                "shadow_table = 0x%08x\n", 
+                __func__,
+               (int) v->arch.guest_table.pfn,
+               (int) v->arch.monitor_table.pfn, 
+                (int) v->arch.shadow_table.pfn);
+
+        svm_dump_vmcb(__func__, vmcb);
+        svm_dump_regs(__func__, &regs);
+        svm_dump_inst(svm_rip2pointer(vmcb));
+    }
+
+#if defined(__i386__)
+#undef rip
+#endif
+
+}
+#endif /* SVM_EXTRA_DEBUG */
+
+    if (exit_reason == -1)
+    {
+        printk("%s: exit_reason == -1 - Did someone clobber the VMCB\n", 
+                __func__);
+        BUG();
+        domain_crash_synchronous();
+    }
+
+    perfc_incra(vmexits, exit_reason);
+    eip = vmcb->rip;
+
+#ifdef SVM_EXTRA_DEBUG
+    if (do_debug)
+    {
+        printk("eip = %lx, exit_reason = %d (0x%x)\n", 
+                eip, exit_reason, exit_reason);
+    }
+#endif /* SVM_EXTRA_DEBUG */
+
+    TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
+
+    switch (exit_reason) 
+    {
+    case VMEXIT_EXCEPTION_DB:
+    {
+#ifdef XEN_DEBUGGER
+        svm_debug_save_cpu_user_regs(&regs);
+        pdb_handle_exception(1, &regs, 1);
+        svm_debug_restore_cpu_user_regs(&regs);
+#else
+        svm_store_cpu_user_regs(&regs, v);
+        domain_pause_for_debugger();  
+        do_sched_op(SCHEDOP_yield, 0);
+#endif
+    }
+    break;
+
+    case VMEXIT_NMI:
+        do_nmi(&regs, 0);
+        break;
+
+#ifdef XEN_DEBUGGER
+    case VMEXIT_EXCEPTION_BP:
+        svm_debug_save_cpu_user_regs(&regs);
+        pdb_handle_exception(3, &regs, 1);
+        svm_debug_restore_cpu_user_regs(&regs);
+        break;
+#endif
+
+    case VMEXIT_EXCEPTION_NM:
+        svm_do_no_device_fault(vmcb);
+        break;  
+
+    case VMEXIT_EXCEPTION_GP:
+        /* This should probably not be trapped in the future */
+        regs.error_code = vmcb->exitinfo1;
+        v->arch.hvm_svm.injecting_event = 1;
+        svm_do_general_protection_fault(v, &regs);
+        break;  
+
+    case VMEXIT_EXCEPTION_PF:
+    {
+        unsigned long va;
+        va = vmcb->exitinfo2;
+        regs.error_code = vmcb->exitinfo1;
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, 
+                "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+                (unsigned long)regs.eax, (unsigned long)regs.ebx,
+                (unsigned long)regs.ecx, (unsigned long)regs.edx,
+                (unsigned long)regs.esi, (unsigned long)regs.edi);
+
+        v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
+
+//printk("PF1\n");
+        if (!(error = svm_do_page_fault(va, &regs))) 
+        {
+            v->arch.hvm_svm.injecting_event = 1;
+            /* Inject #PG using Interruption-Information Fields */
+            vmcb->eventinj.bytes = 0;
+            vmcb->eventinj.fields.v = 1;
+            vmcb->eventinj.fields.ev = 1;
+            vmcb->eventinj.fields.errorcode = regs.error_code;
+            vmcb->eventinj.fields.type = EVENTTYPE_EXCEPTION;
+            vmcb->eventinj.fields.vector = TRAP_page_fault;
+            v->arch.hvm_svm.cpu_cr2 = va;
+            vmcb->cr2 = va;
+            TRACE_3D(TRC_VMX_INT, v->domain->domain_id, 
+                    VMEXIT_EXCEPTION_PF, va);
+        }
+        break;
+    }
+
+    case VMEXIT_EXCEPTION_DF:
+        printk("Guest double fault");
+        BUG();
+        break;
+
+    case VMEXIT_INTR:
+        svm_stts(v);
+        raise_softirq(SCHEDULE_SOFTIRQ);
+        break;
+
+    case VMEXIT_GDTR_WRITE:
+        printk("WRITE to GDTR\n");
+        break;
+
+    case VMEXIT_TASK_SWITCH:
+        __hvm_bug(&regs);
+        break;
+
+    case VMEXIT_CPUID:
+        svm_vmexit_do_cpuid(vmcb, regs.eax, &regs);
+        break;
+
+    case VMEXIT_HLT:
+        svm_vmexit_do_hlt(vmcb);
+        break;
+
+    case VMEXIT_INVLPG:
+        svm_handle_invlpg(0, &regs);
+        break;
+
+    case VMEXIT_INVLPGA:
+        svm_handle_invlpg(1, &regs);
+        break;
+
+    case VMEXIT_VMMCALL:
+        svm_do_vmmcall(v, &regs);
+        break;
+
+    case VMEXIT_CR0_READ:
+        svm_cr_access(v, 0, TYPE_MOV_FROM_CR, &regs);
+        break;
+
+    case VMEXIT_CR2_READ:
+        svm_cr_access(v, 2, TYPE_MOV_FROM_CR, &regs);
+        break;
+
+    case VMEXIT_CR3_READ:
+        svm_cr_access(v, 3, TYPE_MOV_FROM_CR, &regs);
+        break;
+
+    case VMEXIT_CR4_READ:
+        svm_cr_access(v, 4, TYPE_MOV_FROM_CR, &regs);
+        break;
+
+    case VMEXIT_CR8_READ:
+        svm_cr_access(v, 8, TYPE_MOV_FROM_CR, &regs);
+        break;
+
+    case VMEXIT_CR0_WRITE:
+        svm_cr_access(v, 0, TYPE_MOV_TO_CR, &regs);
+        break;
+
+    case VMEXIT_CR2_WRITE:
+        svm_cr_access(v, 2, TYPE_MOV_TO_CR, &regs);
+        break;
+
+    case VMEXIT_CR3_WRITE:
+        svm_cr_access(v, 3, TYPE_MOV_TO_CR, &regs);
+        break;
+
+    case VMEXIT_CR4_WRITE:
+        svm_cr_access(v, 4, TYPE_MOV_TO_CR, &regs);
+        break;
+
+    case VMEXIT_CR8_WRITE:
+        svm_cr_access(v, 8, TYPE_MOV_TO_CR, &regs);
+        break;
+
+    case VMEXIT_DR0_READ:
+        svm_dr_access(v, 0, TYPE_MOV_FROM_DR, &regs);
+        break;
+
+    case VMEXIT_DR1_READ:
+        svm_dr_access(v, 1, TYPE_MOV_FROM_DR, &regs);
+        break;
+
+    case VMEXIT_DR2_READ:
+        svm_dr_access(v, 2, TYPE_MOV_FROM_DR, &regs);
+        break;
+
+    case VMEXIT_DR3_READ:
+        svm_dr_access(v, 3, TYPE_MOV_FROM_DR, &regs);
+        break;
+
+    case VMEXIT_DR6_READ:
+        svm_dr_access(v, 6, TYPE_MOV_FROM_DR, &regs);
+        break;
+
+    case VMEXIT_DR7_READ:
+        svm_dr_access(v, 7, TYPE_MOV_FROM_DR, &regs);
+        break;
+
+    case VMEXIT_DR0_WRITE:
+        svm_dr_access(v, 0, TYPE_MOV_TO_DR, &regs);
+        break;
+
+    case VMEXIT_DR1_WRITE:
+        svm_dr_access(v, 1, TYPE_MOV_TO_DR, &regs);
+        break;
+
+    case VMEXIT_DR2_WRITE:
+        svm_dr_access(v, 2, TYPE_MOV_TO_DR, &regs);
+        break;
+
+    case VMEXIT_DR3_WRITE:
+        svm_dr_access(v, 3, TYPE_MOV_TO_DR, &regs);
+        break;
+
+    case VMEXIT_DR6_WRITE:
+        svm_dr_access(v, 6, TYPE_MOV_TO_DR, &regs);
+        break;
+
+    case VMEXIT_DR7_WRITE:
+        svm_dr_access(v, 7, TYPE_MOV_TO_DR, &regs);
+        break;
+
+    case VMEXIT_IOIO:
+        svm_io_instruction(v, &regs);
+        break;
+
+    case VMEXIT_MSR:
+        svm_do_msr_access(v, &regs);
+        break;
+
+    case VMEXIT_SHUTDOWN:
+        printk("Guest shutdown exit\n");
+        domain_crash_synchronous();
+        break;
+
+    default:
+        printk("unexpected VMEXIT: exit reason = 0x%x, exitinfo1 = %llx, "
+               "exitinfo2 = %llx\n", exit_reason, 
+                                    (unsigned long long)vmcb->exitinfo1, 
+                                    (unsigned long long)vmcb->exitinfo2);
+        __hvm_bug(&regs);       /* should not happen */
+        break;
+    }
+
+#ifdef SVM_EXTRA_DEBUG
+    if (do_debug) 
+    {
+        printk("%s: Done switch on vmexit_code\n", __func__); 
+        svm_dump_regs(__func__, &regs);
+    }
+
+    if (do_debug) 
+    {
+        printk("vmexit_handler():- guest_table = 0x%08x, "
+                "monitor_table = 0x%08x, shadow_table = 0x%08x\n",
+                (int)v->arch.guest_table.pfn,
+               (int)v->arch.monitor_table.pfn, 
+                (int)v->arch.shadow_table.pfn);
+        printk("svm_vmexit_handler: Returning\n");
+    }
+#endif
+
+    return;
+}
+
+asmlinkage void svm_load_cr2(void)
+{
+    struct vcpu *v = current;
+
+    local_irq_disable();
+#ifdef __i386__
+    asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
+#else
+    asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_svm.cpu_cr2));
+#endif
+}
+
+asmlinkage void svm_asid(void)
+{
+    struct vcpu *v = current;
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    int core = smp_processor_id();
+    /* 
+     * If we need to assign a new ASID, or if we are switching cores,
+     * retire the ASID for the old core and assign a new one for the new core.
+     */
+    if ( svm_dbg_on )
+        printk("old core %d new core %d\n",
+               (int)v->arch.hvm_svm.core, (int)core);
+
+    if( test_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags) ||
+          (v->arch.hvm_svm.core != core)) {
+        if(!asidpool_assign_next(vmcb, 1, 
+                   v->arch.hvm_svm.core, core)) {
+           BUG();              
+        }
+    }
+    clear_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+}
+#endif /* CONFIG_SVM */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/vmcb.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,605 @@
+/*
+ * vmcb.c: VMCB management
+ * Copyright (c) 2005, AMD Corporation.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/shadow.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
+#include <asm/hvm/svm/intr.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/domain_page.h>
+
+#ifdef CONFIG_SVM
+
+extern int svm_dbg_on;
+extern int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
+                                  int oldcore, int newcore);
+
+#define round_pgdown(_p) ((_p)&PAGE_MASK) /* copied from domain.c */
+
+#define GUEST_SEGMENT_LIMIT 0xffffffff
+
+#define IOPM_SIZE   (12 * 1024)
+#define MSRPM_SIZE  (8  * 1024)
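+/*
+ * Sizes as specified by the SVM architecture: the IOPM is 12KB (three
+ * pages) -- one intercept bit per I/O port for all 64K ports (8KB) plus
+ * an extra page so multi-byte accesses near port 0xFFFF can be checked
+ * -- and the MSRPM is 8KB (two pages) of read/write intercept bit pairs.
+ */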
+
+struct vmcb_struct *alloc_vmcb(void) 
+{
+    struct vmcb_struct *vmcb = NULL;
+    unsigned int order;
+    order = get_order_from_bytes(sizeof(struct vmcb_struct)); 
+    ASSERT(order >= 0);
+    vmcb = alloc_xenheap_pages(order);
+    ASSERT(vmcb);
+
+    if (vmcb)
+        memset(vmcb, 0, sizeof(struct vmcb_struct));
+
+    return vmcb;
+}
+
+
+void free_vmcb(struct vmcb_struct *vmcb)
+{
+    unsigned int order;
+
+    order = get_order_from_bytes(sizeof(struct vmcb_struct));
+    ASSERT(vmcb);
+
+    if (vmcb)
+        free_xenheap_pages(vmcb, order);
+}
+
+
+struct host_save_area *alloc_host_save_area(void)
+{
+    unsigned int order = 0;
+    struct host_save_area *hsa = NULL;
+
+    hsa = alloc_xenheap_pages(order);
+    ASSERT(hsa);
+
+    if (hsa)
+        memset(hsa, 0, PAGE_SIZE);
+
+    return hsa;
+}
+
+
+void free_host_save_area(struct host_save_area *hsa)
+{
+    unsigned int order;
+
+    order = get_order_from_bytes(PAGE_SIZE);
+    ASSERT(hsa);
+
+    if (hsa)
+        free_xenheap_pages(hsa, order);
+}
+
+
+/* Set up intercepts to exit the guest into the hypervisor when we want it. */
+static int construct_vmcb_controls(struct arch_svm_struct *arch_svm)
+{
+    struct vmcb_struct *vmcb;
+    u32 *iopm;
+    u32 *msrpm;
+
+    vmcb = arch_svm->vmcb;
+
+    ASSERT(vmcb);
+
+    /* Intercept every general-1 event, except those listed below. */
+    vmcb->general1_intercepts = 
+        ~(GENERAL1_INTERCEPT_CR0_SEL_WRITE | GENERAL1_INTERCEPT_VINTR      | 
+          GENERAL1_INTERCEPT_IDTR_READ     | GENERAL1_INTERCEPT_IDTR_WRITE | 
+          GENERAL1_INTERCEPT_GDTR_READ     | GENERAL1_INTERCEPT_GDTR_WRITE |
+          GENERAL1_INTERCEPT_LDTR_READ     | GENERAL1_INTERCEPT_LDTR_WRITE | 
+          GENERAL1_INTERCEPT_TR_READ       | GENERAL1_INTERCEPT_TR_WRITE   |
+          GENERAL1_INTERCEPT_RDTSC         | GENERAL1_INTERCEPT_PUSHF      |
+          GENERAL1_INTERCEPT_SWINT         | GENERAL1_INTERCEPT_POPF       | 
+          GENERAL1_INTERCEPT_IRET          | GENERAL1_INTERCEPT_PAUSE      |
+          GENERAL1_INTERCEPT_TASK_SWITCH
+        );
+
+    /* turn on the general 2 intercepts */
+    vmcb->general2_intercepts = 
+        GENERAL2_INTERCEPT_VMRUN  | GENERAL2_INTERCEPT_VMMCALL | 
+        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE  |
+        GENERAL2_INTERCEPT_STGI   | GENERAL2_INTERCEPT_CLGI    |
+        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
+
+    /* Do not intercept guest reads or writes of debug registers 0 - 15. */
+    vmcb->dr_intercepts = 0;
+
+    /* Intercept all control-register accesses, except CR2 reads and writes. */
+    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);
+
+    /* Set up the I/O and MSR permission maps. */
+    iopm = alloc_xenheap_pages(get_order_from_bytes(IOPM_SIZE));
+
+    ASSERT(iopm);
+    memset(iopm, 0xff, IOPM_SIZE);
+    clear_bit(PC_DEBUG_PORT, iopm);
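+    /*
+     * A set bit in the IOPM forces a #VMEXIT on access to that port, so
+     * the memset above intercepts all guest I/O. Clearing a bit, as done
+     * for PC_DEBUG_PORT here, lets the guest access that one port
+     * directly without a world switch.
+     */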
+    msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
+
+    ASSERT(msrpm);
+    memset(msrpm, 0xff, MSRPM_SIZE);
+
+    arch_svm->iopm = iopm;
+    arch_svm->msrpm = msrpm;
+
+    vmcb->iopm_base_pa = (u64) virt_to_phys(iopm);
+    vmcb->msrpm_base_pa = (u64) virt_to_phys(msrpm);
+
+    return 0;
+}
+
+
+/*
+ * Modify guest EFLAGS and the exception bitmap for gdb.
+ */
+int svm_modify_vmcb(struct vcpu *v, struct cpu_user_regs *regs)
+{
+    int error;
+    if ((error = load_vmcb(&v->arch.hvm_svm, v->arch.hvm_svm.host_save_pa))) 
+    {
+        printk("svm_modify_vmcb: load_vmcb failed: VMCB = %lx\n",
+                (unsigned long) v->arch.hvm_svm.host_save_pa);
+        return -EINVAL; 
+    }
+    svm_load_cpu_user_regs(v, regs);
+    return 0;
+}
+
+
+/*
+ * Initially set up the same environment as the host.
+ */
+static int construct_init_vmcb_guest(struct arch_svm_struct *arch_svm, 
+                                     struct cpu_user_regs *regs )
+{
+    int error = 0;
+    unsigned long crn;
+    segment_attributes_t attrib;
+    unsigned long dr7;
+    unsigned long eflags;
+    unsigned long shadow_cr;
+    struct vmcb_struct *vmcb = arch_svm->vmcb;
+    struct Xgt_desc_struct desc;
+
+    /* Allow IRQs to be shared. */
+    vmcb->vintr.fields.intr_masking = 1;
+  
+    /* Set up event injection entry in VMCB. Just clear it. */
+    vmcb->eventinj.bytes = 0;
+
+    /* TSC */
+    vmcb->tsc_offset = 0;
+    
+    vmcb->cs.sel = regs->cs;
+    vmcb->es.sel = regs->es;
+    vmcb->ss.sel = regs->ss;
+    vmcb->ds.sel = regs->ds; 
+    vmcb->fs.sel = regs->fs;
+    vmcb->gs.sel = regs->gs;
+
+    /* Guest segment limits. */
+    vmcb->cs.limit = GUEST_SEGMENT_LIMIT;
+    vmcb->es.limit = GUEST_SEGMENT_LIMIT;
+    vmcb->ss.limit = GUEST_SEGMENT_LIMIT;
+    vmcb->ds.limit = GUEST_SEGMENT_LIMIT;
+    vmcb->fs.limit = GUEST_SEGMENT_LIMIT;
+    vmcb->gs.limit = GUEST_SEGMENT_LIMIT;
+
+    /* Base address for segments */
+    vmcb->cs.base = 0;
+    vmcb->es.base = 0;
+    vmcb->ss.base = 0;
+    vmcb->ds.base = 0;
+    vmcb->fs.base = 0;
+    vmcb->gs.base = 0;
+
+    __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
+    vmcb->idtr.base = desc.address;
+    vmcb->idtr.limit = desc.size;
+
+    /* Set up segment attributes */
+    attrib.bytes = 0;
+    attrib.fields.type = 0x3; /* type 3 = read/write data, accessed */
+    attrib.fields.s = 1; /* code or data, i.e. not system */
+    attrib.fields.dpl = 0; /* DPL = 0 */
+    attrib.fields.p = 1; /* segment present */
+    attrib.fields.db = 1; /* 32-bit */
+    attrib.fields.g = 1; /* 4K pages in limit */
+
+    /* Data selectors */
+    vmcb->es.attributes = attrib; 
+    vmcb->ss.attributes = attrib;
+    vmcb->ds.attributes = attrib;
+    vmcb->fs.attributes = attrib;
+    vmcb->gs.attributes = attrib;
+
+    /* Code selector */
+    attrib.fields.type = 0xb;   /* type=0xb -> executable/readable, accessed */
+    vmcb->cs.attributes = attrib;
+
+    /* Global descriptor table */
+    //NMERGE7500 - can probably remove access to gdtr
+    vmcb->gdtr.base = regs->edx;
+    regs->edx = 0;
+    ASSERT(regs->eax <= 0xFFFF); /* Make sure we're in the limit */
+    vmcb->gdtr.limit = regs->eax;
+    regs->eax = 0;
+
+    /* Local Descriptor Table */
+    attrib.fields.s = 0; /* not a code or data segment */
+    attrib.fields.type = 0x2; /* LDT */
+    attrib.fields.db = 0; /* 16-bit */
+    attrib.fields.g = 0;   
+    vmcb->ldtr.attributes = attrib;
+
+    attrib.fields.type = 0xb; /* 32-bit TSS (busy) */
+    vmcb->tr.attributes = attrib;
+    vmcb->tr.base = 0;
+    vmcb->tr.limit = 0xff;
+
+    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) :);
+    vmcb->cr0 = crn;
+
+    /* Initially PG is not set in the shadow CR0. */
+    shadow_cr = vmcb->cr0;
+    shadow_cr &= ~X86_CR0_PG;
+    arch_svm->cpu_shadow_cr0 = shadow_cr;
+
+    /* CR3 is set in svm_final_setup_guest */
+
+    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :); 
+    shadow_cr = crn;
+    vmcb->cr4 = shadow_cr;
+
+//MERGE7500 - should write a 0 instead to rsp?
+    vmcb->rsp = regs->esp;
+    vmcb->rip = regs->eip;
+
+    eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
+    eflags |= HVM_EFLAGS_RESERVED_1; /* set 1s */
+
+    vmcb->rflags = eflags;
+
+    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
+    vmcb->dr7 = dr7;
+
+    return error;
+}
+
+
+/*
+ * destroy the vmcb.
+ */
+
+void destroy_vmcb(struct arch_svm_struct *arch_svm)
+{
+    if (arch_svm->vmcb != NULL)
+    {
+        asidpool_retire(arch_svm->vmcb, arch_svm->core);
+        free_vmcb(arch_svm->vmcb);
+    }
+    if (arch_svm->iopm != NULL) {
+        free_xenheap_pages(
+            arch_svm->iopm, get_order_from_bytes(IOPM_SIZE));
+        arch_svm->iopm = NULL;
+    }
+    if (arch_svm->msrpm != NULL) {
+        free_xenheap_pages(
+            arch_svm->msrpm, get_order_from_bytes(MSRPM_SIZE));
+        arch_svm->msrpm = NULL;
+    }
+    arch_svm->vmcb = NULL;
+}
+
+
+/*
+ * construct the vmcb.
+ */
+
+int construct_vmcb(struct arch_svm_struct *arch_svm, struct cpu_user_regs *regs)
+{
+    int error;
+    long rc=0;
+    struct host_save_area *hsa = NULL;
+    u64 phys_hsa;
+
+    memset(arch_svm, 0, sizeof(struct arch_svm_struct));
+
+    if (!(arch_svm->vmcb = alloc_vmcb())) {
+        printk("Failed to create a new VMCB\n");
+        rc = -ENOMEM;
+        goto err_out;
+    }
+
+    /*
+     * Allocate the host save area (HSA). In principle we need only one
+     * HSA per physical core, but since we do not yet have a global
+     * per-core structure to keep it in, we allocate one HSA per VCPU
+     * instead. This wastes space whenever there are more VCPUs than
+     * cores, but we will stay with this solution until we find a
+     * better home for a per-core HSA.
+     */
+
+    if (!(hsa = alloc_host_save_area())) 
+    {
+        printk("Failed to allocate Host Save Area\n");
+        rc = -ENOMEM;
+        goto err_out;
+    }
+
+    phys_hsa = (u64) virt_to_phys(hsa);
+    arch_svm->host_save_area = hsa;
+    arch_svm->host_save_pa   = phys_hsa;
+
+    arch_svm->vmcb_pa  = (u64) virt_to_phys(arch_svm->vmcb);
+
+    if ((error = load_vmcb(arch_svm, arch_svm->host_save_pa))) 
+    {
+        printk("construct_vmcb: load_vmcb failed: VMCB = %lx\n",
+               (unsigned long) arch_svm->host_save_pa);
+        rc = -EINVAL;         
+        goto err_out;
+    }
+
+    if ((error = construct_vmcb_controls(arch_svm))) 
+    {
+        printk("construct_vmcb: construct_vmcb_controls failed\n");
+        rc = -EINVAL;         
+        goto err_out;
+    }
+
+    /* guest selectors */
+    if ((error = construct_init_vmcb_guest(arch_svm, regs))) 
+    {
+        printk("construct_vmcb: construct_vmcb_guest failed\n");
+        rc = -EINVAL;         
+        goto err_out;
+    }
+
+    arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
+    if (regs->eflags & EF_TF)
+        arch_svm->vmcb->exception_intercepts |= EXCEPTION_BITMAP_DB;
+    else
+        arch_svm->vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_DB;
+
+    return 0;
+
+err_out:
+    destroy_vmcb(arch_svm);
+    return rc;
+}
+
+
+void svm_do_launch(struct vcpu *v)
+{
+    /* Update CR3, GDT, LDT, TR */
+    struct vmcb_struct *vmcb;
+    int core = smp_processor_id();
+    vmcb = v->arch.hvm_svm.vmcb;
+    ASSERT(vmcb);
+
+    svm_stts(v);
+
+    /* current core is the one we will perform the vmrun on */
+    v->arch.hvm_svm.core = core;
+    clear_bit(ARCH_SVM_VMCB_ASSIGN_ASID, &v->arch.hvm_svm.flags);
+    if(!asidpool_assign_next(vmcb, 0, core, core)) {
+        BUG();
+    }
+
+    if (v->vcpu_id == 0)
+        hvm_setup_platform(v->domain);
+
+    if (hvm_apic_support(v->domain))
+        vlapic_init(v);
+    init_timer(&v->arch.hvm_svm.hlt_timer,
+               hlt_timer_fn, v, v->processor);
+
+    vmcb->ldtr.sel = 0;
+    vmcb->ldtr.base = 0;
+    vmcb->ldtr.limit = 0;
+    vmcb->ldtr.attributes.bytes = 0;
+
+    vmcb->efer = EFER_SVME; /* Make sure VMRUN won't return with -1 */
+    
+    if (svm_dbg_on) 
+    {
+        unsigned long pt;
+        pt = pagetable_get_paddr(v->arch.shadow_table);
+        printk("%s: shadow_table = %lx\n", __func__, pt);
+        pt = pagetable_get_paddr(v->arch.guest_table);
+        printk("%s: guest_table  = %lx\n", __func__, pt);
+        pt = pagetable_get_paddr(v->domain->arch.phys_table);
+        printk("%s: phys_table   = %lx\n", __func__, pt);
+    }
+    
+    if (svm_paging_enabled(v))
+    {
+        vmcb->cr3 = pagetable_get_paddr(v->arch.guest_table);
+    }
+    else
+    {
+        vmcb->cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
+    }
+
+    if (svm_dbg_on) 
+    {
+        printk("%s: cr3 = %lx ", __func__, (unsigned long)vmcb->cr3);
+        printk("init_guest_table: guest_table = 0x%08x, monitor_table = 
0x%08x,"
+                " shadow_table = 0x%08x\n", (int)v->arch.guest_table.pfn, 
+                (int)v->arch.monitor_table.pfn, (int)v->arch.shadow_table.pfn);
+    }
+
+    v->arch.schedule_tail = arch_svm_do_resume;
+
+    v->arch.hvm_svm.injecting_event  = 0;
+    v->arch.hvm_svm.saved_irq_vector = -1;
+
+    if (svm_dbg_on)
+        svm_dump_vmcb(__func__, vmcb);
+}
+
+
+int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa) 
+{
+    u32 phys_hsa_lo, phys_hsa_hi;
+    
+    phys_hsa_lo = (u32) phys_hsa;
+    phys_hsa_hi = (u32) (phys_hsa >> 32);
+    
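+    /*
+     * Per the AMD64 architecture manual, MSR_K8_VM_HSAVE_PA holds the
+     * physical address of the 4KB host save area: VMRUN stashes host
+     * state there and #VMEXIT restores from it. wrmsr takes the 64-bit
+     * address as separate low/high halves.
+     */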
+    wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
+    set_bit(ARCH_SVM_VMCB_LOADED, &arch_svm->flags); 
+    return 0;
+}
+
+
+/* 
+ * Resume the guest.
+ */
+void svm_do_resume(struct vcpu *v) 
+{
+    struct hvm_virpit *vpit = &v->domain->arch.hvm_domain.vpit;
+    
+    if (event_pending(v)) 
+    {
+        hvm_check_events(v);
+
+        if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags))
+            hvm_wait_io();
+    }
+
+    /* pick up the elapsed PIT ticks and re-enable pit_timer */
+    if ( vpit->first_injected ) {
+        pickup_deactive_ticks(vpit);
+    }
+    svm_set_tsc_shift(v, vpit);
+    
+    /* We can't resume the guest if we're waiting on I/O */
+    ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags));
+}
+
+
+void svm_launch_fail(unsigned long eflags)
+{
+    BUG();
+}
+
+
+void svm_resume_fail(unsigned long eflags)
+{
+    BUG();
+}
+
+
+void svm_dump_sel(char *name, segment_selector_t *s)
+{
+    printf("%s: sel=0x%04x, attr=0x%04x, limit=0x%08x, base=0x%016llx\n", 
+           name, s->sel, s->attributes.bytes, s->limit,
+          (unsigned long long)s->base);
+}
+
+
+void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb)
+{
+    printf("Dumping guest's current state at %s...\n", from);
+    printf("Size of VMCB = %d, address = %p\n", 
+            (int) sizeof(struct vmcb_struct), vmcb);
+
+    printf("cr_intercepts = 0x%08x dr_intercepts = 0x%08x exception_intercepts 
"
+            "= 0x%08x\n", vmcb->cr_intercepts, vmcb->dr_intercepts, 
+            vmcb->exception_intercepts);
+    printf("general1_intercepts = 0x%08x general2_intercepts = 0x%08x\n", 
+           vmcb->general1_intercepts, vmcb->general2_intercepts);
+    printf("iopm_base_pa = %016llx msrpm_base_pa = 0x%016llx tsc_offset = "
+            "0x%016llx\n", 
+           (unsigned long long) vmcb->iopm_base_pa,
+           (unsigned long long) vmcb->msrpm_base_pa,
+           (unsigned long long) vmcb->tsc_offset);
+    printf("tlb_control = 0x%08x vintr = 0x%016llx interrupt_shadow = "
+            "0x%016llx\n", vmcb->tlb_control,
+           (unsigned long long) vmcb->vintr.bytes,
+           (unsigned long long) vmcb->interrupt_shadow);
+    printf("exitcode = 0x%016llx exitintinfo = 0x%016llx\n", 
+           (unsigned long long) vmcb->exitcode,
+          (unsigned long long) vmcb->exitintinfo.bytes);
+    printf("exitinfo1 = 0x%016llx exitinfo2 = 0x%016llx \n",
+           (unsigned long long) vmcb->exitinfo1,
+          (unsigned long long) vmcb->exitinfo2);
+    printf("np_enable = 0x%016llx guest_asid = 0x%03x\n", 
+           (unsigned long long) vmcb->np_enable, vmcb->guest_asid);
+    printf("cpl = %d efer = 0x%016llx star = 0x%016llx lstar = 0x%016llx\n", 
+           vmcb->cpl, (unsigned long long) vmcb->efer,
+          (unsigned long long) vmcb->star, (unsigned long long) vmcb->lstar);
+    printf("CR0 = 0x%016llx CR2 = 0x%016llx\n",
+           (unsigned long long) vmcb->cr0, (unsigned long long) vmcb->cr2);
+    printf("CR3 = 0x%016llx CR4 = 0x%016llx\n", 
+           (unsigned long long) vmcb->cr3, (unsigned long long) vmcb->cr4);
+    printf("RSP = 0x%016llx  RIP = 0x%016llx\n", 
+           (unsigned long long) vmcb->rsp, (unsigned long long) vmcb->rip);
+    printf("RAX = 0x%016llx  RFLAGS=0x%016llx\n",
+           (unsigned long long) vmcb->rax, (unsigned long long) vmcb->rflags);
+    printf("DR6 = 0x%016llx, DR7 = 0x%016llx\n", 
+           (unsigned long long) vmcb->dr6, (unsigned long long) vmcb->dr7);
+    printf("CSTAR = 0x%016llx SFMask = 0x%016llx\n",
+           (unsigned long long) vmcb->cstar, (unsigned long long) 
vmcb->sfmask);
+    printf("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 
+           (unsigned long long) vmcb->kerngsbase,
+          (unsigned long long) vmcb->g_pat);
+    
+    /* print out all the selectors */
+    svm_dump_sel("CS", &vmcb->cs);
+    svm_dump_sel("DS", &vmcb->ds);
+    svm_dump_sel("SS", &vmcb->ss);
+    svm_dump_sel("ES", &vmcb->es);
+    svm_dump_sel("FS", &vmcb->fs);
+    svm_dump_sel("GS", &vmcb->gs);
+    svm_dump_sel("GDTR", &vmcb->gdtr);
+    svm_dump_sel("LDTR", &vmcb->ldtr);
+    svm_dump_sel("IDTR", &vmcb->idtr);
+    svm_dump_sel("TR", &vmcb->tr);
+}
+#endif /* CONFIG_SVM */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/x86_32/exits.S
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/x86_32/exits.S       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,221 @@
+/*
+ * exits.S: SVM architecture-specific exit handling.
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define GET_CURRENT(reg)         \
+        movl $STACK_SIZE-4, reg; \
+        orl  %esp, reg;          \
+        andl $~3,reg;            \
+        movl (reg),reg;
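+
+/*
+ * GET_CURRENT relies on each CPU stack being STACK_SIZE bytes and
+ * STACK_SIZE-aligned: OR-ing the stack pointer with STACK_SIZE-4 yields
+ * the address of the topmost word of the stack (the AND just forces
+ * 4-byte alignment), which is where Xen keeps the pointer to the
+ * current vcpu.
+ */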
+
+/*
+ * At VMExit time the processor saves the guest selectors, esp, eip,
+ * and eflags. Therefore we don't save them, but simply decrement
+ * the kernel stack pointer to make it consistent with the stack frame
+ * at usual interruption time. The host's eflags are not preserved
+ * across the world switch, so we set them to a fixed value here.
+ *
+ * We also need the room, especially because the orig_eax field is used
+ * by do_IRQ(). Compared to struct cpu_user_regs, we skip pushing the
+ * following:
+ *   (10) u32 gs;                 
+ *   (9)  u32 fs;
+ *   (8)  u32 ds;
+ *   (7)  u32 es;
+ *               <- get_stack_bottom() (= HOST_ESP)
+ *   (6)  u32 ss;
+ *   (5)  u32 esp;
+ *   (4)  u32 eflags;
+ *   (3)  u32 cs;
+ *   (2)  u32 eip;
+ * (2/1)  u16 entry_vector;
+ * (1/1)  u16 error_code;
+ * However, get_stack_bottom() actually returns 20 bytes before the real
+ * bottom of the stack to allow space for:
+ * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
+ */
+
+#define HVM_MONITOR_EFLAGS     0x202 /* IF on */
+#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define HVM_SAVE_ALL_NOSEGREGS \
+        pushl $HVM_MONITOR_EFLAGS; \
+        popf; \
+        subl $(NR_SKIPPED_REGS*4), %esp; \
+        movl $0, 0xc(%esp); /* eflags==0 identifies cpu_user_regs as HVM guest */ \
+        pushl %eax; \
+        pushl %ebp; \
+        pushl %edi; \
+        pushl %esi; \
+        pushl %edx; \
+        pushl %ecx; \
+        pushl %ebx;
+
+#define HVM_RESTORE_ALL_NOSEGREGS   \
+        popl %ebx;  \
+        popl %ecx;  \
+        popl %edx;  \
+        popl %esi;  \
+        popl %edi;  \
+        popl %ebp;  \
+        popl %eax;  \
+        addl $(NR_SKIPPED_REGS*4), %esp
+
+#ifdef CONFIG_SVM
+
+        ALIGN
+
+#define VMRUN  .byte 0x0F,0x01,0xD8
+#define VMLOAD .byte 0x0F,0x01,0xDA
+#define VMSAVE .byte 0x0F,0x01,0xDB
+#define STGI   .byte 0x0F,0x01,0xDC
+#define CLGI   .byte 0x0F,0x01,0xDD
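+
+/*
+ * The SVM instructions are hand-encoded as raw bytes, presumably because
+ * assemblers of this era do not yet know the mnemonics; each is a
+ * two-byte 0F 01 opcode with the operation selected by the ModRM byte
+ * (D8 = VMRUN ... DD = CLGI).
+ */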
+
+#define DO_TSC_OFFSET 0
+#define DO_FPUSAVE    1
+        
+ENTRY(svm_asm_do_launch)
+        sti
+        CLGI                
+        GET_CURRENT(%ebx)
+        movl VCPU_svm_vmcb(%ebx), %ecx
+        movl 24(%esp), %eax
+        movl %eax, VMCB_rax(%ecx)
+        movl VCPU_svm_hsa_pa(%ebx), %eax
+        VMSAVE
+
+#if DO_FPUSAVE
+        mov  %cr0, %eax
+        push %eax
+        clts
+        lea  VCPU_arch_guest_fpu_ctxt(%ebx), %eax
+        fxrstor (%eax)
+        pop  %eax
+        mov  %eax, %cr0
+#endif
+
+#if (DO_TSC_OFFSET)
+        pushl %edx /* eax and edx get trashed by rdtsc */
+        pushl %eax
+        rdtsc
+        subl VCPU_svm_vmexit_tsc(%ebx),%eax   /* tsc's from    */
+        sbbl VCPU_svm_vmexit_tsc+4(%ebx),%edx /* last #VMEXIT? */
+        subl %eax,VMCB_tsc_offset(%ecx)  /* subtract from running TSC_OFFSET */
+        sbbl %edx,VMCB_tsc_offset+4(%ecx)
+        subl $20000,VMCB_tsc_offset(%ecx)  /* fudge factor for VMXXX calls  */
+        sbbl $0,VMCB_tsc_offset+4(%ecx)
+
+        /* 
+         * TODO: may need to add a kludge factor to account for all the cycles 
+         * burned in VMLOAD, VMSAVE, VMRUN...
+         */
+
+        popl %eax
+        popl %edx
+#endif
+
+        movl VCPU_svm_vmcb_pa(%ebx), %eax
+        popl %ebx
+        popl %ecx
+        popl %edx
+        popl %esi
+        popl %edi
+        popl %ebp
+
+        /* 
+         * Skip %eax, we need to have vmcb address in there.
+         * Don't worry, EAX is restored through the VMRUN instruction.
+         */
+        addl $4, %esp       
+        addl $(NR_SKIPPED_REGS*4), %esp
+        VMLOAD
+        VMRUN
+        VMSAVE
+        /* eax is the only register we're allowed to touch here... */
+
+#if DO_FPUSAVE
+        mov  %cr0, %eax
+        push %eax
+        clts
+        GET_CURRENT(%eax)
+        lea     VCPU_arch_guest_fpu_ctxt(%eax), %eax
+        fxsave (%eax)
+        fnclex
+        pop  %eax
+        mov  %eax, %cr0
+#endif
+        
+        GET_CURRENT(%eax)
+
+#if (DO_TSC_OFFSET)
+        pushl %edx
+        pushl %ebx
+        movl %eax,%ebx
+        rdtsc
+        movl %eax,VCPU_svm_vmexit_tsc(%ebx)
+        movl %edx,VCPU_svm_vmexit_tsc+4(%ebx)
+        movl %ebx,%eax
+        popl %ebx
+        popl %edx
+#endif
+
+        movl VCPU_svm_hsa_pa(%eax), %eax
+        VMLOAD
+
+        HVM_SAVE_ALL_NOSEGREGS
+        STGI
+        call svm_vmexit_handler
+        jmp  svm_asm_do_resume
+
+        ALIGN
+
+ENTRY(svm_asm_do_resume)
+svm_test_all_events:
+        GET_CURRENT(%ebx)
+/*test_all_events:*/
+        xorl %ecx,%ecx
+        notl %ecx
+        cli                             # tests must not race interrupts
+/*test_softirqs:*/  
+        movl VCPU_processor(%ebx),%eax
+        shl  $IRQSTAT_shift,%eax
+        test %ecx,irq_stat(%eax,1)
+        jnz  svm_process_softirqs
+svm_restore_all_guest:
+        call svm_intr_assist
+        call svm_asid
+        call svm_load_cr2
+        sti
+        /* 
+         * Check if we are going back to SVM-based VM
+         * By this time, all the setups in the VMCB must be complete.
+         */
+        jmp svm_asm_do_launch
+
+        ALIGN
+svm_process_softirqs:
+        sti       
+        call do_softirq
+        jmp  svm_test_all_events
+#endif /* CONFIG_SVM */
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/svm/x86_64/exits.S
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/svm/x86_64/exits.S       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,179 @@
+/*
+ * exits.S: SVM architecture-specific exit handling.
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define GET_CURRENT(reg)         \
+        movq $STACK_SIZE-8, reg; \
+        orq  %rsp, reg;          \
+        andq $~7,reg;            \
+        movq (reg),reg;
+
+/*
+ * At VMExit time the processor saves the guest selectors, rsp, rip,
+ * and rflags. Therefore we don't save them, but simply decrement
+ * the kernel stack pointer to make it consistent with the stack frame
+ * at usual interruption time. The host's rflags are not preserved
+ * across the world switch, so we set them to a fixed value here.
+ *
+ * We also need the room, especially because the orig_eax field is used
+ * by do_IRQ(). Compared to struct cpu_user_regs, we skip pushing the
+ * following:
+ *   (10) u64 gs;                 
+ *   (9)  u64 fs;
+ *   (8)  u64 ds;
+ *   (7)  u64 es;
+ *               <- get_stack_bottom() (= HOST_ESP)
+ *   (6)  u64 ss;
+ *   (5)  u64 rsp;
+ *   (4)  u64 rflags;
+ *   (3)  u64 cs;
+ *   (2)  u64 rip;
+ * (2/1)  u32 entry_vector;
+ * (1/1)  u32 error_code;
+ */
+#define HVM_MONITOR_RFLAGS     0x202 /* IF on */
+#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define HVM_SAVE_ALL_NOSEGREGS \
+        pushq $HVM_MONITOR_RFLAGS; \
+        popfq; \
+        subq $(NR_SKIPPED_REGS*8), %rsp; \
+        pushq %rdi; \
+        pushq %rsi; \
+        pushq %rdx; \
+        pushq %rcx; \
+        pushq %rax; \
+        pushq %r8;  \
+        pushq %r9;  \
+        pushq %r10; \
+        pushq %r11; \
+        pushq %rbx; \
+        pushq %rbp; \
+        pushq %r12; \
+        pushq %r13; \
+        pushq %r14; \
+        pushq %r15; \
+
+#define HVM_RESTORE_ALL_NOSEGREGS \
+        popq %r15; \
+        popq %r14; \
+        popq %r13; \
+        popq %r12; \
+        popq %rbp; \
+        popq %rbx; \
+        popq %r11; \
+        popq %r10; \
+        popq %r9;  \
+        popq %r8;  \
+        popq %rax; \
+        popq %rcx; \
+        popq %rdx; \
+        popq %rsi; \
+        popq %rdi; \
+        addq $(NR_SKIPPED_REGS*8), %rsp; \
+
+#ifdef CONFIG_SVM
+#define VMRUN  .byte 0x0F,0x01,0xD8
+#define VMLOAD .byte 0x0F,0x01,0xDA
+#define VMSAVE .byte 0x0F,0x01,0xDB
+#define STGI   .byte 0x0F,0x01,0xDC
+#define CLGI   .byte 0x0F,0x01,0xDD
+
+ENTRY(svm_asm_do_launch)
+        sti
+        CLGI                
+        GET_CURRENT(%rbx)
+        movq VCPU_svm_vmcb(%rbx), %rcx
+        movq UREGS_rax(%rsp), %rax
+        movq %rax, VMCB_rax(%rcx)
+        movq VCPU_svm_hsa_pa(%rbx), %rax
+        VMSAVE
+       /* XXX FPU SAVE */
+       /* XXX DO TSC OFFSET */
+
+        movq VCPU_svm_vmcb_pa(%rbx), %rax
+        popq %r15
+        popq %r14
+        popq %r13
+        popq %r12
+        popq %rbp
+        popq %rbx
+        popq %r11
+        popq %r10
+        popq %r9
+        popq %r8
+        /*
+         * Skip %rax, we need to have vmcb address in there.
+         * Don't worry, RAX is restored through the VMRUN instruction.
+         */
+        addq $8, %rsp
+        popq %rcx
+        popq %rdx
+        popq %rsi
+        popq %rdi
+        addq $(NR_SKIPPED_REGS*8), %rsp
+
+        VMLOAD
+        VMRUN
+        VMSAVE
+        /* rax is the only register we're allowed to touch here... */
+
+       /* XXX FPU SAVE */
+        GET_CURRENT(%rax)
+       /* XXX DO TSC OFFSET */
+        movq VCPU_svm_hsa_pa(%rax), %rax
+        VMLOAD
+
+        HVM_SAVE_ALL_NOSEGREGS
+        STGI
+        call svm_vmexit_handler
+        jmp  svm_asm_do_resume
+
+ENTRY(svm_asm_do_resume)
+svm_test_all_events:
+       GET_CURRENT(%rbx)
+/*test_all_events:*/
+        cli                             # tests must not race interrupts
+/*test_softirqs:*/
+       movl  VCPU_processor(%rbx),%eax
+        shl   $IRQSTAT_shift, %rax
+        leaq  irq_stat(%rip), %rdx
+        testl $~0, (%rdx, %rax, 1)
+        jnz   svm_process_softirqs
+svm_restore_all_guest:
+        call svm_intr_assist
+        call svm_load_cr2
+        sti
+        /*
+         * Check if we are going back to an SVM-based VM.
+         * By this time, all the setups in the VMCB must be complete.
+         */
+        jmp svm_asm_do_launch
+
+        ALIGN
+svm_process_softirqs:
+        sti
+        call do_softirq
+        jmp  svm_test_all_events
+#endif /* CONFIG_SVM */
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vioapic.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vioapic.c        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,623 @@
+/*
+*  Copyright (C) 2001  MandrakeSoft S.A.
+*
+*    MandrakeSoft S.A.
+*    43, rue d'Aboukir
+*    75002 Paris - France
+*    http://www.linux-mandrake.com/
+*    http://www.mandrakesoft.com/
+*
+*  This library is free software; you can redistribute it and/or
+*  modify it under the terms of the GNU Lesser General Public
+*  License as published by the Free Software Foundation; either
+*  version 2 of the License, or (at your option) any later version.
+*
+*  This library is distributed in the hope that it will be useful,
+*  but WITHOUT ANY WARRANTY; without even the implied warranty of
+*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*  Lesser General Public License for more details.
+*
+*  You should have received a copy of the GNU Lesser General Public
+*  License along with this library; if not, write to the Free Software
+*  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+*/
+
+/*
+*  Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
+*  Ported to xen by using virtual IRQ line.
+*/
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <public/hvm/ioreq.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/support.h>
+#include <asm/current.h>
+
+#if defined(__ia64__)
+#define        opt_hvm_debug_level     opt_vmx_debug_level
+#endif
+
+static void ioapic_enable(hvm_vioapic_t *s, uint8_t enable)
+{
+    if (enable)
+        s->flags |= IOAPIC_ENABLE_FLAG;
+    else
+        s->flags &= ~IOAPIC_ENABLE_FLAG;
+}
+
+static void ioapic_dump_redir(hvm_vioapic_t *s, uint8_t entry)
+{
+    RedirStatus redir = s->redirtbl[entry];
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_dump_redir "
+      "entry %x vector %x deliver_mod %x destmode %x delivestatus %x "
+      "polarity %x remote_irr %x trigmod %x mask %x dest_id %x\n",
+      entry, redir.RedirForm.vector, redir.RedirForm.deliver_mode,
+      redir.RedirForm.destmode, redir.RedirForm.delivestatus,
+      redir.RedirForm.polarity, redir.RedirForm.remoteirr,
+      redir.RedirForm.trigmod, redir.RedirForm.mask,
+      redir.RedirForm.dest_id);
+}
+
+#ifdef HVM_DOMAIN_SAVE_RESTORE
+void ioapic_save(QEMUFile* f, void* opaque)
+{
+    printk("no implementation for ioapic_save\n");
+}
+
+int ioapic_load(QEMUFile* f, void* opaque, int version_id)
+{
+    printk("no implementation for ioapic_load\n");
+    return 0;
+}
+#endif
+
+static unsigned long hvm_vioapic_read_indirect(struct hvm_vioapic *s,
+                                              unsigned long addr,
+                                              unsigned long length)
+{
+    unsigned long result = 0;
+
+    ASSERT(s);
+
+    switch (s->ioregsel) {
+    case IOAPIC_REG_VERSION:
+        result = ((((IOAPIC_NUM_PINS-1) & 0xff) << 16)
+                  | (IOAPIC_VERSION_ID & 0x0f));
+        break;
+
+#ifndef __ia64__
+    case IOAPIC_REG_APIC_ID:
+        result = ((s->id & 0xf) << 24);
+        break;
+
+    case IOAPIC_REG_ARB_ID:
+        /* XXX how arb_id used on p4? */
+        result = ((s->id & 0xf) << 24);
+        break;
+#endif
+
+    default:
+        {
+            uint32_t redir_index = 0;
+            uint64_t redir_content = 0;
+
+            redir_index = (s->ioregsel - 0x10) >> 1;
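+            /*
+             * Indirect registers 0x10 + 2n and 0x10 + 2n + 1 map to the
+             * low and high dwords of 64-bit redirection entry n, so the
+             * low bit of ioregsel selects which half to return.
+             */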
+
+            if (redir_index < IOAPIC_NUM_PINS) {
+                redir_content = s->redirtbl[redir_index].value;
+
+                result = (s->ioregsel & 0x1)?
+                           (redir_content >> 32) & 0xffffffff :
+                           redir_content & 0xffffffff;
+            } else {
+                printk("upic_mem_readl:undefined ioregsel %x\n",
+                        s->ioregsel);
+                domain_crash_synchronous();
+            }
+            break;
+        }
+    } /* switch */
+
+    return result;
+}
+
+static unsigned long hvm_vioapic_read(struct vcpu *v,
+                                     unsigned long addr,
+                                     unsigned long length)
+{
+    struct hvm_vioapic *s = &(v->domain->arch.hvm_domain.vioapic);
+    uint32_t    result = 0;
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "hvm_vioapic_read addr %lx\n", addr);
+
+    ASSERT(s);
+
+    addr &= 0xff;
+
+    switch (addr) {
+    case IOAPIC_REG_SELECT:
+        result = s->ioregsel;
+        break;
+
+    case IOAPIC_REG_WINDOW:
+        result = hvm_vioapic_read_indirect(s, addr, length);
+        break;
+
+    default:
+          break;
+    }
+
+    return result;
+}
+
+static void hvm_vioapic_update_imr(struct hvm_vioapic *s, int index)
+{
+    if (s->redirtbl[index].RedirForm.mask)
+        set_bit(index, &s->imr);
+    else
+        clear_bit(index, &s->imr);
+}
+
+static void hvm_vioapic_write_indirect(struct hvm_vioapic *s,
+                                      unsigned long addr,
+                                      unsigned long length,
+                                      unsigned long val)
+{
+    switch (s->ioregsel) {
+    case IOAPIC_REG_VERSION:
+        printk("hvm_vioapic_write_indirect: version register read only\n");
+        break;
+
+#ifndef __ia64__
+    case IOAPIC_REG_APIC_ID:
+        s->id = (val >> 24) & 0xf;
+        break;
+
+    case IOAPIC_REG_ARB_ID:
+        s->arb_id = val;
+        break;
+#endif
+
+    default:
+        {
+            uint32_t redir_index = (s->ioregsel - 0x10) >> 1;
+
+            HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "hvm_vioapic_write_indirect "
+              "change redir index %x val %lx\n",
+              redir_index, val);
+
+            if (redir_index < IOAPIC_NUM_PINS) {
+                uint64_t redir_content;
+
+                redir_content = s->redirtbl[redir_index].value;
+
+                if (s->ioregsel & 0x1)
+                    redir_content = (((uint64_t)val & 0xffffffff) << 32) |
+                                    (redir_content & 0xffffffff);
+                else
+                    redir_content = ((redir_content >> 32) << 32) |
+                                    (val & 0xffffffff);
+                s->redirtbl[redir_index].value = redir_content;
+                hvm_vioapic_update_imr(s, redir_index);
+            } else {
+                printk("hvm_vioapic_write_indirect "
+                  "error register %x\n", s->ioregsel);
+            }
+            break;
+        }
+    } /* switch */
+}
+
+static void hvm_vioapic_write(struct vcpu *v,
+                             unsigned long addr,
+                             unsigned long length,
+                             unsigned long val)
+{
+    hvm_vioapic_t *s = &(v->domain->arch.hvm_domain.vioapic);
+
+    ASSERT(s);
+
+    addr &= 0xff;
+
+    switch (addr) {
+    case IOAPIC_REG_SELECT:
+        s->ioregsel = val;
+        break;
+
+    case IOAPIC_REG_WINDOW:
+        hvm_vioapic_write_indirect(s, addr, length, val);
+        break;
+
+#ifdef __ia64__
+    case IOAPIC_REG_EOI:
+        ioapic_update_EOI(v->domain, val);
+        break;
+#endif
+
+    default:
+        break;
+    }
+}
+
+static int hvm_vioapic_range(struct vcpu *v, unsigned long addr)
+{
+    hvm_vioapic_t *s = &(v->domain->arch.hvm_domain.vioapic);
+
+    if ((s->flags & IOAPIC_ENABLE_FLAG) &&
+        (addr >= s->base_address &&
+        (addr < s->base_address + IOAPIC_MEM_LENGTH)))
+        return 1;
+    else
+        return 0;
+}
+
+struct hvm_mmio_handler vioapic_mmio_handler = {
+    .check_handler = hvm_vioapic_range,
+    .read_handler = hvm_vioapic_read,
+    .write_handler = hvm_vioapic_write
+};
+
+static void hvm_vioapic_reset(hvm_vioapic_t *s)
+{
+    int i;
+
+    memset(s, 0, sizeof(hvm_vioapic_t));
+
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        s->redirtbl[i].RedirForm.mask = 0x1;
+        hvm_vioapic_update_imr(s, i);
+    }
+}
+
+static void ioapic_update_config(hvm_vioapic_t *s,
+                                 unsigned long address,
+                                 uint8_t enable)
+{
+    ASSERT(s);
+
+    ioapic_enable(s, enable);
+
+    if (address != s->base_address)
+        s->base_address = address;
+}
+
+static int ioapic_inj_irq(hvm_vioapic_t *s,
+                          struct vlapic * target,
+                          uint8_t vector,
+                          uint8_t trig_mode,
+                          uint8_t delivery_mode)
+{
+    int result = 0;
+
+    ASSERT(s && target);
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_inj_irq "
+      "irq %d trig %d delive mode %d\n",
+      vector, trig_mode, delivery_mode);
+
+    switch (delivery_mode) {
+    case VLAPIC_DELIV_MODE_FIXED:
+    case VLAPIC_DELIV_MODE_LPRI:
+        if (vlapic_set_irq(target, vector, trig_mode) && (trig_mode == 1))
+            printk("<ioapic_inj_irq> level interrupt happen before cleared\n");
+        result = 1;
+        break;
+    default:
+        printk("<ioapic_inj_irq> error delivery mode %d\n",
+                delivery_mode);
+        break;
+   }
+
+   return result;
+}
+
+#ifndef __ia64__
+static int ioapic_match_logical_addr(hvm_vioapic_t *s, int number, uint8_t dest)
+{
+    int result = 0;
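+
+    /*
+     * Per the IA-32 manuals, the top nibble of the DFR selects the
+     * logical destination model: 0xF is the flat model, where the 8-bit
+     * LDR is a plain bitmask; 0x0 is the cluster model, where the upper
+     * nibble of the destination names a cluster and the lower nibble is
+     * a bitmask of members within that cluster.
+     */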
+
+    ASSERT(s && s->lapic_info[number]);
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_match_logical_addr "
+      "number %i dest %x\n",
+      number, dest);
+
+    switch (((s->lapic_info[number]->dest_format >> 28) & 0xf)) {
+    case 0xf:
+        result =
+          (dest & ((s->lapic_info[number]->logical_dest >> 24) & 0xff)) != 0;
+        break;
+    case 0x0:
+        /* Should we support the flat cluster model? */
+        if ( ((s->lapic_info[number]->logical_dest >> 28)
+               == ((dest >> 0x4) & 0xf)) &&
+             (((s->lapic_info[number]->logical_dest >> 24) & 0xf)
+               & (dest  & 0xf)) )
+            result = 1;
+        break;
+    default:
+        printk("error DFR value for %x local apic\n", number);
+        break;
+    }
+
+    return result;
+}
+#else
+extern int ioapic_match_logical_addr(hvm_vioapic_t *s, int number, uint8_t dest);
+#endif
+
+static uint32_t ioapic_get_delivery_bitmask(hvm_vioapic_t *s,
+                                            uint16_t dest,
+                                            uint8_t dest_mode,
+                                            uint8_t vector,
+                                            uint8_t delivery_mode)
+{
+    uint32_t mask = 0;
+    int i;
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_get_delivery_bitmask "
+      "dest %d dest_mode %d "
+      "vector %d del_mode %d, lapic_count %d\n",
+      dest, dest_mode, vector, delivery_mode, s->lapic_count);
+
+    ASSERT(s);
+
+    if (dest_mode == 0) { /* Physical mode */
+        for (i = 0; i < s->lapic_count; i++) {
+            if (VLAPIC_ID(s->lapic_info[i]) == dest) {
+                mask = 1 << i;
+                break;
+            }
+        }
+    } else {
+        /* logical destination. call match_logical_addr for each APIC. */
+        if (dest != 0) {
+            for (i = 0; i < s->lapic_count; i++) {
+                if ( s->lapic_info[i] &&
+                     ioapic_match_logical_addr(s, i, dest) ) {
+                    mask |= (1<<i);
+                }
+            }
+        }
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_get_delivery_bitmask "
+      "mask %x\n", mask);
+
+    return mask;
+}
+
+static void ioapic_deliver(hvm_vioapic_t *s, int irqno)
+{
+    uint16_t dest = s->redirtbl[irqno].RedirForm.dest_id;
+    uint8_t dest_mode = s->redirtbl[irqno].RedirForm.destmode;
+    uint8_t delivery_mode = s->redirtbl[irqno].RedirForm.deliver_mode;
+    uint8_t vector = s->redirtbl[irqno].RedirForm.vector;
+    uint8_t trig_mode = s->redirtbl[irqno].RedirForm.trigmod;
+    uint32_t deliver_bitmask;
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "IOAPIC deliver: "
+      "dest %x dest_mode %x delivery_mode %x vector %x trig_mode %x\n",
+      dest, dest_mode, delivery_mode, vector, trig_mode);
+
+    deliver_bitmask =
+      ioapic_get_delivery_bitmask(s, dest, dest_mode, vector, delivery_mode);
+
+    if (!deliver_bitmask) {
+        HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic deliver "
+          "no target on destination\n");
+
+        return;
+    }
+
+    switch (delivery_mode) {
+    case VLAPIC_DELIV_MODE_LPRI:
+    {
+        struct vlapic* target;
+
+        target = apic_round_robin(
+                s->domain, dest_mode, vector, deliver_bitmask);
+        if (target)
+            ioapic_inj_irq(s, target, vector, trig_mode, delivery_mode);
+        else {
+            HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic deliver "
+              "null round robin mask %x vector %x delivery_mode %x\n",
+              deliver_bitmask, vector, delivery_mode);
+        }
+        break;
+    }
+
+    case VLAPIC_DELIV_MODE_FIXED:
+    case VLAPIC_DELIV_MODE_EXT:
+    {
+        uint8_t bit;
+        for (bit = 0; bit < s->lapic_count; bit++) {
+            if (deliver_bitmask & (1 << bit)) {
+                if (s->lapic_info[bit]) {
+                    ioapic_inj_irq(s, s->lapic_info[bit],
+                                vector, trig_mode, delivery_mode);
+                }
+            }
+        }
+        break;
+    }
+
+    case VLAPIC_DELIV_MODE_SMI:
+    case VLAPIC_DELIV_MODE_NMI:
+    case VLAPIC_DELIV_MODE_INIT:
+    case VLAPIC_DELIV_MODE_STARTUP:
+    default:
+        printk("Not support delivey mode %d\n", delivery_mode);
+        break;
+    }
+}
+
+static int ioapic_get_highest_irq(hvm_vioapic_t *s)
+{
+    uint32_t irqs;
+
+    ASSERT(s);
+
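+    /*
+     * An IRQ is deliverable if it is pending (irr), not already being
+     * serviced while awaiting an EOI (isr), and not masked (imr).
+     */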
+    irqs = s->irr & ~s->isr & ~s->imr;
+    return __fls(irqs);
+}
+
+
+static void service_ioapic(hvm_vioapic_t *s)
+{
+    int irqno;
+
+    while ((irqno = ioapic_get_highest_irq(s)) != -1) {
+
+        HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "service_ioapic "
+          "highest irqno %x\n", irqno);
+
+        if (!test_bit(irqno, &s->imr)) {
+            ioapic_deliver(s, irqno);
+        }
+
+        if (s->redirtbl[irqno].RedirForm.trigmod == IOAPIC_LEVEL_TRIGGER) {
+            s->isr |= (1 << irqno);
+        }
+
+        s->irr &= ~(1 << irqno);
+    }
+}
+
+void hvm_vioapic_do_irqs(struct domain *d, uint16_t irqs)
+{
+    hvm_vioapic_t *s = &(d->arch.hvm_domain.vioapic);
+
+    if (!hvm_apic_support(d))
+        return;
+
+    s->irr |= irqs & ~s->imr;
+    service_ioapic(s);
+}
+
+void hvm_vioapic_do_irqs_clear(struct domain *d, uint16_t irqs)
+{
+    hvm_vioapic_t *s = &(d->arch.hvm_domain.vioapic);
+
+    if (!hvm_apic_support(d))
+        return;
+
+    s->irr &= ~irqs;
+    service_ioapic(s);
+}
+
+void hvm_vioapic_set_irq(struct domain *d, int irq, int level)
+{
+    hvm_vioapic_t *s = &(d->arch.hvm_domain.vioapic);
+
+    if (!hvm_apic_support(d))
+        return ;
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_set_irq "
+      "irq %x level %x\n", irq, level);
+
+    if (irq < 0 || irq >= IOAPIC_NUM_PINS) {
+        printk("ioapic_set_irq irq %x is illegal\n", irq);
+        domain_crash_synchronous();
+    }
+
+    if (!IOAPICEnabled(s) || s->redirtbl[irq].RedirForm.mask)
+        return;
+
+    ioapic_dump_redir(s, irq);
+
+    if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+        uint32_t bit = 1 << irq;
+        if (s->redirtbl[irq].RedirForm.trigmod == IOAPIC_LEVEL_TRIGGER) {
+            if (level)
+                s->irr |= bit;
+            else
+                s->irr &= ~bit;
+        } else {
+            if (level)
+                /* XXX No irr clear for edge interrupt */
+                s->irr |= bit;
+        }
+    }
+
+    service_ioapic(s);
+}
+
+/* XXX If level interrupt, use vector->irq table for performance */
+static int get_redir_num(hvm_vioapic_t *s, int vector)
+{
+    int i = 0;
+
+    ASSERT(s);
+
+    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+        if (s->redirtbl[i].RedirForm.vector == vector)
+            return i;
+    }
+
+    return -1;
+}
+
+void ioapic_update_EOI(struct domain *d, int vector)
+{
+    hvm_vioapic_t *s = &(d->arch.hvm_domain.vioapic);
+    int redir_num;
+
+    if ((redir_num = get_redir_num(s, vector)) == -1) {
+        printk("Can't find redir item for %d EOI \n", vector);
+        return;
+    }
+
+    if (!test_and_clear_bit(redir_num, &s->isr)) {
+        printk("redir %d not set for %d  EOI\n", redir_num, vector);
+        return;
+    }
+}
+
+int hvm_vioapic_add_lapic(struct vlapic *vlapic, struct vcpu *v)
+{
+    hvm_vioapic_t *s = &(v->domain->arch.hvm_domain.vioapic);
+
+    if (v->vcpu_id != s->lapic_count) {
+        printk("hvm_vioapic_add_lapic "
+           "cpu_id not match vcpu_id %x lapic_count %x\n",
+           v->vcpu_id, s->lapic_count);
+        domain_crash_synchronous();
+    }
+
+    /* Update the count only after the entry is filled in, to avoid
+       racing against concurrent interrupt delivery. */
+    s->lapic_info[s->lapic_count] = vlapic;
+    s->lapic_count++;
+
+    return s->lapic_count;
+}
+
+hvm_vioapic_t * hvm_vioapic_init(struct domain *d)
+{
+    int i = 0;
+    hvm_vioapic_t *s = &(d->arch.hvm_domain.vioapic);
+
+    HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "hvm_vioapic_init\n");
+
+    hvm_vioapic_reset(s);
+
+    s->domain = d;
+
+    for (i = 0; i < MAX_LAPIC_NUM; i++)
+        s->lapic_info[i] = NULL;
+
+    /* Remove once the guest firmware (GFW) is ready. */
+    ioapic_update_config(s, IOAPIC_DEFAULT_BASE_ADDRESS, 1);
+
+    return s;
+}
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vlapic.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vlapic.c Tue Jan 31 10:49:51 2006
@@ -0,0 +1,1034 @@
+/*
+ * vlapic.c: virtualize LAPIC for HVM vcpus.
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <asm/shadow.h>
+#include <asm/page.h>
+#include <xen/event.h>
+#include <xen/trace.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <public/hvm/ioreq.h>
+
+/* XXX remove this definition after GFW enabled */
+#define VLAPIC_NO_BIOS
+
+extern unsigned int get_apic_bus_scale(void);
+
+static unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
+{
+    0x310ff, 0x117ff, 0x117ff, 0x1f7ff, 0x1f7ff, 0x117ff
+};
+
+int vlapic_find_highest_irr(struct vlapic *vlapic)
+{
+    int result;
+
+    result = find_highest_bit((uint32_t *)&vlapic->irr[0], INTR_LEN_32);
+
+    if (result != -1 && result < 16) {
+        printk("VLAPIC: irr on reserved bits %d\n ", result);
+        domain_crash_synchronous();
+    }
+
+    return result;
+}
+
+int hvm_apic_support(struct domain *d)
+{
+    return d->arch.hvm_domain.apic_enabled;
+}
+
+s_time_t get_apictime_scheduled(struct vcpu *v)
+{
+    struct vlapic *vlapic = VLAPIC(v);
+
+    if ( !hvm_apic_support(v->domain) || !vlapic_lvt_timer_enabled(vlapic) )
+        return -1;
+    return vlapic->vlapic_timer.expires;
+}
+
+int vlapic_find_highest_isr(struct vlapic *vlapic)
+{
+    int result;
+
+    result = find_highest_bit((uint32_t *)&vlapic->isr[0], INTR_LEN_32);
+
+    if (result != -1 && result < 16) {
+        int i = 0;
+        printk("VLAPIC: isr on reserved bits %d, isr is\n ", result);
+        for (i = 0; i < INTR_LEN_32; i += 2)
+            printk("%d: 0x%08x%08x\n", i, vlapic->isr[i], vlapic->isr[i+1]);
+        return -1;
+    }
+
+    return result;
+}
+
+uint32_t vlapic_update_ppr(struct vlapic *vlapic)
+{
+    uint32_t tpr, isrv, ppr;
+    int isr;
+
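+    /*
+     * PPR = max(TPR, highest in-service priority class): e.g. with TPR
+     * 0x30 and highest ISR vector 0x51, service class 5 beats task class
+     * 3, giving PPR 0x50. Only the upper nibble (the priority class)
+     * takes part in the comparison.
+     */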
+    tpr = (vlapic->task_priority >> 4) & 0xf;      /* we want 7:4 */
+
+    isr = vlapic_find_highest_isr(vlapic);
+    if (isr != -1)
+        isrv = (isr >> 4) & 0xf;   /* ditto */
+    else
+        isrv = 0;
+
+    if (tpr >= isrv)
+        ppr = vlapic->task_priority & 0xff;
+    else
+        ppr = isrv << 4;  /* low 4 bits of PPR have to be cleared */
+
+    vlapic->processor_priority = ppr;
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT,
+                "vlapic_update_ppr: vlapic %p ppr %x isr %x isrv %x",
+                vlapic, ppr, isr, isrv);
+
+    return ppr;
+}
+
+/* This only for fixed delivery mode */
+static int vlapic_match_dest(struct vcpu *v, struct vlapic *source,
+                             int short_hand, int dest, int dest_mode,
+                             int delivery_mode)
+{
+    int result = 0;
+    struct vlapic *target = VLAPIC(v);
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest: "
+                "target %p source %p dest %x dest_mode %x short_hand %x "
+                "delivery_mode %x",
+                target, source, dest, dest_mode, short_hand, delivery_mode);
+
+    if ( unlikely(!target) &&
+         ( (delivery_mode != VLAPIC_DELIV_MODE_INIT) &&
+           (delivery_mode != VLAPIC_DELIV_MODE_STARTUP) &&
+           (delivery_mode != VLAPIC_DELIV_MODE_NMI) )) {
+        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest "
+                    "uninitialized target v %p delivery_mode %x dest %x\n",
+                    v, delivery_mode, dest); 
+        return result; 
+    }
+
+    switch (short_hand) {
+    case VLAPIC_NO_SHORTHAND:
+        if (!dest_mode) {   /* Physical */
+            result = ((target ? target->id : v->vcpu_id ) == dest);
+        } else {            /* Logical */
+            if (!target) 
+                break;
+            if (((target->dest_format >> 28) & 0xf) == 0xf) {   /* Flat mode */
+                result = (target->logical_dest >> 24) & dest;
+            } else {
+                if ((delivery_mode == VLAPIC_DELIV_MODE_LPRI) &&
+                   (dest == 0xff)) {
+                    /* What shall we do now? */
+                    printk("Broadcast IPI with lowest priority "
+                           "delivery mode\n");
+                    domain_crash_synchronous();
+                }
+                result = (target->logical_dest == (dest & 0xf)) ?
+                  ((target->logical_dest >> 4) & (dest >> 4)) : 0;
+            }
+        }
+        break;
+
+    case VLAPIC_SHORTHAND_SELF:
+        if (target == source)
+            result = 1;
+        break;
+
+    case VLAPIC_SHORTHAND_INCLUDE_SELF:
+        result = 1;
+        break;
+
+    case VLAPIC_SHORTHAND_EXCLUDE_SELF:
+        if (target != source)
+            result = 1;
+        break;
+
+    default:
+        break;
+    }
+
+    return result;
+}
+
+/*
+ * Add a pending IRQ into lapic.
+ * Return 1 if successfully added and 0 if discarded.
+ */
+static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
+                             int vector, int level, int trig_mode)
+{
+    int        result = 0;
+    struct vlapic *vlapic = VLAPIC(v);
+
+    switch (delivery_mode) {
+    case VLAPIC_DELIV_MODE_FIXED:
+    case VLAPIC_DELIV_MODE_LPRI:
+        /* FIXME add logic for vcpu on reset */
+        if (unlikely(!vlapic || !vlapic_enabled(vlapic)))
+            return result;
+
+        if (test_and_set_bit(vector, &vlapic->irr[0])) {
+            printk("<vlapic_accept_irq> vector %d is already pending\n",
+                   vector);
+            result = 0;
+        } else {
+            if (level) {
+                printk("<vlapic_accept_irq> level trig mode for vector %d\n",
+                       vector);
+                set_bit(vector, &vlapic->tmr[0]);
+            }
+        }
+        evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain));
+        result = 1;
+        break;
+
+    case VLAPIC_DELIV_MODE_RESERVED:
+        printk("Ignore deliver mode 3 in vlapic_accept_irq\n");
+        break;
+
+    case VLAPIC_DELIV_MODE_SMI:
+    case VLAPIC_DELIV_MODE_NMI:
+        /* Fixme */
+        printk("TODO: for guest SMI/NMI\n");
+        break;
+
+    case VLAPIC_DELIV_MODE_INIT:
+        if (!level && trig_mode == 1) {  /* Deassert */
+            printk("This hvm_vlapic is for P4; nothing to do for "
+                   "de-assert INIT\n");
+        } else {
+            /* FIXME How to check the situation after vcpu reset? */
+            vlapic->init_sipi_sipi_state =
+                VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI;
+            if (vlapic->vcpu) {
+                vcpu_pause(vlapic->vcpu);
+            }
+        }
+        break;
+
+    case VLAPIC_DELIV_MODE_STARTUP:
+        if (vlapic->init_sipi_sipi_state !=
+            VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI)
+            break;
+        vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_NORM;
+        if (!vlapic->vcpu) {
+            /* XXX Call hvm_bringup_ap here */
+            result = 0;
+        } else {
+            /* hvm_vcpu_reset(vlapic->vcpu); */
+        }
+        break;
+
+    default:
+        printk("TODO: not support interrup type %x\n", delivery_mode);
+        domain_crash_synchronous();
+        break;
+    }
+
+    return result;
+}
+
+/*
+ * This function is used by both the IOAPIC and the local APIC.
+ * The bitmap is indexed by vcpu_id.
+ */
+struct vlapic* apic_round_robin(struct domain *d,
+                                uint8_t dest_mode,
+                                uint8_t vector,
+                                uint32_t bitmap)
+{
+    int next, old;
+    struct vlapic* target = NULL;
+
+    if (dest_mode == 0) { //Physical mode
+        printk("<apic_round_robin> lowest priority for physical mode\n");
+        return NULL;
+    }
+
+    if (!bitmap) {
+        printk("<apic_round_robin> no bit on bitmap\n");
+        return NULL;
+    }
+
+    spin_lock(&d->arch.hvm_domain.round_robin_lock);
+
+    old = next = d->arch.hvm_domain.round_info[vector];
+
+    do {
+        /* the vcpu array is arranged according to vcpu_id */
+        if (test_bit(next, &bitmap)) {
+            target = d->vcpu[next]->arch.hvm_vcpu.vlapic;
+
+            if (!target || !vlapic_enabled(target)) {
+                printk("warning: target round-robin local APIC disabled\n");
+                /* XXX should we domain crash?? Or should we return NULL */
+            }
+            break;
+        }
+
+        /* Check the wrap-around bound before indexing into the array. */
+        next++;
+        if (next == MAX_VIRT_CPUS || !d->vcpu[next] ||
+            !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags))
+            next = 0;
+    } while (next != old);
+
+    d->arch.hvm_domain.round_info[vector] = next;
+    spin_unlock(&d->arch.hvm_domain.round_robin_lock);
+    return target;
+}
+
+void
+vlapic_EOI_set(struct vlapic *vlapic)
+{
+    int vector = vlapic_find_highest_isr(vlapic);
+
+    /* Not every EOI write has a corresponding bit set in the ISR; one
+       example is when the kernel checks the timer in setup_IO_APIC. */
+    if (vector == -1)
+        return;
+
+    vlapic_clear_isr(vlapic, vector);
+    vlapic_update_ppr(vlapic);
+
+    if (test_and_clear_bit(vector, &vlapic->tmr[0]))
+        ioapic_update_EOI(vlapic->domain, vector);
+}
+
+int vlapic_check_vector(struct vlapic *vlapic,
+                        unsigned char dm, int vector)
+{
+    if ((dm == VLAPIC_DELIV_MODE_FIXED) && (vector < 16)) {
+        vlapic->err_status |= 0x40;
+        vlapic_accept_irq(vlapic->vcpu, VLAPIC_DELIV_MODE_FIXED,
+          vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), 0, 0);
+        printk("<vlapic_check_vector>: check fail\n");
+        return 0;
+    }
+    return 1;
+}
+
+
+void vlapic_ipi(struct vlapic *vlapic)
+{
+    unsigned int dest = (vlapic->icr_high >> 24) & 0xff;
+    unsigned int short_hand = (vlapic->icr_low >> 18) & 3;
+    unsigned int trig_mode = (vlapic->icr_low >> 15) & 1;
+    unsigned int level = (vlapic->icr_low >> 14) & 1;
+    unsigned int dest_mode = (vlapic->icr_low >> 11) & 1;
+    unsigned int delivery_mode = (vlapic->icr_low >> 8) & 7;
+    unsigned int vector = (vlapic->icr_low & 0xff);
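+
+    /*
+     * The fields above follow the architected ICR layout: vector in bits
+     * 7:0, delivery mode 10:8, destination mode 11, level 14, trigger
+     * mode 15, shorthand 19:18, and the destination in bits 31:24 of
+     * ICR2.
+     */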
+
+    struct vlapic *target;
+    struct vcpu *v = NULL;
+    uint32_t lpr_map = 0;  /* must start empty; destination bits are OR'd in below */
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_ipi: "
+                "icr_high %x icr_low %x "
+                "short_hand %x dest %x trig_mode %x level %x "
+                "dest_mode %x delivery_mode %x vector %x",
+                vlapic->icr_high, vlapic->icr_low,
+                short_hand, dest, trig_mode, level, dest_mode,
+                delivery_mode, vector);
+
+    for_each_vcpu ( vlapic->domain, v ) {
+        if (vlapic_match_dest(v, vlapic, short_hand,
+                              dest, dest_mode, delivery_mode)) {
+            if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) {
+                set_bit(v->vcpu_id, &lpr_map);
+            } else
+                vlapic_accept_irq(v, delivery_mode,
+                                  vector, level, trig_mode);
+        }
+    }
+
+    if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) {
+        v = vlapic->vcpu;
+        target = apic_round_robin(v->domain, dest_mode, vector, lpr_map);
+
+        if (target)
+            vlapic_accept_irq(target->vcpu, delivery_mode,
+                              vector, level, trig_mode);
+    }
+}
+
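+/*
+ * Arm the vlapic timer: the offset converts the remaining tick count into
+ * system time, scaled by the timer divide configuration.  The APIC_TMCCT
+ * read handler performs the inverse conversion.
+ */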
+static void vlapic_begin_timer(struct vlapic *vlapic)
+{
+    s_time_t cur = NOW(), offset;
+
+    offset = vlapic->timer_current *
+      (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter;
+    vlapic->vlapic_timer.expires = cur + offset;
+
+    set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires );
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: "
+                "bus_scale %x now %08x%08x expire %08x%08x "
+                "offset %08x%08x current %x",
+                get_apic_bus_scale(), (uint32_t)(cur >> 32), (uint32_t)cur,
+                (uint32_t)(vlapic->vlapic_timer.expires >> 32),
+                (uint32_t) vlapic->vlapic_timer.expires,
+                (uint32_t)(offset >> 32), (uint32_t)offset,
+                vlapic->timer_current);
+}
+
+void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset,
+                         unsigned int len, unsigned int *result)
+{
+    if (len != 4) {
+        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+                    "local apic read with len=%d (should be 4)", len);
+    }
+
+    *result = 0;
+
+    switch (offset) {
+    case APIC_ID:
+        *result = (vlapic->id) << 24;
+        break;
+
+    case APIC_LVR:
+        *result = vlapic->version;
+        break;
+
+    case APIC_TASKPRI:
+        *result = vlapic->task_priority;
+        break;
+
+    case APIC_ARBPRI:
+        printk("Access local APIC ARBPRI register which is for P6\n");
+        break;
+
+    case APIC_PROCPRI:
+        *result = vlapic->processor_priority;
+        break;
+
+    case APIC_EOI:      /* EOI is write only */
+        break;
+
+    case APIC_LDR:
+        *result = vlapic->logical_dest;
+        break;
+
+    case APIC_DFR:
+        *result = vlapic->dest_format;
+        break;
+
+    case APIC_SPIV:
+        *result = vlapic->spurious_vec;
+        break;
+
+    case APIC_ISR:
+    case 0x110:
+    case 0x120:
+    case 0x130:
+    case 0x140:
+    case 0x150:
+    case 0x160:
+    case 0x170:
+        *result = vlapic->isr[(offset - APIC_ISR) >> 4];
+        break;
+
+    case APIC_TMR:
+    case 0x190:
+    case 0x1a0:
+    case 0x1b0:
+    case 0x1c0:
+    case 0x1d0:
+    case 0x1e0:
+    case 0x1f0:
+        *result = vlapic->tmr[(offset - APIC_TMR) >> 4];
+        break;
+
+    case APIC_IRR:
+    case 0x210:
+    case 0x220:
+    case 0x230:
+    case 0x240:
+    case 0x250:
+    case 0x260:
+    case 0x270:
+        *result = vlapic->irr[(offset - APIC_IRR) >> 4];
+        break;
+
+    case APIC_ESR:
+        if (vlapic->err_write_count)
+            *result = vlapic->err_status;
+        break;
+
+    case APIC_ICR:
+        *result = vlapic->icr_low;
+        break;
+
+    case APIC_ICR2:
+        *result = vlapic->icr_high;
+        break;
+
+    case APIC_LVTT:     /* LVT Timer Reg */
+    case APIC_LVTTHMR:     /* LVT Thermal Monitor */
+    case APIC_LVTPC:     /* LVT Performance Counter */
+    case APIC_LVT0:     /* LVT LINT0 Reg */
+    case APIC_LVT1:     /* LVT Lint1 Reg */
+    case APIC_LVTERR:     /* LVT Error Reg */
+        *result = vlapic->lvt[(offset - APIC_LVTT) >> 4];
+        break;
+
+    case APIC_TMICT:
+        *result = vlapic->timer_initial;
+        break;
+
+    case APIC_TMCCT:         /* Timer current count register */
+        {
+            uint32_t counter;
+            s_time_t passed, cur = NOW();
+
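+            /* Convert the time elapsed since the last update back into
+               timer ticks (the inverse of vlapic_begin_timer()), guarding
+               against NOW() having wrapped around. */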
+            if (cur <= vlapic->timer_current_update) {
+                passed = ~0x0LL - vlapic->timer_current_update + cur;
+                HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "time elapsed");
+            } else
+                passed = cur - vlapic->timer_current_update;
+
+            counter = (passed * get_apic_bus_scale()) /
+                      (262144 * vlapic->timer_divide_counter);
+            if (vlapic->timer_current > counter)
+                *result = vlapic->timer_current - counter;
+            else {
+                if (!vlapic_lvt_timer_period(vlapic))
+                    *result = 0;
+                /* FIXME: should we inject an interrupt here? */
+                else
+                    /* *result = counter % vlapic->timer_initial; */
+                    *result = vlapic->timer_initial -
+                              (counter - vlapic->timer_current);
+            }
+            vlapic->timer_current = *result;
+            vlapic->timer_current_update = NOW();
+
+            HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
+                        "initial %x timer current %x "
+                        "update %08x%08x cur %08x%08x offset %d",
+                        vlapic->timer_initial, vlapic->timer_current,
+                        (uint32_t)(vlapic->timer_current_update >> 32),
+                        (uint32_t)vlapic->timer_current_update,
+                        (uint32_t)(cur >> 32), (uint32_t)cur, counter);
+        }
+        break;
+
+    case APIC_TDCR:
+        *result = vlapic->timer_divconf;
+        break;
+
+    default:
+        printk("Read local APIC address %x not implemented\n",offset);
+        *result = 0;
+        break;
+    }
+}
+
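+/*
+ * Narrow or unaligned guest reads are synthesised from an aligned 32-bit
+ * read of the containing register.
+ */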
+static unsigned long vlapic_read(struct vcpu *v, unsigned long address,
+                                 unsigned long len)
+{
+    unsigned int alignment;
+    unsigned int tmp;
+    unsigned long result;
+    struct vlapic *vlapic = VLAPIC(v);
+    unsigned int offset = address - vlapic->base_address;
+
+    if (len != 4) {
+        /* Some buggy kernels read this with byte/word accesses. */
+        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+                    "Local APIC read with len = %lx, should be 4 instead\n",
+                    len);
+    }
+
+    alignment = offset & 0x3;
+
+    vlapic_read_aligned(vlapic, offset & ~0x3, 4, &tmp);
+    switch (len) {
+    case 1:
+        result = *((unsigned char *)&tmp + alignment);
+        break;
+
+    case 2:
+        result = *(unsigned short *)((unsigned char *)&tmp + alignment);
+        break;
+
+    case 4:
+        result = *(unsigned int *)((unsigned char *)&tmp + alignment);
+        break;
+
+    default:
+        printk("Local APIC read with len = %lx, should be 4 instead\n", len);
+        domain_crash_synchronous();
+        break;
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+                "vlapic_read offset %x with length %lx and the result is %lx",
+                offset, len, result);
+    return result;
+}
+
+static void vlapic_write(struct vcpu *v, unsigned long address,
+                         unsigned long len, unsigned long val)
+{
+    struct vlapic *vlapic = VLAPIC(v);
+    unsigned int offset = address - vlapic->base_address;
+
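+    /* Offset 0xb0 is the EOI register; skip logging those frequent writes. */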
+    if (offset != 0xb0)
+        HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+          "vlapic_write offset %x with length %lx source %lx",
+          offset, len, val);
+
+    /*
+     * According to the IA-32 manual, all registers should be accessed
+     * with 32-bit alignment.
+     */
+    if (len != 4) {
+        unsigned int tmp;
+        unsigned char alignment;
+
+        /* Some kernels will access with byte/word alignment. */
+        printk("Notice: Local APIC write with len = %lx\n", len);
+        alignment = offset & 0x3;
+        tmp = vlapic_read(v, offset & (~0x3), 4);
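+        /* Merge the narrow write into the aligned 32-bit word just read. */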
+        switch (len) {
+        case 1:
+            /* XXX saddr is a temporary variable in the caller, so this
+               should be OK; but the following references to val should
+               still be changed to a local variable later. */
+            val  = (tmp & ~(0xff << alignment)) |
+                        ((val & 0xff) << alignment);
+            break;
+
+        case 2:
+            if (alignment != 0x0 && alignment != 0x2) {
+                printk("alignment error for vlapic with len == 2\n");
+                domain_crash_synchronous();
+            }
+
+            val = (tmp & ~(0xffff << alignment)) |
+                        ((val & 0xffff)  << alignment);
+            break;
+
+        case 3:
+            /* can this happen? */
+            printk("vlapic_write with len = 3 !!!\n");
+            domain_crash_synchronous();
+            break;
+
+        default:
+            printk("Local APIC write with len = %lx, should be 4 instead\n",
+                   len);
+            domain_crash_synchronous();
+            break;
+        }
+    }
+
+    offset &= 0xff0;
+
+    switch (offset) {
+    case APIC_ID:   /* Local APIC ID */
+        vlapic->id = ((val) >> 24) & VAPIC_ID_MASK;
+        break;
+
+    case APIC_TASKPRI:
+        vlapic->task_priority = val & 0xff;
+        vlapic_update_ppr(vlapic);
+        break;
+
+    case APIC_EOI:
+        vlapic_EOI_set(vlapic);
+        break;
+
+    case APIC_LDR:
+        vlapic->logical_dest = val & VAPIC_LDR_MASK;
+        break;
+
+    case APIC_DFR:
+        vlapic->dest_format = val;
+        break;
+
+    case APIC_SPIV:
+        vlapic->spurious_vec = val & 0x1ff;
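+        /* Bit 8 of the spurious-interrupt vector register is the APIC
+           software enable bit; when it is cleared, mask every LVT entry. */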
+        if (!(vlapic->spurious_vec & 0x100)) {
+            int i = 0;
+            for (i = 0; i < VLAPIC_LVT_NUM; i++)
+                vlapic->lvt[i] |= 0x10000;
+            vlapic->status |= VLAPIC_SOFTWARE_DISABLE_MASK;
+        }
+        else
+            vlapic->status &= ~VLAPIC_SOFTWARE_DISABLE_MASK;
+        break;
+
+    case APIC_ESR:
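+        /* ESR is updated on every second write: the first write arms the
+           update, the second clears the accumulated error status. */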
+        vlapic->err_write_count = !vlapic->err_write_count;
+        if (!vlapic->err_write_count)
+            vlapic->err_status = 0;
+        break;
+
+    case APIC_ICR:
+        /* No delay here, so we always clear the pending bit*/
+        vlapic->icr_low = val & ~(1 << 12);
+        vlapic_ipi(vlapic);
+        break;
+
+    case APIC_ICR2:
+        vlapic->icr_high = val & 0xff000000;
+        break;
+
+    case APIC_LVTT:         /* LVT Timer Reg */
+    case APIC_LVTTHMR:      /* LVT Thermal Monitor */
+    case APIC_LVTPC:        /* LVT Performance Counter */
+    case APIC_LVT0:         /* LVT LINT0 Reg */
+    case APIC_LVT1:         /* LVT LINT1 Reg */
+    case APIC_LVTERR:       /* LVT Error Reg */
+        {
+            int vt = (offset - APIC_LVTT) >> 4;
+
+            vlapic->lvt[vt] = val & vlapic_lvt_mask[vt];
+            if (vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK)
+                vlapic->lvt[vt] |= VLAPIC_LVT_BIT_MASK;
+
+            /* On real hardware, writing a vector below 0x20 raises an error. */
+            vlapic_check_vector(vlapic, vlapic_lvt_dm(vlapic->lvt[vt]),
+              vlapic_lvt_vector(vlapic, vt));
+
+            if (!vlapic->vcpu_id && (offset == APIC_LVT0)) {
+                if ((vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_DELIMOD)
+                            == 0x700) {
+                    if (!(vlapic->lvt[VLAPIC_LVT_LINT0] &
+                          VLAPIC_LVT_BIT_MASK)) {
+                        set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
+                    } else
+                        clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
+                }
+                else
+                    clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
+            }
+
+        }
+        break;
+
+    case APIC_TMICT:
+        if (vlapic_timer_active(vlapic))
+            stop_timer(&(vlapic->vlapic_timer));
+
+        vlapic->timer_initial = val;
+        vlapic->timer_current = val;
+        vlapic->timer_current_update = NOW();
+
+        vlapic_begin_timer(vlapic);
+
+        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "timer_init %x timer_current %x"
+                    "timer_current_update %08x%08x",
+                    vlapic->timer_initial, vlapic->timer_current,
+                    (uint32_t)(vlapic->timer_current_update >> 32),
+                    (uint32_t)vlapic->timer_current_update);
+        break;
+
+    case APIC_TDCR:
+        {
+            /* FIXME: clean up this code. */
+            unsigned char tmp1, tmp2;
+            tmp1 = (val & 0xf);
+            tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
+            vlapic->timer_divide_counter = 0x1 << tmp2;
+
+            HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
+                        "timer divider is 0x%x",
+                        vlapic->timer_divide_counter);
+        }
+        break;
+
+    default:
+        printk("Local APIC Write to read-only register\n");
+        break;
+    }
+}
+
+static int vlapic_range(struct vcpu *v, unsigned long addr)
+{
+    struct vlapic *vlapic = VLAPIC(v);
+
+    if (vlapic_global_enabled(vlapic) &&
+        (addr >= vlapic->base_address) &&
+        (addr < (vlapic->base_address + VLOCAL_APIC_MEM_LENGTH)))
+        return 1;
+
+    return 0;
+}
+
+struct hvm_mmio_handler vlapic_mmio_handler = {
+    .check_handler = vlapic_range,
+    .read_handler = vlapic_read,
+    .write_handler = vlapic_write
+};
+
+void vlapic_msr_set(struct vlapic *vlapic, uint64_t value)
+{
+    /* The vlapic pointer is NULL when the APIC is disabled. */
+    if (!vlapic)
+        return;
+
+    if (vlapic->vcpu_id)
+        value &= ~MSR_IA32_APICBASE_BSP;
+
+    vlapic->apic_base_msr = value;
+    vlapic->base_address = vlapic_get_base_address(vlapic);
+
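+    /* Bit 11 of the APIC base MSR is the global enable bit. */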
+    if (!(value & 0x800))
+        set_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status);
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+                "apic base msr = 0x%08x%08x,\nbase address = 0x%lx",
+                (uint32_t)(vlapic->apic_base_msr >> 32),
+                (uint32_t)vlapic->apic_base_msr,
+                vlapic->base_address);
+}
+
+static inline int vlapic_get_init_id(struct vcpu *v)
+{
+    return v->vcpu_id;
+}
+
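+/*
+ * Timer expiry callback: raise the timer vector in the IRR (or count it as
+ * pending if it is already raised), then re-arm the timer if it is periodic.
+ */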
+void vlapic_timer_fn(void *data)
+{
+    struct vlapic *vlapic;
+
+    vlapic = data;
+    if (!vlapic_enabled(vlapic)) return;
+
+    vlapic->timer_current_update = NOW();
+
+    if (vlapic_lvt_timer_enabled(vlapic)) {
+        if (!vlapic_irr_status(vlapic,
+              vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER))) {
+            test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER),
+              &vlapic->irr[0]);
+        }
+        else
+            vlapic->intr_pending_count[
+                vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++;
+        evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain));
+    }
+
+    vlapic->timer_current_update = NOW();
+    if (vlapic_lvt_timer_period(vlapic)) {
+        s_time_t offset;
+
+        vlapic->timer_current = vlapic->timer_initial;
+        offset = vlapic->timer_current *
+                 (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter;
+        vlapic->vlapic_timer.expires = NOW() + offset;
+        set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires);
+    } else {
+        vlapic->timer_current = 0;
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
+      "vlapic_timer_fn: now: %08x%08x expire %08x%08x init %x current %x",
+      (uint32_t)(NOW() >> 32),(uint32_t)NOW(),
+      (uint32_t)(vlapic->vlapic_timer.expires >> 32),
+      (uint32_t)vlapic->vlapic_timer.expires,
+      vlapic->timer_initial,vlapic->timer_current);
+}
+
+#if 0
+static int
+vlapic_check_direct_intr(struct vcpu *v, int * mode)
+{
+    struct vlapic *vlapic = VLAPIC(v);
+    int type;
+
+    type = __fls(vlapic->direct_intr.deliver_mode);
+    if (type == -1)
+        return -1;
+
+    *mode = type;
+    return 0;
+}
+#endif
+
+int
+vlapic_accept_pic_intr(struct vcpu *v)
+{
+    struct vlapic *vlapic = VLAPIC(v);
+
+    return vlapic ? test_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status) : 1;
+}
+
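+/*
+ * Return the highest-priority pending vector if it is not masked by the
+ * current processor priority, replacing illegal vectors (below 0x10) with
+ * the error LVT vector.
+ */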
+int cpu_get_apic_interrupt(struct vcpu* v, int *mode)
+{
+    struct vlapic *vlapic = VLAPIC(v);
+
+    if (vlapic && vlapic_enabled(vlapic)) {
+        int highest_irr = vlapic_find_highest_irr(vlapic);
+
+        if (highest_irr != -1 && highest_irr >= vlapic->processor_priority) {
+            if (highest_irr < 0x10) {
+                vlapic->err_status |= 0x20;
+                /* XXX What happens if this vector is still illegal? */
+                HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
+                  "hvm_intr_assist: illegal vector number %x err_status %x",
+                  highest_irr,  vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR));
+
+                set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR),
+                        &vlapic->irr[0]);
+                highest_irr = vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR);
+            }
+
+            *mode = VLAPIC_DELIV_MODE_FIXED;
+            return highest_irr;
+        }
+    }
+    return -1;
+}
+
+void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode)
+{
+    struct vlapic  *vlapic = VLAPIC(v);
+
+    if (!vlapic)
+        return;
+
+    switch (deliver_mode) {
+    case VLAPIC_DELIV_MODE_FIXED:
+    case VLAPIC_DELIV_MODE_LPRI:
+        vlapic_set_isr(vlapic, vector);
+        vlapic_clear_irr(vlapic, vector);
+        vlapic_update_ppr(vlapic);
+
+        if (vector == vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)) {
+            vlapic->intr_pending_count[vector]--;
+            if (vlapic->intr_pending_count[vector] > 0)
+                test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER),
+                  &vlapic->irr[0]);
+        }
+
+        break;
+        /* XXX: deal with these later. */
+
+    case VLAPIC_DELIV_MODE_RESERVED:
+        printk("Ignore deliver mode 3 in vlapic_post_injection\n");
+        break;
+
+    case VLAPIC_DELIV_MODE_SMI:
+    case VLAPIC_DELIV_MODE_NMI:
+    case VLAPIC_DELIV_MODE_INIT:
+    case VLAPIC_DELIV_MODE_STARTUP:
+        vlapic->direct_intr.deliver_mode &= ~(1 << deliver_mode);
+        break;
+
+    default:
+        printk("<vlapic_post_injection> error deliver mode\n");
+        break;
+    }
+}
+
+static int vlapic_reset(struct vlapic *vlapic)
+{
+    struct vcpu *v;
+    int apic_id, i;
+
+    ASSERT( vlapic != NULL );
+
+    v = vlapic->vcpu;
+
+    ASSERT( v != NULL );
+
+    apic_id = v->vcpu_id;
+
+    vlapic->domain = v->domain;
+
+    vlapic->id = apic_id;
+
+    vlapic->vcpu_id = v->vcpu_id;
+
+    vlapic->version = VLAPIC_VERSION;
+
+    vlapic->apic_base_msr = VLAPIC_BASE_MSR_INIT_VALUE;
+
+    if (apic_id == 0)
+        vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP;
+
+    vlapic->base_address = vlapic_get_base_address(vlapic);
+
+    for (i = 0; i < VLAPIC_LVT_NUM; i++)
+        vlapic->lvt[i] = VLAPIC_LVT_BIT_MASK;
+
+    vlapic->dest_format = 0xffffffffU;
+
+    vlapic->spurious_vec = 0xff;
+
+    hvm_vioapic_add_lapic(vlapic, v);
+
+    init_timer(&vlapic->vlapic_timer,
+                  vlapic_timer_fn, vlapic, v->processor);
+
+#ifdef VLAPIC_NO_BIOS
+    /*
+     * XXX According to the MP specification, the BIOS will enable LVT0/1;
+     * remove this once BIOS support is in place.
+     */
+    if (!v->vcpu_id) {
+        vlapic->lvt[VLAPIC_LVT_LINT0] = 0x700;
+        vlapic->lvt[VLAPIC_LVT_LINT1] = 0x500;
+        set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
+    }
+#endif
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_reset: "
+                "vcpu=%p id=%d vlapic_apic_base_msr=%08x%08x "
+                "vlapic_base_address=%0lx",
+                v, vlapic->id, (uint32_t)(vlapic->apic_base_msr >> 32),
+                (uint32_t)vlapic->apic_base_msr, vlapic->base_address);
+
+    return 1;
+}
+
+int vlapic_init(struct vcpu *v)
+{
+    struct vlapic *vlapic = NULL;
+
+    ASSERT( v != NULL );
+
+    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_init %d", v->vcpu_id);
+
+    vlapic = xmalloc_bytes(sizeof(struct vlapic));
+    if (!vlapic) {
+        printk("malloc vlapic error for vcpu %x\n", v->vcpu_id);
+        return -ENOMEM;
+    }
+
+    memset(vlapic, 0, sizeof(struct vlapic));
+
+    VLAPIC(v) = vlapic;
+
+    vlapic->vcpu = v;
+
+    vlapic_reset(vlapic);
+
+    return 0;
+}
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vmx/io.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vmx/io.c Tue Jan 31 10:49:51 2006
@@ -0,0 +1,207 @@
+/*
+ * io.c: Handling I/O and interrupts related to VMX entry/exit.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/trace.h>
+#include <xen/event.h>
+
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vlapic.h>
+#include <public/hvm/ioreq.h>
+
+#ifdef CONFIG_VMX
+
+#define BSP_CPU(v)    (!((v)->vcpu_id))
+
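+/*
+ * The guest TSC offset is backed off by the cycles accounted to PIT ticks
+ * that have fired but not yet been injected, keeping guest time consistent
+ * with the interrupts the guest has actually seen.
+ */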
+void vmx_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit)
+{
+    u64 drift;
+
+    if ( vpit->first_injected )
+        drift = vpit->period_cycles * vpit->pending_intr_nr;
+    else
+        drift = 0;
+    vpit->shift = v->arch.hvm_vmx.tsc_offset - drift;
+    __vmwrite(TSC_OFFSET, vpit->shift);
+
+#if defined (__i386__)
+    __vmwrite(TSC_OFFSET_HIGH, (vpit->shift >> 32));
+#endif
+}
+
+static inline void
+interrupt_post_injection(struct vcpu * v, int vector, int type)
+{
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+
+    if ( is_pit_irq(v, vector, type) ) {
+        if ( !vpit->first_injected ) {
+            vpit->pending_intr_nr = 0;
+            vpit->scheduled = NOW() + vpit->period;
+            set_timer(&vpit->pit_timer, vpit->scheduled);
+            vpit->first_injected = 1;
+        } else {
+            vpit->pending_intr_nr--;
+        }
+        vpit->inject_point = NOW();
+        vmx_set_tsc_shift (v, vpit);
+    }
+
+    switch(type)
+    {
+    case VLAPIC_DELIV_MODE_EXT:
+        break;
+
+    default:
+        vlapic_post_injection(v, vector, type);
+        break;
+    }
+}
+
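+/*
+ * CPU_BASED_VIRTUAL_INTR_PENDING enables "interrupt window" exiting: the
+ * guest exits as soon as it can accept an interrupt, letting us inject a
+ * pending event that cannot be delivered right now.
+ */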
+static inline void
+enable_irq_window(unsigned long cpu_exec_control)
+{
+    if (!(cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING)) {
+        cpu_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control);
+    }
+}
+
+static inline void
+disable_irq_window(unsigned long cpu_exec_control)
+{
+    if ( cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING ) {
+        cpu_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control);
+    }
+}
+
+asmlinkage void vmx_intr_assist(void)
+{
+    int intr_type = 0;
+    int highest_vector;
+    unsigned long intr_fields, eflags, interruptibility, cpu_exec_control;
+    struct vcpu *v = current;
+    struct hvm_domain *plat = &v->domain->arch.hvm_domain;
+    struct hvm_virpit *vpit = &plat->vpit;
+    struct hvm_virpic *pic = &plat->vpic;
+
+    hvm_pic_assist(v);
+    __vmread_vcpu(v, CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
+    if ( vpit->pending_intr_nr ) {
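+        /* Pulse IRQ0 low then high so the virtual PIC latches a PIT tick. */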
+        pic_set_irq(pic, 0, 0);
+        pic_set_irq(pic, 0, 1);
+    }
+
+    __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields);
+
+    if (intr_fields & INTR_INFO_VALID_MASK) {
+        enable_irq_window(cpu_exec_control);
+        HVM_DBG_LOG(DBG_LEVEL_1, "vmx_intr_assist: intr_fields: %lx",
+                    intr_fields);
+        return;
+    }
+
+    __vmread(GUEST_INTERRUPTIBILITY_INFO, &interruptibility);
+
+    if (interruptibility) {
+        enable_irq_window(cpu_exec_control);
+        HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility: %lx",interruptibility);
+        return;
+    }
+
+    __vmread(GUEST_RFLAGS, &eflags);
+    if (irq_masked(eflags)) {
+        enable_irq_window(cpu_exec_control);
+        return;
+    }
+
+    highest_vector = cpu_get_interrupt(v, &intr_type); 
+
+    if (highest_vector == -1) {
+        disable_irq_window(cpu_exec_control);
+        return;
+    }
+
+    switch (intr_type) {
+    case VLAPIC_DELIV_MODE_EXT:
+    case VLAPIC_DELIV_MODE_FIXED:
+    case VLAPIC_DELIV_MODE_LPRI:
+        vmx_inject_extint(v, highest_vector, VMX_INVALID_ERROR_CODE);
+        TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0);
+        break;
+    case VLAPIC_DELIV_MODE_SMI:
+    case VLAPIC_DELIV_MODE_NMI:
+    case VLAPIC_DELIV_MODE_INIT:
+    case VLAPIC_DELIV_MODE_STARTUP:
+    default:
+        printk("Unsupported interrupt type\n");
+        BUG();
+        break;
+    }
+
+    interrupt_post_injection(v, highest_vector, intr_type);
+    return;
+}
+
+void vmx_do_resume(struct vcpu *v)
+{
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+
+    vmx_stts();
+
+    if (event_pending(v)) {
+        hvm_check_events(v);
+
+        if (test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags))
+            hvm_wait_io();
+    }
+    /* Pick up the elapsed PIT ticks and re-enable pit_timer. */
+    if ( vpit->first_injected ) {
+        pickup_deactive_ticks(vpit);
+    }
+    vmx_set_tsc_shift(v, vpit);
+
+    /* We can't resume the guest if we're waiting on I/O */
+    ASSERT(!test_bit(ARCH_HVM_IO_WAIT, &v->arch.hvm_vcpu.ioflags));
+}
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vmx/vmcs.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,558 @@
+/*
+ * vmcs.c: VMCS management
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/domain_page.h>
+#include <asm/current.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/flushtlb.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#if CONFIG_PAGING_LEVELS >= 4
+#include <asm/shadow_64.h>
+#endif
+
+#ifdef CONFIG_VMX
+
+int vmcs_size;
+
+struct vmcs_struct *alloc_vmcs(void)
+{
+    struct vmcs_struct *vmcs;
+    u32 vmx_msr_low, vmx_msr_high;
+
+    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
+    vmcs_size = vmx_msr_high & 0x1fff;
+    vmcs = alloc_xenheap_pages(get_order_from_bytes(vmcs_size));
+    if (vmcs == NULL)
+        return NULL;
+    memset(vmcs, 0, vmcs_size); /* don't remove this */
+
+    vmcs->vmcs_revision_id = vmx_msr_low;
+    return vmcs;
+}
+
+static void free_vmcs(struct vmcs_struct *vmcs)
+{
+    int order;
+
+    order = get_order_from_bytes(vmcs_size);
+    free_xenheap_pages(vmcs, order);
+}
+
+static int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
+{
+    int error;
+
+    if ((error = __vmptrld(phys_ptr))) {
+        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
+        return error;
+    }
+    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
+    return 0;
+}
+
+#if 0
+static int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
+{
+    /* take the current VMCS */
+    __vmptrst(phys_ptr);
+    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
+    return 0;
+}
+#endif
+
+static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx)
+{
+    int error = 0;
+    void *io_bitmap_a;
+    void *io_bitmap_b;
+
+    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+                       MONITOR_PIN_BASED_EXEC_CONTROLS);
+
+    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
+
+    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
+
+    /* need to use 0x1000 instead of PAGE_SIZE */
+    io_bitmap_a = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000));
+    io_bitmap_b = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000));
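+    /* Setting every bit traps all guest port I/O; the debug port is
+       exempted just below. */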
+    memset(io_bitmap_a, 0xff, 0x1000);
+    /* don't bother debug port access */
+    clear_bit(PC_DEBUG_PORT, io_bitmap_a);
+    memset(io_bitmap_b, 0xff, 0x1000);
+
+    error |= __vmwrite(IO_BITMAP_A, (u64) virt_to_phys(io_bitmap_a));
+    error |= __vmwrite(IO_BITMAP_B, (u64) virt_to_phys(io_bitmap_b));
+
+    arch_vmx->io_bitmap_a = io_bitmap_a;
+    arch_vmx->io_bitmap_b = io_bitmap_b;
+
+    return error;
+}
+
+#define GUEST_LAUNCH_DS         0x08
+#define GUEST_LAUNCH_CS         0x10
+#define GUEST_SEGMENT_LIMIT     0xffffffff
+#define HOST_SEGMENT_LIMIT      0xffffffff
+
+struct host_execution_env {
+    /* selectors */
+    unsigned short ldtr_selector;
+    unsigned short tr_selector;
+    unsigned short ds_selector;
+    unsigned short cs_selector;
+    /* limits */
+    unsigned short gdtr_limit;
+    unsigned short ldtr_limit;
+    unsigned short idtr_limit;
+    unsigned short tr_limit;
+    /* base */
+    unsigned long gdtr_base;
+    unsigned long ldtr_base;
+    unsigned long idtr_base;
+    unsigned long tr_base;
+    unsigned long ds_base;
+    unsigned long cs_base;
+#ifdef __x86_64__
+    unsigned long fs_base;
+    unsigned long gs_base;
+#endif
+};
+
+static void vmx_set_host_env(struct vcpu *v)
+{
+    unsigned int tr, cpu, error = 0;
+    struct host_execution_env host_env;
+    struct Xgt_desc_struct desc;
+
+    cpu = smp_processor_id();
+    __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
+    host_env.idtr_limit = desc.size;
+    host_env.idtr_base = desc.address;
+    error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);
+
+    __asm__ __volatile__ ("sgdt  (%0) \n" :: "a"(&desc) : "memory");
+    host_env.gdtr_limit = desc.size;
+    host_env.gdtr_base = desc.address;
+    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
+
+    __asm__ __volatile__ ("str  (%0) \n" :: "a"(&tr) : "memory");
+    host_env.tr_selector = tr;
+    host_env.tr_limit = sizeof(struct tss_struct);
+    host_env.tr_base = (unsigned long) &init_tss[cpu];
+    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
+    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
+}
+
+static void vmx_do_launch(struct vcpu *v)
+{
+    /* Update CR3, GDT, LDT, TR. */
+    unsigned int  error = 0;
+    unsigned long cr0, cr4;
+    u64     host_tsc;
+
+    if (v->vcpu_id == 0)
+        hvm_setup_platform(v->domain);
+
+    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : );
+
+    error |= __vmwrite(GUEST_CR0, cr0);
+    cr0 &= ~X86_CR0_PG;
+    error |= __vmwrite(CR0_READ_SHADOW, cr0);
+    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                       MONITOR_CPU_BASED_EXEC_CONTROLS);
+
+    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );
+
+#ifdef __x86_64__
+    error |= __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
+#else
+    error |= __vmwrite(GUEST_CR4, cr4);
+#endif
+
+#ifdef __x86_64__
+    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
+#else
+    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE);
+#endif
+    error |= __vmwrite(CR4_READ_SHADOW, cr4);
+
+    vmx_stts();
+
+    if (hvm_apic_support(v->domain))
+        vlapic_init(v);
+
+    vmx_set_host_env(v);
+    init_timer(&v->arch.hvm_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
+
+    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
+    error |= __vmwrite(GUEST_LDTR_BASE, 0);
+    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);
+
+    error |= __vmwrite(GUEST_TR_BASE, 0);
+    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
+
+    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
+    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
+    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
+
+    v->arch.schedule_tail = arch_vmx_do_resume;
+    v->arch.hvm_vmx.launch_cpu = smp_processor_id();
+
+    /* init guest tsc to start from 0 */
+    rdtscll(host_tsc);
+    v->arch.hvm_vmx.tsc_offset = 0 - host_tsc;
+    vmx_set_tsc_shift(v, &v->domain->arch.hvm_domain.vpit);
+}
+
+/*
+ * Initially set the same environment as the host.
+ */
+static inline int construct_init_vmcs_guest(cpu_user_regs_t *regs)
+{
+    int error = 0;
+    union vmcs_arbytes arbytes;
+    unsigned long dr7;
+    unsigned long eflags;
+
+    /* MSR */
+    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
+    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
+
+    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+    /* interrupt */
+    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+    /* mask */
+    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
+    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);
+
+    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+    /* CR3 targets */
+    error |= __vmwrite(CR3_TARGET_COUNT, 0);
+
+    /* Guest Selectors */
+    error |= __vmwrite(GUEST_ES_SELECTOR, GUEST_LAUNCH_DS);
+    error |= __vmwrite(GUEST_SS_SELECTOR, GUEST_LAUNCH_DS);
+    error |= __vmwrite(GUEST_DS_SELECTOR, GUEST_LAUNCH_DS);
+    error |= __vmwrite(GUEST_FS_SELECTOR, GUEST_LAUNCH_DS);
+    error |= __vmwrite(GUEST_GS_SELECTOR, GUEST_LAUNCH_DS);
+    error |= __vmwrite(GUEST_CS_SELECTOR, GUEST_LAUNCH_CS);
+
+    /* Guest segment bases */
+    error |= __vmwrite(GUEST_ES_BASE, 0);
+    error |= __vmwrite(GUEST_SS_BASE, 0);
+    error |= __vmwrite(GUEST_DS_BASE, 0);
+    error |= __vmwrite(GUEST_FS_BASE, 0);
+    error |= __vmwrite(GUEST_GS_BASE, 0);
+    error |= __vmwrite(GUEST_CS_BASE, 0);
+
+    /* Guest segment Limits */
+    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);
+    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
+
+    /* Guest segment AR bytes */
+    arbytes.bytes = 0;
+    arbytes.fields.seg_type = 0x3;          /* type = 3 */
+    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
+    arbytes.fields.dpl = 0;                 /* DPL = 0 */
+    arbytes.fields.p = 1;                   /* segment present */
+    arbytes.fields.default_ops_size = 1;    /* 32-bit */
+    arbytes.fields.g = 1;
+    arbytes.fields.null_bit = 0;            /* not null */
+
+    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
+    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);
+
+    arbytes.fields.seg_type = 0xb;          /* type = 0xb */
+    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
+
+    /* Guest GDT */
+    error |= __vmwrite(GUEST_GDTR_BASE, 0);
+    error |= __vmwrite(GUEST_GDTR_LIMIT, 0);
+
+    /* Guest IDT */
+    error |= __vmwrite(GUEST_IDTR_BASE, 0);
+    error |= __vmwrite(GUEST_IDTR_LIMIT, 0);
+
+    /* Guest LDT & TSS */
+    arbytes.fields.s = 0;                   /* not a code or data segment */
+    arbytes.fields.seg_type = 0x2;          /* LDT */
+    arbytes.fields.default_ops_size = 0;    /* 16-bit */
+    arbytes.fields.g = 0;
+    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);
+
+    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
+    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
+    /* CR3 is set in vmx_final_setup_guest */
+
+    error |= __vmwrite(GUEST_RSP, 0);
+    error |= __vmwrite(GUEST_RIP, regs->eip);
+
+    /* Guest EFLAGS */
+    eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
+    eflags |= HVM_EFLAGS_RESERVED_1; /* set 1s */
+    error |= __vmwrite(GUEST_RFLAGS, eflags);
+
+    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
+    error |= __vmwrite(GUEST_DR7, dr7);
+    error |= __vmwrite(VMCS_LINK_POINTER, 0xffffffff);
+    error |= __vmwrite(VMCS_LINK_POINTER_HIGH, 0xffffffff);
+
+    return error;
+}
+
+static inline int construct_vmcs_host(void)
+{
+    int error = 0;
+#ifdef __x86_64__
+    unsigned long fs_base;
+    unsigned long gs_base;
+#endif
+    unsigned long crn;
+
+    /* Host Selectors */
+    error |= __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
+    error |= __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
+    error |= __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
+#if defined (__i386__)
+    error |= __vmwrite(HOST_FS_SELECTOR, __HYPERVISOR_DS);
+    error |= __vmwrite(HOST_GS_SELECTOR, __HYPERVISOR_DS);
+    error |= __vmwrite(HOST_FS_BASE, 0);
+    error |= __vmwrite(HOST_GS_BASE, 0);
+
+#else
+    rdmsrl(MSR_FS_BASE, fs_base);
+    rdmsrl(MSR_GS_BASE, gs_base);
+    error |= __vmwrite(HOST_FS_BASE, fs_base);
+    error |= __vmwrite(HOST_GS_BASE, gs_base);
+
+#endif
+    error |= __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
+
+    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
+    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
+
+    /* CR3 is set in vmx_final_setup_hostos */
+    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
+    error |= __vmwrite(HOST_CR4, crn);
+
+    error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
+#ifdef __x86_64__
+    /* TBD: support cr8 for 64-bit guest */
+    __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
+    __vmwrite(TPR_THRESHOLD, 0);
+    __vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
+#endif
+
+    return error;
+}
+
+/*
+ * Need to extend to support full virtualization.
+ */
+static int construct_vmcs(struct arch_vmx_struct *arch_vmx,
+                          cpu_user_regs_t *regs)
+{
+    int error;
+    long rc;
+    u64 vmcs_phys_ptr;
+
+    memset(arch_vmx, 0, sizeof(struct arch_vmx_struct));
+
+    /*
+     * Create a new VMCS
+     */
+    if (!(arch_vmx->vmcs = alloc_vmcs())) {
+        printk("Failed to create a new VMCS\n");
+        rc = -ENOMEM;
+        goto err_out;
+    }
+    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
+
+    if ((error = __vmpclear(vmcs_phys_ptr))) {
+        printk("construct_vmcs: VMCLEAR failed\n");
+        rc = -EINVAL;
+        goto err_out;
+    }
+    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
+        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
+               (unsigned long) vmcs_phys_ptr);
+        rc = -EINVAL;
+        goto err_out;
+    }
+    if ((error = construct_vmcs_controls(arch_vmx))) {
+        printk("construct_vmcs: construct_vmcs_controls failed\n");
+        rc = -EINVAL;
+        goto err_out;
+    }
+    /* host selectors */
+    if ((error = construct_vmcs_host())) {
+        printk("construct_vmcs: construct_vmcs_host failed\n");
+        rc = -EINVAL;
+        goto err_out;
+    }
+    /* guest selectors */
+    if ((error = construct_init_vmcs_guest(regs))) {
+        printk("construct_vmcs: construct_vmcs_guest failed\n");
+        rc = -EINVAL;
+        goto err_out;
+    }
+    if ((error |= __vmwrite(EXCEPTION_BITMAP,
+                            MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
+        printk("construct_vmcs: setting Exception bitmap failed\n");
+        rc = -EINVAL;
+        goto err_out;
+    }
+
+    if (regs->eflags & EF_TF)
+        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+    else
+        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+
+    return 0;
+
+err_out:
+    destroy_vmcs(arch_vmx);
+    return rc;
+}
+
+void destroy_vmcs(struct arch_vmx_struct *arch_vmx)
+{
+    free_vmcs(arch_vmx->vmcs);
+    arch_vmx->vmcs = NULL;
+
+    free_xenheap_pages(arch_vmx->io_bitmap_a, get_order_from_bytes(0x1000));
+    arch_vmx->io_bitmap_a = NULL;
+
+    free_xenheap_pages(arch_vmx->io_bitmap_b, get_order_from_bytes(0x1000));
+    arch_vmx->io_bitmap_b = NULL;
+}
+
+/*
+ * Modify guest eflags and exception bitmap for gdb.
+ */
+int modify_vmcs(struct arch_vmx_struct *arch_vmx,
+                struct cpu_user_regs *regs)
+{
+    int error;
+    u64 vmcs_phys_ptr, old, old_phys_ptr;
+    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
+
+    old_phys_ptr = virt_to_phys(&old);
+    __vmptrst(old_phys_ptr);
+    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
+        printk("modify_vmcs: load_vmcs failed: VMCS = %lx\n",
+               (unsigned long) vmcs_phys_ptr);
+        return -EINVAL;
+    }
+
+    /* XXX VMX: change the modify_vmcs argument to v. */
+    hvm_load_cpu_guest_regs(current, regs);
+
+    __vmptrld(old_phys_ptr);
+
+    return 0;
+}
+
+void vm_launch_fail(unsigned long eflags)
+{
+    unsigned long error;
+    __vmread(VM_INSTRUCTION_ERROR, &error);
+    printk("<vm_launch_fail> error code %lx\n", error);
+    __hvm_bug(guest_cpu_user_regs());
+}
+
+void vm_resume_fail(unsigned long eflags)
+{
+    unsigned long error;
+    __vmread(VM_INSTRUCTION_ERROR, &error);
+    printk("<vm_resume_fail> error code %lx\n", error);
+    __hvm_bug(guest_cpu_user_regs());
+}
+
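+/*
+ * If the vcpu last ran on this physical CPU its VMCS is still current and
+ * can simply be reloaded; otherwise the VMCS must be cleared and migrated
+ * to the new CPU, and the host state re-established, before relaunching.
+ */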
+void arch_vmx_do_resume(struct vcpu *v)
+{
+    if ( v->arch.hvm_vmx.launch_cpu == smp_processor_id() )
+    {
+        load_vmcs(&v->arch.hvm_vmx, virt_to_phys(v->arch.hvm_vmx.vmcs));
+        vmx_do_resume(v);
+        reset_stack_and_jump(vmx_asm_do_resume);
+    }
+    else
+    {
+        __vmpclear(virt_to_phys(v->arch.hvm_vmx.vmcs));
+        load_vmcs(&v->arch.hvm_vmx, virt_to_phys(v->arch.hvm_vmx.vmcs));
+        vmx_do_resume(v);
+        vmx_set_host_env(v);
+        v->arch.hvm_vmx.launch_cpu = smp_processor_id();
+        reset_stack_and_jump(vmx_asm_do_relaunch);
+    }
+}
+
+void arch_vmx_do_launch(struct vcpu *v)
+{
+    int error;
+    cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
+
+    error = construct_vmcs(&v->arch.hvm_vmx, regs);
+    if ( error < 0 )
+    {
+        if (v->vcpu_id == 0) {
+            printk("Failed to construct a new VMCS for BSP.\n");
+        } else {
+            printk("Failed to construct a new VMCS for AP %d\n", v->vcpu_id);
+        }
+        domain_crash_synchronous();
+    }
+    vmx_do_launch(v);
+    reset_stack_and_jump(vmx_asm_do_launch);
+}
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vmx/vmx.c
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,2006 @@
+/*
+ * vmx.c: handling VMX architecture-related VM exits
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/trace.h>
+#include <xen/sched.h>
+#include <xen/irq.h>
+#include <xen/softirq.h>
+#include <xen/domain_page.h>
+#include <xen/hypercall.h>
+#include <asm/current.h>
+#include <asm/io.h>
+#include <asm/shadow.h>
+#include <asm/regs.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/spinlock.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/shadow.h>
+#if CONFIG_PAGING_LEVELS >= 3
+#include <asm/shadow_64.h>
+#endif
+#include <public/sched.h>
+#include <public/hvm/ioreq.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vlapic.h>
+
+
+#ifdef CONFIG_VMX
+
+static unsigned long trace_values[NR_CPUS][4];
+#define TRACE_VMEXIT(index, value) (trace_values[smp_processor_id()][index] = (value))
+
+void vmx_final_setup_guest(struct vcpu *v)
+{
+    v->arch.schedule_tail = arch_vmx_do_launch;
+
+    if ( v->vcpu_id == 0 )
+    {
+        struct domain *d = v->domain;
+        struct vcpu *vc;
+
+        /* Initialize monitor page table */
+        for_each_vcpu(d, vc)
+            vc->arch.monitor_table = mk_pagetable(0);
+
+        /*
+         * Required to do this once per domain.
+         * XXX todo: add a separate function to do these.
+         */
+        memset(&d->shared_info->evtchn_mask[0], 0xff,
+               sizeof(d->shared_info->evtchn_mask));
+
+        /* Put the domain in shadow mode even though we're going to be using
+         * the shared 1:1 page table initially. It shouldn't hurt */
+        shadow_mode_enable(d,
+                           SHM_enable|SHM_refcounts|
+                           SHM_translate|SHM_external|SHM_wr_pt_pte);
+    }
+}
+
+void vmx_relinquish_resources(struct vcpu *v)
+{
+    struct hvm_virpit *vpit;
+    
+    if (v->vcpu_id == 0) {
+        /* unmap IO shared page */
+        struct domain *d = v->domain;
+        if ( d->arch.hvm_domain.shared_page_va )
+            unmap_domain_page_global(
+               (void *)d->arch.hvm_domain.shared_page_va);
+    }
+
+    destroy_vmcs(&v->arch.hvm_vmx);
+    free_monitor_pagetable(v);
+    vpit = &v->domain->arch.hvm_domain.vpit;
+    kill_timer(&vpit->pit_timer);
+    kill_timer(&v->arch.hvm_vmx.hlt_timer);
+    if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) )
+    {
+        kill_timer(&VLAPIC(v)->vlapic_timer);
+        xfree(VLAPIC(v));
+    }
+}
+
+#ifdef __x86_64__
+static struct vmx_msr_state percpu_msr[NR_CPUS];
+
+static u32 msr_data_index[VMX_MSR_COUNT] =
+{
+    MSR_LSTAR, MSR_STAR, MSR_CSTAR,
+    MSR_SYSCALL_MASK, MSR_EFER,
+};
+
+void vmx_save_segments(struct vcpu *v)
+{
+    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_content.shadow_gs);
+}
+
+/*
+ * To avoid MSR save/restore at every VM exit/entry, we restore the
+ * x86_64-specific MSRs at domain switch time. Since those MSRs are not
+ * modified once set for generic domains, we don't save them, but simply
+ * reset them to the values set at percpu_traps_init().
+ */
+void vmx_load_msrs(struct vcpu *n)
+{
+    struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
+    int i;
+
+    if ( !hvm_switch_on )
+        return;
+
+    while ( host_state->flags )
+    {
+        i = find_first_set_bit(host_state->flags);
+        wrmsrl(msr_data_index[i], host_state->msr_items[i]);
+        clear_bit(i, &host_state->flags);
+    }
+}
+
+static void vmx_save_init_msrs(void)
+{
+    struct vmx_msr_state *host_state = &percpu_msr[smp_processor_id()];
+    int i;
+
+    for ( i = 0; i < VMX_MSR_COUNT; i++ )
+        rdmsrl(msr_data_index[i], host_state->msr_items[i]);
+}
+
+#define CASE_READ_MSR(address)              \
+    case MSR_ ## address:                 \
+    msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
+    break
+
+#define CASE_WRITE_MSR(address)                                     \
+    case MSR_ ## address:                                           \
+    {                                                               \
+        msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content;    \
+        if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) {    \
+            set_bit(VMX_INDEX_MSR_ ## address, &msr->flags);        \
+        }                                                           \
+        wrmsrl(MSR_ ## address, msr_content);                       \
+        set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags);     \
+    }                                                               \
+    break
+
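+/* XXX: stub -- every address is treated as canonical for now. */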
+#define IS_CANO_ADDRESS(add) 1
+static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
+{
+    u64     msr_content = 0;
+    struct vcpu *vc = current;
+    struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
+    switch (regs->ecx) {
+    case MSR_EFER:
+        msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
+        HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long 
long)msr_content);
+        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
+                     &vc->arch.hvm_vmx.cpu_state))
+            msr_content |= 1 << _EFER_LME;
+
+        if (VMX_LONG_GUEST(vc))
+            msr_content |= 1 << _EFER_LMA;
+        break;
+    case MSR_FS_BASE:
+        if (!(VMX_LONG_GUEST(vc)))
+            /* XXX should this be a #GP fault? */
+            domain_crash_synchronous();
+        __vmread(GUEST_FS_BASE, &msr_content);
+        break;
+    case MSR_GS_BASE:
+        if (!(VMX_LONG_GUEST(vc)))
+            domain_crash_synchronous();
+        __vmread(GUEST_GS_BASE, &msr_content);
+        break;
+    case MSR_SHADOW_GS_BASE:
+        msr_content = msr->shadow_gs;
+        break;
+
+        CASE_READ_MSR(STAR);
+        CASE_READ_MSR(LSTAR);
+        CASE_READ_MSR(CSTAR);
+        CASE_READ_MSR(SYSCALL_MASK);
+    default:
+        return 0;
+    }
+    HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", 
msr_content);
+    regs->eax = msr_content & 0xffffffff;
+    regs->edx = msr_content >> 32;
+    return 1;
+}
+
+static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
+{
+    u64     msr_content = regs->eax | ((u64)regs->edx << 32);
+    struct vcpu *vc = current;
+    struct vmx_msr_state * msr = &vc->arch.hvm_vmx.msr_content;
+    struct vmx_msr_state * host_state =
+        &percpu_msr[smp_processor_id()];
+
+    HVM_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
+                regs->ecx, msr_content);
+
+    switch (regs->ecx) {
+    case MSR_EFER:
+        if ((msr_content & EFER_LME) ^
+            test_bit(VMX_CPU_STATE_LME_ENABLED,
+                     &vc->arch.hvm_vmx.cpu_state)) {
+            if (test_bit(VMX_CPU_STATE_PG_ENABLED,
+                         &vc->arch.hvm_vmx.cpu_state) ||
+                !test_bit(VMX_CPU_STATE_PAE_ENABLED,
+                          &vc->arch.hvm_vmx.cpu_state)) {
+                vmx_inject_exception(vc, TRAP_gp_fault, 0);
+            }
+        }
+        if (msr_content & EFER_LME)
+            set_bit(VMX_CPU_STATE_LME_ENABLED,
+                    &vc->arch.hvm_vmx.cpu_state);
+        /* No update for LME/LMA since they have no effect. */
+        msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
+        if (msr_content & ~(EFER_LME | EFER_LMA)) {
+            msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
+            if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
+                rdmsrl(MSR_EFER,
+                       host_state->msr_items[VMX_INDEX_MSR_EFER]);
+                set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
+                set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
+                wrmsrl(MSR_EFER, msr_content);
+            }
+        }
+        break;
+
+    case MSR_FS_BASE:
+    case MSR_GS_BASE:
+        if (!(VMX_LONG_GUEST(vc)))
+            domain_crash_synchronous();
+        if (!IS_CANO_ADDRESS(msr_content)){
+            HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
+            vmx_inject_exception(vc, TRAP_gp_fault, 0);
+        }
+        if (regs->ecx == MSR_FS_BASE)
+            __vmwrite(GUEST_FS_BASE, msr_content);
+        else
+            __vmwrite(GUEST_GS_BASE, msr_content);
+        break;
+
+    case MSR_SHADOW_GS_BASE:
+        if (!(VMX_LONG_GUEST(vc)))
+            domain_crash_synchronous();
+        vc->arch.hvm_vmx.msr_content.shadow_gs = msr_content;
+        wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
+        break;
+
+        CASE_WRITE_MSR(STAR);
+        CASE_WRITE_MSR(LSTAR);
+        CASE_WRITE_MSR(CSTAR);
+        CASE_WRITE_MSR(SYSCALL_MASK);
+    default:
+        return 0;
+    }
+    return 1;
+}
+
+void
+vmx_restore_msrs(struct vcpu *v)
+{
+    int i = 0;
+    struct vmx_msr_state *guest_state;
+    struct vmx_msr_state *host_state;
+    unsigned long guest_flags;
+
+    guest_state = &v->arch.hvm_vmx.msr_content;
+    host_state = &percpu_msr[smp_processor_id()];
+
+    wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
+    guest_flags = guest_state->flags;
+    if (!guest_flags)
+        return;
+
+    while (guest_flags) {
+        i = find_first_set_bit(guest_flags);
+
+        HVM_DBG_LOG(DBG_LEVEL_2,
+                    "restore guest's index %d msr %lx with %lx\n",
+                    i, (unsigned long)msr_data_index[i],
+                    (unsigned long)guest_state->msr_items[i]);
+        set_bit(i, &host_state->flags);
+        wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
+        clear_bit(i, &guest_flags);
+    }
+}
+#else  /* __i386__ */
+#define  vmx_save_init_msrs()   ((void)0)
+
+static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+
+static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+#endif
+
+void stop_vmx(void)
+{
+    if (read_cr4() & X86_CR4_VMXE)
+        __vmxoff();
+}
+
+int vmx_initialize_guest_resources(struct vcpu *v)
+{
+    vmx_final_setup_guest(v);
+    return 1;
+}
+
+int vmx_relinquish_guest_resources(struct vcpu *v)
+{
+    vmx_relinquish_resources(v);
+    return 1;
+}
+
+void vmx_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
+{
+#if defined (__x86_64__)
+    __vmread(GUEST_RFLAGS, &regs->rflags);
+    __vmread(GUEST_SS_SELECTOR, &regs->ss);
+    __vmread(GUEST_CS_SELECTOR, &regs->cs);
+    __vmread(GUEST_DS_SELECTOR, &regs->ds);
+    __vmread(GUEST_ES_SELECTOR, &regs->es);
+    __vmread(GUEST_GS_SELECTOR, &regs->gs);
+    __vmread(GUEST_FS_SELECTOR, &regs->fs);
+    __vmread(GUEST_RIP, &regs->rip);
+    __vmread(GUEST_RSP, &regs->rsp);
+#elif defined (__i386__)
+    __vmread(GUEST_RFLAGS, &regs->eflags);
+    __vmread(GUEST_SS_SELECTOR, &regs->ss);
+    __vmread(GUEST_CS_SELECTOR, &regs->cs);
+    __vmread(GUEST_DS_SELECTOR, &regs->ds);
+    __vmread(GUEST_ES_SELECTOR, &regs->es);
+    __vmread(GUEST_GS_SELECTOR, &regs->gs);
+    __vmread(GUEST_FS_SELECTOR, &regs->fs);
+    __vmread(GUEST_RIP, &regs->eip);
+    __vmread(GUEST_RSP, &regs->esp);
+#else
+#error Unsupported architecture
+#endif
+}
+
+void vmx_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *regs)
+{
+#if defined (__x86_64__)
+    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
+    __vmwrite(GUEST_RSP, regs->rsp);
+
+    __vmwrite(GUEST_RFLAGS, regs->rflags);
+    if (regs->rflags & EF_TF)
+        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+    else
+        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+
+    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
+    __vmwrite(GUEST_RIP, regs->rip);
+#elif defined (__i386__)
+    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
+    __vmwrite(GUEST_RSP, regs->esp);
+
+    __vmwrite(GUEST_RFLAGS, regs->eflags);
+    if (regs->eflags & EF_TF)
+        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+    else
+        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
+
+    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
+    __vmwrite(GUEST_RIP, regs->eip);
+#else
+#error Unsupported architecture
+#endif
+}
+
+void vmx_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8])
+{
+    __vmread(CR0_READ_SHADOW, &crs[0]);
+    __vmread(GUEST_CR3, &crs[3]);
+    __vmread(CR4_READ_SHADOW, &crs[4]);
+}
+
+void vmx_modify_guest_state(struct vcpu *v)
+{
+    modify_vmcs(&v->arch.hvm_vmx, &v->arch.guest_context.user_regs);
+}
+
+int vmx_realmode(struct vcpu *v)
+{
+    unsigned long rflags;
+
+    __vmread(GUEST_RFLAGS, &rflags);
+    return rflags & X86_EFLAGS_VM;
+}
+
+int vmx_instruction_length(struct vcpu *v)
+{
+    unsigned long inst_len;
+
+    if (__vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len))
+       return 0;
+    return inst_len;
+}
+
+extern long evtchn_send(int lport);
+void do_nmi(struct cpu_user_regs *);
+
+static int check_vmx_controls(u32 ctrls, u32 msr)
+{
+    u32 vmx_msr_low, vmx_msr_high;
+
+    rdmsr(msr, vmx_msr_low, vmx_msr_high);
+    if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
+        printk("Insufficient VMX capability 0x%x, "
+               "msr=0x%x, low=0x%08x, high=0x%x\n",
+               ctrls, msr, vmx_msr_low, vmx_msr_high);
+        return 0;
+    }
+    return 1;
+}
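+
+/*
+ * Background: each VMX capability MSR reports allowed-0 settings in its
+ * low 32 bits and allowed-1 settings in its high 32 bits. The usual way
+ * to derive a valid control word is the adjustment below (a sketch only,
+ * not used by this code, which applies a simpler range check instead):
+ *
+ *     u32 lo, hi, ctls = desired;    // 'desired' is hypothetical
+ *     rdmsr(msr, lo, hi);
+ *     ctls |= lo;                    // set bits that must be 1
+ *     ctls &= hi;                    // clear bits that must be 0
+ */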
+
+int start_vmx(void)
+{
+    struct vmcs_struct *vmcs;
+    u32 ecx;
+    u32 eax, edx;
+    u64 phys_vmcs;
+
+    /*
+     * Xen does not fill x86_capability words except 0.
+     */
+    ecx = cpuid_ecx(1);
+    boot_cpu_data.x86_capability[4] = ecx;
+
+    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
+        return 0;
+
+    rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
+
+    if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
+        if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
+            printk("VMX disabled by Feature Control MSR.\n");
+            return 0;
+        }
+    }
+    else {
+        wrmsr(IA32_FEATURE_CONTROL_MSR,
+              IA32_FEATURE_CONTROL_MSR_LOCK |
+              IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
+    }
+
+    if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
+                            MSR_IA32_VMX_PINBASED_CTLS_MSR))
+        return 0;
+    if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
+                            MSR_IA32_VMX_PROCBASED_CTLS_MSR))
+        return 0;
+    if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
+                            MSR_IA32_VMX_EXIT_CTLS_MSR))
+        return 0;
+    if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
+                            MSR_IA32_VMX_ENTRY_CTLS_MSR))
+        return 0;
+
+    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */
+
+    if (!(vmcs = alloc_vmcs())) {
+        printk("Failed to allocate VMCS\n");
+        return 0;
+    }
+
+    phys_vmcs = (u64) virt_to_phys(vmcs);
+
+    if (!(__vmxon(phys_vmcs))) {
+        printk("VMXON succeeded\n");
+    }
+
+    vmx_save_init_msrs();
+
+    /* Setup HVM interfaces */
+    hvm_funcs.disable = stop_vmx;
+
+    hvm_funcs.initialize_guest_resources = vmx_initialize_guest_resources;
+    hvm_funcs.relinquish_guest_resources = vmx_relinquish_guest_resources;
+
+    hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
+    hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
+
+#ifdef __x86_64__
+    hvm_funcs.save_segments = vmx_save_segments;
+    hvm_funcs.load_msrs = vmx_load_msrs;
+    hvm_funcs.restore_msrs = vmx_restore_msrs;
+#endif
+
+    hvm_funcs.store_cpu_guest_ctrl_regs = vmx_store_cpu_guest_ctrl_regs;
+    hvm_funcs.modify_guest_state = vmx_modify_guest_state;
+
+    hvm_funcs.realmode = vmx_realmode;
+    hvm_funcs.paging_enabled = vmx_paging_enabled;
+    hvm_funcs.instruction_length = vmx_instruction_length;
+
+    hvm_enabled = 1;
+
+    return 1;
+}
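+
+/*
+ * Filling in hvm_funcs above is the heart of the HVM abstraction: common
+ * HVM code calls through this table, so it never needs to know whether
+ * VT-x (this file) or SVM is providing the implementation.
+ */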
+
+/*
+ * Not all VM exits provide a valid value in the VM-exit instruction-length
+ * field; only architecturally valid lengths (1 to 15 bytes) are accepted.
+ */
+#define __get_instruction_length(len) \
+    __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
+    if ((len) < 1 || (len) > 15) \
+        __hvm_bug(&regs);
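+
+/*
+ * Note that this macro expands to two statements and implicitly uses a
+ * local named 'regs' in the caller. A self-contained variant might look
+ * like this sketch:
+ *
+ *     #define __get_instruction_length(len, regs)            \
+ *         do {                                               \
+ *             __vmread(VM_EXIT_INSTRUCTION_LEN, &(len));     \
+ *             if ((len) < 1 || (len) > 15)                   \
+ *                 __hvm_bug(regs);                           \
+ *         } while (0)
+ */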
+
+static inline void __update_guest_eip(unsigned long inst_len)
+{
+    unsigned long current_eip;
+
+    __vmread(GUEST_RIP, &current_eip);
+    __vmwrite(GUEST_RIP, current_eip + inst_len);
+}
+
+
+static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
+{
+    unsigned long gpa; /* FIXME: PAE */
+    int result;
+
+#if 0 /* keep for debugging */
+    {
+        unsigned long eip;
+
+        __vmread(GUEST_RIP, &eip);
+        HVM_DBG_LOG(DBG_LEVEL_VMMU,
+                    "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
+                    va, eip, (unsigned long)regs->error_code);
+    }
+#endif
+
+    if (!vmx_paging_enabled(current)){
+        handle_mmio(va, va);
+        TRACE_VMEXIT (2,2);
+        return 1;
+    }
+    gpa = gva_to_gpa(va);
+
+    /* Use 1:1 page table to identify MMIO address space */
+    if ( mmio_space(gpa) ){
+        struct vcpu *v = current;
+        /* APIC range accesses are skipped when APIC support is off */
+        if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000) {
+            u32 inst_len;
+            __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
+            __update_guest_eip(inst_len);
+            return 1;
+        }
+        TRACE_VMEXIT (2,2);
+        handle_mmio(va, gpa);
+        return 1;
+    }
+
+    result = shadow_fault(va, regs);
+    TRACE_VMEXIT (2,result);
+#if 0
+    if ( !result )
+    {
+        __vmread(GUEST_RIP, &eip);
+        printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
+    }
+#endif
+
+    return result;
+}
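+
+/*
+ * While the guest runs with paging disabled, guest-virtual and
+ * guest-physical addresses coincide, which is why the no-paging path
+ * above passes the faulting va as both arguments to handle_mmio().
+ */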
+
+static void vmx_do_no_device_fault(void)
+{
+    unsigned long cr0;
+    struct vcpu *v = current;
+
+    clts();
+    setup_fpu(current);
+    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
+    if (!(cr0 & X86_CR0_TS)) {
+        __vmread_vcpu(v, GUEST_CR0, &cr0);
+        cr0 &= ~X86_CR0_TS;
+        __vmwrite(GUEST_CR0, cr0);
+    }
+    __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
+}
+
+/* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
+#define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46 
+
+static void vmx_vmexit_do_cpuid(unsigned long input,
+                                struct cpu_user_regs *regs)
+{
+    unsigned int eax, ebx, ecx, edx;
+    unsigned long eip;
+    struct vcpu *v = current;
+
+    __vmread(GUEST_RIP, &eip);
+
+    HVM_DBG_LOG(DBG_LEVEL_1,
+                "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
+                " (esi) %lx, (edi) %lx",
+                (unsigned long)regs->eax, (unsigned long)regs->ebx,
+                (unsigned long)regs->ecx, (unsigned long)regs->edx,
+                (unsigned long)regs->esi, (unsigned long)regs->edi);
+
+    cpuid(input, &eax, &ebx, &ecx, &edx);
+
+    if (input == 1)
+    {
+        if ( hvm_apic_support(v->domain) &&
+                !vlapic_global_enabled((VLAPIC(v))) )
+            clear_bit(X86_FEATURE_APIC, &edx);
+
+#ifdef __x86_64__
+        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
+#endif
+        {
+            clear_bit(X86_FEATURE_PSE, &edx);
+            clear_bit(X86_FEATURE_PAE, &edx);
+            clear_bit(X86_FEATURE_PSE36, &edx);
+        }
+
+        /* Unsupportable for virtualised CPUs. */
+        ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
+        clear_bit(X86_FEATURE_VMXE & 31, &ecx);
+        clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
+    }
+#ifdef __i386__
+    else if ( input == 0x80000001 )
+    {
+        /* Mask feature for Intel ia32e or AMD long mode. */
+        clear_bit(X86_FEATURE_LM & 31, &edx);
+    }
+#endif
+
+    regs->eax = (unsigned long) eax;
+    regs->ebx = (unsigned long) ebx;
+    regs->ecx = (unsigned long) ecx;
+    regs->edx = (unsigned long) edx;
+
+    HVM_DBG_LOG(DBG_LEVEL_1,
+                "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, 
ebx=%x, ecx=%x, edx=%x",
+                eip, input, eax, ebx, ecx, edx);
+
+}
+
+#define CASE_GET_REG_P(REG, reg)    \
+    case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
+
+static void vmx_dr_access(unsigned long exit_qualification,
+                          struct cpu_user_regs *regs)
+{
+    unsigned int reg;
+    unsigned long *reg_p = 0;
+    struct vcpu *v = current;
+    unsigned long eip;
+
+    __vmread(GUEST_RIP, &eip);
+
+    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+    HVM_DBG_LOG(DBG_LEVEL_1,
+                "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
+                eip, reg, exit_qualification);
+
+    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
+        CASE_GET_REG_P(EAX, eax);
+        CASE_GET_REG_P(ECX, ecx);
+        CASE_GET_REG_P(EDX, edx);
+        CASE_GET_REG_P(EBX, ebx);
+        CASE_GET_REG_P(EBP, ebp);
+        CASE_GET_REG_P(ESI, esi);
+        CASE_GET_REG_P(EDI, edi);
+    case REG_ESP:
+        break;
+    default:
+        __hvm_bug(regs);
+    }
+
+    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
+    case TYPE_MOV_TO_DR:
+        /* don't need to check the range */
+        if (reg != REG_ESP)
+            v->arch.guest_context.debugreg[reg] = *reg_p;
+        else {
+            unsigned long value;
+            __vmread(GUEST_RSP, &value);
+            v->arch.guest_context.debugreg[reg] = value;
+        }
+        break;
+    case TYPE_MOV_FROM_DR:
+        if (reg != REG_ESP)
+            *reg_p = v->arch.guest_context.debugreg[reg];
+        else {
+            __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
+        }
+        break;
+    }
+}
+
+/*
+ * Invalidate the TLB entry for va and the shadow page corresponding
+ * to the address va.
+ */
+static void vmx_vmexit_do_invlpg(unsigned long va)
+{
+    unsigned long eip;
+    struct vcpu *v = current;
+
+    __vmread(GUEST_RIP, &eip);
+
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
+                eip, va);
+
+    /*
+     * Take the safe approach: invalidate the shadow entry for this
+     * address so it is re-copied from the guest page tables on demand.
+     */
+    shadow_invlpg(v, va);
+}
+
+static int check_for_null_selector(unsigned long eip)
+{
+    unsigned char inst[MAX_INST_LEN];
+    unsigned long sel;
+    int i, inst_len;
+    int inst_copy_from_guest(unsigned char *, unsigned long, int);
+
+    __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
+    memset(inst, 0, MAX_INST_LEN);
+    if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
+        printf("check_for_null_selector: get guest instruction failed\n");
+        domain_crash_synchronous();
+    }
+
+    for (i = 0; i < inst_len; i++) {
+        switch (inst[i]) {
+        case 0xf3: /* REPZ */
+        case 0xf2: /* REPNZ */
+        case 0xf0: /* LOCK */
+        case 0x66: /* data32 */
+        case 0x67: /* addr32 */
+            continue;
+        case 0x2e: /* CS */
+            __vmread(GUEST_CS_SELECTOR, &sel);
+            break;
+        case 0x36: /* SS */
+            __vmread(GUEST_SS_SELECTOR, &sel);
+            break;
+        case 0x26: /* ES */
+            __vmread(GUEST_ES_SELECTOR, &sel);
+            break;
+        case 0x64: /* FS */
+            __vmread(GUEST_FS_SELECTOR, &sel);
+            break;
+        case 0x65: /* GS */
+            __vmread(GUEST_GS_SELECTOR, &sel);
+            break;
+        case 0x3e: /* DS */
+            /* FALLTHROUGH */
+        default:
+            /* DS is the default */
+            __vmread(GUEST_DS_SELECTOR, &sel);
+        }
+        return sel == 0 ? 1 : 0;
+    }
+
+    return 0;
+}
+
+extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
+                         unsigned long count, int size, long value,
+                         int dir, int pvalid);
+
+static void vmx_io_instruction(struct cpu_user_regs *regs,
+                               unsigned long exit_qualification,
+                               unsigned long inst_len)
+{
+    struct mmio_op *mmio_opp;
+    unsigned long eip, cs, eflags;
+    unsigned long port, size, dir;
+    int vm86;
+
+    mmio_opp = &current->arch.hvm_vcpu.mmio_op;
+    mmio_opp->instr = INSTR_PIO;
+    mmio_opp->flags = 0;
+
+    __vmread(GUEST_RIP, &eip);
+    __vmread(GUEST_CS_SELECTOR, &cs);
+    __vmread(GUEST_RFLAGS, &eflags);
+    vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
+
+    HVM_DBG_LOG(DBG_LEVEL_1,
+                "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
+                "exit_qualification = %lx",
+                vm86, cs, eip, exit_qualification);
+
+    if (test_bit(6, &exit_qualification))
+        port = (exit_qualification >> 16) & 0xFFFF;
+    else
+        port = regs->edx & 0xffff;
+    TRACE_VMEXIT(2, port);
+    size = (exit_qualification & 7) + 1;
+    dir = test_bit(3, &exit_qualification); /* direction */
+
+    if (test_bit(4, &exit_qualification)) { /* string instruction */
+        unsigned long addr, count = 1;
+        int sign = regs->eflags & EF_DF ? -1 : 1;
+
+        __vmread(GUEST_LINEAR_ADDRESS, &addr);
+
+        /*
+         * In protected mode, the guest linear address is invalid if
+         * the segment selector is null.
+         */
+        if (!vm86 && check_for_null_selector(eip))
+            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
+
+        if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
+            mmio_opp->flags |= REPZ;
+            count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+        }
+
+        /*
+         * Handle string PIO instructions that cross pages or that are
+         * unaligned. See the comments in hvm_domain.c/handle_mmio().
+         */
+        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+            unsigned long value = 0;
+
+            mmio_opp->flags |= OVERLAP;
+            if (dir == IOREQ_WRITE)
+                hvm_copy(&value, addr, size, HVM_COPY_IN);
+            send_pio_req(regs, port, 1, size, value, dir, 0);
+        } else {
+            if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+                if (sign > 0)
+                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+                else
+                    count = (addr & ~PAGE_MASK) / size;
+            } else
+                __update_guest_eip(inst_len);
+
+            send_pio_req(regs, port, count, size, addr, dir, 1);
+        }
+    } else {
+        if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
+            hvm_print_line(current, regs->eax); /* guest debug output */
+
+        __update_guest_eip(inst_len);
+        send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
+    }
+}
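+
+/*
+ * For reference, the I/O-instruction exit qualification decoded above
+ * is laid out as follows:
+ *
+ *   bits  2:0  access size - 1      bit  4   string instruction (INS/OUTS)
+ *   bit     3  direction (1 = IN)   bit  5   REP prefix present
+ *   bit     6  port was an immediate operand (otherwise it is in DX)
+ *   bits 31:16 port number
+ */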
+
+int
+vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
+{
+    unsigned long inst_len;
+    int error = 0;
+
+    error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
+    error |= __vmread(GUEST_RIP, &c->eip);
+    c->eip += inst_len; /* skip transition instruction */
+    error |= __vmread(GUEST_RSP, &c->esp);
+    error |= __vmread(GUEST_RFLAGS, &c->eflags);
+
+    error |= __vmread(CR0_READ_SHADOW, &c->cr0);
+    c->cr3 = v->arch.hvm_vmx.cpu_cr3;
+    error |= __vmread(CR4_READ_SHADOW, &c->cr4);
+
+    error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
+    error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
+
+    error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
+    error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
+
+    error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
+    error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
+    error |= __vmread(GUEST_CS_BASE, &c->cs_base);
+    error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
+
+    error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
+    error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
+    error |= __vmread(GUEST_DS_BASE, &c->ds_base);
+    error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
+
+    error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
+    error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
+    error |= __vmread(GUEST_ES_BASE, &c->es_base);
+    error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
+
+    error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
+    error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
+    error |= __vmread(GUEST_SS_BASE, &c->ss_base);
+    error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
+
+    error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
+    error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
+    error |= __vmread(GUEST_FS_BASE, &c->fs_base);
+    error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
+
+    error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
+    error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
+    error |= __vmread(GUEST_GS_BASE, &c->gs_base);
+    error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
+
+    error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
+    error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
+    error |= __vmread(GUEST_TR_BASE, &c->tr_base);
+    error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
+
+    error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
+    error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
+    error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
+    error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
+
+    return !error;
+}
+
+int
+vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
+{
+    unsigned long mfn, old_cr4, old_base_mfn;
+    int error = 0;
+
+    error |= __vmwrite(GUEST_RIP, c->eip);
+    error |= __vmwrite(GUEST_RSP, c->esp);
+    error |= __vmwrite(GUEST_RFLAGS, c->eflags);
+
+    error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
+
+    if (!vmx_paging_enabled(v)) {
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
+        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
+        goto skip_cr3;
+    }
+
+    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
+        /*
+         * This is a simple TLB flush, implying the guest has removed
+         * some translation or changed page attributes. We simply
+         * invalidate the shadow.
+         */
+        mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
+        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
+            printk("Invalid CR3 value=%x", c->cr3);
+            domain_crash_synchronous();
+            return 0;
+        }
+        shadow_sync_all(v->domain);
+    } else {
+        /*
+         * If different, make a shadow. Check if the PDBR is valid
+         * first.
+         */
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
+        if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
+            printk("Invalid CR3 value=%x", c->cr3);
+            domain_crash_synchronous();
+            return 0;
+        }
+        mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
+        if (!get_page(pfn_to_page(mfn), v->domain))
+            return 0;
+        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        if (old_base_mfn)
+            put_page(pfn_to_page(old_base_mfn));
+        update_pagetables(v);
+        /*
+         * arch.shadow_table should now hold the next CR3 for shadow
+         */
+        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
+        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+    }
+
+ skip_cr3:
+
+    error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+    error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+    error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
+
+    error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
+    error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
+
+    error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
+    error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
+
+    error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
+    error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
+    error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
+    error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
+    error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
+    error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
+    error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
+    error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
+    error |= __vmwrite(GUEST_ES_BASE, c->es_base);
+    error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
+    error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
+    error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
+    error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
+    error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
+    error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
+    error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
+    error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
+    error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
+    error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
+    error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
+    error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
+    error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
+
+    error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
+    error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
+    error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
+    error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
+
+    return !error;
+}
+
+enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
+
+int
+vmx_assist(struct vcpu *v, int mode)
+{
+    struct vmx_assist_context c;
+    u32 magic;
+    u32 cp;
+
+    /* make sure vmxassist exists (this is not an error) */
+    if (!hvm_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), HVM_COPY_IN))
+        return 0;
+    if (magic != VMXASSIST_MAGIC)
+        return 0;
+
+    switch (mode) {
+        /*
+         * Transfer control to vmxassist.
+         * Store the current context in VMXASSIST_OLD_CONTEXT and load
+         * the new VMXASSIST_NEW_CONTEXT context. This context was created
+         * by vmxassist and will transfer control to it.
+         */
+    case VMX_ASSIST_INVOKE:
+        /* save the old context */
+        if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!vmx_world_save(v, &c))
+                goto error;
+            if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_OUT))
+                goto error;
+        }
+
+        /* restore the new context, this should activate vmxassist */
+        if (!hvm_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), HVM_COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
+                goto error;
+            if (!vmx_world_restore(v, &c))
+                goto error;
+            return 1;
+        }
+        break;
+
+        /*
+         * Restore the VMXASSIST_OLD_CONTEXT that was saved by
+         * VMX_ASSIST_INVOKE above.
+         */
+    case VMX_ASSIST_RESTORE:
+        /* retrieve the context saved by VMX_ASSIST_INVOKE */
+        if (!hvm_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), HVM_COPY_IN))
+            goto error;
+        if (cp != 0) {
+            if (!hvm_copy(&c, cp, sizeof(c), HVM_COPY_IN))
+                goto error;
+            if (!vmx_world_restore(v, &c))
+                goto error;
+            return 1;
+        }
+        break;
+    }
+
+ error:
+    printf("Failed to transfer to vmxassist\n");
+    domain_crash_synchronous();
+    return 0;
+}
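+
+/*
+ * The hand-off protocol above, in brief: a magic word at
+ * VMXASSIST_MAGIC_OFFSET advertises that vmxassist is resident, and two
+ * guest-physical pointers, VMXASSIST_OLD_CONTEXT and VMXASSIST_NEW_CONTEXT,
+ * locate vmx_assist_context structures. INVOKE saves the current world
+ * through the old pointer and loads the new one; RESTORE reloads the
+ * world previously saved through the old pointer.
+ */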
+
+static int vmx_set_cr0(unsigned long value)
+{
+    struct vcpu *v = current;
+    unsigned long mfn;
+    unsigned long eip;
+    int paging_enabled;
+    unsigned long vm_entry_value;
+    unsigned long old_cr0;
+
+    /*
+     * CR0: We don't want to lose PE and PG.
+     */
+    __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
+    paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
+    /* If the OS doesn't use CLTS to clear the TS bit... */
+    if ((old_cr0 & X86_CR0_TS) && !(value & X86_CR0_TS)) {
+        clts();
+        setup_fpu(v);
+    }
+
+    __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
+    __vmwrite(CR0_READ_SHADOW, value);
+
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
+
+    if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
+        /*
+         * The guest CR3 must point to a valid guest-physical frame.
+         */
+        if ( !VALID_MFN(mfn = get_mfn_from_pfn(
+            v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
+             !get_page(pfn_to_page(mfn), v->domain) )
+        {
+            printk("Invalid CR3 value = %lx", v->arch.hvm_vmx.cpu_cr3);
+            domain_crash_synchronous(); /* need to take a clean path */
+        }
+
+#if defined(__x86_64__)
+        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
+                     &v->arch.hvm_vmx.cpu_state) &&
+            !test_bit(VMX_CPU_STATE_PAE_ENABLED,
+                      &v->arch.hvm_vmx.cpu_state)){
+            HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
+            vmx_inject_exception(v, TRAP_gp_fault, 0);
+        }
+        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
+                     &v->arch.hvm_vmx.cpu_state)){
+            /* PAE must already be enabled at this point */
+            HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode\n");
+            set_bit(VMX_CPU_STATE_LMA_ENABLED,
+                    &v->arch.hvm_vmx.cpu_state);
+            __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
+            vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
+            __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+
+#if CONFIG_PAGING_LEVELS >= 4
+            if(!shadow_set_guest_paging_levels(v->domain, 4)) {
+                printk("Unsupported guest paging levels\n");
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+#endif
+        }
+        else
+        {
+#if CONFIG_PAGING_LEVELS >= 4
+            if(!shadow_set_guest_paging_levels(v->domain, 2)) {
+                printk("Unsupported guest paging levels\n");
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+#endif
+        }
+
+        {
+            unsigned long crn;
+            /* update CR4's PAE if needed */
+            __vmread(GUEST_CR4, &crn);
+            if ( (!(crn & X86_CR4_PAE)) &&
+                 test_bit(VMX_CPU_STATE_PAE_ENABLED,
+                          &v->arch.hvm_vmx.cpu_state) )
+            {
+                HVM_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
+                __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
+            }
+        }
+#endif
+        /*
+         * Now arch.guest_table points to machine physical.
+         */
+        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        update_pagetables(v);
+
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
+                    (unsigned long) (mfn << PAGE_SHIFT));
+
+        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+        /*
+         * arch.shadow_table should hold the next CR3 for shadow
+         */
+        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
+                    v->arch.hvm_vmx.cpu_cr3, mfn);
+    }
+
+    if (!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled &&
+        v->arch.hvm_vmx.cpu_cr3) {
+        put_page(pfn_to_page(get_mfn_from_pfn(
+            v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT)));
+        v->arch.guest_table = mk_pagetable(0);
+    }
+
+    /*
+     * VMX does not implement real-mode virtualization. We emulate
+     * real-mode by performing a world switch to VMXAssist whenever
+     * a partition disables the CR0.PE bit.
+     */
+    if ((value & X86_CR0_PE) == 0) {
+        if ( value & X86_CR0_PG ) {
+            /* inject GP here */
+            vmx_inject_exception(v, TRAP_gp_fault, 0);
+            return 0;
+        } else {
+            /*
+             * Disable paging here; this is the same as the
+             * PE == 1 && PG == 0 case.
+             */
+            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
+                         &v->arch.hvm_vmx.cpu_state)){
+                clear_bit(VMX_CPU_STATE_LMA_ENABLED,
+                          &v->arch.hvm_vmx.cpu_state);
+                __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
+                vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
+                __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
+            }
+        }
+
+        clear_all_shadow_status(v->domain);
+        if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
+            set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.hvm_vmx.cpu_state);
+            __vmread(GUEST_RIP, &eip);
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Transfering control to vmxassist %%eip 0x%lx\n", eip);
+            return 0; /* do not update eip! */
+        }
+    } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                        &v->arch.hvm_vmx.cpu_state)) {
+        __vmread(GUEST_RIP, &eip);
+        HVM_DBG_LOG(DBG_LEVEL_1,
+                    "Enabling CR0.PE at %%eip 0x%lx\n", eip);
+        if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
+            clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                      &v->arch.hvm_vmx.cpu_state);
+            __vmread(GUEST_RIP, &eip);
+            HVM_DBG_LOG(DBG_LEVEL_1,
+                        "Restoring to %%eip 0x%lx\n", eip);
+            return 0; /* do not update eip! */
+        }
+    }
+
+    return 1;
+}
+
+#define CASE_GET_REG(REG, reg)  \
+    case REG_ ## REG: value = regs->reg; break
+
+#define CASE_EXTEND_SET_REG \
+      CASE_EXTEND_REG(S)
+#define CASE_EXTEND_GET_REG \
+      CASE_EXTEND_REG(G)
+
+#ifdef __i386__
+#define CASE_EXTEND_REG(T)
+#else
+#define CASE_EXTEND_REG(T)    \
+    CASE_ ## T ## ET_REG(R8, r8); \
+    CASE_ ## T ## ET_REG(R9, r9); \
+    CASE_ ## T ## ET_REG(R10, r10); \
+    CASE_ ## T ## ET_REG(R11, r11); \
+    CASE_ ## T ## ET_REG(R12, r12); \
+    CASE_ ## T ## ET_REG(R13, r13); \
+    CASE_ ## T ## ET_REG(R14, r14); \
+    CASE_ ## T ## ET_REG(R15, r15);
+#endif
+
+
+/*
+ * Write to control registers
+ */
+static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
+{
+    unsigned long value;
+    unsigned long old_cr;
+    struct vcpu *v = current;
+
+    switch (gp) {
+        CASE_GET_REG(EAX, eax);
+        CASE_GET_REG(ECX, ecx);
+        CASE_GET_REG(EDX, edx);
+        CASE_GET_REG(EBX, ebx);
+        CASE_GET_REG(EBP, ebp);
+        CASE_GET_REG(ESI, esi);
+        CASE_GET_REG(EDI, edi);
+        CASE_EXTEND_GET_REG
+    case REG_ESP:
+        __vmread(GUEST_RSP, &value);
+        break;
+    default:
+        printk("invalid gp: %d\n", gp);
+        __hvm_bug(regs);
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
+    HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
+
+    switch(cr) {
+    case 0:
+    {
+        return vmx_set_cr0(value);
+    }
+    case 3:
+    {
+        unsigned long old_base_mfn, mfn;
+
+        /*
+         * If paging is not enabled yet, simply copy the value to CR3.
+         */
+        if (!vmx_paging_enabled(v)) {
+            v->arch.hvm_vmx.cpu_cr3 = value;
+            break;
+        }
+
+        /*
+         * We make a new one if the shadow does not exist.
+         */
+        if (value == v->arch.hvm_vmx.cpu_cr3) {
+            /*
+             * This is a simple TLB flush, implying the guest has
+             * removed some translation or changed page attributes.
+             * We simply invalidate the shadow.
+             */
+            mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
+            if (mfn != pagetable_get_pfn(v->arch.guest_table))
+                __hvm_bug(regs);
+            shadow_sync_all(v->domain);
+        } else {
+            /*
+             * If different, make a shadow. Check if the PDBR is valid
+             * first.
+             */
+            HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
+            if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
+                 !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) ||
+                 !get_page(pfn_to_page(mfn), v->domain) )
+            {
+                printk("Invalid CR3 value=%lx", value);
+                domain_crash_synchronous(); /* need to take a clean path */
+            }
+            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+            v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+            if (old_base_mfn)
+                put_page(pfn_to_page(old_base_mfn));
+            update_pagetables(v);
+            /*
+             * arch.shadow_table should now hold the next CR3 for shadow
+             */
+            v->arch.hvm_vmx.cpu_cr3 = value;
+            HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
+                        value);
+            __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
+        }
+        break;
+    }
+    case 4: /* CR4 */
+    {
+        if (value & X86_CR4_PAE){
+            set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
+        } else {
+            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
+                         &v->arch.hvm_vmx.cpu_state)){
+                vmx_inject_exception(v, TRAP_gp_fault, 0);
+            }
+            clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.hvm_vmx.cpu_state);
+        }
+
+        __vmread(CR4_READ_SHADOW, &old_cr);
+
+        __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
+        __vmwrite(CR4_READ_SHADOW, value);
+
+        /*
+         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
+         * all TLB entries except global entries.
+         */
+        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
+            shadow_sync_all(v->domain);
+        }
+        break;
+    }
+    default:
+        printk("invalid cr: %d\n", gp);
+        __hvm_bug(regs);
+    }
+
+    return 1;
+}
+
+#define CASE_SET_REG(REG, reg)      \
+    case REG_ ## REG:       \
+    regs->reg = value;      \
+    break
+
+/*
+ * Read from control registers. CR0 and CR4 are read from the shadow.
+ */
+static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
+{
+    unsigned long value;
+    struct vcpu *v = current;
+
+    if (cr != 3)
+        __hvm_bug(regs);
+
+    value = (unsigned long) v->arch.hvm_vmx.cpu_cr3;
+
+    switch (gp) {
+        CASE_SET_REG(EAX, eax);
+        CASE_SET_REG(ECX, ecx);
+        CASE_SET_REG(EDX, edx);
+        CASE_SET_REG(EBX, ebx);
+        CASE_SET_REG(EBP, ebp);
+        CASE_SET_REG(ESI, esi);
+        CASE_SET_REG(EDI, edi);
+        CASE_EXTEND_SET_REG
+    case REG_ESP:
+        __vmwrite(GUEST_RSP, value);
+        regs->esp = value;
+        break;
+    default:
+        printk("invalid gp: %d\n", gp);
+        __hvm_bug(regs);
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
+}
+
+static int vmx_cr_access(unsigned long exit_qualification,
+                         struct cpu_user_regs *regs)
+{
+    unsigned int gp, cr;
+    unsigned long value;
+    struct vcpu *v = current;
+
+    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
+    case TYPE_MOV_TO_CR:
+        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
+        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+        TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
+        TRACE_VMEXIT(2,cr);
+        TRACE_VMEXIT(3,gp);
+        return mov_to_cr(gp, cr, regs);
+    case TYPE_MOV_FROM_CR:
+        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
+        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
+        TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
+        TRACE_VMEXIT(2,cr);
+        TRACE_VMEXIT(3,gp);
+        mov_from_cr(cr, gp, regs);
+        break;
+    case TYPE_CLTS:
+        TRACE_VMEXIT(1,TYPE_CLTS);
+        clts();
+        setup_fpu(current);
+
+        __vmread_vcpu(v, GUEST_CR0, &value);
+        value &= ~X86_CR0_TS; /* clear TS */
+        __vmwrite(GUEST_CR0, value);
+
+        __vmread_vcpu(v, CR0_READ_SHADOW, &value);
+        value &= ~X86_CR0_TS; /* clear TS */
+        __vmwrite(CR0_READ_SHADOW, value);
+        break;
+    case TYPE_LMSW:
+        TRACE_VMEXIT(1,TYPE_LMSW);
+        __vmread_vcpu(v, CR0_READ_SHADOW, &value);
+        value = (value & ~0xF) |
+            (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
+        return vmx_set_cr0(value);
+    default:
+        __hvm_bug(regs);
+        break;
+    }
+    return 1;
+}
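+
+/*
+ * For reference, the control-register exit qualification decoded above
+ * carries the CR number in bits 3:0, the access type in bits 5:4
+ * (MOV to CR, MOV from CR, CLTS, LMSW), the general register in
+ * bits 11:8, and, for LMSW, the source data in bits 31:16.
+ */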
+
+static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
+    struct vcpu *v = current;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+    switch (regs->ecx) {
+    case MSR_IA32_TIME_STAMP_COUNTER:
+    {
+        struct hvm_virpit *vpit;
+
+        rdtscll(msr_content);
+        vpit = &(v->domain->arch.hvm_domain.vpit);
+        msr_content += vpit->shift;
+        break;
+    }
+    case MSR_IA32_SYSENTER_CS:
+        __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        __vmread(GUEST_SYSENTER_ESP, &msr_content);
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        __vmread(GUEST_SYSENTER_EIP, &msr_content);
+        break;
+    case MSR_IA32_APICBASE:
+        msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
+        break;
+    default:
+        if (long_mode_do_msr_read(regs))
+            return;
+        rdmsr_user(regs->ecx, regs->eax, regs->edx);
+        /* eax/edx already hold the result; don't clobber them below */
+        return;
+    }
+
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
+                "ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+}
+
+static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
+{
+    u64 msr_content;
+    struct vcpu *v = current;
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+
+    msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
+
+    switch (regs->ecx) {
+    case MSR_IA32_TIME_STAMP_COUNTER:
+    {
+        struct hvm_virpit *vpit;
+        u64 host_tsc, drift;
+
+        rdtscll(host_tsc);
+        vpit = &(v->domain->arch.hvm_domain.vpit);
+        drift = v->arch.hvm_vmx.tsc_offset - vpit->shift;
+        vpit->shift = msr_content - host_tsc;
+        v->arch.hvm_vmx.tsc_offset = vpit->shift + drift;
+        __vmwrite(TSC_OFFSET, vpit->shift);
+
+#if defined (__i386__)
+        __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
+#endif
+        break;
+    }
+    case MSR_IA32_SYSENTER_CS:
+        __vmwrite(GUEST_SYSENTER_CS, msr_content);
+        break;
+    case MSR_IA32_SYSENTER_ESP:
+        __vmwrite(GUEST_SYSENTER_ESP, msr_content);
+        break;
+    case MSR_IA32_SYSENTER_EIP:
+        __vmwrite(GUEST_SYSENTER_EIP, msr_content);
+        break;
+    case MSR_IA32_APICBASE:
+        vlapic_msr_set(VLAPIC(v), msr_content);
+        break;
+    default:
+        long_mode_do_msr_write(regs);
+        break;
+    }
+
+    HVM_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
+                "ecx=%lx, eax=%lx, edx=%lx",
+                (unsigned long)regs->ecx, (unsigned long)regs->eax,
+                (unsigned long)regs->edx);
+}
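+
+/*
+ * The TSC handling above maintains the invariant
+ *
+ *     guest_tsc = host_tsc + vpit->shift
+ *
+ * so a guest WRMSR of value V at host time H sets shift = V - H, while
+ * the pre-existing difference between tsc_offset and shift (the drift)
+ * is carried over across the update.
+ */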
+
+/*
+ * Use this exit to block the VCPU and reschedule.
+ */
+void vmx_vmexit_do_hlt(void)
+{
+    struct vcpu *v = current;
+    struct hvm_virpit *vpit = &(v->domain->arch.hvm_domain.vpit);
+    s_time_t next_pit = -1, next_wakeup;
+
+    if ( !v->vcpu_id )
+        next_pit = get_pit_scheduled(v, vpit);
+    next_wakeup = get_apictime_scheduled(v);
+    if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 )
+        next_wakeup = next_pit;
+    if ( next_wakeup != -1 )
+        set_timer(&current->arch.hvm_vmx.hlt_timer, next_wakeup);
+    do_sched_op(SCHEDOP_block, 0);
+}
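+
+/*
+ * HLT blocks the VCPU, so before yielding we arm hlt_timer for the
+ * earliest pending wakeup: the virtual PIT (VCPU 0 only) or the virtual
+ * APIC timer, whichever fires first; -1 means no wakeup is pending.
+ */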
+
+static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
+{
+    unsigned int vector;
+    int error;
+
+    asmlinkage void do_IRQ(struct cpu_user_regs *);
+    void smp_apic_timer_interrupt(struct cpu_user_regs *);
+    void timer_interrupt(int, void *, struct cpu_user_regs *);
+    void smp_event_check_interrupt(void);
+    void smp_invalidate_interrupt(void);
+    void smp_call_function_interrupt(void);
+    void smp_spurious_interrupt(struct cpu_user_regs *regs);
+    void smp_error_interrupt(struct cpu_user_regs *regs);
+
+    if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
+        || !(vector & INTR_INFO_VALID_MASK))
+        __hvm_bug(regs);
+
+    vector &= 0xff;
+    local_irq_disable();
+
+    switch(vector) {
+    case LOCAL_TIMER_VECTOR:
+        smp_apic_timer_interrupt(regs);
+        break;
+    case EVENT_CHECK_VECTOR:
+        smp_event_check_interrupt();
+        break;
+    case INVALIDATE_TLB_VECTOR:
+        smp_invalidate_interrupt();
+        break;
+    case CALL_FUNCTION_VECTOR:
+        smp_call_function_interrupt();
+        break;
+    case SPURIOUS_APIC_VECTOR:
+        smp_spurious_interrupt(regs);
+        break;
+    case ERROR_APIC_VECTOR:
+        smp_error_interrupt(regs);
+        break;
+    default:
+        regs->entry_vector = vector;
+        do_IRQ(regs);
+        break;
+    }
+}
+
+#if defined (__x86_64__)
+void store_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmread(GUEST_SS_SELECTOR, &regs->ss);
+    __vmread(GUEST_RSP, &regs->rsp);
+    __vmread(GUEST_RFLAGS, &regs->rflags);
+    __vmread(GUEST_CS_SELECTOR, &regs->cs);
+    __vmread(GUEST_DS_SELECTOR, &regs->ds);
+    __vmread(GUEST_ES_SELECTOR, &regs->es);
+    __vmread(GUEST_RIP, &regs->rip);
+}
+#elif defined (__i386__)
+void store_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmread(GUEST_SS_SELECTOR, &regs->ss);
+    __vmread(GUEST_RSP, &regs->esp);
+    __vmread(GUEST_RFLAGS, &regs->eflags);
+    __vmread(GUEST_CS_SELECTOR, &regs->cs);
+    __vmread(GUEST_DS_SELECTOR, &regs->ds);
+    __vmread(GUEST_ES_SELECTOR, &regs->es);
+    __vmread(GUEST_RIP, &regs->eip);
+}
+#endif 
+
+#ifdef XEN_DEBUGGER
+void save_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmread(GUEST_SS_SELECTOR, &regs->xss);
+    __vmread(GUEST_RSP, &regs->esp);
+    __vmread(GUEST_RFLAGS, &regs->eflags);
+    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
+    __vmread(GUEST_RIP, &regs->eip);
+
+    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
+    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
+    __vmread(GUEST_ES_SELECTOR, &regs->xes);
+    __vmread(GUEST_DS_SELECTOR, &regs->xds);
+}
+
+void restore_cpu_user_regs(struct cpu_user_regs *regs)
+{
+    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
+    __vmwrite(GUEST_RSP, regs->esp);
+    __vmwrite(GUEST_RFLAGS, regs->eflags);
+    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
+    __vmwrite(GUEST_RIP, regs->eip);
+
+    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
+    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
+    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
+    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
+}
+#endif
+
+asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
+{
+    unsigned int exit_reason, idtv_info_field;
+    unsigned long exit_qualification, eip, inst_len = 0;
+    struct vcpu *v = current;
+    int error;
+
+    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
+        __hvm_bug(&regs);
+
+    perfc_incra(vmexits, exit_reason);
+
+    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
+    if (idtv_info_field & INTR_INFO_VALID_MASK) {
+        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+        __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
+        if (inst_len >= 1 && inst_len <= 15)
+            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+
+        if (idtv_info_field & 0x800) { /* valid error code */
+            unsigned long error_code;
+            __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+        }
+
+        HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
+    }
+
+    /* don't bother logging hardware interrupts or other hot exit paths */
+    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
+        exit_reason != EXIT_REASON_VMCALL &&
+        exit_reason != EXIT_REASON_IO_INSTRUCTION)
+        HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
+
+    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
+        printk("Failed vm entry\n");
+        domain_crash_synchronous();
+        return;
+    }
+
+    __vmread(GUEST_RIP, &eip);
+    TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
+    TRACE_VMEXIT(0, exit_reason);
+
+    switch (exit_reason) {
+    case EXIT_REASON_EXCEPTION_NMI:
+    {
+        /*
+         * We don't set the software-interrupt exiting (INT n).
+         * (1) We can get an exception (e.g. #PG) in the guest, or
+         * (2) NMI
+         */
+        int error;
+        unsigned int vector;
+        unsigned long va;
+
+        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
+            || !(vector & INTR_INFO_VALID_MASK))
+            __hvm_bug(&regs);
+        vector &= 0xff;
+
+        TRACE_VMEXIT(1,vector);
+        perfc_incra(cause_vector, vector);
+
+        TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
+        switch (vector) {
+#ifdef XEN_DEBUGGER
+        case TRAP_debug:
+        {
+            save_cpu_user_regs(&regs);
+            pdb_handle_exception(1, &regs, 1);
+            restore_cpu_user_regs(&regs);
+            break;
+        }
+        case TRAP_int3:
+        {
+            save_cpu_user_regs(&regs);
+            pdb_handle_exception(3, &regs, 1);
+            restore_cpu_user_regs(&regs);
+            break;
+        }
+#else
+        case TRAP_debug:
+        {
+            void store_cpu_user_regs(struct cpu_user_regs *regs);
+
+            store_cpu_user_regs(&regs);
+            __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
+
+            domain_pause_for_debugger();
+            do_sched_op(SCHEDOP_yield, 0);
+
+            break;
+        }
+#endif
+        case TRAP_no_device:
+        {
+            vmx_do_no_device_fault();
+            break;
+        }
+        case TRAP_page_fault:
+        {
+            __vmread(EXIT_QUALIFICATION, &va);
+            __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
+
+            TRACE_VMEXIT(3,regs.error_code);
+            TRACE_VMEXIT(4,va);
+
+            HVM_DBG_LOG(DBG_LEVEL_VMMU,
+                        "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
+                        (unsigned long)regs.eax, (unsigned long)regs.ebx,
+                        (unsigned long)regs.ecx, (unsigned long)regs.edx,
+                        (unsigned long)regs.esi, (unsigned long)regs.edi);
+            v->arch.hvm_vcpu.mmio_op.inst_decoder_regs = &regs;
+
+            if (!(error = vmx_do_page_fault(va, &regs))) {
+                /*
+                 * Inject #PG using Interruption-Information Fields
+                 */
+                vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
+                v->arch.hvm_vmx.cpu_cr2 = va;
+                TRACE_3D(TRC_VMX_INT, v->domain->domain_id,
+                         TRAP_page_fault, va);
+            }
+            break;
+        }
+        case TRAP_nmi:
+            do_nmi(&regs);
+            break;
+        default:
+            vmx_reflect_exception(v);
+            break;
+        }
+        break;
+    }
+    case EXIT_REASON_EXTERNAL_INTERRUPT:
+        vmx_vmexit_do_extint(&regs);
+        break;
+    case EXIT_REASON_PENDING_INTERRUPT:
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                  MONITOR_CPU_BASED_EXEC_CONTROLS);
+        break;
+    case EXIT_REASON_TASK_SWITCH:
+        __hvm_bug(&regs);
+        break;
+    case EXIT_REASON_CPUID:
+        __get_instruction_length(inst_len);
+        vmx_vmexit_do_cpuid(regs.eax, &regs);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_HLT:
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        vmx_vmexit_do_hlt();
+        break;
+    case EXIT_REASON_INVLPG:
+    {
+        unsigned long   va;
+
+        __vmread(EXIT_QUALIFICATION, &va);
+        vmx_vmexit_do_invlpg(va);
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        break;
+    }
+    case EXIT_REASON_VMCALL:
+        __get_instruction_length(inst_len);
+        __vmread(GUEST_RIP, &eip);
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+        hvm_print_line(v, regs.eax); /* provides the current domain */
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_CR_ACCESS:
+    {
+        __vmread(GUEST_RIP, &eip);
+        __get_instruction_length(inst_len);
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+
+        HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification 
= %lx",
+                    eip, inst_len, exit_qualification);
+        if (vmx_cr_access(exit_qualification, &regs))
+            __update_guest_eip(inst_len);
+        TRACE_VMEXIT(3,regs.error_code);
+        TRACE_VMEXIT(4,exit_qualification);
+        break;
+    }
+    case EXIT_REASON_DR_ACCESS:
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+        vmx_dr_access(exit_qualification, &regs);
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_IO_INSTRUCTION:
+        __vmread(EXIT_QUALIFICATION, &exit_qualification);
+        __get_instruction_length(inst_len);
+        vmx_io_instruction(&regs, exit_qualification, inst_len);
+        TRACE_VMEXIT(4,exit_qualification);
+        break;
+    case EXIT_REASON_MSR_READ:
+        __get_instruction_length(inst_len);
+        vmx_do_msr_read(&regs);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_MSR_WRITE:
+        __vmread(GUEST_RIP, &eip);
+        vmx_do_msr_write(&regs);
+        __get_instruction_length(inst_len);
+        __update_guest_eip(inst_len);
+        break;
+    case EXIT_REASON_MWAIT_INSTRUCTION:
+        __hvm_bug(&regs);
+        break;
+    default:
+        __hvm_bug(&regs);       /* should not happen */
+    }
+}
+
+asmlinkage void vmx_load_cr2(void)
+{
+    struct vcpu *v = current;
+
+    local_irq_disable();
+#ifdef __i386__
+    asm volatile("movl %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
+#else
+    asm volatile("movq %0,%%cr2": :"r" (v->arch.hvm_vmx.cpu_cr2));
+#endif
+}
+
+asmlinkage void vmx_trace_vmentry(void)
+{
+    TRACE_5D(TRC_VMENTRY,
+             trace_values[smp_processor_id()][0],
+             trace_values[smp_processor_id()][1],
+             trace_values[smp_processor_id()][2],
+             trace_values[smp_processor_id()][3],
+             trace_values[smp_processor_id()][4]);
+    TRACE_VMEXIT(0, 9);
+    TRACE_VMEXIT(1, 9);
+    TRACE_VMEXIT(2, 9);
+    TRACE_VMEXIT(3, 9);
+    TRACE_VMEXIT(4, 9);
+}
+
+asmlinkage void vmx_trace_vmexit(void)
+{
+    TRACE_3D(TRC_VMEXIT, 0, 0, 0);
+}
+#endif /* CONFIG_VMX */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vmx/x86_32/exits.S
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,153 @@
+/*
+ * exits.S: VMX architecture-specific exit handling.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define GET_CURRENT(reg)         \
+        movl $STACK_SIZE-4, reg; \
+        orl  %esp, reg;          \
+        andl $~3,reg;            \
+        movl (reg),reg;
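+
+/*
+ * GET_CURRENT relies on each CPU stack being STACK_SIZE-aligned, with the
+ * 'current' vcpu pointer stored in the stack's topmost word: OR-ing %esp
+ * with STACK_SIZE-4 and masking off the low two bits (left over from %esp)
+ * yields that word's address, from which the pointer is loaded.
+ */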
+
+/*
+ * At VMExit time the processor saves the guest selectors, esp, eip, 
+ * and eflags. Therefore we don't save them, but simply decrement 
+ * the kernel stack pointer to make it consistent with the stack frame 
+ * at usual interruption time. The host's eflags are not saved by VMX,
+ * so we set them to a fixed value.
+ *
+ * We also need the room, especially because the orig_eax field is used
+ * by do_IRQ(). Compared to cpu_user_regs, we skip pushing the following:
+ *   (10) u32 gs;                 
+ *   (9)  u32 fs;
+ *   (8)  u32 ds;
+ *   (7)  u32 es;
+ *               <- get_stack_bottom() (= HOST_ESP)
+ *   (6)  u32 ss;
+ *   (5)  u32 esp;
+ *   (4)  u32 eflags;
+ *   (3)  u32 cs;
+ *   (2)  u32 eip;
+ * (2/1)  u16 entry_vector;
+ * (1/1)  u16 error_code;
+ * However, get_stack_bottom() actually returns 20 bytes before the real
+ * bottom of the stack to allow space for:
+ * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers.
+ */
+
+#define HVM_MONITOR_EFLAGS     0x202 /* IF on */
+#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define HVM_SAVE_ALL_NOSEGREGS \
+        pushl $HVM_MONITOR_EFLAGS; \
+        popf; \
+        subl $(NR_SKIPPED_REGS*4), %esp; \
+        movl $0, 0xc(%esp); /* eflags==0 identifies cpu_user_regs as HVM guest */ \
+        pushl %eax; \
+        pushl %ebp; \
+        pushl %edi; \
+        pushl %esi; \
+        pushl %edx; \
+        pushl %ecx; \
+        pushl %ebx;
+
+#define HVM_RESTORE_ALL_NOSEGREGS   \
+        popl %ebx;  \
+        popl %ecx;  \
+        popl %edx;  \
+        popl %esi;  \
+        popl %edi;  \
+        popl %ebp;  \
+        popl %eax;  \
+        addl $(NR_SKIPPED_REGS*4), %esp
+
+        ALIGN
+
+#ifdef CONFIG_VMX
+
+ENTRY(vmx_asm_vmexit_handler)
+        /* selectors are restored/saved by VMX */
+        HVM_SAVE_ALL_NOSEGREGS
+        call vmx_trace_vmexit
+        call vmx_vmexit_handler
+        jmp vmx_asm_do_resume
+
+.macro vmx_asm_common launch initialized
+1:
+/* vmx_test_all_events */
+        .if \initialized
+        GET_CURRENT(%ebx)
+/*test_all_events:*/
+        xorl %ecx,%ecx
+        notl %ecx
+        cli                             # tests must not race interrupts
+/*test_softirqs:*/  
+        movl VCPU_processor(%ebx),%eax
+        shl  $IRQSTAT_shift,%eax
+        test %ecx,irq_stat(%eax,1)
+        jnz 2f
+
+/* vmx_restore_all_guest */
+        call vmx_intr_assist
+        call vmx_load_cr2
+        call vmx_trace_vmentry
+        .endif
+        HVM_RESTORE_ALL_NOSEGREGS
+        /* 
+         * Check if we are going back to VMX-based VM
+         * By this time, all the setups in the VMCS must be complete.
+         */
+        .if \launch
+        /* VMLAUNCH */
+        .byte 0x0f,0x01,0xc2
+        pushf
+        call vm_launch_fail
+        .else
+        /* VMRESUME */
+        .byte 0x0f,0x01,0xc3
+        pushf
+        call vm_resume_fail
+        .endif
+        /* Should never reach here */
+        hlt
+
+        ALIGN
+        .if \initialized
+2:
+/* vmx_process_softirqs */
+        sti       
+        call do_softirq
+        jmp 1b
+        ALIGN
+        .endif
+.endm
+
+ENTRY(vmx_asm_do_launch)
+    vmx_asm_common 1 0
+
+ENTRY(vmx_asm_do_resume)
+    vmx_asm_common 0 1
+
+ENTRY(vmx_asm_do_relaunch)
+    vmx_asm_common 1 1
+
+#endif /* CONFIG_VMX */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/hvm/vmx/x86_64/exits.S
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S       Tue Jan 31 10:49:51 2006
@@ -0,0 +1,160 @@
+/*
+ * exits.S: VMX architecture-specific exit handling.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/softirq.h>
+#include <asm/asm_defns.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <public/xen.h>
+
+#define GET_CURRENT(reg)         \
+        movq $STACK_SIZE-8, reg; \
+        orq  %rsp, reg;          \
+        andq $~7,reg;            \
+        movq (reg),reg;
+
+/*
+ * At VMExit time the processor saves the guest selectors, rsp, rip,
+ * and rflags. Therefore we don't save them, but simply decrement
+ * the kernel stack pointer to make it consistent with the stack frame
+ * at usual interruption time. The host's rflags is not saved by VMX,
+ * so we set it to a fixed value.
+ *
+ * We also need the room, especially because the orig_eax field is used
+ * by do_IRQ(). Compared with cpu_user_regs, we skip pushing the following:
+ *   (10) u64 gs;                 
+ *   (9)  u64 fs;
+ *   (8)  u64 ds;
+ *   (7)  u64 es;
+ *               <- get_stack_bottom() (= HOST_ESP)
+ *   (6)  u64 ss;
+ *   (5)  u64 rsp;
+ *   (4)  u64 rflags;
+ *   (3)  u64 cs;
+ *   (2)  u64 rip;
+ * (2/1)  u32 entry_vector;
+ * (1/1)  u32 error_code;
+ */
+#define HVM_MONITOR_RFLAGS     0x202 /* IF on */
+#define NR_SKIPPED_REGS        6       /* See the above explanation */
+#define HVM_SAVE_ALL_NOSEGREGS \
+        pushq $HVM_MONITOR_RFLAGS; \
+        popfq; \
+        subq $(NR_SKIPPED_REGS*8), %rsp; \
+        pushq %rdi; \
+        pushq %rsi; \
+        pushq %rdx; \
+        pushq %rcx; \
+        pushq %rax; \
+        pushq %r8;  \
+        pushq %r9;  \
+        pushq %r10; \
+        pushq %r11; \
+        pushq %rbx; \
+        pushq %rbp; \
+        pushq %r12; \
+        pushq %r13; \
+        pushq %r14; \
+        pushq %r15; \
+
+#define HVM_RESTORE_ALL_NOSEGREGS \
+        popq %r15; \
+        popq %r14; \
+        popq %r13; \
+        popq %r12; \
+        popq %rbp; \
+        popq %rbx; \
+        popq %r11; \
+        popq %r10; \
+        popq %r9;  \
+        popq %r8;  \
+        popq %rax; \
+        popq %rcx; \
+        popq %rdx; \
+        popq %rsi; \
+        popq %rdi; \
+        addq $(NR_SKIPPED_REGS*8), %rsp; \
+
+#ifdef CONFIG_VMX
+ENTRY(vmx_asm_vmexit_handler)
+        /* selectors are restored/saved by VMX */
+        HVM_SAVE_ALL_NOSEGREGS
+        call vmx_vmexit_handler
+        jmp vmx_asm_do_resume
+
+.macro vmx_asm_common launch initialized 
+1:
+        .if \initialized
+/* vmx_test_all_events */
+        GET_CURRENT(%rbx)
+/* test_all_events: */
+        cli                             # tests must not race interrupts
+/*test_softirqs:*/  
+        movl  VCPU_processor(%rbx),%eax
+        shl   $IRQSTAT_shift,%rax
+        leaq  irq_stat(%rip), %rdx
+        testl $~0,(%rdx,%rax,1)
+        jnz  2f 
+
+/* vmx_restore_all_guest */
+        call vmx_intr_assist
+        call vmx_load_cr2
+        .endif
+        /*
+         * Check whether we are returning to a VMX-based VM.
+         * By this time, all VMCS setup must be complete.
+         */
+        HVM_RESTORE_ALL_NOSEGREGS
+        .if \launch
+        /* VMLAUNCH */
+        .byte 0x0f,0x01,0xc2
+        pushfq
+        call vm_launch_fail
+        .else
+        /* VMRESUME */
+        .byte 0x0f,0x01,0xc3
+        pushfq
+        call vm_resume_fail
+        .endif
+        /* Should never reach here */
+        hlt
+
+        ALIGN
+
+        .if \initialized
+2:
+/* vmx_process_softirqs */
+        sti       
+        call do_softirq
+        jmp 1b
+        ALIGN
+        .endif
+.endm
+
+ENTRY(vmx_asm_do_launch)
+      vmx_asm_common 1 0
+
+ENTRY(vmx_asm_do_resume)
+      vmx_asm_common 0 1
+
+ENTRY(vmx_asm_do_relaunch)
+      vmx_asm_common 1 1
+
+#endif /* CONFIG_VMX */
+
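GET_CURRENT recovers the current vcpu from the stack pointer alone: it rounds
rsp up to the last aligned 8-byte slot of the per-cpu stack, where Xen keeps
the vcpu pointer. An equivalent C sketch (illustrative only):

    static inline struct vcpu *sketch_get_current(unsigned long rsp)
    {
        unsigned long slot = (rsp | (STACK_SIZE - 8)) & ~7UL;
        return *(struct vcpu **)slot;   /* top word of this cpu's stack */
    }
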
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/domain.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/domain.h  Tue Jan 31 10:49:51 2006
@@ -0,0 +1,52 @@
+/*
+ * domain.h: HVM per domain definitions
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ * Copyright (c) 2005, International Business Machines Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_DOMAIN_H__
+#define __ASM_X86_HVM_DOMAIN_H__
+
+#include <asm/e820.h>
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vpit.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/vioapic.h>
+
+#define HVM_PBUF_SIZE   80
+
+struct hvm_domain {
+    unsigned long          shared_page_va;
+    unsigned int           nr_vcpus;
+    unsigned int           apic_enabled;
+
+    struct hvm_virpit      vpit;
+    struct hvm_virpic      vpic;
+    struct hvm_vioapic     vioapic;
+    struct hvm_io_handler  io_handler;
+
+    unsigned char          round_info[256];
+    spinlock_t             round_robin_lock;
+    int                    interrupt_request;
+
+    int                    pbuf_index;
+    char                   pbuf[HVM_PBUF_SIZE];
+};
+
+#endif /* __ASM_X86_HVM_DOMAIN_H__ */
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/hvm.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/hvm.h     Tue Jan 31 10:49:51 2006
@@ -0,0 +1,166 @@
+/*
+ * hvm.h: Hardware virtual machine assist interface definitions.
+ *
+ * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef __ASM_X86_HVM_HVM_H__
+#define __ASM_X86_HVM_HVM_H__
+
+/*
+ * The hardware virtual machine (HVM) interface abstracts away the
+ * x86/x86_64 CPU virtualization assist specifics. Currently this interface
+ * supports Intel's VT-x and AMD's SVM extensions.
+ */
+
+struct hvm_function_table {
+    /*
+     *  Disable HVM functionality
+     */
+    void (*disable)(void);
+
+    /*
+     * Initialize/relinquish HVM guest resources
+     */
+    int (*initialize_guest_resources)(struct vcpu *v);
+    int (*relinquish_guest_resources)(struct vcpu *v);
+
+    /*
+     * Store and load guest state:
+     * 1) load/store guest register state,
+     * 2) load/store segment state (x86_64 only),
+     * 3) load/store msr register state (x86_64 only),
+     * 4) store guest control register state (used for panic dumps),
+     * 5) modify guest state (e.g., set debug flags).
+     */
+    void (*store_cpu_guest_regs)(struct vcpu *v, struct cpu_user_regs *r);
+    void (*load_cpu_guest_regs)(struct vcpu *v, struct cpu_user_regs *r);
+#ifdef __x86_64__
+    void (*save_segments)(struct vcpu *v);
+    void (*load_msrs)(struct vcpu *v);
+    void (*restore_msrs)(struct vcpu *v);
+#endif
+    void (*store_cpu_guest_ctrl_regs)(struct vcpu *v, unsigned long crs[8]);
+    void (*modify_guest_state)(struct vcpu *v);
+
+    /*
+     * Examine specifics of the guest state:
+     * 1) determine whether the guest is in real or vm8086 mode,
+     * 2) determine whether paging is enabled,
+     * 3) return the length of the instruction that caused an exit.
+     */
+    int (*realmode)(struct vcpu *v);
+    int (*paging_enabled)(struct vcpu *v);
+    int (*instruction_length)(struct vcpu *v);
+};
+
+extern struct hvm_function_table hvm_funcs;
+
+/*
+ * For convenience, we provide short-hand wrappers around the table.
+ */
+static inline void
+hvm_disable(void)
+{
+    if (hvm_funcs.disable)
+       hvm_funcs.disable();
+}
+
+static inline int
+hvm_initialize_guest_resources(struct vcpu *v)
+{
+    if (hvm_funcs.initialize_guest_resources)
+       return hvm_funcs.initialize_guest_resources(v);
+    return 0;
+}
+
+static inline int
+hvm_relinquish_guest_resources(struct vcpu *v)
+{
+    if (hvm_funcs.relinquish_guest_resources)
+       return hvm_funcs.relinquish_guest_resources(v);
+    return 0;
+}
+
+static inline void
+hvm_store_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *r)
+{
+    hvm_funcs.store_cpu_guest_regs(v, r);
+}
+
+static inline void
+hvm_load_cpu_guest_regs(struct vcpu *v, struct cpu_user_regs *r)
+{
+    hvm_funcs.load_cpu_guest_regs(v, r);
+}
+
+#ifdef __x86_64__
+static inline void
+hvm_save_segments(struct vcpu *v)
+{
+    if (hvm_funcs.save_segments)
+        hvm_funcs.save_segments(v);
+}
+
+static inline void
+hvm_load_msrs(struct vcpu *v)
+{
+    if (hvm_funcs.load_msrs)
+        hvm_funcs.load_msrs(v);
+}
+
+static inline void
+hvm_restore_msrs(struct vcpu *v)
+{
+    if (hvm_funcs.restore_msrs)
+        hvm_funcs.restore_msrs(v);
+}
+#else
+#define        hvm_save_segments(v)    ((void)0)
+#define        hvm_load_msrs(v)        ((void)0)
+#define        hvm_restore_msrs(v)     ((void)0)
+#endif /* __x86_64__ */
+
+static inline void
+hvm_store_cpu_guest_ctrl_regs(struct vcpu *v, unsigned long crs[8])
+{
+    hvm_funcs.store_cpu_guest_ctrl_regs(v, crs);
+}
+
+static inline void
+hvm_modify_guest_state(struct vcpu *v)
+{
+    hvm_funcs.modify_guest_state(v);
+}
+
+static inline int
+hvm_realmode(struct vcpu *v)
+{
+    return hvm_funcs.realmode(v);
+}
+
+static inline int
+hvm_paging_enabled(struct vcpu *v)
+{
+    return hvm_funcs.paging_enabled(v);
+}
+
+static inline int
+hvm_instruction_length(struct vcpu *v)
+{
+    return hvm_funcs.instruction_length(v);
+}
+#endif /* __ASM_X86_HVM_HVM_H__ */
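
Nothing in this header fills in hvm_funcs; that is left to the VT-x and SVM
start-of-day code. A hypothetical illustration of how a backend might populate
the table (the helper names are assumptions, not code from this patch):

    static int my_realmode(struct vcpu *v)       { return 0; /* stub */ }
    static int my_paging_enabled(struct vcpu *v) { return 1; /* stub */ }

    void example_setup_function_table(void)
    {
        hvm_funcs.realmode       = my_realmode;
        hvm_funcs.paging_enabled = my_paging_enabled;
        /* ... remaining hooks are assigned the same way ... */
    }
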
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/io.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/io.h      Tue Jan 31 10:49:51 2006
@@ -0,0 +1,160 @@
+/*
+ * io.h: HVM IO support
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_X86_HVM_IO_H__
+#define __ASM_X86_HVM_IO_H__
+
+#include <asm/hvm/vpic.h>
+#include <asm/hvm/vioapic.h>
+#include <public/hvm/ioreq.h>
+
+#define MAX_OPERAND_NUM 2
+
+#define mk_operand(size_reg, index, seg, flag) \
+    (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
+
+#define operand_size(operand)   \
+    ((operand >> 24) & 0xFF)
+
+#define operand_index(operand)  \
+    ((operand >> 16) & 0xFF)
+
+/* for instruction.operand[].size */
+#define BYTE       1
+#define WORD       2
+#define LONG       4
+#define QUAD       8
+#define BYTE_64    16
+
+/* for instruction.operand[].flag */
+#define REGISTER   0x1
+#define MEMORY     0x2
+#define IMMEDIATE  0x4
+
+/* for instruction.flags */
+#define REPZ       0x1
+#define REPNZ      0x2
+#define OVERLAP    0x4
+
+/* instruction type */
+#define INSTR_PIO   1
+#define INSTR_OR    2
+#define INSTR_AND   3
+#define INSTR_XOR   4
+#define INSTR_CMP   5
+#define INSTR_MOV   6
+#define INSTR_MOVS  7
+#define INSTR_MOVZX 8
+#define INSTR_MOVSX 9
+#define INSTR_STOS  10
+#define INSTR_TEST  11
+#define INSTR_BT    12
+
+struct instruction {
+    __s8    instr;        /* instruction type */
+    __s16   op_size;      /* the operand's bit size, e.g. 16-bit or 32-bit */
+    __u64   immediate;
+    __u16   seg_sel;      /* segmentation selector */
+    __u32   operand[MAX_OPERAND_NUM];   /* order is AT&T assembly */
+    __u32   flags;
+};
+
+#define MAX_INST_LEN      32
+
+struct mmio_op {
+    int                    flags;
+    int                    instr;       /* instruction */
+    unsigned long          operand[2];  /* operands */
+    unsigned long          immediate;   /* immediate portion */
+    struct cpu_user_regs   *inst_decoder_regs; /* current context */
+};
+
+#define MAX_IO_HANDLER              8
+
+#define VMX_PORTIO                  0
+#define VMX_MMIO                    1
+
+typedef int (*intercept_action_t)(ioreq_t *);
+typedef unsigned long (*hvm_mmio_read_t)(struct vcpu *v,
+                                         unsigned long addr,
+                                         unsigned long length);
+
+typedef void (*hvm_mmio_write_t)(struct vcpu *v,
+                               unsigned long addr,
+                               unsigned long length,
+                               unsigned long val);
+
+typedef int (*hvm_mmio_check_t)(struct vcpu *v, unsigned long addr);
+
+struct io_handler {
+    int                 type;
+    unsigned long       addr;
+    unsigned long       size;
+    intercept_action_t  action;
+};
+
+struct hvm_io_handler {
+    int     num_slot;
+    struct  io_handler hdl_list[MAX_IO_HANDLER];
+};
+
+struct hvm_mmio_handler {
+    hvm_mmio_check_t check_handler;
+    hvm_mmio_read_t read_handler;
+    hvm_mmio_write_t write_handler;
+};
+
+/* global io interception point in HV */
+extern int hvm_io_intercept(ioreq_t *p, int type);
+extern int register_io_handler(unsigned long addr, unsigned long size,
+                               intercept_action_t action, int type);
+
+static inline int hvm_portio_intercept(ioreq_t *p)
+{
+    return hvm_io_intercept(p, VMX_PORTIO);
+}
+
+int hvm_mmio_intercept(ioreq_t *p);
+
+static inline int register_portio_handler(unsigned long addr,
+                                          unsigned long size,
+                                          intercept_action_t action)
+{
+    return register_io_handler(addr, size, action, VMX_PORTIO);
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+static inline int irq_masked(unsigned long eflags)
+{
+    return ((eflags & X86_EFLAGS_IF) == 0);
+}
+#endif
+
+extern void handle_mmio(unsigned long, unsigned long);
+extern void hvm_wait_io(void);
+extern void hvm_io_assist(struct vcpu *v);
+extern void pic_irq_request(int *interrupt_request, int level);
+extern void hvm_pic_assist(struct vcpu *v);
+extern int cpu_get_interrupt(struct vcpu *v, int *type);
+
+// XXX - think about this, maybe use bit 30 of the mfn to signify an MMIO frame.
+#define mmio_space(gpa) (!VALID_MFN(get_mfn_from_pfn((gpa) >> PAGE_SHIFT)))
+
+#endif /* __ASM_X86_HVM_IO_H__ */
+
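To see how the pieces combine, here is an illustrative sketch (the handler is
a stand-in, not code from this patch) that packs an operand descriptor and
registers a port-io intercept for the 0x80 debug port:

    static int debug_port_action(ioreq_t *p)
    {
        return 1;   /* claim the access */
    }

    void example_io_setup(void)
    {
        u32 op = mk_operand(BYTE, 0 /* %al */, 0, REGISTER);
        /* operand_size(op) == BYTE, operand_index(op) == 0 */
        register_portio_handler(0x80, 1, debug_port_action);
    }
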
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/support.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/support.h Tue Jan 31 10:49:51 2006
@@ -0,0 +1,149 @@
+/*
+ * support.h: HVM support routines used by VT-x and SVM.
+ *
+ * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_X86_HVM_SUPPORT_H__
+#define __ASM_X86_HVM_SUPPORT_H__
+
+#include <xen/sched.h>
+#include <asm/types.h>
+#include <asm/regs.h>
+#include <asm/processor.h>
+
+#define HVM_DEBUG 1
+
+#define        HVM_DOMAIN(v)   ((v)->arch.guest_context.flags & VGCF_HVM_GUEST)
+
+static inline shared_iopage_t *get_sp(struct domain *d)
+{
+    return (shared_iopage_t *) d->arch.hvm_domain.shared_page_va;
+}
+
+static inline vcpu_iodata_t *get_vio(struct domain *d, unsigned long cpu)
+{
+    return &get_sp(d)->vcpu_iodata[cpu];
+}
+
+static inline int iopacket_port(struct domain *d)
+{
+    return get_sp(d)->sp_global.eport;
+}
+
+/* XXX these are really VMX specific */
+#define TYPE_MOV_TO_DR          (0 << 4)
+#define TYPE_MOV_FROM_DR        (1 << 4)
+#define TYPE_MOV_TO_CR          (0 << 4)
+#define TYPE_MOV_FROM_CR        (1 << 4)
+#define TYPE_CLTS               (2 << 4)
+#define TYPE_LMSW               (3 << 4)
+
+enum hval_bitmaps {
+    EXCEPTION_BITMAP_TABLE=0,
+};
+ 
+#define EXCEPTION_BITMAP_DE     (1 << 0)        /* Divide Error */
+#define EXCEPTION_BITMAP_DB     (1 << 1)        /* Debug */
+#define EXCEPTION_BITMAP_NMI    (1 << 2)        /* NMI */
+#define EXCEPTION_BITMAP_BP     (1 << 3)        /* Breakpoint */
+#define EXCEPTION_BITMAP_OF     (1 << 4)        /* Overflow */
+#define EXCEPTION_BITMAP_BR     (1 << 5)        /* BOUND Range Exceeded */
+#define EXCEPTION_BITMAP_UD     (1 << 6)        /* Invalid Opcode */
+#define EXCEPTION_BITMAP_NM     (1 << 7)        /* Device Not Available */
+#define EXCEPTION_BITMAP_DF     (1 << 8)        /* Double Fault */
+/* reserved */
+#define EXCEPTION_BITMAP_TS     (1 << 10)       /* Invalid TSS */
+#define EXCEPTION_BITMAP_NP     (1 << 11)       /* Segment Not Present */
+#define EXCEPTION_BITMAP_SS     (1 << 12)       /* Stack-Segment Fault */
+#define EXCEPTION_BITMAP_GP     (1 << 13)       /* General Protection */
+#define EXCEPTION_BITMAP_PG     (1 << 14)       /* Page Fault */
+#define EXCEPTION_BITMAP_MF     (1 << 16)       /* x87 FPU Floating-Point Error (Math Fault) */
+#define EXCEPTION_BITMAP_AC     (1 << 17)       /* Alignment Check */
+#define EXCEPTION_BITMAP_MC     (1 << 18)       /* Machine Check */
+#define EXCEPTION_BITMAP_XF     (1 << 19)       /* SIMD Floating-Point Exception */
+
+/* Pending Debug exceptions */
+#define PENDING_DEBUG_EXC_BP    (1 << 12)       /* break point */
+#define PENDING_DEBUG_EXC_BS    (1 << 14)       /* Single step */
+
+#ifdef XEN_DEBUGGER
+#define MONITOR_DEFAULT_EXCEPTION_BITMAP        \
+    ( EXCEPTION_BITMAP_PG |                     \
+      EXCEPTION_BITMAP_DB |                     \
+      EXCEPTION_BITMAP_BP |                     \
+      EXCEPTION_BITMAP_GP )
+#else
+#define MONITOR_DEFAULT_EXCEPTION_BITMAP        \
+    ( EXCEPTION_BITMAP_PG |                     \
+      EXCEPTION_BITMAP_GP )
+#endif
+
+#define PC_DEBUG_PORT   0x80
+
+#define VMX_INVALID_ERROR_CODE  -1
+
+/*
+ * This works for both the 32-bit and 64-bit eflags filtering
+ * done in construct_init_vmc[sb]_guest().
+ */
+#define HVM_EFLAGS_RESERVED_0          0xffc08028 /* bitmap for 0 */
+#define HVM_EFLAGS_RESERVED_1          0x00000002 /* bitmap for 1 */
+
+#if HVM_DEBUG
+#define DBG_LEVEL_0                 (1 << 0)
+#define DBG_LEVEL_1                 (1 << 1)
+#define DBG_LEVEL_2                 (1 << 2)
+#define DBG_LEVEL_3                 (1 << 3)
+#define DBG_LEVEL_IO                (1 << 4)
+#define DBG_LEVEL_VMMU              (1 << 5)
+#define DBG_LEVEL_VLAPIC            (1 << 6)
+#define DBG_LEVEL_VLAPIC_TIMER      (1 << 7)
+#define DBG_LEVEL_VLAPIC_INTERRUPT  (1 << 8)
+#define DBG_LEVEL_IOAPIC            (1 << 9)
+
+extern unsigned int opt_hvm_debug_level;
+#define HVM_DBG_LOG(level, _f, _a...)           \
+    if ((level) & opt_hvm_debug_level)          \
+        printk("[HVM:%d.%d] " _f "\n",          \
+               current->domain->domain_id, current->vcpu_id, ## _a)
+#else
+#define HVM_DBG_LOG(level, _f, _a...)
+#endif
+
+#define  __hvm_bug(regs)                                        \
+    do {                                                        \
+        printk("__hvm_bug at %s:%d\n", __FILE__, __LINE__);     \
+        show_registers(regs);                                   \
+        domain_crash_synchronous();                             \
+    } while (0)
+
+extern int hvm_enabled;
+extern int hvm_switch_on;
+
+enum { HVM_COPY_IN = 0, HVM_COPY_OUT };
+extern int hvm_copy(void *buf, unsigned long vaddr, int size, int dir);
+
+extern void hvm_setup_platform(struct domain* d);
+extern int hvm_mmio_intercept(ioreq_t *p);
+extern int hvm_io_intercept(ioreq_t *p, int type);
+extern void hvm_check_events(struct vcpu *v);
+extern void hvm_hooks_assist(struct vcpu *v);
+extern void hvm_print_line(struct vcpu *v, const char c);
+extern void hlt_timer_fn(void *data);
+
+#endif /* __ASM_X86_HVM_SUPPORT_H__ */
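
HVM_DBG_LOG compiles away unless HVM_DEBUG is set, and at run time prints only
when the given level bit is set in opt_hvm_debug_level. A usage sketch (port
and size are assumed locals):

    HVM_DBG_LOG(DBG_LEVEL_IO, "pio intercept: port=0x%x size=%d", port, size);
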
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/svm/emulate.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/svm/emulate.h     Tue Jan 31 10:49:51 2006
@@ -0,0 +1,161 @@
+/*
+ * emulate.h: SVM instruction emulation bits.
+ * Copyright (c) 2005, AMD Corporation.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_EMULATE_H__
+#define __ASM_X86_HVM_SVM_EMULATE_H__
+
+#ifdef CONFIG_SVM
+
+typedef enum OPERATING_MODE_ {
+    INVALID_OPERATING_MODE = -1,
+    LEGACY_MODE,
+    LEGACY_16BIT,
+    LONG_MODE,
+    COMP_MODE,
+    COMP_16BIT,
+    OPMODE_16BIT,
+
+    LEGACY_32BIT,
+    COMP_32BIT,
+    OPMODE_32BIT,
+
+    LONG_64BIT,
+    UNKNOWN_OP_MODE,
+    NUM_OPERATING_MODES
+} OPERATING_MODE;
+
+
+/* Enumerate some standard instructions that we support */
+enum instruction_index {
+    INSTR_INVD,
+    INSTR_CPUID,
+    INSTR_RDMSR,
+    INSTR_WRMSR,
+    INSTR_RDTSC,
+    INSTR_RDTSCP,
+    INSTR_CLI,
+    INSTR_STI,
+    INSTR_RDPMC,
+    INSTR_CLGI,
+    INSTR_STGI,
+    INSTR_VMRUN,
+    INSTR_VMLOAD,
+    INSTR_VMSAVE,
+    INSTR_VMCALL,
+    INSTR_PAUSE,
+    INSTR_SKINIT,
+    INSTR_MOV2CR, /* Mov register to CR */
+    INSTR_MOVCR2, /* Not MOV CR2, but MOV CRn to register  */
+    INSTR_MOV2DR,
+    INSTR_MOVDR2,
+    INSTR_PUSHF,
+    INSTR_POPF,
+    INSTR_RSM,
+    INSTR_INVLPG,
+    INSTR_INVLPGA,
+    INSTR_HLT,
+    INSTR_CLTS,
+    INSTR_LMSW,
+    INSTR_SMSW,
+    INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
+};
+
+
+extern unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 
+        struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
+        u8 *size);
+extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 
+        struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
+        u8 *size);
+extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
+extern unsigned int decode_dest_reg(u8 modrm);
+extern unsigned int decode_src_reg(u8 modrm);
+extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb);
+extern unsigned int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+        enum instruction_index *list, unsigned int list_count, 
+        u8 *guest_eip_buf, enum instruction_index *match);
+
+
+static inline unsigned int __get_instruction_length(struct vmcb_struct *vmcb, 
+        enum instruction_index instr, u8 *guest_eip_buf)
+{
+    return __get_instruction_length_from_list(vmcb, &instr, 1, guest_eip_buf, 
+            NULL);
+}
+
+
+static inline unsigned int is_prefix(u8 opc)
+{
+    switch(opc)
+    {
+    case 0x66:
+    case 0x67:
+    case 0x2E:
+    case 0x3E:
+    case 0x26:
+    case 0x64:
+    case 0x65:
+    case 0x36:
+    case 0xF0:
+    case 0xF3:
+    case 0xF2:
+#ifdef __x86_64__
+    case 0x40:
+    case 0x41:
+    case 0x42:
+    case 0x43:
+    case 0x44:
+    case 0x45:
+    case 0x46:
+    case 0x47:
+    case 0x48:
+    case 0x49:
+    case 0x4a:
+    case 0x4b:
+    case 0x4c:
+    case 0x4d:
+    case 0x4e:
+    case 0x4f:
+#endif /* __x86_64__ */
+        return 1;
+    }
+    return 0;
+}
+
+
+static inline void __update_guest_eip(struct vmcb_struct *vmcb,
+        unsigned long inst_len) 
+{
+    vmcb->rip += inst_len;
+}
+
+#endif /* CONFIG_SVM */
+
+#endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
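
A sketch of how an exit handler might use these helpers to step the guest past
an intercepted CPUID (the buffer size is an assumption; 15 bytes is the x86
maximum instruction length):

    void example_cpuid_exit(struct vmcb_struct *vmcb)
    {
        u8 buf[15];
        unsigned int len = __get_instruction_length(vmcb, INSTR_CPUID, buf);
        /* ... emulate CPUID into the guest registers here ... */
        __update_guest_eip(vmcb, len);
    }
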
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/svm/intr.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/svm/intr.h        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,34 @@
+/*
+ * intr.h: SVM Architecture related definitions
+ * Copyright (c) 2005, AMD Corporation.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_INTR_H__
+#define __ASM_X86_HVM_SVM_INTR_H__
+
+#ifdef CONFIG_SVM
+
+extern void svm_set_tsc_shift(struct vcpu *v, struct hvm_virpit *vpit);
+extern void svm_intr_assist(void);
+extern void svm_intr_assist_update(struct vcpu *v, int highest_vector);
+extern void svm_intr_assist_test_valid(struct vcpu *v, 
+        unsigned long *intr_result);
+
+#endif /* CONFIG_SVM */
+
+#endif /* __ASM_X86_HVM_SVM_INTR_H__ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/svm/svm.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/svm/svm.h Tue Jan 31 10:49:51 2006
@@ -0,0 +1,91 @@
+/*
+ * svm.h: SVM Architecture related definitions
+ * Copyright (c) 2005, AMD Corporation.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_H__
+#define __ASM_X86_HVM_SVM_H__
+
+#include <xen/sched.h>
+#include <asm/types.h>
+#include <asm/regs.h>
+#include <asm/processor.h>
+#include <asm/hvm/svm/vmcb.h>
+#include <asm/i387.h>
+
+#ifdef CONFIG_SVM
+extern void asidpool_retire( struct vmcb_struct *vmcb, int core );
+
+extern void svm_asm_vmexit_handler(struct cpu_user_regs);
+extern void svm_setup_function_table(struct vcpu *v);
+
+extern int vmcb_size;
+extern unsigned int cpu_rev;
+
+extern void svm_stop(void);
+extern void svm_save_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs);
+extern void svm_load_cpu_user_regs(struct vcpu *v, struct cpu_user_regs *regs);
+extern int svm_modify_vmcb(struct vcpu *v, struct cpu_user_regs *regs);
+extern void svm_vmread(struct vcpu *v, int index, unsigned long *value);
+extern void svm_vmwrite(struct vcpu *v, int index, unsigned long value);
+extern void svm_final_setup_guest(struct vcpu *v); 
+extern int svm_paging_enabled(struct vcpu *v); 
+extern void svm_relinquish_resources(struct vcpu *v);
+extern void svm_dump_vmcb(const char *from, struct vmcb_struct *vmcb);
+extern void svm_stts(struct vcpu *v); 
+extern void svm_do_launch(struct vcpu *v);
+extern void svm_do_resume(struct vcpu *v);
+extern void arch_svm_do_resume(struct vcpu *v);
+extern int load_vmcb(struct arch_svm_struct *arch_svm, u64 phys_hsa);
+/* For debugging. Remove when no longer needed. */
+extern void svm_dump_host_regs(const char *from);
+
+/* ASID API */
+enum {
+    ASID_AVAILABLE = 0,
+    ASID_INUSE,
+    ASID_RETIRED
+};
+#define   INITIAL_ASID      0
+#define   ASID_MAX          64
+ 
+struct asid_pool {
+    spinlock_t asid_lock;
+    u32 asid[ASID_MAX];
+};
+
+#define SVM_REG_EAX (0) 
+#define SVM_REG_ECX (1) 
+#define SVM_REG_EDX (2) 
+#define SVM_REG_EBX (3) 
+#define SVM_REG_ESP (4) 
+#define SVM_REG_EBP (5) 
+#define SVM_REG_ESI (6) 
+#define SVM_REG_EDI (7) 
+#define SVM_REG_R8  (8)
+#define SVM_REG_R9  (9)
+#define SVM_REG_R10 (10)
+#define SVM_REG_R11 (11)
+#define SVM_REG_R12 (12)
+#define SVM_REG_R13 (13)
+#define SVM_REG_R14 (14)
+#define SVM_REG_R15 (15)
+
+#endif /* CONFIG_SVM */
+
+#endif /* __ASM_X86_HVM_SVM_H__ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/svm/vmcb.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,503 @@
+/*
+ * vmcb.h: VMCB related definitions
+ * Copyright (c) 2005, AMD Corporation.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_HVM_SVM_VMCB_H__
+#define __ASM_X86_HVM_SVM_VMCB_H__
+
+#include <asm/config.h>
+#include <asm/hvm/hvm.h>
+
+#ifdef CONFIG_SVM
+
+extern int start_svm(void);
+
+/* general 1 intercepts */
+enum GenericIntercept1bits
+{
+    GENERAL1_INTERCEPT_INTR          = 1 << 0,
+    GENERAL1_INTERCEPT_NMI           = 1 << 1,
+    GENERAL1_INTERCEPT_SMI           = 1 << 2,
+    GENERAL1_INTERCEPT_INIT          = 1 << 3,
+    GENERAL1_INTERCEPT_VINTR         = 1 << 4,
+    GENERAL1_INTERCEPT_CR0_SEL_WRITE = 1 << 5, 
+    GENERAL1_INTERCEPT_IDTR_READ     = 1 << 6,
+    GENERAL1_INTERCEPT_GDTR_READ     = 1 << 7,
+    GENERAL1_INTERCEPT_LDTR_READ     = 1 << 8,
+    GENERAL1_INTERCEPT_TR_READ       = 1 << 9,
+    GENERAL1_INTERCEPT_IDTR_WRITE    = 1 << 10,
+    GENERAL1_INTERCEPT_GDTR_WRITE    = 1 << 11,
+    GENERAL1_INTERCEPT_LDTR_WRITE    = 1 << 12,
+    GENERAL1_INTERCEPT_TR_WRITE      = 1 << 13,
+    GENERAL1_INTERCEPT_RDTSC         = 1 << 14,
+    GENERAL1_INTERCEPT_RDPMC         = 1 << 15,
+    GENERAL1_INTERCEPT_PUSHF         = 1 << 16,
+    GENERAL1_INTERCEPT_POPF          = 1 << 17,
+    GENERAL1_INTERCEPT_CPUID         = 1 << 18,
+    GENERAL1_INTERCEPT_RSM           = 1 << 19,
+    GENERAL1_INTERCEPT_IRET          = 1 << 20,
+    GENERAL1_INTERCEPT_SWINT         = 1 << 21,
+    GENERAL1_INTERCEPT_INVD          = 1 << 22,
+    GENERAL1_INTERCEPT_PAUSE         = 1 << 23,
+    GENERAL1_INTERCEPT_HLT           = 1 << 24,
+    GENERAL1_INTERCEPT_INVLPG        = 1 << 25,
+    GENERAL1_INTERCEPT_INVLPGA       = 1 << 26,
+    GENERAL1_INTERCEPT_IOIO_PROT     = 1 << 27,
+    GENERAL1_INTERCEPT_MSR_PROT      = 1 << 28,
+    GENERAL1_INTERCEPT_TASK_SWITCH   = 1 << 29,
+    GENERAL1_INTERCEPT_FERR_FREEZE   = 1 << 30,
+    GENERAL1_INTERCEPT_SHUTDOWN_EVT  = 1 << 31
+};
+
+/* general 2 intercepts */
+enum GenericIntercept2bits
+{
+    GENERAL2_INTERCEPT_VMRUN   = 1 << 0,
+    GENERAL2_INTERCEPT_VMMCALL = 1 << 1,
+    GENERAL2_INTERCEPT_VMLOAD  = 1 << 2,
+    GENERAL2_INTERCEPT_VMSAVE  = 1 << 3,
+    GENERAL2_INTERCEPT_STGI    = 1 << 4,
+    GENERAL2_INTERCEPT_CLGI    = 1 << 5,
+    GENERAL2_INTERCEPT_SKINIT  = 1 << 6,
+    GENERAL2_INTERCEPT_RDTSCP  = 1 << 7,
+    GENERAL2_INTERCEPT_ICEBP   = 1 << 8
+};
+
+
+/* control register intercepts */
+enum CRInterceptBits
+{
+    CR_INTERCEPT_CR0_READ   = 1 << 0,
+    CR_INTERCEPT_CR1_READ   = 1 << 1,
+    CR_INTERCEPT_CR2_READ   = 1 << 2,
+    CR_INTERCEPT_CR3_READ   = 1 << 3,
+    CR_INTERCEPT_CR4_READ   = 1 << 4,
+    CR_INTERCEPT_CR5_READ   = 1 << 5,
+    CR_INTERCEPT_CR6_READ   = 1 << 6,
+    CR_INTERCEPT_CR7_READ   = 1 << 7,
+    CR_INTERCEPT_CR8_READ   = 1 << 8,
+    CR_INTERCEPT_CR9_READ   = 1 << 9,
+    CR_INTERCEPT_CR10_READ  = 1 << 10,
+    CR_INTERCEPT_CR11_READ  = 1 << 11,
+    CR_INTERCEPT_CR12_READ  = 1 << 12,
+    CR_INTERCEPT_CR13_READ  = 1 << 13,
+    CR_INTERCEPT_CR14_READ  = 1 << 14,
+    CR_INTERCEPT_CR15_READ  = 1 << 15,
+    CR_INTERCEPT_CR0_WRITE  = 1 << 16,
+    CR_INTERCEPT_CR1_WRITE  = 1 << 17,
+    CR_INTERCEPT_CR2_WRITE  = 1 << 18,
+    CR_INTERCEPT_CR3_WRITE  = 1 << 19,
+    CR_INTERCEPT_CR4_WRITE  = 1 << 20,
+    CR_INTERCEPT_CR5_WRITE  = 1 << 21,
+    CR_INTERCEPT_CR6_WRITE  = 1 << 22,
+    CR_INTERCEPT_CR7_WRITE  = 1 << 23,
+    CR_INTERCEPT_CR8_WRITE  = 1 << 24,
+    CR_INTERCEPT_CR9_WRITE  = 1 << 25,
+    CR_INTERCEPT_CR10_WRITE = 1 << 26,
+    CR_INTERCEPT_CR11_WRITE = 1 << 27,
+    CR_INTERCEPT_CR12_WRITE = 1 << 28,
+    CR_INTERCEPT_CR13_WRITE = 1 << 29,
+    CR_INTERCEPT_CR14_WRITE = 1 << 30,
+    CR_INTERCEPT_CR15_WRITE = 1 << 31,
+};
+
+enum VMEXIT_EXITCODE
+{
+    /* control register read exitcodes */
+    VMEXIT_CR0_READ    =   0,
+    VMEXIT_CR1_READ    =   1,
+    VMEXIT_CR2_READ    =   2,
+    VMEXIT_CR3_READ    =   3,
+    VMEXIT_CR4_READ    =   4,
+    VMEXIT_CR5_READ    =   5,
+    VMEXIT_CR6_READ    =   6,
+    VMEXIT_CR7_READ    =   7,
+    VMEXIT_CR8_READ    =   8,
+    VMEXIT_CR9_READ    =   9,
+    VMEXIT_CR10_READ   =  10,
+    VMEXIT_CR11_READ   =  11,
+    VMEXIT_CR12_READ   =  12,
+    VMEXIT_CR13_READ   =  13,
+    VMEXIT_CR14_READ   =  14,
+    VMEXIT_CR15_READ   =  15,
+
+    /* control register write exitcodes */
+    VMEXIT_CR0_WRITE   =  16,
+    VMEXIT_CR1_WRITE   =  17,
+    VMEXIT_CR2_WRITE   =  18,
+    VMEXIT_CR3_WRITE   =  19,
+    VMEXIT_CR4_WRITE   =  20,
+    VMEXIT_CR5_WRITE   =  21,
+    VMEXIT_CR6_WRITE   =  22,
+    VMEXIT_CR7_WRITE   =  23,
+    VMEXIT_CR8_WRITE   =  24,
+    VMEXIT_CR9_WRITE   =  25,
+    VMEXIT_CR10_WRITE  =  26,
+    VMEXIT_CR11_WRITE  =  27,
+    VMEXIT_CR12_WRITE  =  28,
+    VMEXIT_CR13_WRITE  =  29,
+    VMEXIT_CR14_WRITE  =  30,
+    VMEXIT_CR15_WRITE  =  31,
+
+    /* debug register read exitcodes */
+    VMEXIT_DR0_READ    =  32,
+    VMEXIT_DR1_READ    =  33,
+    VMEXIT_DR2_READ    =  34,
+    VMEXIT_DR3_READ    =  35,
+    VMEXIT_DR4_READ    =  36,
+    VMEXIT_DR5_READ    =  37,
+    VMEXIT_DR6_READ    =  38,
+    VMEXIT_DR7_READ    =  39,
+    VMEXIT_DR8_READ    =  40,
+    VMEXIT_DR9_READ    =  41,
+    VMEXIT_DR10_READ   =  42,
+    VMEXIT_DR11_READ   =  43,
+    VMEXIT_DR12_READ   =  44,
+    VMEXIT_DR13_READ   =  45,
+    VMEXIT_DR14_READ   =  46,
+    VMEXIT_DR15_READ   =  47,
+
+    /* debug register write exitcodes */
+    VMEXIT_DR0_WRITE   =  48,
+    VMEXIT_DR1_WRITE   =  49,
+    VMEXIT_DR2_WRITE   =  50,
+    VMEXIT_DR3_WRITE   =  51,
+    VMEXIT_DR4_WRITE   =  52,
+    VMEXIT_DR5_WRITE   =  53,
+    VMEXIT_DR6_WRITE   =  54,
+    VMEXIT_DR7_WRITE   =  55,
+    VMEXIT_DR8_WRITE   =  56,
+    VMEXIT_DR9_WRITE   =  57,
+    VMEXIT_DR10_WRITE  =  58,
+    VMEXIT_DR11_WRITE  =  59,
+    VMEXIT_DR12_WRITE  =  60,
+    VMEXIT_DR13_WRITE  =  61,
+    VMEXIT_DR14_WRITE  =  62,
+    VMEXIT_DR15_WRITE  =  63,
+
+    /* processor exception exitcodes (VMEXIT_EXCP[0-31]) */
+    VMEXIT_EXCEPTION_DE  =  64, /* divide-by-zero-error */
+    VMEXIT_EXCEPTION_DB  =  65, /* debug */
+    VMEXIT_EXCEPTION_NMI =  66, /* non-maskable-interrupt */
+    VMEXIT_EXCEPTION_BP  =  67, /* breakpoint */
+    VMEXIT_EXCEPTION_OF  =  68, /* overflow */
+    VMEXIT_EXCEPTION_BR  =  69, /* bound-range */
+    VMEXIT_EXCEPTION_UD  =  70, /* invalid-opcode*/
+    VMEXIT_EXCEPTION_NM  =  71, /* device-not-available */
+    VMEXIT_EXCEPTION_DF  =  72, /* double-fault */
+    VMEXIT_EXCEPTION_09  =  73, /* unsupported (reserved) */
+    VMEXIT_EXCEPTION_TS  =  74, /* invalid-tss */
+    VMEXIT_EXCEPTION_NP  =  75, /* segment-not-present */
+    VMEXIT_EXCEPTION_SS  =  76, /* stack */
+    VMEXIT_EXCEPTION_GP  =  77, /* general-protection */
+    VMEXIT_EXCEPTION_PF  =  78, /* page-fault */
+    VMEXIT_EXCEPTION_15  =  79, /* reserved */
+    VMEXIT_EXCEPTION_MF  =  80, /* x87 floating-point exception-pending */
+    VMEXIT_EXCEPTION_AC  =  81, /* alignment-check */
+    VMEXIT_EXCEPTION_MC  =  82, /* machine-check */
+    VMEXIT_EXCEPTION_XF  =  83, /* simd floating-point */
+
+    /* exceptions 20-31 (exitcodes 84-95) are reserved */
+
+    /* ...and the rest of the #VMEXITs */
+    VMEXIT_INTR             =  96,
+    VMEXIT_NMI              =  97,
+    VMEXIT_SMI              =  98,
+    VMEXIT_INIT             =  99,
+    VMEXIT_VINTR            = 100,
+    VMEXIT_CR0_SEL_WRITE    = 101,
+    VMEXIT_IDTR_READ        = 102,
+    VMEXIT_GDTR_READ        = 103,
+    VMEXIT_LDTR_READ        = 104,
+    VMEXIT_TR_READ          = 105,
+    VMEXIT_IDTR_WRITE       = 106,
+    VMEXIT_GDTR_WRITE       = 107,
+    VMEXIT_LDTR_WRITE       = 108,
+    VMEXIT_TR_WRITE         = 109,
+    VMEXIT_RDTSC            = 110,
+    VMEXIT_RDPMC            = 111,
+    VMEXIT_PUSHF            = 112,
+    VMEXIT_POPF             = 113,
+    VMEXIT_CPUID            = 114,
+    VMEXIT_RSM              = 115,
+    VMEXIT_IRET             = 116,
+    VMEXIT_SWINT            = 117,
+    VMEXIT_INVD             = 118,
+    VMEXIT_PAUSE            = 119,
+    VMEXIT_HLT              = 120,
+    VMEXIT_INVLPG           = 121,
+    VMEXIT_INVLPGA          = 122,
+    VMEXIT_IOIO             = 123,
+    VMEXIT_MSR              = 124,
+    VMEXIT_TASK_SWITCH      = 125,
+    VMEXIT_FERR_FREEZE      = 126,
+    VMEXIT_SHUTDOWN         = 127,
+    VMEXIT_VMRUN            = 128,
+    VMEXIT_VMMCALL          = 129,
+    VMEXIT_VMLOAD           = 130,
+    VMEXIT_VMSAVE           = 131,
+    VMEXIT_STGI             = 132,
+    VMEXIT_CLGI             = 133,
+    VMEXIT_SKINIT           = 134,
+    VMEXIT_RDTSCP           = 135,
+    VMEXIT_ICEBP            = 136,
+    VMEXIT_NPF              = 1024, /* nested paging fault */
+    VMEXIT_INVALID          =  -1
+};
+
+enum {
+    SVM_CPU_STATE_PG_ENABLED=0,
+    SVM_CPU_STATE_PAE_ENABLED,
+    SVM_CPU_STATE_LME_ENABLED,      
+    SVM_CPU_STATE_LMA_ENABLED,
+    SVM_CPU_STATE_ASSIST_ENABLED,
+};  
+    
+#define SVM_LONG_GUEST(ed)    \
+  (test_bit(SVM_CPU_STATE_LMA_ENABLED, &ed->arch.hvm_svm.cpu_state))
+
+enum {
+    SVM_INDEX_MSR_LSTAR = 0,
+    SVM_INDEX_MSR_STAR,
+    SVM_INDEX_MSR_CSTAR,
+    SVM_INDEX_MSR_SYSCALL_MASK,
+    SVM_INDEX_MSR_EFER,
+
+    SVM_MSR_COUNT,
+};
+
+struct svm_msr_state {
+    unsigned long flags;
+    unsigned long msr_items[SVM_MSR_COUNT];
+    unsigned long shadow_gs;
+};
+
+/*
+ * Attribute for segment selector. This is a copy of bits 40:47 and 52:55
+ * of the segment descriptor.
+ */
+typedef union
+{
+    u16 bytes;
+    struct
+    {
+        u16 type:4;    /* 0;  Bit 40-43 */
+        u16 s:   1;    /* 4;  Bit 44 */
+        u16 dpl: 2;    /* 5;  Bit 45-46 */
+        u16 p:   1;    /* 7;  Bit 47 */
+        u16 avl: 1;    /* 8;  Bit 52 */
+        u16 l:   1;    /* 9;  Bit 53 */
+        u16 db:  1;    /* 10; Bit 54 */
+        u16 g:   1;    /* 11; Bit 55 */
+    } fields;
+} __attribute__ ((packed)) segment_attributes_t;
+
+typedef struct 
+{
+    u16        sel;
+    segment_attributes_t attributes;
+    u32        limit;
+    u64        base;
+} __attribute__ ((packed)) segment_selector_t;
+
+typedef union 
+{
+    u64 bytes;
+    struct 
+    {
+        u64 vector:    8;
+        u64 type:      3;
+        u64 ev:        1;
+        u64 resvd1:   19;
+        u64 v:         1;
+        u64 errorcode:32; 
+    } fields;
+} __attribute__ ((packed)) eventinj_t;
+
+enum EVENTTYPES
+{
+    EVENTTYPE_INTR = 0,
+    EVENTTYPE_NMI = 2,
+    EVENTTYPE_EXCEPTION = 3,
+    EVENTTYPE_SWINT = 4,
+};
+
+typedef union 
+{
+    u64 bytes;
+    struct 
+    {
+        u64 tpr:          8;
+        u64 irq:          1;
+        u64 rsvd0:        7;
+        u64 prio:         4;
+        u64 ign_tpr:      1;
+        u64 rsvd1:        3;
+        u64 intr_masking: 1;
+        u64 rsvd2:        7;
+        u64 vector:       8;
+        u64 rsvd3:       24;
+    } fields;
+} __attribute__ ((packed)) vintr_t;
+
+typedef union
+{
+    u64 bytes;
+    struct 
+    {
+        u64 type: 1;
+        u64 rsv0: 1;
+        u64 str:  1;
+        u64 rep:  1;
+        u64 sz8:  1;
+        u64 sz16: 1;
+        u64 sz32: 1;
+        u64 rsv1: 9;
+        u64 port: 16;
+    } fields;
+} __attribute__ ((packed)) ioio_info_t;
+
+struct vmcb_struct {
+    u32 cr_intercepts;          /* offset 0x00 */
+    u32 dr_intercepts;          /* offset 0x04 */
+    u32 exception_intercepts;   /* offset 0x08 */
+    u32 general1_intercepts;    /* offset 0x0C */
+    u32 general2_intercepts;    /* offset 0x10 */
+    u32 res01;                  /* offset 0x14 */
+    u64 res02;                  /* offset 0x18 */
+    u64 res03;                  /* offset 0x20 */
+    u64 res04;                  /* offset 0x28 */
+    u64 res05;                  /* offset 0x30 */
+    u64 res06;                  /* offset 0x38 */
+    u64 iopm_base_pa;           /* offset 0x40 */
+    u64 msrpm_base_pa;          /* offset 0x48 */
+    u64 tsc_offset;             /* offset 0x50 */
+    u32 guest_asid;             /* offset 0x58 */
+    u8  tlb_control;            /* offset 0x5C */
+    u8  res07[3];
+    vintr_t vintr;             /* offset 0x60 */
+    u64 interrupt_shadow;       /* offset 0x68 */
+    u64 exitcode;               /* offset 0x70 */
+    u64 exitinfo1;              /* offset 0x78 */
+    u64 exitinfo2;              /* offset 0x80 */
+    eventinj_t  exitintinfo;    /* offset 0x88 */
+    u64 np_enable;              /* offset 0x90 */
+    u64 res08[2];
+    eventinj_t  eventinj;       /* offset 0xA8 */
+    u64 h_cr3;                  /* offset 0xB0 */
+    u64 res09[105];             /* offset 0xB8 pad to save area */
+
+    segment_selector_t es;      /* offset 1024 */
+    segment_selector_t cs;
+    segment_selector_t ss;
+    segment_selector_t ds;
+    segment_selector_t fs;
+    segment_selector_t gs;
+    segment_selector_t gdtr;
+    segment_selector_t ldtr;
+    segment_selector_t idtr;
+    segment_selector_t tr;
+    u64 res10[5];
+    u8 res11[3];
+    u8 cpl;
+    u32 res12;
+    u64 efer;                  /* offset 1024 + 0xD0 */
+    u64 res13[14];
+    u64 cr4;                   /* offset 1024 + 0x148 */
+    u64 cr3;
+    u64 cr0;
+    u64 dr7;
+    u64 dr6;
+    u64 rflags;
+    u64 rip;
+    u64 res14[11];
+    u64 rsp;
+    u64 res15[3];
+    u64 rax;
+    u64 star;
+    u64 lstar;
+    u64 cstar;
+    u64 sfmask;
+    u64 kerngsbase;
+    u64 sysenter_cs;
+    u64 sysenter_esp;
+    u64 sysenter_eip;
+    u64 cr2;
+    u64 pdpe0;
+    u64 pdpe1;
+    u64 pdpe2;
+    u64 pdpe3;
+    u64 g_pat;
+    u64 res16[50];
+    u64 res17[128];
+    u64 res18[128];
+} __attribute__ ((packed));
+
+
+struct arch_svm_struct {
+    struct vmcb_struct *vmcb;
+    void               *host_save_area;
+    u64                 host_save_pa;
+    u64                 vmcb_pa;
+    u32                 *iopm;
+    u32                 *msrpm;
+    u64                 vmexit_tsc; /* TSC read at #VMEXIT; used for TSC_OFFSET */
+    int                 injecting_event;
+    int                 saved_irq_vector;
+    u32                 core;        /* cpu of last vmexit */
+    
+    unsigned long       flags;      /* VMCB flags */
+    unsigned long       cpu_shadow_cr0; /* copy of guest read shadow CR0 */
+    unsigned long       cpu_cr2;
+    unsigned long       cpu_cr3;
+    unsigned long       cpu_state;
+    struct svm_msr_state msr_content;
+    struct timer        hlt_timer;  /* HLT instruction emulation wakeup timer */
+};
+
+struct vmcb_struct *alloc_vmcb(void);
+struct host_save_area *alloc_host_save_area(void);
+void free_vmcb(struct vmcb_struct *vmcb);
+void free_host_save_area(struct host_save_area *hsa);
+void dump_vmcb(void);
+int  construct_vmcb(struct arch_svm_struct *, struct cpu_user_regs *); 
+
+#define VMCB_USE_HOST_ENV       1
+#define VMCB_USE_SEPARATE_ENV   0
+
+enum {
+    ARCH_SVM_VMCB_LOADED = 0,
+    ARCH_SVM_VMCB_ASSIGN_ASID
+};
+
+#define VMCB_EFLAGS_RESERVED_0          0xffc08028 /* bitmap for 0 */
+#define VMCB_EFLAGS_RESERVED_1          0x00000002 /* bitmap for 1 */
+
+#endif /* CONFIG_SVM */
+
+#endif /* __ASM_X86_HVM_SVM_VMCB_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
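
An illustration of enabling a basic set of intercepts with the enums above (an
assumption about how construct_vmcb() might use them, not a copy of it):

    vmcb->general1_intercepts = GENERAL1_INTERCEPT_INTR
                              | GENERAL1_INTERCEPT_CPUID
                              | GENERAL1_INTERCEPT_IOIO_PROT
                              | GENERAL1_INTERCEPT_HLT;
    vmcb->general2_intercepts = GENERAL2_INTERCEPT_VMRUN;   /* trap guest VMRUN */
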
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/svm/vmmcall.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/svm/vmmcall.h     Tue Jan 31 10:49:51 2006
@@ -0,0 +1,48 @@
+/*
+ * vmmcall.h: VMMCALL instruction support
+ *
+ * Travis Betak, travis.betak@xxxxxxx
+ * Copyright (c) 2005, AMD Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef __ASM_X86_HVM_SVM_VMMCALL_H__
+#define __ASM_X86_HVM_SVM_VMMCALL_H__
+
+#ifdef CONFIG_SVM
+
+/* VMMCALL command fields */
+#define VMMCALL_CODE_CPL_MASK     0xC0000000
+#define VMMCALL_CODE_MBZ_MASK     0x3FFF0000
+#define VMMCALL_CODE_COMMAND_MASK 0x0000FFFF
+
+#define MAKE_VMMCALL_CODE(cpl,func) ((cpl << 30) | (func))
+
+/* CPL=0 VMMCALL Requests */
+#define VMMCALL_RESET_TO_REALMODE   MAKE_VMMCALL_CODE(0,1)
+
+/* CPL=3 VMMCALL Requests */
+#define VMMCALL_DEBUG           MAKE_VMMCALL_CODE(3,1)
+
+/* return the cpl required for the vmmcall cmd */
+static inline int get_vmmcall_cpl(int cmd)
+{
+    return (cmd & VMMCALL_CODE_CPL_MASK) >> 30;
+}
+
+#endif /* CONFIG_SVM */
+
+#endif /* __ASM_X86_HVM_SVM_VMMCALL_H__ */
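
A worked example of the encoding:

    /* VMMCALL_DEBUG == MAKE_VMMCALL_CODE(3,1) == (3 << 30) | 1 == 0xC0000001,
     * so get_vmmcall_cpl(VMMCALL_DEBUG) == 3 and the command field is 1. */
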
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vcpu.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vcpu.h    Tue Jan 31 10:49:51 2006
@@ -0,0 +1,51 @@
+/*
+ * vcpu.h: HVM per vcpu definitions
+ *
+ * Copyright (c) 2005, International Business Machines Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_X86_HVM_VCPU_H__
+#define __ASM_X86_HVM_VCPU_H__
+
+#include <asm/hvm/io.h>
+#include <asm/hvm/vlapic.h>
+
+#ifdef CONFIG_VMX
+#include <asm/hvm/vmx/vmcs.h>
+#endif
+#ifdef CONFIG_SVM
+#include <asm/hvm/svm/vmcb.h>
+#endif
+
+struct hvm_vcpu {
+    unsigned long       ioflags;
+    struct mmio_op      mmio_op;
+    struct vlapic       *vlapic;
+
+    union {
+#ifdef CONFIG_VMX
+        struct arch_vmx_struct vmx;
+#endif
+#ifdef CONFIG_SVM
+        struct arch_svm_struct svm;
+#endif
+    } u;
+};
+
+#define ARCH_HVM_IO_WAIT   1       /* Waiting for I/O completion */
+
+#endif /* __ASM_X86_HVM_VCPU_H__ */
+
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vioapic.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vioapic.h Tue Jan 31 10:49:51 2006
@@ -0,0 +1,124 @@
+/*
+ *
+ *  Copyright (C) 2001  MandrakeSoft S.A.
+ *
+ *    MandrakeSoft S.A.
+ *    43, rue d'Aboukir
+ *    75002 Paris - France
+ *    http://www.linux-mandrake.com/
+ *    http://www.mandrakesoft.com/
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __ASM_X86_HVM_IOAPIC_H__
+#define __ASM_X86_HVM_IOAPIC_H__
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/smp.h>
+
+#ifndef __ia64__
+#define IOAPIC_VERSION_ID 0x11
+#else
+#define IOAPIC_VERSION_ID 0x21
+#endif
+
+#define IOAPIC_NUM_PINS 24
+#define MAX_LAPIC_NUM   32
+
+#define IOAPIC_LEVEL_TRIGGER 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+#define IOAPIC_ENABLE_MASK  0x0
+#define IOAPIC_ENABLE_FLAG  (1 << IOAPIC_ENABLE_MASK)
+#define IOAPICEnabled(s)    (s->flags & IOAPIC_ENABLE_FLAG)
+
+#define IOAPIC_REG_SELECT  0x0
+#define IOAPIC_REG_WINDOW  0x10
+
+#ifdef __ia64__
+#define IOAPIC_REG_ASSERTION    0x20
+#define IOAPIC_REG_EOI          0x40
+#endif
+
+#ifndef __ia64__
+#define IOAPIC_REG_APIC_ID 0x0
+#define IOAPIC_REG_ARB_ID  0x2
+#endif
+
+#define IOAPIC_REG_VERSION 0x1
+
+typedef union RedirStatus
+{
+    uint64_t value;
+    struct {
+        uint8_t vector;
+        uint8_t deliver_mode:3;
+        uint8_t destmode:1;
+        uint8_t delivestatus:1;
+        uint8_t polarity:1;
+        uint8_t remoteirr:1;
+        uint8_t trigmod:1;
+        uint8_t mask:1;         /* interrupt mask*/
+        uint8_t reserve:7;
+#ifndef __ia64__
+        uint8_t reserved[4];
+        uint8_t dest_id;
+#else
+        uint8_t reserved[3];
+        uint16_t dest_id;
+#endif
+    } RedirForm;
+} RedirStatus;
+
+
+typedef struct hvm_vioapic {
+    uint32_t irr;
+    uint32_t isr;           /* This is used for level trigger */
+    uint32_t imr;
+    uint32_t ioregsel;
+    uint32_t flags;
+    uint32_t lapic_count;
+    uint32_t id;
+    uint32_t arb_id;
+    unsigned long base_address;
+    RedirStatus redirtbl[IOAPIC_NUM_PINS];
+    struct vlapic *lapic_info[MAX_LAPIC_NUM];
+    struct domain *domain;
+} hvm_vioapic_t;
+
+hvm_vioapic_t *hvm_vioapic_init(struct domain *d);
+
+void hvm_vioapic_do_irqs_clear(struct domain *d, uint16_t irqs);
+void hvm_vioapic_do_irqs(struct domain *d, uint16_t irqs);
+void hvm_vioapic_set_irq(struct domain *d, int irq, int level);
+
+int hvm_vioapic_add_lapic(struct vlapic *vlapic, struct vcpu *v);
+
+void ioapic_update_EOI(struct domain *d, int vector);
+
+#ifdef HVM_DOMAIN_SAVE_RESTORE
+void ioapic_save(QEMUFile* f, void* opaque);
+int ioapic_load(QEMUFile* f, void* opaque, int version_id);
+#endif
+
+#endif /* __ASM_X86_HVM_IOAPIC_H__ */
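
A decoding sketch for the redirection-table entry layout above (values
illustrative):

    RedirStatus e;
    e.value = 0x30 | ((uint64_t)IOAPIC_LEVEL_TRIGGER << 15);
    /* e.RedirForm.vector == 0x30, e.RedirForm.trigmod == 1 (level) */
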
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vlapic.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vlapic.h  Tue Jan 31 10:49:51 2006
@@ -0,0 +1,253 @@
+/*
+ * hvm_vlapic.h: virtualize LAPIC definitions.
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_X86_HVM_VLAPIC_H__
+#define __ASM_X86_HVM_VLAPIC_H__
+
+#include <asm/msr.h>
+#include <public/hvm/ioreq.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+static inline int __fls(uint32_t word)
+{
+    int bit;
+
+    __asm__("bsrl %1,%0"
+      :"=r" (bit)
+      :"rm" (word));
+    return word ? bit : -1;
+}
+#else
+#define __fls(x)    generic_fls(x)
+static __inline__ int generic_fls(uint32_t x)
+{
+    int r = 31;
+
+    if (!x)
+        return -1;
+    if (!(x & 0xffff0000u)) {
+        x <<= 16;
+        r -= 16;
+    }
+    if (!(x & 0xff000000u)) {
+        x <<= 8;
+        r -= 8;
+    }
+    if (!(x & 0xf0000000u)) {
+        x <<= 4;
+        r -= 4;
+    }
+    if (!(x & 0xc0000000u)) {
+        x <<= 2;
+        r -= 2;
+    }
+    if (!(x & 0x80000000u)) {
+        x <<= 1;
+        r -= 1;
+    }
+    return r;
+}
+#endif
+
+static __inline__ int find_highest_bit(uint32_t *data, int length)
+{
+    while(length && !data[--length]);
+    return __fls(data[length]) +  32 * length;
+}
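
For example, scanning an 8-word (256-bit) vector such as a vlapic IRR:

    uint32_t irr[8] = { 0x4, 0, 0, 0, 0x10000, 0, 0, 0 };
    int vec = find_highest_bit(irr, 8);   /* bit 16 of word 4 -> 144 */
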
+
+#define VLAPIC(v)                       (v->arch.hvm_vcpu.vlapic)
+
+#define VAPIC_ID_MASK                   0xff
+#define VAPIC_LDR_MASK                  (VAPIC_ID_MASK << 24)
+#define VLAPIC_VERSION                  0x00050014
+
+#define VLAPIC_BASE_MSR_MASK            0x00000000fffff900ULL
+#define VLAPIC_BASE_MSR_INIT_BASE_ADDR  0xfee00000U
+#define VLAPIC_BASE_MSR_BASE_ADDR_MASK  0xfffff000U
+#define VLAPIC_BASE_MSR_INIT_VALUE      (VLAPIC_BASE_MSR_INIT_BASE_ADDR | \
+                                         MSR_IA32_APICBASE_ENABLE)
+#define VLOCAL_APIC_MEM_LENGTH          (1 << 12)
+
+#define VLAPIC_LVT_TIMER                0
+#define VLAPIC_LVT_THERMAL              1
+#define VLAPIC_LVT_PERFORM              2
+#define VLAPIC_LVT_LINT0                3
+#define VLAPIC_LVT_LINT1                4
+#define VLAPIC_LVT_ERROR                5
+#define VLAPIC_LVT_NUM                  6
+
+#define VLAPIC_LVT_BIT_MASK             (1 << 16)
+#define VLAPIC_LVT_BIT_VECTOR           0xff
+#define VLAPIC_LVT_BIT_DELIMOD          (0x7 << 8)
+#define VLAPIC_LVT_BIT_DELISTATUS       (1 << 12)
+#define VLAPIC_LVT_BIT_POLARITY         (1 << 13)
+#define VLAPIC_LVT_BIT_IRR              (1 << 14)
+#define VLAPIC_LVT_BIT_TRIG             (1 << 15)
+#define VLAPIC_LVT_TIMERMODE            (1 << 17)
+
+#define VLAPIC_DELIV_MODE_FIXED          0x0
+#define VLAPIC_DELIV_MODE_LPRI           0x1
+#define VLAPIC_DELIV_MODE_SMI            0x2
+#define VLAPIC_DELIV_MODE_RESERVED       0x3
+#define VLAPIC_DELIV_MODE_NMI            0x4
+#define VLAPIC_DELIV_MODE_INIT           0x5
+#define VLAPIC_DELIV_MODE_STARTUP        0x6
+#define VLAPIC_DELIV_MODE_EXT            0x7
+
+
+
+#define VLAPIC_NO_SHORTHAND             0x0
+#define VLAPIC_SHORTHAND_SELF           0x1
+#define VLAPIC_SHORTHAND_INCLUDE_SELF   0x2
+#define VLAPIC_SHORTHAND_EXCLUDE_SELF   0x3
+
+#define vlapic_lvt_timer_enabled(vlapic)    \
+  (!(vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_BIT_MASK))
+
+#define vlapic_lvt_vector(vlapic, type)   \
+  (vlapic->lvt[type] & VLAPIC_LVT_BIT_VECTOR)
+
+#define vlapic_lvt_dm(value)        ((value >> 8) & 7)
+#define vlapic_lvt_timer_period(vlapic) \
+  (vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_TIMERMODE)
+
+#define vlapic_isr_status(vlapic,vector)    \
+  test_bit(vector, &vlapic->isr[0])
+
+#define vlapic_irr_status(vlapic,vector)    \
+  test_bit(vector, &vlapic->irr[0])
+
+#define vlapic_set_isr(vlapic,vector) \
+  test_and_set_bit(vector, &vlapic->isr[0])
+
+#define vlapic_set_irr(vlapic,vector)      \
+  test_and_set_bit(vector, &vlapic->irr[0])
+
+#define vlapic_clear_irr(vlapic,vector)      \
+  clear_bit(vector, &vlapic->irr[0])
+#define vlapic_clear_isr(vlapic,vector)     \
+  clear_bit(vector, &vlapic->isr[0])
+
+#define vlapic_enabled(vlapic)               \
+  (!(vlapic->status &                           \
+     (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK)))
+
+#define vlapic_global_enabled(vlapic)               \
+  !(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status))
+
+#define VLAPIC_IRR(t) ((t)->irr[0])
+#define VLAPIC_ID(t)  ((t)->id)
+
+typedef struct direct_intr_info {
+    int deliver_mode;
+    int source[6];
+} direct_intr_info_t;
+
+#define VLAPIC_INIT_SIPI_SIPI_STATE_NORM          0
+#define VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI     1
+
+struct vlapic
+{
+    //FIXME check what would be 64 bit on EM64T
+    uint32_t           version;
+#define _VLAPIC_GLOB_DISABLE            0x0
+#define VLAPIC_GLOB_DISABLE_MASK        0x1
+#define VLAPIC_SOFTWARE_DISABLE_MASK    0x2
+#define _VLAPIC_BSP_ACCEPT_PIC          0x3
+    uint32_t           status;
+    uint32_t           id;
+    uint32_t           vcpu_id;
+    unsigned long      base_address;
+    uint32_t           isr[8];
+    uint32_t           irr[INTR_LEN_32];
+    uint32_t           tmr[INTR_LEN_32];
+    uint32_t           task_priority;
+    uint32_t           processor_priority;
+    uint32_t           logical_dest;
+    uint32_t           dest_format;
+    uint32_t           spurious_vec;
+    uint32_t           lvt[6];
+    uint32_t           timer_initial;
+    uint32_t           timer_current;
+    uint32_t           timer_divconf;
+    uint32_t           timer_divide_counter;
+    struct timer       vlapic_timer;
+    int                intr_pending_count[MAX_VECTOR];
+    s_time_t           timer_current_update;
+    uint32_t           icr_high;
+    uint32_t           icr_low;
+    direct_intr_info_t direct_intr;
+    uint32_t           err_status;
+    unsigned long      init_ticks;
+    uint32_t           err_write_count;
+    uint64_t           apic_base_msr;
+    uint32_t           init_sipi_sipi_state;
+    struct vcpu        *vcpu;
+    struct domain      *domain;
+};
+
+static inline int vlapic_set_irq(struct vlapic *t, uint8_t vec, uint8_t trig)
+{
+    int ret;
+
+    ret = test_and_set_bit(vec, &t->irr[0]);
+    if (trig)
+       test_and_set_bit(vec, &t->tmr[0]);
+
+    /* Besides setting the pending bit here, we may need to wake up the target vcpu */
+    return ret;
+}
+
+static inline int  vlapic_timer_active(struct vlapic *vlapic)
+{
+    return  active_timer(&(vlapic->vlapic_timer));
+}
+
+int vlapic_find_highest_irr(struct vlapic *vlapic);
+
+int vlapic_find_highest_isr(struct vlapic *vlapic);
+
+static inline uint32_t vlapic_get_base_address(struct vlapic *vlapic)
+{
+    return (vlapic->apic_base_msr & VLAPIC_BASE_MSR_BASE_ADDR_MASK);
+}
+
+void vlapic_post_injection(struct vcpu* v, int vector, int deliver_mode);
+
+int cpu_get_apic_interrupt(struct vcpu* v, int *mode);
+
+extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
+
+int vlapic_update(struct vcpu *v);
+
+extern int vlapic_init(struct vcpu *vc);
+
+extern void vlapic_msr_set(struct vlapic *vlapic, uint64_t value);
+
+int vlapic_accept_pic_intr(struct vcpu *v);
+
+struct vlapic* apic_round_robin(struct domain *d,
+                                uint8_t dest_mode,
+                                uint8_t vector,
+                                uint32_t bitmap);
+
+s_time_t get_apictime_scheduled(struct vcpu *v);
+int hvm_apic_support(struct domain *d);
+
+#endif /* __ASM_X86_HVM_VLAPIC_H__ */
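
A stand-alone illustration of the priority search that __fls() and
find_highest_bit() implement above: vector numbers 0..255 are spread across
eight 32-bit words (word index = vector / 32, bit index = vector % 32), and
the highest pending vector is the top set bit of the topmost non-zero word
plus 32 times that word's index. The sketch below is illustrative plain C,
not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Portable stand-in for the bsrl-based __fls() above: index of the
     * highest set bit, or -1 for a zero word. */
    static int fls32(uint32_t word)
    {
        int bit = -1;
        while (word) { bit++; word >>= 1; }
        return bit;
    }

    /* Same scan as find_highest_bit(): walk the words from the top. */
    static int highest_vector(const uint32_t bitmap[8])
    {
        int i;
        for (i = 7; i >= 0; i--)
            if (bitmap[i])
                return fls32(bitmap[i]) + 32 * i;
        return -1;                             /* nothing pending */
    }

    int main(void)
    {
        uint32_t irr[8] = { 0 };
        irr[0x41 / 32] |= 1u << (0x41 % 32);   /* pend vector 0x41 */
        irr[0x20 / 32] |= 1u << (0x20 % 32);   /* pend vector 0x20 */
        printf("highest pending vector: 0x%x\n", highest_vector(irr)); /* 0x41 */
        return 0;
    }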
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vmx/cpu.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vmx/cpu.h Tue Jan 31 10:49:51 2006
@@ -0,0 +1,39 @@
+/*
+ * cpu.h: Virtual CPU state
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_HVM_VMX_CPU_H__
+#define __ASM_X86_HVM_VMX_CPU_H__
+
+#ifdef CONFIG_VMX
+
+/*
+ * Virtual CPU
+ */
+struct arch_state_struct {
+    unsigned long       mode_flags; /* vm86, 32-bit, 64-bit, etc. */
+    /* debug registers */
+    /* MSRs */
+};
+
+#define VMX_MF_VM86     0
+#define VMX_MF_32       1
+#define VMX_MF_64       2
+
+#endif /* CONFIG_VMX */
+
+#endif /* __ASM_X86_HVM_VMX_CPU_H__ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vmx/vmcs.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h        Tue Jan 31 10:49:51 2006
@@ -0,0 +1,263 @@
+/*
+ * vmcs.h: VMCS related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_HVM_VMX_VMCS_H__
+#define __ASM_X86_HVM_VMX_VMCS_H__
+
+#include <asm/config.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/vmx/cpu.h>
+#include <public/hvm/vmx_assist.h>
+
+#ifdef CONFIG_VMX
+
+extern int start_vmx(void);
+extern void stop_vmx(void);
+
+void vmx_final_setup_guest(struct vcpu *v);
+void vmx_relinquish_resources(struct vcpu *v);
+
+void vmx_enter_scheduler(void);
+
+enum {
+    VMX_CPU_STATE_PG_ENABLED=0,
+    VMX_CPU_STATE_PAE_ENABLED,
+    VMX_CPU_STATE_LME_ENABLED,
+    VMX_CPU_STATE_LMA_ENABLED,
+    VMX_CPU_STATE_ASSIST_ENABLED,
+};
+
+#define VMX_LONG_GUEST(ed)    \
+  (test_bit(VMX_CPU_STATE_LMA_ENABLED, &ed->arch.hvm_vmx.cpu_state))
+
+struct vmcs_struct {
+    u32 vmcs_revision_id;
+    unsigned char data [0]; /* vmcs size is read from MSR */
+};
+
+extern int vmcs_size;
+
+enum {
+    VMX_INDEX_MSR_LSTAR = 0,
+    VMX_INDEX_MSR_STAR,
+    VMX_INDEX_MSR_CSTAR,
+    VMX_INDEX_MSR_SYSCALL_MASK,
+    VMX_INDEX_MSR_EFER,
+
+    VMX_MSR_COUNT,
+};
+
+struct vmx_msr_state {
+    unsigned long flags;
+    unsigned long msr_items[VMX_MSR_COUNT];
+    unsigned long shadow_gs;
+};
+
+struct arch_vmx_struct {
+    struct vmcs_struct      *vmcs;  /* virtual address of the VMCS */
+    unsigned int            launch_cpu; /* VMCS is valid on this CPU. */
+    unsigned long           flags;  /* VMCS flags */
+    unsigned long           cpu_cr0; /* copy of guest CR0 */
+    unsigned long           cpu_shadow_cr0; /* copy of guest read shadow CR0 */
+    unsigned long           cpu_cr2; /* save CR2 */
+    unsigned long           cpu_cr3;
+    unsigned long           cpu_state;
+    unsigned long           cpu_based_exec_control;
+    struct vmx_msr_state    msr_content;
+    void                    *io_bitmap_a, *io_bitmap_b;
+    u64                     tsc_offset;
+    struct timer            hlt_timer;  /* HLT instruction emulation wakeup timer */
+};
+
+#define vmx_schedule_tail(next)         \
+    (next)->thread.arch_vmx.arch_vmx_schedule_tail((next))
+
+#define ARCH_VMX_VMCS_LOADED    0       /* VMCS has been loaded and active */
+#define ARCH_VMX_VMCS_LAUNCH    1       /* Needs VMCS launch */
+#define ARCH_VMX_VMCS_RESUME    2       /* Needs VMCS resume */
+
+void vmx_do_resume(struct vcpu *);
+struct vmcs_struct *alloc_vmcs(void);
+int modify_vmcs(struct arch_vmx_struct *arch_vmx,
+                struct cpu_user_regs *regs);
+void destroy_vmcs(struct arch_vmx_struct *arch_vmx);
+
+#define VMCS_USE_HOST_ENV       1
+#define VMCS_USE_SEPARATE_ENV   0
+
+extern int vmcs_version;
+
+#define CPU_BASED_VIRTUAL_INTR_PENDING  0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING     0x00000008
+#define CPU_BASED_HLT_EXITING           0x00000080
+#define CPU_BASED_INVDPG_EXITING        0x00000200
+#define CPU_BASED_MWAIT_EXITING         0x00000400
+#define CPU_BASED_RDPMC_EXITING         0x00000800
+#define CPU_BASED_RDTSC_EXITING         0x00001000
+#define CPU_BASED_CR8_LOAD_EXITING      0x00080000
+#define CPU_BASED_CR8_STORE_EXITING     0x00100000
+#define CPU_BASED_TPR_SHADOW            0x00200000
+#define CPU_BASED_MOV_DR_EXITING        0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING     0x01000000
+#define CPU_BASED_ACTIVATE_IO_BITMAP    0x02000000
+#define CPU_BASED_MONITOR_EXITING       0x20000000
+#define CPU_BASED_PAUSE_EXITING         0x40000000
+#define PIN_BASED_EXT_INTR_MASK 0x1
+#define PIN_BASED_NMI_EXITING   0x8
+
+#define VM_EXIT_ACK_INTR_ON_EXIT        0x00008000
+#define VM_EXIT_HOST_ADD_SPACE_SIZE     0x00000200
+
+
+/* VMCS Encodings */
+enum vmcs_field {
+    GUEST_ES_SELECTOR               = 0x00000800,
+    GUEST_CS_SELECTOR               = 0x00000802,
+    GUEST_SS_SELECTOR               = 0x00000804,
+    GUEST_DS_SELECTOR               = 0x00000806,
+    GUEST_FS_SELECTOR               = 0x00000808,
+    GUEST_GS_SELECTOR               = 0x0000080a,
+    GUEST_LDTR_SELECTOR             = 0x0000080c,
+    GUEST_TR_SELECTOR               = 0x0000080e,
+    HOST_ES_SELECTOR                = 0x00000c00,
+    HOST_CS_SELECTOR                = 0x00000c02,
+    HOST_SS_SELECTOR                = 0x00000c04,
+    HOST_DS_SELECTOR                = 0x00000c06,
+    HOST_FS_SELECTOR                = 0x00000c08,
+    HOST_GS_SELECTOR                = 0x00000c0a,
+    HOST_TR_SELECTOR                = 0x00000c0c,
+    IO_BITMAP_A                     = 0x00002000, 
+    IO_BITMAP_A_HIGH                = 0x00002001, 
+    IO_BITMAP_B                     = 0x00002002, 
+    IO_BITMAP_B_HIGH                = 0x00002003, 
+    VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
+    VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+    VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
+    VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+    VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
+    VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+    TSC_OFFSET                      = 0x00002010,
+    TSC_OFFSET_HIGH                 = 0x00002011,
+    VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
+    VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+    VMCS_LINK_POINTER               = 0x00002800,
+    VMCS_LINK_POINTER_HIGH          = 0x00002801,
+    GUEST_IA32_DEBUGCTL             = 0x00002802,
+    GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+    PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+    CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,   
+    EXCEPTION_BITMAP                = 0x00004004,
+    PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
+    PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
+    CR3_TARGET_COUNT                = 0x0000400a,
+    VM_EXIT_CONTROLS                = 0x0000400c,
+    VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+    VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+    VM_ENTRY_CONTROLS               = 0x00004012,
+    VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+    VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
+    VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
+    VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
+    TPR_THRESHOLD                   = 0x0000401c,
+    SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+    VM_INSTRUCTION_ERROR            = 0x00004400,
+    VM_EXIT_REASON                  = 0x00004402,
+    VM_EXIT_INTR_INFO               = 0x00004404,   
+    VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
+    IDT_VECTORING_INFO_FIELD        = 0x00004408,
+    IDT_VECTORING_ERROR_CODE        = 0x0000440a,
+    VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
+    VMX_INSTRUCTION_INFO            = 0x0000440e,
+    GUEST_ES_LIMIT                  = 0x00004800,
+    GUEST_CS_LIMIT                  = 0x00004802,
+    GUEST_SS_LIMIT                  = 0x00004804,
+    GUEST_DS_LIMIT                  = 0x00004806,
+    GUEST_FS_LIMIT                  = 0x00004808,
+    GUEST_GS_LIMIT                  = 0x0000480a,
+    GUEST_LDTR_LIMIT                = 0x0000480c,
+    GUEST_TR_LIMIT                  = 0x0000480e,
+    GUEST_GDTR_LIMIT                = 0x00004810,
+    GUEST_IDTR_LIMIT                = 0x00004812,
+    GUEST_ES_AR_BYTES               = 0x00004814,
+    GUEST_CS_AR_BYTES               = 0x00004816,
+    GUEST_SS_AR_BYTES               = 0x00004818,
+    GUEST_DS_AR_BYTES               = 0x0000481a,
+    GUEST_FS_AR_BYTES               = 0x0000481c,
+    GUEST_GS_AR_BYTES               = 0x0000481e,
+    GUEST_LDTR_AR_BYTES             = 0x00004820,
+    GUEST_TR_AR_BYTES               = 0x00004822,
+    GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
+    GUEST_SYSENTER_CS               = 0x0000482A,
+    HOST_IA32_SYSENTER_CS           = 0x00004c00,
+    CR0_GUEST_HOST_MASK             = 0x00006000,
+    CR4_GUEST_HOST_MASK             = 0x00006002,
+    CR0_READ_SHADOW                 = 0x00006004,
+    CR4_READ_SHADOW                 = 0x00006006,
+    CR3_TARGET_VALUE0               = 0x00006008, 
+    CR3_TARGET_VALUE1               = 0x0000600a, 
+    CR3_TARGET_VALUE2               = 0x0000600c, 
+    CR3_TARGET_VALUE3               = 0x0000600e, 
+    EXIT_QUALIFICATION              = 0x00006400,
+    GUEST_LINEAR_ADDRESS            = 0x0000640a,
+    GUEST_CR0                       = 0x00006800,
+    GUEST_CR3                       = 0x00006802,
+    GUEST_CR4                       = 0x00006804,
+    GUEST_ES_BASE                   = 0x00006806,
+    GUEST_CS_BASE                   = 0x00006808,
+    GUEST_SS_BASE                   = 0x0000680a,
+    GUEST_DS_BASE                   = 0x0000680c,
+    GUEST_FS_BASE                   = 0x0000680e,
+    GUEST_GS_BASE                   = 0x00006810,
+    GUEST_LDTR_BASE                 = 0x00006812,
+    GUEST_TR_BASE                   = 0x00006814,
+    GUEST_GDTR_BASE                 = 0x00006816,    
+    GUEST_IDTR_BASE                 = 0x00006818,
+    GUEST_DR7                       = 0x0000681a,
+    GUEST_RSP                       = 0x0000681c,
+    GUEST_RIP                       = 0x0000681e,
+    GUEST_RFLAGS                    = 0x00006820,
+    GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
+    GUEST_SYSENTER_ESP              = 0x00006824,
+    GUEST_SYSENTER_EIP              = 0x00006826,
+    HOST_CR0                        = 0x00006c00,
+    HOST_CR3                        = 0x00006c02,
+    HOST_CR4                        = 0x00006c04,
+    HOST_FS_BASE                    = 0x00006c06,
+    HOST_GS_BASE                    = 0x00006c08,
+    HOST_TR_BASE                    = 0x00006c0a,
+    HOST_GDTR_BASE                  = 0x00006c0c,
+    HOST_IDTR_BASE                  = 0x00006c0e,
+    HOST_IA32_SYSENTER_ESP          = 0x00006c10,
+    HOST_IA32_SYSENTER_EIP          = 0x00006c12,
+    HOST_RSP                        = 0x00006c14,
+    HOST_RIP                        = 0x00006c16,
+};
+
+#endif /* CONFIG_VMX */
+
+#endif /* __ASM_X86_HVM_VMX_VMCS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
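
The enum above follows Intel's VMCS field encoding scheme: bits 14:13 of an
encoding give the field width (00 = 16-bit, 01 = 64-bit, 10 = 32-bit,
11 = natural width), and for 64-bit fields bit 0 selects the high half --
hence the paired *_HIGH entries, which let a 32-bit VMM reach the upper
32 bits. A stand-alone decoder, for illustration only (not part of the
patch):

    #include <stdio.h>

    /* Decode the width bits (14:13) of a VMCS field encoding. */
    static const char *vmcs_field_width(unsigned long field)
    {
        switch ((field >> 13) & 3) {
        case 0:  return "16-bit";
        case 1:  return (field & 1) ? "64-bit (high half)" : "64-bit";
        case 2:  return "32-bit";
        default: return "natural width";
        }
    }

    int main(void)
    {
        printf("0x0800: %s\n", vmcs_field_width(0x0800)); /* GUEST_ES_SELECTOR */
        printf("0x2011: %s\n", vmcs_field_width(0x2011)); /* TSC_OFFSET_HIGH   */
        printf("0x4402: %s\n", vmcs_field_width(0x4402)); /* VM_EXIT_REASON    */
        printf("0x6800: %s\n", vmcs_field_width(0x6800)); /* GUEST_CR0         */
        return 0;
    }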
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vmx/vmx.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Tue Jan 31 10:49:51 2006
@@ -0,0 +1,463 @@
+/*
+ * vmx.h: VMX Architecture related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_HVM_VMX_VMX_H__
+#define __ASM_X86_HVM_VMX_VMX_H__
+
+#include <xen/sched.h>
+#include <asm/types.h>
+#include <asm/regs.h>
+#include <asm/processor.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/i387.h>
+
+#ifdef CONFIG_VMX
+
+extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
+extern void vmx_asm_do_resume(void);
+extern void vmx_asm_do_launch(void);
+extern void vmx_intr_assist(void);
+extern void vmx_set_tsc_shift(struct vcpu *, struct hvm_virpit *);
+
+extern void arch_vmx_do_launch(struct vcpu *);
+extern void arch_vmx_do_resume(struct vcpu *);
+
+extern unsigned int cpu_rev;
+
+/*
+ * Need fill bits for SENTER
+ */
+
+#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE         0x00000016
+
+#define MONITOR_PIN_BASED_EXEC_CONTROLS       \
+    ( \
+    MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE |   \
+    PIN_BASED_EXT_INTR_MASK |   \
+    PIN_BASED_NMI_EXITING \
+    )
+
+#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE         0x0401e172
+
+#define _MONITOR_CPU_BASED_EXEC_CONTROLS \
+    ( \
+    MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE |    \
+    CPU_BASED_HLT_EXITING | \
+    CPU_BASED_INVDPG_EXITING | \
+    CPU_BASED_MWAIT_EXITING | \
+    CPU_BASED_MOV_DR_EXITING | \
+    CPU_BASED_ACTIVATE_IO_BITMAP | \
+    CPU_BASED_USE_TSC_OFFSETING  | \
+    CPU_BASED_UNCOND_IO_EXITING \
+    )
+
+#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \
+    ( \
+    CPU_BASED_CR8_LOAD_EXITING | \
+    CPU_BASED_CR8_STORE_EXITING \
+    )
+
+#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE   0x0003edff
+
+#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE       0x00000200
+
+#define _MONITOR_VM_EXIT_CONTROLS                \
+    ( \
+    MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE |\
+    VM_EXIT_ACK_INTR_ON_EXIT \
+    )
+
+#if defined (__x86_64__)
+#define MONITOR_CPU_BASED_EXEC_CONTROLS \
+    ( \
+    _MONITOR_CPU_BASED_EXEC_CONTROLS | \
+    MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \
+    )
+#define MONITOR_VM_EXIT_CONTROLS \
+    ( \
+    _MONITOR_VM_EXIT_CONTROLS | \
+    MONITOR_VM_EXIT_CONTROLS_IA32E_MODE  \
+    )
+#else
+#define MONITOR_CPU_BASED_EXEC_CONTROLS \
+    _MONITOR_CPU_BASED_EXEC_CONTROLS 
+
+#define MONITOR_VM_EXIT_CONTROLS \
+    _MONITOR_VM_EXIT_CONTROLS
+#endif
+
+#define VM_ENTRY_CONTROLS_RESERVED_VALUE        0x000011ff
+#define VM_ENTRY_CONTROLS_IA32E_MODE            0x00000200
+#define MONITOR_VM_ENTRY_CONTROLS       VM_ENTRY_CONTROLS_RESERVED_VALUE 
+/*
+ * Exit Reasons
+ */
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+
+/*
+ * Interruption-information format
+ */
+#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
+#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
+#define INTR_INFO_DELIEVER_CODE_MASK    0x800           /* 11 */
+#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
+
+#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+
+/*
+ * Exit Qualifications for MOV for Control Register Access
+ */
+#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
+#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
+#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose register */
+#define LMSW_SOURCE_DATA  (0xFFFF << 16) /* 16:31 lmsw source */
+#define REG_EAX                         (0 << 8) 
+#define REG_ECX                         (1 << 8) 
+#define REG_EDX                         (2 << 8) 
+#define REG_EBX                         (3 << 8) 
+#define REG_ESP                         (4 << 8) 
+#define REG_EBP                         (5 << 8) 
+#define REG_ESI                         (6 << 8) 
+#define REG_EDI                         (7 << 8) 
+#define REG_R8                         (8 << 8)
+#define REG_R9                         (9 << 8)
+#define REG_R10                        (10 << 8)
+#define REG_R11                        (11 << 8)
+#define REG_R12                        (12 << 8)
+#define REG_R13                        (13 << 8)
+#define REG_R14                        (14 << 8)
+#define REG_R15                        (15 << 8)
+
+/*
+ * Exit Qualifications for MOV for Debug Register Access
+ */
+#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
+#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
+#define TYPE_MOV_TO_DR                  (0 << 4) 
+#define TYPE_MOV_FROM_DR                (1 << 4)
+#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */
+ 
+/* These bits in the CR4 are owned by the host */
+#ifdef __i386__
+#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
+#else
+#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
+#endif
+
+#define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
+#define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
+#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
+#define VMPTRLD_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /6 */
+#define VMPTRST_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /7 */
+#define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
+#define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
+#define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
+#define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
+#define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
+
+#define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
+#define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
+#define MODRM_EAX_ECX   ".byte 0xc1\n" /* [EAX], [ECX] */
+
+static inline int __vmptrld (u64 addr)
+{
+    unsigned long eflags;
+    __asm__ __volatile__ ( VMPTRLD_OPCODE
+                           MODRM_EAX_06
+                           :
+                           : "a" (&addr) 
+                           : "memory");
+
+    __save_flags(eflags);
+    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+        return -1;
+    return 0;
+}
+
+static inline void __vmptrst (u64 addr)
+{
+    __asm__ __volatile__ ( VMPTRST_OPCODE
+                           MODRM_EAX_07
+                           :
+                           : "a" (&addr) 
+                           : "memory");
+}
+
+static inline int __vmpclear (u64 addr)
+{
+    unsigned long eflags;
+
+    __asm__ __volatile__ ( VMCLEAR_OPCODE
+                           MODRM_EAX_06
+                           :
+                           : "a" (&addr) 
+                           : "memory");
+    __save_flags(eflags);
+    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+        return -1;
+    return 0;
+}
+
+#define __vmread(x, ptr) ___vmread((x), (ptr), sizeof(*(ptr)))
+
+static always_inline int ___vmread (const unsigned long field, void *ptr, const int size)
+{
+    unsigned long eflags;
+    unsigned long ecx = 0;
+
+    __asm__ __volatile__ ( VMREAD_OPCODE
+                           MODRM_EAX_ECX       
+                           : "=c" (ecx)
+                           : "a" (field)
+                           : "memory");
+
+    switch (size) {
+    case 1:
+        *((u8 *) (ptr)) = ecx;
+        break;
+    case 2:
+        *((u16 *) (ptr)) = ecx;
+        break;
+    case 4:
+        *((u32 *) (ptr)) = ecx;
+        break;
+    case 8:
+        *((u64 *) (ptr)) = ecx;
+        break;
+    default:
+        domain_crash_synchronous();
+        break;
+    }
+
+    __save_flags(eflags);
+    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+        return -1;
+    return 0;
+}
+
+
+static always_inline void __vmwrite_vcpu(struct vcpu *v, unsigned long field, unsigned long value)
+{
+    switch(field) {
+    case CR0_READ_SHADOW:
+       v->arch.hvm_vmx.cpu_shadow_cr0 = value;
+       break;
+    case GUEST_CR0:
+       v->arch.hvm_vmx.cpu_cr0 = value;
+       break;
+    case CPU_BASED_VM_EXEC_CONTROL:
+       v->arch.hvm_vmx.cpu_based_exec_control = value;
+       break;
+    default:
+       printk("__vmwrite_cpu: invalid field %lx\n", field);
+       break;
+    }
+}
+
+static always_inline void __vmread_vcpu(struct vcpu *v, unsigned long field, unsigned long *value)
+{
+    switch(field) {
+    case CR0_READ_SHADOW:
+       *value = v->arch.hvm_vmx.cpu_shadow_cr0;
+       break;
+    case GUEST_CR0:
+       *value = v->arch.hvm_vmx.cpu_cr0;
+       break;
+    case CPU_BASED_VM_EXEC_CONTROL:
+       *value = v->arch.hvm_vmx.cpu_based_exec_control;
+       break;
+    default:
+       printk("__vmread_cpu: invalid field %lx\n", field);
+       break;
+    }
+}
+
+static inline int __vmwrite (unsigned long field, unsigned long value)
+{
+    unsigned long eflags;
+    struct vcpu *v = current;
+
+    __asm__ __volatile__ ( VMWRITE_OPCODE
+                           MODRM_EAX_ECX
+                           :
+                           : "a" (field) , "c" (value)
+                           : "memory");
+    __save_flags(eflags);
+    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+        return -1;
+
+    switch(field) {
+    case CR0_READ_SHADOW:
+    case GUEST_CR0:
+    case CPU_BASED_VM_EXEC_CONTROL:
+       __vmwrite_vcpu(v, field, value);
+       break;
+    }
+
+    return 0;
+}
+
+static inline int __vm_set_bit(unsigned long field, unsigned long mask)
+{
+        unsigned long tmp;
+        int err = 0;
+
+        err |= __vmread(field, &tmp);
+        tmp |= mask;
+        err |= __vmwrite(field, tmp);
+
+        return err;
+}
+
+static inline int __vm_clear_bit(unsigned long field, unsigned long mask)
+{
+        unsigned long tmp;
+        int err = 0;
+
+        err |= __vmread(field, &tmp);
+        tmp &= ~mask;
+        err |= __vmwrite(field, tmp);
+
+        return err;
+}
+
+static inline void __vmxoff (void)
+{
+    __asm__ __volatile__ ( VMXOFF_OPCODE 
+                           ::: "memory");
+}
+
+static inline int __vmxon (u64 addr)
+{
+    unsigned long eflags;
+
+    __asm__ __volatile__ ( VMXON_OPCODE
+                           MODRM_EAX_06
+                           :
+                           : "a" (&addr) 
+                           : "memory");
+    __save_flags(eflags);
+    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
+        return -1;
+    return 0;
+}
+
+/* Make sure that xen intercepts any FP accesses from current */
+static inline void vmx_stts(void)
+{
+    unsigned long cr0;
+    struct vcpu *v = current;
+
+    __vmread_vcpu(v, GUEST_CR0, &cr0);
+    if (!(cr0 & X86_CR0_TS)) {
+        __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS);
+    }
+
+    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
+    if (!(cr0 & X86_CR0_TS))
+       __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
+}
+
+/* Works only for vcpu == current */
+static inline int vmx_paging_enabled(struct vcpu *v)
+{
+    unsigned long cr0;
+
+    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
+    return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
+}
+
+static inline int __vmx_inject_exception(struct vcpu *v, int trap, int type, 
+                                         int error_code)
+{
+    unsigned long intr_fields;
+
+    /* Reflect it back into the guest */
+    intr_fields = (INTR_INFO_VALID_MASK | type | trap);
+    if (error_code != VMX_INVALID_ERROR_CODE) {
+        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+        intr_fields |= INTR_INFO_DELIEVER_CODE_MASK;
+    }
+    
+    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
+    return 0;
+}
+
+static inline int vmx_inject_exception(struct vcpu *v, int trap, int error_code)
+{
+    return __vmx_inject_exception(v, trap, INTR_TYPE_EXCEPTION, error_code);
+}
+
+static inline int vmx_inject_extint(struct vcpu *v, int trap, int error_code)
+{
+    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code);
+    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+
+    return 0;
+}
+
+static inline int vmx_reflect_exception(struct vcpu *v)
+{
+    int error_code, vector;
+
+    __vmread(VM_EXIT_INTR_INFO, &vector);
+    if (vector & INTR_INFO_DELIEVER_CODE_MASK)
+        __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
+    else
+        error_code = VMX_INVALID_ERROR_CODE;
+    vector &= 0xff;
+
+#ifndef NDEBUG
+    {
+        unsigned long eip;
+
+        __vmread(GUEST_RIP, &eip);
+        HVM_DBG_LOG(DBG_LEVEL_1,
+                    "vmx_reflect_exception: eip = %lx, error_code = %x",
+                    eip, error_code);
+    }
+#endif /* NDEBUG */
+
+    vmx_inject_exception(v, vector, error_code);
+    return 0;
+}
+
+#endif /* CONFIG_VMX */
+
+#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
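
All the instruction wrappers above share one error convention: the VMX
instructions report VMfailInvalid/VMfailValid through EFLAGS.CF/ZF, and the
wrappers fold either flag into a -1 return. __vm_set_bit()/__vm_clear_bit()
package the resulting read-modify-write pattern; spelled out by hand, a
hypothetical caller (a sketch assuming this header's definitions, not code
from the patch) looks like:

    /* Hypothetical sketch; enable_hlt_exiting is an invented name. */
    static inline int enable_hlt_exiting(void)
    {
        unsigned long ctrl;

        if (__vmread(CPU_BASED_VM_EXEC_CONTROL, &ctrl))
            return -1;              /* VMREAD signalled failure via CF/ZF */
        ctrl |= CPU_BASED_HLT_EXITING;
        return __vmwrite(CPU_BASED_VM_EXEC_CONTROL, ctrl);
    }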
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vpic.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vpic.h    Tue Jan 31 10:49:51 2006
@@ -0,0 +1,84 @@
+/*
+ * QEMU System Emulator header
+ * 
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2005 Intel Corp
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __ASM_X86_HVM_VPIC_H__
+#define __ASM_X86_HVM_VPIC_H__
+
+#define hw_error(x)  do {} while (0)
+
+
+/* i8259.c */
+typedef struct IOAPICState IOAPICState;
+typedef struct PicState {
+    uint8_t last_irr; /* edge detection */
+    uint8_t irr; /* interrupt request register */
+    uint8_t imr; /* interrupt mask register */
+    uint8_t isr; /* interrupt service register */
+    uint8_t priority_add; /* highest irq priority */
+    uint8_t irq_base;
+    uint8_t read_reg_select;
+    uint8_t poll;
+    uint8_t special_mask;
+    uint8_t init_state;
+    uint8_t auto_eoi;
+    uint8_t rotate_on_auto_eoi;
+    uint8_t special_fully_nested_mode;
+    uint8_t init4; /* true if 4 byte init */
+    uint8_t elcr; /* PIIX edge/trigger selection */
+    uint8_t elcr_mask;
+    struct hvm_virpic *pics_state;
+} PicState;
+
+struct hvm_virpic {
+    /* 0 is master pic, 1 is slave pic */
+    /* XXX: better separation between the two pics */
+    PicState pics[2];
+    void (*irq_request)(int *opaque, int level);
+    void *irq_request_opaque;
+    /* IOAPIC callback support */
+    void (*alt_irq_func)(void *opaque, int irq_num, int level);
+    void *alt_irq_opaque;
+};
+
+
+void pic_set_irq(struct hvm_virpic *s, int irq, int level);
+void pic_set_irq_new(void *opaque, int irq, int level);
+void pic_init(struct hvm_virpic *s, 
+              void (*irq_request)(),
+              void *irq_request_opaque);
+void pic_set_alt_irq_func(struct hvm_virpic *s, 
+                          void(*alt_irq_func)(),
+                          void *alt_irq_opaque);
+int pic_read_irq(struct hvm_virpic *s);
+void pic_update_irq(struct hvm_virpic *s);
+uint32_t pic_intack_read(struct hvm_virpic *s);
+void register_pic_io_hook (void);
+int cpu_get_pic_interrupt(struct vcpu *v, int *type);
+int is_pit_irq(struct vcpu *v, int irq, int type);
+int is_irq_enabled(struct vcpu *v, int irq);
+void do_pic_irqs (struct hvm_virpic *s, uint16_t irqs);
+void do_pic_irqs_clear (struct hvm_virpic *s, uint16_t irqs);
+
+#endif  /* __ASM_X86_HVM_VPIC_H__ */  
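
The interface above is callback driven: the owner registers an irq_request
hook at initialization time, and device models then feed interrupt-line
transitions through pic_set_irq(). A hypothetical wiring sketch (assuming
this header; my_irq_request and wire_up_pic are invented names, not code
from the patch):

    /* Called back by the vPIC when its INTR output changes. */
    static void my_irq_request(int *opaque, int level)
    {
        /* level != 0 means an unmasked interrupt is pending */
    }

    static void wire_up_pic(struct hvm_virpic *pic)
    {
        pic_init(pic, my_irq_request, NULL);
        pic_set_irq(pic, 0, 1);     /* assert IRQ0 (the PIT line) */
    }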
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/hvm/vpit.h
--- /dev/null   Mon Jan 30 17:51:35 2006
+++ b/xen/include/asm-x86/hvm/vpit.h    Tue Jan 31 10:49:51 2006
@@ -0,0 +1,74 @@
+/*
+ * vpit.h: Virtual PIT definitions
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_X86_HVM_VPIT_H__
+#define __ASM_X86_HVM_VPIT_H__
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/time.h>
+#include <xen/errno.h>
+#include <xen/timer.h>
+#include <asm/hvm/vpic.h>
+
+#define PIT_FREQ 1193181
+
+#define LSByte          0
+#define MSByte          1
+#define LSByte_multiple 2
+#define MSByte_multiple 3
+
+struct hvm_virpit {
+    /* for simulation of counter 0 in mode 2 */
+    u64 period_cycles;          /* PIT period in cpu cycles */
+    u64 inject_point;           /* the time at which the virt intr was injected */
+    u64 shift;                  /* save the value of offset - drift */
+    s_time_t scheduled;         /* scheduled timer interrupt */
+    struct timer pit_timer;     /* periodic timer for mode 2 */
+    unsigned int channel;       /* the PIT channel, counter 0~2 */
+    unsigned int pending_intr_nr; /* the counter for pending timer interrupts */
+    u32 period;                 /* PIT period in ns */
+    int first_injected;         /* flag to prevent shadow window */
+
+    /* virtual PIT state for handling related I/O */
+    int read_state;
+    int count_LSB_latched;
+    int count_MSB_latched;
+
+    unsigned int count;  /* the 16 bit channel count */
+    unsigned int init_val; /* the init value for the counter */
+};
+
+static __inline__ s_time_t get_pit_scheduled(
+    struct vcpu *v,
+    struct hvm_virpit *vpit)
+{
+    if ( is_irq_enabled(v, 0) ) {
+        return vpit->scheduled;
+    }
+    else
+        return -1;
+}
+
+/* hook the ioreq packet to retrieve the PIT initialization info */
+extern void hvm_hooks_assist(struct vcpu *v);
+void pickup_deactive_ticks(struct hvm_virpit *vpit);
+
+#endif /* __ASM_X86_HVM_VPIT_H__ */
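
For reference, the relation between a channel's programmed 16-bit count and
the period fields above: the i8254 input clock is PIT_FREQ = 1193181 Hz, so
one tick is roughly 838 ns and the period is count * 10^9 / PIT_FREQ
nanoseconds. A stand-alone sketch (illustrative only, not from the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define PIT_FREQ 1193181

    /* Period in ns for a given PIT count; a count of 0 encodes 65536. */
    static uint32_t pit_period_ns(unsigned int count)
    {
        if (count == 0)
            count = 65536;
        return (uint32_t)(((uint64_t)count * 1000000000ULL) / PIT_FREQ);
    }

    int main(void)
    {
        /* mode 2 with count 11932 gives the classic ~100 Hz (10 ms) tick */
        printf("count=11932 -> %u ns\n", pit_period_ns(11932));
        return 0;
    }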
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/examples/xmexample.vmx
--- a/tools/examples/xmexample.vmx      Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,154 +0,0 @@
-#  -*- mode: python; -*-
-#============================================================================
-# Python configuration setup for 'xm create'.
-# This script sets the parameters used when a domain is created using 'xm create'.
-# You use a separate script for each domain you want to create, or 
-# you can set the parameters for the domain on the xm command line.
-#============================================================================
-
-import os, re
-arch = os.uname()[4]
-if re.search('64', arch):
-    arch_libdir = 'lib64'
-else:
-    arch_libdir = 'lib'
-
-#----------------------------------------------------------------------------
-# Kernel image file.
-kernel = "/usr/lib/xen/boot/vmxloader"
-
-# The domain build function. VMX domain uses 'vmx'.
-builder='vmx'
-
-# Initial memory allocation (in megabytes) for the new domain.
-memory = 128
-
-# A name for your domain. All domains must have different names.
-name = "ExampleVMXDomain"
-
-#-----------------------------------------------------------------------------
-# the number of cpus guest platform has, default=1
-#vcpus=1
-
-# enable/disalbe vmx guest ACPI, default=0 (disabled)
-#acpi=0
-
-# enable/disalbe vmx guest APIC, default=0 (disabled)
-#apic=0
-
-# List of which CPUS this domain is allowed to use, default Xen picks
-#cpus = ""         # leave to Xen to pick
-#cpus = "0"        # all vcpus run on CPU0
-#cpus = "0-3,5,^1" # run on cpus 0,2,3,5
-
-# Optionally define mac and/or bridge for the network interfaces.
-# Random MACs are assigned if not given.
-#vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0' ]
-# type=ioemu specify the NIC is an ioemu device not netfront
-vif = [ 'type=ioemu, bridge=xenbr0' ]
-# for multiple NICs in device model, 3 in this example
-#vif = [ 'type=ioemu, bridge=xenbr0', 'type=ioemu', 'type=ioemu']
-
-#----------------------------------------------------------------------------
-# Define the disk devices you want the domain to have access to, and
-# what you want them accessible as.
-# Each disk entry is of the form phy:UNAME,DEV,MODE
-# where UNAME is the device, DEV is the device name the domain will see,
-# and MODE is r for read-only, w for read-write.
-
-#disk = [ 'phy:hda1,hda1,r' ]
-disk = [ 'file:/var/images/min-el3-i386.img,ioemu:hda,w' ]
-
-#----------------------------------------------------------------------------
-# Configure the behaviour when a domain exits.  There are three 'reasons'
-# for a domain to stop: poweroff, reboot, and crash.  For each of these you
-# may specify:
-#
-#   "destroy",        meaning that the domain is cleaned up as normal;
-#   "restart",        meaning that a new domain is started in place of the old
-#                     one;
-#   "preserve",       meaning that no clean-up is done until the domain is
-#                     manually destroyed (using xm destroy, for example); or
-#   "rename-restart", meaning that the old domain is not cleaned up, but is
-#                     renamed and a new domain started in its place.
-#
-# The default is
-#
-#   on_poweroff = 'destroy'
-#   on_reboot   = 'restart'
-#   on_crash    = 'restart'
-#
-# For backwards compatibility we also support the deprecated option restart
-#
-# restart = 'onreboot' means on_poweroff = 'destroy'
-#                            on_reboot   = 'restart'
-#                            on_crash    = 'destroy'
-#
-# restart = 'always'   means on_poweroff = 'restart'
-#                            on_reboot   = 'restart'
-#                            on_crash    = 'restart'
-#
-# restart = 'never'    means on_poweroff = 'destroy'
-#                            on_reboot   = 'destroy'
-#                            on_crash    = 'destroy'
-
-#on_poweroff = 'destroy'
-#on_reboot   = 'restart'
-#on_crash    = 'restart'
-
-#============================================================================
-
-# New stuff
-device_model = '/usr/' + arch_libdir + '/xen/bin/qemu-dm'
-
-#-----------------------------------------------------------------------------
-# Disk image for 
-#cdrom=
-
-#-----------------------------------------------------------------------------
-# boot on floppy (a), hard disk (c) or CD-ROM (d) 
-#boot=[a|c|d]
-#-----------------------------------------------------------------------------
-#  write to temporary files instead of disk image files
-#snapshot=1
-
-#----------------------------------------------------------------------------
-# enable SDL library for graphics, default = 0
-sdl=0
-
-#----------------------------------------------------------------------------
-# enable VNC library for graphics, default = 1
-vnc=1
-
-#----------------------------------------------------------------------------
-# enable spawning vncviewer(only valid when vnc=1), default = 1
-vncviewer=1
-
-#----------------------------------------------------------------------------
-# no graphics, use serial port
-#nographic=0
-
-
-#-----------------------------------------------------------------------------
-#   serial port re-direct to pty deivce, /dev/pts/n 
-#   then xm console or minicom can connect
-#serial='pty'
-
-#----------------------------------------------------------------------------
-# enable ne2000, default = 0(use pcnet)
-ne2000=0
-
-
-#-----------------------------------------------------------------------------
-#   enable audio support
-#audio=1
-
-
-#-----------------------------------------------------------------------------
-#    set the real time clock to local time [default=0 i.e. set to utc]
-#localtime=1
-
-
-#-----------------------------------------------------------------------------
-#    start in full screen
-#full-screen=1   
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/rombios/rombios.diffs
--- a/tools/firmware/rombios/rombios.diffs      Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,206 +0,0 @@
---- /home/leendert/cvs/bochs/bios/rombios.c    2005-05-23 12:18:11.000000000 -0400
-+++ rombios.c  2005-06-01 23:46:45.000000000 -0400
-@@ -26,6 +26,7 @@
- 
- // ROM BIOS for use with Bochs/Plex x86 emulation environment
- 
-+#define VMXASSIST
- 
- // ROM BIOS compatability entry points:
- // ===================================
-@@ -170,7 +171,9 @@
- #define BASE_MEM_IN_K   (640 - EBDA_SIZE)
- 
-   // Define the application NAME
--#ifdef PLEX86
-+#ifdef VMXASSIST
-+#  define BX_APPNAME "VMXAssist"
-+#elif PLEX86
- #  define BX_APPNAME "Plex86"
- #else
- #  define BX_APPNAME "Bochs"
-@@ -314,7 +317,6 @@
-   ASM_END
-   }
-   
--#if 0 
-   // memcpy of count bytes
-     void 
-   memcpyb(dseg,doffset,sseg,soffset,count)
-@@ -362,6 +364,7 @@
-   ASM_END
-   }
- 
-+#if 0 
-   // memcpy of count dword
-     void 
-   memcpyd(dseg,doffset,sseg,soffset,count)
-@@ -858,6 +861,7 @@
- static void           write_byte();
- static void           write_word();
- static void           bios_printf();
-+static void           copy_e820_table();
- 
- static Bit8u          inhibit_mouse_int_and_events();
- static void           enable_mouse_int_and_events();
-@@ -1420,6 +1424,16 @@
- ASM_END
- }
- 
-+#ifdef VMXASSIST
-+void
-+copy_e820_table()
-+{
-+  Bit8u nr_entries = read_byte(0x9000, 0x1e8);
-+  write_word(0xe000, 0x8, nr_entries);
-+  memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
-+}
-+#endif /* VMXASSIST */
-+
- #if BX_DEBUG_SERIAL
- /* serial debug port*/
- #define BX_DEBUG_PORT 0x03f8
-@@ -1498,6 +1512,9 @@
-   if (c == '\n') uart_tx_byte(BX_DEBUG_PORT, '\r');
-   uart_tx_byte(BX_DEBUG_PORT, c);
- #endif
-+#ifdef VMXASSIST
-+  outb(0xE9, c);
-+#endif
- #if BX_VIRTUAL_PORTS
-   if (action & BIOS_PRINTF_DEBUG) outb(DEBUG_PORT, c);
-   if (action & BIOS_PRINTF_INFO) outb(INFO_PORT, c);
-@@ -4053,6 +4070,66 @@
-          case 0x20: // coded by osmaker aka K.J.
-             if(regs.u.r32.edx == 0x534D4150)
-             {
-+#ifdef VMXASSIST
-+              if ((regs.u.r16.bx / 0x14)* 0x14 == regs.u.r16.bx) {
-+                  Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
-+
-+                  if (regs.u.r16.bx + 0x14 <= e820_table_size) {
-+                      memcpyb(ES, regs.u.r16.di,
-+                              0xe000, 0x10 + regs.u.r16.bx, 0x14);
-+                  }
-+                  regs.u.r32.ebx += 0x14;
-+                  if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
-+                      regs.u.r32.ebx = 0;
-+                  regs.u.r32.eax = 0x534D4150;
-+                  regs.u.r32.ecx = 0x14;
-+                  CLEAR_CF();
-+                  return;
-+              } else if (regs.u.r16.bx == 1) {
-+                  extended_memory_size = inb_cmos(0x35);
-+                  extended_memory_size <<= 8;
-+                  extended_memory_size |= inb_cmos(0x34);
-+                  extended_memory_size *= 64;
-+                  if (extended_memory_size > 0x3bc000) // greater than EFF00000???
-+                  {
-+                      extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
-+                  }
-+                  extended_memory_size *= 1024;
-+                  extended_memory_size += 15728640; // make up for the 16mb of memory that is chopped off
-+
-+                  if (extended_memory_size <= 15728640)
-+                  {
-+                      extended_memory_size = inb_cmos(0x31);
-+                      extended_memory_size <<= 8;
-+                      extended_memory_size |= inb_cmos(0x30);
-+                      extended_memory_size *= 1024;
-+                  }
-+
-+                  write_word(ES, regs.u.r16.di, 0x0000);
-+                  write_word(ES, regs.u.r16.di+2, 0x0010);
-+                  write_word(ES, regs.u.r16.di+4, 0x0000);
-+                  write_word(ES, regs.u.r16.di+6, 0x0000);
-+
-+                  write_word(ES, regs.u.r16.di+8, extended_memory_size);
-+                  extended_memory_size >>= 16;
-+                  write_word(ES, regs.u.r16.di+10, extended_memory_size);
-+                  extended_memory_size >>= 16;
-+                  write_word(ES, regs.u.r16.di+12, extended_memory_size);
-+                  extended_memory_size >>= 16;
-+                  write_word(ES, regs.u.r16.di+14, extended_memory_size);
-+
-+                  write_word(ES, regs.u.r16.di+16, 0x1);
-+                  write_word(ES, regs.u.r16.di+18, 0x0);
-+
-+                  regs.u.r32.ebx = 0;
-+                  regs.u.r32.eax = 0x534D4150;
-+                  regs.u.r32.ecx = 0x14;
-+                  CLEAR_CF();
-+                  return;
-+              } else { /* AX=E820, DX=534D4150, BX unrecognized */
-+                  goto int15_unimplemented;
-+              }
-+#else
-                 switch(regs.u.r16.bx)
-                 {
-                     case 0:
-@@ -4070,6 +4147,7 @@
-                         write_word(ES, regs.u.r16.di+18, 0x0);
- 
-                         regs.u.r32.ebx = 1;
-+
-                         regs.u.r32.eax = 0x534D4150;
-                         regs.u.r32.ecx = 0x14;
-                         CLEAR_CF();
-@@ -4121,6 +4199,7 @@
-                         goto int15_unimplemented;
-                         break;
-                 }
-+#endif
-           } else {
-             // if DX != 0x534D4150)
-             goto int15_unimplemented;
-@@ -9497,9 +9576,16 @@
-   ;; int 1C already points at dummy_iret_handler (above)
-   mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
-   out 0x43, al
-+#ifdef VMXASSIST
-+  mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
-+  out 0x40, al ; lsb
-+  mov al, #0xe9
-+  out 0x40, al ; msb
-+#else
-   mov al, #0x00 ; maximum count of 0000H = 18.2Hz
-   out 0x40, al
-   out 0x40, al
-+#endif
- 
-   ;; Keyboard
-   SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
-@@ -9597,10 +9683,22 @@
-   mov al, #0x11 ; send initialisation commands
-   out 0x20, al
-   out 0xa0, al
-+#ifdef VMXASSIST
-+  ;; The vm86 emulator expects interrupts to be mapped beyond the reserved
-+  ;; vectors (0 through 31). Since rombios fully controls the hardware, we
-+  ;; map it the way the emulator needs it and expect that it will do the
-+  ;; proper 8086 interrupt translation (that is, master pic base is at 0x8
-+  ;; and slave pic base is at 0x70).
-+  mov al, #0x20
-+  out 0x21, al
-+  mov al, #0x28
-+  out 0xa1, al
-+#else
-   mov al, #0x08
-   out 0x21, al
-   mov al, #0x70
-   out 0xa1, al
-+#endif
-   mov al, #0x04
-   out 0x21, al
-   mov al, #0x02
-@@ -9617,6 +9715,10 @@
- #endif
-   out  0xa1, AL ;slave  pic: unmask IRQ 12, 13, 14
- 
-+#ifdef VMXASSIST
-+  call _copy_e820_table
-+#endif
-+
-   call pcibios_init
- 
-   call rom_scan
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/acpi_madt.c
--- a/tools/firmware/vmxassist/acpi_madt.c      Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,189 +0,0 @@
-/*
- * acpi_madt.c: Update ACPI MADT table for multiple processor guest.
- *
- * Yu Ke, ke.yu@xxxxxxxxx
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include "../acpi/acpi2_0.h"
-#include "../acpi/acpi_madt.h"
-
-#include <xen/hvm/hvm_info_table.h>
-
-#define NULL ((void*)0)
-
-extern int puts(const char *s);
-
-static struct hvm_info_table *table = NULL;
-
-static int validate_hvm_info(struct hvm_info_table *t)
-{
-       char signature[] = "HVM INFO";
-       uint8_t *ptr = (uint8_t *)t;
-       uint8_t sum = 0;
-       int i;
-
-       /* strncmp(t->signature, "HVM INFO", 8) */
-       for (i = 0; i < 8; i++) {
-               if (signature[i] != t->signature[i]) {
-                       puts("Bad hvm info signature\n");
-                       return 0;
-               }
-       }
-
-       for (i = 0; i < t->length; i++)
-               sum += ptr[i];
-
-       return (sum == 0);
-}
-
-/* xc_vmx_builder wrote hvm info at 0x9F800. Return it. */
-static struct hvm_info_table *
-get_hvm_info_table(void)
-{
-       struct hvm_info_table *t;
-
-       if (table != NULL)
-               return table;
-
-       t = (struct hvm_info_table *)HVM_INFO_PADDR;
-
-       if (!validate_hvm_info(t)) {
-               puts("Bad hvm info table\n");
-               return NULL;
-       }
-
-       table = t;
-
-       return table;
-}
-
-int
-get_vcpu_nr(void)
-{
-       struct hvm_info_table *t = get_hvm_info_table();
-       return (t ? t->nr_vcpus : 1); /* default 1 vcpu */
-}
-
-int
-get_acpi_enabled(void)
-{
-       struct hvm_info_table *t = get_hvm_info_table();
-       return (t ? t->acpi_enabled : 0); /* default no acpi */
-}
-
-static void *
-acpi_madt_get_madt(unsigned char *acpi_start)
-{
-       ACPI_2_0_RSDP *rsdp=NULL;
-       ACPI_2_0_RSDT *rsdt=NULL;
-       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
-
-       rsdp = (ACPI_2_0_RSDP *)(acpi_start + sizeof(ACPI_2_0_FACS));
-       if (rsdp->Signature != ACPI_2_0_RSDP_SIGNATURE) {
-               puts("Bad RSDP signature\n");
-               return NULL;
-       }
-
-       rsdt= (ACPI_2_0_RSDT *)
-               (acpi_start + rsdp->RsdtAddress - ACPI_PHYSICAL_ADDRESS);
-       if (rsdt->Header.Signature != ACPI_2_0_RSDT_SIGNATURE) {
-               puts("Bad RSDT signature\n");
-               return NULL;
-       }
-
-       madt = (ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *)
-               ( acpi_start+ rsdt->Entry[1] - ACPI_PHYSICAL_ADDRESS);
-       if (madt->Header.Header.Signature !=
-           ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE_SIGNATURE) {
-               puts("Bad MADT signature \n");
-               return NULL;
-       }
-
-       return madt;
-}
-
-static void
-set_checksum(void *start, int checksum_offset, int len)
-{
-       unsigned char sum = 0;
-       unsigned char *ptr;
-
-       ptr = start;
-       ptr[checksum_offset] = 0;
-       while (len--)
-               sum += *ptr++;
-
-       ptr = start;
-       ptr[checksum_offset] = -sum;
-}
-
-static int
-acpi_madt_set_local_apics(
-       int nr_vcpu,
-       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
-{
-       int i;
-
-       if ((nr_vcpu > MAX_VIRT_CPUS) || (nr_vcpu < 0) || !madt)
-               return -1;
-
-       for (i = 0; i < nr_vcpu; i++) {
-               madt->LocalApic[i].Type            = ACPI_PROCESSOR_LOCAL_APIC;
-               madt->LocalApic[i].Length          = sizeof (ACPI_LOCAL_APIC_STRUCTURE);
-               madt->LocalApic[i].AcpiProcessorId = i;
-               madt->LocalApic[i].ApicId          = i;
-               madt->LocalApic[i].Flags           = 1;
-       }
-
-       madt->Header.Header.Length =
-               sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) -
-               (MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
-
-       return 0;
-}
-
-#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
-
-int acpi_madt_update(unsigned char *acpi_start)
-{
-       int rc;
-       ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
-
-       madt = acpi_madt_get_madt(acpi_start);
-       if (!madt)
-               return -1;
-
-       rc = acpi_madt_set_local_apics(get_vcpu_nr(), madt);
-       if (rc != 0)
-               return rc;
-
-       set_checksum(
-               madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
-               madt->Header.Header.Length);
-
-       return 0;
-}
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/mkhex
--- a/tools/firmware/vmxassist/mkhex    Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,26 +0,0 @@
-#!/bin/sh
-
-#
-# mkhex: Generate C embeddable hexdumps
-#
-# Leendert van Doorn, leendert@xxxxxxxxxxxxxx
-# Copyright (c) 2005, International Business Machines Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-# Place - Suite 330, Boston, MA 02111-1307 USA.
-#
-
-echo "unsigned $1[] = {"
-od -v -t x $2 | sed 's/^[0-9]* /0x/' | sed 's/ /, 0x/g' | sed 's/$/,/'
-echo "};"
-
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/firmware/vmxassist/vmxloader.c
--- a/tools/firmware/vmxassist/vmxloader.c      Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,140 +0,0 @@
-/*
- * vmxloader.c: ROM/VMXAssist image loader.
- *
- * A quick hack so that we can boot ROM images as if they were a Linux kernel.
- * This code will copy the ROM images (ROMBIOS/VGABIOS/VM86) into their
- * respective spaces and transfer control to VM86 to execute the BIOSes.
- *
- * Leendert van Doorn, leendert@xxxxxxxxxxxxxx
- * Copyright (c) 2005, International Business Machines Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#include "machine.h"
-#include "roms.h"
-
-#include "acpi.h"
-#include "../acpi/acpi2_0.h"  // for ACPI_PHYSICAL_ADDRESS
-int acpi_madt_update(unsigned char* acpi_start);
-int get_acpi_enabled(void);
-
-/*
- * C runtime start off
- */
-asm(
-"      .text                           \n"
-"      .globl  _start                  \n"
-"_start:                               \n"
-"      cld                             \n"
-"      cli                             \n"
-"      lgdt    gdt_desr                \n"
-"      movl    $stack_top, %esp        \n"
-"      movl    %esp, %ebp              \n"
-"      call    main                    \n"
-"      jmp     halt                    \n"
-"                                      \n"
-"gdt_desr:                             \n"
-"      .word   gdt_end - gdt - 1       \n"
-"      .long   gdt                     \n"
-"                                      \n"
-"      .align  8                       \n"
-"gdt:                                  \n"
-"      .quad   0x0000000000000000      \n"
-"      .quad   0x00CF92000000FFFF      \n"
-"      .quad   0x00CF9A000000FFFF      \n"
-"gdt_end:                              \n"
-"                                      \n"
-"halt:                                 \n"
-"      sti                             \n"
-"      jmp     .                       \n"
-"                                      \n"
-"      .bss                            \n"
-"      .align  8                       \n"
-"stack:                                        \n"
-"      .skip   0x4000                  \n"
-"stack_top:                            \n"
-);
-
-void *
-memcpy(void *dest, const void *src, unsigned n)
-{
-       int t0, t1, t2;
-
-       __asm__ __volatile__(
-               "cld\n"
-               "rep; movsl\n"
-               "testb $2,%b4\n"
-               "je 1f\n"
-               "movsw\n"
-               "1: testb $1,%b4\n"
-               "je 2f\n"
-               "movsb\n"
-               "2:"
-               : "=&c" (t0), "=&D" (t1), "=&S" (t2)
-               : "0" (n/4), "q" (n), "1" ((long) dest), "2" ((long) src)
-               : "memory"
-       );
-       return dest;
-}
-
-int
-puts(const char *s)
-{
-       while (*s)
-               outb(0xE9, *s++);
-       return 0;
-}
-
-int
-cirrus_check(void)
-{
-       outw(0x3C4, 0x9206);
-       return inb(0x3C5) == 0x12;
-}
-
-int
-main(void)
-{
-       puts("VMXAssist Loader\n");
-       puts("Loading ROMBIOS ...\n");
-       memcpy((void *)0xF0000, rombios, sizeof(rombios));
-       if (cirrus_check()) {
-               puts("Loading Cirrus VGABIOS ...\n");
-               memcpy((void *)0xC0000,
-                       vgabios_cirrusvga, sizeof(vgabios_cirrusvga));
-       } else {
-               puts("Loading Standard VGABIOS ...\n");
-               memcpy((void *)0xC0000,
-                       vgabios_stdvga, sizeof(vgabios_stdvga));
-       }
-
-       if (get_acpi_enabled() != 0) {
-               puts("Loading ACPI ...\n");
-               acpi_madt_update((unsigned char*)acpi);
-               if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
-                       /* Make sure the ACPI tables do not overlap the
-                        * ROMBIOS; currently anything under 8K is fine.
-                        */
-                       memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
-               }
-       }
-
-       puts("Loading VMXAssist ...\n");
-       memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
-
-       puts("Go ...\n");
-       asm volatile ( "jmp *%%eax" : : "a" (TEXTADDR), "d" (0) );
-
-       return 0;
-}
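
A note on the memcpy() above: the inline assembly copies n/4 words with
"rep movsl", then tests bits 1 and 0 of n to copy an optional trailing
halfword and byte. The same logic in portable C, as a sketch only
(hypothetical memcpy_sketch; the loader keeps the asm version,
presumably to stay self-contained in its freestanding environment):

    #include <stdint.h>

    static void *memcpy_sketch(void *dest, const void *src, unsigned n)
    {
        uint32_t *d = dest;
        const uint32_t *s = src;
        unsigned i;

        for (i = 0; i < n / 4; i++)          /* rep movsl */
            *d++ = *s++;

        {
            uint16_t *d2 = (uint16_t *)d;
            const uint16_t *s2 = (const uint16_t *)s;

            if (n & 2)                       /* testb $2 -> movsw */
                *d2++ = *s2++;
            if (n & 1)                       /* testb $1 -> movsb */
                *(uint8_t *)d2 = *(const uint8_t *)s2;
        }
        return dest;
    }
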
diff -r e4eb12a6e003 -r f1b361b05bf3 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,848 +0,0 @@
-/******************************************************************************
- * xc_vmx_build.c
- */
-
-#include <stddef.h>
-#include "xg_private.h"
-#define ELFSIZE 32
-#include "xc_elf.h"
-#include <stdlib.h>
-#include <unistd.h>
-#include <zlib.h>
-#include <xen/hvm/hvm_info_table.h>
-#include <xen/hvm/ioreq.h>
-
-#define VMX_LOADER_ENTR_ADDR  0x00100000
-
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#ifdef __x86_64__
-#define L3_PROT (_PAGE_PRESENT)
-#endif
-
-#define E820MAX        128
-
-#define E820_RAM          1
-#define E820_RESERVED     2
-#define E820_ACPI         3
-#define E820_NVS          4
-#define E820_IO          16
-#define E820_SHARED_PAGE 17
-#define E820_XENSTORE    18
-
-#define E820_MAP_PAGE       0x00090000
-#define E820_MAP_NR_OFFSET  0x000001E8
-#define E820_MAP_OFFSET     0x000002D0
-
-struct e820entry {
-    uint64_t addr;
-    uint64_t size;
-    uint32_t type;
-} __attribute__((packed));
-
-#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p)  ((_p)&PAGE_MASK)
-
-static int
-parseelfimage(
-    char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
-static int
-loadelfimage(
-    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
-    struct domain_setup_info *dsi);
-
-static unsigned char build_e820map(void *e820_page, unsigned long mem_size)
-{
-    struct e820entry *e820entry =
-        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
-    unsigned char nr_map = 0;
-
-    /* XXX: Doesn't work for > 4GB yet */
-    e820entry[nr_map].addr = 0x0;
-    e820entry[nr_map].size = 0x9F800;
-    e820entry[nr_map].type = E820_RAM;
-    nr_map++;
-
-    e820entry[nr_map].addr = 0x9F800;
-    e820entry[nr_map].size = 0x800;
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-    e820entry[nr_map].addr = 0xA0000;
-    e820entry[nr_map].size = 0x20000;
-    e820entry[nr_map].type = E820_IO;
-    nr_map++;
-
-    e820entry[nr_map].addr = 0xF0000;
-    e820entry[nr_map].size = 0x10000;
-    e820entry[nr_map].type = E820_RESERVED;
-    nr_map++;
-
-#define STATIC_PAGES    2       /* for ioreq_t and store_mfn */
-    /* Most of the ram goes here */
-    e820entry[nr_map].addr = 0x100000;
-    e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES*PAGE_SIZE;
-    e820entry[nr_map].type = E820_RAM;
-    nr_map++;
-
-    /* Statically allocated special pages */
-
-    /* Shared ioreq_t page */
-    e820entry[nr_map].addr = mem_size - PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
-    e820entry[nr_map].type = E820_SHARED_PAGE;
-    nr_map++;
-
-    /* For xenstore */
-    e820entry[nr_map].addr = mem_size - 2*PAGE_SIZE;
-    e820entry[nr_map].size = PAGE_SIZE;
-    e820entry[nr_map].type = E820_XENSTORE;
-    nr_map++;
-
-    e820entry[nr_map].addr = mem_size;
-    e820entry[nr_map].size = 0x3 * PAGE_SIZE;
-    e820entry[nr_map].type = E820_NVS;
-    nr_map++;
-
-    e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
-    e820entry[nr_map].size = 0xA * PAGE_SIZE;
-    e820entry[nr_map].type = E820_ACPI;
-    nr_map++;
-
-    e820entry[nr_map].addr = 0xFEC00000;
-    e820entry[nr_map].size = 0x1400000;
-    e820entry[nr_map].type = E820_IO;
-    nr_map++;
-
-    return (*(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map);
-}
-
-static void
-set_hvm_info_checksum(struct hvm_info_table *t)
-{
-    uint8_t *ptr = (uint8_t *)t, sum = 0;
-    unsigned int i;
-
-    t->checksum = 0;
-
-    for (i = 0; i < t->length; i++)
-        sum += *ptr++;
-
-    t->checksum = -sum;
-}
-
-/*
- * Use the E820 reserved region at 0x9F800 to pass HVM info to vmxloader;
- * vmxloader uses this info to configure the BIOS accordingly.
- */
-static int set_hvm_info(int xc_handle, uint32_t dom,
-                        unsigned long *pfn_list, unsigned int vcpus,
-                        unsigned int acpi, unsigned int apic)
-{
-    char *va_map;
-    struct hvm_info_table *va_hvm;
-
-    va_map = xc_map_foreign_range(
-        xc_handle,
-        dom,
-        PAGE_SIZE,
-        PROT_READ|PROT_WRITE,
-        pfn_list[HVM_INFO_PFN]);
-    
-    if ( va_map == NULL )
-        return -1;
-
-    va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
-    memset(va_hvm, 0, sizeof(*va_hvm));
-    strncpy(va_hvm->signature, "HVM INFO", 8);
-    va_hvm->length       = sizeof(struct hvm_info_table);
-    va_hvm->acpi_enabled = acpi;
-    va_hvm->apic_enabled = apic;
-    va_hvm->nr_vcpus     = vcpus;
-
-    set_hvm_info_checksum(va_hvm);
-
-    munmap(va_map, PAGE_SIZE);
-
-    return 0;
-}
-
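
The guest firmware that consumes this page is expected to validate it
the same way it is built here: check the 8-byte signature, then verify
that all bytes of the table sum to zero. A sketch of such a
consumer-side check (hypothetical hvm_info_valid(); struct
hvm_info_table comes from xen/hvm/hvm_info_table.h as included above,
and <stdint.h>/<string.h> are assumed):

    static int hvm_info_valid(const struct hvm_info_table *t)
    {
        const uint8_t *p = (const uint8_t *)t;
        uint8_t sum = 0;
        uint32_t i;

        if (memcmp(t->signature, "HVM INFO", 8) != 0)
            return 0;
        for (i = 0; i < t->length; i++)
            sum += p[i];
        return sum == 0;   /* checksum field makes the bytes sum to 0 */
    }
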
-#ifdef __i386__
-static int zap_mmio_range(int xc_handle, uint32_t dom,
-                          l2_pgentry_32_t *vl2tab,
-                          unsigned long mmio_range_start,
-                          unsigned long mmio_range_size)
-{
-    unsigned long mmio_addr;
-    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
-    unsigned long vl2e;
-    l1_pgentry_32_t *vl1tab;
-
-    mmio_addr = mmio_range_start & PAGE_MASK;
-    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
-        vl2e = vl2tab[l2_table_offset(mmio_addr)];
-        if (vl2e == 0)
-            continue;
-        vl1tab = xc_map_foreign_range(
-            xc_handle, dom, PAGE_SIZE,
-            PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
-        if ( vl1tab == 0 )
-        {
-            PERROR("Failed zap MMIO range");
-            return -1;
-        }
-        vl1tab[l1_table_offset(mmio_addr)] = 0;
-        munmap(vl1tab, PAGE_SIZE);
-    }
-    return 0;
-}
-
-static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l2tab,
-                           unsigned char e820_map_nr, unsigned char *e820map)
-{
-    unsigned int i;
-    struct e820entry *e820entry = (struct e820entry *)e820map;
-
-    l2_pgentry_32_t *vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                   PROT_READ|PROT_WRITE,
-                                                   l2tab >> PAGE_SHIFT);
-    if ( vl2tab == 0 )
-        return -1;
-
-    for ( i = 0; i < e820_map_nr; i++ )
-    {
-        if ( (e820entry[i].type == E820_IO) &&
-             (zap_mmio_range(xc_handle, dom, vl2tab,
-                             e820entry[i].addr, e820entry[i].size) == -1))
-            return -1;
-    }
-
-    munmap(vl2tab, PAGE_SIZE);
-    return 0;
-}
-#else
-static int zap_mmio_range(int xc_handle, uint32_t dom,
-                          l3_pgentry_t *vl3tab,
-                          unsigned long mmio_range_start,
-                          unsigned long mmio_range_size)
-{
-    unsigned long mmio_addr;
-    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
-    unsigned long vl2e = 0;
-    unsigned long vl3e;
-    l1_pgentry_t *vl1tab;
-    l2_pgentry_t *vl2tab;
-
-    mmio_addr = mmio_range_start & PAGE_MASK;
-    for ( ; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE )
-    {
-        vl3e = vl3tab[l3_table_offset(mmio_addr)];
-        if ( vl3e == 0 )
-            continue;
-
-        vl2tab = xc_map_foreign_range(
-            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl3e>>PAGE_SHIFT);
-        if ( vl2tab == NULL )
-        {
-            PERROR("Failed zap MMIO range");
-            return -1;
-        }
-
-        vl2e = vl2tab[l2_table_offset(mmio_addr)];
-        if ( vl2e == 0 )
-        {
-            munmap(vl2tab, PAGE_SIZE);
-            continue;
-        }
-
-        vl1tab = xc_map_foreign_range(
-            xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, vl2e>>PAGE_SHIFT);
-        if ( vl1tab == NULL )
-        {
-            PERROR("Failed zap MMIO range");
-            munmap(vl2tab, PAGE_SIZE);
-            return -1;
-        }
-
-        vl1tab[l1_table_offset(mmio_addr)] = 0;
-        munmap(vl2tab, PAGE_SIZE);
-        munmap(vl1tab, PAGE_SIZE);
-    }
-    return 0;
-}
-
-static int zap_mmio_ranges(int xc_handle, uint32_t dom, unsigned long l3tab,
-                           unsigned char e820_map_nr, unsigned char *e820map)
-{
-    unsigned int i;
-    struct e820entry *e820entry = (struct e820entry *)e820map;
-
-    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l3tab >> PAGE_SHIFT);
-    if (vl3tab == 0)
-        return -1;
-    for ( i = 0; i < e820_map_nr; i++ ) {
-        if ( (e820entry[i].type == E820_IO) &&
-             (zap_mmio_range(xc_handle, dom, vl3tab,
-                             e820entry[i].addr, e820entry[i].size) == -1) )
-            return -1;
-    }
-    munmap(vl3tab, PAGE_SIZE);
-    return 0;
-}
-
-#endif
-
-static int setup_guest(int xc_handle,
-                       uint32_t dom, int memsize,
-                       char *image, unsigned long image_size,
-                       unsigned long nr_pages,
-                       vcpu_guest_context_t *ctxt,
-                       unsigned long shared_info_frame,
-                       unsigned int control_evtchn,
-                       unsigned int vcpus,
-                       unsigned int acpi,
-                       unsigned int apic,
-                       unsigned int store_evtchn,
-                       unsigned long *store_mfn)
-{
-    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
-    unsigned long *page_array = NULL;
-#ifdef __x86_64__
-    l3_pgentry_t *vl3tab=NULL;
-    unsigned long l3tab;
-#endif
-    unsigned long l2tab = 0;
-    unsigned long l1tab = 0;
-    unsigned long count, i;
-    shared_info_t *shared_info;
-    void *e820_page;
-    unsigned char e820_map_nr;
-    xc_mmu_t *mmu = NULL;
-    int rc;
-
-    unsigned long nr_pt_pages;
-    unsigned long ppt_alloc;
-
-    struct domain_setup_info dsi;
-    unsigned long vpt_start;
-    unsigned long vpt_end;
-    unsigned long v_end;
-
-    unsigned long shared_page_frame = 0;
-    shared_iopage_t *sp;
-
-    memset(&dsi, 0, sizeof(struct domain_setup_info));
-
-    if ( (rc = parseelfimage(image, image_size, &dsi)) != 0 )
-        goto error_out;
-
-    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
-    {
-        PERROR("Guest OS must load to a page boundary.\n");
-        goto error_out;
-    }
-
-    /* memsize is in megabytes */
-    v_end              = (unsigned long)memsize << 20;
-
-#ifdef __i386__
-    nr_pt_pages = 1 + ((memsize + 3) >> 2);
-#else
-    nr_pt_pages = 5 + ((memsize + 1) >> 1);
-#endif
-    vpt_start   = v_end;
-    vpt_end     = vpt_start + (nr_pt_pages * PAGE_SIZE);
-
-    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
-           " Loaded VMX loader: %08lx->%08lx\n"
-           " Page tables:   %08lx->%08lx\n"
-           " TOTAL:         %08lx->%08lx\n",
-           dsi.v_kernstart, dsi.v_kernend,
-           vpt_start, vpt_end,
-           dsi.v_start, v_end);
-    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
-
-    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
-    {
-        ERROR("Initial guest OS requires too much space\n"
-               "(%luMB is greater than %luMB limit)\n",
-               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
-        goto error_out;
-    }
-
-    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
-    {
-        PERROR("Could not allocate memory");
-        goto error_out;
-    }
-
-    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
-    {
-        PERROR("Could not get the page frame list");
-        goto error_out;
-    }
-
-    loadelfimage(image, xc_handle, dom, page_array, &dsi);
-
-    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
-        goto error_out;
-
-    /* First allocate page for page dir or pdpt */
-    ppt_alloc = vpt_start >> PAGE_SHIFT;
-    if ( page_array[ppt_alloc] > 0xfffff )
-    {
-        unsigned long nmfn;
-        nmfn = xc_make_page_below_4G( xc_handle, dom, page_array[ppt_alloc] );
-        if ( nmfn == 0 )
-        {
-            fprintf(stderr, "Couldn't get a page below 4GB :-(\n");
-            goto error_out;
-        }
-        page_array[ppt_alloc] = nmfn;
-    }
-
-#ifdef __i386__
-    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-    ctxt->ctrlreg[3] = l2tab;
-
-    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                        PROT_READ|PROT_WRITE,
-                                        l2tab >> PAGE_SHIFT)) == NULL )
-        goto error_out;
-    memset(vl2tab, 0, PAGE_SIZE);
-    vl2e = &vl2tab[l2_table_offset(0)];
-    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
-    {
-        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
-        {
-            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-            if ( vl1tab != NULL )
-                munmap(vl1tab, PAGE_SIZE);
-            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l1tab >> PAGE_SHIFT)) == NULL )
-            {
-                munmap(vl2tab, PAGE_SIZE);
-                goto error_out;
-            }
-            memset(vl1tab, 0, PAGE_SIZE);
-            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
-            *vl2e++ = l1tab | L2_PROT;
-        }
-
-        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
-        vl1e++;
-    }
-    munmap(vl1tab, PAGE_SIZE);
-    munmap(vl2tab, PAGE_SIZE);
-#else
-    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-    ctxt->ctrlreg[3] = l3tab;
-
-    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                        PROT_READ|PROT_WRITE,
-                                        l3tab >> PAGE_SHIFT)) == NULL )
-        goto error_out;
-    memset(vl3tab, 0, PAGE_SIZE);
-
-    /* Fill in every PDPT entry. */
-    for ( i = 0; i < L3_PAGETABLE_ENTRIES_PAE; i++ )
-    {
-        l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-        if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            l2tab >> PAGE_SHIFT)) == NULL )
-            goto error_out;
-        memset(vl2tab, 0, PAGE_SIZE);
-        munmap(vl2tab, PAGE_SIZE);
-        vl2tab = NULL;
-        vl3tab[i] = l2tab | L3_PROT;
-    }
-
-    for ( count = 0; count < (v_end >> PAGE_SHIFT); count++ )
-    {
-        if ( !(count & ((1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)) - 1)) )
-        {
-            l2tab = vl3tab[count >> (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]
-                    & PAGE_MASK;
-
-            if (vl2tab != NULL)
-                munmap(vl2tab, PAGE_SIZE);
-
-            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l2tab >> PAGE_SHIFT)) == NULL )
-                goto error_out;
-
-            vl2e = &vl2tab[l2_table_offset(count << PAGE_SHIFT)];
-        }
-        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
-        {
-            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
-            if ( vl1tab != NULL )
-                munmap(vl1tab, PAGE_SIZE);
-            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
-                                                PROT_READ|PROT_WRITE,
-                                                l1tab >> PAGE_SHIFT)) == NULL )
-            {
-                munmap(vl2tab, PAGE_SIZE);
-                goto error_out;
-            }
-            memset(vl1tab, 0, PAGE_SIZE);
-            vl1e = &vl1tab[l1_table_offset(count << PAGE_SHIFT)];
-            *vl2e++ = l1tab | L2_PROT;
-        }
-
-        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
-        vl1e++;
-    }
-
-    munmap(vl1tab, PAGE_SIZE);
-    munmap(vl2tab, PAGE_SIZE);
-    munmap(vl3tab, PAGE_SIZE);
-#endif
-    /* Write the machine->phys table entries. */
-    for ( count = 0; count < nr_pages; count++ )
-    {
-        if ( xc_add_mmu_update(xc_handle, mmu,
-                               (page_array[count] << PAGE_SHIFT) |
-                               MMU_MACHPHYS_UPDATE, count) )
-            goto error_out;
-    }
-
-    if ( set_hvm_info(xc_handle, dom, page_array, vcpus, acpi, apic) ) {
-        fprintf(stderr, "Couldn't set hvm info for VMX guest.\n");
-        goto error_out;
-    }
-
-    if ( (e820_page = xc_map_foreign_range(
-         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-         page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
-        goto error_out;
-    memset(e820_page, 0, PAGE_SIZE);
-    e820_map_nr = build_e820map(e820_page, v_end);
-#if defined (__i386__)
-    if (zap_mmio_ranges(xc_handle, dom, l2tab, e820_map_nr,
-                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
-#else
-    if (zap_mmio_ranges(xc_handle, dom, l3tab, e820_map_nr,
-                        ((unsigned char *)e820_page) + E820_MAP_OFFSET) == -1)
-#endif
-        goto error_out;
-    munmap(e820_page, PAGE_SIZE);
-
-    /* shared_info page starts its life empty. */
-    if ( (shared_info = xc_map_foreign_range(
-         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-         shared_info_frame)) == 0 )
-        goto error_out;
-    memset(shared_info, 0, sizeof(shared_info_t));
-    /* Mask all upcalls... */
-    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-    munmap(shared_info, PAGE_SIZE);
-
-    /* Populate the event channel port in the shared page */
-    shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
-    if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
-         xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
-         shared_page_frame)) == 0 )
-        goto error_out;
-    memset(sp, 0, PAGE_SIZE);
-    sp->sp_global.eport = control_evtchn;
-    munmap(sp, PAGE_SIZE);
-
-    *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
-    if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
-        goto error_out;
-
-    /* Send the page update requests down to the hypervisor. */
-    if ( xc_finish_mmu_updates(xc_handle, mmu) )
-        goto error_out;
-
-    free(mmu);
-    free(page_array);
-
-    /*
-     * Initial register values:
-     */
-    ctxt->user_regs.ds = 0;
-    ctxt->user_regs.es = 0;
-    ctxt->user_regs.fs = 0;
-    ctxt->user_regs.gs = 0;
-    ctxt->user_regs.ss = 0;
-    ctxt->user_regs.cs = 0;
-    ctxt->user_regs.eip = dsi.v_kernentry;
-    ctxt->user_regs.edx = 0;
-    ctxt->user_regs.eax = 0;
-    ctxt->user_regs.esp = 0;
-    ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
-    ctxt->user_regs.ecx = 0;
-    ctxt->user_regs.esi = 0;
-    ctxt->user_regs.edi = 0;
-    ctxt->user_regs.ebp = 0;
-
-    ctxt->user_regs.eflags = 0;
-
-    return 0;
-
- error_out:
-    free(mmu);
-    free(page_array);
-    return -1;
-}
-
-int xc_vmx_build(int xc_handle,
-                 uint32_t domid,
-                 int memsize,
-                 const char *image_name,
-                 unsigned int control_evtchn,
-                 unsigned int vcpus,
-                 unsigned int acpi,
-                 unsigned int apic,
-                 unsigned int store_evtchn,
-                 unsigned long *store_mfn)
-{
-    dom0_op_t launch_op, op;
-    int rc, i;
-    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
-    unsigned long nr_pages;
-    char         *image = NULL;
-    unsigned long image_size;
-    xen_capabilities_info_t xen_caps;
-
-    if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
-    {
-        PERROR("Failed to get xen version info");
-        goto error_out;
-    }
-
-    if ( !strstr(xen_caps, "hvm") )
-    {
-        PERROR("CPU doesn't support VMX Extensions or "
-               "CPU VMX Extensions are not turned on");
-        goto error_out;
-    }
-
-    if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
-    {
-        PERROR("Could not find total pages for domain");
-        goto error_out;
-    }
-
-    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
-        goto error_out;
-
-    if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
-    {
-        PERROR("%s: ctxt mlock failed", __func__);
-        return 1;
-    }
-
-    op.cmd = DOM0_GETDOMAININFO;
-    op.u.getdomaininfo.domain = (domid_t)domid;
-    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
-         ((uint16_t)op.u.getdomaininfo.domain != domid) )
-    {
-        PERROR("Could not get info on domain");
-        goto error_out;
-    }
-
-    memset(ctxt, 0, sizeof(*ctxt));
-
-    if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
-                     ctxt, op.u.getdomaininfo.shared_info_frame, 
control_evtchn,
-                     vcpus, acpi, apic, store_evtchn, store_mfn) < 0)
-    {
-        ERROR("Error constructing guest OS");
-        goto error_out;
-    }
-
-    free(image);
-
-    ctxt->flags = VGCF_VMX_GUEST;
-    /* FPU is set up to default initial state. */
-    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
-
-    /* Virtual IDT is empty at start-of-day. */
-    for ( i = 0; i < 256; i++ )
-    {
-        ctxt->trap_ctxt[i].vector = i;
-        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
-    }
-
-    /* No LDT. */
-    ctxt->ldt_ents = 0;
-
-    /* Use the default Xen-provided GDT. */
-    ctxt->gdt_ents = 0;
-
-    /* No debugging. */
-    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
-
-    /* No callback handlers. */
-#if defined(__i386__)
-    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
-    ctxt->event_callback_eip    = 0;
-    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
-    ctxt->failsafe_callback_eip = 0;
-#elif defined(__x86_64__)
-    ctxt->event_callback_eip    = 0;
-    ctxt->failsafe_callback_eip = 0;
-    ctxt->syscall_callback_eip  = 0;
-#endif
-
-    memset( &launch_op, 0, sizeof(launch_op) );
-
-    launch_op.u.setvcpucontext.domain = (domid_t)domid;
-    launch_op.u.setvcpucontext.vcpu   = 0;
-    launch_op.u.setvcpucontext.ctxt   = ctxt;
-
-    launch_op.cmd = DOM0_SETVCPUCONTEXT;
-    rc = xc_dom0_op(xc_handle, &launch_op);
-
-    return rc;
-
- error_out:
-    free(image);
-    return -1;
-}
-
-static inline int is_loadable_phdr(Elf32_Phdr *phdr)
-{
-    return ((phdr->p_type == PT_LOAD) &&
-            ((phdr->p_flags & (PF_W|PF_X)) != 0));
-}
-
-static int parseelfimage(char *elfbase,
-                         unsigned long elfsize,
-                         struct domain_setup_info *dsi)
-{
-    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
-    Elf32_Phdr *phdr;
-    Elf32_Shdr *shdr;
-    unsigned long kernstart = ~0UL, kernend=0UL;
-    char *shstrtab;
-    int h;
-
-    if ( !IS_ELF(*ehdr) )
-    {
-        ERROR("Kernel image does not have an ELF header.");
-        return -EINVAL;
-    }
-
-    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
-    {
-        ERROR("ELF program headers extend beyond end of image.");
-        return -EINVAL;
-    }
-
-    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
-    {
-        ERROR("ELF section headers extend beyond end of image.");
-        return -EINVAL;
-    }
-
-    /* Find the section-header strings table. */
-    if ( ehdr->e_shstrndx == SHN_UNDEF )
-    {
-        ERROR("ELF image has no section-header strings table (shstrtab).");
-        return -EINVAL;
-    }
-    shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
-                          (ehdr->e_shstrndx*ehdr->e_shentsize));
-    shstrtab = elfbase + shdr->sh_offset;
-
-    for ( h = 0; h < ehdr->e_phnum; h++ )
-    {
-        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
-        if ( !is_loadable_phdr(phdr) )
-            continue;
-        if ( phdr->p_paddr < kernstart )
-            kernstart = phdr->p_paddr;
-        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
-            kernend = phdr->p_paddr + phdr->p_memsz;
-    }
-
-    if ( (kernstart > kernend) ||
-         (ehdr->e_entry < kernstart) ||
-         (ehdr->e_entry > kernend) )
-    {
-        ERROR("Malformed ELF image.");
-        return -EINVAL;
-    }
-
-    dsi->v_start = 0x00000000;
-
-    dsi->v_kernstart = kernstart;
-    dsi->v_kernend   = kernend;
-    dsi->v_kernentry = VMX_LOADER_ENTR_ADDR;
-
-    dsi->v_end       = dsi->v_kernend;
-
-    return 0;
-}
-
-static int
-loadelfimage(
-    char *elfbase, int xch, uint32_t dom, unsigned long *parray,
-    struct domain_setup_info *dsi)
-{
-    Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
-    Elf32_Phdr *phdr;
-    int h;
-
-    char         *va;
-    unsigned long pa, done, chunksz;
-
-    for ( h = 0; h < ehdr->e_phnum; h++ )
-    {
-        phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
-        if ( !is_loadable_phdr(phdr) )
-            continue;
-
-        for ( done = 0; done < phdr->p_filesz; done += chunksz )
-        {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
-            if ((va = xc_map_foreign_range(
-                xch, dom, PAGE_SIZE, PROT_WRITE,
-                parray[pa >> PAGE_SHIFT])) == 0)
-                return -1;
-            chunksz = phdr->p_filesz - done;
-            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
-                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
-            memcpy(va + (pa & (PAGE_SIZE-1)),
-                   elfbase + phdr->p_offset + done, chunksz);
-            munmap(va, PAGE_SIZE);
-        }
-
-        for ( ; done < phdr->p_memsz; done += chunksz )
-        {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
-            if ((va = xc_map_foreign_range(
-                xch, dom, PAGE_SIZE, PROT_WRITE,
-                parray[pa >> PAGE_SHIFT])) == 0)
-                return -1;
-            chunksz = phdr->p_memsz - done;
-            if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
-                chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
-            memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
-            munmap(va, PAGE_SIZE);
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
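
One piece of arithmetic in setup_guest() above is worth spelling out:
the page-table sizing. Without PAE an L1 page holds 1024 PTEs and maps
4MB, so an i386 guest of memsize MB needs one L2 page plus
ceil(memsize/4) L1 pages; in the PAE layout used by the 64-bit build an
L1 page holds 512 PTEs (2MB), and the constant 5 covers the PDPT plus
its four L2 pages. The same computation as a sketch (hypothetical
helper, assuming 4kB pages):

    /* Page-table pages needed to map a memsize-MB guest. */
    static unsigned long nr_pt_pages_sketch(int memsize, int pae)
    {
        if (!pae)
            return 1 + ((memsize + 3) >> 2); /* L2 + ceil(mem/4) L1s */
        return 5 + ((memsize + 1) >> 1);     /* PDPT + 4 L2s + L1s */
    }

For example, a 128MB i386 guest gets 1 + 32 = 33 page-table pages,
placed directly after guest RAM at vpt_start.
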
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/dm/i8259.c
--- a/xen/arch/x86/dm/i8259.c   Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,550 +0,0 @@
-/*
- * QEMU 8259 interrupt controller emulation
- * 
- * Copyright (c) 2003-2004 Fabrice Bellard
- * Copyright (c) 2005 Intel Corporation
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/xmalloc.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <public/hvm/ioreq.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vpic.h>
-#include <asm/current.h>
-#include <asm/vmx_vioapic.h>
-#include <asm/vmx_vlapic.h>
-
-/* set irq level. If an edge is detected, then the IRR is set to 1 */
-static inline void pic_set_irq1(PicState *s, int irq, int level)
-{
-    int mask;
-    mask = 1 << irq;
-    if (s->elcr & mask) {
-        /* level triggered */
-        if (level) {
-            s->irr |= mask;
-            s->last_irr |= mask;
-        } else {
-            s->irr &= ~mask;
-            s->last_irr &= ~mask;
-        }
-    } else {
-        /* edge triggered */
-        if (level) {
-            if ((s->last_irr & mask) == 0) {
-                s->irr |= mask;
-           }
-            s->last_irr |= mask;
-        } else {
-            s->last_irr &= ~mask;
-        }
-    }
-}
-
-/* return the highest priority found in mask (highest = smallest
-   number). Return 8 if no irq */
-static inline int get_priority(PicState *s, int mask)
-{
-    int priority;
-    if (mask == 0)
-        return 8;
-    priority = 0;
-    while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
-        priority++;
-    return priority;
-}
-
-/* return the pic wanted interrupt. return -1 if none */
-static int pic_get_irq(PicState *s)
-{
-    int mask, cur_priority, priority;
-
-    mask = s->irr & ~s->imr;
-    priority = get_priority(s, mask);
-    if (priority == 8)
-        return -1;
-    /* compute current priority. If special fully nested mode on the
-       master, the IRQ coming from the slave is not taken into account
-       for the priority computation. */
-    mask = s->isr;
-    if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
-        mask &= ~(1 << 2);
-    cur_priority = get_priority(s, mask);
-    if (priority < cur_priority) {
-        /* higher priority found: an irq should be generated */
-        return (priority + s->priority_add) & 7;
-    } else {
-        return -1;
-    }
-}
-
-/* raise irq to CPU if necessary. must be called every time the active
-   irq may change */
-/* XXX: should not export it, but it is needed for an APIC kludge */
-void pic_update_irq(struct vmx_virpic *s)
-{
-    int irq2, irq;
-
-    /* first look at slave pic */
-    irq2 = pic_get_irq(&s->pics[1]);
-    if (irq2 >= 0) {
-        /* if irq request by slave pic, signal master PIC */
-        pic_set_irq1(&s->pics[0], 2, 1);
-        pic_set_irq1(&s->pics[0], 2, 0);
-    }
-    /* look at requested irq */
-    irq = pic_get_irq(&s->pics[0]);
-    if (irq >= 0) {
-        s->irq_request(s->irq_request_opaque, 1);
-    }
-}
-
-void pic_set_irq_new(void *opaque, int irq, int level)
-{
-    struct vmx_virpic *s = opaque;
-
-    vmx_vioapic_set_irq(current->domain, irq, level);
-    pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
-    /* used for IOAPIC irqs */
-    if (s->alt_irq_func)
-        s->alt_irq_func(s->alt_irq_opaque, irq, level);
-    pic_update_irq(s);
-}
-
-void do_pic_irqs (struct vmx_virpic *s, uint16_t irqs)
-{
-    s->pics[1].irr |= (uint8_t)(irqs >> 8);
-    s->pics[0].irr |= (uint8_t) irqs;
-    vmx_vioapic_do_irqs(current->domain, irqs);
-    pic_update_irq(s);
-}
-
-void do_pic_irqs_clear (struct vmx_virpic *s, uint16_t irqs)
-{
-    s->pics[1].irr &= ~(uint8_t)(irqs >> 8);
-    s->pics[0].irr &= ~(uint8_t) irqs;
-    vmx_vioapic_do_irqs_clear(current->domain, irqs);
-    pic_update_irq(s);
-}
-
-/* obsolete function */
-void pic_set_irq(struct vmx_virpic *isa_pic, int irq, int level)
-{
-    pic_set_irq_new(isa_pic, irq, level);
-}
-
-/* acknowledge interrupt 'irq' */
-static inline void pic_intack(PicState *s, int irq)
-{
-    if (s->auto_eoi) {
-        if (s->rotate_on_auto_eoi)
-            s->priority_add = (irq + 1) & 7;
-    } else {
-        s->isr |= (1 << irq);
-    }
-    /* We don't clear a level sensitive interrupt here */
-    if (!(s->elcr & (1 << irq)))
-        s->irr &= ~(1 << irq);
-}
-
-int pic_read_irq(struct vmx_virpic *s)
-{
-    int irq, irq2, intno;
-
-    irq = pic_get_irq(&s->pics[0]);
-    if (irq >= 0) {
-        pic_intack(&s->pics[0], irq);
-        if (irq == 2) {
-            irq2 = pic_get_irq(&s->pics[1]);
-            if (irq2 >= 0) {
-                pic_intack(&s->pics[1], irq2);
-            } else {
-                /* spurious IRQ on slave controller */
-                irq2 = 7;
-            }
-            intno = s->pics[1].irq_base + irq2;
-            irq = irq2 + 8;
-        } else {
-            intno = s->pics[0].irq_base + irq;
-        }
-    } else {
-        /* spurious IRQ on host controller */
-        printk("spurious IRQ irq got=%d\n",irq);
-        irq = 7;
-        intno = s->pics[0].irq_base + irq;
-    }
-    pic_update_irq(s);
-        
-    return intno;
-}
-
-static void update_shared_irr(struct vmx_virpic *s, PicState *c)
-{
-    uint8_t *pl, *pe;
-
-    get_sp(current->domain)->sp_global.pic_elcr = 
-               s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
-    pl =(uint8_t*)&get_sp(current->domain)->sp_global.pic_last_irr;
-    pe =(uint8_t*)&get_sp(current->domain)->sp_global.pic_elcr;
-    if ( c == &s->pics[0] ) {
-         *pl = c->last_irr;
-         *pe = c->elcr;
-    }
-    else {
-         *(pl+1) = c->last_irr;
-         *(pe+1) = c->elcr;
-    }
-}
-
-static void pic_reset(void *opaque)
-{
-    PicState *s = opaque;
-
-    s->last_irr = 0;
-    s->irr = 0;
-    s->imr = 0;
-    s->isr = 0;
-    s->priority_add = 0;
-    s->irq_base = 0;
-    s->read_reg_select = 0;
-    s->poll = 0;
-    s->special_mask = 0;
-    s->init_state = 0;
-    s->auto_eoi = 0;
-    s->rotate_on_auto_eoi = 0;
-    s->special_fully_nested_mode = 0;
-    s->init4 = 0;
-    s->elcr = 0;
-}
-
-static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
-{
-    PicState *s = opaque;
-    int priority, cmd, irq;
-
-    addr &= 1;
-    if (addr == 0) {
-        if (val & 0x10) {
-            /* init */
-            pic_reset(s);
-            update_shared_irr(s->pics_state, s);
-            /* deassert a pending interrupt */
-            s->pics_state->irq_request(s->pics_state->irq_request_opaque, 0);
-            s->init_state = 1;
-            s->init4 = val & 1;
-            if (val & 0x02)
-                hw_error("single mode not supported");
-            if (val & 0x08)
-                hw_error("level sensitive irq not supported");
-        } else if (val & 0x08) {
-            if (val & 0x04)
-                s->poll = 1;
-            if (val & 0x02)
-                s->read_reg_select = val & 1;
-            if (val & 0x40)
-                s->special_mask = (val >> 5) & 1;
-        } else {
-            cmd = val >> 5;
-            switch(cmd) {
-            case 0:
-            case 4:
-                s->rotate_on_auto_eoi = cmd >> 2;
-                break;
-            case 1: /* end of interrupt */
-            case 5:
-                priority = get_priority(s, s->isr);
-                if (priority != 8) {
-                    irq = (priority + s->priority_add) & 7;
-                    s->isr &= ~(1 << irq);
-                    if (cmd == 5)
-                        s->priority_add = (irq + 1) & 7;
-                    pic_update_irq(s->pics_state);
-                }
-                break;
-            case 3:
-                irq = val & 7;
-                s->isr &= ~(1 << irq);
-                pic_update_irq(s->pics_state);
-                break;
-            case 6:
-                s->priority_add = (val + 1) & 7;
-                pic_update_irq(s->pics_state);
-                break;
-            case 7:
-                irq = val & 7;
-                s->isr &= ~(1 << irq);
-                s->priority_add = (irq + 1) & 7;
-                pic_update_irq(s->pics_state);
-                break;
-            default:
-                /* no operation */
-                break;
-            }
-        }
-    } else {
-        switch(s->init_state) {
-        case 0:
-            /* normal mode */
-            s->imr = val;
-            pic_update_irq(s->pics_state);
-            break;
-        case 1:
-            s->irq_base = val & 0xf8;
-            s->init_state = 2;
-            break;
-        case 2:
-            if (s->init4) {
-                s->init_state = 3;
-            } else {
-                s->init_state = 0;
-            }
-            break;
-        case 3:
-            s->special_fully_nested_mode = (val >> 4) & 1;
-            s->auto_eoi = (val >> 1) & 1;
-            s->init_state = 0;
-            break;
-        }
-    }
-}
-
-static uint32_t pic_poll_read (PicState *s, uint32_t addr1)
-{
-    int ret;
-
-    ret = pic_get_irq(s);
-    if (ret >= 0) {
-        if (addr1 >> 7) {
-            s->pics_state->pics[0].isr &= ~(1 << 2);
-            s->pics_state->pics[0].irr &= ~(1 << 2);
-        }
-        s->irr &= ~(1 << ret);
-        s->isr &= ~(1 << ret);
-        if (addr1 >> 7 || ret != 2)
-            pic_update_irq(s->pics_state);
-    } else {
-        ret = 0x07;
-        pic_update_irq(s->pics_state);
-    }
-
-    return ret;
-}
-
-static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
-{
-    PicState *s = opaque;
-    unsigned int addr;
-    int ret;
-
-    addr = addr1;
-    addr &= 1;
-    if (s->poll) {
-        ret = pic_poll_read(s, addr1);
-        s->poll = 0;
-    } else {
-        if (addr == 0) {
-            if (s->read_reg_select)
-                ret = s->isr;
-            else
-                ret = s->irr;
-        } else {
-            ret = s->imr;
-        }
-    }
-    return ret;
-}
-
-/* memory mapped interrupt status */
-/* XXX: may be the same as pic_read_irq() */
-uint32_t pic_intack_read(struct vmx_virpic *s)
-{
-    int ret;
-
-    ret = pic_poll_read(&s->pics[0], 0x00);
-    if (ret == 2)
-        ret = pic_poll_read(&s->pics[1], 0x80) + 8;
-    /* Prepare for ISR read */
-    s->pics[0].read_reg_select = 1;
-    
-    return ret;
-}
-
-static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
-{
-    PicState *s = opaque;
-    s->elcr = val & s->elcr_mask;
-}
-
-static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1)
-{
-    PicState *s = opaque;
-    return s->elcr;
-}
-
-/* XXX: add generic master/slave system */
-static void pic_init1(int io_addr, int elcr_addr, PicState *s)
-{
-    pic_reset(s);
-}
-
-void pic_init(struct vmx_virpic *s, void (*irq_request)(), 
-              void *irq_request_opaque)
-{
-    memset(s, 0, sizeof(*s));
-    pic_init1(0x20, 0x4d0, &s->pics[0]);
-    pic_init1(0xa0, 0x4d1, &s->pics[1]);
-    s->pics[0].elcr_mask = 0xf8;
-    s->pics[1].elcr_mask = 0xde;
-    s->irq_request = irq_request;
-    s->irq_request_opaque = irq_request_opaque;
-    s->pics[0].pics_state = s;
-    s->pics[1].pics_state = s;
-    return; 
-}
-
-void pic_set_alt_irq_func(struct vmx_virpic *s, void (*alt_irq_func)(),
-                          void *alt_irq_opaque)
-{
-    s->alt_irq_func = alt_irq_func;
-    s->alt_irq_opaque = alt_irq_opaque;
-}
-
-static int intercept_pic_io(ioreq_t *p)
-{
-    struct vmx_virpic  *pic;
-    struct vcpu *v = current;
-    uint32_t data;
-    
-    if ( p->size != 1 || p->count != 1) {
-        printk("PIC_IO wrong access size %d!\n", (int)p->size);
-        return 1;
-    }
-    pic = &v->domain->arch.vmx_platform.vmx_pic;
-    if ( p->dir == 0 ) {
-        if(p->pdata_valid) 
-            vmx_copy(&data, (unsigned long)p->u.pdata, p->size, VMX_COPY_IN);
-        else
-            data = p->u.data;
-        pic_ioport_write((void*)&pic->pics[p->addr>>7],
-                (uint32_t) p->addr, (uint32_t) (data & 0xff));
-    }
-    else {
-        data = pic_ioport_read(
-            (void*)&pic->pics[p->addr>>7], (uint32_t) p->addr);
-        if(p->pdata_valid) 
-            vmx_copy(&data, (unsigned long)p->u.pdata, p->size, VMX_COPY_OUT);
-        else 
-            p->u.data = (u64)data;
-    }
-    return 1;
-}
-
-static int intercept_elcr_io(ioreq_t *p)
-{
-    struct vmx_virpic  *s;
-    struct vcpu *v = current;
-    uint32_t data;
-    
-    if ( p->size != 1 || p->count != 1 ) {
-        printk("PIC_IO wrong access size %d!\n", (int)p->size);
-        return 1;
-    }
-
-    s = &v->domain->arch.vmx_platform.vmx_pic;
-    if ( p->dir == 0 ) {
-        if(p->pdata_valid) 
-            vmx_copy(&data, (unsigned long)p->u.pdata, p->size, VMX_COPY_IN);
-        else
-            data = p->u.data;
-        elcr_ioport_write((void*)&s->pics[p->addr&1],
-                (uint32_t) p->addr, (uint32_t)( data & 0xff));
-       get_sp(current->domain)->sp_global.pic_elcr = 
-            s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
-    }
-    else {
-        data = (u64) elcr_ioport_read(
-                (void*)&s->pics[p->addr&1], (uint32_t) p->addr);
-        if(p->pdata_valid) 
-            vmx_copy(&data, (unsigned long)p->u.pdata, p->size, VMX_COPY_OUT);
-        else 
-            p->u.data = (u64)data;
-
-    }
-    return 1;
-}
-void register_pic_io_hook (void)
-{
-    register_portio_handler(0x20, 2, intercept_pic_io); 
-    register_portio_handler(0x4d0, 1, intercept_elcr_io); 
-    register_portio_handler(0xa0, 2, intercept_pic_io); 
-    register_portio_handler(0x4d1, 1, intercept_elcr_io); 
-}
-
-
-/* IRQ handling */
-int cpu_get_pic_interrupt(struct vcpu *v, int *type)
-{
-    int intno;
-    struct vmx_virpic *s = &v->domain->arch.vmx_platform.vmx_pic;
-    struct vmx_platform *plat = &v->domain->arch.vmx_platform;
-
-    if ( !vlapic_accept_pic_intr(v) )
-        return -1;
-
-    if ( !plat->interrupt_request )
-        return -1;
-
-    plat->interrupt_request = 0;
-    /* read the irq from the PIC */
-    intno = pic_read_irq(s);
-    *type = VLAPIC_DELIV_MODE_EXT;
-    return intno;
-}
-
-int is_pit_irq(struct vcpu *v, int irq, int type)
-{
-    int pit_vec;
-
-    if (type == VLAPIC_DELIV_MODE_EXT)
-        pit_vec = v->domain->arch.vmx_platform.vmx_pic.pics[0].irq_base;
-    else
-        pit_vec =
-            v->domain->arch.vmx_platform.vmx_vioapic.redirtbl[0].RedirForm.vector;
-
-    return (irq == pit_vec);
-}
-
-int is_irq_enabled(struct vcpu *v, int irq)
-{
-    struct vmx_virpic *vpic=&v->domain->arch.vmx_platform.vmx_pic;
-        
-    if ( irq & 8 ) {
-        return !( (1 << (irq&7)) & vpic->pics[1].imr);
-    }
-    else {
-        return !( (1 << irq) & vpic->pics[0].imr);
-    }
-}
-
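
The priority logic above is compact enough to misread: get_priority()
rotates the eight request bits by priority_add and returns the index of
the first set bit (8 meaning none), and pic_get_irq() only reports an
interrupt when that priority beats whatever is already in service. For
example, with priority_add = 0, irr = 0x28 (IRQ3 and IRQ5 pending),
imr = 0x08 (IRQ3 masked) and isr = 0x80 (IRQ7 in service), the request
mask is 0x20, get_priority() returns 5, the in-service priority is 7,
and since 5 < 7 the controller raises IRQ5. A standalone sketch of the
two routines (hypothetical pic_sketch struct, ignoring special fully
nested mode):

    struct pic_sketch { unsigned char irr, imr, isr, priority_add; };

    static int prio(const struct pic_sketch *s, int mask)
    {
        int p = 0;
        if (mask == 0)
            return 8;                         /* no request */
        while (!(mask & (1 << ((p + s->priority_add) & 7))))
            p++;
        return p;
    }

    static int pending_irq(const struct pic_sketch *s)
    {
        int p = prio(s, s->irr & ~s->imr);
        if (p == 8 || p >= prio(s, s->isr))
            return -1;                        /* in-service IRQ wins */
        return (p + s->priority_add) & 7;     /* 5 for the values above */
    }
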
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/dm/vmx_vioapic.c
--- a/xen/arch/x86/dm/vmx_vioapic.c     Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,621 +0,0 @@
-/*
-*  Copyright (C) 2001  MandrakeSoft S.A.
-*
-*    MandrakeSoft S.A.
-*    43, rue d'Aboukir
-*    75002 Paris - France
-*    http://www.linux-mandrake.com/
-*    http://www.mandrakesoft.com/
-*
-*  This library is free software; you can redistribute it and/or
-*  modify it under the terms of the GNU Lesser General Public
-*  License as published by the Free Software Foundation; either
-*  version 2 of the License, or (at your option) any later version.
-*
-*  This library is distributed in the hope that it will be useful,
-*  but WITHOUT ANY WARRANTY; without even the implied warranty of
-*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-*  Lesser General Public License for more details.
-*
-*  You should have received a copy of the GNU Lesser General Public
-*  License along with this library; if not, write to the Free Software
-*  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
-*/
-
-/*
-*  Yunhong Jiang <yunhong.jiang@xxxxxxxxx>
-*  Ported to xen by using virtual IRQ line.
-*/
-
-#include <asm/vmx_vioapic.h>
-#include <asm/vmx_platform.h>
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/xmalloc.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <public/hvm/ioreq.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vpic.h>
-#include <asm/current.h>
-
-static void ioapic_enable(vmx_vioapic_t *s, uint8_t enable)
-{
-    if (enable)
-        s->flags |= IOAPIC_ENABLE_FLAG;
-    else
-        s->flags &= ~IOAPIC_ENABLE_FLAG;
-}
-
-static void ioapic_dump_redir(vmx_vioapic_t *s, uint8_t entry)
-{
-    RedirStatus redir = s->redirtbl[entry];
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_dump_redir "
-      "entry %x vector %x deliver_mod %x destmode %x delivestatus %x "
-      "polarity %x remote_irr %x trigmod %x mask %x dest_id %x\n",
-      entry, redir.RedirForm.vector, redir.RedirForm.deliver_mode,
-      redir.RedirForm.destmode, redir.RedirForm.delivestatus,
-      redir.RedirForm.polarity, redir.RedirForm.remoteirr,
-      redir.RedirForm.trigmod, redir.RedirForm.mask,
-      redir.RedirForm.dest_id);
-}
-
-#ifdef VMX_DOMAIN_SAVE_RESTORE
-void ioapic_save(QEMUFile* f, void* opaque)
-{
-    printk("no implementation for ioapic_save\n");
-}
-
-int ioapic_load(QEMUFile* f, void* opaque, int version_id)
-{
-    printk("no implementation for ioapic_load\n");
-    return 0;
-}
-#endif
-
-static unsigned long vmx_vioapic_read_indirect(struct vmx_vioapic *s,
-                                              unsigned long addr,
-                                              unsigned long length)
-{
-    unsigned long result = 0;
-
-    ASSERT(s);
-
-    switch (s->ioregsel) {
-    case IOAPIC_REG_VERSION:
-        result = ((((IOAPIC_NUM_PINS-1) & 0xff) << 16)
-                  | (IOAPIC_VERSION_ID & 0x0f));
-        break;
-
-#ifndef __ia64__
-    case IOAPIC_REG_APIC_ID:
-        result = ((s->id & 0xf) << 24);
-        break;
-
-    case IOAPIC_REG_ARB_ID:
-        /* XXX how arb_id used on p4? */
-        result = ((s->id & 0xf) << 24);
-        break;
-#endif
-
-    default:
-        {
-            uint32_t redir_index = 0;
-            uint64_t redir_content = 0;
-
-            redir_index = (s->ioregsel - 0x10) >> 1;
-
-            if (redir_index >= 0 && redir_index < IOAPIC_NUM_PINS) {
-                redir_content = s->redirtbl[redir_index].value;
-
-                result = (s->ioregsel & 0x1)?
-                           (redir_content >> 32) & 0xffffffff :
-                           redir_content & 0xffffffff;
-            } else {
-                printk("upic_mem_readl:undefined ioregsel %x\n",
-                        s->ioregsel);
-                domain_crash_synchronous();
-            }
-            break;
-        }
-    } /* switch */
-
-    return result;
-}
-
-static unsigned long vmx_vioapic_read(struct vcpu *v,
-                                     unsigned long addr,
-                                     unsigned long length)
-{
-    struct vmx_vioapic *s = &(v->domain->arch.vmx_platform.vmx_vioapic);
-    uint32_t    result = 0;
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "vmx_vioapic_read addr %lx\n", addr);
-
-    ASSERT(s);
-
-    addr &= 0xff;
-
-    switch (addr) {
-    case IOAPIC_REG_SELECT:
-        result = s->ioregsel;
-        break;
-
-    case IOAPIC_REG_WINDOW:
-        result = vmx_vioapic_read_indirect(s, addr, length);
-        break;
-
-    default:
-          break;
-    }
-
-    return result;
-}
-
-static void vmx_vioapic_update_imr(struct vmx_vioapic *s, int index)
-{
-   if (s->redirtbl[index].RedirForm.mask)
-       set_bit(index, &s->imr);
-   else
-       clear_bit(index, &s->imr);
-}
-
-static void vmx_vioapic_write_indirect(struct vmx_vioapic *s,
-                                      unsigned long addr,
-                                      unsigned long length,
-                                      unsigned long val)
-{
-    switch (s->ioregsel) {
-    case IOAPIC_REG_VERSION:
-        printk("vmx_vioapic_write_indirect: version register read only\n");
-        break;
-
-#ifndef __ia64__
-    case IOAPIC_REG_APIC_ID:
-        s->id = (val >> 24) & 0xf;
-        break;
-
-    case IOAPIC_REG_ARB_ID:
-        s->arb_id = val;
-        break;
-#endif
-
-    default:
-        {
-            uint32_t redir_index = 0;
-
-            VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "vmx_vioapic_write_indirect "
-              "change redir index %x val %lx\n",
-              redir_index, val);
-
-            redir_index = (s->ioregsel - 0x10) >> 1;
-
-            if (redir_index >= 0 && redir_index < IOAPIC_NUM_PINS) {
-                uint64_t redir_content;
-
-                redir_content = s->redirtbl[redir_index].value;
-
-                if (s->ioregsel & 0x1)
-                    redir_content = (((uint64_t)val & 0xffffffff) << 32) |
-                                    (redir_content & 0xffffffff);
-                else
-                    redir_content = ((redir_content >> 32) << 32) |
-                                    (val & 0xffffffff);
-                s->redirtbl[redir_index].value = redir_content;
-                vmx_vioapic_update_imr(s, redir_index);
-            } else  {
-                printk("vmx_vioapic_write_indirect "
-                  "error register %x\n", s->ioregsel);
-            }
-            break;
-        }
-    } /* switch */
-}
-
-static void vmx_vioapic_write(struct vcpu *v,
-                             unsigned long addr,
-                             unsigned long length,
-                             unsigned long val)
-{
-    vmx_vioapic_t *s = &(v->domain->arch.vmx_platform.vmx_vioapic);
-
-    ASSERT(s);
-
-    addr &= 0xff;
-
-    switch (addr) {
-    case IOAPIC_REG_SELECT:
-        s->ioregsel = val;
-        break;
-
-    case IOAPIC_REG_WINDOW:
-        vmx_vioapic_write_indirect(s, addr, length, val);
-        break;
-
-#ifdef __ia64__
-    case IOAPIC_REG_EOI:
-        ioapic_update_EOI(v->domain, val);
-        break;
-#endif
-
-    default:
-        break;
-    }
-}
-
-static int vmx_vioapic_range(struct vcpu *v, unsigned long addr)
-{
-    vmx_vioapic_t *s = &(v->domain->arch.vmx_platform.vmx_vioapic);
-
-    if ((s->flags & IOAPIC_ENABLE_FLAG) &&
-        (addr >= s->base_address &&
-        (addr <= s->base_address + IOAPIC_MEM_LENGTH)))
-        return 1;
-    else
-        return 0;
-}
-
-struct vmx_mmio_handler vioapic_mmio_handler = {
-    .check_handler = vmx_vioapic_range,
-    .read_handler = vmx_vioapic_read,
-    .write_handler = vmx_vioapic_write
-};
-
-static void vmx_vioapic_reset(vmx_vioapic_t *s)
-{
-    int i;
-
-    memset(s, 0, sizeof(vmx_vioapic_t));
-
-    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-        s->redirtbl[i].RedirForm.mask = 0x1;
-        vmx_vioapic_update_imr(s, i);
-    }
-}
-
-static void ioapic_update_config(vmx_vioapic_t *s,
-                                 unsigned long address,
-                                 uint8_t enable)
-{
-    ASSERT(s);
-
-    ioapic_enable(s, enable);
-
-    if (address != s->base_address)
-        s->base_address = address;
-}
-
-static int ioapic_inj_irq(vmx_vioapic_t *s,
-                          struct vlapic * target,
-                          uint8_t vector,
-                          uint8_t trig_mode,
-                          uint8_t delivery_mode)
-{
-    int result = 0;
-
-    ASSERT(s && target);
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_inj_irq "
-      "irq %d trig %d delive mode %d\n",
-      vector, trig_mode, delivery_mode);
-
-    switch (delivery_mode) {
-    case VLAPIC_DELIV_MODE_FIXED:
-    case VLAPIC_DELIV_MODE_LPRI:
-        if (vlapic_set_irq(target, vector, trig_mode) && (trig_mode == 1))
-            printk("<ioapic_inj_irq> level interrupt happen before cleard\n");
-        result = 1;
-        break;
-    default:
-        printk("<ioapic_inj_irq> error delivery mode %d\n",
-                delivery_mode);
-        break;
-   }
-
-   return result;
-}
-
-#ifndef __ia64__
-static int ioapic_match_logical_addr(vmx_vioapic_t *s, int number, uint8_t dest)
-{
-    int result = 0;
-
-    ASSERT(s && s->lapic_info[number]);
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_match_logical_addr "
-      "number %i dest %x\n",
-      number, dest);
-
-    switch (((s->lapic_info[number]->dest_format >> 28) & 0xf)) {
-    case 0xf:
-        result =
-          (dest & ((s->lapic_info[number]->logical_dest >> 24) & 0xff)) != 0;
-        break;
-    case 0x0:
-        /* Should we support flat cluster mode? */
-        if ( ((s->lapic_info[number]->logical_dest >> 28)
-               == ((dest >> 0x4) & 0xf)) &&
-             (((s->lapic_info[number]->logical_dest >> 24) & 0xf)
-               & (dest  & 0xf)) )
-            result = 1;
-        break;
-    default:
-        printk("error DFR value for %x local apic\n", number);
-        break;
-    }
-
-    return result;
-}
-#else
-extern int ioapic_match_logical_addr(vmx_vioapic_t *s, int number, uint8_t dest);
-#endif
-
-static uint32_t ioapic_get_delivery_bitmask(vmx_vioapic_t *s,
-                                            uint16_t dest,
-                                            uint8_t dest_mode,
-                                            uint8_t vector,
-                                            uint8_t delivery_mode)
-{
-    uint32_t mask = 0;
-    int i;
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_get_delivery_bitmask "
-      "dest %d dest_mode %d "
-      "vector %d del_mode %d, lapic_count %d\n",
-      dest, dest_mode, vector, delivery_mode, s->lapic_count);
-
-    ASSERT(s);
-
-    if (dest_mode == 0) { /* Physical mode */
-        for (i = 0; i < s->lapic_count; i++) {
-            if (VLAPIC_ID(s->lapic_info[i]) == dest) {
-                mask = 1 << i;
-                break;
-            }
-        }
-    } else {
-        /* logical destination. call match_logical_addr for each APIC. */
-        if (dest != 0) {
-            for (i = 0; i < s->lapic_count; i++) {
-                if ( s->lapic_info[i] &&
-                     ioapic_match_logical_addr(s, i, dest) ) {
-                    mask |= (1 << i);
-                }
-            }
-        }
-    }
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_get_delivery_bitmask "
-      "mask %x\n", mask);
-
-    return mask;
-}
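-
-/*
- * Worked example (annotation, assuming a two-vCPU guest): in physical
- * mode (dest_mode == 0), dest == 1 selects only the LAPIC whose ID is
- * 1, giving mask == (1 << 1). In logical flat mode, dest is an 8-bit
- * logical destination, so dest == 0x03 would match every LAPIC whose
- * logical_dest bits overlap it, e.g. mask == 0x3 if both match.
- */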
-
-static void ioapic_deliver(vmx_vioapic_t *s, int irqno)
-{
-    uint16_t dest = s->redirtbl[irqno].RedirForm.dest_id;
-    uint8_t dest_mode = s->redirtbl[irqno].RedirForm.destmode;
-    uint8_t delivery_mode = s->redirtbl[irqno].RedirForm.deliver_mode;
-    uint8_t vector = s->redirtbl[irqno].RedirForm.vector;
-    uint8_t trig_mode = s->redirtbl[irqno].RedirForm.trigmod;
-    uint32_t deliver_bitmask;
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "IOAPIC deliver: "
-      "dest %x dest_mode %x delivery_mode %x vector %x trig_mode %x\n",
-      dest, dest_mode, delivery_mode, vector, trig_mode);
-
-    deliver_bitmask =
-      ioapic_get_delivery_bitmask(s, dest, dest_mode, vector, delivery_mode);
-
-    if (!deliver_bitmask) {
-        VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic deliver "
-          "no target on destination\n");
-
-        return;
-    }
-
-    switch (delivery_mode) {
-    case VLAPIC_DELIV_MODE_LPRI:
-    {
-        struct vlapic* target;
-
-        target = apic_round_robin(
-                s->domain, dest_mode, vector, deliver_bitmask);
-        if (target)
-            ioapic_inj_irq(s, target, vector, trig_mode, delivery_mode);
-        else{
-            VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic deliver "
-              "null round robin mask %x vector %x delivery_mode %x\n",
-              deliver_bitmask, vector, delivery_mode);
-        }
-        break;
-    }
-
-    case VLAPIC_DELIV_MODE_FIXED:
-    case VLAPIC_DELIV_MODE_EXT:
-    {
-        uint8_t bit;
-        for (bit = 0; bit < s->lapic_count; bit++) {
-            if (deliver_bitmask & (1 << bit)) {
-                if (s->lapic_info[bit]) {
-                    ioapic_inj_irq(s, s->lapic_info[bit],
-                                vector, trig_mode, delivery_mode);
-                }
-            }
-        }
-        break;
-    }
-
-    case VLAPIC_DELIV_MODE_SMI:
-    case VLAPIC_DELIV_MODE_NMI:
-    case VLAPIC_DELIV_MODE_INIT:
-    case VLAPIC_DELIV_MODE_STARTUP:
-    default:
-        printk("Not support delivey mode %d\n", delivery_mode);
-        break;
-    }
-}
-
-static int ioapic_get_highest_irq(vmx_vioapic_t *s)
-{
-    uint32_t irqs;
-
-    ASSERT(s);
-
-    irqs = s->irr & ~s->isr & ~s->imr;
-    return __fls(irqs);
-}
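-
-/*
- * Annotation: "highest pending deliverable pin" is computed as
- * irr & ~isr & ~imr, i.e. requested (IRR), not already in service
- * (ISR) and not masked (IMR). For example irr == 0x05, isr == 0x01,
- * imr == 0 leaves 0x04, so pin 2 is serviced first.
- */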
-
-
-static void service_ioapic(vmx_vioapic_t *s)
-{
-    int irqno;
-
-    while ((irqno = ioapic_get_highest_irq(s)) != -1) {
-
-        VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "service_ioapic "
-          "highest irqno %x\n", irqno);
-
-        if (!test_bit(irqno, &s->imr)) {
-            ioapic_deliver(s, irqno);
-        }
-
-        if (s->redirtbl[irqno].RedirForm.trigmod == IOAPIC_LEVEL_TRIGGER) {
-            s->isr |= (1 << irqno);
-        }
-
-        s->irr &= ~(1 << irqno);
-    }
-}
-
-void vmx_vioapic_do_irqs(struct domain *d, uint16_t irqs)
-{
-    vmx_vioapic_t *s = &(d->arch.vmx_platform.vmx_vioapic);
-
-    if (!vmx_apic_support(d))
-        return;
-
-    s->irr |= irqs & ~s->imr;
-    service_ioapic(s);
-}
-
-void vmx_vioapic_do_irqs_clear(struct domain *d, uint16_t irqs)
-{
-    vmx_vioapic_t *s = &(d->arch.vmx_platform.vmx_vioapic);
-
-    if (!vmx_apic_support(d))
-        return;
-
-    s->irr &= ~irqs;
-    service_ioapic(s);
-}
-
-void vmx_vioapic_set_irq(struct domain *d, int irq, int level)
-{
-    vmx_vioapic_t *s = &(d->arch.vmx_platform.vmx_vioapic);
-
-    if (!vmx_apic_support(d))
-        return ;
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "ioapic_set_irq "
-      "irq %x level %x\n", irq, level);
-
-    if (irq < 0 || irq >= IOAPIC_NUM_PINS) {
-        printk("ioapic_set_irq irq %x is illegal\n", irq);
-        domain_crash_synchronous();
-    }
-
-    if (!IOAPICEnabled(s) || s->redirtbl[irq].RedirForm.mask)
-        return;
-
-    ioapic_dump_redir(s, irq);
-
-    if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
-        uint32_t bit = 1 << irq;
-        if (s->redirtbl[irq].RedirForm.trigmod == IOAPIC_LEVEL_TRIGGER) {
-            if (level)
-                s->irr |= bit;
-            else
-                s->irr &= ~bit;
-        } else {
-            if (level)
-                /* XXX No irr clear for edge interrupt */
-                s->irr |= bit;
-        }
-    }
-
-    service_ioapic(s);
-}
-
-/* XXX If level interrupt, use vector->irq table for performance */
-static int get_redir_num(vmx_vioapic_t *s, int vector)
-{
-    int i = 0;
-
-    ASSERT(s);
-
-    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-        if (s->redirtbl[i].RedirForm.vector == vector)
-            return i;
-    }
-
-    return -1;
-}
-
-void ioapic_update_EOI(struct domain *d, int vector)
-{
-    vmx_vioapic_t *s = &(d->arch.vmx_platform.vmx_vioapic);
-    int redir_num;
-
-    if ((redir_num = get_redir_num(s, vector)) == -1) {
-        printk("Can't find redir item for %d EOI \n", vector);
-        return;
-    }
-
-    if (!test_and_clear_bit(redir_num, &s->isr)) {
-        printk("redir %d not set for %d  EOI\n", redir_num, vector);
-        return;
-    }
-}
-
-int vmx_vioapic_add_lapic(struct vlapic *vlapic, struct vcpu *v)
-{
-    vmx_vioapic_t *s = &(v->domain->arch.vmx_platform.vmx_vioapic);
-
-    if (v->vcpu_id != s->lapic_count) {
-        printk("vmx_vioapic_add_lapic "
-           "cpu_id not match vcpu_id %x lapic_count %x\n",
-           v->vcpu_id, s->lapic_count);
-        domain_crash_synchronous();
-    }
-
-    /* Fill in the entry before bumping the count, so that concurrent
-     * interrupt delivery never sees a count covering a NULL slot. */
-    s->lapic_info[s->lapic_count] = vlapic;
-    s->lapic_count++;
-
-    return s->lapic_count;
-}
-
-vmx_vioapic_t * vmx_vioapic_init(struct domain *d)
-{
-    int i = 0;
-    vmx_vioapic_t *s = &(d->arch.vmx_platform.vmx_vioapic);
-
-    VMX_DBG_LOG(DBG_LEVEL_IOAPIC, "vmx_vioapic_init\n");
-
-    vmx_vioapic_reset(s);
-
-    s->domain = d;
-
-    for (i = 0; i < MAX_LAPIC_NUM; i++)
-        s->lapic_info[i] = NULL;
-
-    /* Remove once the guest firmware (GFW) can configure this itself */
-    ioapic_update_config(s, IOAPIC_DEFAULT_BASE_ADDRESS, 1);
-
-    return s;
-}
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,1978 +0,0 @@
-/*
- * vmx.c: handling VMX architecture-related VM exits
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/trace.h>
-#include <xen/sched.h>
-#include <xen/irq.h>
-#include <xen/softirq.h>
-#include <xen/domain_page.h>
-#include <xen/hypercall.h>
-#include <asm/current.h>
-#include <asm/io.h>
-#include <asm/shadow.h>
-#include <asm/regs.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/types.h>
-#include <asm/msr.h>
-#include <asm/spinlock.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vmcs.h>
-#include <asm/vmx_intercept.h>
-#include <asm/shadow.h>
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
-#include <public/sched.h>
-#include <public/hvm/ioreq.h>
-#include <asm/vmx_vpic.h>
-#include <asm/vmx_vlapic.h>
-
-int hvm_enabled;
-
-#ifdef CONFIG_VMX
-unsigned int opt_vmx_debug_level = 0;
-integer_param("vmx_debug", opt_vmx_debug_level);
-
-static unsigned long trace_values[NR_CPUS][4];
-#define TRACE_VMEXIT(index,value) trace_values[smp_processor_id()][index]=value
-
-static int vmx_switch_on;
-
-void vmx_final_setup_guest(struct vcpu *v)
-{
-    v->arch.schedule_tail = arch_vmx_do_launch;
-
-    if ( v->vcpu_id == 0 )
-    {
-        struct domain *d = v->domain;
-        struct vcpu *vc;
-
-        /* Initialize monitor page table */
-        for_each_vcpu(d, vc)
-            vc->arch.monitor_table = mk_pagetable(0);
-
-        /*
-         * Required to do this once per domain
-         * XXX todo: add a separate function to do these.
-         */
-        memset(&d->shared_info->evtchn_mask[0], 0xff,
-               sizeof(d->shared_info->evtchn_mask));
-
-        /* Put the domain in shadow mode even though we're going to be using
-         * the shared 1:1 page table initially. It shouldn't hurt */
-        shadow_mode_enable(d,
-                           SHM_enable|SHM_refcounts|
-                           SHM_translate|SHM_external|SHM_wr_pt_pte);
-    }
-
-    vmx_switch_on = 1;
-}
-
-void vmx_relinquish_resources(struct vcpu *v)
-{
-    struct vmx_virpit *vpit;
-
-    if ( !VMX_DOMAIN(v) )
-        return;
-
-    if (v->vcpu_id == 0) {
-        /* unmap IO shared page */
-        struct domain *d = v->domain;
-        if ( d->arch.vmx_platform.shared_page_va )
-            unmap_domain_page_global(
-                (void *)d->arch.vmx_platform.shared_page_va);
-    }
-
-    destroy_vmcs(&v->arch.arch_vmx);
-    free_monitor_pagetable(v);
-    vpit = &v->domain->arch.vmx_platform.vmx_pit;
-    kill_timer(&vpit->pit_timer);
-    kill_timer(&v->arch.arch_vmx.hlt_timer);
-    if ( vmx_apic_support(v->domain) && (VLAPIC(v) != NULL) )
-    {
-        kill_timer(&VLAPIC(v)->vlapic_timer);
-        xfree(VLAPIC(v));
-    }
-}
-
-#ifdef __x86_64__
-static struct msr_state percpu_msr[NR_CPUS];
-
-static u32 msr_data_index[VMX_MSR_COUNT] =
-{
-    MSR_LSTAR, MSR_STAR, MSR_CSTAR,
-    MSR_SYSCALL_MASK, MSR_EFER,
-};
-
-/*
- * To avoid MSR save/restore at every VM exit/entry time, we restore
- * the x86_64 specific MSRs at domain switch time. Since those MSRs
- * are not modified once set for generic domains, we don't save them,
- * but simply reset them to the values set at percpu_traps_init().
- */
-void vmx_load_msrs(struct vcpu *n)
-{
-    struct msr_state *host_state = &percpu_msr[smp_processor_id()];
-    int i;
-
-    if ( !vmx_switch_on )
-        return;
-
-    while ( host_state->flags )
-    {
-        i = find_first_set_bit(host_state->flags);
-        wrmsrl(msr_data_index[i], host_state->msr_items[i]);
-        clear_bit(i, &host_state->flags);
-    }
-}
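-
-/*
- * Usage note (annotation): this runs on the context-switch path.
- * host_state->flags records which of the MSRs in msr_data_index[] a
- * guest clobbered; only those are restored, so a switch where no
- * guest touched them costs no WRMSR at all.
- */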
-
-static void vmx_save_init_msrs(void)
-{
-    struct msr_state *host_state = &percpu_msr[smp_processor_id()];
-    int i;
-
-    for ( i = 0; i < VMX_MSR_COUNT; i++ )
-        rdmsrl(msr_data_index[i], host_state->msr_items[i]);
-}
-
-#define CASE_READ_MSR(address)              \
-    case MSR_ ## address:                 \
-    msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
-    break
-
-#define CASE_WRITE_MSR(address)                                     \
-    case MSR_ ## address:                                           \
-    {                                                               \
-        msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content;    \
-        if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)) {    \
-            set_bit(VMX_INDEX_MSR_ ## address, &msr->flags);        \
-        }                                                           \
-        wrmsrl(MSR_ ## address, msr_content);                       \
-        set_bit(VMX_INDEX_MSR_ ## address, &host_state->flags);     \
-    }                                                               \
-    break
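-
-/*
- * For reference, CASE_WRITE_MSR(STAR) expands to roughly:
- *
- *     case MSR_STAR:
- *     {
- *         msr->msr_items[VMX_INDEX_MSR_STAR] = msr_content;
- *         if (!test_bit(VMX_INDEX_MSR_STAR, &msr->flags))
- *             set_bit(VMX_INDEX_MSR_STAR, &msr->flags);
- *         wrmsrl(MSR_STAR, msr_content);
- *         set_bit(VMX_INDEX_MSR_STAR, &host_state->flags);
- *     }
- *     break;
- *
- * marking the MSR dirty in both guest and host state so the lazy
- * save/restore logic above knows it must be switched.
- */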
-
-#define IS_CANO_ADDRESS(add) 1 /* XXX: canonical-address check not implemented */
-static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
-{
-    u64     msr_content = 0;
-    struct vcpu *vc = current;
-    struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
-    switch(regs->ecx){
-    case MSR_EFER:
-        msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
-        VMX_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long 
long)msr_content);
-        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
-                     &vc->arch.arch_vmx.cpu_state))
-            msr_content |= 1 << _EFER_LME;
-
-        if (VMX_LONG_GUEST(vc))
-            msr_content |= 1 << _EFER_LMA;
-        break;
-    case MSR_FS_BASE:
-        if (!(VMX_LONG_GUEST(vc)))
-            /* XXX should this raise a #GP fault instead? */
-            domain_crash_synchronous();
-        __vmread(GUEST_FS_BASE, &msr_content);
-        break;
-    case MSR_GS_BASE:
-        if (!(VMX_LONG_GUEST(vc)))
-            domain_crash_synchronous();
-        __vmread(GUEST_GS_BASE, &msr_content);
-        break;
-    case MSR_SHADOW_GS_BASE:
-        msr_content = msr->shadow_gs;
-        break;
-
-        CASE_READ_MSR(STAR);
-        CASE_READ_MSR(LSTAR);
-        CASE_READ_MSR(CSTAR);
-        CASE_READ_MSR(SYSCALL_MASK);
-    default:
-        return 0;
-    }
-    VMX_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", 
msr_content);
-    regs->eax = msr_content & 0xffffffff;
-    regs->edx = msr_content >> 32;
-    return 1;
-}
-
-static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
-{
-    u64     msr_content = regs->eax | ((u64)regs->edx << 32);
-    struct vcpu *vc = current;
-    struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
-    struct msr_state * host_state =
-        &percpu_msr[smp_processor_id()];
-
-    VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n",
-                regs->ecx, msr_content);
-
-    switch (regs->ecx){
-    case MSR_EFER:
-        if ((msr_content & EFER_LME) ^
-            test_bit(VMX_CPU_STATE_LME_ENABLED,
-                     &vc->arch.arch_vmx.cpu_state)){
-            if (test_bit(VMX_CPU_STATE_PG_ENABLED,
-                         &vc->arch.arch_vmx.cpu_state) ||
-                !test_bit(VMX_CPU_STATE_PAE_ENABLED,
-                          &vc->arch.arch_vmx.cpu_state)){
-                vmx_inject_exception(vc, TRAP_gp_fault, 0);
-            }
-        }
-        if (msr_content & EFER_LME)
-            set_bit(VMX_CPU_STATE_LME_ENABLED,
-                    &vc->arch.arch_vmx.cpu_state);
-        /* No update for LME/LMA since they have no effect */
-        msr->msr_items[VMX_INDEX_MSR_EFER] =
-            msr_content;
-        if (msr_content & ~(EFER_LME | EFER_LMA)){
-            msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
-            if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){
-                rdmsrl(MSR_EFER,
-                       host_state->msr_items[VMX_INDEX_MSR_EFER]);
-                set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
-                set_bit(VMX_INDEX_MSR_EFER, &msr->flags);
-                wrmsrl(MSR_EFER, msr_content);
-            }
-        }
-        break;
-
-    case MSR_FS_BASE:
-    case MSR_GS_BASE:
-        if (!(VMX_LONG_GUEST(vc)))
-            domain_crash_synchronous();
-        if (!IS_CANO_ADDRESS(msr_content)){
-            VMX_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
-            vmx_inject_exception(vc, TRAP_gp_fault, 0);
-        }
-        if (regs->ecx == MSR_FS_BASE)
-            __vmwrite(GUEST_FS_BASE, msr_content);
-        else
-            __vmwrite(GUEST_GS_BASE, msr_content);
-        break;
-
-    case MSR_SHADOW_GS_BASE:
-        if (!(VMX_LONG_GUEST(vc)))
-            domain_crash_synchronous();
-        vc->arch.arch_vmx.msr_content.shadow_gs = msr_content;
-        wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
-        break;
-
-        CASE_WRITE_MSR(STAR);
-        CASE_WRITE_MSR(LSTAR);
-        CASE_WRITE_MSR(CSTAR);
-        CASE_WRITE_MSR(SYSCALL_MASK);
-    default:
-        return 0;
-    }
-    return 1;
-}
-
-void
-vmx_restore_msrs(struct vcpu *v)
-{
-    int i = 0;
-    struct msr_state *guest_state;
-    struct msr_state *host_state;
-    unsigned long guest_flags;
-
-    guest_state = &v->arch.arch_vmx.msr_content;
-    host_state = &percpu_msr[smp_processor_id()];
-
-    wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
-    guest_flags = guest_state->flags;
-    if (!guest_flags)
-        return;
-
-    while (guest_flags){
-        i = find_first_set_bit(guest_flags);
-
-        VMX_DBG_LOG(DBG_LEVEL_2,
-                    "restore guest's index %d msr %lx with %lx\n",
-                    i, (unsigned long) msr_data_index[i],
-                    (unsigned long) guest_state->msr_items[i]);
-        set_bit(i, &host_state->flags);
-        wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
-        clear_bit(i, &guest_flags);
-    }
-}
-
-#else  /* __i386__ */
-#define  vmx_save_init_msrs()   ((void)0)
-
-static inline int  long_mode_do_msr_read(struct cpu_user_regs *regs){
-    return 0;
-}
-static inline int  long_mode_do_msr_write(struct cpu_user_regs *regs){
-    return 0;
-}
-#endif
-
-extern long evtchn_send(int lport);
-void do_nmi(struct cpu_user_regs *);
-
-static int check_vmx_controls(u32 ctrls, u32 msr)
-{
-    u32 vmx_msr_low, vmx_msr_high;
-
-    rdmsr(msr, vmx_msr_low, vmx_msr_high);
-    if (ctrls < vmx_msr_low || ctrls > vmx_msr_high) {
-        printk("Insufficient VMX capability 0x%x, "
-               "msr=0x%x,low=0x%8x,high=0x%x\n",
-               ctrls, msr, vmx_msr_low, vmx_msr_high);
-        return 0;
-    }
-    return 1;
-}
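-
-/*
- * Annotation: each VMX capability MSR encodes allowed 0-settings in
- * its low 32 bits (bits that must be 1) and allowed 1-settings in its
- * high 32 bits (bits that may be 1). A bit-exact test would be
- *
- *     if ( (vmx_msr_low & ~ctrls) || (ctrls & ~vmx_msr_high) )
- *         return 0;
- *
- * the numeric range comparison above is a coarser approximation.
- */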
-
-int start_vmx(void)
-{
-    struct vmcs_struct *vmcs;
-    u32 ecx;
-    u32 eax, edx;
-    u64 phys_vmcs;      /* debugging */
-
-    /*
-     * Xen does not fill x86_capability words except 0.
-     */
-    ecx = cpuid_ecx(1);
-    boot_cpu_data.x86_capability[4] = ecx;
-
-    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
-        return 0;
-
-    rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);
-
-    if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
-        if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
-            printk("VMX disabled by Feature Control MSR.\n");
-            return 0;
-        }
-    }
-    else {
-        wrmsr(IA32_FEATURE_CONTROL_MSR,
-              IA32_FEATURE_CONTROL_MSR_LOCK |
-              IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
-    }
-
-    if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS,
-                            MSR_IA32_VMX_PINBASED_CTLS_MSR))
-        return 0;
-    if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS,
-                            MSR_IA32_VMX_PROCBASED_CTLS_MSR))
-        return 0;
-    if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS,
-                            MSR_IA32_VMX_EXIT_CTLS_MSR))
-        return 0;
-    if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS,
-                            MSR_IA32_VMX_ENTRY_CTLS_MSR))
-        return 0;
-
-    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */
-
-    if (!(vmcs = alloc_vmcs())) {
-        printk("Failed to allocate VMCS\n");
-        return 0;
-    }
-
-    phys_vmcs = (u64) virt_to_phys(vmcs);
-
-    if (!(__vmxon(phys_vmcs))) {
-        printk("VMXON is done\n");
-    }
-
-    vmx_save_init_msrs();
-
-    hvm_enabled = 1;
-
-    return 1;
-}
-
-void stop_vmx(void)
-{
-    if (read_cr4() & X86_CR4_VMXE)
-        __vmxoff();
-}
-
-/*
- * Not all cases receive valid value in the VM-exit instruction length field.
- */
-#define __get_instruction_length(len) \
-    __vmread(VM_EXIT_INSTRUCTION_LEN, &(len)); \
-     if ((len) < 1 || (len) > 15) \
-        __vmx_bug(&regs);
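-
-/*
- * Annotation: a valid VM-exit instruction length is always 1..15
- * bytes (15 being the architectural x86 maximum), and the field is
- * only defined for certain exit reasons; anything outside that range
- * indicates an inconsistent VMCS, hence __vmx_bug().
- */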
-
-static inline void __update_guest_eip(unsigned long inst_len)
-{
-    unsigned long current_eip;
-
-    __vmread(GUEST_RIP, &current_eip);
-    __vmwrite(GUEST_RIP, current_eip + inst_len);
-}
-
-
-static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs)
-{
-    unsigned long gpa; /* FIXME: PAE */
-    int result;
-
-#if 0 /* keep for debugging */
-    {
-        unsigned long eip;
-
-        __vmread(GUEST_RIP, &eip);
-        VMX_DBG_LOG(DBG_LEVEL_VMMU,
-                    "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
-                    va, eip, (unsigned long)regs->error_code);
-    }
-#endif
-
-    if (!vmx_paging_enabled(current)){
-        handle_mmio(va, va);
-        TRACE_VMEXIT (2,2);
-        return 1;
-    }
-    gpa = gva_to_gpa(va);
-
-    /* Use 1:1 page table to identify MMIO address space */
-    if ( mmio_space(gpa) ){
-        struct vcpu *v = current;
-        /* No support for APIC */
-        if (!vmx_apic_support(v->domain) && gpa >= 0xFEC00000) { 
-            u32 inst_len;
-            __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len));
-            __update_guest_eip(inst_len);
-            return 1;
-        }
-        TRACE_VMEXIT (2,2);
-        handle_mmio(va, gpa);
-        return 1;
-    }
-
-    result = shadow_fault(va, regs);
-    TRACE_VMEXIT (2,result);
-#if 0
-    if ( !result )
-    {
-        __vmread(GUEST_RIP, &eip);
-        printk("vmx pgfault to guest va=%lx eip=%lx\n", va, eip);
-    }
-#endif
-
-    return result;
-}
-
-static void vmx_do_no_device_fault(void)
-{
-    unsigned long cr0;
-    struct vcpu *v = current;
-
-    clts();
-    setup_fpu(current);
-    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
-    if (!(cr0 & X86_CR0_TS)) {
-        __vmread_vcpu(v, GUEST_CR0, &cr0);
-        cr0 &= ~X86_CR0_TS;
-        __vmwrite(GUEST_CR0, cr0);
-    }
-    __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
-}
-
-/* Reserved bits: [31:15], [12:11], [9], [6], [2:1] */
-#define VMX_VCPU_CPUID_L1_RESERVED 0xffff9a46 
-
-static void vmx_vmexit_do_cpuid(unsigned long input, struct cpu_user_regs *regs)
-{
-    unsigned int eax, ebx, ecx, edx;
-    unsigned long eip;
-    struct vcpu *v = current;
-
-    __vmread(GUEST_RIP, &eip);
-
-    VMX_DBG_LOG(DBG_LEVEL_1,
-                "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
-                " (esi) %lx, (edi) %lx",
-                (unsigned long)regs->eax, (unsigned long)regs->ebx,
-                (unsigned long)regs->ecx, (unsigned long)regs->edx,
-                (unsigned long)regs->esi, (unsigned long)regs->edi);
-
-    cpuid(input, &eax, &ebx, &ecx, &edx);
-
-    if ( input == 1 )
-    {
-        if ( vmx_apic_support(v->domain) &&
-             !vlapic_global_enabled((VLAPIC(v))) )
-            clear_bit(X86_FEATURE_APIC, &edx);
-
-#ifdef __x86_64__
-        if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 )
-#endif
-        {
-            clear_bit(X86_FEATURE_PSE, &edx);
-            clear_bit(X86_FEATURE_PAE, &edx);
-            clear_bit(X86_FEATURE_PSE36, &edx);
-        }
-
-        /* Unsupportable for virtualised CPUs. */
-        ecx &= ~VMX_VCPU_CPUID_L1_RESERVED; /* mask off reserved bits */
-        clear_bit(X86_FEATURE_VMXE & 31, &ecx);
-        clear_bit(X86_FEATURE_MWAIT & 31, &ecx);
-    }
-#ifdef __i386__
-    else if ( input == 0x80000001 )
-    {
-        /* Mask feature for Intel ia32e or AMD long mode. */
-        clear_bit(X86_FEATURE_LM & 31, &edx);
-    }
-#endif
-
-    regs->eax = (unsigned long) eax;
-    regs->ebx = (unsigned long) ebx;
-    regs->ecx = (unsigned long) ecx;
-    regs->edx = (unsigned long) edx;
-
-    VMX_DBG_LOG(DBG_LEVEL_1,
-                "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, 
ebx=%x, ecx=%x, edx=%x",
-                eip, input, eax, ebx, ecx, edx);
-
-}
-
-#define CASE_GET_REG_P(REG, reg)    \
-    case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
-
-static void vmx_dr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
-{
-    unsigned int reg;
-    unsigned long *reg_p = 0;
-    struct vcpu *v = current;
-    unsigned long eip;
-
-    __vmread(GUEST_RIP, &eip);
-
-    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
-
-    VMX_DBG_LOG(DBG_LEVEL_1,
-                "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
-                eip, reg, exit_qualification);
-
-    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
-        CASE_GET_REG_P(EAX, eax);
-        CASE_GET_REG_P(ECX, ecx);
-        CASE_GET_REG_P(EDX, edx);
-        CASE_GET_REG_P(EBX, ebx);
-        CASE_GET_REG_P(EBP, ebp);
-        CASE_GET_REG_P(ESI, esi);
-        CASE_GET_REG_P(EDI, edi);
-    case REG_ESP:
-        break;
-    default:
-        __vmx_bug(regs);
-    }
-
-    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
-    case TYPE_MOV_TO_DR:
-        /* don't need to check the range */
-        if (reg != REG_ESP)
-            v->arch.guest_context.debugreg[reg] = *reg_p;
-        else {
-            unsigned long value;
-            __vmread(GUEST_RSP, &value);
-            v->arch.guest_context.debugreg[reg] = value;
-        }
-        break;
-    case TYPE_MOV_FROM_DR:
-        if (reg != REG_ESP)
-            *reg_p = v->arch.guest_context.debugreg[reg];
-        else {
-            __vmwrite(GUEST_RSP, v->arch.guest_context.debugreg[reg]);
-        }
-        break;
-    }
-}
-
-/*
- * Invalidate the TLB for va. Invalidate the shadow page corresponding
- * the address va.
- */
-static void vmx_vmexit_do_invlpg(unsigned long va)
-{
-    unsigned long eip;
-    struct vcpu *v = current;
-
-    __vmread(GUEST_RIP, &eip);
-
-    VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%lx, va=%lx",
-                eip, va);
-
-    /*
-     * We do the safest thing first, then try to update the shadow,
-     * copying from the guest.
-     */
-    shadow_invlpg(v, va);
-}
-
-static int check_for_null_selector(unsigned long eip)
-{
-    unsigned char inst[MAX_INST_LEN];
-    unsigned long sel;
-    int i, inst_len;
-    int inst_copy_from_guest(unsigned char *, unsigned long, int);
-
-    __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
-    memset(inst, 0, MAX_INST_LEN);
-    if (inst_copy_from_guest(inst, eip, inst_len) != inst_len) {
-        printf("check_for_null_selector: get guest instruction failed\n");
-        domain_crash_synchronous();
-    }
-
-    for (i = 0; i < inst_len; i++) {
-        switch (inst[i]) {
-        case 0xf3: /* REPZ */
-        case 0xf2: /* REPNZ */
-        case 0xf0: /* LOCK */
-        case 0x66: /* data32 */
-        case 0x67: /* addr32 */
-            continue;
-        case 0x2e: /* CS */
-            __vmread(GUEST_CS_SELECTOR, &sel);
-            break;
-        case 0x36: /* SS */
-            __vmread(GUEST_SS_SELECTOR, &sel);
-            break;
-        case 0x26: /* ES */
-            __vmread(GUEST_ES_SELECTOR, &sel);
-            break;
-        case 0x64: /* FS */
-            __vmread(GUEST_FS_SELECTOR, &sel);
-            break;
-        case 0x65: /* GS */
-            __vmread(GUEST_GS_SELECTOR, &sel);
-            break;
-        case 0x3e: /* DS */
-            /* FALLTHROUGH */
-        default:
-            /* DS is the default */
-            __vmread(GUEST_DS_SELECTOR, &sel);
-        }
-        return sel == 0 ? 1 : 0;
-    }
-
-    return 0;
-}
-
-void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
-                  unsigned long count, int size, long value, int dir, int pvalid)
-{
-    struct vcpu *v = current;
-    vcpu_iodata_t *vio;
-    ioreq_t *p;
-
-    vio = get_vio(v->domain, v->vcpu_id);
-    if (vio == NULL) {
-        printk("bad shared page: %lx\n", (unsigned long) vio);
-        domain_crash_synchronous();
-    }
-
-    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
-        printf("VMX I/O has not yet completed\n");
-        domain_crash_synchronous();
-    }
-    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
-
-    p = &vio->vp_ioreq;
-    p->dir = dir;
-    p->pdata_valid = pvalid;
-
-    p->type = IOREQ_TYPE_PIO;
-    p->size = size;
-    p->addr = port;
-    p->count = count;
-    p->df = regs->eflags & EF_DF ? 1 : 0;
-
-    if (pvalid) {
-        if (vmx_paging_enabled(current))
-            p->u.pdata = (void *) gva_to_gpa(value);
-        else
-            p->u.pdata = (void *) value; /* guest VA == guest PA */
-    } else
-        p->u.data = value;
-
-    if (vmx_portio_intercept(p)) {
-        p->state = STATE_IORESP_READY;
-        vmx_io_assist(v);
-        return;
-    }
-
-    p->state = STATE_IOREQ_READY;
-
-    evtchn_send(iopacket_port(v->domain));
-    vmx_wait_io();
-}
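-
-/*
- * Annotation: the request lifecycle above is: mark the vcpu as waiting
- * (ARCH_VMX_IO_WAIT), fill in the ioreq, then either complete it
- * inline via the port-I/O intercept or set STATE_IOREQ_READY and
- * notify the device model over the event channel, after which
- * vmx_wait_io() blocks until the response arrives.
- */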
-
-static void vmx_io_instruction(struct cpu_user_regs *regs,
-                               unsigned long exit_qualification, unsigned long inst_len)
-{
-    struct mmio_op *mmio_opp;
-    unsigned long eip, cs, eflags;
-    unsigned long port, size, dir;
-    int vm86;
-
-    mmio_opp = &current->arch.arch_vmx.mmio_op;
-    mmio_opp->instr = INSTR_PIO;
-    mmio_opp->flags = 0;
-
-    __vmread(GUEST_RIP, &eip);
-    __vmread(GUEST_CS_SELECTOR, &cs);
-    __vmread(GUEST_RFLAGS, &eflags);
-    vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
-
-    VMX_DBG_LOG(DBG_LEVEL_1,
-                "vmx_io_instruction: vm86 %d, eip=%lx:%lx, "
-                "exit_qualification = %lx",
-                vm86, cs, eip, exit_qualification);
-
-    if (test_bit(6, &exit_qualification))
-        port = (exit_qualification >> 16) & 0xFFFF;
-    else
-        port = regs->edx & 0xffff;
-    TRACE_VMEXIT(2, port);
-    size = (exit_qualification & 7) + 1;
-    dir = test_bit(3, &exit_qualification); /* direction */
-
-    if (test_bit(4, &exit_qualification)) { /* string instruction */
-        unsigned long addr, count = 1;
-        int sign = regs->eflags & EF_DF ? -1 : 1;
-
-        __vmread(GUEST_LINEAR_ADDRESS, &addr);
-
-        /*
-         * In protected mode, guest linear address is invalid if the
-         * selector is null.
-         */
-        if (!vm86 && check_for_null_selector(eip))
-            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
-
-        if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
-            mmio_opp->flags |= REPZ;
-            count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
-        }
-
-        /*
-         * Handle string pio instructions that cross pages or that
-         * are unaligned. See the comments in vmx_platform.c/handle_mmio()
-         */
-        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
-            unsigned long value = 0;
-
-            mmio_opp->flags |= OVERLAP;
-            if (dir == IOREQ_WRITE)
-                vmx_copy(&value, addr, size, VMX_COPY_IN);
-            send_pio_req(regs, port, 1, size, value, dir, 0);
-        } else {
-            if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
-                if (sign > 0)
-                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
-                else
-                    count = (addr & ~PAGE_MASK) / size;
-            } else
-                __update_guest_eip(inst_len);
-
-            send_pio_req(regs, port, count, size, addr, dir, 1);
-        }
-    } else {
-        __update_guest_eip(inst_len);
-        send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
-    }
-}
-
-int
-vmx_copy(void *buf, unsigned long laddr, int size, int dir)
-{
-    unsigned long gpa, mfn;
-    char *addr;
-    int count;
-
-    while (size > 0) {
-        count = PAGE_SIZE - (laddr & ~PAGE_MASK);
-        if (count > size)
-            count = size;
-
-        if (vmx_paging_enabled(current)) {
-            gpa = gva_to_gpa(laddr);
-            mfn = get_mfn_from_pfn(gpa >> PAGE_SHIFT);
-        } else
-            mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
-        if (mfn == INVALID_MFN)
-            return 0;
-
-        addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
-
-        if (dir == VMX_COPY_IN)
-            memcpy(buf, addr, count);
-        else
-            memcpy(addr, buf, count);
-
-        unmap_domain_page(addr);
-
-        laddr += count;
-        buf += count;
-        size -= count;
-    }
-
-    return 1;
-}
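-
-/*
- * Usage sketch (annotation, mirroring vmx_assist() below):
- *
- *     u32 magic;
- *     if ( vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic),
- *                   VMX_COPY_IN) && magic == VMXASSIST_MAGIC )
- *         ... vmxassist is present ...
- *
- * The copy proceeds page by page so that guest buffers crossing a
- * page boundary are mapped through the correct machine frames.
- */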
-
-int
-vmx_world_save(struct vcpu *v, struct vmx_assist_context *c)
-{
-    unsigned long inst_len;
-    int error = 0;
-
-    error |= __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
-    error |= __vmread(GUEST_RIP, &c->eip);
-    c->eip += inst_len; /* skip transition instruction */
-    error |= __vmread(GUEST_RSP, &c->esp);
-    error |= __vmread(GUEST_RFLAGS, &c->eflags);
-
-    error |= __vmread(CR0_READ_SHADOW, &c->cr0);
-    c->cr3 = v->arch.arch_vmx.cpu_cr3;
-    error |= __vmread(CR4_READ_SHADOW, &c->cr4);
-
-    error |= __vmread(GUEST_IDTR_LIMIT, &c->idtr_limit);
-    error |= __vmread(GUEST_IDTR_BASE, &c->idtr_base);
-
-    error |= __vmread(GUEST_GDTR_LIMIT, &c->gdtr_limit);
-    error |= __vmread(GUEST_GDTR_BASE, &c->gdtr_base);
-
-    error |= __vmread(GUEST_CS_SELECTOR, &c->cs_sel);
-    error |= __vmread(GUEST_CS_LIMIT, &c->cs_limit);
-    error |= __vmread(GUEST_CS_BASE, &c->cs_base);
-    error |= __vmread(GUEST_CS_AR_BYTES, &c->cs_arbytes.bytes);
-
-    error |= __vmread(GUEST_DS_SELECTOR, &c->ds_sel);
-    error |= __vmread(GUEST_DS_LIMIT, &c->ds_limit);
-    error |= __vmread(GUEST_DS_BASE, &c->ds_base);
-    error |= __vmread(GUEST_DS_AR_BYTES, &c->ds_arbytes.bytes);
-
-    error |= __vmread(GUEST_ES_SELECTOR, &c->es_sel);
-    error |= __vmread(GUEST_ES_LIMIT, &c->es_limit);
-    error |= __vmread(GUEST_ES_BASE, &c->es_base);
-    error |= __vmread(GUEST_ES_AR_BYTES, &c->es_arbytes.bytes);
-
-    error |= __vmread(GUEST_SS_SELECTOR, &c->ss_sel);
-    error |= __vmread(GUEST_SS_LIMIT, &c->ss_limit);
-    error |= __vmread(GUEST_SS_BASE, &c->ss_base);
-    error |= __vmread(GUEST_SS_AR_BYTES, &c->ss_arbytes.bytes);
-
-    error |= __vmread(GUEST_FS_SELECTOR, &c->fs_sel);
-    error |= __vmread(GUEST_FS_LIMIT, &c->fs_limit);
-    error |= __vmread(GUEST_FS_BASE, &c->fs_base);
-    error |= __vmread(GUEST_FS_AR_BYTES, &c->fs_arbytes.bytes);
-
-    error |= __vmread(GUEST_GS_SELECTOR, &c->gs_sel);
-    error |= __vmread(GUEST_GS_LIMIT, &c->gs_limit);
-    error |= __vmread(GUEST_GS_BASE, &c->gs_base);
-    error |= __vmread(GUEST_GS_AR_BYTES, &c->gs_arbytes.bytes);
-
-    error |= __vmread(GUEST_TR_SELECTOR, &c->tr_sel);
-    error |= __vmread(GUEST_TR_LIMIT, &c->tr_limit);
-    error |= __vmread(GUEST_TR_BASE, &c->tr_base);
-    error |= __vmread(GUEST_TR_AR_BYTES, &c->tr_arbytes.bytes);
-
-    error |= __vmread(GUEST_LDTR_SELECTOR, &c->ldtr_sel);
-    error |= __vmread(GUEST_LDTR_LIMIT, &c->ldtr_limit);
-    error |= __vmread(GUEST_LDTR_BASE, &c->ldtr_base);
-    error |= __vmread(GUEST_LDTR_AR_BYTES, &c->ldtr_arbytes.bytes);
-
-    return !error;
-}
-
-int
-vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
-{
-    unsigned long mfn, old_cr4, old_base_mfn;
-    int error = 0;
-
-    error |= __vmwrite(GUEST_RIP, c->eip);
-    error |= __vmwrite(GUEST_RSP, c->esp);
-    error |= __vmwrite(GUEST_RFLAGS, c->eflags);
-
-    error |= __vmwrite(CR0_READ_SHADOW, c->cr0);
-
-    if (!vmx_paging_enabled(v)) {
-        VMX_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table");
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
-        goto skip_cr3;
-    }
-
-    if (c->cr3 == v->arch.arch_vmx.cpu_cr3) {
-        /*
-         * This is simple TLB flush, implying the guest has
-         * removed some translation or changed page attributes.
-         * We simply invalidate the shadow.
-         */
-        mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
-        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
-            printk("Invalid CR3 value=%x", c->cr3);
-            domain_crash_synchronous();
-            return 0;
-        }
-        shadow_sync_all(v->domain);
-    } else {
-        /*
-         * If different, make a shadow. Check if the PDBR is valid
-         * first.
-         */
-        VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3);
-        if ((c->cr3 >> PAGE_SHIFT) > v->domain->max_pages) {
-            printk("Invalid CR3 value=%x", c->cr3);
-            domain_crash_synchronous();
-            return 0;
-        }
-        mfn = get_mfn_from_pfn(c->cr3 >> PAGE_SHIFT);
-        if (!get_page(pfn_to_page(mfn), v->domain))
-            return 0;
-        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
-        if (old_base_mfn)
-             put_page(pfn_to_page(old_base_mfn));
-        update_pagetables(v);
-        /*
-         * arch.shadow_table should now hold the next CR3 for shadow
-         */
-        v->arch.arch_vmx.cpu_cr3 = c->cr3;
-        VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
-    }
-
- skip_cr3:
-
-    error |= __vmread(CR4_READ_SHADOW, &old_cr4);
-    error |= __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
-    error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
-
-    error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
-    error |= __vmwrite(GUEST_IDTR_BASE, c->idtr_base);
-
-    error |= __vmwrite(GUEST_GDTR_LIMIT, c->gdtr_limit);
-    error |= __vmwrite(GUEST_GDTR_BASE, c->gdtr_base);
-
-    error |= __vmwrite(GUEST_CS_SELECTOR, c->cs_sel);
-    error |= __vmwrite(GUEST_CS_LIMIT, c->cs_limit);
-    error |= __vmwrite(GUEST_CS_BASE, c->cs_base);
-    error |= __vmwrite(GUEST_CS_AR_BYTES, c->cs_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_DS_SELECTOR, c->ds_sel);
-    error |= __vmwrite(GUEST_DS_LIMIT, c->ds_limit);
-    error |= __vmwrite(GUEST_DS_BASE, c->ds_base);
-    error |= __vmwrite(GUEST_DS_AR_BYTES, c->ds_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_ES_SELECTOR, c->es_sel);
-    error |= __vmwrite(GUEST_ES_LIMIT, c->es_limit);
-    error |= __vmwrite(GUEST_ES_BASE, c->es_base);
-    error |= __vmwrite(GUEST_ES_AR_BYTES, c->es_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_SS_SELECTOR, c->ss_sel);
-    error |= __vmwrite(GUEST_SS_LIMIT, c->ss_limit);
-    error |= __vmwrite(GUEST_SS_BASE, c->ss_base);
-    error |= __vmwrite(GUEST_SS_AR_BYTES, c->ss_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_FS_SELECTOR, c->fs_sel);
-    error |= __vmwrite(GUEST_FS_LIMIT, c->fs_limit);
-    error |= __vmwrite(GUEST_FS_BASE, c->fs_base);
-    error |= __vmwrite(GUEST_FS_AR_BYTES, c->fs_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_GS_SELECTOR, c->gs_sel);
-    error |= __vmwrite(GUEST_GS_LIMIT, c->gs_limit);
-    error |= __vmwrite(GUEST_GS_BASE, c->gs_base);
-    error |= __vmwrite(GUEST_GS_AR_BYTES, c->gs_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_TR_SELECTOR, c->tr_sel);
-    error |= __vmwrite(GUEST_TR_LIMIT, c->tr_limit);
-    error |= __vmwrite(GUEST_TR_BASE, c->tr_base);
-    error |= __vmwrite(GUEST_TR_AR_BYTES, c->tr_arbytes.bytes);
-
-    error |= __vmwrite(GUEST_LDTR_SELECTOR, c->ldtr_sel);
-    error |= __vmwrite(GUEST_LDTR_LIMIT, c->ldtr_limit);
-    error |= __vmwrite(GUEST_LDTR_BASE, c->ldtr_base);
-    error |= __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes);
-
-    return !error;
-}
-
-enum { VMX_ASSIST_INVOKE = 0, VMX_ASSIST_RESTORE };
-
-int
-vmx_assist(struct vcpu *v, int mode)
-{
-    struct vmx_assist_context c;
-    u32 magic;
-    u32 cp;
-
-    /* make sure vmxassist exists (this is not an error) */
-    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN))
-        return 0;
-    if (magic != VMXASSIST_MAGIC)
-        return 0;
-
-    switch (mode) {
-        /*
-         * Transfer control to vmxassist.
-         * Store the current context in VMXASSIST_OLD_CONTEXT and load
-         * the new VMXASSIST_NEW_CONTEXT context. This context was created
-         * by vmxassist and will transfer control to it.
-         */
-    case VMX_ASSIST_INVOKE:
-        /* save the old context */
-        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
-            goto error;
-        if (cp != 0) {
-            if (!vmx_world_save(v, &c))
-                goto error;
-            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT))
-                goto error;
-        }
-
-        /* restore the new context, this should activate vmxassist */
-        if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN))
-            goto error;
-        if (cp != 0) {
-            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
-                goto error;
-            if (!vmx_world_restore(v, &c))
-                goto error;
-            return 1;
-        }
-        break;
-
-        /*
-         * Restore the VMXASSIST_OLD_CONTEXT that was saved by VMX_ASSIST_INVOKE
-         * above.
-         */
-    case VMX_ASSIST_RESTORE:
-        /* save the old context */
-        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
-            goto error;
-        if (cp != 0) {
-            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
-                goto error;
-            if (!vmx_world_restore(v, &c))
-                goto error;
-            return 1;
-        }
-        break;
-    }
-
- error:
-    printf("Failed to transfer to vmxassist\n");
-    domain_crash_synchronous();
-    return 0;
-}
-
-static int vmx_set_cr0(unsigned long value)
-{
-    struct vcpu *v = current;
-    unsigned long mfn;
-    unsigned long eip;
-    int paging_enabled;
-    unsigned long vm_entry_value;
-    unsigned long old_cr0;
-
-    /*
-     * CR0: We don't want to lose PE and PG.
-     */
-    __vmread_vcpu(v, CR0_READ_SHADOW, &old_cr0);
-    paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
-    /* If the OS doesn't use CLTS to clear the TS bit... */
-    if ((old_cr0 & X86_CR0_TS) && !(value & X86_CR0_TS))
-    {
-        clts();
-        setup_fpu(v);
-    }
-
-    __vmwrite(GUEST_CR0, value | X86_CR0_PE | X86_CR0_PG | X86_CR0_NE);
-    __vmwrite(CR0_READ_SHADOW, value);
-
-    VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
-
-    if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) {
-        /*
-         * The guest CR3 must be pointing to the guest physical.
-         */
-        if ( !VALID_MFN(mfn = get_mfn_from_pfn(
-            v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
-             !get_page(pfn_to_page(mfn), v->domain) )
-        {
-            printk("Invalid CR3 value = %lx", v->arch.arch_vmx.cpu_cr3);
-            domain_crash_synchronous(); /* need to take a clean path */
-        }
-
-#if defined(__x86_64__)
-        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
-                     &v->arch.arch_vmx.cpu_state) &&
-            !test_bit(VMX_CPU_STATE_PAE_ENABLED,
-                      &v->arch.arch_vmx.cpu_state)){
-            VMX_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
-            vmx_inject_exception(v, TRAP_gp_fault, 0);
-        }
-        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
-                     &v->arch.arch_vmx.cpu_state)){
-            /* PAE must already be enabled at this point */
-            VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
-            set_bit(VMX_CPU_STATE_LMA_ENABLED,
-                    &v->arch.arch_vmx.cpu_state);
-            __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
-            vm_entry_value |= VM_ENTRY_CONTROLS_IA32E_MODE;
-            __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
-
-#if CONFIG_PAGING_LEVELS >= 4
-            if(!shadow_set_guest_paging_levels(v->domain, 4)) {
-                printk("Unsupported guest paging levels\n");
-                domain_crash_synchronous(); /* need to take a clean path */
-            }
-#endif
-        }
-        else
-        {
-#if CONFIG_PAGING_LEVELS >= 4
-            if(!shadow_set_guest_paging_levels(v->domain, 2)) {
-                printk("Unsupported guest paging levels\n");
-                domain_crash_synchronous(); /* need to take a clean path */
-            }
-#endif
-        }
-
-        {
-            unsigned long crn;
-            /* update CR4's PAE if needed */
-            __vmread(GUEST_CR4, &crn);
-            if ( (!(crn & X86_CR4_PAE)) &&
-                 test_bit(VMX_CPU_STATE_PAE_ENABLED,
-                          &v->arch.arch_vmx.cpu_state) )
-            {
-                VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
-                __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
-            }
-        }
-#endif
-        /*
-         * Now arch.guest_table points to machine physical.
-         */
-        v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
-        update_pagetables(v);
-
-        VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
-                    (unsigned long) (mfn << PAGE_SHIFT));
-
-        __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
-        /*
-         * arch->shadow_table should hold the next CR3 for shadow
-         */
-        VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
-                    v->arch.arch_vmx.cpu_cr3, mfn);
-    }
-
-    if (!((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled)
-        if (v->arch.arch_vmx.cpu_cr3) {
-            put_page(pfn_to_page(get_mfn_from_pfn(
-                      v->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)));
-            v->arch.guest_table = mk_pagetable(0);
-        }
-
-    /*
-     * VMX does not implement real-mode virtualization. We emulate
-     * real-mode by performing a world switch to VMXAssist whenever
-     * a partition disables the CR0.PE bit.
-     */
-    if ((value & X86_CR0_PE) == 0) {
-        if ( value & X86_CR0_PG ) {
-            /* inject GP here */
-            vmx_inject_exception(v, TRAP_gp_fault, 0);
-            return 0;
-        } else {
-            /*
-             * Disable paging here.
-             * The same applies when PE == 1 && PG == 0.
-             */
-            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
-                         &v->arch.arch_vmx.cpu_state)){
-                clear_bit(VMX_CPU_STATE_LMA_ENABLED,
-                          &v->arch.arch_vmx.cpu_state);
-                __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
-                vm_entry_value &= ~VM_ENTRY_CONTROLS_IA32E_MODE;
-                __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
-            }
-        }
-
-        clear_all_shadow_status(v->domain);
-        if (vmx_assist(v, VMX_ASSIST_INVOKE)) {
-            set_bit(VMX_CPU_STATE_ASSIST_ENABLED, &v->arch.arch_vmx.cpu_state);
-            __vmread(GUEST_RIP, &eip);
-            VMX_DBG_LOG(DBG_LEVEL_1,
-                        "Transfering control to vmxassist %%eip 0x%lx\n", eip);
-            return 0; /* do not update eip! */
-        }
-    } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
-                        &v->arch.arch_vmx.cpu_state)) {
-        __vmread(GUEST_RIP, &eip);
-        VMX_DBG_LOG(DBG_LEVEL_1,
-                    "Enabling CR0.PE at %%eip 0x%lx\n", eip);
-        if (vmx_assist(v, VMX_ASSIST_RESTORE)) {
-            clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
-                      &v->arch.arch_vmx.cpu_state);
-            __vmread(GUEST_RIP, &eip);
-            VMX_DBG_LOG(DBG_LEVEL_1,
-                        "Restoring to %%eip 0x%lx\n", eip);
-            return 0; /* do not update eip! */
-        }
-    }
-
-    return 1;
-}
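-
-/*
- * Annotation: the CR0.PE transitions above implement the real-mode
- * emulation dance: clearing PE world-switches into vmxassist
- * (VMX_ASSIST_INVOKE), and the next write that sets PE while vmxassist
- * is active restores the saved context (VMX_ASSIST_RESTORE). In both
- * cases the function returns 0 so the trapped MOV-to-CR0 instruction
- * does not advance %eip.
- */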
-
-#define CASE_GET_REG(REG, reg)  \
-    case REG_ ## REG: value = regs->reg; break
-
-#define CASE_EXTEND_SET_REG \
-      CASE_EXTEND_REG(S)
-#define CASE_EXTEND_GET_REG \
-      CASE_EXTEND_REG(G)
-
-#ifdef __i386__
-#define CASE_EXTEND_REG(T)
-#else
-#define CASE_EXTEND_REG(T)    \
-    CASE_ ## T ## ET_REG(R8, r8); \
-    CASE_ ## T ## ET_REG(R9, r9); \
-    CASE_ ## T ## ET_REG(R10, r10); \
-    CASE_ ## T ## ET_REG(R11, r11); \
-    CASE_ ## T ## ET_REG(R12, r12); \
-    CASE_ ## T ## ET_REG(R13, r13); \
-    CASE_ ## T ## ET_REG(R14, r14); \
-    CASE_ ## T ## ET_REG(R15, r15);
-#endif
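-
-/*
- * For reference, on x86_64 CASE_EXTEND_GET_REG expands to
- *
- *     CASE_GET_REG(R8, r8); ... CASE_GET_REG(R15, r15);
- *
- * i.e. one "case REG_R8: value = regs->r8; break;" arm per extended
- * register; on i386 it expands to nothing.
- */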
-
-
-/*
- * Write to control registers
- */
-static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs)
-{
-    unsigned long value;
-    unsigned long old_cr;
-    struct vcpu *v = current;
-
-    switch (gp) {
-        CASE_GET_REG(EAX, eax);
-        CASE_GET_REG(ECX, ecx);
-        CASE_GET_REG(EDX, edx);
-        CASE_GET_REG(EBX, ebx);
-        CASE_GET_REG(EBP, ebp);
-        CASE_GET_REG(ESI, esi);
-        CASE_GET_REG(EDI, edi);
-        CASE_EXTEND_GET_REG
-    case REG_ESP:
-        __vmread(GUEST_RSP, &value);
-        break;
-    default:
-        printk("invalid gp: %d\n", gp);
-        __vmx_bug(regs);
-    }
-
-    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
-    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
-
-    switch(cr) {
-    case 0:
-    {
-        return vmx_set_cr0(value);
-    }
-    case 3:
-    {
-        unsigned long old_base_mfn, mfn;
-
-        /*
-         * If paging is not enabled yet, simply copy the value to CR3.
-         */
-        if (!vmx_paging_enabled(v)) {
-            v->arch.arch_vmx.cpu_cr3 = value;
-            break;
-        }
-
-        /*
-         * We make a new one if the shadow does not exist.
-         */
-        if (value == v->arch.arch_vmx.cpu_cr3) {
-            /*
-             * This is simple TLB flush, implying the guest has
-             * removed some translation or changed page attributes.
-             * We simply invalidate the shadow.
-             */
-            mfn = get_mfn_from_pfn(value >> PAGE_SHIFT);
-            if (mfn != pagetable_get_pfn(v->arch.guest_table))
-                __vmx_bug(regs);
-            shadow_sync_all(v->domain);
-        } else {
-            /*
-             * If different, make a shadow. Check if the PDBR is valid
-             * first.
-             */
-            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-            if ( ((value >> PAGE_SHIFT) > v->domain->max_pages ) ||
-                 !VALID_MFN(mfn = get_mfn_from_pfn(value >> PAGE_SHIFT)) ||
-                 !get_page(pfn_to_page(mfn), v->domain) )
-            {
-                printk("Invalid CR3 value=%lx", value);
-                domain_crash_synchronous(); /* need to take a clean path */
-            }
-            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
-            v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
-            if (old_base_mfn)
-                put_page(pfn_to_page(old_base_mfn));
-            update_pagetables(v);
-            /*
-             * arch.shadow_table should now hold the next CR3 for shadow
-             */
-            v->arch.arch_vmx.cpu_cr3 = value;
-            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
-                        value);
-            __vmwrite(GUEST_CR3, pagetable_get_paddr(v->arch.shadow_table));
-        }
-        break;
-    }
-    case 4: /* CR4 */
-    {
-        if (value & X86_CR4_PAE){
-            set_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
-        } else {
-            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
-                         &v->arch.arch_vmx.cpu_state)){
-                vmx_inject_exception(v, TRAP_gp_fault, 0);
-            }
-            clear_bit(VMX_CPU_STATE_PAE_ENABLED, &v->arch.arch_vmx.cpu_state);
-        }
-
-        __vmread(CR4_READ_SHADOW, &old_cr);
-
-        __vmwrite(GUEST_CR4, value | VMX_CR4_HOST_MASK);
-        __vmwrite(CR4_READ_SHADOW, value);
-
-        /*
-         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
-         * all TLB entries except global entries.
-         */
-        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
-            shadow_sync_all(v->domain);
-        }
-        break;
-    }
-    default:
-        printk("invalid cr: %d\n", gp);
-        __vmx_bug(regs);
-    }
-
-    return 1;
-}
-
-#define CASE_SET_REG(REG, reg)      \
-    case REG_ ## REG:       \
-    regs->reg = value;      \
-    break
-
-/*
- * Read from control registers. CR0 and CR4 are read from the shadow.
- */
-static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
-{
-    unsigned long value;
-    struct vcpu *v = current;
-
-    if (cr != 3)
-        __vmx_bug(regs);
-
-    value = (unsigned long) v->arch.arch_vmx.cpu_cr3;
-
-    switch (gp) {
-        CASE_SET_REG(EAX, eax);
-        CASE_SET_REG(ECX, ecx);
-        CASE_SET_REG(EDX, edx);
-        CASE_SET_REG(EBX, ebx);
-        CASE_SET_REG(EBP, ebp);
-        CASE_SET_REG(ESI, esi);
-        CASE_SET_REG(EDI, edi);
-        CASE_EXTEND_SET_REG
-    case REG_ESP:
-        __vmwrite(GUEST_RSP, value);
-        regs->esp = value;
-        break;
-    default:
-        printk("invalid gp: %d\n", gp);
-        __vmx_bug(regs);
-    }
-
-    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
-}
-
-static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs *regs)
-{
-    unsigned int gp, cr;
-    unsigned long value;
-    struct vcpu *v = current;
-
-    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
-    case TYPE_MOV_TO_CR:
-        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
-        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
-        TRACE_VMEXIT(1,TYPE_MOV_TO_CR);
-        TRACE_VMEXIT(2,cr);
-        TRACE_VMEXIT(3,gp);
-        return mov_to_cr(gp, cr, regs);
-    case TYPE_MOV_FROM_CR:
-        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
-        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
-        TRACE_VMEXIT(1,TYPE_MOV_FROM_CR);
-        TRACE_VMEXIT(2,cr);
-        TRACE_VMEXIT(3,gp);
-        mov_from_cr(cr, gp, regs);
-        break;
-    case TYPE_CLTS:
-        TRACE_VMEXIT(1,TYPE_CLTS);
-        clts();
-        setup_fpu(current);
-
-        __vmread_vcpu(v, GUEST_CR0, &value);
-        value &= ~X86_CR0_TS; /* clear TS */
-        __vmwrite(GUEST_CR0, value);
-
-        __vmread_vcpu(v, CR0_READ_SHADOW, &value);
-        value &= ~X86_CR0_TS; /* clear TS */
-        __vmwrite(CR0_READ_SHADOW, value);
-        break;
-    case TYPE_LMSW:
-        TRACE_VMEXIT(1,TYPE_LMSW);
-        __vmread_vcpu(v, CR0_READ_SHADOW, &value);
-        value = (value & ~0xF) |
-            (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF);
-        return vmx_set_cr0(value);
-    default:
-        __vmx_bug(regs);
-        break;
-    }
-    return 1;
-}
-
-static inline void vmx_do_msr_read(struct cpu_user_regs *regs)
-{
-    u64 msr_content = 0;
-    struct vcpu *v = current;
-
-    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
-                (unsigned long)regs->ecx, (unsigned long)regs->eax,
-                (unsigned long)regs->edx);
-    switch (regs->ecx) {
-    case MSR_IA32_TIME_STAMP_COUNTER:
-    {
-        struct vmx_virpit *vpit;
-
-        rdtscll(msr_content);
-        vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-        msr_content += vpit->shift;
-        break;
-    }
-    case MSR_IA32_SYSENTER_CS:
-        __vmread(GUEST_SYSENTER_CS, (u32 *)&msr_content);
-        break;
-    case MSR_IA32_SYSENTER_ESP:
-        __vmread(GUEST_SYSENTER_ESP, &msr_content);
-        break;
-    case MSR_IA32_SYSENTER_EIP:
-        __vmread(GUEST_SYSENTER_EIP, &msr_content);
-        break;
-    case MSR_IA32_APICBASE:
-        msr_content = VLAPIC(v) ? VLAPIC(v)->apic_base_msr : 0;
-        break;
-    default:
-        if(long_mode_do_msr_read(regs))
-            return;
-        rdmsr_user(regs->ecx, regs->eax, regs->edx);
-        break;
-    }
-
-    regs->eax = msr_content & 0xFFFFFFFF;
-    regs->edx = msr_content >> 32;
-
-    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
-                "ecx=%lx, eax=%lx, edx=%lx",
-                (unsigned long)regs->ecx, (unsigned long)regs->eax,
-                (unsigned long)regs->edx);
-}
-
-static inline void vmx_do_msr_write(struct cpu_user_regs *regs)
-{
-    u64 msr_content;
-    struct vcpu *v = current;
-
-    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx",
-                (unsigned long)regs->ecx, (unsigned long)regs->eax,
-                (unsigned long)regs->edx);
-
-    msr_content = (regs->eax & 0xFFFFFFFF) | ((u64)regs->edx << 32);
-
-    switch (regs->ecx) {
-    case MSR_IA32_TIME_STAMP_COUNTER:
-    {
-        struct vmx_virpit *vpit;
-        u64 host_tsc, drift;
-
-        rdtscll(host_tsc);
-        vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-        drift = v->arch.arch_vmx.tsc_offset - vpit->shift;
-        vpit->shift = msr_content - host_tsc;
-        v->arch.arch_vmx.tsc_offset = vpit->shift + drift;
-        __vmwrite(TSC_OFFSET, vpit->shift);
-
-#if defined (__i386__)
-        __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>>32));
-#endif
-        break;
-    }
-    case MSR_IA32_SYSENTER_CS:
-        __vmwrite(GUEST_SYSENTER_CS, msr_content);
-        break;
-    case MSR_IA32_SYSENTER_ESP:
-        __vmwrite(GUEST_SYSENTER_ESP, msr_content);
-        break;
-    case MSR_IA32_SYSENTER_EIP:
-        __vmwrite(GUEST_SYSENTER_EIP, msr_content);
-        break;
-    case MSR_IA32_APICBASE:
-        vlapic_msr_set(VLAPIC(v), msr_content);
-        break;
-    default:
-        long_mode_do_msr_write(regs);
-        break;
-    }
-
-    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write returns: "
-                "ecx=%lx, eax=%lx, edx=%lx",
-                (unsigned long)regs->ecx, (unsigned long)regs->eax,
-                (unsigned long)regs->edx);
-}
-
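Both handlers above follow the RDMSR/WRMSR register convention: the 64-bit MSR
value travels split across EDX (high half) and EAX (low half). A standalone
sketch of the split and the inverse recombine:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t msr_content = 0x123456789abcdef0ULL;   /* arbitrary example */

        /* RDMSR: low 32 bits in EAX, high 32 bits in EDX. */
        uint32_t eax = (uint32_t)(msr_content & 0xFFFFFFFF);
        uint32_t edx = (uint32_t)(msr_content >> 32);

        /* WRMSR: recombine exactly as vmx_do_msr_write does. */
        uint64_t back = ((uint64_t)edx << 32) | eax;

        printf("eax=%08x edx=%08x back=%016llx\n",
               eax, edx, (unsigned long long)back);
        return 0;
    }
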
-/*
- * Need to use this exit to reschedule
- */
-void vmx_vmexit_do_hlt(void)
-{
-    struct vcpu *v=current;
-    struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-    s_time_t next_pit = -1, next_wakeup;
-
-    if ( !v->vcpu_id ) {
-        next_pit = get_pit_scheduled(v,vpit);
-    }
-    next_wakeup = get_apictime_scheduled(v);
-    if ( (next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1 ) {
-        next_wakeup = next_pit;
-    }
-    if ( next_wakeup != -1 )
-        set_timer(&current->arch.arch_vmx.hlt_timer, next_wakeup);
-    do_sched_op(SCHEDOP_block, 0);
-}
-
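The wakeup selection above amounts to "take the earlier of the PIT and APIC
deadlines, where -1 means no deadline". A standalone sketch of just that
selection, with made-up deadline values:

    #include <stdio.h>

    typedef long long s_time_t;   /* stands in for Xen's s_time_t */

    /* Earlier of two deadlines; -1 means "none scheduled". */
    static s_time_t earliest(s_time_t next_pit, s_time_t next_wakeup)
    {
        if ((next_pit != -1 && next_pit < next_wakeup) || next_wakeup == -1)
            next_wakeup = next_pit;
        return next_wakeup;
    }

    int main(void)
    {
        printf("%lld\n", earliest(100, 200));   /* 100 */
        printf("%lld\n", earliest(-1, 200));    /* 200 */
        printf("%lld\n", earliest(300, -1));    /* 300 */
        printf("%lld\n", earliest(-1, -1));     /* -1: don't arm the timer */
        return 0;
    }
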
-static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs)
-{
-    unsigned int vector;
-    int error;
-
-    asmlinkage void do_IRQ(struct cpu_user_regs *);
-    void smp_apic_timer_interrupt(struct cpu_user_regs *);
-    void timer_interrupt(int, void *, struct cpu_user_regs *);
-    void smp_event_check_interrupt(void);
-    void smp_invalidate_interrupt(void);
-    void smp_call_function_interrupt(void);
-    void smp_spurious_interrupt(struct cpu_user_regs *regs);
-    void smp_error_interrupt(struct cpu_user_regs *regs);
-
-    if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
-        || !(vector & INTR_INFO_VALID_MASK))
-        __vmx_bug(regs);
-
-    vector &= 0xff;
-    local_irq_disable();
-
-    switch(vector) {
-    case LOCAL_TIMER_VECTOR:
-        smp_apic_timer_interrupt(regs);
-        break;
-    case EVENT_CHECK_VECTOR:
-        smp_event_check_interrupt();
-        break;
-    case INVALIDATE_TLB_VECTOR:
-        smp_invalidate_interrupt();
-        break;
-    case CALL_FUNCTION_VECTOR:
-        smp_call_function_interrupt();
-        break;
-    case SPURIOUS_APIC_VECTOR:
-        smp_spurious_interrupt(regs);
-        break;
-    case ERROR_APIC_VECTOR:
-        smp_error_interrupt(regs);
-        break;
-    default:
-        regs->entry_vector = vector;
-        do_IRQ(regs);
-        break;
-    }
-}
-
-#define BUF_SIZ     256
-#define MAX_LINE    80
-char print_buf[BUF_SIZ];
-static int index;
-
-static void vmx_print_line(const char c, struct vcpu *v)
-{
-
-    if (index == MAX_LINE || c == '\n') {
-        if (index == MAX_LINE) {
-            print_buf[index++] = c;
-        }
-        print_buf[index] = '\0';
-        printk("(GUEST: %u) %s\n", v->domain->domain_id, (char *) &print_buf);
-        index = 0;
-    }
-    else
-        print_buf[index++] = c;
-}
-
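vmx_print_line is plain line buffering: bytes accumulate until a newline
arrives or the 80-column limit is hit, then the buffer is flushed as one
printk. The same logic as a standalone program (guest/domain context stripped
out):

    #include <stdio.h>

    #define BUF_SIZ  256
    #define MAX_LINE 80

    static char print_buf[BUF_SIZ];
    static int  buf_index;

    static void print_line(char c)
    {
        if (buf_index == MAX_LINE || c == '\n') {
            if (buf_index == MAX_LINE)
                print_buf[buf_index++] = c;   /* keep the 81st byte */
            print_buf[buf_index] = '\0';
            printf("(GUEST) %s\n", print_buf);
            buf_index = 0;
        } else {
            print_buf[buf_index++] = c;
        }
    }

    int main(void)
    {
        const char *msg = "hello from the guest\n";
        while (*msg)
            print_line(*msg++);
        return 0;
    }
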
-void save_vmx_cpu_user_regs(struct cpu_user_regs *ctxt)
-{
-    __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
-    __vmread(GUEST_RSP, &ctxt->esp);
-    __vmread(GUEST_RFLAGS, &ctxt->eflags);
-    __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
-    __vmread(GUEST_RIP, &ctxt->eip);
-
-    __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
-    __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
-    __vmread(GUEST_ES_SELECTOR, &ctxt->es);
-    __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
-}
-
-#ifdef XEN_DEBUGGER
-void save_cpu_user_regs(struct cpu_user_regs *regs)
-{
-    __vmread(GUEST_SS_SELECTOR, &regs->xss);
-    __vmread(GUEST_RSP, &regs->esp);
-    __vmread(GUEST_RFLAGS, &regs->eflags);
-    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
-    __vmread(GUEST_RIP, &regs->eip);
-
-    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
-    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
-    __vmread(GUEST_ES_SELECTOR, &regs->xes);
-    __vmread(GUEST_DS_SELECTOR, &regs->xds);
-}
-
-void restore_cpu_user_regs(struct cpu_user_regs *regs)
-{
-    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
-    __vmwrite(GUEST_RSP, regs->esp);
-    __vmwrite(GUEST_RFLAGS, regs->eflags);
-    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
-    __vmwrite(GUEST_RIP, regs->eip);
-
-    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
-    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
-    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
-    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
-}
-#endif
-
-asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs)
-{
-    unsigned int exit_reason, idtv_info_field;
-    unsigned long exit_qualification, eip, inst_len = 0;
-    struct vcpu *v = current;
-    int error;
-
-    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
-        __vmx_bug(&regs);
-
-    perfc_incra(vmexits, exit_reason);
-
-    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
-    if (idtv_info_field & INTR_INFO_VALID_MASK) {
-        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
-
-        __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
-        if (inst_len >= 1 && inst_len <= 15)
-            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
-
-        if (idtv_info_field & 0x800) { /* valid error code */
-            unsigned long error_code;
-            __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
-            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
-        }
-
-        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
-    }
-
-    /* don't bother logging the noisy exits: H/W interrupts, VMCALL and I/O */
-    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
-        exit_reason != EXIT_REASON_VMCALL &&
-        exit_reason != EXIT_REASON_IO_INSTRUCTION)
-        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
-
-    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
-        printk("Failed vm entry\n");
-        domain_crash_synchronous();
-        return;
-    }
-
-    {
-        __vmread(GUEST_RIP, &eip);
-        TRACE_3D(TRC_VMX_VMEXIT, v->domain->domain_id, eip, exit_reason);
-        TRACE_VMEXIT(0,exit_reason);
-    }
-
-    switch (exit_reason) {
-    case EXIT_REASON_EXCEPTION_NMI:
-    {
-        /*
-         * We don't set the software-interrupt exiting (INT n).
-         * (1) We can get an exception (e.g. #PF) in the guest, or
-         * (2) NMI
-         */
-        int error;
-        unsigned int vector;
-        unsigned long va;
-
-        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
-            || !(vector & INTR_INFO_VALID_MASK))
-            __vmx_bug(&regs);
-        vector &= 0xff;
-
-        TRACE_VMEXIT(1,vector);
-        perfc_incra(cause_vector, vector);
-
-        TRACE_3D(TRC_VMX_VECTOR, v->domain->domain_id, eip, vector);
-        switch (vector) {
-#ifdef XEN_DEBUGGER
-        case TRAP_debug:
-        {
-            save_cpu_user_regs(&regs);
-            pdb_handle_exception(1, &regs, 1);
-            restore_cpu_user_regs(&regs);
-            break;
-        }
-        case TRAP_int3:
-        {
-            save_cpu_user_regs(&regs);
-            pdb_handle_exception(3, &regs, 1);
-            restore_cpu_user_regs(&regs);
-            break;
-        }
-#else
-        case TRAP_debug:
-        {
-            void store_cpu_user_regs(struct cpu_user_regs *regs);
-
-            store_cpu_user_regs(&regs);
-            __vm_clear_bit(GUEST_PENDING_DBG_EXCEPTIONS, PENDING_DEBUG_EXC_BS);
-
-            domain_pause_for_debugger();
-            do_sched_op(SCHEDOP_yield, 0);
-
-            break;
-        }
-#endif
-        case TRAP_no_device:
-        {
-            vmx_do_no_device_fault();
-            break;
-        }
-        case TRAP_page_fault:
-        {
-            __vmread(EXIT_QUALIFICATION, &va);
-            __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
-
-            TRACE_VMEXIT(3,regs.error_code);
-            TRACE_VMEXIT(4,va);
-
-            VMX_DBG_LOG(DBG_LEVEL_VMMU,
-                        "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
-                        (unsigned long)regs.eax, (unsigned long)regs.ebx,
-                        (unsigned long)regs.ecx, (unsigned long)regs.edx,
-                        (unsigned long)regs.esi, (unsigned long)regs.edi);
-            v->arch.arch_vmx.mmio_op.inst_decoder_regs = &regs;
-
-            if (!(error = vmx_do_page_fault(va, &regs))) {
-                /*
-                 * Inject #PG using Interruption-Information Fields
-                 */
-                vmx_inject_exception(v, TRAP_page_fault, regs.error_code);
-                v->arch.arch_vmx.cpu_cr2 = va;
-                TRACE_3D(TRC_VMX_INT, v->domain->domain_id, TRAP_page_fault, va);
-            }
-            break;
-        }
-        case TRAP_nmi:
-            do_nmi(&regs);
-            break;
-        default:
-            vmx_reflect_exception(v);
-            break;
-        }
-        break;
-    }
-    case EXIT_REASON_EXTERNAL_INTERRUPT:
-        vmx_vmexit_do_extint(&regs);
-        break;
-    case EXIT_REASON_PENDING_INTERRUPT:
-        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
-                  MONITOR_CPU_BASED_EXEC_CONTROLS);
-        break;
-    case EXIT_REASON_TASK_SWITCH:
-        __vmx_bug(&regs);
-        break;
-    case EXIT_REASON_CPUID:
-        __get_instruction_length(inst_len);
-        vmx_vmexit_do_cpuid(regs.eax, &regs);
-        __update_guest_eip(inst_len);
-        break;
-    case EXIT_REASON_HLT:
-        __get_instruction_length(inst_len);
-        __update_guest_eip(inst_len);
-        vmx_vmexit_do_hlt();
-        break;
-    case EXIT_REASON_INVLPG:
-    {
-        unsigned long   va;
-
-        __vmread(EXIT_QUALIFICATION, &va);
-        vmx_vmexit_do_invlpg(va);
-        __get_instruction_length(inst_len);
-        __update_guest_eip(inst_len);
-        break;
-    }
-    case EXIT_REASON_VMCALL:
-        __get_instruction_length(inst_len);
-        __vmread(GUEST_RIP, &eip);
-        __vmread(EXIT_QUALIFICATION, &exit_qualification);
-
-        vmx_print_line(regs.eax, v); /* provides the current domain */
-        __update_guest_eip(inst_len);
-        break;
-    case EXIT_REASON_CR_ACCESS:
-    {
-        __vmread(GUEST_RIP, &eip);
-        __get_instruction_length(inst_len);
-        __vmread(EXIT_QUALIFICATION, &exit_qualification);
-
-        VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification 
= %lx",
-                    eip, inst_len, exit_qualification);
-        if (vmx_cr_access(exit_qualification, &regs))
-            __update_guest_eip(inst_len);
-        TRACE_VMEXIT(3,regs.error_code);
-        TRACE_VMEXIT(4,exit_qualification);
-        break;
-    }
-    case EXIT_REASON_DR_ACCESS:
-        __vmread(EXIT_QUALIFICATION, &exit_qualification);
-        vmx_dr_access(exit_qualification, &regs);
-        __get_instruction_length(inst_len);
-        __update_guest_eip(inst_len);
-        break;
-    case EXIT_REASON_IO_INSTRUCTION:
-        __vmread(EXIT_QUALIFICATION, &exit_qualification);
-        __get_instruction_length(inst_len);
-        vmx_io_instruction(&regs, exit_qualification, inst_len);
-        TRACE_VMEXIT(4,exit_qualification);
-        break;
-    case EXIT_REASON_MSR_READ:
-        __get_instruction_length(inst_len);
-        vmx_do_msr_read(&regs);
-        __update_guest_eip(inst_len);
-        break;
-    case EXIT_REASON_MSR_WRITE:
-        __vmread(GUEST_RIP, &eip);
-        vmx_do_msr_write(&regs);
-        __get_instruction_length(inst_len);
-        __update_guest_eip(inst_len);
-        break;
-    case EXIT_REASON_MWAIT_INSTRUCTION:
-        __vmx_bug(&regs);
-        break;
-    default:
-        __vmx_bug(&regs);       /* should not happen */
-    }
-}
-
-asmlinkage void load_cr2(void)
-{
-    struct vcpu *v = current;
-
-    local_irq_disable();
-#ifdef __i386__
-    asm volatile("movl %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
-#else
-    asm volatile("movq %0,%%cr2": :"r" (v->arch.arch_vmx.cpu_cr2));
-#endif
-}
-
-asmlinkage void trace_vmentry(void)
-{
-    TRACE_5D(TRC_VMENTRY,
-             trace_values[smp_processor_id()][0],
-             trace_values[smp_processor_id()][1],
-             trace_values[smp_processor_id()][2],
-             trace_values[smp_processor_id()][3],
-             trace_values[smp_processor_id()][4]);
-    TRACE_VMEXIT(0,9);
-    TRACE_VMEXIT(1,9);
-    TRACE_VMEXIT(2,9);
-    TRACE_VMEXIT(3,9);
-    TRACE_VMEXIT(4,9);
-    return;
-}
-
-asmlinkage void trace_vmexit(void)
-{
-    TRACE_3D(TRC_VMEXIT,0,0,0);
-    return;
-}
-#endif /* CONFIG_VMX */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/vmx_intercept.c
--- a/xen/arch/x86/vmx_intercept.c      Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,455 +0,0 @@
-/*
- * vmx_intercept.c: Handle performance critical I/O packets in hypervisor space
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <asm/vmx.h>
-#include <asm/vmx_platform.h>
-#include <asm/vmx_vpit.h>
-#include <asm/vmx_intercept.h>
-#include <asm/vmx_vlapic.h>
-#include <public/hvm/ioreq.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm/current.h>
-#include <io_ports.h>
-#include <xen/event.h>
-
-#ifdef CONFIG_VMX
-
-extern struct vmx_mmio_handler vlapic_mmio_handler;
-extern struct vmx_mmio_handler vioapic_mmio_handler;
-
-#define VMX_MMIO_HANDLER_NR 2
-
-struct vmx_mmio_handler *vmx_mmio_handlers[VMX_MMIO_HANDLER_NR] =
-{
-    &vlapic_mmio_handler,
-    &vioapic_mmio_handler
-};
-
-static inline void vmx_mmio_access(struct vcpu *v,
-                                   ioreq_t *p,
-                                   vmx_mmio_read_t read_handler,
-                                   vmx_mmio_write_t write_handler)
-{
-    ioreq_t *req;
-    vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id);
-    unsigned int tmp1, tmp2;
-    unsigned long data;
-
-    if (vio == NULL) {
-        printk("vlapic_access: bad shared page\n");
-        domain_crash_synchronous();
-    }
-
-    req = &vio->vp_ioreq;
-
-    switch (req->type) {
-    case IOREQ_TYPE_COPY:
-    {
-        int sign = (req->df) ? -1 : 1, i;
-
-        if (!req->pdata_valid) {
-            if (req->dir == IOREQ_READ){
-                req->u.data = read_handler(v, req->addr, req->size);
-            } else {                 /* req->dir != IOREQ_READ */
-                write_handler(v, req->addr, req->size, req->u.data);
-            }
-        } else {                     /* req->pdata_valid */
-            if (req->dir == IOREQ_READ) {
-                for (i = 0; i < req->count; i++) {
-                    data = read_handler(v,
-                      req->addr + (sign * i * req->size),
-                      req->size);
-                    vmx_copy(&data,
-                      (unsigned long)p->u.pdata + (sign * i * req->size),
-                      p->size,
-                      VMX_COPY_OUT);
-                }
-            } else {                  /* req->dir != IOREQ_READ */
-                for (i = 0; i < req->count; i++) {
-                    vmx_copy(&data,
-                      (unsigned long)p->u.pdata + (sign * i * req->size),
-                      p->size,
-                      VMX_COPY_IN);
-                    write_handler(v,
-                      req->addr + (sign * i * req->size),
-                      req->size, data);
-                }
-            }
-        }
-        break;
-    }
-
-    case IOREQ_TYPE_AND:
-        tmp1 = read_handler(v, req->addr, req->size);
-        if (req->dir == IOREQ_WRITE) {
-            tmp2 = tmp1 & (unsigned long) req->u.data;
-            write_handler(v, req->addr, req->size, tmp2);
-        }
-        req->u.data = tmp1;
-        break;
-
-    case IOREQ_TYPE_OR:
-        tmp1 = read_handler(v, req->addr, req->size);
-        if (req->dir == IOREQ_WRITE) {
-            tmp2 = tmp1 | (unsigned long) req->u.data;
-            write_handler(v, req->addr, req->size, tmp2);
-        }
-        req->u.data = tmp1;
-        break;
-
-    case IOREQ_TYPE_XOR:
-        tmp1 = read_handler(v, req->addr, req->size);
-        if (req->dir == IOREQ_WRITE) {
-            tmp2 = tmp1 ^ (unsigned long) req->u.data;
-            write_handler(v, req->addr, req->size, tmp2);
-        }
-        req->u.data = tmp1;
-        break;
-
-    default:
-        printk("error ioreq type for local APIC %x\n", req->type);
-        domain_crash_synchronous();
-        break;
-    }
-}
-
-int vmx_mmio_intercept(ioreq_t *p)
-{
-    struct vcpu *v = current;
-    int i;
-
-    /* XXX: currently only the APIC ranges use the MMIO intercept */
-    if ( !vmx_apic_support(v->domain) )
-        return 0;
-
-    for ( i = 0; i < VMX_MMIO_HANDLER_NR; i++ ) {
-        if ( vmx_mmio_handlers[i]->check_handler(v, p->addr) ) {
-            vmx_mmio_access(v, p,
-                            vmx_mmio_handlers[i]->read_handler,
-                            vmx_mmio_handlers[i]->write_handler);
-            return 1;
-        }
-    }
-    return 0;
-}
-
-/*
- * Check if the request is handled inside xen
- * return value: 0 --not handled; 1 --handled
- */
-int vmx_io_intercept(ioreq_t *p, int type)
-{
-    struct vcpu *v = current;
-    struct vmx_io_handler *handler =
-                           &(v->domain->arch.vmx_platform.vmx_io_handler);
-    int i;
-    unsigned long addr, size;
-
-    for (i = 0; i < handler->num_slot; i++) {
-        if( type != handler->hdl_list[i].type)
-            continue;
-        addr = handler->hdl_list[i].addr;
-        size = handler->hdl_list[i].size;
-        if (p->addr >= addr &&
-            p->addr <  addr + size)
-            return handler->hdl_list[i].action(p);
-    }
-    return 0;
-}
-
-int register_io_handler(unsigned long addr, unsigned long size,
-                        intercept_action_t action, int type)
-{
-    struct vcpu *v = current;
-    struct vmx_io_handler *handler =
-                             &(v->domain->arch.vmx_platform.vmx_io_handler);
-    int num = handler->num_slot;
-
-    if (num >= MAX_IO_HANDLER) {
-        printk("no extra space, register io interceptor failed!\n");
-        domain_crash_synchronous();
-    }
-
-    handler->hdl_list[num].addr = addr;
-    handler->hdl_list[num].size = size;
-    handler->hdl_list[num].action = action;
-    handler->hdl_list[num].type = type;
-    handler->num_slot++;
-
-    return 1;
-}
-
-static void pit_cal_count(struct vmx_virpit *vpit)
-{
-    u64 nsec_delta = (unsigned int)((NOW() - vpit->inject_point));
-    if (nsec_delta > vpit->period)
-        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT:long time has passed from last 
injection!");
-    if(vpit->init_val == 0)
-    {
-        printk("PIT init value == 0!\n");
-        domain_crash_synchronous();
-    }
-
-    vpit->count = vpit->init_val - ((nsec_delta * PIT_FREQ / 1000000000ULL) % vpit->init_val);
-}
-
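The count reconstruction above is pure arithmetic: convert the elapsed
wall-clock nanoseconds to PIT input-clock ticks and fold them modulo the
programmed initial count. Standalone version, assuming a PIT_FREQ of
1193181 Hz (the 8254's nominal input clock):

    #include <stdio.h>
    #include <stdint.h>

    #define PIT_FREQ 1193181ULL   /* assumed 8254 input clock, Hz */

    int main(void)
    {
        uint64_t init_val   = 0xFFFF;         /* programmed reload value */
        uint64_t nsec_delta = 30000000ULL;    /* 30 ms since last injection */

        uint64_t ticks = (nsec_delta * PIT_FREQ / 1000000000ULL) % init_val;
        uint64_t count = init_val - ticks;    /* the PIT counts down */

        printf("elapsed ticks=%llu count=%llu\n",
               (unsigned long long)ticks, (unsigned long long)count);
        return 0;
    }
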
-static void pit_latch_io(struct vmx_virpit *vpit)
-{
-    pit_cal_count(vpit);
-
-    switch(vpit->read_state) {
-    case MSByte:
-        vpit->count_MSB_latched=1;
-        break;
-    case LSByte:
-        vpit->count_LSB_latched=1;
-        break;
-    case LSByte_multiple:
-        vpit->count_LSB_latched=1;
-        vpit->count_MSB_latched=1;
-        break;
-    case MSByte_multiple:
-        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT:latch PIT counter before 
MSB_multiple!");
-        vpit->read_state=LSByte_multiple;
-        vpit->count_LSB_latched=1;
-        vpit->count_MSB_latched=1;
-        break;
-    default:
-        domain_crash_synchronous();
-    }
-}
-
-static int pit_read_io(struct vmx_virpit *vpit)
-{
-    if(vpit->count_LSB_latched) {
-        /* Read Least Significant Byte */
-        if(vpit->read_state==LSByte_multiple) {
-            vpit->read_state=MSByte_multiple;
-        }
-        vpit->count_LSB_latched=0;
-        return (vpit->count & 0xFF);
-    } else if(vpit->count_MSB_latched) {
-        /* Read Most Significant Byte */
-        if(vpit->read_state==MSByte_multiple) {
-            vpit->read_state=LSByte_multiple;
-        }
-        vpit->count_MSB_latched=0;
-        return ((vpit->count>>8) & 0xFF);
-    } else {
-        /* Unlatched Count Read */
-        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: unlatched read");
-        pit_cal_count(vpit);
-        if(!(vpit->read_state & 0x1)) {
-            /* Read Least Significant Byte */
-            if(vpit->read_state==LSByte_multiple) {
-                vpit->read_state=MSByte_multiple;
-            }
-            return (vpit->count & 0xFF);
-        } else {
-            /* Read Most Significant Byte */
-            if(vpit->read_state==MSByte_multiple) {
-                vpit->read_state=LSByte_multiple;
-            }
-            return ((vpit->count>>8) & 0xFF);
-        }
-    }
-}
-
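The read side is a little state machine: in two-byte access mode each read
alternates between the low and high byte of the (latched) 16-bit count. The
flip-flop in isolation, with a made-up latched value:

    #include <stdio.h>

    enum { LSBYTE_MULT, MSBYTE_MULT };

    static int          read_state = LSBYTE_MULT;
    static unsigned int count      = 0x1234;   /* hypothetical latched count */

    /* Two-byte mode: successive reads return LSB, then MSB. */
    static unsigned char pit_read(void)
    {
        if (read_state == LSBYTE_MULT) {
            read_state = MSBYTE_MULT;
            return count & 0xFF;
        }
        read_state = LSBYTE_MULT;
        return (count >> 8) & 0xFF;
    }

    int main(void)
    {
        unsigned char lo = pit_read();
        unsigned char hi = pit_read();
        printf("%02x %02x\n", lo, hi);   /* 34 12 */
        return 0;
    }
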
-/* vmx_io_assist light-weight version, specific to the PIT DM */
-static void resume_pit_io(ioreq_t *p)
-{
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax = regs->eax;
-    p->state = STATE_INVALID;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        BUG();
-    }
-}
-
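Partial-width port reads only replace the low 1, 2 or 4 bytes of EAX, leaving
the rest of the register intact; vmx_pio_assist later in this changeset
repeats the same merge. In isolation:

    #include <stdio.h>
    #include <stdint.h>

    /* Merge an inb/inw/inl result into the low bytes of EAX. */
    static uint32_t merge_eax(uint32_t old_eax, uint64_t data, int size)
    {
        switch (size) {
        case 1:  return (old_eax & 0xffffff00) | (data & 0xff);
        case 2:  return (old_eax & 0xffff0000) | (data & 0xffff);
        case 4:  return (uint32_t)(data & 0xffffffff);
        default: return old_eax;   /* the hypervisor BUG()s here instead */
        }
    }

    int main(void)
    {
        printf("%08x\n", merge_eax(0xdeadbeef, 0x42, 1));     /* deadbe42 */
        printf("%08x\n", merge_eax(0xdeadbeef, 0x1234, 2));   /* dead1234 */
        return 0;
    }
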
-/* the intercept action for PIT DM retval:0--not handled; 1--handled */
-int intercept_pit_io(ioreq_t *p)
-{
-    struct vcpu *v = current;
-    struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-
-    if (p->size != 1 ||
-        p->pdata_valid ||
-        p->type != IOREQ_TYPE_PIO)
-        return 0;
-    
-    if (p->addr == PIT_MODE &&
-        p->dir == 0 &&    /* write */
-        ((p->u.data >> 4) & 0x3) == 0 && /* latch command */
-        ((p->u.data >> 6) & 0x3) == (vpit->channel)) {/* right channel */
-        pit_latch_io(vpit);
-        return 1;
-    }
-
-    if (p->addr == (PIT_CH0 + vpit->channel) &&
-        p->dir == 1) { /* read */
-        p->u.data = pit_read_io(vpit);
-        resume_pit_io(p);
-        return 1;
-    }
-
-    return 0;
-}
-
-/* hooks function for the HLT instruction emulation wakeup */
-void hlt_timer_fn(void *data)
-{
-    struct vcpu *v = data;
-    
-    evtchn_set_pending(v, iopacket_port(v->domain));
-}
-
-static __inline__ void missed_ticks(struct vmx_virpit *vpit)
-{
-    int missed_ticks;
-
-    missed_ticks = (NOW() - vpit->scheduled) / (s_time_t)vpit->period;
-    if ( missed_ticks > 0 ) {
-        vpit->pending_intr_nr += missed_ticks;
-        vpit->scheduled += missed_ticks * vpit->period;
-    }
-}
-
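The catch-up rule is floor((NOW - scheduled) / period) missed ticks, after
which the schedule is advanced by that many whole periods so the next deadline
stays phase-aligned. With made-up numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t now = 10500000, scheduled = 4000000, period = 2000000;
        int64_t pending_intr_nr = 0;

        int64_t missed = (now - scheduled) / period;   /* floor division */
        if (missed > 0) {
            pending_intr_nr += missed;                 /* 3 ticks to inject */
            scheduled       += missed * period;        /* next: 10000000 */
        }
        printf("missed=%lld next=%lld\n",
               (long long)missed, (long long)scheduled);
        return 0;
    }
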
-/* hooks function for the PIT when the guest is active */
-static void pit_timer_fn(void *data)
-{
-    struct vcpu *v = data;
-    struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-
-    /* pick up missed timer tick */
-    missed_ticks(vpit);
-
-    vpit->pending_intr_nr++;
-    if ( test_bit(_VCPUF_running, &v->vcpu_flags) ) {
-        vpit->scheduled += vpit->period;
-        set_timer(&vpit->pit_timer, vpit->scheduled);
-    }
-}
-
-void pickup_deactive_ticks(struct vmx_virpit *vpit)
-{
-
-    if ( !active_timer(&(vpit->pit_timer)) ) {
-        /* pick up missed timer tick */
-        missed_ticks(vpit);
-    
-        vpit->scheduled += vpit->period;
-        set_timer(&vpit->pit_timer, vpit->scheduled);
-    }
-}
-
-/* Only some PIT operations such as load init counter need a hypervisor hook.
- * leave all other operations in user space DM
- */
-void vmx_hooks_assist(struct vcpu *v)
-{
-    vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id);
-    ioreq_t *p = &vio->vp_ioreq;
-    struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-    int rw_mode, reinit = 0;
-
-    /* load init count*/
-    if (p->state == STATE_IORESP_HOOK) {
-        /* set up actimer, handle re-init */
-        if ( active_timer(&(vpit->pit_timer)) ) {
-            VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: guest reset PIT with channel 
%lx!\n", (unsigned long) ((p->u.data >> 24) & 0x3) );
-            stop_timer(&(vpit->pit_timer));
-            reinit = 1;
- 
-        }
-        else {
-            init_timer(&vpit->pit_timer, pit_timer_fn, v, v->processor);
-        }
-
-        /* init count for this channel */
-        vpit->init_val = (p->u.data & 0xFFFF) ;
-        /* period of one PIT tick, in ns */
-        vpit->period = DIV_ROUND(((vpit->init_val) * 1000000000ULL), PIT_FREQ);
-        VMX_DBG_LOG(DBG_LEVEL_1, "VMX_PIT: guest set PIT period: %u ns, initval: 0x%x\n", vpit->period, vpit->init_val);
-        if (vpit->period < 900000) { /* < 0.9 ms */
-            printk("VMX_PIT: guest programmed too small an init_val: %x\n",
-                   vpit->init_val);
-            vpit->period = 1000000;
-        }
-        vpit->period_cycles = (u64)vpit->period * cpu_khz / 1000000L;
-        printk("VMX_PIT: PIT period in cycles = %lld\n", (long long)vpit->period_cycles);
-
-        vpit->channel = ((p->u.data >> 24) & 0x3);
-        vpit->first_injected = 0;
-
-        vpit->count_LSB_latched = 0;
-        vpit->count_MSB_latched = 0;
-
-        rw_mode = ((p->u.data >> 26) & 0x3);
-        switch(rw_mode) {
-        case 0x1:
-            vpit->read_state=LSByte;
-            break;
-        case 0x2:
-            vpit->read_state=MSByte;
-            break;
-        case 0x3:
-            vpit->read_state=LSByte_multiple;
-            break;
-        default:
-            printk("VMX_PIT:wrong PIT rw_mode!\n");
-            break;
-        }
-
-        vpit->scheduled = NOW() + vpit->period;
-        set_timer(&vpit->pit_timer, vpit->scheduled);
-
-        /*restore the state*/
-        p->state = STATE_IORESP_READY;
-
-        /* register handler to intercept the PIT io when vm_exit */
-        if (!reinit) {
-            register_portio_handler(0x40, 4, intercept_pit_io); 
-        }
-    }
-}
-#endif /* CONFIG_VMX */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/vmx_io.c
--- a/xen/arch/x86/vmx_io.c     Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,1014 +0,0 @@
-/*
- * vmx_io.c: handling I/O, interrupts related VMX entry/exit
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/trace.h>
-#include <xen/event.h>
-#include <xen/hypercall.h>
-#include <asm/current.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vmcs.h>
-#include <asm/vmx_platform.h>
-#include <asm/vmx_vpit.h>
-#include <asm/apic.h>
-#include <asm/shadow.h>
-#include <asm/vmx_vpic.h>
-#include <asm/vmx_vlapic.h>
-#include <public/sched.h>
-#include <public/hvm/ioreq.h>
-
-#ifdef CONFIG_VMX
-#if defined (__i386__)
-void load_cpu_user_regs(struct cpu_user_regs *regs)
-{
-    /*
-     * Write the guest register value into VMCS
-     */
-    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
-    __vmwrite(GUEST_RSP, regs->esp);
-
-    __vmwrite(GUEST_RFLAGS, regs->eflags);
-    if (regs->eflags & EF_TF)
-        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
-    else
-        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
-
-    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
-    __vmwrite(GUEST_RIP, regs->eip);
-}
-
-static void set_reg_value(int size, int index, int seg, struct cpu_user_regs *regs, long value)
-{
-    switch (size) {
-    case BYTE:
-        switch (index) {
-        case 0:
-            regs->eax &= 0xFFFFFF00;
-            regs->eax |= (value & 0xFF);
-            break;
-        case 1:
-            regs->ecx &= 0xFFFFFF00;
-            regs->ecx |= (value & 0xFF);
-            break;
-        case 2:
-            regs->edx &= 0xFFFFFF00;
-            regs->edx |= (value & 0xFF);
-            break;
-        case 3:
-            regs->ebx &= 0xFFFFFF00;
-            regs->ebx |= (value & 0xFF);
-            break;
-        case 4:
-            regs->eax &= 0xFFFF00FF;
-            regs->eax |= ((value & 0xFF) << 8);
-            break;
-        case 5:
-            regs->ecx &= 0xFFFF00FF;
-            regs->ecx |= ((value & 0xFF) << 8);
-            break;
-        case 6:
-            regs->edx &= 0xFFFF00FF;
-            regs->edx |= ((value & 0xFF) << 8);
-            break;
-        case 7:
-            regs->ebx &= 0xFFFF00FF;
-            regs->ebx |= ((value & 0xFF) << 8);
-            break;
-        default:
-            printk("Error: size:%x, index:%x are invalid!\n", size, index);
-            domain_crash_synchronous();
-            break;
-        }
-        break;
-    case WORD:
-        switch (index) {
-        case 0:
-            regs->eax &= 0xFFFF0000;
-            regs->eax |= (value & 0xFFFF);
-            break;
-        case 1:
-            regs->ecx &= 0xFFFF0000;
-            regs->ecx |= (value & 0xFFFF);
-            break;
-        case 2:
-            regs->edx &= 0xFFFF0000;
-            regs->edx |= (value & 0xFFFF);
-            break;
-        case 3:
-            regs->ebx &= 0xFFFF0000;
-            regs->ebx |= (value & 0xFFFF);
-            break;
-        case 4:
-            regs->esp &= 0xFFFF0000;
-            regs->esp |= (value & 0xFFFF);
-            break;
-        case 5:
-            regs->ebp &= 0xFFFF0000;
-            regs->ebp |= (value & 0xFFFF);
-            break;
-        case 6:
-            regs->esi &= 0xFFFF0000;
-            regs->esi |= (value & 0xFFFF);
-            break;
-        case 7:
-            regs->edi &= 0xFFFF0000;
-            regs->edi |= (value & 0xFFFF);
-            break;
-        default:
-            printk("Error: size:%x, index:%x are invalid!\n", size, index);
-            domain_crash_synchronous();
-            break;
-        }
-        break;
-    case LONG:
-        switch (index) {
-        case 0:
-            regs->eax = value;
-            break;
-        case 1:
-            regs->ecx = value;
-            break;
-        case 2:
-            regs->edx = value;
-            break;
-        case 3:
-            regs->ebx = value;
-            break;
-        case 4:
-            regs->esp = value;
-            break;
-        case 5:
-            regs->ebp = value;
-            break;
-        case 6:
-            regs->esi = value;
-            break;
-        case 7:
-            regs->edi = value;
-            break;
-        default:
-            printk("Error: size:%x, index:%x are invalid!\n", size, index);
-            domain_crash_synchronous();
-            break;
-        }
-        break;
-    default:
-        printk("Error: size:%x, index:%x are invalid!\n", size, index);
-        domain_crash_synchronous();
-        break;
-    }
-}
-#else
-void load_cpu_user_regs(struct cpu_user_regs *regs)
-{
-    __vmwrite(GUEST_SS_SELECTOR, regs->ss);
-    __vmwrite(GUEST_RSP, regs->rsp);
-
-    __vmwrite(GUEST_RFLAGS, regs->rflags);
-    if (regs->rflags & EF_TF)
-        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
-    else
-        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
-
-    __vmwrite(GUEST_CS_SELECTOR, regs->cs);
-    __vmwrite(GUEST_RIP, regs->rip);
-}
-
-static inline void __set_reg_value(unsigned long *reg, int size, long value)
-{
-    switch (size) {
-    case BYTE_64:
-        *reg &= ~0xFF;
-        *reg |= (value & 0xFF);
-        break;
-    case WORD:
-        *reg &= ~0xFFFF;
-        *reg |= (value & 0xFFFF);
-        break;
-    case LONG:
-        *reg &= ~0xFFFFFFFF;
-        *reg |= (value & 0xFFFFFFFF);
-        break;
-    case QUAD:
-        *reg = value;
-        break;
-    default:
-        printk("Error: <__set_reg_value>: size:%x is invalid\n", size);
-        domain_crash_synchronous();
-    }
-}
-
-static void set_reg_value(int size, int index, int seg, struct cpu_user_regs *regs, long value)
-{
-    if (size == BYTE) {
-        switch (index) {
-        case 0:
-            regs->rax &= ~0xFF;
-            regs->rax |= (value & 0xFF);
-            break;
-        case 1:
-            regs->rcx &= ~0xFF;
-            regs->rcx |= (value & 0xFF);
-            break;
-        case 2:
-            regs->rdx &= ~0xFF;
-            regs->rdx |= (value & 0xFF);
-            break;
-        case 3:
-            regs->rbx &= ~0xFF;
-            regs->rbx |= (value & 0xFF);
-            break;
-        case 4:
-            regs->rax &= 0xFFFFFFFFFFFF00FF;
-            regs->rax |= ((value & 0xFF) << 8);
-            break;
-        case 5:
-            regs->rcx &= 0xFFFFFFFFFFFF00FF;
-            regs->rcx |= ((value & 0xFF) << 8);
-            break;
-        case 6:
-            regs->rdx &= 0xFFFFFFFFFFFF00FF;
-            regs->rdx |= ((value & 0xFF) << 8);
-            break;
-        case 7:
-            regs->rbx &= 0xFFFFFFFFFFFF00FF;
-            regs->rbx |= ((value & 0xFF) << 8);
-            break;
-        default:
-            printk("Error: size:%x, index:%x are invalid!\n", size, index);
-            domain_crash_synchronous();
-            break;
-        }
-        return;
-    }
-
-    switch (index) {
-    case 0:
-        __set_reg_value(&regs->rax, size, value);
-        break;
-    case 1:
-        __set_reg_value(&regs->rcx, size, value);
-        break;
-    case 2:
-        __set_reg_value(&regs->rdx, size, value);
-        break;
-    case 3:
-        __set_reg_value(&regs->rbx, size, value);
-        break;
-    case 4:
-        __set_reg_value(&regs->rsp, size, value);
-        break;
-    case 5:
-        __set_reg_value(&regs->rbp, size, value);
-        break;
-    case 6:
-        __set_reg_value(&regs->rsi, size, value);
-        break;
-    case 7:
-        __set_reg_value(&regs->rdi, size, value);
-        break;
-    case 8:
-        __set_reg_value(&regs->r8, size, value);
-        break;
-    case 9:
-        __set_reg_value(&regs->r9, size, value);
-        break;
-    case 10:
-        __set_reg_value(&regs->r10, size, value);
-        break;
-    case 11:
-        __set_reg_value(&regs->r11, size, value);
-        break;
-    case 12:
-        __set_reg_value(&regs->r12, size, value);
-        break;
-    case 13:
-        __set_reg_value(&regs->r13, size, value);
-        break;
-    case 14:
-        __set_reg_value(&regs->r14, size, value);
-        break;
-    case 15:
-        __set_reg_value(&regs->r15, size, value);
-        break;
-    default:
-        printk("Error: <set_reg_value> Invalid index\n");
-        domain_crash_synchronous();
-    }
-    return;
-}
-#endif
-
-extern long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs);
-
-static inline void set_eflags_CF(int size, unsigned long v1,
-                                 unsigned long v2, struct cpu_user_regs *regs)
-{
-    unsigned long mask = (1 << (8 * size)) - 1;
-
-    if ((v1 & mask) > (v2 & mask))
-        regs->eflags |= X86_EFLAGS_CF;
-    else
-        regs->eflags &= ~X86_EFLAGS_CF;
-}
-
-static inline void set_eflags_OF(int size, unsigned long v1,
-                                 unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
-{
-    if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1)))
-        regs->eflags |= X86_EFLAGS_OF;
-}
-
-static inline void set_eflags_AF(int size, unsigned long v1,
-                                 unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
-{
-    if ((v1 ^ v2 ^ v3) & 0x10)
-        regs->eflags |= X86_EFLAGS_AF;
-}
-
-static inline void set_eflags_ZF(int size, unsigned long v1,
-                                 struct cpu_user_regs *regs)
-{
-    unsigned long mask = (1 << (8 * size)) - 1;
-
-    if ((v1 & mask) == 0)
-        regs->eflags |= X86_EFLAGS_ZF;
-}
-
-static inline void set_eflags_SF(int size, unsigned long v1,
-                                 struct cpu_user_regs *regs)
-{
-    if (v1 & (1 << ((8 * size) - 1)))
-        regs->eflags |= X86_EFLAGS_SF;
-}
-
-static char parity_table[256] = {
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
-    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
-};
-
-static inline void set_eflags_PF(int size, unsigned long v1,
-                                 struct cpu_user_regs *regs)
-{
-    if (parity_table[v1 & 0xFF])
-        regs->eflags |= X86_EFLAGS_PF;
-}
-
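parity_table just caches x86's PF rule: the flag is set when the result's low
byte contains an even number of 1 bits. A standalone check of that equivalence
using the GCC popcount builtin (illustration only):

    #include <stdio.h>

    /* PF is set iff the low byte has even population count. */
    static int pf(unsigned long v1)
    {
        return (__builtin_popcount((unsigned int)(v1 & 0xFF)) & 1) == 0;
    }

    int main(void)
    {
        /* 0x03 has two set bits -> PF=1; 0x07 has three -> PF=0. */
        printf("PF(0x03)=%d PF(0x07)=%d\n", pf(0x03), pf(0x07));
        return 0;
    }
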
-static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
-                           struct mmio_op *mmio_opp)
-{
-    unsigned long old_eax;
-    int sign = p->df ? -1 : 1;
-
-    if (p->dir == IOREQ_WRITE) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            if (mmio_opp->flags & REPZ)
-                regs->ecx -= p->count;
-        }
-    } else {
-        if (mmio_opp->flags & OVERLAP) {
-            unsigned long addr;
-
-            regs->edi += sign * p->count * p->size;
-            if (mmio_opp->flags & REPZ)
-                regs->ecx -= p->count;
-
-            addr = regs->edi;
-            if (sign > 0)
-                addr -= p->size;
-            vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
-        } else if (p->pdata_valid) {
-            regs->edi += sign * p->count * p->size;
-            if (mmio_opp->flags & REPZ)
-                regs->ecx -= p->count;
-        } else {
-            old_eax = regs->eax;
-            switch (p->size) {
-            case 1:
-                regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-                break;
-            case 2:
-                regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-                break;
-            case 4:
-                regs->eax = (p->u.data & 0xffffffff);
-                break;
-            default:
-                printk("Error: %s unknown port size\n", __FUNCTION__);
-                domain_crash_synchronous();
-            }
-        }
-    }
-}
-
-static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
-                            struct mmio_op *mmio_opp)
-{
-    int sign = p->df ? -1 : 1;
-    int size = -1, index = -1;
-    unsigned long value = 0, diff = 0;
-    unsigned long src, dst;
-
-    src = mmio_opp->operand[0];
-    dst = mmio_opp->operand[1];
-    size = operand_size(src);
-
-    switch (mmio_opp->instr) {
-    case INSTR_MOV:
-        if (dst & REGISTER) {
-            index = operand_index(dst);
-            set_reg_value(size, index, 0, regs, p->u.data);
-        }
-        break;
-
-    case INSTR_MOVZX:
-        if (dst & REGISTER) {
-            switch (size) {
-            case BYTE:
-                p->u.data &= 0xFFULL;
-                break;
-
-            case WORD:
-                p->u.data &= 0xFFFFULL;
-                break;
-
-            case LONG:
-                p->u.data &= 0xFFFFFFFFULL;
-                break;
-
-            default:
-                printk("Impossible source operand size of movzx instr: %d\n", 
size);
-                domain_crash_synchronous();
-            }
-            index = operand_index(dst);
-            set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
-        }
-        break;
-
-    case INSTR_MOVSX:
-        if (dst & REGISTER) {
-            switch (size) {
-            case BYTE:
-                p->u.data &= 0xFFULL;
-                if ( p->u.data & 0x80ULL )
-                    p->u.data |= 0xFFFFFFFFFFFFFF00ULL;
-                break;
-
-            case WORD:
-                p->u.data &= 0xFFFFULL;
-                if ( p->u.data & 0x8000ULL )
-                    p->u.data |= 0xFFFFFFFFFFFF0000ULL;
-                break;
-
-            case LONG:
-                p->u.data &= 0xFFFFFFFFULL;
-                if ( p->u.data & 0x80000000ULL )
-                    p->u.data |= 0xFFFFFFFF00000000ULL;
-                break;
-
-            default:
-                printk("Impossible source operand size of movsx instr: %d\n", 
size);
-                domain_crash_synchronous();
-            }
-            index = operand_index(dst);
-            set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
-        }
-        break;
-
-    case INSTR_MOVS:
-        sign = p->df ? -1 : 1;
-        regs->esi += sign * p->count * p->size;
-        regs->edi += sign * p->count * p->size;
-
-        if ((mmio_opp->flags & OVERLAP) && p->dir == IOREQ_READ) {
-            unsigned long addr = regs->edi;
-
-            if (sign > 0)
-                addr -= p->size;
-            vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
-        }
-
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-        break;
-
-    case INSTR_STOS:
-        sign = p->df ? -1 : 1;
-        regs->edi += sign * p->count * p->size;
-        if (mmio_opp->flags & REPZ)
-            regs->ecx -= p->count;
-        break;
-
-    case INSTR_AND:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data & value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            diff = (unsigned long) p->u.data & value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data & value;
-            set_reg_value(size, index, 0, regs, diff);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, diff, regs);
-        set_eflags_SF(size, diff, regs);
-        set_eflags_PF(size, diff, regs);
-        break;
-
-    case INSTR_OR:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data | value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            diff = (unsigned long) p->u.data | value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data | value;
-            set_reg_value(size, index, 0, regs, diff);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, diff, regs);
-        set_eflags_SF(size, diff, regs);
-        set_eflags_PF(size, diff, regs);
-        break;
-
-    case INSTR_XOR:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data ^ value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            diff = (unsigned long) p->u.data ^ value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data ^ value;
-            set_reg_value(size, index, 0, regs, diff);
-        }
-
-        /*
-         * The OF and CF flags are cleared; the SF, ZF, and PF
-         * flags are set according to the result. The state of
-         * the AF flag is undefined.
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, diff, regs);
-        set_eflags_SF(size, diff, regs);
-        set_eflags_PF(size, diff, regs);
-        break;
-
-    case INSTR_CMP:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-            diff = (unsigned long) p->u.data - value;
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-            diff = (unsigned long) p->u.data - value;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-            diff = value - (unsigned long) p->u.data;
-        }
-
-        /*
-         * The CF, OF, SF, ZF, AF, and PF flags are set according
-         * to the result
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_CF(size, value, (unsigned long) p->u.data, regs);
-        set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs);
-        set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs);
-        set_eflags_ZF(size, diff, regs);
-        set_eflags_SF(size, diff, regs);
-        set_eflags_PF(size, diff, regs);
-        break;
-
-    case INSTR_TEST:
-        if (src & REGISTER) {
-            index = operand_index(src);
-            value = get_reg_value(size, index, 0, regs);
-        } else if (src & IMMEDIATE) {
-            value = mmio_opp->immediate;
-        } else if (src & MEMORY) {
-            index = operand_index(dst);
-            value = get_reg_value(size, index, 0, regs);
-        }
-        diff = (unsigned long) p->u.data & value;
-
-        /*
-         * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
-         */
-        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
-                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
-        set_eflags_ZF(size, diff, regs);
-        set_eflags_SF(size, diff, regs);
-        set_eflags_PF(size, diff, regs);
-        break;
-
-    case INSTR_BT:
-        index = operand_index(src);
-        value = get_reg_value(size, index, 0, regs);
-
-        if (p->u.data & (1 << (value & ((1 << 5) - 1))))
-            regs->eflags |= X86_EFLAGS_CF;
-        else
-            regs->eflags &= ~X86_EFLAGS_CF;
-
-        break;
-    }
-
-    load_cpu_user_regs(regs);
-}
-
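The INSTR_MOVSX case widens the loaded value by replicating its sign bit into
the upper bits, which is what the 0xFFFFFFFFFFFFFF00-style masks implement.
The same operation parameterised over the width, as a standalone sketch:

    #include <stdio.h>
    #include <stdint.h>

    /* Sign-extend the low `bits` of val to 64 bits. */
    static uint64_t sext(uint64_t val, int bits)
    {
        uint64_t mask = (bits == 64) ? ~0ULL : ((1ULL << bits) - 1);
        uint64_t sign = 1ULL << (bits - 1);

        val &= mask;
        if (val & sign)
            val |= ~mask;   /* replicate the sign bit upward */
        return val;
    }

    int main(void)
    {
        printf("%016llx\n", (unsigned long long)sext(0x80, 8));
        /* ffffffffffffff80 - matches the BYTE case above */
        printf("%016llx\n", (unsigned long long)sext(0x7fff, 16));
        /* 0000000000007fff - positive values pass through */
        return 0;
    }
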
-void vmx_io_assist(struct vcpu *v)
-{
-    vcpu_iodata_t *vio;
-    ioreq_t *p;
-    struct cpu_user_regs *regs = guest_cpu_user_regs();
-    struct mmio_op *mmio_opp;
-    struct cpu_user_regs *inst_decoder_regs;
-
-    mmio_opp = &v->arch.arch_vmx.mmio_op;
-    inst_decoder_regs = mmio_opp->inst_decoder_regs;
-
-    vio = get_vio(v->domain, v->vcpu_id);
-
-    if (vio == 0) {
-        VMX_DBG_LOG(DBG_LEVEL_1,
-                    "bad shared page: %lx", (unsigned long) vio);
-        printf("bad shared page: %lx\n", (unsigned long) vio);
-        domain_crash_synchronous();
-    }
-
-    p = &vio->vp_ioreq;
-    if (p->state == STATE_IORESP_HOOK)
-        vmx_hooks_assist(v);
-
-    /* clear IO wait VMX flag */
-    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
-        if (p->state == STATE_IORESP_READY) {
-            p->state = STATE_INVALID;
-            clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
-
-            if (p->type == IOREQ_TYPE_PIO)
-                vmx_pio_assist(regs, p, mmio_opp);
-            else
-                vmx_mmio_assist(regs, p, mmio_opp);
-        }
-        /* else an interrupt send event raced us */
-    }
-}
-
-int vmx_clear_pending_io_event(struct vcpu *v)
-{
-    struct domain *d = v->domain;
-    int port = iopacket_port(d);
-
-    /* evtchn_pending_sel bit is shared by other event channels. */
-    if (!d->shared_info->evtchn_pending[port/BITS_PER_LONG])
-        clear_bit(port/BITS_PER_LONG, &v->vcpu_info->evtchn_pending_sel);
-
-    /* Note: VMX domains may need upcalls as well. */
-    if (!v->vcpu_info->evtchn_pending_sel)
-        clear_bit(0, &v->vcpu_info->evtchn_upcall_pending);
-
-    /* Clear the pending bit for port. */
-    return test_and_clear_bit(port, &d->shared_info->evtchn_pending[0]);
-}
-
-/* Because we've cleared the pending events first, we need to guarantee that
- * all events to be handled by xen for VMX domains are taken care of here.
- *
- * interrupts are guaranteed to be checked before resuming guest.
- * VMX upcalls have been already arranged for if necessary.
- */
-void vmx_check_events(struct vcpu *v)
-{
-    /* clear the event *before* checking for work. This should avoid
-       the set-and-check races */
-    if (vmx_clear_pending_io_event(v))
-        vmx_io_assist(v);
-}
-
-/* On exit from vmx_wait_io, we're guaranteed to have a I/O response from
-   the device model */
-void vmx_wait_io(void)
-{
-    int port = iopacket_port(current->domain);
-
-    do {
-        if (!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
-            do_sched_op(SCHEDOP_block, 0);
-
-        vmx_check_events(current);
-        if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
-            break;
-        /* Events other than IOPACKET_PORT might have woken us up. In that
-           case, safely go back to sleep. */
-        clear_bit(port/BITS_PER_LONG, &current->vcpu_info->evtchn_pending_sel);
-        clear_bit(0, &current->vcpu_info->evtchn_upcall_pending);
-    } while(1);
-}
-
-/* Simple-minded local APIC priority implementation. Fix later. */
-static __inline__ int find_highest_irq(u32 *pintr)
-{
-    if (pintr[7])
-        return __fls(pintr[7]) + (256-32*1);
-    if (pintr[6])
-        return __fls(pintr[6]) + (256-32*2);
-    if (pintr[5])
-        return __fls(pintr[5]) + (256-32*3);
-    if (pintr[4])
-        return __fls(pintr[4]) + (256-32*4);
-    if (pintr[3])
-        return __fls(pintr[3]) + (256-32*5);
-    if (pintr[2])
-        return __fls(pintr[2]) + (256-32*6);
-    if (pintr[1])
-        return __fls(pintr[1]) + (256-32*7);
-    return __fls(pintr[0]);
-}
-
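find_highest_irq scans the 256-bit IRR from the top 32-bit word down and
returns the highest pending vector. An equivalent loop-based sketch using the
GCC clz builtin (this version returns -1 for an empty bitmap, which the
original leaves to a caller invariant):

    #include <stdio.h>
    #include <stdint.h>

    static int find_highest(const uint32_t *pintr)
    {
        int word;

        for (word = 7; word >= 0; word--)
            if (pintr[word])
                return word * 32 + (31 - __builtin_clz(pintr[word]));
        return -1;
    }

    int main(void)
    {
        uint32_t irr[8] = { 0x8, 0, 0, 0, 0, 0x100, 0, 0 };
        printf("highest vector = %d\n", find_highest(irr));   /* 168 */
        return 0;
    }
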
-void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit)
-{
-    u64   drift;
-
-    if ( vpit->first_injected )
-        drift = vpit->period_cycles * vpit->pending_intr_nr;
-    else 
-        drift = 0;
-    vpit->shift = v->arch.arch_vmx.tsc_offset - drift;
-    __vmwrite(TSC_OFFSET, vpit->shift);
-
-#if defined (__i386__)
-    __vmwrite(TSC_OFFSET_HIGH, ((vpit->shift)>> 32));
-#endif
-}
-
-#define BSP_CPU(v)    (!(v->vcpu_id))
-static inline void
-interrupt_post_injection(struct vcpu * v, int vector, int type)
-{
-    struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-
-    if ( is_pit_irq(v, vector, type) ) {
-        if ( !vpit->first_injected ) {
-            vpit->pending_intr_nr = 0;
-            vpit->scheduled = NOW() + vpit->period;
-            set_timer(&vpit->pit_timer, vpit->scheduled);
-            vpit->first_injected = 1;
-        } else {
-            vpit->pending_intr_nr--;
-        }
-        vpit->inject_point = NOW();
-        set_tsc_shift (v, vpit);
-    }
-
-    switch(type)
-    {
-    case VLAPIC_DELIV_MODE_EXT:
-        break;
-
-    default:
-        vlapic_post_injection(v, vector, type);
-        break;
-    }
-}
-
-static inline void
-enable_irq_window(unsigned long cpu_exec_control)
-{
-    if (!(cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING)) {
-        cpu_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control);
-    }
-}
-
-static inline void
-disable_irq_window(unsigned long cpu_exec_control)
-{
-    if ( cpu_exec_control & CPU_BASED_VIRTUAL_INTR_PENDING ) {
-        cpu_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
-        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, cpu_exec_control);
-    }
-}
-
-static inline int irq_masked(unsigned long eflags)
-{
-    return ((eflags & X86_EFLAGS_IF) == 0);
-}
-
-void pic_irq_request(int *interrupt_request, int level)
-{
-    if (level)
-        *interrupt_request = 1;
-    else
-        *interrupt_request = 0;
-}
-
-void vmx_pic_assist(struct vcpu *v)
-{
-    global_iodata_t *spg;
-    u16   *virq_line, irqs;
-    struct vmx_virpic *pic = &v->domain->arch.vmx_platform.vmx_pic;
-    
-    spg = &get_sp(v->domain)->sp_global;
-    virq_line  = &spg->pic_clear_irr;
-    if ( *virq_line ) {
-        do {
-            irqs = *(volatile u16*)virq_line;
-        } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
-        do_pic_irqs_clear(pic, irqs);
-    }
-    virq_line  = &spg->pic_irr;
-    if ( *virq_line ) {
-        do {
-            irqs = *(volatile u16*)virq_line;
-        } while ( (u16)cmpxchg(virq_line,irqs, 0) != irqs );
-        do_pic_irqs(pic, irqs);
-    }
-
-}
-
-int cpu_get_interrupt(struct vcpu *v, int *type)
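The IRR consume in vmx_pic_assist is a lock-free snapshot-and-clear: read the
line word, cmpxchg it to zero, and retry if the device model raised more bits
in between, so no pending IRQ is ever lost. In isolation, with a GCC __sync
builtin standing in for Xen's cmpxchg:

    #include <stdio.h>
    #include <stdint.h>

    static volatile uint16_t virq_line = 0x0009;   /* hypothetical pending IRQs */

    /* Atomically take all currently-pending bits, leaving zero behind. */
    static uint16_t consume_irqs(volatile uint16_t *line)
    {
        uint16_t irqs;

        do {
            irqs = *line;
        } while (__sync_val_compare_and_swap(line, irqs, 0) != irqs);
        return irqs;
    }

    int main(void)
    {
        printf("took irqs %#x\n", consume_irqs(&virq_line));   /* 0x9 */
        return 0;
    }
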
-{
-    int intno;
-    struct vmx_virpic *s = &v->domain->arch.vmx_platform.vmx_pic;
-
-    if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) {
-        /* set irq request if a PIC irq is still pending */
-        /* XXX: improve that */
-        pic_update_irq(s);
-        return intno;
-    }
-    /* read the irq from the PIC */
-    if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
-        return intno;
-
-    return -1;
-}
-
-asmlinkage void vmx_intr_assist(void)
-{
-    int intr_type = 0;
-    int highest_vector;
-    unsigned long intr_fields, eflags, interruptibility, cpu_exec_control;
-    struct vcpu *v = current;
-    struct vmx_platform *plat = &v->domain->arch.vmx_platform;
-    struct vmx_virpit *vpit = &plat->vmx_pit;
-    struct vmx_virpic *pic = &plat->vmx_pic;
-
-    vmx_pic_assist(v);
-    __vmread_vcpu(v, CPU_BASED_VM_EXEC_CONTROL, &cpu_exec_control);
-    if ( vpit->pending_intr_nr ) {
-        /* pulse the PIC's IRQ0 line low then high to latch a PIT edge */
-        pic_set_irq(pic, 0, 0);
-        pic_set_irq(pic, 0, 1);
-    }
-
-    __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields);
-
-    if (intr_fields & INTR_INFO_VALID_MASK) {
-        enable_irq_window(cpu_exec_control);
-        VMX_DBG_LOG(DBG_LEVEL_1, "vmx_intr_assist: intr_fields: %lx",
-                    intr_fields);
-        return;
-    }
-
-    __vmread(GUEST_INTERRUPTIBILITY_INFO, &interruptibility);
-
-    if (interruptibility) {
-        enable_irq_window(cpu_exec_control);
-        VMX_DBG_LOG(DBG_LEVEL_1, "interruptibility: %lx",interruptibility);
-        return;
-    }
-
-    __vmread(GUEST_RFLAGS, &eflags);
-    if (irq_masked(eflags)) {
-        enable_irq_window(cpu_exec_control);
-        return;
-    }
-
-    highest_vector = cpu_get_interrupt(v, &intr_type); 
-
-    if (highest_vector == -1) {
-        disable_irq_window(cpu_exec_control);
-        return;
-    }
-
-    switch (intr_type) {
-    case VLAPIC_DELIV_MODE_EXT:
-    case VLAPIC_DELIV_MODE_FIXED:
-    case VLAPIC_DELIV_MODE_LPRI:
-        vmx_inject_extint(v, highest_vector, VMX_INVALID_ERROR_CODE);
-        TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0);
-        break;
-    case VLAPIC_DELIV_MODE_SMI:
-    case VLAPIC_DELIV_MODE_NMI:
-    case VLAPIC_DELIV_MODE_INIT:
-    case VLAPIC_DELIV_MODE_STARTUP:
-    default:
-        printk("Unsupported interrupt type\n");
-        BUG();
-        break;
-    }
-
-    interrupt_post_injection(v, highest_vector, intr_type);
-    return;
-}
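
Read top to bottom, vmx_intr_assist() applies three gates before it will inject: no event may already be queued for VM entry, the guest must not be in an interruptibility shadow (e.g. after MOV SS or STI), and EFLAGS.IF must be set; failing any gate it requests an interrupt-window exit and retries on a later entry. The gating logic as a standalone predicate (a sketch; only the IF bit value is architectural, the rest is local naming):

    #include <stdint.h>

    #define EFLAGS_IF_SKETCH (1u << 9)

    /* Returns nonzero when an external interrupt can be injected now. */
    static int can_inject_extint(int entry_event_valid,
                                 uint32_t interruptibility,
                                 uint32_t eflags)
    {
        if (entry_event_valid)              /* event already queued */
            return 0;
        if (interruptibility)               /* MOV SS / STI shadow */
            return 0;
        if (!(eflags & EFLAGS_IF_SKETCH))   /* guest masked interrupts */
            return 0;
        return 1;
    }
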
-
-void vmx_do_resume(struct vcpu *v)
-{
-    struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit);
-    vmx_stts();
-
-    if (event_pending(v)) {
-        vmx_check_events(v);
-
-        if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags))
-            vmx_wait_io();
-    }
-    /* pick up the elapsed PIT ticks and re-enable pit_timer */
-    if ( vpit->first_injected ) {
-        pickup_deactive_ticks(vpit);
-    }
-    set_tsc_shift(v, vpit);
-
-    /* We can't resume the guest if we're waiting on I/O */
-    ASSERT(!test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags));
-}
-
-#endif /* CONFIG_VMX */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/vmx_platform.c
--- a/xen/arch/x86/vmx_platform.c       Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,919 +0,0 @@
-/*
- * vmx_platform.c: handling x86 platform related MMIO instructions
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <asm/shadow.h>
-#include <xen/domain_page.h>
-#include <asm/page.h>
-#include <xen/event.h>
-#include <xen/trace.h>
-#include <asm/vmx.h>
-#include <asm/vmx_platform.h>
-#include <public/hvm/ioreq.h>
-
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm/current.h>
-#if CONFIG_PAGING_LEVELS >= 3
-#include <asm/shadow_64.h>
-#endif
-#ifdef CONFIG_VMX
-
-#define DECODE_success  1
-#define DECODE_failure  0
-
-#if defined (__x86_64__)
-void store_cpu_user_regs(struct cpu_user_regs *regs)
-{
-    __vmread(GUEST_SS_SELECTOR, &regs->ss);
-    __vmread(GUEST_RSP, &regs->rsp);
-    __vmread(GUEST_RFLAGS, &regs->rflags);
-    __vmread(GUEST_CS_SELECTOR, &regs->cs);
-    __vmread(GUEST_DS_SELECTOR, &regs->ds);
-    __vmread(GUEST_ES_SELECTOR, &regs->es);
-    __vmread(GUEST_RIP, &regs->rip);
-}
-
-static inline long __get_reg_value(unsigned long reg, int size)
-{
-    switch(size) {
-    case BYTE_64:
-        return (char)(reg & 0xFF);
-    case WORD:
-        return (short)(reg & 0xFFFF);
-    case LONG:
-        return (int)(reg & 0xFFFFFFFF);
-    case QUAD:
-        return (long)(reg);
-    default:
-        printf("Error: (__get_reg_value) Invalid reg size\n");
-        domain_crash_synchronous();
-    }
-}
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
-{
-    if (size == BYTE) {
-        switch (index) {
-        case 0: /* %al */
-            return (char)(regs->rax & 0xFF);
-        case 1: /* %cl */
-            return (char)(regs->rcx & 0xFF);
-        case 2: /* %dl */
-            return (char)(regs->rdx & 0xFF);
-        case 3: /* %bl */
-            return (char)(regs->rbx & 0xFF);
-        case 4: /* %ah */
-            return (char)((regs->rax & 0xFF00) >> 8);
-        case 5: /* %ch */
-            return (char)((regs->rcx & 0xFF00) >> 8);
-        case 6: /* %dh */
-            return (char)((regs->rdx & 0xFF00) >> 8);
-        case 7: /* %bh */
-            return (char)((regs->rbx & 0xFF00) >> 8);
-        default:
-            printf("Error: (get_reg_value) Invalid index value\n");
-            domain_crash_synchronous();
-        }
-        /* NOTREACHED */
-    }
-
-    switch (index) {
-    case 0: return __get_reg_value(regs->rax, size);
-    case 1: return __get_reg_value(regs->rcx, size);
-    case 2: return __get_reg_value(regs->rdx, size);
-    case 3: return __get_reg_value(regs->rbx, size);
-    case 4: return __get_reg_value(regs->rsp, size);
-    case 5: return __get_reg_value(regs->rbp, size);
-    case 6: return __get_reg_value(regs->rsi, size);
-    case 7: return __get_reg_value(regs->rdi, size);
-    case 8: return __get_reg_value(regs->r8, size);
-    case 9: return __get_reg_value(regs->r9, size);
-    case 10: return __get_reg_value(regs->r10, size);
-    case 11: return __get_reg_value(regs->r11, size);
-    case 12: return __get_reg_value(regs->r12, size);
-    case 13: return __get_reg_value(regs->r13, size);
-    case 14: return __get_reg_value(regs->r14, size);
-    case 15: return __get_reg_value(regs->r15, size);
-    default:
-        printf("Error: (get_reg_value) Invalid index value\n");
-        domain_crash_synchronous();
-    }
-}
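
The BYTE special case exists because, absent a REX prefix, register indices 4-7 name the legacy high-byte registers %ah/%ch/%dh/%bh rather than the low byte of %rsp..%rdi, so they cannot go through the common __get_reg_value path. A self-contained illustration of the two byte extractions:

    #include <stdint.h>
    #include <stdio.h>

    /* Low byte (as %al) or bits 15:8 (as %ah) of a register image. */
    static int8_t extract_byte(uint64_t reg, int high_byte)
    {
        return high_byte ? (int8_t)((reg >> 8) & 0xFF)
                         : (int8_t)(reg & 0xFF);
    }

    int main(void)
    {
        uint64_t rax = 0x1234;
        printf("al=%02x ah=%02x\n",
               (uint8_t)extract_byte(rax, 0),   /* prints al=34 */
               (uint8_t)extract_byte(rax, 1));  /* prints ah=12 */
        return 0;
    }
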
-#elif defined (__i386__)
-void store_cpu_user_regs(struct cpu_user_regs *regs)
-{
-    __vmread(GUEST_SS_SELECTOR, &regs->ss);
-    __vmread(GUEST_RSP, &regs->esp);
-    __vmread(GUEST_RFLAGS, &regs->eflags);
-    __vmread(GUEST_CS_SELECTOR, &regs->cs);
-    __vmread(GUEST_DS_SELECTOR, &regs->ds);
-    __vmread(GUEST_ES_SELECTOR, &regs->es);
-    __vmread(GUEST_RIP, &regs->eip);
-}
-
-static inline long __get_reg_value(unsigned long reg, int size)
-{
-    switch(size) {
-    case WORD:
-        return (short)(reg & 0xFFFF);
-    case LONG:
-        return (int)(reg & 0xFFFFFFFF);
-    default:
-        printf("Error: (__get_reg_value) Invalid reg size\n");
-        domain_crash_synchronous();
-    }
-}
-
-long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
-{
-    if (size == BYTE) {
-        switch (index) {
-        case 0: /* %al */
-            return (char)(regs->eax & 0xFF);
-        case 1: /* %cl */
-            return (char)(regs->ecx & 0xFF);
-        case 2: /* %dl */
-            return (char)(regs->edx & 0xFF);
-        case 3: /* %bl */
-            return (char)(regs->ebx & 0xFF);
-        case 4: /* %ah */
-            return (char)((regs->eax & 0xFF00) >> 8);
-        case 5: /* %ch */
-            return (char)((regs->ecx & 0xFF00) >> 8);
-        case 6: /* %dh */
-            return (char)((regs->edx & 0xFF00) >> 8);
-        case 7: /* %bh */
-            return (char)((regs->ebx & 0xFF00) >> 8);
-        default:
-            printf("Error: (get_reg_value) Invalid index value\n");
-            domain_crash_synchronous();
-        }
-    }
-
-    switch (index) {
-    case 0: return __get_reg_value(regs->eax, size);
-    case 1: return __get_reg_value(regs->ecx, size);
-    case 2: return __get_reg_value(regs->edx, size);
-    case 3: return __get_reg_value(regs->ebx, size);
-    case 4: return __get_reg_value(regs->esp, size);
-    case 5: return __get_reg_value(regs->ebp, size);
-    case 6: return __get_reg_value(regs->esi, size);
-    case 7: return __get_reg_value(regs->edi, size);
-    default:
-        printf("Error: (get_reg_value) Invalid index value\n");
-        domain_crash_synchronous();
-    }
-}
-#endif
-
-static inline unsigned char *check_prefix(unsigned char *inst,
-                                          struct instruction *thread_inst, unsigned char *rex_p)
-{
-    while (1) {
-        switch (*inst) {
-            /* REX prefixes for EM64T instructions occupy 0x40-0x4f */
-        case 0x40 ... 0x4f:
-            *rex_p = *inst;
-            break;
-        case 0xf3: /* REPZ */
-            thread_inst->flags = REPZ;
-            break;
-        case 0xf2: /* REPNZ */
-            thread_inst->flags = REPNZ;
-            break;
-        case 0xf0: /* LOCK */
-            break;
-        case 0x2e: /* CS */
-        case 0x36: /* SS */
-        case 0x3e: /* DS */
-        case 0x26: /* ES */
-        case 0x64: /* FS */
-        case 0x65: /* GS */
-            thread_inst->seg_sel = *inst;
-            break;
-        case 0x66: /* 32bit->16bit */
-            thread_inst->op_size = WORD;
-            break;
-        case 0x67:
-            printf("Error: Not handling 0x67 (yet)\n");
-            domain_crash_synchronous();
-            break;
-        default:
-            return inst;
-        }
-        inst++;
-    }
-}
-
-static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
-{
-    int mod, reg, rm;
-    unsigned long val = 0;
-    int i;
-
-    mod = (*inst >> 6) & 3;
-    reg = (*inst >> 3) & 7;
-    rm = *inst & 7;
-
-    inst++; //skip ModR/M byte
-    if (mod != 3 && rm == 4) {
-        inst++; //skip SIB byte
-    }
-
-    switch(mod) {
-    case 0:
-        if (rm == 5 || rm == 4) {
-            if (op16)
-                inst = inst + 2; //disp16, skip 2 bytes
-            else
-                inst = inst + 4; //disp32, skip 4 bytes
-        }
-        break;
-    case 1:
-        inst++; //disp8, skip 1 byte
-        break;
-    case 2:
-        if (op16)
-            inst = inst + 2; //disp16, skip 2 bytes
-        else
-            inst = inst + 4; //disp32, skip 4 bytes
-        break;
-    }
-
-    if (op_size == QUAD)
-        op_size = LONG;
-
-    for (i = 0; i < op_size; i++) {
-        val |= (*inst++ & 0xff) << (8 * i);
-    }
-
-    return val;
-}
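
Before the immediate can be read, get_immediate() must step over the ModR/M byte, an optional SIB byte, and any displacement. The displacement length depends only on mod/rm and the address size; here is that rule as a standalone helper, mirroring the approximation above (for rm == 4 the exact length really depends on the SIB base byte):

    /* Displacement bytes following ModR/M (and optional SIB), per the
     * IA-32 addressing-form tables; addr16 selects 16-bit addressing. */
    static int disp_len(int mod, int rm, int addr16)
    {
        switch (mod) {
        case 0:
            /* rm == 5 is disp-only; rm == 4 approximated as above */
            return (rm == 5 || rm == 4) ? (addr16 ? 2 : 4) : 0;
        case 1:
            return 1;                  /* disp8 */
        case 2:
            return addr16 ? 2 : 4;     /* disp16 / disp32 */
        default:
            return 0;                  /* mod == 3: register operand */
        }
    }
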
-
-static inline int get_index(const unsigned char *inst, unsigned char rex)
-{
-    int mod, reg, rm;
-    int rex_r, rex_b;
-
-    mod = (*inst >> 6) & 3;
-    reg = (*inst >> 3) & 7;
-    rm = *inst & 7;
-
-    rex_r = (rex >> 2) & 1;
-    rex_b = rex & 1;
-
-    /* Exactly one operand of the instruction is a register. */
-    if (mod == 3) {
-        return (rm + (rex_b << 3));
-    } else {
-        return (reg + (rex_r << 3));
-    }
-}
-
-static void init_instruction(struct instruction *mmio_inst)
-{
-    mmio_inst->instr = 0;
-    mmio_inst->op_size = 0;
-    mmio_inst->immediate = 0;
-    mmio_inst->seg_sel = 0;
-
-    mmio_inst->operand[0] = 0;
-    mmio_inst->operand[1] = 0;
-
-    mmio_inst->flags = 0;
-}
-
-#define GET_OP_SIZE_FOR_BYTE(op_size)       \
-    do {                                    \
-        if (rex)                            \
-            op_size = BYTE_64;              \
-        else                                \
-            op_size = BYTE;                 \
-    } while(0)
-
-#define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
-    do {                                    \
-        if (rex & 0x8)                      \
-            op_size = QUAD;                 \
-        else if (op_size != WORD)           \
-            op_size = LONG;                 \
-    } while(0)
-
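
These macros encode the usual x86-64 operand-size rules: a REX.W prefix (bit 3 of the REX byte) selects 64-bit, an earlier 0x66 prefix selects 16-bit, and the default is 32-bit. The same rule as a plain function; the numeric size values are illustrative, since WORD/LONG/QUAD are opaque constants here:

    enum opsize_sketch { SZ_WORD = 2, SZ_LONG = 4, SZ_QUAD = 8 };

    static enum opsize_sketch nonbyte_op_size(unsigned char rex,
                                              int seen_66_prefix)
    {
        if (rex & 0x8)          /* REX.W overrides everything */
            return SZ_QUAD;
        if (seen_66_prefix)     /* operand-size override prefix */
            return SZ_WORD;
        return SZ_LONG;
    }
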
-
-/*
- * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
- */
-static int mem_acc(unsigned char size, struct instruction *instr)
-{
-    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
-    instr->operand[1] = mk_operand(size, 0, 0, REGISTER);
-    return DECODE_success;
-}
-
-/*
- * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
- */
-static int acc_mem(unsigned char size, struct instruction *instr)
-{
-    instr->operand[0] = mk_operand(size, 0, 0, REGISTER);
-    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
-    return DECODE_success;
-}
-
-/*
- * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
- */
-static int mem_reg(unsigned char size, unsigned char *opcode,
-                   struct instruction *instr, unsigned char rex)
-{
-    int index = get_index(opcode + 1, rex);
-
-    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
-    instr->operand[1] = mk_operand(size, index, 0, REGISTER);
-    return DECODE_success;
-}
-
-/*
- * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
- */
-static int reg_mem(unsigned char size, unsigned char *opcode,
-                   struct instruction *instr, unsigned char rex)
-{
-    int index = get_index(opcode + 1, rex);
-
-    instr->operand[0] = mk_operand(size, index, 0, REGISTER);
-    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
-    return DECODE_success;
-}
-
-static int vmx_decode(int vm86, unsigned char *opcode, struct instruction *instr)
-{
-    unsigned char size_reg = 0;
-    unsigned char rex = 0;
-    int index;
-
-    init_instruction(instr);
-
-    opcode = check_prefix(opcode, instr, &rex);
-
-    if (vm86) { /* meaning is reversed */
-        if (instr->op_size == WORD)
-            instr->op_size = LONG;
-        else if (instr->op_size == LONG)
-            instr->op_size = WORD;
-        else if (instr->op_size == 0)
-            instr->op_size = WORD;
-    }
-
-    switch (*opcode) {
-    case 0x0B: /* or m32/16, r32/16 */
-        instr->instr = INSTR_OR;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return mem_reg(instr->op_size, opcode, instr, rex);
-
-    case 0x20: /* and r8, m8 */
-        instr->instr = INSTR_AND;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, instr, rex);
-
-    case 0x21: /* and r32/16, m32/16 */
-        instr->instr = INSTR_AND;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return reg_mem(instr->op_size, opcode, instr, rex);
-
-    case 0x23: /* and m32/16, r32/16 */
-        instr->instr = INSTR_AND;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return mem_reg(instr->op_size, opcode, instr, rex);
-
-    case 0x30: /* xor r8, m8 */
-        instr->instr = INSTR_XOR;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, instr, rex);
-
-    case 0x31: /* xor r32/16, m32/16 */
-        instr->instr = INSTR_XOR;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return reg_mem(instr->op_size, opcode, instr, rex);
-
-    case 0x39: /* cmp r32/16, m32/16 */
-        instr->instr = INSTR_CMP;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return reg_mem(instr->op_size, opcode, instr, rex);
-
-    case 0x80:
-    case 0x81:
-        {
-            unsigned char ins_subtype = (opcode[1] >> 3) & 7;
-
-            if (opcode[0] == 0x80) {
-                GET_OP_SIZE_FOR_BYTE(size_reg);
-                instr->op_size = BYTE;
-            } else {
-                GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-                size_reg = instr->op_size;
-            }
-
-            instr->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-            instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
-            instr->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-            switch (ins_subtype) {
-                case 7: /* cmp $imm, m32/16 */
-                    instr->instr = INSTR_CMP;
-                    return DECODE_success;
-
-                case 1: /* or $imm, m32/16 */
-                    instr->instr = INSTR_OR;
-                    return DECODE_success;
-
-                default:
-                    printf("Opcode %x not handled yet\n", *opcode);
-                    return DECODE_failure;
-            }
-        }
-
-    case 0x84:  /* test m8, r8 */
-        instr->instr = INSTR_TEST;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, instr, rex);
-
-    case 0x88: /* mov r8, m8 */
-        instr->instr = INSTR_MOV;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return reg_mem(size_reg, opcode, instr, rex);
-
-    case 0x89: /* mov r32/16, m32/16 */
-        instr->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return reg_mem(instr->op_size, opcode, instr, rex);
-
-    case 0x8A: /* mov m8, r8 */
-        instr->instr = INSTR_MOV;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_reg(size_reg, opcode, instr, rex);
-
-    case 0x8B: /* mov m32/16, r32/16 */
-        instr->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return mem_reg(instr->op_size, opcode, instr, rex);
-
-    case 0xA0: /* mov <addr>, al */
-        instr->instr = INSTR_MOV;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return mem_acc(size_reg, instr);
-
-    case 0xA1: /* mov <addr>, ax/eax */
-        instr->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return mem_acc(instr->op_size, instr);
-
-    case 0xA2: /* mov al, <addr> */
-        instr->instr = INSTR_MOV;
-        instr->op_size = BYTE;
-        GET_OP_SIZE_FOR_BYTE(size_reg);
-        return acc_mem(size_reg, instr);
-
-    case 0xA3: /* mov ax/eax, <addr> */
-        instr->instr = INSTR_MOV;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return acc_mem(instr->op_size, instr);
-
-    case 0xA4: /* movsb */
-        instr->instr = INSTR_MOVS;
-        instr->op_size = BYTE;
-        return DECODE_success;
-
-    case 0xA5: /* movsw/movsl */
-        instr->instr = INSTR_MOVS;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return DECODE_success;
-
-    case 0xAA: /* stosb */
-        instr->instr = INSTR_STOS;
-        instr->op_size = BYTE;
-        return DECODE_success;
-
-    case 0xAB: /* stosw/stosl */
-        instr->instr = INSTR_STOS;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        return DECODE_success;
-
-    case 0xC6:
-        if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */
-            instr->instr = INSTR_MOV;
-            instr->op_size = BYTE;
-
-            instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
-            instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
-            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xC7:
-        if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */
-            instr->instr = INSTR_MOV;
-            GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-
-            instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE);
-            instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
-            instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0xF6:
-    case 0xF7:
-        if (((opcode[1] >> 3) & 7) == 0) { /* test $imm8/16/32, m8/16/32 */
-            instr->instr = INSTR_TEST;
-
-            if (opcode[0] == 0xF6) {
-                GET_OP_SIZE_FOR_BYTE(size_reg);
-                instr->op_size = BYTE;
-            } else {
-                GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-                size_reg = instr->op_size;
-            }
-
-            instr->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
-            instr->immediate = get_immediate(vm86, opcode+1, instr->op_size);
-            instr->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
-
-            return DECODE_success;
-        } else
-            return DECODE_failure;
-
-    case 0x0F:
-        break;
-
-    default:
-        printf("Opcode %x not handled yet\n", *opcode);
-        return DECODE_failure;
-    }
-
-    switch (*++opcode) {
-    case 0xB6: /* movzx m8, r16/r32/r64 */
-        instr->instr = INSTR_MOVZX;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        index = get_index(opcode + 1, rex);
-        instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
-        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xB7: /* movzx m16/m32, r32/r64 */
-        instr->instr = INSTR_MOVZX;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        index = get_index(opcode + 1, rex);
-        if (rex & 0x8)
-            instr->operand[0] = mk_operand(LONG, 0, 0, MEMORY);
-        else
-            instr->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
-        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xBE: /* movsx m8, r16/r32/r64 */
-        instr->instr = INSTR_MOVSX;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        index = get_index(opcode + 1, rex);
-        instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY);
-        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xBF: /* movsx m16, r32/r64 */
-        instr->instr = INSTR_MOVSX;
-        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
-        index = get_index(opcode + 1, rex);
-        instr->operand[0] = mk_operand(WORD, 0, 0, MEMORY);
-        instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER);
-        return DECODE_success;
-
-    case 0xA3: /* bt r32, m32 */
-        instr->instr = INSTR_BT;
-        index = get_index(opcode + 1, rex);
-        instr->op_size = LONG;
-        instr->operand[0] = mk_operand(instr->op_size, index, 0, REGISTER);
-        instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY);
-        return DECODE_success;
-
-    default:
-        printf("Two-byte opcode 0f %x not handled yet\n", *opcode);
-        return DECODE_failure;
-    }
-}
-
-int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len)
-{
-    if (inst_len > MAX_INST_LEN || inst_len <= 0)
-        return 0;
-    if (!vmx_copy(buf, guest_eip, inst_len, VMX_COPY_IN))
-        return 0;
-    return inst_len;
-}
-
-void send_mmio_req(unsigned char type, unsigned long gpa,
-                   unsigned long count, int size, long value, int dir, int pvalid)
-{
-    struct vcpu *v = current;
-    vcpu_iodata_t *vio;
-    ioreq_t *p;
-    struct cpu_user_regs *regs;
-    extern long evtchn_send(int lport);
-
-    regs = current->arch.arch_vmx.mmio_op.inst_decoder_regs;
-
-    vio = get_vio(v->domain, v->vcpu_id);
-    if (vio == NULL) {
-        printf("bad shared page\n");
-        domain_crash_synchronous();
-    }
-
-    p = &vio->vp_ioreq;
-
-    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
-        printf("VMX I/O has not yet completed\n");
-        domain_crash_synchronous();
-    }
-
-    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
-    p->dir = dir;
-    p->pdata_valid = pvalid;
-
-    p->type = type;
-    p->size = size;
-    p->addr = gpa;
-    p->count = count;
-    p->df = regs->eflags & EF_DF ? 1 : 0;
-
-    if (pvalid) {
-        if (vmx_paging_enabled(current))
-            p->u.pdata = (void *) gva_to_gpa(value);
-        else
-            p->u.pdata = (void *) value; /* guest VA == guest PA */
-    } else
-        p->u.data = value;
-
-    if (vmx_mmio_intercept(p)){
-        p->state = STATE_IORESP_READY;
-        vmx_io_assist(v);
-        return;
-    }
-
-    p->state = STATE_IOREQ_READY;
-
-    evtchn_send(iopacket_port(v->domain));
-    vmx_wait_io();
-}
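
send_mmio_req() is the producer half of the ioreq handshake with the device model: fill the request slot in the shared page, mark the vcpu as waiting, signal the event channel, then block until the response. A schematic of the producer side (field and state names are local stand-ins for the real ioreq_t, and the memory-ordering details of a real shared ring are glossed over):

    #include <stdint.h>

    enum { SK_IOREQ_NONE, SK_IOREQ_READY, SK_IORESP_READY };

    struct ioreq_sketch {
        uint64_t addr;        /* guest-physical address */
        uint64_t data;        /* value for writes, result slot for reads */
        uint32_t count;       /* repeat count for string operations */
        int size, dir;        /* bytes per element; read or write */
        volatile int state;   /* handshake state machine */
    };

    /* Publish one request and notify the consumer; the consumer flips
     * state to SK_IORESP_READY once data is valid. */
    static void post_mmio_request(struct ioreq_sketch *p, uint64_t gpa,
                                  int size, uint64_t value, int is_write,
                                  void (*notify)(void))
    {
        p->addr  = gpa;
        p->size  = size;
        p->count = 1;
        p->dir   = is_write;
        p->data  = value;
        p->state = SK_IOREQ_READY;   /* last: releases the request */
        notify();
    }
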
-
-static void mmio_operands(int type, unsigned long gpa, struct instruction *inst,
-                          struct mmio_op *mmio_opp, struct cpu_user_regs *regs)
-{
-    unsigned long value = 0;
-    int index, size_reg;
-
-    size_reg = operand_size(inst->operand[0]);
-
-    mmio_opp->flags = inst->flags;
-    mmio_opp->instr = inst->instr;
-    mmio_opp->operand[0] = inst->operand[0]; /* source */
-    mmio_opp->operand[1] = inst->operand[1]; /* destination */
-    mmio_opp->immediate = inst->immediate;
-
-    if (inst->operand[0] & REGISTER) { /* dest is memory */
-        index = operand_index(inst->operand[0]);
-        value = get_reg_value(size_reg, index, 0, regs);
-        send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
-    } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */
-        value = inst->immediate;
-        send_mmio_req(type, gpa, 1, inst->op_size, value, IOREQ_WRITE, 0);
-    } else if (inst->operand[0] & MEMORY) { /* dest is register */
-        /* send the request and wait for the value */
-        if ( (inst->instr == INSTR_MOVZX) || (inst->instr == INSTR_MOVSX) )
-            send_mmio_req(type, gpa, 1, size_reg, 0, IOREQ_READ, 0);
-        else
-            send_mmio_req(type, gpa, 1, inst->op_size, 0, IOREQ_READ, 0);
-    } else {
-        printf("mmio_operands: invalid operand\n");
-        domain_crash_synchronous();
-    }
-}
-
-#define GET_REPEAT_COUNT() \
-     (mmio_inst.flags & REPZ ? (vm86 ? regs->ecx & 0xFFFF : regs->ecx) : 1)
-
-void handle_mmio(unsigned long va, unsigned long gpa)
-{
-    unsigned long inst_len, inst_addr;
-    struct mmio_op *mmio_opp;
-    struct cpu_user_regs *regs;
-    struct instruction mmio_inst;
-    unsigned char inst[MAX_INST_LEN];
-    int i, vm86, ret;
-
-    mmio_opp = &current->arch.arch_vmx.mmio_op;
-
-    regs = mmio_opp->inst_decoder_regs;
-    store_cpu_user_regs(regs);
-
-    __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
-
-    vm86 = regs->eflags & X86_EFLAGS_VM;
-    if (vm86)
-        inst_addr = (regs->cs << 4) + regs->eip;
-    else
-        inst_addr = regs->eip;
-
-    memset(inst, 0, MAX_INST_LEN);
-    ret = inst_copy_from_guest(inst, inst_addr, inst_len);
-    if (ret != inst_len) {
-        printf("handle_mmio: failed to fetch guest instruction\n");
-        domain_crash_synchronous();
-    }
-
-    init_instruction(&mmio_inst);
-
-    if (vmx_decode(vm86, inst, &mmio_inst) == DECODE_failure) {
-        printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:",
-               va, gpa, inst_len);
-        for (i = 0; i < inst_len; i++)
-            printf(" %02x", inst[i] & 0xFF);
-        printf("\n");
-        domain_crash_synchronous();
-    }
-
-    regs->eip += inst_len; /* advance %eip */
-
-    switch (mmio_inst.instr) {
-    case INSTR_MOV:
-        mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs);
-        break;
-
-    case INSTR_MOVS:
-    {
-        unsigned long count = GET_REPEAT_COUNT();
-        unsigned long size = mmio_inst.op_size;
-        int sign = regs->eflags & EF_DF ? -1 : 1;
-        unsigned long addr = 0;
-        int dir;
-
-        /* determine non-MMIO address */
-        if (vm86) {
-            if (((regs->es << 4) + (regs->edi & 0xFFFF)) == va) {
-                dir = IOREQ_WRITE;
-                addr = (regs->ds << 4) + (regs->esi & 0xFFFF);
-            } else {
-                dir = IOREQ_READ;
-                addr = (regs->es << 4) + (regs->edi & 0xFFFF);
-            }
-        } else {
-            if (va == regs->edi) {
-                dir = IOREQ_WRITE;
-                addr = regs->esi;
-            } else {
-                dir = IOREQ_READ;
-                addr = regs->edi;
-            }
-        }
-
-        mmio_opp->flags = mmio_inst.flags;
-        mmio_opp->instr = mmio_inst.instr;
-
-        /*
-         * In case of a movs spanning multiple pages, we break the accesses
-         * up into multiple pages (the device model works with non-contiguous
-         * physical guest pages). To copy just one page, we adjust %ecx and
-         * do not advance %eip so that the next "rep movs" copies the next page.
-         * Unaligned accesses, for example movsl starting at PGSZ-2, are
-         * turned into a single copy where we handle the overlapping memory
-         * copy ourselves. After this copy succeeds, "rep movs" is executed
-         * again.
-         */
-        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
-            unsigned long value = 0;
-
-            mmio_opp->flags |= OVERLAP;
-
-            regs->eip -= inst_len; /* do not advance %eip */
-
-            if (dir == IOREQ_WRITE)
-                vmx_copy(&value, addr, size, VMX_COPY_IN);
-            send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0);
-        } else {
-            if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
-                regs->eip -= inst_len; /* do not advance %eip */
-
-                if (sign > 0)
-                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
-                else
-                    count = (addr & ~PAGE_MASK) / size;
-            }
-
-            send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1);
-        }
-        break;
-    }
-
-    case INSTR_MOVZX:
-    case INSTR_MOVSX:
-        mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mmio_opp, regs);
-        break;
-
-    case INSTR_STOS:
-        /*
-         * Since the destination is always in (contiguous) mmio space we don't
-         * need to break it up into pages.
-         */
-        mmio_opp->flags = mmio_inst.flags;
-        mmio_opp->instr = mmio_inst.instr;
-        send_mmio_req(IOREQ_TYPE_COPY, gpa,
-                      GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0);
-        break;
-
-    case INSTR_OR:
-        mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mmio_opp, regs);
-        break;
-
-    case INSTR_AND:
-        mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mmio_opp, regs);
-        break;
-
-    case INSTR_XOR:
-        mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mmio_opp, regs);
-        break;
-
-    case INSTR_CMP:        /* Pass through */
-    case INSTR_TEST:
-        mmio_opp->flags = mmio_inst.flags;
-        mmio_opp->instr = mmio_inst.instr;
-        mmio_opp->operand[0] = mmio_inst.operand[0]; /* source */
-        mmio_opp->operand[1] = mmio_inst.operand[1]; /* destination */
-        mmio_opp->immediate = mmio_inst.immediate;
-
-        /* send the request and wait for the value */
-        send_mmio_req(IOREQ_TYPE_COPY, gpa, 1,
-                      mmio_inst.op_size, 0, IOREQ_READ, 0);
-        break;
-
-    case INSTR_BT:
-        {
-            unsigned long value = 0;
-            int index, size;
-
-            mmio_opp->instr = mmio_inst.instr;
-            mmio_opp->operand[0] = mmio_inst.operand[0]; /* bit offset */
-            mmio_opp->operand[1] = mmio_inst.operand[1]; /* bit base */
-
-            index = operand_index(mmio_inst.operand[0]);
-            size = operand_size(mmio_inst.operand[0]);
-            value = get_reg_value(size, index, 0, regs);
-
-            send_mmio_req(IOREQ_TYPE_COPY, gpa + (value >> 5), 1,
-                          mmio_inst.op_size, 0, IOREQ_READ, 0);
-            break;
-        }
-
-    default:
-        printf("Unhandled MMIO instruction\n");
-        domain_crash_synchronous();
-    }
-}
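
Two invariants in the MOVS arm above are worth restating: a single request never crosses a page boundary, and %eip is left unadvanced so the guest re-executes "rep movs" for whatever remains. The count-clamping rule in isolation (a sketch assuming the usual 4 KiB page constants):

    #include <stdint.h>

    #define SK_PAGE_SIZE 4096UL
    #define SK_PAGE_MASK (~(SK_PAGE_SIZE - 1))

    /* Clamp a rep count so [addr, addr + count*size) stays within one
     * page; sign > 0 means the copy ascends (EFLAGS.DF clear). */
    static unsigned long clamp_rep_count(unsigned long addr,
                                         unsigned long count,
                                         unsigned long size, int sign)
    {
        unsigned long end = addr + count * size - 1;

        if ((addr & SK_PAGE_MASK) == (end & SK_PAGE_MASK))
            return count;                 /* already within one page */
        if (sign > 0)
            return (SK_PAGE_SIZE - (addr & ~SK_PAGE_MASK)) / size;
        return (addr & ~SK_PAGE_MASK) / size;
    }
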
-
-#endif /* CONFIG_VMX */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/vmx_vlapic.c
--- a/xen/arch/x86/vmx_vlapic.c Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,1038 +0,0 @@
-/*
- * vmx_vlapic.c: virtualize LAPIC for VMX vcpus.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
-#include <xen/xmalloc.h>
-#include <asm/shadow.h>
-#include <asm/page.h>
-#include <xen/event.h>
-#include <xen/trace.h>
-#include <asm/vmx.h>
-#include <asm/vmx_platform.h>
-#include <asm/vmx_vlapic.h>
-#include <asm/vmx_vioapic.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <asm/current.h>
-#include <public/hvm/ioreq.h>
-
-#ifdef CONFIG_VMX
-
-/* XXX remove this definition after GFW enabled */
-#define VLAPIC_NO_BIOS
-
-extern unsigned int get_apic_bus_scale(void);
-
-static unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
-{
-    0x310ff, 0x117ff, 0x117ff, 0x1f7ff, 0x1f7ff, 0x117ff
-};
-
-int vlapic_find_highest_irr(struct vlapic *vlapic)
-{
-    int result;
-
-    result = find_highest_bit((uint32_t *)&vlapic->irr[0], INTR_LEN_32);
-
-    if (result != -1 && result < 16) {
-        printk("VLAPIC: IRR set on reserved vector %d\n", result);
-        domain_crash_synchronous();
-    }
-
-    return result;
-}
-
-int vmx_apic_support(struct domain *d)
-{
-    return d->arch.vmx_platform.apic_enabled;
-}
-
-s_time_t get_apictime_scheduled(struct vcpu *v)
-{
-    struct vlapic *vlapic = VLAPIC(v);
-
-    if ( !vmx_apic_support(v->domain) || !vlapic_lvt_timer_enabled(vlapic) )
-        return -1;
-    return vlapic->vlapic_timer.expires;
-}
-
-int vlapic_find_highest_isr(struct vlapic *vlapic)
-{
-    int result;
-
-    result = find_highest_bit((uint32_t *)&vlapic->isr[0], INTR_LEN_32);
-
-    if (result != -1 && result < 16) {
-        int i = 0;
-        printk("VLAPIC: ISR set on reserved vector %d; ISR contents:\n", result);
-        for (i = 0; i < INTR_LEN_32; i += 2)
-            printk("%d: 0x%08x%08x\n", i, vlapic->isr[i], vlapic->isr[i+1]);
-        return -1;
-    }
-
-    return result;
-}
-
-uint32_t vlapic_update_ppr(struct vlapic *vlapic)
-{
-    uint32_t tpr, isrv, ppr;
-    int isr;
-
-    tpr = (vlapic->task_priority >> 4) & 0xf;      /* we want 7:4 */
-
-    isr = vlapic_find_highest_isr(vlapic);
-    if (isr != -1)
-        isrv = (isr >> 4) & 0xf;   /* ditto */
-    else
-        isrv = 0;
-
-    if (tpr >= isrv)
-        ppr = vlapic->task_priority & 0xff;
-    else
-        ppr = isrv << 4;  /* low 4 bits of PPR have to be cleared */
-
-    vlapic->processor_priority = ppr;
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT,
-                "vlapic_update_ppr: vlapic %p ppr %x isr %x isrv %x",
-                vlapic, ppr, isr, isrv);
-
-    return ppr;
-}
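
The rule implemented above: compare priority classes (vector bits 7:4); if the task priority's class is at least that of the highest in-service vector, PPR is the TPR verbatim, otherwise it is the in-service class with the low nibble cleared. A worked check of the rule:

    #include <stdint.h>
    #include <assert.h>

    /* highest_isr is the highest in-service vector, or -1 if none. */
    static uint32_t ppr_sketch(uint32_t tpr, int highest_isr)
    {
        uint32_t isrv = (highest_isr >= 0)
            ? (((uint32_t)highest_isr >> 4) & 0xf) : 0;

        if (((tpr >> 4) & 0xf) >= isrv)
            return tpr & 0xff;
        return isrv << 4;       /* low four bits of PPR read as zero */
    }

    int main(void)
    {
        assert(ppr_sketch(0x30, 0x51) == 0x50);  /* ISR class wins */
        assert(ppr_sketch(0x61, 0x51) == 0x61);  /* TPR wins, kept as-is */
        assert(ppr_sketch(0x22, -1)   == 0x22);  /* nothing in service */
        return 0;
    }
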
-
-/* This is only for fixed delivery mode */
-static int vlapic_match_dest(struct vcpu *v, struct vlapic *source,
-                             int short_hand, int dest, int dest_mode,
-                             int delivery_mode)
-{
-    int result = 0;
-    struct vlapic *target = VLAPIC(v);
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest: "
-                "target %p source %p dest %x dest_mode %x short_hand %x "
-                "delivery_mode %x",
-                target, source, dest, dest_mode, short_hand, delivery_mode);
-
-    if ( unlikely(!target) &&
-         ( (delivery_mode != VLAPIC_DELIV_MODE_INIT) &&
-           (delivery_mode != VLAPIC_DELIV_MODE_STARTUP) &&
-           (delivery_mode != VLAPIC_DELIV_MODE_NMI) )) {
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest "
-                    "uninitialized target v %p delivery_mode %x dest %x\n",
-                    v, delivery_mode, dest);
-        return result;
-    }
-
-    switch (short_hand) {
-    case VLAPIC_NO_SHORTHAND:
-        if (!dest_mode) {   /* Physical */
-            result = ((target ? target->id : v->vcpu_id ) == dest);
-        } else {            /* Logical */
-            if (!target)
-                break;
-            if (((target->dest_format >> 28) & 0xf) == 0xf) {   /* Flat mode */
-                result = (target->logical_dest >> 24) & dest;
-            } else {
-                if ((delivery_mode == VLAPIC_DELIV_MODE_LPRI) &&
-                   (dest == 0xff)) {
-                    /* What shall we do now? */
-                    printk("Broadcast IPI with lowest priority "
-                           "delivery mode\n");
-                    domain_crash_synchronous();
-                }
-                result = (target->logical_dest == (dest & 0xf)) ?
-                  ((target->logical_dest >> 4) & (dest >> 4)) : 0;
-            }
-        }
-        break;
-
-    case VLAPIC_SHORTHAND_SELF:
-        if (target == source)
-            result = 1;
-        break;
-
-    case VLAPIC_SHORTHAND_INCLUDE_SELF:
-        result = 1;
-        break;
-
-    case VLAPIC_SHORTHAND_EXCLUDE_SELF:
-        if (target != source)
-            result = 1;
-        break;
-
-    default:
-        break;
-    }
-
-    return result;
-}
-
-/*
- * Add a pending IRQ into lapic.
- * Return 1 if successfully added and 0 if discarded.
- */
-static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
-                             int vector, int level, int trig_mode)
-{
-    int        result = 0;
-    struct vlapic *vlapic = VLAPIC(v);
-
-    switch (delivery_mode) {
-    case VLAPIC_DELIV_MODE_FIXED:
-    case VLAPIC_DELIV_MODE_LPRI:
-        /* FIXME add logic for vcpu on reset */
-        if (unlikely(!vlapic || !vlapic_enabled(vlapic)))
-            return result;
-
-        if (test_and_set_bit(vector, &vlapic->irr[0])) {
-            printk("<vlapic_accept_irq> "
-                   "vector %d already pending in IRR\n", vector);
-            result = 0;
-        } else {
-            if (level) {
-                printk("<vlapic_accept_irq> level trig mode for vector %d\n", vector);
-                set_bit(vector, &vlapic->tmr[0]);
-            }
-        }
-        evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain));
-        result = 1;
-        break;
-
-    case VLAPIC_DELIV_MODE_RESERVED:
-        printk("Ignoring reserved delivery mode 3 in vlapic_accept_irq\n");
-        break;
-
-    case VLAPIC_DELIV_MODE_SMI:
-    case VLAPIC_DELIV_MODE_NMI:
-        /* FIXME */
-        printk("TODO: guest SMI/NMI delivery not implemented\n");
-        break;
-
-    case VLAPIC_DELIV_MODE_INIT:
-        if (!level && trig_mode == 1) {     /* INIT de-assert */
-            printk("This vlapic models a P4-class APIC; nothing to do for INIT de-assert\n");
-        } else {
-            /* FIXME How to check the situation after vcpu reset? */
-            vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI;
-            if (vlapic->vcpu) {
-                vcpu_pause(vlapic->vcpu);
-            }
-        }
-        break;
-
-    case VLAPIC_DELIV_MODE_STARTUP:
-        if (vlapic->init_sipi_sipi_state != VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI)
-            break;
-        vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_NORM;
-        if (!vlapic->vcpu) {
-            /* XXX Call vmx_bringup_ap here */
-            result = 0;
-        } else {
-            /* vmx_vcpu_reset(vlapic->vcpu); */
-        }
-        break;
-
-    default:
-        printk("TODO: unsupported interrupt type %x\n", delivery_mode);
-        domain_crash_synchronous();
-        break;
-    }
-
-    return result;
-}
-
-/*
- * This function is used by both the IOAPIC and the local APIC.
- * The bitmap is indexed by vcpu_id.
- */
-struct vlapic* apic_round_robin(struct domain *d,
-                                uint8_t dest_mode,
-                                uint8_t vector,
-                                uint32_t bitmap)
-{
-    int next, old;
-    struct vlapic* target = NULL;
-
-    if (dest_mode == 0) { //Physical mode
-        printk("<apic_round_robin> lowest-priority arbitration not supported in physical mode\n");
-        return NULL;
-    }
-
-    if (!bitmap) {
-        printk("<apic_round_robin> no bit set in bitmap\n");
-        return NULL;
-    }
-
-    spin_lock(&d->arch.vmx_platform.round_robin_lock);
-
-    old = next = d->arch.vmx_platform.round_info[vector];
-
-    do {
-        /* the vcpu array is arranged according to vcpu_id */
-        if (test_bit(next, &bitmap)) {
-            target = d->vcpu[next]->arch.arch_vmx.vlapic;
-
-            if (!target || !vlapic_enabled(target)) {
-                printk("warning: round-robin target's local APIC is disabled\n");
-                /* XXX should we crash the domain, or return NULL? */
-            }
-            break;
-        }
-
-        next++;
-        /* check the bound before dereferencing d->vcpu[next] */
-        if (next == MAX_VIRT_CPUS || !d->vcpu[next] ||
-            !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags))
-            next = 0;
-    } while (next != old);
-
-    d->arch.vmx_platform.round_info[vector] = next;
-    spin_unlock(&d->arch.vmx_platform.round_robin_lock);
-    return target;
-}
-
-void
-vlapic_EOI_set(struct vlapic *vlapic)
-{
-    int vector = vlapic_find_highest_isr(vlapic);
-
-    /* Not every EOI write has a corresponding ISR bit set; one example
-       is when the kernel probes the timer in setup_IO_APIC */
-    if (vector == -1)
-        return;
-
-    vlapic_clear_isr(vlapic, vector);
-    vlapic_update_ppr(vlapic);
-
-    if (test_and_clear_bit(vector, &vlapic->tmr[0]))
-        ioapic_update_EOI(vlapic->domain, vector);
-}
-
-int vlapic_check_vector(struct vlapic *vlapic,
-                        unsigned char dm, int vector)
-{
-    if ((dm == VLAPIC_DELIV_MODE_FIXED) && (vector < 16)) {
-        vlapic->err_status |= 0x40;
-        vlapic_accept_irq(vlapic->vcpu, VLAPIC_DELIV_MODE_FIXED,
-          vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), 0, 0);
-        printk("<vlapic_check_vector>: check fail\n");
-        return 0;
-    }
-    return 1;
-}
-
-
-void vlapic_ipi(struct vlapic *vlapic)
-{
-    unsigned int dest = (vlapic->icr_high >> 24) & 0xff;
-    unsigned int short_hand = (vlapic->icr_low >> 18) & 3;
-    unsigned int trig_mode = (vlapic->icr_low >> 15) & 1;
-    unsigned int level = (vlapic->icr_low >> 14) & 1;
-    unsigned int dest_mode = (vlapic->icr_low >> 11) & 1;
-    unsigned int delivery_mode = (vlapic->icr_low >> 8) & 7;
-    unsigned int vector = (vlapic->icr_low & 0xff);
-
-    struct vlapic *target;
-    struct vcpu *v = NULL;
-    uint32_t lpr_map = 0;   /* must start empty; filled per matching vcpu */
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_ipi: "
-                "icr_high %x icr_low %x "
-                "short_hand %x dest %x trig_mode %x level %x "
-                "dest_mode %x delivery_mode %x vector %x",
-                vlapic->icr_high, vlapic->icr_low,
-                short_hand, dest, trig_mode, level, dest_mode,
-                delivery_mode, vector);
-
-    for_each_vcpu ( vlapic->domain, v ) {
-        if (vlapic_match_dest(v, vlapic, short_hand,
-                              dest, dest_mode, delivery_mode)) {
-            if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) {
-                set_bit(v->vcpu_id, &lpr_map);
-            } else
-                vlapic_accept_irq(v, delivery_mode,
-                                  vector, level, trig_mode);
-        }
-    }
-
-    if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) {
-        v = vlapic->vcpu;
-        target = apic_round_robin(v->domain, dest_mode, vector, lpr_map);
-
-        if (target)
-            vlapic_accept_irq(target->vcpu, delivery_mode,
-                              vector, level, trig_mode);
-    }
-}
-
-static void vlapic_begin_timer(struct vlapic *vlapic)
-{
-    s_time_t cur = NOW(), offset;
-
-    offset = vlapic->timer_current *
-      (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter;
-    vlapic->vlapic_timer.expires = cur + offset;
-
-    set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires );
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: "
-                "bus_scale %x now %08x%08x expire %08x%08x "
-                "offset %08x%08x current %x",
-                get_apic_bus_scale(), (uint32_t)(cur >> 32), (uint32_t)cur,
-                (uint32_t)(vlapic->vlapic_timer.expires >> 32),
-                (uint32_t) vlapic->vlapic_timer.expires,
-                (uint32_t)(offset >> 32), (uint32_t)offset,
-                vlapic->timer_current);
-}
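
The expiry computation converts the APIC timer count into system time: count times nanoseconds-per-bus-cycle (262144 / get_apic_bus_scale() in the code above) times the configured divider. Factored out as a helper, with the scale passed in (a sketch; the ns-per-bus-cycle interpretation is taken from the code above):

    #include <stdint.h>

    typedef int64_t s_time_sk;   /* nanoseconds, like Xen's s_time_t */

    /* Time until the APIC timer fires, given the current count. */
    static s_time_sk apic_timer_offset(uint32_t count,
                                       unsigned int bus_scale,
                                       uint32_t divide_counter)
    {
        return (s_time_sk)count * (262144 / bus_scale) * divide_counter;
    }
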
-
-void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset,
-                         unsigned int len, unsigned int *result)
-{
-    if (len != 4) {
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC,
-                    "local apic read with len=%d (should be 4)", len);
-    }
-
-    *result = 0;
-
-    switch (offset) {
-    case APIC_ID:
-        *result = (vlapic->id) << 24;
-        break;
-
-    case APIC_LVR:
-        *result = vlapic->version;
-        break;
-
-    case APIC_TASKPRI:
-        *result = vlapic->task_priority;
-        break;
-
-    case APIC_ARBPRI:
-        printk("Access local APIC ARBPRI register which is for P6\n");
-        break;
-
-    case APIC_PROCPRI:
-        *result = vlapic->processor_priority;
-        break;
-
-    case APIC_EOI:      /* EOI is write only */
-        break;
-
-    case APIC_LDR:
-        *result = vlapic->logical_dest;
-        break;
-
-    case APIC_DFR:
-        *result = vlapic->dest_format;
-        break;
-
-    case APIC_SPIV:
-        *result = vlapic->spurious_vec;
-        break;
-
-    case APIC_ISR:
-    case 0x110:
-    case 0x120:
-    case 0x130:
-    case 0x140:
-    case 0x150:
-    case 0x160:
-    case 0x170:
-        *result = vlapic->isr[(offset - APIC_ISR) >> 4];
-        break;
-
-    case APIC_TMR:
-    case 0x190:
-    case 0x1a0:
-    case 0x1b0:
-    case 0x1c0:
-    case 0x1d0:
-    case 0x1e0:
-    case 0x1f0:
-        *result = vlapic->tmr[(offset - APIC_TMR) >> 4];
-        break;
-
-    case APIC_IRR:
-    case 0x210:
-    case 0x220:
-    case 0x230:
-    case 0x240:
-    case 0x250:
-    case 0x260:
-    case 0x270:
-        *result = vlapic->irr[(offset - APIC_IRR) >> 4];
-        break;
-
-    case APIC_ESR:
-        if (vlapic->err_write_count)
-            *result = vlapic->err_status;
-        break;
-
-    case APIC_ICR:
-        *result = vlapic->icr_low;
-        break;
-
-    case APIC_ICR2:
-        *result = vlapic->icr_high;
-        break;
-
-    case APIC_LVTT:     /* LVT Timer Reg */
-    case APIC_LVTTHMR:     /* LVT Thermal Monitor */
-    case APIC_LVTPC:     /* LVT Performance Counter */
-    case APIC_LVT0:     /* LVT LINT0 Reg */
-    case APIC_LVT1:     /* LVT Lint1 Reg */
-    case APIC_LVTERR:     /* LVT Error Reg */
-        *result = vlapic->lvt[(offset - APIC_LVTT) >> 4];
-        break;
-
-    case APIC_TMICT:
-        *result = vlapic->timer_initial;
-        break;
-
-    case APIC_TMCCT:         //Timer CCR
-        {
-            uint32_t counter;
-            s_time_t passed, cur = NOW();
-
-            if (cur <= vlapic->timer_current_update) {
-                /* system time wrapped around */
-                passed = ~0x0LL - vlapic->timer_current_update + cur;
-                VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "system time wrapped");
-            } else
-                passed = cur - vlapic->timer_current_update;
-
-            counter = (passed * get_apic_bus_scale()) / (262144 * vlapic->timer_divide_counter);
-            if (vlapic->timer_current > counter)
-                *result = vlapic->timer_current - counter;
-            else {
-                if (!vlapic_lvt_timer_period(vlapic))
-                    *result = 0;
-                /* FIXME: should an interrupt be injected here? */
-                else
-                    /* *result = counter % vlapic->timer_initial; */
-                    *result = vlapic->timer_initial - (counter - vlapic->timer_current);
-            }
-            vlapic->timer_current = *result;
-            vlapic->timer_current_update = NOW();
-
-            VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
-                        "initial %x timer current %x "
-                        "update %08x%08x cur %08x%08x offset %d",
-                        vlapic->timer_initial, vlapic->timer_current,
-                        (uint32_t)(vlapic->timer_current_update >> 32),
-                        (uint32_t)vlapic->timer_current_update ,
-                        (uint32_t)(cur >> 32), (uint32_t)cur, counter);
-        }
-        break;
-
-    case APIC_TDCR:
-        *result = vlapic->timer_divconf;
-        break;
-
-    default:
-        printk("Read of local APIC offset %x not implemented\n", offset);
-        *result = 0;
-        break;
-    }
-}
-
-static unsigned long vlapic_read(struct vcpu *v, unsigned long address,
-                                 unsigned long len)
-{
-    unsigned int alignment;
-    unsigned int tmp;
-    unsigned long result;
-    struct vlapic *vlapic = VLAPIC(v);
-    unsigned int offset = address - vlapic->base_address;
-
-    if ( len != 4 ) {
-        /* some buggy kernels read this with byte accesses */
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC,
-                    "Local APIC read with len = %lx, should be 4 instead\n",
-                    len);
-    }
-
-    alignment = offset & 0x3;
-
-    vlapic_read_aligned(vlapic, offset & ~0x3, 4, &tmp);
-    switch (len) {
-    case 1:
-        result = *((unsigned char *)&tmp + alignment);
-        break;
-
-    case 2:
-        result = *(unsigned short *)((unsigned char *)&tmp + alignment);
-        break;
-
-    case 4:
-        result = *(unsigned int *)((unsigned char *)&tmp + alignment);
-        break;
-
-    default:
-        printk("Local APIC read with len = %lx, should be 4 instead\n", len);
-        domain_crash_synchronous();
-        break;
-    }
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC,
-                "vlapic_read offset %x with length %lx and the result is %lx",
-                offset, len, result);
-    return result;
-}
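
Every sub-dword read thus reduces to an aligned 32-bit fetch followed by a shift-and-mask. The extraction step on its own (a sketch; offset is the raw byte offset into the register page):

    #include <stdint.h>

    /* Extract a 1-, 2- or 4-byte value at byte 'offset' from the
     * aligned 32-bit register image containing it. */
    static uint32_t extract_subword(uint32_t aligned_val,
                                    unsigned int offset, unsigned int len)
    {
        unsigned int shift = (offset & 3) * 8;
        uint32_t mask = (len >= 4) ? 0xffffffffu : ((1u << (len * 8)) - 1);

        return (aligned_val >> shift) & mask;
    }
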
-
-static void vlapic_write(struct vcpu *v, unsigned long address,
-                         unsigned long len, unsigned long val)
-{
-    struct vlapic *vlapic = VLAPIC(v);
-    unsigned int offset = address - vlapic->base_address;
-
-    if (offset != 0xb0)
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC,
-          "vlapic_write offset %x with length %lx source %lx",
-          offset, len, val);
-
-    /*
-     * According to the IA-32 manual, all registers should be accessed
-     * with 32-bit alignment.
-     */
-    if (len != 4) {
-        unsigned int tmp;
-        unsigned char alignment;
-
-        /* Some kernels do access with byte/word granularity */
-        printk("Notice: Local APIC write with len = %lx\n",len);
-        alignment = offset & 0x3;
-        tmp = vlapic_read(v, offset & (~0x3), 4);
-        switch (len) {
-        case 1:
-            /* XXX saddr is a temporary in the caller, so this is OK for
-               now, but the references to val below should eventually use
-               a local variable instead. */
-            /* shifts are in bits, so scale the byte alignment by 8 */
-            val = (tmp & ~(0xff << (alignment * 8))) |
-                  ((val & 0xff) << (alignment * 8));
-            break;
-
-        case 2:
-            if (alignment != 0x0 && alignment != 0x2) {
-                printk("alignment error for vlapic with len == 2\n");
-                domain_crash_synchronous();
-            }
-
-            val = (tmp & ~(0xffff << (alignment * 8))) |
-                  ((val & 0xffff) << (alignment * 8));
-            break;
-
-        case 3:
-            /* will it happen? */
-            printk("vlapic_write with len = 3 !!!\n");
-            domain_crash_synchronous();
-            break;
-
-        default:
-            printk("Local APIC write with len = %lx, should be 4 instead\n", len);
-            domain_crash_synchronous();
-            break;
-        }
-    }
-
-    offset &= 0xff0;
-
-    switch (offset) {
-    case APIC_ID:   /* Local APIC ID */
-        vlapic->id = ((val) >> 24) & VAPIC_ID_MASK;
-        break;
-
-    case APIC_TASKPRI:
-        vlapic->task_priority = val & 0xff;
-        vlapic_update_ppr(vlapic);
-        break;
-
-    case APIC_EOI:
-        vlapic_EOI_set(vlapic);
-        break;
-
-    case APIC_LDR:
-        vlapic->logical_dest = val & VAPIC_LDR_MASK;
-        break;
-
-    case APIC_DFR:
-        vlapic->dest_format = val ;
-        break;
-
-    case APIC_SPIV:
-        vlapic->spurious_vec = val & 0x1ff;
-        if (!(vlapic->spurious_vec & 0x100)) {
-            int i = 0;
-            for (i = 0; i < VLAPIC_LVT_NUM; i++)
-                vlapic->lvt[i] |= 0x10000;
-            vlapic->status |= VLAPIC_SOFTWARE_DISABLE_MASK;
-        }
-        else
-            vlapic->status &= ~VLAPIC_SOFTWARE_DISABLE_MASK;
-        break;
-
-    case APIC_ESR:
-        vlapic->err_write_count = !vlapic->err_write_count;
-        if (!vlapic->err_write_count)
-            vlapic->err_status = 0;
-        break;
-
-    case APIC_ICR:
-        /* No delay here, so we always clear the pending bit */
-        vlapic->icr_low = val & ~(1 << 12);
-        vlapic_ipi(vlapic);
-        break;
-
-    case APIC_ICR2:
-        vlapic->icr_high = val & 0xff000000;
-        break;
-
-    case APIC_LVTT: // LVT Timer Reg
-    case APIC_LVTTHMR: // LVT Thermal Monitor
-    case APIC_LVTPC: // LVT Performance Counter
-    case APIC_LVT0: // LVT LINT0 Reg
-    case APIC_LVT1: // LVT Lint1 Reg
-    case APIC_LVTERR: // LVT Error Reg
-        {
-            int vt = (offset - APIC_LVTT) >> 4;
-
-            vlapic->lvt[vt] = val & vlapic_lvt_mask[vt];
-            if (vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK)
-                vlapic->lvt[vt] |= VLAPIC_LVT_BIT_MASK;
-
-            /* On hardware, writing a vector below 0x20 raises an APIC error */
-            vlapic_check_vector(vlapic, vlapic_lvt_dm(vlapic->lvt[vt]),
-              vlapic_lvt_vector(vlapic, vt));
-
-            if (!vlapic->vcpu_id && (offset == APIC_LVT0)) {
-                if ((vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_DELIMOD)
-                            == 0x700) {
-                    if (!(vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_MASK)) {
-                        set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
-                    }else
-                        clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
-                }
-                else
-                    clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
-            }
-
-        }
-        break;
-
-    case APIC_TMICT:
-        if (vlapic_timer_active(vlapic))
-            stop_timer(&(vlapic->vlapic_timer));
-
-        vlapic->timer_initial = val;
-        vlapic->timer_current = val;
-        vlapic->timer_current_update = NOW();
-
-        vlapic_begin_timer(vlapic);
-
-        VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "timer_init %x timer_current %x"
-                    "timer_current_update %08x%08x",
-                    vlapic->timer_initial, vlapic->timer_current,
-                    (uint32_t)(vlapic->timer_current_update >> 32),
-                    (uint32_t)vlapic->timer_current_update);
-        break;
-
-    case APIC_TDCR:
-        {
-            /* FIXME: clean this code */
-            unsigned char tmp1, tmp2;
-            tmp1 = val & 0xf;
-            tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
-            vlapic->timer_divide_counter = 0x1 << tmp2;
-
-            VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
-                        "timer divider is 0x%x",
-                        vlapic->timer_divide_counter);
-        }
-        break;
-
-    default:
-        printk("Local APIC write to read-only or unhandled register\n");
-        break;
-    }
-}
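
Narrow writes take the mirror-image path: fetch the containing dword, merge in the new bytes, then process the merged 32-bit value. The merge step in isolation; shifts are in bits, hence the alignment-times-8 scaling used above:

    #include <stdint.h>

    /* Merge a 1- or 2-byte write at byte 'offset' into the aligned
     * 32-bit value 'old'. */
    static uint32_t merge_subword(uint32_t old, uint32_t val,
                                  unsigned int offset, unsigned int len)
    {
        unsigned int shift = (offset & 3) * 8;
        uint32_t mask = (len >= 4) ? 0xffffffffu : ((1u << (len * 8)) - 1);

        return (old & ~(mask << shift)) | ((val & mask) << shift);
    }
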
-
-static int vlapic_range(struct vcpu *v, unsigned long addr)
-{
-    struct vlapic *vlapic = VLAPIC(v);
-
-    if (vlapic_global_enabled(vlapic) &&
-        (addr >= vlapic->base_address) &&
-        (addr <= (vlapic->base_address + VLOCAL_APIC_MEM_LENGTH)))
-        return 1;
-
-    return 0;
-}
-
-struct vmx_mmio_handler vlapic_mmio_handler = {
-    .check_handler = vlapic_range,
-    .read_handler = vlapic_read,
-    .write_handler = vlapic_write
-};
-
-void vlapic_msr_set(struct vlapic *vlapic, uint64_t value)
-{
-    /* Nothing to do when the APIC is disabled */
-    if (!vlapic)
-        return;
-
-    if (vlapic->vcpu_id)
-        value &= ~MSR_IA32_APICBASE_BSP;
-
-    vlapic->apic_base_msr = value;
-    vlapic->base_address = vlapic_get_base_address(vlapic);
-
-    if (!(value & 0x800))
-        set_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status);
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC,
-                "apic base msr = 0x%08x%08x,\nbase address = 0x%lx",
-                (uint32_t)(vlapic->apic_base_msr >> 32),
-                (uint32_t)vlapic->apic_base_msr,
-                vlapic->base_address);
-}
-
-static inline int vlapic_get_init_id(struct vcpu *v)
-{
-    return v->vcpu_id;
-}
-
-void vlapic_timer_fn(void *data)
-{
-    struct vlapic *vlapic;
-
-    vlapic = data;
-    if (!vlapic_enabled(vlapic)) return;
-
-    vlapic->timer_current_update = NOW();
-
-    if (vlapic_lvt_timer_enabled(vlapic)) {
-        if (!vlapic_irr_status(vlapic,
-              vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER))) {
-            test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER),
-              &vlapic->irr[0]);
-        }
-        else
-            vlapic->intr_pending_count[vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++;
-        evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain));
-    }
-
-    vlapic->timer_current_update = NOW();
-    if (vlapic_lvt_timer_period(vlapic)) {
-        s_time_t offset;
-
-        vlapic->timer_current = vlapic->timer_initial;
-        offset = vlapic->timer_current * (262144/get_apic_bus_scale()) * vlapic->timer_divide_counter;
-        vlapic->vlapic_timer.expires = NOW() + offset;
-        set_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires);
-    } else {
-        vlapic->timer_current = 0;
-    }
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER,
-      "vlapic_timer_fn: now: %08x%08x expire %08x%08x init %x current %x",
-      (uint32_t)(NOW() >> 32), (uint32_t)NOW(),
-      (uint32_t)(vlapic->vlapic_timer.expires >> 32),
-      (uint32_t)vlapic->vlapic_timer.expires,
-      vlapic->timer_initial, vlapic->timer_current);
-}
-
-#if 0
-static int
-vlapic_check_direct_intr(struct vcpu *v, int * mode)
-{
-    struct vlapic *vlapic = VLAPIC(v);
-    int type;
-
-    type = __fls(vlapic->direct_intr.deliver_mode);
-    if (type == -1)
-        return -1;
-
-    *mode = type;
-    return 0;
-}
-#endif
-
-int
-vlapic_accept_pic_intr(struct vcpu *v)
-{
-    struct vlapic *vlapic = VLAPIC(v);
-
-    return vlapic ? test_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status) : 1;
-}
-
-int cpu_get_apic_interrupt(struct vcpu* v, int *mode)
-{
-    struct vlapic *vlapic = VLAPIC(v);
-
-    if (vlapic && vlapic_enabled(vlapic)) {
-        int highest_irr = vlapic_find_highest_irr(vlapic);
-
-        if (highest_irr != -1 && highest_irr >= vlapic->processor_priority) {
-            if (highest_irr < 0x10) {
-                vlapic->err_status |= 0x20;
-                /* XXX What happens if this vector is still illegal? */
-                VMX_DBG_LOG(DBG_LEVEL_VLAPIC,
-                  "vmx_intr_assist: illegal vector number %x err_status %x",
-                  highest_irr,  vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR));
-
-                set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), &vlapic->irr[0]);
-                highest_irr = vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR);
-            }
-
-            *mode = VLAPIC_DELIV_MODE_FIXED;
-            return highest_irr;
-        }
-    }
-    return -1;
-}
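
The `highest_irr >= processor_priority` test above is the deliverability check: architecturally an interrupt is serviced only when its priority class (the vector's upper nibble) is high enough relative to the PPR. A minimal sketch of the SDM's class comparison, illustrative only and not part of the patch:

    /* Hedged sketch of the architectural deliverability rule. */
    static int vector_deliverable(unsigned int vector, unsigned int ppr)
    {
        return (vector >> 4) > (ppr >> 4);   /* compare priority classes */
    }
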
-
-void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) {
-    struct vlapic  *vlapic = VLAPIC(v);
-
-    if (!vlapic)
-        return;
-
-    switch (deliver_mode) {
-    case VLAPIC_DELIV_MODE_FIXED:
-    case VLAPIC_DELIV_MODE_LPRI:
-        vlapic_set_isr(vlapic, vector);
-        vlapic_clear_irr(vlapic, vector);
-        vlapic_update_ppr(vlapic);
-
-        if (vector == vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)) {
-            vlapic->intr_pending_count[vector]--;
-            if (vlapic->intr_pending_count[vector] > 0)
-                test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER),
-                  &vlapic->irr[0]);
-        }
-
-        break;
-        /* XXX deal with these later */
-
-    case VLAPIC_DELIV_MODE_RESERVED:
-        printk("Ignore deliver mode 3 in vlapic_post_injection\n");
-        break;
-
-    case VLAPIC_DELIV_MODE_SMI:
-    case VLAPIC_DELIV_MODE_NMI:
-    case VLAPIC_DELIV_MODE_INIT:
-    case VLAPIC_DELIV_MODE_STARTUP:
-        vlapic->direct_intr.deliver_mode &= ~(1 << deliver_mode);
-        break;
-
-    default:
-        printk("<vlapic_post_injection> error deliver mode\n");
-        break;
-    }
-}
-
-static int vlapic_reset(struct vlapic *vlapic)
-{
-    struct vcpu *v;
-    int apic_id, i;
-
-    ASSERT( vlapic != NULL );
-
-    v = vlapic->vcpu;
-
-    ASSERT( v != NULL );
-
-    apic_id = v->vcpu_id;
-
-    vlapic->domain = v->domain;
-
-    vlapic->id = apic_id;
-
-    vlapic->vcpu_id = v->vcpu_id;
-
-    vlapic->version = VLAPIC_VERSION;
-
-    vlapic->apic_base_msr = VLAPIC_BASE_MSR_INIT_VALUE;
-
-    if (apic_id == 0)
-        vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP;
-
-    vlapic->base_address = vlapic_get_base_address(vlapic);
-
-    for (i = 0; i < VLAPIC_LVT_NUM; i++)
-        vlapic->lvt[i] = VLAPIC_LVT_BIT_MASK;
-
-    vlapic->dest_format = 0xffffffffU;
-
-    vlapic->spurious_vec = 0xff;
-
-    vmx_vioapic_add_lapic(vlapic, v);
-
-    init_timer(&vlapic->vlapic_timer,
-                  vlapic_timer_fn, vlapic, v->processor);
-
-#ifdef VLAPIC_NO_BIOS
-    /*
-     * XXX According to the MP specification, the BIOS enables LVT0/1;
-     * remove this once the BIOS has done so.
-     */
-    if (!v->vcpu_id) {
-        vlapic->lvt[VLAPIC_LVT_LINT0] = 0x700;
-        vlapic->lvt[VLAPIC_LVT_LINT1] = 0x500;
-        set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status);
-    }
-#endif
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_reset: "
-                "vcpu=%p id=%d vlapic_apic_base_msr=%08x%08x "
-                "vlapic_base_address=%0lx",
-                v, vlapic->id, (uint32_t)(vlapic->apic_base_msr >> 32),
-                (uint32_t)vlapic->apic_base_msr, vlapic->base_address);
-
-    return 1;
-}
-
-int vlapic_init(struct vcpu *v)
-{
-    struct vlapic *vlapic = NULL;
-
-    ASSERT( v != NULL );
-
-    VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_init %d", v->vcpu_id);
-
-    vlapic = xmalloc_bytes(sizeof(struct vlapic));
-    if (!vlapic) {
-        printk("malloc vlapic error for vcpu %x\n", v->vcpu_id);
-        return -ENOMEM;
-    }
-
-    memset(vlapic, 0, sizeof(struct vlapic));
-
-    VLAPIC(v) = vlapic;
-
-    vlapic->vcpu = v;
-
-    vlapic_reset(vlapic);
-
-    return 0;
-}
-
-#endif  /* CONFIG_VMX */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,688 +0,0 @@
-/*
- * vmx_vmcs.c: VMCS management
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/domain_page.h>
-#include <asm/current.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/vmx.h>
-#include <asm/vmx_vioapic.h>
-#include <asm/flushtlb.h>
-#include <xen/event.h>
-#include <xen/kernel.h>
-#include <public/hvm/hvm_info_table.h>
-#if CONFIG_PAGING_LEVELS >= 4
-#include <asm/shadow_64.h>
-#endif
-#ifdef CONFIG_VMX
-
-int vmcs_size;
-
-struct vmcs_struct *alloc_vmcs(void)
-{
-    struct vmcs_struct *vmcs;
-    u32 vmx_msr_low, vmx_msr_high;
-
-    rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high);
-    vmcs_size = vmx_msr_high & 0x1fff;
-    vmcs = alloc_xenheap_pages(get_order_from_bytes(vmcs_size));
-    memset((char *)vmcs, 0, vmcs_size); /* don't remove this */
-
-    vmcs->vmcs_revision_id = vmx_msr_low;
-    return vmcs;
-}
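
alloc_vmcs() derives both values it needs from IA32_VMX_BASIC: the low 32 bits hold the VMCS revision identifier and bits 44:32 the region size, hence the 0x1fff mask. A self-contained sketch of that split, illustrative only, with stdint types standing in for Xen's u32/u64:

    #include <stdint.h>

    struct vmx_basic { uint32_t revision; uint32_t region_size; };

    /* Hedged sketch: split IA32_VMX_BASIC the way alloc_vmcs() does. */
    static struct vmx_basic parse_vmx_basic(uint64_t msr)
    {
        struct vmx_basic b;
        b.revision    = (uint32_t)msr;        /* low 32 bits          */
        b.region_size = (msr >> 32) & 0x1fff; /* bits 44:32, in bytes */
        return b;
    }
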
-
-static void free_vmcs(struct vmcs_struct *vmcs)
-{
-    int order;
-
-    order = get_order_from_bytes(vmcs_size);
-    free_xenheap_pages(vmcs, order);
-}
-
-static int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
-{
-    int error;
-
-    if ((error = __vmptrld(phys_ptr))) {
-        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
-        return error;
-    }
-    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
-    return 0;
-}
-
-#if 0
-static int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
-{
-    /* take the current VMCS */
-    __vmptrst(phys_ptr);
-    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
-    return 0;
-}
-#endif
-
-static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx)
-{
-    int error = 0;
-    void *io_bitmap_a;
-    void *io_bitmap_b;
-
-    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
-                       MONITOR_PIN_BASED_EXEC_CONTROLS);
-
-    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
-
-    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);
-
-    /* need to use 0x1000 instead of PAGE_SIZE */
-    io_bitmap_a = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000));
-    io_bitmap_b = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000));
-    memset(io_bitmap_a, 0xff, 0x1000);
-    /* don't intercept debug port accesses */
-    clear_bit(PC_DEBUG_PORT, io_bitmap_a);
-    memset(io_bitmap_b, 0xff, 0x1000);
-
-    error |= __vmwrite(IO_BITMAP_A, (u64) virt_to_phys(io_bitmap_a));
-    error |= __vmwrite(IO_BITMAP_B, (u64) virt_to_phys(io_bitmap_b));
-
-    arch_vmx->io_bitmap_a = io_bitmap_a;
-    arch_vmx->io_bitmap_b = io_bitmap_b;
-
-    return error;
-}
-
-#define GUEST_LAUNCH_DS         0x08
-#define GUEST_LAUNCH_CS         0x10
-#define GUEST_SEGMENT_LIMIT     0xffffffff
-#define HOST_SEGMENT_LIMIT      0xffffffff
-
-struct host_execution_env {
-    /* selectors */
-    unsigned short ldtr_selector;
-    unsigned short tr_selector;
-    unsigned short ds_selector;
-    unsigned short cs_selector;
-    /* limits */
-    unsigned short gdtr_limit;
-    unsigned short ldtr_limit;
-    unsigned short idtr_limit;
-    unsigned short tr_limit;
-    /* base */
-    unsigned long gdtr_base;
-    unsigned long ldtr_base;
-    unsigned long idtr_base;
-    unsigned long tr_base;
-    unsigned long ds_base;
-    unsigned long cs_base;
-#ifdef __x86_64__
-    unsigned long fs_base;
-    unsigned long gs_base;
-#endif
-};
-
-static void vmx_map_io_shared_page(struct domain *d)
-{
-    int i;
-    unsigned char e820_map_nr;
-    struct e820entry *e820entry;
-    unsigned char *p;
-    unsigned long mpfn;
-    unsigned long gpfn = 0;
-
-    local_flush_tlb_pge();
-
-    mpfn = get_mfn_from_pfn(E820_MAP_PAGE >> PAGE_SHIFT);
-    if (mpfn == INVALID_MFN) {
-        printk("Can not find E820 memory map page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    p = map_domain_page(mpfn);
-    if (p == NULL) {
-        printk("Can not map E820 memory map page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    e820_map_nr = *(p + E820_MAP_NR_OFFSET);
-    e820entry = (struct e820entry *)(p + E820_MAP_OFFSET);
-
-    for ( i = 0; i < e820_map_nr; i++ )
-    {
-        if (e820entry[i].type == E820_SHARED_PAGE)
-        {
-            gpfn = (e820entry[i].addr >> PAGE_SHIFT);
-            break;
-        }
-    }
-
-    if ( gpfn == 0 ) {
-        printk("Can not get io request shared page"
-               " from E820 memory map for VMX domain.\n");
-        unmap_domain_page(p);
-        domain_crash_synchronous();
-    }
-    unmap_domain_page(p);
-
-    /* Initialise shared page */
-    mpfn = get_mfn_from_pfn(gpfn);
-    if (mpfn == INVALID_MFN) {
-        printk("Can not find io request shared page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    p = map_domain_page_global(mpfn);
-    if (p == NULL) {
-        printk("Can not map io request shared page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-    d->arch.vmx_platform.shared_page_va = (unsigned long)p;
-
-    VMX_DBG_LOG(DBG_LEVEL_1, "eport: %x\n", iopacket_port(d));
-
-    clear_bit(iopacket_port(d),
-              &d->shared_info->evtchn_mask[0]);
-}
-
-static int validate_hvm_info(struct hvm_info_table *t)
-{
-    char signature[] = "HVM INFO";
-    uint8_t *ptr = (uint8_t *)t;
-    uint8_t sum = 0;
-    int i;
-
-    /* strncmp(t->signature, "HVM INFO", 8) */
-    for ( i = 0; i < 8; i++ ) {
-        if ( signature[i] != t->signature[i] ) {
-            printk("Bad hvm info signature\n");
-            return 0;
-        }
-    }
-
-    for ( i = 0; i < t->length; i++ )
-        sum += ptr[i];
-
-    return (sum == 0);
-}
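
validate_hvm_info() accepts a table only when all of its bytes sum to zero mod 256, so whoever builds the table must store a checksum byte equal to the negated sum of the rest. A sketch of that producer side, illustrative only; it assumes the checksum field is zeroed before the sum is taken:

    #include <stdint.h>
    #include <stddef.h>

    /* Hedged sketch: checksum byte that makes a table sum to zero mod 256.
     * Pass the table with its checksum field cleared. */
    static uint8_t table_checksum(const uint8_t *table, size_t len)
    {
        uint8_t sum = 0;
        size_t i;
        for (i = 0; i < len; i++)
            sum += table[i];
        return (uint8_t)-sum;
    }
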
-
-static void vmx_get_hvm_info(struct domain *d)
-{
-    unsigned char *p;
-    unsigned long mpfn;
-    struct hvm_info_table *t;
-
-    mpfn = get_mfn_from_pfn(HVM_INFO_PFN);
-    if ( mpfn == INVALID_MFN ) {
-        printk("Can not get hvm info page mfn for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    p = map_domain_page(mpfn);
-    if ( p == NULL ) {
-        printk("Can not map hvm info page for VMX domain.\n");
-        domain_crash_synchronous();
-    }
-
-    t = (struct hvm_info_table *)(p + HVM_INFO_OFFSET);
-
-    if ( validate_hvm_info(t) ) {
-        d->arch.vmx_platform.nr_vcpus = t->nr_vcpus;
-        d->arch.vmx_platform.apic_enabled = t->apic_enabled;
-    } else {
-        printk("Bad hvm info table\n");
-        d->arch.vmx_platform.nr_vcpus = 1;
-        d->arch.vmx_platform.apic_enabled = 0;
-    }
-
-    unmap_domain_page(p);
-}
-
-static void vmx_setup_platform(struct domain* d)
-{
-    struct vmx_platform *platform;
-
-    vmx_map_io_shared_page(d);
-    vmx_get_hvm_info(d);
-
-    platform = &d->arch.vmx_platform;
-    pic_init(&platform->vmx_pic,  pic_irq_request,
-             &platform->interrupt_request);
-    register_pic_io_hook();
-
-    if ( vmx_apic_support(d) ) {
-        spin_lock_init(&d->arch.vmx_platform.round_robin_lock);
-        vmx_vioapic_init(d);
-    }
-}
-
-static void vmx_set_host_env(struct vcpu *v)
-{
-    unsigned int tr, cpu, error = 0;
-    struct host_execution_env host_env;
-    struct Xgt_desc_struct desc;
-
-    cpu = smp_processor_id();
-    __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
-    host_env.idtr_limit = desc.size;
-    host_env.idtr_base = desc.address;
-    error |= __vmwrite(HOST_IDTR_BASE, host_env.idtr_base);
-
-    __asm__ __volatile__ ("sgdt  (%0) \n" :: "a"(&desc) : "memory");
-    host_env.gdtr_limit = desc.size;
-    host_env.gdtr_base = desc.address;
-    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);
-
-    __asm__ __volatile__ ("str  (%0) \n" :: "a"(&tr) : "memory");
-    host_env.tr_selector = tr;
-    host_env.tr_limit = sizeof(struct tss_struct);
-    host_env.tr_base = (unsigned long) &init_tss[cpu];
-    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
-    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
-}
-
-static void vmx_do_launch(struct vcpu *v)
-{
-    /* Update CR3, GDT, LDT, TR */
-    unsigned int  error = 0;
-    unsigned long cr0, cr4;
-    u64     host_tsc;
-
-    if (v->vcpu_id == 0)
-        vmx_setup_platform(v->domain);
-
-    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (cr0) : );
-
-    error |= __vmwrite(GUEST_CR0, cr0);
-    cr0 &= ~X86_CR0_PG;
-    error |= __vmwrite(CR0_READ_SHADOW, cr0);
-    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
-                       MONITOR_CPU_BASED_EXEC_CONTROLS);
-
-    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (cr4) : );
-
-#ifdef __x86_64__
-    error |= __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
-#else
-    error |= __vmwrite(GUEST_CR4, cr4);
-#endif
-
-#ifdef __x86_64__
-    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
-#else
-    cr4 &= ~(X86_CR4_PGE | X86_CR4_VMXE);
-#endif
-    error |= __vmwrite(CR4_READ_SHADOW, cr4);
-
-    vmx_stts();
-
-    if (vmx_apic_support(v->domain))
-        vlapic_init(v);
-
-    vmx_set_host_env(v);
-    init_timer(&v->arch.arch_vmx.hlt_timer, hlt_timer_fn, v, v->processor);
-
-    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
-    error |= __vmwrite(GUEST_LDTR_BASE, 0);
-    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);
-
-    error |= __vmwrite(GUEST_TR_BASE, 0);
-    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);
-
-    __vmwrite(GUEST_CR3, pagetable_get_paddr(v->domain->arch.phys_table));
-    __vmwrite(HOST_CR3, pagetable_get_paddr(v->arch.monitor_table));
-    __vmwrite(HOST_RSP, (unsigned long)get_stack_bottom());
-
-    v->arch.schedule_tail = arch_vmx_do_resume;
-    v->arch.arch_vmx.launch_cpu = smp_processor_id();
-
-    /* init guest tsc to start from 0 */
-    rdtscll(host_tsc);
-    v->arch.arch_vmx.tsc_offset = 0 - host_tsc;
-    set_tsc_shift (v, &v->domain->arch.vmx_platform.vmx_pit);
-}
-
-/*
- * Initially set the same environment as the host.
- */
-static inline int construct_init_vmcs_guest(cpu_user_regs_t *regs)
-{
-    int error = 0;
-    union vmcs_arbytes arbytes;
-    unsigned long dr7;
-    unsigned long eflags;
-
-    /* MSR */
-    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
-    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
-
-    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
-    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
-    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
-    /* interrupt */
-    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
-    /* mask */
-    error |= __vmwrite(CR0_GUEST_HOST_MASK, -1UL);
-    error |= __vmwrite(CR4_GUEST_HOST_MASK, -1UL);
-
-    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
-    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
-
-    /* TSC */
-    error |= __vmwrite(CR3_TARGET_COUNT, 0);
-
-    /* Guest Selectors */
-    error |= __vmwrite(GUEST_ES_SELECTOR, GUEST_LAUNCH_DS);
-    error |= __vmwrite(GUEST_SS_SELECTOR, GUEST_LAUNCH_DS);
-    error |= __vmwrite(GUEST_DS_SELECTOR, GUEST_LAUNCH_DS);
-    error |= __vmwrite(GUEST_FS_SELECTOR, GUEST_LAUNCH_DS);
-    error |= __vmwrite(GUEST_GS_SELECTOR, GUEST_LAUNCH_DS);
-    error |= __vmwrite(GUEST_CS_SELECTOR, GUEST_LAUNCH_CS);
-
-    /* Guest segment bases */
-    error |= __vmwrite(GUEST_ES_BASE, 0);
-    error |= __vmwrite(GUEST_SS_BASE, 0);
-    error |= __vmwrite(GUEST_DS_BASE, 0);
-    error |= __vmwrite(GUEST_FS_BASE, 0);
-    error |= __vmwrite(GUEST_GS_BASE, 0);
-    error |= __vmwrite(GUEST_CS_BASE, 0);
-
-    /* Guest segment Limits */
-    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
-    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
-    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
-    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
-    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);
-    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
-
-    /* Guest segment AR bytes */
-    arbytes.bytes = 0;
-    arbytes.fields.seg_type = 0x3;          /* type = 3 */
-    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
-    arbytes.fields.dpl = 0;                 /* DPL = 0 */
-    arbytes.fields.p = 1;                   /* segment present */
-    arbytes.fields.default_ops_size = 1;    /* 32-bit */
-    arbytes.fields.g = 1;
-    arbytes.fields.null_bit = 0;            /* not null */
-
-    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
-    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
-    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
-    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
-    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);
-
-    arbytes.fields.seg_type = 0xb;          /* type = 0xb */
-    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);
-
-    /* Guest GDT */
-    error |= __vmwrite(GUEST_GDTR_BASE, 0);
-    error |= __vmwrite(GUEST_GDTR_LIMIT, 0);
-
-    /* Guest IDT */
-    error |= __vmwrite(GUEST_IDTR_BASE, 0);
-    error |= __vmwrite(GUEST_IDTR_LIMIT, 0);
-
-    /* Guest LDT & TSS */
-    arbytes.fields.s = 0;                   /* not a code or data segment */
-    arbytes.fields.seg_type = 0x2;          /* LDT */
-    arbytes.fields.default_ops_size = 0;    /* 16-bit */
-    arbytes.fields.g = 0;
-    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);
-
-    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
-    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);
-    /* CR3 is set in vmx_final_setup_guest */
-
-    error |= __vmwrite(GUEST_RSP, 0);
-    error |= __vmwrite(GUEST_RIP, regs->eip);
-
-    /* Guest EFLAGS */
-    eflags = regs->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
-    eflags |= VMCS_EFLAGS_RESERVED_1; /* set 1s */
-    error |= __vmwrite(GUEST_RFLAGS, eflags);
-
-    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
-    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
-    error |= __vmwrite(GUEST_DR7, dr7);
-    error |= __vmwrite(VMCS_LINK_POINTER, 0xffffffff);
-    error |= __vmwrite(VMCS_LINK_POINTER_HIGH, 0xffffffff);
-
-    return error;
-}
-
-static inline int construct_vmcs_host(void)
-{
-    int error = 0;
-#ifdef __x86_64__
-    unsigned long fs_base;
-    unsigned long gs_base;
-#endif
-    unsigned long crn;
-
-    /* Host Selectors */
-    error |= __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
-    error |= __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
-    error |= __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
-#if defined (__i386__)
-    error |= __vmwrite(HOST_FS_SELECTOR, __HYPERVISOR_DS);
-    error |= __vmwrite(HOST_GS_SELECTOR, __HYPERVISOR_DS);
-    error |= __vmwrite(HOST_FS_BASE, 0);
-    error |= __vmwrite(HOST_GS_BASE, 0);
-
-#else
-    rdmsrl(MSR_FS_BASE, fs_base);
-    rdmsrl(MSR_GS_BASE, gs_base);
-    error |= __vmwrite(HOST_FS_BASE, fs_base);
-    error |= __vmwrite(HOST_GS_BASE, gs_base);
-
-#endif
-    error |= __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
-
-    __asm__ __volatile__ ("mov %%cr0,%0" : "=r" (crn) : );
-    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */
-
-    /* CR3 is set in vmx_final_setup_hostos */
-    __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) : );
-    error |= __vmwrite(HOST_CR4, crn);
-
-    error |= __vmwrite(HOST_RIP, (unsigned long) vmx_asm_vmexit_handler);
-#ifdef __x86_64__
-    /* TBD: support cr8 for 64-bit guest */
-    __vmwrite(VIRTUAL_APIC_PAGE_ADDR, 0);
-    __vmwrite(TPR_THRESHOLD, 0);
-    __vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
-#endif
-
-    return error;
-}
-
-/*
- * Need to extend to support full virtualization.
- */
-static int construct_vmcs(struct arch_vmx_struct *arch_vmx,
-                          cpu_user_regs_t *regs)
-{
-    int error;
-    long rc;
-    u64 vmcs_phys_ptr;
-
-    memset(arch_vmx, 0, sizeof(struct arch_vmx_struct));
-    /*
-     * Create a new VMCS
-     */
-    if (!(arch_vmx->vmcs = alloc_vmcs())) {
-        printk("Failed to create a new VMCS\n");
-        rc = -ENOMEM;
-        goto err_out;
-    }
-    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
-
-    if ((error = __vmpclear(vmcs_phys_ptr))) {
-        printk("construct_vmcs: VMCLEAR failed\n");
-        rc = -EINVAL;
-        goto err_out;
-    }
-    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
-        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
-               (unsigned long) vmcs_phys_ptr);
-        rc = -EINVAL;
-        goto err_out;
-    }
-    if ((error = construct_vmcs_controls(arch_vmx))) {
-        printk("construct_vmcs: construct_vmcs_controls failed\n");
-        rc = -EINVAL;
-        goto err_out;
-    }
-    /* host selectors */
-    if ((error = construct_vmcs_host())) {
-        printk("construct_vmcs: construct_vmcs_host failed\n");
-        rc = -EINVAL;
-        goto err_out;
-    }
-    /* guest selectors */
-    if ((error = construct_init_vmcs_guest(regs))) {
-        printk("construct_vmcs: construct_vmcs_guest failed\n");
-        rc = -EINVAL;
-        goto err_out;
-    }
-    if ((error |= __vmwrite(EXCEPTION_BITMAP,
-                            MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
-        printk("construct_vmcs: setting Exception bitmap failed\n");
-        rc = -EINVAL;
-        goto err_out;
-    }
-
-    if (regs->eflags & EF_TF)
-        __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
-    else
-        __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
-
-    return 0;
-
-err_out:
-    destroy_vmcs(arch_vmx);
-    return rc;
-}
-
-void destroy_vmcs(struct arch_vmx_struct *arch_vmx)
-{
-    free_vmcs(arch_vmx->vmcs);
-    arch_vmx->vmcs = NULL;
-
-    free_xenheap_pages(arch_vmx->io_bitmap_a, get_order_from_bytes(0x1000));
-    arch_vmx->io_bitmap_a = NULL;
-
-    free_xenheap_pages(arch_vmx->io_bitmap_b, get_order_from_bytes(0x1000));
-    arch_vmx->io_bitmap_b = NULL;
-}
-
-/*
- * Modify guest eflags and exception bitmap for gdb.
- */
-int modify_vmcs(struct arch_vmx_struct *arch_vmx,
-                struct cpu_user_regs *regs)
-{
-    int error;
-    u64 vmcs_phys_ptr, old, old_phys_ptr;
-    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);
-
-    old_phys_ptr = virt_to_phys(&old);
-    __vmptrst(old_phys_ptr);
-    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
-        printk("modify_vmcs: load_vmcs failed: VMCS = %lx\n",
-               (unsigned long) vmcs_phys_ptr);
-        return -EINVAL;
-    }
-    load_cpu_user_regs(regs);
-
-    __vmptrld(old_phys_ptr);
-
-    return 0;
-}
-
-void vm_launch_fail(unsigned long eflags)
-{
-    unsigned long error;
-    __vmread(VM_INSTRUCTION_ERROR, &error);
-    printk("<vm_launch_fail> error code %lx\n", error);
-    __vmx_bug(guest_cpu_user_regs());
-}
-
-void vm_resume_fail(unsigned long eflags)
-{
-    unsigned long error;
-    __vmread(VM_INSTRUCTION_ERROR, &error);
-    printk("<vm_resume_fail> error code %lx\n", error);
-    __vmx_bug(guest_cpu_user_regs());
-}
-
-void arch_vmx_do_resume(struct vcpu *v)
-{
-    if ( v->arch.arch_vmx.launch_cpu == smp_processor_id() )
-    {
-        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
-        vmx_do_resume(v);
-        reset_stack_and_jump(vmx_asm_do_resume);
-    }
-    else
-    {
-        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
-        load_vmcs(&v->arch.arch_vmx, virt_to_phys(v->arch.arch_vmx.vmcs));
-        vmx_do_resume(v);
-        vmx_set_host_env(v);
-        v->arch.arch_vmx.launch_cpu = smp_processor_id();
-        reset_stack_and_jump(vmx_asm_do_relaunch);
-    }
-}
-
-void arch_vmx_do_launch(struct vcpu *v)
-{
-    int error;
-    cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
-
-    error = construct_vmcs(&v->arch.arch_vmx, regs);
-    if ( error < 0 )
-    {
-        if (v->vcpu_id == 0) {
-            printk("Failed to construct a new VMCS for BSP.\n");
-        } else {
-            printk("Failed to construct a new VMCS for AP %d\n", v->vcpu_id);
-        }
-        domain_crash_synchronous();
-    }
-    vmx_do_launch(v);
-    reset_stack_and_jump(vmx_asm_do_launch);
-}
-
-#endif /* CONFIG_VMX */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx.h
--- a/xen/include/asm-x86/vmx.h Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,534 +0,0 @@
-/*
- * vmx.h: VMX Architecture related definitions
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-#ifndef __ASM_X86_VMX_H__
-#define __ASM_X86_VMX_H__
-
-#include <xen/sched.h>
-#include <asm/types.h>
-#include <asm/regs.h>
-#include <asm/processor.h>
-#include <asm/vmx_vmcs.h>
-#include <asm/i387.h>
-
-#include <public/hvm/ioreq.h>
-
-extern int hvm_enabled;
-
-extern void vmx_asm_vmexit_handler(struct cpu_user_regs);
-extern void vmx_asm_do_resume(void);
-extern void vmx_asm_do_launch(void);
-extern void vmx_intr_assist(void);
-extern void pic_irq_request(int *interrupt_request, int level);
-
-extern void arch_vmx_do_launch(struct vcpu *);
-extern void arch_vmx_do_resume(struct vcpu *);
-
-extern unsigned int cpu_rev;
-
-/*
- * Need to fill bits for SENTER.
- */
-
-#define MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE         0x00000016
-
-#define MONITOR_PIN_BASED_EXEC_CONTROLS       \
-    ( \
-    MONITOR_PIN_BASED_EXEC_CONTROLS_RESERVED_VALUE |   \
-    PIN_BASED_EXT_INTR_MASK |   \
-    PIN_BASED_NMI_EXITING \
-    )
-
-#define MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE         0x0401e172
-
-#define _MONITOR_CPU_BASED_EXEC_CONTROLS \
-    ( \
-    MONITOR_CPU_BASED_EXEC_CONTROLS_RESERVED_VALUE |    \
-    CPU_BASED_HLT_EXITING | \
-    CPU_BASED_INVDPG_EXITING | \
-    CPU_BASED_MWAIT_EXITING | \
-    CPU_BASED_MOV_DR_EXITING | \
-    CPU_BASED_ACTIVATE_IO_BITMAP | \
-    CPU_BASED_USE_TSC_OFFSETING  | \
-    CPU_BASED_UNCOND_IO_EXITING \
-    )
-
-#define MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \
-    ( \
-    CPU_BASED_CR8_LOAD_EXITING | \
-    CPU_BASED_CR8_STORE_EXITING \
-    )
-
-#define MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE   0x0003edff
-
-#define MONITOR_VM_EXIT_CONTROLS_IA32E_MODE       0x00000200
-
-#define _MONITOR_VM_EXIT_CONTROLS                \
-    ( \
-    MONITOR_VM_EXIT_CONTROLS_RESERVED_VALUE |\
-    VM_EXIT_ACK_INTR_ON_EXIT \
-    )
-
-#if defined (__x86_64__)
-#define MONITOR_CPU_BASED_EXEC_CONTROLS \
-    ( \
-    _MONITOR_CPU_BASED_EXEC_CONTROLS | \
-    MONITOR_CPU_BASED_EXEC_CONTROLS_IA32E_MODE \
-    )
-#define MONITOR_VM_EXIT_CONTROLS \
-    ( \
-    _MONITOR_VM_EXIT_CONTROLS | \
-    MONITOR_VM_EXIT_CONTROLS_IA32E_MODE  \
-    )
-#else
-#define MONITOR_CPU_BASED_EXEC_CONTROLS \
-    _MONITOR_CPU_BASED_EXEC_CONTROLS 
-
-#define MONITOR_VM_EXIT_CONTROLS \
-    _MONITOR_VM_EXIT_CONTROLS
-#endif
-
-#define VM_ENTRY_CONTROLS_RESERVED_VALUE        0x000011ff
-#define VM_ENTRY_CONTROLS_IA32E_MODE            0x00000200
-#define MONITOR_VM_ENTRY_CONTROLS       VM_ENTRY_CONTROLS_RESERVED_VALUE 
-/*
- * Exit Reasons
- */
-#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI       0
-#define EXIT_REASON_EXTERNAL_INTERRUPT  1
-
-#define EXIT_REASON_PENDING_INTERRUPT   7
-
-#define EXIT_REASON_TASK_SWITCH         9
-#define EXIT_REASON_CPUID               10
-#define EXIT_REASON_HLT                 12
-#define EXIT_REASON_INVLPG              14
-#define EXIT_REASON_RDPMC               15
-#define EXIT_REASON_RDTSC               16
-#define EXIT_REASON_VMCALL              18
-
-#define EXIT_REASON_CR_ACCESS           28
-#define EXIT_REASON_DR_ACCESS           29
-#define EXIT_REASON_IO_INSTRUCTION      30
-#define EXIT_REASON_MSR_READ            31
-#define EXIT_REASON_MSR_WRITE           32
-#define EXIT_REASON_MWAIT_INSTRUCTION   36
-
-/*
- * Interruption-information format
- */
-#define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
-#define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
-#define INTR_INFO_DELIEVER_CODE_MASK    0x800           /* 11 */
-#define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
-
-#define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
-#define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
-
-/*
- * Exit Qualifications for MOV for Control Register Access
- */
-#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
-#define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
-#define TYPE_MOV_TO_CR                  (0 << 4)
-#define TYPE_MOV_FROM_CR                (1 << 4)
-#define TYPE_CLTS                       (2 << 4)
-#define TYPE_LMSW                       (3 << 4)
-#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose register */
-#define LMSW_SOURCE_DATA                (0xFFFF << 16) /* 31:16, LMSW source */
-#define REG_EAX                         (0 << 8) 
-#define REG_ECX                         (1 << 8) 
-#define REG_EDX                         (2 << 8) 
-#define REG_EBX                         (3 << 8) 
-#define REG_ESP                         (4 << 8) 
-#define REG_EBP                         (5 << 8) 
-#define REG_ESI                         (6 << 8) 
-#define REG_EDI                         (7 << 8) 
-#define REG_R8                         (8 << 8)
-#define REG_R9                         (9 << 8)
-#define REG_R10                        (10 << 8)
-#define REG_R11                        (11 << 8)
-#define REG_R12                        (12 << 8)
-#define REG_R13                        (13 << 8)
-#define REG_R14                        (14 << 8)
-#define REG_R15                        (15 << 8)
-
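
These masks decode the exit qualification delivered on EXIT_REASON_CR_ACCESS; for example, a qualification of 0x103 means "mov %ecx, %cr3": CR number 3, type TYPE_MOV_TO_CR, register REG_ECX. A hedged decoding sketch using the masks above, illustrative only and not the hypervisor's actual handler:

    /* Hypothetical decode of a CR-access exit qualification. */
    static void decode_cr_access(unsigned long qual,
                                 int *cr, int *type, int *gpr)
    {
        *cr   = qual & CONTROL_REG_ACCESS_NUM;   /* which CRn             */
        *type = qual & CONTROL_REG_ACCESS_TYPE;  /* TYPE_MOV_TO_CR, ...   */
        *gpr  = qual & CONTROL_REG_ACCESS_REG;   /* REG_EAX, REG_ECX, ... */
    }
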
-/*
- * Exit Qualifications for MOV for Debug Register Access
- */
-#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
-#define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
-#define TYPE_MOV_TO_DR                  (0 << 4)
-#define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */
- 
-#define EXCEPTION_BITMAP_DE     (1 << 0)        /* Divide Error */
-#define EXCEPTION_BITMAP_DB     (1 << 1)        /* Debug */
-#define EXCEPTION_BITMAP_NMI    (1 << 2)        /* NMI */
-#define EXCEPTION_BITMAP_BP     (1 << 3)        /* Breakpoint */
-#define EXCEPTION_BITMAP_OF     (1 << 4)        /* Overflow */
-#define EXCEPTION_BITMAP_BR     (1 << 5)        /* BOUND Range Exceeded */
-#define EXCEPTION_BITMAP_UD     (1 << 6)        /* Invalid Opcode */
-#define EXCEPTION_BITMAP_NM     (1 << 7)        /* Device Not Available */
-#define EXCEPTION_BITMAP_DF     (1 << 8)        /* Double Fault */
-/* reserved */
-#define EXCEPTION_BITMAP_TS     (1 << 10)       /* Invalid TSS */
-#define EXCEPTION_BITMAP_NP     (1 << 11)       /* Segment Not Present */
-#define EXCEPTION_BITMAP_SS     (1 << 12)       /* Stack-Segment Fault */
-#define EXCEPTION_BITMAP_GP     (1 << 13)       /* General Protection */
-#define EXCEPTION_BITMAP_PG     (1 << 14)       /* Page Fault */
-#define EXCEPTION_BITMAP_MF     (1 << 16)       /* x87 FPU Floating-Point Error (Math Fault) */
-#define EXCEPTION_BITMAP_AC     (1 << 17)       /* Alignment Check */
-#define EXCEPTION_BITMAP_MC     (1 << 18)       /* Machine Check */
-#define EXCEPTION_BITMAP_XF     (1 << 19)       /* SIMD Floating-Point Exception */
-
-/* Pending Debug exceptions */
-
-#define PENDING_DEBUG_EXC_BP    (1 << 12)       /* break point */
-#define PENDING_DEBUG_EXC_BS    (1 << 14)       /* Single step */
-
-#ifdef XEN_DEBUGGER
-#define MONITOR_DEFAULT_EXCEPTION_BITMAP        \
-    ( EXCEPTION_BITMAP_PG |                     \
-      EXCEPTION_BITMAP_DB |                     \
-      EXCEPTION_BITMAP_BP |                     \
-      EXCEPTION_BITMAP_GP )
-#else
-#define MONITOR_DEFAULT_EXCEPTION_BITMAP        \
-    ( EXCEPTION_BITMAP_PG |                     \
-      EXCEPTION_BITMAP_GP )
-#endif
-
-/* These bits in the CR4 are owned by the host */
-#ifdef __i386__
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
-#endif
-
-#define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
-#define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
-#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
-#define VMPTRLD_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /6 */
-#define VMPTRST_OPCODE  ".byte 0x0f,0xc7\n"             /* reg/opcode: /7 */
-#define VMREAD_OPCODE   ".byte 0x0f,0x78\n"
-#define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
-#define VMWRITE_OPCODE  ".byte 0x0f,0x79\n"
-#define VMXOFF_OPCODE   ".byte 0x0f,0x01,0xc4\n"
-#define VMXON_OPCODE    ".byte 0xf3,0x0f,0xc7\n"
-
-#define MODRM_EAX_06    ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
-#define MODRM_EAX_07    ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
-#define MODRM_EAX_ECX   ".byte 0xc1\n" /* EAX, ECX */
-
-static inline int __vmptrld (u64 addr)
-{
-    unsigned long eflags;
-    __asm__ __volatile__ ( VMPTRLD_OPCODE
-                           MODRM_EAX_06
-                           :
-                           : "a" (&addr) 
-                           : "memory");
-
-    __save_flags(eflags);
-    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
-        return -1;
-    return 0;
-}
-
-static inline void __vmptrst (u64 addr)
-{
-    __asm__ __volatile__ ( VMPTRST_OPCODE
-                           MODRM_EAX_07
-                           :
-                           : "a" (&addr) 
-                           : "memory");
-}
-
-static inline int __vmpclear (u64 addr)
-{
-    unsigned long eflags;
-
-    __asm__ __volatile__ ( VMCLEAR_OPCODE
-                           MODRM_EAX_06
-                           :
-                           : "a" (&addr) 
-                           : "memory");
-    __save_flags(eflags);
-    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
-        return -1;
-    return 0;
-}
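
All of these wrappers test the same VMX status convention: after a VMX instruction, CF=1 signals VMfailInvalid, ZF=1 signals VMfailValid (with a code readable from VM_INSTRUCTION_ERROR, as vm_launch_fail() does earlier in this patch), and both clear means success. A sketch of that shared check factored out, illustrative only and assuming the X86_EFLAGS_* masks from Xen's headers:

    /* Hedged sketch of the shared VMfail test. */
    static inline int vmx_insn_failed(unsigned long eflags)
    {
        return (eflags & (X86_EFLAGS_ZF | X86_EFLAGS_CF)) != 0;
    }
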
-
-#define __vmread(x, ptr) ___vmread((x), (ptr), sizeof(*(ptr)))
-
-static always_inline int ___vmread(const unsigned long field, void *ptr, const int size)
-{
-    unsigned long eflags;
-    unsigned long ecx = 0;
-
-    __asm__ __volatile__ ( VMREAD_OPCODE
-                           MODRM_EAX_ECX       
-                           : "=c" (ecx)
-                           : "a" (field)
-                           : "memory");
-
-    switch (size) {
-    case 1:
-        *((u8 *) (ptr)) = ecx;
-        break;
-    case 2:
-        *((u16 *) (ptr)) = ecx;
-        break;
-    case 4:
-        *((u32 *) (ptr)) = ecx;
-        break;
-    case 8:
-        *((u64 *) (ptr)) = ecx;
-        break;
-    default:
-        domain_crash_synchronous();
-        break;
-    }
-
-    __save_flags(eflags);
-    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
-        return -1;
-    return 0;
-}
-
-
-static always_inline void __vmwrite_vcpu(struct vcpu *v, unsigned long field, unsigned long value)
-{
-    switch(field) {
-    case CR0_READ_SHADOW:
-       v->arch.arch_vmx.cpu_shadow_cr0 = value;
-       break;
-    case GUEST_CR0:
-       v->arch.arch_vmx.cpu_cr0 = value;
-       break;
-    case CPU_BASED_VM_EXEC_CONTROL:
-       v->arch.arch_vmx.cpu_based_exec_control = value;
-       break;
-    default:
-       printk("__vmwrite_cpu: invalid field %lx\n", field);
-       break;
-    }
-}
-
-static always_inline void __vmread_vcpu(struct vcpu *v, unsigned long field, unsigned long *value)
-{
-    switch(field) {
-    case CR0_READ_SHADOW:
-       *value = v->arch.arch_vmx.cpu_shadow_cr0;
-       break;
-    case GUEST_CR0:
-       *value = v->arch.arch_vmx.cpu_cr0;
-       break;
-    case CPU_BASED_VM_EXEC_CONTROL:
-       *value = v->arch.arch_vmx.cpu_based_exec_control;
-       break;
-    default:
-       printk("__vmread_cpu: invalid field %lx\n", field);
-       break;
-    }
-}
-
-static inline int __vmwrite (unsigned long field, unsigned long value)
-{
-    unsigned long eflags;
-    struct vcpu *v = current;
-
-    __asm__ __volatile__ ( VMWRITE_OPCODE
-                           MODRM_EAX_ECX
-                           :
-                           : "a" (field) , "c" (value)
-                           : "memory");
-    __save_flags(eflags);
-    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
-        return -1;
-
-    switch(field) {
-    case CR0_READ_SHADOW:
-    case GUEST_CR0:
-    case CPU_BASED_VM_EXEC_CONTROL:
-       __vmwrite_vcpu(v, field, value);
-       break;
-    }
-
-    return 0;
-}
-
-static inline int __vm_set_bit(unsigned long field, unsigned long mask)
-{
-        unsigned long tmp;
-        int err = 0;
-
-        err |= __vmread(field, &tmp);
-        tmp |= mask;
-        err |= __vmwrite(field, tmp);
-
-        return err;
-}
-
-static inline int __vm_clear_bit(unsigned long field, unsigned long mask)
-{
-        unsigned long tmp;
-        int err = 0;
-
-        err |= __vmread(field, &tmp);
-        tmp &= ~mask;
-        err |= __vmwrite(field, tmp);
-
-        return err;
-}
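
These read-modify-write helpers are how single VMCS control bits get toggled. A usage sketch mirroring the EF_TF handling in construct_vmcs() earlier in this patch, illustrative only and assuming the cpu_user_regs layout and EF_TF from Xen's headers:

    /* Hedged usage sketch: intercept #DB only while single-stepping. */
    static void sync_db_intercept(struct cpu_user_regs *regs)
    {
        if (regs->eflags & EF_TF)
            __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
        else
            __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB);
    }
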
-
-static inline void __vmxoff (void)
-{
-    __asm__ __volatile__ ( VMXOFF_OPCODE 
-                           ::: "memory");
-}
-
-static inline int __vmxon (u64 addr)
-{
-    unsigned long eflags;
-
-    __asm__ __volatile__ ( VMXON_OPCODE
-                           MODRM_EAX_06
-                           :
-                           : "a" (&addr) 
-                           : "memory");
-    __save_flags(eflags);
-    if (eflags & X86_EFLAGS_ZF || eflags & X86_EFLAGS_CF)
-        return -1;
-    return 0;
-}
-
-/* Make sure that xen intercepts any FP accesses from current */
-static inline void vmx_stts(void)
-{
-    unsigned long cr0;
-    struct vcpu *v = current;
-
-    __vmread_vcpu(v, GUEST_CR0, &cr0);
-    if (!(cr0 & X86_CR0_TS)) {
-        __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS);
-    }
-
-    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
-    if (!(cr0 & X86_CR0_TS))
-       __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_NM);
-}
-
-/* Works only for vcpu == current */
-static inline int vmx_paging_enabled(struct vcpu *v)
-{
-    unsigned long cr0;
-
-    __vmread_vcpu(v, CR0_READ_SHADOW, &cr0);
-    return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
-}
-
-#define VMX_INVALID_ERROR_CODE  -1
-
-static inline int __vmx_inject_exception(struct vcpu *v, int trap, int type, 
-                                         int error_code)
-{
-    unsigned long intr_fields;
-
-    /* Reflect it back into the guest */
-    intr_fields = (INTR_INFO_VALID_MASK | type | trap);
-    if (error_code != VMX_INVALID_ERROR_CODE) {
-        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
-        intr_fields |= INTR_INFO_DELIEVER_CODE_MASK;
-     }
-    
-    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
-    return 0;
-}
-
-static inline int vmx_inject_exception(struct vcpu *v, int trap, int error_code)
-{
-    return __vmx_inject_exception(v, trap, INTR_TYPE_EXCEPTION, error_code);
-}
-
-static inline int vmx_inject_extint(struct vcpu *v, int trap, int error_code)
-{
-    __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code);
-    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
-
-    return 0;
-}
-
-static inline int vmx_reflect_exception(struct vcpu *v)
-{
-    int error_code, vector;
-
-    __vmread(VM_EXIT_INTR_INFO, &vector);
-    if (vector & INTR_INFO_DELIEVER_CODE_MASK)
-        __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
-    else
-        error_code = VMX_INVALID_ERROR_CODE;
-    vector &= 0xff;
-
-#ifndef NDEBUG
-    {
-        unsigned long eip;
-
-        __vmread(GUEST_RIP, &eip);
-        VMX_DBG_LOG(DBG_LEVEL_1,
-                    "vmx_reflect_exception: eip = %lx, error_code = %x",
-                    eip, error_code);
-    }
-#endif /* NDEBUG */
-
-    vmx_inject_exception(v, vector, error_code);
-    return 0;
-}
-
-static inline unsigned int vmx_get_vcpu_nr(struct domain *d)
-{
-    return d->arch.vmx_platform.nr_vcpus;
-}
-
-static inline shared_iopage_t *get_sp(struct domain *d)
-{
-    return (shared_iopage_t *) d->arch.vmx_platform.shared_page_va;
-}
-
-static inline vcpu_iodata_t *get_vio(struct domain *d, unsigned long cpu)
-{
-    return &get_sp(d)->vcpu_iodata[cpu];
-}
-
-static inline int iopacket_port(struct domain *d)
-{
-    return get_sp(d)->sp_global.eport;
-}
-
-/* Prototypes */
-void load_cpu_user_regs(struct cpu_user_regs *regs);
-void store_cpu_user_regs(struct cpu_user_regs *regs);
-
-enum { VMX_COPY_IN = 0, VMX_COPY_OUT };
-int vmx_copy(void *buf, unsigned long laddr, int size, int dir);
-void pickup_deactive_ticks(struct vmx_virpit *vpit);
-
-#endif /* __ASM_X86_VMX_H__ */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_cpu.h
--- a/xen/include/asm-x86/vmx_cpu.h     Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,35 +0,0 @@
-/*
- * vmx_cpu.h: Virtual CPU state
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-#ifndef __ASM_X86_VMX_VMCS_H__
-#define __ASM_X86_VMX_VMCS_H__
-
-/*
- * Virtual CPU
- */
-struct arch_state_struct {
-    unsigned long       mode_flags; /* vm86, 32-bit, 64-bit, etc. */
-    /* debug registers */
-    /* MSRs */
-};
-
-#define VMX_MF_VM86     0
-#define VMX_MF_32       1
-#define VMX_MF_64       2
-
-#endif
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_intercept.h
--- a/xen/include/asm-x86/vmx_intercept.h       Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,65 +0,0 @@
-#ifndef _VMX_INTERCEPT_H
-#define _VMX_INTERCEPT_H
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/time.h>
-#include <xen/errno.h>
-#include <public/hvm/ioreq.h>
-
-#define MAX_IO_HANDLER              8
-
-#define VMX_PORTIO                  0
-#define VMX_MMIO                    1
-
-typedef int (*intercept_action_t)(ioreq_t *);
-typedef unsigned long (*vmx_mmio_read_t)(struct vcpu *v,
-                                         unsigned long addr,
-                                         unsigned long length);
-
-typedef void (*vmx_mmio_write_t)(struct vcpu *v,
-                                 unsigned long addr,
-                                 unsigned long length,
-                                 unsigned long val);
-
-typedef int (*vmx_mmio_check_t)(struct vcpu *v, unsigned long addr);
-
-struct io_handler {
-    int                 type;
-    unsigned long       addr;
-    unsigned long       size;
-    intercept_action_t  action;
-};
-
-struct vmx_io_handler {
-    int     num_slot;
-    struct  io_handler hdl_list[MAX_IO_HANDLER];
-};
-
-struct vmx_mmio_handler {
-    vmx_mmio_check_t check_handler;
-    vmx_mmio_read_t read_handler;
-    vmx_mmio_write_t write_handler;
-};
-
-/* global io interception point in HV */
-extern int vmx_io_intercept(ioreq_t *p, int type);
-extern int register_io_handler(unsigned long addr, unsigned long size,
-                               intercept_action_t action, int type);
-
-static inline int vmx_portio_intercept(ioreq_t *p)
-{
-    return vmx_io_intercept(p, VMX_PORTIO);
-}
-
-int vmx_mmio_intercept(ioreq_t *p);
-
-static inline int register_portio_handler(unsigned long addr,
-                                          unsigned long size,
-                                          intercept_action_t action)
-{
-    return register_io_handler(addr, size, action, VMX_PORTIO);
-}
-
-#endif /* _VMX_INTERCEPT_H */
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_platform.h
--- a/xen/include/asm-x86/vmx_platform.h        Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,103 +0,0 @@
-/*
- * vmx_platform.h: VMX platform support
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef __ASM_X86_VMX_PLATFORM_H__
-#define __ASM_X86_VMX_PLATFORM_H__
-
-#include <public/xen.h>
-#include <asm/e820.h>
-#include <asm/vmx_vpit.h>
-#include <asm/vmx_intercept.h>
-#include <asm/vmx_vioapic.h>
-#include <asm/vmx_vpic.h>
-
-#define MAX_OPERAND_NUM 2
-
-#define mk_operand(size_reg, index, seg, flag) \
-    (((size_reg) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
-
-#define operand_size(operand)   \
-    ((operand >> 24) & 0xFF)
-
-#define operand_index(operand)  \
-    ((operand >> 16) & 0xFF)
-
-/* for instruction.operand[].size */
-#define BYTE    1
-#define WORD    2
-#define LONG    4
-#define QUAD    8
-#define BYTE_64 16
-
-/* for instruction.operand[].flag */
-#define REGISTER    0x1
-#define MEMORY      0x2
-#define IMMEDIATE   0x4
-
-/* for instruction.flags */
-#define REPZ    0x1
-#define REPNZ   0x2
-#define OVERLAP 0x4
-
-#define INSTR_PIO   1
-#define INSTR_OR    2
-#define INSTR_AND   3
-#define INSTR_XOR   4
-#define INSTR_CMP   5
-#define INSTR_MOV   6
-#define INSTR_MOVS  7
-#define INSTR_MOVZX 8
-#define INSTR_MOVSX 9
-#define INSTR_STOS  10
-#define INSTR_TEST  11
-#define INSTR_BT    12
-
-struct instruction {
-    __s8    instr; /* instruction type */
-    __s16   op_size;    /* the operand's bit size, e.g. 16-bit or 32-bit */
-    __u64   immediate;
-    __u16   seg_sel;    /* segmentation selector */
-    __u32   operand[MAX_OPERAND_NUM];   /* order is AT&T assembly */
-    __u32   flags;
-};
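
The operand words stored in struct instruction pack size, register index, segment and flags into one 32-bit value via mk_operand(). A round-trip self-test, illustrative only and assuming the macros above are in scope:

    #include <assert.h>

    /* Hedged sketch: pack and unpack a 32-bit register operand. */
    int main(void)
    {
        unsigned int op = mk_operand(LONG, 1, 0, REGISTER);
        assert(operand_size(op)  == LONG);   /* 4-byte operand */
        assert(operand_index(op) == 1);      /* register index */
        return 0;
    }
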
-
-#define MAX_INST_LEN      32
-
-struct vmx_platform {
-    unsigned long          shared_page_va;
-    unsigned int           nr_vcpus;
-    unsigned int           apic_enabled;
-
-    struct vmx_virpit      vmx_pit;
-    struct vmx_io_handler  vmx_io_handler;
-    struct vmx_virpic      vmx_pic;
-    struct vmx_vioapic     vmx_vioapic;
-    unsigned char          round_info[256];
-    spinlock_t             round_robin_lock;
-    int                    interrupt_request;
-};
-
-extern void handle_mmio(unsigned long, unsigned long);
-extern void vmx_wait_io(void);
-extern void vmx_io_assist(struct vcpu *v);
-
-// XXX - think about this -- maybe use bit 30 of the mfn to signify an MMIO frame.
-#define mmio_space(gpa) (!VALID_MFN(get_mfn_from_pfn((gpa) >> PAGE_SHIFT)))
-
-#endif
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_vioapic.h
--- a/xen/include/asm-x86/vmx_vioapic.h Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,124 +0,0 @@
-/*
- *
- *  Copyright (C) 2001  MandrakeSoft S.A.
- *
- *    MandrakeSoft S.A.
- *    43, rue d'Aboukir
- *    75002 Paris - France
- *    http://www.linux-mandrake.com/
- *    http://www.mandrakesoft.com/
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public
- *  License along with this library; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#ifndef _IOAPIC_H_
-#define _IOAPIC_H_
-
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/smp.h>
-
-#ifndef __ia64__
-#define IOAPIC_VERSION_ID 0x11
-#else
-#define IOAPIC_VERSION_ID 0x21
-#endif
-
-#define IOAPIC_NUM_PINS 24
-#define MAX_LAPIC_NUM   32
-
-#define IOAPIC_LEVEL_TRIGGER 1
-
-#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
-#define IOAPIC_MEM_LENGTH            0x100
-
-#define IOAPIC_ENABLE_MASK  0x0
-#define IOAPIC_ENABLE_FLAG  (1 << IOAPIC_ENABLE_MASK)
-#define IOAPICEnabled(s)    (s->flags & IOAPIC_ENABLE_FLAG)
-
-#define IOAPIC_REG_SELECT  0x0
-#define IOAPIC_REG_WINDOW  0x10
-
-#ifdef __ia64__
-#define IOAPIC_REG_ASSERTION    0x20
-#define IOAPIC_REG_EOI          0x40
-#endif
-
-#ifndef __ia64__
-#define IOAPIC_REG_APIC_ID 0x0
-#define IOAPIC_REG_ARB_ID  0x2
-#endif
-
-#define IOAPIC_REG_VERSION 0x1
-
-typedef union RedirStatus
-{
-    uint64_t value;
-    struct {
-        uint8_t vector;
-        uint8_t deliver_mode:3;
-        uint8_t destmode:1;
-        uint8_t delivestatus:1;
-        uint8_t polarity:1;
-        uint8_t remoteirr:1;
-        uint8_t trigmod:1;
-        uint8_t mask:1;         /* interrupt mask */
-        uint8_t reserve:7;
-#ifndef __ia64__
-        uint8_t reserved[4];
-        uint8_t dest_id;
-#else
-        uint8_t reserved[3];
-        uint16_t dest_id;
-#endif
-    } RedirForm;
-} RedirStatus;
-
-#define IOAPIC_MEM_LENGTH    0x100
-#define IOAPIC_ENABLE_MASK   0x0
-#define IOAPIC_ENABLE_FLAG   (1 << IOAPIC_ENABLE_MASK)
-#define MAX_LAPIC_NUM        32
-
-typedef struct vmx_vioapic {
-    uint32_t irr;
-    uint32_t isr;           /* This is used for level trigger */
-    uint32_t imr;
-    uint32_t ioregsel;
-    uint32_t flags;
-    uint32_t lapic_count;
-    uint32_t id;
-    uint32_t arb_id;
-    unsigned long base_address;
-    RedirStatus redirtbl[IOAPIC_NUM_PINS];
-    struct vlapic *lapic_info[MAX_LAPIC_NUM];
-    struct domain *domain;
-} vmx_vioapic_t;
-
-vmx_vioapic_t *vmx_vioapic_init(struct domain *d);
-
-void vmx_vioapic_do_irqs_clear(struct domain *d, uint16_t irqs);
-void vmx_vioapic_do_irqs(struct domain *d, uint16_t irqs);
-void vmx_vioapic_set_irq(struct domain *d, int irq, int level);
-
-int vmx_vioapic_add_lapic(struct vlapic *vlapic, struct vcpu *v);
-
-void ioapic_update_EOI(struct domain *d, int vector);
-
-#ifdef VMX_DOMAIN_SAVE_RESTORE
-void ioapic_save(QEMUFile* f, void* opaque);
-int ioapic_load(QEMUFile* f, void* opaque, int version_id);
-#endif
-
-#endif
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_vlapic.h
--- a/xen/include/asm-x86/vmx_vlapic.h  Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,253 +0,0 @@
-/*
- * vmx_vlapic.h: virtualize LAPIC definitions.
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef VMX_VLAPIC_H
-#define VMX_VLAPIC_H
-
-#include <asm/msr.h>
-#include <public/hvm/ioreq.h>
-
-#if defined(__i386__) || defined(__x86_64__)
-static inline int __fls(uint32_t word)
-{
-    int bit;
-
-    __asm__("bsrl %1,%0"
-      :"=r" (bit)
-      :"rm" (word));
-    return word ? bit : -1;
-}
-#else
-#define __fls(x)    generic_fls(x)
-static __inline__ int generic_fls(uint32_t x)
-{
-    int r = 31;
-
-    if (!x)
-        return -1;
-    if (!(x & 0xffff0000u)) {
-        x <<= 16;
-        r -= 16;
-    }
-    if (!(x & 0xff000000u)) {
-        x <<= 8;
-        r -= 8;
-    }
-    if (!(x & 0xf0000000u)) {
-        x <<= 4;
-        r -= 4;
-    }
-    if (!(x & 0xc0000000u)) {
-        x <<= 2;
-        r -= 2;
-    }
-    if (!(x & 0x80000000u)) {
-        x <<= 1;
-        r -= 1;
-    }
-    return r;
-}
-#endif
-
-static __inline__ int find_highest_bit(uint32_t *data, int length)
-{
-    while(length && !data[--length]);
-    return __fls(data[length]) +  32 * length;
-}
-
-#define VLAPIC(v)                       (v->arch.arch_vmx.vlapic)
-
-#define VAPIC_ID_MASK                   0xff
-#define VAPIC_LDR_MASK                  (VAPIC_ID_MASK << 24)
-#define VLAPIC_VERSION                  0x00050014
-
-#define VLAPIC_BASE_MSR_MASK            0x00000000fffff900ULL
-#define VLAPIC_BASE_MSR_INIT_BASE_ADDR  0xfee00000U
-#define VLAPIC_BASE_MSR_BASE_ADDR_MASK  0xfffff000U
-#define VLAPIC_BASE_MSR_INIT_VALUE      (VLAPIC_BASE_MSR_INIT_BASE_ADDR | \
-                                         MSR_IA32_APICBASE_ENABLE)
-#define VLOCAL_APIC_MEM_LENGTH          (1 << 12)
-
-#define VLAPIC_LVT_TIMER                0
-#define VLAPIC_LVT_THERMAL              1
-#define VLAPIC_LVT_PERFORM              2
-#define VLAPIC_LVT_LINT0                3
-#define VLAPIC_LVT_LINT1                4
-#define VLAPIC_LVT_ERROR                5
-#define VLAPIC_LVT_NUM                  6
-
-#define VLAPIC_LVT_BIT_MASK             (1 << 16)
-#define VLAPIC_LVT_BIT_VECTOR           0xff
-#define VLAPIC_LVT_BIT_DELIMOD          (0x7 << 8)
-#define VLAPIC_LVT_BIT_DELISTATUS       (1 << 12)
-#define VLAPIC_LVT_BIT_POLARITY         (1 << 13)
-#define VLAPIC_LVT_BIT_IRR              (1 << 14)
-#define VLAPIC_LVT_BIT_TRIG             (1 << 15)
-#define VLAPIC_LVT_TIMERMODE            (1 << 17)
-
-#define VLAPIC_DELIV_MODE_FIXED          0x0
-#define VLAPIC_DELIV_MODE_LPRI           0x1
-#define VLAPIC_DELIV_MODE_SMI            0x2
-#define VLAPIC_DELIV_MODE_RESERVED       0x3
-#define VLAPIC_DELIV_MODE_NMI            0x4
-#define VLAPIC_DELIV_MODE_INIT           0x5
-#define VLAPIC_DELIV_MODE_STARTUP        0x6
-#define VLAPIC_DELIV_MODE_EXT            0x7
-
-
-
-#define VLAPIC_NO_SHORTHAND             0x0
-#define VLAPIC_SHORTHAND_SELF           0x1
-#define VLAPIC_SHORTHAND_INCLUDE_SELF   0x2
-#define VLAPIC_SHORTHAND_EXCLUDE_SELF   0x3
-
-#define vlapic_lvt_timer_enabled(vlapic)    \
-  (!(vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_BIT_MASK))
-
-#define vlapic_lvt_vector(vlapic, type)   \
-  (vlapic->lvt[type] & VLAPIC_LVT_BIT_VECTOR)
-
-#define vlapic_lvt_dm(value)        ((value >> 8) & 7)
-#define vlapic_lvt_timer_period(vlapic) \
-  (vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_TIMERMODE)
-
-#define vlapic_isr_status(vlapic,vector)    \
-  test_bit(vector, &vlapic->isr[0])
-
-#define vlapic_irr_status(vlapic,vector)    \
-  test_bit(vector, &vlapic->irr[0])
-
-#define vlapic_set_isr(vlapic,vector) \
-  test_and_set_bit(vector, &vlapic->isr[0])
-
-#define vlapic_set_irr(vlapic,vector)      \
-  test_and_set_bit(vector, &vlapic->irr[0])
-
-#define vlapic_clear_irr(vlapic,vector)      \
-  clear_bit(vector, &vlapic->irr[0])
-#define vlapic_clear_isr(vlapic,vector)     \
-  clear_bit(vector, &vlapic->isr[0])
-
-#define vlapic_enabled(vlapic)               \
-  (!(vlapic->status &                           \
-     (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK)))
-
-#define vlapic_global_enabled(vlapic)               \
-  !(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status))
-
-#define VLAPIC_IRR(t) ((t)->irr[0])
-#define VLAPIC_ID(t)  ((t)->id)
-
-typedef struct direct_intr_info {
-    int deliver_mode;
-    int source[6];
-} direct_intr_info_t;
-
-#define VLAPIC_INIT_SIPI_SIPI_STATE_NORM          0
-#define VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI     1
-
-struct vlapic
-{
-    //FIXME check what would be 64 bit on EM64T
-    uint32_t           version;
-#define _VLAPIC_GLOB_DISABLE            0x0
-#define VLAPIC_GLOB_DISABLE_MASK        0x1
-#define VLAPIC_SOFTWARE_DISABLE_MASK    0x2
-#define _VLAPIC_BSP_ACCEPT_PIC          0x3
-    uint32_t           status;
-    uint32_t           id;
-    uint32_t           vcpu_id;
-    unsigned long      base_address;
-    uint32_t           isr[8];
-    uint32_t           irr[INTR_LEN_32];
-    uint32_t           tmr[INTR_LEN_32];
-    uint32_t           task_priority;
-    uint32_t           processor_priority;
-    uint32_t           logical_dest;
-    uint32_t           dest_format;
-    uint32_t           spurious_vec;
-    uint32_t           lvt[6];
-    uint32_t           timer_initial;
-    uint32_t           timer_current;
-    uint32_t           timer_divconf;
-    uint32_t           timer_divide_counter;
-    struct timer    vlapic_timer;
-    int                intr_pending_count[MAX_VECTOR];
-    s_time_t           timer_current_update;
-    uint32_t           icr_high;
-    uint32_t           icr_low;
-    direct_intr_info_t direct_intr;
-    uint32_t           err_status;
-    unsigned long      init_ticks;
-    uint32_t           err_write_count;
-    uint64_t           apic_base_msr;
-    uint32_t           init_sipi_sipi_state;
-    struct vcpu        *vcpu;
-    struct domain      *domain;
-};
-
-static inline int vlapic_set_irq(struct vlapic *t, uint8_t vec, uint8_t trig)
-{
-    int ret;
-
-    ret = test_and_set_bit(vec, &t->irr[0]);
-    if (trig)
-       test_and_set_bit(vec, &t->tmr[0]);
-
-    /* Besides setting the pending bit, we may need to wake up the target vcpu. */
-    return ret;
-}
-
-static inline int  vlapic_timer_active(struct vlapic *vlapic)
-{
-    return  active_timer(&(vlapic->vlapic_timer));
-}
-
-int vlapic_find_highest_irr(struct vlapic *vlapic);
-
-int vlapic_find_highest_isr(struct vlapic *vlapic);
-
-static inline uint32_t vlapic_get_base_address(struct vlapic *vlapic)
-{
-    return (vlapic->apic_base_msr & VLAPIC_BASE_MSR_BASE_ADDR_MASK);
-}
-
-void vlapic_post_injection(struct vcpu* v, int vector, int deliver_mode);
-
-int cpu_get_apic_interrupt(struct vcpu* v, int *mode);
-
-extern uint32_t vlapic_update_ppr(struct vlapic *vlapic);
-
-int vlapic_update(struct vcpu *v);
-
-extern int vlapic_init(struct vcpu *vc);
-
-extern void vlapic_msr_set(struct vlapic *vlapic, uint64_t value);
-
-int vlapic_accept_pic_intr(struct vcpu *v);
-
-struct vlapic* apic_round_robin(struct domain *d,
-                                uint8_t dest_mode,
-                                uint8_t vector,
-                                uint32_t bitmap);
-s_time_t get_apictime_scheduled(struct vcpu *v);
-int vmx_apic_support(struct domain *d);
-
-#endif /* VMX_VLAPIC_H */
-
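The __fls()/find_highest_bit() pair above is how the vlapic locates the
highest-priority pending vector in a 256-bit IRR/ISR spread across eight
32-bit words. A portable standalone sketch of the same scan (illustrative
only; on x86 the real code uses the bsrl instruction):

    #include <stdint.h>

    /* Highest set bit in one word, -1 if none (cf. generic_fls above). */
    static int fls32(uint32_t x)
    {
        int r = -1;
        while (x) {
            r++;
            x >>= 1;
        }
        return r;
    }

    /* Highest vector set across an array of 32-bit words, e.g. the
     * eight-word IRR of a virtual local APIC; -1 if all clear. */
    static int find_highest_vector(const uint32_t *reg, int words)
    {
        while (words && !reg[--words])
            ;
        return fls32(reg[words]) + 32 * words;
    }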
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,317 +0,0 @@
-/*
- * vmx_vmcs.h: VMCS related definitions
- * Copyright (c) 2004, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-#ifndef __ASM_X86_VMX_VMCS_H__
-#define __ASM_X86_VMX_VMCS_H__
-
-#include <asm/config.h>
-#include <asm/vmx_cpu.h>
-#include <asm/vmx_platform.h>
-#include <asm/vmx_vlapic.h>
-#include <public/hvm/vmx_assist.h>
-
-extern int start_vmx(void);
-extern void stop_vmx(void);
-
-#if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *n);
-void vmx_restore_msrs(struct vcpu *v);
-#else
-#define vmx_load_msrs(_n)          ((void)0)
-#define vmx_restore_msrs(_v)       ((void)0)
-#endif
-
-void vmx_final_setup_guest(struct vcpu *v);
-void vmx_relinquish_resources(struct vcpu *v);
-
-void vmx_enter_scheduler(void);
-
-enum {
-    VMX_CPU_STATE_PG_ENABLED=0,
-    VMX_CPU_STATE_PAE_ENABLED,
-    VMX_CPU_STATE_LME_ENABLED,
-    VMX_CPU_STATE_LMA_ENABLED,
-    VMX_CPU_STATE_ASSIST_ENABLED,
-};
-
-#define VMX_LONG_GUEST(ed)    \
-  (test_bit(VMX_CPU_STATE_LMA_ENABLED, &ed->arch.arch_vmx.cpu_state))
-
-struct vmcs_struct {
-    u32 vmcs_revision_id;
-    unsigned char data [0]; /* vmcs size is read from MSR */
-};
-
-extern int vmcs_size;
-
-enum {
-    VMX_INDEX_MSR_LSTAR = 0,
-    VMX_INDEX_MSR_STAR,
-    VMX_INDEX_MSR_CSTAR,
-    VMX_INDEX_MSR_SYSCALL_MASK,
-    VMX_INDEX_MSR_EFER,
-
-    VMX_MSR_COUNT,
-};
-
-struct msr_state{
-    unsigned long flags;
-    unsigned long msr_items[VMX_MSR_COUNT];
-    unsigned long shadow_gs;
-};
-
-struct mmio_op {
-    int                    flags;
-    int                    instr;       /* instruction */
-    unsigned long          operand[2];  /* operands */
-    unsigned long          immediate;   /* immediate portion */
-    struct cpu_user_regs   *inst_decoder_regs; /* current context */
-};
-
-#define PC_DEBUG_PORT   0x80
-
-struct arch_vmx_struct {
-    struct vmcs_struct      *vmcs;  /* virtual address of the VMCS */
-    unsigned int            launch_cpu; /* VMCS is valid on this CPU. */
-    unsigned long           flags;  /* VMCS flags */
-    unsigned long           cpu_cr0; /* copy of guest CR0 */
-    unsigned long           cpu_shadow_cr0; /* copy of guest read shadow CR0 */
-    unsigned long           cpu_cr2; /* save CR2 */
-    unsigned long           cpu_cr3;
-    unsigned long           cpu_state;
-    unsigned long           cpu_based_exec_control;
-    struct msr_state        msr_content;
-    struct mmio_op          mmio_op;  /* MMIO */
-    void                    *io_bitmap_a, *io_bitmap_b;
-    struct vlapic           *vlapic;
-    u64                     tsc_offset;
-    struct timer         hlt_timer;  /* hlt instruction emulation wakeup timer */
-};
-
-#define vmx_schedule_tail(next)         \
-    (next)->thread.arch_vmx.arch_vmx_schedule_tail((next))
-
-#define VMX_DOMAIN(v)   ((v)->arch.arch_vmx.flags)
-
-#define ARCH_VMX_VMCS_LOADED    0       /* VMCS has been loaded and active */
-#define ARCH_VMX_VMCS_LAUNCH    1       /* Needs VMCS launch */
-#define ARCH_VMX_VMCS_RESUME    2       /* Needs VMCS resume */
-#define ARCH_VMX_IO_WAIT        3       /* Waiting for I/O completion */
-
-void vmx_do_resume(struct vcpu *);
-struct vmcs_struct *alloc_vmcs(void);
-int modify_vmcs(struct arch_vmx_struct *arch_vmx,
-                struct cpu_user_regs *regs);
-void destroy_vmcs(struct arch_vmx_struct *arch_vmx);
-void hlt_timer_fn(void *data);
-
-#define VMCS_USE_HOST_ENV       1
-#define VMCS_USE_SEPARATE_ENV   0
-
-/* These work for both 32-bit & 64-bit eflags filtering done in construct_init_vmcs_guest() */
-#define VMCS_EFLAGS_RESERVED_0          0xffc08028 /* bitmap for 0 */
-#define VMCS_EFLAGS_RESERVED_1          0x00000002 /* bitmap for 1 */
-
-extern int vmcs_version;
-
-#define CPU_BASED_VIRTUAL_INTR_PENDING  0x00000004
-#define CPU_BASED_USE_TSC_OFFSETING     0x00000008
-#define CPU_BASED_HLT_EXITING           0x00000080
-#define CPU_BASED_INVDPG_EXITING        0x00000200
-#define CPU_BASED_MWAIT_EXITING         0x00000400
-#define CPU_BASED_RDPMC_EXITING         0x00000800
-#define CPU_BASED_RDTSC_EXITING         0x00001000
-#define CPU_BASED_CR8_LOAD_EXITING      0x00080000
-#define CPU_BASED_CR8_STORE_EXITING     0x00100000
-#define CPU_BASED_TPR_SHADOW            0x00200000
-#define CPU_BASED_MOV_DR_EXITING        0x00800000
-#define CPU_BASED_UNCOND_IO_EXITING     0x01000000
-#define CPU_BASED_ACTIVATE_IO_BITMAP    0x02000000
-#define CPU_BASED_MONITOR_EXITING       0x20000000
-#define CPU_BASED_PAUSE_EXITING         0x40000000
-#define PIN_BASED_EXT_INTR_MASK 0x1
-#define PIN_BASED_NMI_EXITING   0x8
-
-#define VM_EXIT_ACK_INTR_ON_EXIT        0x00008000
-#define VM_EXIT_HOST_ADD_SPACE_SIZE     0x00000200
-
-
-/* VMCS Encodings */
-enum vmcs_field {
-    GUEST_ES_SELECTOR               = 0x00000800,
-    GUEST_CS_SELECTOR               = 0x00000802,
-    GUEST_SS_SELECTOR               = 0x00000804,
-    GUEST_DS_SELECTOR               = 0x00000806,
-    GUEST_FS_SELECTOR               = 0x00000808,
-    GUEST_GS_SELECTOR               = 0x0000080a,
-    GUEST_LDTR_SELECTOR             = 0x0000080c,
-    GUEST_TR_SELECTOR               = 0x0000080e,
-    HOST_ES_SELECTOR                = 0x00000c00,
-    HOST_CS_SELECTOR                = 0x00000c02,
-    HOST_SS_SELECTOR                = 0x00000c04,
-    HOST_DS_SELECTOR                = 0x00000c06,
-    HOST_FS_SELECTOR                = 0x00000c08,
-    HOST_GS_SELECTOR                = 0x00000c0a,
-    HOST_TR_SELECTOR                = 0x00000c0c,
-    IO_BITMAP_A                     = 0x00002000, 
-    IO_BITMAP_A_HIGH                = 0x00002001, 
-    IO_BITMAP_B                     = 0x00002002, 
-    IO_BITMAP_B_HIGH                = 0x00002003, 
-    VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
-    VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
-    VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
-    VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
-    VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
-    VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
-    TSC_OFFSET                      = 0x00002010,
-    TSC_OFFSET_HIGH                 = 0x00002011,
-    VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
-    VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
-    VMCS_LINK_POINTER               = 0x00002800,
-    VMCS_LINK_POINTER_HIGH          = 0x00002801,
-    GUEST_IA32_DEBUGCTL             = 0x00002802,
-    GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
-    PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
-    CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,   
-    EXCEPTION_BITMAP                = 0x00004004,
-    PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
-    PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
-    CR3_TARGET_COUNT                = 0x0000400a,
-    VM_EXIT_CONTROLS                = 0x0000400c,
-    VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
-    VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
-    VM_ENTRY_CONTROLS               = 0x00004012,
-    VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
-    VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
-    VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
-    VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
-    TPR_THRESHOLD                   = 0x0000401c,
-    SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
-    VM_INSTRUCTION_ERROR            = 0x00004400,
-    VM_EXIT_REASON                  = 0x00004402,
-    VM_EXIT_INTR_INFO               = 0x00004404,   
-    VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
-    IDT_VECTORING_INFO_FIELD        = 0x00004408,
-    IDT_VECTORING_ERROR_CODE        = 0x0000440a,
-    VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
-    VMX_INSTRUCTION_INFO            = 0x0000440e,
-    GUEST_ES_LIMIT                  = 0x00004800,
-    GUEST_CS_LIMIT                  = 0x00004802,
-    GUEST_SS_LIMIT                  = 0x00004804,
-    GUEST_DS_LIMIT                  = 0x00004806,
-    GUEST_FS_LIMIT                  = 0x00004808,
-    GUEST_GS_LIMIT                  = 0x0000480a,
-    GUEST_LDTR_LIMIT                = 0x0000480c,
-    GUEST_TR_LIMIT                  = 0x0000480e,
-    GUEST_GDTR_LIMIT                = 0x00004810,
-    GUEST_IDTR_LIMIT                = 0x00004812,
-    GUEST_ES_AR_BYTES               = 0x00004814,
-    GUEST_CS_AR_BYTES               = 0x00004816,
-    GUEST_SS_AR_BYTES               = 0x00004818,
-    GUEST_DS_AR_BYTES               = 0x0000481a,
-    GUEST_FS_AR_BYTES               = 0x0000481c,
-    GUEST_GS_AR_BYTES               = 0x0000481e,
-    GUEST_LDTR_AR_BYTES             = 0x00004820,
-    GUEST_TR_AR_BYTES               = 0x00004822,
-    GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
-    GUEST_SYSENTER_CS               = 0x0000482A,
-    HOST_IA32_SYSENTER_CS           = 0x00004c00,
-    CR0_GUEST_HOST_MASK             = 0x00006000,
-    CR4_GUEST_HOST_MASK             = 0x00006002,
-    CR0_READ_SHADOW                 = 0x00006004,
-    CR4_READ_SHADOW                 = 0x00006006,
-    CR3_TARGET_VALUE0               = 0x00006008, 
-    CR3_TARGET_VALUE1               = 0x0000600a, 
-    CR3_TARGET_VALUE2               = 0x0000600c, 
-    CR3_TARGET_VALUE3               = 0x0000600e, 
-    EXIT_QUALIFICATION              = 0x00006400,
-    GUEST_LINEAR_ADDRESS            = 0x0000640a,
-    GUEST_CR0                       = 0x00006800,
-    GUEST_CR3                       = 0x00006802,
-    GUEST_CR4                       = 0x00006804,
-    GUEST_ES_BASE                   = 0x00006806,
-    GUEST_CS_BASE                   = 0x00006808,
-    GUEST_SS_BASE                   = 0x0000680a,
-    GUEST_DS_BASE                   = 0x0000680c,
-    GUEST_FS_BASE                   = 0x0000680e,
-    GUEST_GS_BASE                   = 0x00006810,
-    GUEST_LDTR_BASE                 = 0x00006812,
-    GUEST_TR_BASE                   = 0x00006814,
-    GUEST_GDTR_BASE                 = 0x00006816,    
-    GUEST_IDTR_BASE                 = 0x00006818,
-    GUEST_DR7                       = 0x0000681a,
-    GUEST_RSP                       = 0x0000681c,
-    GUEST_RIP                       = 0x0000681e,
-    GUEST_RFLAGS                    = 0x00006820,
-    GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
-    GUEST_SYSENTER_ESP              = 0x00006824,
-    GUEST_SYSENTER_EIP              = 0x00006826,
-    HOST_CR0                        = 0x00006c00,
-    HOST_CR3                        = 0x00006c02,
-    HOST_CR4                        = 0x00006c04,
-    HOST_FS_BASE                    = 0x00006c06,
-    HOST_GS_BASE                    = 0x00006c08,
-    HOST_TR_BASE                    = 0x00006c0a,
-    HOST_GDTR_BASE                  = 0x00006c0c,
-    HOST_IDTR_BASE                  = 0x00006c0e,
-    HOST_IA32_SYSENTER_ESP          = 0x00006c10,
-    HOST_IA32_SYSENTER_EIP          = 0x00006c12,
-    HOST_RSP                        = 0x00006c14,
-    HOST_RIP                        = 0x00006c16,
-};
-
-#define VMX_DEBUG 1
-#if VMX_DEBUG
-#define DBG_LEVEL_0                 (1 << 0)
-#define DBG_LEVEL_1                 (1 << 1)
-#define DBG_LEVEL_2                 (1 << 2)
-#define DBG_LEVEL_3                 (1 << 3)
-#define DBG_LEVEL_IO                (1 << 4)
-#define DBG_LEVEL_VMMU              (1 << 5)
-#define DBG_LEVEL_VLAPIC            (1 << 6)
-#define DBG_LEVEL_VLAPIC_TIMER      (1 << 7)
-#define DBG_LEVEL_VLAPIC_INTERRUPT  (1 << 8)
-#define DBG_LEVEL_IOAPIC            (1 << 9)
-
-extern unsigned int opt_vmx_debug_level;
-#define VMX_DBG_LOG(level, _f, _a...)           \
-    if ((level) & opt_vmx_debug_level)          \
-        printk("[VMX:%d.%d] " _f "\n",          \
-               current->domain->domain_id, current->vcpu_id, ## _a)
-#else
-#define VMX_DBG_LOG(level, _f, _a...)
-#endif
-
-#define  __vmx_bug(regs)                                        \
-    do {                                                        \
-        printk("__vmx_bug at %s:%d\n", __FILE__, __LINE__);     \
-        show_registers(regs);                                   \
-        domain_crash_synchronous();                             \
-    } while (0)
-
-#endif /* __ASM_X86_VMX_VMCS_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
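As an aside, the vmcs_field encodings above follow the IA-32 VMX scheme: bits
14:13 of the encoding give the field width and bits 11:10 the field type,
which is why the 16-bit guest selectors cluster at 0x08xx, host selectors at
0x0cxx, 64-bit addresses at 0x2xxx, 32-bit controls at 0x4xxx, and
natural-width fields at 0x6xxx. A small illustrative decoder (hypothetical
helpers, not part of Xen):

    /* Decode the width and type bits of a VMCS field encoding
     * (Intel SDM layout). */
    static inline int vmcs_field_width(unsigned long enc)
    {
        /* 0: 16-bit, 1: 64-bit, 2: 32-bit, 3: natural width */
        return (enc >> 13) & 0x3;
    }

    static inline int vmcs_field_type(unsigned long enc)
    {
        /* 0: control, 1: read-only data, 2: guest state, 3: host state */
        return (enc >> 10) & 0x3;
    }

For example, GUEST_CR0 (0x6800) decodes as natural width (3), guest state (2),
while PIN_BASED_VM_EXEC_CONTROL (0x4000) decodes as 32-bit (2), control (0).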
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_vpic.h
--- a/xen/include/asm-x86/vmx_vpic.h    Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,85 +0,0 @@
-/*
- * QEMU System Emulator header
- * 
- * Copyright (c) 2003 Fabrice Bellard
- * Copyright (c) 2005 Intel Corp
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef _VMX_VPIC_H
-#define _VMX_VPIC_H
-
-#define hw_error(x)  do {} while (0)
-
-
-/* i8259.c */
-typedef struct IOAPICState IOAPICState;
-typedef struct PicState {
-    uint8_t last_irr; /* edge detection */
-    uint8_t irr; /* interrupt request register */
-    uint8_t imr; /* interrupt mask register */
-    uint8_t isr; /* interrupt service register */
-    uint8_t priority_add; /* highest irq priority */
-    uint8_t irq_base;
-    uint8_t read_reg_select;
-    uint8_t poll;
-    uint8_t special_mask;
-    uint8_t init_state;
-    uint8_t auto_eoi;
-    uint8_t rotate_on_auto_eoi;
-    uint8_t special_fully_nested_mode;
-    uint8_t init4; /* true if 4 byte init */
-    uint8_t elcr; /* PIIX edge/trigger selection*/
-    uint8_t elcr_mask;
-    struct vmx_virpic *pics_state;
-} PicState;
-
-struct vmx_virpic {
-    /* 0 is master pic, 1 is slave pic */
-    /* XXX: better separation between the two pics */
-    PicState pics[2];
-    void (*irq_request)(int *opaque, int level);
-    void *irq_request_opaque;
-    /* IOAPIC callback support */
-    void (*alt_irq_func)(void *opaque, int irq_num, int level);
-    void *alt_irq_opaque;
-};
-
-
-void pic_set_irq(struct vmx_virpic *s, int irq, int level);
-void pic_set_irq_new(void *opaque, int irq, int level);
-void pic_init(struct vmx_virpic *s,
-              void (*irq_request)(int *opaque, int level),
-              void *irq_request_opaque);
-void pic_set_alt_irq_func(struct vmx_virpic *s,
-                          void (*alt_irq_func)(void *opaque, int irq_num, int level),
-                          void *alt_irq_opaque);
-int pic_read_irq(struct vmx_virpic *s);
-void pic_update_irq(struct vmx_virpic *s);
-uint32_t pic_intack_read(struct vmx_virpic *s);
-void register_pic_io_hook (void);
-int cpu_get_pic_interrupt(struct vcpu *v, int *type);
-int is_pit_irq(struct vcpu *v, int irq, int type);
-int is_irq_enabled(struct vcpu *v, int irq);
-void do_pic_irqs (struct vmx_virpic *s, uint16_t irqs);
-void do_pic_irqs_clear (struct vmx_virpic *s, uint16_t irqs);
-
-/* APIC */
-#endif  /* _VMX_VPIC_H */  
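For readers unfamiliar with the 8259 model above: pending interrupts are
resolved against a rotating priority base (priority_add). A simplified
standalone sketch of that resolution, ignoring the in-service register and
special mask mode (illustrative, not the Xen/QEMU code):

    #include <stdint.h>

    /* Highest-priority pending, unmasked IRQ of one 8259, or -1 if none.
     * Priority is counted from priority_add; smaller distance wins. */
    static int pic_highest_pending(uint8_t irr, uint8_t imr,
                                   uint8_t priority_add)
    {
        uint8_t pending = irr & ~imr;
        int i;

        if (!pending)
            return -1;
        for (i = 0; i < 8; i++) {
            int irq = (i + priority_add) & 7;
            if (pending & (1 << irq))
                return irq;
        }
        return -1;   /* unreachable: pending is nonzero */
    }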
diff -r e4eb12a6e003 -r f1b361b05bf3 xen/include/asm-x86/vmx_vpit.h
--- a/xen/include/asm-x86/vmx_vpit.h    Mon Jan 30 17:51:35 2006
+++ /dev/null   Tue Jan 31 10:49:51 2006
@@ -1,56 +0,0 @@
-#ifndef _VMX_VIRPIT_H
-#define _VMX_VIRPIT_H
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/time.h>
-#include <xen/errno.h>
-#include <xen/timer.h>
-#include <asm/vmx_vmcs.h>
-#include <asm/vmx_vpic.h>
-
-#define PIT_FREQ 1193181
-
-#define LSByte 0
-#define MSByte 1
-#define LSByte_multiple 2
-#define MSByte_multiple 3
-
-struct vmx_virpit {
-    /* for simulation of counter 0 in mode 2 */
-    u64 period_cycles;                 /* pit period in cpu cycles */
-    u64 inject_point; /* time the virtual interrupt was injected */
-    u64 shift;  /* saved value of offset - drift */
-    s_time_t scheduled;                 /* scheduled timer interrupt */
-    struct timer pit_timer;  /* periodic timer for mode 2 */
-    unsigned int channel;  /* the pit channel, counter 0-2 */
-    unsigned int pending_intr_nr; /* counter of pending timer interrupts */
-    u32 period;                /* pit period in ns */
-    int first_injected;                 /* flag to prevent shadow window */
-
-    /* virtual PIT state for handling related I/O */
-    int read_state;
-    int count_LSB_latched;
-    int count_MSB_latched;
-
-    unsigned int count;  /* the 16 bit channel count */
-    unsigned int init_val; /* the init value for the counter */
-};
-
-/* to hook the ioreq packet to get the PIT initialization info */
-extern void vmx_hooks_assist(struct vcpu *v);
-
-static __inline__ s_time_t get_pit_scheduled(
-    struct vcpu *v, 
-    struct vmx_virpit *vpit)
-{
-    if ( is_irq_enabled(v, 0) ) {
-        return vpit->scheduled;
-    }
-    else
-        return -1;
-}
-extern void set_tsc_shift(struct vcpu *v,struct vmx_virpit *vpit);
-
-#endif /* _VMX_VIRPIT_H */
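As a closing illustration of the fields above, period is derived from the
programmed 16-bit count and the PIT's 1.193181 MHz input clock. A hedged
arithmetic sketch (hypothetical helper, not a Xen interface):

    #include <stdint.h>

    #define PIT_FREQ 1193181   /* Hz, as defined in the header above */

    /* Nanosecond period of PIT counter 0 in mode 2 for a given initial
     * count; a programmed count of 0 means 65536, giving the classic
     * ~54.9 ms (18.2 Hz) period. */
    static uint32_t pit_period_ns(unsigned int init_val)
    {
        uint64_t count = init_val ? init_val : 65536;
        return (uint32_t)((count * 1000000000ULL) / PIT_FREQ);
    }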

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

