[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg



# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1230090754 -32400
# Node ID 07f26e047fbfef8d3be9ceb0c878d294fb9d945b
# Parent  9837303a4708cf0bd558efb6676cef095f9c4406
# Parent  e2f36d066b7b66a538bbe240d46f49bede51d9ed
merge with xen-unstable.hg
---
 xen/arch/x86/cpu/mcheck/p4.c                                      |  270 
 xen/arch/x86/cpu/mcheck/p6.c                                      |  118 
 xen/arch/x86/rwlock.c                                             |   28 
 xen/include/asm-x86/rwlock.h                                      |   71 
 extras/mini-os/Makefile                                           |    8 
 extras/mini-os/arch/x86/mm.c                                      |   13 
 extras/mini-os/fs-front.c                                         |   14 
 extras/mini-os/include/xenbus.h                                   |    3 
 extras/mini-os/kernel.c                                           |   26 
 extras/mini-os/xenbus/xenbus.c                                    |   11 
 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in |    2 
 tools/firmware/hvmloader/hvmloader.c                              |   24 
 tools/firmware/rombios/rombios.c                                  | 4030 +++++-----
 tools/firmware/rombios/rombios.h                                  |   70 
 tools/libxc/xc_dom_core.c                                         |    1 
 tools/libxc/xc_dom_x86.c                                          |    6 
 tools/libxc/xc_domain.c                                           |   14 
 tools/libxc/xc_domain_restore.c                                   |   16 
 tools/libxc/xc_domain_save.c                                      |   26 
 tools/libxc/xc_pm.c                                               |  138 
 tools/libxc/xc_private.h                                          |    3 
 tools/libxc/xc_ptrace.c                                           |   41 
 tools/libxc/xenctrl.h                                             |   48 
 tools/libxc/xg_private.c                                          |    1 
 tools/misc/xen-detect.c                                           |   24 
 tools/misc/xenpm.c                                                |  744 +
 tools/python/xen/lowlevel/acm/acm.c                               |    2 
 tools/python/xen/lowlevel/flask/flask.c                           |    1 
 tools/python/xen/lowlevel/xc/xc.c                                 |   11 
 tools/python/xen/lowlevel/xs/xs.c                                 |   17 
 tools/python/xen/xend/XendCheckpoint.py                           |    2 
 tools/python/xen/xend/XendConfig.py                               |    8 
 tools/python/xen/xend/XendDomainInfo.py                           |    9 
 tools/python/xen/xend/balloon.py                                  |   36 
 tools/python/xen/xend/server/blkif.py                             |   13 
 tools/xcutils/xc_save.c                                           |   12 
 tools/xenpmd/xenpmd.c                                             |    6 
 tools/xenstat/xentop/xentop.c                                     |    2 
 unmodified_drivers/linux-2.6/Makefile                             |    1 
 unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h |    2 
 unmodified_drivers/linux-2.6/overrides.mk                         |    1 
 unmodified_drivers/linux-2.6/scsifront/Kbuild                     |    6 
 unmodified_drivers/linux-2.6/scsifront/Makefile                   |    3 
 xen/arch/ia64/xen/cpufreq/cpufreq.c                               |    1 
 xen/arch/x86/Makefile                                             |    1 
 xen/arch/x86/acpi/cpu_idle.c                                      |   49 
 xen/arch/x86/acpi/cpufreq/cpufreq.c                               |   73 
 xen/arch/x86/acpi/cpufreq/powernow.c                              |   11 
 xen/arch/x86/apic.c                                               |   33 
 xen/arch/x86/cpu/amd.c                                            |    4 
 xen/arch/x86/cpu/intel.c                                          |    4 
 xen/arch/x86/cpu/mcheck/Makefile                                  |    3 
 xen/arch/x86/cpu/mcheck/amd_k8.c                                  |    4 
 xen/arch/x86/cpu/mcheck/k7.c                                      |    5 
 xen/arch/x86/cpu/mcheck/mce.c                                     |   34 
 xen/arch/x86/cpu/mcheck/mce.h                                     |   17 
 xen/arch/x86/cpu/mcheck/mce_intel.c                               |  632 +
 xen/arch/x86/cpu/mcheck/non-fatal.c                               |   25 
 xen/arch/x86/cpu/mcheck/p5.c                                      |    1 
 xen/arch/x86/cpu/mcheck/x86_mca.h                                 |   19 
 xen/arch/x86/domctl.c                                             |   45 
 xen/arch/x86/hvm/hvm.c                                            |   26 
 xen/arch/x86/hvm/svm/intr.c                                       |    3 
 xen/arch/x86/hvm/vmx/entry.S                                      |   14 
 xen/arch/x86/hvm/vmx/intr.c                                       |   15 
 xen/arch/x86/hvm/vmx/realmode.c                                   |   45 
 xen/arch/x86/hvm/vmx/vmcs.c                                       |   61 
 xen/arch/x86/hvm/vmx/vmx.c                                        |  304 
 xen/arch/x86/i8259.c                                              |    1 
 xen/arch/x86/io_apic.c                                            |   14 
 xen/arch/x86/irq.c                                                |   67 
 xen/arch/x86/mm/shadow/common.c                                   |    9 
 xen/arch/x86/mm/shadow/multi.c                                    |    9 
 xen/arch/x86/msi.c                                                |  215 
 xen/arch/x86/oprofile/nmi_int.c                                   |    9 
 xen/arch/x86/physdev.c                                            |   24 
 xen/arch/x86/platform_hypercall.c                                 |   10 
 xen/arch/x86/setup.c                                              |    8 
 xen/arch/x86/smpboot.c                                            |   25 
 xen/arch/x86/time.c                                               |  126 
 xen/arch/x86/traps.c                                              |   20 
 xen/arch/x86/x86_32/asm-offsets.c                                 |    4 
 xen/arch/x86/x86_64/asm-offsets.c                                 |    6 
 xen/arch/x86/x86_64/compat/entry.S                                |    1 
 xen/arch/x86/x86_emulate/x86_emulate.h                            |    1 
 xen/common/domain.c                                               |   35 
 xen/common/schedule.c                                             |   46 
 xen/common/spinlock.c                                             |    6 
 xen/common/sysctl.c                                               |   25 
 xen/drivers/acpi/pmstat.c                                         |  306 
 xen/drivers/cpufreq/Makefile                                      |    1 
 xen/drivers/cpufreq/cpufreq.c                                     |   48 
 xen/drivers/cpufreq/cpufreq_misc_governors.c                      |  158 
 xen/drivers/cpufreq/cpufreq_ondemand.c                            |  143 
 xen/drivers/passthrough/amd/pci_amd_iommu.c                       |   20 
 xen/drivers/passthrough/io.c                                      |   35 
 xen/drivers/passthrough/iommu.c                                   |   59 
 xen/drivers/passthrough/pci.c                                     |   95 
 xen/drivers/passthrough/vtd/iommu.c                               |  292 
 xen/drivers/video/vesa.c                                          |   18 
 xen/drivers/video/vga.c                                           |   19 
 xen/include/acpi/cpufreq/cpufreq.h                                |   21 
 xen/include/asm-ia64/linux-xen/asm/spinlock.h                     |    2 
 xen/include/asm-x86/apicdef.h                                     |    2 
 xen/include/asm-x86/config.h                                      |    2 
 xen/include/asm-x86/cpufeature.h                                  |    1 
 xen/include/asm-x86/hvm/hvm.h                                     |    2 
 xen/include/asm-x86/hvm/trace.h                                   |    1 
 xen/include/asm-x86/hvm/vcpu.h                                    |    1 
 xen/include/asm-x86/hvm/vmx/vmcs.h                                |   16 
 xen/include/asm-x86/hvm/vmx/vmx.h                                 |    1 
 xen/include/asm-x86/irq.h                                         |    1 
 xen/include/asm-x86/mach-default/irq_vectors.h                    |    4 
 xen/include/asm-x86/msi.h                                         |    9 
 xen/include/asm-x86/msr-index.h                                   |    6 
 xen/include/asm-x86/perfc_defn.h                                  |    3 
 xen/include/asm-x86/processor.h                                   |    2 
 xen/include/asm-x86/spinlock.h                                    |   54 
 xen/include/asm-x86/system.h                                      |   22 
 xen/include/asm-x86/time.h                                        |    2 
 xen/include/asm-x86/x86_32/system.h                               |   10 
 xen/include/asm-x86/x86_64/system.h                               |   10 
 xen/include/public/arch-x86/xen-mca.h                             |   15 
 xen/include/public/domctl.h                                       |   12 
 xen/include/public/hvm/params.h                                   |    5 
 xen/include/public/physdev.h                                      |    9 
 xen/include/public/sysctl.h                                       |   86 
 xen/include/public/trace.h                                        |    1 
 xen/include/xen/iommu.h                                           |    2 
 xen/include/xen/lib.h                                             |    2 
 xen/include/xen/pci.h                                             |   12 
 xen/include/xen/sched.h                                           |    1 
 xen/include/xen/spinlock.h                                        |    6 
 xen/include/xen/time.h                                            |    1 
 134 files changed, 6259 insertions(+), 3231 deletions(-)

diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/Makefile
--- a/extras/mini-os/Makefile   Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/Makefile   Wed Dec 24 12:52:34 2008 +0900
@@ -93,8 +93,12 @@ endif
 $(OBJ_DIR)/$(TARGET)_app.o: $(APP_OBJS) app.lds
        $(LD) -r -d $(LDFLAGS) -\( $^ -\) $(APP_LDLIBS) --undefined main -o $@
 
-$(OBJ_DIR)/$(TARGET): links $(OBJS) $(OBJ_DIR)/$(TARGET)_app.o arch_lib
-       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJ_DIR)/$(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
+ifneq ($(APP_OBJS),)
+APP_O=$(OBJ_DIR)/$(TARGET)_app.o 
+endif
+
+$(OBJ_DIR)/$(TARGET): links $(OBJS) $(APP_O) arch_lib
+       $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
        $(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
        $(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
        gzip -f -9 -c $@ >$@.gz
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/arch/x86/mm.c      Wed Dec 24 12:52:34 2008 +0900
@@ -420,7 +420,9 @@ static unsigned long demand_map_area_sta
 #define DEMAND_MAP_PAGES ((2ULL << 30) / PAGE_SIZE)
 #endif
 
-#ifdef HAVE_LIBC
+#ifndef HAVE_LIBC
+#define HEAP_PAGES 0
+#else
 unsigned long heap, brk, heap_mapped, heap_end;
 #ifdef __x86_64__
 #define HEAP_PAGES ((128ULL << 30) / PAGE_SIZE)
@@ -591,7 +593,7 @@ void arch_init_mm(unsigned long* start_p
 void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p)
 {
 
-    unsigned long start_pfn, max_pfn;
+    unsigned long start_pfn, max_pfn, virt_pfns;
 
     printk("  _text:        %p\n", &_text);
     printk("  _etext:       %p\n", &_etext);
@@ -604,7 +606,12 @@ void arch_init_mm(unsigned long* start_p
     start_pfn = PFN_UP(to_phys(start_info.pt_base)) + 
                 start_info.nr_pt_frames + 3;
     max_pfn = start_info.nr_pages;
-   
+
+    /* We need room for demand mapping and heap, clip available memory */
+    virt_pfns = DEMAND_MAP_PAGES + HEAP_PAGES;
+    if (max_pfn + virt_pfns + 1 < max_pfn)
+        max_pfn = -(virt_pfns + 1);
+
     printk("  start_pfn:    %lx\n", start_pfn);
     printk("  max_pfn:      %lx\n", max_pfn);
 
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/fs-front.c
--- a/extras/mini-os/fs-front.c Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/fs-front.c Wed Dec 24 12:52:34 2008 +0900
@@ -867,18 +867,6 @@ moretodo:
     if(more) goto moretodo;
     
     in_irq = 0;
-}
-
-/* Small utility function to figure out our domain id */
-static domid_t get_self_id(void)
-{
-    char *dom_id;
-    domid_t ret; 
-
-    BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
-    sscanf(dom_id, "%d", &ret);
-
-    return ret;
 }
 
 static void alloc_request_table(struct fs_import *import)
@@ -1066,7 +1054,7 @@ static int init_fs_import(struct fs_impo
     unmask_evtchn(import->local_port);
 
     
-    self_id = get_self_id(); 
+    self_id = xenbus_get_self_id(); 
     /* Write the frontend info to a node in our Xenbus */
     sprintf(nodename, "/local/domain/%d/device/vfs/%d", 
                         self_id, import->import_id);
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/include/xenbus.h
--- a/extras/mini-os/include/xenbus.h   Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/include/xenbus.h   Wed Dec 24 12:52:34 2008 +0900
@@ -91,6 +91,9 @@ char* xenbus_printf(xenbus_transaction_t
                                   const char* fmt, ...)
                    __attribute__((__format__(printf, 4, 5)));
 
+/* Utility function to figure out our domain id */
+domid_t xenbus_get_self_id(void);
+
 /* Reset the XenBus system. */
 void fini_xenbus(void);
 
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/kernel.c   Wed Dec 24 12:52:34 2008 +0900
@@ -434,25 +434,25 @@ static void kbdfront_thread(void *p)
 
 static struct pcifront_dev *pci_dev;
 
+static void print_pcidev(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
+{
+    unsigned int vendor, device, rev, class;
+
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
+    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
+
+    printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
+}
+
 static void pcifront_thread(void *p)
 {
-    void print(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
-    {
-        unsigned int vendor, device, rev, class;
-
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
-        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
-
-        printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
-    }
-
     pci_dev = init_pcifront(NULL);
     if (!pci_dev)
         return;
     printk("PCI devices:\n");
-    pcifront_scan(pci_dev, print);
+    pcifront_scan(pci_dev, print_pcidev);
 }
 
 static void fs_thread(void *p)
diff -r 9837303a4708 -r 07f26e047fbf extras/mini-os/xenbus/xenbus.c
--- a/extras/mini-os/xenbus/xenbus.c    Wed Dec 24 12:50:57 2008 +0900
+++ b/extras/mini-os/xenbus/xenbus.c    Wed Dec 24 12:52:34 2008 +0900
@@ -666,6 +666,17 @@ char* xenbus_printf(xenbus_transaction_t
     return xenbus_write(xbt,fullpath,val);
 }
 
+domid_t xenbus_get_self_id(void)
+{
+    char *dom_id;
+    domid_t ret;
+
+    BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
+    sscanf(dom_id, "%d", &ret);
+
+    return ret;
+}
+
 static void do_ls_test(const char *pre)
 {
     char **dirs, *msg;
diff -r 9837303a4708 -r 07f26e047fbf tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in
--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in Wed Dec 24 12:52:34 2008 +0900
@@ -83,7 +83,7 @@ READLINE_DEP = $$(READLINE_DIR)
 # -I. for config files.
 # -I${srcdir} for our headers.
 # -I$(srcdir)/../regformats for regdef.h.
-INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR)  -I../../../../../libxc/
+INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR)  -I../../../../../libxc/ -I../../../../../include/
 
 # M{H,T}_CFLAGS, if defined, has host- and target-dependent CFLAGS
 # from the config/ directory.
diff -r 9837303a4708 -r 07f26e047fbf tools/firmware/hvmloader/hvmloader.c
--- a/tools/firmware/hvmloader/hvmloader.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/firmware/hvmloader/hvmloader.c      Wed Dec 24 12:52:34 2008 +0900
@@ -269,6 +269,11 @@ static void pci_setup(void)
             printf("pci dev %02x:%x INT%c->IRQ%u\n",
                    devfn>>3, devfn&7, 'A'+pin-1, isa_irq);
         }
+
+        /* Enable bus mastering. */
+        cmd = pci_readw(devfn, PCI_COMMAND);
+        cmd |= PCI_COMMAND_MASTER;
+        pci_writew(devfn, PCI_COMMAND, cmd);
     }
 
     /* Assign iomem and ioport resources in descending order of size. */
@@ -534,6 +539,23 @@ static uint16_t init_xen_platform_io_bas
     }
 
     return bios_info->xen_pfiob;
+}
+
+/* Set up an empty TSS area for virtual 8086 mode to use. 
+ * The only important thing is that it musn't have any bits set 
+ * in the interrupt redirection bitmap, so all zeros will do.  */
+static void init_vm86_tss(void)
+{
+    uint32_t tss;
+    struct xen_hvm_param p;
+
+    tss = e820_malloc(128, 128);
+    memset((char *)tss, 0, 128);
+    p.domid = DOMID_SELF;
+    p.index = HVM_PARAM_VM86_TSS;
+    p.value = tss;
+    hypercall_hvm_op(HVMOP_set_param, &p);
+    printf("vm86 TSS at %08x\n", tss);
 }
 
 int main(void)
@@ -605,6 +627,8 @@ int main(void)
         printf("Loading ACPI ...\n");
         acpi_build_tables();
     }
+
+    init_vm86_tss();
 
     cmos_write_memory_size();
 
diff -r 9837303a4708 -r 07f26e047fbf tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/firmware/rombios/rombios.c  Wed Dec 24 12:52:34 2008 +0900
@@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $
+// $Id: rombios.c,v 1.221 2008/12/07 17:32:29 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2002  MandrakeSoft S.A.
@@ -22,9 +22,9 @@
 //
 //  You should have received a copy of the GNU Lesser General Public
 //  License along with this library; if not, write to the Free Software
-//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
-
-// ROM BIOS for use with Bochs/Plex x86 emulation environment
+//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+// ROM BIOS for use with Bochs/Plex86/QEMU emulation environment
 
 #define uint8_t unsigned char
 #define uint16_t unsigned short
@@ -81,10 +81,10 @@
 //
 // NOTES for El-Torito Boot (cbbochs@xxxxxxx)
 //   - CD-ROM booting is only available if ATA/ATAPI Driver is available
-//   - Current code is only able to boot mono-session cds 
+//   - Current code is only able to boot mono-session cds
 //   - Current code can not boot and emulate a hard-disk
 //     the bios will panic otherwise
-//   - Current code also use memory in EBDA segement. 
+//   - Current code also use memory in EBDA segement.
 //   - I used cmos byte 0x3D to store extended information on boot-device
 //   - Code has to be modified modified to handle multiple cdrom drives
 //   - Here are the cdrom boot failure codes:
@@ -102,13 +102,13 @@
 //      12 : can not read cd - boot image
 //
 //   ATA driver
-//   - EBDA segment. 
+//   - EBDA segment.
 //     I used memory starting at 0x121 in the segment
 //   - the translation policy is defined in cmos regs 0x39 & 0x3a
 //
 // TODO :
 //
-//   int74 
+//   int74
 //     - needs to be reworked.  Uses direct [bp] offsets. (?)
 //
 //   int13:
@@ -128,13 +128,13 @@
 //   - Implement remaining int13_cdemu functions (as defined by El-Torito specs)
 //   - cdrom drive is hardcoded to ide 0 device 1 in several places. see "FIXME ElTorito Hardcoded"
 //   - int13 Fix DL when emulating a cd. In that case DL is decremented before calling real int13.
-//     This is ok. But DL should be reincremented afterwards. 
+//     This is ok. But DL should be reincremented afterwards.
 //   - Fix all "FIXME ElTorito Various"
 //   - should be able to boot any cdrom instead of the first one
 //
 //   BCC Bug: find a generic way to handle the bug of #asm after an "if"  (fixed in 0.16.7)
 
-#define DEBUG_ROMBIOS      0
+#include "rombios.h"
 
 #define DEBUG_ATA          0
 #define DEBUG_INT13_HD     0
@@ -159,7 +159,7 @@
 #define BX_USE_ATADRV    1
 #define BX_ELTORITO_BOOT 1
 
-#define BX_TCGBIOS       0              /* main switch for TCG BIOS ext. */
+#define BX_TCGBIOS       0   /* main switch for TCG BIOS ext. */
 
 #define BX_MAX_ATA_INTERFACES   4
 #define BX_MAX_ATA_DEVICES      (BX_MAX_ATA_INTERFACES*2)
@@ -183,14 +183,18 @@
 #define EBDA_SIZE          1              // In KiB
 #define BASE_MEM_IN_K   (640 - EBDA_SIZE)
 
-  // Define the application NAME
-#ifdef HVMASSIST
-#  define BX_APPNAME "HVMAssist"
-#elif PLEX86
-#  define BX_APPNAME "Plex86"
-#else
-#  define BX_APPNAME "Bochs"
-#endif
+/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
+#define IPL_TABLE_OFFSET     0x0300  /* offset from EBDA */
+#define IPL_TABLE_ENTRIES    8
+#define IPL_COUNT_OFFSET     0x0380  /* u16: number of valid table entries */
+#define IPL_SEQUENCE_OFFSET  0x0382  /* u16: next boot device */
+#define IPL_BOOTFIRST_OFFSET 0x0384  /* u16: user selected device */
+#define IPL_SIZE             0xff
+#define IPL_TYPE_FLOPPY      0x01
+#define IPL_TYPE_HARDDISK    0x02
+#define IPL_TYPE_CDROM       0x03
+#define IPL_TYPE_BEV         0x80
+
 
   // Sanity Checks
 #if BX_USE_ATADRV && BX_CPU<3
@@ -209,15 +213,10 @@
 #    error APM BIOS can only be used with 386+ cpu
 #endif
 
-#ifndef BX_SMP_PROCESSORS
-#define BX_SMP_PROCESSORS 1
-#    warning BX_SMP_PROCESSORS not defined, defaulting to 1
-#endif
-  
-#define PANIC_PORT  0x400
-#define PANIC_PORT2 0x401
-#define INFO_PORT   0x402
-#define DEBUG_PORT  0x403
+// define this if you want to make PCIBIOS working on a specific bridge only
+// undef enables PCIBIOS when at least one PCI device is found
+// i440FX is emulated by Bochs and QEMU
+#define PCI_FIXED_HOST_BRIDGE 0x12378086 ;; i440FX PCI bridge
 
 // #20  is dec 20
 // #$20 is hex 20 = 32
@@ -250,7 +249,7 @@ use16 286
 
 MACRO HALT
   ;; the HALT macro is called with the line number of the HALT call.
-  ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex 
+  ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex
   ;; to print a BX_PANIC message.  This will normally halt the simulation
   ;; with a message such as "BIOS panic at rombios.c, line 4091".
   ;; However, users can choose to make panics non-fatal and continue.
@@ -289,9 +288,9 @@ typedef unsigned long  Bit32u;
   void memsetb(seg,offset,value,count);
   void memcpyb(dseg,doffset,sseg,soffset,count);
   void memcpyd(dseg,doffset,sseg,soffset,count);
-  
+
   // memset of count bytes
-    void 
+    void
   memsetb(seg,offset,value,count)
     Bit16u seg;
     Bit16u offset;
@@ -301,14 +300,14 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push cx
       push es
       push di
-  
+
       mov  cx, 10[bp] ; count
-      cmp  cx, #0x00
+      test cx, cx
       je   memsetb_end
       mov  ax, 4[bp] ; segment
       mov  es, ax
@@ -318,19 +317,19 @@ typedef unsigned long  Bit32u;
       cld
       rep
        stosb
-  
+
   memsetb_end:
       pop di
       pop es
       pop cx
       pop ax
-  
+
     pop bp
   ASM_END
   }
-  
+
   // memcpy of count bytes
-    void 
+    void
   memcpyb(dseg,doffset,sseg,soffset,count)
     Bit16u dseg;
     Bit16u doffset;
@@ -341,16 +340,16 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push cx
       push es
       push di
       push ds
       push si
-  
+
       mov  cx, 12[bp] ; count
-      cmp  cx, #0x0000
+      test cx, cx
       je   memcpyb_end
       mov  ax, 4[bp] ; dsegment
       mov  es, ax
@@ -363,7 +362,7 @@ typedef unsigned long  Bit32u;
       cld
       rep
        movsb
-  
+
   memcpyb_end:
       pop si
       pop ds
@@ -371,14 +370,13 @@ typedef unsigned long  Bit32u;
       pop es
       pop cx
       pop ax
-  
+
     pop bp
   ASM_END
   }
 
-#if 0 
   // memcpy of count dword
-    void 
+    void
   memcpyd(dseg,doffset,sseg,soffset,count)
     Bit16u dseg;
     Bit16u doffset;
@@ -389,16 +387,16 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push cx
       push es
       push di
       push ds
       push si
-  
+
       mov  cx, 12[bp] ; count
-      cmp  cx, #0x0000
+      test cx, cx
       je   memcpyd_end
       mov  ax, 4[bp] ; dsegment
       mov  es, ax
@@ -411,7 +409,7 @@ typedef unsigned long  Bit32u;
       cld
       rep
        movsd
-  
+
   memcpyd_end:
       pop si
       pop ds
@@ -419,16 +417,15 @@ typedef unsigned long  Bit32u;
       pop es
       pop cx
       pop ax
-  
+
     pop bp
   ASM_END
   }
-#endif
 
   // read_dword and write_dword functions
   static Bit32u         read_dword();
   static void           write_dword();
-  
+
     Bit32u
   read_dword(seg, offset)
     Bit16u seg;
@@ -437,25 +434,24 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push bx
       push ds
       mov  ax, 4[bp] ; segment
       mov  ds, ax
       mov  bx, 6[bp] ; offset
       mov  ax, [bx]
-      inc  bx
-      inc  bx
+      add  bx, #2
       mov  dx, [bx]
       ;; ax = return value (word)
       ;; dx = return value (word)
       pop  ds
       pop  bx
-  
+
     pop  bp
   ASM_END
   }
-  
+
     void
   write_dword(seg, offset, data)
     Bit16u seg;
@@ -465,7 +461,7 @@ typedef unsigned long  Bit32u;
   ASM_START
     push bp
     mov  bp, sp
-  
+
       push ax
       push bx
       push ds
@@ -474,50 +470,49 @@ typedef unsigned long  Bit32u;
       mov  bx, 6[bp] ; offset
       mov  ax, 8[bp] ; data word
       mov  [bx], ax  ; write data word
-      inc  bx
-      inc  bx
+      add  bx, #2
       mov  ax, 10[bp] ; data word
       mov  [bx], ax  ; write data word
       pop  ds
       pop  bx
       pop  ax
-  
+
     pop  bp
   ASM_END
   }
-  
+
   // Bit32u (unsigned long) and long helper functions
   ASM_START
-  
+
   ;; and function
   landl:
   landul:
-    SEG SS 
+    SEG SS
       and ax,[di]
-    SEG SS 
+    SEG SS
       and bx,2[di]
     ret
-  
+
   ;; add function
   laddl:
   laddul:
-    SEG SS 
+    SEG SS
       add ax,[di]
-    SEG SS 
+    SEG SS
       adc bx,2[di]
     ret
-  
+
   ;; cmp function
   lcmpl:
   lcmpul:
     and eax, #0x0000FFFF
     shl ebx, #16
-    add eax, ebx
+    or  eax, ebx
     shr ebx, #16
     SEG SS
       cmp eax, dword ptr [di]
     ret
-  
+
   ;; sub function
   lsubl:
   lsubul:
@@ -526,26 +521,26 @@ typedef unsigned long  Bit32u;
     SEG SS
     sbb bx,2[di]
     ret
-  
+
   ;; mul function
   lmull:
   lmulul:
     and eax, #0x0000FFFF
     shl ebx, #16
-    add eax, ebx
+    or  eax, ebx
     SEG SS
     mul eax, dword ptr [di]
     mov ebx, eax
     shr ebx, #16
     ret
-  
+
   ;; dec function
   ldecl:
   ldecul:
     SEG SS
     dec dword ptr [bx]
     ret
-  
+
   ;; or function
   lorl:
   lorul:
@@ -554,31 +549,31 @@ typedef unsigned long  Bit32u;
     SEG SS
     or  bx,2[di]
     ret
-  
+
   ;; inc function
   lincl:
   lincul:
     SEG SS
     inc dword ptr [bx]
     ret
-  
+
   ;; tst function
   ltstl:
   ltstul:
     and eax, #0x0000FFFF
     shl ebx, #16
-    add eax, ebx
+    or  eax, ebx
     shr ebx, #16
     test eax, eax
     ret
-  
+
   ;; sr function
   lsrul:
     mov  cx,di
     jcxz lsr_exit
     and  eax, #0x0000FFFF
     shl  ebx, #16
-    add  eax, ebx
+    or   eax, ebx
   lsr_loop:
     shr  eax, #1
     loop lsr_loop
@@ -586,7 +581,7 @@ typedef unsigned long  Bit32u;
     shr  ebx, #16
   lsr_exit:
     ret
-  
+
   ;; sl function
   lsll:
   lslul:
@@ -594,15 +589,15 @@ typedef unsigned long  Bit32u;
     jcxz lsl_exit
     and  eax, #0x0000FFFF
     shl  ebx, #16
-    add  eax, ebx
-  lsl_loop: 
+    or   eax, ebx
+  lsl_loop:
     shl  eax, #1
     loop lsl_loop
     mov  ebx, eax
     shr  ebx, #16
   lsl_exit:
     ret
-  
+
   idiv_:
     cwd
     idiv bx
@@ -616,7 +611,7 @@ typedef unsigned long  Bit32u;
   ldivul:
     and  eax, #0x0000FFFF
     shl  ebx, #16
-    add  eax, ebx
+    or   eax, ebx
     xor  edx, edx
     SEG SS
     mov  bx,  2[di]
@@ -665,7 +660,7 @@ typedef struct {
     Bit8u  revision;
     Bit8u  checksum;
     } dpte_t;
- 
+
   typedef struct {
     Bit8u  iface;        // ISA or PCI
     Bit16u iobase1;      // IO Base 1
@@ -678,15 +673,15 @@ typedef struct {
     Bit8u  device;       // Detected type of attached devices (hd/cd/none)
     Bit8u  removable;    // Removable device flag
     Bit8u  lock;         // Locks for removable devices
-    // Bit8u  lba_capable;  // LBA capable flag - always yes for bochs devices
-    Bit8u  mode;         // transfert mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
+    Bit8u  mode;         // transfer mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
     Bit16u blksize;      // block size
 
     Bit8u  translation;  // type of translation
     chs_t  lchs;         // Logical CHS
     chs_t  pchs;         // Physical CHS
 
-    Bit32u sectors;      // Total sectors count
+    Bit32u sectors_low;  // Total sectors count
+    Bit32u sectors_high;
     } ata_device_t;
 
   typedef struct {
@@ -697,10 +692,10 @@ typedef struct {
     ata_device_t  devices[BX_MAX_ATA_DEVICES];
     //
     // map between (bios hd id - 0x80) and ata channels
-    Bit8u  hdcount, hdidmap[BX_MAX_ATA_DEVICES];                
+    Bit8u  hdcount, hdidmap[BX_MAX_ATA_DEVICES];
 
     // map between (bios cd id - 0xE0) and ata channels
-    Bit8u  cdcount, cdidmap[BX_MAX_ATA_DEVICES];                
+    Bit8u  cdcount, cdidmap[BX_MAX_ATA_DEVICES];
 
     // Buffer for DPTE table
     dpte_t dpte;
@@ -710,9 +705,9 @@ typedef struct {
     Bit32u trsfbytes;
 
     } ata_t;
-  
+
 #if BX_ELTORITO_BOOT
-  // ElTorito Device Emulation data 
+  // ElTorito Device Emulation data
   typedef struct {
     Bit8u  active;
     Bit8u  media;
@@ -723,20 +718,20 @@ typedef struct {
     Bit16u buffer_segment;
     Bit16u load_segment;
     Bit16u sector_count;
-    
+
     // Virtual device
     chs_t  vdevice;
     } cdemu_t;
 #endif // BX_ELTORITO_BOOT
-  
+
 #include "32bitgateway.h"
 
   // for access to EBDA area
-  //     The EBDA structure should conform to 
-  //     http://www.cybertrails.com/~fys/rombios.htm document
+  //     The EBDA structure should conform to
+  //     http://www.frontiernet.net/~fys/rombios.htm document
   //     I made the ata and cdemu structs begin at 0x121 in the EBDA seg
-  // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot 
-  // device tables are at 0x9ff00 -- 0x9ffff
+  // EBDA must be at most 768 bytes; it lives at EBDA_SEG, and the boot
+  // device tables are at EBDA_SEG:IPL_TABLE_OFFSET
   typedef struct {
     unsigned char ebda_size;
     unsigned char cmos_shutdown_status;
@@ -758,7 +753,7 @@ typedef struct {
 
     upcall_t upcall;
     } ebda_data_t;
-  
+
   #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
   #define EbdaData ((ebda_data_t *) 0)
 
@@ -772,7 +767,7 @@ typedef struct {
     Bit32u lba1;
     Bit32u lba2;
     } int13ext_t;
- 
+
   #define Int13Ext ((int13ext_t *) 0)
 
   // Disk Physical Table definition
@@ -798,7 +793,7 @@ typedef struct {
     Bit8u   reserved3;
     Bit8u   checksum;
     } dpt_t;
- 
+
   #define Int13DPT ((dpt_t *) 0)
 
 #endif // BX_USE_ATADRV
@@ -828,9 +823,9 @@ typedef struct {
     } r16;
   struct {
     Bit32u filler[4];
-    Bit8u  bl, bh; 
+    Bit8u  bl, bh;
     Bit16u filler1;
-    Bit8u  dl, dh; 
+    Bit8u  dl, dh;
     Bit16u filler2;
     Bit8u  cl, ch;
     Bit16u filler3;
@@ -863,6 +858,14 @@ typedef struct {
   Bit16u cs;
   flags_t flags;
   } iret_addr_t;
+
+typedef struct {
+  Bit16u type;
+  Bit16u flags;
+  Bit32u vector;
+  Bit32u description;
+  Bit32u reserved;
+  } ipl_entry_t;
 
 
 
@@ -903,8 +906,6 @@ static void           int70_function();
 static void           int70_function();
 static void           int74_function();
 static Bit16u         get_CS();
-//static Bit16u         get_DS();
-//static void           set_DS();
 static Bit16u         get_SS();
 static unsigned int   enqueue_key();
 static unsigned int   dequeue_key();
@@ -923,7 +924,10 @@ static void           keyboard_panic();
 static void           keyboard_panic();
 static void           shutdown_status_panic();
 static void           nmi_handler_msg();
-
+static void           delay_ticks();
+static void           delay_ticks_and_check_for_keystroke();
+
+static void           interactive_bootkey();
 static void           print_bios_banner();
 static void           print_boot_device();
 static void           print_boot_failure();
@@ -957,33 +961,9 @@ Bit16u cdrom_boot();
 
 #endif // BX_ELTORITO_BOOT
 
-static char bios_cvs_version_string[] = "$Revision: 1.138 $";
-static char bios_date_string[] = "$Date: 2005/05/07 15:55:26 $";
-
-static char CVSID[] = "$Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $";
-
-/* Offset to skip the CVS $Id: prefix */ 
-#define bios_version_string  (CVSID + 4)
-
-#define BIOS_PRINTF_HALT     1
-#define BIOS_PRINTF_SCREEN   2
-#define BIOS_PRINTF_INFO     4
-#define BIOS_PRINTF_DEBUG    8
-#define BIOS_PRINTF_ALL      (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
-#define BIOS_PRINTF_DEBHALT  (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
-
-#define printf(format, p...)  bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
-
-// Defines the output macros. 
-// BX_DEBUG goes to INFO port until we can easily choose debug info on a 
-// per-device basis. Debug info are sent only in debug mode
-#if DEBUG_ROMBIOS
-#  define BX_DEBUG(format, p...)  bios_printf(BIOS_PRINTF_INFO, format, ##p)   
 
-#else
-#  define BX_DEBUG(format, p...) 
-#endif
-#define BX_INFO(format, p...)   bios_printf(BIOS_PRINTF_INFO, format, ##p)
-#define BX_PANIC(format, p...)  bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+static char bios_cvs_version_string[] = "$Revision: 1.221 $ $Date: 2008/12/07 17:32:29 $";
+
+#define BIOS_COPYRIGHT_STRING "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
 
 #if DEBUG_ATA
 #  define BX_DEBUG_ATA(a...) BX_DEBUG(a)
@@ -1156,9 +1136,9 @@ static struct {
       { 0x5100, 0x5133, 0x7600,   none, 0x20 }, /* 3 PgDn */
       { 0x5200, 0x5230,   none,   none, 0x20 }, /* 0 Ins */
       { 0x5300, 0x532e,   none,   none, 0x20 }, /* Del */
-      {   none,   none,   none,   none, none }, /* ??? */
-      {   none,   none,   none,   none, none }, /* ??? */
-      {   none,   none,   none,   none, none }, /* ??? */
+      {   none,   none,   none,   none, none },
+      {   none,   none,   none,   none, none },
+      { 0x565c, 0x567c,   none,   none, none }, /* \| */
       { 0x8500, 0x8700, 0x8900, 0x8b00, none }, /* F11 */
       { 0x8600, 0x8800, 0x8a00, 0x8c00, none }, /* F12 */
       };
@@ -1415,31 +1395,6 @@ ASM_END
 ASM_END
 }
 
-//  Bit16u
-//get_DS()
-//{
-//ASM_START
-//  mov  ax, ds
-//ASM_END
-//}
-//
-//  void
-//set_DS(ds_selector)
-//  Bit16u ds_selector;
-//{
-//ASM_START
-//  push bp
-//  mov  bp, sp
-//
-//    push ax
-//    mov  ax, 4[bp] ; ds_selector
-//    mov  ds, ax
-//    pop  ax
-//
-//  pop  bp
-//ASM_END
-//}
-
   Bit16u
 get_SS()
 {
@@ -1455,7 +1410,7 @@ copy_e820_table()
   Bit8u nr_entries = read_byte(0x9000, 0x1e8);
   Bit32u base_mem;
   if (nr_entries > 32)
-       nr_entries = 32;
+       nr_entries = 32;
   write_word(0xe000, 0x8, nr_entries);
   memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
   /* Report the proper base memory size at address 0x0413: otherwise
@@ -1563,7 +1518,7 @@ wrch(c)
   pop  bp
   ASM_END
 }
- 
+
   void
 send(action, c)
   Bit16u action;
@@ -1619,14 +1574,121 @@ put_uint(action, val, width, neg)
   send(action, val - (nval * 10) + '0');
 }
 
+  void
+put_luint(action, val, width, neg)
+  Bit16u action;
+  unsigned long val;
+  short width;
+  bx_bool neg;
+{
+  unsigned long nval = val / 10;
+  if (nval)
+    put_luint(action, nval, width - 1, neg);
+  else {
+    while (--width > 0) send(action, ' ');
+    if (neg) send(action, '-');
+  }
+  send(action, val - (nval * 10) + '0');
+}
+
+void put_str(action, segment, offset)
+  Bit16u action;
+  Bit16u segment;
+  Bit16u offset;
+{
+  Bit8u c;
+
+  while (c = read_byte(segment, offset)) {
+    send(action, c);
+    offset++;
+  }
+}
+
+  void
+delay_ticks(ticks)
+  Bit16u ticks;
+{
+  long ticks_to_wait, delta;
+  Bit32u prev_ticks, t;
+
+   /*
+    * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
+    * We also have to be careful about interrupt storms.
+    */
+ASM_START
+  pushf
+  sti
+ASM_END
+  ticks_to_wait = ticks;
+  prev_ticks = read_dword(0x0, 0x46c);
+  do
+  {
+ASM_START
+    hlt
+ASM_END
+    t = read_dword(0x0, 0x46c);
+    if (t > prev_ticks)
+    {
+      delta = t - prev_ticks;     /* The temp var is required or bcc screws up. */
+      ticks_to_wait -= delta;
+    }
+    else if (t < prev_ticks)
+    {
+      ticks_to_wait -= t;         /* wrapped */
+    }
+
+    prev_ticks = t;
+  } while (ticks_to_wait > 0);
+ASM_START
+  cli
+  popf
+ASM_END
+}
+
+  Bit8u
+check_for_keystroke()
+{
+ASM_START
+  mov  ax, #0x100
+  int  #0x16
+  jz   no_key
+  mov  al, #1
+  jmp  done
+no_key:
+  xor  al, al
+done:
+ASM_END
+}
+
+  Bit8u
+get_keystroke()
+{
+ASM_START
+  mov  ax, #0x0
+  int  #0x16
+  xchg ah, al
+ASM_END
+}
+
+  void
+delay_ticks_and_check_for_keystroke(ticks, count)
+  Bit16u ticks, count;
+{
+  Bit16u i;
+  for (i = 1; i <= count; i++) {
+    delay_ticks(ticks);
+    if (check_for_keystroke())
+      break;
+  }
+}
+
 //--------------------------------------------------------------------------
 // bios_printf()
-//   A compact variable argument printf function which prints its output via
-//   an I/O port so that it can be logged by Bochs/Plex.  
-//   Currently, only %x is supported (or %02x, %04x, etc).
+//   A compact variable argument printf function.
 //
-//   Supports %[format_width][format]
-//   where format can be d,x,c,s
+//   Supports %[format_width][length]format
+//   where format can be x,X,u,d,s,S,c
+//   and the optional length modifier is l (ell)
 //--------------------------------------------------------------------------
   void
 bios_printf(action, s)
@@ -1637,7 +1699,7 @@ bios_printf(action, s)
   bx_bool  in_format;
   short i;
   Bit16u  *arg_ptr;
-  Bit16u   arg_seg, arg, nibble, shift_count, format_width;
+  Bit16u   arg_seg, arg, nibble, hibyte, shift_count, format_width, hexadd;
 
   arg_ptr = &s;
   arg_seg = get_SS();
@@ -1664,16 +1726,48 @@ bios_printf(action, s)
       else {
         arg_ptr++; // increment to next arg
         arg = read_word(arg_seg, arg_ptr);
-        if (c == 'x') {
+        if (c == 'x' || c == 'X') {
           if (format_width == 0)
             format_width = 4;
+          if (c == 'x')
+            hexadd = 'a';
+          else
+            hexadd = 'A';
           for (i=format_width-1; i>=0; i--) {
             nibble = (arg >> (4 * i)) & 0x000f;
-            send (action, (nibble<=9)? (nibble+'0') : (nibble-10+'A'));
+            send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
             }
           }
         else if (c == 'u') {
           put_uint(action, arg, format_width, 0);
+          }
+        else if (c == 'l') {
+          s++;
+          c = read_byte(get_CS(), s); /* is it ld,lx,lu? */
+          arg_ptr++; /* increment to next arg */
+          hibyte = read_word(arg_seg, arg_ptr);
+          if (c == 'd') {
+            if (hibyte & 0x8000)
+              put_luint(action, 0L-(((Bit32u) hibyte << 16) | arg), format_width-1, 1);
+            else
+              put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
+           }
+          else if (c == 'u') {
+            put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
+           }
+          else if (c == 'x' || c == 'X')
+           {
+            if (format_width == 0)
+              format_width = 8;
+            if (c == 'x')
+              hexadd = 'a';
+            else
+              hexadd = 'A';
+            for (i=format_width-1; i>=0; i--) {
+              nibble = ((((Bit32u) hibyte <<16) | arg) >> (4 * i)) & 0x000f;
+              send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
+              }
+           }
           }
         else if (c == 'd') {
           if (arg & 0x8000)
@@ -1682,7 +1776,13 @@ bios_printf(action, s)
             put_int(action, arg, format_width, 0);
           }
         else if (c == 's') {
-          bios_printf(action & (~BIOS_PRINTF_HALT), arg);
+          put_str(action, get_CS(), arg);
+          }
+        else if (c == 'S') {
+          hibyte = arg;
+          arg_ptr++;
+          arg = read_word(arg_seg, arg_ptr);
+          put_str(action, hibyte, arg);
           }
         else if (c == 'c') {
           send(action, arg);
@@ -1699,7 +1799,7 @@ bios_printf(action, s)
     }
 
   if (action & BIOS_PRINTF_HALT) {
-    // freeze in a busy loop.  
+    // freeze in a busy loop.
 ASM_START
     cli
  halt2_loop:
@@ -1733,8 +1833,8 @@ keyboard_init()
             max = 0x2000;
             }
         }
-  
-    // Due to timer issues, and if the IPS setting is > 15000000, 
+
+    // Due to timer issues, and if the IPS setting is > 15000000,
     // the incoming keys might not be flushed here. That will
     // cause a panic a few lines below.  See sourceforge bug report :
     // [ 642031 ] FATAL: Keyboard RESET error:993
@@ -1871,12 +1971,11 @@ keyboard_panic(status)
 keyboard_panic(status)
   Bit16u status;
 {
-  // If you're getting a 993 keyboard panic here, 
+  // If you're getting a 993 keyboard panic here,
   // please see the comment in keyboard_init
-  
+
   BX_PANIC("Keyboard error:%u\n",status);
 }
-
 
 #define CMOS_SHUTDOWN_S3 0xFE
 //--------------------------------------------------------------------------
@@ -1932,6 +2031,11 @@ shutdown_status_panic(status)
   BX_PANIC("Unimplemented shutdown status: %02x\n",(Bit8u)status);
 }
 
+void s3_resume_panic()
+{
+  BX_PANIC("Returned from s3_resume.\n");
+}
+
 //--------------------------------------------------------------------------
 // print_bios_banner
 //   displays a the bios version
@@ -1939,108 +2043,197 @@ void
 void
 print_bios_banner()
 {
-  printf(BX_APPNAME" BIOS, %d cpu%s, ", BX_SMP_PROCESSORS, BX_SMP_PROCESSORS>1?"s":"");
-  printf("%s %s\n", bios_cvs_version_string, bios_date_string);
+  printf(BX_APPNAME" BIOS - build: %s\n%s\nOptions: ",
+    BIOS_BUILD_DATE, bios_cvs_version_string);
+  printf(
+#if BX_APM
+  "apmbios "
+#endif
+#if BX_PCIBIOS
+  "pcibios "
+#endif
+#if BX_ELTORITO_BOOT
+  "eltorito "
+#endif
+#if BX_ROMBIOS32
+  "rombios32 "
+#endif
 #if BX_TCGBIOS
-  printf("TCG-enabled BIOS.\n");
+  "TCG-enabled"
 #endif
-  printf("\n");
+  "\n\n");
 }
-
 
 //--------------------------------------------------------------------------
 // BIOS Boot Specification 1.0.1 compatibility
 //
-// Very basic support for the BIOS Boot Specification, which allows expansion 
-// ROMs to register themselves as boot devices, instead of just stealing the 
+// Very basic support for the BIOS Boot Specification, which allows expansion
+// ROMs to register themselves as boot devices, instead of just stealing the
 // INT 19h boot vector.
-// 
+//
 // This is a hack: to do it properly requires a proper PnP BIOS and we aren't
-// one; we just lie to the option ROMs to make them behave correctly. 
-// We also don't support letting option ROMs register as bootable disk 
-// drives (BCVs), only as bootable devices (BEVs). 
+// one; we just lie to the option ROMs to make them behave correctly.
+// We also don't support letting option ROMs register as bootable disk
+// drives (BCVs), only as bootable devices (BEVs).
 //
 // http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm
 //--------------------------------------------------------------------------
 
-/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
-#define IPL_SEG              0x9ff0
-#define IPL_TABLE_OFFSET     0x0000
-#define IPL_TABLE_ENTRIES    8
-#define IPL_COUNT_OFFSET     0x0080  /* u16: number of valid table entries */
-#define IPL_SEQUENCE_OFFSET  0x0082  /* u16: next boot device */
-
-struct ipl_entry {
-  Bit16u type;
-  Bit16u flags;
-  Bit32u vector;
-  Bit32u description;
-  Bit32u reserved;
-};
-
-static void 
-init_boot_vectors() 
+static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
+
+static void
+init_boot_vectors()
 {
-  struct ipl_entry e; 
+  ipl_entry_t e;
   Bit16u count = 0;
   Bit16u ss = get_SS();
+  Bit16u ebda_seg = read_word(0x0040, 0x000E);
 
   /* Clear out the IPL table. */
-  memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff);
+  memsetb(ebda_seg, IPL_TABLE_OFFSET, 0, IPL_SIZE);
+
+  /* User selected device not set */
+  write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
 
   /* Floppy drive */
-  e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
-  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+  e.type = IPL_TYPE_FLOPPY; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
   count++;
 
   /* First HDD */
-  e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
-  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+  e.type = IPL_TYPE_HARDDISK; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
   count++;
 
 #if BX_ELTORITO_BOOT
   /* CDROM */
-  e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
-  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+  e.type = IPL_TYPE_CDROM; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
   count++;
-#endif  
+#endif
 
   /* Remember how many devices we have */
-  write_word(IPL_SEG, IPL_COUNT_OFFSET, count);
+  write_word(ebda_seg, IPL_COUNT_OFFSET, count);
   /* Not tried booting anything yet */
-  write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff);
+  write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xffff);
 }
 
 static Bit8u
 get_boot_vector(i, e)
-Bit16u i; struct ipl_entry *e; 
+Bit16u i; ipl_entry_t *e;
 {
   Bit16u count;
   Bit16u ss = get_SS();
+  Bit16u ebda_seg = read_word(0x0040, 0x000E);
   /* Get the count of boot devices, and refuse to overrun the array */
-  count = read_word(IPL_SEG, IPL_COUNT_OFFSET);
+  count = read_word(ebda_seg, IPL_COUNT_OFFSET);
   if (i >= count) return 0;
   /* OK to read this device */
-  memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
+  memcpyb(ss, e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
   return 1;
 }
 
+#if BX_ELTORITO_BOOT
+  void
+interactive_bootkey()
+{
+  ipl_entry_t e;
+  Bit16u count;
+  char description[33];
+  Bit8u scan_code;
+  Bit8u i;
+  Bit16u ss = get_SS();
+  Bit16u valid_choice = 0;
+  Bit16u ebda_seg = read_word(0x0040, 0x000E);
+
+  while (check_for_keystroke())
+    get_keystroke();
+
+  printf("\nPress F12 for boot menu.\n\n");
+
+  delay_ticks_and_check_for_keystroke(11, 5); /* ~3 seconds */
+  if (check_for_keystroke())
+  {
+    scan_code = get_keystroke();
+    if (scan_code == 0x86) /* F12 */
+    {
+      while (check_for_keystroke())
+        get_keystroke();
+
+      printf("Select boot device:\n\n");
+
+      count = read_word(ebda_seg, IPL_COUNT_OFFSET);
+      for (i = 0; i < count; i++)
+      {
+        memcpyb(ss, &e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (e), sizeof (e));
+        printf("%d. ", i+1);
+        switch(e.type)
+        {
+          case IPL_TYPE_FLOPPY:
+          case IPL_TYPE_HARDDISK:
+          case IPL_TYPE_CDROM:
+            printf("%s\n", drivetypes[e.type]);
+            break;
+          case IPL_TYPE_BEV:
+            printf("%s", drivetypes[4]);
+            if (e.description != 0)
+            {
+              memcpyb(ss, &description, (Bit16u)(e.description >> 16), (Bit16u)(e.description & 0xffff), 32);
+              description[32] = 0;
+              printf(" [%S]", ss, description);
+           }
+           printf("\n");
+           break;
+        }
+      }
+
+      count++;
+      while (!valid_choice) {
+        scan_code = get_keystroke();
+        if (scan_code == 0x01 || scan_code == 0x58) /* ESC or F12 */
+        {
+          valid_choice = 1;
+        }
+        else if (scan_code <= count)
+        {
+          valid_choice = 1;
+          scan_code -= 1;
+          /* Set user selected device */
+          write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, scan_code);
+        }
+      }
+    printf("\n");
+    }
+  }
+}
+#endif // BX_ELTORITO_BOOT
 
 //--------------------------------------------------------------------------
 // print_boot_device
 //   displays the boot device
 //--------------------------------------------------------------------------
 
-static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
-
 void
-print_boot_device(type)
+print_boot_device(e)
+  ipl_entry_t *e;
+{
   Bit16u type;
-{
-  /* NIC appears as type 0x80 */ 
-  if (type == 0x80 ) type = 0x4;
-  if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n"); 
-  printf("Booting from %s...\n", drivetypes[type]);
+  char description[33];
+  Bit16u ss = get_SS();
+  type = e->type;
+  /* NIC appears as type 0x80 */
+  if (type == IPL_TYPE_BEV) type = 0x4;
+  if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n");
+  printf("Booting from %s", drivetypes[type]);
+  /* print product string if BEV */
+  if (type == 4 && e->description != 0) {
+    /* first 32 bytes are significant */
+    memcpyb(ss, &description, (Bit16u)(e->description >> 16), (Bit16u)(e->description & 0xffff), 32);
+    /* terminate string */
+    description[32] = 0;
+    printf(" [%S]", ss, description);
+  }
+  printf("...\n");
 }
 
 //--------------------------------------------------------------------------
@@ -2051,17 +2244,17 @@ print_boot_failure(type, reason)
 print_boot_failure(type, reason)
   Bit16u type; Bit8u reason;
 {
-  if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n"); 
+  if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n");
 
   printf("Boot from %s failed", drivetypes[type]);
   if (type < 4) {
     /* Report the reason too */
-  if (reason==0) 
-    printf(": not a bootable disk");
-  else
-    printf(": could not read the boot disk");
+    if (reason==0)
+      printf(": not a bootable disk");
+    else
+      printf(": could not read the boot disk");
   }
-  printf("\n");
+  printf("\n\n");
 }
 
 //--------------------------------------------------------------------------
@@ -2073,218 +2266,9 @@ print_cdromboot_failure( code )
   Bit16u code;
 {
   bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "CDROM boot failure code : %04x\n",code);
-  
+
   return;
 }
-
-#define WAIT_HZ 18
-/**
- * Check for keystroke.
- * @returns    True if keystroke available, False if not.
- */
-Bit8u check_for_keystroke()
-{
-ASM_START
-    mov  ax, #0x100
-    int  #0x16
-    jz   no_key
-    mov  al, #1
-    jmp  done
-no_key:
-    xor  al, al
-done:
-ASM_END
-}
-
-/**
- * Get keystroke.
- * @returns    BIOS scan code.
- */
-Bit8u get_keystroke()
-{
-ASM_START
-    mov  ax, #0x0
-    int  #0x16
-    xchg ah, al
-ASM_END
-}
-
-/**
- * Waits (sleeps) for the given number of ticks.
- * Checks for keystroke.
- *
- * @returns BIOS scan code if available, 0 if not.
- * @param   ticks       Number of ticks to sleep.
- * @param   stop_on_key Whether to stop immediately upon keypress.
- */
-Bit8u wait(ticks, stop_on_key)
-  Bit16u ticks;
-  Bit8u stop_on_key;
-{
-    long ticks_to_wait, delta;
-    Bit32u prev_ticks, t;
-    Bit8u scan_code = 0;
-
-    /*
-     * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
-     * We also have to be careful about interrupt storms.
-     */
-    ticks_to_wait = ticks;
-    prev_ticks = read_dword(0x0, 0x46c);
-    do
-    {
-        t = read_dword(0x0, 0x46c);
-        if (t > prev_ticks)
-        {
-            delta = t - prev_ticks;     /* The temp var is required or bcc screws up. */
-            ticks_to_wait -= delta;
-        }
-        else if (t < prev_ticks)
-            ticks_to_wait -= t;         /* wrapped */
-        prev_ticks = t;
-
-        if (check_for_keystroke())
-        {
-            scan_code = get_keystroke();
-            bios_printf(BIOS_PRINTF_DEBUG, "Key pressed: %x\n", scan_code);
-            if (stop_on_key)
-                return scan_code;
-        }
-    } while (ticks_to_wait > 0);
-    return scan_code;
-}
-
-static void clearscreen() {
-    /* Hide cursor, clear screen and move cursor to starting position */
-ASM_START
-        push bx
-        push cx
-        push dx
-
-        mov  ax, #0x100
-        mov  cx, #0x1000
-        int  #0x10
-
-        mov  ax, #0x700
-        mov  bh, #7
-        xor  cx, cx
-        mov  dx, #0x184f
-        int  #0x10
-
-        mov  ax, #0x200
-        xor  bx, bx
-        xor  dx, dx
-        int  #0x10
-
-        pop  dx
-        pop  cx
-        pop  bx
-ASM_END
-}
-
-int bootmenu(selected)
-  int selected;
-{
-    Bit8u scode;
-    int max;
-
-    /* get the number of boot devices */
-    max = read_word(IPL_SEG, IPL_COUNT_OFFSET);
-
-    for(;;) {
-        if (selected > max || selected < 1) selected = 1;
-        clearscreen();
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n\n\n\n\n\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "          Select boot device\n\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            1. Floppy\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            2. Hard drive\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            3. CD-ROM\n");
-        if (max == 4)
-            bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            4. Network\n");
-        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n          Currently selected: %d\n", selected);
-
-        do {
-            scode = wait(WAIT_HZ, 1);
-        } while (scode == 0);
-        switch(scode) {
-        case 0x02:
-        case 0x03:
-        case 0x04:
-            selected = scode - 1;
-            break;
-        case 0x05:
-            if (max == 4)
-                selected = scode -1 ;
-            else
-                scode = 0;
-            break;
-        case 0x48:
-            selected -= 1;
-            if (selected < 1)
-                selected = 1;
-            scode = 0;
-            break;
-        case 0x50:
-            selected += 1;
-            if (selected > max)
-                selected = max;
-            scode = 0;
-            break;
-        case 0x1c:
-            break;
-        default:
-            scode = 0;
-            break;
-        }
-        if (scode != 0)
-            break;
-    }
-
-    switch (selected) {
-    case 1:
-        return 0x3D;
-    case 2:
-        return 0x3E;
-    case 3:
-        return 0x3F;
-    case 4:
-        return 0x58;
-    default:
-        return 0;
-    }
-}
-
-void interactive_bootkey()
-{
-    Bit16u i;
-    Bit8u scan = 0;
-
-    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
-                "\n\nPress F10 to select boot device.\n");
-
-    scan = wait(1, 0);
-    if (scan == 0x44)
-        scan = bootmenu(inb_cmos(0x3d) & 0x0f);
-
-    /* set the default based on the keypress or menu */
-    switch(scan) {
-    case 0x3D:
-        outb_cmos(0x3d, 0x01);
-        break;
-    case 0x3E:
-        outb_cmos(0x3d, 0x02);
-        break;
-    case 0x3F:
-        outb_cmos(0x3d, 0x03);
-        break;
-    case 0x58:
-        outb_cmos(0x3d, 0x04);
-        break;
-    default:
-        break;
-    }
-}
-
 
 void
 nmi_handler_msg()
@@ -2304,7 +2288,7 @@ log_bios_start()
 #if BX_DEBUG_SERIAL
   outb(BX_DEBUG_PORT+UART_LCR, 0x03); /* setup for serial logging: 8N1 */
 #endif
-  BX_INFO("%s\n", bios_version_string);
+  BX_INFO("%s\n", bios_cvs_version_string);
 }
 
   bx_bool
@@ -2339,39 +2323,35 @@ debugger_off()
   outb(0xfedc, 0x00);
 }
 
-void 
+int
 s3_resume()
 {
     Bit32u s3_wakeup_vector;
-    Bit16u s3_wakeup_ip, s3_wakeup_cs;
-    Bit8u cmos_shutdown_status;
-
+    Bit8u s3_resume_flag;
+
+    s3_resume_flag = read_byte(0x40, 0xb0);
+#ifdef HVMASSIST
+    s3_wakeup_vector = get_s3_waking_vector();
+#else
+    s3_wakeup_vector = read_dword(0x40, 0xb2);
+#endif
+
+    BX_INFO("S3 resume called %x 0x%lx\n", s3_resume_flag, s3_wakeup_vector);
+    if (s3_resume_flag != CMOS_SHUTDOWN_S3 || !s3_wakeup_vector)
+           return 0;
+
+    write_byte(0x40, 0xb0, 0);
+
+    /* setup wakeup vector */
+    write_word(0x40, 0xb6, (s3_wakeup_vector & 0xF)); /* IP */
+    write_word(0x40, 0xb8, (s3_wakeup_vector >> 4)); /* CS */
+
+    BX_INFO("S3 resume jump to %x:%x\n", (s3_wakeup_vector >> 4),
+                   (s3_wakeup_vector & 0xF));
 ASM_START
-    push ds
-    push ax
-    mov ax, #EBDA_SEG
-    mov ds, ax
-    mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET]
-    mov .s3_resume.cmos_shutdown_status[bp], al
-    pop ax
-    pop ds
+    jmpf [0x04b6]
 ASM_END
-
-    if (cmos_shutdown_status != CMOS_SHUTDOWN_S3)
-        return;
-
-    s3_wakeup_vector = get_s3_waking_vector();
-    if (!s3_wakeup_vector)
-        return;
-
-    s3_wakeup_ip = s3_wakeup_vector & 0xF;
-    s3_wakeup_cs = s3_wakeup_vector >> 4;
-
-ASM_START
-    push .s3_resume.s3_wakeup_cs[bp]
-    push .s3_resume.s3_wakeup_ip[bp]
-    retf
-ASM_END
+    return 1;
 }
 
 #if BX_USE_ATADRV
@@ -2421,6 +2401,7 @@ ASM_END
 // bits 7-4 of the device/head (CB_DH) reg
 #define ATA_CB_DH_DEV0 0xa0    // select device 0
 #define ATA_CB_DH_DEV1 0xb0    // select device 1
+#define ATA_CB_DH_LBA 0x40    // use LBA
 
 // status reg (CB_STAT and CB_ASTAT) bits
 #define ATA_CB_STAT_BSY  0x80  // busy
@@ -2470,6 +2451,7 @@ ASM_END
 #define ATA_CMD_READ_SECTORS                 0x20
 #define ATA_CMD_READ_VERIFY_SECTORS          0x40
 #define ATA_CMD_RECALIBRATE                  0x10
+#define ATA_CMD_REQUEST_SENSE                0x03
 #define ATA_CMD_SEEK                         0x70
 #define ATA_CMD_SET_FEATURES                 0xEF
 #define ATA_CMD_SET_MULTIPLE_MODE            0xC6
@@ -2514,7 +2496,7 @@ ASM_END
 #define ATA_DATA_NO      0x00
 #define ATA_DATA_IN      0x01
 #define ATA_DATA_OUT     0x02
-  
+
 // ---------------------------------------------------------------------------
 // ATA/ATAPI driver : initialization
 // ---------------------------------------------------------------------------
@@ -2523,7 +2505,7 @@ void ata_init( )
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit8u  channel, device;
 
-  // Channels info init. 
+  // Channels info init.
   for (channel=0; channel<BX_MAX_ATA_INTERFACES; channel++) {
     write_byte(ebda_seg,&EbdaData->ata.channels[channel].iface,ATA_IFACE_NONE);
     write_word(ebda_seg,&EbdaData->ata.channels[channel].iobase1,0x0);
@@ -2531,7 +2513,7 @@ void ata_init( )
     write_byte(ebda_seg,&EbdaData->ata.channels[channel].irq,0);
     }
 
-  // Devices info init. 
+  // Devices info init.
   for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
     write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
     write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_NONE);
@@ -2546,11 +2528,12 @@ void ata_init( )
     write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads,0);
     write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders,0);
     write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt,0);
-    
-    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors,0L);
+
+    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low,0L);
+    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high,0L);
     }
 
-  // hdidmap  and cdidmap init. 
+  // hdidmap  and cdidmap init.
   for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
     write_byte(ebda_seg,&EbdaData->ata.hdidmap[device],BX_MAX_ATA_DEVICES);
     write_byte(ebda_seg,&EbdaData->ata.cdidmap[device],BX_MAX_ATA_DEVICES);
@@ -2558,6 +2541,58 @@ void ata_init( )
 
   write_byte(ebda_seg,&EbdaData->ata.hdcount,0);
   write_byte(ebda_seg,&EbdaData->ata.cdcount,0);
+}
+
+#define TIMEOUT 0
+#define BSY 1
+#define NOT_BSY 2
+#define NOT_BSY_DRQ 3
+#define NOT_BSY_NOT_DRQ 4
+#define NOT_BSY_RDY 5
+
+#define IDE_TIMEOUT 32000u //32 seconds max for IDE ops
+
+int await_ide();
+static int await_ide(when_done,base,timeout)
+  Bit8u when_done;
+  Bit16u base;
+  Bit16u timeout;
+{
+  Bit32u time=0,last=0;
+  Bit16u status;
+  Bit8u result;
+  status = inb(base + ATA_CB_STAT); // for the times you're supposed to throw one away
+  for(;;) {
+    status = inb(base+ATA_CB_STAT);
+    time++;
+    if (when_done == BSY)
+      result = status & ATA_CB_STAT_BSY;
+    else if (when_done == NOT_BSY)
+      result = !(status & ATA_CB_STAT_BSY);
+    else if (when_done == NOT_BSY_DRQ)
+      result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_DRQ);
+    else if (when_done == NOT_BSY_NOT_DRQ)
+      result = !(status & ATA_CB_STAT_BSY) && !(status & ATA_CB_STAT_DRQ);
+    else if (when_done == NOT_BSY_RDY)
+      result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_RDY);
+    else if (when_done == TIMEOUT)
+      result = 0;
+
+    if (result) return 0;
+    if (time>>16 != last) // mod 2048 each 16 ms
+    {
+      last = time >>16;
+      BX_DEBUG_ATA("await_ide: (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
+    }
+    if (status & ATA_CB_STAT_ERR)
+    {
+      BX_DEBUG_ATA("await_ide: ERROR (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
+      return -1;
+    }
+    if ((timeout == 0) || ((time>>11) > timeout)) break;
+  }
+  BX_INFO("IDE time out\n");
+  return -1;
 }
 
 // ---------------------------------------------------------------------------
@@ -2600,7 +2635,7 @@ void ata_detect( )
 
   // Device detection
   hdcount=cdcount=0;
-  
+
   for(device=0; device<BX_MAX_ATA_DEVICES; device++) {
     Bit16u iobase1, iobase2;
     Bit8u  channel, slave, shift;
@@ -2630,33 +2665,34 @@ void ata_detect( )
 
     if ( (sc == 0x55) && (sn == 0xaa) ) {
       
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_UNKNOWN);
-    
+
       // reset the channel
-      ata_reset (device);
-      
+      ata_reset(device);
+
       // check for ATA or ATAPI
       outb(iobase1+ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
       sc = inb(iobase1+ATA_CB_SC);
       sn = inb(iobase1+ATA_CB_SN);
-      if ( (sc==0x01) && (sn==0x01) ) {
+      if ((sc==0x01) && (sn==0x01)) {
         cl = inb(iobase1+ATA_CB_CL);
         ch = inb(iobase1+ATA_CB_CH);
         st = inb(iobase1+ATA_CB_STAT);
 
-        if ( (cl==0x14) && (ch==0xeb) ) {
+        if ((cl==0x14) && (ch==0xeb)) {
           
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATAPI);
-          }
-        else if ( (cl==0x00) && (ch==0x00) && (st!=0x00) ) {
+        } else if ((cl==0x00) && (ch==0x00) && (st!=0x00)) {
           
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATA);
-          }
+        } else if ((cl==0xff) && (ch==0xff)) {
+          
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
         }
       }
+    }
 
     type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
-    
-    // Now we send a IDENTIFY command to ATA device 
+
+    // Now we send a IDENTIFY command to ATA device
     if(type == ATA_TYPE_ATA) {
-      Bit32u sectors;
+      Bit32u sectors_low, sectors_high;
       Bit16u cylinders, heads, spt, blksize;
       Bit8u  translation, removable, mode;
 
@@ -2667,21 +2703,26 @@ void ata_detect( )
       write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
       write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
 
-      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, 
get_SS(),buffer) !=0 )
+      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, 0L, 
get_SS(),buffer) !=0 )
         BX_PANIC("ata-detect: Failed to detect ATA device\n");
 
       removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
-#ifndef        NO_PIO32
+#ifndef        NO_PIO32
       mode      = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : 
ATA_MODE_PIO16;
 #endif
-
       blksize   = read_word(get_SS(),buffer+10);
-      
+
       cylinders = read_word(get_SS(),buffer+(1*2)); // word 1
       heads     = read_word(get_SS(),buffer+(3*2)); // word 3
       spt       = read_word(get_SS(),buffer+(6*2)); // word 6
 
-      sectors   = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
+      if (read_word(get_SS(),buffer+(83*2)) & (1 << 10)) { // word 83 - lba48 
support
+        sectors_low  = read_dword(get_SS(),buffer+(100*2)); // word 100 and 
word 101
+        sectors_high = read_dword(get_SS(),buffer+(102*2)); // word 102 and 
word 103
+      } else {
+        sectors_low = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 
61
+        sectors_high = 0;
+      }
 
       write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
       write_byte(ebda_seg,&EbdaData->ata.devices[device].removable, removable);
@@ -2690,7 +2731,8 @@ void ata_detect( )
       write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads, heads);
       write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders, 
cylinders);
       write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt, spt);
-      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors, sectors);
+      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, 
sectors_low);
+      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high, 
sectors_high);
       BX_INFO("ata%d-%d: PCHS=%u/%d/%d translation=", channel, 
slave,cylinders, heads, spt);
 
       translation = inb_cmos(0x39 + channel/2);
@@ -2718,14 +2760,14 @@ void ata_detect( )
           break;
         case ATA_TRANSLATION_LBA:
           spt = 63;
-          sectors /= 63;
-          heads = sectors / 1024;
+          sectors_low /= 63;
+          heads = sectors_low / 1024;
           if (heads>128) heads = 255;
           else if (heads>64) heads = 128;
           else if (heads>32) heads = 64;
           else if (heads>16) heads = 32;
           else heads=16;
-          cylinders = sectors / heads;
+          cylinders = sectors_low / heads;
           break;
         case ATA_TRANSLATION_RECHS:
           // Take care not to overflow
@@ -2752,15 +2794,15 @@ void ata_detect( )
       write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.heads, heads);
       write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.cylinders, 
cylinders);
       write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.spt, spt);
- 
-      // fill hdidmap 
+
+      // fill hdidmap
       write_byte(ebda_seg,&EbdaData->ata.hdidmap[hdcount], device);
       hdcount++;
       }
-    
+
     // Now we send a IDENTIFY command to ATAPI device
     if(type == ATA_TYPE_ATAPI) {
- 
+
       Bit8u  type, removable, mode;
       Bit16u blksize;
 
@@ -2771,12 +2813,12 @@ void ata_detect( )
       
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM);
       write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
 
-      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 
0L, get_SS(),buffer) != 0)
+      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 
0L, 0L, get_SS(),buffer) != 0)
         BX_PANIC("ata-detect: Failed to detect ATAPI device\n");
 
       type      = read_byte(get_SS(),buffer+1) & 0x1f;
       removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
-#ifndef        NO_PIO32
+#ifndef        NO_PIO32
       mode      = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : 
ATA_MODE_PIO16;
 #endif
       blksize   = 2048;
@@ -2786,24 +2828,24 @@ void ata_detect( )
       write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, mode);
       write_word(ebda_seg,&EbdaData->ata.devices[device].blksize, blksize);
 
-      // fill cdidmap 
+      // fill cdidmap
       write_byte(ebda_seg,&EbdaData->ata.cdidmap[cdcount], device);
       cdcount++;
       }
-  
+
       {
       Bit32u sizeinmb;
       Bit16u ataversion;
       Bit8u  c, i, version, model[41];
-      
+
       switch (type) {
         case ATA_TYPE_ATA:
-          sizeinmb = 
read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors);
-          sizeinmb >>= 11;
+          sizeinmb = 
(read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high) << 21)
+            | (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low) 
>> 11);
         case ATA_TYPE_ATAPI:
           // Read ATA/ATAPI version
           
ataversion=((Bit16u)(read_byte(get_SS(),buffer+161))<<8)|read_byte(get_SS(),buffer+160);
-          for(version=15;version>0;version--) { 
+          for(version=15;version>0;version--) {
             if((ataversion&(1<<version))!=0)
             break;
             }
@@ -2812,7 +2854,7 @@ void ata_detect( )
           for(i=0;i<20;i++){
             
write_byte(get_SS(),model+(i*2),read_byte(get_SS(),buffer+(i*2)+54+1));
             
write_byte(get_SS(),model+(i*2)+1,read_byte(get_SS(),buffer+(i*2)+54));
-            }
+          }
 
           // Reformat
           write_byte(get_SS(),model+40,0x00);
@@ -2820,7 +2862,13 @@ void ata_detect( )
             if(read_byte(get_SS(),model+i)==0x20)
               write_byte(get_SS(),model+i,0x00);
             else break;
+          }
+          if (i>36) {
+            write_byte(get_SS(),model+36,0x00);
+            for(i=35;i>32;i--){
+              write_byte(get_SS(),model+i,0x2E);
             }
+          }
           break;
         }
 
@@ -2828,10 +2876,10 @@ void ata_detect( )
         case ATA_TYPE_ATA:
           printf("ata%d %s: ",channel,slave?" slave":"master");
           i=0; while(c=read_byte(get_SS(),model+i++)) printf("%c",c);
-          if (sizeinmb < 1UL<<16)
-            printf(" ATA-%d Hard-Disk (%04u 
MBytes)\n",version,(Bit16u)sizeinmb);
-          else
-            printf(" ATA-%d Hard-Disk (%04u 
GBytes)\n",version,(Bit16u)(sizeinmb>>10));
+         if (sizeinmb < (1UL<<16))
+            printf(" ATA-%d Hard-Disk (%4u MBytes)\n", version, 
(Bit16u)sizeinmb);
+         else
+            printf(" ATA-%d Hard-Disk (%4u GBytes)\n", version, 
(Bit16u)(sizeinmb>>10));
           break;
         case ATA_TYPE_ATAPI:
           printf("ata%d %s: ",channel,slave?" slave":"master");
@@ -2852,17 +2900,17 @@ void ata_detect( )
   write_byte(ebda_seg,&EbdaData->ata.hdcount, hdcount);
   write_byte(ebda_seg,&EbdaData->ata.cdcount, cdcount);
   write_byte(0x40,0x75, hdcount);
- 
+
   printf("\n");
 
   // FIXME : should use bios=cmos|auto|disable bits
   // FIXME : should know about translation bits
-  // FIXME : move hard_drive_post here 
-  
+  // FIXME : move hard_drive_post here
+
 }
 
 // ---------------------------------------------------------------------------
-// ATA/ATAPI driver : software reset 
+// ATA/ATAPI driver : software reset
 // ---------------------------------------------------------------------------
 // ATA-3
 // 8.2.1 Software reset - Device 0
@@ -2872,7 +2920,8 @@ Bit16u device;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u iobase1, iobase2;
-  Bit8u  channel, slave, sn, sc; 
+  Bit8u  channel, slave, sn, sc;
+  Bit8u  type;
   Bit16u max;
 
   channel = device / 2;
@@ -2887,16 +2936,13 @@ Bit16u device;
   outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN | ATA_CB_DC_SRST);
 
 // 8.2.1 (b) -- wait for BSY
-  max=0xff;
-  while(--max>0) {
-    Bit8u status = inb(iobase1+ATA_CB_STAT);
-    if ((status & ATA_CB_STAT_BSY) != 0) break;
-  }
+  await_ide(BSY, iobase1, 20);
 
 // 8.2.1 (f) -- clear SRST
   outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
 
-  if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != 
ATA_TYPE_NONE) {
+  type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
+  if (type != ATA_TYPE_NONE) {
 
 // 8.2.1 (g) -- check for sc==sn==0x01
     // select device
@@ -2905,21 +2951,14 @@ Bit16u device;
     sn = inb(iobase1+ATA_CB_SN);
 
     if ( (sc==0x01) && (sn==0x01) ) {
+      if (type == ATA_TYPE_ATA) //ATA
+        await_ide(NOT_BSY_RDY, iobase1, IDE_TIMEOUT);
+      else //ATAPI
+        await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
+    }
 
 // 8.2.1 (h) -- wait for not BSY
-      max=0xff;
-      while(--max>0) {
-        Bit8u status = inb(iobase1+ATA_CB_STAT);
-        if ((status & ATA_CB_STAT_BSY) == 0) break;
-        }
-      }
-    }
-
-// 8.2.1 (i) -- wait for DRDY
-  max=0xfff;
-  while(--max>0) {
-    Bit8u status = inb(iobase1+ATA_CB_STAT);
-      if ((status & ATA_CB_STAT_RDY) != 0) break;
+    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
   }
 
   // Enable interrupts
@@ -2927,7 +2966,7 @@ Bit16u device;
 }
 
 // ---------------------------------------------------------------------------
-// ATA/ATAPI driver : execute a non data command 
+// ATA/ATAPI driver : execute a non data command
 // ---------------------------------------------------------------------------
 
 Bit16u ata_cmd_non_data()
@@ -2945,9 +2984,9 @@ Bit16u ata_cmd_non_data()
       // 5 : more sectors to read/verify
       // 6 : no sectors left to write
       // 7 : more sectors to write
-Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba, 
segment, offset)
+Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, 
lba_low, lba_high, segment, offset)
 Bit16u device, command, count, cylinder, head, sector, segment, offset;
-Bit32u lba;
+Bit32u lba_low, lba_high;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u iobase1, iobase2, blksize;
@@ -2976,22 +3015,20 @@ Bit32u lba;
 
   // sector will be 0 only on lba access. Convert to lba-chs
   if (sector == 0) {
-    if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
+    if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
       outb(iobase1 + ATA_CB_FR, 0x00);
       outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
-      outb(iobase1 + ATA_CB_SN, lba >> 24);
-      outb(iobase1 + ATA_CB_CL, 0);
-      outb(iobase1 + ATA_CB_CH, 0);
+      outb(iobase1 + ATA_CB_SN, lba_low >> 24);
+      outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
+      outb(iobase1 + ATA_CB_CH, lba_high >> 8);
       command |= 0x04;
       count &= (1UL << 8) - 1;
-      lba &= (1UL << 24) - 1;
+      lba_low &= (1UL << 24) - 1;
       }
-    sector = (Bit16u) (lba & 0x000000ffL);
-    lba >>= 8;
-    cylinder = (Bit16u) (lba & 0x0000ffffL);
-    lba >>= 16;
-    head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
-    }
+    sector = (Bit16u) (lba_low & 0x000000ffL);
+    cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
+    head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
+  }
 
   outb(iobase1 + ATA_CB_FR, 0x00);
   outb(iobase1 + ATA_CB_SC, count);
@@ -3001,10 +3038,8 @@ Bit32u lba;
   outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | 
(Bit8u) head );
   outb(iobase1 + ATA_CB_CMD, command);
 
-  while (1) {
-    status = inb(iobase1 + ATA_CB_STAT);
-    if ( !(status & ATA_CB_STAT_BSY) ) break;
-    }
+  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+  status = inb(iobase1 + ATA_CB_STAT);
 
   if (status & ATA_CB_STAT_ERR) {
     BX_DEBUG_ATA("ata_cmd_data_in : read error\n");
@@ -3025,12 +3060,12 @@ ASM_START
 ASM_START
         push bp
         mov  bp, sp
-        mov  di, _ata_cmd_data_in.offset + 2[bp]  
-        mov  ax, _ata_cmd_data_in.segment + 2[bp] 
-        mov  cx, _ata_cmd_data_in.blksize + 2[bp] 
+        mov  di, _ata_cmd_data_in.offset + 2[bp]
+        mov  ax, _ata_cmd_data_in.segment + 2[bp]
+        mov  cx, _ata_cmd_data_in.blksize + 2[bp]
 
         ;; adjust if there will be an overrun. 2K max sector size
-        cmp   di, #0xf800 ;; 
+        cmp   di, #0xf800 ;;
         jbe   ata_in_no_adjust
 
 ata_in_adjust:
@@ -3042,7 +3077,7 @@ ata_in_no_adjust:
 
         mov   dx, _ata_cmd_data_in.iobase1 + 2[bp] ;; ATA data read port
 
-        mov  ah, _ata_cmd_data_in.mode + 2[bp] 
+        mov  ah, _ata_cmd_data_in.mode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_in_32
 
@@ -3064,9 +3099,10 @@ ASM_END
     current++;
     write_word(ebda_seg, &EbdaData->ata.trsfsectors,current);
     count--;
+    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
     status = inb(iobase1 + ATA_CB_STAT);
     if (count == 0) {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) )
           != ATA_CB_STAT_RDY ) {
         BX_DEBUG_ATA("ata_cmd_data_in : no sectors left (status %02x)\n", 
(unsigned) status);
         return 4;
@@ -3074,7 +3110,7 @@ ASM_END
       break;
       }
     else {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) )
           != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
         BX_DEBUG_ATA("ata_cmd_data_in : more sectors left (status %02x)\n", 
(unsigned) status);
         return 5;
@@ -3099,9 +3135,9 @@ ASM_END
       // 5 : more sectors to read/verify
       // 6 : no sectors left to write
       // 7 : more sectors to write
-Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba, 
segment, offset)
+Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, 
lba_low, lba_high, segment, offset)
 Bit16u device, command, count, cylinder, head, sector, segment, offset;
-Bit32u lba;
+Bit32u lba_low, lba_high;
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u iobase1, iobase2, blksize;
@@ -3130,22 +3166,20 @@ Bit32u lba;
 
   // sector will be 0 only on lba access. Convert to lba-chs
   if (sector == 0) {
-    if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
+    if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
       outb(iobase1 + ATA_CB_FR, 0x00);
       outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
-      outb(iobase1 + ATA_CB_SN, lba >> 24);
-      outb(iobase1 + ATA_CB_CL, 0);
-      outb(iobase1 + ATA_CB_CH, 0);
+      outb(iobase1 + ATA_CB_SN, lba_low >> 24);
+      outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
+      outb(iobase1 + ATA_CB_CH, lba_high >> 8);
       command |= 0x04;
       count &= (1UL << 8) - 1;
-      lba &= (1UL << 24) - 1;
+      lba_low &= (1UL << 24) - 1;
       }
-    sector = (Bit16u) (lba & 0x000000ffL);
-    lba >>= 8;
-    cylinder = (Bit16u) (lba & 0x0000ffffL);
-    lba >>= 16;
-    head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
-    }
+    sector = (Bit16u) (lba_low & 0x000000ffL);
+    cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
+    head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
+  }
 
   outb(iobase1 + ATA_CB_FR, 0x00);
   outb(iobase1 + ATA_CB_SC, count);
@@ -3155,10 +3189,8 @@ Bit32u lba;
   outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | 
(Bit8u) head );
   outb(iobase1 + ATA_CB_CMD, command);
 
-  while (1) {
-    status = inb(iobase1 + ATA_CB_STAT);
-    if ( !(status & ATA_CB_STAT_BSY) ) break;
-    }
+  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+  status = inb(iobase1 + ATA_CB_STAT);
 
   if (status & ATA_CB_STAT_ERR) {
     BX_DEBUG_ATA("ata_cmd_data_out : read error\n");
@@ -3179,12 +3211,12 @@ ASM_START
 ASM_START
         push bp
         mov  bp, sp
-        mov  si, _ata_cmd_data_out.offset + 2[bp]  
-        mov  ax, _ata_cmd_data_out.segment + 2[bp] 
-        mov  cx, _ata_cmd_data_out.blksize + 2[bp] 
+        mov  si, _ata_cmd_data_out.offset + 2[bp]
+        mov  ax, _ata_cmd_data_out.segment + 2[bp]
+        mov  cx, _ata_cmd_data_out.blksize + 2[bp]
 
         ;; adjust if there will be an overrun. 2K max sector size
-        cmp   si, #0xf800 ;; 
+        cmp   si, #0xf800 ;;
         jbe   ata_out_no_adjust
 
 ata_out_adjust:
@@ -3196,7 +3228,7 @@ ata_out_no_adjust:
 
         mov   dx, _ata_cmd_data_out.iobase1 + 2[bp] ;; ATA data write port
 
-        mov  ah, _ata_cmd_data_out.mode + 2[bp] 
+        mov  ah, _ata_cmd_data_out.mode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_out_32
 
@@ -3222,7 +3254,7 @@ ASM_END
     count--;
     status = inb(iobase1 + ATA_CB_STAT);
     if (count == 0) {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
           != ATA_CB_STAT_RDY ) {
         BX_DEBUG_ATA("ata_cmd_data_out : no sectors left (status %02x)\n", 
(unsigned) status);
         return 6;
@@ -3230,7 +3262,7 @@ ASM_END
       break;
       }
     else {
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
+      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) )
           != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
         BX_DEBUG_ATA("ata_cmd_data_out : more sectors left (status %02x)\n", 
(unsigned) status);
         return 7;
@@ -3297,19 +3329,17 @@ Bit32u length;
   if (status & ATA_CB_STAT_BSY) return 2;
 
   outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
-  // outb(iobase1 + ATA_CB_FR, 0x00);
-  // outb(iobase1 + ATA_CB_SC, 0x00);
-  // outb(iobase1 + ATA_CB_SN, 0x00);
+  outb(iobase1 + ATA_CB_FR, 0x00);
+  outb(iobase1 + ATA_CB_SC, 0x00);
+  outb(iobase1 + ATA_CB_SN, 0x00);
   outb(iobase1 + ATA_CB_CL, 0xfff0 & 0x00ff);
   outb(iobase1 + ATA_CB_CH, 0xfff0 >> 8);
   outb(iobase1 + ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
   outb(iobase1 + ATA_CB_CMD, ATA_CMD_PACKET);
 
   // Device should ok to receive command
-  while (1) {
-    status = inb(iobase1 + ATA_CB_STAT);
-    if ( !(status & ATA_CB_STAT_BSY) ) break;
-    }
+  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+  status = inb(iobase1 + ATA_CB_STAT);
 
   if (status & ATA_CB_STAT_ERR) {
     BX_DEBUG_ATA("ata_cmd_packet : error, status is %02x\n",status);
@@ -3326,13 +3356,13 @@ Bit32u length;
   // Send command to device
 ASM_START
       sti  ;; enable higher priority interrupts
- 
+
       push bp
       mov  bp, sp
-    
-      mov  si, _ata_cmd_packet.cmdoff + 2[bp]  
-      mov  ax, _ata_cmd_packet.cmdseg + 2[bp] 
-      mov  cx, _ata_cmd_packet.cmdlen + 2[bp] 
+
+      mov  si, _ata_cmd_packet.cmdoff + 2[bp]
+      mov  ax, _ata_cmd_packet.cmdseg + 2[bp]
+      mov  cx, _ata_cmd_packet.cmdlen + 2[bp]
       mov  es, ax      ;; segment in es
 
       mov  dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data write port
@@ -3345,32 +3375,38 @@ ASM_END
 ASM_END
 
   if (inout == ATA_DATA_NO) {
+    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
     status = inb(iobase1 + ATA_CB_STAT);
     }
   else {
+        Bit16u loops = 0;
+        Bit8u sc;
   while (1) {
 
+      if (loops == 0) {//first time through
+        status = inb(iobase2 + ATA_CB_ASTAT);
+        await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+      }
+      else
+        await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
+      loops++;
+
       status = inb(iobase1 + ATA_CB_STAT);
+      sc = inb(iobase1 + ATA_CB_SC);
 
       // Check if command completed
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_DRQ) ) ==0 ) break;
+      if(((inb(iobase1 + ATA_CB_SC)&0x7)==0x3) &&
+         ((status & (ATA_CB_STAT_RDY | ATA_CB_STAT_ERR)) == ATA_CB_STAT_RDY)) 
break;
 
       if (status & ATA_CB_STAT_ERR) {
         BX_DEBUG_ATA("ata_cmd_packet : error (status %02x)\n",status);
         return 3;
       }
 
-      // Device must be ready to send data
-      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | 
ATA_CB_STAT_ERR) ) 
-            != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
-        BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", status);
-        return 4;
-        }
-
       // Normalize address
       bufseg += (bufoff / 16);
       bufoff %= 16;
-    
+
       // Get the byte count
       lcount =  ((Bit16u)(inb(iobase1 + ATA_CB_CH))<<8)+inb(iobase1 + 
ATA_CB_CL);
 
@@ -3431,10 +3467,10 @@ ASM_START
 
         mov  dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data read port
 
-        mov  cx, _ata_cmd_packet.lbefore + 2[bp] 
+        mov  cx, _ata_cmd_packet.lbefore + 2[bp]
         jcxz ata_packet_no_before
 
-        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
+        mov  ah, _ata_cmd_packet.lmode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_packet_in_before_32
 
@@ -3451,14 +3487,14 @@ ata_packet_in_before_32_loop:
         pop  eax
 
 ata_packet_no_before:
-        mov  cx, _ata_cmd_packet.lcount + 2[bp] 
+        mov  cx, _ata_cmd_packet.lcount + 2[bp]
         jcxz ata_packet_after
 
-        mov  di, _ata_cmd_packet.bufoff + 2[bp]  
-        mov  ax, _ata_cmd_packet.bufseg + 2[bp] 
+        mov  di, _ata_cmd_packet.bufoff + 2[bp]
+        mov  ax, _ata_cmd_packet.bufseg + 2[bp]
         mov  es, ax
 
-        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
+        mov  ah, _ata_cmd_packet.lmode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_packet_in_32
 
@@ -3472,10 +3508,10 @@ ata_packet_in_32:
           insd ;; CX dwords transfered to port(DX) to ES:[DI]
 
 ata_packet_after:
-        mov  cx, _ata_cmd_packet.lafter + 2[bp] 
+        mov  cx, _ata_cmd_packet.lafter + 2[bp]
         jcxz ata_packet_done
 
-        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
+        mov  ah, _ata_cmd_packet.lmode + 2[bp]
         cmp  ah, #ATA_MODE_PIO32
         je   ata_packet_in_after_32
 
@@ -3505,7 +3541,7 @@ ASM_END
     }
 
   // Final check, device must be ready
-  if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
+  if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | 
ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
          != ATA_CB_STAT_RDY ) {
     BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", (unsigned) 
status);
     return 4;
@@ -3524,55 +3560,108 @@ ASM_END
 // Start of ATA/ATAPI generic functions
 // ---------------------------------------------------------------------------
 
-  Bit16u 
-atapi_get_sense(device)
+  Bit16u
+atapi_get_sense(device, seg, asc, ascq) // sends REQUEST SENSE; returns 0x0002 if the packet command fails, otherwise stores reply bytes 12 and 13 at seg:asc and seg:ascq and returns 0
   Bit16u device;
 {
   Bit8u  atacmd[12];
-  Bit8u  buffer[16];
+  Bit8u  buffer[18]; // NOTE(review): seg, asc and ascq have no K&R declaration above, so they take the default parameter type - confirm this is intended
   Bit8u i;
 
   memsetb(get_SS(),atacmd,0,12);
 
-  // Request SENSE 
-  atacmd[0]=0x03;    
-  atacmd[4]=0x20;    
-  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 16L, ATA_DATA_IN, 
get_SS(), buffer) != 0)
+  // Request SENSE
+  atacmd[0]=ATA_CMD_REQUEST_SENSE;
+  atacmd[4]=sizeof(buffer); // byte 4 of the packet is set to the buffer size (18), matching the 18L transfer length below
+  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 18L, ATA_DATA_IN, 
get_SS(), buffer) != 0)
     return 0x0002;
 
-  if ((buffer[0] & 0x7e) == 0x70) {
-    return (((Bit16u)buffer[2]&0x0f)*0x100)+buffer[12];
-    }
+  write_byte(seg,asc,buffer[12]); // NOTE(review): the removed version checked (buffer[0] & 0x7e) == 0x70 before using the reply; this one does not
+  write_byte(seg,ascq,buffer[13]);
 
   return 0;
 }
 
-  Bit16u 
+  Bit16u // atapi_is_ready: issue READ CAPACITY to an ATAPI device and record the reported block size and sector count in the device's EbdaData entry; returns 0 on success
 atapi_is_ready(device)
   Bit16u device;
 {
-  Bit8u  atacmd[12];
-  Bit8u  buffer[];
-
-  memsetb(get_SS(),atacmd,0,12);
- 
-  // Test Unit Ready
-  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, 
get_SS(), buffer) != 0)
-    return 0x000f;
-
-  if (atapi_get_sense(device) !=0 ) {
-    memsetb(get_SS(),atacmd,0,12);
-
-    // try to send Test Unit Ready again
-    if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, 
get_SS(), buffer) != 0)
-      return 0x000f;
-
-    return atapi_get_sense(device);
+  Bit8u packet[12]; // command packet; only byte 0 is set (0x25), the rest stays zero
+  Bit8u buf[8]; // reply: bytes 0-3 are decoded as sectors, bytes 4-7 as block_len (most significant byte first)
+  Bit32u block_len;
+  Bit32u sectors;
+  Bit32u timeout; //measured in ms
+  Bit32u time;
+  Bit8u asc, ascq;
+  Bit8u in_progress; // set once the "becoming ready" sense has been seen, so the limit is raised only once
+  Bit16u ebda_seg = read_word(0x0040,0x000E);
+  if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != 
ATA_TYPE_ATAPI) {
+    printf("not implemented for non-ATAPI device\n");
+    return -1; // NOTE(review): the return type is Bit16u, so -1 reaches the caller as an unsigned value
+  }
+
+  BX_DEBUG_ATA("ata_detect_medium: begin\n");
+  memsetb(get_SS(),packet, 0, sizeof packet);
+  packet[0] = 0x25; /* READ CAPACITY */
+
+  /* Retry READ CAPACITY 50 times unless MEDIUM NOT PRESENT
+   * is reported by the device. If the device reports "IN PROGRESS",
+   * 30 seconds is added. */
+  timeout = 5000;
+  time = 0;
+  in_progress = 0;
+  while (time < timeout) {
+    if (ata_cmd_packet(device, sizeof(packet), get_SS(), packet, 0, 8L, 
ATA_DATA_IN, get_SS(), buf) == 0)
+      goto ok;
+
+    if (atapi_get_sense(device, get_SS(), &asc, &ascq) == 0) {
+      if (asc == 0x3a) { /* MEDIUM NOT PRESENT */
+        BX_DEBUG_ATA("Device reports MEDIUM NOT PRESENT\n");
+        return -1;
+      }
+
+      if (asc == 0x04 && ascq == 0x01 && !in_progress) {
+        /* IN PROGRESS OF BECOMING READY */
+        printf("Waiting for device to detect medium... ");
+        /* Allow 30 seconds more */
+        timeout = 30000; // replaces the limit with an absolute 30000; time already counted is not added on top
+        in_progress = 1;
+      }
     }
+    time += 100; // no delay call is visible in this loop; each retry is simply accounted as 100 units
+  }
+  BX_DEBUG_ATA("read capacity failed\n");
+  return -1;
+ok:
+
+  block_len = (Bit32u) buf[4] << 24
+    | (Bit32u) buf[5] << 16
+    | (Bit32u) buf[6] << 8
+    | (Bit32u) buf[7] << 0;
+  BX_DEBUG_ATA("block_len=%u\n", block_len);
+
+  if (block_len!= 2048 && block_len!= 512)
+  {
+    printf("Unsupported sector size %u\n", block_len);
+    return -1;
+  }
+  write_dword(ebda_seg,&EbdaData->ata.devices[device].blksize, block_len); // NOTE(review): other hunks of this patch store blksize with write_word - confirm the field width
+
+  sectors = (Bit32u) buf[0] << 24
+    | (Bit32u) buf[1] << 16
+    | (Bit32u) buf[2] << 8
+    | (Bit32u) buf[3] << 0;
+
+  BX_DEBUG_ATA("sectors=%u\n", sectors);
+  if (block_len == 2048)
+    sectors <<= 2; /* # of sectors in 512-byte "soft" sector */
+  if (sectors != 
read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low))
+    printf("%dMB medium detected\n", sectors>>(20-9)); // printed only when the value differs from the stored sectors_low
+  write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors);
   return 0;
 }
 
-  Bit16u 
+  Bit16u
 atapi_is_cdrom(device)
   Bit8u device;
 {
@@ -3632,7 +3721,7 @@ static char eltorito[24]="EL TORITO SPEC
 //
 // Returns ah: emulated drive, al: error code
 //
-  Bit16u 
+  Bit16u
 cdrom_boot()
 {
   Bit16u ebda_seg=read_word(0x0040,0x000E);
@@ -3645,9 +3734,12 @@ cdrom_boot()
   for (device=0; device<BX_MAX_ATA_DEVICES;device++) {
     if (atapi_is_cdrom(device)) break;
     }
-  
+
   // if not found
   if(device >= BX_MAX_ATA_DEVICES) return 2;
+
+  if(error = atapi_is_ready(device) != 0)
+    BX_INFO("ata_is_ready returned %d\n",error);
 
   // Read the Boot Record Volume Descriptor
   memsetb(get_SS(),atacmd,0,12);
@@ -3668,7 +3760,7 @@ cdrom_boot()
    }
   for(i=0;i<23;i++)
     if(buffer[7+i]!=read_byte(0xf000,&eltorito[i]))return 6;
-  
+
   // ok, now we calculate the Boot catalog address
   
lba=buffer[0x4A]*0x1000000+buffer[0x49]*0x10000+buffer[0x48]*0x100+buffer[0x47];
 
@@ -3683,7 +3775,7 @@ cdrom_boot()
   atacmd[5]=(lba & 0x000000ff);
   if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 2048L, 
ATA_DATA_IN, get_SS(), buffer)) != 0)
     return 7;
- 
+
   // Validation entry
   if(buffer[0x00]!=0x01)return 8;   // Header
   if(buffer[0x01]!=0x00)return 9;   // Platform
@@ -3702,10 +3794,10 @@ cdrom_boot()
 
   write_byte(ebda_seg,&EbdaData->cdemu.media,buffer[0x21]);
   if(buffer[0x21]==0){
-    // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0. 
+    // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0.
     // Win2000 cd boot needs to know it booted from cd
     write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0xE0);
-    } 
+    }
   else if(buffer[0x21]<4)
     write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0x00);
   else
@@ -3719,7 +3811,7 @@ cdrom_boot()
 
   write_word(ebda_seg,&EbdaData->cdemu.load_segment,boot_segment);
   write_word(ebda_seg,&EbdaData->cdemu.buffer_segment,0x0000);
-  
+
   nbsectors=buffer[0x27]*0x100+buffer[0x26];
   write_word(ebda_seg,&EbdaData->cdemu.sector_count,nbsectors);
 
@@ -3743,7 +3835,6 @@ cdrom_boot()
   /* measure 1st 512 bytes  */
   tcpa_ipl((Bit32u)1L,(Bit32u)boot_segment,(Bit32u)0L,(Bit32u)512L);
 #endif
-
 
   // Remember the media type
   switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
@@ -3765,7 +3856,7 @@ cdrom_boot()
     case 0x04:  // Harddrive
       
write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,read_byte(boot_segment,446+6)&0x3f);
       write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,
-             (read_byte(boot_segment,446+6)<<2) + 
read_byte(boot_segment,446+7) + 1);
+              (read_byte(boot_segment,446+6)<<2) + 
read_byte(boot_segment,446+7) + 1);
       
write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,read_byte(boot_segment,446+5)
 + 1);
       break;
    }
@@ -3778,7 +3869,7 @@ cdrom_boot()
       write_byte(ebda_seg, &EbdaData->ata.hdcount, read_byte(ebda_seg, 
&EbdaData->ata.hdcount) + 1);
    }
 
-  
+
   // everything is ok, so from now on, the emulation is active
   if(read_byte(ebda_seg,&EbdaData->cdemu.media)!=0)
     write_byte(ebda_seg,&EbdaData->cdemu.active,0x01);
@@ -4124,9 +4215,10 @@ ASM_END
       regs.u.r8.al = inb_cmos(0x30);
       regs.u.r8.ah = inb_cmos(0x31);
 
-      // limit to 15M
-      if(regs.u.r16.ax > 0x3c00)
-        regs.u.r16.ax = 0x3c00;
+      // According to Ralf Brown's interrupt the limit should be 15M,
+      // but real machines mostly return max. 63M.
+      if(regs.u.r16.ax > 0xffc0)
+        regs.u.r16.ax = 0xffc0;
 
       CLEAR_CF();
 #endif
@@ -4344,13 +4436,35 @@ BX_DEBUG_INT15("case 2:\n");
 
         case 3: // Set Resolution
 BX_DEBUG_INT15("case 3:\n");
-          // BX:
+          // BH:
           //      0 =  25 dpi, 1 count  per millimeter
           //      1 =  50 dpi, 2 counts per millimeter
           //      2 = 100 dpi, 4 counts per millimeter
           //      3 = 200 dpi, 8 counts per millimeter
-          CLEAR_CF();
-          regs.u.r8.ah = 0;
+          comm_byte = inhibit_mouse_int_and_events(); // disable IRQ12 and 
packets
+          if (regs.u.r8.bh < 4) {
+            ret = send_to_mouse_ctrl(0xE8); // set resolution command
+            if (ret == 0) {
+              ret = get_mouse_data(&mouse_data1);
+              if (mouse_data1 != 0xfa)
+                BX_PANIC("Mouse status returned %02x (should be ack)\n", 
(unsigned)mouse_data1);
+              ret = send_to_mouse_ctrl(regs.u.r8.bh);
+              ret = get_mouse_data(&mouse_data1);
+              if (mouse_data1 != 0xfa)
+                BX_PANIC("Mouse status returned %02x (should be ack)\n", 
(unsigned)mouse_data1);
+              CLEAR_CF();
+              regs.u.r8.ah = 0;
+            } else {
+              // error
+              SET_CF();
+              regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+            }
+          } else {
+            // error
+            SET_CF();
+            regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+          }
+          set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
           break;
 
         case 4: // Get Device ID
@@ -4472,7 +4586,30 @@ BX_DEBUG_INT15("case default:\n");
       break;
     }
 }
-#endif
+#endif // BX_USE_PS2_MOUSE
+
+
+void set_e820_range(ES, DI, start, end, type) // store one 20-byte (0x14) E820 map entry at ES:DI
+     Bit16u ES;    // segment of the destination buffer
+     Bit16u DI;    // offset of the destination buffer
+     Bit32u start; // first address of the range
+     Bit32u end;   // address just past the range; converted to a length below
+     Bit16u type;  // range type word stored at offset 16 (callers pass 1, 2 or 3)
+{
+    write_word(ES, DI, start);         // base bits 0-15 (assumes write_word keeps only the low word -- TODO confirm)
+    write_word(ES, DI+2, start >> 16); // base bits 16-31
+    write_word(ES, DI+4, 0x00);        // base bits 32-47: always zero
+    write_word(ES, DI+6, 0x00);        // base bits 48-63: always zero
+
+    end -= start;                      // length = end - start; with end == 0 the 32-bit wraparound gives the distance up to 4 GB
+    write_word(ES, DI+8, end);         // length bits 0-15
+    write_word(ES, DI+10, end >> 16);  // length bits 16-31
+    write_word(ES, DI+12, 0x0000);     // length bits 32-47: always zero
+    write_word(ES, DI+14, 0x0000);     // length bits 48-63: always zero
+
+    write_word(ES, DI+16, type);       // low word of the 32-bit type field
+    write_word(ES, DI+18, 0x0);        // high word of the type field: always zero
+}
 
   void
 int15_function32(regs, ES, DS, FLAGS)
@@ -4486,17 +4623,20 @@ BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.
 
   switch (regs.u.r8.ah) {
     case 0x86:
-      // Wait for CX:DX microseconds. currently using the 
-      // refresh request port 0x61 bit4, toggling every 15usec 
+      // Wait for CX:DX microseconds. currently using the
+      // refresh request port 0x61 bit4, toggling every 15usec
 
       CX = regs.u.r16.cx;
       DX = regs.u.r16.dx;
 
 ASM_START
+      sti
+
       ;; Get the count in eax
-      mov  ax, .int15_function32.CX [bp]
+      mov  bx, sp
+      mov  ax, _int15_function32.CX [bx]
       shl  eax, #16
-      mov  ax, .int15_function32.DX [bp]
+      mov  ax, _int15_function32.DX [bx]
 
       ;; convert to numbers of 15usec ticks
       mov ebx, #15
@@ -4527,7 +4667,8 @@ ASM_END
     case 0xe8:
         switch(regs.u.r8.al)
         {
-        case 0x20: {
+#ifdef HVMASSIST
+       case 0x20: {
             Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
 
             if (regs.u.r32.edx != 0x534D4150) /* SMAP */
@@ -4575,7 +4716,7 @@ ASM_END
             // Get the amount of extended memory (above 1M)
             regs.u.r8.cl = inb_cmos(0x30);
             regs.u.r8.ch = inb_cmos(0x31);
-          
+
             // limit to 15M
             if (regs.u.r16.cx > (15*1024))
                 regs.u.r16.cx = 15*1024;
@@ -4603,7 +4744,7 @@ ASM_END
             regs.u.r16.bx = regs.u.r16.dx;
             break;
         }
-       default:  /* AH=0xE8?? but not implemented */
+        default:  /* AH=0xE8?? but not implemented */
             goto int15_unimplemented;
         }
         break;
@@ -4616,16 +4757,178 @@ ASM_END
       regs.u.r8.ah = UNSUPPORTED_FUNCTION;
       break;
     }
+#else
+         case 0x20: // coded by osmaker aka K.J.
+            if(regs.u.r32.edx == 0x534D4150)
+            {
+                extended_memory_size = inb_cmos(0x35);
+                extended_memory_size <<= 8;
+                extended_memory_size |= inb_cmos(0x34);
+                extended_memory_size *= 64;
+                // greater than EFF00000???
+                if(extended_memory_size > 0x3bc000) {
+                    extended_memory_size = 0x3bc000; // everything after this 
is reserved memory until we get to 0x100000000
+                }
+                extended_memory_size *= 1024;
+                extended_memory_size += (16L * 1024 * 1024);
+
+                if(extended_memory_size <= (16L * 1024 * 1024)) {
+                    extended_memory_size = inb_cmos(0x31);
+                    extended_memory_size <<= 8;
+                    extended_memory_size |= inb_cmos(0x30);
+                    extended_memory_size *= 1024;
+                    extended_memory_size += (1L * 1024 * 1024);
+                }
+
+                switch(regs.u.r16.bx)
+                {
+                    case 0:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x0000000L, 0x0009f000L, 1);
+                        regs.u.r32.ebx = 1;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 1:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x0009f000L, 0x000a0000L, 2);
+                        regs.u.r32.ebx = 2;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 2:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x000e8000L, 0x00100000L, 2);
+                        regs.u.r32.ebx = 3;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 3:
+#if BX_ROMBIOS32
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x00100000L,
+                                       extended_memory_size - ACPI_DATA_SIZE, 
1);
+                        regs.u.r32.ebx = 4;
+#else
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0x00100000L,
+                                       extended_memory_size, 1);
+                        regs.u.r32.ebx = 5;
+#endif
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 4:
+                        set_e820_range(ES, regs.u.r16.di,
+                                       extended_memory_size - ACPI_DATA_SIZE,
+                                       extended_memory_size, 3); // ACPI RAM
+                        regs.u.r32.ebx = 5;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                        break;
+                    case 5:
+                        /* 256KB BIOS area at the end of 4 GB */
+                        set_e820_range(ES, regs.u.r16.di,
+                                       0xfffc0000L, 0x00000000L, 2);
+                        regs.u.r32.ebx = 0;
+                        regs.u.r32.eax = 0x534D4150;
+                        regs.u.r32.ecx = 0x14;
+                        CLEAR_CF();
+                        return;
+                    default:  /* AX=E820, DX=534D4150, BX unrecognized */
+                        goto int15_unimplemented;
+                        break;
+                }
+            } else {
+              // EDX != 0x534D4150 (no "SMAP" signature)
+              goto int15_unimplemented;
+            }
+            break;
+
+        case 0x01:
+          // do we have any reason to fail here ?
+          CLEAR_CF();
+
+          // my real system sets ax and bx to 0
+          // this is confirmed by Ralf Brown's Interrupt List
+          // but syslinux v1.48 is known to behave
+          // strangely if ax is set to 0
+          // regs.u.r16.ax = 0;
+          // regs.u.r16.bx = 0;
+
+          // Get the amount of extended memory (above 1M)
+          regs.u.r8.cl = inb_cmos(0x30);
+          regs.u.r8.ch = inb_cmos(0x31);
+
+          // limit to 15M
+          if(regs.u.r16.cx > 0x3c00)
+          {
+            regs.u.r16.cx = 0x3c00;
+          }
+
+          // Get the amount of extended memory above 16M in 64K blocks
+          regs.u.r8.dl = inb_cmos(0x34);
+          regs.u.r8.dh = inb_cmos(0x35);
+
+          // Set configured memory equal to extended memory
+          regs.u.r16.ax = regs.u.r16.cx;
+          regs.u.r16.bx = regs.u.r16.dx;
+          break;
+        default:  /* AH=0xE8?? but not implemented */
+          goto int15_unimplemented;
+       }
+       break;
+    int15_unimplemented:
+       // fall into the default
+    default:
+      BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
+        (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
+      SET_CF();
+      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+      break;
+    }
+#endif /* HVMASSIST */
 }
 
   void
 int16_function(DI, SI, BP, SP, BX, DX, CX, AX, FLAGS)
   Bit16u DI, SI, BP, SP, BX, DX, CX, AX, FLAGS;
 {
-  Bit8u scan_code, ascii_code, shift_flags, count;
+  Bit8u scan_code, ascii_code, shift_flags, led_flags, count;
   Bit16u kbd_code, max;
 
   BX_DEBUG_INT16("int16: AX=%04x BX=%04x CX=%04x DX=%04x \n", AX, BX, CX, DX);
+
+  shift_flags = read_byte(0x0040, 0x17);
+  led_flags = read_byte(0x0040, 0x97);
+  if ((((shift_flags >> 4) & 0x07) ^ (led_flags & 0x07)) != 0) {
+ASM_START
+    cli
+ASM_END
+    outb(0x60, 0xed);
+    while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
+    if ((inb(0x60) == 0xfa)) {
+      led_flags &= 0xf8;
+      led_flags |= ((shift_flags >> 4) & 0x07);
+      outb(0x60, led_flags & 0x07);
+      while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
+      inb(0x60);
+      write_byte(0x0040, 0x97, led_flags);
+    }
+ASM_START
+    sti
+ASM_END
+  }
 
   switch (GET_AH()) {
     case 0x00: /* read keyboard input */
@@ -4664,7 +4967,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x09: /* GET KEYBOARD FUNCTIONALITY */
-      // bit Bochs Description     
+      // bit Bochs Description
       //  7    0   reserved
       //  6    0   INT 16/AH=20h-22h supported (122-key keyboard support)
       //  5    1   INT 16/AH=10h-12h supported (enhanced keyboard support)
@@ -4694,7 +4997,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
               kbd_code |= (inb(0x60) << 8);
             }
           } while (--count>0);
-       }
+        }
       }
       BX=kbd_code;
       break;
@@ -4721,7 +5024,8 @@ int16_function(DI, SI, BP, SP, BX, DX, C
     case 0x12: /* get extended keyboard status */
       shift_flags = read_byte(0x0040, 0x17);
       SET_AL(shift_flags);
-      shift_flags = read_byte(0x0040, 0x18);
+      shift_flags = read_byte(0x0040, 0x18) & 0x73;
+      shift_flags |= read_byte(0x0040, 0x96) & 0x0c;
       SET_AH(shift_flags);
       BX_DEBUG_INT16("int16: func 12 sending %04x\n",AX);
       break;
@@ -4736,7 +5040,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
 
     case 0x6F:
       if (GET_AL() == 0x08)
-       SET_AH(0x02); // unsupported, aka normal keyboard
+        SET_AH(0x02); // unsupported, aka normal keyboard
 
     default:
       BX_INFO("KBD: unsupported int 16h function %02x\n", GET_AH());
@@ -4877,7 +5181,7 @@ int09_function(DI, SI, BP, SP, BX, DX, C
   Bit16u DI, SI, BP, SP, BX, DX, CX, AX;
 {
   Bit8u scancode, asciicode, shift_flags;
-  Bit8u mf2_flags, mf2_state, led_flags;
+  Bit8u mf2_flags, mf2_state;
 
   //
   // DS has been set to F000 before call
@@ -4895,7 +5199,6 @@ int09_function(DI, SI, BP, SP, BX, DX, C
   shift_flags = read_byte(0x0040, 0x17);
   mf2_flags = read_byte(0x0040, 0x18);
   mf2_state = read_byte(0x0040, 0x96);
-  led_flags = read_byte(0x0040, 0x97);
   asciicode = 0;
 
   switch (scancode) {
@@ -4904,8 +5207,6 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       write_byte(0x0040, 0x17, shift_flags);
       mf2_flags |= 0x40;
       write_byte(0x0040, 0x18, mf2_flags);
-      led_flags ^= 0x04;
-      write_byte(0x0040, 0x97, led_flags);
       break;
     case 0xba: /* Caps Lock release */
       mf2_flags &= ~0x40;
@@ -4913,11 +5214,8 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x2a: /* L Shift press */
-      /*shift_flags &= ~0x40;*/
       shift_flags |= 0x02;
       write_byte(0x0040, 0x17, shift_flags);
-      led_flags &= ~0x04;
-      write_byte(0x0040, 0x97, led_flags);
       break;
     case 0xaa: /* L Shift release */
       shift_flags &= ~0x02;
@@ -4925,11 +5223,8 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x36: /* R Shift press */
-      /*shift_flags &= ~0x40;*/
       shift_flags |= 0x01;
       write_byte(0x0040, 0x17, shift_flags);
-      led_flags &= ~0x04;
-      write_byte(0x0040, 0x97, led_flags);
       break;
     case 0xb6: /* R Shift release */
       shift_flags &= ~0x01;
@@ -4937,71 +5232,75 @@ int09_function(DI, SI, BP, SP, BX, DX, C
       break;
 
     case 0x1d: /* Ctrl press */
-      shift_flags |= 0x04;
-      write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags |= 0x04;
-      } else {
-        mf2_flags |= 0x01;
+      if ((mf2_state & 0x01) == 0) {
+        shift_flags |= 0x04;
+        write_byte(0x0040, 0x17, shift_flags);
+        if (mf2_state & 0x02) {
+          mf2_state |= 0x04;
+          write_byte(0x0040, 0x96, mf2_state);
+        } else {
+          mf2_flags |= 0x01;
+          write_byte(0x0040, 0x18, mf2_flags);
         }
-      write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
     case 0x9d: /* Ctrl release */
-      shift_flags &= ~0x04;
-      write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags &= ~0x04;
-      } else {
-        mf2_flags &= ~0x01;
+      if ((mf2_state & 0x01) == 0) {
+        shift_flags &= ~0x04;
+        write_byte(0x0040, 0x17, shift_flags);
+        if (mf2_state & 0x02) {
+          mf2_state &= ~0x04;
+          write_byte(0x0040, 0x96, mf2_state);
+        } else {
+          mf2_flags &= ~0x01;
+          write_byte(0x0040, 0x18, mf2_flags);
         }
-      write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
 
     case 0x38: /* Alt press */
       shift_flags |= 0x08;
       write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags |= 0x08;
+      if (mf2_state & 0x02) {
+        mf2_state |= 0x08;
+        write_byte(0x0040, 0x96, mf2_state);
       } else {
         mf2_flags |= 0x02;
-        }
-      write_byte(0x0040, 0x18, mf2_flags);
+        write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
     case 0xb8: /* Alt release */
       shift_flags &= ~0x08;
       write_byte(0x0040, 0x17, shift_flags);
-      if (mf2_state & 0x01) {
-        mf2_flags &= ~0x08;
+      if (mf2_state & 0x02) {
+        mf2_state &= ~0x08;
+        write_byte(0x0040, 0x96, mf2_state);
       } else {
         mf2_flags &= ~0x02;
-        }
-      write_byte(0x0040, 0x18, mf2_flags);
+        write_byte(0x0040, 0x18, mf2_flags);
+      }
       break;
 
     case 0x45: /* Num Lock press */
-      if ((mf2_state & 0x01) == 0) {
+      if ((mf2_state & 0x03) == 0) {
         mf2_flags |= 0x20;
         write_byte(0x0040, 0x18, mf2_flags);
         shift_flags ^= 0x20;
-        led_flags ^= 0x02;
         write_byte(0x0040, 0x17, shift_flags);
-        write_byte(0x0040, 0x97, led_flags);
-        }
+      }
       break;
     case 0xc5: /* Num Lock release */
-      if ((mf2_state & 0x01) == 0) {
+      if ((mf2_state & 0x03) == 0) {
         mf2_flags &= ~0x20;
         write_byte(0x0040, 0x18, mf2_flags);
-        }
+      }
       break;
 
     case 0x46: /* Scroll Lock press */
       mf2_flags |= 0x10;
       write_byte(0x0040, 0x18, mf2_flags);
       shift_flags ^= 0x10;
-      led_flags ^= 0x01;
       write_byte(0x0040, 0x17, shift_flags);
-      write_byte(0x0040, 0x97, led_flags);
       break;
 
     case 0xc6: /* Scroll Lock release */
@@ -5014,50 +5313,55 @@ int09_function(DI, SI, BP, SP, BX, DX, C
             machine_reset();
         /* Fall through */
     default:
-      if (scancode & 0x80) return; /* toss key releases ... */
+      if (scancode & 0x80) {
+        break; /* toss key releases ... */
+      }
       if (scancode > MAX_SCAN_CODE) {
-        BX_INFO("KBD: int09h_handler(): unknown scancode (%x) read!\n", 
scancode);
+        BX_INFO("KBD: int09h_handler(): unknown scancode read: 0x%02x!\n", 
scancode);
         return;
-        }
+      }
       if (shift_flags & 0x08) { /* ALT */
         asciicode = scan_to_scanascii[scancode].alt;
         scancode = scan_to_scanascii[scancode].alt >> 8;
-        }
-      else if (shift_flags & 0x04) { /* CONTROL */
+      } else if (shift_flags & 0x04) { /* CONTROL */
         asciicode = scan_to_scanascii[scancode].control;
         scancode = scan_to_scanascii[scancode].control >> 8;
-        }
-      else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
-        /* check if lock state should be ignored 
+      } else if (((mf2_state & 0x02) > 0) && ((scancode >= 0x47) && (scancode 
<= 0x53))) {
+        /* extended keys handling */
+        asciicode = 0xe0;
+        scancode = scan_to_scanascii[scancode].normal >> 8;
+      } else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
+        /* check if lock state should be ignored
          * because a SHIFT key are pressed */
-         
+
         if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
           asciicode = scan_to_scanascii[scancode].normal;
           scancode = scan_to_scanascii[scancode].normal >> 8;
-          }
-        else {
+        } else {
           asciicode = scan_to_scanascii[scancode].shift;
           scancode = scan_to_scanascii[scancode].shift >> 8;
-          }
         }
-      else {
+      } else {
         /* check if lock is on */
         if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
           asciicode = scan_to_scanascii[scancode].shift;
           scancode = scan_to_scanascii[scancode].shift >> 8;
-          }
-        else {
+        } else {
           asciicode = scan_to_scanascii[scancode].normal;
           scancode = scan_to_scanascii[scancode].normal >> 8;
-          }
         }
+      }
       if (scancode==0 && asciicode==0) {
         BX_INFO("KBD: int09h_handler(): scancode & asciicode are zero?\n");
-        }
+      }
       enqueue_key(scancode, asciicode);
       break;
-    }
-  mf2_state &= ~0x01;
+  }
+  if ((scancode & 0x7f) != 0x1d) {
+    mf2_state &= ~0x01;
+  }
+  mf2_state &= ~0x02;
+  write_byte(0x0040, 0x96, mf2_state);
 }
 
   unsigned int
@@ -5065,9 +5369,6 @@ enqueue_key(scan_code, ascii_code)
   Bit8u scan_code, ascii_code;
 {
   Bit16u buffer_start, buffer_end, buffer_head, buffer_tail, temp_tail;
-
-  //BX_INFO("KBD:   enqueue_key() called scan:%02x, ascii:%02x\n",
-  //    scan_code, ascii_code);
 
 #if BX_CPU < 2
   buffer_start = 0x001E;
@@ -5118,9 +5419,8 @@ BX_DEBUG_INT74("int74: read byte %02x\n"
   mouse_flags_2 = read_byte(ebda_seg, 0x0027);
 
   if ( (mouse_flags_2 & 0x80) != 0x80 ) {
-      //    BX_PANIC("int74_function:\n");
       return;
-    }
+  }
 
   package_count = mouse_flags_2 & 0x07;
   index = mouse_flags_1 & 0x07;
@@ -5148,10 +5448,10 @@ BX_DEBUG_INT74("int74_function: make_far
 #if BX_USE_ATADRV
 
   void
-int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
-  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
 {
-  Bit32u lba;
+  Bit32u lba_low, lba_high;
   Bit16u ebda_seg=read_word(0x0040,0x000E);
   Bit16u cylinder, head, sector;
   Bit16u segment, offset;
@@ -5172,12 +5472,12 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
   // Get the ata channel
   device=read_byte(ebda_seg,&EbdaData->ata.hdidmap[GET_ELDL()-0x80]);
 
-  // basic check : device has to be valid 
+  // basic check : device has to be valid
   if (device >= BX_MAX_ATA_DEVICES) {
     BX_INFO("int13_harddisk: function %02x, unmapped device for ELDL=%02x\n", 
GET_AH(), GET_ELDL());
     goto int13_fail;
     }
-  
+
   switch (GET_AH()) {
 
     case 0x00: /* disk controller reset */
@@ -5195,7 +5495,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       break;
 
     case 0x02: // read disk sectors
-    case 0x03: // write disk sectors 
+    case 0x03: // write disk sectors
     case 0x04: // verify disk sectors
 
       count       = GET_AL();
@@ -5207,10 +5507,10 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       segment = ES;
       offset  = BX;
 
-      if ( (count > 128) || (count == 0) ) {
-        BX_INFO("int13_harddisk: function %02x, count out of 
range!\n",GET_AH());
+      if ((count > 128) || (count == 0) || (sector == 0)) {
+        BX_INFO("int13_harddisk: function %02x, parameter out of 
range!\n",GET_AH());
         goto int13_fail;
-        }
+      }
 
       nlc   = read_word(ebda_seg, 
&EbdaData->ata.devices[device].lchs.cylinders);
       nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
@@ -5221,7 +5521,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         BX_INFO("int13_harddisk: function %02x, parameters out of range 
%04x/%04x/%04x!\n", GET_AH(), cylinder, head, sector);
         goto int13_fail;
         }
-      
+
       // FIXME verify
       if ( GET_AH() == 0x04 ) goto int13_success;
 
@@ -5230,14 +5530,15 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
 
       // if needed, translate lchs to lba, and execute command
       if ( (nph != nlh) || (npspt != nlspt)) {
-        lba = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * 
(Bit32u)nlspt) + (Bit32u)sector - 1;
+        lba_low = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * 
(Bit32u)nlspt) + (Bit32u)sector - 1;
+        lba_high = 0;
         sector = 0; // this forces the command to be lba
         }
 
       if ( GET_AH() == 0x02 )
-        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, 
head, sector, lba, segment, offset);
+        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, 
head, sector, lba_low, lba_high, segment, offset);
       else
-        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 
cylinder, head, sector, lba, segment, offset);
+        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 
cylinder, head, sector, lba_low, lba_high, segment, offset);
 
       // Set nb of sector transferred
       SET_AL(read_word(ebda_seg, &EbdaData->ata.trsfsectors));
@@ -5258,7 +5559,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       break;
 
     case 0x08: /* read disk drive parameters */
-      
+
       // Get logical geometry from table
       nlc   = read_word(ebda_seg, 
&EbdaData->ata.devices[device].lchs.cylinders);
       nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
@@ -5273,13 +5574,13 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       SET_DL(count); /* FIXME returns 0, 1, or n hard drives */
 
       // FIXME should set ES & DI
-      
+
       goto int13_success;
       break;
 
     case 0x10: /* check drive ready */
       // should look at 40:8E also???
-      
+
       // Read the status from controller
       status = inb(read_word(ebda_seg, 
&EbdaData->ata.channels[device/2].iobase1) + ATA_CB_STAT);
       if ( (status & ( ATA_CB_STAT_BSY | ATA_CB_STAT_RDY )) == ATA_CB_STAT_RDY 
) {
@@ -5293,15 +5594,15 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
 
     case 0x15: /* read disk drive size */
 
-      // Get physical geometry from table
-      npc   = read_word(ebda_seg, 
&EbdaData->ata.devices[device].pchs.cylinders);
-      nph   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
-      npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
+      // Get logical geometry from table
+      nlc   = read_word(ebda_seg, 
&EbdaData->ata.devices[device].lchs.cylinders);
+      nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
+      nlspt = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.spt);
 
       // Compute sector count seen by int13
-      lba = (Bit32u)(npc - 1) * (Bit32u)nph * (Bit32u)npspt;
-      CX = lba >> 16;
-      DX = lba & 0xffff;
+      lba_low = (Bit32u)(nlc - 1) * (Bit32u)nlh * (Bit32u)nlspt;
+      CX = lba_low >> 16;
+      DX = lba_low & 0xffff;
 
       SET_AH(3);  // hard disk accessible
       goto int13_success_noah;
@@ -5322,30 +5623,31 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
       segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
       offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
- 
-      // Can't use 64 bits lba
-      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
-      if (lba != 0L) {
-        BX_PANIC("int13_harddisk: function %02x. Can't use 64bits 
lba\n",GET_AH());
-        goto int13_fail;
-        }
-
-      // Get 32 bits lba and check
-      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
-      if (lba >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors) 
) {
+
+      // Get 32 msb lba and check
+      lba_high=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
+      if (lba_high > read_dword(ebda_seg, 
&EbdaData->ata.devices[device].sectors_high) ) {
         BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
         goto int13_fail;
         }
 
+      // Get 32 lsb lba and check
+      lba_low=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
+      if (lba_high == read_dword(ebda_seg, 
&EbdaData->ata.devices[device].sectors_high)
+          && lba_low >= read_dword(ebda_seg, 
&EbdaData->ata.devices[device].sectors_low) ) {
+        BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
+        goto int13_fail;
+        }
+
       // If verify or seek
       if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
         goto int13_success;
-      
+
       // Execute the command
       if ( GET_AH() == 0x42 )
-        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, 
lba, segment, offset);
+        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, 
lba_low, lba_high, segment, offset);
       else
-        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, 
lba, segment, offset);
+        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, 
lba_low, lba_high, segment, offset);
 
       count=read_word(ebda_seg, &EbdaData->ata.trsfsectors);
       write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
@@ -5363,7 +5665,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
     case 0x49: // IBM/MS extended media change
       goto int13_success;    // Always success for HD
       break;
-      
+
     case 0x46: // IBM/MS eject media
       SET_AH(0xb2);          // Volume Not Removable
       goto int13_fail_noah;  // Always fail for HD
@@ -5373,7 +5675,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
       size=read_word(DS,SI+(Bit16u)&Int13DPT->size);
 
       // Buffer is too small
-      if(size < 0x1a) 
+      if(size < 0x1a)
         goto int13_fail;
 
       // EDD 1.x
@@ -5383,17 +5685,26 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         npc     = read_word(ebda_seg, 
&EbdaData->ata.devices[device].pchs.cylinders);
         nph     = read_word(ebda_seg, 
&EbdaData->ata.devices[device].pchs.heads);
         npspt   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
-        lba     = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors);
+        lba_low = read_dword(ebda_seg, 
&EbdaData->ata.devices[device].sectors_low);
+        lba_high = read_dword(ebda_seg, 
&EbdaData->ata.devices[device].sectors_high);
         blksize = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
 
         write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1a);
-        write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
-        write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+        if (lba_high || (lba_low/npspt)/nph > 0x3fff)
+        {
+          write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x00); // geometry is 
invalid
+          write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, 0x3fff);
+        }
+        else
+        {
+          write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is 
valid
+          write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+        }
         write_dword(DS, SI+(Bit16u)&Int13DPT->heads, (Bit32u)nph);
         write_dword(DS, SI+(Bit16u)&Int13DPT->spt, (Bit32u)npspt);
-        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba);  // FIXME 
should be Bit64
-        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0L);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);  
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba_low);
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, lba_high);
+        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
         }
 
       // EDD 2.x
@@ -5403,8 +5714,8 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
 
         write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
 
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, 
&EbdaData->ata.dpte);  
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
 
         // Fill in dpte
         channel = device / 2;
@@ -5414,14 +5725,14 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
         translation = read_byte(ebda_seg, 
&EbdaData->ata.devices[device].translation);
 
-        options  = (translation==ATA_TRANSLATION_NONE?0:1<<3); // chs 
translation
+        options  = (translation==ATA_TRANSLATION_NONE?0:1)<<3; // chs 
translation
         options |= (1<<4); // lba translation
-        options |= (mode==ATA_MODE_PIO32?1:0<<7);
-        options |= (translation==ATA_TRANSLATION_LBA?1:0<<9); 
-        options |= (translation==ATA_TRANSLATION_RECHS?3:0<<9); 
+        options |= (mode==ATA_MODE_PIO32?1:0)<<7;
+        options |= (translation==ATA_TRANSLATION_LBA?1:0)<<9;
+        options |= (translation==ATA_TRANSLATION_RECHS?3:0)<<9;
 
         write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
-        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
         write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 
2))<<4 );
         write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
         write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
@@ -5430,10 +5741,13 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
         write_byte(ebda_seg, &EbdaData->ata.dpte.pio, 0 );
         write_word(ebda_seg, &EbdaData->ata.dpte.options, options);
         write_word(ebda_seg, &EbdaData->ata.dpte.reserved, 0);
-        write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
- 
+        if (size >=0x42)
+          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
+        else
+          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x10);
+
         checksum=0;
-        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, 
(&EbdaData->ata.dpte) + i);
+        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, 
((Bit8u*)(&EbdaData->ata.dpte)) + i);
         checksum = ~checksum;
         write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
         }
@@ -5459,7 +5773,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
@@ -5472,7 +5786,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
           write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
           write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
@@ -5508,7 +5822,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
     case 0x0d: /* alternate disk reset */
     case 0x11: /* recalibrate */
     case 0x14: /* controller internal diagnostic */
-      BX_INFO("int13h_harddisk function %02xh unimplemented, returns success\n", GET_AH());
+      BX_INFO("int13_harddisk: function %02xh unimplemented, returns success\n", GET_AH());
       goto int13_success;
       break;
 
@@ -5517,7 +5831,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
     case 0x18: // set media type for format
     case 0x50: // IBM/MS send packet command
     default:
-      BX_INFO("int13_harddisk function %02xh unsupported, returns fail\n", GET_AH());
+      BX_INFO("int13_harddisk: function %02xh unsupported, returns fail\n", GET_AH());
       goto int13_fail;
       break;
     }
@@ -5553,8 +5867,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
   Bit16u count, segment, offset, i, size;
 
   BX_DEBUG_INT13_CD("int13_cdrom: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
-  // BX_DEBUG_INT13_CD("int13_cdrom: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
-  
+
   SET_DISK_RET_STATUS(0x00);
 
   /* basic check : device should be 0xE0+ */
@@ -5571,16 +5884,16 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
     BX_INFO("int13_cdrom: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
     goto int13_fail;
     }
-  
+
   switch (GET_AH()) {
 
     // all those functions return SUCCESS
     case 0x00: /* disk controller reset */
     case 0x09: /* initialize drive parameters */
     case 0x0c: /* seek to specified cylinder */
-    case 0x0d: /* alternate disk reset */  
-    case 0x10: /* check drive ready */    
-    case 0x11: /* recalibrate */      
+    case 0x0d: /* alternate disk reset */
+    case 0x10: /* check drive ready */
+    case 0x11: /* recalibrate */
     case 0x14: /* controller internal diagnostic */
     case 0x16: /* detect disk change */
       goto int13_success;
@@ -5602,7 +5915,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
       /* set CF if error status read */
       if (status) goto int13_fail_nostatus;
       else        goto int13_success_noah;
-      break;      
+      break;
 
     case 0x15: /* read disk drive size */
       SET_AH(0x02);
@@ -5619,11 +5932,11 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
     case 0x42: // IBM/MS extended read
     case 0x44: // IBM/MS verify sectors
     case 0x47: // IBM/MS extended seek
-       
+
       count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
       segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
       offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
- 
+
       // Can't use 64 bits lba
       lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
       if (lba != 0L) {
@@ -5631,13 +5944,13 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
         goto int13_fail;
         }
 
-      // Get 32 bits lba 
+      // Get 32 bits lba
       lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
 
       // If verify or seek
       if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
         goto int13_success;
-      
+
       memsetb(get_SS(),atacmd,0,12);
       atacmd[0]=0x28;                      // READ command
       atacmd[7]=(count & 0xff00) >> 8;     // Sectors
@@ -5646,7 +5959,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
       atacmd[3]=(lba & 0x00ff0000) >> 16;
       atacmd[4]=(lba & 0x0000ff00) >> 8;
       atacmd[5]=(lba & 0x000000ff);
-      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset); 
+      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset);
 
       count = (Bit16u)(read_dword(ebda_seg, &EbdaData->ata.trsfbytes) >> 11);
       write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
@@ -5693,21 +6006,21 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
 
     case 0x46: // IBM/MS eject media
       locks = read_byte(ebda_seg, &EbdaData->ata.devices[device].lock);
-      
+
       if (locks != 0) {
         SET_AH(0xb1); // media locked
         goto int13_fail_noah;
         }
       // FIXME should handle 0x31 no media in device
       // FIXME should handle 0xb5 valid request failed
-    
+
       // Call removable media eject
       ASM_START
         push bp
         mov  bp, sp
 
         mov ah, #0x52
-        int 15
+        int #0x15
         mov _int13_cdrom.status + 2[bp], ah
         jnc int13_cdrom_rme_end
         mov _int13_cdrom.status, #1
@@ -5727,7 +6040,7 @@ int13_cdrom_rme_end:
       size = read_word(DS,SI+(Bit16u)&Int13Ext->size);
 
       // Buffer is too small
-      if(size < 0x1a) 
+      if(size < 0x1a)
         goto int13_fail;
 
       // EDD 1.x
@@ -5742,8 +6055,8 @@ int13_cdrom_rme_end:
         write_dword(DS, SI+(Bit16u)&Int13DPT->heads, 0xffffffff);
         write_dword(DS, SI+(Bit16u)&Int13DPT->spt, 0xffffffff);
         write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, 0xffffffff);  // FIXME should be Bit64
-        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);  
+        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);
+        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
         }
 
       // EDD 2.x
@@ -5753,8 +6066,8 @@ int13_cdrom_rme_end:
 
         write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
 
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);  
-        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);  
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
 
         // Fill in dpte
         channel = device / 2;
@@ -5770,7 +6083,7 @@ int13_cdrom_rme_end:
         options |= (mode==ATA_MODE_PIO32?1:0<<7);
 
         write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
-        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
         write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
         write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
         write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
@@ -5782,7 +6095,7 @@ int13_cdrom_rme_end:
         write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
 
         checksum=0;
-        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
         checksum = ~checksum;
         write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
         }
@@ -5808,7 +6121,7 @@ int13_cdrom_rme_end:
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
           write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
@@ -5821,7 +6134,7 @@ int13_cdrom_rme_end:
           write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
           write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
           }
-        else { 
+        else {
           // FIXME PCI
           }
         write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
@@ -5843,7 +6156,7 @@ int13_cdrom_rme_end:
       SET_AH(06);
       goto int13_fail_nostatus;
       break;
-      
+
     case 0x4e: // // IBM/MS set hardware configuration
       // DMA, prefetch, PIO maximum not supported
       switch (GET_AL()) {
@@ -5905,7 +6218,7 @@ int13_eltorito(DS, ES, DI, SI, BP, SP, B
 
   BX_DEBUG_INT13_ET("int13_eltorito: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
   // BX_DEBUG_INT13_ET("int13_eltorito: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
-  
+
   switch (GET_AH()) {
 
     // FIXME ElTorito Various. Should be implemented
@@ -5980,11 +6293,10 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
   Bit8u  atacmd[12];
 
   BX_DEBUG_INT13_ET("int13_cdemu: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
-  //BX_DEBUG_INT13_ET("int13_cdemu: SS=%04x ES=%04x DI=%04x SI=%04x\n", get_SS(), ES, DI, SI);
-  
+
   /* at this point, we are emulating a floppy/harddisk */
-  
-  // Recompute the device number 
+
+  // Recompute the device number
   device  = read_byte(ebda_seg,&EbdaData->cdemu.controller_index) * 2;
   device += read_byte(ebda_seg,&EbdaData->cdemu.device_spec);
 
@@ -5997,7 +6309,6 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
     goto int13_fail;
     }
 
-  
   switch (GET_AH()) {
 
     // all those functions return SUCCESS
@@ -6006,7 +6317,7 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
     case 0x0c: /* seek to specified cylinder */
     case 0x0d: /* alternate disk reset */  // FIXME ElTorito Various. should really reset ?
     case 0x10: /* check drive ready */     // FIXME ElTorito Various. should check if ready ?
-    case 0x11: /* recalibrate */      
+    case 0x11: /* recalibrate */
     case 0x14: /* controller internal diagnostic */
     case 0x16: /* detect disk change */
       goto int13_success;
@@ -6031,9 +6342,9 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
 
     case 0x02: // read disk sectors
     case 0x04: // verify disk sectors
-      vspt       = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt); 
-      vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders); 
-      vheads     = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads); 
+      vspt       = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+      vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders);
+      vheads     = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads);
 
       ilba       = read_dword(ebda_seg,&EbdaData->cdemu.ilba);
 
@@ -6062,17 +6373,17 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
 
       // calculate the virtual lba inside the image
       
vlba=((((Bit32u)cylinder*(Bit32u)vheads)+(Bit32u)head)*(Bit32u)vspt)+((Bit32u)(sector-1));
- 
+
       // In advance so we don't loose the count
       SET_AL(nbsectors);
 
       // start lba on cd
-      slba  = (Bit32u)vlba/4; 
+      slba  = (Bit32u)vlba/4;
       before= (Bit16u)vlba%4;
 
       // end lba on cd
       elba = (Bit32u)(vlba+nbsectors-1)/4;
-      
+
       memsetb(get_SS(),atacmd,0,12);
       atacmd[0]=0x28;                      // READ command
       atacmd[7]=((Bit16u)(elba-slba+1) & 0xff00) >> 8; // Sectors
@@ -6092,10 +6403,10 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
       break;
 
     case 0x08: /* read disk drive parameters */
-      vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt); 
-      vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1; 
-      vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1; 
- 
+      vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+      vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1;
+      vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1;
+
       SET_AL( 0x00 );
       SET_BL( 0x00 );
       SET_CH( vcylinders & 0xff );
@@ -6103,7 +6414,7 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
       SET_DH( vheads );
       SET_DL( 0x02 );   // FIXME ElTorito Various. should send the real count of drives 1 or 2
                         // FIXME ElTorito Harddisk. should send the HD count
- 
+
       switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
         case 0x01: SET_BL( 0x02 ); break;
         case 0x02: SET_BL( 0x04 ); break;
@@ -6139,7 +6450,7 @@ ASM_END
     case 0x45: // IBM/MS lock/unlock drive
     case 0x46: // IBM/MS eject media
     case 0x47: // IBM/MS extended seek
-    case 0x48: // IBM/MS get drive parameters 
+    case 0x48: // IBM/MS get drive parameters
     case 0x49: // IBM/MS extended media change
     case 0x4e: // ? - set hardware configuration
     case 0x50: // ? - send packet command
@@ -6227,8 +6538,8 @@ ASM_END
 }
 
   void
-int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
-  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+  Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
 {
   Bit8u    drive, num_sectors, sector, head, status, mod;
   Bit8u    drive_map;
@@ -6334,7 +6645,7 @@ BX_DEBUG_INT13_HD("int13_f01\n");
         }
 
       if ( (num_sectors > 128) || (num_sectors == 0) )
-        BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+        BX_PANIC("int13_harddisk: num_sectors out of range!\n");
 
       if (head > 15)
         BX_PANIC("hard drive BIOS:(read/verify) head > 15\n");
@@ -6480,7 +6791,7 @@ BX_DEBUG_INT13_HD("int13_f03\n");
         }
 
       if ( (num_sectors > 128) || (num_sectors == 0) )
-        BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+        BX_PANIC("int13_harddisk: num_sectors out of range!\n");
 
       if (head > 15)
         BX_PANIC("hard drive BIOS:(read) head > 15\n");
@@ -6590,7 +6901,7 @@ BX_DEBUG_INT13_HD("int13_f05\n");
 
     case 0x08: /* read disk drive parameters */
 BX_DEBUG_INT13_HD("int13_f08\n");
-      
+
       drive = GET_ELDL ();
       get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
 
@@ -6730,10 +7041,10 @@ ASM_END
       break;
 
     case 0x18: // set media type for format
-    case 0x41: // IBM/MS 
-    case 0x42: // IBM/MS 
-    case 0x43: // IBM/MS 
-    case 0x44: // IBM/MS 
+    case 0x41: // IBM/MS
+    case 0x42: // IBM/MS
+    case 0x43: // IBM/MS
+    case 0x44: // IBM/MS
     case 0x45: // IBM/MS lock/unlock drive
     case 0x46: // IBM/MS eject media
     case 0x47: // IBM/MS extended seek
@@ -6778,7 +7089,7 @@ get_hd_geometry(drive, hd_cylinders, hd_
     hd_type = inb_cmos(0x12) & 0x0f;
     if (hd_type != 0x0f)
       BX_INFO(panic_msg_reg12h,1);
-    hd_type = inb_cmos(0x1a); // HD0: extended type
+    hd_type = inb_cmos(0x1a); // HD1: extended type
     if (hd_type != 47)
       BX_INFO(panic_msg_reg19h,0,0x1a);
     iobase = 0x24;
@@ -6797,10 +7108,71 @@ get_hd_geometry(drive, hd_cylinders, hd_
 
 #endif //else BX_USE_ATADRV
 
+#if BX_SUPPORT_FLOPPY
 
 //////////////////////
 // FLOPPY functions //
 //////////////////////
+
+void floppy_reset_controller()
+{
+  Bit8u val8;
+
+  // Reset controller
+  val8 = inb(0x03f2);
+  outb(0x03f2, val8 & ~0x04);
+  outb(0x03f2, val8 | 0x04);
+
+  // Wait for controller to come out of reset
+  do {
+    val8 = inb(0x3f4);
+  } while ( (val8 & 0xc0) != 0x80 );
+}
+
+void floppy_prepare_controller(drive)
+  Bit16u drive;
+{
+  Bit8u  val8, dor, prev_reset;
+
+  // set 40:3e bit 7 to 0
+  val8 = read_byte(0x0040, 0x003e);
+  val8 &= 0x7f;
+  write_byte(0x0040, 0x003e, val8);
+
+  // turn on motor of selected drive, DMA & int enabled, normal operation
+  prev_reset = inb(0x03f2) & 0x04;
+  if (drive)
+    dor = 0x20;
+  else
+    dor = 0x10;
+  dor |= 0x0c;
+  dor |= drive;
+  outb(0x03f2, dor);
+
+  // reset the disk motor timeout value of INT 08
+  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+  // wait for drive readiness
+  do {
+    val8 = inb(0x3f4);
+  } while ( (val8 & 0xc0) != 0x80 );
+
+  if (prev_reset == 0) {
+    // turn on interrupts
+ASM_START
+    sti
+ASM_END
+    // wait on 40:3e bit 7 to become 1
+    do {
+      val8 = read_byte(0x0040, 0x003e);
+    } while ( (val8 & 0x80) == 0 );
+    val8 &= 0x7f;
+ASM_START
+    cli
+ASM_END
+    write_byte(0x0040, 0x003e, val8);
+  }
+}
 
   bx_bool
 floppy_media_known(drive)
@@ -6908,7 +7280,7 @@ floppy_media_sense(drive)
     retval = 1;
     }
   //
-  // Extended floppy size uses special cmos setting 
+  // Extended floppy size uses special cmos setting
   else if ( drive_type == 6 ) {
     // 160k 5.25" drive
     config_data = 0x00; // 0000 0000
@@ -6949,63 +7321,41 @@ floppy_drive_recal(drive)
 floppy_drive_recal(drive)
   Bit16u drive;
 {
-  Bit8u  val8, dor;
+  Bit8u  val8;
   Bit16u curr_cyl_offset;
 
-  // set 40:3e bit 7 to 0
-  val8 = read_byte(0x0000, 0x043e);
-  val8 &= 0x7f;
-  write_byte(0x0000, 0x043e, val8);
-
-  // turn on motor of selected drive, DMA & int enabled, normal operation
-  if (drive)
-    dor = 0x20;
-  else
-    dor = 0x10;
-  dor |= 0x0c;
-  dor |= drive;
-  outb(0x03f2, dor);
-
-  // reset the disk motor timeout value of INT 08
-  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-  // check port 3f4 for drive readiness
-  val8 = inb(0x3f4);
-  if ( (val8 & 0xf0) != 0x80 )
-    BX_PANIC("floppy recal:f07: ctrl not ready\n");
+  floppy_prepare_controller(drive);
 
   // send Recalibrate command (2 bytes) to controller
   outb(0x03f5, 0x07);  // 07: Recalibrate
   outb(0x03f5, drive); // 0=drive0, 1=drive1
 
- // turn on interrupts
+  // turn on interrupts
 ASM_START
   sti
 ASM_END
 
   // wait on 40:3e bit 7 to become 1
-  val8 = (read_byte(0x0000, 0x043e) & 0x80);
-  while ( val8 == 0 ) {
-    val8 = (read_byte(0x0000, 0x043e) & 0x80);
-    }
-
- val8 = 0; // separate asm from while() loop
- // turn off interrupts
+  do {
+    val8 = (read_byte(0x0040, 0x003e) & 0x80);
+  } while ( val8 == 0 );
+
+  val8 = 0; // separate asm from while() loop
+  // turn off interrupts
 ASM_START
   cli
 ASM_END
 
   // set 40:3e bit 7 to 0, and calibrated bit
-  val8 = read_byte(0x0000, 0x043e);
+  val8 = read_byte(0x0040, 0x003e);
   val8 &= 0x7f;
   if (drive) {
     val8 |= 0x02; // Drive 1 calibrated
     curr_cyl_offset = 0x0095;
-    }
-  else {
+  } else {
     val8 |= 0x01; // Drive 0 calibrated
     curr_cyl_offset = 0x0094;
-    }
+  }
   write_byte(0x0040, 0x003e, val8);
   write_byte(0x0040, curr_cyl_offset, 0); // current cylinder is 0
 
@@ -7032,7 +7382,6 @@ floppy_drive_exists(drive)
     return(1);
 }
 
-#if BX_SUPPORT_FLOPPY
   void
 int13_diskette_function(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
   Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
@@ -7045,7 +7394,6 @@ int13_diskette_function(DS, ES, DI, SI, 
   Bit16u es, last_addr;
 
   BX_DEBUG_INT13_FL("int13_diskette: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
-  // BX_DEBUG_INT13_FL("int13_diskette: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), get_DS(), ES, DI, SI);
 
   ah = GET_AH();
 
@@ -7058,7 +7406,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
         set_diskette_ret_status(1);
         SET_CF();
         return;
-        }
+      }
       drive_type = inb_cmos(0x10);
 
       if (drive == 0)
@@ -7070,7 +7418,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
         set_diskette_ret_status(0x80);
         SET_CF();
         return;
-        }
+      }
       SET_AH(0);
       set_diskette_ret_status(0);
       CLEAR_CF(); // successful
@@ -7083,7 +7431,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
       SET_AH(val8);
       if (val8) {
         SET_CF();
-        }
+      }
       return;
 
     case 0x02: // Read Diskette Sectors
@@ -7095,15 +7443,15 @@ BX_DEBUG_INT13_FL("floppy f00\n");
       head        = GET_DH();
       drive       = GET_ELDL();
 
-      if ( (drive > 1) || (head > 1) ||
-           (num_sectors == 0) || (num_sectors > 72) ) {
-BX_INFO("floppy: drive>1 || head>1 ...\n");
+      if ((drive > 1) || (head > 1) || (sector == 0) ||
+          (num_sectors == 0) || (num_sectors > 72)) {
+        BX_INFO("int13_diskette: read/write/verify: parameter out of range\n");
         SET_AH(1);
         set_diskette_ret_status(1);
         SET_AL(0); // no sectors read
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       // see if drive exists
       if (floppy_drive_exists(drive) == 0) {
@@ -7112,7 +7460,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         SET_AL(0); // no sectors read
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       // see if media in drive, and type is known
       if (floppy_media_known(drive) == 0) {
@@ -7122,8 +7470,8 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
         }
+      }
 
       if (ah == 0x02) {
         // Read Diskette Sectors
@@ -7142,7 +7490,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         if ( base_address < base_es ) {
           // in case of carry, adjust page by 1
           page++;
-          }
+        }
         base_count = (num_sectors * 512) - 1;
 
         // check for 64K boundary overrun
@@ -7153,7 +7501,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
+        }
 
         BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
         outb(0x000a, 0x06);
@@ -7186,28 +7534,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         //--------------------------------------
         // set up floppy controller for transfer
         //--------------------------------------
-
-        // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
-        val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
-
-        // turn on motor of selected drive, DMA & int enabled, normal operation
-        if (drive)
-          dor = 0x20;
-        else
-          dor = 0x10;
-        dor |= 0x0c;
-        dor |= drive;
-        outb(0x03f2, dor);
-
-        // reset the disk motor timeout value of INT 08
-        write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-        // check port 3f4 for drive readiness
-        val8 = inb(0x3f4);
-        if ( (val8 & 0xf0) != 0x80 )
-          BX_PANIC("int13_diskette:f02: ctrl not ready\n");
+        floppy_prepare_controller(drive);
 
         // send read-normal-data command (9 bytes) to controller
         outb(0x03f5, 0xe6); // e6: read normal data
@@ -7220,27 +7547,35 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         outb(0x03f5, 0); // Gap length
         outb(0x03f5, 0xff); // Gap length
 
-       // turn on interrupts
+        // turn on interrupts
   ASM_START
         sti
   ASM_END
 
         // wait on 40:3e bit 7 to become 1
-        val8 = (read_byte(0x0000, 0x043e) & 0x80);
-        while ( val8 == 0 ) {
-          val8 = (read_byte(0x0000, 0x043e) & 0x80);
+        do {
+          val8 = read_byte(0x0040, 0x0040);
+          if (val8 == 0) {
+            floppy_reset_controller();
+            SET_AH(0x80); // drive not ready (timeout)
+            set_diskette_ret_status(0x80);
+            SET_AL(0); // no sectors read
+            SET_CF(); // error occurred
+            return;
           }
-
-       val8 = 0; // separate asm from while() loop
-       // turn off interrupts
+          val8 = (read_byte(0x0040, 0x003e) & 0x80);
+        } while ( val8 == 0 );
+
+        val8 = 0; // separate asm from while() loop
+        // turn off interrupts
   ASM_START
         cli
   ASM_END
 
         // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
+        val8 = read_byte(0x0040, 0x003e);
         val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
+        write_byte(0x0040, 0x003e, val8);
 
         // check port 3f4 for accessibility to status bytes
         val8 = inb(0x3f4);
@@ -7271,7 +7606,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
+        }
 
         // ??? should track be new val from return_status[3] ?
         set_diskette_current_cyl(drive, track);
@@ -7279,8 +7614,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         SET_AH(0x00); // success
         CLEAR_CF();   // success
         return;
-        }
-      else if (ah == 0x03) {
+      } else if (ah == 0x03) {
         // Write Diskette Sectors
 
         //-----------------------------------
@@ -7297,7 +7631,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         if ( base_address < base_es ) {
           // in case of carry, adjust page by 1
           page++;
-          }
+        }
         base_count = (num_sectors * 512) - 1;
 
         // check for 64K boundary overrun
@@ -7308,7 +7642,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
+        }
 
         BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
         outb(0x000a, 0x06);
@@ -7334,30 +7668,9 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         //--------------------------------------
         // set up floppy controller for transfer
         //--------------------------------------
-
-        // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
-        val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
-
-        // turn on motor of selected drive, DMA & int enabled, normal operation
-        if (drive)
-          dor = 0x20;
-        else
-          dor = 0x10;
-        dor |= 0x0c;
-        dor |= drive;
-        outb(0x03f2, dor);
-
-        // reset the disk motor timeout value of INT 08
-        write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-        // check port 3f4 for drive readiness
-        val8 = inb(0x3f4);
-        if ( (val8 & 0xf0) != 0x80 )
-          BX_PANIC("int13_diskette:f03: ctrl not ready\n");
-
-        // send read-normal-data command (9 bytes) to controller
+        floppy_prepare_controller(drive);
+
+        // send write-normal-data command (9 bytes) to controller
         outb(0x03f5, 0xc5); // c5: write normal data
         outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
         outb(0x03f5, track);
@@ -7368,27 +7681,35 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         outb(0x03f5, 0); // Gap length
         outb(0x03f5, 0xff); // Gap length
 
-       // turn on interrupts
+        // turn on interrupts
   ASM_START
         sti
   ASM_END
 
         // wait on 40:3e bit 7 to become 1
-        val8 = (read_byte(0x0000, 0x043e) & 0x80);
-        while ( val8 == 0 ) {
-          val8 = (read_byte(0x0000, 0x043e) & 0x80);
+        do {
+          val8 = read_byte(0x0040, 0x0040);
+          if (val8 == 0) {
+            floppy_reset_controller();
+            SET_AH(0x80); // drive not ready (timeout)
+            set_diskette_ret_status(0x80);
+            SET_AL(0); // no sectors written
+            SET_CF(); // error occurred
+            return;
           }
-
-       val8 = 0; // separate asm from while() loop
-       // turn off interrupts
+          val8 = (read_byte(0x0040, 0x003e) & 0x80);
+        } while ( val8 == 0 );
+
+        val8 = 0; // separate asm from while() loop
+        // turn off interrupts
   ASM_START
         cli
   ASM_END
 
         // set 40:3e bit 7 to 0
-        val8 = read_byte(0x0000, 0x043e);
+        val8 = read_byte(0x0040, 0x003e);
         val8 &= 0x7f;
-        write_byte(0x0000, 0x043e, val8);
+        write_byte(0x0040, 0x003e, val8);
 
         // check port 3f4 for accessibility to status bytes
         val8 = inb(0x3f4);
@@ -7432,8 +7753,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         SET_AH(0x00); // success
         CLEAR_CF();   // success
         return;
-        }
-      else {  // if (ah == 0x04)
+      } else {  // if (ah == 0x04)
         // Verify Diskette Sectors
 
         // ??? should track be new val from return_status[3] ?
@@ -7442,8 +7762,8 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
         CLEAR_CF();   // success
         SET_AH(0x00); // success
         return;
-        }
-
+      }
+      break;
 
     case 0x05: // format diskette track
 BX_DEBUG_INT13_FL("floppy f05\n");
@@ -7458,7 +7778,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
         SET_AH(1);
         set_diskette_ret_status(1);
         SET_CF(); // error occurred
-        }
+      }
 
       // see if drive exists
       if (floppy_drive_exists(drive) == 0) {
@@ -7466,7 +7786,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
         set_diskette_ret_status(0x80);
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       // see if media in drive, and type is known
       if (floppy_media_known(drive) == 0) {
@@ -7476,8 +7796,8 @@ BX_DEBUG_INT13_FL("floppy f05\n");
           SET_AL(0); // no sectors read
           SET_CF(); // error occurred
           return;
-          }
         }
+      }
 
       // set up DMA controller for transfer
       page = (ES >> 12);   // upper 4 bits
@@ -7487,7 +7807,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
       if ( base_address < base_es ) {
         // in case of carry, adjust page by 1
         page++;
-        }
+      }
       base_count = (num_sectors * 4) - 1;
 
       // check for 64K boundary overrun
@@ -7498,7 +7818,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
         SET_AL(0); // no sectors read
         SET_CF(); // error occurred
         return;
-        }
+      }
 
       outb(0x000a, 0x06);
       outb(0x000c, 0x00); // clear flip-flop
@@ -7515,27 +7835,9 @@ BX_DEBUG_INT13_FL("floppy f05\n");
       outb(0x000a, 0x02);
 
       // set up floppy controller for transfer
-      val8 = read_byte(0x0000, 0x043e);
-      val8 &= 0x7f;
-      write_byte(0x0000, 0x043e, val8);
-      // turn on motor of selected drive, DMA & int enabled, normal operation
-      if (drive)
-        dor = 0x20;
-      else
-        dor = 0x10;
-      dor |= 0x0c;
-      dor |= drive;
-      outb(0x03f2, dor);
-
-      // reset the disk motor timeout value of INT 08
-      write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
-      // check port 3f4 for drive readiness
-      val8 = inb(0x3f4);
-      if ( (val8 & 0xf0) != 0x80 )
-        BX_PANIC("int13_diskette:f05: ctrl not ready\n");
-
-      // send read-normal-data command (6 bytes) to controller
+      floppy_prepare_controller(drive);
+
+      // send format-track command (6 bytes) to controller
       outb(0x03f5, 0x4d); // 4d: format track
       outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
       outb(0x03f5, 2); // 512 byte sector size
@@ -7546,20 +7848,29 @@ BX_DEBUG_INT13_FL("floppy f05\n");
   ASM_START
       sti
   ASM_END
+
       // wait on 40:3e bit 7 to become 1
-      val8 = (read_byte(0x0000, 0x043e) & 0x80);
-      while ( val8 == 0 ) {
-        val8 = (read_byte(0x0000, 0x043e) & 0x80);
+      do {
+        val8 = read_byte(0x0040, 0x0040);
+        if (val8 == 0) {
+          floppy_reset_controller();
+          SET_AH(0x80); // drive not ready (timeout)
+          set_diskette_ret_status(0x80);
+          SET_CF(); // error occurred
+          return;
         }
-     val8 = 0; // separate asm from while() loop
-     // turn off interrupts
+        val8 = (read_byte(0x0040, 0x003e) & 0x80);
+      } while ( val8 == 0 );
+
+      val8 = 0; // separate asm from while() loop
+      // turn off interrupts
   ASM_START
       cli
   ASM_END
       // set 40:3e bit 7 to 0
-      val8 = read_byte(0x0000, 0x043e);
+      val8 = read_byte(0x0040, 0x003e);
       val8 &= 0x7f;
-      write_byte(0x0000, 0x043e, val8);
+      write_byte(0x0040, 0x003e, val8);
       // check port 3f4 for accessibility to status bytes
       val8 = inb(0x3f4);
       if ( (val8 & 0xc0) != 0xc0 )
@@ -7911,8 +8222,9 @@ Bit16u seq_nr;
   Bit16u bootseg;
   Bit16u bootip;
   Bit16u status;
-
-  struct ipl_entry e;
+  Bit16u bootfirst;
+
+  ipl_entry_t e;
 
   // if BX_ELTORITO_BOOT is not defined, old behavior
   //   check bit 5 in CMOS reg 0x2d.  load either 0x00 or 0x80 into DL
@@ -7926,7 +8238,7 @@ Bit16u seq_nr;
   //     CMOS reg 0x38 & 0xf0 : 3rd boot device
   //   boot device codes:
   //     0x00 : not defined
-  //     0x01 : first floppy 
+  //     0x01 : first floppy
   //     0x02 : first harddrive
   //     0x03 : first cdrom
   //     0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot)
@@ -7938,16 +8250,25 @@ Bit16u seq_nr;
   bootdev |= ((inb_cmos(0x38) & 0xf0) << 4);
   bootdev >>= 4 * seq_nr;
   bootdev &= 0xf;
-  if (bootdev == 0) BX_PANIC("No bootable device.\n");
-  
+
+  /* Read user selected device */
+  bootfirst = read_word(ebda_seg, IPL_BOOTFIRST_OFFSET);
+  if (bootfirst != 0xFFFF) {
+    bootdev = bootfirst;
+    /* User selected device not set */
+    write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
+    /* Reset boot sequence */
+    write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xFFFF);
+  } else if (bootdev == 0) BX_PANIC("No bootable device.\n");
+
   /* Translate from CMOS runes to an IPL table offset by subtracting 1 */
   bootdev -= 1;
-#else  
+#else
   if (seq_nr ==2) BX_PANIC("No more boot devices.");
-  if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1)) 
+  if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1))
       /* Boot from floppy if the bit is set or it's the second boot */
     bootdev = 0x00;
-  else 
+  else
     bootdev = 0x01;
 #endif
 
@@ -7959,13 +8280,13 @@ Bit16u seq_nr;
 
   /* Do the loading, and set up vector as a far pointer to the boot
    * address, and bootdrv as the boot drive */
-  print_boot_device(e.type);
+  print_boot_device(&e);
 
   switch(e.type) {
-  case 0x01: /* FDD */
-  case 0x02: /* HDD */
-
-    bootdrv = (e.type == 0x02) ? 0x80 : 0x00;
+  case IPL_TYPE_FLOPPY: /* FDD */
+  case IPL_TYPE_HARDDISK: /* HDD */
+
+    bootdrv = (e.type == IPL_TYPE_HARDDISK) ? 0x80 : 0x00;
     bootseg = 0x07c0;
     status = 0;
 
@@ -7980,7 +8301,7 @@ ASM_START
     mov  dl, _int18_function.bootdrv + 2[bp]
     mov  ax, _int18_function.bootseg + 2[bp]
     mov  es, ax         ;; segment
-    mov  bx, #0x0000    ;; offset
+    xor  bx, bx         ;; offset
     mov  ah, #0x02      ;; function 2, read diskette sector
     mov  al, #0x01      ;; read 1 sector
     mov  ch, #0x00      ;; track 0
@@ -7998,7 +8319,7 @@ int19_load_done:
     pop  ax
     pop  bp
 ASM_END
-    
+
     if (status != 0) {
       print_boot_failure(e.type, 1);
       return;
@@ -8006,7 +8327,7 @@ ASM_END
 
     /* Always check the signature on a HDD boot sector; on FDD, only do
      * the check if the CMOS doesn't tell us to skip it */
-    if (e.type != 0x00 || !((inb_cmos(0x38) & 0x01))) {
+    if ((e.type != IPL_TYPE_FLOPPY) || !((inb_cmos(0x38) & 0x01))) {
       if (read_word(bootseg,0x1fe) != 0xaa55) {
         print_boot_failure(e.type, 0);
         return;
@@ -8024,7 +8345,7 @@ ASM_END
   break;
 
 #if BX_ELTORITO_BOOT
-  case 0x03: /* CD-ROM */
+  case IPL_TYPE_CDROM: /* CD-ROM */
     status = cdrom_boot();
 
     // If failure
@@ -8043,7 +8364,7 @@ ASM_END
     break;
 #endif
 
-  case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
+  case IPL_TYPE_BEV: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
     bootseg = e.vector >> 16;
     bootip = e.vector & 0xffff;
     break;
@@ -8051,16 +8372,20 @@ ASM_END
   default: return;
   }
 
-  
+  /* Debugging info */
+  BX_INFO("Booting from %x:%x\n", bootseg, bootip);
+
   /* Jump to the boot vector */
 ASM_START
     mov  bp, sp
+//    push cs
+//    push #int18_handler
     ;; Build an iret stack frame that will take us to the boot vector.
     ;; iret pops ip, then cs, then flags, so push them in the opposite order.
     pushf
-    mov  ax, _int18_function.bootseg + 0[bp] 
+    mov  ax, _int18_function.bootseg + 0[bp]
     push ax
-    mov  ax, _int18_function.bootip + 0[bp] 
+    mov  ax, _int18_function.bootip + 0[bp]
     push ax
     ;; Set the magic number in ax and the boot drive in dl.
     mov  ax, #0xaa55
@@ -8263,7 +8588,11 @@ int1a_function(regs, ds, iret_addr)
       } else if (regs.u.r8.bl == 0x83) {
         BX_INFO("bad PCI vendor ID %04x\n", regs.u.r16.dx);
       } else if (regs.u.r8.bl == 0x86) {
-        BX_INFO("PCI device %04x:%04x not found\n", regs.u.r16.dx, regs.u.r16.cx);
+        if (regs.u.r8.al == 0x02) {
+          BX_INFO("PCI device %04x:%04x not found at index %d\n", regs.u.r16.dx, regs.u.r16.cx, regs.u.r16.si);
+        } else {
+          BX_INFO("no PCI device with class code 0x%02x%04x found at index %d\n", regs.u.r8.cl, regs.u.r16.dx, regs.u.r16.si);
+        }
       }
       regs.u.r8.ah = regs.u.r8.bl;
       SetCF(iret_addr.flags);
@@ -8309,11 +8638,11 @@ ASM_END
           // Done waiting.
           Bit16u segment, offset;
 
-          offset = read_word( 0x40, 0x98 );
-          segment = read_word( 0x40, 0x9A );
+          segment = read_word( 0x40, 0x98 );
+          offset = read_word( 0x40, 0x9A );
           write_byte( 0x40, 0xA0, 0 );  // Turn of status byte.
           outb_cmos( 0xB, registerB & 0x37 ); // Clear the Periodic Interrupt.
-          write_byte( segment, offset, 0x80 );  // Write to specified flag byte.
+          write_byte(segment, offset, read_byte(segment, offset) | 0x80 );  // Write to specified flag byte.
         } else {
           // Continue waiting.
           time -= 0x3D1;
@@ -8521,13 +8850,18 @@ int13_notcdrom:
 #endif
 
 int13_disk:
+  ;; int13_harddisk modifies high word of EAX
+  shr   eax, #16
+  push  ax
   call  _int13_harddisk
+  pop   ax
+  shl   eax, #16
 
 int13_out:
   pop ds
   pop es
   popa
-  iret 
+  iret
 
 ;----------
 ;- INT18h -
@@ -8540,18 +8874,19 @@ int18_handler: ;; Boot Failure recovery:
   xor  ax, ax
   mov  ss, ax
 
-  ;; Get the boot sequence number out of the IPL memory
   ;; The first time we do this it will have been set to -1 so 
   ;; we will start from device 0.
-  mov  bx, #IPL_SEG 
+  mov  ds, ax
+  mov  bx, word ptr [0x40E]       ;; EBDA segment
   mov  ds, bx                     ;; Set segment
   mov  bx, IPL_SEQUENCE_OFFSET    ;; BX is now the sequence number
   inc  bx                         ;; ++
   mov  IPL_SEQUENCE_OFFSET, bx    ;; Write it back
-  mov  ds, ax                     ;; and reset the segment to zero. 
+  mov  ds, ax                     ;; and reset the segment to zero.
 
   ;; Call the C code for the next boot device
   push bx
+
   call _int18_function
 
   ;; Boot failed: invoke the boot recovery function...
@@ -8561,6 +8896,7 @@ int18_handler: ;; Boot Failure recovery:
 ;- INT19h -
 ;----------
 int19_relocated: ;; Boot function, relocated
+
   ;;
   ;; *** Warning: INT 19h resets the whole machine *** 
   ;;
@@ -8572,10 +8908,12 @@ int19_relocated: ;; Boot function, reloc
   ;; boot sequence will start, which is more or less the required behaviour.
   ;; 
   ;; Reset SP and SS
+
   mov  ax, #0xfffe
   mov  sp, ax
   xor  ax, ax
   mov  ss, ax
+
   call _machine_reset
 
 ;----------
@@ -8589,7 +8927,7 @@ int1c_handler: ;; User Timer Tick
 ;- POST: Floppy Drive -
 ;----------------------
 floppy_drive_post:
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
 
   mov  al, #0x00
@@ -8671,7 +9009,7 @@ hard_drive_post:
   mov  dx, #0x03f6
   out  dx, al
 
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
   mov  0x0474, al /* hard disk status of last operation */
   mov  0x0477, al /* hard disk port offset (XT only ???) */
@@ -8686,8 +9024,8 @@ hard_drive_post:
   SET_INT_VECTOR(0x76, #0xF000, #int76_handler)
   ;; INT 41h: hard disk 0 configuration pointer
   ;; INT 46h: hard disk 1 configuration pointer
-  SET_INT_VECTOR(0x41, #EBDA_SEG, #0x003D)
-  SET_INT_VECTOR(0x46, #EBDA_SEG, #0x004D)
+  SET_INT_VECTOR(0x41, word ptr [0x40E], #0x003D) /* EBDA:003D */
+  SET_INT_VECTOR(0x46, word ptr [0x40E], #0x004D) /* EBDA:004D */
 
   ;; move disk geometry data from CMOS to EBDA disk parameter table(s)
   mov  al, #0x12
@@ -8716,7 +9054,9 @@ post_d0_type47:
   ;; 22    landing zone high        D
   ;; 23    sectors/track            E
 
-  mov  ax, #EBDA_SEG
+  xor  ax, ax
+  mov  ds, ax
+  mov  ax, word ptr [0x40E] ;; EBDA segment
   mov  ds, ax
 
   ;;; Filling EBDA table for hard disk 0.
@@ -8862,7 +9202,9 @@ post_d1_type47:
   ;; 0x2b    landing zone high        D
   ;; 0x2c    sectors/track            E
 ;;; Fill EBDA table for hard disk 1.
-  mov  ax, #EBDA_SEG
+  xor  ax, ax
+  mov  ds, ax
+  mov  ax, word ptr [0x40E] ;; EBDA segment
   mov  ds, ax
   mov  al, #0x28
   out  #0x70, al
@@ -8993,13 +9335,42 @@ ebda_post:
 ;--------------------
 ; relocated here because the primary POST area isnt big enough.
 eoi_jmp_post:
-  call eoi_both_pics
-
+  mov   al, #0x20
+  out   #0xA0, al ;; slave  PIC EOI
+  mov   al, #0x20
+  out   #0x20, al ;; master PIC EOI
+
+jmp_post_0x467:
   xor ax, ax
   mov ds, ax
 
   jmp far ptr [0x467]
 
+iret_post_0x467: ;; load SS:SP from the words at 0000:0467 and 0000:0469, then iret
+  xor ax, ax
+  mov ds, ax      ;; DS = 0 for the absolute loads below
+
+  mov sp, [0x467] ;; SP from the word at 0000:0467
+  mov ss, [0x469] ;; SS from the word at 0000:0469
+  iret
+
+retf_post_0x467: ;; load SS:SP from the words at 0000:0467 and 0000:0469, then retf
+  xor ax, ax
+  mov ds, ax      ;; DS = 0 for the absolute loads below
+
+  mov sp, [0x467] ;; SP from the word at 0000:0467
+  mov ss, [0x469] ;; SS from the word at 0000:0469
+  retf
+
+s3_post: ;; calls _s3_resume and continues at normal_post if it returns 0 in AX
+#if BX_ROMBIOS32
+  call rombios32_init
+#endif
+  call _s3_resume
+  mov bl, #0x00
+  and ax, ax            ;; sets ZF when AX is 0
+  jz normal_post
+  call _s3_resume_panic ;; reached only for a non-zero AX
 
 ;--------------------
 eoi_both_pics:
@@ -9152,16 +9523,22 @@ bios32_structure:
 
 .align 16
 bios32_entry_point:
-  pushf
-  cmp eax, #0x49435024
+  pushfd
+  cmp eax, #0x49435024 ;; "$PCI"
   jne unknown_service
   mov eax, #0x80000000
   mov dx, #0x0cf8
   out dx, eax
   mov dx, #0x0cfc
   in  eax, dx
-  cmp eax, #0x12378086
+#ifdef PCI_FIXED_HOST_BRIDGE
+  cmp eax, #PCI_FIXED_HOST_BRIDGE
   jne unknown_service
+#else
+  ;; say ok if a device is present
+  cmp eax, #0xffffffff
+  je unknown_service
+#endif
   mov ebx, #0x000f0000
   mov ecx, #0
   mov edx, #pcibios_protected
@@ -9170,12 +9547,15 @@ unknown_service:
 unknown_service:
   mov al, #0x80
 bios32_end:
-  popf
+#ifdef BX_QEMU
+  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+  popfd
   retf
 
 .align 16
 pcibios_protected:
-  pushf
+  pushfd
   cli
   push esi
   push edi
@@ -9183,15 +9563,15 @@ pcibios_protected:
   jne pci_pro_f02
   mov bx, #0x0210
   mov cx, #0
-  mov edx, #0x20494350
+  mov edx, #0x20494350 ;; "PCI "
   mov al, #0x01
   jmp pci_pro_ok
 pci_pro_f02: ;; find pci device
   cmp al, #0x02
-  jne pci_pro_f08
+  jne pci_pro_f03
   shl ecx, #16
   mov cx, dx
-  mov bx, #0x0000
+  xor bx, bx
   mov di, #0x00
 pci_pro_devloop:
   call pci_pro_select_reg
@@ -9206,6 +9586,27 @@ pci_pro_nextdev:
   inc bx
   cmp bx, #0x0100
   jne pci_pro_devloop
+  mov ah, #0x86
+  jmp pci_pro_fail
+pci_pro_f03: ;; find class code
+  cmp al, #0x03
+  jne pci_pro_f08
+  xor bx, bx
+  mov di, #0x08
+pci_pro_devloop2:
+  call pci_pro_select_reg
+  mov dx, #0x0cfc
+  in  eax, dx
+  shr eax, #8
+  cmp eax, ecx
+  jne pci_pro_nextdev2
+  cmp si, #0
+  je  pci_pro_ok
+  dec si
+pci_pro_nextdev2:
+  inc bx
+  cmp bx, #0x0100
+  jne pci_pro_devloop2
   mov ah, #0x86
   jmp pci_pro_fail
 pci_pro_f08: ;; read configuration byte
@@ -9281,16 +9682,20 @@ pci_pro_fail:
 pci_pro_fail:
   pop edi
   pop esi
-  sti
-  popf
+#ifdef BX_QEMU
+  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+  popfd
   stc
   retf
 pci_pro_ok:
   xor ah, ah
   pop edi
   pop esi
-  sti
-  popf
+#ifdef BX_QEMU
+  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+  popfd
   clc
   retf
 
@@ -9317,8 +9722,14 @@ pcibios_real:
   out dx, eax
   mov dx, #0x0cfc
   in  eax, dx
-  cmp eax, #0x12378086
+#ifdef PCI_FIXED_HOST_BRIDGE
+  cmp eax, #PCI_FIXED_HOST_BRIDGE
   je  pci_present
+#else
+  ;; say ok if a device is present
+  cmp eax, #0xffffffff
+  jne  pci_present
+#endif
   pop dx
   pop eax
   mov ah, #0xff
@@ -9332,7 +9743,7 @@ pci_present:
   mov ax, #0x0001
   mov bx, #0x0210
   mov cx, #0
-  mov edx, #0x20494350
+  mov edx, #0x20494350 ;; "PCI "
   mov edi, #0xf0000
   mov di, #pcibios_protected
   clc
@@ -9341,10 +9752,10 @@ pci_real_f02: ;; find pci device
   push esi
   push edi
   cmp al, #0x02
-  jne pci_real_f08
+  jne pci_real_f03
   shl ecx, #16
   mov cx, dx
-  mov bx, #0x0000
+  xor bx, bx
   mov di, #0x00
 pci_real_devloop:
   call pci_real_select_reg
@@ -9361,7 +9772,30 @@ pci_real_nextdev:
   jne pci_real_devloop
   mov dx, cx
   shr ecx, #16
-  mov ah, #0x86
+  mov ax, #0x8602
+  jmp pci_real_fail
+pci_real_f03: ;; find class code
+  cmp al, #0x03
+  jne pci_real_f08
+  xor bx, bx
+  mov di, #0x08
+pci_real_devloop2:
+  call pci_real_select_reg
+  mov dx, #0x0cfc
+  in  eax, dx
+  shr eax, #8
+  cmp eax, ecx
+  jne pci_real_nextdev2
+  cmp si, #0
+  je  pci_real_ok
+  dec si
+pci_real_nextdev2:
+  inc bx
+  cmp bx, #0x0100
+  jne pci_real_devloop2
+  mov dx, cx
+  shr ecx, #16
+  mov ax, #0x8603
   jmp pci_real_fail
 pci_real_f08: ;; read configuration byte
   cmp al, #0x08
@@ -9423,7 +9857,7 @@ pci_real_f0c: ;; write configuration wor
   jmp pci_real_ok
 pci_real_f0d: ;; write configuration dword
   cmp al, #0x0d
-  jne pci_real_unknown
+  jne pci_real_f0e
   call pci_real_select_reg
   push dx
   mov dx, #0x0cfc
@@ -9431,6 +9865,46 @@ pci_real_f0d: ;; write configuration dwo
   out dx, eax
   pop dx
   jmp pci_real_ok
+pci_real_f0e: ;; get irq routing options
+  cmp al, #0x0e
+  jne pci_real_unknown
+  SEG ES
+  cmp word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+  jb pci_real_too_small
+  SEG ES
+  mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+  pushf
+  push ds
+  push es
+  push cx
+  push si
+  push di
+  cld
+  mov si, #pci_routing_table_structure_start
+  push cs
+  pop ds
+  SEG ES
+  mov cx, [di+2]
+  SEG ES
+  mov es, [di+4]
+  mov di, cx
+  mov cx, #pci_routing_table_structure_end - pci_routing_table_structure_start
+  rep
+      movsb
+  pop di
+  pop si
+  pop cx
+  pop es
+  pop ds
+  popf
+  mov bx, #(1 << 9) | (1 << 11)   ;; irq 9 and 11 are used
+  jmp pci_real_ok
+pci_real_too_small:
+  SEG ES
+  mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+  mov ah, #0x89
+  jmp pci_real_fail
+
 pci_real_unknown:
   mov ah, #0x81
 pci_real_fail:
@@ -9457,7 +9931,7 @@ pci_real_select_reg:
   out dx,  eax
   pop dx
   ret
-  
+
 .align 16
 pci_routing_table_structure:
   db 0x24, 0x50, 0x49, 0x52  ;; "$PIR" signature
@@ -9465,21 +9939,22 @@ pci_routing_table_structure:
   dw 32 + (6 * 16) ;; table size
   db 0 ;; PCI interrupt router bus
   db 0x08 ;; PCI interrupt router DevFunc
-  dw 0x0000 ;; PCI exclusive IRQs 
+  dw 0x0000 ;; PCI exclusive IRQs
   dw 0x8086 ;; compatible PCI interrupt router vendor ID
-  dw 0x7000 ;; compatible PCI interrupt router device ID
+  dw 0x122e ;; compatible PCI interrupt router device ID
   dw 0,0 ;; Miniport data
   db 0,0,0,0,0,0,0,0,0,0,0 ;; reserved
-  db 0x07 ;; checksum
+  db 0x37 ;; checksum
+pci_routing_table_structure_start:
   ;; first slot entry PCI-to-ISA (embedded)
   db 0 ;; pci bus number
   db 0x08 ;; pci device number (bit 7-3)
   db 0x61 ;; link value INTA#: pointer into PCI2ISA config space
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x62 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x63 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x60 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 0 ;; physical slot (0 = embedded)
@@ -9488,11 +9963,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x10 ;; pci device number (bit 7-3)
   db 0x62 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x63 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x60 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x61 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 1 ;; physical slot (0 = embedded)
@@ -9501,11 +9976,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x18 ;; pci device number (bit 7-3)
   db 0x63 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x60 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x61 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x62 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 2 ;; physical slot (0 = embedded)
@@ -9514,11 +9989,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x20 ;; pci device number (bit 7-3)
   db 0x60 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x61 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x62 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x63 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 3 ;; physical slot (0 = embedded)
@@ -9527,11 +10002,11 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x28 ;; pci device number (bit 7-3)
   db 0x61 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x62 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x63 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x60 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 4 ;; physical slot (0 = embedded)
@@ -9540,16 +10015,351 @@ pci_routing_table_structure:
   db 0 ;; pci bus number
   db 0x30 ;; pci device number (bit 7-3)
   db 0x62 ;; link value INTA#
-  dw 0x0c20 ;; IRQ bitmap INTA# 
+  dw 0x0c20 ;; IRQ bitmap INTA#
   db 0x63 ;; link value INTB#
-  dw 0x0c20 ;; IRQ bitmap INTB# 
+  dw 0x0c20 ;; IRQ bitmap INTB#
   db 0x60 ;; link value INTC#
-  dw 0x0c20 ;; IRQ bitmap INTC# 
+  dw 0x0c20 ;; IRQ bitmap INTC#
   db 0x61 ;; link value INTD#
   dw 0x0c20 ;; IRQ bitmap INTD#
   db 5 ;; physical slot (0 = embedded)
   db 0 ;; reserved
+pci_routing_table_structure_end:
+
+#if !BX_ROMBIOS32
+pci_irq_list:
+  db 11, 10, 9, 5;
+
+pcibios_init_sel_reg: ;; write a PCI config address to port 0x0CF8: BX = bus + devfunc, DL = register offset
+  push eax             ;; EAX is preserved for the caller
+  mov eax, #0x800000   ;; ends up as bit 31 after the shift below
+  mov ax,  bx          ;; low word = BX
+  shl eax, #8          ;; EAX = 0x80000000 | (BX << 8)
+  and dl,  #0xfc       ;; clear the low two bits of the register offset
+  or  al,  dl          ;; low byte = register offset
+  mov dx,  #0x0cf8     ;; DX is left holding 0x0CF8 on return
+  out dx,  eax
+  pop eax
+  ret
+
+pcibios_init_iomem_bases: ;; program memory and i/o base registers for BX = 0x0008 .. 0x00ff
+  push bp
+  mov  bp, sp
+  mov  eax, #0xe0000000 ;; base for memory init
+  push eax              ;; [bp-4]: next free memory base
+  mov  ax, #0xc000 ;; base for i/o init
+  push ax               ;; [bp-6]: next free i/o base
+  mov  ax, #0x0010 ;; start at base address #0
+  push ax               ;; [bp-8]: config offset of the current base address register
+  mov  bx, #0x0008      ;; first bus + devfunc value probed
+pci_init_io_loop1:
+  mov  dl, #0x00
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   ax, dx
+  cmp  ax, #0xffff      ;; 0xffff in the first config word: skip this devfunc
+  jz   next_pci_dev
+  mov  dl, #0x04 ;; disable i/o and memory space access
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   al, dx
+  and  al, #0xfc
+  out  dx, al
+pci_init_io_loop2:
+  mov  dl, [bp-8]
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   eax, dx
+  test al, #0x01        ;; bit 0 set: handle as an i/o base
+  jnz  init_io_base
+  mov  ecx, eax         ;; remember the original value
+  mov  eax, #0xffffffff
+  out  dx, eax          ;; write all ones and read back
+  in   eax, dx
+  cmp  eax, ecx         ;; unchanged: nothing to assign for this register
+  je   next_pci_base
+  xor  eax, #0xffffffff ;; invert the read-back value
+  mov  ecx, eax
+  mov  eax, [bp-4]
+  out  dx, eax          ;; program the current memory base
+  add  eax, ecx ;; calculate next free mem base
+  add  eax, #0x01000000
+  and  eax, #0xff000000 ;; keep only the top byte: next base is 16MB aligned
+  mov  [bp-4], eax
+  jmp  next_pci_base
+init_io_base:
+  mov  cx, ax           ;; remember the original value
+  mov  ax, #0xffff
+  out  dx, ax           ;; write all ones and read back
+  in   ax, dx
+  cmp  ax, cx           ;; unchanged: nothing to assign for this register
+  je   next_pci_base
+  xor  ax, #0xfffe      ;; invert all bits except bit 0
+  mov  cx, ax
+  mov  ax, [bp-6]
+  out  dx, ax           ;; program the current i/o base
+  add  ax, cx ;; calculate next free i/o base
+  add  ax, #0x0100
+  and  ax, #0xff00      ;; keep only the top byte: next base is 256 byte aligned
+  mov  [bp-6], ax
+next_pci_base:
+  mov  al, [bp-8]
+  add  al, #0x04
+  cmp  al, #0x28        ;; offsets 0x10 .. 0x24 have been handled
+  je   enable_iomem_space
+  mov  byte ptr[bp-8], al
+  jmp  pci_init_io_loop2
+enable_iomem_space:
+  mov  dl, #0x04 ;; enable i/o and memory space access if available
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   al, dx
+  or   al, #0x07
+  out  dx, al
+next_pci_dev:
+  mov  byte ptr[bp-8], #0x10 ;; back to the first base address register
+  inc  bx
+  cmp  bx, #0x0100
+  jne  pci_init_io_loop1
+  mov  sp, bp           ;; drop the three locals
+  pop  bp
+  ret
+
+pcibios_init_set_elcr: ;; set the bit for the IRQ number in AL in port 0x04D0 or 0x04D1
+  push ax
+  push cx
+  mov  dx, #0x04d0
+  test al, #0x08        ;; IRQ 8 or above?
+  jz   is_master_pic
+  inc  dx               ;; yes: use port 0x04D1
+  and  al, #0x07        ;; bit index within that port
+is_master_pic:
+  mov  cl, al
+  mov  bl, #0x01
+  shl  bl, cl           ;; BL = 1 << bit index (BL and DX are not preserved)
+  in   al, dx
+  or   al, bl
+  out  dx, al
+  pop  cx
+  pop  ax
+  ret
+
+pcibios_init_irqs: ;; walk the slot entries of pci_routing_table_structure and assign IRQs from pci_irq_list
+  push ds
+  push bp
+  mov  ax, #0xf000
+  mov  ds, ax
+  mov  dx, #0x04d0 ;; reset ELCR1 + ELCR2
+  mov  al, #0x00
+  out  dx, al
+  inc  dx
+  out  dx, al
+  mov  si, #pci_routing_table_structure
+  mov  bh, [si+8]       ;; PCI interrupt router bus
+  mov  bl, [si+9]       ;; PCI interrupt router DevFunc
+  mov  dl, #0x00
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   eax, dx
+  cmp  eax, [si+12] ;; check irq router
+  jne  pci_init_end
+  mov  dl, [si+34]      ;; link value INTA of the first slot entry
+  call pcibios_init_sel_reg
+  push bx ;; save irq router bus + devfunc
+  mov  dx, #0x0cfc
+  mov  ax, #0x8080
+  out  dx, ax ;; reset PIRQ route control
+  add  dx, #2
+  out  dx, ax
+  mov  ax, [si+6]       ;; presumably the table size field of the header
+  sub  ax, #0x20        ;; minus 32 bytes of header
+  shr  ax, #4           ;; divided by 16 bytes per slot entry
+  mov  cx, ax           ;; CX = loop count for pci_init_irq_loop1
+  add  si, #0x20 ;; set pointer to 1st entry
+  mov  bp, sp           ;; [bp] = saved irq router bus + devfunc
+  mov  ax, #pci_irq_list
+  push ax               ;; [bp-2]: pci irq list pointer
+  xor  ax, ax
+  push ax               ;; [bp-4]: zero word, byte [bp-3] is the pci function number
+pci_init_irq_loop1:
+  mov  bh, [si]         ;; pci bus number of this entry
+  mov  bl, [si+1]       ;; pci device number (bit 7-3)
+pci_init_irq_loop2:
+  mov  dl, #0x00
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  in   ax, dx
+  cmp  ax, #0xffff
+  jnz  pci_test_int_pin
+  test bl, #0x07        ;; 0xffff read at function 0: skip the whole entry
+  jz   next_pir_entry
+  jmp  next_pci_func
+pci_test_int_pin:
+  mov  dl, #0x3c
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfd      ;; second byte of the dword at config offset 0x3c
+  in   al, dx
+  and  al, #0x07
+  jz   next_pci_func    ;; zero: nothing to route for this function
+  dec  al ;; determine pirq reg
+  mov  dl, #0x03
+  mul  al, dl           ;; 3 bytes per link value + IRQ bitmap pair
+  add  al, #0x02        ;; link values start at entry offset 2
+  xor  ah, ah
+  mov  bx, ax
+  mov  al, [si+bx]      ;; link value for this pin
+  mov  dl, al
+  mov  bx, [bp]         ;; irq router bus + devfunc
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  and  al, #0x03
+  add  dl, al           ;; data port of the byte selected by the link value
+  in   al, dx
+  cmp  al, #0x80
+  jb   pirq_found       ;; value below 0x80: keep what is already programmed
+  mov  bx, [bp-2] ;; pci irq list pointer
+  mov  al, [bx]
+  out  dx, al
+  inc  bx
+  mov  [bp-2], bx
+  call pcibios_init_set_elcr
+pirq_found:
+  mov  bh, [si]
+  mov  bl, [si+1]
+  add  bl, [bp-3] ;; pci function number
+  mov  dl, #0x3c
+  call pcibios_init_sel_reg
+  mov  dx, #0x0cfc
+  out  dx, al           ;; write AL to config offset 0x3c of this function
+next_pci_func:
+  inc  byte ptr[bp-3]
+  inc  bl
+  test bl, #0x07        ;; low three bits wrapped to 0: all 8 functions done
+  jnz  pci_init_irq_loop2
+next_pir_entry:
+  add  si, #0x10
+  mov  byte ptr[bp-3], #0x00
+  loop pci_init_irq_loop1
+  mov  sp, bp           ;; drop the two locals
+  pop  bx
+pci_init_end:
+  pop  bp
+  pop  ds
+  ret
+#endif // !BX_ROMBIOS32
 #endif // BX_PCIBIOS
+
+#if BX_ROMBIOS32
+rombios32_init: ;; enter protected mode, call the 32 bit code at 0x000e0000, return in real mode
+  ;; save a20 and enable it
+  in al, 0x92
+  push ax         ;; saved port 0x92 value, popped again just before ret
+  or al, #0x02
+  out 0x92, al
+
+  ;; save SS:SP to the BDA
+  xor ax, ax
+  mov ds, ax
+  mov 0x0469, ss
+  mov 0x0467, sp
+
+  SEG CS
+    lidt [pmode_IDT_info]
+  SEG CS
+    lgdt [rombios32_gdt_48]
+  ;; set PE bit in CR0
+  mov  eax, cr0
+  or   al, #0x01
+  mov  cr0, eax
+  ;; start protected mode code: ljmpl 0x10:rombios32_05
+  db 0x66, 0xea
+  dw rombios32_05
+  dw 0x000f       ;; high 16 bit address
+  dw 0x0010       ;; selector 0x10
+
+use32 386
+rombios32_05:
+  ;; init data segments
+  mov eax, #0x18  ;; selector 0x18 for DS, ES and SS
+  mov ds, ax
+  mov es, ax
+  mov ss, ax
+  xor eax, eax    ;; selector 0 for FS and GS
+  mov fs, ax
+  mov gs, ax
+  cld
+
+  ;; init the stack pointer to point below EBDA
+  mov ax, [0x040e]
+  shl eax, #4
+  mov esp, #-0x10
+  add esp, eax    ;; ESP = (word at 0x040e << 4) - 0x10
+
+  ;; pass pointer to s3_resume_flag and s3_resume_vector to rombios32
+  push #0x04b0
+  push #0x04b2
+
+  ;; call rombios32 code
+  mov eax, #0x000e0000
+  call eax
+
+  ;; return to 16 bit protected mode first
+  db 0xea
+  dd rombios32_10
+  dw 0x20         ;; selector 0x20
+
+use16 386
+rombios32_10:
+  ;; restore data segment limits to 0xffff
+  mov ax, #0x28   ;; selector 0x28
+  mov ds, ax
+  mov es, ax
+  mov ss, ax
+  mov fs, ax
+  mov gs, ax
+
+  ;; reset PE bit in CR0
+  mov  eax, cr0
+  and  al, #0xFE
+  mov  cr0, eax
+
+  ;; far jump to flush CPU queue after transition to real mode
+  JMP_AP(0xf000, rombios32_real_mode)
+
+rombios32_real_mode:
+  ;; restore IDT to normal real-mode defaults
+  SEG CS
+    lidt [rmode_IDT_info]
+
+  xor ax, ax
+  mov ds, ax
+  mov es, ax
+  mov fs, ax
+  mov gs, ax
+
+  ;; restore SS:SP from the BDA
+  mov ss, 0x0469
+  xor esp, esp    ;; clear the upper half of ESP before loading SP
+  mov sp, 0x0467
+  ;; restore a20
+  pop ax
+  out 0x92, al
+  ret
+
+rombios32_gdt_48: ;; operand of the lgdt in rombios32_init
+  dw 0x30          ;; 0x30 = 6 descriptors of 8 bytes each
+  dw rombios32_gdt ;; low word of the table address
+  dw 0x000f        ;; high word of the table address
+
+rombios32_gdt: ;; descriptors for selectors 0x00 through 0x28
+  dw 0, 0, 0, 0
+  dw 0, 0, 0, 0
+  dw 0xffff, 0, 0x9b00, 0x00cf ; 32 bit flat code segment (0x10)
+  dw 0xffff, 0, 0x9300, 0x00cf ; 32 bit flat data segment (0x18)
+  dw 0xffff, 0, 0x9b0f, 0x0000 ; 16 bit code segment base=0xf0000 limit=0xffff
+  dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff
+#endif // BX_ROMBIOS32
+
 
 ; parallel port detection: base address in DX, index in BX, timeout in CL
 detect_parport:
@@ -9621,13 +10431,12 @@ checksum_loop:
   ret
 
 
-;; We need a copy of this string, but we are not actually a PnP BIOS, 
+;; We need a copy of this string, but we are not actually a PnP BIOS,
 ;; so make sure it is *not* aligned, so OSes will not see it if they scan.
 .align 16
   db 0
 pnp_string:
   .ascii "$PnP"
-
 
 rom_scan:
   ;; Scan for existence of valid expansion ROMS.
@@ -9645,8 +10454,9 @@ rom_scan:
 #if BX_TCGBIOS
   call _tcpa_start_option_rom_scan    /* specs: 3.2.3.3 + 10.4.3 */
 #endif
-  mov  cx, #0xc000
+
 rom_scan_loop:
+  push ax       ;; Save AX
   mov  ds, cx
   mov  ax, #0x0004 ;; start with increment of 4 (512-byte) blocks = 2k
   cmp [0], #0xAA55 ;; look for signature
@@ -9663,6 +10473,8 @@ rom_scan_loop:
   add  al, #0x04
 block_count_rounded:
 
+  xor  bx, bx   ;; Restore DS back to 0000:
+  mov  ds, bx
 #if BX_TCGBIOS
   push ax
   push ds
@@ -9673,7 +10485,7 @@ block_count_rounded:
   push ecx       ;; segment where option rom is located at
   call _tcpa_option_rom                   /* specs: 3.2.3.3 */
   add sp, #4    ;; pop segment
-  pop ecx      ;; original ecx
+  pop ecx      ;; original ecx
   pop ds
   pop ax
 #endif
@@ -9697,11 +10509,11 @@ fetch_bdf:
   xor  ax, ax
   mov  al, [bx]
 
-  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.  
+  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
   ;; That should stop it grabbing INT 19h; we will use its BEV instead.
   mov  bx, #0xf000
   mov  es, bx
-  lea  di, pnp_string 
+  lea  di, pnp_string
 
   xor  bx, bx   ;; Restore DS back to 0000:
   mov  ds, bx
@@ -9714,8 +10526,8 @@ fetch_bdf:
   add  sp, #2   ;; Pop offset value
   pop  cx       ;; Pop seg value (restore CX)
 
-  ;; Look at the ROM's PnP Expansion header.  Properly, we're supposed 
-  ;; to init all the ROMs and then go back and build an IPL table of 
+  ;; Look at the ROM's PnP Expansion header.  Properly, we're supposed
+  ;; to init all the ROMs and then go back and build an IPL table of
   ;; all the bootable devices, but we can get away with one pass.
   mov  ds, cx       ;; ROM base
   mov  bx, 0x001a   ;; 0x1A is the offset into ROM header that contains...
@@ -9723,22 +10535,54 @@ fetch_bdf:
   cmp  ax, #0x5024  ;; we look for signature "$PnP"
   jne  no_bev
   mov  ax, 2[bx]
-  cmp  ax, #0x506e 
+  cmp  ax, #0x506e
   jne  no_bev
+
+  mov  ax, 0x16[bx] ;; 0x16 is the offset of Boot Connection Vector
+  cmp  ax, #0x0000
+  je   no_bcv
+
+  ;; Option ROM has BCV. Run it now.
+  push cx       ;; Push seg
+  push ax       ;; Push offset
+
+  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
+  mov  bx, #0xf000
+  mov  es, bx
+  lea  di, pnp_string
+  /* jump to BCV function entry pointer */
+  mov  bp, sp   ;; Call ROM BCV routine using seg:off on stack
+  db   0xff     ;; call_far ss:[bp+0]
+  db   0x5e
+  db   0
+  cli           ;; In case expansion ROM BIOS turns IF on
+  add  sp, #2   ;; Pop offset value
+  pop  cx       ;; Pop seg value (restore CX)
+  jmp   no_bev
+
+no_bcv:
   mov  ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of...
   cmp  ax, #0x0000  ;; the Bootstrap Entry Vector, or zero if there is none.
   je   no_bev
 
-  ;; Found a device that thinks it can boot the system.  Record its BEV.
-  mov  bx, #IPL_SEG            ;; Go to the segment where the IPL table lives 
+  ;; Found a device that thinks it can boot the system.  Record its BEV and product name string.
+  mov  di, 0x10[bx]            ;; Pointer to the product name string or zero if none
+  xor  bx, bx
   mov  ds, bx
+  mov  bx, word ptr [0x40E]    ;; EBDA segment
+  mov  ds, bx                  ;; Go to the segment where the IPL table lives
   mov  bx, IPL_COUNT_OFFSET    ;; Read the number of entries so far
   cmp  bx, #IPL_TABLE_ENTRIES
   je   no_bev                  ;; Get out if the table is full
   shl  bx, #0x4                ;; Turn count into offset (entries are 16 bytes)
+  mov  IPL_TABLE_OFFSET+0[bx], #IPL_TYPE_BEV ;; This entry is a BEV device
+  mov  IPL_TABLE_OFFSET+6[bx], cx            ;; Build a far pointer from the segment...
+  mov  IPL_TABLE_OFFSET+4[bx], ax            ;; and the offset
+  cmp  di, #0x0000                           ;; No product name string in the ROM header?
+  je   no_prod_str                           ;; Then leave the name pointer of this entry alone
+  mov  IPL_TABLE_OFFSET+0xA[bx], cx          ;; Name far pointer: segment, based at the table like the fields above
+  mov  IPL_TABLE_OFFSET+8[bx], di            ;; and the offset
+no_prod_str:
   shr  bx, #0x4                ;; Turn the offset back into a count
   inc  bx                      ;; We have one more entry now
   mov  IPL_COUNT_OFFSET, bx    ;; Remember that.
@@ -9750,7 +10594,8 @@ rom_scan_increment:
   shl  ax, #5   ;; convert 512-bytes blocks to 16-byte increments
                 ;; because the segment selector is shifted left 4 bits.
   add  cx, ax
-  cmp  cx, #0xe000
+  pop  ax       ;; Restore AX
+  cmp  cx, ax
   jbe  rom_scan_loop
 
   xor  ax, ax   ;; Restore DS back to 0000:
@@ -9815,234 +10660,7 @@ tcpa_post_part2:
 #endif
 
 
-;; for 'C' strings and other data, insert them here with
-;; a the following hack:
-;; DATA_SEG_DEFS_HERE
-
-
-;--------
-;- POST -
-;--------
-.org 0xe05b ; POST Entry Point
-post:
-
-  xor ax, ax
-
-  ;; first reset the DMA controllers
-  out 0x0d,al
-  out 0xda,al
-
-  ;; then initialize the DMA controllers
-  mov al, #0xC0
-  out 0xD6, al ; cascade mode of channel 4 enabled
-  mov al, #0x00
-  out 0xD4, al ; unmask channel 4
-
-  ;; Examine CMOS shutdown status.
-  mov AL, #0x0f
-  out 0x70, AL
-  in  AL, 0x71
-
-  ;; backup status
-  mov bl, al
-
-  ;; Reset CMOS shutdown status.
-  mov AL, #0x0f
-  out 0x70, AL          ; select CMOS register Fh
-  mov AL, #0x00
-  out 0x71, AL          ; set shutdown action to normal
-
-  ;; Examine CMOS shutdown status.
-  mov al, bl
-  mov dx, #EBDA_SEG
-  mov ds, dx
-  mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL
-
-  cli
-  mov  ax, #0xfffe
-  mov  sp, ax
-  mov  ax, #0x0000
-  mov  ds, ax
-  mov  ss, ax
-
-  ;; zero out BIOS data area (40:00..40:ff)
-  mov  es, ax
-  mov  cx, #0x0080 ;; 128 words
-  mov  di, #0x0400
-  cld
-  rep
-    stosw
-
-  call _log_bios_start
-
-  ;; set all interrupts to default handler
-  mov  bx, #0x0000    ;; offset index
-  mov  cx, #0x0100    ;; counter (256 interrupts)
-  mov  ax, #dummy_iret_handler
-  mov  dx, #0xF000
-
-post_default_ints:
-  mov  [bx], ax
-  inc  bx
-  inc  bx
-  mov  [bx], dx
-  inc  bx
-  inc  bx
-  loop post_default_ints
-
-  ;; set vector 0x79 to zero
-  ;; this is used by 'gardian angel' protection system
-  SET_INT_VECTOR(0x79, #0, #0)
-
-  ;; base memory in K 40:13 (word)
-  mov  ax, #BASE_MEM_IN_K
-  mov  0x0413, ax
-
-
-  ;; Manufacturing Test 40:12
-  ;;   zerod out above
-
-  ;; Warm Boot Flag 0040:0072
-  ;;   value of 1234h = skip memory checks
-  ;;   zerod out above
-
-
-  ;; Printer Services vector
-  SET_INT_VECTOR(0x17, #0xF000, #int17_handler)
-
-  ;; Bootstrap failure vector
-  SET_INT_VECTOR(0x18, #0xF000, #int18_handler)
-
-  ;; Bootstrap Loader vector
-  SET_INT_VECTOR(0x19, #0xF000, #int19_handler)
-
-  ;; User Timer Tick vector
-  SET_INT_VECTOR(0x1c, #0xF000, #int1c_handler)
-
-  ;; Memory Size Check vector
-  SET_INT_VECTOR(0x12, #0xF000, #int12_handler)
-
-  ;; Equipment Configuration Check vector
-  SET_INT_VECTOR(0x11, #0xF000, #int11_handler)
-
-  ;; System Services
-  SET_INT_VECTOR(0x15, #0xF000, #int15_handler)
-
-  ;; EBDA setup
-  call ebda_post
-
-  ;; PIT setup
-  SET_INT_VECTOR(0x08, #0xF000, #int08_handler)
-  ;; int 1C already points at dummy_iret_handler (above)
-  mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
-  out 0x43, al
-#ifdef HVMASSIST
-  mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
-  out 0x40, al ; lsb
-  mov al, #0xe9
-  out 0x40, al ; msb
-#else
-  mov al, #0x00 ; maximum count of 0000H = 18.2Hz
-  out 0x40, al
-  out 0x40, al
-#endif
-
-  ;; Keyboard
-  SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
-  SET_INT_VECTOR(0x16, #0xF000, #int16_handler)
-
-  xor  ax, ax
-  mov  ds, ax
-  mov  0x0417, al /* keyboard shift flags, set 1 */
-  mov  0x0418, al /* keyboard shift flags, set 2 */
-  mov  0x0419, al /* keyboard alt-numpad work area */
-  mov  0x0471, al /* keyboard ctrl-break flag */
-  mov  0x0497, al /* keyboard status flags 4 */
-  mov  al, #0x10
-  mov  0x0496, al /* keyboard status flags 3 */
-
-
-  /* keyboard head of buffer pointer */
-  mov  bx, #0x001E
-  mov  0x041A, bx
-
-  /* keyboard end of buffer pointer */
-  mov  0x041C, bx
-
-  /* keyboard pointer to start of buffer */
-  mov  bx, #0x001E
-  mov  0x0480, bx
-
-  /* keyboard pointer to end of buffer */
-  mov  bx, #0x003E
-  mov  0x0482, bx
-
-  /* init the keyboard */
-  call _keyboard_init
-
-  ;; mov CMOS Equipment Byte to BDA Equipment Word
-  mov  ax, 0x0410
-  mov  al, #0x14
-  out  0x70, al
-  in   al, 0x71
-  mov  0x0410, ax
-
-#if BX_TCGBIOS
-  call tcpa_post_part1
-#endif
-
-  ;; Parallel setup
-  SET_INT_VECTOR(0x0F, #0xF000, #dummy_iret_handler)
-  xor ax, ax
-  mov ds, ax
-  xor bx, bx
-  mov cl, #0x14 ; timeout value
-  mov dx, #0x378 ; Parallel I/O address, port 1
-  call detect_parport
-  mov dx, #0x278 ; Parallel I/O address, port 2
-  call detect_parport
-  shl bx, #0x0e
-  mov ax, 0x410   ; Equipment word bits 14..15 determing # parallel ports
-  and ax, #0x3fff
-  or  ax, bx ; set number of parallel ports
-  mov 0x410, ax
-
-  ;; Serial setup
-  SET_INT_VECTOR(0x0C, #0xF000, #dummy_iret_handler)
-  SET_INT_VECTOR(0x14, #0xF000, #int14_handler)
-  xor bx, bx
-  mov cl, #0x0a ; timeout value
-  mov dx, #0x03f8 ; Serial I/O address, port 1
-  call detect_serial
-  mov dx, #0x02f8 ; Serial I/O address, port 2
-  call detect_serial
-  mov dx, #0x03e8 ; Serial I/O address, port 3
-  call detect_serial
-  mov dx, #0x02e8 ; Serial I/O address, port 4
-  call detect_serial
-  shl bx, #0x09
-  mov ax, 0x410   ; Equipment word bits 9..11 determing # serial ports
-  and ax, #0xf1ff
-  or  ax, bx ; set number of serial port
-  mov 0x410, ax
-
-  ;; CMOS RTC
-  SET_INT_VECTOR(0x1A, #0xF000, #int1a_handler)
-  SET_INT_VECTOR(0x4A, #0xF000, #dummy_iret_handler)
-  SET_INT_VECTOR(0x70, #0xF000, #int70_handler)
-  ;; BIOS DATA AREA 0x4CE ???
-  call timer_tick_post
-
-  ;; PS/2 mouse setup
-  SET_INT_VECTOR(0x74, #0xF000, #int74_handler)
-
-  ;; IRQ13 (FPU exception) setup
-  SET_INT_VECTOR(0x75, #0xF000, #int75_handler)
-
-  ;; Video setup
-  SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
-
-  ;; PIC
+post_init_pic:
   mov al, #0x11 ; send initialisation commands
   out 0x20, al
   out 0xa0, al
@@ -10065,6 +10683,329 @@ post_default_ints:
   mov  al, #0x9f
 #endif
   out  0xa1, AL ;slave  pic: unmask IRQ 12, 13, 14
+  ret
+
+;; the following area can be used to write dynamically generated tables
+  .align 16
+bios_table_area_start:
+  dd 0xaafb4442
+  dd bios_table_area_end - bios_table_area_start - 8;
+
+;--------
+;- POST -
+;--------
+.org 0xe05b ; POST Entry Point
+post:
+
+  xor ax, ax
+
+  ;; first reset the DMA controllers
+  out 0x0d,al
+  out 0xda,al
+
+  ;; then initialize the DMA controllers
+  mov al, #0xC0
+  out 0xD6, al ; cascade mode of channel 4 enabled
+  mov al, #0x00
+  out 0xD4, al ; unmask channel 4
+
+  ;; Examine CMOS shutdown status.
+  mov AL, #0x0f
+  out 0x70, AL
+  in  AL, 0x71
+
+  ;; backup status
+  mov bl, al
+
+  ;; Reset CMOS shutdown status.
+  mov AL, #0x0f
+  out 0x70, AL          ; select CMOS register Fh
+  mov AL, #0x00
+  out 0x71, AL          ; set shutdown action to normal
+
+  ;; Examine CMOS shutdown status.
+  mov al, bl
+
+  ;; 0x00, 0x09, 0x0D+ = normal startup
+  cmp AL, #0x00
+  jz normal_post
+  cmp AL, #0x0d
+  jae normal_post
+  cmp AL, #0x09
+  je normal_post
+
+  ;; 0x05 = eoi + jmp via [0x40:0x67] jump
+  cmp al, #0x05
+  je  eoi_jmp_post
+
+  ;; 0x0A = jmp via [0x40:0x67] jump
+  cmp al, #0x0a
+  je  jmp_post_0x467
+
+  ;; 0x0B = iret via [0x40:0x67]
+  cmp al, #0x0b
+  je  iret_post_0x467
+
+  ;; 0x0C = retf via [0x40:0x67]
+  cmp al, #0x0c
+  je  retf_post_0x467
+
+  ;; Examine CMOS shutdown status.
+  ;;  0x01,0x02,0x03,0x04,0x06,0x07,0x08 = Unimplemented shutdown status.
+  push bx
+  call _shutdown_status_panic
+
+#if 0
+  HALT(__LINE__)
+  ;
+  ;#if 0
+  ;  0xb0, 0x20,       /* mov al, #0x20 */
+  ;  0xe6, 0x20,       /* out 0x20, al    ;send EOI to PIC */
+  ;#endif
+  ;
+  pop es
+  pop ds
+  popa
+  iret
+#endif
+
+normal_post:
+  ; case 0: normal startup
+
+  cli
+  mov  ax, #0xfffe
+  mov  sp, ax
+  xor  ax, ax
+  mov  ds, ax
+  mov  ss, ax
+
+  ;; Save shutdown status
+  mov 0x04b0, bl
+
+  cmp bl, #0xfe
+  jz s3_post
+
+  ;; zero out BIOS data area (40:00..40:ff)
+  mov  es, ax
+  mov  cx, #0x0080 ;; 128 words
+  mov  di, #0x0400
+  cld
+  rep
+    stosw
+
+  call _log_bios_start
+
+  ;; set all interrupts to default handler
+  xor  bx, bx         ;; offset index
+  mov  cx, #0x0100    ;; counter (256 interrupts)
+  mov  ax, #dummy_iret_handler
+  mov  dx, #0xF000
+
+post_default_ints:
+  mov  [bx], ax
+  add  bx, #2
+  mov  [bx], dx
+  add  bx, #2
+  loop post_default_ints
+
+  ;; set vector 0x79 to zero
+  ;; this is used by 'gardian angel' protection system
+  SET_INT_VECTOR(0x79, #0, #0)
+
+  ;; base memory in K 40:13 (word)
+  mov  ax, #BASE_MEM_IN_K
+  mov  0x0413, ax
+
+
+  ;; Manufacturing Test 40:12
+  ;;   zerod out above
+
+  ;; Warm Boot Flag 0040:0072
+  ;;   value of 1234h = skip memory checks
+  ;;   zerod out above
+
+
+  ;; Printer Services vector
+  SET_INT_VECTOR(0x17, #0xF000, #int17_handler)
+
+  ;; Bootstrap failure vector
+  SET_INT_VECTOR(0x18, #0xF000, #int18_handler)
+
+  ;; Bootstrap Loader vector
+  SET_INT_VECTOR(0x19, #0xF000, #int19_handler)
+
+  ;; User Timer Tick vector
+  SET_INT_VECTOR(0x1c, #0xF000, #int1c_handler)
+
+  ;; Memory Size Check vector
+  SET_INT_VECTOR(0x12, #0xF000, #int12_handler)
+
+  ;; Equipment Configuration Check vector
+  SET_INT_VECTOR(0x11, #0xF000, #int11_handler)
+
+  ;; System Services
+  SET_INT_VECTOR(0x15, #0xF000, #int15_handler)
+
+  ;; EBDA setup
+  call ebda_post
+
+  ;; PIT setup
+  SET_INT_VECTOR(0x08, #0xF000, #int08_handler)
+  ;; int 1C already points at dummy_iret_handler (above)
+  mov al, #0x34 ; timer0: binary count, 16bit count, mode 2
+  out 0x43, al
+#ifdef HVMASSIST
+  mov al, #0x0b ; #0xe90b = 20 Hz (temporary, until we fix xen/vmx support)
+  out 0x40, al ; lsb
+  mov al, #0xe9
+  out 0x40, al ; msb
+#else
+  mov al, #0x00 ; maximum count of 0000H = 18.2Hz
+  out 0x40, al
+  out 0x40, al
+#endif
+
+  ;; Keyboard
+  SET_INT_VECTOR(0x09, #0xF000, #int09_handler)
+  SET_INT_VECTOR(0x16, #0xF000, #int16_handler)
+
+  xor  ax, ax
+  mov  ds, ax
+  mov  0x0417, al /* keyboard shift flags, set 1 */
+  mov  0x0418, al /* keyboard shift flags, set 2 */
+  mov  0x0419, al /* keyboard alt-numpad work area */
+  mov  0x0471, al /* keyboard ctrl-break flag */
+  mov  0x0497, al /* keyboard status flags 4 */
+  mov  al, #0x10
+  mov  0x0496, al /* keyboard status flags 3 */
+
+
+  /* keyboard head of buffer pointer */
+  mov  bx, #0x001E
+  mov  0x041A, bx
+
+  /* keyboard end of buffer pointer */
+  mov  0x041C, bx
+
+  /* keyboard pointer to start of buffer */
+  mov  bx, #0x001E
+  mov  0x0480, bx
+
+  /* keyboard pointer to end of buffer */
+  mov  bx, #0x003E
+  mov  0x0482, bx
+
+  /* init the keyboard */
+  call _keyboard_init
+
+  ;; mov CMOS Equipment Byte to BDA Equipment Word
+  mov  ax, 0x0410
+  mov  al, #0x14
+  out  0x70, al
+  in   al, 0x71
+  mov  0x0410, ax
+
+#if BX_TCGBIOS
+  call tcpa_post_part1
+#endif
+
+  ;; Parallel setup
+  SET_INT_VECTOR(0x0F, #0xF000, #dummy_iret_handler)
+  xor ax, ax
+  mov ds, ax
+  xor bx, bx
+  mov cl, #0x14 ; timeout value
+  mov dx, #0x378 ; Parallel I/O address, port 1
+  call detect_parport
+  mov dx, #0x278 ; Parallel I/O address, port 2
+  call detect_parport
+  shl bx, #0x0e
+  mov ax, 0x410   ; Equipment word bits 14..15 determing # parallel ports
+  and ax, #0x3fff
+  or  ax, bx ; set number of parallel ports
+  mov 0x410, ax
+
+  ;; Serial setup
+  SET_INT_VECTOR(0x0C, #0xF000, #dummy_iret_handler)
+  SET_INT_VECTOR(0x14, #0xF000, #int14_handler)
+  xor bx, bx
+  mov cl, #0x0a ; timeout value
+  mov dx, #0x03f8 ; Serial I/O address, port 1
+  call detect_serial
+  mov dx, #0x02f8 ; Serial I/O address, port 2
+  call detect_serial
+  mov dx, #0x03e8 ; Serial I/O address, port 3
+  call detect_serial
+  mov dx, #0x02e8 ; Serial I/O address, port 4
+  call detect_serial
+  shl bx, #0x09
+  mov ax, 0x410   ; Equipment word bits 9..11 determing # serial ports
+  and ax, #0xf1ff
+  or  ax, bx ; set number of serial port
+  mov 0x410, ax
+
+  ;; CMOS RTC
+  SET_INT_VECTOR(0x1A, #0xF000, #int1a_handler)
+  SET_INT_VECTOR(0x4A, #0xF000, #dummy_iret_handler)
+  SET_INT_VECTOR(0x70, #0xF000, #int70_handler)
+  ;; BIOS DATA AREA 0x4CE ???
+  call timer_tick_post
+
+  ;; PS/2 mouse setup
+  SET_INT_VECTOR(0x74, #0xF000, #int74_handler)
+
+  ;; IRQ13 (FPU exception) setup
+  SET_INT_VECTOR(0x75, #0xF000, #int75_handler)
+
+  ;; Video setup
+  SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
+
+  ;; PIC
+  call post_init_pic
+
+  mov  cx, #0xc000  ;; init vga bios
+  mov  ax, #0xc780
+  call rom_scan
+
+  call _print_bios_banner
+
+#if BX_ROMBIOS32
+  call rombios32_init
+#else
+#if BX_PCIBIOS
+  call pcibios_init_iomem_bases
+  call pcibios_init_irqs
+#endif //BX_PCIBIOS
+#endif
+
+  ;;
+  ;; Floppy setup
+  ;;
+  call floppy_drive_post
+
+  ;;
+  ;; Hard Drive setup
+  ;;
+  call hard_drive_post
+
+#if BX_USE_ATADRV
+
+  ;;
+  ;; ATA/ATAPI driver setup
+  ;;
+  call _ata_init
+  call _ata_detect
+  ;;
+
+#endif // BX_USE_ATADRV
+
+#if BX_ELTORITO_BOOT
+  ;;
+  ;; eltorito floppy/harddisk emulation from cd
+  ;;
+  call _cdemu_init
+  ;;
+#endif // BX_ELTORITO_BOOT
 
 #ifdef HVMASSIST
   call _enable_rom_write_access
@@ -10076,52 +11017,19 @@ post_default_ints:
 
   call _init_boot_vectors
 
+  mov  cx, #0xc800  ;; init option roms
+  mov  ax, #0xe000
   call rom_scan
 
-  call _print_bios_banner 
-
-  ;;
-  ;; Floppy setup
-  ;;
-  call floppy_drive_post
-
-#if BX_USE_ATADRV
-
-  ;;
-  ;; Hard Drive setup
-  ;;
-  call hard_drive_post
-
-  ;;
-  ;; ATA/ATAPI driver setup
-  ;;
-  call _ata_init
-  call _ata_detect
-  ;;
-#else // BX_USE_ATADRV
-
-  ;;
-  ;; Hard Drive setup
-  ;;
-  call hard_drive_post
-
-#endif // BX_USE_ATADRV
-
 #if BX_ELTORITO_BOOT
-  ;;
-  ;; eltorito floppy/harddisk emulation from cd
-  ;;
-  call _cdemu_init
-  ;;
+  call _interactive_bootkey
 #endif // BX_ELTORITO_BOOT
-
-  call _s3_resume
-  call _interactive_bootkey
 
 #if BX_TCGBIOS
   call tcpa_post_part2
 #endif
 
+  sti        ;; enable interrupts
   ;; Start the boot sequence.   See the comments in int19_relocated 
   ;; for why we use INT 18h instead of INT 19h here.
   int  #0x18
@@ -10134,7 +11042,7 @@ nmi:
   iret
 
 int75_handler:
-  out  0xf0, al         // clear irq13 
+  out  0xf0, al         // clear irq13
   call eoi_both_pics    // clear interrupt
   int  2                // legacy nmi call
   iret
@@ -10233,7 +11141,7 @@ int14_handler:
 int14_handler:
   push ds
   pusha
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
   call _int14_function
   popa
@@ -10338,26 +11246,7 @@ int09_handler:
   jz  int09_finish
 
   in  al, #0x60             ;;read key from keyboard controller
-  //test al, #0x80            ;;look for key release
-  //jnz  int09_process_key    ;; dont pass releases to intercept?
-
-  ;; check for extended key
-  cmp  al, #0xe0
-  jne int09_call_int15_4f
-  
-  push ds
-  xor  ax, ax
-  mov  ds, ax
-  mov  al, BYTE [0x496]     ;; mf2_state |= 0x01
-  or   al, #0x01
-  mov  BYTE [0x496], al
-  pop  ds
-  
-  in  al, #0x60             ;;read another key from keyboard controller
-
   sti
-
-int09_call_int15_4f:
   push  ds
   pusha
 #ifdef BX_CALL_INT15_4F
@@ -10367,8 +11256,27 @@ int09_call_int15_4f:
   jnc  int09_done
 #endif
 
-
-//int09_process_key:
+  ;; check for extended key
+  cmp  al, #0xe0
+  jne int09_check_pause
+  xor  ax, ax
+  mov  ds, ax
+  mov  al, BYTE [0x496]     ;; mf2_state |= 0x02
+  or   al, #0x02
+  mov  BYTE [0x496], al
+  jmp int09_done
+
+int09_check_pause: ;; check for pause key
+  cmp  al, #0xe1
+  jne int09_process_key
+  xor  ax, ax
+  mov  ds, ax
+  mov  al, BYTE [0x496]     ;; mf2_state |= 0x01
+  or   al, #0x01
+  mov  BYTE [0x496], al
+  jmp int09_done
+
+int09_process_key:
   mov   bx, #0xf000
   mov   ds, bx
   call  _int09_function
@@ -10384,8 +11292,6 @@ int09_finish:
   out #0x64, al
   pop ax
   iret
-
-
 
 
 ;----------------------------------------
@@ -10426,7 +11332,7 @@ int0e_loop2:
   je int0e_loop2
 int0e_normal:
   push ds
-  mov  ax, #0x0000 ;; segment 0000
+  xor  ax, ax ;; segment 0000
   mov  ds, ax
   call eoi_master_pic
   mov  al, 0x043e
@@ -10463,7 +11369,7 @@ int17_handler:
 int17_handler:
   push ds
   pusha
-  mov  ax, #0x0000
+  xor  ax, ax
   mov  ds, ax
   call _int17_function
   popa
@@ -10653,11 +11559,11 @@ int1a_callfunction:
 ;;
 int70_handler:
   push ds
-  pusha
+  pushad
   xor  ax, ax
   mov  ds, ax
   call _int70_function
-  popa
+  popad
   pop  ds
   iret
 
@@ -10715,7 +11621,7 @@ int08_store_ticks:
 
 
 .org 0xff00
-.ascii "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
+.ascii BIOS_COPYRIGHT_STRING
 
 ;------------------------------------------------
 ;- IRET Instruction for Dummy Interrupt Handler -
@@ -10737,7 +11643,7 @@ dummy_iret_handler:
 #ifdef HVMTEST
   jmp 0xd000:0x0003;
 #else
-  jmp 0xf000:post
+   jmp 0xf000:post
 #endif
 
 .org 0xfff5 ; ASCII Date ROM was built - 8 characters in MM/DD/YY
@@ -10750,10 +11656,10 @@ db 0x00   ; filler
 .org 0xfa6e ;; Character Font for 320x200 & 640x200 Graphics (lower 128 characters)
 ASM_END
 /*
- * This font comes from the fntcol16.zip package (c) by  Joseph Gil 
+ * This font comes from the fntcol16.zip package (c) by  Joseph Gil
  * found at ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
  * This font is public domain
- */ 
+ */
 static Bit8u vgafont8[128*8]=
 {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -10929,328 +11835,10 @@ db 0,0,0,0,0,0,0   ; 31 bytes
 db 0,0,0,0,0,0,0   ; 31 bytes
 ASM_END
 
-#else // !HVMASSIST
-
+#endif // HVMASSIST
 ASM_START
-.org 0xcc00
+.org 0xcff0
+bios_table_area_end:
 // bcc-generated data will be placed here
-
-// For documentation of this config structure, look on developer.intel.com and
-// search for multiprocessor specification.  Note that when you change anything
-// you must update the checksum (a pain!).  It would be better to construct this
-// with C structures, or at least fill in the checksum automatically.
-//
-// Maybe this structs could be moved elsewhere than d000
-
-#if (BX_SMP_PROCESSORS==1)
-  // no structure necessary.
-#elif (BX_SMP_PROCESSORS==2)
-// define the Intel MP Configuration Structure for 2 processors at
-// APIC ID 0,1.  I/O APIC at ID=2.
-.align 16
-mp_config_table:
-  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
-  dw (mp_config_end-mp_config_table)  ;; table length
-  db 4 ;; spec rev
-  db 0x65 ;; checksum
-  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
-  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
-  db 0x20, 0x20, 0x20, 0x20 
-  db 0x20, 0x20, 0x20, 0x20
-  dw 0,0 ;; oem table ptr
-  dw 0 ;; oem table size
-  dw 20 ;; entry count
-  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
-  dw 0 ;; extended table length
-  db 0 ;; extended table checksum
-  db 0 ;; reserved
-mp_config_proc0:
-  db 0 ;; entry type=processor
-  db 0 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 3 ;; cpu flags: enabled, bootstrap processor
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc1:
-  db 0 ;; entry type=processor
-  db 1 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_isa_bus:
-  db 1 ;; entry type=bus
-  db 0 ;; bus ID
-  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
-mp_config_ioapic:
-  db 2 ;; entry type=I/O APIC
-  db 2 ;; apic id=2. linux will set.
-  db 0x11 ;; I/O APIC version number
-  db 1 ;; flags=1=enabled
-  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
-  db 3 ;; entry type=I/O interrupt
-  db 0 ;; interrupt type=vectored interrupt
-  db 0,0 ;; flags po=0, el=0 (linux uses as default)
-  db 0 ;; source bus ID is ISA
-  db 0 ;; source bus IRQ
-  db 2 ;; destination I/O APIC ID
-  db 0 ;; destination I/O APIC interrrupt in
-  ;; repeat pattern for interrupts 0-15
-  db 3,0,0,0,0,1,2,1
-  db 3,0,0,0,0,2,2,2
-  db 3,0,0,0,0,3,2,3
-  db 3,0,0,0,0,4,2,4
-  db 3,0,0,0,0,5,2,5
-  db 3,0,0,0,0,6,2,6
-  db 3,0,0,0,0,7,2,7
-  db 3,0,0,0,0,8,2,8
-  db 3,0,0,0,0,9,2,9
-  db 3,0,0,0,0,10,2,10
-  db 3,0,0,0,0,11,2,11
-  db 3,0,0,0,0,12,2,12
-  db 3,0,0,0,0,13,2,13
-  db 3,0,0,0,0,14,2,14
-  db 3,0,0,0,0,15,2,15
-#elif (BX_SMP_PROCESSORS==4)
-// define the Intel MP Configuration Structure for 4 processors at
-// APIC ID 0,1,2,3.  I/O APIC at ID=4.
-.align 16
-mp_config_table:
-  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
-  dw (mp_config_end-mp_config_table)  ;; table length
-  db 4 ;; spec rev
-  db 0xdd ;; checksum
-  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
-  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
-  db 0x20, 0x20, 0x20, 0x20 
-  db 0x20, 0x20, 0x20, 0x20
-  dw 0,0 ;; oem table ptr
-  dw 0 ;; oem table size
-  dw 22 ;; entry count
-  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
-  dw 0 ;; extended table length
-  db 0 ;; extended table checksum
-  db 0 ;; reserved
-mp_config_proc0:
-  db 0 ;; entry type=processor
-  db 0 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 3 ;; cpu flags: enabled, bootstrap processor
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc1:
-  db 0 ;; entry type=processor
-  db 1 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc2:
-  db 0 ;; entry type=processor
-  db 2 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc3:
-  db 0 ;; entry type=processor
-  db 3 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_isa_bus:
-  db 1 ;; entry type=bus
-  db 0 ;; bus ID
-  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
-mp_config_ioapic:
-  db 2 ;; entry type=I/O APIC
-  db 4 ;; apic id=4. linux will set.
-  db 0x11 ;; I/O APIC version number
-  db 1 ;; flags=1=enabled
-  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
-  db 3 ;; entry type=I/O interrupt
-  db 0 ;; interrupt type=vectored interrupt
-  db 0,0 ;; flags po=0, el=0 (linux uses as default)
-  db 0 ;; source bus ID is ISA
-  db 0 ;; source bus IRQ
-  db 4 ;; destination I/O APIC ID
-  db 0 ;; destination I/O APIC interrrupt in
-  ;; repeat pattern for interrupts 0-15
-  db 3,0,0,0,0,1,4,1
-  db 3,0,0,0,0,2,4,2
-  db 3,0,0,0,0,3,4,3
-  db 3,0,0,0,0,4,4,4
-  db 3,0,0,0,0,5,4,5
-  db 3,0,0,0,0,6,4,6
-  db 3,0,0,0,0,7,4,7
-  db 3,0,0,0,0,8,4,8
-  db 3,0,0,0,0,9,4,9
-  db 3,0,0,0,0,10,4,10
-  db 3,0,0,0,0,11,4,11
-  db 3,0,0,0,0,12,4,12
-  db 3,0,0,0,0,13,4,13
-  db 3,0,0,0,0,14,4,14
-  db 3,0,0,0,0,15,4,15
-#elif (BX_SMP_PROCESSORS==8)
-// define the Intel MP Configuration Structure for 8 processors at
-// APIC ID 0,1,2,3,4,5,6,7.  I/O APIC at ID=8.
-.align 16
-mp_config_table:
-  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
-  dw (mp_config_end-mp_config_table)  ;; table length
-  db 4 ;; spec rev
-  db 0xc3 ;; checksum
-  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
-  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
-  db 0x20, 0x20, 0x20, 0x20 
-  db 0x20, 0x20, 0x20, 0x20
-  dw 0,0 ;; oem table ptr
-  dw 0 ;; oem table size
-  dw 26 ;; entry count
-  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
-  dw 0 ;; extended table length
-  db 0 ;; extended table checksum
-  db 0 ;; reserved
-mp_config_proc0:
-  db 0 ;; entry type=processor
-  db 0 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 3 ;; cpu flags: enabled, bootstrap processor
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc1:
-  db 0 ;; entry type=processor
-  db 1 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc2:
-  db 0 ;; entry type=processor
-  db 2 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc3:
-  db 0 ;; entry type=processor
-  db 3 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc4:
-  db 0 ;; entry type=processor
-  db 4 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc5:
-  db 0 ;; entry type=processor
-  db 5 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc6:
-  db 0 ;; entry type=processor
-  db 6 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_proc7:
-  db 0 ;; entry type=processor
-  db 7 ;; local APIC id
-  db 0x11 ;; local APIC version number
-  db 1 ;; cpu flags: enabled
-  db 0,6,0,0 ;; cpu signature
-  dw 0x201,0 ;; feature flags
-  dw 0,0 ;; reserved
-  dw 0,0 ;; reserved
-mp_config_isa_bus:
-  db 1 ;; entry type=bus
-  db 0 ;; bus ID
-  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
-mp_config_ioapic:
-  db 2 ;; entry type=I/O APIC
-  db 8 ;; apic id=8
-  db 0x11 ;; I/O APIC version number
-  db 1 ;; flags=1=enabled
-  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
-  db 3 ;; entry type=I/O interrupt
-  db 0 ;; interrupt type=vectored interrupt
-  db 0,0 ;; flags po=0, el=0 (linux uses as default)
-  db 0 ;; source bus ID is ISA
-  db 0 ;; source bus IRQ
-  db 8 ;; destination I/O APIC ID
-  db 0 ;; destination I/O APIC interrrupt in
-  ;; repeat pattern for interrupts 0-15
-  db 3,0,0,0,0,1,8,1
-  db 3,0,0,0,0,2,8,2
-  db 3,0,0,0,0,3,8,3
-  db 3,0,0,0,0,4,8,4
-  db 3,0,0,0,0,5,8,5
-  db 3,0,0,0,0,6,8,6
-  db 3,0,0,0,0,7,8,7
-  db 3,0,0,0,0,8,8,8
-  db 3,0,0,0,0,9,8,9
-  db 3,0,0,0,0,10,8,10
-  db 3,0,0,0,0,11,8,11
-  db 3,0,0,0,0,12,8,12
-  db 3,0,0,0,0,13,8,13
-  db 3,0,0,0,0,14,8,14
-  db 3,0,0,0,0,15,8,15
-#else
-#  error Sorry, rombios only has configurations for 1, 2, 4 or 8 processors.
-#endif  // if (BX_SMP_PROCESSORS==...)
-
-mp_config_end:   // this label used to find length of mp structure
- db 0
-
-#if (BX_SMP_PROCESSORS>1)
-.align 16
-mp_floating_pointer_structure:
-db 0x5f, 0x4d, 0x50, 0x5f   ; "_MP_" signature
-dw mp_config_table, 0xf ;; pointer to MP configuration table
-db 1     ;; length of this struct in 16-bit byte chunks
-db 4     ;; MP spec revision
-db 0xc1  ;; checksum
-db 0     ;; MP feature byte 1.  value 0 means look at the config table
-db 0,0,0,0     ;; MP feature bytes 2-5.
-#endif
-
 ASM_END
 
-#endif // HVMASSIST
diff -r 9837303a4708 -r 07f26e047fbf tools/firmware/rombios/rombios.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/firmware/rombios/rombios.h  Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,70 @@
+/////////////////////////////////////////////////////////////////////////
+// $Id: rombios.h,v 1.8 2008/12/04 18:48:33 sshwarts Exp $
+/////////////////////////////////////////////////////////////////////////
+//
+//  Copyright (C) 2006 Volker Ruppert
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+
+/* define it to include QEMU specific code */
+//#define BX_QEMU
+#define LEGACY
+
+#ifndef LEGACY
+#  define BX_ROMBIOS32     1
+#else
+#  define BX_ROMBIOS32     0
+#endif
+#define DEBUG_ROMBIOS    1
+
+#define PANIC_PORT  0x400
+#define PANIC_PORT2 0x401
+#define INFO_PORT   0x402
+#define DEBUG_PORT  0x403
+
+#define BIOS_PRINTF_HALT     1   /* action flags: distinct bits, OR-ed into the masks below */
+#define BIOS_PRINTF_SCREEN   2
+#define BIOS_PRINTF_INFO     4
+#define BIOS_PRINTF_DEBUG    8
+#define BIOS_PRINTF_ALL      (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
+#define BIOS_PRINTF_DEBHALT  (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
+
+#define printf(format, p...)  bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
+
+// Defines the output macros.
+// BX_DEBUG goes to INFO port until we can easily choose debug info on a
+// per-device basis. Debug info are sent only in debug mode
+#if DEBUG_ROMBIOS
+#  define BX_DEBUG(format, p...)  bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#else
+#  define BX_DEBUG(format, p...)
+#endif
+#define BX_INFO(format, p...)   bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#define BX_PANIC(format, p...)  bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+
+#define ACPI_DATA_SIZE    0x00010000L
+#define PM_IO_BASE        0xb000
+#define SMB_IO_BASE       0xb100
+
+  // Define the application NAME (first matching build macro wins; "Bochs" is the fallback)
+#if defined(HVMASSIST)
+#  define BX_APPNAME "HVMAssist"
+#elif defined(BX_QEMU)
+#  define BX_APPNAME "QEMU"
+#elif defined(PLEX86)
+#  define BX_APPNAME "Plex86"
+#else
+#  define BX_APPNAME "Bochs"
+#endif
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_dom_core.c
--- a/tools/libxc/xc_dom_core.c Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_dom_core.c Wed Dec 24 12:52:34 2008 +0900
@@ -244,6 +244,7 @@ int xc_dom_do_gunzip(void *src, size_t s
         return -1;
     }
     rc = inflate(&zStream, Z_FINISH);
+    inflateEnd(&zStream);
     if ( rc != Z_STREAM_END )
     {
         xc_dom_panic(XC_INTERNAL_ERROR,
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_dom_x86.c
--- a/tools/libxc/xc_dom_x86.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_dom_x86.c  Wed Dec 24 12:52:34 2008 +0900
@@ -418,7 +418,8 @@ static int start_info_x86_32(struct xc_d
     xc_dom_printf("%s: called\n", __FUNCTION__);
 
     memset(start_info, 0, sizeof(*start_info));
-    snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
+    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
     start_info->nr_pages = dom->total_pages;
     start_info->shared_info = shinfo << PAGE_SHIFT_X86;
     start_info->pt_base = dom->pgtables_seg.vstart;
@@ -457,7 +458,8 @@ static int start_info_x86_64(struct xc_d
     xc_dom_printf("%s: called\n", __FUNCTION__);
 
     memset(start_info, 0, sizeof(*start_info));
-    snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
+    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
     start_info->nr_pages = dom->total_pages;
     start_info->shared_info = shinfo << PAGE_SHIFT_X86;
     start_info->pt_base = dom->pgtables_seg.vstart;
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_domain.c   Wed Dec 24 12:52:34 2008 +0900
@@ -1061,6 +1061,20 @@ int xc_domain_suppress_spurious_page_fau
 
 }
 
+/*
+ * Issue a XEN_DOMCTL_debug_op request for one VCPU of a domain.
+ *
+ * xc    - control interface handle, passed through to do_domctl()
+ * domid - target domain
+ * sop   - debug sub-operation, stored in domctl.u.debug_op.op
+ * vcpu  - VCPU the sub-operation applies to
+ *
+ * Returns the result of do_domctl().
+ */
+int xc_domain_debug_control(int xc, uint32_t domid, uint32_t sop,
+                            uint32_t vcpu)
+{
+    DECLARE_DOMCTL;
+
+    /* Start from an all-zero request so unused fields are well defined. */
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = (domid_t)domid;
+    domctl.cmd = XEN_DOMCTL_debug_op;
+    domctl.u.debug_op.op     = sop;
+    domctl.u.debug_op.vcpu   = vcpu;
+
+    return do_domctl(xc, &domctl);
+}
+
+
 /*
  * Local variables:
  * mode: C
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_domain_restore.c   Wed Dec 24 12:52:34 2008 +0900
@@ -490,6 +490,22 @@ int xc_domain_restore(int xc_handle, int
             continue;
         }
 
+        if ( j == -4 )
+        {
+            uint64_t vm86_tss;
+
+            /* Skip padding 4 bytes then read the vm86 TSS location. */
+            if ( read_exact(io_fd, &vm86_tss, sizeof(uint32_t)) ||
+                 read_exact(io_fd, &vm86_tss, sizeof(uint64_t)) )
+            {
+                ERROR("error read the address of the vm86 TSS");
+                goto out;
+            }
+
+            xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, vm86_tss);
+            continue;
+        }
+
         if ( j == 0 )
             break;  /* our work here is done */
 
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_domain_save.c      Wed Dec 24 12:52:34 2008 +0900
@@ -1388,18 +1388,30 @@ int xc_domain_save(int xc_handle, int io
     if ( hvm )
     {
         struct {
-            int minusthree;
+            int id;
             uint32_t pad;
-            uint64_t ident_pt;
-        } chunk = { -3, 0 };
-
+            uint64_t data;
+        } chunk = { 0, };
+
+        chunk.id = -3;
         xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
-                         (unsigned long *)&chunk.ident_pt);
-
-        if ( (chunk.ident_pt != 0) &&
+                         (unsigned long *)&chunk.data);
+
+        if ( (chunk.data != 0) &&
              write_exact(io_fd, &chunk, sizeof(chunk)) )
         {
             PERROR("Error when writing the ident_pt for EPT guest");
+            goto out;
+        }
+
+        chunk.id = -4;
+        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS,
+                         (unsigned long *)&chunk.data);
+
+        if ( (chunk.data != 0) &&
+             write_exact(io_fd, &chunk, sizeof(chunk)) )
+        {
+            PERROR("Error when writing the vm86 TSS for guest");
             goto out;
         }
     }
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_pm.c
--- a/tools/libxc/xc_pm.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_pm.c       Wed Dec 24 12:52:34 2008 +0900
@@ -23,8 +23,13 @@
  *
  */
 
+#include <errno.h>
+#include <stdbool.h>
 #include "xc_private.h"
 
+/*
+ * Get PM statistic info
+ */
 int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px)
 {
     DECLARE_SYSCTL;
@@ -168,3 +173,136 @@ int xc_pm_reset_cxstat(int xc_handle, in
 
     return xc_sysctl(xc_handle, &sysctl);
 }
+
+
+/*
+ * 1. Get PM parameter
+ * 2. Provide user PM control
+ */
+/*
+ * Fetch the cpufreq parameters of one CPU through XEN_SYSCTL_pm_op /
+ * GET_CPUFREQ_PARA.
+ *
+ * user_para->cpu_num, freq_num and gov_num give the capacity of the three
+ * caller-supplied arrays.  When all three are non-zero the arrays must be
+ * non-NULL; they are locked in memory for the duration of the hypercall.
+ * When any count is zero no array is handed to the hypervisor.
+ *
+ * Returns 0 on success, with the scalar fields, the driver/governor names
+ * and the governor-specific union copied into *user_para.  Returns -EINVAL
+ * for a negative handle or missing pointers, -EAGAIN (with the three counts
+ * updated from the hypervisor's reply) when the sysctl fails with
+ * errno == EAGAIN, and otherwise the non-zero result of lock_pages() or
+ * xc_sysctl().  errno as left by the failing call is preserved across the
+ * cleanup, because callers inspect it.
+ */
+int xc_get_cpufreq_para(int xc_handle, int cpuid,
+                        struct xc_get_cpufreq_para *user_para)
+{
+    DECLARE_SYSCTL;
+    int ret = 0;
+    int saved_errno;
+    int nr_locked = 0;
+    size_t cpus_len = 0, freqs_len = 0, govs_len = 0;
+    struct xen_get_cpufreq_para *sys_para = &sysctl.u.pm_op.get_para;
+    bool has_num;
+
+    /* Validate before touching *user_para. */
+    if ( (xc_handle < 0) || !user_para )
+        return -EINVAL;
+
+    has_num = user_para->cpu_num &&
+              user_para->freq_num &&
+              user_para->gov_num;
+
+    if ( has_num )
+    {
+        if ( (!user_para->affected_cpus)                    ||
+             (!user_para->scaling_available_frequencies)    ||
+             (!user_para->scaling_available_governors) )
+            return -EINVAL;
+
+        /*
+         * Remember the locked lengths: the counts in *user_para may be
+         * overwritten on EAGAIN before the buffers are unlocked.
+         */
+        cpus_len  = user_para->cpu_num * sizeof(uint32_t);
+        freqs_len = user_para->freq_num * sizeof(uint32_t);
+        govs_len  = user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char);
+
+        if ( (ret = lock_pages(user_para->affected_cpus, cpus_len)) )
+            goto out;
+        nr_locked = 1;
+
+        if ( (ret = lock_pages(user_para->scaling_available_frequencies,
+                               freqs_len)) )
+            goto out;
+        nr_locked = 2;
+
+        if ( (ret = lock_pages(user_para->scaling_available_governors,
+                               govs_len)) )
+            goto out;
+        nr_locked = 3;
+
+        set_xen_guest_handle(sys_para->affected_cpus,
+                             user_para->affected_cpus);
+        set_xen_guest_handle(sys_para->scaling_available_frequencies,
+                             user_para->scaling_available_frequencies);
+        set_xen_guest_handle(sys_para->scaling_available_governors,
+                             user_para->scaling_available_governors);
+    }
+
+    sysctl.cmd = XEN_SYSCTL_pm_op;
+    sysctl.u.pm_op.cmd = GET_CPUFREQ_PARA;
+    sysctl.u.pm_op.cpuid = cpuid;
+    sys_para->cpu_num  = user_para->cpu_num;
+    sys_para->freq_num = user_para->freq_num;
+    sys_para->gov_num  = user_para->gov_num;
+
+    ret = xc_sysctl(xc_handle, &sysctl);
+    if ( ret )
+    {
+        if ( errno == EAGAIN )
+        {
+            /* Report the counts from the reply so the caller can retry. */
+            user_para->cpu_num  = sys_para->cpu_num;
+            user_para->freq_num = sys_para->freq_num;
+            user_para->gov_num  = sys_para->gov_num;
+            ret = -errno;
+        }
+    }
+    else
+    {
+        user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq;
+        user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq;
+        user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq;
+        user_para->scaling_cur_freq = sys_para->scaling_cur_freq;
+        user_para->scaling_max_freq = sys_para->scaling_max_freq;
+        user_para->scaling_min_freq = sys_para->scaling_min_freq;
+
+        memcpy(user_para->scaling_driver,
+                sys_para->scaling_driver, CPUFREQ_NAME_LEN);
+        memcpy(user_para->scaling_governor,
+                sys_para->scaling_governor, CPUFREQ_NAME_LEN);
+
+        /* copy to user_para no matter what cpufreq governor */
+        XC_BUILD_BUG_ON(sizeof(((struct xc_get_cpufreq_para *)0)->u) !=
+                        sizeof(((struct xen_get_cpufreq_para *)0)->u));
+
+        memcpy(&user_para->u, &sys_para->u, sizeof(sys_para->u));
+    }
+
+out:
+    /*
+     * Unlock only what was actually locked, in reverse order, using the
+     * lengths recorded at lock time, and without disturbing errno.
+     */
+    saved_errno = errno;
+    if ( nr_locked >= 3 )
+        unlock_pages(user_para->scaling_available_governors, govs_len);
+    if ( nr_locked >= 2 )
+        unlock_pages(user_para->scaling_available_frequencies, freqs_len);
+    if ( nr_locked >= 1 )
+        unlock_pages(user_para->affected_cpus, cpus_len);
+    errno = saved_errno;
+
+    return ret;
+}
+
+/*
+ * Select the cpufreq governor named by govname for CPU cpuid through
+ * XEN_SYSCTL_pm_op / SET_CPUFREQ_GOV.  At most CPUFREQ_NAME_LEN - 1
+ * characters of the name are used; the stored copy is always terminated.
+ *
+ * Returns -EINVAL for a negative handle or a NULL name, otherwise the
+ * result of xc_sysctl().
+ */
+int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname)
+{
+    DECLARE_SYSCTL;
+    char *dst = sysctl.u.pm_op.set_gov.scaling_governor;
+
+    if ( xc_handle < 0 )
+        return -EINVAL;
+    if ( govname == NULL )
+        return -EINVAL;
+
+    /* strncpy() does not terminate on truncation, so do it by hand. */
+    strncpy(dst, govname, CPUFREQ_NAME_LEN);
+    dst[CPUFREQ_NAME_LEN - 1] = '\0';
+
+    sysctl.u.pm_op.cpuid = cpuid;
+    sysctl.u.pm_op.cmd = SET_CPUFREQ_GOV;
+    sysctl.cmd = XEN_SYSCTL_pm_op;
+
+    return xc_sysctl(xc_handle, &sysctl);
+}
+
+/*
+ * Set one cpufreq control of CPU cpuid through XEN_SYSCTL_pm_op /
+ * SET_CPUFREQ_PARA.  ctrl_type selects the control and ctrl_value is the
+ * value to store; both are passed to the hypervisor unchanged.
+ *
+ * Returns -EINVAL for a negative handle, otherwise the result of
+ * xc_sysctl().
+ */
+int xc_set_cpufreq_para(int xc_handle, int cpuid,
+                        int ctrl_type, int ctrl_value)
+{
+    DECLARE_SYSCTL;
+
+    if ( !(xc_handle >= 0) )
+        return -EINVAL;
+
+    /* Payload first, then the request header. */
+    sysctl.u.pm_op.set_para.ctrl_value = ctrl_value;
+    sysctl.u.pm_op.set_para.ctrl_type = ctrl_type;
+    sysctl.u.pm_op.cpuid = cpuid;
+    sysctl.u.pm_op.cmd = SET_CPUFREQ_PARA;
+    sysctl.cmd = XEN_SYSCTL_pm_op;
+
+    return xc_sysctl(xc_handle, &sysctl);
+}
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_private.h
--- a/tools/libxc/xc_private.h  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_private.h  Wed Dec 24 12:52:34 2008 +0900
@@ -42,6 +42,9 @@
 #define DEBUG    1
 #define INFO     1
 #define PROGRESS 0
+
+/*
+ * Force a compilation error if condition is true.
+ *
+ * When p is non-zero, -!!(p) evaluates to -1 and the unnamed bit-field is
+ * given a negative width, which the compiler rejects.  When p is zero the
+ * width is 0, which is permitted for an unnamed bit-field.  The operand of
+ * sizeof is not evaluated and the (void) cast discards the result.
+ */
+#define XC_BUILD_BUG_ON(p) ((void)sizeof(struct { int:-!!(p); }))
 
 /*
 ** Define max dirty page cache to permit during save/restore -- need to 
balance 
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xc_ptrace.c
--- a/tools/libxc/xc_ptrace.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xc_ptrace.c   Wed Dec 24 12:52:34 2008 +0900
@@ -524,10 +524,20 @@ xc_ptrace(
         /*  XXX we can still have problems if the user switches threads
          *  during single-stepping - but that just seems retarded
          */
-        ctxt[cpu].c.user_regs.eflags |= PSL_T;
-        if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
-                                &ctxt[cpu])))
-            goto out_error_domctl;
+        /* Try to enable Monitor Trap Flag for HVM, and fall back to TF
+         * if no MTF support
+         */
+        if ( !current_is_hvm ||
+             xc_domain_debug_control(xc_handle,
+                                     current_domid,
+                                     XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON,
+                                     cpu) )
+        {
+            ctxt[cpu].c.user_regs.eflags |= PSL_T;
+            if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
+                                    &ctxt[cpu])))
+                goto out_error_domctl;
+        }
         /* FALLTHROUGH */
 
     case PTRACE_CONT:
@@ -538,15 +548,22 @@ xc_ptrace(
         {
             FOREACH_CPU(cpumap, index) {
                 cpu = index - 1;
-                if (fetch_regs(xc_handle, cpu, NULL))
-                    goto out_error;
-                /* Clear trace flag */
-                if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
+                if ( !current_is_hvm ||
+                      xc_domain_debug_control(xc_handle,
+                                              current_domid,
+                                              
XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF,
+                                              cpu) )
                 {
-                    ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
-                    if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
-                                                cpu, &ctxt[cpu])))
-                        goto out_error_domctl;
+                    if (fetch_regs(xc_handle, cpu, NULL))
+                        goto out_error;
+                    /* Clear trace flag */
+                    if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
+                    {
+                        ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
+                        if ((retval = xc_vcpu_setcontext(xc_handle, 
current_domid,
+                                        cpu, &ctxt[cpu])))
+                            goto out_error_domctl;
+                    }
                 }
             }
         }
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xenctrl.h     Wed Dec 24 12:52:34 2008 +0900
@@ -1111,6 +1111,12 @@ int xc_domain_set_target(int xc_handle,
                          uint32_t domid,
                          uint32_t target);
 
+/*
+ * Control the domain for debug.
+ *
+ * sop selects the debug operation (xc_ptrace passes
+ * XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON / XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF)
+ * and vcpu names the VCPU it applies to.
+ */
+int xc_domain_debug_control(int xc_handle,
+                            uint32_t domid,
+                            uint32_t sop,
+                            uint32_t vcpu);
+
 #if defined(__i386__) || defined(__x86_64__)
 int xc_cpuid_check(int xc,
                    const unsigned int *input,
@@ -1161,4 +1167,46 @@ int xc_pm_reset_cxstat(int xc_handle, in
 
 int xc_cpu_online(int xc_handle, int cpu);
 int xc_cpu_offline(int xc_handle, int cpu);
+
+/* 
+ * cpufreq para name of this structure named 
+ * same as sysfs file name of native linux
+ */
+typedef xen_userspace_t xc_userspace_t;
+typedef xen_ondemand_t xc_ondemand_t;
+
+/* Argument block exchanged with xc_get_cpufreq_para(). */
+struct xc_get_cpufreq_para {
+    /* IN/OUT variable */
+    /*
+     * Element counts for the three arrays below.  xc_get_cpufreq_para()
+     * passes them to the hypervisor and, when the request fails with
+     * EAGAIN, overwrites them with the counts found in the reply.
+     */
+    uint32_t cpu_num;
+    uint32_t freq_num;
+    uint32_t gov_num;
+
+    /* for all governors */
+    /* OUT variable */
+    /*
+     * Caller-allocated arrays; they are only handed to the hypervisor when
+     * all three counts above are non-zero.  Sizes used by the library:
+     *   affected_cpus                 - cpu_num  * sizeof(uint32_t)
+     *   scaling_available_frequencies - freq_num * sizeof(uint32_t)
+     *   scaling_available_governors   - gov_num  * CPUFREQ_NAME_LEN bytes
+     */
+    uint32_t *affected_cpus;
+    uint32_t *scaling_available_frequencies;
+    char     *scaling_available_governors;
+    char scaling_driver[CPUFREQ_NAME_LEN];
+
+    uint32_t cpuinfo_cur_freq;
+    uint32_t cpuinfo_max_freq;
+    uint32_t cpuinfo_min_freq;
+    uint32_t scaling_cur_freq;
+
+    char scaling_governor[CPUFREQ_NAME_LEN];
+    uint32_t scaling_max_freq;
+    uint32_t scaling_min_freq;
+
+    /* for specific governor */
+    /*
+     * Copied wholesale from the hypervisor reply; which member is
+     * meaningful depends on the name in scaling_governor.
+     */
+    union {
+        xc_userspace_t userspace;
+        xc_ondemand_t ondemand;
+    } u;
+};
+
+/*
+ * Read the cpufreq parameters of CPU cpuid into *user_para.  Fails with
+ * -EAGAIN, after updating the counts in *user_para, when the hypervisor
+ * answers EAGAIN.
+ */
+int xc_get_cpufreq_para(int xc_handle, int cpuid,
+                        struct xc_get_cpufreq_para *user_para);
+/* Select the cpufreq governor called govname for CPU cpuid. */
+int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname);
+/* Set the cpufreq control selected by ctrl_type to ctrl_value on CPU cpuid. */
+int xc_set_cpufreq_para(int xc_handle, int cpuid,
+                        int ctrl_type, int ctrl_value);
 #endif /* XENCTRL_H */
diff -r 9837303a4708 -r 07f26e047fbf tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/libxc/xg_private.c  Wed Dec 24 12:52:34 2008 +0900
@@ -131,6 +131,7 @@ char *xc_inflate_buffer(const char *in_b
 
     /* Inflate in one pass/call */
     sts = inflate(&zStream, Z_FINISH);
+    inflateEnd(&zStream);
     if ( sts != Z_STREAM_END )
     {
         ERROR("inflate failed, sts %d\n", sts);
diff -r 9837303a4708 -r 07f26e047fbf tools/misc/xen-detect.c
--- a/tools/misc/xen-detect.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/misc/xen-detect.c   Wed Dec 24 12:52:34 2008 +0900
@@ -50,17 +50,25 @@ static int check_for_xen(void)
 {
     uint32_t eax, ebx, ecx, edx;
     char signature[13];
+    uint32_t base;
 
-    cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
-    *(uint32_t *)(signature + 0) = ebx;
-    *(uint32_t *)(signature + 4) = ecx;
-    *(uint32_t *)(signature + 8) = edx;
-    signature[12] = '\0';
+    for ( base = 0x40000000; base < 0x40001000; base += 0x100 )
+    {
+        cpuid(base, &eax, &ebx, &ecx, &edx);
 
-    if ( strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002) )
-        return 0;
+        *(uint32_t *)(signature + 0) = ebx;
+        *(uint32_t *)(signature + 4) = ecx;
+        *(uint32_t *)(signature + 8) = edx;
+        signature[12] = '\0';
 
-    cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
+        if ( !strcmp("XenVMMXenVMM", signature) && (eax >= (base + 2)) )
+            goto found;
+    }
+
+    return 0;
+
+ found:
+    cpuid(base + 1, &eax, &ebx, &ecx, &edx);
     printf("Running in %s context on Xen v%d.%d.\n",
            pv_context ? "PV" : "HVM", (uint16_t)(eax >> 16), (uint16_t)eax);
     return 1;
diff -r 9837303a4708 -r 07f26e047fbf tools/misc/xenpm.c
--- a/tools/misc/xenpm.c        Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/misc/xenpm.c        Wed Dec 24 12:52:34 2008 +0900
@@ -16,199 +16,591 @@
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
 
+/* to eliminate warning on `strndup' */
+#define _GNU_SOURCE
+
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <getopt.h>
 #include <errno.h>
 
 #include <xenctrl.h>
 #include <inttypes.h>
 
-int main(int argc, char **argv)
-{
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+/*
+ * Print the usage summary for every xenpm sub-command to stderr.
+ * Each usage line is one logical string, split into adjacent literals
+ * only to keep the source lines short.
+ */
+void show_help(void)
+{
+    fprintf(stderr,
+            "Usage:\n"
+            "       xenpm get-cpuidle-states [cpuid]: list cpu idle "
+            "information on CPU cpuid or all CPUs.\n"
+            "       xenpm get-cpufreq-states [cpuid]: list cpu frequency "
+            "information on CPU cpuid or all CPUs.\n"
+            "       xenpm get-cpufreq-para [cpuid]: list cpu frequency "
+            "information on CPU cpuid or all CPUs.\n"
+            "       xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu "
+            "frequency <HZ> on CPU <cpuid>.\n"
+            "       xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu "
+            "frequency <HZ> on CPU <cpuid>.\n"
+            "       xenpm set-scaling-governor <cpuid> <name>: set scaling "
+            "governor on CPU <cpuid>.\n"
+            "       xenpm set-scaling-speed <cpuid> <num>: set scaling speed "
+            "on CPU <cpuid>.\n"
+            "       xenpm set-sampling-rate <cpuid> <num>: set sampling rate "
+            "on CPU <cpuid>.\n"
+            "       xenpm set-up-threshold <cpuid> <num>: set up threshold on "
+            "CPU <cpuid>.\n");
+}
+
+/*
+ * Adapter that gives show_help() the handler signature used by the
+ * main_options table.  None of the arguments are used; always returns 0.
+ */
+int help_func(int xc_fd, int cpuid, uint32_t value)
+{
+    (void)xc_fd;
+    (void)cpuid;
+    (void)value;
+
+    show_help();
+
+    return 0;
+}
+
+/*
+ * Show cpu idle (C-state) information on CPU cpuid.
+ *
+ * Returns 0 on success, -ENODEV when xc_pm_get_max_cx() fails with
+ * errno == ENODEV, -ENOMEM when a statistics buffer cannot be allocated,
+ * and -EINVAL for any other failure.
+ */
+static int show_cx_cpuid(int xc_fd, int cpuid)
+{
+    int i, ret = 0;
+    int max_cx_num = 0;
+    struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
+
+    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
+    if ( ret )
+    {
+        if ( errno == ENODEV )
+        {
+            fprintf(stderr, "Xen cpuidle is not enabled!\n");
+            return -ENODEV;
+        }
+        else
+        {
+            fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid);
+            return -EINVAL;
+        }
+    }
+
+    /* free(NULL) is a no-op, so one cleanup path serves every exit below. */
+    cxstat->residencies = NULL;
+    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->triggers )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n",
+                cpuid);
+        ret = -ENOMEM;
+        goto out;
+    }
+    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
+    if ( !cxstat->residencies )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n",
+                cpuid);
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
+    if( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to get C-states statistics "
+                "information\n", cpuid);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    printf("cpu id               : %d\n", cpuid);
+    printf("total C-states       : %d\n", cxstat->nr);
+    printf("idle time(ms)        : %"PRIu64"\n",
+           cxstat->idle_time/1000000UL);
+    for ( i = 0; i < cxstat->nr; i++ )
+    {
+        printf("C%d                   : transition [%020"PRIu64"]\n",
+               i, cxstat->triggers[i]);
+        printf("                       residency  [%020"PRIu64" ms]\n",
+               cxstat->residencies[i]/1000000UL);
+    }
+
+    printf("\n");
+
+out:
+    free(cxstat->triggers);
+    free(cxstat->residencies);
+
+    return ret;
+}
+
+/*
+ * Handler for "get-cpuidle-states".  A non-negative cpuid reports that CPU
+ * only; a negative cpuid walks CPUs 0 .. nr_cpus-1 as reported by
+ * xc_physinfo(), stopping early once show_cx_cpuid() returns -ENODEV.
+ * value is unused.  Returns the xc_physinfo() error, or the result of the
+ * last show_cx_cpuid() call.
+ */
+int cxstates_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc = 0;
+    int cpu;
+    xc_physinfo_t physinfo = { 0 };
+
+    if ( cpuid >= 0 )
+        return show_cx_cpuid(xc_fd, cpuid);
+
+    /* show cxstates on all cpu */
+    rc = xc_physinfo(xc_fd, &physinfo);
+    if ( rc )
+    {
+        fprintf(stderr, "failed to get the processor information\n");
+        return rc;
+    }
+
+    for ( cpu = 0; cpu < physinfo.nr_cpus; cpu++ )
+    {
+        rc = show_cx_cpuid(xc_fd, cpu);
+        if ( rc == -ENODEV )
+            break;
+    }
+
+    return rc;
+}
+
+/*
+ * Show cpu frequency (P-state) information on CPU cpuid.
+ *
+ * Returns 0 on success, -ENODEV when xc_pm_get_max_px() fails with
+ * errno == ENODEV, -ENOMEM when a statistics buffer cannot be allocated,
+ * and -EINVAL for any other failure.  Diagnostics go to stderr, matching
+ * the C-state and cpufreq-parameter reporters.
+ */
+static int show_px_cpuid(int xc_fd, int cpuid)
+{
+    int i, ret = 0;
+    int max_px_num = 0;
+    struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
+
+    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+    if ( ret )
+    {
+        if ( errno == ENODEV )
+        {
+            fprintf(stderr, "Xen cpufreq is not enabled!\n");
+            return -ENODEV;
+        }
+        else
+        {
+            fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid);
+            return -EINVAL;
+        }
+    }
+
+    /* free(NULL) is a no-op, so one cleanup path serves every exit below. */
+    pxstat->pt = NULL;
+    pxstat->trans_pt = malloc(max_px_num * max_px_num *
+                              sizeof(uint64_t));
+    if ( !pxstat->trans_pt )
+    {
+        fprintf(stderr,
+                "[CPU%d] failed to malloc for P-states transition table\n",
+                cpuid);
+        ret = -ENOMEM;
+        goto out;
+    }
+    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
+    if ( !pxstat->pt )
+    {
+        fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n",
+                cpuid);
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
+    if( ret )
+    {
+        fprintf(stderr,
+                "[CPU%d] failed to get P-states statistics information\n",
+                cpuid);
+        /* A failed query is not an allocation failure. */
+        ret = -EINVAL;
+        goto out;
+    }
+
+    printf("cpu id               : %d\n", cpuid);
+    printf("total P-states       : %d\n", pxstat->total);
+    printf("usable P-states      : %d\n", pxstat->usable);
+    printf("current frequency    : %"PRIu64" MHz\n",
+           pxstat->pt[pxstat->cur].freq);
+    for ( i = 0; i < pxstat->total; i++ )
+    {
+        /* The current P-state is flagged with a leading '*'. */
+        if ( pxstat->cur == i )
+            printf("*P%d", i);
+        else
+            printf("P%d ", i);
+        printf("                  : freq       [%04"PRIu64" MHz]\n",
+               pxstat->pt[i].freq);
+        printf("                       transition [%020"PRIu64"]\n",
+               pxstat->pt[i].count);
+        printf("                       residency  [%020"PRIu64" ms]\n",
+               pxstat->pt[i].residency/1000000UL);
+    }
+
+    printf("\n");
+
+out:
+    free(pxstat->trans_pt);
+    free(pxstat->pt);
+
+    return ret;
+}
+
+/*
+ * Handler for "get-cpufreq-states".  A non-negative cpuid reports that CPU
+ * only; a negative cpuid walks CPUs 0 .. nr_cpus-1 as reported by
+ * xc_physinfo(), stopping early once show_px_cpuid() returns -ENODEV.
+ * value is unused.  Returns the xc_physinfo() error, or the result of the
+ * last show_px_cpuid() call.
+ */
+int pxstates_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc = 0;
+    int cpu;
+    xc_physinfo_t physinfo = { 0 };
+
+    if ( cpuid >= 0 )
+        return show_px_cpuid(xc_fd, cpuid);
+
+    rc = xc_physinfo(xc_fd, &physinfo);
+    if ( rc )
+    {
+        fprintf(stderr, "failed to get the processor information\n");
+        return rc;
+    }
+
+    for ( cpu = 0; cpu < physinfo.nr_cpus; cpu++ )
+    {
+        rc = show_px_cpuid(xc_fd, cpu);
+        if ( rc == -ENODEV )
+            break;
+    }
+
+    return rc;
+}
+
+/*
+ * Print out parameters about cpu frequency.
+ *
+ * cpuid     - CPU the parameters were queried for; its entry in the
+ *             affected_cpus list is flagged with '*'
+ * p_cpufreq - parameters as filled in by xc_get_cpufreq_para()
+ *
+ * The governor-specific section is chosen by comparing scaling_governor
+ * with "userspace" and "ondemand"; other governors print none.
+ */
+static void print_cpufreq_para(int cpuid,
+                               struct xc_get_cpufreq_para *p_cpufreq)
+{
+    uint32_t i;
+
+    printf("cpu id               : %d\n", cpuid);
+
+    printf("affected_cpus        :");
+    for ( i = 0; i < p_cpufreq->cpu_num; i++ )
+    {
+        uint32_t cpu = p_cpufreq->affected_cpus[i];
+
+        /* Compare the stored CPU id, not the array index, with cpuid. */
+        if ( (cpuid >= 0) && (cpu == (uint32_t)cpuid) )
+            printf(" *%u", cpu);
+        else
+            printf(" %u", cpu);
+    }
+    printf("\n");
+
+    printf("cpuinfo frequency    : max [%u] min [%u] cur [%u]\n",
+           p_cpufreq->cpuinfo_max_freq,
+           p_cpufreq->cpuinfo_min_freq,
+           p_cpufreq->cpuinfo_cur_freq);
+
+    printf("scaling_driver       : %s\n", p_cpufreq->scaling_driver);
+
+    printf("scaling_avail_gov    : %s\n",
+           p_cpufreq->scaling_available_governors);
+
+    printf("current_governor     : %s\n", p_cpufreq->scaling_governor);
+    if ( !strncmp(p_cpufreq->scaling_governor,
+                  "userspace", CPUFREQ_NAME_LEN) )
+    {
+        printf("  userspace specific :\n");
+        printf("    scaling_setspeed : %u\n",
+               p_cpufreq->u.userspace.scaling_setspeed);
+    }
+    else if ( !strncmp(p_cpufreq->scaling_governor,
+                       "ondemand", CPUFREQ_NAME_LEN) )
+    {
+        printf("  ondemand specific  :\n");
+        printf("    sampling_rate    : max [%u] min [%u] cur [%u]\n",
+               p_cpufreq->u.ondemand.sampling_rate_max,
+               p_cpufreq->u.ondemand.sampling_rate_min,
+               p_cpufreq->u.ondemand.sampling_rate);
+        printf("    up_threshold     : %u\n",
+               p_cpufreq->u.ondemand.up_threshold);
+    }
+
+    printf("scaling_avail_freq   :");
+    for ( i = 0; i < p_cpufreq->freq_num; i++ )
+    {
+        uint32_t freq = p_cpufreq->scaling_available_frequencies[i];
+
+        /* The frequency currently in use is flagged with '*'. */
+        if ( freq == p_cpufreq->scaling_cur_freq )
+            printf(" *%u", freq);
+        else
+            printf(" %u", freq);
+    }
+    printf("\n");
+
+    printf("scaling frequency    : max [%u] min [%u] cur [%u]\n",
+           p_cpufreq->scaling_max_freq,
+           p_cpufreq->scaling_min_freq,
+           p_cpufreq->scaling_cur_freq);
+    printf("\n");
+}
+
+/*
+ * Show cpu frequency parameters information on CPU cpuid.
+ *
+ * The first query is made with all counts set to 0; whenever
+ * xc_get_cpufreq_para() fails with errno == EAGAIN it has updated the
+ * counts, so the arrays are reallocated to the new sizes and the query is
+ * repeated.
+ *
+ * Returns 0 on success, -ENOMEM when an array cannot be allocated,
+ * -ENODEV when the final failure has errno == ENODEV, and otherwise the
+ * non-zero result of xc_get_cpufreq_para().
+ */
+static int show_cpufreq_para_cpuid(int xc_fd, int cpuid)
+{
+    int ret = 0;
+    struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
+
+    p_cpufreq->cpu_num = 0;
+    p_cpufreq->freq_num = 0;
+    p_cpufreq->gov_num = 0;
+    p_cpufreq->affected_cpus = NULL;
+    p_cpufreq->scaling_available_frequencies = NULL;
+    p_cpufreq->scaling_available_governors = NULL;
+
+    do
+    {
+        free(p_cpufreq->affected_cpus);
+        free(p_cpufreq->scaling_available_frequencies);
+        free(p_cpufreq->scaling_available_governors);
+
+        p_cpufreq->affected_cpus = NULL;
+        p_cpufreq->scaling_available_frequencies = NULL;
+        p_cpufreq->scaling_available_governors = NULL;
+
+        /*
+         * malloc(0) may legitimately return NULL, so a NULL result only
+         * counts as a failure when a non-zero size was requested.
+         */
+        p_cpufreq->affected_cpus =
+            malloc(p_cpufreq->cpu_num * sizeof(uint32_t));
+        if ( !p_cpufreq->affected_cpus && p_cpufreq->cpu_num )
+        {
+            fprintf(stderr,
+                    "[CPU%d] failed to malloc for affected_cpus\n",
+                    cpuid);
+            ret = -ENOMEM;
+            goto out;
+        }
+        p_cpufreq->scaling_available_frequencies =
+            malloc(p_cpufreq->freq_num * sizeof(uint32_t));
+        if ( !p_cpufreq->scaling_available_frequencies &&
+             p_cpufreq->freq_num )
+        {
+            fprintf(stderr,
+                    "[CPU%d] failed to malloc for "
+                    "scaling_available_frequencies\n",
+                    cpuid);
+            ret = -ENOMEM;
+            goto out;
+        }
+        p_cpufreq->scaling_available_governors =
+            malloc(p_cpufreq->gov_num * CPUFREQ_NAME_LEN * sizeof(char));
+        if ( !p_cpufreq->scaling_available_governors &&
+             p_cpufreq->gov_num )
+        {
+            fprintf(stderr,
+                    "[CPU%d] failed to malloc for "
+                    "scaling_available_governors\n",
+                    cpuid);
+            ret = -ENOMEM;
+            goto out;
+        }
+
+        ret = xc_get_cpufreq_para(xc_fd, cpuid, p_cpufreq);
+    } while ( ret && errno == EAGAIN );
+
+    if ( ret == 0 )
+        print_cpufreq_para(cpuid, p_cpufreq);
+    else if ( errno == ENODEV )
+    {
+        ret = -ENODEV;
+        fprintf(stderr, "Xen cpufreq is not enabled!\n");
+    }
+    else
+        fprintf(stderr,
+                "[CPU%d] failed to get cpufreq parameter\n",
+                cpuid);
+
+out:
+    free(p_cpufreq->scaling_available_governors);
+    free(p_cpufreq->scaling_available_frequencies);
+    free(p_cpufreq->affected_cpus);
+
+    return ret;
+}
+
+/*
+ * Handler for "get-cpufreq-para".  A non-negative cpuid reports that CPU
+ * only; a negative cpuid walks CPUs 0 .. nr_cpus-1 as reported by
+ * xc_physinfo(), stopping early once show_cpufreq_para_cpuid() returns
+ * -ENODEV.  value is unused.  Returns the xc_physinfo() error, or the
+ * result of the last show_cpufreq_para_cpuid() call.
+ */
+int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc = 0;
+    int cpu;
+    xc_physinfo_t physinfo = { 0 };
+
+    if ( cpuid >= 0 )
+        return show_cpufreq_para_cpuid(xc_fd, cpuid);
+
+    rc = xc_physinfo(xc_fd, &physinfo);
+    if ( rc )
+    {
+        fprintf(stderr, "failed to get the processor information\n");
+        return rc;
+    }
+
+    for ( cpu = 0; cpu < physinfo.nr_cpus; cpu++ )
+    {
+        rc = show_cpufreq_para_cpuid(xc_fd, cpu);
+        if ( rc == -ENODEV )
+            break;
+    }
+
+    return rc;
+}
+
+/*
+ * Handler for "set-scaling-maxfreq": set SCALING_MAX_FREQ to value on CPU
+ * cpuid.  A negative cpuid prints the usage text and yields -EINVAL;
+ * otherwise the result of xc_set_cpufreq_para() is returned.
+ */
+int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc = 0;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    rc = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value);
+    if ( rc != 0 )
+        fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid);
+
+    return rc;
+}
+
+/*
+ * Handler for "set-scaling-minfreq": set SCALING_MIN_FREQ to value on CPU
+ * cpuid.  A negative cpuid prints the usage text and yields -EINVAL;
+ * otherwise the result of xc_set_cpufreq_para() is returned.
+ */
+int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    rc = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value);
+    if ( rc != 0 )
+        fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid);
+
+    return rc;
+}
+
+/*
+ * Handler for "set-scaling-speed": set SCALING_SETSPEED to value on CPU
+ * cpuid.  A negative cpuid prints the usage text and yields -EINVAL;
+ * otherwise the result of xc_set_cpufreq_para() is returned.
+ */
+int scaling_speed_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    rc = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value);
+    if ( rc != 0 )
+        fprintf(stderr, "[CPU%d] failed to set scaling speed\n", cpuid);
+
+    return rc;
+}
+
+/*
+ * Handler for "set-sampling-rate": set SAMPLING_RATE to value on CPU
+ * cpuid.  A negative cpuid prints the usage text and yields -EINVAL;
+ * otherwise the result of xc_set_cpufreq_para() is returned.
+ */
+int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int ret;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value);
+    if ( ret )
+    {
+        fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n",
+                cpuid);
+    }
+
+    return ret;
+}
+
+/*
+ * Handler for "set-up-threshold": set UP_THRESHOLD to value on CPU cpuid.
+ * A negative cpuid prints the usage text and yields -EINVAL; otherwise the
+ * result of xc_set_cpufreq_para() is returned.
+ */
+int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value)
+{
+    int rc;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    rc = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value);
+    if ( rc != 0 )
+        fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid);
+
+    return rc;
+}
+
+/*
+ * Handler for "set-scaling-governor": select the governor called name on
+ * CPU cpuid.  A negative cpuid prints the usage text and yields -EINVAL;
+ * otherwise the result of xc_set_cpufreq_gov() is returned.
+ */
+int scaling_governor_func(int xc_fd, int cpuid, char *name)
+{
+    int rc = 0;
+
+    if ( cpuid < 0 )
+    {
+        show_help();
+        return -EINVAL;
+    }
+
+    rc = xc_set_cpufreq_gov(xc_fd, cpuid, name);
+    if ( rc != 0 )
+        fprintf(stderr, "failed to set cpufreq governor to %s\n", name);
+
+    return rc;
+}
+
+/*
+ * Sub-command table.  main() compares argv[1] with each name as a prefix
+ * and collects the matching entries.
+ */
+struct {
+    const char *name;
+    /* Handler taking the parsed cpuid and numeric argument. */
+    int (*function)(int xc_fd, int cpuid, uint32_t value);
+} main_options[] = {
+    { "help", help_func },
+    { "get-cpuidle-states", cxstates_func },
+    { "get-cpufreq-states", pxstates_func },
+    { "get-cpufreq-para", cpufreq_para_func },
+    { "set-scaling-maxfreq", scaling_max_freq_func },
+    { "set-scaling-minfreq", scaling_min_freq_func },
+    /*
+     * No handler here: main() recognises this name itself and calls
+     * scaling_governor_func(), which takes a string rather than a number.
+     */
+    { "set-scaling-governor", NULL },
+    { "set-scaling-speed", scaling_speed_func },
+    { "set-sampling-rate", scaling_sampling_rate_func },
+    { "set-up-threshold", scaling_up_threshold_func },
+};
+
+int main(int argc, char *argv[])
+{
+    int i, ret = -EINVAL;
     int xc_fd;
-    int i, j, ret = 0;
-    int cinfo = 0, pinfo = 0;
-    int ch;
-    xc_physinfo_t physinfo = { 0 };
-
-    while ( (ch = getopt(argc, argv, "cp")) != -1 )
-    {
-        switch ( ch )
-        {
-        case 'c':
-            cinfo = 1;
-            break;
-        case 'p':
-            pinfo = 1;
-            break;
-        default:
-            fprintf(stderr, "%s [-p] [-c]\n", argv[0]);
-            return -1;
-        }
-    }
-
-    if ( !cinfo && !pinfo )
-    {
-        cinfo = 1;
-        pinfo = 1;
+    int cpuid = -1;
+    uint32_t value = 0;
+    int nr_matches = 0;
+    int matches_main_options[ARRAY_SIZE(main_options)];
+
+    if ( argc < 2 )
+    {
+        show_help();
+        return ret;
+    }
+
+    if ( argc > 2 )
+    {
+        if ( sscanf(argv[2], "%d", &cpuid) != 1 )
+            cpuid = -1;
     }
 
     xc_fd = xc_interface_open();
     if ( xc_fd < 0 )
     {
         fprintf(stderr, "failed to get the handler\n");
-        return xc_fd;
-    }
-
-    ret = xc_physinfo(xc_fd, &physinfo);
-    if ( ret )
-    {
-        fprintf(stderr, "failed to get the processor information\n");
-        xc_interface_close(xc_fd);
-        return ret;
-    }
-
-    /* print out the C state information */
-    if ( cinfo )
-    {
-        int max_cx_num = 0;
-        struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
-
-        for ( i = 0; i < physinfo.nr_cpus; i++ )
-        {
-            ret = xc_pm_get_max_cx(xc_fd, i, &max_cx_num);
-            if ( ret )
+    }
+
+    for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
+    {
+        if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
+        {
+            matches_main_options[nr_matches++] = i;
+        }
+    }
+
+    if ( nr_matches > 1 )
+    {
+        fprintf(stderr, "Ambigious options: ");
+        for ( i = 0; i < nr_matches; i++ )
+            fprintf(stderr, " %s", main_options[matches_main_options[i]].name);
+        fprintf(stderr, "\n");
+    }
+    else if ( nr_matches == 1 )
+    {
+        if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) )
+        {
+            char *name = strdup(argv[3]);
+            ret = scaling_governor_func(xc_fd, cpuid, name);
+            free(name);
+        }
+        else
+        {
+            if ( argc > 3 )
             {
-                if ( errno == ENODEV )
-                {
-                    fprintf(stderr, "Xen cpuidle is not enabled!\n");
-                    break;
-                }
-                else
-                {
-                    fprintf(stderr, "[CPU%d] failed to get max C-state\n", i);
-                    continue;
-                }
+                if ( sscanf(argv[3], "%d", &value) != 1 )
+                    value = 0;
             }
-
-            cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
-            if ( !cxstat->triggers )
-            {
-                fprintf(stderr, "failed to malloc for C-states triggers\n");
-                break;
-            }
-            cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
-            if ( !cxstat->residencies )
-            {
-                fprintf(stderr, "failed to malloc for C-states residencies\n");
-                free(cxstat->triggers);
-                break;
-            }
-
-            ret = xc_pm_get_cxstat(xc_fd, i, cxstat);
-            if( ret )
-            {
-                fprintf(stderr, "[CPU%d] failed to get C-states statistics "
-                        "information\n", i);
-                free(cxstat->triggers);
-                free(cxstat->residencies);
-                continue;
-            }
-
-            printf("cpu id               : %d\n", i);
-            printf("total C-states       : %d\n", cxstat->nr);
-            printf("idle time(ms)        : %"PRIu64"\n",
-                   cxstat->idle_time/1000000UL);
-            for ( j = 0; j < cxstat->nr; j++ )
-            {
-                printf("C%d                   : transition [%020"PRIu64"]\n",
-                       j, cxstat->triggers[j]);
-                printf("                       residency  [%020"PRIu64" ms]\n",
-                       cxstat->residencies[j]*1000000UL/3579/1000000UL);
-            }
-
-            free(cxstat->triggers);
-            free(cxstat->residencies);
-
-            printf("\n");
-        }
-    }
-
-    /* print out P state information */
-    if ( pinfo )
-    {
-        int max_px_num = 0;
-        struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
-
-        for ( i = 0; i < physinfo.nr_cpus; i++ )
-        {
-            ret = xc_pm_get_max_px(xc_fd, i, &max_px_num);
-            if ( ret )
-            {
-                if ( errno == ENODEV )
-                {
-                    printf("Xen cpufreq is not enabled!\n");
-                    break;
-                }
-                else
-                {
-                    fprintf(stderr, "[CPU%d] failed to get max P-state\n", i);
-                    continue;
-                }
-            }
-
-            pxstat->trans_pt = malloc(max_px_num * max_px_num *
-                                      sizeof(uint64_t));
-            if ( !pxstat->trans_pt )
-            {
-                fprintf(stderr, "failed to malloc for P-states "
-                        "transition table\n");
-                break;
-            }
-            pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
-            if ( !pxstat->pt )
-            {
-                fprintf(stderr, "failed to malloc for P-states table\n");
-                free(pxstat->trans_pt);
-                break;
-            }
-
-            ret = xc_pm_get_pxstat(xc_fd, i, pxstat);
-            if( ret )
-            {
-                fprintf(stderr, "[CPU%d] failed to get P-states "
-                        "statistics information\n", i);
-                free(pxstat->trans_pt);
-                free(pxstat->pt);
-                continue;
-            }
-
-            printf("cpu id               : %d\n", i);
-            printf("total P-states       : %d\n", pxstat->total);
-            printf("usable P-states      : %d\n", pxstat->usable);
-            printf("current frequency    : %"PRIu64" MHz\n",
-                   pxstat->pt[pxstat->cur].freq);
-            for ( j = 0; j < pxstat->total; j++ )
-            {
-                if ( pxstat->cur == j )
-                    printf("*P%d", j);
-                else
-                    printf("P%d ", j);
-                printf("                  : freq       [%04"PRIu64" MHz]\n",
-                       pxstat->pt[j].freq);
-                printf("                       transition [%020"PRIu64"]\n",
-                       pxstat->pt[j].count);
-                printf("                       residency  [%020"PRIu64" ms]\n",
-                       pxstat->pt[j].residency/1000000UL);
-            }
-
-            free(pxstat->trans_pt);
-            free(pxstat->pt);
-
-            printf("\n");
-        }
-    }
+            ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value);
+        }
+    }
+    else
+        show_help();
 
     xc_interface_close(xc_fd);
     return ret;
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/acm/acm.c
--- a/tools/python/xen/lowlevel/acm/acm.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/acm/acm.c       Wed Dec 24 12:52:34 2008 +0900
@@ -68,6 +68,8 @@ static void *__getssid(int domid, uint32
         goto out2;
     } else {
         *buflen = SSID_BUFFER_SIZE;
+        free(buf);
+        buf = NULL;
         goto out2;
     }
  out2:
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/flask/flask.c
--- a/tools/python/xen/lowlevel/flask/flask.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/flask/flask.c   Wed Dec 24 12:52:34 2008 +0900
@@ -55,6 +55,7 @@ static PyObject *pyflask_context_to_sid(
     xc_handle = xc_interface_open();
     if (xc_handle < 0) {
         errno = xc_handle;
+        free(buf);
         return PyErr_SetFromErrno(xc_error_obj);
     }
     
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Dec 24 12:52:34 2008 +0900
@@ -678,19 +678,22 @@ static PyObject *pyxc_get_device_group(X
 
     if ( rc < 0 )
     {
-      free(sdev_array); 
-      return pyxc_error_to_exception();
+        free(sdev_array); 
+        return pyxc_error_to_exception();
     }
 
     if ( !num_sdevs )
     {
-       free(sdev_array);
-       return Py_BuildValue("s", "");
+        free(sdev_array);
+        return Py_BuildValue("s", "");
     }
 
     group_str = calloc(num_sdevs, sizeof(dev_str));
     if (group_str == NULL)
+    {
+        free(sdev_array);
         return PyErr_NoMemory();
+    }
 
     for ( i = 0; i < num_sdevs; i++ )
     {
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/lowlevel/xs/xs.c
--- a/tools/python/xen/lowlevel/xs/xs.c Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/lowlevel/xs/xs.c Wed Dec 24 12:52:34 2008 +0900
@@ -336,15 +336,19 @@ static PyObject *xspy_set_permissions(Xs
        xs_set_error(EINVAL);
         goto exit;
     }
+
     xsperms_n = PyList_Size(perms);
-    xsperms = calloc(xsperms_n, sizeof(struct xs_permissions));
+    /* NB. alloc +1 so we can change the owner if necessary. */
+    xsperms = calloc(xsperms_n + 1, sizeof(struct xs_permissions));
     if (!xsperms) {
        xs_set_error(ENOMEM);
         goto exit;
     }
+
     tuple0 = PyTuple_New(0);
     if (!tuple0)
         goto exit;
+
     for (i = 0; i < xsperms_n; i++) {
         /* Read/write perms. Set these. */
         int p_read = 0, p_write = 0;
@@ -357,6 +361,17 @@ static PyObject *xspy_set_permissions(Xs
         if (p_write)
             xsperms[i].perms |= XS_PERM_WRITE;
     }
+
+    /*
+     * Is the caller trying to restrict access to the first specified
+     * domain? If so then it cannot be owner, so we force dom0 as owner.
+     */
+    if (xsperms_n && xsperms[0].perms && xsperms[0].id) {
+        memmove(&xsperms[1], &xsperms[0], xsperms_n * sizeof(*xsperms));
+        xsperms[0].id = xsperms[0].perms = 0;
+        xsperms_n++;
+    }
+
     Py_BEGIN_ALLOW_THREADS
     result = xs_set_permissions(xh, th, path, xsperms, xsperms_n);
     Py_END_ALLOW_THREADS
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/XendCheckpoint.py   Wed Dec 24 12:52:34 2008 +0900
@@ -253,7 +253,7 @@ def restore(xd, fd, dominfo = None, paus
         # set memory limit
         xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
 
-        balloon.free(memory + shadow)
+        balloon.free(memory + shadow, dominfo)
 
         shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
         dominfo.info['shadow_memory'] = shadow_cur
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Dec 24 12:52:34 2008 +0900
@@ -1289,7 +1289,6 @@ class XendConfig(dict):
                     pass
 
             if dev_type == 'vbd':
-                dev_info['bootable'] = 0
                 if dev_info.get('dev', '').startswith('ioemu:'):
                     dev_info['driver'] = 'ioemu'
                 else:
@@ -1325,7 +1324,7 @@ class XendConfig(dict):
                 if param not in target:
                     target[param] = []
                 if dev_uuid not in target[param]:
-                    if dev_type == 'vbd':
+                    if dev_type == 'vbd' and 'bootable' not in dev_info:
                         # Compat hack -- mark first disk bootable
                         dev_info['bootable'] = int(not target[param])
                     target[param].append(dev_uuid)
@@ -1333,8 +1332,9 @@ class XendConfig(dict):
                 if 'vbd_refs' not in target:
                     target['vbd_refs'] = []
                 if dev_uuid not in target['vbd_refs']:
-                    # Compat hack -- mark first disk bootable
-                    dev_info['bootable'] = int(not target['vbd_refs'])
+                    if 'bootable' not in dev_info:
+                        # Compat hack -- mark first disk bootable
+                        dev_info['bootable'] = int(not target['vbd_refs'])
                     target['vbd_refs'].append(dev_uuid)
                     
             elif dev_type == 'vfb':
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Dec 24 12:52:34 2008 +0900
@@ -517,7 +517,8 @@ class XendDomainInfo:
         # HVM domain shuts itself down only if it has PV drivers
         if self.info.is_hvm():
             hvm_pvdrv = xc.hvm_get_param(self.domid, HVM_PARAM_CALLBACK_IRQ)
-            if not hvm_pvdrv:
+            hvm_s_state = xc.hvm_get_param(self.domid, HVM_PARAM_ACPI_S_STATE)
+            if not hvm_pvdrv or hvm_s_state != 0:
                 code = REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
                 log.info("HVM save:remote shutdown dom %d!", self.domid)
                 xc.domain_shutdown(self.domid, code)
@@ -2104,7 +2105,7 @@ class XendDomainInfo:
         # overhead is greater for some types of domain than others. For
         # example, an x86 HVM domain will have a default shadow-pagetable
         # allocation of 1MB. We free up 2MB here to be on the safe side.
-        balloon.free(2*1024) # 2MB should be plenty
+        balloon.free(2*1024, self) # 2MB should be plenty
 
         ssidref = 0
         if security.on() == xsconstants.XS_POLICY_USE:
@@ -2298,7 +2299,7 @@ class XendDomainInfo:
             vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
 
             # Make sure there's enough RAM available for the domain
-            balloon.free(memory + shadow + vtd_mem)
+            balloon.free(memory + shadow + vtd_mem, self)
 
             # Set up the shadow memory
             shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
@@ -2715,7 +2716,7 @@ class XendDomainInfo:
             # The domain might already have some shadow memory
             overhead_kb -= xc.shadow_mem_control(self.domid) * 1024
         if overhead_kb > 0:
-            balloon.free(overhead_kb)
+            balloon.free(overhead_kb, self)
 
     def _unwatchVm(self):
         """Remove the watch on the VM path, if any.  Idempotent.  Nothrow
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/balloon.py  Wed Dec 24 12:52:34 2008 +0900
@@ -67,7 +67,7 @@ def get_dom0_target_alloc():
         raise VmError('Failed to query target memory allocation of dom0.')
     return kb
 
-def free(need_mem):
+def free(need_mem ,self):
     """Balloon out memory from the privileged domain so that there is the
     specified required amount (in KiB) free.
     """
@@ -121,6 +121,40 @@ def free(need_mem):
             max_free_mem = total_mem - dom0_alloc
         if need_mem >= max_free_mem:
             retries = rlimit
+
+        # Check whether the current machine is a NUMA system and the newly
+        # created hvm has all its vcpus in the same node. If both of these
+        # conditions hold, we wait until all the pages in the scrub list
+        # are freed (if the waiting time goes beyond 20s, we stop
+        # waiting).
+        if physinfo['nr_nodes'] > 1 and retries == 0:
+            oldnode = -1
+            waitscrub = 1
+            vcpus = self.info['cpus'][0]
+            for vcpu in vcpus:
+                nodenum = 0
+                for node in physinfo['node_to_cpu']:
+                    for cpu in node:
+                        if vcpu == cpu:
+                            if oldnode == -1:
+                                oldnode = nodenum
+                            elif oldnode != nodenum:
+                                waitscrub = 0
+                    nodenum = nodenum + 1
+
+            if waitscrub == 1 and scrub_mem > 0:
+                log.debug("wait for scrub %s", scrub_mem)
+                while scrub_mem > 0 and retries < rlimit:
+                    time.sleep(sleep_time)
+                    physinfo = xc.physinfo()
+                    free_mem = physinfo['free_memory']
+                    scrub_mem = physinfo['scrub_memory']
+                    retries += 1
+                    sleep_time += SLEEP_TIME_GROWTH
+                log.debug("scrub for %d times", retries)
+
+            retries = 0
+            sleep_time = SLEEP_TIME_GROWTH
 
         while retries < rlimit:
             physinfo = xc.physinfo()
diff -r 9837303a4708 -r 07f26e047fbf tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/python/xen/xend/server/blkif.py     Wed Dec 24 12:52:34 2008 +0900
@@ -78,6 +78,10 @@ class BlkifController(DevController):
         if uuid:
             back['uuid'] = uuid
 
+        bootable = config.get('bootable', None)
+        if bootable != None:
+            back['bootable'] = str(bootable)
+
         if security.on() == xsconstants.XS_POLICY_USE:
             self.do_access_control(config, uname)
 
@@ -143,11 +147,12 @@ class BlkifController(DevController):
         config = DevController.getDeviceConfiguration(self, devid, transaction)
         if transaction is None:
             devinfo = self.readBackend(devid, 'dev', 'type', 'params', 'mode',
-                                       'uuid')
+                                       'uuid', 'bootable')
         else:
             devinfo = self.readBackendTxn(transaction, devid,
-                                          'dev', 'type', 'params', 'mode', 'uuid')
-        dev, typ, params, mode, uuid = devinfo
+                                          'dev', 'type', 'params', 'mode', 'uuid',
+                                          'bootable')
+        dev, typ, params, mode, uuid, bootable = devinfo
         
         if dev:
             if transaction is None:
@@ -165,6 +170,8 @@ class BlkifController(DevController):
             config['mode'] = mode
         if uuid:
             config['uuid'] = uuid
+        if bootable != None:
+            config['bootable'] = int(bootable)
 
         proto = self.readFrontend(devid, 'protocol')
         if proto:
diff -r 9837303a4708 -r 07f26e047fbf tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c   Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/xcutils/xc_save.c   Wed Dec 24 12:52:34 2008 +0900
@@ -166,18 +166,12 @@ static int suspend(void)
 {
     unsigned long sx_state = 0;
 
-    /* Nothing to do if the guest is in an ACPI sleep state. */
+    /* Cannot notify guest to shut itself down if it's in ACPI sleep state. */
     if (si.flags & XCFLAGS_HVM)
         xc_get_hvm_param(si.xc_fd, si.domid,
                          HVM_PARAM_ACPI_S_STATE, &sx_state);
-    if (sx_state != 0) {
-        /* notify xend that it can do device migration */
-        printf("suspended\n");
-        fflush(stdout);
-        return 1;
-    }
-
-    if (si.suspend_evtchn >= 0)
+
+    if ((sx_state == 0) && (si.suspend_evtchn >= 0))
         return evtchn_suspend();
 
     return compat_suspend();
diff -r 9837303a4708 -r 07f26e047fbf tools/xenpmd/xenpmd.c
--- a/tools/xenpmd/xenpmd.c     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/xenpmd/xenpmd.c     Wed Dec 24 12:52:34 2008 +0900
@@ -297,7 +297,6 @@ int get_next_battery_info_or_status(DIR 
     if  ( !info_or_status )
         return 0;
 
-    memset(line_info, 0, 256);
     if (type == BIF) 
         memset(info_or_status, 0, sizeof(struct battery_info));
     else 
@@ -307,11 +306,8 @@ int get_next_battery_info_or_status(DIR 
     if ( !file )
         return 0;
 
-    while ( fgets(line_info, 1024, file) != NULL ) 
-    {
+    while ( fgets(line_info, sizeof(line_info), file) != NULL ) 
         parse_battery_info_or_status(line_info, type, info_or_status);
-        memset(line_info, 0, 256);
-    }
 
     fclose(file);
     return 1;
diff -r 9837303a4708 -r 07f26e047fbf tools/xenstat/xentop/xentop.c
--- a/tools/xenstat/xentop/xentop.c     Wed Dec 24 12:50:57 2008 +0900
+++ b/tools/xenstat/xentop/xentop.c     Wed Dec 24 12:52:34 2008 +0900
@@ -254,7 +254,7 @@ static void fail(const char *str)
 {
        if(cwin != NULL && !isendwin())
                endwin();
-       fprintf(stderr, str);
+       fprintf(stderr, "%s", str);
        exit(1);
 }
 
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/Makefile
--- a/unmodified_drivers/linux-2.6/Makefile     Wed Dec 24 12:50:57 2008 +0900
+++ b/unmodified_drivers/linux-2.6/Makefile     Wed Dec 24 12:52:34 2008 +0900
@@ -4,3 +4,4 @@ obj-m += balloon/
 obj-m += balloon/
 obj-m += blkfront/
 obj-m += netfront/
+obj-m += scsifront/
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h
--- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Wed Dec 24 12:50:57 2008 +0900
+++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h Wed Dec 24 12:52:34 2008 +0900
@@ -147,7 +147,9 @@ extern char *kasprintf(gfp_t gfp, const 
  *   RHEL_VERSION
  */
 #if !defined(RHEL_VERSION) || (RHEL_VERSION == 4 && RHEL_UPDATE < 5)
+#if !defined(RHEL_MAJOR) || (RHEL_MAJOR == 4 && RHEL_MINOR < 5)
 typedef irqreturn_t (*irq_handler_t)(int, void *, struct pt_regs *);
+#endif
 #endif
 #endif
 
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/overrides.mk
--- a/unmodified_drivers/linux-2.6/overrides.mk Wed Dec 24 12:50:57 2008 +0900
+++ b/unmodified_drivers/linux-2.6/overrides.mk Wed Dec 24 12:52:34 2008 +0900
@@ -15,3 +15,4 @@ _XEN_CPPFLAGS += -include $(objtree)/inc
 
 EXTRA_CFLAGS += $(_XEN_CPPFLAGS)
 EXTRA_AFLAGS += $(_XEN_CPPFLAGS)
+CPPFLAGS := -I$(M)/include $(CPPFLAGS)
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/scsifront/Kbuild
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/unmodified_drivers/linux-2.6/scsifront/Kbuild     Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,6 @@
+include $(M)/overrides.mk
+
+obj-m += xen-scsi.o
+
+xen-scsi-objs := scsifront.o xenbus.o
+
diff -r 9837303a4708 -r 07f26e047fbf unmodified_drivers/linux-2.6/scsifront/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/unmodified_drivers/linux-2.6/scsifront/Makefile   Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,3 @@
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/ia64/xen/cpufreq/cpufreq.c
--- a/xen/arch/ia64/xen/cpufreq/cpufreq.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/ia64/xen/cpufreq/cpufreq.c       Wed Dec 24 12:52:34 2008 +0900
@@ -275,6 +275,7 @@ acpi_cpufreq_cpu_exit (struct cpufreq_po
 }
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
+       .name       = "acpi-cpufreq",
        .verify     = acpi_cpufreq_verify,
        .target     = acpi_cpufreq_target,
        .get        = acpi_cpufreq_get,
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/Makefile     Wed Dec 24 12:52:34 2008 +0900
@@ -37,7 +37,6 @@ obj-y += numa.o
 obj-y += numa.o
 obj-y += pci.o
 obj-y += physdev.o
-obj-y += rwlock.o
 obj-y += setup.o
 obj-y += shutdown.o
 obj-y += smp.o
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed Dec 24 12:52:34 2008 +0900
@@ -71,7 +71,8 @@ static struct acpi_processor_power *__re
 
 static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
 {
-    uint32_t i;
+    uint32_t i, idle_usage = 0;
+    uint64_t res, idle_res = 0;
 
     printk("==cpu%d==\n", cpu);
     printk("active state:\t\tC%d\n",
@@ -81,14 +82,21 @@ static void print_acpi_power(uint32_t cp
     
     for ( i = 1; i < power->count; i++ )
     {
+        res = acpi_pm_tick_to_ns(power->states[i].time);
+        idle_usage += power->states[i].usage;
+        idle_res += res;
+
         printk((power->last_state && power->last_state->idx == i) ?
                "   *" : "    ");
         printk("C%d:\t", i);
         printk("type[C%d] ", power->states[i].type);
         printk("latency[%03d] ", power->states[i].latency);
         printk("usage[%08d] ", power->states[i].usage);
-        printk("duration[%"PRId64"]\n", power->states[i].time);
-    }
+        printk("duration[%"PRId64"]\n", res);
+    }
+    printk("    C0:\tusage[%08d] duration[%"PRId64"]\n",
+           idle_usage, NOW() - idle_res);
+
 }
 
 static void dump_cx(unsigned char key)
@@ -317,8 +325,6 @@ static void acpi_processor_idle(void)
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
          * deep C state can't work correctly.
          */
-        /* preparing TSC stop */
-        cstate_save_tsc();
         /* preparing APIC stop */
         lapic_timer_off();
 
@@ -751,8 +757,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
 int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
 {
     const struct acpi_processor_power *power = processor_powers[cpuid];
-    struct vcpu *v = idle_vcpu[cpuid];
-    uint64_t usage;
+    uint64_t usage, res, idle_usage = 0, idle_res = 0;
     int i;
 
     if ( power == NULL )
@@ -765,20 +770,26 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
 
     stat->last = power->last_state ? power->last_state->idx : 0;
     stat->nr = power->count;
-    stat->idle_time = v->runstate.time[RUNSTATE_running];
-    if ( v->is_running )
-        stat->idle_time += NOW() - v->runstate.state_entry_time;
-
-    for ( i = 0; i < power->count; i++ )
-    {
-        usage = power->states[i].usage;
-        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) )
+    stat->idle_time = get_cpu_idle_time(cpuid);
+
+    for ( i = power->count - 1; i >= 0; i-- )
+    {
+        if ( i != 0 )
+        {
+            usage = power->states[i].usage;
+            res = acpi_pm_tick_to_ns(power->states[i].time);
+            idle_usage += usage;
+            idle_res += res;
+        }
+        else
+        {
+            usage = idle_usage;
+            res = NOW() - idle_res;
+        }
+        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) ||
+             copy_to_guest_offset(stat->residencies, i, &res, 1) )
             return -EFAULT;
     }
-    for ( i = 0; i < power->count; i++ )
-        if ( copy_to_guest_offset(stat->residencies, i, 
-                                  &power->states[i].time, 1) )
-            return -EFAULT;
 
     return 0;
 }
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Wed Dec 24 12:52:34 2008 +0900
@@ -131,9 +131,12 @@ struct drv_cmd {
     u32 val;
 };
 
-static void do_drv_read(struct drv_cmd *cmd)
-{
+static void do_drv_read(void *drvcmd)
+{
+    struct drv_cmd *cmd;
     u32 h;
+
+    cmd = (struct drv_cmd *)drvcmd;
 
     switch (cmd->type) {
     case SYSTEM_INTEL_MSR_CAPABLE:
@@ -174,7 +177,13 @@ static void drv_read(struct drv_cmd *cmd
 {
     cmd->val = 0;
 
-    do_drv_read(cmd);
+    ASSERT(cpus_weight(cmd->mask) == 1);
+
+    /* to reduce IPI for the sake of performance */
+    if (cpu_isset(smp_processor_id(), cmd->mask))
+        do_drv_read((void *)cmd);
+    else
+        on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
 }
 
 static void drv_write(struct drv_cmd *cmd)
@@ -184,13 +193,21 @@ static void drv_write(struct drv_cmd *cm
 
 static u32 get_cur_val(cpumask_t mask)
 {
+    struct cpufreq_policy *policy;
     struct processor_performance *perf;
     struct drv_cmd cmd;
+    unsigned int cpu;
 
     if (unlikely(cpus_empty(mask)))
         return 0;
 
-    switch (drv_data[first_cpu(mask)]->cpu_feature) {
+    cpu = first_cpu(mask);
+    policy = cpufreq_cpu_policy[cpu];
+
+    if (!policy)
+        return 0;    
+
+    switch (drv_data[policy->cpu]->cpu_feature) {
     case SYSTEM_INTEL_MSR_CAPABLE:
         cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
         cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
@@ -205,7 +222,7 @@ static u32 get_cur_val(cpumask_t mask)
         return 0;
     }
 
-    cmd.mask = mask;
+    cmd.mask = cpumask_of_cpu(cpu);
 
     drv_read(&cmd);
     return cmd.val;
@@ -255,28 +272,43 @@ static void  __get_measured_perf(void *p
 
 static unsigned int get_measured_perf(unsigned int cpu)
 {
-    unsigned int retval, perf_percent;
+    struct cpufreq_policy *policy;
+    unsigned int perf_percent;
     cpumask_t cpumask;
 
     if (!cpu_online(cpu))
         return 0;
 
-    cpumask = cpumask_of_cpu(cpu);
-    on_selected_cpus(cpumask, __get_measured_perf, (void *)&perf_percent,0,1);
-
-    retval = drv_data[cpu]->max_freq * perf_percent / 100;
-    return retval;
+    policy = cpufreq_cpu_policy[cpu];
+    if (!policy)
+        return 0;
+
+    /* Usually we take the short path (no IPI) for the sake of performance. */
+    if (cpu == smp_processor_id()) {
+        __get_measured_perf((void *)&perf_percent);
+    } else {
+        cpumask = cpumask_of_cpu(cpu);
+        on_selected_cpus(cpumask, __get_measured_perf, 
+                        (void *)&perf_percent,0,1);
+    }
+
+    return drv_data[cpu]->max_freq * perf_percent / 100;
 }
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-    struct acpi_cpufreq_data *data = drv_data[cpu];
+    struct cpufreq_policy *policy;
+    struct acpi_cpufreq_data *data;
     unsigned int freq;
 
+    policy = cpufreq_cpu_policy[cpu];
+    if (!policy)
+        return 0;
+
+    data = drv_data[policy->cpu];
     if (unlikely(data == NULL ||
-        data->acpi_data == NULL || data->freq_table == NULL)) {
-        return 0;
-    }
+        data->acpi_data == NULL || data->freq_table == NULL))
+        return 0;
 
     freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
     return freq;
@@ -327,16 +359,10 @@ static int acpi_cpufreq_target(struct cp
 
     next_perf_state = data->freq_table[next_state].index;
     if (perf->state == next_perf_state) {
-        if (unlikely(policy->resume)) {
-            printk(KERN_INFO "Called after resume, resetting to P%d\n", 
-                next_perf_state);
+        if (unlikely(policy->resume))
             policy->resume = 0;
-        }
-        else {
-            printk(KERN_DEBUG "Already at target state (P%d)\n", 
-                next_perf_state);
+        else
             return 0;
-        }
     }
 
     switch (data->cpu_feature) {
@@ -555,6 +581,7 @@ static int acpi_cpufreq_cpu_exit(struct 
 }
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
+    .name   = "acpi-cpufreq",
     .verify = acpi_cpufreq_verify,
     .target = acpi_cpufreq_target,
     .init   = acpi_cpufreq_cpu_init,
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c      Wed Dec 24 12:52:34 2008 +0900
@@ -129,6 +129,16 @@ static int powernow_cpufreq_target(struc
     return result;
 }
 
+static int powernow_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    struct powernow_cpufreq_data *data;
+
+    if (!policy || !(data = drv_data[policy->cpu]))
+        return -EINVAL;
+
+    return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
 static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
     unsigned int i;
@@ -243,6 +253,7 @@ static int powernow_cpufreq_cpu_exit(str
 }
 
 static struct cpufreq_driver powernow_cpufreq_driver = {
+    .verify = powernow_cpufreq_verify,
     .target = powernow_cpufreq_target,
     .init   = powernow_cpufreq_cpu_init,
     .exit   = powernow_cpufreq_cpu_exit
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/apic.c       Wed Dec 24 12:52:34 2008 +0900
@@ -99,8 +99,11 @@ void __init apic_intr_init(void)
     /* Performance Counters Interrupt */
     set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
-    /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+    /* CMCI Correctable Machine Check Interrupt */
+    set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
+
+    /* thermal monitor LVT interrupt, for P4 and latest Intel CPU*/
+#ifdef CONFIG_X86_MCE_THERMAL
     set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
 }
@@ -172,12 +175,17 @@ void clear_local_APIC(void)
     }
 
 /* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5) {
         v = apic_read(APIC_LVTTHMR);
         apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
     }
 #endif
+
+    if (maxlvt >= 6) {
+        v = apic_read(APIC_CMCI);
+        apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
+    }
     /*
      * Clean APIC state for other OSs:
      */
@@ -189,10 +197,13 @@ void clear_local_APIC(void)
     if (maxlvt >= 4)
         apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
     if (maxlvt >= 5)
         apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
 #endif
+    if (maxlvt >= 6)
+        apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
+
     v = GET_APIC_VERSION(apic_read(APIC_LVR));
     if (APIC_INTEGRATED(v)) {  /* !82489DX */
         if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
@@ -597,6 +608,7 @@ static struct {
     unsigned int apic_spiv;
     unsigned int apic_lvtt;
     unsigned int apic_lvtpc;
+    unsigned int apic_lvtcmci;
     unsigned int apic_lvt0;
     unsigned int apic_lvt1;
     unsigned int apic_lvterr;
@@ -608,7 +620,7 @@ int lapic_suspend(void)
 int lapic_suspend(void)
 {
     unsigned long flags;
-
+    int maxlvt = get_maxlvt();
     if (!apic_pm_state.active)
         return 0;
 
@@ -620,6 +632,11 @@ int lapic_suspend(void)
     apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
     apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
     apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+
+    if (maxlvt >= 6) {
+        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
+    }
+
     apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
     apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
     apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
@@ -637,6 +654,7 @@ int lapic_resume(void)
 {
     unsigned int l, h;
     unsigned long flags;
+    int maxlvt = get_maxlvt();
 
     if (!apic_pm_state.active)
         return 0;
@@ -669,6 +687,11 @@ int lapic_resume(void)
     apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
     apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
     apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+
+    if (maxlvt >= 6) {
+        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
+    }
+
     apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
     apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
     apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/amd.c
--- a/xen/arch/x86/cpu/amd.c    Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/amd.c    Wed Dec 24 12:52:34 2008 +0900
@@ -461,8 +461,10 @@ static void __devinit init_amd(struct cp
 
        if (cpuid_eax(0x80000000) >= 0x80000007) {
                c->x86_power = cpuid_edx(0x80000007);
-               if (c->x86_power & (1<<8))
+               if (c->x86_power & (1<<8)) {
                        set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+                       set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+               }
        }
 
 #ifdef CONFIG_X86_HT
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/intel.c
--- a/xen/arch/x86/cpu/intel.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/intel.c  Wed Dec 24 12:52:34 2008 +0900
@@ -218,6 +218,10 @@ static void __devinit init_intel(struct 
        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
                (c->x86 == 0x6 && c->x86_model >= 0x0e))
                set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+       if (cpuid_edx(0x80000007) & (1u<<8)) {
+               set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+               set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+       }
 
        start_vmx();
 }
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/Makefile
--- a/xen/arch/x86/cpu/mcheck/Makefile  Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/Makefile  Wed Dec 24 12:52:34 2008 +0900
@@ -3,8 +3,7 @@ obj-y += amd_k8.o
 obj-y += amd_k8.o
 obj-y += amd_f10.o
 obj-y += mce.o
+obj-y += mce_intel.o
 obj-y += non-fatal.o
-obj-y += p4.o
 obj-$(x86_32) += p5.o
-obj-$(x86_32) += p6.o
 obj-$(x86_32) += winchip.o
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c  Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c  Wed Dec 24 12:52:34 2008 +0900
@@ -219,7 +219,7 @@ void k8_machine_check(struct cpu_user_re
                        show_execution_state(regs);
                }
                x86_mcinfo_dump(mc_data);
-               panic("End of MCE. Use mcelog to decode above error codes.\n");
+               mc_panic("End of MCE. Use mcelog to decode above error codes.\n");
        }
 
        /* If Dom0 registered a machine check handler, which is only possible
@@ -248,7 +248,7 @@ void k8_machine_check(struct cpu_user_re
                        /* Dom0 is impacted. Since noone can't handle
                         * this error, panic! */
                        x86_mcinfo_dump(mc_data);
-                       panic("MCE occured in Dom0, which it can't handle\n");
+                       mc_panic("MCE occured in Dom0, which it can't handle\n");
 
                        /* UNREACHED */
                } else {
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/k7.c
--- a/xen/arch/x86/cpu/mcheck/k7.c      Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/k7.c      Wed Dec 24 12:52:34 2008 +0900
@@ -14,6 +14,7 @@
 #include <asm/msr.h>
 
 #include "mce.h"
+#include "x86_mca.h"
 
 /* Machine Check Handler For AMD Athlon/Duron */
 static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
@@ -57,9 +58,9 @@ static fastcall void k7_machine_check(st
        }
 
        if (recover&2)
-               panic ("CPU context corrupt");
+               mc_panic ("CPU context corrupt");
        if (recover&1)
-               panic ("Unable to continue");
+               mc_panic ("Unable to continue");
        printk (KERN_EMERG "Attempting to continue.\n");
        mcgstl &= ~(1<<2);
        wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Wed Dec 24 12:52:34 2008 +0900
@@ -9,6 +9,7 @@
 #include <xen/config.h>
 #include <xen/smp.h>
 #include <xen/errno.h>
+#include <xen/console.h>
 
 #include <asm/processor.h> 
 #include <asm/system.h>
@@ -26,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);      /* non-
  * to physical cpus present in the machine.
  * The more physical cpus are available, the more entries you need.
  */
-#define MAX_MCINFO     10
+#define MAX_MCINFO     20
 
 struct mc_machine_notify {
        struct mc_info mc;
@@ -109,6 +110,12 @@ static void amd_mcheck_init(struct cpuin
        }
 }
 
+/* Return true only when CPU @c has both the MCE and the MCA feature bits. */
+int mce_available(struct cpuinfo_x86 *c)
+{
+       return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
 /* This has to be run for each processor */
 void mcheck_init(struct cpuinfo_x86 *c)
 {
@@ -134,11 +141,13 @@ void mcheck_init(struct cpuinfo_x86 *c)
 #ifndef CONFIG_X86_64
                if (c->x86==5)
                        intel_p5_mcheck_init(c);
-               if (c->x86==6)
-                       intel_p6_mcheck_init(c);
 #endif
-               if (c->x86==15)
-                       intel_p4_mcheck_init(c);
+               /*If it is P6 or P4 family, including CORE 2 DUO series*/
+               if (c->x86 == 6 || c->x86==15)
+               {
+                       printk(KERN_DEBUG "MCE: Intel newly family MC Init\n");
+                       intel_mcheck_init(c);
+               }
                break;
 
 #ifndef CONFIG_X86_64
@@ -412,7 +421,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
                if (mic == NULL)
                        return;
                if (mic->type != MC_TYPE_BANK)
-                       continue;
+                       goto next;
 
                mc_bank = (struct mcinfo_bank *)mic;
        
@@ -425,6 +434,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
                        printk(" at %16"PRIx64, mc_bank->mc_addr);
 
                printk("\n");
+next:
                mic = x86_mcinfo_next(mic); /* next entry */
                if ((mic == NULL) || (mic->size == 0))
                        break;
@@ -574,3 +584,15 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
 
        return ret;
 }
+
+void mc_panic(char *s) /* Print a fatal-MCE banner naming reason @s, then panic. */
+{
+    console_start_sync(); /* NOTE(review): presumably makes console output synchronous - confirm */
+    printk("Fatal machine check: %s\n", s);
+    printk("\n"
+           "****************************************\n"
+           "\n"
+           "   The processor has reported a hardware error which cannot\n"
+           "   be recovered from.  Xen will now reboot the machine.\n");
+    panic("HARDWARE ERROR"); /* all callers in this patch treat mc_panic() as final */
+}
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Wed Dec 24 12:52:34 2008 +0900
@@ -1,14 +1,22 @@
 #include <xen/init.h>
+#include <asm/types.h>
 #include <asm/traps.h>
+#include <asm/atomic.h>
+#include <asm/percpu.h>
+
 
 /* Init functions */
 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
 void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
 void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
 void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
+
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c);
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
+void intel_mcheck_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
 /* Function pointer used in the handlers to collect additional information
@@ -19,12 +27,11 @@ extern int (*mc_callback_bank_extended)(
                uint16_t bank, uint64_t status);
 
 
+int mce_available(struct cpuinfo_x86 *c);
 /* Helper functions used for collecting error telemetry */
 struct mc_info *x86_mcinfo_getptr(void);
 void x86_mcinfo_clear(struct mc_info *mi);
 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
 void x86_mcinfo_dump(struct mc_info *mi);
+void mc_panic(char *s);
 
-/* Global variables */
-extern int mce_disabled;
-extern unsigned int nr_mce_banks;
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/mce_intel.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Dec 24 12:52:34 2008 +0900
@@ -0,0 +1,632 @@
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); /* per-CPU bitmap: banks whose CMCI this CPU owns */
+
+static int nr_intel_ext_msrs = 0; /* extended-MSR count read from MCG_CAP by mce_cap_init() */
+static int cmci_support = 0;      /* set to 1 by mce_cap_init() if MCG_CMCI_P and cpu_has_apic */
+extern int firstbank;             /* first bank index that mce_init() programs */
+
+#ifdef CONFIG_X86_MCE_THERMAL
+static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
+{   /* Default thermal handler, in place until intel_init_thermal() installs the real one. */
+    printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
+                smp_processor_id());
+    add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon thermal transition handler: reports at most once per 5000 ms per CPU. */
+static void intel_thermal_interrupt(struct cpu_user_regs *regs)
+{
+    u32 l, h;
+    unsigned int cpu = smp_processor_id();
+    static s_time_t next[NR_CPUS]; /* per-CPU earliest time of the next report */
+
+    ack_APIC_irq();
+    if (NOW() < next[cpu]) /* still inside the rate-limit window: stay silent */
+        return;
+
+    next[cpu] = NOW() + MILLISECS(5000);
+    rdmsr(MSR_IA32_THERM_STATUS, l, h);
+    if (l & 0x1) { /* bit 0 of the low word set: reported as above threshold */
+        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+                cpu);
+        add_taint(TAINT_MACHINE_CHECK);
+    } else {
+        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+    }
+}
+
+/* Installed thermal handler; "unexpected" until intel_init_thermal() replaces it. */
+static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) 
+        = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
+{   /* Entry point: run the installed handler between irq_enter() and irq_exit(). */
+    irq_enter();
+    vendor_thermal_interrupt(regs);
+    irq_exit();
+}
+
+/* P4/Xeon thermal regulation detect and init; installs intel_thermal_interrupt on success. */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+    u32 l, h;
+    int tm2 = 0;
+    unsigned int cpu = smp_processor_id();
+
+    /* Thermal monitoring */
+    if (!cpu_has(c, X86_FEATURE_ACPI))
+        return; /* -ENODEV */
+
+    /* Clock modulation */
+    if (!cpu_has(c, X86_FEATURE_ACC))
+        return; /* -ENODEV */
+
+    /* first check if its enabled already, in which case there might
+     * be some SMM goo which handles it, so we can't even put a handler
+     * since it might be delivered via SMI already -zwanem.
+     */
+    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+    h = apic_read(APIC_LVTTHMR); /* h now holds the thermal LVT entry, not the MSR high word */
+    if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+        printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
+        return; /* -EBUSY */
+    }
+
+    if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+        tm2 = 1; /* only affects the "TM2"/"TM1" label printed at the end */
+
+    /* check whether a vector already exists, temporarily masked? */
+    if (h & APIC_VECTOR_MASK) {
+        printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
+                 cpu, (h & APIC_VECTOR_MASK));
+        return; /* -EBUSY */
+    }
+
+    /* The temperature transition interrupt handler setup */
+    h = THERMAL_APIC_VECTOR;    /* our delivery vector */
+    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
+    apic_write_around(APIC_LVTTHMR, h);
+
+    rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+    wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+    /* ok we're good to go... */
+    vendor_thermal_interrupt = intel_thermal_interrupt;
+
+    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+    wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+    l = apic_read (APIC_LVTTHMR);
+    apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); /* unmask last, handler is in place */
+    printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 
+            cpu, tm2 ? "TM2" : "TM1");
+    return;
+}
+#endif /* CONFIG_X86_MCE_THERMAL */
+
+static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext)
+{   /* Fill *mc_ext with ten IA32_MCG_* MSR values; no-op when none were reported. */
+    if (nr_intel_ext_msrs == 0)
+        return;
+
+    /* nr_intel_ext_msrs is non-zero only when MCG_CAP reported MCG_EXT_P. */
+    memset(mc_ext, 0, sizeof(struct mcinfo_extended));
+    mc_ext->common.type = MC_TYPE_EXTENDED;
+    mc_ext->common.size = sizeof(*mc_ext); /* size of the record, not of the pointer */
+    mc_ext->mc_msrs = 10; /* number of mc_msr[] slots filled in below */
+
+    mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX;
+    rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value);
+    mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX;
+    rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value);
+    mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX;
+    rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value);
+
+    mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX;
+    rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value);
+    mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI;
+    rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value);
+    mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI;
+    rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value);
+
+    mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP;
+    rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value);
+    mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP;
+    rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value);
+    mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS;
+    rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value);
+    mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP;
+    rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value);
+}
+
+/* machine_check_poll scans the MCA banks of the current CPU. Call sites:
+ * 1. mce_init(), with MC_FLAG_RESET, to log what was left before reset;
+ * 2. the CMCI interrupt handler, with MC_FLAG_CMCI (owned banks only);
+ * 3. the polling handler, with MC_FLAG_POLLED.
+ * Every logged bank is appended to @mi and its status MSR is cleared.
+ * Returns the number of banks logged. DOM0 is the consumer.
+*/
+static int machine_check_poll(struct mc_info *mi, int calltype)
+{
+    int exceptions = (read_cr4() & X86_CR4_MCE);
+    int i, nr_unit = 0, uc = 0, pcc = 0;
+    uint64_t status, addr;
+    struct mcinfo_global mcg;
+    struct mcinfo_extended mce;
+    unsigned int cpu;
+    struct domain *d;
+
+    cpu = smp_processor_id();
+
+    if (!mi) {
+        printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n");
+        return 0;
+    }
+    x86_mcinfo_clear(mi);
+
+    memset(&mcg, 0, sizeof(mcg));
+    mcg.common.type = MC_TYPE_GLOBAL;
+    mcg.common.size = sizeof(mcg);
+    /*If called from cpu-reset check, don't need to fill them.
+     *If called from cmci context, we'll try to fill domid by memory addr
+    */
+    mcg.mc_domid = -1;
+    mcg.mc_vcpuid = -1;
+    if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
+        mcg.mc_flags = MC_FLAG_POLLED;
+    else if (calltype == MC_FLAG_CMCI)
+        mcg.mc_flags = MC_FLAG_CMCI;
+    mcg.mc_socketid = phys_proc_id[cpu];
+    mcg.mc_coreid = cpu_core_id[cpu];
+    mcg.mc_apicid = cpu_physical_id(cpu);
+    /* Thread id = low APIC-ID bits; assumes smp_num_siblings is a power of two. */
+    mcg.mc_core_threadid = mcg.mc_apicid & (smp_num_siblings - 1);
+    rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
+
+    for ( i = 0; i < nr_mce_banks; i++ ) {
+        struct mcinfo_bank mcb;
+        /*For CMCI, only owners checks the owned MSRs*/
+        if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
+             (calltype & MC_FLAG_CMCI) )
+            continue;
+        rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
+
+        if (! (status & MCi_STATUS_VAL) )
+            continue;
+        /*
+         * Uncorrected events are handled by the exception
+         * handler when it is enabled. But when the exception
+         * is disabled such as when mcheck_init, log everything.
+         */
+        if ((status & MCi_STATUS_UC) && exceptions)
+            continue;
+
+        if (status & MCi_STATUS_UC)
+            uc = 1;
+        if (status & MCi_STATUS_PCC)
+            pcc = 1;
+
+        memset(&mcb, 0, sizeof(mcb));
+        mcb.common.type = MC_TYPE_BANK;
+        mcb.common.size = sizeof(mcb);
+        mcb.mc_bank = i;
+        mcb.mc_status = status;
+        if (status & MCi_STATUS_MISCV)
+            rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc);
+        if (status & MCi_STATUS_ADDRV) {
+            rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
+            d = maddr_get_owner(addr);
+            if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) )
+                mcb.mc_domid = d->domain_id;
+        }
+        if (cmci_support)
+            rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
+        if (calltype == MC_FLAG_CMCI)
+            rdtscll(mcb.mc_tsc);
+        x86_mcinfo_add(mi, &mcb);
+        nr_unit++;
+        add_taint(TAINT_MACHINE_CHECK);
+        /*Clear state for this bank */
+        wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
+        printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", 
+                i, cpu, status);
+        printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], "
+                "thread[%d]\n", cpu, mcg.mc_socketid, 
+                mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
+ 
+    }
+    /*if pcc = 1, uc must be 1*/
+    if (pcc)
+        mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
+    else if (uc)
+        mcg.mc_flags |= MC_FLAG_RECOVERABLE;
+    else /*correctable*/
+        mcg.mc_flags |= MC_FLAG_CORRECTABLE;
+
+    if (nr_unit && nr_intel_ext_msrs && 
+                    (mcg.mc_gstatus & MCG_STATUS_EIPV)) {
+        intel_get_extended_msrs(&mce);
+        x86_mcinfo_add(mi, &mce);
+    }
+    if (nr_unit) 
+        x86_mcinfo_add(mi, &mcg);
+    /* NOTE(review): MCG_STATUS is only read above, it is not written back here */
+    return nr_unit;
+}
+
+static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
+{   /* Installed as machine_check_vector: dump valid banks, panic unless continuable. */
+    /* MACHINE CHECK Error handler will be sent in another patch,
+     * simply copy old solutions here. This code will be replaced
+     * by upcoming machine check patches
+     */
+
+    int recover=1; /* bit 0: cannot continue, bit 1: CPU context corrupt */
+    u32 alow, ahigh, high, low;
+    u32 mcgstl, mcgsth;
+    int i;
+   
+    rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+    if (mcgstl & (1<<0))       /* Recoverable ? */
+        recover=0;
+    
+    printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+           smp_processor_id(), mcgsth, mcgstl);
+    
+    for (i=0; i<nr_mce_banks; i++) {
+        rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+        if (high & (1u<<31)) { /* unsigned shift: 1<<31 overflows a signed int */
+            if (high & (1<<29))
+                recover |= 1;
+            if (high & (1<<25))
+                recover |= 2;
+            printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+            high &= ~(1u<<31);
+            if (high & (1<<27)) {
+                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                printk ("[%08x%08x]", ahigh, alow);
+            }
+            if (high & (1<<26)) {
+                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                printk (" at %08x%08x", ahigh, alow);
+            }
+            printk ("\n");
+        }
+    }
+    
+    if (recover & 2)
+        mc_panic ("CPU context corrupt");
+    if (recover & 1)
+        mc_panic ("Unable to continue");
+    
+    printk(KERN_EMERG "Attempting to continue.\n");
+    /* 
+     * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+     * recoverable/continuable.This will allow BIOS to look at the MSRs
+     * for errors if the OS could not log the error.
+     */
+    for (i=0; i<nr_mce_banks; i++) {
+        u32 msr;
+        msr = MSR_IA32_MC0_STATUS+i*4;
+        rdmsr (msr, low, high);
+        if (high&(1u<<31)) {
+            /* Clear it */
+            wrmsr(msr, 0UL, 0UL);
+            /* Serialize */
+            wmb();
+            add_taint(TAINT_MACHINE_CHECK);
+        }
+    }
+    mcgstl &= ~(1<<2); /* clear bit 2 of MCG_STATUS before writing it back */
+    wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+static DEFINE_SPINLOCK(cmci_discover_lock); /* held by cmci_discover() around the bank scan */
+static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); /* per-CPU: banks found without CMCI */
+
+/*
+ * Discover bank sharing (SDM algorithm); returns 0 if bank @i lacks CMCI, else 1.
+ */
+static int do_cmci_discover(int i)
+{
+    unsigned msr = MSR_IA32_MC0_CTL2 + i;
+    u64 val;
+
+    rdmsrl(msr, val);
+    /* Some other CPU already owns this bank. */
+    if (val & CMCI_EN) {
+        clear_bit(i, __get_cpu_var(mce_banks_owned));
+        goto out;
+    }
+    wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
+    rdmsrl(msr, val); /* read back to see whether the enable bit stuck */
+
+    if (!(val & CMCI_EN)) {
+        /* This bank does not support CMCI. Polling timer has to handle it. */
+        set_bit(i, __get_cpu_var(no_cmci_banks));
+        return 0;
+    }
+    set_bit(i, __get_cpu_var(mce_banks_owned)); /* enable bit stuck: this CPU owns the bank */
+out:
+    clear_bit(i, __get_cpu_var(no_cmci_banks));
+    return 1;
+}
+
+static void cmci_discover(void)
+{   /* Run do_cmci_discover() on every bank this CPU does not own yet, under the lock. */
+    unsigned long flags;
+    int i;
+
+    printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
+
+    spin_lock_irqsave(&cmci_discover_lock, flags);
+
+    for (i = 0; i < nr_mce_banks; i++)
+        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+            do_cmci_discover(i); /* return value is not used here */
+
+    spin_unlock_irqrestore(&cmci_discover_lock, flags);
+
+    printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", 
+           smp_processor_id(), 
+           *((unsigned long *)__get_cpu_var(mce_banks_owned)), /* first word of each map only */
+           *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+}
+
+/*
+ * Define an owner for each bank. Banks can be shared between CPUs
+ * and to avoid reporting events multiple times always set up one
+ * CPU as owner. 
+ *
+ * The assignment has to be redone when CPUs go offline and
+ * any of the owners goes away. Also pollers run in parallel so we
+ * have to be careful to update the banks in a way that doesn't
+ * lose or duplicate events.
+ */
+
+static void mce_set_owner(void)
+{
+    if (!cmci_support || mce_disabled == 1) /* NOTE(review): other sites test !mce_disabled - confirm both agree */
+        return;
+
+    cmci_discover();
+}
+
+static void __cpu_mcheck_distribute_cmci(void *unused)
+{   /* Per-CPU callback for on_each_cpu(): rerun bank discovery on this CPU. */
+    cmci_discover();
+}
+
+void cpu_mcheck_distribute_cmci(void)
+{   /* Rerun CMCI bank discovery on every CPU when CMCI is in use. */
+    if (cmci_support && !mce_disabled)
+        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0);
+}
+
+static void clear_cmci(void)
+{   /* Give up every bank this CPU owns: clear its CMCI enable/threshold and ownership bit. */
+    int i;
+
+    if (!cmci_support || mce_disabled == 1)
+        return;
+
+    printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n", 
+            smp_processor_id());
+
+    for (i = 0; i < nr_mce_banks; i++) {
+        unsigned msr = MSR_IA32_MC0_CTL2 + i;
+        u64 val;
+        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+            continue; /* not ours: leave the bank's CTL2 untouched */
+        rdmsrl(msr, val);
+        if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
+            wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
+        clear_bit(i, __get_cpu_var(mce_banks_owned));
+    }
+}
+
+void cpu_mcheck_disable(void)
+{   /* Turn off CR4.MCE on this CPU and release the CMCI banks it owns. */
+    clear_in_cr4(X86_CR4_MCE);
+
+    if (cmci_support && !mce_disabled) /* clear_cmci() repeats a similar check itself */
+        clear_cmci();
+}
+
+static void intel_init_cmci(struct cpuinfo_x86 *c)
+{   /* Program the local APIC CMCI LVT entry with CMCI_APIC_VECTOR and unmask it. */
+    u32 l, apic;
+    int cpu = smp_processor_id();
+
+    if (!mce_available(c) || !cmci_support) {
+        printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu);
+        return;
+    }
+
+    apic = apic_read(APIC_CMCI);
+    if ( apic & APIC_VECTOR_MASK ) /* a vector is already programmed: leave it alone */
+    {
+        printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n",
+            cpu, ( apic & APIC_VECTOR_MASK ));
+        return;
+    }
+
+    apic = CMCI_APIC_VECTOR;
+    apic |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* written masked first */
+    apic_write_around(APIC_CMCI, apic);
+
+    l = apic_read(APIC_CMCI);
+    apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED); /* then unmasked */
+}
+
+fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
+{   /* CMCI handler: poll the banks this CPU owns and raise VIRQ_MCA to dom0 on a hit. */
+    int nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+    int cpu = smp_processor_id();
+
+    ack_APIC_irq();
+    irq_enter();
+    printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu);
+    nr_unit = machine_check_poll(mi, MC_FLAG_CMCI);
+    if (nr_unit) { /* at least one bank was logged into mi */
+        x86_mcinfo_dump(mi);
+        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+            send_guest_global_virq(dom0, VIRQ_MCA);
+    }
+    irq_exit();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+    /* Thermal setup is compiled in only with CONFIG_X86_MCE_THERMAL; CMCI setup always runs. */
+#ifdef CONFIG_X86_MCE_THERMAL
+    intel_init_thermal(c);
+#endif
+    intel_init_cmci(c);
+}
+
+static void mce_cap_init(struct cpuinfo_x86 *c)
+{   /* Derive cmci_support, nr_mce_banks, nr_intel_ext_msrs and firstbank from MCG_CAP and @c. */
+    u32 l, h;
+
+    rdmsr (MSR_IA32_MCG_CAP, l, h);
+    if ((l & MCG_CMCI_P) && cpu_has_apic)
+        cmci_support = 1;
+
+    nr_mce_banks = l & 0xff; /* bank count is the low byte of MCG_CAP */
+    if (nr_mce_banks > MAX_NR_BANKS) /* NOTE(review): warns only; nr_mce_banks is not clamped */
+        printk(KERN_WARNING "MCE: exceed max mce banks\n");
+    if (l & MCG_EXT_P)
+    {
+        nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
+        printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n",
+            smp_processor_id(), nr_intel_ext_msrs);
+    }
+    /* for most of p6 family, bank 0 is an alias bios MSR.
+     * But after model>1a, bank 0 is available*/
+    if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL
+            && c->x86_model < 0x1A)
+        firstbank = 1;
+    else
+        firstbank = 0;
+}
+
+static void mce_init(void)
+{   /* Log and clear stale bank state, then enable CR4.MCE and the bank controls. */
+    u32 l, h;
+    int i, nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+    clear_in_cr4(X86_CR4_MCE);
+    /* log the machine checks left over from the previous reset.
+     * This also clears the status of every bank it logs. */
+
+    nr_unit = machine_check_poll(mi, MC_FLAG_RESET);
+    /*in the boot up stage, not expect inject to DOM0, but go print out
+    */
+    if (nr_unit > 0)
+        x86_mcinfo_dump(mi);
+
+    set_in_cr4(X86_CR4_MCE);
+    rdmsr (MSR_IA32_MCG_CAP, l, h);
+    if (l & MCG_CTL_P) /* Control register present ? */
+        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+    for (i = firstbank; i < nr_mce_banks; i++)
+    {
+        /*Some banks are shared across cores, use MCi_CTRL to judge whether
+         * this bank has been initialized by other cores already.*/
+        rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
+        if (!l && !h) /* logical AND: both halves of MCi_CTL are zero */
+        {
+            /*if ctl is 0, this bank is never initialized*/
+            printk(KERN_DEBUG "mce_init: init bank%d\n", i);
+            wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
+            wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
+       }
+    }
+    if (firstbank) /*if cmci enabled, firstbank = 0*/
+        wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+}
+
+/* P4 and P6 families share this MCA initialization sequence. */
+void intel_mcheck_init(struct cpuinfo_x86 *c)
+{
+    mce_cap_init(c); /* must run first: sets nr_mce_banks, firstbank, cmci_support */
+    printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+            smp_processor_id());
+    /* machine check is available */
+    machine_check_vector = intel_machine_check;
+    mce_init();
+    mce_intel_feature_init(c);
+    mce_set_owner();
+}
+
+/*
+ * Periodic polling timer for "silent" machine check errors. If the
+ * poller finds an MCE, poll faster. When the poller finds no more 
+ * errors, poll slower
+*/
+static struct timer mce_timer;
+
+#define MCE_PERIOD 4000   /* initial interval, in ms (passed to MILLISECS()) */
+#define MCE_MIN    2000   /* lower bound for period, in ms */
+#define MCE_MAX    32000  /* upper bound for period, in ms */
+
+static u64 period = MCE_PERIOD; /* current polling interval, in ms */
+static int adjust = 0;          /* incremented by each poll that found errors in this round */
+
+static void mce_intel_checkregs(void *info)
+{   /* Per-CPU poll body: log new bank errors, bump adjust, notify dom0. @info is unused. */
+    int nr_unit;
+    struct mc_info *mi =  x86_mcinfo_getptr();
+
+    if( !mce_available(&current_cpu_data))
+        return;
+    nr_unit = machine_check_poll(mi, MC_FLAG_POLLED);
+    if (nr_unit)
+    {
+        x86_mcinfo_dump(mi);
+        adjust++; /* NOTE(review): plain increment, run via on_each_cpu() - confirm no race matters */
+        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
+            send_guest_global_virq(dom0, VIRQ_MCA);
+    }
+}
+
+static void mce_intel_work_fn(void *data)
+{   /* Timer callback: poll all CPUs, retune period within [MCE_MIN, MCE_MAX], re-arm. */
+    on_each_cpu(mce_intel_checkregs, data, 1, 1);
+    if (adjust) { /* some CPU logged an error: shorten the interval */
+        period = period / (adjust + 1);
+        printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval "
+               "to %"PRIu64"\n", period);
+    }
+    else {
+        period *= 2; /* quiet round: back off */
+    }
+    if (period > MCE_MAX) 
+        period = MCE_MAX;
+    if (period < MCE_MIN)
+        period = MCE_MIN;
+    set_timer(&mce_timer, NOW() + MILLISECS(period));
+    adjust = 0; /* start the next round with a clean count */
+}
+
+void intel_mcheck_timer(struct cpuinfo_x86 *c)
+{   /* Arm the polling timer for its first run after MCE_PERIOD ms. @c is not used. */
+    printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n");
+    init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+    set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD));
+}
+
diff -r 9837303a4708 -r 07f26e047fbf xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c       Wed Dec 24 12:50:57 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c       Wed Dec 24 12:52:34 2008 +0900
@@ -19,8 +19,8 @@
 #include <asm/msr.h>
 
 #include "mce.h"
-
-static int firstbank;
+#include "x86_mca.h"
+int firstbank = 0;
 static struct timer mce_timer;
 
 #define MCE_PERIOD MILLISECS(15000)
@@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
        /* Check for MCE support */
-       if (!cpu_has(c, X86_FEATURE_MCE))
+       if (!mce_available(c))
                return -ENODEV;
-
-       /* Check for PPro style MCA */
-       if (!cpu_has(c, X86_FEATURE_MCA))
-               return -ENODEV;
-
        /*
         * Check for non-fatal errors every MCE_RATE s

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.