[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg (staging)



# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1195661526 25200
# Node ID 53dc1cf505060a06e5b34a4812fce4312743ca26
# Parent  9a9ddc04eea2cac0ccfe8be2b9259b4edea5ec9d
# Parent  05cbf512b82b2665d407395bac73b9cca0c396b4
merge with xen-unstable.hg (staging)
---
 xen/include/asm-powerpc/smpboot.h   |   21 
 xen/include/asm-x86/smpboot.h       |   16 
 tools/ioemu/Makefile.target         |    2 
 tools/ioemu/hw/e100.c               | 2464 ++++++++++++++++++++++++++++++++++++
 tools/ioemu/hw/pci.c                |    2 
 xen/arch/x86/hvm/hpet.c             |    8 
 xen/arch/x86/irq.c                  |    1 
 xen/arch/x86/mm.c                   |   15 
 xen/arch/x86/physdev.c              |    1 
 xen/arch/x86/smp.c                  |    1 
 xen/arch/x86/traps.c                |    3 
 xen/arch/x86/x86_32/seg_fixup.c     |  183 +-
 xen/include/asm-x86/desc.h          |   63 
 xen/include/asm-x86/system.h        |  365 +----
 xen/include/asm-x86/x86_32/system.h |  114 +
 xen/include/asm-x86/x86_64/system.h |   68 
 16 files changed, 2897 insertions(+), 430 deletions(-)

diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/Makefile.target
--- a/tools/ioemu/Makefile.target       Tue Nov 20 11:53:44 2007 -0700
+++ b/tools/ioemu/Makefile.target       Wed Nov 21 09:12:06 2007 -0700
@@ -399,7 +399,7 @@ VL_OBJS+= usb.o usb-hub.o usb-linux.o us
 VL_OBJS+= usb.o usb-hub.o usb-linux.o usb-hid.o usb-ohci.o usb-msd.o
 
 # PCI network cards
-VL_OBJS+= ne2000.o rtl8139.o pcnet.o
+VL_OBJS+= ne2000.o rtl8139.o pcnet.o e100.o
 
 ifeq ($(TARGET_BASE_ARCH), i386)
 # Hardware support
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/hw/e100.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/ioemu/hw/e100.c     Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,2464 @@
+/*
+ * QEMU E100(i82557) ethernet card emulation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Copyright (c) 2006-2007 Stefan Weil
+ * Copyright (c) 2006-2007 Zhang Xin(xing.z.zhang@xxxxxxxxx)
+ *
+ * Support OS:
+ *      x86 linux and windows
+ *      PAE linux and windows
+ *      x86_64 linux and windows
+ *      IA64 linux and windows
+ *
+ * Untested:
+ *      Big-endian machine
+ *
+ * References:
+ *
+ * Intel 8255x 10/100 Mbps Ethernet Controller Family
+ * Open Source Software Developer Manual
+ */
+
+#include <assert.h>
+#include "vl.h"
+
+enum
+{
+    E100_PCI_VENDOR_ID = 0x00,        /* 16 bits */
+    E100_PCI_DEVICE_ID = 0x02,        /* 16 bits */
+    E100_PCI_COMMAND = 0x04,        /* 16 bits */
+    E100_PCI_STATUS = 0x06,            /* 16 bits */
+    E100_PCI_REVISION_ID = 0x08,    /* 8 bits */
+    E100_PCI_CLASS_CODE = 0x0b,        /* 8 bits */
+    E100_PCI_SUBCLASS_CODE = 0x0a,    /* 8 bits */
+    E100_PCI_HEADER_TYPE = 0x0e,    /* 8 bits */
+    E100_PCI_BASE_ADDRESS_0 = 0x10,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_1 = 0x14,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_2 = 0x18,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_3 = 0x1c,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_4 = 0x20,    /* 32 bits */
+    E100_PCI_BASE_ADDRESS_5 = 0x24    /* 32 bits */
+}PCI_CONFIGURE_SPACE;
+
+#define PCI_CONFIG_8(offset, value) \
+    (*(uint8_t *)&pci_conf[offset] = (value))
+#define PCI_CONFIG_16(offset, value) \
+    (*(uint16_t *)&pci_conf[offset] = cpu_to_le16(value))
+#define PCI_CONFIG_32(offset, value) \
+    (*(uint32_t *)&pci_conf[offset] = cpu_to_le32(value))
+
+// Alias for Control/Status register read/write
+#define CSR_STATUS  scb_status
+#define CSR_CMD scb_cmd
+#define CSR_POINTER scb_pointer
+#define CSR_PORT port
+#define CSR_EEPROM eeprom_ctrl
+#define CSR_MDI mdi_ctrl
+#define CSR_PM pm_reg
+
+#define CSR(class, field)   \
+    (s->pci_mem.csr.class.u.field)
+#define CSR_VAL(class)  \
+    (s->pci_mem.csr.class.val)
+
+#define CSR_READ(x, type)    \
+    ({  \
+        type t; \
+        memcpy(&t, &s->pci_mem.mem[x], sizeof(type)); \
+        t;  \
+     })
+
+#define CSR_WRITE(x, val, type)    \
+    ({  \
+        type t = val; \
+        memcpy(&s->pci_mem.mem[x], &t, sizeof(type)); \
+     })
+
+#define SET_CU_STATE(val)    \
+    (CSR(CSR_STATUS, cus) = val)
+#define GET_CU_STATE    \
+    (CSR(CSR_STATUS, cus))
+
+#define SET_RU_STATE(val)    \
+    (CSR(CSR_STATUS, rus) = val)
+#define GET_RU_STATE    \
+    (CSR(CSR_STATUS, rus))
+
+#define KiB 1024
+
+#define EEPROM_SIZE     64
+
+#define BIT(n) (1U << (n))
+
+/* debug E100 card */
+//#define DEBUG_E100
+
+#ifdef DEBUG_E100
+/* Debug trace to stderr, prefixed with "EE100" and the calling function's name. */
+#define logout(fmt, args...) \
+    fprintf(stderr, "EE100\t%-28s" fmt, __func__, ##args)
+#else
+/* Tracing disabled: expands to a no-op, so the arguments are never evaluated. */
+#define logout(fmt, args...) ((void)0)
+#endif
+
+#define MAX_ETH_FRAME_SIZE 1514
+
+/* This driver supports several different devices which are declared here. */
+#define i82551          0x82551
+#define i82557B         0x82557b
+#define i82557C         0x82557c
+#define i82558B         0x82558b
+#define i82559C         0x82559c
+#define i82559ER        0x82559e
+#define i82562          0x82562
+
+#define PCI_MEM_SIZE            (4 * KiB)
+#define PCI_IO_SIZE             (64)
+#define PCI_FLASH_SIZE          (128 * KiB)
+
+enum
+{
+    OP_READ,
+    OP_WRITE,
+} OPERTAION_DIRECTION;
+
+/* The SCB accepts the following controls for the Tx and Rx units: */
+enum
+{
+    CU_NOP = 0x0000,        /* No operation */
+    CU_START = 0x0010,        /* CU start     */
+    CU_RESUME = 0x0020,        /* CU resume    */
+    CU_STATSADDR = 0x0040,    /* Load dump counters address */
+    CU_SHOWSTATS = 0x0050,    /* Dump statistical counters */
+    CU_CMD_BASE = 0x0060,    /* Load CU base address */
+    CU_DUMPSTATS = 0x0070,    /* Dump and reset statistical counters */
+    CU_S_RESUME = 0x00a0    /* CU static resume */
+}CONTROL_UNIT_COMMAND;
+
+enum
+{
+    RU_NOP = 0x0000,
+    RU_START = 0x0001,
+    RU_RESUME = 0x0002,
+    RU_DMA_REDIRECT = 0x0003,
+    RU_ABORT = 0x0004,
+    RU_LOAD_HDS = 0x0005,
+    RU_ADDR_LOAD = 0x0006,
+    RU_RESUMENR = 0x0007,
+}RECEIVE_UNIT_COMMAND;
+
+/* SCB status word descriptions */
+enum
+{
+    CU_IDLE = 0,
+    CU_SUSPENDED = 1,
+    CU_LPQ_ACTIVE = 2,
+    CU_HQP_ACTIVE = 3
+} CONTROL_UINT_STATE;
+
+enum
+{
+    RU_IDLE = 0,
+    RU_SUSPENDED = 1,
+    RU_NO_RESOURCES =2,
+    RU_READY = 4
+} RECEIVE_UNIT_STATE;
+
+enum
+{
+    PORT_SOFTWARE_RESET = 0,
+    PORT_SELF_TEST = 1,
+    PORT_SELECTIVE_RESET = 2,
+    PORT_DUMP = 3,
+    PORT_DUMP_WAKE_UP = 7,
+}SCB_PORT_SELECTION_FUNCTION;
+
+enum
+{
+    CBL_NOP = 0,
+    CBL_IASETUP = 1,
+    CBL_CONFIGURE = 2,
+    CBL_MULTCAST_ADDR_SETUP = 3,
+    CBL_TRANSMIT = 4,
+    CBL_LOAD_MICROCODE = 5,
+    CBL_DUMP = 6,
+    CBL_DIAGNOSE = 7,
+}CBL_COMMAND;
+
+/* Byte offsets of the registers inside the CSR block (see csr_t). */
+enum
+{
+    SCB_STATUS = 0,         /* SCB base + 0x00h, RU states + CU states + STAT/ACK */
+    SCB_ACK = 1,            /* SCB ack/stat */
+    SCB_CMD = 2,            /* RU command + CU command + S bit + M bit */
+    SCB_INTERRUPT_MASK = 3, /* Interrupts mask bits */
+    SCB_POINTER = 4,        /* SCB general pointer, depending on command type */
+    SCB_PORT = 8,           /* SCB port register */
+    SCB_EEPROM = 0xe,       /* SCB eeprom control register */
+    SCB_MDI = 0x10,         /* SCB MDI control register */
+} CSR_OFFSETS;
+
+enum
+{
+    EEPROM_SK = 0x01,
+    EEPROM_CS = 0x02,
+    EEPROM_DI = 0x04,
+    EEPROM_DO = 0x08,
+} EEPROM_CONTROL_REGISTER;
+
+enum
+{
+    EEPROM_READ = 0x2,
+    EEPROM_WRITE = 0x1,
+    EEPROM_ERASE = 0x3,
+} EEPROM_OPCODE;
+
+enum
+{
+    MDI_WRITE = 0x1,
+    MDI_READ = 0x2,
+} MDI_OPCODE;
+
+enum
+{
+    INT_FCP = BIT(8),
+    INT_SWI = BIT(10),
+    INT_MDI = BIT(11),
+    INT_RNR = BIT(12),
+    INT_CNA = BIT(13),
+    INT_FR = BIT(14),
+    INT_CX_TNO = BIT(15),
+} E100_INTERRUPT;
+
+enum
+{
+    CSR_MEMORY_BASE,
+    CSR_IO_BASE,
+    FLASH_MEMORY_BASE,
+    REGION_NUM
+}E100_PCI_MEMORY_REGION;
+
+/*
+ * Statistical counters kept by the device model: seventeen consecutive
+ * 32-bit words, packed, ending with the completion word.
+ */
+typedef struct {
+    uint32_t tx_good_frames,        // Good frames transmitted
+             tx_max_collisions,     // Fatal frames -- had max collisions
+             tx_late_collisions,    // Fatal frames -- had a late coll.
+             tx_underruns,          // Transmit underruns (fatal or re-transmit)
+             tx_lost_crs,           // Frames transmitted without CRS
+             tx_deferred,           // Deferred transmits
+             tx_single_collisions,  // Transmits that had 1 and only 1 coll.
+             tx_multiple_collisions,// Transmits that had multiple coll.
+             tx_total_collisions,   // Transmits that had 1+ collisions.
+
+             rx_good_frames,        // Good frames received
+             rx_crc_errors,         // Aligned frames that had a CRC error
+             rx_alignment_errors,   // Receives that had alignment errors
+             rx_resource_errors,    // Good frame dropped due to lack of resources
+             rx_overrun_errors,     // Overrun errors - bus was busy
+             rx_cdt_errors,         // Received frames that encountered coll.
+             rx_short_frame_errors, // Received frames that were too short
+
+             complete_word;         // A005h indicates dump cmd completion,
+                                    // A007h indicates dump and reset cmd completion.
+
+// TODO: Add specific field for i82558, i82559
+} __attribute__ ((packed)) e100_stats_t;
+
+#define EEPROM_I82557_ADDRBIT 6
+/* Below data is dumped from a real I82557 card */
+/* 64 EEPROM words (EEPROM_SIZE); only a handful of them are non-zero. */
+static const uint16_t eeprom_i82557[] =
+{
+    0x300, 0xe147, 0x2fa4, 0x203, 0x0, 0x201, 0x4701, 0x0, 0x7414, 0x6207,
+    0x4082, 0xb, 0x8086, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x128, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc374,
+};
+
+/*
+ * Initial image of the 256-byte PCI configuration space, 16 bytes per row.
+ * pci_reset() copies it verbatim into the device's config array.
+ */
+static const uint8_t e100_pci_configure[] =
+{
+    0x86, 0x80, 0x29, 0x12, 0x17, 0x00, 0x90, 0x02, 0x08, 0x00, 0x00, 0x02, 0x10, 0x20, 0x00, 0x00,
+    0x00, 0x00, 0x10, 0x50, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x80, 0x0b, 0x00,
+    0x00, 0x00, 0xf0, 0xff, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x01, 0x08, 0x38,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x22, 0xfe,
+    0x00, 0x40, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+typedef struct
+{
+#define OPCODE      0xb
+#define ADDR        0xc
+#define DATA        0xd
+#define NOP         0xe
+
+#define EEPROM_RESET_ALL      0xfe
+#define EEPROM_SELECT_RESET   0xff
+    uint8_t  start_bit;
+    uint8_t  opcode;
+    uint8_t  address;
+    uint16_t data;  //This must be 16 bit represents a register in eeprom
+
+    uint32_t val;
+    uint32_t val_len;
+    uint8_t  val_type;  // What data type is in DI. opcode?address?data?
+
+    uint8_t cs;
+    uint8_t sk;
+
+    // This two fileds only be reset when device init
+    uint16_t addr_len;
+    uint16_t contents[256]; // 256 is enough to all device(i82557 ... i82559)
+} eeprom_t;
+
+// Control/Status register structure
+typedef struct
+{
+    /* SCB status word */
+    union
+    {
+        uint16_t val;
+        struct
+        {
+            uint8_t rs1:2;  // Reserved
+            uint8_t rus:4;  // RU status
+            uint8_t cus:2;  // CU status
+            uint8_t stat_ack; // Stat/ACK
+        }u;
+    }scb_status;
+
+    /* SCB command word */
+    union
+    {
+        uint16_t val;
+        struct
+        {
+            uint8_t ru_cmd:3;   // RU command
+            uint8_t rs1:1;      // Reserved
+            uint8_t cu_cmd:4;   // CU command
+            uint8_t m:1;        // Interrup mask bit(1:mask all interrupt)
+            uint8_t si:1;       // Use for software cause interrupt
+            uint8_t simb:6;     // Specific interrupt mask bit
+        }u;
+    }scb_cmd;
+
+    /* SCB general pointer */
+    union
+    {
+        uint32_t val;
+        struct
+        {
+            uint32_t scb_ptr;
+        }u;
+    }scb_pointer;
+
+    /* Port interface */
+    union
+    {
+        uint32_t val;
+        struct
+        {
+            uint8_t opcode:4;   // Op code for function selection
+            uint32_t ptr:28;    // Result pointer
+        }u;
+    }port;
+
+    uint16_t rs1;               // Reserved
+
+    /* EEPROM control register */
+    union
+    {
+        uint16_t val;
+        struct
+        {
+            uint8_t eesk:1;      // Serial clock
+            uint8_t eecs:1;      // Chip select
+            uint8_t eedi:1;      // Serial data in
+            uint8_t eedo:1;      // Serial data out
+            uint8_t rs1:4;       // Reserved
+            uint8_t data;
+        }u;
+    }eeprom_ctrl;
+
+    /* MDI control register */
+    union
+    {
+        uint32_t val;
+        struct
+        {
+            uint16_t data;       // Data
+            uint8_t regaddr:5;   // PHY register address
+            uint8_t phyaddr:5;   // PHY address
+            uint8_t opcode:2;    // Opcode
+            uint8_t r:1;         // Ready
+            uint8_t ie:1;        // Interrup enable
+            uint8_t rs1:2;       // Reserved
+        }u;
+    } mdi_ctrl;
+
+    /* Receive byte counter register */
+    uint32_t rx_byte_counter;
+
+    /* Early receive interrupt register */
+    uint8_t early_interrupt;
+
+    /* Flow control register */
+    union
+    {
+        uint16_t val;
+    }flow_ctrl;
+
+    /* Power management driver register */
+    union
+    {
+        uint8_t val;
+        struct
+        {
+            uint8_t pme_s:1;     // PME status
+            uint8_t tco_r:1;     // TCO request
+            uint8_t f_tco_i:1;   // Force TCO indication
+            uint8_t tco_re:1;    // TCO ready
+            uint8_t rs1:1;       // Reserved
+            uint8_t isp:1;       // Intersting packet
+            uint8_t mg:1;        // Magic packet
+            uint8_t lsci:1;      // Link status change indication
+        }u;
+    }pm_reg;
+
+    /* General control register */
+    uint8_t gen_ctrl;
+
+    /* General status register */
+    uint8_t gen_status;
+
+    /* These are reserved or we don't support register */
+    uint8_t others[30];
+} __attribute__ ((packed)) csr_t;
+
+typedef struct
+{
+    uint8_t byte_count;
+    uint8_t rx_fifo_limit:4;
+    uint8_t tx_fifo_limit:4;
+    uint8_t adpt_inf_spacing;
+    uint8_t rs1;
+    uint8_t rx_dma_max_bytes;
+    uint8_t tx_dma_max_bytes:7;
+    uint8_t dmbc_en:1;
+    uint8_t late_scb:1,
+            rs2:1,
+            tno_intr:1,
+            ci_intr:1,
+            rs3:1,
+            rs4:1,
+            dis_overrun_rx:1,
+            save_bad_frame:1;
+    uint8_t dis_short_rx:1,
+            underrun_retry:2,
+            rs5:5;
+    uint8_t mii:1,
+            rs6:7;
+    uint8_t rs7;
+    uint8_t rs8:3,
+            nsai:1,
+            preamble_len:2,
+            loopback:2;
+    uint8_t linear_prio:3,
+            rs9:5;
+    uint8_t pri_mode:1,
+            rs10:3,
+            interframe_spacing:4;
+    uint16_t rs11;
+    uint8_t promiscuous:1,
+            broadcast_dis:1,
+            rs12:5,
+            crs_cdt:1;
+    uint16_t rs13;
+    uint8_t strip:1,
+            padding:1,
+            rx_crc:1,
+            rs14:5;
+    uint8_t rs15:6,
+            force_fdx:1,
+            fdx_en:1;
+    uint8_t rs16:6,
+            mul_ia:2;
+    uint8_t rs17:3,
+            mul_all:1,
+            rs18:4;
+} __attribute__ ((packed)) i82557_cfg_t;
+
+/* Per-device state of the emulated E100 card. */
+typedef struct {
+    VLANClientState *vc;
+    PCIDevice *pci_dev;
+    int mmio_index;
+    uint8_t scb_stat;           /* SCB stat/ack byte */
+    uint32_t region_base_addr[REGION_NUM];         /* PCI region addresses */
+    uint8_t macaddr[6];
+    uint16_t mdimem[32];
+    eeprom_t eeprom;
+    uint32_t device;            /* device variant */
+
+    uint8_t mult_list[8];       /* Multicast address list */
+    int is_multcast_enable;
+
+    /* (cu_base + cu_offset) address the next command block in the command
+     * block list. */
+    uint32_t cu_base;           /* CU base address */
+    uint32_t cu_offset;         /* CU address offset */
+    uint32_t cu_next;           /* Point to next command when CU go to suspend */
+
+    /* (ru_base + ru_offset) address the RFD in the Receive Frame Area. */
+    uint32_t ru_base;           /* RU base address */
+    uint32_t ru_offset;         /* RU address offset */
+
+    uint32_t statsaddr;         /* pointer to e100_stats_t */
+
+    e100_stats_t statistics;        /* statistical counters */
+
+    /* Configuration bytes. */
+    i82557_cfg_t config;
+
+    /* FIFO buffer of card. The packet that need to be sent buffered in it */
+    uint8_t pkt_buf[MAX_ETH_FRAME_SIZE+4];
+    /* Data length in FIFO buffer */
+    int pkt_buf_len;
+
+    /* Data in mem is always in the byte order of the controller (le).
+     * The same storage is viewed either as raw bytes or as csr_t. */
+    union
+    {
+        csr_t csr;
+        uint8_t mem[PCI_MEM_SIZE];
+    }pci_mem;
+
+} E100State;
+
+/* CB structure, filled by device driver
+ * This is a common structure of CB. In some
+ * special case such as TRANSMIT command, the
+ * reserved field will be used.
+ */
+struct  control_block
+{
+    uint16_t rs1:13;            /* reserved */
+    uint8_t ok:1;               /* 1:command executed without error, otherwise 0 */
+    uint8_t rs2:1;
+    uint8_t c:1;                /* execution status. set by device, clean by software */
+    uint8_t cmd:3;              /* command */
+    uint16_t rs3:10;            /* most time equal to 0 */
+    uint8_t i:1;                /* whether trigger interrupt after execution. 1:yes; 0:no */
+    uint8_t s:1;                /* suspend */
+    uint8_t el:1;               /* end flag */
+    uint32_t link_addr;
+} __attribute__ ((packed));
+
+typedef struct
+{
+    uint32_t tx_desc_addr;      /* transmit buffer descriptor array address. */
+    uint16_t tcb_bytes:14;      /* transmit command block byte count (in lower 14 bits) */
+    uint8_t rs1:1;
+    uint8_t eof:1;
+    uint8_t tx_threshold;       /* transmit threshold */
+    uint8_t tbd_num;            /* TBD number */
+} __attribute__ ((packed)) tbd_t;
+
+/* Receive frame descriptore structure */
+typedef struct
+{
+    uint16_t status:13;     // Result of receive operation
+    uint8_t ok:1;           // 1:receive without error, otherwise 0
+    uint8_t rs1:1;
+    uint8_t c:1;            // 1:receive complete
+    uint8_t rs2:3;
+    uint8_t sf:1;           // 0:simplified mode
+    uint8_t h:1;            // 1:header RFD
+    uint16_t rs3:9;
+    uint8_t s:1;            // 1:go to suspend
+    uint8_t el:1;           // 1:last RFD
+    uint32_t link_addr;     // Add on RU base point to next RFD
+    uint32_t rs4;
+    uint16_t count:14;      // Number of bytes written into data area
+    uint8_t f:1;            // Set by device when count field update
+    uint8_t eof:1;          // Set by device when placing data into data area complete
+    uint16_t size:14;       // Buffer size (even number)
+    uint8_t rs5:2;
+} __attribute__ ((packed)) rfd_t;
+
+enum
+{
+    RX_COLLISION = BIT(0),  // 1:Receive collision detected
+    RX_IA_MATCH = BIT(1),      // 0:Receive frame match individual address
+    RX_NO_MATCH = BIT(2), // 1:Receive frame match no address
+    RX_ERR = BIT(4),        // 1:Receive frame error
+    RX_TYPE = BIT(5),       // 1:Receive frame is a type frame
+    RX_SHORT = BIT(7),      // 1:Receive frame is too short
+    RX_DMA_ERR = BIT(8),
+    RX_LARGE = BIT(9),      // 1:Receive frame is too large
+    RX_CRC_ERR = BIT(10),
+} RFD_STATUS;
+
+typedef struct PCIE100State {
+    PCIDevice dev;
+    E100State e100;
+} PCIE100State;
+
+/* Default values for MDI (PHY) registers */
+static const uint16_t e100_mdi_default[] = {
+    /* MDI Registers 0 - 6, 7 */
+    0x3000, 0x780d, 0x02a8, 0x0154, 0x05e1, 0x0000, 0x0000, 0x0000,
+    /* MDI Registers 8 - 15 */
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    /* MDI Registers 16 - 31 */
+    0x0003, 0x0000, 0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+static const uint8_t broadcast_macaddr[6] =
+    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+/* Debugging codes */
+#ifdef  DEBUG_E100
+
+/*
+ * Debug helper: print `comment` followed by `len` bytes of `info` in hex on
+ * one stderr line. Does nothing when either pointer is NULL.
+ */
+static void e100_dump(char *comment, uint8_t *info, int len)
+{
+    int idx = 0;
+
+    if ( !(comment && info) )
+        return;
+
+    fprintf(stderr, "EE100\t%-24s%s", __func__, comment);
+    while ( idx < len )
+    {
+        fprintf(stderr, "%x ", info[idx]);
+        idx++;
+    }
+
+    fprintf(stderr, "\n");
+}
+
+/*
+ * Names of the SCB registers, indexed by CSR byte offset. The designated
+ * initializers are sparse, so offsets not listed here hold NULL.
+ */
+static const char *regname[] =
+{
+    [0] = "SCB Status", [1] = "SCB Ack",
+    [2] = "SCB Cmd", [3] = "SCB Interrupt Mask",
+    [4] = "SCB Pointer", [8] = "SCB Port",
+    [0xc] = "SCB Flash", [0xe] = "SCB Eeprom",
+    [0x10] = "SCB Ctrl MDI", [0x14] = "SCB Early RX",
+};
+/* Map a CSR offset to a printable name; out-of-range offsets and unnamed
+ * (NULL) slots both yield the "Unknown" string so "%s" never sees NULL. */
+#define SCBNAME(x)    \
+    ( ((x) < (sizeof(regname) / sizeof(regname[0])) && regname[(x)]) ? \
+      regname[(x)] : "Unknown SCB Register" )
+
+/* Printable names of the command-block commands, indexed by CBL_COMMAND value. */
+static const char *cb_cmd_name[] =
+{
+    [CBL_NOP] = "NOP", [CBL_IASETUP] = "Individual address setup",
+    [CBL_CONFIGURE] = "Configure",
+    [CBL_MULTCAST_ADDR_SETUP] = "Set Multcast address list",
+    [CBL_TRANSMIT] = "Transmit", [CBL_LOAD_MICROCODE] = "Load microcode",
+    [CBL_DUMP] = "Dump", [CBL_DIAGNOSE] = "Diagnose",
+};
+#define CB_CMD_NAME(x)  \
+    ( (x) < (sizeof(cb_cmd_name) / sizeof(cb_cmd_name[0])) ? \
+      cb_cmd_name[(x)] : "Unknown CB command" )
+
+static const char *eeprom_opcode_name[] =
+{
+    [0] = "Unknow", [EEPROM_WRITE] = "Write",
+    [EEPROM_READ] = "Read", [EEPROM_ERASE] = "Erase",
+};
+#define EEPROM_OPCODE_NAME(x)   \
+    ( (x) < (sizeof(eeprom_opcode_name) / sizeof(eeprom_opcode_name[0])) ?  \
+      eeprom_opcode_name[(x)] : "Unknown" )
+
+static struct eeprom_trace_data
+{
+    uint8_t eedo[256];
+    uint8_t di[256];
+    int op;
+    int i;
+    uint32_t data;
+}etd = {.op = NOP};
+
+/*
+ * Debug helper that records the bit stream of one EEPROM phase in `etd`.
+ *
+ * clr != 0: print whatever was collected for the current phase (nothing is
+ *           printed while the phase is NOP), wipe the trace and start
+ *           collecting for phase `next_op`. eedo/di/dir are ignored.
+ * clr == 0: append one sample (eedo, di) and shift one bit into etd.data:
+ *           EEDO while reading during the DATA phase, DI otherwise.
+ */
+static void eeprom_trace(int eedo, int di, int dir, int next_op, int clr)
+{
+    int i;
+
+    if ( clr )
+    {
+        const char *opname = NULL;
+
+        switch ( etd.op )
+        {
+            case NOP:
+                break;
+            case OPCODE:
+                opname = "opcode";
+                break;
+            case ADDR:
+                opname = "address";
+                break;
+            case DATA:
+                opname = "data transfer";
+                break;
+            default:
+                opname = "Unknown";
+        }
+
+        if ( opname )
+        {
+            logout("EEPROM trace:\n");
+            fprintf(stderr, "\toperation: %s\n", opname);
+            fprintf(stderr, "\tDI track:");
+            for ( i=0; i<etd.i; i++ )
+                fprintf(stderr, "%x ", etd.di[i]);
+            fprintf(stderr, "\n\tDO track:");
+            for ( i=0; i<etd.i; i++ )
+                fprintf(stderr, "%x ", etd.eedo[i]);
+            fprintf(stderr, "\n\tData:%#x\n", etd.data);
+        }
+
+
+        memset(&etd, 0x0, sizeof(etd));
+        etd.op = next_op;
+
+        return;
+    }
+
+    /* Nothing else bounds etd.i, so stop recording samples once the
+     * 256-entry trace arrays are full instead of writing past their end. */
+    if ( etd.i >= 0 && etd.i < (int)(sizeof(etd.di) / sizeof(etd.di[0])) )
+    {
+        etd.eedo[etd.i] = eedo;
+        etd.di[etd.i] = di;
+        etd.i ++;
+    }
+
+    if ( dir == EEPROM_READ && etd.op == DATA )
+        etd.data = (etd.data << 1) | eedo;
+    else
+        etd.data = (etd.data << 1) | di;
+}
+
+#define INT_NAME(x) \
+    ({  \
+     char *name = NULL; \
+     switch (x) \
+     {  \
+     case INT_FCP:  \
+            name = "FCP";   \
+            break;  \
+     case INT_SWI:  \
+            name = "SWI";   \
+            break;  \
+     case INT_MDI:  \
+            name = "MDI";   \
+            break;  \
+     case INT_RNR:  \
+            name = "RNR";   \
+            break;  \
+     case INT_CNA:  \
+            name = "CNA";   \
+            break;  \
+     case INT_FR:   \
+            name = "FR";    \
+            break;  \
+     case INT_CX_TNO:   \
+            name ="CX/TNO"; \
+            break;  \
+     default:   \
+            name ="Unknown"; \
+     }  \
+     name;  \
+     })
+
+#else
+/* Non-debug build: the dump helper compiles to an empty function. */
+static void e100_dump(char *comment, uint8_t *info, int len)
+{
+}
+/* Non-debug build: EEPROM tracing compiles to an empty function. */
+static void eeprom_trace(int eedo, int di, int dir, int next_op, int clr)
+{
+}
+#endif
+
+/*
+ * Load the default PCI configuration image into the device's config space,
+ * then patch the revision ID and clear the byte at offset 0x3c.
+ */
+static void pci_reset(E100State * s)
+{
+    /* The PCI_CONFIG_* macros write through a local named pci_conf. */
+    uint8_t *pci_conf = s->pci_dev->config;
+
+    memcpy(pci_conf, e100_pci_configure, sizeof(e100_pci_configure));
+    logout("%p\n", s);
+
+    PCI_CONFIG_8(E100_PCI_REVISION_ID, 0x01);   /* I82557 */
+    PCI_CONFIG_8(0x3c, 0x0);    /* Interrupt Line in the standard PCI header */
+}
+
+/*
+ * Selective reset: wipe the whole CSR/memory window, put CU and RU into the
+ * idle state and restore the few register bits that must read non-zero.
+ * The CU/RU base addresses, the multicast list, the PHY registers and the
+ * statistics are left alone.
+ */
+static void e100_selective_reset(E100State * s)
+{
+    /* Must come first: everything below writes into the zeroed window. */
+    memset(s->pci_mem.mem, 0x0, sizeof(s->pci_mem.mem));
+
+    SET_CU_STATE(CU_IDLE);
+    SET_RU_STATE(RU_IDLE);
+    logout("CU and RU go to idle\n");
+
+    s->cu_next = 0;
+    s->cu_offset = 0;
+    s->ru_offset = 0;
+
+    /* No interrupt is pending after a reset. */
+    s->scb_stat = 0;
+
+    /* On the 82557 every specific interrupt mask bit reads as 1. */
+    CSR(CSR_CMD, simb) = 0x3f;
+
+    /* Bit 21 is the lowest bit of the MDI PHY address field: select PHY 1. */
+    CSR_VAL(CSR_MDI) |= BIT(21);
+
+    /* Start with EEDO high: the driver waits for a dummy 0 on EEDO, so it
+     * must not see one before a transfer has actually started. */
+    CSR(CSR_EEPROM, eedo) = 1;
+}
+
+/*
+ * Full software reset: return the multicast list, the PHY (MDI) registers,
+ * the transmit FIFO and the statistics to their power-on values, then
+ * perform a selective reset for the CSR window and the CU/RU state.
+ */
+static void e100_software_reset(E100State *s)
+{
+    memset(s->pci_mem.mem, 0x0, sizeof(s->pci_mem.mem));
+
+    /* Multicast filtering starts enabled with an empty address list. */
+    memset(s->mult_list, 0x0, sizeof(s->mult_list));
+    s->is_multcast_enable = 1;
+
+    /* PHY registers go back to their defaults. */
+    memcpy(s->mdimem, e100_mdi_default, sizeof(s->mdimem));
+
+    /* Drop anything still queued in the FIFO buffer. */
+    s->pkt_buf_len = 0;
+    memset(s->pkt_buf, 0x0, sizeof(s->pkt_buf));
+
+    memset(&s->statistics, 0x0, sizeof(s->statistics));
+
+    e100_selective_reset(s);
+}
+
+/* Reset handler taking an opaque pointer: `opaque` is the card's E100State. */
+static void e100_reset(void *opaque)
+{
+    E100State *state = opaque;
+
+    logout("%p\n", state);
+    e100_software_reset(state);
+}
+
+
+/* Save hook: not implemented yet, so no device state is written to `f`. */
+static void e100_save(QEMUFile * f, void *opaque)
+{
+    /* TODO: serialize the device state */
+    (void)f;
+    (void)opaque;
+}
+
+/*
+ * Load hook: not implemented yet. Reads nothing from `f` and always
+ * returns 0.
+ */
+static int e100_load(QEMUFile * f, void *opaque, int version_id)
+{
+    /* TODO: restore the device state */
+    (void)f;
+    (void)opaque;
+    (void)version_id;
+    return 0;
+}
+
+/* Interrupt functions */
+/*
+ * Raise interrupt `int_type` (one of the E100_INTERRUPT bits, which live in
+ * bits 8-15 of the SCB status word).
+ *
+ * When the M bit of the SCB command word is set the request is dropped
+ * entirely: the status bit is not recorded and the line is not asserted.
+ */
+static void e100_interrupt(E100State *s, uint16_t int_type)
+{
+    //TODO: Add another i8255x card supported mask bit
+    if ( CSR(CSR_CMD,m) )
+        return;
+
+    /* Latch the cause in stat/ack so the driver can tell what happened,
+     * and mirror it in scb_stat for the acknowledge path. */
+    CSR_VAL(CSR_STATUS) |= int_type;
+    s->scb_stat = CSR(CSR_STATUS, stat_ack);
+
+    logout("Trigger an interrupt(type = %s(%#x), SCB Status = %#x)\n",
+            INT_NAME(int_type), int_type, CSR_VAL(CSR_STATUS));
+    pci_set_irq(s->pci_dev, 0, 1);
+}
+
+/*
+ * Handle a driver write of `ack` to the stat/ack byte.
+ *
+ * Bits set in `ack` clear the matching pending bits in scb_stat; the
+ * interrupt line is de-asserted once nothing is pending any more. An ack
+ * of 0, or one that matches no pending bit, is ignored.
+ */
+static void e100_interrupt_ack(E100State * s, uint8_t ack)
+{
+    if ( ack == 0 || (s->scb_stat & ack) == 0 )
+    {
+        logout("Illegal interrupt ack(ack=%#x, SCB Stat/Ack=%#x), ignore it\n",
+                ack, s->scb_stat);
+        /* The raw register write already happened before e100_execute(),
+         * so put the real pending bits back into the ack field. */
+        CSR(CSR_STATUS, stat_ack) = s->scb_stat;
+        return;
+    }
+
+    s->scb_stat &= ~ack;
+    CSR(CSR_STATUS, stat_ack) = s->scb_stat;
+
+    logout("Interrupt ack(name=%s,val=%#x)\n",
+            INT_NAME(({uint16_t bit = ack<<8;bit;})),ack);
+
+    if ( s->scb_stat == 0 )
+    {
+        logout("All interrupts are acknowledeged, de-assert interrupt line\n");
+        pci_set_irq(s->pci_dev, 0, 0);
+    }
+}
+
+/*
+ * PORT self-test: write an 8-byte result block to guest physical address
+ * `res_addr`. The signature word is all ones and the result word is 0,
+ * i.e. the emulated self test always succeeds.
+ */
+static void e100_self_test(uint32_t res_addr)
+{
+    struct
+    {
+        uint32_t st_sign;           /* Self Test Signature */
+        uint32_t st_result;         /* Self Test Results */
+    } test_res;
+
+    test_res.st_sign = (uint32_t)-1;
+    test_res.st_result = 0; // Our self test always success
+    cpu_physical_memory_write(res_addr, (uint8_t *)&test_res,
+                              sizeof(test_res));
+
+    logout("Write self test result to %#x\n", res_addr);
+}
+
+/*
+ * Handle an access to the SCB PORT register.
+ *
+ * The low four bits of `val` select the function; for the self test the
+ * remaining bits are the address the result is written to. The dump
+ * functions are recognised but not implemented. `dir` is not used.
+ */
+static void scb_port_func(E100State *s, uint32_t val, int dir)
+{
+#define PORT_SELECTION_MASK 0xfU
+
+    uint32_t sel = val & PORT_SELECTION_MASK;
+
+    switch ( sel )
+    {
+        case PORT_SOFTWARE_RESET:
+            logout("do PORT_SOFTWARE_RESET!\n");
+            e100_software_reset(s);
+            break;
+        case PORT_SELF_TEST:
+            e100_self_test(val & ~PORT_SELECTION_MASK);
+            logout("do PORT_SELF_TEST!\n");
+            break;
+        case PORT_SELECTIVE_RESET:
+            logout("do PORT_SELECTIVE_RESET!\n");
+            e100_selective_reset(s);
+            break;
+        case PORT_DUMP:
+            /* Not implemented; only traced. */
+            logout("do PORT_DUMP!\n");
+            break;
+        case PORT_DUMP_WAKE_UP:
+            /* Not implemented; only traced. */
+            logout("do PORT_DUMP_WAKE_UP!\n");
+            break;
+        default:
+            logout("Unkonw SCB port command(selection function = %#x)\n", sel);
+            break;
+    }
+}
+
+/*
+ * Handle a guest write of `val` to the MDI control register.
+ *
+ * Fields decoded from `val`: bit 29 interrupt enable, bits 27:26 opcode,
+ * bits 25:21 PHY address, bits 20:16 PHY register, bits 15:0 data.
+ *
+ * Only PHY 1 is emulated. An access to any other PHY is acknowledged as
+ * finished without touching the PHY registers. An opcode other than
+ * MDI_WRITE/MDI_READ is dropped: the register is not updated and no
+ * interrupt is raised. Otherwise the access is carried out on s->mdimem,
+ * the result is written back to the register with the Ready bit set, and an
+ * MDI interrupt is requested when the guest asked for one.
+ */
+static void e100_write_mdi(E100State *s, uint32_t val)
+{
+    uint32_t ie = (val & 0x20000000) >> 29;
+    uint32_t opcode = (val & 0x0c000000) >> 26;
+    uint32_t phyaddr = (val & 0x03e00000) >> 21;
+    uint32_t regaddr = (val & 0x001f0000) >> 16;
+    uint32_t data = val & 0x0000ffff;
+
+    logout("Write MDI:\n"
+           "\topcode:%#x\n"
+           "\tphy address:%#x\n"
+           "\treg address:%#x\n"
+           "\tie:%#x\n"
+           "\tdata:%#x\n",
+           opcode, phyaddr, regaddr, ie, data);
+
+    /* We use default value --- PHY1
+     * If driver operate on other PHYs, do nothing and
+     * deceive it that the operation is finished
+     */
+    if ( phyaddr != 1 )
+    {
+        logout("Unsupport PHY address(phy = %#x)\n", phyaddr);
+        goto done;
+    }
+
+    // 1: MDI write
+    // 2: MDI read
+    if ( opcode != MDI_WRITE && opcode != MDI_READ )
+    {
+        logout("Invalid Opcode(opcode = %#x)\n", opcode);
+        return;
+    }
+
+    // Only the generic MDI registers 0-6 get special treatment. Higher
+    // registers are merely reported; the access still goes through as a
+    // plain read/write (regaddr is a 5-bit field, mdimem has 32 entries).
+    if ( regaddr > 6 )
+    {
+        logout("Invalid phy register index( phy register addr = %#x)\n",
+               regaddr);
+    }
+
+    if ( opcode == MDI_WRITE )
+    {
+        // MDI write
+        switch ( regaddr )
+        {
+            case 0:    // Control Register
+                if ( data & 0x8000 ) // Reset
+                {
+                    /* Reset status and control registers to default. */
+                    s->mdimem[0] = e100_mdi_default[0];
+                    s->mdimem[1] = e100_mdi_default[1];
+                    data = s->mdimem[regaddr];
+                }
+                else
+                {
+                    /* Restart Auto Configuration = Normal Operation */
+                    data &= ~0x0200;
+                }
+                break;
+            case 1:    // Status Register
+                logout("Invalid write on readonly register(opcode = %#x)\n",
+                       opcode);
+                /* Read-only: store back the current contents. */
+                data = s->mdimem[regaddr];
+                break;
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+                break;
+            default:
+                break;
+        }
+        s->mdimem[regaddr] = data;
+        logout("MDI WRITE: reg = %#x, data = %#x\n", regaddr, data);
+    }
+    else if ( opcode == MDI_READ )
+    {
+        // MDI read
+        switch ( regaddr )
+        {
+            case 0: // Control Register
+                if ( data & 0x8000 ) // Reset
+                {
+                    /* Reset status and control registers to default. */
+                    s->mdimem[0] = e100_mdi_default[0];
+                    s->mdimem[1] = e100_mdi_default[1];
+                }
+                break;
+            case 1: // Status Register
+                // Auto Negotiation complete, set sticky bit to 1
+                s->mdimem[regaddr] |= 0x0026;
+                break;
+            case 2: // PHY Identification Register (Word 1)
+            case 3: // PHY Identification Register (Word 2)
+                break;
+            case 5: // Auto-Negotiation Link Partner Ability Register
+                s->mdimem[regaddr] = 0x41fe;
+                break;
+            case 6: // Auto-Negotiation Expansion Register
+                s->mdimem[regaddr] = 0x0001;
+                break;
+            default:
+                break;
+        }
+        data = s->mdimem[regaddr];
+        logout("MDI READ: reg = %#x, data = %#x\n", regaddr, data);
+    }
+
+    /* Emulation takes no time to finish an MDI transaction: set the Ready
+     * bit (bit 28) and put the resulting data into the low 16 bits of the
+     * MDI control register. */
+done:
+    val |= BIT(28);
+    val = (val & 0xffff0000) + data;
+    CSR_WRITE(SCB_MDI, val, uint32_t);
+
+    if ( ie )
+        e100_interrupt(s, (uint16_t)INT_MDI);
+}
+
+/*
+ * Dispatch an access to the MDI control register.
+ *
+ * A read only sets the Ready bit (bit 28) so the driver sees the interface
+ * as idle; a write is executed by e100_write_mdi(). Any other direction is
+ * merely logged.
+ */
+static void scb_mdi_func(E100State *s, uint32_t val, int dir)
+{
+    switch ( dir )
+    {
+        case OP_READ:
+            CSR_VAL(CSR_MDI) |= BIT(28);
+            break;
+        case OP_WRITE:
+            e100_write_mdi(s, val);
+            break;
+        default:
+            logout("Invalid operation direction(dir=%x)\n", dir);
+            break;
+    }
+}
+
+/*
+ * Reset the EEPROM state machine.
+ *
+ * EEPROM_RESET_ALL clears the whole eeprom_t, including the stored contents
+ * and the address length, and nothing else. Any other `type` resets only
+ * the per-transfer state (shift register, decoded fields, CS/SK levels) and
+ * drives EEDO back to 1.
+ */
+static void eeprom_reset(E100State *s, int type)
+{
+    eeprom_t *ee = &s->eeprom;
+
+    if ( type == EEPROM_RESET_ALL )
+    {
+        memset(ee, 0x0, sizeof(eeprom_t));
+        ee->val_type = NOP;
+        logout("EEPROM reset all\n");
+        return;
+    }
+
+    CSR(CSR_EEPROM, eedo) = 1;
+
+    /* Pin levels seen on the previous access. */
+    ee->cs = 0;
+    ee->sk = 0;
+
+    /* Bits shifted in so far and what they were being collected for. */
+    ee->val_type = NOP;
+    ee->val_len = 0;
+    ee->val = 0;
+
+    /* Decoded pieces of the command in progress. */
+    ee->start_bit = 0;
+    ee->opcode = 0;
+    ee->address = 0;
+    ee->data = 0;
+
+    logout("EEPROM select reset\n");
+}
+
+/* Advance the bit-banged serial EEPROM state machine by one register write.
+ *
+ * cs, sk and di are the new levels of the chip-select, serial-clock and
+ * data-in lines; e->cs and e->sk hold the levels seen on the previous call.
+ * Bits are processed on the falling edge of sk while cs is high.  A
+ * transaction is: start bit, 2-bit opcode, e->addr_len address bits, then
+ * data.  For EEPROM_READ the addressed word is shifted out MSB first through
+ * the EEDO bit; for EEPROM_WRITE the shifted-in value is stored when cs is
+ * de-asserted.
+ *
+ * `dir` is accepted from the caller but is not used here.
+ */
+static void do_eeprom_op(E100State *s, eeprom_t *e, int cs, int sk, int di, int dir)
+{
+    int assert_cs = (cs == 1 && e->cs == 0);
+    int de_assert_cs = (cs == 0 && e->cs == 1);
+    int de_assert_sk = (sk == 0 && e->sk == 1);
+
+    // Chip select is not enabled, neither before nor now
+    if ( cs == 0 && e->cs == 0 )
+    {
+        logout("Invalid EECS signal\n");
+        return;
+    }
+
+    // Remember the new line levels for the next call
+    e->cs = cs;
+    e->sk = sk;
+
+    // Rising edge of CS: nothing else to do
+    if ( assert_cs )
+    {
+        logout("EECS assert\n");
+        return;
+    }
+
+    // Falling edge of CS: complete one command
+    if ( de_assert_cs )
+    {
+        if ( e->val_type == DATA && e->opcode == EEPROM_WRITE )
+        {
+            e->data = e->val;
+            /* Index the contents exactly as the read path below does, so a
+             * value written at an address is the value later read back from
+             * that same address.
+             */
+            memcpy((void *)(e->contents + e->address),
+                    &e->data, sizeof(e->data));
+            logout("EEPROM write complete(data=%#x)\n", e->data);
+        }
+        eeprom_trace(0,0,0,NOP,1);
+        eeprom_reset(s, EEPROM_SELECT_RESET);
+        logout("EECS de-asserted\n");
+        return;
+    }
+
+    // Chip is selected and the serial clock fell, so the operation is valid
+    if ( cs == 1 && de_assert_sk == 1)
+    {
+        // Set start bit
+        if ( e->start_bit == 0 && di == 1 )
+        {
+             e->start_bit = di;
+             e->val_len = 0;
+             e->val = 0;
+             e->val_type = OPCODE;
+
+             eeprom_trace(0,0,0,OPCODE,1);
+             logout("EEPROM start bit set\n");
+             return;
+        }
+        // Data in DI is valid
+        else if ( e->start_bit == 1 )
+        {
+            // While an EEPROM read is shifting data out, DI is ignored
+            if ( !(e->val_type == DATA && e->opcode == EEPROM_READ) )
+            {
+                e->val = (e->val << 1) | di;
+                e->val_len ++;
+            }
+
+            switch ( e->val_type )
+            {
+                // Collect the 2-bit opcode
+                case OPCODE:
+                    eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                    if ( e->val_len  == 2 )
+                    {
+                        e->opcode = e->val;
+                        e->val = 0;
+                        e->val_len = 0;
+                        e->val_type = ADDR;
+
+                        eeprom_trace(0,0,0,ADDR,1);
+                        logout("EEPROM get opcode(opcode name=%s,opcode=%#x )\n",
+                                EEPROM_OPCODE_NAME(e->opcode), e->opcode);
+                    }
+                    break;
+                // Collect the address
+                case ADDR:
+                    eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                    if ( e->val_len == e->addr_len )
+                    {
+                        e->address = e->val;
+                        e->val = 0;
+                        e->val_len = 0;
+                        e->val_type = DATA;
+
+                        // Fetch the word early for the read that follows
+                        if ( e->opcode == EEPROM_READ )
+                        {
+                            memcpy(&e->data, (void *)(e->contents + e->address),
+                                    sizeof(e->data));
+                            logout("EEPROM prepare data to read(addr=%#x,data=%#x)\n", 
+                                    e->address, e->data);
+                        }
+
+                        // Drive a dummy 0 on EEDO to tell the driver the
+                        // address has been received completely
+                        CSR(CSR_EEPROM, eedo) = 0;
+                        eeprom_trace(0,0,0,DATA,1);
+                        logout("EEPROM get address(addr=%#x)\n", e->address);
+                    }
+                    break;
+                // Only the data-out side is handled here
+                case DATA:
+                    if ( e->opcode == EEPROM_READ )
+                    {
+                        // Start from the most significant bit
+                        uint16_t t = !!(e->data & (0x8000U >> e->val_len));
+
+                        CSR(CSR_EEPROM, eedo) = t;
+
+                        logout("EEPROM read(reg address=%#x, reg val=%#x, do=%#x, len=%#x)\n", 
+                                e->address, e->data, t, e->val_len);
+
+                        if ( e->val_len > sizeof(e->data)*8 )
+                        {
+                            /* Driver may do more write op to de-assert EESK,
+                             * so we let the EEPROM go idle once a register
+                             * has been read completely
+                             */
+                            e->val_type = NOP;
+                            logout("Read complete\n");
+
+                            break;
+                        }
+
+                        e->val_len ++;
+                    }
+                    eeprom_trace(CSR(CSR_EEPROM, eedo), di, e->opcode, 0, 0);
+                    // The EEPROM write itself happens when CS is de-asserted
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    return;
+}
+
+
+/* Decode the EECS/EESK/EEDI bits from a value written to the EEPROM control
+ * register and feed them to the EEPROM state machine.
+ */
+static void scb_eeprom_func(E100State *s, uint32_t val, int dir)
+{
+    eeprom_t *rom = &s->eeprom;
+    int eecs = (val & EEPROM_CS) ? 1 : 0;
+    int eesk = (val & EEPROM_SK) ? 1 : 0;
+    int eedi = (val & EEPROM_DI) ? 1 : 0;
+
+    /* Log the previous line levels before do_eeprom_op() overwrites them. */
+    logout("EEPROM: Old(cs=%#x, sk=%#x), New(cs=%#x, sk=%#x, di=%#x)\n", 
+            rom->cs, rom->sk, eecs, eesk, eedi);
+
+    do_eeprom_op(s, rom, eecs, eesk, eedi, dir);
+}
+
+/* Execute a receive-unit (RU) command.
+ *
+ * `val` is compared against the RU_* command codes.  Commands that are not
+ * emulated are only logged.
+ */
+static void e100_ru_command(E100State *s, uint8_t val)
+{
+    switch ( val )
+    {
+        case RU_ADDR_LOAD:
+            /* The general pointer holds the new RU base address. */
+            s->ru_base = CSR_VAL(CSR_POINTER);
+            logout("Load RU base address at %#x\n", s->ru_base);
+            break;
+
+        case RU_START:
+            /* Go ready; the general pointer holds the RFD offset. */
+            SET_RU_STATE(RU_READY);
+            logout("RU is set to ready\n");
+            s->ru_offset = CSR_VAL(CSR_POINTER);
+            logout("RFD offset is at %#x\n", s->ru_offset);
+            break;
+
+        case RU_RESUME:
+            /* Only a suspended RU changes state on resume. */
+            if ( GET_RU_STATE == RU_SUSPENDED )
+                SET_RU_STATE(RU_READY);
+            logout("RU resume to ready\n");
+            break;
+
+        case RU_ABORT:
+            /* Raise RNR first, then drop to idle. */
+            e100_interrupt(s, INT_RNR);
+            SET_RU_STATE(RU_IDLE);
+            logout("RU abort, go to idle\n");
+            break;
+
+        case RU_DMA_REDIRECT:
+            logout("RU DMA redirect not implemented\n");
+            break;
+
+        case RU_LOAD_HDS:
+            logout("RU load header data size(HDS) not implemented\n");
+            break;
+
+        case RU_NOP:
+            /* Will not be here */
+        default:
+            break;
+    }
+}
+
+/* Walk the command block (CB) list and execute each command in turn.
+ *
+ * When is_resume is zero the walk starts at the offset currently held in the
+ * SCB general pointer; on resume the caller has already stored the offset in
+ * s->cu_offset.  Every CB is fetched from guest memory at
+ * s->cu_base + s->cu_offset, executed, and then has its C and OK bits written
+ * back.  The walk ends when a CB has its S bit set (CU goes to CU_SUSPENDED)
+ * or its EL bit set (CU goes to CU_IDLE); otherwise it follows cb.link_addr.
+ *
+ * This function changes the CU state, so CU start and CU resume must set the
+ * CU state before calling it.
+ */
+static void e100_execute_cb_list(E100State *s, int is_resume)
+{
+
+    struct control_block cb = {0};
+    uint32_t cb_addr;
+
+    /* If called from CU resume, cu_offset has already been set */
+    if ( !is_resume )
+        s->cu_offset = CSR_VAL(CSR_POINTER);
+
+    while (1)
+    {
+        cb_addr = s->cu_base + s->cu_offset;
+        cpu_physical_memory_read(cb_addr, (uint8_t *)&cb, sizeof(cb));
+
+
+        switch ( cb.cmd )
+        {
+            case CBL_NOP:
+                /* Do nothing */
+                break;
+            case CBL_IASETUP:
+                cpu_physical_memory_read(cb_addr + 8, &s->macaddr[0], sizeof(s->macaddr));
+                e100_dump("Setup Individual Address:", &s->macaddr[0], 6);
+                break;
+            case CBL_CONFIGURE:
+                {
+                    i82557_cfg_t *cfg = &s->config;
+
+                    assert(sizeof(s->config) == 22);
+                    cpu_physical_memory_read(cb_addr + 8, (uint8_t *)cfg, sizeof(s->config));
+                    logout("Setup card configuration:"
+                            "\tbyte count:%d\n"
+                            "\tRx FIFO limit:%d\n"
+                            "\tTx FIFO limit:%d\n"
+                            "\tAdaptive interframe spacing:%d\n"
+                            "\tRx DMA max:%d\n"
+                            "\tTX DMA max:%d\n"
+                            "\tDMBC enable:%d\n"
+                            "\tLate SCB:%d\n"
+                            "\tTNO:%d\n"
+                            "\tCI:%d\n"
+                            "\tDiscard overrun RX:%d\n"
+                            "\tSave bad frame:%d\n"
+                            "\tDiscard short RX:%d\n"
+                            "\tunderrun retry:%d\n"
+                            "\tMII:%d\n"
+                            "\tNSAI:%d\n"
+                            "\tPreamble len:%d\n"
+                            "\tloopback:%d\n"
+                            "\tliner pro:%d\n"
+                            "\tPRI mode:%d\n"
+                            "\tinterframe spacing:%d\n"
+                            "\tpromiscuous:%d\n"
+                            "\tbroadcast dis:%d\n"
+                            "\tCRS CDT:%d\n"
+                            "\tstripping:%d\n"
+                            "\tpadding:%d\n"
+                            "\tRX crc:%d\n"
+                            "\tforce fdx:%d\n"
+                            "\tfdx enable:%d\n"
+                            "\tmultiple IA:%d\n"
+                            "\tmulticast all:%d\n",
+                        cfg->byte_count, cfg->rx_fifo_limit, cfg->tx_fifo_limit,
+                        cfg->adpt_inf_spacing, cfg->rx_dma_max_bytes, cfg->tx_dma_max_bytes,
+                        cfg->dmbc_en, cfg->late_scb, cfg->tno_intr, cfg->ci_intr,
+                        cfg->dis_overrun_rx, cfg->save_bad_frame, cfg->dis_short_rx,
+                        cfg->underrun_retry, cfg->mii, cfg->nsai, cfg->preamble_len,
+                        cfg->loopback, cfg->linear_prio, cfg->pri_mode, cfg->interframe_spacing,
+                        cfg->promiscuous, cfg->broadcast_dis, cfg->crs_cdt, cfg->strip,
+                        cfg->padding, cfg->rx_crc, cfg->force_fdx, cfg->fdx_en,
+                        cfg->mul_ia, cfg->mul_all);
+                }
+                break;
+            case CBL_MULTCAST_ADDR_SETUP:
+                {
+                    uint16_t mult_list_count = 0;
+                    uint16_t size = 0;
+
+                    cpu_physical_memory_read(cb_addr + 8, (uint8_t *)&mult_list_count, 2);
+                    /* Keep only the low 14 bits of the count.  A "<< 2 >> 2"
+                     * pair does not achieve this on a uint16_t because the
+                     * operand is promoted to int before shifting, so mask
+                     * explicitly.
+                     */
+                    mult_list_count = le16_to_cpu(mult_list_count) & 0x3fff;
+
+                    if ( !mult_list_count )
+                    {
+                        logout("Multcast disabled(multicast count=0)\n");
+                        s->is_multcast_enable = 0;
+                        memset(s->mult_list, 0x0, sizeof(s->mult_list));
+                        break;
+                    }
+                    /* Never copy more than the local list can hold */
+                    size = mult_list_count > sizeof(s->mult_list) ?
+                        sizeof(s->mult_list) : mult_list_count;
+                    cpu_physical_memory_read(cb_addr + 12, &s->mult_list[0], size);
+
+                    e100_dump("Setup Multicast list: ", &s->mult_list[0], size);
+                    break;
+                }
+            case CBL_TRANSMIT:
+                {
+                    struct
+                    {
+                        struct control_block cb;
+                        tbd_t tbd;
+                    } __attribute__ ((packed)) tx;
+
+                    struct
+                    {
+                        uint32_t addr;
+                        uint16_t size;
+                        uint16_t is_el_set;
+                    } tx_buf = {0};
+
+                    uint32_t tbd_array;
+                    uint16_t tcb_bytes;
+                    uint8_t sf;
+                    int len = s->pkt_buf_len;
+
+                    assert( len < sizeof(s->pkt_buf));
+
+                    cpu_physical_memory_read(cb_addr, (uint8_t *)&tx, sizeof(tx));
+                    tbd_array = le32_to_cpu(tx.tbd.tx_desc_addr);
+                    tcb_bytes = le16_to_cpu(tx.tbd.tcb_bytes);
+                    // Indicates which mode to transmit in (simple or flexible)
+                    sf = tx.cb.rs3 & 0x1;
+
+                    logout("Get a TBD:\n"
+                            "\tTBD array address:%#x\n"
+                            "\tTCB byte count:%#x\n"
+                            "\tEOF:%#x\n"
+                            "\tTransmit Threshold:%#x\n"
+                            "\tTBD number:%#x\n"
+                            "\tUse %s mode to send frame\n",
+                            tbd_array, tcb_bytes, tx.tbd.eof,
+                            tx.tbd.tx_threshold, tx.tbd.tbd_num,
+                            sf ? "Flexible" : "Simple");
+
+                    if ( !sf || tbd_array == (uint32_t)-1 )
+                    {
+                        /* Simple mode */
+
+                        /* For simple mode, TCB bytes should not be zero.
+                         * But we still check here for safety
+                         */
+                        if ( !tcb_bytes || tcb_bytes > sizeof(s->pkt_buf) )
+                            break;
+
+                        cpu_physical_memory_read(cb_addr+16, &s->pkt_buf[0], tcb_bytes);
+                        len = tcb_bytes;
+                        logout("simple mode(size=%d)\n", len);
+
+                    }
+                    else
+                    {
+                        /* Flexible mode */
+
+                        /* For flexible mode, TBD num should not be zero.
+                         * But we still check here for safety
+                         */
+                        if ( !tx.tbd.tbd_num )
+                            break;
+
+                        // I82557 doesn't support extended TCB
+                        if ( s->device == i82557C || s->device == i82557B )
+                        {
+                            /* Standard TCB mode */
+
+                            int i;
+
+                            for ( i=0; i<tx.tbd.tbd_num; i++ )
+                            {
+
+                                cpu_physical_memory_read(tbd_array, (uint8_t *)&tx_buf,
+                                        sizeof(tx_buf));
+                                tx_buf.is_el_set &= 0x1;
+                                tx_buf.size &= 0x7fff;
+                                tbd_array += 8;
+
+                                if ( tx_buf.size > sizeof(s->pkt_buf) - len )
+                                {
+                                    logout("Warning: Get a too big TBD, ignore it"
+                                            "(buf addr %#x, size %d, el:%#x)\n",
+                                            tx_buf.addr, tx_buf.size, tx_buf.is_el_set);
+                                    continue;
+                                }
+
+                                cpu_physical_memory_read(tx_buf.addr, &s->pkt_buf[len],
+                                        tx_buf.size);
+
+                                logout("TBD (standard mode): buf addr %#x, size %d, el:%#x\n",
+                                        tx_buf.addr, tx_buf.size, tx_buf.is_el_set);
+                                len += tx_buf.size;
+
+                                if ( tx_buf.is_el_set )
+                                    break;
+                            }
+
+                        }
+                        //FIXME: Extended mode has not been tested
+                        else
+                        {
+                            /* Extend TCB mode */
+
+                            /* A standard TCB followed by two TBDs */
+                            uint32_t tbd_addr = cb_addr+16;
+                            int i = 0;
+
+
+                            for ( ; i<2 && i<tx.tbd.tbd_num; i++ )
+                            {
+
+                                /* NOTE(review): tbd_addr is advanced on every
+                                 * iteration but the read uses tbd_array;
+                                 * confirm which address is intended for the
+                                 * two TBDs that follow the TCB.
+                                 */
+                                cpu_physical_memory_read(tbd_array, (uint8_t *)&tx_buf,
+                                        sizeof(tx_buf));
+                                tx_buf.is_el_set &= 0x1;
+                                tbd_addr += 8;
+
+                                /* From Intel's spec, size of TBD equal to zero
+                                 * has same effect with EL bit set
+                                 */
+                                if ( tx_buf.size == 0 )
+                                {
+                                    tx_buf.is_el_set = 1;
+                                    break;
+                                }
+
+                                if ( tx_buf.size + len > sizeof(s->pkt_buf) )
+                                {
+                                    logout("TX frame is too large, discarding it"
+                                            "(buf addr=%#x, size=%#x)\n", tx_buf.addr,
+                                            tx_buf.size);
+                                    break;
+                                }
+
+                                logout("TBD (extended mode): buf addr %#08x, size %#04x, el:%#x\n",
+                                        tx_buf.addr, tx_buf.size, tx_buf.is_el_set);
+                                cpu_physical_memory_read(tx_buf.addr, &s->pkt_buf[len],
+                                        tx_buf.size);
+
+                                len += tx_buf.size;
+
+                                if ( tx_buf.is_el_set )
+                                    break;
+                            }
+
+                            /* In extend TCB mode, the TBD array points to the
+                             * third TBD if it is not NULL(0xffffffff) and the
+                             * EL bit of the first two TBDs is not set
+                             */
+                            if ( tbd_array != (uint32_t)-1 && !tx_buf.is_el_set )
+                            {
+                                tbd_addr = tbd_array;
+
+                                /* TBD number includes first two TBDs, so don't
+                                 * initialize i here
+                                 */
+                                for ( ; i<tx.tbd.tbd_num; i++ )
+                                {
+                                    cpu_physical_memory_read(tbd_addr, (uint8_t *)&tx_buf,
+                                            sizeof(tx_buf));
+                                    tx_buf.is_el_set &= 0x1;
+                                    tbd_addr += 8;
+
+                                    /* The size comes from guest memory: never
+                                     * copy past the end of the packet buffer.
+                                     */
+                                    if ( tx_buf.size > sizeof(s->pkt_buf) - len )
+                                    {
+                                        logout("TX frame is too large, discarding it"
+                                                "(buf addr=%#x, size=%#x)\n", tx_buf.addr,
+                                                tx_buf.size);
+                                        break;
+                                    }
+
+                                    cpu_physical_memory_read(tx_buf.addr, &s->pkt_buf[len],
+                                            tx_buf.size);
+                                    logout("TBD (extended mode): buf addr 0x%#08x, size 0x%#04x\n",
+                                            tx_buf.addr, tx_buf.size);
+
+                                    len += tx_buf.size;
+
+                                    if ( tx_buf.is_el_set )
+                                        break;
+                                }
+                            }
+                        }
+                    }
+
+
+                    s->pkt_buf_len = len;
+
+/* Below codes are used for Threshold. But with this logic the network of the
+ * guest gets bad performance. So it is disabled and left here in the hope
+ * that someone fixes it
+ */
+#if 0
+                    /* If threshold is set, only send packet when threshold
+                     * bytes are read
+                     */
+                    if ( tx.tbd.tx_threshold && s->pkt_buf_len < tx.tbd.tx_threshold * 8 )
+                    {
+                        logout("Current data length in FIFO buffer:%d\n", s->pkt_buf_len);
+                        break;
+                    }
+#endif
+
+                    if ( s->pkt_buf_len )
+                    {
+                        qemu_send_packet(s->vc, s->pkt_buf, s->pkt_buf_len);
+                        s->statistics.tx_good_frames ++;
+                        logout("Send out frame successful(size=%d,"
+                                "already sent %d frames)\n", s->pkt_buf_len,
+                                s->statistics.tx_good_frames);
+                        s->pkt_buf_len = 0;
+                    }
+
+                    /* Ethernet header: 6 bytes destination, 6 bytes source,
+                     * then the 2-byte type field at offset 12.
+                     */
+                    e100_dump("Dest addr:", (uint8_t *)s->pkt_buf, 6);
+                    e100_dump("Src addr:", (uint8_t *)(s->pkt_buf+6), 6);
+                    e100_dump("type:", (uint8_t *)(s->pkt_buf+12), 2);
+
+                    break;
+                }
+            case CBL_LOAD_MICROCODE:
+#ifdef DEBUG_E100
+                {
+                    /* Loading micro code is not supported, just dump it */
+                    #define MICRO_CODE_LEN 256
+                    uint8_t micro_code[MICRO_CODE_LEN] = {0};
+                    cpu_physical_memory_read(cb_addr+8, micro_code, MICRO_CODE_LEN);
+                    e100_dump("Load micro code:", micro_code, MICRO_CODE_LEN);
+                }
+#endif
+                break;
+            case CBL_DUMP:
+                logout("Control block dump\n");
+                break;
+            case CBL_DIAGNOSE:
+                logout("Control block diagnose\n");
+                break;
+            default:
+                logout("Unknown Control block command(val=%#x)\n", cb.cmd);
+                break;
+        }
+
+        /* Now, we finished executing a command, update status of CB.
+         * We always report success
+         */
+        cb.c = 1;
+        cb.ok = 1;
+        // Only the first two bytes (holding the C and OK bits) are written back
+        cpu_physical_memory_write(cb_addr, (uint8_t *)&cb, 2);
+
+        logout("Finished a command from CB list:\n"
+                "\tok:%d\n"
+                "\tc:%d\n"
+                "\tcommand name:%s(cmd=%#x)\n"
+                "\ti:%d\n"
+                "\ts:%d\n"
+                "\tel:%d\n"
+                "\tlink address:%#x\n",
+                cb.ok, cb.c, CB_CMD_NAME(cb.cmd), cb.cmd,
+                cb.i, cb.s, cb.el, cb.link_addr);
+
+        if ( cb.i )
+            e100_interrupt(s, (uint16_t)INT_CX_TNO);
+
+        // Suspend CU
+        if ( cb.s )
+        {
+            logout("CU go to suspend\n");
+            SET_CU_STATE(CU_SUSPENDED);
+            // Save the link for resuming; converted like cu_offset below
+            s->cu_next = le32_to_cpu(cb.link_addr);
+
+            // Trigger CNA interrupt only when CNA mode is configured
+            if ( !(s->config.ci_intr) && cb.i )
+                e100_interrupt(s, (uint16_t)INT_CNA);
+
+            return;
+        }
+
+        // This is last command in CB list, CU go back to IDLE
+        if ( cb.el )
+        {
+            logout("Command block list is empty, CU go to idle\n");
+            SET_CU_STATE(CU_IDLE);
+            /* Either in CNA mode or CI mode, interrupt need be triggered
+             * when CU go to idle.
+             */
+            if ( cb.i )
+                e100_interrupt(s, (uint16_t)INT_CNA);
+
+            return;
+        }
+
+        s->cu_offset = le32_to_cpu(cb.link_addr); // get next CB offset
+    }
+}
+
+/* Copy the statistics block to guest memory at s->statsaddr.
+ *
+ * complete_word is stored into the block first so that it is part of what is
+ * written out.  Most counters are never changed by the emulation and stay 0.
+ */
+static void dump_statistics(E100State * s, uint32_t complete_word)
+{
+    uint8_t *raw = (uint8_t *)&s->statistics;
+
+    s->statistics.complete_word = complete_word;
+    cpu_physical_memory_write(s->statsaddr, raw, sizeof(s->statistics));
+}
+
+/* Execute a command-unit (CU) command.
+ *
+ * `val` is compared against the CU_* command codes.  Start and resume run the
+ * command block list; the remaining commands load addresses from the SCB
+ * general pointer or dump the statistics counters.
+ */
+static void e100_cu_command(E100State *s, uint8_t val)
+{
+    switch ( val )
+    {
+        case CU_CMD_BASE:
+            /* Load CU base */
+            s->cu_base = CSR_VAL(CSR_POINTER);
+            logout("Load CU base at %x\n", s->cu_base);
+            break;
+
+        case CU_STATSADDR:
+            /* Load dump counters address */
+            s->statsaddr = CSR_VAL(CSR_POINTER);
+            logout("Load Stats address at %#x\n", s->statsaddr);
+            break;
+
+        case CU_START:
+            /* Following Intel's spec: start only from idle or suspended */
+            if ( !(GET_CU_STATE == CU_IDLE || GET_CU_STATE == CU_SUSPENDED) )
+            {
+                logout("Illegal CU start command. Device is not idle or suspend\n");
+                return;
+            }
+
+            SET_CU_STATE(CU_LPQ_ACTIVE);
+            logout("CU start\n");
+
+            e100_execute_cb_list(s, 0);
+            break;
+
+        case CU_RESUME:
+            {
+                struct control_block prev;
+                uint32_t prev_addr = s->cu_base + s->cu_offset;
+
+                /* FIXME: per Intel's spec, CU resume from idle is forbidden,
+                 * but the e100 driver in linux does exactly that, so it is
+                 * only logged.
+                 */
+                if ( GET_CU_STATE == CU_IDLE )
+                {
+                    logout("Illegal resume form IDLE\n");
+                }
+
+                /* Re-read the CB the CU stopped on */
+                cpu_physical_memory_read(prev_addr, (uint8_t *)&prev,
+                                        sizeof(prev));
+
+                //FIXME: Is any special handling needed when CU is active?
+
+                /* The driver must clear the S bit in the previous CB when
+                 * it issues a CU resume command
+                 */
+                if ( prev.s )
+                {
+                    logout("CU still in suspend\n");
+                    break;
+                }
+
+                SET_CU_STATE(CU_LPQ_ACTIVE);
+                if ( prev.el )
+                {
+                    logout("CB list is empty, CU just go to active\n");
+                    break;
+                }
+
+                // Continue with the command saved at suspend time
+                s->cu_offset = s->cu_next;
+
+                e100_execute_cb_list(s, 1);
+
+                logout("CU resume\n");
+                break;
+            }
+
+        case CU_SHOWSTATS:
+            /* Dump statistical counters */
+            dump_statistics(s, 0xa005);
+            logout("Execute dump statistics\n");
+            break;
+
+        case CU_DUMPSTATS:
+            /* Dump statistical counters, then reset them */
+            dump_statistics(s, 0xa007);
+            memset(&s->statistics, 0x0, sizeof(s->statistics));
+            logout("Execute dump and reset statistics\n");
+            break;
+
+        case CU_S_RESUME:
+            /* CU static resume */
+            logout("CU static resume is not implemented\n");
+            break;
+
+        case CU_NOP:
+            /* Will not be here */
+            break;
+
+        default:
+            logout("Unknown CU command(val=%#x)\n", val);
+            break;
+    }
+}
+
+/* Handle a write to the SCB command byte.
+ *
+ * The low nibble is passed to e100_ru_command() and the high nibble to
+ * e100_cu_command().  When the low nibble is non-zero only the RU command is
+ * run.  The executed command field is cleared afterwards; an all-zero value
+ * (NOP) is ignored.
+ */
+static void scb_cmd_func(E100State *s, uint16_t val, int dir)
+{
+    uint16_t ru_bits = val & 0x0f;
+    uint16_t cu_bits = val & 0xf0;
+
+    if ( ru_bits )
+    {
+        e100_ru_command(s, ru_bits);
+        CSR(CSR_CMD, ru_cmd) = 0;
+        return;
+    }
+
+    if ( cu_bits )
+    {
+        e100_cu_command(s, cu_bits);
+        CSR(CSR_CMD, cu_cmd) = 0;
+    }
+}
+
+/* Access kinds passed as the `bytes` argument of e100_execute().
+ *
+ * NOTE(review): WRITE_BYTES declares an object of this anonymous enum type,
+ * not a type name.
+ */
+enum
+{
+    WRITEB     = 0,
+    WRITEW     = 1,
+    WRITEL     = 2,
+    OP_IS_READ = 3,
+} WRITE_BYTES;
+
+/* React to a driver access to one of the CSR registers.
+ *
+ * The driver may issue a command by writing one 32-bit entry, two 16-bit
+ * entries or four 8-bit entries.  In the latter two cases, for the PORT and
+ * MDI registers, we must wait until the driver has written the highest byte.
+ * The parameter 'bytes' tells which write action the driver used (writeb,
+ * writew, writel), or OP_IS_READ for a read.
+ */
+static void e100_execute(E100State *s, uint32_t addr_offset,
+        uint32_t val, int dir, int bytes)
+{
+    const int is_write = (dir == OP_WRITE);
+
+    switch ( addr_offset )
+    {
+        case SCB_STATUS:
+            /* A byte access at the status offset needs no handling */
+            if ( bytes == WRITEB )
+                break;
+            /* fall through */
+        case SCB_ACK:
+            if ( is_write )
+            {
+                uint8_t ack_bits = 0;
+
+                switch ( bytes )
+                {
+                    case WRITEB:
+                        ack_bits = (uint8_t)val;
+                        break;
+                    case WRITEL:
+                        // This should not happen
+                        logout("WARNNING: Drvier write 4 bytes to CSR register at offset %d,"
+                               "emulator may do things wrong!!!\n", addr_offset);
+                        /* fall through */
+                    case WRITEW:
+                        ack_bits = ((uint16_t)val) >> 8;
+                        break;
+                    default:
+                        break;
+                }
+
+                e100_interrupt_ack(s, ack_bits);
+            }
+            break;
+
+        case SCB_CMD:
+            /* Drivers known to the original author (windows, linux) do not
+             * write the command word and the interrupt mask together with a
+             * single 2-byte write, so that case is not handled here.
+             */
+            if ( is_write )
+                scb_cmd_func(s, val, dir);
+            break;
+
+        case SCB_INTERRUPT_MASK:
+            if ( is_write )
+            {
+                uint8_t mask_bits = 0;
+
+                switch ( bytes )
+                {
+                    case WRITEB:
+                        mask_bits = (uint8_t)val;
+                        break;
+                    case WRITEW:
+                        mask_bits = (val & 0xff00) >> 8;
+                        break;
+                    default:
+                        logout("WARNNING: Drvier write 4 bytes to CSR register at offset %d,"
+                               "emulator may do things wrong!!!\n", addr_offset);
+                        break;
+                }
+
+                // Driver generates a software interrupt
+                if ( mask_bits & BIT(1) )
+                    e100_interrupt(s, INT_SWI);
+            }
+            break;
+
+        case SCB_PORT ... SCB_PORT + 3:
+            {
+                // Still waiting for the driver to write the highest byte?
+                int partial =
+                    (bytes == WRITEB && addr_offset != SCB_PORT + 3) ||
+                    (bytes == WRITEW && addr_offset != SCB_PORT + 2);
+
+                if ( is_write && !partial )
+                    scb_port_func(s, CSR_VAL(CSR_PORT), dir);
+                break;
+            }
+
+        case SCB_MDI ... SCB_MDI + 3:
+            {
+                // Still waiting for the driver to write the highest byte?
+                int partial = is_write &&
+                    ((bytes == WRITEB && addr_offset != SCB_MDI + 3) ||
+                     (bytes == WRITEW && addr_offset != SCB_MDI + 2));
+
+                /* Unlike PORT, MDI is also dispatched for non-write accesses */
+                if ( !partial )
+                    scb_mdi_func(s, CSR_VAL(CSR_MDI), dir);
+                break;
+            }
+
+        case SCB_EEPROM:
+            // Nothing needs doing when the driver reads the EEPROM register
+            if ( is_write )
+                scb_eeprom_func(s, val, dir);
+            break;
+
+        case SCB_POINTER:
+            break;
+
+        default:
+            logout("Driver operate on CSR reg(offset=%#x,dir=%s,val=%#x)\n",
+                    addr_offset, dir==OP_WRITE?"write":"read", val);
+            break;
+    }
+}
+
+/* MMIO access functions */
+/* Read one byte from the CSR register image at addr_offset.
+ *
+ * An offset for which addr_offset + 1 would reach or pass the end of
+ * s->pci_mem.mem is rejected and reads back as all ones.  Otherwise
+ * e100_execute() gets the chance to update register state before the value
+ * is fetched.
+ */
+static uint8_t e100_read1(E100State * s, uint32_t addr_offset)
+{
+    uint8_t val = -1;
+
+    /* Compare against (size - width) instead of computing
+     * addr_offset + width, which could wrap where size_t is 32 bits wide.
+     */
+    if ( addr_offset >= sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset
+                + s->region_base_addr[CSR_MEMORY_BASE]);
+        return val;
+    }
+
+    e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+    val = CSR_READ(addr_offset, uint8_t);
+    logout("READ1: Register name = %s, addr_offset = %#x, val=%#x\n", 
+            SCBNAME(addr_offset), addr_offset, val);
+
+    return val;
+}
+
+/* Read a 16-bit value from the CSR register image at addr_offset.
+ *
+ * An offset for which addr_offset + 2 would reach or pass the end of
+ * s->pci_mem.mem is rejected and reads back as all ones.  Otherwise
+ * e100_execute() gets the chance to update register state before the value
+ * is fetched.
+ */
+static uint16_t e100_read2(E100State * s, uint32_t addr_offset)
+{
+    uint16_t val = -1;
+
+    /* Compare against (size - width) instead of computing
+     * addr_offset + width, which could wrap where size_t is 32 bits wide.
+     */
+    if ( addr_offset >= sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset 
+                + s->region_base_addr[CSR_MEMORY_BASE]);
+        return val;
+    }
+
+    e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+    val = CSR_READ(addr_offset, uint16_t);
+    logout("READ2: Register name = %s, addr_offset = %#x, val=%#x\n", 
+            SCBNAME(addr_offset), addr_offset, val);
+
+    return val;
+}
+
+/* Read a 32-bit value from the CSR register image at addr_offset.
+ *
+ * An offset for which addr_offset + 4 would reach or pass the end of
+ * s->pci_mem.mem is rejected and reads back as all ones.  Otherwise
+ * e100_execute() gets the chance to update register state before the value
+ * is fetched.
+ */
+static uint32_t e100_read4(E100State * s, uint32_t addr_offset)
+{
+    uint32_t val = -1;
+
+    /* Compare against (size - width) instead of computing
+     * addr_offset + width, which could wrap where size_t is 32 bits wide.
+     */
+    if ( addr_offset >= sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild read, beyond memory boundary(addr:%#x)\n", addr_offset 
+                + s->region_base_addr[CSR_MEMORY_BASE]);
+        return val;
+    }
+
+    e100_execute(s, addr_offset, val, OP_READ, OP_IS_READ);
+    val = CSR_READ(addr_offset, uint32_t);
+    logout("READ4: Register name = %s, addr_offset = %#x, val=%#x\n", 
+            SCBNAME(addr_offset), addr_offset, val);
+
+    return val;
+}
+
+/*
+ * MMIO read callbacks.  Each one turns the physical address it is handed
+ * into an offset from the recorded CSR memory base and forwards to the
+ * e100_read<N>() helper of the matching width.
+ */
+static uint32_t pci_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read1(s, addr - s->region_base_addr[CSR_MEMORY_BASE]);
+}
+
+static uint32_t pci_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read2(s, addr - s->region_base_addr[CSR_MEMORY_BASE]);
+}
+
+static uint32_t pci_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read4(s, addr - s->region_base_addr[CSR_MEMORY_BASE]);
+}
+
+/* Read callbacks in byte, word, dword order, as handed to
+ * cpu_register_io_memory() by e100_init(). */
+static CPUReadMemoryFunc *pci_mmio_read[] = {
+    pci_mmio_readb,
+    pci_mmio_readw,
+    pci_mmio_readl
+};
+
+/*
+ * Byte write to the CSR at addr_offset (an offset into s->pci_mem.mem).
+ *
+ *  - SCB_STATUS: the write is dropped.
+ *  - SCB_EEPROM: the byte is stored, then the eedo field is set again from
+ *    the bit-3 value sampled before the store.
+ *  - any other offset: the byte is stored unchanged.
+ *
+ * Offsets rejected by the bounds check are logged and ignored.
+ */
+static void e100_write1(E100State * s, uint32_t addr_offset, uint8_t val)
+{
+    /*
+     * Compare against (size - width) instead of adding to addr_offset, so
+     * the test cannot be defeated by unsigned wraparound of the sum.
+     */
+    if ( addr_offset >= sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+               addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status is read-only and cannot be written directly
+    if ( addr_offset == SCB_STATUS )
+    {
+        return;
+    }
+    // EEDO bit of eeprom register is read-only, can not be written;
+    else if ( addr_offset == SCB_EEPROM )
+    {
+        /* Sample EEDO, let the store happen, then put EEDO back. */
+        int eedo = BIT(3) & CSR_VAL(CSR_EEPROM);
+        CSR_WRITE(addr_offset, val, uint8_t);
+        CSR(CSR_EEPROM, eedo) = !!(eedo & EEPROM_DO);
+
+        logout("WRITE1: Register name = %s, addr_offset = %#x, val = %#x\n",
+               SCBNAME(addr_offset),addr_offset,
+               (uint8_t)CSR_VAL(CSR_EEPROM));
+        return;
+    }
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint8_t);
+    }
+
+    logout("WRITE1: Register name = %s, addr_offset = %#x, val = %#x\n",
+           SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/*
+ * 16-bit write to the CSR at addr_offset (an offset into s->pci_mem.mem).
+ *
+ *  - SCB_STATUS: only the high byte of val is stored, at addr_offset+1;
+ *    the low byte is dropped.
+ *  - SCB_EEPROM: the word is stored, then the eedo field is set again from
+ *    the bit-3 value sampled before the store.
+ *  - any other offset: the word is stored unchanged.
+ *
+ * Offsets rejected by the bounds check are logged and ignored.
+ */
+static void e100_write2(E100State * s, uint32_t addr_offset, uint16_t val)
+{
+    /*
+     * Compare against (size - width) instead of adding to addr_offset, so
+     * the test cannot be defeated by unsigned wraparound of the sum.
+     */
+    if ( addr_offset >= sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+               addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status is read-only and cannot be written directly
+    if ( addr_offset == SCB_STATUS )
+    {
+        uint8_t high_byte = val >> 8;
+        CSR_WRITE(addr_offset+1, high_byte, uint8_t);
+    }
+    // EEDO bit of eeprom register is read-only, can not be written;
+    else if ( addr_offset == SCB_EEPROM )
+    {
+        /* Sample EEDO, let the store happen, then put EEDO back. */
+        int eedo = BIT(3) & CSR_VAL(CSR_EEPROM);
+        CSR_WRITE(addr_offset, val, uint16_t);
+        CSR(CSR_EEPROM, eedo) = !!(eedo & EEPROM_DO);
+
+        /* Label is WRITE2: this is the 16-bit path (was logged as WRITE1). */
+        logout("WRITE2: Register name = %s, addr_offset = %#x, val = %#x\n",
+               SCBNAME(addr_offset),addr_offset, CSR_VAL(CSR_EEPROM));
+        return;
+    }
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint16_t);
+    }
+
+    logout("WRITE2: Register name = %s, addr_offset = %#x, val = %#x\n",
+           SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/*
+ * 32-bit write to the CSR at addr_offset (an offset into s->pci_mem.mem).
+ *
+ *  - SCB_STATUS: byte 0 of val (in host memory order) is dropped; bytes
+ *    1..3 are stored individually at addr_offset+1 .. addr_offset+3.
+ *  - any other offset: the dword is stored unchanged.
+ *
+ * Offsets rejected by the bounds check are logged and ignored.
+ */
+static void e100_write4(E100State * s, uint32_t addr_offset, uint32_t val)
+{
+    /*
+     * Compare against (size - width) instead of adding to addr_offset, so
+     * the test cannot be defeated by unsigned wraparound of the sum.
+     */
+    if ( addr_offset >= sizeof(s->pci_mem.mem) - sizeof(val) )
+    {
+        logout("Invaild write, beyond memory boundary(addr = %#x, val = %#x\n",
+               addr_offset + s->region_base_addr[CSR_MEMORY_BASE], val);
+        return;
+    }
+
+    // SCB status is read-only and cannot be written directly
+    if ( addr_offset == SCB_STATUS )
+    {
+        uint8_t bytes[sizeof(val)];
+
+        /*
+         * Split val into its bytes in host memory order.  memcpy() gives
+         * the same bytes as storing through a uint32_t pointer to the
+         * array, without the alignment and aliasing hazards of that cast.
+         */
+        memcpy(bytes, &val, sizeof(bytes));
+
+        CSR_WRITE(addr_offset+1, bytes[1], uint8_t);
+        CSR_WRITE(addr_offset+2, bytes[2], uint8_t);
+        CSR_WRITE(addr_offset+3, bytes[3], uint8_t);
+    }
+    /* No write4 operation on EEPROM register */
+    else
+    {
+        CSR_WRITE(addr_offset, val, uint32_t);
+    }
+
+    logout("WRITE4: Register name = %s, addr_offset = %#x, val = %#x\n",
+           SCBNAME(addr_offset),addr_offset, val);
+    return;
+}
+
+/*
+ * MMIO write callbacks.  Each one turns the physical address it is handed
+ * into an offset from the recorded CSR memory base, stores the value with
+ * the e100_write<N>() helper of the matching width, and then calls
+ * e100_execute() with OP_WRITE for the same offset and value.
+ */
+static void pci_mmio_writeb(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *s = opaque;
+    addr -= s->region_base_addr[CSR_MEMORY_BASE];
+    e100_write1(s, addr, val);
+    (void)e100_execute(s, addr, val, OP_WRITE, WRITEB);
+}
+
+static void pci_mmio_writew(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *s = opaque;
+    addr -= s->region_base_addr[CSR_MEMORY_BASE];
+    e100_write2(s, addr, val);
+    (void)e100_execute(s, addr, val, OP_WRITE, WRITEW);
+}
+
+static void pci_mmio_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    E100State *s = opaque;
+    addr -= s->region_base_addr[CSR_MEMORY_BASE];
+    e100_write4(s, addr, val);
+    (void)e100_execute(s, addr, val, OP_WRITE, WRITEL);
+}
+
+/* Write callbacks in byte, word, dword order, as handed to
+ * cpu_register_io_memory() by e100_init(). */
+static CPUWriteMemoryFunc *pci_mmio_write[] = {
+    pci_mmio_writeb,
+    pci_mmio_writew,
+    pci_mmio_writel
+};
+
+/*
+ * Mapping callback for the memory BARs.  Only the CSR region
+ * (CSR_MEMORY_BASE) is acted upon: it is bound to the device's MMIO
+ * handlers and its base address is recorded.  Any other region number is
+ * just logged.
+ */
+static void pci_mmio_map(PCIDevice * pci_dev, int region_num,
+                         uint32_t addr, uint32_t size, int type)
+{
+    PCIE100State *d = (PCIE100State *) pci_dev;
+
+    logout("region %d, addr=0x%08x, size=0x%08x, type=%d\n",
+           region_num, addr, size, type);
+
+    if ( region_num != CSR_MEMORY_BASE )
+        return;
+
+    /* Map control / status registers. */
+    cpu_register_physical_memory(addr, size, d->e100.mmio_index);
+    d->e100.region_base_addr[region_num] = addr;
+}
+
+/* IO access functions */
+/*
+ * Port I/O write callbacks (1, 2 and 4 bytes wide).  The port number is
+ * rebased to an offset from the recorded I/O base, the value is stored
+ * with the matching e100_write<N>() helper, and e100_execute() is then
+ * called with OP_WRITE for the same offset and value.
+ */
+static void ioport_write1(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *s = opaque;
+    uint32_t offset = addr - s->region_base_addr[CSR_IO_BASE];
+
+    e100_write1(s, offset, val);
+    (void)e100_execute(s, offset, val, OP_WRITE, WRITEB);
+}
+
+static void ioport_write2(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *s = opaque;
+    uint32_t offset = addr - s->region_base_addr[CSR_IO_BASE];
+
+    e100_write2(s, offset, val);
+    (void)e100_execute(s, offset, val, OP_WRITE, WRITEW);
+}
+
+static void ioport_write4(void *opaque, uint32_t addr, uint32_t val)
+{
+    E100State *s = opaque;
+    uint32_t offset = addr - s->region_base_addr[CSR_IO_BASE];
+
+    e100_write4(s, offset, val);
+    (void)e100_execute(s, offset, val, OP_WRITE, WRITEL);
+}
+
+/*
+ * Port I/O read callbacks (1, 2 and 4 bytes wide).  The port number is
+ * rebased to an offset from the recorded I/O base and the access is
+ * forwarded to the matching e100_read<N>() helper.
+ */
+static uint32_t ioport_read1(void *opaque, uint32_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read1(s, addr - s->region_base_addr[CSR_IO_BASE]);
+}
+
+static uint32_t ioport_read2(void *opaque, uint32_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read2(s, addr - s->region_base_addr[CSR_IO_BASE]);
+}
+
+static uint32_t ioport_read4(void *opaque, uint32_t addr)
+{
+    E100State *s = opaque;
+
+    return e100_read4(s, addr - s->region_base_addr[CSR_IO_BASE]);
+}
+
+/*
+ * Mapping callback for the I/O BAR.  For region 1 it installs the 1-, 2-
+ * and 4-byte port handlers over [addr, addr + size) and records the base
+ * address; any other region number is logged and ignored.
+ *
+ * NOTE(review): the handlers rebase with region_base_addr[CSR_IO_BASE],
+ * so the literal 1 here presumably equals CSR_IO_BASE -- confirm.
+ */
+static void pci_ioport_map(PCIDevice * pci_dev, int region_num,
+                    uint32_t addr, uint32_t size, int type)
+{
+    E100State *s = &((PCIE100State *) pci_dev)->e100;
+
+    logout("region %d, addr=0x%08x, size=0x%08x, type=%d\n",
+           region_num, addr, size, type);
+
+    if ( region_num == 1 )
+    {
+        register_ioport_write(addr, size, 1, ioport_write1, s);
+        register_ioport_read(addr, size, 1, ioport_read1, s);
+        register_ioport_write(addr, size, 2, ioport_write2, s);
+        register_ioport_read(addr, size, 2, ioport_read2, s);
+        register_ioport_write(addr, size, 4, ioport_write4, s);
+        register_ioport_read(addr, size, 4, ioport_read4, s);
+
+        s->region_base_addr[region_num] = addr;
+        return;
+    }
+
+    logout("Invaid region number!\n");
+}
+
+/*
+ * From FreeBSD.
+ *
+ * Hash a 6-byte Ethernet address into a multicast filter index: run a
+ * 32-bit CRC over the address, least significant bit of each byte first,
+ * and return the top six bits of the result (a value in 0..63).
+ */
+#define POLYNOMIAL 0x04c11db6
+static int compute_mcast_idx(const uint8_t *ep)
+{
+    uint32_t crc = 0xffffffff;
+    int byte_no, bit_no;
+
+    for (byte_no = 0; byte_no < 6; byte_no++) {
+        uint8_t cur = ep[byte_no];
+
+        for (bit_no = 0; bit_no < 8; bit_no++, cur >>= 1) {
+            /* Feedback bit: top bit of the CRC xor the next data bit. */
+            int feedback = (int)(crc >> 31) ^ (cur & 0x01);
+
+            crc <<= 1;
+            if (feedback)
+                crc = (crc ^ POLYNOMIAL) | 1;
+        }
+    }
+    return (crc >> 26);
+}
+
+/* EEPro100 receive functions */
+/*
+ * Callback registered with qemu_new_vlan_client() by e100_init().
+ * Returns 1 when the receive unit state is RU_READY and 0 otherwise,
+ * logging which of the two it was.
+ */
+static int e100_can_receive(void *opaque)
+{
+    /* GET_RU_STATE takes no argument; it presumably refers to `s`. */
+    E100State *s = opaque;
+
+    if ( GET_RU_STATE == RU_READY )
+    {
+        logout("%s\n", "EEPro100 receiver is ready");
+        return 1;
+    }
+
+    logout("%s\n", "EEPro100 receiver is not ready");
+    return 0;
+}
+
+/*
+ * Deliver one incoming frame (buf, size bytes) to the guest.
+ *
+ * The frame is dropped when the receive unit is not RU_READY, when it is
+ * too short to contain a destination MAC, or when it is longer than
+ * MAX_ETH_FRAME_SIZE+4.  Otherwise the destination address is classified,
+ * first match wins:
+ *   1. equal to our own MAC                     -> accept
+ *   2. broadcast                                -> accept, unless
+ *      broadcast_dis is set and promiscuous is not
+ *   3. multicast enabled and group bit set      -> accept only if the
+ *      hash bit in mult_list is set, else drop
+ *   4. shorter than 64 bytes with dis_short_rx  -> drop
+ *   5. promiscuous                              -> accept
+ *   6. anything else                            -> drop
+ *
+ * An accepted frame is stored in guest memory right behind the RFD found
+ * at ru_base + ru_offset; the updated RFD is written back, ru_offset
+ * moves on to the RFD's link address and INT_FR is raised.  If the RFD
+ * has its EL or S bit set the receive unit is then suspended and INT_RNR
+ * is raised as well.
+ */
+static void e100_receive(void *opaque, const uint8_t * buf, int size)
+{
+    E100State *s = opaque;
+    uint32_t rfd_addr = 0;
+    rfd_t rfd = {0};
+
+    if ( GET_RU_STATE != RU_READY )
+    {
+        /* Receive unit is not ready: drop the frame without logging. */
+        return;
+    }
+
+    /*
+     * Every filter below looks at the destination MAC in the first bytes
+     * of buf, so a frame too short to hold one cannot be classified and
+     * must not be read past its end.
+     */
+    if ( size < (int)sizeof(s->macaddr) )
+    {
+        logout("Discard runt frame(size=%d)\n", size);
+        return;
+    }
+
+    rfd_addr = s->ru_base + s->ru_offset;
+    cpu_physical_memory_read(rfd_addr, (uint8_t *)&rfd, sizeof(rfd_t));
+
+    if ( (size > MAX_ETH_FRAME_SIZE+4) )
+    {
+        /* Long frame and configuration byte 18/3 (long receive ok) not set:
+         * Long frames are discarded. */
+        logout("Discard long frame(size=%d)\n", size);
+
+        return;
+    }
+    else if ( !memcmp(buf, s->macaddr, sizeof(s->macaddr)) )
+    {
+        /* The frame is for me */
+        logout("Receive a frame for me(size=%d)\n", size);
+        e100_dump("FRAME:", (uint8_t *)buf, size);
+    }
+    else if ( !memcmp(buf, broadcast_macaddr, sizeof(broadcast_macaddr)) )
+    {
+        if ( s->config.broadcast_dis && !s->config.promiscuous )
+        {
+            logout("Discard a broadcast frame\n");
+            return;
+        }
+
+        /* Broadcast frame */
+        rfd.status |= RX_IA_MATCH;
+        logout("Receive a broadcast frame(size=%d)\n", size);
+    }
+    else if ( s->is_multcast_enable && buf[0] & 0x1 )
+    {
+        /* Group address: accept only if its hash bit is in the filter. */
+        int mcast_idx = compute_mcast_idx(buf);
+        if ( !(s->mult_list[mcast_idx >> 3] & (1 << (mcast_idx & 7))) )
+        {
+            logout("Multicast address mismatch, discard\n");
+            return;
+        }
+        logout("Receive a multicast frame(size=%d)\n", size);
+    }
+    else if ( size < 64 && (s->config.dis_short_rx) )
+    {
+        /* From Intel's spec, short frames should be discarded
+         * when configuration byte 7/0 (discard short receive) is set.
+         * But that would lose frames such as ICMP and ARP frames,
+         * so we check whether the frame is for us before discarding
+         * short frames.
+         */
+
+        /* Save Bad Frame bit */
+        if ( s->config.save_bad_frame )
+        {
+            /* NOTE(review): rfd is never written back on this path, so
+             * only the statistics counter has a visible effect. */
+            rfd.status |= RX_SHORT;
+            s->statistics.rx_short_frame_errors ++;
+        }
+        logout("Receive a short frame(size=%d), discard it\n", size);
+        return;
+    }
+    else if ( s->config.promiscuous )
+    {
+        /* Promiscuous: receive all. No address match */
+        logout("Received frame in promiscuous mode(size=%d)\n", size);
+        rfd.status |= RX_NO_MATCH;
+    }
+    else
+    {
+        e100_dump("Unknown frame, MAC = ", (uint8_t *)buf, 6);
+        return;
+    }
+    e100_dump("Get frame, MAC = ", (uint8_t *)buf, 6);
+
+    /* Mark the RFD as filled with a complete frame of `size` bytes. */
+    rfd.c = 1;
+    rfd.ok = 1;
+    rfd.f = 1;
+    rfd.eof = 1;
+    rfd.status &= ~RX_COLLISION;
+    rfd.count = size;
+
+    logout("Get a RFD configure:\n"
+            "\tstatus:%#x\n"
+            "\tok:%#x\n" "\tc:%#x\n" "\tsf:%#x\n"
+            "\th:%#x\n" "\ts:%#x\n" "\tel:%#x\n"
+            "\tlink add:%#x\n" "\tactual count:%#x\n"
+            "\tf:%#x\n" "\teof:%#x\n" "\tsize:%#x\n",
+            rfd.status, rfd.ok, rfd.c, rfd.sf, rfd.h,
+            rfd.s, rfd.el, rfd.link_addr, rfd.count,
+            rfd.f, rfd.eof, rfd.size);
+
+    /* RFD header first, frame data immediately behind it. */
+    cpu_physical_memory_write(rfd_addr, (uint8_t *)&rfd, sizeof(rfd));
+    cpu_physical_memory_write(rfd_addr + sizeof(rfd_t), buf, size);
+    s->statistics.rx_good_frames ++;
+    s->ru_offset = le32_to_cpu(rfd.link_addr);
+
+    e100_interrupt(s, INT_FR);
+
+    if ( rfd.el || rfd.s )
+    {
+        /* Go to suspend */
+        SET_RU_STATE(RU_SUSPENDED);
+        e100_interrupt(s, INT_RNR);
+        logout("RFD met S or EL bit set, RU go to suspend\n");
+        return;
+    }
+
+    logout("Complete a frame receive(size = %d)\n", size);
+    return;
+}
+
+/*
+ * Fill the emulated EEPROM: reset it, load the i82557 default image,
+ * overwrite the first bytes with the NIC's MAC address and store the
+ * checksum word in the last EEPROM word.
+ */
+static void eeprom_init(E100State *s)
+{
+    int word, last_word;
+    int sum = 0;
+
+    /* Add 64 * 2 EEPROM. i82557 and i82558 support a 64 word EEPROM,
+     * i82559 and later support 64 or 256 word EEPROM. */
+    eeprom_reset(s, EEPROM_RESET_ALL);
+    s->eeprom.addr_len = EEPROM_I82557_ADDRBIT;
+    memcpy(s->eeprom.contents, eeprom_i82557, sizeof(eeprom_i82557));
+
+    /* The driver reads its MAC address from the start of the EEPROM. */
+    memcpy((uint8_t *)s->eeprom.contents, s->macaddr, sizeof(s->macaddr));
+
+    /* The MAC was just changed, so the checksum kept in the last word has
+     * to be recomputed: it is 0xBABA minus the sum of all other words. */
+    last_word = (1 << s->eeprom.addr_len) - 1;
+    for ( word = 0; word < last_word; word++ )
+        sum += s->eeprom.contents[word];
+    s->eeprom.contents[last_word] = 0xBABA - sum;
+}
+
+/*
+ * Create one emulated E100 NIC on `bus`.
+ *
+ *   bus    - PCI bus the device is registered on
+ *   nd     - NIC description; supplies the MAC address and the VLAN
+ *   name   - name used for the PCI device and the savevm record
+ *   device - device model identifier, stored in s->device
+ *
+ * Registers the PCI device, its three BARs (CSR memory, CSR I/O, flash),
+ * the MMIO handlers, the VLAN client, the reset handler and the savevm
+ * handlers, and initialises the EEPROM from the MAC address.
+ *
+ * NOTE(review): the result of pci_register_device() is used without a
+ * NULL check -- confirm it cannot fail here.
+ */
+static void e100_init(PCIBus * bus, NICInfo * nd,
+        const char *name, uint32_t device)
+{
+    PCIE100State *d;
+    E100State *s;
+
+    logout("\n");
+
+    d = (PCIE100State *) pci_register_device(bus, name,
+            sizeof(PCIE100State), -1,
+            NULL, NULL);
+
+    s = &d->e100;
+    s->device = device;
+    s->pci_dev = &d->dev;
+
+    pci_reset(s);
+
+
+    /* Handler for memory-mapped I/O */
+    d->e100.mmio_index =
+        cpu_register_io_memory(0, pci_mmio_read, pci_mmio_write, s);
+
+    //CSR Memory mapped base
+    pci_register_io_region(&d->dev, 0, PCI_MEM_SIZE,
+            PCI_ADDRESS_SPACE_MEM | PCI_ADDRESS_SPACE_MEM_PREFETCH,
+            pci_mmio_map);
+    //CSR I/O mapped base
+    pci_register_io_region(&d->dev, 1, PCI_IO_SIZE, PCI_ADDRESS_SPACE_IO,
+            pci_ioport_map);
+    //Flash memory mapped base
+    /* pci_mmio_map() only acts on CSR_MEMORY_BASE, so mapping this region
+     * is logged but otherwise has no effect there. */
+    pci_register_io_region(&d->dev, 2, PCI_FLASH_SIZE, PCI_ADDRESS_SPACE_MEM,
+            pci_mmio_map);
+
+    memcpy(s->macaddr, nd->macaddr, 6);
+    e100_dump("MAC ADDR", (uint8_t *)&s->macaddr[0], 6);
+
+    /* Needs s->macaddr: the MAC is copied into the EEPROM image. */
+    eeprom_init(s);
+
+    e100_reset(s);
+
+    s->vc = qemu_new_vlan_client(nd->vlan, e100_receive, e100_can_receive, s);
+
+    snprintf(s->vc->info_str, sizeof(s->vc->info_str),
+            "e100 pci macaddr=%02x:%02x:%02x:%02x:%02x:%02x",
+            s->macaddr[0],
+            s->macaddr[1],
+            s->macaddr[2], s->macaddr[3], s->macaddr[4], s->macaddr[5]);
+
+    qemu_register_reset(e100_reset, s);
+
+    register_savevm(name, 0, 3, e100_save, e100_load, s);
+}
+
+/* Public entry point: create an "e100" NIC of model i82557C on `bus`,
+ * configured from `nd`. */
+void pci_e100_init(PCIBus * bus, NICInfo * nd)
+{
+    e100_init(bus, nd, "e100", i82557C);
+}
+
diff -r 9a9ddc04eea2 -r 53dc1cf50506 tools/ioemu/hw/pci.c
--- a/tools/ioemu/hw/pci.c      Tue Nov 20 11:53:44 2007 -0700
+++ b/tools/ioemu/hw/pci.c      Wed Nov 21 09:12:06 2007 -0700
@@ -565,6 +565,8 @@ void pci_nic_init(PCIBus *bus, NICInfo *
         pci_rtl8139_init(bus, nd, devfn);
     } else if (strcmp(nd->model, "pcnet") == 0) {
         pci_pcnet_init(bus, nd, devfn);
+    } else if (strcmp(nd->model, "e100") == 0) {
+        pci_e100_init(bus, nd);
     } else {
         fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model);
         exit (1);
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c   Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/hvm/hpet.c   Wed Nov 21 09:12:06 2007 -0700
@@ -127,9 +127,13 @@ static inline int hpet_check_access_leng
 {
     if ( (addr & (len - 1)) || (len > 8) )
     {
-        gdprintk(XENLOG_ERR, "HPET: access across register boundary: "
+        /*
+         * According to ICH9 specification, unaligned accesses may result
+         * in unexpected behaviour or master abort, but should not crash/hang.
+         * Hence we read all-ones, drop writes, and log a warning.
+         */
+        gdprintk(XENLOG_WARNING, "HPET: access across register boundary: "
                  "%lx %lx\n", addr, len);
-        domain_crash(current->domain);
         return -EINVAL;
     }
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c        Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/irq.c        Wed Nov 21 09:12:06 2007 -0700
@@ -15,7 +15,6 @@
 #include <xen/keyhandler.h>
 #include <xen/compat.h>
 #include <asm/current.h>
-#include <asm/smpboot.h>
 #include <asm/iommu.h>
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/mm.c Wed Nov 21 09:12:06 2007 -0700
@@ -3007,7 +3007,8 @@ long set_gdt(struct vcpu *v,
         return -EINVAL;
 
     /* Check the pages in the new GDT. */
-    for ( i = 0; i < nr_pages; i++ ) {
+    for ( i = 0; i < nr_pages; i++ )
+    {
         mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
         if ( !mfn_valid(mfn) ||
              !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
@@ -3073,23 +3074,15 @@ long do_update_descriptor(u64 pa, u64 de
 
     *(u64 *)&d = desc;
 
-    LOCK_BIGLOCK(dom);
-
     mfn = gmfn_to_mfn(dom, gmfn);
     if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
          !mfn_valid(mfn) ||
          !check_descriptor(dom, &d) )
-    {
-        UNLOCK_BIGLOCK(dom);
         return -EINVAL;
-    }
 
     page = mfn_to_page(mfn);
     if ( unlikely(!get_page(page, dom)) )
-    {
-        UNLOCK_BIGLOCK(dom);
         return -EINVAL;
-    }
 
     /* Check if the given frame is in use in an unsafe context. */
     switch ( page->u.inuse.type_info & PGT_type_mask )
@@ -3112,7 +3105,7 @@ long do_update_descriptor(u64 pa, u64 de
 
     /* All is good so make the update. */
     gdt_pent = map_domain_page(mfn);
-    memcpy(&gdt_pent[offset], &d, 8);
+    atomic_write64((uint64_t *)&gdt_pent[offset], *(uint64_t *)&d);
     unmap_domain_page(gdt_pent);
 
     put_page_type(page);
@@ -3121,8 +3114,6 @@ long do_update_descriptor(u64 pa, u64 de
 
  out:
     put_page(page);
-
-    UNLOCK_BIGLOCK(dom);
 
     return ret;
 }
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/physdev.c    Wed Nov 21 09:12:06 2007 -0700
@@ -8,7 +8,6 @@
 #include <xen/event.h>
 #include <xen/guest_access.h>
 #include <asm/current.h>
-#include <asm/smpboot.h>
 #include <asm/hypercall.h>
 #include <public/xen.h>
 #include <public/physdev.h>
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/smp.c
--- a/xen/arch/x86/smp.c        Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/smp.c        Wed Nov 21 09:12:06 2007 -0700
@@ -18,7 +18,6 @@
 #include <asm/smp.h>
 #include <asm/mc146818rtc.h>
 #include <asm/flushtlb.h>
-#include <asm/smpboot.h>
 #include <asm/hardirq.h>
 #include <asm/ipi.h>
 #include <asm/hvm/support.h>
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/traps.c      Wed Nov 21 09:12:06 2007 -0700
@@ -2583,7 +2583,10 @@ void set_system_gate(unsigned int n, voi
 
 void set_task_gate(unsigned int n, unsigned int sel)
 {
+    idt_table[n].b = 0;
+    wmb(); /* disable gate /then/ rewrite */
     idt_table[n].a = sel << 16;
+    wmb(); /* rewrite /then/ enable gate */
     idt_table[n].b = 0x8500;
 }
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/arch/x86/x86_32/seg_fixup.c
--- a/xen/arch/x86/x86_32/seg_fixup.c   Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/arch/x86/x86_32/seg_fixup.c   Wed Nov 21 09:12:06 2007 -0700
@@ -42,7 +42,7 @@
 #define O  OPCODE_BYTE
 #define M  HAS_MODRM
 
-static unsigned char insn_decode[256] = {
+static const unsigned char insn_decode[256] = {
     /* 0x00 - 0x0F */
     O|M, O|M, O|M, O|M, X, X, X, X,
     O|M, O|M, O|M, O|M, X, X, X, X,
@@ -69,7 +69,7 @@ static unsigned char insn_decode[256] = 
     X, X, X, X, X, X, X, X,
     /* 0x80 - 0x8F */
     O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
-    O|M, O|M, O|M, O|M, O|M, O|M, O|M, X,
+    O|M, O|M, O|M, O|M, O|M, X|M, O|M, O|M,
     /* 0x90 - 0x9F */
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
@@ -89,17 +89,17 @@ static unsigned char insn_decode[256] = 
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
     /* 0xF0 - 0xFF */
-    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, O|M, O|M,
     X, X, X, X, X, X, O|M, O|M
 };
 
-static unsigned char twobyte_decode[256] = {
+static const unsigned char twobyte_decode[256] = {
     /* 0x00 - 0x0F */
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
     /* 0x10 - 0x1F */
     X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    O|M, X, X, X, X, X, X, X,
     /* 0x20 - 0x2F */
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
@@ -122,16 +122,16 @@ static unsigned char twobyte_decode[256]
     X, X, X, X, X, X, X, X,
     X, X, X, X, X, X, X, X,
     /* 0x90 - 0x9F */
-    X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
+    O|M, O|M, O|M, O|M, O|M, O|M, O|M, O|M,
     /* 0xA0 - 0xAF */
-    X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    X, X, X, O|M, O|M|1, O|M, O|M, X,
+    X, X, X, O|M, O|M|1, O|M, X, O|M,
     /* 0xB0 - 0xBF */
-    X, X, X, X, X, X, X, X,
-    X, X, X, X, X, X, X, X,
+    X, X, X, O|M, X, X, O|M, O|M,
+    X, X, O|M|1, O|M, O|M, O|M, O|M, O|M,
     /* 0xC0 - 0xCF */
-    X, X, X, X, X, X, X, X,
+    O|M, O|M, X, O|M, X, X, X, O|M,
     X, X, X, X, X, X, X, X,
     /* 0xD0 - 0xDF */
     X, X, X, X, X, X, X, X,
@@ -153,24 +153,24 @@ static unsigned char twobyte_decode[256]
  *  @base  (OUT): Decoded linear base address.
  *  @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB).
  */
-int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
+static int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
 {
-    struct vcpu *d = current;
-    unsigned long *table, a, b;
-    int            ldt = !!(seg & 4);
-    int            idx = (seg >> 3) & 8191;
+    struct vcpu *curr = current;
+    uint32_t    *table, a, b;
+    int          ldt = !!(seg & 4);
+    int          idx = (seg >> 3) & 8191;
 
     /* Get base and check limit. */
     if ( ldt )
     {
-        table = (unsigned long *)LDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.ldt_ents )
+        table = (uint32_t *)LDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.ldt_ents )
             goto fail;
     }
     else /* gdt */
     {
-        table = (unsigned long *)GDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.gdt_ents )
+        table = (uint32_t *)GDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.gdt_ents )
             goto fail;
     }
 
@@ -204,7 +204,7 @@ int get_baselimit(u16 seg, unsigned long
 }
 
 /* Turn a segment+offset into a linear address. */
-int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
+static int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
 {
     unsigned long base, limit;
 
@@ -219,31 +219,31 @@ int linearise_address(u16 seg, unsigned 
     return 1;
 }
 
-int fixup_seg(u16 seg, unsigned long offset)
+static int fixup_seg(u16 seg, unsigned long offset)
 {
-    struct vcpu *d = current;
-    unsigned long *table, a, b, base, limit;
-    int            ldt = !!(seg & 4);
-    int            idx = (seg >> 3) & 8191;
+    struct vcpu *curr = current;
+    uint32_t    *table, a, b, base, limit;
+    int          ldt = !!(seg & 4);
+    int          idx = (seg >> 3) & 8191;
 
     /* Get base and check limit. */
     if ( ldt )
     {
-        table = (unsigned long *)LDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.ldt_ents )
+        table = (uint32_t *)LDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.ldt_ents )
         {
             dprintk(XENLOG_DEBUG, "Segment %04x out of LDT range (%ld)\n",
-                    seg, d->arch.guest_context.ldt_ents);
+                    seg, curr->arch.guest_context.ldt_ents);
             goto fail;
         }
     }
     else /* gdt */
     {
-        table = (unsigned long *)GDT_VIRT_START(d);
-        if ( idx >= d->arch.guest_context.gdt_ents )
+        table = (uint32_t *)GDT_VIRT_START(curr);
+        if ( idx >= curr->arch.guest_context.gdt_ents )
         {
             dprintk(XENLOG_DEBUG, "Segment %04x out of GDT range (%ld)\n",
-                    seg, d->arch.guest_context.gdt_ents);
+                    seg, curr->arch.guest_context.gdt_ents);
             goto fail;
         }
     }
@@ -261,7 +261,7 @@ int fixup_seg(u16 seg, unsigned long off
                _SEGMENT_G|_SEGMENT_CODE|_SEGMENT_DPL)) != 
          (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB|_SEGMENT_G|_SEGMENT_DPL) )
     {
-        dprintk(XENLOG_DEBUG, "Bad segment %08lx:%08lx\n", a, b);
+        dprintk(XENLOG_DEBUG, "Bad segment %08x:%08x\n", a, b);
         goto fail;
     }
 
@@ -291,8 +291,7 @@ int fixup_seg(u16 seg, unsigned long off
         }
     }
 
-    dprintk(XENLOG_DEBUG, "None of the above! "
-            "(%08lx:%08lx, %08lx, %08lx, %08lx)\n",
+    dprintk(XENLOG_DEBUG, "None of the above! (%08x:%08x, %08x, %08x, %08x)\n",
             a, b, base, limit, base+limit);
 
  fail:
@@ -303,9 +302,8 @@ int fixup_seg(u16 seg, unsigned long off
     a &= ~0x0ffff; a |= limit & 0x0ffff;
     b &= ~0xf0000; b |= limit & 0xf0000;
     b ^= _SEGMENT_EC; /* grows-up <-> grows-down */
-    /* NB. These can't fault. Checked readable above; must also be writable. */
-    table[2*idx+0] = a;
-    table[2*idx+1] = b;
+    /* NB. This can't fault. Checked readable above; must also be writable. */
+    atomic_write64((uint64_t *)&table[2*idx], ((uint64_t)b<<32) | a);
     return 1;
 }
 
@@ -315,18 +313,15 @@ int fixup_seg(u16 seg, unsigned long off
  */
 int gpf_emulate_4gb(struct cpu_user_regs *regs)
 {
-    struct vcpu *d = current;
-    struct trap_info   *ti;
-    struct trap_bounce *tb;
-    u8            modrm, mod, reg, rm, decode;
-    void         *memreg;
-    unsigned long offset;
-    u8            disp8;
-    u32           disp32 = 0;
+    struct vcpu   *curr = current;
+    u8             modrm, mod, rm, decode;
+    const u32     *base, *index = NULL;
+    unsigned long  offset;
+    s8             disp8;
+    s32            disp32 = 0;
     u8            *eip;         /* ptr to instruction start */
     u8            *pb, b;       /* ptr into instr. / current instr. byte */
-    int            gs_override = 0;
-    int            twobyte = 0;
+    int            gs_override = 0, scale = 0, twobyte = 0;
 
     /* WARNING: We only work for ring-3 segments. */
     if ( unlikely(vm86_mode(regs)) || unlikely(!ring_3(regs)) )
@@ -356,6 +351,9 @@ int gpf_emulate_4gb(struct cpu_user_regs
                     "legal instruction\n");
             goto fail;
         }
+
+        if ( twobyte )
+            break;
 
         switch ( b )
         {
@@ -375,6 +373,9 @@ int gpf_emulate_4gb(struct cpu_user_regs
         case 0x65: /* GS override */
             gs_override = 1;
             break;
+        case 0x0f: /* Not really a prefix byte */
+            twobyte = 1;
+            break;
         default: /* Not a prefix byte */
             goto done_prefix;
         }
@@ -387,32 +388,10 @@ int gpf_emulate_4gb(struct cpu_user_regs
         goto fail;
     }
 
-    decode = insn_decode[b]; /* opcode byte */
+    decode = (!twobyte ? insn_decode : twobyte_decode)[b];
     pb++;
-    if ( decode == 0 && b == 0x0f )
-    {
-        twobyte = 1;
-
-        if ( get_user(b, pb) )
-        {
-            dprintk(XENLOG_DEBUG,
-                    "Fault while accessing byte %ld of instruction\n",
-                    (long)(pb-eip));
-            goto page_fault;
-        }
-
-        if ( (pb - eip) >= 15 )
-        {
-            dprintk(XENLOG_DEBUG, "Too many opcode bytes for a "
-                    "legal instruction\n");
-            goto fail;
-        }
-
-        decode = twobyte_decode[b];
-        pb++;
-    }
-
-    if ( decode == 0 )
+
+    if ( !(decode & OPCODE_BYTE) )
     {
         dprintk(XENLOG_DEBUG, "Unsupported %sopcode %02x\n",
                 twobyte ? "two byte " : "", b);
@@ -422,12 +401,12 @@ int gpf_emulate_4gb(struct cpu_user_regs
     if ( !(decode & HAS_MODRM) )
     {
         /* Must be a <disp32>, or bail. */
-        if ( (decode & 7) != 4 )
+        if ( (decode & INSN_SUFFIX_BYTES) != 4 )
             goto fail;
 
         if ( get_user(offset, (u32 *)pb) )
         {
-            dprintk(XENLOG_DEBUG, "Fault while extracting <disp32>.\n");
+            dprintk(XENLOG_DEBUG, "Fault while extracting <moffs32>.\n");
             goto page_fault;
         }
         pb += 4;
@@ -448,29 +427,39 @@ int gpf_emulate_4gb(struct cpu_user_regs
     pb++;
 
     mod = (modrm >> 6) & 3;
-    reg = (modrm >> 3) & 7;
     rm  = (modrm >> 0) & 7;
 
     if ( rm == 4 )
     {
-        dprintk(XENLOG_DEBUG, "FIXME: Add decoding for the SIB byte.\n");
-        goto fixme;
+        u8 sib;
+
+        if ( get_user(sib, pb) )
+        {
+            dprintk(XENLOG_DEBUG, "Fault while extracting sib byte\n");
+            goto page_fault;
+        }
+
+        pb++;
+
+        rm = sib & 7;
+        if ( (sib & 0x38) != 0x20 )
+            index = decode_register((sib >> 3) & 7, regs, 0);
+        scale = sib >> 6;
     }
 
     /* Decode R/M field. */
-    memreg = decode_register(rm,  regs, 0);
+    base = decode_register(rm, regs, 0);
 
     /* Decode Mod field. */
-    switch ( modrm >> 6 )
+    switch ( mod )
     {
     case 0:
-        disp32 = 0;
         if ( rm == 5 ) /* disp32 rather than (EBP) */
         {
-            memreg = NULL;
+            base = NULL;
             if ( get_user(disp32, (u32 *)pb) )
             {
-                dprintk(XENLOG_DEBUG, "Fault while extracting <disp8>.\n");
+                dprintk(XENLOG_DEBUG, "Fault while extracting <base32>.\n");
                 goto page_fault;
             }
             pb += 4;
@@ -484,13 +473,13 @@ int gpf_emulate_4gb(struct cpu_user_regs
             goto page_fault;
         }
         pb++;
-        disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8;;
+        disp32 = disp8;
         break;
 
     case 2:
         if ( get_user(disp32, (u32 *)pb) )
         {
-            dprintk(XENLOG_DEBUG, "Fault while extracting <disp8>.\n");
+            dprintk(XENLOG_DEBUG, "Fault while extracting <disp32>.\n");
             goto page_fault;
         }
         pb += 4;
@@ -502,8 +491,10 @@ int gpf_emulate_4gb(struct cpu_user_regs
     }
 
     offset = disp32;
-    if ( memreg != NULL )
-        offset += *(u32 *)memreg;
+    if ( base != NULL )
+        offset += *base;
+    if ( index != NULL )
+        offset += *index << scale;
 
  skip_modrm:
     if ( !fixup_seg((u16)regs->gs, offset) )
@@ -513,10 +504,11 @@ int gpf_emulate_4gb(struct cpu_user_regs
     perfc_incr(seg_fixups);
 
     /* If requested, give a callback on otherwise unused vector 15. */
-    if ( VM_ASSIST(d->domain, VMASST_TYPE_4gb_segments_notify) )
-    {
-        ti  = &d->arch.guest_context.trap_ctxt[15];
-        tb  = &d->arch.trap_bounce;
+    if ( VM_ASSIST(curr->domain, VMASST_TYPE_4gb_segments_notify) )
+    {
+        struct trap_info   *ti  = &curr->arch.guest_context.trap_ctxt[15];
+        struct trap_bounce *tb  = &curr->arch.trap_bounce;
+
         tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
         tb->error_code = pb - eip;
         tb->cs         = ti->cs;
@@ -527,13 +519,6 @@ int gpf_emulate_4gb(struct cpu_user_regs
 
     return EXCRET_fault_fixed;
 
- fixme:
-    dprintk(XENLOG_DEBUG, "Undecodable instruction "
-            "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
-            "caused GPF(0) at %04x:%08x\n",
-            eip[0], eip[1], eip[2], eip[3],
-            eip[4], eip[5], eip[6], eip[7],
-            regs->cs, regs->eip);
  fail:
     return 0;
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-powerpc/smpboot.h
--- a/xen/include/asm-powerpc/smpboot.h Tue Nov 20 11:53:44 2007 -0700
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (C) IBM Corp. 2005
- *
- * Authors: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
- */
-
-#include "../asm-x86/smpboot.h"
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h        Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/include/asm-x86/desc.h        Wed Nov 21 09:12:06 2007 -0700
@@ -143,6 +143,11 @@ typedef struct {
 
 #define _set_gate(gate_addr,type,dpl,addr)               \
 do {                                                     \
+    (gate_addr)->a = 0;                                  \
+    wmb(); /* disable gate /then/ rewrite */             \
+    (gate_addr)->b =                                     \
+        ((unsigned long)(addr) >> 32);                   \
+    wmb(); /* rewrite /then/ enable gate */              \
     (gate_addr)->a =                                     \
         (((unsigned long)(addr) & 0xFFFF0000UL) << 32) | \
         ((unsigned long)(dpl) << 45) |                   \
@@ -150,49 +155,53 @@ do {                                    
         ((unsigned long)(addr) & 0xFFFFUL) |             \
         ((unsigned long)__HYPERVISOR_CS64 << 16) |       \
         (1UL << 47);                                     \
-    (gate_addr)->b =                                     \
-        ((unsigned long)(addr) >> 32);                   \
 } while (0)
 
 #define _set_tssldt_desc(desc,addr,limit,type)           \
 do {                                                     \
+    (desc)[0].b = (desc)[1].b = 0;                       \
+    wmb(); /* disable entry /then/ rewrite */            \
     (desc)[0].a =                                        \
         ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
+    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
+    wmb(); /* rewrite /then/ enable entry */             \
     (desc)[0].b =                                        \
         ((u32)(addr) & 0xFF000000U) |                    \
         ((u32)(type) << 8) | 0x8000U |                   \
         (((u32)(addr) & 0x00FF0000U) >> 16);             \
-    (desc)[1].a = (u32)(((unsigned long)(addr)) >> 32);  \
-    (desc)[1].b = 0;                                     \
 } while (0)
 
 #elif defined(__i386__)
 
 typedef struct desc_struct idt_entry_t;
 
-#define _set_gate(gate_addr,type,dpl,addr) \
-do { \
-  int __d0, __d1; \
-  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %4,%%dx\n\t" \
- "movl %%eax,%0\n\t" \
- "movl %%edx,%1" \
- :"=m" (*((long *) (gate_addr))), \
-  "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
- :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-  "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
-} while (0)
-
-#define _set_tssldt_desc(n,addr,limit,type) \
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
- "movw %%ax,2(%2)\n\t" \
- "rorl $16,%%eax\n\t" \
- "movb %%al,4(%2)\n\t" \
- "movb %4,5(%2)\n\t" \
- "movb $0,6(%2)\n\t" \
- "movb %%ah,7(%2)\n\t" \
- "rorl $16,%%eax" \
- : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type|0x80))
+#define _set_gate(gate_addr,type,dpl,addr)               \
+do {                                                     \
+    (gate_addr)->b = 0;                                  \
+    wmb(); /* disable gate /then/ rewrite */             \
+    (gate_addr)->a =                                     \
+        ((unsigned long)(addr) & 0xFFFFUL) |             \
+        ((unsigned long)__HYPERVISOR_CS << 16);          \
+    wmb(); /* rewrite /then/ enable gate */              \
+    (gate_addr)->b =                                     \
+        ((unsigned long)(addr) & 0xFFFF0000UL) |         \
+        ((unsigned long)(dpl) << 13) |                   \
+        ((unsigned long)(type) << 8) |                   \
+        (1UL << 15);                                     \
+} while (0)
+
+#define _set_tssldt_desc(desc,addr,limit,type)           \
+do {                                                     \
+    (desc)->b = 0;                                       \
+    wmb(); /* disable entry /then/ rewrite */            \
+    (desc)->a =                                          \
+        ((u32)(addr) << 16) | ((u32)(limit) & 0xFFFF);   \
+    wmb(); /* rewrite /then/ enable entry */             \
+    (desc)->b =                                          \
+        ((u32)(addr) & 0xFF000000U) |                    \
+        ((u32)(type) << 8) | 0x8000U |                   \
+        (((u32)(addr) & 0x00FF0000U) >> 16);             \
+} while (0)
 
 #endif
 
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/smpboot.h
--- a/xen/include/asm-x86/smpboot.h     Tue Nov 20 11:53:44 2007 -0700
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-#ifndef __ASM_SMPBOOT_H
-#define __ASM_SMPBOOT_H
-
-static inline unsigned long apicid_to_phys_cpu_present(int apicid)
-{
-       return 1UL << apicid;
-}
-
-extern volatile int logical_apicid_2_cpu[];
-extern volatile int cpu_2_logical_apicid[];
-extern volatile int physical_apicid_2_cpu[];
-extern volatile int cpu_2_physical_apicid[];
-
-#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-
-#endif
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/system.h
--- a/xen/include/asm-x86/system.h      Tue Nov 20 11:53:44 2007 -0700
+++ b/xen/include/asm-x86/system.h      Wed Nov 21 09:12:06 2007 -0700
@@ -5,69 +5,78 @@
 #include <xen/types.h>
 #include <asm/bitops.h>
 
-#define read_segment_register(name)                                     \
-({  u16 __sel;                                                          \
-    __asm__ __volatile__ ( "movw %%" STR(name) ",%0" : "=r" (__sel) );  \
-    __sel;                                                              \
+#define read_segment_register(name)                             \
+({  u16 __sel;                                                  \
+    asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) );  \
+    __sel;                                                      \
 })
 
 #define wbinvd() \
-       __asm__ __volatile__ ("wbinvd": : :"memory");
+    asm volatile ( "wbinvd" : : : "memory" )
 
 #define clflush(a) \
-       __asm__ __volatile__ ("clflush (%0)": :"r"(a));
+    asm volatile ( "clflush (%0)" : : "r"(a) )
 
-#define nop() __asm__ __volatile__ ("nop")
+#define nop() \
+    asm volatile ( "nop" )
 
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,v) \
+    ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
 
 struct __xchg_dummy { unsigned long a[100]; };
 #define __xg(x) ((volatile struct __xchg_dummy *)(x))
 
+#if defined(__i386__)
+# include <asm/x86_32/system.h>
+#elif defined(__x86_64__)
+# include <asm/x86_64/system.h>
+#endif
 
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
  *   but generally the primitive is invalid, *ptr is output argument. --ANK
  */
-static always_inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+static always_inline unsigned long __xchg(
+    unsigned long x, volatile void *ptr, int size)
 {
-       switch (size) {
-               case 1:
-                       __asm__ __volatile__("xchgb %b0,%1"
-                               :"=q" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
-               case 2:
-                       __asm__ __volatile__("xchgw %w0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
+    switch ( size )
+    {
+    case 1:
+        asm volatile ( "xchgb %b0,%1"
+                       : "=q" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
+    case 2:
+        asm volatile ( "xchgw %w0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
 #if defined(__i386__)
-               case 4:
-                       __asm__ __volatile__("xchgl %0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
+    case 4:
+        asm volatile ( "xchgl %0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
 #elif defined(__x86_64__)
-               case 4:
-                       __asm__ __volatile__("xchgl %k0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
-               case 8:
-                       __asm__ __volatile__("xchgq %0,%1"
-                               :"=r" (x)
-                               :"m" (*__xg((volatile void *)ptr)), "0" (x)
-                               :"memory");
-                       break;
+    case 4:
+        asm volatile ( "xchgl %k0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
+    case 8:
+        asm volatile ( "xchgq %0,%1"
+                       : "=r" (x)
+                       : "m" (*__xg((volatile void *)ptr)), "0" (x)
+                       : "memory" );
+        break;
 #endif
-       }
-       return x;
+    }
+    return x;
 }
 
 /*
@@ -79,230 +88,88 @@ static always_inline unsigned long __cmp
 static always_inline unsigned long __cmpxchg(
     volatile void *ptr, unsigned long old, unsigned long new, int size)
 {
-       unsigned long prev;
-       switch (size) {
-       case 1:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
-                                    : "=a"(prev)
-                                    : "q"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
-       case 2:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
+    unsigned long prev;
+    switch ( size )
+    {
+    case 1:
+        asm volatile ( LOCK_PREFIX "cmpxchgb %b1,%2"
+                       : "=a" (prev)
+                       : "q" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
+    case 2:
+        asm volatile ( LOCK_PREFIX "cmpxchgw %w1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
 #if defined(__i386__)
-       case 4:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
+    case 4:
+        asm volatile ( LOCK_PREFIX "cmpxchgl %1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
 #elif defined(__x86_64__)
-       case 4:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
-       case 8:
-               __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
-                                    : "=a"(prev)
-                                    : "r"(new), "m"(*__xg((volatile void *)ptr)), "0"(old)
-                                    : "memory");
-               return prev;
+    case 4:
+        asm volatile ( LOCK_PREFIX "cmpxchgl %k1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
+    case 8:
+        asm volatile ( LOCK_PREFIX "cmpxchgq %1,%2"
+                       : "=a" (prev)
+                       : "r" (new), "m" (*__xg((volatile void *)ptr)),
+                       "0" (old)
+                       : "memory" );
+        return prev;
 #endif
-       }
-       return old;
+    }
+    return old;
 }
 
 #define __HAVE_ARCH_CMPXCHG
 
-#if BITS_PER_LONG == 64
-
-#define cmpxchg(ptr,o,n)                                                \
-    ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
-                                   (unsigned long)(n),sizeof(*(ptr))))
-#else
-
-static always_inline unsigned long long __cmpxchg8b(
-    volatile void *ptr, unsigned long long old, unsigned long long new)
-{
-    unsigned long long prev;
-    __asm__ __volatile__ (
-        LOCK_PREFIX "cmpxchg8b %3"
-        : "=A" (prev)
-        : "c" ((u32)(new>>32)), "b" ((u32)new),
-          "m" (*__xg((volatile void *)ptr)), "0" (old)
-        : "memory" );
-    return prev;
-}
-
-#define cmpxchg(ptr,o,n)                                \
-({                                                      \
-    __typeof__(*(ptr)) __prev;                          \
-    switch ( sizeof(*(ptr)) ) {                         \
-    case 8:                                             \
-        __prev = ((__typeof__(*(ptr)))__cmpxchg8b(      \
-            (ptr),                                      \
-            (unsigned long long)(o),                    \
-            (unsigned long long)(n)));                  \
-        break;                                          \
-    default:                                            \
-        __prev = ((__typeof__(*(ptr)))__cmpxchg(        \
-            (ptr),                                      \
-            (unsigned long)(o),                         \
-            (unsigned long)(n),                         \
-            sizeof(*(ptr))));                           \
-        break;                                          \
-    }                                                   \
-    __prev;                                             \
-})
-
-#endif
-
-
 /*
- * This function causes value _o to be changed to _n at location _p.
- * If this access causes a fault then we return 1, otherwise we return 0.
- * If no fault occurs then _o is updated to the value we saw at _p. If this
- * is the same as the initial value of _o then _n is written to location _p.
+ * Both Intel and AMD agree that, from a programmer's viewpoint:
+ *  Loads cannot be reordered relative to other loads.
+ *  Stores cannot be reordered relative to other stores.
+ * 
+ * Intel64 Architecture Memory Ordering White Paper
+ * <http://developer.intel.com/products/processor/manuals/318147.pdf>
+ * 
+ * AMD64 Architecture Programmer's Manual, Volume 2: System Programming
+ * <http://www.amd.com/us-en/assets/content_type/\
+ *  white_papers_and_tech_docs/24593.pdf>
  */
-#ifdef __i386__
-#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
-    __asm__ __volatile__ (                                              \
-        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
-        "2:\n"                                                          \
-        ".section .fixup,\"ax\"\n"                                      \
-        "3:     movl $1,%1\n"                                           \
-        "       jmp 2b\n"                                               \
-        ".previous\n"                                                   \
-        ".section __ex_table,\"a\"\n"                                   \
-        "       .align 4\n"                                             \
-        "       .long 1b,3b\n"                                          \
-        ".previous"                                                     \
-        : "=a" (_o), "=r" (_rc)                                         \
-        : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
-        : "memory");
-#define cmpxchg_user(_p,_o,_n)                                          \
-({                                                                      \
-    int _rc;                                                            \
-    switch ( sizeof(*(_p)) ) {                                          \
-    case 1:                                                             \
-        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
-        break;                                                          \
-    case 2:                                                             \
-        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
-        break;                                                          \
-    case 4:                                                             \
-        __cmpxchg_user(_p,_o,_n,"l","","r");                            \
-        break;                                                          \
-    case 8:                                                             \
-        __asm__ __volatile__ (                                          \
-            "1: " LOCK_PREFIX "cmpxchg8b %4\n"                          \
-            "2:\n"                                                      \
-            ".section .fixup,\"ax\"\n"                                  \
-            "3:     movl $1,%1\n"                                       \
-            "       jmp 2b\n"                                           \
-            ".previous\n"                                               \
-            ".section __ex_table,\"a\"\n"                               \
-            "       .align 4\n"                                         \
-            "       .long 1b,3b\n"                                      \
-            ".previous"                                                 \
-            : "=A" (_o), "=r" (_rc)                                     \
-            : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)),              \
-              "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0)     \
-            : "memory");                                                \
-        break;                                                          \
-    }                                                                   \
-    _rc;                                                                \
-})
-#else
-#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
-    __asm__ __volatile__ (                                              \
-        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
-        "2:\n"                                                          \
-        ".section .fixup,\"ax\"\n"                                      \
-        "3:     movl $1,%1\n"                                           \
-        "       jmp 2b\n"                                               \
-        ".previous\n"                                                   \
-        ".section __ex_table,\"a\"\n"                                   \
-        "       .align 8\n"                                             \
-        "       .quad 1b,3b\n"                                          \
-        ".previous"                                                     \
-        : "=a" (_o), "=r" (_rc)                                         \
-        : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
-        : "memory");
-#define cmpxchg_user(_p,_o,_n)                                          \
-({                                                                      \
-    int _rc;                                                            \
-    switch ( sizeof(*(_p)) ) {                                          \
-    case 1:                                                             \
-        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
-        break;                                                          \
-    case 2:                                                             \
-        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
-        break;                                                          \
-    case 4:                                                             \
-        __cmpxchg_user(_p,_o,_n,"l","k","r");                           \
-        break;                                                          \
-    case 8:                                                             \
-        __cmpxchg_user(_p,_o,_n,"q","","r");                            \
-        break;                                                          \
-    }                                                                   \
-    _rc;                                                                \
-})
-#endif
-
-#if defined(__i386__)
-#define mb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#define rmb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
-#elif defined(__x86_64__)
-#define mb()    __asm__ __volatile__ ("mfence":::"memory")
-#define rmb()   __asm__ __volatile__ ("lfence":::"memory")
-#endif
-#define wmb()  __asm__ __volatile__ ("": : :"memory")
+#define rmb()           barrier()
+#define wmb()           barrier()
 
 #ifdef CONFIG_SMP
-#define smp_mb()       mb()
-#define smp_rmb()      rmb()
-#define smp_wmb()      wmb()
+#define smp_mb()        mb()
+#define smp_rmb()       rmb()
+#define smp_wmb()       wmb()
 #else
-#define smp_mb()       barrier()
-#define smp_rmb()      barrier()
-#define smp_wmb()      barrier()
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
 #endif
 
 #define set_mb(var, value) do { xchg(&var, value); } while (0)
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
-/* interrupt control.. */
-#if defined(__i386__)
-#define __save_flags(x)                __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
-#define __restore_flags(x)     __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
-#elif defined(__x86_64__)
-#define __save_flags(x)                do { __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define __restore_flags(x)     __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-#endif
-#define __cli()                __asm__ __volatile__("cli": : :"memory")
-#define __sti()                        __asm__ __volatile__("sti": : :"memory")
+#define local_irq_disable()     asm volatile ( "cli" : : : "memory" )
+#define local_irq_enable()      asm volatile ( "sti" : : : "memory" )
+
 /* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()            __asm__ __volatile__("sti; hlt": : :"memory")
+#define safe_halt()     asm volatile ( "sti; hlt" : : : "memory" )
 /* used when interrupts are already enabled or to shutdown the processor */
-#define halt()                 __asm__ __volatile__("hlt": : :"memory")
-
-/* For spinlocks etc */
-#if defined(__i386__)
-#define local_irq_save(x)      __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
-#define local_irq_restore(x)   __restore_flags(x)
-#elif defined(__x86_64__)
-#define local_irq_save(x)      do { __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x)   __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
-#endif
-#define local_irq_disable()    __cli()
-#define local_irq_enable()     __sti()
+#define halt()          asm volatile ( "hlt" : : : "memory" )
 
 static inline int local_irq_is_enabled(void)
 {
@@ -311,8 +178,8 @@ static inline int local_irq_is_enabled(v
     return !!(flags & (1<<9)); /* EFLAGS_IF */
 }
 
-#define BROKEN_ACPI_Sx         0x0001
-#define BROKEN_INIT_AFTER_S1   0x0002
+#define BROKEN_ACPI_Sx          0x0001
+#define BROKEN_INIT_AFTER_S1    0x0002
 
 void trap_init(void);
 void percpu_traps_init(void);
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/x86_32/system.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_32/system.h       Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,114 @@
+#ifndef __X86_32_SYSTEM_H__
+#define __X86_32_SYSTEM_H__
+
+static always_inline unsigned long long __cmpxchg8b(
+    volatile void *ptr, unsigned long long old, unsigned long long new)
+{
+    unsigned long long prev;
+    asm volatile (
+        LOCK_PREFIX "cmpxchg8b %3"
+        : "=A" (prev)
+        : "c" ((u32)(new>>32)), "b" ((u32)new),
+          "m" (*__xg((volatile void *)ptr)), "0" (old)
+        : "memory" );
+    return prev;
+}
+
+#define cmpxchg(ptr,o,n)                                \
+({                                                      \
+    __typeof__(*(ptr)) __prev;                          \
+    switch ( sizeof(*(ptr)) ) {                         \
+    case 8:                                             \
+        __prev = ((__typeof__(*(ptr)))__cmpxchg8b(      \
+            (ptr),                                      \
+            (unsigned long long)(o),                    \
+            (unsigned long long)(n)));                  \
+        break;                                          \
+    default:                                            \
+        __prev = ((__typeof__(*(ptr)))__cmpxchg(        \
+            (ptr),                                      \
+            (unsigned long)(o),                         \
+            (unsigned long)(n),                         \
+            sizeof(*(ptr))));                           \
+        break;                                          \
+    }                                                   \
+    __prev;                                             \
+})
+
+/*
+ * This function causes value _o to be changed to _n at location _p.
+ * If this access causes a fault then we return 1, otherwise we return 0.
+ * If no fault occurs then _o is updated to the value we saw at _p. If this
+ * is the same as the initial value of _o then _n is written to location _p.
+ */
+#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
+    asm volatile (                                                      \
+        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
+        "2:\n"                                                          \
+        ".section .fixup,\"ax\"\n"                                      \
+        "3:     movl $1,%1\n"                                           \
+        "       jmp 2b\n"                                               \
+        ".previous\n"                                                   \
+        ".section __ex_table,\"a\"\n"                                   \
+        "       .align 4\n"                                             \
+        "       .long 1b,3b\n"                                          \
+        ".previous"                                                     \
+        : "=a" (_o), "=r" (_rc)                                         \
+        : _regtype (_n), "m" (*__xg((volatile void *)_p)), "0" (_o), "1" (0) \
+        : "memory");
+
+#define cmpxchg_user(_p,_o,_n)                                          \
+({                                                                      \
+    int _rc;                                                            \
+    switch ( sizeof(*(_p)) ) {                                          \
+    case 1:                                                             \
+        __cmpxchg_user(_p,_o,_n,"b","b","q");                           \
+        break;                                                          \
+    case 2:                                                             \
+        __cmpxchg_user(_p,_o,_n,"w","w","r");                           \
+        break;                                                          \
+    case 4:                                                             \
+        __cmpxchg_user(_p,_o,_n,"l","","r");                            \
+        break;                                                          \
+    case 8:                                                             \
+        asm volatile (                                                  \
+            "1: " LOCK_PREFIX "cmpxchg8b %4\n"                          \
+            "2:\n"                                                      \
+            ".section .fixup,\"ax\"\n"                                  \
+            "3:     movl $1,%1\n"                                       \
+            "       jmp 2b\n"                                           \
+            ".previous\n"                                               \
+            ".section __ex_table,\"a\"\n"                               \
+            "       .align 4\n"                                         \
+            "       .long 1b,3b\n"                                      \
+            ".previous"                                                 \
+            : "=A" (_o), "=r" (_rc)                                     \
+            : "c" ((u32)((u64)(_n)>>32)), "b" ((u32)(_n)),              \
+              "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0)     \
+            : "memory");                                                \
+        break;                                                          \
+    }                                                                   \
+    _rc;                                                                \
+})
+
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{
+    uint64_t w = *p, x;
+    while ( (x = __cmpxchg8b(p, w, v)) != w )
+        w = x;
+}
+
+#define mb()                    \
+    asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" )
+
+#define __save_flags(x)         \
+    asm volatile ( "pushfl ; popl %0" : "=g" (x) : )
+#define __restore_flags(x)      \
+    asm volatile ( "pushl %0 ; popfl" : : "g" (x) : "memory", "cc" )
+
+#define local_irq_save(x)       \
+    asm volatile ( "pushfl ; popl %0 ; cli" : "=g" (x) : : "memory" )
+#define local_irq_restore(x)    \
+    __restore_flags(x)
+
+#endif /* __X86_32_SYSTEM_H__ */
diff -r 9a9ddc04eea2 -r 53dc1cf50506 xen/include/asm-x86/x86_64/system.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/x86_64/system.h       Wed Nov 21 09:12:06 2007 -0700
@@ -0,0 +1,68 @@
+#ifndef __X86_64_SYSTEM_H__
+#define __X86_64_SYSTEM_H__
+
+#define cmpxchg(ptr,o,n) /* __cmpxchg() wrapper; result cast to typeof(*ptr) */ \
+    ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),            \
+                                   (unsigned long)(n),sizeof(*(ptr)))) /* o, n passed as unsigned long, plus the operand size */
+
+/*
+ * LOCK_PREFIX cmpxchg at _p: if *_p == _o, _n is stored there. _rc becomes 1
+ * if the access faults (__ex_table sends it to the fixup at 3:), otherwise 0.
+ * Without a fault, _o is updated to the value seen at _p; _n was written only
+ * if that equals the initial _o. An lvalue named _rc must be in scope.
+ */
+#define __cmpxchg_user(_p,_o,_n,_isuff,_oppre,_regtype)                 \
+    asm volatile (                                                      \
+        "1: " LOCK_PREFIX "cmpxchg"_isuff" %"_oppre"2,%3\n"             \
+        "2:\n"                          /* fixup resumes here */        \
+        ".section .fixup,\"ax\"\n"                                      \
+        "3:     movl $1,%1\n"           /* fault path: _rc = 1 */       \
+        "       jmp 2b\n"                                               \
+        ".previous\n"                                                   \
+        ".section __ex_table,\"a\"\n"                                   \
+        "       .align 8\n"                                             \
+        "       .quad 1b,3b\n"          /* fault at 1: -> handler 3: */ \
+        ".previous"                                                     \
+        : "=a" (_o), "=r" (_rc)                                         \
+        : _regtype (_n), "m" (*__xg((volatile void *)(_p))), "0" (_o), "1" (0) \
+        : "memory");
+
+#define cmpxchg_user(_p,_o,_n) /* size-dispatching front end for __cmpxchg_user() */ \
+({                                                                      \
+    int _rc; /* written only by the cases below (0 = ok, 1 = fault) */  \
+    switch ( sizeof(*(_p)) ) { /* dispatch on the pointed-to size */    \
+    case 1:                                                             \
+        __cmpxchg_user(_p,_o,_n,"b","b","q"); /* cmpxchgb %b2 */        \
+        break;                                                          \
+    case 2:                                                             \
+        __cmpxchg_user(_p,_o,_n,"w","w","r"); /* cmpxchgw %w2 */        \
+        break;                                                          \
+    case 4:                                                             \
+        __cmpxchg_user(_p,_o,_n,"l","k","r"); /* cmpxchgl %k2 */        \
+        break;                                                          \
+    case 8:                                                             \
+        __cmpxchg_user(_p,_o,_n,"q","","r");  /* cmpxchgq %2 */         \
+        break;                                                          \
+    } /* NOTE(review): no default, other sizes leave _rc uninitialized */ \
+    _rc; /* value of the whole statement expression */                  \
+})
+
+static inline void atomic_write64(uint64_t *p, uint64_t v)
+{   /* Set *p to v. */
+    *p = v; /* plain assignment; NOTE(review): presumably relies on an aligned 64-bit store being a single access on x86-64 - confirm */
+}
+
+#define mb() /* mfence instruction; "memory" clobber = compiler barrier */ \
+    asm volatile ( "mfence" : : : "memory" )
+
+#define __save_flags(x)         /* x = RFLAGS, copied via the stack */ \
+    asm volatile ( "pushfq ; popq %q0" : "=g" (x) : :"memory" )
+#define __restore_flags(x)      /* RFLAGS = x, loaded via the stack */ \
+    asm volatile ( "pushq %0 ; popfq" : : "g" (x) : "memory", "cc" )
+
+#define local_irq_save(x)       /* x = RFLAGS, then cli (clears the interrupt flag) */ \
+    asm volatile ( "pushfq ; popq %0 ; cli" : "=g" (x) : : "memory" )
+#define local_irq_restore(x)    /* reload RFLAGS from x via __restore_flags() */ \
+    __restore_flags(x)
+
+#endif /* __X86_64_SYSTEM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.