[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [xenppc-unstable] merge
# HG changeset patch # User Hollis Blanchard <hollisb@xxxxxxxxxx> # Node ID 156a0963a1aed529e5c5517e7153b0ad64d99276 # Parent d3e181fa238b93c616bd010edd45f707c359cf99 # Parent c191c649cdb387e7ec573d218c9581c639c87700 merge --- linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 14 linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 7 linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 27 linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 288 ++++++++-- linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 26 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 173 ++++-- linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 14 linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 7 linux-2.6-xen-sparse/include/xen/public/privcmd.h | 16 tools/debugger/libxendebug/xendebug.c | 2 tools/firmware/vmxassist/vm86.c | 65 ++ tools/ioemu/hw/cirrus_vga.c | 12 tools/ioemu/vl.c | 15 tools/libxc/xc_core.c | 8 tools/libxc/xc_domain.c | 10 tools/libxc/xc_hvm_build.c | 6 tools/libxc/xc_ia64_stubs.c | 12 tools/libxc/xc_linux.c | 2 tools/libxc/xc_linux_build.c | 58 +- tools/libxc/xc_linux_restore.c | 210 ++++++- tools/libxc/xc_linux_save.c | 51 + tools/libxc/xc_load_aout9.c | 4 tools/libxc/xc_load_bin.c | 4 tools/libxc/xc_load_elf.c | 19 tools/libxc/xc_private.c | 62 +- tools/libxc/xenctrl.h | 19 tools/libxc/xg_private.h | 7 tools/libxc/xg_save_restore.h | 12 tools/tests/test_x86_emulator.c | 131 ++-- xen/arch/x86/domain.c | 21 xen/arch/x86/domain_build.c | 3 xen/arch/x86/hvm/vmx/vmx.c | 22 xen/arch/x86/hvm/vmx/x86_32/exits.S | 35 - xen/arch/x86/hvm/vmx/x86_64/exits.S | 71 +- xen/arch/x86/mm.c | 15 xen/arch/x86/x86_32/asm-offsets.c | 2 xen/arch/x86/x86_32/entry.S | 5 xen/arch/x86/x86_32/traps.c | 6 xen/arch/x86/x86_64/asm-offsets.c | 3 xen/arch/x86/x86_64/entry.S | 10 xen/arch/x86/x86_64/traps.c | 12 xen/arch/x86/x86_emulate.c | 4 xen/common/kernel.c | 5 xen/common/keyhandler.c | 5 xen/common/memory.c | 20 xen/include/public/arch-ia64.h | 3 xen/include/public/arch-x86_32.h | 
19 xen/include/public/arch-x86_64.h | 21 xen/include/public/callback.h | 15 xen/include/public/dom0_ops.h | 56 - xen/include/public/grant_table.h | 2 xen/include/public/io/netif.h | 4 xen/include/public/io/ring.h | 16 xen/include/public/memory.h | 10 xen/include/public/xen.h | 22 55 files changed, 1228 insertions(+), 460 deletions(-) diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/arch/i386/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Tue Jun 06 13:25:31 2006 -0500 @@ -558,15 +558,11 @@ void __init paging_init(void) kmap_init(); - if (!xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->shared_info >= xen_start_info->nr_pages) { - /* Switch to the real shared_info page, and clear the - * dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - HYPERVISOR_shared_info = - (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } + /* Switch to the real shared_info page, and clear the + * dummy page. 
*/ + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); /* Setup mapping of lower 1st MB */ for (i = 0; i < NR_FIX_ISAMAPS; i++) diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Tue Jun 06 13:25:31 2006 -0500 @@ -665,13 +665,6 @@ void __init setup_arch(char **cmdline_p) setup_xen_features(); - if (xen_feature(XENFEAT_auto_translated_physmap) && - xen_start_info->shared_info < xen_start_info->nr_pages) { - HYPERVISOR_shared_info = - (shared_info_t *)__va(xen_start_info->shared_info); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Tue Jun 06 13:25:31 2006 -0500 @@ -666,7 +666,18 @@ void __meminit init_memory_mapping(unsig set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); } - BUG_ON(!after_bootmem && start_pfn != table_end); + if (!after_bootmem) { + BUG_ON(start_pfn != table_end); + /* + * Destroy the temporary mappings created above. Prevents + * overlap with modules area (if init mapping is very big). 
+ */ + start = __START_KERNEL_map + (table_start << PAGE_SHIFT); + end = __START_KERNEL_map + (table_end << PAGE_SHIFT); + for (; start < end; start += PAGE_SIZE) + WARN_ON(HYPERVISOR_update_va_mapping( + start, __pte_ma(0), 0)); + } __flush_tlb_all(); } @@ -752,15 +763,11 @@ void __init paging_init(void) free_area_init_node(0, NODE_DATA(0), zones, __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); - if (!xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->shared_info >= xen_start_info->nr_pages) { - /* Switch to the real shared_info page, and clear the - * dummy page. */ - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); - HYPERVISOR_shared_info = - (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } + /* Switch to the real shared_info page, and clear the + * dummy page. */ + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); + memset(empty_zero_page, 0, sizeof(empty_zero_page)); init_mm.context.pinned = 1; diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Tue Jun 06 13:25:31 2006 -0500 @@ -458,6 +458,9 @@ inline static void net_tx_action_dealloc dc = dealloc_cons; dp = dealloc_prod; + /* Ensure we see all indexes enqueued by netif_idx_release(). 
*/ + smp_rmb(); + /* * Free up any grants we have finished using */ @@ -487,6 +490,177 @@ inline static void net_tx_action_dealloc } } +static void netbk_tx_err(netif_t *netif, RING_IDX end) +{ + RING_IDX cons = netif->tx.req_cons; + + do { + netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons); + make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + } while (++cons < end); + netif->tx.req_cons = cons; + netif_schedule_work(netif); + netif_put(netif); +} + +static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp, + int work_to_do) +{ + netif_tx_request_t *first = txp; + RING_IDX cons = netif->tx.req_cons; + int frags = 1; + + while (txp->flags & NETTXF_more_data) { + if (frags >= work_to_do) { + DPRINTK("Need more frags\n"); + return -frags; + } + + txp = RING_GET_REQUEST(&netif->tx, cons + frags); + if (txp->size > first->size) { + DPRINTK("Frags galore\n"); + return -frags; + } + + first->size -= txp->size; + frags++; + + if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { + DPRINTK("txp->offset: %x, size: %u\n", + txp->offset, txp->size); + return -frags; + } + } + + return frags; +} + +static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif, + struct sk_buff *skb, + gnttab_map_grant_ref_t *mop) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; + netif_tx_request_t *txp; + unsigned long pending_idx = *((u16 *)skb->data); + RING_IDX cons = netif->tx.req_cons + 1; + int i, start; + + /* Skip first skb fragment if it is on same page as header fragment. 
*/ + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < shinfo->nr_frags; i++) { + txp = RING_GET_REQUEST(&netif->tx, cons++); + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)]; + + gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx), + GNTMAP_host_map | GNTMAP_readonly, + txp->gref, netif->domid); + + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); + netif_get(netif); + pending_tx_info[pending_idx].netif = netif; + frags[i].page = (void *)pending_idx; + } + + return mop; +} + +static int netbk_tx_check_mop(struct sk_buff *skb, + gnttab_map_grant_ref_t **mopp) +{ + gnttab_map_grant_ref_t *mop = *mopp; + int pending_idx = *((u16 *)skb->data); + netif_t *netif = pending_tx_info[pending_idx].netif; + netif_tx_request_t *txp; + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + int i, err, start; + + /* Check status of header. */ + err = mop->status; + if (unlikely(err)) { + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + netif_put(netif); + } else { + set_phys_to_machine( + __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); + grant_tx_handle[pending_idx] = mop->handle; + } + + /* Skip first skb fragment if it is on same page as header fragment. */ + start = ((unsigned long)shinfo->frags[0].page == pending_idx); + + for (i = start; i < nr_frags; i++) { + int j, newerr; + + pending_idx = (unsigned long)shinfo->frags[i].page; + + /* Check error status: if okay then remember grant handle. */ + newerr = (++mop)->status; + if (likely(!newerr)) { + set_phys_to_machine( + __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT, + FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT)); + grant_tx_handle[pending_idx] = mop->handle; + /* Had a previous error? Invalidate this fragment. 
*/ + if (unlikely(err)) + netif_idx_release(pending_idx); + continue; + } + + /* Error on this fragment: respond to client with an error. */ + txp = &pending_tx_info[pending_idx].req; + make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + netif_put(netif); + + /* Not the first error? Preceding frags already invalidated. */ + if (err) + continue; + + /* First error: invalidate header and preceding fragments. */ + pending_idx = *((u16 *)skb->data); + netif_idx_release(pending_idx); + for (j = start; j < i; j++) { + pending_idx = (unsigned long)shinfo->frags[i].page; + netif_idx_release(pending_idx); + } + + /* Remember the error: invalidate all subsequent fragments. */ + err = newerr; + } + + *mopp = mop + 1; + return err; +} + +static void netbk_fill_frags(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + int i; + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = shinfo->frags + i; + netif_tx_request_t *txp; + unsigned long pending_idx; + + pending_idx = (unsigned long)frag->page; + txp = &pending_tx_info[pending_idx].req; + frag->page = virt_to_page(MMAP_VADDR(pending_idx)); + frag->size = txp->size; + frag->page_offset = txp->offset; + + skb->len += txp->size; + skb->data_len += txp->size; + skb->truesize += txp->size; + } +} + /* Called after netfront has transmitted */ static void net_tx_action(unsigned long unused) { @@ -504,7 +678,7 @@ static void net_tx_action(unsigned long net_tx_action_dealloc(); mop = tx_map_ops; - while ((NR_PENDING_REQS < MAX_PENDING_REQS) && + while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && !list_empty(&net_schedule_list)) { /* Get a netif from the list with work to do. 
*/ ent = net_schedule_list.next; @@ -552,38 +726,44 @@ static void net_tx_action(unsigned long } netif->remaining_credit -= txreq.size; - netif->tx.req_cons++; - - netif_schedule_work(netif); - - if (unlikely(txreq.size < ETH_HLEN) || - unlikely(txreq.size > ETH_FRAME_LEN)) { + ret = netbk_count_requests(netif, &txreq, work_to_do); + if (unlikely(ret < 0)) { + netbk_tx_err(netif, i - ret); + continue; + } + i += ret; + + if (unlikely(ret > MAX_SKB_FRAGS + 1)) { + DPRINTK("Too many frags\n"); + netbk_tx_err(netif, i); + continue; + } + + if (unlikely(txreq.size < ETH_HLEN)) { DPRINTK("Bad packet size: %d\n", txreq.size); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + netbk_tx_err(netif, i); continue; } /* No crossing a page as the payload mustn't fragment. */ - if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { + if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, (txreq.offset &~PAGE_MASK) + txreq.size); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + netbk_tx_err(netif, i); continue; } pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; - data_len = (txreq.size > PKT_PROT_LEN) ? + data_len = (txreq.size > PKT_PROT_LEN && + ret < MAX_SKB_FRAGS + 1) ? 
PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len+16, GFP_ATOMIC); if (unlikely(skb == NULL)) { DPRINTK("Can't allocate a skb in start_xmit.\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + netbk_tx_err(netif, i); break; } @@ -600,9 +780,23 @@ static void net_tx_action(unsigned long pending_tx_info[pending_idx].netif = netif; *((u16 *)skb->data) = pending_idx; + __skb_put(skb, data_len); + + skb_shinfo(skb)->nr_frags = ret - 1; + if (data_len < txreq.size) { + skb_shinfo(skb)->nr_frags++; + skb_shinfo(skb)->frags[0].page = + (void *)(unsigned long)pending_idx; + } + __skb_queue_tail(&tx_queue, skb); pending_cons++; + + mop = netbk_get_requests(netif, skb, mop); + + netif->tx.req_cons = i; + netif_schedule_work(netif); if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) break; @@ -617,75 +811,56 @@ static void net_tx_action(unsigned long mop = tx_map_ops; while ((skb = __skb_dequeue(&tx_queue)) != NULL) { + netif_tx_request_t *txp; + pending_idx = *((u16 *)skb->data); netif = pending_tx_info[pending_idx].netif; - memcpy(&txreq, &pending_tx_info[pending_idx].req, - sizeof(txreq)); + txp = &pending_tx_info[pending_idx].req; /* Check the remap error code. */ - if (unlikely(mop->status)) { + if (unlikely(netbk_tx_check_mop(skb, &mop))) { printk(KERN_ALERT "#### netback grant fails\n"); - make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); - netif_put(netif); + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); - mop++; - pending_ring[MASK_PEND_IDX(pending_prod++)] = - pending_idx; continue; } - set_phys_to_machine( - __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT, - FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT)); - grant_tx_handle[pending_idx] = mop->handle; - - data_len = (txreq.size > PKT_PROT_LEN) ? 
- PKT_PROT_LEN : txreq.size; - - __skb_put(skb, data_len); + + data_len = skb->len; memcpy(skb->data, - (void *)(MMAP_VADDR(pending_idx)|txreq.offset), + (void *)(MMAP_VADDR(pending_idx)|txp->offset), data_len); - if (data_len < txreq.size) { + if (data_len < txp->size) { /* Append the packet payload as a fragment. */ - skb_shinfo(skb)->frags[0].page = - virt_to_page(MMAP_VADDR(pending_idx)); - skb_shinfo(skb)->frags[0].size = - txreq.size - data_len; - skb_shinfo(skb)->frags[0].page_offset = - txreq.offset + data_len; - skb_shinfo(skb)->nr_frags = 1; + txp->offset += data_len; + txp->size -= data_len; } else { /* Schedule a response immediately. */ netif_idx_release(pending_idx); } - - skb->data_len = txreq.size - data_len; - skb->len += skb->data_len; - skb->truesize += skb->data_len; - - skb->dev = netif->dev; - skb->protocol = eth_type_trans(skb, skb->dev); /* * Old frontends do not assert data_validated but we * can infer it from csum_blank so test both flags. */ - if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) { + if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->proto_data_valid = 1; } else { skb->ip_summed = CHECKSUM_NONE; skb->proto_data_valid = 0; } - skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank); - - netif->stats.rx_bytes += txreq.size; + skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank); + + netbk_fill_frags(skb); + + skb->dev = netif->dev; + skb->protocol = eth_type_trans(skb, skb->dev); + + netif->stats.rx_bytes += skb->len; netif->stats.rx_packets++; netif_rx(skb); netif->dev->last_rx = jiffies; - - mop++; } } @@ -695,7 +870,10 @@ static void netif_idx_release(u16 pendin unsigned long flags; spin_lock_irqsave(&_lock, flags); - dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; + dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx; + /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. 
*/ + smp_wmb(); + dealloc_prod++; spin_unlock_irqrestore(&_lock, flags); tasklet_schedule(&net_tx_tasklet); diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Tue Jun 06 13:25:31 2006 -0500 @@ -69,6 +69,8 @@ static int netback_probe(struct xenbus_d static int netback_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { + const char *message; + xenbus_transaction_t xbt; int err; struct backend_info *be = kzalloc(sizeof(struct backend_info), GFP_KERNEL); @@ -86,6 +88,27 @@ static int netback_probe(struct xenbus_d if (err) goto fail; + do { + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto fail; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + } while (err == -EAGAIN); + + if (err) { + xenbus_dev_fatal(dev, err, "completing transaction"); + goto fail; + } + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) { goto fail; @@ -93,6 +116,9 @@ static int netback_probe(struct xenbus_d return 0; +abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); fail: DPRINTK("failed"); netback_remove(dev); diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Tue Jun 06 13:25:31 2006 -0500 @@ -45,6 +45,7 @@ #include <linux/bitops.h> #include <linux/ethtool.h> #include <linux/in.h> +#include <linux/if_ether.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/arp.h> @@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne #define 
xennet_sysfs_delif(dev) do { } while(0) #endif +static inline int xennet_can_sg(struct net_device *dev) +{ + return dev->features & NETIF_F_SG; +} + /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and @@ -307,8 +313,6 @@ again: goto destroy_ring; } - xenbus_switch_state(dev, XenbusStateConnected); - return 0; abort_transaction: @@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de goto fail; memcpy(netdev->dev_addr, info->mac, ETH_ALEN); - network_connect(netdev); info->irq = bind_evtchn_to_irqhandler( info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name, netdev); - (void)send_fake_arp(netdev); - show_device(info); return 0; @@ -391,15 +392,24 @@ static void backend_changed(struct xenbu static void backend_changed(struct xenbus_device *dev, enum xenbus_state backend_state) { + struct netfront_info *np = dev->data; + struct net_device *netdev = np->netdev; + DPRINTK("\n"); switch (backend_state) { case XenbusStateInitialising: - case XenbusStateInitWait: case XenbusStateInitialised: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: + break; + + case XenbusStateInitWait: + network_connect(netdev); + xenbus_switch_state(dev, XenbusStateConnected); + (void)send_fake_arp(netdev); + show_device(np); break; case XenbusStateClosing: @@ -452,13 +462,17 @@ static int network_open(struct net_devic return 0; } +static inline int netfront_tx_slot_available(struct netfront_info *np) +{ + return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1; +} + static inline void network_maybe_wake_tx(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); if (unlikely(netif_queue_stopped(dev)) && - !RING_FULL(&np->tx) && - !gnttab_empty_grant_references(&np->gref_tx_head) && + netfront_tx_slot_available(np) && likely(netif_running(dev))) netif_wake_queue(dev); } @@ -485,7 +499,7 @@ static void network_tx_buf_gc(struct net printk(KERN_ALERT 
"network_tx_buf_gc: warning " "-- grant still in use by backend " "domain.\n"); - break; /* bail immediately */ + BUG(); } gnttab_end_foreign_access_ref( np->grant_tx_ref[id], GNTMAP_readonly); @@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str RING_PUSH_REQUESTS(&np->rx); } +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, + struct netif_tx_request *tx) +{ + struct netfront_info *np = netdev_priv(dev); + char *data = skb->data; + unsigned long mfn; + RING_IDX prod = np->tx.req_prod_pvt; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + unsigned int id; + grant_ref_t ref; + int i; + + while (len > PAGE_SIZE - offset) { + tx->size = PAGE_SIZE - offset; + tx->flags |= NETTXF_more_data; + len -= tx->size; + data += tx->size; + offset = 0; + + id = get_id_from_freelist(np->tx_skbs); + np->tx_skbs[id] = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; + tx->flags = 0; + } + + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + + tx->flags |= NETTXF_more_data; + + id = get_id_from_freelist(np->tx_skbs); + np->tx_skbs[id] = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = pfn_to_mfn(page_to_pfn(frag->page)); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = frag->page_offset; + tx->size = frag->size; + tx->flags = 0; + } + + np->tx.req_prod_pvt = prod; +} static int network_start_xmit(struct sk_buff *skb, 
struct net_device *dev) { unsigned short id; struct netfront_info *np = netdev_priv(dev); struct netif_tx_request *tx; + char *data = skb->data; RING_IDX i; grant_ref_t ref; unsigned long mfn; int notify; - - if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= - PAGE_SIZE)) { - struct sk_buff *nskb; - nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN); - if (unlikely(nskb == NULL)) - goto drop; - skb_put(nskb, skb->len); - memcpy(nskb->data, skb->data, skb->len); - /* Copy only the header fields we use in this driver. */ - nskb->dev = skb->dev; - nskb->ip_summed = skb->ip_summed; - nskb->proto_data_valid = skb->proto_data_valid; - dev_kfree_skb(skb); - skb = nskb; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", + frags); + dump_stack(); + goto drop; } spin_lock_irq(&np->tx_lock); - if (unlikely(!netif_carrier_ok(dev))) { + if (unlikely(!netif_carrier_ok(dev) || + (frags > 1 && !xennet_can_sg(dev)))) { spin_unlock_irq(&np->tx_lock); goto drop; } @@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_ tx->id = id; ref = gnttab_claim_grant_reference(&np->gref_tx_head); BUG_ON((signed short)ref < 0); - mfn = virt_to_mfn(skb->data); + mfn = virt_to_mfn(data); gnttab_grant_foreign_access_ref( ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); tx->gref = np->grant_tx_ref[id] = ref; - tx->offset = (unsigned long)skb->data & ~PAGE_MASK; - tx->size = skb->len; + tx->offset = offset; + tx->size = len; tx->flags = 0; if (skb->ip_summed == CHECKSUM_HW) /* local packet? 
*/ @@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_ tx->flags |= NETTXF_data_validated; np->tx.req_prod_pvt = i + 1; + + xennet_make_frags(skb, dev, tx); + tx->size = skb->len; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); if (notify) notify_remote_via_irq(np->irq); network_tx_buf_gc(dev); - if (RING_FULL(&np->tx) || - gnttab_empty_grant_references(&np->gref_tx_head)) + if (!netfront_tx_slot_available(np)) netif_stop_queue(dev); spin_unlock_irq(&np->tx_lock); @@ -963,12 +1039,46 @@ static struct net_device_stats *network_ return &np->stats; } +static int xennet_change_mtu(struct net_device *dev, int mtu) +{ + int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; + dev->mtu = mtu; + return 0; +} + +static int xennet_set_sg(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg", + "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } else if (dev->mtu > ETH_DATA_LEN) + dev->mtu = ETH_DATA_LEN; + + return ethtool_op_set_sg(dev, data); +} + +static void xennet_set_features(struct net_device *dev) +{ + xennet_set_sg(dev, 1); +} + static void network_connect(struct net_device *dev) { struct netfront_info *np; int i, requeue_idx; struct netif_tx_request *tx; struct sk_buff *skb; + + xennet_set_features(dev); np = netdev_priv(dev); spin_lock_irq(&np->tx_lock); @@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo { .get_tx_csum = ethtool_op_get_tx_csum, .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = xennet_set_sg, }; #ifdef CONFIG_SYSFS @@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre netdev->poll = netif_poll; netdev->set_multicast_list = network_set_multicast_list; netdev->uninit = netif_uninit; + netdev->change_mtu = xennet_change_mtu; netdev->weight = 64; netdev->features = NETIF_F_IP_CSUM; diff -r d3e181fa238b -r 
156a0963a1ae linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Tue Jun 06 13:25:31 2006 -0500 @@ -61,11 +61,11 @@ static int privcmd_ioctl(struct inode *i __asm__ __volatile__ ( "pushl %%ebx; pushl %%ecx; pushl %%edx; " "pushl %%esi; pushl %%edi; " - "movl 4(%%eax),%%ebx ;" - "movl 8(%%eax),%%ecx ;" - "movl 12(%%eax),%%edx ;" - "movl 16(%%eax),%%esi ;" - "movl 20(%%eax),%%edi ;" + "movl 8(%%eax),%%ebx ;" + "movl 16(%%eax),%%ecx ;" + "movl 24(%%eax),%%edx ;" + "movl 32(%%eax),%%esi ;" + "movl 40(%%eax),%%edi ;" "movl (%%eax),%%eax ;" "shll $5,%%eax ;" "addl $hypercall_page,%%eax ;" @@ -161,7 +161,7 @@ static int privcmd_ioctl(struct inode *i case IOCTL_PRIVCMD_MMAPBATCH: { privcmd_mmapbatch_t m; struct vm_area_struct *vma = NULL; - unsigned long __user *p; + xen_pfn_t __user *p; unsigned long addr, mfn; int i; @@ -210,7 +210,7 @@ static int privcmd_ioctl(struct inode *i batch_err: printk("batch_err ret=%d vma=%p addr=%lx " "num=%d arr=%p %lx-%lx\n", - ret, vma, m.addr, m.num, m.arr, + ret, vma, (unsigned long)m.addr, m.num, m.arr, vma ? vma->vm_start : 0, vma ? 
vma->vm_end : 0); break; } diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Tue Jun 06 13:25:31 2006 -0500 @@ -61,13 +61,6 @@ static void __init machine_specific_arch .address = { __KERNEL_CS, (unsigned long)nmi }, }; - if (xen_feature(XENFEAT_auto_translated_physmap) && - xen_start_info->shared_info < xen_start_info->nr_pages) { - HYPERVISOR_shared_info = - (shared_info_t *)__va(xen_start_info->shared_info); - memset(empty_zero_page, 0, sizeof(empty_zero_page)); - } - ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (ret == 0) ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); diff -r d3e181fa238b -r 156a0963a1ae linux-2.6-xen-sparse/include/xen/public/privcmd.h --- a/linux-2.6-xen-sparse/include/xen/public/privcmd.h Fri Jun 02 12:54:22 2006 -0500 +++ b/linux-2.6-xen-sparse/include/xen/public/privcmd.h Tue Jun 06 13:25:31 2006 -0500 @@ -33,20 +33,22 @@ #ifndef __LINUX_PUBLIC_PRIVCMD_H__ #define __LINUX_PUBLIC_PRIVCMD_H__ +#include <linux/types.h> + #ifndef __user #define __user #endif typedef struct privcmd_hypercall { - unsigned long op; - unsigned long arg[5]; + __u64 op; + __u64 arg[5]; } privcmd_hypercall_t; typedef struct privcmd_mmap_entry { - unsigned long va; - unsigned long mfn; - unsigned long npages; + __u64 va; + __u64 mfn; + __u64 npages; } privcmd_mmap_entry_t; typedef struct privcmd_mmap { @@ -58,8 +60,8 @@ typedef struct privcmd_mmapbatch { typedef struct privcmd_mmapbatch { int num; /* number of pages to populate */ domid_t dom; /* target domain */ - unsigned long addr; /* virtual address */ - unsigned long __user *arr; /* array of mfns - top nibble set on err */ + __u64 addr; /* virtual address */ + xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ } privcmd_mmapbatch_t; /* diff -r 
d3e181fa238b -r 156a0963a1ae tools/debugger/libxendebug/xendebug.c --- a/tools/debugger/libxendebug/xendebug.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/debugger/libxendebug/xendebug.c Tue Jun 06 13:25:31 2006 -0500 @@ -57,7 +57,7 @@ typedef struct domain_context vcpu_guest_context_t context[MAX_VIRT_CPUS]; long total_pages; - unsigned long *page_array; + xen_pfn_t *page_array; unsigned long cr3_phys[MAX_VIRT_CPUS]; unsigned long *cr3_virt[MAX_VIRT_CPUS]; diff -r d3e181fa238b -r 156a0963a1ae tools/firmware/vmxassist/vm86.c --- a/tools/firmware/vmxassist/vm86.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/firmware/vmxassist/vm86.c Tue Jun 06 13:25:31 2006 -0500 @@ -36,6 +36,8 @@ static unsigned prev_eip = 0; enum vm86_mode mode = 0; + +static struct regs saved_rm_regs; #ifdef DEBUG int traceset = 0; @@ -795,6 +797,8 @@ protected_mode(struct regs *regs) oldctx.esp = regs->uesp; oldctx.eflags = regs->eflags; + memset(&saved_rm_regs, 0, sizeof(struct regs)); + /* reload all segment registers */ if (!load_seg(regs->cs, &oldctx.cs_base, &oldctx.cs_limit, &oldctx.cs_arbytes)) @@ -808,6 +812,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.es_base, &oldctx.es_limit, &oldctx.es_arbytes); oldctx.es_sel = 0; + saved_rm_regs.ves = regs->ves; } if (load_seg(regs->uss, &oldctx.ss_base, @@ -817,6 +822,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.ss_base, &oldctx.ss_limit, &oldctx.ss_arbytes); oldctx.ss_sel = 0; + saved_rm_regs.uss = regs->uss; } if (load_seg(regs->vds, &oldctx.ds_base, @@ -826,6 +832,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.ds_base, &oldctx.ds_limit, &oldctx.ds_arbytes); oldctx.ds_sel = 0; + saved_rm_regs.vds = regs->vds; } if (load_seg(regs->vfs, &oldctx.fs_base, @@ -835,6 +842,7 @@ protected_mode(struct regs *regs) load_seg(0, &oldctx.fs_base, &oldctx.fs_limit, &oldctx.fs_arbytes); oldctx.fs_sel = 0; + saved_rm_regs.vfs = regs->vfs; } if (load_seg(regs->vgs, &oldctx.gs_base, @@ -844,6 +852,7 @@ protected_mode(struct regs 
*regs) load_seg(0, &oldctx.gs_base, &oldctx.gs_limit, &oldctx.gs_arbytes); oldctx.gs_sel = 0; + saved_rm_regs.vgs = regs->vgs; } /* initialize jump environment to warp back to protected mode */ @@ -880,16 +889,22 @@ real_mode(struct regs *regs) if (regs->uss >= HIGHMEM) panic("%%ss 0x%lx higher than 1MB", regs->uss); regs->uss = address(regs, regs->uss, 0) >> 4; + } else { + regs->uss = saved_rm_regs.uss; } if (regs->vds != 0) { if (regs->vds >= HIGHMEM) panic("%%ds 0x%lx higher than 1MB", regs->vds); regs->vds = address(regs, regs->vds, 0) >> 4; + } else { + regs->vds = saved_rm_regs.vds; } if (regs->ves != 0) { if (regs->ves >= HIGHMEM) panic("%%es 0x%lx higher than 1MB", regs->ves); regs->ves = address(regs, regs->ves, 0) >> 4; + } else { + regs->ves = saved_rm_regs.ves; } /* this should get us into 16-bit mode */ @@ -971,6 +986,39 @@ jmpl(struct regs *regs, int prefix) } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ eip = (prefix & DATA32) ? fetch32(regs) : fetch16(regs); cs = fetch16(regs); + + TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + + regs->cs = cs; + regs->eip = eip; + set_mode(regs, VM86_REAL); + } else + panic("jmpl"); +} + +static void +jmpl_indirect(struct regs *regs, int prefix, unsigned modrm) +{ + unsigned n = regs->eip; + unsigned cs, eip; + unsigned addr; + + addr = operand(prefix, regs, modrm); + + if (mode == VM86_REAL_TO_PROTECTED) { /* jump to protected mode */ + eip = (prefix & DATA32) ? read32(addr) : read16(addr); + addr += (prefix & DATA32) ? 4 : 2; + cs = read16(addr); + + TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); + + regs->cs = cs; + regs->eip = eip; + set_mode(regs, VM86_PROTECTED); + } else if (mode == VM86_PROTECTED_TO_REAL) { /* jump to real mode */ + eip = (prefix & DATA32) ? read32(addr) : read16(addr); + addr += (prefix & DATA32) ? 
4 : 2; + cs = read16(addr); TRACE((regs, (regs->eip - n) + 1, "jmpl 0x%x:0x%x", cs, eip)); @@ -1306,6 +1354,23 @@ opcode(struct regs *regs) } goto invalid; + case 0xFF: /* jmpl (indirect) */ + if ((mode == VM86_REAL_TO_PROTECTED) || + (mode == VM86_PROTECTED_TO_REAL)) { + unsigned modrm = fetch8(regs); + + switch((modrm >> 3) & 7) { + case 5: + jmpl_indirect(regs, prefix, modrm); + return OPC_INVALID; + + default: + break; + } + + } + goto invalid; + case 0xEB: /* short jump */ if ((mode == VM86_REAL_TO_PROTECTED) || (mode == VM86_PROTECTED_TO_REAL)) { diff -r d3e181fa238b -r 156a0963a1ae tools/ioemu/hw/cirrus_vga.c --- a/tools/ioemu/hw/cirrus_vga.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/ioemu/hw/cirrus_vga.c Tue Jun 06 13:25:31 2006 -0500 @@ -2462,7 +2462,7 @@ extern FILE *logfile; extern FILE *logfile; static void * set_vram_mapping(unsigned long begin, unsigned long end) { - unsigned long * extent_start = NULL; + xen_pfn_t *extent_start = NULL; unsigned long nr_extents; void *vram_pointer = NULL; int i; @@ -2473,14 +2473,14 @@ static void * set_vram_mapping(unsigned end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; nr_extents = (end - begin) >> TARGET_PAGE_BITS; - extent_start = malloc(sizeof(unsigned long) * nr_extents ); + extent_start = malloc(sizeof(xen_pfn_t) * nr_extents ); if (extent_start == NULL) { fprintf(stderr, "Failed malloc on set_vram_mapping\n"); return NULL; } - memset(extent_start, 0, sizeof(unsigned long) * nr_extents); + memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents); for (i = 0; i < nr_extents; i++) { @@ -2508,7 +2508,7 @@ static void * set_vram_mapping(unsigned static int unset_vram_mapping(unsigned long begin, unsigned long end) { - unsigned long * extent_start = NULL; + xen_pfn_t *extent_start = NULL; unsigned long nr_extents; int i; @@ -2519,7 +2519,7 @@ static int unset_vram_mapping(unsigned l end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK; nr_extents = (end - begin) >> TARGET_PAGE_BITS; - extent_start = 
malloc(sizeof(unsigned long) * nr_extents ); + extent_start = malloc(sizeof(xen_pfn_t) * nr_extents ); if (extent_start == NULL) { @@ -2527,7 +2527,7 @@ static int unset_vram_mapping(unsigned l return -1; } - memset(extent_start, 0, sizeof(unsigned long) * nr_extents); + memset(extent_start, 0, sizeof(xen_pfn_t) * nr_extents); for (i = 0; i < nr_extents; i++) extent_start[i] = (begin + (i * TARGET_PAGE_SIZE)) >> TARGET_PAGE_BITS; diff -r d3e181fa238b -r 156a0963a1ae tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/ioemu/vl.c Tue Jun 06 13:25:31 2006 -0500 @@ -2458,7 +2458,7 @@ int unset_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err = 0; xc_dominfo_t info; @@ -2491,7 +2491,7 @@ int set_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { xc_dominfo_t info; int err = 0; @@ -2557,7 +2557,8 @@ int main(int argc, char **argv) int serial_device_index; char qemu_dm_logfilename[64]; const char *loadvm = NULL; - unsigned long nr_pages, *page_array; + unsigned long nr_pages; + xen_pfn_t *page_array; extern void *shared_page; #if !defined(CONFIG_SOFTMMU) @@ -3023,8 +3024,8 @@ int main(int argc, char **argv) xc_handle = xc_interface_open(); - if ( (page_array = (unsigned long *) - malloc(nr_pages * sizeof(unsigned long))) == NULL) + if ( (page_array = (xen_pfn_t *) + malloc(nr_pages * sizeof(xen_pfn_t))) == NULL) { fprintf(logfile, "malloc returned error %d\n", errno); exit(-1); @@ -3079,8 +3080,8 @@ int main(int argc, char **argv) page_array[0]); #endif - fprintf(logfile, "shared page at pfn:%lx, mfn: %lx\n", (nr_pages-1), - (page_array[nr_pages - 1])); + fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n", (nr_pages-1), + (uint64_t)(page_array[nr_pages - 1])); /* we always create the cdrom drive, even if no disk is there 
*/ bdrv_init(); diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_core.c --- a/tools/libxc/xc_core.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_core.c Tue Jun 06 13:25:31 2006 -0500 @@ -28,7 +28,7 @@ xc_domain_dumpcore_via_callback(int xc_h dumpcore_rtn_t dump_rtn) { unsigned long nr_pages; - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; xc_dominfo_t info; int i, nr_vcpus = 0; char *dump_mem, *dump_mem_start = NULL; @@ -70,7 +70,7 @@ xc_domain_dumpcore_via_callback(int xc_h sizeof(vcpu_guest_context_t)*nr_vcpus; dummy_len = (sizeof(struct xc_core_header) + (sizeof(vcpu_guest_context_t) * nr_vcpus) + - (nr_pages * sizeof(unsigned long))); + (nr_pages * sizeof(xen_pfn_t))); header.xch_pages_offset = round_pgup(dummy_len); sts = dump_rtn(args, (char *)&header, sizeof(struct xc_core_header)); @@ -81,7 +81,7 @@ xc_domain_dumpcore_via_callback(int xc_h if ( sts != 0 ) goto error_out; - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) { printf("Could not allocate memory\n"); goto error_out; @@ -91,7 +91,7 @@ xc_domain_dumpcore_via_callback(int xc_h printf("Could not get the page frame list\n"); goto error_out; } - sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(unsigned long)); + sts = dump_rtn(args, (char *)page_array, nr_pages * sizeof(xen_pfn_t)); if ( sts != 0 ) goto error_out; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_domain.c Tue Jun 06 13:25:31 2006 -0500 @@ -291,7 +291,7 @@ int xc_domain_memory_increase_reservatio unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err; struct xen_memory_reservation reservation = { @@ -324,7 +324,7 @@ int xc_domain_memory_decrease_reservatio uint32_t domid, unsigned long nr_extents, unsigned int extent_order, - 
unsigned long *extent_start) + xen_pfn_t *extent_start) { int err; struct xen_memory_reservation reservation = { @@ -363,7 +363,7 @@ int xc_domain_memory_populate_physmap(in unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start) + xen_pfn_t *extent_start) { int err; struct xen_memory_reservation reservation = { @@ -392,8 +392,8 @@ int xc_domain_translate_gpfn_list(int xc int xc_domain_translate_gpfn_list(int xc_handle, uint32_t domid, unsigned long nr_gpfns, - unsigned long *gpfn_list, - unsigned long *mfn_list) + xen_pfn_t *gpfn_list, + xen_pfn_t *mfn_list) { struct xen_translate_gpfn_list op = { .domid = domid, diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_hvm_build.c Tue Jun 06 13:25:31 2006 -0500 @@ -135,7 +135,7 @@ static void set_hvm_info_checksum(struct * hvmloader will use this info to set BIOS accordingly */ static int set_hvm_info(int xc_handle, uint32_t dom, - unsigned long *pfn_list, unsigned int vcpus, + xen_pfn_t *pfn_list, unsigned int vcpus, unsigned int pae, unsigned int acpi, unsigned int apic) { char *va_map; @@ -178,7 +178,7 @@ static int setup_guest(int xc_handle, unsigned int store_evtchn, unsigned long *store_mfn) { - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; unsigned long count, i; unsigned long long ptr; xc_mmu_t *mmu = NULL; @@ -223,7 +223,7 @@ static int setup_guest(int xc_handle, goto error_out; } - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) { PERROR("Could not allocate memory.\n"); goto error_out; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_ia64_stubs.c Tue Jun 06 13:25:31 2006 -0500 @@ -57,7 +57,7 @@ xc_plan9_build(int xc_handle, int 
xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + xen_pfn_t *pfn_buf, unsigned int start_page, unsigned int nr_pages) { @@ -65,7 +65,7 @@ int xc_ia64_get_pfn_list(int xc_handle, int num_pfns,ret; unsigned int __start_page, __nr_pages; unsigned long max_pfns; - unsigned long *__pfn_buf; + xen_pfn_t *__pfn_buf; __start_page = start_page; __nr_pages = nr_pages; @@ -80,7 +80,7 @@ int xc_ia64_get_pfn_list(int xc_handle, set_xen_guest_handle(op.u.getmemlist.buffer, __pfn_buf); if ( (max_pfns != -1UL) - && mlock(__pfn_buf, __nr_pages * sizeof(unsigned long)) != 0 ) + && mlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)) != 0 ) { PERROR("Could not lock pfn list buffer"); return -1; @@ -89,7 +89,7 @@ int xc_ia64_get_pfn_list(int xc_handle, ret = do_dom0_op(xc_handle, &op); if (max_pfns != -1UL) - (void)munlock(__pfn_buf, __nr_pages * sizeof(unsigned long)); + (void)munlock(__pfn_buf, __nr_pages * sizeof(xen_pfn_t)); if (max_pfns == -1UL) return 0; @@ -122,10 +122,10 @@ int xc_ia64_copy_to_domain_pages(int xc_ { // N.B. 
gva should be page aligned - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; int i; - if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){ + if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ){ PERROR("Could not allocate memory"); goto error_out; } diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux.c --- a/tools/libxc/xc_linux.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_linux.c Tue Jun 06 13:25:31 2006 -0500 @@ -28,7 +28,7 @@ int xc_interface_close(int xc_handle) } void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot, - unsigned long *arr, int num) + xen_pfn_t *arr, int num) { privcmd_mmapbatch_t ioctlx; void *addr; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_linux_build.c Tue Jun 06 13:25:31 2006 -0500 @@ -10,6 +10,7 @@ #include "xc_aout9.h" #include <stdlib.h> #include <unistd.h> +#include <inttypes.h> #include <zlib.h> #if defined(__i386__) @@ -136,7 +137,7 @@ int load_initrd(int xc_handle, domid_t d int load_initrd(int xc_handle, domid_t dom, struct initrd_info *initrd, unsigned long physbase, - unsigned long *phys_to_mach) + xen_pfn_t *phys_to_mach) { char page[PAGE_SIZE]; unsigned long pfn_start, pfn, nr_pages; @@ -189,7 +190,7 @@ static int setup_pg_tables(int xc_handle vcpu_guest_context_t *ctxt, unsigned long dsi_v_start, unsigned long v_end, - unsigned long *page_array, + xen_pfn_t *page_array, unsigned long vpt_start, unsigned long vpt_end, unsigned shadow_mode_enabled) @@ -251,19 +252,35 @@ static int setup_pg_tables_pae(int xc_ha vcpu_guest_context_t *ctxt, unsigned long dsi_v_start, unsigned long v_end, - unsigned long *page_array, + xen_pfn_t *page_array, unsigned long vpt_start, unsigned long vpt_end, - unsigned shadow_mode_enabled) + unsigned shadow_mode_enabled, + unsigned pae_mode) { l1_pgentry_64_t *vl1tab = NULL, *vl1e = NULL; l2_pgentry_64_t *vl2tab = NULL, 
*vl2e = NULL; l3_pgentry_64_t *vl3tab = NULL, *vl3e = NULL; uint64_t l1tab, l2tab, l3tab, pl1tab, pl2tab, pl3tab; - unsigned long ppt_alloc, count; + unsigned long ppt_alloc, count, nmfn; /* First allocate page for page dir. */ ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT; + + if ( pae_mode == PAEKERN_extended_cr3 ) + { + ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3); + } + else if ( page_array[ppt_alloc] > 0xfffff ) + { + nmfn = xc_make_page_below_4G(xc_handle, dom, page_array[ppt_alloc]); + if ( nmfn == 0 ) + { + fprintf(stderr, "Couldn't get a page below 4GB :-(\n"); + goto error_out; + } + page_array[ppt_alloc] = nmfn; + } alloc_pt(l3tab, vl3tab, pl3tab); vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)]; @@ -340,7 +357,7 @@ static int setup_pg_tables_64(int xc_han vcpu_guest_context_t *ctxt, unsigned long dsi_v_start, unsigned long v_end, - unsigned long *page_array, + xen_pfn_t *page_array, unsigned long vpt_start, unsigned long vpt_end, int shadow_mode_enabled) @@ -451,7 +468,7 @@ static int setup_guest(int xc_handle, unsigned int console_evtchn, unsigned long *console_mfn, uint32_t required_features[XENFEAT_NR_SUBMAPS]) { - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; struct load_funcs load_funcs; struct domain_setup_info dsi; unsigned long vinitrd_start; @@ -478,7 +495,7 @@ static int setup_guest(int xc_handle, start_page = dsi.v_start >> PAGE_SHIFT; pgnr = (v_end - dsi.v_start) >> PAGE_SHIFT; - if ( (page_array = malloc(pgnr * sizeof(unsigned long))) == NULL ) + if ( (page_array = malloc(pgnr * sizeof(xen_pfn_t))) == NULL ) { PERROR("Could not allocate memory"); goto error_out; @@ -579,11 +596,11 @@ static int compat_check(int xc_handle, s } if (strstr(xen_caps, "xen-3.0-x86_32p")) { - if (!dsi->pae_kernel) { + if (dsi->pae_kernel == PAEKERN_no) { ERROR("Non PAE-kernel on PAE host."); return 0; } - } else if (dsi->pae_kernel) { + } else if (dsi->pae_kernel != PAEKERN_no) { ERROR("PAE-kernel on non-PAE host."); return 
0; } @@ -606,7 +623,7 @@ static int setup_guest(int xc_handle, unsigned int console_evtchn, unsigned long *console_mfn, uint32_t required_features[XENFEAT_NR_SUBMAPS]) { - unsigned long *page_array = NULL; + xen_pfn_t *page_array = NULL; unsigned long count, i, hypercall_pfn; start_info_t *start_info; shared_info_t *shared_info; @@ -617,7 +634,7 @@ static int setup_guest(int xc_handle, unsigned long nr_pt_pages; unsigned long physmap_pfn; - unsigned long *physmap, *physmap_e; + xen_pfn_t *physmap, *physmap_e; struct load_funcs load_funcs; struct domain_setup_info dsi; @@ -673,7 +690,8 @@ static int setup_guest(int xc_handle, for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ ) { - if ( (supported_features[i]&required_features[i]) != required_features[i] ) + if ( (supported_features[i] & required_features[i]) != + required_features[i] ) { ERROR("Guest kernel does not support a required feature."); goto error_out; @@ -719,7 +737,7 @@ static int setup_guest(int xc_handle, (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ ((_l) & ~((1UL<<(_s))-1))) >> (_s)) #if defined(__i386__) - if ( dsi.pae_kernel ) + if ( dsi.pae_kernel != PAEKERN_no ) { if ( (1 + /* # L3 */ NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT_PAE) + /* # L2 */ @@ -797,11 +815,11 @@ static int setup_guest(int xc_handle, /* setup page tables */ #if defined(__i386__) - if (dsi.pae_kernel) + if (dsi.pae_kernel != PAEKERN_no) rc = setup_pg_tables_pae(xc_handle, dom, ctxt, dsi.v_start, v_end, page_array, vpt_start, vpt_end, - shadow_mode_enabled); + shadow_mode_enabled, dsi.pae_kernel); else rc = setup_pg_tables(xc_handle, dom, ctxt, dsi.v_start, v_end, @@ -824,7 +842,7 @@ static int setup_guest(int xc_handle, */ if ( !shadow_mode_enabled ) { - if ( dsi.pae_kernel ) + if ( dsi.pae_kernel != PAEKERN_no ) { if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE, xen_cr3_to_pfn(ctxt->ctrlreg[3]), dom) ) @@ -865,8 +883,8 @@ static int setup_guest(int xc_handle, ((uint64_t)page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, count) 
) { - fprintf(stderr,"m2p update failure p=%lx m=%lx\n", - count, page_array[count]); + fprintf(stderr,"m2p update failure p=%lx m=%"PRIx64"\n", + count, (uint64_t)page_array[count]); munmap(physmap, PAGE_SIZE); goto error_out; } @@ -958,7 +976,7 @@ static int setup_guest(int xc_handle, rc = xc_version(xc_handle, XENVER_version, NULL); sprintf(start_info->magic, "xen-%i.%i-x86_%d%s", rc >> 16, rc & (0xFFFF), (unsigned int)sizeof(long)*8, - dsi.pae_kernel ? "p" : ""); + (dsi.pae_kernel != PAEKERN_no) ? "p" : ""); start_info->nr_pages = nr_pages; start_info->shared_info = guest_shared_info_mfn << PAGE_SHIFT; start_info->flags = flags; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux_restore.c --- a/tools/libxc/xc_linux_restore.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_linux_restore.c Tue Jun 06 13:25:31 2006 -0500 @@ -25,10 +25,10 @@ static unsigned long max_pfn; static unsigned long max_pfn; /* Live mapping of the table mapping each PFN to its current MFN. */ -static unsigned long *live_p2m = NULL; +static xen_pfn_t *live_p2m = NULL; /* A table mapping each PFN to its new MFN. */ -static unsigned long *p2m = NULL; +static xen_pfn_t *p2m = NULL; static ssize_t @@ -108,7 +108,7 @@ int xc_linux_restore(int xc_handle, int unsigned int console_evtchn, unsigned long *console_mfn) { DECLARE_DOM0_OP; - int rc = 1, i, n; + int rc = 1, i, n, pae_extended_cr3 = 0; unsigned long mfn, pfn; unsigned int prev_pc, this_pc; int verify = 0; @@ -126,7 +126,7 @@ int xc_linux_restore(int xc_handle, int unsigned long *pfn_type = NULL; /* A table of MFNs to map in the current region */ - unsigned long *region_mfn = NULL; + xen_pfn_t *region_mfn = NULL; /* Types of the pfns in the current region */ unsigned long region_pfn_type[MAX_BATCH_SIZE]; @@ -135,7 +135,7 @@ int xc_linux_restore(int xc_handle, int unsigned long *page = NULL; /* A copy of the pfn-to-mfn table frame list. 
*/ - unsigned long *p2m_frame_list = NULL; + xen_pfn_t *p2m_frame_list = NULL; /* A temporary mapping of the guest's start_info page. */ start_info_t *start_info; @@ -162,30 +162,88 @@ int xc_linux_restore(int xc_handle, int return 1; } - if (mlock(&ctxt, sizeof(ctxt))) { /* needed for build dom0 op, but might as well do early */ ERR("Unable to mlock ctxt"); return 1; } - - /* Read the saved P2M frame list */ - if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) { + if (!(p2m_frame_list = malloc(P2M_FL_SIZE))) { ERR("Couldn't allocate p2m_frame_list array"); goto out; } - if (!read_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + /* Read first entry of P2M list, or extended-info signature (~0UL). */ + if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { + ERR("read extended-info signature failed"); + goto out; + } + + if (p2m_frame_list[0] == ~0UL) { + uint32_t tot_bytes; + + /* Next 4 bytes: total size of following extended info. */ + if (!read_exact(io_fd, &tot_bytes, sizeof(tot_bytes))) { + ERR("read extended-info size failed"); + goto out; + } + + while (tot_bytes) { + uint32_t chunk_bytes; + char chunk_sig[4]; + + /* 4-character chunk signature + 4-byte remaining chunk size. */ + if (!read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || + !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes))) { + ERR("read extended-info chunk signature failed"); + goto out; + } + tot_bytes -= 8; + + /* VCPU context structure? */ + if (!strncmp(chunk_sig, "vcpu", 4)) { + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERR("read extended-info vcpu context failed"); + goto out; + } + tot_bytes -= sizeof(struct vcpu_guest_context); + chunk_bytes -= sizeof(struct vcpu_guest_context); + + if (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3)) + pae_extended_cr3 = 1; + } + + /* Any remaining bytes of this chunk: read and discard. 
*/ + while (chunk_bytes) { + unsigned long sz = chunk_bytes; + if ( sz > P2M_FL_SIZE ) + sz = P2M_FL_SIZE; + if (!read_exact(io_fd, p2m_frame_list, sz)) { + ERR("read-and-discard extended-info chunk bytes failed"); + goto out; + } + chunk_bytes -= sz; + tot_bytes -= sz; + } + } + + /* Now read the real first entry of P2M list. */ + if (!read_exact(io_fd, p2m_frame_list, sizeof(long))) { + ERR("read first entry of p2m_frame_list failed"); + goto out; + } + } + + /* First entry is already read into the p2m array. */ + if (!read_exact(io_fd, &p2m_frame_list[1], P2M_FL_SIZE - sizeof(long))) { ERR("read p2m_frame_list failed"); goto out; } - /* We want zeroed memory so use calloc rather than malloc. */ - p2m = calloc(max_pfn, sizeof(unsigned long)); + p2m = calloc(max_pfn, sizeof(xen_pfn_t)); pfn_type = calloc(max_pfn, sizeof(unsigned long)); - region_mfn = calloc(MAX_BATCH_SIZE, sizeof(unsigned long)); + region_mfn = calloc(MAX_BATCH_SIZE, sizeof(xen_pfn_t)); if ((p2m == NULL) || (pfn_type == NULL) || (region_mfn == NULL)) { ERR("memory alloc failed"); @@ -193,7 +251,7 @@ int xc_linux_restore(int xc_handle, int goto out; } - if (mlock(region_mfn, sizeof(unsigned long) * MAX_BATCH_SIZE)) { + if (mlock(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE)) { ERR("Could not mlock region_mfn"); goto out; } @@ -331,17 +389,27 @@ int xc_linux_restore(int xc_handle, int ** A page table page - need to 'uncanonicalize' it, i.e. ** replace all the references to pfns with the corresponding ** mfns for the new domain. + ** + ** On PAE we need to ensure that PGDs are in MFNs < 4G, and + ** so we may need to update the p2m after the main loop. + ** Hence we defer canonicalization of L1s until then. */ - if(!uncanonicalize_pagetable(pagetype, page)) { - /* - ** Failing to uncanonicalize a page table can be ok - ** under live migration since the pages type may have - ** changed by now (and we'll get an update later). 
- */ - DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", - pagetype >> 28, pfn, mfn); - nraces++; - continue; + if ((pt_levels != 3) || + pae_extended_cr3 || + (pagetype != L1TAB)) { + + if (!uncanonicalize_pagetable(pagetype, page)) { + /* + ** Failing to uncanonicalize a page table can be ok + ** under live migration since the pages type may have + ** changed by now (and we'll get an update later). + */ + DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", + pagetype >> 28, pfn, mfn); + nraces++; + continue; + } + } } else if(pagetype != NOTAB) { @@ -389,6 +457,100 @@ int xc_linux_restore(int xc_handle, int } DPRINTF("Received all pages (%d races)\n", nraces); + + if ((pt_levels == 3) && !pae_extended_cr3) { + + /* + ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This + ** is a little awkward and involves (a) finding all such PGDs and + ** replacing them with 'lowmem' versions; (b) updating the p2m[] + ** with the new info; and (c) canonicalizing all the L1s using the + ** (potentially updated) p2m[]. + ** + ** This is relatively slow (and currently involves two passes through + ** the pfn_type[] array), but at least seems to be correct. May wish + ** to consider more complex approaches to optimize this later. 
+ */ + + int j, k; + + /* First pass: find all L3TABs current in > 4G mfns and get new mfns */ + for (i = 0; i < max_pfn; i++) { + + if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) { + + unsigned long new_mfn; + uint64_t l3ptes[4]; + uint64_t *l3tab; + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, p2m[i]); + + for(j = 0; j < 4; j++) + l3ptes[j] = l3tab[j]; + + munmap(l3tab, PAGE_SIZE); + + if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) { + ERR("Couldn't get a page below 4GB :-("); + goto out; + } + + p2m[i] = new_mfn; + if (xc_add_mmu_update(xc_handle, mmu, + (((unsigned long long)new_mfn) + << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE, i)) { + ERR("Couldn't m2p on PAE root pgdir"); + goto out; + } + + l3tab = (uint64_t *) + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ | PROT_WRITE, p2m[i]); + + for(j = 0; j < 4; j++) + l3tab[j] = l3ptes[j]; + + munmap(l3tab, PAGE_SIZE); + + } + } + + /* Second pass: find all L1TABs and uncanonicalize them */ + j = 0; + + for(i = 0; i < max_pfn; i++) { + + if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) { + region_mfn[j] = p2m[i]; + j++; + } + + if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) { + + if (!(region_base = xc_map_foreign_batch( + xc_handle, dom, PROT_READ | PROT_WRITE, + region_mfn, j))) { + ERR("map batch failed"); + goto out; + } + + for(k = 0; k < j; k++) { + if(!uncanonicalize_pagetable(L1TAB, + region_base + k*PAGE_SIZE)) { + ERR("failed uncanonicalize pt!"); + goto out; + } + } + + munmap(region_base, j*PAGE_SIZE); + j = 0; + } + } + + } if (xc_finish_mmu_updates(xc_handle, mmu)) { diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_linux_save.c --- a/tools/libxc/xc_linux_save.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_linux_save.c Tue Jun 06 13:25:31 2006 -0500 @@ -40,10 +40,10 @@ static unsigned long max_pfn; static unsigned long max_pfn; /* Live mapping of the table mapping each PFN to its current MFN. 
*/ -static unsigned long *live_p2m = NULL; +static xen_pfn_t *live_p2m = NULL; /* Live mapping of system MFN to PFN table. */ -static unsigned long *live_m2p = NULL; +static xen_pfn_t *live_m2p = NULL; /* grep fodder: machine_to_phys */ @@ -501,22 +501,22 @@ void canonicalize_pagetable(unsigned lon -static unsigned long *xc_map_m2p(int xc_handle, +static xen_pfn_t *xc_map_m2p(int xc_handle, unsigned long max_mfn, int prot) { struct xen_machphys_mfn_list xmml; privcmd_mmap_entry_t *entries; unsigned long m2p_chunks, m2p_size; - unsigned long *m2p; - unsigned long *extent_start; + xen_pfn_t *m2p; + xen_pfn_t *extent_start; int i, rc; m2p_size = M2P_SIZE(max_mfn); m2p_chunks = M2P_CHUNKS(max_mfn); xmml.max_extents = m2p_chunks; - if (!(extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { + if (!(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t)))) { ERR("failed to allocate space for m2p mfns"); return NULL; } @@ -583,11 +583,11 @@ int xc_linux_save(int xc_handle, int io_ char page[PAGE_SIZE]; /* Double and single indirect references to the live P2M table */ - unsigned long *live_p2m_frame_list_list = NULL; - unsigned long *live_p2m_frame_list = NULL; + xen_pfn_t *live_p2m_frame_list_list = NULL; + xen_pfn_t *live_p2m_frame_list = NULL; /* A copy of the pfn-to-mfn table frame list. */ - unsigned long *p2m_frame_list = NULL; + xen_pfn_t *p2m_frame_list = NULL; /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; @@ -712,11 +712,11 @@ int xc_linux_save(int xc_handle, int io_ memcpy(p2m_frame_list, live_p2m_frame_list, P2M_FL_SIZE); /* Canonicalise the pfn-to-mfn table frame-number list. 
*/ - for (i = 0; i < max_pfn; i += ulpp) { - if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) { + for (i = 0; i < max_pfn; i += fpp) { + if (!translate_mfn_to_pfn(&p2m_frame_list[i/fpp])) { ERR("Frame# in pfn-to-mfn frame list is not in pseudophys"); - ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp, - p2m_frame_list[i/ulpp]); + ERR("entry %d: p2m_frame_list[%ld] is 0x%"PRIx64, i, i/fpp, + (uint64_t)p2m_frame_list[i/fpp]); goto out; } } @@ -818,12 +818,33 @@ int xc_linux_save(int xc_handle, int io_ /* Start writing out the saved-domain record. */ - if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { ERR("write: max_pfn"); goto out; } - if(!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { + /* + * Write an extended-info structure to inform the restore code that + * a PAE guest understands extended CR3 (PDPTs above 4GB). Turns off + * slow paths in the restore code. + */ + if ((pt_levels == 3) && + (ctxt.vm_assist & (1UL << VMASST_TYPE_pae_extended_cr3))) { + unsigned long signature = ~0UL; + uint32_t tot_sz = sizeof(struct vcpu_guest_context) + 8; + uint32_t chunk_sz = sizeof(struct vcpu_guest_context); + char chunk_sig[] = "vcpu"; + if (!write_exact(io_fd, &signature, sizeof(signature)) || + !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) || + !write_exact(io_fd, &chunk_sig, 4) || + !write_exact(io_fd, &chunk_sz, sizeof(chunk_sz)) || + !write_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERR("write: extended info"); + goto out; + } + } + + if (!write_exact(io_fd, p2m_frame_list, P2M_FL_SIZE)) { ERR("write: p2m_frame_list"); goto out; } diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_load_aout9.c --- a/tools/libxc/xc_load_aout9.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_load_aout9.c Tue Jun 06 13:25:31 2006 -0500 @@ -17,7 +17,7 @@ #define KOFFSET(_p) ((_p)&~KZERO) static int parseaout9image(const char *, unsigned long, struct domain_setup_info *); -static int loadaout9image(const 
char *, unsigned long, int, uint32_t, unsigned long *, struct domain_setup_info *); +static int loadaout9image(const char *, unsigned long, int, uint32_t, xen_pfn_t *, struct domain_setup_info *); static void copyout(int, uint32_t, unsigned long *, unsigned long, const char *, int); struct Exec *get_header(const char *, unsigned long, struct Exec *); @@ -79,7 +79,7 @@ loadaout9image( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, + xen_pfn_t *parray, struct domain_setup_info *dsi) { struct Exec ehdr; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_load_bin.c --- a/tools/libxc/xc_load_bin.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_load_bin.c Tue Jun 06 13:25:31 2006 -0500 @@ -107,7 +107,7 @@ static int static int loadbinimage( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi); + xen_pfn_t *parray, struct domain_setup_info *dsi); int probe_bin(const char *image, unsigned long image_size, @@ -235,7 +235,7 @@ static int static int loadbinimage( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi) + xen_pfn_t *parray, struct domain_setup_info *dsi) { unsigned long size; char *va; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_load_elf.c --- a/tools/libxc/xc_load_elf.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_load_elf.c Tue Jun 06 13:25:31 2006 -0500 @@ -17,10 +17,10 @@ static int static int loadelfimage( const char *image, unsigned long image_size, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi); + xen_pfn_t *parray, struct domain_setup_info *dsi); static int loadelfsymtab( - const char *image, int xch, uint32_t dom, unsigned long *parray, + const char *image, int xch, uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi); int probe_elf(const char *image, @@ -138,8 +138,15 @@ static int parseelfimage(const char 
*ima ERROR("Actually saw: '%s'", guestinfo); return -EINVAL; } - if ( (strstr(guestinfo, "PAE=yes") != NULL) ) - dsi->pae_kernel = 1; + + dsi->pae_kernel = PAEKERN_no; + p = strstr(guestinfo, "PAE=yes"); + if ( p != NULL ) + { + dsi->pae_kernel = PAEKERN_yes; + if ( !strncmp(p+7, "[extended-cr3]", 14) ) + dsi->pae_kernel = PAEKERN_extended_cr3; + } break; } @@ -220,7 +227,7 @@ static int static int loadelfimage( const char *image, unsigned long elfsize, int xch, uint32_t dom, - unsigned long *parray, struct domain_setup_info *dsi) + xen_pfn_t *parray, struct domain_setup_info *dsi) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image; Elf_Phdr *phdr; @@ -274,7 +281,7 @@ loadelfimage( static int loadelfsymtab( - const char *image, int xch, uint32_t dom, unsigned long *parray, + const char *image, int xch, uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi) { Elf_Ehdr *ehdr = (Elf_Ehdr *)image, *sym_ehdr; diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xc_private.c Tue Jun 06 13:25:31 2006 -0500 @@ -4,6 +4,7 @@ * Helper functions for the rest of the library. 
*/ +#include <inttypes.h> #include "xc_private.h" /* NB: arr must be mlock'ed */ @@ -134,9 +135,9 @@ int xc_memory_op(int xc_handle, struct xen_memory_reservation *reservation = arg; struct xen_machphys_mfn_list *xmml = arg; struct xen_translate_gpfn_list *trans = arg; - unsigned long *extent_start; - unsigned long *gpfn_list; - unsigned long *mfn_list; + xen_pfn_t *extent_start; + xen_pfn_t *gpfn_list; + xen_pfn_t *mfn_list; long ret = -EINVAL; hypercall.op = __HYPERVISOR_memory_op; @@ -156,7 +157,7 @@ int xc_memory_op(int xc_handle, get_xen_guest_handle(extent_start, reservation->extent_start); if ( (extent_start != NULL) && (mlock(extent_start, - reservation->nr_extents * sizeof(unsigned long)) != 0) ) + reservation->nr_extents * sizeof(xen_pfn_t)) != 0) ) { PERROR("Could not mlock"); safe_munlock(reservation, sizeof(*reservation)); @@ -171,7 +172,7 @@ int xc_memory_op(int xc_handle, } get_xen_guest_handle(extent_start, xmml->extent_start); if ( mlock(extent_start, - xmml->max_extents * sizeof(unsigned long)) != 0 ) + xmml->max_extents * sizeof(xen_pfn_t)) != 0 ) { PERROR("Could not mlock"); safe_munlock(xmml, sizeof(*xmml)); @@ -192,17 +193,17 @@ int xc_memory_op(int xc_handle, goto out1; } get_xen_guest_handle(gpfn_list, trans->gpfn_list); - if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(long)) != 0 ) + if ( mlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 ) { PERROR("Could not mlock"); safe_munlock(trans, sizeof(*trans)); goto out1; } get_xen_guest_handle(mfn_list, trans->mfn_list); - if ( mlock(mfn_list, trans->nr_gpfns * sizeof(long)) != 0 ) - { - PERROR("Could not mlock"); - safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long)); + if ( mlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)) != 0 ) + { + PERROR("Could not mlock"); + safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)); safe_munlock(trans, sizeof(*trans)); goto out1; } @@ -220,22 +221,22 @@ int xc_memory_op(int xc_handle, get_xen_guest_handle(extent_start, 
reservation->extent_start); if ( extent_start != NULL ) safe_munlock(extent_start, - reservation->nr_extents * sizeof(unsigned long)); + reservation->nr_extents * sizeof(xen_pfn_t)); break; case XENMEM_machphys_mfn_list: safe_munlock(xmml, sizeof(*xmml)); get_xen_guest_handle(extent_start, xmml->extent_start); safe_munlock(extent_start, - xmml->max_extents * sizeof(unsigned long)); + xmml->max_extents * sizeof(xen_pfn_t)); break; case XENMEM_add_to_physmap: safe_munlock(arg, sizeof(struct xen_add_to_physmap)); break; case XENMEM_translate_gpfn_list: get_xen_guest_handle(mfn_list, trans->mfn_list); - safe_munlock(mfn_list, trans->nr_gpfns * sizeof(long)); + safe_munlock(mfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)); get_xen_guest_handle(gpfn_list, trans->gpfn_list); - safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(long)); + safe_munlock(gpfn_list, trans->nr_gpfns * sizeof(xen_pfn_t)); safe_munlock(trans, sizeof(*trans)); break; } @@ -263,7 +264,7 @@ long long xc_domain_get_cpu_usage( int x int xc_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + xen_pfn_t *pfn_buf, unsigned long max_pfns) { DECLARE_DOM0_OP; @@ -274,10 +275,10 @@ int xc_get_pfn_list(int xc_handle, set_xen_guest_handle(op.u.getmemlist.buffer, pfn_buf); #ifdef VALGRIND - memset(pfn_buf, 0, max_pfns * sizeof(unsigned long)); + memset(pfn_buf, 0, max_pfns * sizeof(xen_pfn_t)); #endif - if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 ) + if ( mlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)) != 0 ) { PERROR("xc_get_pfn_list: pfn_buf mlock failed"); return -1; @@ -285,7 +286,7 @@ int xc_get_pfn_list(int xc_handle, ret = do_dom0_op(xc_handle, &op); - safe_munlock(pfn_buf, max_pfns * sizeof(unsigned long)); + safe_munlock(pfn_buf, max_pfns * sizeof(xen_pfn_t)); #if 0 #ifdef DEBUG @@ -364,7 +365,7 @@ unsigned long xc_get_filesz(int fd) } void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size, - int xch, uint32_t dom, unsigned long *parray, + int xch, uint32_t 
dom, xen_pfn_t *parray, unsigned long vstart) { char *va; @@ -428,6 +429,29 @@ int xc_version(int xc_handle, int cmd, v safe_munlock(arg, argsize); return rc; +} + +unsigned long xc_make_page_below_4G( + int xc_handle, uint32_t domid, unsigned long mfn) +{ + xen_pfn_t old_mfn = mfn; + xen_pfn_t new_mfn; + + if ( xc_domain_memory_decrease_reservation( + xc_handle, domid, 1, 0, &old_mfn) != 0 ) + { + fprintf(stderr,"xc_make_page_below_4G decrease failed. mfn=%lx\n",mfn); + return 0; + } + + if ( xc_domain_memory_increase_reservation( + xc_handle, domid, 1, 0, 32, &new_mfn) != 0 ) + { + fprintf(stderr,"xc_make_page_below_4G increase failed. mfn=%lx\n",mfn); + return 0; + } + + return new_mfn; } /* diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xenctrl.h Tue Jun 06 13:25:31 2006 -0500 @@ -420,26 +420,26 @@ int xc_domain_memory_increase_reservatio unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start); + xen_pfn_t *extent_start); int xc_domain_memory_decrease_reservation(int xc_handle, uint32_t domid, unsigned long nr_extents, unsigned int extent_order, - unsigned long *extent_start); + xen_pfn_t *extent_start); int xc_domain_memory_populate_physmap(int xc_handle, uint32_t domid, unsigned long nr_extents, unsigned int extent_order, unsigned int address_bits, - unsigned long *extent_start); + xen_pfn_t *extent_start); int xc_domain_translate_gpfn_list(int xc_handle, uint32_t domid, unsigned long nr_gpfns, - unsigned long *gpfn_list, - unsigned long *mfn_list); + xen_pfn_t *gpfn_list, + xen_pfn_t *mfn_list); int xc_domain_ioport_permission(int xc_handle, uint32_t domid, @@ -458,6 +458,9 @@ int xc_domain_iomem_permission(int xc_ha unsigned long nr_mfns, uint8_t allow_access); +unsigned long xc_make_page_below_4G(int xc_handle, uint32_t domid, + unsigned long mfn); + typedef dom0_perfc_desc_t xc_perfc_desc_t; /* IMPORTANT: The 
caller is responsible for mlock()'ing the @desc array. */ int xc_perfc_control(int xc_handle, @@ -489,7 +492,7 @@ void *xc_map_foreign_range(int xc_handle unsigned long mfn ); void *xc_map_foreign_batch(int xc_handle, uint32_t dom, int prot, - unsigned long *arr, int num ); + xen_pfn_t *arr, int num ); /** * Translates a virtual address in the context of a given domain and @@ -504,11 +507,11 @@ unsigned long xc_translate_foreign_addre unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, int vcpu, unsigned long long virt); -int xc_get_pfn_list(int xc_handle, uint32_t domid, unsigned long *pfn_buf, +int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf, unsigned long max_pfns); int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid, - unsigned long *pfn_buf, + xen_pfn_t *pfn_buf, unsigned int start_page, unsigned int nr_pages); int xc_copy_to_domain_page(int xc_handle, uint32_t domid, diff -r d3e181fa238b -r 156a0963a1ae tools/libxc/xg_private.h --- a/tools/libxc/xg_private.h Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xg_private.h Tue Jun 06 13:25:31 2006 -0500 @@ -156,6 +156,9 @@ struct domain_setup_info unsigned long elf_paddr_offset; +#define PAEKERN_no 0 +#define PAEKERN_yes 1 +#define PAEKERN_extended_cr3 2 unsigned int pae_kernel; unsigned int load_symtab; @@ -170,7 +173,7 @@ typedef int (*parseimagefunc)(const char struct domain_setup_info *dsi); typedef int (*loadimagefunc)(const char *image, unsigned long image_size, int xch, - uint32_t dom, unsigned long *parray, + uint32_t dom, xen_pfn_t *parray, struct domain_setup_info *dsi); struct load_funcs @@ -198,7 +201,7 @@ unsigned long xc_get_filesz(int fd); unsigned long xc_get_filesz(int fd); void xc_map_memcpy(unsigned long dst, const char *src, unsigned long size, - int xch, uint32_t dom, unsigned long *parray, + int xch, uint32_t dom, xen_pfn_t *parray, unsigned long vstart); int pin_table(int xc_handle, unsigned int type, unsigned long mfn, diff -r d3e181fa238b -r 
156a0963a1ae tools/libxc/xg_save_restore.h --- a/tools/libxc/xg_save_restore.h Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/libxc/xg_save_restore.h Tue Jun 06 13:25:31 2006 -0500 @@ -105,23 +105,23 @@ static int get_platform_info(int xc_hand */ #define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE #define M2P_CHUNK_SIZE (1 << M2P_SHIFT) -#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT) +#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT) #define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT) /* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */ -#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT) +#define P2M_SIZE ROUNDUP((max_pfn * sizeof(xen_pfn_t)), PAGE_SHIFT) -/* Number of unsigned longs in a page */ -#define ulpp (PAGE_SIZE/sizeof(unsigned long)) +/* Number of xen_pfn_t in a page */ +#define fpp (PAGE_SIZE/sizeof(xen_pfn_t)) /* Number of entries in the pfn_to_mfn_frame_list */ -#define P2M_FL_ENTRIES (((max_pfn)+ulpp-1)/ulpp) +#define P2M_FL_ENTRIES (((max_pfn)+fpp-1)/fpp) /* Size in bytes of the pfn_to_mfn_frame_list */ #define P2M_FL_SIZE ((P2M_FL_ENTRIES)*sizeof(unsigned long)) /* Number of entries in the pfn_to_mfn_frame_list_list */ -#define P2M_FLL_ENTRIES (((max_pfn)+(ulpp*ulpp)-1)/(ulpp*ulpp)) +#define P2M_FLL_ENTRIES (((max_pfn)+(fpp*fpp)-1)/(fpp*fpp)) /* Current guests allow 8MB 'slack' in their P2M */ #define NR_SLACK_ENTRIES ((8 * 1024 * 1024) / PAGE_SIZE) diff -r d3e181fa238b -r 156a0963a1ae tools/tests/test_x86_emulator.c --- a/tools/tests/test_x86_emulator.c Fri Jun 02 12:54:22 2006 -0500 +++ b/tools/tests/test_x86_emulator.c Tue Jun 06 13:25:31 2006 -0500 @@ -13,6 +13,7 @@ typedef int64_t s64; typedef int64_t s64; #include <public/xen.h> #include <asm-x86/x86_emulate.h> +#include <sys/mman.h> static int read_any( unsigned long addr, @@ -85,23 +86,30 @@ int main(int argc, char **argv) struct x86_emulate_ctxt ctxt; struct cpu_user_regs regs; char instr[20] = { 0x01, 0x08 }; /* add 
%ecx,(%eax) */ - unsigned int res = 0x7FFFFFFF; - u32 cmpxchg8b_res[2] = { 0x12345678, 0x87654321 }; + unsigned int *res; int rc; ctxt.regs = &regs; ctxt.mode = X86EMUL_MODE_PROT32; + res = mmap((void *)0x100000, 0x1000, PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if ( res == MAP_FAILED ) + { + fprintf(stderr, "mmap to low address failed\n"); + exit(1); + } + printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - res = 0x7FFFFFFF; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x92345677) || + ctxt.cr2 = (unsigned long)res; + *res = 0x7FFFFFFF; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x92345677) || (regs.eflags != 0xa94) || (regs.eip != (unsigned long)&instr[2]) ) goto fail; @@ -116,11 +124,25 @@ int main(int argc, char **argv) #else regs.ecx = 0x12345678UL; #endif - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x92345677) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x92345677) || (regs.ecx != 0x8000000FUL) || + (regs.eip != (unsigned long)&instr[2]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing movl (%%eax),%%ecx..."); + instr[0] = 0x8b; instr[1] = 0x08; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = ~0UL; + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x92345677) || + (regs.ecx != 0x92345677UL) || (regs.eip != (unsigned long)&instr[2]) ) goto fail; printf("okay\n"); @@ -131,10 +153,10 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0x92345677UL; regs.ecx = 0xAA; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res !=
0x923456AA) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x923456AA) || (regs.eflags != 0x244) || (regs.eax != 0x92345677UL) || (regs.eip != (unsigned long)&instr[4]) ) @@ -147,10 +169,10 @@ int main(int argc, char **argv) regs.eip = (unsigned long)&instr[0]; regs.eax = 0xAABBCC77UL; regs.ecx = 0xFF; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x923456AA) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x923456AA) || ((regs.eflags&0x240) != 0x200) || (regs.eax != 0xAABBCCAA) || (regs.ecx != 0xFF) || @@ -163,10 +185,10 @@ int main(int argc, char **argv) regs.eflags = 0x200; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0x12345678) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0x12345678) || (regs.eflags != 0x200) || (regs.ecx != 0x923456AA) || (regs.eip != (unsigned long)&instr[2]) ) @@ -176,14 +198,14 @@ int main(int argc, char **argv) printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)..."); instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08; regs.eflags = 0x200; - res = 0x923456AA; + *res = 0x923456AA; regs.eip = (unsigned long)&instr[0]; regs.eax = 0x923456AAUL; regs.ecx = 0xDDEEFF00L; - ctxt.cr2 = (unsigned long)&res; - rc = x86_emulate_memop(&ctxt, &emulops); - if ( (rc != 0) || - (res != 0xDDEEFF00) || + ctxt.cr2 = (unsigned long)res; + rc = x86_emulate_memop(&ctxt, &emulops); + if ( (rc != 0) || + (*res != 0xDDEEFF00) || (regs.eflags != 0x244) || (regs.eax != 0x923456AAUL) || (regs.eip != (unsigned long)&instr[4]) ) @@ -192,54 +214,57 @@ int main(int argc, char **argv) printf("%-40s", "Testing rep movsw..."); instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5; - res = 
0x22334455; + *res = 0x22334455; regs.eflags = 0x200; regs.ecx = 23; regs.eip = (unsigned long)&instr[0]; - regs.esi = (unsigned long)&res + 0; - regs.edi = (unsigned long)&res + 2; + regs.esi = (unsigned long)res + 0; + regs.edi = (unsigned long)res + 2; regs.error_code = 0; /* read fault */ ctxt.cr2 = regs.esi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x44554455) || + (*res != 0x44554455) || (regs.eflags != 0x200) || (regs.ecx != 22) || - (regs.esi != ((unsigned long)&res + 2)) || - (regs.edi != ((unsigned long)&res + 4)) || + (regs.esi != ((unsigned long)res + 2)) || + (regs.edi != ((unsigned long)res + 4)) || (regs.eip != (unsigned long)&instr[0]) ) goto fail; printf("okay\n"); printf("%-40s", "Testing btrl $0x1,(%edi)..."); instr[0] = 0x0f; instr[1] = 0xba; instr[2] = 0x37; instr[3] = 0x01; - res = 0x2233445F; - regs.eflags = 0x200; - regs.eip = (unsigned long)&instr[0]; - regs.edi = (unsigned long)&res; + *res = 0x2233445F; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.edi = (unsigned long)res; ctxt.cr2 = regs.edi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x2233445D) || + (*res != 0x2233445D) || ((regs.eflags&0x201) != 0x201) || (regs.eip != (unsigned long)&instr[4]) ) goto fail; printf("okay\n"); + + res[0] = 0x12345678; + res[1] = 0x87654321; printf("%-40s", "Testing cmpxchg8b (%edi) [succeeding]..."); instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f; regs.eflags = 0x200; - regs.eax = cmpxchg8b_res[0]; - regs.edx = cmpxchg8b_res[1]; + regs.eax = res[0]; + regs.edx = res[1]; regs.ebx = 0x9999AAAA; regs.ecx = 0xCCCCFFFF; regs.eip = (unsigned long)&instr[0]; - regs.edi = (unsigned long)cmpxchg8b_res; + regs.edi = (unsigned long)res; ctxt.cr2 = regs.edi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (cmpxchg8b_res[0] != 0x9999AAAA) || - (cmpxchg8b_res[1] != 0xCCCCFFFF) || + (res[0] != 0x9999AAAA) || + (res[1] != 0xCCCCFFFF) || ((regs.eflags&0x240) != 0x240) || 
(regs.eip != (unsigned long)&instr[3]) ) goto fail; @@ -248,12 +273,12 @@ int main(int argc, char **argv) printf("%-40s", "Testing cmpxchg8b (%edi) [failing]..."); instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f; regs.eip = (unsigned long)&instr[0]; - regs.edi = (unsigned long)cmpxchg8b_res; + regs.edi = (unsigned long)res; ctxt.cr2 = regs.edi; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (cmpxchg8b_res[0] != 0x9999AAAA) || - (cmpxchg8b_res[1] != 0xCCCCFFFF) || + (res[0] != 0x9999AAAA) || + (res[1] != 0xCCCCFFFF) || (regs.eax != 0x9999AAAA) || (regs.edx != 0xCCCCFFFF) || ((regs.eflags&0x240) != 0x200) || @@ -265,11 +290,11 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - res = 0x82; + ctxt.cr2 = (unsigned long)res; + *res = 0x82; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x82) || + (*res != 0x82) || (regs.ecx != 0xFFFFFF82) || ((regs.eflags&0x240) != 0x200) || (regs.eip != (unsigned long)&instr[3]) ) @@ -280,11 +305,11 @@ int main(int argc, char **argv) instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08; regs.eip = (unsigned long)&instr[0]; regs.ecx = 0x12345678; - ctxt.cr2 = (unsigned long)&res; - res = 0x1234aa82; + ctxt.cr2 = (unsigned long)res; + *res = 0x1234aa82; rc = x86_emulate_memop(&ctxt, &emulops); if ( (rc != 0) || - (res != 0x1234aa82) || + (*res != 0x1234aa82) || (regs.ecx != 0xaa82) || ((regs.eflags&0x240) != 0x200) || (regs.eip != (unsigned long)&instr[3]) ) diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/domain.c Tue Jun 06 13:25:31 2006 -0500 @@ -259,7 +259,7 @@ int arch_set_info_guest( struct vcpu *v, struct vcpu_guest_context *c) { struct domain *d = v->domain; - unsigned long cr3_pfn; + unsigned long cr3_pfn = INVALID_MFN; int i, rc; if ( !(c->flags & VGCF_HVM_GUEST) ) @@ -524,20 
+524,29 @@ static void load_segments(struct vcpu *n if ( unlikely(!all_segs_okay) ) { struct cpu_user_regs *regs = guest_cpu_user_regs(); - unsigned long *rsp = + unsigned long *rsp = (n->arch.flags & TF_kernel_mode) ? (unsigned long *)regs->rsp : (unsigned long *)nctxt->kernel_sp; + unsigned long cs_and_mask, rflags; if ( !(n->arch.flags & TF_kernel_mode) ) toggle_guest_mode(n); else regs->cs &= ~3; + /* CS longword also contains full evtchn_upcall_mask. */ + cs_and_mask = (unsigned long)regs->cs | + ((unsigned long)n->vcpu_info->evtchn_upcall_mask << 32); + + /* Fold upcall mask into RFLAGS.IF. */ + rflags = regs->rflags & ~X86_EFLAGS_IF; + rflags |= !n->vcpu_info->evtchn_upcall_mask << 9; + if ( put_user(regs->ss, rsp- 1) | put_user(regs->rsp, rsp- 2) | - put_user(regs->rflags, rsp- 3) | - put_user(regs->cs, rsp- 4) | + put_user(rflags, rsp- 3) | + put_user(cs_and_mask, rsp- 4) | put_user(regs->rip, rsp- 5) | put_user(nctxt->user_regs.gs, rsp- 6) | put_user(nctxt->user_regs.fs, rsp- 7) | @@ -549,6 +558,10 @@ static void load_segments(struct vcpu *n DPRINTK("Error while creating failsafe callback frame.\n"); domain_crash(n->domain); } + + if ( test_bit(_VGCF_failsafe_disables_events, + &n->arch.guest_context.flags) ) + n->vcpu_info->evtchn_upcall_mask = 1; regs->entry_vector = TRAP_syscall; regs->rflags &= 0xFFFCBEFFUL; diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/domain_build.c Tue Jun 06 13:25:31 2006 -0500 @@ -301,6 +301,9 @@ int construct_dom0(struct domain *d, xen_pae ? "yes" : "no", dom0_pae ? 
"yes" : "no"); return -EINVAL; } + + if ( xen_pae && !!strstr(dsi.xen_section_string, "PAE=yes[extended-cr3]") ) + set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); if ( (p = strstr(dsi.xen_section_string, "FEATURES=")) != NULL ) { diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Jun 06 13:25:31 2006 -0500 @@ -1970,7 +1970,6 @@ static inline void vmx_vmexit_do_extint( __hvm_bug(regs); vector &= INTR_INFO_VECTOR_MASK; - local_irq_disable(); TRACE_VMEXIT(1,vector); switch(vector) { @@ -2065,30 +2064,33 @@ asmlinkage void vmx_vmexit_handler(struc struct vcpu *v = current; int error; - if ((error = __vmread(VM_EXIT_REASON, &exit_reason))) - __hvm_bug(&regs); + error = __vmread(VM_EXIT_REASON, &exit_reason); + BUG_ON(error); perfc_incra(vmexits, exit_reason); - /* don't bother H/W interrutps */ - if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT && - exit_reason != EXIT_REASON_VMCALL && - exit_reason != EXIT_REASON_IO_INSTRUCTION) + if ( (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT) && + (exit_reason != EXIT_REASON_VMCALL) && + (exit_reason != EXIT_REASON_IO_INSTRUCTION) ) HVM_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason); - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { + if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT ) + local_irq_enable(); + + if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) + { printk("Failed vm entry (reason 0x%x)\n", exit_reason); printk("*********** VMCS Area **************\n"); vmcs_dump_vcpu(); printk("**************************************\n"); domain_crash_synchronous(); - return; } __vmread(GUEST_RIP, &eip); TRACE_VMEXIT(0,exit_reason); - switch (exit_reason) { + switch ( exit_reason ) + { case EXIT_REASON_EXCEPTION_NMI: { /* diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Fri Jun 02 12:54:22 2006 -0500 +++
b/xen/arch/x86/hvm/vmx/x86_32/exits.S Tue Jun 06 13:25:31 2006 -0500 @@ -55,29 +55,26 @@ * domain pointer, DS, ES, FS, GS. Therefore, we effectively skip 6 registers. */ -#define HVM_MONITOR_EFLAGS 0x202 /* IF on */ #define NR_SKIPPED_REGS 6 /* See the above explanation */ -#define HVM_SAVE_ALL_NOSEGREGS \ - pushl $HVM_MONITOR_EFLAGS; \ - popf; \ - subl $(NR_SKIPPED_REGS*4), %esp; \ +#define HVM_SAVE_ALL_NOSEGREGS \ + subl $(NR_SKIPPED_REGS*4), %esp; \ movl $0, 0xc(%esp); /* XXX why do we need to force eflags==0 ?? */ \ - pushl %eax; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ pushl %ebx; -#define HVM_RESTORE_ALL_NOSEGREGS \ - popl %ebx; \ - popl %ecx; \ - popl %edx; \ - popl %esi; \ - popl %edi; \ - popl %ebp; \ - popl %eax; \ +#define HVM_RESTORE_ALL_NOSEGREGS \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ addl $(NR_SKIPPED_REGS*4), %esp ALIGN diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Tue Jun 06 13:25:31 2006 -0500 @@ -51,45 +51,42 @@ * (2/1) u32 entry_vector; * (1/1) u32 error_code; */ -#define HVM_MONITOR_RFLAGS 0x202 /* IF on */ #define NR_SKIPPED_REGS 6 /* See the above explanation */ -#define HVM_SAVE_ALL_NOSEGREGS \ - pushq $HVM_MONITOR_RFLAGS; \ - popfq; \ - subq $(NR_SKIPPED_REGS*8), %rsp; \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rdx; \ - pushq %rcx; \ - pushq %rax; \ - pushq %r8; \ - pushq %r9; \ - pushq %r10; \ - pushq %r11; \ - pushq %rbx; \ - pushq %rbp; \ - pushq %r12; \ - pushq %r13; \ - pushq %r14; \ - pushq %r15; \ +#define HVM_SAVE_ALL_NOSEGREGS \ + subq $(NR_SKIPPED_REGS*8), %rsp; \ + pushq %rdi; \ + pushq %rsi; \ + pushq %rdx; \ + pushq %rcx; \ + pushq %rax; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + 
pushq %r11; \ + pushq %rbx; \ + pushq %rbp; \ + pushq %r12; \ + pushq %r13; \ + pushq %r14; \ + pushq %r15; -#define HVM_RESTORE_ALL_NOSEGREGS \ - popq %r15; \ - popq %r14; \ - popq %r13; \ - popq %r12; \ - popq %rbp; \ - popq %rbx; \ - popq %r11; \ - popq %r10; \ - popq %r9; \ - popq %r8; \ - popq %rax; \ - popq %rcx; \ - popq %rdx; \ - popq %rsi; \ - popq %rdi; \ - addq $(NR_SKIPPED_REGS*8), %rsp; \ +#define HVM_RESTORE_ALL_NOSEGREGS \ + popq %r15; \ + popq %r14; \ + popq %r13; \ + popq %r12; \ + popq %rbp; \ + popq %rbx; \ + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rax; \ + popq %rcx; \ + popq %rdx; \ + popq %rsi; \ + popq %rdi; \ + addq $(NR_SKIPPED_REGS*8), %rsp; ENTRY(vmx_asm_vmexit_handler) /* selectors are restored/saved by VMX */ diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/mm.c Tue Jun 06 13:25:31 2006 -0500 @@ -996,6 +996,21 @@ static int alloc_l3_table(struct page_in int i; ASSERT(!shadow_mode_refcounts(d)); + +#ifdef CONFIG_X86_PAE + /* + * PAE pgdirs above 4GB are unacceptable if the guest does not understand + * the weird 'extended cr3' format for dealing with high-order address + * bits. We cut some slack for control tools (before vcpu0 is initialised). 
+ */ + if ( (pfn >= 0x100000) && + unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) && + d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) ) + { + MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); + return 0; + } +#endif pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_32/asm-offsets.c --- a/xen/arch/x86/x86_32/asm-offsets.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_32/asm-offsets.c Tue Jun 06 13:25:31 2006 -0500 @@ -64,11 +64,13 @@ void __dummy__(void) arch.guest_context.kernel_ss); OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt); OFFSET(VCPU_flags, struct vcpu, vcpu_flags); OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr); DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending); DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked); + DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events); BLANK(); OFFSET(TSS_ss0, struct tss_struct, ss0); diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_32/entry.S Tue Jun 06 13:25:31 2006 -0500 @@ -130,7 +130,10 @@ failsafe_callback: movl VCPU_failsafe_sel(%ebx),%eax movw %ax,TRAPBOUNCE_cs(%edx) movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx) - call create_bounce_frame + bt $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%ebx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx) +1: call create_bounce_frame xorl %eax,%eax movl %eax,UREGS_ds(%esp) movl %eax,UREGS_es(%esp) diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_32/traps.c Tue Jun 06 13:25:31 2006 -0500 @@ -346,6 +346,12 @@ static long register_guest_callback(stru case 
CALLBACKTYPE_failsafe: v->arch.guest_context.failsafe_callback_cs = reg->address.cs; v->arch.guest_context.failsafe_callback_eip = reg->address.eip; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); break; #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_64/asm-offsets.c --- a/xen/arch/x86/x86_64/asm-offsets.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_64/asm-offsets.c Tue Jun 06 13:25:31 2006 -0500 @@ -64,11 +64,14 @@ void __dummy__(void) arch.guest_context.syscall_callback_eip); OFFSET(VCPU_kernel_sp, struct vcpu, arch.guest_context.kernel_sp); + OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags); OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt); OFFSET(VCPU_flags, struct vcpu, vcpu_flags); OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr); DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending); DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked); + DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events); + DEFINE(_VGCF_syscall_disables_events, _VGCF_syscall_disables_events); BLANK(); OFFSET(VCPU_svm_vmcb_pa, struct vcpu, arch.hvm_svm.vmcb_pa); diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_64/entry.S Tue Jun 06 13:25:31 2006 -0500 @@ -30,7 +30,10 @@ switch_to_kernel: movq VCPU_syscall_addr(%rbx),%rax movq %rax,TRAPBOUNCE_eip(%rdx) movw $0,TRAPBOUNCE_flags(%rdx) - call create_bounce_frame + bt $_VGCF_syscall_disables_events,VCPU_guest_context_flags(%rbx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) +1: call create_bounce_frame jmp test_all_events /* %rbx: struct vcpu, interrupts disabled */ @@ -77,7 +80,10 @@ failsafe_callback: movq VCPU_failsafe_addr(%rbx),%rax movq %rax,TRAPBOUNCE_eip(%rdx) movw 
$TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx) - call create_bounce_frame + bt $_VGCF_failsafe_disables_events,VCPU_guest_context_flags(%rbx) + jnc 1f + orw $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx) +1: call create_bounce_frame jmp test_all_events .previous .section __pre_ex_table,"a" diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_64/traps.c Tue Jun 06 13:25:31 2006 -0500 @@ -334,10 +334,22 @@ static long register_guest_callback(stru case CALLBACKTYPE_failsafe: v->arch.guest_context.failsafe_callback_eip = reg->address; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_failsafe_disables_events, + &v->arch.guest_context.flags); break; case CALLBACKTYPE_syscall: v->arch.guest_context.syscall_callback_eip = reg->address; + if ( reg->flags & CALLBACKF_mask_events ) + set_bit(_VGCF_syscall_disables_events, + &v->arch.guest_context.flags); + else + clear_bit(_VGCF_syscall_disables_events, + &v->arch.guest_context.flags); break; case CALLBACKTYPE_nmi: diff -r d3e181fa238b -r 156a0963a1ae xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/arch/x86/x86_emulate.c Tue Jun 06 13:25:31 2006 -0500 @@ -100,8 +100,8 @@ static uint8_t opcode_table[256] = { ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, /* 0x88 - 0x8F */ - ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, - ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM, + ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov, + ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, 0, 0, 0, DstMem|SrcNone|ModRM|Mov, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff -r d3e181fa238b -r 156a0963a1ae xen/common/kernel.c --- a/xen/common/kernel.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/common/kernel.c Tue Jun 06 13:25:31 2006 
-0500 @@ -184,6 +184,7 @@ long do_xen_version(int cmd, XEN_GUEST_H case XENVER_get_features: { xen_feature_info_t fi; + struct domain *d = current->domain; if ( copy_from_guest(&fi, arg, 1) ) return -EFAULT; @@ -191,7 +192,9 @@ long do_xen_version(int cmd, XEN_GUEST_H switch ( fi.submap_idx ) { case 0: - fi.submap = (1U << XENFEAT_pae_pgdir_above_4gb); + fi.submap = 0; + if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) ) + fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb); if ( shadow_mode_translate(current->domain) ) fi.submap |= (1U << XENFEAT_writable_page_tables) | diff -r d3e181fa238b -r 156a0963a1ae xen/common/keyhandler.c --- a/xen/common/keyhandler.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/common/keyhandler.c Tue Jun 06 13:25:31 2006 -0500 @@ -128,11 +128,12 @@ static void dump_domains(unsigned char k d->domain_flags, atomic_read(&d->refcnt), d->tot_pages, d->xenheap_pages, cpuset); printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-" - "%02x%02x-%02x%02x%02x%02x%02x%02x\n", + "%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n", d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3], d->handle[ 4], d->handle[ 5], d->handle[ 6], d->handle[ 7], d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11], - d->handle[12], d->handle[13], d->handle[14], d->handle[15]); + d->handle[12], d->handle[13], d->handle[14], d->handle[15], + d->vm_assist); arch_dump_domain_info(d); diff -r d3e181fa238b -r 156a0963a1ae xen/common/memory.c --- a/xen/common/memory.c Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/common/memory.c Tue Jun 06 13:25:31 2006 -0500 @@ -31,14 +31,15 @@ static long static long increase_reservation( struct domain *d, - XEN_GUEST_HANDLE(ulong) extent_list, + XEN_GUEST_HANDLE(xen_pfn_t) extent_list, unsigned int nr_extents, unsigned int extent_order, unsigned int flags, int *preempted) { struct page_info *page; - unsigned long i, mfn; + unsigned long i; + xen_pfn_t mfn; if ( !guest_handle_is_null(extent_list) && !guest_handle_okay(extent_list, nr_extents) 
) @@ -80,14 +81,16 @@ static long static long populate_physmap( struct domain *d, - XEN_GUEST_HANDLE(ulong) extent_list, + XEN_GUEST_HANDLE(xen_pfn_t) extent_list, unsigned int nr_extents, unsigned int extent_order, unsigned int flags, int *preempted) { struct page_info *page; - unsigned long i, j, gpfn, mfn; + unsigned long i, j; + xen_pfn_t gpfn; + xen_pfn_t mfn; if ( !guest_handle_okay(extent_list, nr_extents) ) return 0; @@ -177,13 +180,14 @@ static long static long decrease_reservation( struct domain *d, - XEN_GUEST_HANDLE(ulong) extent_list, + XEN_GUEST_HANDLE(xen_pfn_t) extent_list, unsigned int nr_extents, unsigned int extent_order, unsigned int flags, int *preempted) { - unsigned long i, j, gmfn; + unsigned long i, j; + xen_pfn_t gmfn; if ( !guest_handle_okay(extent_list, nr_extents) ) return 0; @@ -214,7 +218,9 @@ translate_gpfn_list( XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress) { struct xen_translate_gpfn_list op; - unsigned long i, gpfn, mfn; + unsigned long i; + xen_pfn_t gpfn; + xen_pfn_t mfn; struct domain *d; if ( copy_from_guest(&op, uop, 1) ) diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/arch-ia64.h Tue Jun 06 13:25:31 2006 -0500 @@ -26,6 +26,9 @@ DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* Arch specific VIRQs definition */ diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/arch-x86_32.h --- a/xen/include/public/arch-x86_32.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/arch-x86_32.h Tue Jun 06 13:25:31 2006 -0500 @@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* 
@@ -138,9 +141,17 @@ struct vcpu_guest_context { struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_HVM_GUEST (1<<1) -#define VGCF_IN_KERNEL (1<<2) +#define VGCF_I387_VALID (1<<0) +#define VGCF_HVM_GUEST (1<<1) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_hvm_guest 1 +#define VGCF_hvm_guest (1<<_VGCF_hvm_guest) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ @@ -169,7 +180,7 @@ struct arch_shared_info { struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; + xen_pfn_t pfn_to_mfn_frame_list_list; unsigned long nmi_reason; }; typedef struct arch_shared_info arch_shared_info_t; diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/arch-x86_64.h --- a/xen/include/public/arch-x86_64.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/arch-x86_64.h Tue Jun 06 13:25:31 2006 -0500 @@ -28,6 +28,9 @@ DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(int); DEFINE_XEN_GUEST_HANDLE(long); DEFINE_XEN_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* @@ -211,9 +214,19 @@ struct vcpu_guest_context { struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. 
*/ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_HVM_GUEST (1<<1) -#define VGCF_IN_KERNEL (1<<2) +#define VGCF_I387_VALID (1<<0) +#define VGCF_HVM_GUEST (1<<1) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_hvm_guest 1 +#define VGCF_hvm_guest (1<<_VGCF_hvm_guest) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ @@ -240,7 +253,7 @@ struct arch_shared_info { struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; + xen_pfn_t pfn_to_mfn_frame_list_list; unsigned long nmi_reason; }; typedef struct arch_shared_info arch_shared_info_t; diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/callback.h --- a/xen/include/public/callback.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/callback.h Tue Jun 06 13:25:31 2006 -0500 @@ -29,12 +29,20 @@ #define CALLBACKTYPE_nmi 4 /* + * Disable event delivery during callback? This flag is ignored for event and + * NMI callbacks: event delivery is unconditionally disabled. + */ +#define _CALLBACKF_mask_events 0 +#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) + +/* * Register a callback.
*/ #define CALLBACKOP_register 0 struct callback_register { - int type; - xen_callback_t address; + uint16_t type; + uint16_t flags; + xen_callback_t address; }; typedef struct callback_register callback_register_t; DEFINE_XEN_GUEST_HANDLE(callback_register_t); @@ -47,7 +55,8 @@ DEFINE_XEN_GUEST_HANDLE(callback_registe */ #define CALLBACKOP_unregister 1 struct callback_unregister { - int type; + uint16_t type; + uint16_t _unused; }; typedef struct callback_unregister callback_unregister_t; DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/dom0_ops.h Tue Jun 06 13:25:31 2006 -0500 @@ -19,7 +19,7 @@ * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define DOM0_INTERFACE_VERSION 0x03000000 +#define DOM0_INTERFACE_VERSION 0x03000001 /************************************************************************/ @@ -27,10 +27,10 @@ struct dom0_getmemlist { struct dom0_getmemlist { /* IN variables. */ domid_t domain; - unsigned long max_pfns; - XEN_GUEST_HANDLE(ulong) buffer; - /* OUT variables. */ - unsigned long num_pfns; + uint64_t max_pfns; + XEN_GUEST_HANDLE(xen_pfn_t) buffer; + /* OUT variables. */ + uint64_t num_pfns; }; typedef struct dom0_getmemlist dom0_getmemlist_t; DEFINE_XEN_GUEST_HANDLE(dom0_getmemlist_t); @@ -96,9 +96,9 @@ struct dom0_getdomaininfo { #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code. */ #define DOMFLAGS_SHUTDOWNSHIFT 16 uint32_t flags; - unsigned long tot_pages; - unsigned long max_pages; - unsigned long shared_info_frame; /* MFN of shared_info struct */ + uint64_t tot_pages; + uint64_t max_pages; + xen_pfn_t shared_info_frame; /* MFN of shared_info struct */ uint64_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. 
*/ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ @@ -162,7 +162,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_settime_t); struct dom0_getpageframeinfo { /* IN variables. */ - unsigned long mfn; /* Machine page frame number to query. */ + xen_pfn_t mfn; /* Machine page frame number to query. */ domid_t domain; /* To which domain does the frame belong? */ /* OUT variables. */ /* Is the page PINNED to a type? */ @@ -213,7 +213,7 @@ struct dom0_tbufcontrol { cpumap_t cpu_mask; uint32_t evt_mask; /* OUT variables */ - unsigned long buffer_mfn; + xen_pfn_t buffer_mfn; uint32_t size; }; typedef struct dom0_tbufcontrol dom0_tbufcontrol_t; @@ -229,8 +229,8 @@ struct dom0_physinfo { uint32_t sockets_per_node; uint32_t nr_nodes; uint32_t cpu_khz; - unsigned long total_pages; - unsigned long free_pages; + uint64_t total_pages; + uint64_t free_pages; uint32_t hw_cap[8]; }; typedef struct dom0_physinfo dom0_physinfo_t; @@ -276,7 +276,7 @@ struct dom0_shadow_control { uint32_t op; XEN_GUEST_HANDLE(ulong) dirty_bitmap; /* IN/OUT variables. */ - unsigned long pages; /* size of buffer, updated with actual size */ + uint64_t pages; /* size of buffer, updated with actual size */ /* OUT variables. */ struct dom0_shadow_control_stats stats; }; @@ -286,8 +286,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_shadow_cont #define DOM0_SETDOMAINMAXMEM 28 struct dom0_setdomainmaxmem { /* IN variables. */ - domid_t domain; - unsigned long max_memkb; + domid_t domain; + uint64_t max_memkb; }; typedef struct dom0_setdomainmaxmem dom0_setdomainmaxmem_t; DEFINE_XEN_GUEST_HANDLE(dom0_setdomainmaxmem_t); @@ -295,8 +295,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_setdomainma #define DOM0_GETPAGEFRAMEINFO2 29 /* batched interface */ struct dom0_getpageframeinfo2 { /* IN variables. */ - domid_t domain; - unsigned long num; + domid_t domain; + uint64_t num; /* IN/OUT variables. 
*/ XEN_GUEST_HANDLE(ulong) array; }; @@ -313,12 +313,12 @@ DEFINE_XEN_GUEST_HANDLE(dom0_getpagefram #define DOM0_ADD_MEMTYPE 31 struct dom0_add_memtype { /* IN variables. */ - unsigned long mfn; - unsigned long nr_mfns; - uint32_t type; - /* OUT variables. */ - uint32_t handle; - uint32_t reg; + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables. */ + uint32_t handle; + uint32_t reg; }; typedef struct dom0_add_memtype dom0_add_memtype_t; DEFINE_XEN_GUEST_HANDLE(dom0_add_memtype_t); @@ -345,8 +345,8 @@ struct dom0_read_memtype { /* IN variables. */ uint32_t reg; /* OUT variables. */ - unsigned long mfn; - unsigned long nr_mfns; + xen_pfn_t mfn; + uint64_t nr_mfns; uint32_t type; }; typedef struct dom0_read_memtype dom0_read_memtype_t; @@ -499,8 +499,8 @@ DEFINE_XEN_GUEST_HANDLE(dom0_irq_permiss #define DOM0_IOMEM_PERMISSION 47 struct dom0_iomem_permission { domid_t domain; /* domain to be affected */ - unsigned long first_mfn; /* first page (physical page number) in range */ - unsigned long nr_mfns; /* number of pages in range (>0) */ + xen_pfn_t first_mfn; /* first page (physical page number) in range */ + uint64_t nr_mfns; /* number of pages in range (>0) */ uint8_t allow_access; /* allow (!0) or deny (0) access to range? 
*/ }; typedef struct dom0_iomem_permission dom0_iomem_permission_t; @@ -509,7 +509,7 @@ DEFINE_XEN_GUEST_HANDLE(dom0_iomem_permi #define DOM0_HYPERCALL_INIT 48 struct dom0_hypercall_init { domid_t domain; /* domain to be affected */ - unsigned long mfn; /* machine frame to be initialised */ + xen_pfn_t mfn; /* machine frame to be initialised */ }; typedef struct dom0_hypercall_init dom0_hypercall_init_t; DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t); diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/grant_table.h --- a/xen/include/public/grant_table.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/grant_table.h Tue Jun 06 13:25:31 2006 -0500 @@ -244,7 +244,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_tabl #define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ - unsigned long mfn; + xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/io/netif.h Tue Jun 06 13:25:31 2006 -0500 @@ -26,6 +26,10 @@ /* Packet data has been validated against protocol checksum. */ #define _NETTXF_data_validated (1) #define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the request. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) struct netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/io/ring.h --- a/xen/include/public/io/ring.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/io/ring.h Tue Jun 06 13:25:31 2006 -0500 @@ -151,19 +151,27 @@ typedef struct __name##_back_ring __name #define RING_SIZE(_r) \ ((_r)->nr_ents) +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + /* Test if there is an empty slot available on the front ring. 
* (This is only meaningful from the front. ) */ #define RING_FULL(_r) \ - (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r)) + (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ #define RING_HAS_UNCONSUMED_RESPONSES(_r) \ - ((_r)->rsp_cons != (_r)->sring->rsp_prod) + ((_r)->sring->rsp_prod - (_r)->rsp_cons) #define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - (((_r)->req_cons != (_r)->sring->req_prod) && \ - (((_r)->req_cons - (_r)->rsp_prod_pvt) != RING_SIZE(_r))) + ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ + }) /* Direct access to individual ring elements, by index. */ #define RING_GET_REQUEST(_r, _idx) \ diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/memory.h --- a/xen/include/public/memory.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/memory.h Tue Jun 06 13:25:31 2006 -0500 @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - XEN_GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -87,7 +87,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - XEN_GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -117,7 +117,7 @@ struct xen_add_to_physmap { unsigned long idx; /* GPFN where the source mapping page should appear. 
*/ - unsigned long gpfn; + xen_pfn_t gpfn; }; typedef struct xen_add_to_physmap xen_add_to_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); @@ -135,13 +135,13 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - XEN_GUEST_HANDLE(ulong) gpfn_list; + XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - XEN_GUEST_HANDLE(ulong) mfn_list; + XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; }; typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); diff -r d3e181fa238b -r 156a0963a1ae xen/include/public/xen.h --- a/xen/include/public/xen.h Fri Jun 02 12:54:22 2006 -0500 +++ b/xen/include/public/xen.h Tue Jun 06 13:25:31 2006 -0500 @@ -199,7 +199,7 @@ struct mmuext_op { unsigned int cmd; union { /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ - unsigned long mfn; + xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; } arg1; @@ -236,10 +236,24 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); */ #define VMASST_CMD_enable 0 #define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ #define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ #define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ #define VMASST_TYPE_writable_pagetables 2 -#define MAX_VMASST_TYPE 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 #ifndef __ASSEMBLY__ @@ -449,9 +463,9 @@ struct start_info { unsigned long nr_pages; /* Total pages allocated to this domain. 
*/ unsigned long shared_info; /* MACHINE address of shared info struct. */ uint32_t flags; /* SIF_xxx flags. */ - unsigned long store_mfn; /* MACHINE page number of shared page. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ uint32_t store_evtchn; /* Event channel for store communication. */ - unsigned long console_mfn; /* MACHINE address of console page. */ + xen_pfn_t console_mfn; /* MACHINE page number of console page. */ uint32_t console_evtchn; /* Event channel for console messages. */ /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ unsigned long pt_base; /* VIRTUAL address of page directory. */ _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |