Re: [Xen-devel] netif & grant tables
On Thu, Jun 30, 2005 at 08:28:54PM -0600, Matt Chapman wrote:
> I'm currently looking at getting domU networking working
> on IA64, and to do this I need to make netback/netfront
> use grant tables.
>
> I'm told that there's already a patch floating around,
> can someone tell me where to find it?

Hi Matt,

Here is the patch for having grant tables with netback and netfront.
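For anyone reviewing, the shape of the tx-path change may be easier to see outside the #ifdef maze. Instead of passing the backend a raw machine address, the frontend now claims a grant reference from a pre-allocated pool, grants the backend read-only access to the page backing skb->data, and packs the reference plus the intra-page offset into tx->addr; the backend then maps it with GNTTABOP_map_grant_ref instead of update_va_mapping_otherdomain. The helper below is not part of the patch -- grant_tx_slot() is a hypothetical name condensing the CONFIG_XEN_NETDEV_GRANT_TX branch of the network_start_xmit() hunk, with the BUG() on pool exhaustion replaced by an error return:

    /* Sketch only: condensed from the network_start_xmit() hunk below.
     * gref_tx_head/gref_tx_terminal are the reference pool the patch
     * allocates in netif_init(); grant_tx_ref[] remembers each reference
     * so network_tx_buf_gc() can end and release it later. */
    static int grant_tx_slot(netif_tx_request_t *tx, struct sk_buff *skb,
                             domid_t backend_domid, u16 id)
    {
        unsigned long mfn;
        int ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal);

        if (ref < 0)
            return -ENOSPC;   /* pool exhausted (the patch BUG()s instead) */

        /* Grant the backend read-only access to the frame holding the data. */
        mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
        gnttab_grant_foreign_access_ref(ref, backend_domid, mfn, GNTMAP_readonly);

        /* addr now carries (grant ref, offset) rather than a machine address;
         * the backend recovers the offset with (txreq.addr & ~PAGE_MASK). */
        tx->addr = ((unsigned long)ref << PAGE_SHIFT) |
                   ((unsigned long)skb->data & ~PAGE_MASK);
        grant_tx_ref[id] = ref;
        return 0;
    }

The rx path is the same idea in reverse: network_alloc_rx_buffers() grants a foreign transfer on each posted buffer, and netback hands pages back with the new GNTTABOP_donate hypercall in place of MMUEXT_REASSIGN_PAGE. The full patch follows.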
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig	2005-06-02 15:02:46 +01:00
@@ -97,6 +97,20 @@
 	  dedicated device-driver domain, or your master control domain
 	  (domain 0), then you almost certainly want to say Y here.
 
+config XEN_NETDEV_GRANT_TX
+	bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+	default y
+	help
+	  This introduces the use of grant tables as a data exchange mechanism
+	  between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+	bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+	default y
+	help
+	  This introduces the use of grant tables as a data exchange mechanism
+	  between the frontend and backend network drivers.
+
 config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
 	bool "Pipelined transmitter (DANGEROUS)"
 	depends on XEN_NETDEV_FRONTEND
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	2005-06-02 15:02:46 +01:00
@@ -165,8 +165,14 @@
     u16 flags;
 
     flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    /*
+     * Can't flags be (GTF_accept_transfer | GTF_transfer_completed)
+     * here, if gnttab_donate executes without interruption?
+     */
+#else
     ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
     /*
      * If a transfer is committed then wait for the frame address to appear.
      * Otherwise invalidate the grant entry against future use.
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h	2005-06-02 15:02:46 +01:00
@@ -50,6 +50,9 @@
     /* Private indexes into shared ring. */
     NETIF_RING_IDX rx_req_cons;
     NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
     NETIF_RING_IDX tx_req_cons;
     NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c	2005-06-02 15:02:46 +01:00
@@ -18,6 +18,24 @@
 #include <linux/delay.h>
 #endif
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+    int i;
+
+    printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif,
@@ -41,7 +59,9 @@
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
 
 /* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,21 @@
 static PEND_RING_IDX dealloc_prod, dealloc_cons;
 
 static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static gnttab_op_t grant_tx_op[MAX_PENDING_REQS];
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_op_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
 
 static struct list_head net_schedule_list;
 static spinlock_t net_schedule_list_lock;
@@ -91,6 +125,7 @@
     return mfn;
 }
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static void free_mfn(unsigned long mfn)
 {
     unsigned long flags;
@@ -102,6 +137,7 @@
         BUG();
     spin_unlock_irqrestore(&mfn_lock, flags);
 }
+#endif
 
 static inline void maybe_schedule_tx_action(void)
 {
@@ -160,7 +196,17 @@
         dev_kfree_skb(skb);
         skb = nskb;
     }
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
+           netif->rx->req_prod,
+           netif->rx_req_cons,
+           netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+           netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
     netif->rx_req_cons++;
     netif_get(netif);
 
@@ -201,7 +247,11 @@
     u16 size, id, evtchn;
     multicall_entry_t *mcl;
     mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_op_t *gop;
+#else
     struct mmuext_op *mmuext;
+#endif
     unsigned long vdata, mdata, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
@@ -212,7 +262,12 @@
 
     mcl = rx_mcl;
     mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
+
     while ( (skb = skb_dequeue(&rx_queue)) != NULL )
     {
         netif = netdev_priv(skb->dev);
@@ -228,7 +283,6 @@
             skb_queue_head(&rx_queue, skb);
             break;
         }
-
         /*
          * Set the new P2M table entry before reassigning the old data page.
         * Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -241,6 +295,14 @@
         mcl->args[2] = 0;
         mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        gop->u.donate.mfn = mdata >> PAGE_SHIFT;
+        gop->u.donate.domid = netif->domid;
+        gop->u.donate.handle = netif->rx->ring[
+            MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+        netif->rx_resp_prod_copy++;
+        gop++;
+#else
         mcl->op = __HYPERVISOR_mmuext_op;
         mcl->args[0] = (unsigned long)mmuext;
         mcl->args[1] = 1;
@@ -251,13 +313,16 @@
         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
         mmuext->mfn = mdata >> PAGE_SHIFT;
         mmuext++;
-
+#endif
         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
         mmu->val = __pa(vdata) >> PAGE_SHIFT;
         mmu++;
 
         __skb_queue_tail(&rxq, skb);
 
+#ifdef DEBUG_GRANT
+        dump_packet('a', mdata, vdata);
+#endif
         /* Filled the batch queue? */
         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
             break;
@@ -273,12 +338,24 @@
     mcl->args[3] = DOMID_SELF;
     mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    mcl[-2].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
     mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
         BUG();
 
     mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+                                           grant_rx_op, gop - grant_rx_op))) {
+        BUG();
+    }
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
     while ( (skb = __skb_dequeue(&rxq)) != NULL )
     {
         netif = netdev_priv(skb->dev);
@@ -286,9 +363,12 @@
 
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
         mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
                  ((unsigned long)skb->data & ~PAGE_MASK));
-
+#endif
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->frag_list = NULL;
@@ -302,13 +382,18 @@
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if (unlikely(gop->u.donate.status != 0)) {
+            BUG();
+        }
+#else
         if ( unlikely(mcl[1].result != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
             free_mfn(mdata >> PAGE_SHIFT);
             status = NETIF_RSP_ERROR;
         }
-
+#endif
         evtchn = netif->evtchn;
         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
         if ( make_rx_response(netif, id, status, mdata,
@@ -321,9 +406,13 @@
 
         netif_put(netif);
         dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl++;
+        gop++;
+#else
         mcl += 2;
         mmuext += 1;
+#endif
     }
 
     while ( notify_nr != 0 )
@@ -407,6 +496,7 @@
         netif_schedule_work(netif);
 }
 
+/* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
     struct list_head *ent;
@@ -415,13 +505,36 @@
     netif_tx_request_t txreq;
     u16 pending_idx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_op_t *gop;
+#else
     multicall_entry_t *mcl;
+#endif
     PEND_RING_IDX dc, dp;
     unsigned int data_len;
+
     if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
         goto skip_dealloc;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    /*
+     * Free up any grants we have finished using.
+     */
+    gop = grant_tx_op;
+    while (dc != dp) {
+        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+        gop->u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->u.unmap_grant_ref.dev_bus_addr = 0;
+        gop->u.unmap_grant_ref.handle = grant_tx_ref[pending_idx];
+        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+        gop++;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                           grant_tx_op, gop - grant_tx_op))) {
+        BUG();
+    }
+#else
     mcl = tx_mcl;
     while ( dc != dp )
     {
@@ -438,11 +551,14 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( dealloc_cons != dp )
     {
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         /* The update_va_mapping() must not fail. */
         if ( unlikely(mcl[0].result != 0) )
             BUG();
+#endif
 
         pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
 
@@ -466,11 +582,17 @@
 
         netif_put(netif);
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         mcl++;
+#endif
     }
 
 skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gop = grant_tx_op;
+#else
     mcl = tx_mcl;
+#endif
     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
             !list_empty(&net_schedule_list) )
     {
@@ -492,7 +614,6 @@
         rmb(); /* Ensure that we see the request before we copy it. */
         memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
                sizeof(txreq));
-
         /* Credit-based scheduling. */
         if ( txreq.size > netif->remaining_credit )
         {
@@ -572,13 +693,20 @@
 
         /* Packets passed to netif_rx() must have some headroom. */
         skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        gop->u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->u.map_grant_ref.dom = netif->domid;
+        gop->u.map_grant_ref.ref = txreq.addr >> PAGE_SHIFT;
+        gop->u.map_grant_ref.flags = GNTMAP_host_map | GNTMAP_readonly;
+        gop++;
+#else
         mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
         mcl[0].args[0] = MMAP_VADDR(pending_idx);
         mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
         mcl[0].args[2] = 0;
         mcl[0].args[3] = netif->domid;
         mcl++;
+#endif
 
         memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
         pending_tx_info[pending_idx].netif = netif;
@@ -588,11 +716,26 @@
 
         pending_cons++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if ((gop - grant_tx_op) >= ARRAY_SIZE(grant_tx_op))
+            break;
+#else
         /* Filled the batch queue? */
         if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
             break;
+#endif
     }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gop == grant_tx_op) {
+        return;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                           grant_tx_op, gop - grant_tx_op))) {
+        BUG();
+    }
+    gop = grant_tx_op;
+#else
     if ( mcl == tx_mcl )
         return;
 
@@ -600,6 +743,7 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
     {
         pending_idx = *((u16 *)skb->data);
@@ -607,6 +751,20 @@
         memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
 
         /* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if (unlikely(gop->u.map_grant_ref.dev_bus_addr == 0)) {
+            printk(KERN_ALERT "#### netback grant fails\n");
+            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+            netif_put(netif);
+            kfree_skb(skb);
+            gop++;
+            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+            continue;
+        }
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+            FOREIGN_FRAME(gop->u.map_grant_ref.dev_bus_addr);
+        grant_tx_ref[pending_idx] = gop->u.map_grant_ref.handle;
+#else
         if ( unlikely(mcl[0].result != 0) )
         {
             DPRINTK("Bad page frame\n");
@@ -620,6 +778,7 @@
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
             FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
 
         data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
 
@@ -627,7 +786,6 @@
         memcpy(skb->data,
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                data_len);
-
         if ( data_len < txreq.size )
         {
             /* Append the packet payload as a fragment. */
@@ -661,7 +819,11 @@
         netif_rx(skb);
         netif->dev->last_rx = jiffies;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        gop++;
+#else
         mcl++;
+#endif
     }
 }
 
@@ -781,6 +943,12 @@
         return 0;
 
     printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    printk("#### netback rx using grant tables\n");
+#endif
 
     /* We can increase reservation by this much in net_rx_action(). */
     balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -Nru a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	2005-06-02 15:02:46 +01:00
+++ b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c	2005-06-02 15:02:46 +01:00
@@ -54,6 +54,25 @@
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+    unsigned char *p = (unsigned char *)ap;
+    int i;
+
+    printk(KERN_ALERT "#### rx_poll %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
 #ifndef __GFP_NOWARN
 #define __GFP_NOWARN 0
 #endif
@@ -82,6 +101,21 @@
 #define TX_TEST_IDX req_cons  /* conservative: not seen all our requests? */
 #endif
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
+
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
 
@@ -322,6 +356,14 @@
     for (i = np->tx_resp_cons; i != prod; i++) {
         id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
         skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+            printk(KERN_ALERT "netfront: query foreign access\n");
+        }
+        gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+        gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+        grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
         ADD_ID_TO_FREELIST(np->tx_skbs, id);
         dev_kfree_skb_irq(skb);
     }
@@ -356,6 +398,9 @@
     struct sk_buff *skb;
     int i, batch_target;
     NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    int ref;
+#endif
 
     if (unlikely(np->backend_state != BEST_CONNECTED))
         return;
@@ -388,7 +433,16 @@
         np->rx_skbs[id] = skb;
 
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if ((ref = gnttab_claim_grant_reference(&gref_rx_head, gref_rx_terminal)) < 0) {
+            printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+            BUG();
+        }
+        grant_rx_ref[id] = ref;
+        gnttab_grant_foreign_transfer_ref(ref, rdomid,
+                                          virt_to_machine(skb->head) >> PAGE_SHIFT);
+        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
         rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
 
         /* Remove this page from pseudo phys map before passing back to Xen. */
@@ -438,6 +492,10 @@
     struct net_private *np = netdev_priv(dev);
     netif_tx_request_t *tx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    unsigned int ref;
+    unsigned long mfn;
+#endif
 
     if (unlikely(np->tx_full)) {
         printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -472,7 +530,18 @@
     tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
 
     tx->id = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) < 0) {
+        printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+        BUG();
+    }
+    mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+    gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+    tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+    grant_tx_ref[id] = ref;
+#else
     tx->addr = virt_to_machine(skb->data);
+#endif
     tx->size = skb->len;
     tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
 
@@ -532,6 +601,10 @@
     int work_done, budget, more_to_do = 1;
     struct sk_buff_head rxq;
     unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    unsigned long mfn;
+    grant_ref_t ref;
+#endif
 
     spin_lock(&np->rx_lock);
 
@@ -544,7 +617,6 @@
 
     if ((budget = *pbudget) > dev->quota)
         budget = dev->quota;
-
     rp = np->rx->resp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
@@ -552,7 +624,6 @@
          (i != rp) && (work_done < budget);
          i++, work_done++) {
         rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
         /*
          * An error here is very odd. Usually indicates a backend bug,
          * low-memory condition, or that we didn't have reservation headroom.
@@ -567,11 +638,23 @@
             continue;
         }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        ref = grant_rx_ref[rx->id];
+        grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+        mfn = gnttab_end_foreign_transfer(ref);
+        gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
         skb = np->rx_skbs[rx->id];
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
         /* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        skb->data = skb->head + rx->addr;
+#else
         skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
         skb->len  = rx->status;
         skb->tail = skb->data + skb->len;
 
@@ -582,18 +665,33 @@
         np->stats.rx_bytes += rx->status;
 
         /* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
         mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
         mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
         mmu++;
         mcl->op = __HYPERVISOR_update_va_mapping;
         mcl->args[0] = (unsigned long)skb->head;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl->args[1] = (mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+#else
         mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+#endif
         mcl->args[2] = 0;
         mcl++;
 
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+            mfn;
+#else
             rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+        printk(KERN_ALERT "#### rx_poll enqueue vdata=%08x mfn=%08x ref=%04x\n",
+               skb->data, mfn, ref);
+#endif
         __skb_queue_tail(&rxq, skb);
     }
 
@@ -612,6 +710,11 @@
     }
 
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+        printk(KERN_ALERT "#### rx_poll dequeue vdata=%08x mfn=%08x\n",
+               skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+        dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
         /*
          * Enough room in skbuff for the data we were passed? Also, Linux
          * expects at least 16 bytes headroom in each receive buffer.
@@ -620,6 +723,7 @@
             unlikely((skb->data - skb->head) < 16)) {
             nskb = NULL;
 
+            /* Only copy the packet if it fits in the current MTU. */
             if (skb->len <= (dev->mtu + ETH_HLEN)) {
                 if ((skb->tail > skb->end) && net_ratelimit())
@@ -650,7 +754,6 @@
         /* Set the shared-info area, which is hidden behind the real data. */
         init_skb_shinfo(skb);
-
         /* Ethernet-specific work. Delayed to here as it peeks the header. */
         skb->protocol = eth_type_trans(skb, dev);
@@ -923,6 +1026,9 @@
     network_connect(dev, status);
     np->evtchn = status->evtchn;
     np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+    rdomid = status->domid;
+#endif
     (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
     netctrl_connected_count();
     (void)send_fake_arp(dev);
@@ -966,10 +1072,18 @@
     np->rx_max_target = RX_MAX_TARGET;
 
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+    for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
         np->tx_skbs[i] = (void *)(i+1);
-    for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
+    for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
         np->rx_skbs[i] = (void *)(i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
 
     dev->open            = network_open;
     dev->hard_start_xmit = network_start_xmit;
@@ -1271,6 +1385,22 @@
     if (xen_start_info.flags & SIF_INITDOMAIN)
         return 0;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+                                      &gref_tx_head, &gref_tx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+                                      &gref_rx_head, &gref_rx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
     if ((err = xennet_proc_init()) != 0)
         return err;
 
@@ -1290,6 +1420,16 @@
     return err;
 }
 
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
+}
+
 static void vif_suspend(struct net_private *np)
 {
     /* Avoid having tx/rx stuff happen until we're ready. */
@@ -1482,3 +1622,4 @@
 #endif
 
 module_init(netif_init);
+module_exit(netif_exit);
diff -Nru a/xen/common/grant_table.c b/xen/common/grant_table.c
--- a/xen/common/grant_table.c	2005-06-02 15:02:46 +01:00
+++ b/xen/common/grant_table.c	2005-06-02 15:02:46 +01:00
@@ -797,6 +797,146 @@
 }
 #endif
 
+static long
+gnttab_donate(gnttab_op_t *uop, unsigned int count)
+{
+    struct domain *d = current->domain;
+    struct domain *e;
+    struct pfn_info *page;
+    u32 _d, _nd, x, y;
+    int i;
+    int result = GNTST_okay;
+
+    for (i = 0; i < count; i++) {
+        gnttab_donate_t *gop = &uop[i].u.donate;
+#if GRANT_DEBUG
+        printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+               i, gop->mfn, gop->domid, gop->handle);
+#endif
+        page = &frame_table[gop->mfn];
+
+        if (unlikely(IS_XEN_HEAP_FRAME(page))) {
+            printk("gnttab_donate: xen heap frame mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+            printk("gnttab_donate: invalid pfn for mfn=%08x\n", gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            gop->status = GNTST_bad_domain;
+            continue;
+        }
+
+        spin_lock(&d->page_alloc_lock);
+
+        /*
+         * The tricky bit: atomically release ownership while
+         * there is just one benign reference to the page
+         * (PGC_allocated). If that reference disappears then the
+         * deallocation routine will safely spin.
+         */
+        _d  = pickle_domptr(d);
+        _nd = page->u.inuse._domain;
+        y   = page->count_info;
+        do {
+            x = y;
+            if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+                         (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                       " caf=%08x, taf=%08x\n", page_to_pfn(page),
+                       d, d->id, unpickle_domptr(_nd), x,
+                       page->u.inuse.type_info);
+                spin_unlock(&d->page_alloc_lock);
+                put_domain(e);
+                return 0;
+            }
+            __asm__ __volatile__(
+                LOCK_PREFIX "cmpxchg8b %2"
+                : "=d" (_nd), "=a" (y),
+                  "=m" (*(volatile u64 *)(&page->count_info))
+                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+        } while (unlikely(_nd != _d) || unlikely(y != x));
+
+        /*
+         * Unlink from 'd'. At least one reference remains (now
+         * anonymous), so no one else is spinning to try to delete
+         * this page from 'd'.
+         */
+        d->tot_pages--;
+        list_del(&page->list);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        spin_lock(&e->page_alloc_lock);
+
+        /*
+         * Check that 'e' will accept the page and has reservation
+         * headroom. Also, a domain mustn't have PGC_allocated
+         * pages when it is dying.
+         */
+#ifdef GRANT_DEBUG
+        if (unlikely(e->tot_pages >= e->max_pages)) {
+            printk("gnttab_donate: no headroom tot_pages=%d max_pages=%d\n",
+                   e->tot_pages, e->max_pages);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(test_bit(DF_DYING, &e->d_flags))) {
+            printk("gnttab_donate: target domain is dying\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#else
+        ASSERT(e->tot_pages <= e->max_pages);
+        if (unlikely(test_bit(DF_DYING, &e->d_flags)) ||
+            unlikely(e->tot_pages == e->max_pages) ||
+            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: Transferee has no reservation headroom "
+                   "(%d,%d), or provided a bad grant ref (%08x), or is "
+                   "dying (%p).\n",
+                   e->tot_pages, e->max_pages, gop->handle, e->d_flags);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#endif
+        /* Okay, add the page to 'e'. */
+        if (unlikely(e->tot_pages++ == 0)) {
+            get_knownalive_domain(e);
+        }
+        list_add_tail(&page->list, &e->page_list);
+        page_set_owner(page, e);
+
+        spin_unlock(&e->page_alloc_lock);
+
+        /*
+         * Transfer is all done: tell the guest about its new page
+         * frame.
+         */
+        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+
+        put_domain(e);
+
+        gop->status = GNTST_okay;
+    }
+    return result;
+}
+
 long
 do_grant_table_op(
     unsigned int cmd, void *uop, unsigned int count)
@@ -831,6 +971,13 @@
         rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
         break;
 #endif
+    case GNTTABOP_donate:
+        if (unlikely(!array_access_ok(VERIFY_WRITE, uop, count,
+                                      sizeof(gnttab_op_t)))) {
+            goto out;
+        }
+        rc = gnttab_donate(uop, count);
+        break;
     default:
         rc = -ENOSYS;
         break;
@@ -1066,6 +1213,10 @@
     }
     sha->frame = __mfn_to_gpfn(rd, frame);
     sha->domid = rd->domain_id;
+#ifdef GRANT_DEBUG
+    printk("gnttab_notify: ref=%08x src=%08x dest=%08x mfn=%08x\n",
+           ref, frame, pfn, sha->frame);
+#endif
     wmb();
     sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
diff -Nru a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h	2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/grant_table.h	2005-06-02 15:02:46 +01:00
@@ -220,6 +220,19 @@
     s16         status;           /* 2: GNTST_* */
 } PACKED gnttab_dump_table_t; /* 4 bytes */
 
+/*
+ * GNTTABOP_donate_grant_ref: Donate <frame> to a foreign domain. The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate 4
+typedef struct {
+    memory_t    mfn;              /*  0 */
+    domid_t     domid;            /*  4 */
+    u16         handle;           /*  8 */
+    s16         status;           /* 10: GNTST_* */
+    u32         __pad;
+} PACKED gnttab_donate_t; /* 14 bytes */
 
 /*
  * Bitfield values for update_pin_status.flags.
@@ -273,6 +286,7 @@
         gnttab_unmap_grant_ref_t unmap_grant_ref;
         gnttab_setup_table_t     setup_table;
         gnttab_dump_table_t      dump_table;
+        gnttab_donate_t          donate;
         u8                       __dummy[24];
     } PACKED u;
 } PACKED gnttab_op_t; /* 32 bytes */
diff -Nru a/xen/include/public/io/netif.h b/xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h	2005-06-02 15:02:46 +01:00
+++ b/xen/include/public/io/netif.h	2005-06-02 15:02:46 +01:00
@@ -25,10 +25,13 @@
 
 typedef struct {
     u16          id;        /* 0: Echoed in response message.         */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    grant_ref_t  gref;      /* 2: Reference to incoming granted frame */
+#endif
 } PACKED netif_rx_request_t; /* 2 bytes */
 
 typedef struct {
-    memory_t addr;          /* 0: Machine address of packet.                 */
+    u32      addr;          /* 0: Offset in page of start of received packet */
     MEMORY_PADDING;
     u16      csum_valid:1;  /*    Protocol checksum is validated?     */
     u16      id:15;         /* 8:                                     */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel