[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] add grant table capabilities to netback and netfront.



# HG changeset patch
# User vh249@xxxxxxxxxxxxxxxxxxxx
# Node ID 7bc4ebdd56605b550cc10360dd8f748f95252f47
# Parent  a49bf96419a421637aedf01735141fb207fb43f0

add grant table capabilities to netback and netfront.

Signed-off-by: Vincent Hanquez <vincent@xxxxxxxxxxxxx>

diff -r a49bf96419a4 -r 7bc4ebdd5660 xen/common/grant_table.c
--- a/xen/common/grant_table.c  Mon Jul 25 09:56:50 2005
+++ b/xen/common/grant_table.c  Mon Jul 25 17:07:31 2005
@@ -809,6 +809,146 @@
 }
 #endif
 
+static long
+gnttab_donate(gnttab_donate_t *uop, unsigned int count)
+{
+    struct domain *d = current->domain;
+    struct domain *e;
+    struct pfn_info *page;
+    u32 _d, _nd, x, y;
+    int i;
+    int result = GNTST_okay;
+
+    for (i = 0; i < count; i++) {
+        gnttab_donate_t *gop = &uop[i];
+#if GRANT_DEBUG
+        printk("gnttab_donate: i=%d mfn=%08x domid=%d gref=%08x\n",
+               i, gop->mfn, gop->domid, gop->handle);
+#endif
+        page = &frame_table[gop->mfn];
+
+        if (unlikely(IS_XEN_HEAP_FRAME(page))) { 
+            printk("gnttab_donate: xen heap frame mfn=%lx\n", (unsigned long) 
gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely(!pfn_valid(page_to_pfn(page)))) {
+            printk("gnttab_donate: invalid pfn for mfn=%lx\n", (unsigned long) 
gop->mfn);
+            gop->status = GNTST_bad_virt_addr;
+            continue;
+        }
+        if (unlikely((e = find_domain_by_id(gop->domid)) == NULL)) {
+            printk("gnttab_donate: can't find domain %d\n", gop->domid);
+            gop->status = GNTST_bad_domain;
+            continue;
+        }
+
+        spin_lock(&d->page_alloc_lock);
+
+        /*
+         * The tricky bit: atomically release ownership while
+         * there is just one benign reference to the page
+         * (PGC_allocated). If that reference disappears then the
+         * deallocation routine will safely spin.
+         */
+        _d  = pickle_domptr(d);
+        _nd = page->u.inuse._domain;
+        y   = page->count_info;
+        do {
+            x = y;
+            if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
+                         (1 | PGC_allocated)) || unlikely(_nd != _d)) {
+                printk("gnttab_donate: Bad page values %p: ed=%p(%u), sd=%p,"
+                        " caf=%08x, taf=%08x\n", (void *) page_to_pfn(page),
+                        d, d->domain_id, unpickle_domptr(_nd), x, 
+                        page->u.inuse.type_info);
+                spin_unlock(&d->page_alloc_lock);
+                put_domain(e);
+                return 0;
+            }
+            __asm__ __volatile__(
+                LOCK_PREFIX "cmpxchg8b %2"
+                : "=d" (_nd), "=a" (y),
+                "=m" (*(volatile u64 *)(&page->count_info))
+                : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
+        } while (unlikely(_nd != _d) || unlikely(y != x));
+
+        /*
+         * Unlink from 'd'. At least one reference remains (now
+         * anonymous), so no one else is spinning to try to delete
+         * this page from 'd'.
+         */
+        d->tot_pages--;
+        list_del(&page->list);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        spin_lock(&e->page_alloc_lock);
+
+        /*
+         * Check that 'e' will accept the page and has reservation
+         * headroom.  Also, a domain mustn't have PGC_allocated
+         * pages when it is dying.
+         */
+#ifdef GRANT_DEBUG
+        if (unlikely(e->tot_pages >= e->max_pages)) {
+            printk("gnttab_donate: no headroom tot_pages=%d max_pages=%d\n",
+                   e->tot_pages, e->max_pages);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags))) {
+            printk("gnttab_donate: target domain is dying\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+        if (unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: gnttab_prepare_for_transfer fails\n");
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#else
+        ASSERT(e->tot_pages <= e->max_pages);
+        if (unlikely(test_bit(DOMFLAGS_DYING, &e->domain_flags)) ||
+            unlikely(e->tot_pages == e->max_pages) ||
+            unlikely(!gnttab_prepare_for_transfer(e, d, gop->handle))) {
+            printk("gnttab_donate: Transferee has no reservation headroom 
(%d,%d), or "
+                    "provided a bad grant ref (%08x), or is dying (%p).\n",
+                    e->tot_pages, e->max_pages, gop->handle, e->domain_flags);
+            spin_unlock(&e->page_alloc_lock);
+            put_domain(e);
+            result = GNTST_general_error;
+            break;
+        }
+#endif
+        /* Okay, add the page to 'e'. */
+        if (unlikely(e->tot_pages++ == 0)) {
+            get_knownalive_domain(e);
+        }
+        list_add_tail(&page->list, &e->page_list);
+        page_set_owner(page, e);
+
+        spin_unlock(&e->page_alloc_lock);
+
+        /*
+         * Transfer is all done: tell the guest about its new page
+         * frame.
+         */
+        gnttab_notify_transfer(e, d, gop->handle, gop->mfn);
+        
+        put_domain(e);
+
+        gop->status = GNTST_okay;
+    }
+    return result;
+}
+
 long 
 do_grant_table_op(
     unsigned int cmd, void *uop, unsigned int count)
@@ -843,6 +983,11 @@
         rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
         break;
 #endif
+    case GNTTABOP_donate:
+        if (unlikely(!array_access_ok(uop, count, sizeof(gnttab_donate_t))))
+            goto out;
+        rc = gnttab_donate(uop, count);
+        break;
     default:
         rc = -ENOSYS;
         break;
diff -r a49bf96419a4 -r 7bc4ebdd5660 xen/include/public/grant_table.h
--- a/xen/include/public/grant_table.h  Mon Jul 25 09:56:50 2005
+++ b/xen/include/public/grant_table.h  Mon Jul 25 17:07:31 2005
@@ -213,6 +213,19 @@
     s16         status;               /* GNTST_* */
 } gnttab_dump_table_t;
 
+/*
+ * GNTTABOP_donate: Donate <frame> to a foreign domain.  The
+ * foreign domain has previously registered the details of the transfer.
+ * These can be identified from <handle>, a grant reference.
+ */
+#define GNTTABOP_donate                4
+typedef struct {
+    memory_t    mfn;                 /*  0 */
+    domid_t     domid;               /*  4 */
+    u16         handle;               /*  8 */
+    s16         status;               /*  10: GNTST_* */
+    u32         __pad;
+} gnttab_donate_t;           /*  16 bytes */
 
 /*
  * Bitfield values for update_pin_status.flags.
diff -r a49bf96419a4 -r 7bc4ebdd5660 linux-2.6-xen-sparse/arch/xen/Kconfig
--- a/linux-2.6-xen-sparse/arch/xen/Kconfig     Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/Kconfig     Mon Jul 25 17:07:31 2005
@@ -96,6 +96,20 @@
          network interfaces within another guest OS. Unless you are building a
          dedicated device-driver domain, or your master control domain
          (domain 0), then you almost certainly want to say Y here.
+
+config XEN_NETDEV_GRANT_TX
+        bool "Grant table substrate for net drivers tx path (DANGEROUS)"
+        default n
+        help
+          This introduces the use of grant tables as a data exchange mechanism
+          between the frontend and backend network drivers.
+
+config XEN_NETDEV_GRANT_RX
+        bool "Grant table substrate for net drivers rx path (DANGEROUS)"
+        default n
+        help
+          This introduces the use of grant tables as a data exchange mechanism
+          between the frontend and backend network drivers.
 
 config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
        bool "Pipelined transmitter (DANGEROUS)"
diff -r a49bf96419a4 -r 7bc4ebdd5660 
linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Mon Jul 
25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32       Mon Jul 
25 17:07:31 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.12-xen0
-# Sat Jul  9 09:19:47 2005
+# Mon Jul 25 09:48:34 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -18,6 +18,8 @@
 CONFIG_XEN_NETDEV_BACKEND=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_GRANT_TX is not set
+# CONFIG_XEN_NETDEV_GRANT_RX is not set
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set
@@ -176,38 +178,12 @@
 # PCI Hotplug Support
 #
 # CONFIG_HOTPLUG_PCI is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-# CONFIG_DEBUG_HIGHMEM is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-# CONFIG_DEBUG_INFO is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_FRAME_POINTER is not set
-CONFIG_EARLY_PRINTK=y
-# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
-# CONFIG_DEBUG_STACK_USAGE is not set
-# CONFIG_DEBUG_PAGEALLOC is not set
-# CONFIG_4KSTACKS is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
-CONFIG_X86_MPPARSE=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_PC=y
 CONFIG_SECCOMP=y
+CONFIG_EARLY_PRINTK=y
 
 #
 # Executable file formats
@@ -1274,3 +1250,29 @@
 CONFIG_CRC32=y
 CONFIG_LIBCRC32C=y
 CONFIG_ZLIB_INFLATE=y
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_4KSTACKS is not set
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
diff -r a49bf96419a4 -r 7bc4ebdd5660 
linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c     Mon Jul 25 17:07:31 2005
@@ -166,8 +166,14 @@
     u16           flags;
 
     flags = shared[ref].flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    /*
+     * But can't flags == (GTF_accept_transfer | GTF_transfer_completed)
+     * if gnttab_donate executes without interruption???
+     */
+#else
     ASSERT(flags == (GTF_accept_transfer | GTF_transfer_committed));
-
+#endif
     /*
      * If a transfer is committed then wait for the frame address to appear.
      * Otherwise invalidate the grant entry against future use.
diff -r a49bf96419a4 -r 7bc4ebdd5660 
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jul 25 
09:56:50 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Mon Jul 25 
17:07:31 2005
@@ -18,6 +18,24 @@
 #include <linux/delay.h>
 #endif
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, unsigned char *p)
+{
+       int i;
+
+       printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
+       for (i = 0; i < 20; i++) {
+               printk("%02x", p[i]);
+       }
+       printk("\n");
+}
+#endif
+#endif
+
 static void netif_idx_release(u16 pending_idx);
 static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif, 
@@ -41,7 +59,9 @@
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
+#endif
 static unsigned char rx_notify[NR_EVENT_CHANNELS];
 
 /* Don't currently gate addition of an interface to the tx scheduling list. */
@@ -68,7 +88,20 @@
 static PEND_RING_IDX dealloc_prod, dealloc_cons;
 
 static struct sk_buff_head tx_queue;
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static u16 grant_tx_ref[MAX_PENDING_REQS];
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
+#endif
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
 static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#define GRANT_INVALID_REF (0xFFFF)
+#endif
 
 static struct list_head net_schedule_list;
 static spinlock_t net_schedule_list_lock;
@@ -91,6 +124,7 @@
     return mfn;
 }
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_RX
 static void free_mfn(unsigned long mfn)
 {
     unsigned long flags;
@@ -102,6 +136,7 @@
         BUG();
     spin_unlock_irqrestore(&mfn_lock, flags);
 }
+#endif
 
 static inline void maybe_schedule_tx_action(void)
 {
@@ -160,7 +195,17 @@
         dev_kfree_skb(skb);
         skb = nskb;
     }
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+#ifdef DEBUG_GRANT
+    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x 
gr=%04x\n",
+           netif->rx->req_prod,
+           netif->rx_req_cons,
+           netif->rx->ring[
+                  MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
+           netif->rx->ring[
+                  MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
+#endif
+#endif
     netif->rx_req_cons++;
     netif_get(netif);
 
@@ -201,7 +246,11 @@
     u16 size, id, evtchn;
     multicall_entry_t *mcl;
     mmu_update_t *mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_donate_t *gop;
+#else
     struct mmuext_op *mmuext;
+#endif
     unsigned long vdata, mdata, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
@@ -212,7 +261,12 @@
 
     mcl = rx_mcl;
     mmu = rx_mmu;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
+
     while ( (skb = skb_dequeue(&rx_queue)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -228,7 +282,6 @@
             skb_queue_head(&rx_queue, skb);
             break;
         }
-
         /*
          * Set the new P2M table entry before reassigning the old data page.
          * Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -239,6 +292,14 @@
                                pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
         mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        gop->mfn = mdata >> PAGE_SHIFT;
+        gop->domid = netif->domid;
+        gop->handle = netif->rx->ring[
+        MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
+        netif->rx_resp_prod_copy++;
+        gop++;
+#else
         mcl->op = __HYPERVISOR_mmuext_op;
         mcl->args[0] = (unsigned long)mmuext;
         mcl->args[1] = 1;
@@ -249,13 +310,16 @@
         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
         mmuext->mfn = mdata >> PAGE_SHIFT;
         mmuext++;
-
+#endif
         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
         mmu->val = __pa(vdata) >> PAGE_SHIFT;  
         mmu++;
 
         __skb_queue_tail(&rxq, skb);
 
+#ifdef DEBUG_GRANT
+        dump_packet('a', mdata, vdata);
+#endif
         /* Filled the batch queue? */
         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
             break;
@@ -271,12 +335,24 @@
     mcl->args[3] = DOMID_SELF;
     mcl++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+#else
     mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+#endif
     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
         BUG();
 
     mcl = rx_mcl;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
+                                           grant_rx_op, gop - grant_rx_op))) {
+        BUG();
+    }
+    gop = grant_rx_op;
+#else
     mmuext = rx_mmuext;
+#endif
     while ( (skb = __skb_dequeue(&rxq)) != NULL )
     {
         netif   = netdev_priv(skb->dev);
@@ -284,9 +360,12 @@
 
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mdata = (unsigned long)skb->data & ~PAGE_MASK;
+#else
         mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
                    ((unsigned long)skb->data & ~PAGE_MASK));
-        
+#endif
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->frag_list = NULL;
@@ -299,13 +378,16 @@
 
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        BUG_ON(gop->status != 0);
+#else
         if ( unlikely(mcl[1].result != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
             free_mfn(mdata >> PAGE_SHIFT);
             status = NETIF_RSP_ERROR;
         }
-
+#endif
         evtchn = netif->evtchn;
         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
         if ( make_rx_response(netif, id, status, mdata,
@@ -318,9 +400,13 @@
 
         netif_put(netif);
         dev_kfree_skb(skb);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mcl++;
+        gop++;
+#else
         mcl += 2;
         mmuext += 1;
+#endif
     }
 
     while ( notify_nr != 0 )
@@ -404,6 +490,7 @@
     netif_schedule_work(netif);
 }
 
+/* Called after netfront has transmitted */
 static void net_tx_action(unsigned long unused)
 {
     struct list_head *ent;
@@ -412,13 +499,40 @@
     netif_tx_request_t txreq;
     u16 pending_idx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_unmap_grant_ref_t unmap_ops[MAX_PENDING_REQS];
+    gnttab_unmap_grant_ref_t *gop;
+
+    gnttab_map_grant_ref_t map_ops[MAX_PENDING_REQS];
+    gnttab_map_grant_ref_t *mop;
+#else
     multicall_entry_t *mcl;
+#endif
     PEND_RING_IDX dc, dp;
     unsigned int data_len;
 
+
     if ( (dc = dealloc_cons) == (dp = dealloc_prod) )
         goto skip_dealloc;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    /*
+     * Free up any grants we have finished using
+     */
+    gop = unmap_ops;
+    while (dc != dp) {
+        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+        gop->host_virt_addr = MMAP_VADDR(pending_idx);
+        gop->dev_bus_addr = 0;
+        gop->handle = grant_tx_ref[pending_idx];
+        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
+        gop++;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                           unmap_ops, gop - unmap_ops))) {
+        BUG();
+    }
+#else
     mcl = tx_mcl;
     while ( dc != dp )
     {
@@ -433,10 +547,13 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( dealloc_cons != dp )
     {
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         /* The update_va_mapping() must not fail. */
         BUG_ON(mcl[0].result != 0);
+#endif
 
         pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
 
@@ -460,11 +577,17 @@
         
         netif_put(netif);
 
+#ifndef CONFIG_XEN_NETDEV_GRANT_TX
         mcl++;
+#endif
     }
 
  skip_dealloc:
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    mop = map_ops;
+#else
     mcl = tx_mcl;
+#endif
     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
             !list_empty(&net_schedule_list) )
     {
@@ -486,7 +609,6 @@
         rmb(); /* Ensure that we see the request before we copy it. */
         memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, 
                sizeof(txreq));
-
         /* Credit-based scheduling. */
         if ( txreq.size > netif->remaining_credit )
         {
@@ -566,13 +688,20 @@
 
         /* Packets passed to netif_rx() must have some headroom. */
         skb_reserve(skb, 16);
-
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        mop->host_virt_addr = MMAP_VADDR(pending_idx);
+        mop->dom = netif->domid;
+        mop->ref = txreq.addr >> PAGE_SHIFT;
+        mop->flags = GNTMAP_host_map | GNTMAP_readonly;
+        mop++;
+#else
        MULTI_update_va_mapping_otherdomain(
            mcl, MMAP_VADDR(pending_idx),
            pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
            0, netif->domid);
 
         mcl++;
+#endif
 
         memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
         pending_tx_info[pending_idx].netif = netif;
@@ -582,11 +711,26 @@
 
         pending_cons++;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if ((mop - map_ops) >= ARRAY_SIZE(map_ops))
+            break;
+#else
         /* Filled the batch queue? */
         if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
             break;
-    }
-
+#endif
+    }
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (mop == map_ops) {
+        return;
+    }
+    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                           map_ops, mop - map_ops))) {
+        BUG();
+    }
+    mop = map_ops;
+#else
     if ( mcl == tx_mcl )
         return;
 
@@ -594,6 +738,7 @@
         BUG();
 
     mcl = tx_mcl;
+#endif
     while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
     {
         pending_idx = *((u16 *)skb->data);
@@ -601,6 +746,20 @@
         memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
 
         /* Check the remap error code. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        if (unlikely(mop->dev_bus_addr == 0)) {
+            printk(KERN_ALERT "#### netback grant fails\n");
+            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+            netif_put(netif);
+            kfree_skb(skb);
+            mop++;
+            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+            continue;
+        }
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+                             FOREIGN_FRAME(mop->dev_bus_addr);
+        grant_tx_ref[pending_idx] = mop->handle;
+#else
         if ( unlikely(mcl[0].result != 0) )
         {
             DPRINTK("Bad page frame\n");
@@ -614,6 +773,7 @@
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
             FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
+#endif
 
         data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;
 
@@ -621,7 +781,6 @@
         memcpy(skb->data, 
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                data_len);
-
         if ( data_len < txreq.size )
         {
             /* Append the packet payload as a fragment. */
@@ -655,7 +814,11 @@
         netif_rx(skb);
         netif->dev->last_rx = jiffies;
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        mop++;
+#else
         mcl++;
+#endif
     }
 }
 
@@ -775,6 +938,12 @@
         return 0;
 
     printk("Initialising Xen netif backend\n");
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    printk("#### netback tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    printk("#### netback rx using grant tables\n");
+#endif
 
     /* We can increase reservation by this much in net_rx_action(). */
     balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
diff -r a49bf96419a4 -r 7bc4ebdd5660 
linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Jul 25 17:07:31 2005
@@ -50,6 +50,9 @@
     /* Private indexes into shared ring. */
     NETIF_RING_IDX rx_req_cons;
     NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    NETIF_RING_IDX rx_resp_prod_copy; /* private version of shared variable */
+#endif
     NETIF_RING_IDX tx_req_cons;
     NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
 
diff -r a49bf96419a4 -r 7bc4ebdd5660 xen/include/public/io/netif.h
--- a/xen/include/public/io/netif.h     Mon Jul 25 09:56:50 2005
+++ b/xen/include/public/io/netif.h     Mon Jul 25 17:07:31 2005
@@ -23,10 +23,17 @@
 
 typedef struct {
     u16       id;    /* Echoed in response message.        */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    grant_ref_t gref;  /* 2: Reference to incoming granted frame */
+#endif
 } netif_rx_request_t;
 
 typedef struct {
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    u32      addr;   /*  0: Offset in page of start of received packet  */
+#else
     memory_t addr;   /* Machine address of packet.              */
+#endif
     u16      csum_valid:1; /* Protocol checksum is validated?       */
     u16      id:15;
     s16      status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
diff -r a49bf96419a4 -r 7bc4ebdd5660 
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Jul 25 
09:56:50 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Mon Jul 25 
17:07:31 2005
@@ -54,6 +54,25 @@
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#ifdef GRANT_DEBUG
+static void
+dump_packet(int tag, u32 addr, u32 ap)
+{
+    unsigned char *p = (unsigned char *)ap;
+    int i;
+    
+    printk(KERN_ALERT "#### rx_poll   %c %08x ", tag & 0xff, addr);
+    for (i = 0; i < 20; i++) {
+        printk("%02x", p[i]);
+    }
+    printk("\n");
+}
+#endif
+#endif
+
 #ifndef __GFP_NOWARN
 #define __GFP_NOWARN 0
 #endif
@@ -82,6 +101,21 @@
 #define TX_TEST_IDX req_cons  /* conservative: not seen all our requests? */
 #endif
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+static grant_ref_t gref_tx_head, gref_tx_terminal;
+static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
+#endif
+
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+static grant_ref_t gref_rx_head, gref_rx_terminal;
+static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
+#endif
+
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+static domid_t rdomid = 0;
+#define GRANT_INVALID_REF      (0xFFFF)
+#endif
+
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
 
@@ -322,6 +356,14 @@
         for (i = np->tx_resp_cons; i != prod; i++) {
             id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+            if (gnttab_query_foreign_access(grant_tx_ref[id]) != 0) {
+                printk(KERN_ALERT "netfront: query foreign access\n");
+            }
+            gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
+            gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
+            grant_tx_ref[id] = GRANT_INVALID_REF;
+#endif
             ADD_ID_TO_FREELIST(np->tx_skbs, id);
             dev_kfree_skb_irq(skb);
         }
@@ -356,6 +398,9 @@
     struct sk_buff *skb;
     int i, batch_target;
     NETIF_RING_IDX req_prod = np->rx->req_prod;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    int ref;
+#endif
 
     if (unlikely(np->backend_state != BEST_CONNECTED))
         return;
@@ -388,7 +433,16 @@
         np->rx_skbs[id] = skb;
         
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
-        
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        if ((ref = gnttab_claim_grant_reference(&gref_rx_head, 
gref_rx_terminal)) < 0) {
+            printk(KERN_ALERT "#### netfront can't claim rx reference\n");
+            BUG();
+        }
+        grant_rx_ref[id] = ref;
+        gnttab_grant_foreign_transfer_ref(ref, rdomid,
+        virt_to_machine(skb->head) >> PAGE_SHIFT);
+        np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
+#endif
         rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
 
        /* Remove this page from pseudo phys map before passing back to Xen. */
@@ -436,6 +490,10 @@
     struct net_private *np = netdev_priv(dev);
     netif_tx_request_t *tx;
     NETIF_RING_IDX i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    unsigned int ref;
+    unsigned long mfn;
+#endif
 
     if (unlikely(np->tx_full)) {
         printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
@@ -470,7 +528,18 @@
     tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
 
     tx->id   = id;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if ((ref = gnttab_claim_grant_reference(&gref_tx_head, gref_tx_terminal)) 
< 0) {
+        printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
+        BUG();
+    }
+    mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+    gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
+    tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+    grant_tx_ref[id] = ref;
+#else
     tx->addr = virt_to_machine(skb->data);
+#endif
     tx->size = skb->len;
     tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
 
@@ -530,6 +599,10 @@
     int work_done, budget, more_to_do = 1;
     struct sk_buff_head rxq;
     unsigned long flags;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    unsigned long mfn;
+    grant_ref_t ref;
+#endif
 
     spin_lock(&np->rx_lock);
 
@@ -542,7 +615,6 @@
 
     if ((budget = *pbudget) > dev->quota)
         budget = dev->quota;
-
     rp = np->rx->resp_prod;
     rmb(); /* Ensure we see queued responses up to 'rp'. */
 
@@ -550,7 +622,6 @@
                    (i != rp) && (work_done < budget);
                    i++, work_done++) {
         rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
-
         /*
          * An error here is very odd. Usually indicates a backend bug,
          * low-memory condition, or that we didn't have reservation headroom.
@@ -565,11 +636,23 @@
             continue;
         }
 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        ref = grant_rx_ref[rx->id];
+        grant_rx_ref[rx->id] = GRANT_INVALID_REF;
+
+        mfn = gnttab_end_foreign_transfer(ref);
+        gnttab_release_grant_reference(&gref_rx_head, ref);
+#endif
+
         skb = np->rx_skbs[rx->id];
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
         /* NB. We handle skb overflow later. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        skb->data = skb->head + rx->addr;
+#else
         skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+#endif
         skb->len  = rx->status;
         skb->tail = skb->data + skb->len;
 
@@ -580,16 +663,32 @@
         np->stats.rx_bytes += rx->status;
 
         /* Remap the page. */
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
+#else
         mmu->ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+#endif
         mmu->val  = __pa(skb->head) >> PAGE_SHIFT;
         mmu++;
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+       MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
+                               pfn_pte_ma(mfn, PAGE_KERNEL), 0);
+#else
        MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
                                pfn_pte_ma(rx->addr >> PAGE_SHIFT, 
PAGE_KERNEL), 0);
+#endif
         mcl++;
 
         phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = 
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+            mfn;
+#else
             rx->addr >> PAGE_SHIFT;
-
+#endif
+#ifdef GRANT_DEBUG
+        printk(KERN_ALERT "#### rx_poll     enqueue vdata=%08x mfn=%08x 
ref=%04x\n",
+               skb->data, mfn, ref);
+#endif
         __skb_queue_tail(&rxq, skb);
     }
 
@@ -608,6 +707,11 @@
     }
 
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
+#ifdef GRANT_DEBUG
+         printk(KERN_ALERT "#### rx_poll     dequeue vdata=%08x mfn=%08x\n",
+                skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+         dump_packet('d', skb->data, (unsigned long)skb->data);
+#endif
         /*
          * Enough room in skbuff for the data we were passed? Also, Linux 
          * expects at least 16 bytes headroom in each receive buffer.
@@ -615,6 +719,7 @@
         if (unlikely(skb->tail > skb->end) || 
                        unlikely((skb->data - skb->head) < 16)) {
             nskb = NULL;
+
 
             /* Only copy the packet if it fits in the current MTU. */
             if (skb->len <= (dev->mtu + ETH_HLEN)) {
@@ -646,7 +751,6 @@
         
         /* Set the shared-info area, which is hidden behind the real data. */
         init_skb_shinfo(skb);
-
         /* Ethernet-specific work. Delayed to here as it peeks the header. */
         skb->protocol = eth_type_trans(skb, dev);
 
@@ -919,6 +1023,9 @@
     network_connect(dev, status);
     np->evtchn = status->evtchn;
     np->irq = bind_evtchn_to_irq(np->evtchn);
+#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
+    rdomid = status->domid;
+#endif
     (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
     netctrl_connected_count();
     (void)send_fake_arp(dev);
@@ -962,10 +1069,18 @@
     np->rx_max_target = RX_MAX_TARGET;
 
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for (i = 0; i <= NETIF_TX_RING_SIZE; i++)
+    for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
         np->tx_skbs[i] = (void *)((unsigned long) i+1);
-    for (i = 0; i <= NETIF_RX_RING_SIZE; i++)
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+        grant_tx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
+    for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
         np->rx_skbs[i] = (void *)((unsigned long) i+1);
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+        grant_rx_ref[i] = GRANT_INVALID_REF;
+#endif
+    }
 
     dev->open            = network_open;
     dev->hard_start_xmit = network_start_xmit;
@@ -1267,6 +1382,22 @@
 
     if (xen_start_info.flags & SIF_INITDOMAIN)
         return 0;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
+                                      &gref_tx_head, &gref_tx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront tx using grant tables\n");
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
+                                      &gref_rx_head, &gref_rx_terminal) < 0) {
+        printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+        return 1;
+    }
+    printk(KERN_ALERT "#### netfront rx using grant tables\n");
+#endif
 
     if ((err = xennet_proc_init()) != 0)
         return err;
@@ -1284,6 +1415,16 @@
 
     DPRINTK("< err=%d\n", err);
     return err;
+}
+
+static void netif_exit(void)
+{
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+    gnttab_free_grant_references(NETIF_TX_RING_SIZE, gref_tx_head);
+#endif
+#ifdef CONFIG_XEN_NETDEV_GRANT_RX
+    gnttab_free_grant_references(NETIF_RX_RING_SIZE, gref_rx_head);
+#endif
 }
 
 static void vif_suspend(struct net_private *np)
@@ -1478,3 +1619,4 @@
 #endif
 
 module_init(netif_init);
+module_exit(netif_exit);
diff -r a49bf96419a4 -r 7bc4ebdd5660 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Mon Jul 25 09:56:50 2005
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32       Mon Jul 25 17:07:31 2005
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.12-xenU
-# Sun Jul 10 17:32:04 2005
+# Mon Jul 25 10:06:06 2005
 #
 CONFIG_XEN=y
 CONFIG_ARCH_XEN=y
@@ -15,6 +15,8 @@
 CONFIG_XEN_BLKDEV_GRANT=y
 CONFIG_XEN_BLKDEV_FRONTEND=y
 CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_GRANT_TX is not set
+# CONFIG_XEN_NETDEV_GRANT_RX is not set
 # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
 # CONFIG_XEN_BLKDEV_TAP is not set
 # CONFIG_XEN_SHADOW_MODE is not set

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.