
[Xen-devel] [PATCH 3/3] xen-netback: reintroduce guest Rx stall detection



If a frontend is not receiving packets, it is useful to detect this and
turn off the carrier so packets are dropped early instead of being
queued and only drained once they expire.

A to-guest queue is stalled if it does not have enough free request
slots (fewer than XEN_NETBK_RX_SLOTS_MAX) for an extended period of
time (default 60 s).

If at least one queue is stalled, the carrier is turned off (in the
expectation that the other queues will soon stall as well).  The
carrier is only turned on once all queues are ready.

When the frontend connects, all the queues start in the stalled state
and only become ready once the frontend queues enough Rx requests.
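
The timeout is exposed as a read-only module parameter
(rx_stall_timeout_msecs, default 60000), so it can be adjusted when
xen-netback is loaded if 60 s does not suit a deployment, for example
(an illustrative value, not a recommendation):

  modprobe xen-netback rx_stall_timeout_msecs=30000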

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 drivers/net/xen-netback/common.h    |    5 +++
 drivers/net/xen-netback/interface.c |    5 ++-
 drivers/net/xen-netback/netback.c   |   76 +++++++++++++++++++++++++++++++++++
 drivers/net/xen-netback/xenbus.c    |    1 +
 4 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index c264240..083ecc9 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -179,6 +179,8 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 
        unsigned int rx_queue_max;
        unsigned int rx_queue_len;
+       unsigned long last_rx_time;
+       bool stalled;
 
        struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
 
@@ -232,6 +234,9 @@ struct xenvif {
        /* Queues */
        struct xenvif_queue *queues;
        unsigned int num_queues; /* active queues, resource allocated */
+       unsigned int stalled_queues;
+
+       spinlock_t lock;
 
 #ifdef CONFIG_DEBUG_FS
        struct dentry *xenvif_dbg_root;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index a134d52..895fe84 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -419,6 +419,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
        vif->queues = NULL;
        vif->num_queues = 0;
 
+       spin_lock_init(&vif->lock);
+
        dev->netdev_ops = &xenvif_netdev_ops;
        dev->hw_features = NETIF_F_SG |
                NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -505,7 +507,6 @@ void xenvif_carrier_on(struct xenvif *vif)
                dev_set_mtu(vif->dev, ETH_DATA_LEN);
        netdev_update_features(vif->dev);
        set_bit(VIF_STATUS_CONNECTED, &vif->status);
-       netif_carrier_on(vif->dev);
        if (netif_running(vif->dev))
                xenvif_up(vif);
        rtnl_unlock();
@@ -565,6 +566,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
                disable_irq(queue->rx_irq);
        }
 
+       queue->stalled = true;
+
        task = kthread_create(xenvif_kthread_guest_rx,
                              (void *)queue, "%s-guest-rx", queue->name);
        if (IS_ERR(task)) {
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 15893dc..25f4c06 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -62,6 +62,13 @@ unsigned int rx_drain_timeout_msecs = 10000;
 module_param(rx_drain_timeout_msecs, uint, 0444);
 unsigned int rx_drain_timeout_jiffies;
 
+/* The length of time before the frontend is considered unresponsive
+ * because it isn't providing Rx slots.
+ */
+static unsigned int rx_stall_timeout_msecs = 60000;
+module_param(rx_stall_timeout_msecs, uint, 0444);
+static unsigned int rx_stall_timeout_jiffies;
+
 unsigned int xenvif_max_queues;
 module_param_named(max_queues, xenvif_max_queues, uint, 0644);
 MODULE_PARM_DESC(max_queues,
@@ -649,6 +656,8 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
                RING_IDX ring_slots_used;
                int i;
 
+               queue->last_rx_time = jiffies;
+
                /* We need a cheap worse case estimate for the number of
                 * slots we'll use.
                 */
@@ -1972,10 +1981,67 @@ err:
        return err;
 }
 
+static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
+{
+       struct xenvif *vif = queue->vif;
+
+       queue->stalled = true;
+
+       /* At least one queue has stalled? Disable the carrier. */
+       spin_lock(&vif->lock);
+       if (vif->stalled_queues++ == 0) {
+               netdev_info(vif->dev, "Guest Rx stalled");
+               netif_carrier_off(vif->dev);
+       }
+       spin_unlock(&vif->lock);
+}
+
+static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
+{
+       struct xenvif *vif = queue->vif;
+
+       queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
+       queue->stalled = false;
+
+       /* All queues are ready? Enable the carrier. */
+       spin_lock(&vif->lock);
+       if (--vif->stalled_queues == 0) {
+               netdev_info(vif->dev, "Guest Rx ready");
+               netif_carrier_on(vif->dev);
+       }
+       spin_unlock(&vif->lock);
+}
+
+static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+{
+       RING_IDX prod, cons;
+
+       prod = queue->rx.sring->req_prod;
+       cons = queue->rx.req_cons;
+
+       return !queue->stalled
+               && prod - cons < XEN_NETBK_RX_SLOTS_MAX
+               && time_after(jiffies,
+                             queue->last_rx_time + rx_stall_timeout_jiffies);
+}
+
+static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+{
+       RING_IDX prod, cons;
+
+       prod = queue->rx.sring->req_prod;
+       cons = queue->rx.req_cons;
+
+       return queue->stalled
+               && prod - cons >= XEN_NETBK_RX_SLOTS_MAX;
+}
+
 static bool xenvif_have_rx_work(struct xenvif_queue *queue)
 {
        return (!skb_queue_empty(&queue->rx_queue)
                && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+               || xenvif_rx_queue_stalled(queue)
+               || xenvif_rx_queue_ready(queue)
                || kthread_should_stop()
                || queue->vif->disabled;
 }
@@ -2050,6 +2116,15 @@ int xenvif_kthread_guest_rx(void *data)
                if (!skb_queue_empty(&queue->rx_queue))
                        xenvif_rx_action(queue);
 
+               /* If the guest hasn't provided any Rx slots for a
+                * while it's probably not responsive, drop the
+                * carrier so packets are dropped earlier.
+                */
+               if (xenvif_rx_queue_stalled(queue))
+                       xenvif_queue_carrier_off(queue);
+               else if (xenvif_rx_queue_ready(queue))
+                       xenvif_queue_carrier_on(queue);
+
                /* Queued packets may have foreign pages from other
                 * domains.  These cannot be queued indefinitely as
                 * this would starve guests of grant refs and transmit
@@ -2120,6 +2195,7 @@ static int __init netback_init(void)
                goto failed_init;
 
        rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+       rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
 
 #ifdef CONFIG_DEBUG_FS
        xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 96a754d..4e56a27 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -711,6 +711,7 @@ static void connect(struct backend_info *be)
        be->vif->queues = vzalloc(requested_num_queues *
                                  sizeof(struct xenvif_queue));
        be->vif->num_queues = requested_num_queues;
+       be->vif->stalled_queues = requested_num_queues;
 
        for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
                queue = &be->vif->queues[queue_index];
-- 
1.7.10.4


