[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH xenvbd] Introduce a BlkifRing watchdog



From: Paul Durrant <pdurrant@xxxxxxxxxx>

Analogous to similar watchdog threads for XENVIF transmitter and receiver
rings, this patch introduces code to start a watchdog thread for blkif rings.
The thread wakes every 30s and checks for responses remaining pending on the
ring (without the frontend making progress) across two consecutive iterations.
If the ring appears to be 'stuck' in this manner then the ring DebugCallback()
function is triggered, the ring is polled and an event is sent to wake up
the backend.

Signed-off-by: Paul Durrant <pdurrant@xxxxxxxxxx>
---
 src/xenvbd/ring.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/src/xenvbd/ring.c b/src/xenvbd/ring.c
index 4bb7475ddc3a..07aa841274ad 100644
--- a/src/xenvbd/ring.c
+++ b/src/xenvbd/ring.c
@@ -47,6 +47,7 @@
 #include "srbext.h"
 #include "driver.h"
 #include "granter.h"
+#include "thread.h"
 
 #include "util.h"
 #include "debug.h"
@@ -90,6 +91,7 @@ typedef struct _XENVBD_BLKIF_RING {
     ULONG                           ResponsesProcessed;
     PXENBUS_DEBUG_CALLBACK          DebugCallback;
     LARGE_INTEGER                   TimeOfLastErrorLog;
+    PXENVBD_THREAD                  WatchdogThread;
 } XENVBD_BLKIF_RING, *PXENVBD_BLKIF_RING;
 
 typedef enum _XENVBD_STAT {
@@ -1623,6 +1625,90 @@ BlkifRingDpc(
                   TRUE);
 }
 
+#define TIME_US(_us)        ((_us) * 10)                // microseconds -> 100ns units
+#define TIME_MS(_ms)        (TIME_US((_ms) * 1000))     // milliseconds -> 100ns units
+#define TIME_S(_s)          (TIME_MS((_s) * 1000))      // seconds -> 100ns units
+#define TIME_RELATIVE(_t)   (-(_t))                     // negated interval, as assigned to the wait Timeout
+
+#define XENVBD_WATCHDOG_PERIOD 30                       // in seconds: it is passed through TIME_S()
+
+static NTSTATUS                         // always STATUS_SUCCESS, returned once the thread has been alerted
+RingWatchdog(                           // watchdog loop for one blkif ring: detects an apparently stuck ring and kicks it
+    IN  PXENVBD_THREAD  Self,           // this thread's handle: source of the wake event and the alert flag
+    IN  PVOID           Context         // the XENVBD_BLKIF_RING to watch
+    )
+{
+    PXENVBD_BLKIF_RING  BlkifRing = Context;
+    PXENVBD_RING        Ring = BlkifRing->Ring;
+    PROCESSOR_NUMBER    ProcNumber;
+    GROUP_AFFINITY      Affinity;       // NOTE(review): only Group and Mask are assigned below - confirm the remaining fields may stay uninitialized
+    LARGE_INTEGER       Timeout;
+    RING_IDX            rsp_prod;       // Shared->rsp_prod as sampled on the previous pass
+    RING_IDX            rsp_cons;       // Front.rsp_cons as sampled on the previous pass
+    NTSTATUS            status;
+
+    Verbose("====> (%u)\n", BlkifRing->Index);
+
+    status = KeGetProcessorNumberFromIndex(BlkifRing->Index, &ProcNumber);  // ring index is used as the processor index
+    ASSERT(NT_SUCCESS(status));         // failure is only checked via ASSERT; ProcNumber is used regardless
+
+    Affinity.Group = ProcNumber.Group;
+    Affinity.Mask = (KAFFINITY)1 << ProcNumber.Number;  // single-bit mask: just that one processor within its group
+    KeSetSystemGroupAffinityThread(&Affinity, NULL);    // applies to the calling (watchdog) thread; second argument is NULL
+
+    Timeout.QuadPart = TIME_RELATIVE(TIME_S(XENVBD_WATCHDOG_PERIOD));   // computed once, reused for every wait
+
+    rsp_prod = 0;                       // no previous sample yet
+    rsp_cons = 0;
+
+    for (;;) {
+        PKEVENT Event;
+        KIRQL   Irql;
+
+        Event = ThreadGetEvent(Self);
+
+        (VOID) KeWaitForSingleObject(Event,     // sleep until the thread event is signalled or Timeout expires; result ignored
+                                     Executive,
+                                     KernelMode,
+                                     FALSE,
+                                     &Timeout);
+        KeClearEvent(Event);            // reset the event before the next wait
+
+        if (ThreadIsAlerted(Self))      // the only exit from the loop
+            break;
+
+        KeRaiseIrql(DISPATCH_LEVEL, &Irql);     // the ring lock is taken at DISPATCH_LEVEL
+        __BlkifRingAcquireLock(BlkifRing);
+
+        if (BlkifRing->Enabled) {       // nothing is checked or sampled while the ring is disabled
+            KeMemoryBarrier();
+
+            if (BlkifRing->Shared->rsp_prod != rsp_prod &&      // shared producer index differs from the previous sample...
+                BlkifRing->Front.rsp_cons == rsp_cons) {        // ...while our consumer index has not moved since then
+                XENBUS_DEBUG(Trigger,                           // trigger the ring's registered debug callback
+                             &Ring->DebugInterface,
+                             BlkifRing->DebugCallback);
+
+                // Try to move things along
+                __BlkifRingSend(BlkifRing);
+                (VOID) BlkifRingPoll(BlkifRing);                // return value deliberately ignored
+            }
+
+            KeMemoryBarrier();
+
+            rsp_prod = BlkifRing->Shared->rsp_prod;     // remember both indices for the next pass's comparison
+            rsp_cons = BlkifRing->Front.rsp_cons;
+        }
+
+        __BlkifRingReleaseLock(BlkifRing);
+        KeLowerIrql(Irql);              // restore the IRQL saved by KeRaiseIrql above
+    }
+
+    Verbose("<====\n");
+
+    return STATUS_SUCCESS;
+}
+
 static NTSTATUS
 BlkifRingCreate(
     IN  PXENVBD_RING        Ring,
@@ -1739,8 +1825,20 @@ BlkifRingCreate(
     if (!NT_SUCCESS(status))
         goto fail9;
 
+    status = ThreadCreate(RingWatchdog,
+                          *BlkifRing,
+                          &(*BlkifRing)->WatchdogThread);
+    if (!NT_SUCCESS(status))
+        goto fail10;
+
     return STATUS_SUCCESS;
 
+fail10:
+    Error("fail10\n");
+    XENBUS_CACHE(Destroy,
+                 &Ring->CacheInterface,
+                 (*BlkifRing)->IndirectCache);
+    (*BlkifRing)->IndirectCache = NULL;
 fail9:
     Error("fail9\n");
 fail8:
@@ -1794,6 +1892,10 @@ BlkifRingDestroy(
 {
     PXENVBD_RING            Ring = BlkifRing->Ring;
 
+    ThreadAlert(BlkifRing->WatchdogThread);
+    ThreadJoin(BlkifRing->WatchdogThread);
+    BlkifRing->WatchdogThread = NULL;
+
     XENBUS_CACHE(Destroy,
                  &Ring->CacheInterface,
                  BlkifRing->IndirectCache);
-- 
2.17.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.