[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Windows PV drivers fail to set up RSS when vCPUs > 8


  • To: <win-pv-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Martin Harvey <Martin.Harvey@xxxxxxxxxx>
  • Date: Tue, 22 Mar 2022 11:12:44 +0000
  • Authentication-results: esa5.hc3370-68.iphmx.com; dkim=none (message not signed) header.i=none
  • Cc: Martin Harvey <Martin.Harvey@xxxxxxxxxx>, Martin Harvey <martin.harvey@xxxxxxxxxx>
  • Delivery-date: Tue, 22 Mar 2022 11:13:31 +0000
  • Ironport-data: A9a23:4VWoaa3vCjx2rkHTpfbD5bVxkn2cJEfYwER7XKvMYLTBsI5bpz0Cz DcXCz2CPK2JYGv0fowgaY6+9RgG6MPTzN9nGVRrpC1hF35El5HIVI+TRqvS04J+DSFhoGZPt Zh2hgzodZhsJpPkjk7xdOCn9xGQ7InQLlbGILes1htZGEk1EE/NtTo5w7Rj2tUy24Dja++wk YiaT/P3aQfNNwFcagr424rbwP+4lK2v0N+wlgVWicFj5DcypVFMZH4sDfjZw0/DaptVBoaHq 9Prl9lVyI97EyAFUbtJmp6jGqEDryW70QKm0hK6UID66vROS7BbPg/W+5PwZG8O4whlkeydx /1Q5ZaJaz0jAZHtkf4PeQZeUAFYHL1/reqvzXiX6aR/zmXDenrohf5vEFs3LcsT/eMf7WNmr KJCbmpXN1ba2rzwkOnTpupE36zPKOHuNZkDu3cmzTjDE/s3aZvCX7/L9ZlT2zJYasVmQ6iFP 5ZGMWMHgBLoZy9/K1sGLcIEgdysozrlNBtZ8Gq8uv9ii4TU5FMoi+W8WDbPQfSVRMMQhljdq m/Y8mDRBhABKMfZ2TeD6mirhOLEgWX8Qo16KVGj3qc02hvJnDVVUUBIEwvgyRWktqKgc88PI XU2wCARkYk77l6BUd++fzuZsEfR63bwROFsO+E97QiMzI/d7ACYGnUIQ1Z9VTA2iCMlbWd0j wHUxrsFERQq6eTIEizFqt94uBvoYUAowXk+iTjopOfvy/3qu8kNgx3GVb6P+4bl34SuSVkcL 91nxRXSZon/b+ZWjs1XHnid2lpAQ6QlqCZvvW07uUr/smtEiHaNPdDA1LQixa8owHylZleAp mMYvMOV8foDC5qA/ATUHrlSTOH2uqnUamOB6bKKI3XH327xk5JEVdoNiAyS2W8zappUEdMXS BG7VfxtCG97YyLxMP4fj3OZAMU216nwfekJpdiPBueilqNZLVfdlAk3PBb49zm0zCAEzPFuU b/GIJ3EJStLVsxaIM+eGr51PUkDnXtlmws+hPnTknya7FZpTCXMGOlfbwfWMLxRAWHtiFy9z uuz/vCik313ONASqAGNmWLPBTjm9UQGOK0=
  • Ironport-hdrordr: A9a23:/xNfQa5cP9GQLaWIKAPXwMbXdLJyesId70hD6qhwISY1TiX+rb HIoB17726MtN9/YgBCpTntAsa9qBDnhPpICOsqTNWftWDd0QPCRuwP0WKL+UyHJ8SUzI5gPM lbHZSWcOeAaGRHsQ==
  • List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>

The driver supports at most 8 queues, however Windows
can decide to assign vCPU numbers starting from a non-zero
offset. E.g. vCPUs 8,9,10,11 could get assigned to a device
if you have more than one NIC. The total number of vCPUs
used by a single device is still at most 8, but the vCPU
indexes themselves can be 8 or greater. The code
previously incorrectly assumed that every vCPU index is
less than the number of queues, however a 1:1 mapping
between vCPU indexes and queues seems to only exist when
using a single NIC.

Signed-off-by: Martin Harvey <martin.harvey@xxxxxxxxxx>
---
 src/xenvif/receiver.c | 168 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 148 insertions(+), 20 deletions(-)

diff --git a/src/xenvif/receiver.c b/src/xenvif/receiver.c
index 10ac6f5..0c7b32a 100644
--- a/src/xenvif/receiver.c
+++ b/src/xenvif/receiver.c
@@ -106,6 +106,7 @@ typedef struct _XENVIF_RECEIVER_RING {
     PLIST_ENTRY                 PacketQueue;
     KDPC                        QueueDpc;
     ULONG                       QueueDpcs;
+    PROCESSOR_NUMBER            TargetProcessor;
     LIST_ENTRY                  PacketComplete;
     XENVIF_RECEIVER_HASH        Hash;
 } XENVIF_RECEIVER_RING, *PXENVIF_RECEIVER_RING;
@@ -2498,6 +2499,9 @@ __ReceiverRingInitialize(
 
     KeInitializeThreadedDpc(&(*Ring)->QueueDpc, ReceiverRingQueueDpc, *Ring);
 
+    status = KeGetProcessorNumberFromIndex((*Ring)->Index, &(*Ring)->TargetProcessor);
+    ASSERT(NT_SUCCESS(status));
+
     return STATUS_SUCCESS;
 
 fail7:
@@ -2550,6 +2554,45 @@ fail1:
     return status;
 }
 
+static NTSTATUS
+__ReceiverRingSetAffinity(
+    IN  PXENVIF_RECEIVER_RING   Ring,
+    IN  PPROCESSOR_NUMBER       Processor
+    )
+{
+    PXENVIF_RECEIVER            Receiver;
+    PXENVIF_FRONTEND            Frontend;
+    NTSTATUS status;
+    /* Retarget the ring's poll and queue DPCs to *Processor. */
+    status = STATUS_INVALID_PARAMETER;
+    if ((Ring == NULL) || (Processor == NULL))
+        goto fail1;
+
+    Receiver = Ring->Receiver;
+    Frontend = Receiver->Frontend;
+
+    /* Always update the ring's target processor and both DPC targets.
+       NOTE(review): no frontend override is checked here, and the event
+       channel is not re-bound, so Frontend is fetched but never used. */
+
+    __ReceiverRingAcquireLock(Ring);
+
+    Ring->TargetProcessor = *Processor;
+
+    /* Only the DPCs are retargeted; the event channel is not rebound. */
+    KeSetTargetProcessorDpcEx(&Ring->PollDpc, &Ring->TargetProcessor);
+    KeSetTargetProcessorDpcEx(&Ring->QueueDpc, &Ring->TargetProcessor);
+
+    __ReceiverRingReleaseLock(Ring);
+
+    return STATUS_SUCCESS;
+
+fail1:
+    Error("fail1 (%08x)\n", status);
+
+    return status;
+}
+
 static FORCEINLINE NTSTATUS
 __ReceiverRingConnect(
     IN  PXENVIF_RECEIVER_RING   Ring
@@ -2560,7 +2603,6 @@ __ReceiverRingConnect(
     PFN_NUMBER                  Pfn;
     CHAR                        Name[MAXNAMELEN];
     ULONG                       Index;
-    PROCESSOR_NUMBER            ProcNumber;
     NTSTATUS                    status;
 
     Receiver = Ring->Receiver;
@@ -2637,16 +2679,17 @@ __ReceiverRingConnect(
     if (Ring->Channel == NULL)
         goto fail6;
 
-    status = KeGetProcessorNumberFromIndex(Ring->Index, &ProcNumber);
-    ASSERT(NT_SUCCESS(status));
+    status = XENBUS_EVTCHN(Bind,
+                            &Receiver->EvtchnInterface,
+                            Ring->Channel,
+                            Ring->TargetProcessor.Group,
+                            Ring->TargetProcessor.Number);
+    if (!NT_SUCCESS(status))
+        Warning("Cound not set initial receiver ring affinity: 0x%x\n", status);
+    /* You haven't specifically asked for an affinity yet, so just warn. */
 
-    KeSetTargetProcessorDpcEx(&Ring->PollDpc, &ProcNumber);
-
-    (VOID) XENBUS_EVTCHN(Bind,
-                         &Receiver->EvtchnInterface,
-                         Ring->Channel,
-                         ProcNumber.Group,
-                         ProcNumber.Number);
+    KeSetTargetProcessorDpcEx(&Ring->PollDpc, &Ring->TargetProcessor);
+    KeSetTargetProcessorDpcEx(&Ring->QueueDpc, &Ring->TargetProcessor);
 
     (VOID) XENBUS_EVTCHN(Unmask,
                          &Receiver->EvtchnInterface,
@@ -2665,11 +2708,6 @@ __ReceiverRingConnect(
     if (!NT_SUCCESS(status))
         goto fail7;
 
-    status = KeGetProcessorNumberFromIndex(Ring->Index, &ProcNumber);
-    ASSERT(NT_SUCCESS(status));
-
-    KeSetTargetProcessorDpcEx(&Ring->QueueDpc, &ProcNumber);
-
     return STATUS_SUCCESS;
 
 fail7:
@@ -3917,6 +3955,56 @@ fail1:
     return status;
 }
 
+static NTSTATUS
+__ReceiverSetQueueAffinities(
+    IN  PXENVIF_RECEIVER        Receiver,
+    IN  PPROCESSOR_NUMBER       QueueAffinities,
+    IN  ULONG                   Count
+    )
+{
+    PXENVIF_FRONTEND        Frontend;
+    ULONG                   Index;
+    NTSTATUS                status;
+    KIRQL                   Irql;
+    /* Applies QueueAffinities[i] to Receiver->Ring[i] for each i < Count. */
+    Frontend = Receiver->Frontend;
+
+    status = STATUS_INVALID_PARAMETER;
+    /* Both parameter checks below fail with STATUS_INVALID_PARAMETER. */
+    if (QueueAffinities == NULL)
+        goto fail1;
+
+    if (Count > FrontendGetNumQueues(Frontend))
+        goto fail2;
+    /* NOTE(review): presumably the ring lock needs DISPATCH_LEVEL - confirm */
+    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
+
+    for (Index = 0; Index < Count; Index++) {
+        PXENVIF_RECEIVER_RING   Ring = Receiver->Ring[Index];
+        /* Stop at the first failure; earlier rings stay updated. */
+        status = __ReceiverRingSetAffinity(Ring, &QueueAffinities[Index]);
+        if (!NT_SUCCESS(status))
+            goto fail3;
+    }
+
+    KeLowerIrql(Irql);
+
+    return STATUS_SUCCESS;
+    /* Failure labels fall through: fail3 also logs fail2 and fail1. */
+fail3:
+    KeLowerIrql(Irql);
+
+    Error("fail3\n");
+
+fail2:
+    Error("fail2\n");
+
+fail1:
+    Error("fail1 (%08x)\n", status);
+
+    return status;
+}
+
 NTSTATUS
 ReceiverUpdateHashMapping(
     IN  PXENVIF_RECEIVER    Receiver,
@@ -3926,10 +4014,15 @@ ReceiverUpdateHashMapping(
 {
     PXENVIF_FRONTEND        Frontend;
     PULONG                  QueueMapping;
+    PPROCESSOR_NUMBER       QueueAffinities;
     ULONG                   NumQueues;
+    ULONG                   QueuesDetermined;
+    ULONG                   QIndex;
     ULONG                   Index;
+    BOOLEAN                 MapEntryDone;
     NTSTATUS                status;
 
+
     Frontend = Receiver->Frontend;
 
     QueueMapping = __ReceiverAllocate(sizeof (ULONG) * Size);
@@ -3939,26 +4032,61 @@ ReceiverUpdateHashMapping(
         goto fail1;
 
     NumQueues = FrontendGetNumQueues(Frontend);
+    QueuesDetermined = 0;
+
+    QueueAffinities = __ReceiverAllocate(sizeof(PROCESSOR_NUMBER) * NumQueues);
+    if (QueueAffinities == NULL)
+        goto fail2;
 
     status = STATUS_INVALID_PARAMETER;
+    /* N^Squared-ish, but performed infrequently */
     for (Index = 0; Index < Size; Index++) {
-        QueueMapping[Index] = KeGetProcessorIndexFromNumber(&ProcessorMapping[Index]);
-
-        if (QueueMapping[Index] >= NumQueues)
-            goto fail2;
+        MapEntryDone = FALSE;
+        /* Existing queue meets affinity requirement for the mapping at this index? */
+        for (QIndex = 0; QIndex < QueuesDetermined; QIndex++) {
+            if ((QueueAffinities[QIndex].Group == ProcessorMapping[Index].Group) &&
+                (QueueAffinities[QIndex].Number == ProcessorMapping[Index].Number)) {
+                QueueMapping[Index] = QIndex;
+                MapEntryDone = TRUE;
+            }
+        }
+        if (!MapEntryDone) {
+            /* New queue "allocation", with new affinity, if possible */
+            if (QueuesDetermined < NumQueues) {
+                QIndex = QueuesDetermined;
+                QueueAffinities[QIndex] = ProcessorMapping[Index];
+                QueueMapping[Index] = QIndex;
+                QueuesDetermined ++;
+            } else {
+                goto fail3;
+            }
+        }
     }
 
     status = FrontendSetHashMapping(Frontend, QueueMapping, Size);
     if (!NT_SUCCESS(status))
-        goto fail3;
+        goto fail4;
+
+    status = __ReceiverSetQueueAffinities(Receiver, QueueAffinities, QueuesDetermined);
+    if (!NT_SUCCESS(status))
+        goto fail5;
 
     __ReceiverFree(QueueMapping);
+    __ReceiverFree(QueueAffinities);
 
     return STATUS_SUCCESS;
 
+fail5:
+    Error("fail5\n");
+
+fail4:
+    Error("fail4\n");
+
 fail3:
     Error("fail3\n");
 
+    __ReceiverFree(QueueAffinities);
+
 fail2:
     Error("fail2\n");
 
-- 
2.25.0.windows.1




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.