[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] Windows PV drivers fail to set up RSS when vCPUs > 8


  • To: win-pv-devel@xxxxxxxxxxxxxxxxxxxx
  • From: "Durrant, Paul" <xadimgnik@xxxxxxxxx>
  • Date: Tue, 22 Mar 2022 15:45:41 +0100
  • Delivery-date: Tue, 22 Mar 2022 14:45:47 +0000
  • List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>

I think the key thing that confused me is that you say
'fail to set up RSS when vCPUs > 8' but actually this would be more accurately stated as 'fail to set up RSS when vCPU index >= 8'

On 22/03/2022 12:12, Martin Harvey wrote:
The driver only supports at most 8 queues, however Windows
can decide to assign vCPU numbers starting from a non-zero
offset. E.g. vCPU 8,9,10,11 could get assigned to a device
if you have more than one NIC. The total number of vCPUs
used by a single device is still less than 8, but the vCPU
indexes themselves can be greater than 8. The code
previously incorrectly assumed that individual vCPU
indexes cannot exceed 8, however a 1:1 mapping between
vCPU indexes and queues seems to only exist when using
a single NIC.

Signed-off-by: Martin Harvey <martin.harvey@xxxxxxxxxx>
---
  src/xenvif/receiver.c | 168 +++++++++++++++++++++++++++++++++++++-----
  1 file changed, 148 insertions(+), 20 deletions(-)

diff --git a/src/xenvif/receiver.c b/src/xenvif/receiver.c
index 10ac6f5..0c7b32a 100644
--- a/src/xenvif/receiver.c
+++ b/src/xenvif/receiver.c
@@ -106,6 +106,7 @@ typedef struct _XENVIF_RECEIVER_RING {
      PLIST_ENTRY                 PacketQueue;
      KDPC                        QueueDpc;
      ULONG                       QueueDpcs;
+    PROCESSOR_NUMBER            TargetProcessor;
      LIST_ENTRY                  PacketComplete;
      XENVIF_RECEIVER_HASH        Hash;
  } XENVIF_RECEIVER_RING, *PXENVIF_RECEIVER_RING;
@@ -2498,6 +2499,9 @@ __ReceiverRingInitialize(
KeInitializeThreadedDpc(&(*Ring)->QueueDpc, ReceiverRingQueueDpc, *Ring); + status = KeGetProcessorNumberFromIndex((*Ring)->Index, &(*Ring)->TargetProcessor);
+    ASSERT(NT_SUCCESS(status));
+
      return STATUS_SUCCESS;
fail7:
@@ -2550,6 +2554,45 @@ fail1:
      return status;
  }
+static NTSTATUS
+__ReceiverRingSetAffinity(

The convention has always been `__` implies `FORCEINLINE`.

+    IN  PXENVIF_RECEIVER_RING   Ring,
+    IN  PPROCESSOR_NUMBER       Processor
+    )
+{
+    PXENVIF_RECEIVER            Receiver;
+    PXENVIF_FRONTEND            Frontend;
+    NTSTATUS status;
+
+    status = STATUS_INVALID_PARAMETER;
+    if ((Ring == NULL) || (Processor == NULL))
+        goto fail1;
+
+    Receiver = Ring->Receiver;
+    Frontend = Receiver->Frontend;
+
+    /* Always update ring target processor
+       Actually set affinities if frontend override not present.
+       Re-bind event-channel if already connected */

Block comment style is messy.

+
+    __ReceiverRingAcquireLock(Ring);
+
+    Ring->TargetProcessor = *Processor;
+
+    /* Don't rebind event channel at this point. */
+    KeSetTargetProcessorDpcEx(&Ring->PollDpc, &Ring->TargetProcessor);
+    KeSetTargetProcessorDpcEx(&Ring->QueueDpc, &Ring->TargetProcessor);
+
+    __ReceiverRingReleaseLock(Ring);
+
+    return STATUS_SUCCESS;
+
+fail1:
+    Error("fail1 (%08x)\n", status);
+
+    return status;
+}
+
  static FORCEINLINE NTSTATUS
  __ReceiverRingConnect(
      IN  PXENVIF_RECEIVER_RING   Ring
@@ -2560,7 +2603,6 @@ __ReceiverRingConnect(
      PFN_NUMBER                  Pfn;
      CHAR                        Name[MAXNAMELEN];
      ULONG                       Index;
-    PROCESSOR_NUMBER            ProcNumber;
      NTSTATUS                    status;
Receiver = Ring->Receiver;
@@ -2637,16 +2679,17 @@ __ReceiverRingConnect(
      if (Ring->Channel == NULL)
          goto fail6;
- status = KeGetProcessorNumberFromIndex(Ring->Index, &ProcNumber);
-    ASSERT(NT_SUCCESS(status));
+    status = XENBUS_EVTCHN(Bind,
+                            &Receiver->EvtchnInterface,
+                            Ring->Channel,
+                            Ring->TargetProcessor.Group,
+                            Ring->TargetProcessor.Number);

Looks like indentation is off here.

+    if (!NT_SUCCESS(status))
+        Warning("Cound not set initial receiver ring affinity: 0x%x\n", 
status);

Typo. Plus wouldn't it be useful to have the ring index in the message?

+    /* You haven't specifically asked for an affinity yet, so just warn. */

Who hasn't asked?

-    KeSetTargetProcessorDpcEx(&Ring->PollDpc, &ProcNumber);
-
-    (VOID) XENBUS_EVTCHN(Bind,
-                         &Receiver->EvtchnInterface,
-                         Ring->Channel,
-                         ProcNumber.Group,
-                         ProcNumber.Number);
+    KeSetTargetProcessorDpcEx(&Ring->PollDpc, &Ring->TargetProcessor);
+    KeSetTargetProcessorDpcEx(&Ring->QueueDpc, &Ring->TargetProcessor);
(VOID) XENBUS_EVTCHN(Unmask,
                           &Receiver->EvtchnInterface,
@@ -2665,11 +2708,6 @@ __ReceiverRingConnect(
      if (!NT_SUCCESS(status))
          goto fail7;
- status = KeGetProcessorNumberFromIndex(Ring->Index, &ProcNumber);
-    ASSERT(NT_SUCCESS(status));
-
-    KeSetTargetProcessorDpcEx(&Ring->QueueDpc, &ProcNumber);
-
      return STATUS_SUCCESS;
fail7:
@@ -3917,6 +3955,56 @@ fail1:
      return status;
  }
+static NTSTATUS
+__ReceiverSetQueueAffinities(
+    IN  PXENVIF_RECEIVER        Receiver,
+    IN  PPROCESSOR_NUMBER       QueueAffinities,
+    IN  ULONG                   Count
+    )
+{
+    PXENVIF_FRONTEND        Frontend;
+    ULONG                   Index;
+    NTSTATUS                status;
+    KIRQL                   Irql;
+
+    Frontend = Receiver->Frontend;
+
+    status = STATUS_INVALID_PARAMETER;
+
+    if (QueueAffinities == NULL)
+        goto fail1;
+
+    if (Count > FrontendGetNumQueues(Frontend))
+        goto fail2;
+
+    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
+
+    for (Index = 0; Index < Count; Index++) {
+        PXENVIF_RECEIVER_RING   Ring = Receiver->Ring[Index];
+
+        status = __ReceiverRingSetAffinity(Ring, &QueueAffinities[Index]);
+        if (!NT_SUCCESS(status))
+            goto fail3;
+    }
+
+    KeLowerIrql(Irql);
+
+    return STATUS_SUCCESS;
+
+fail3:
+    KeLowerIrql(Irql);
+
+    Error("fail3\n");
+
+fail2:
+    Error("fail2\n");
+
+fail1:
+    Error("fail1 (%08x)\n", status);
+
+    return status;
+}
+
  NTSTATUS
  ReceiverUpdateHashMapping(
      IN  PXENVIF_RECEIVER    Receiver,
@@ -3926,10 +4014,15 @@ ReceiverUpdateHashMapping(
  {
      PXENVIF_FRONTEND        Frontend;
      PULONG                  QueueMapping;
+    PPROCESSOR_NUMBER       QueueAffinities;
      ULONG                   NumQueues;
+    ULONG                   QueuesDetermined;
+    ULONG                   QIndex;
      ULONG                   Index;
+    BOOLEAN                 MapEntryDone;
      NTSTATUS                status;
+
      Frontend = Receiver->Frontend;
QueueMapping = __ReceiverAllocate(sizeof (ULONG) * Size);
@@ -3939,26 +4032,61 @@ ReceiverUpdateHashMapping(
          goto fail1;
NumQueues = FrontendGetNumQueues(Frontend);
+    QueuesDetermined = 0;
+
+    QueueAffinities = __ReceiverAllocate(sizeof(PROCESSOR_NUMBER) * NumQueues);
+    if (QueueAffinities == NULL)
+        goto fail2;
status = STATUS_INVALID_PARAMETER;
+    /* N^Squared-ish, but performed infrequently */
      for (Index = 0; Index < Size; Index++) {

The scope of 'MapEntryDone' could be limited to this loop, couldn't it?

-        QueueMapping[Index] = 
KeGetProcessorIndexFromNumber(&ProcessorMapping[Index]);
-
-        if (QueueMapping[Index] >= NumQueues)
-            goto fail2;
+        MapEntryDone = FALSE;
+        /* Existing queue meets affinity requirement for the mapping at this 
index? */
+        for (QIndex = 0; QIndex < QueuesDetermined; QIndex++) {

'QueueIndex' please. It's 'Queue' everywhere else so 'Q' for this looks incongruous.

+            if ((QueueAffinities[QIndex].Group == ProcessorMapping[Index].Group) 
&&
+                (QueueAffinities[QIndex].Number == 
ProcessorMapping[Index].Number)) {
+                QueueMapping[Index] = QIndex;
+                MapEntryDone = TRUE;
+            }
+        }
+        if (!MapEntryDone) {
+            /* New queue "allocation", with new affinity, if possible */
+            if (QueuesDetermined < NumQueues) {
+                QIndex = QueuesDetermined;
+                QueueAffinities[QIndex] = ProcessorMapping[Index];
+                QueueMapping[Index] = QIndex;
+                QueuesDetermined ++;

Stray whitespace before the '++'

+            } else {
+                goto fail3;

You could do:

if (QueuesDetermined >= NumQueues)
    goto fail3;

QueueIndex = QueuesDetermined;
QueueAffinities[QueueIndex] = ProcessorMapping[Index];
QueueMapping[Index] = QueueIndex;
QueuesDetermined++;

then you would not need the 'else'

  Paul

+            }
+        }
      }
status = FrontendSetHashMapping(Frontend, QueueMapping, Size);
      if (!NT_SUCCESS(status))
-        goto fail3;
+        goto fail4;
+
+    status = __ReceiverSetQueueAffinities(Receiver, QueueAffinities, 
QueuesDetermined);
+    if (!NT_SUCCESS(status))
+        goto fail5;
__ReceiverFree(QueueMapping);
+    __ReceiverFree(QueueAffinities);
return STATUS_SUCCESS; +fail5:
+    Error("fail5\n");
+
+fail4:
+    Error("fail4\n");
+
  fail3:
      Error("fail3\n");
+ __ReceiverFree(QueueAffinities);
+
  fail2:
      Error("fail2\n");




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.