[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[win-pv-devel] [PATCH 1/2] Improve the performance of the slab allocator


  • To: <win-pv-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Paul Durrant <paul.durrant@xxxxxxxxxx>
  • Date: Thu, 5 Sep 2019 16:25:12 +0100
  • Authentication-results: esa1.hc3370-68.iphmx.com; dkim=none (message not signed) header.i=none; spf=None smtp.pra=paul.durrant@xxxxxxxxxx; spf=Pass smtp.mailfrom=Paul.Durrant@xxxxxxxxxx; spf=None smtp.helo=postmaster@xxxxxxxxxxxxxxx
  • Cc: Paul Durrant <paul.durrant@xxxxxxxxxx>
  • Delivery-date: Thu, 05 Sep 2019 15:25:27 +0000
  • Ironport-sdr: RiSVQqKc6uwDYbysXDFdrhsp5k3vSfExNx8+wQyRVRXQ6FbvFJYiJR0TxOt+N4qpo1BmF7Xp7l GfHoTKy/DIWMG45lF2NQ9VDOP3RCOOZGf4GrE4VhYkqIsUUG8CePPM9s5ChakVlrhYQV29mOuw Et7mNtGesHQ4VATC/YKU2+MxFjh/OfhJhHyXgYuHIvyXjIM8OX2dKv7OeJiXD/WW+YoG9Lj/A7 2IzpTRftNbACC2uzIpJP0i3sseKIa2zMT1U9D4q/NFGyBsFT4fl2G5bymg+GcbGwbY6cClUNx4 Ud8=
  • List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>

This patch makes a couple of changes which testing shows to improve
performance.

Firstly the slabs are now sorted such that the most occupied slab is on
the head of the list. This should lead to a smaller number of slabs, each
with a higher occupancy, which reduces the overall amount of memory
consumed and the number of 'Ctor' invocations (NB: a Ctor may also
allocate other memory).

Secondly, rather than destroying a slab as soon as its occupancy falls to
zero, defer freeing to the re-introduced monitor thread. This takes freeing
(and associated invocations of Dtor) off the performance critical paths
 and also allows an empty slab to be re-used, avoiding calls to
CacheCreateSlab() (and hence more Ctor invocations).

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
---
 src/xenbus/cache.c | 316 +++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 236 insertions(+), 80 deletions(-)

diff --git a/src/xenbus/cache.c b/src/xenbus/cache.c
index 479c411..1c1f4c4 100644
--- a/src/xenbus/cache.c
+++ b/src/xenbus/cache.c
@@ -84,6 +84,7 @@ struct _XENBUS_CACHE {
     VOID                    (*ReleaseLock)(PVOID);
     PVOID                   Argument;
     LIST_ENTRY              SlabList;
+    PLIST_ENTRY             Cursor;
     ULONG                   Count;
     PXENBUS_CACHE_MAGAZINE  Magazine;
     ULONG                   MagazineCount;
@@ -95,6 +96,7 @@ struct _XENBUS_CACHE_CONTEXT {
     LONG                    References;
     XENBUS_DEBUG_INTERFACE  DebugInterface;
     PXENBUS_DEBUG_CALLBACK  DebugCallback;
+    PXENBUS_THREAD          MonitorThread;
     LIST_ENTRY              List;
 };
 
@@ -196,40 +198,95 @@ CachePutObjectToMagazine(
 static VOID
 CacheInsertSlab(
     IN  PXENBUS_CACHE       Cache,
-    IN  PXENBUS_CACHE_SLAB  Slab
+    IN  PXENBUS_CACHE_SLAB  New
     )
 {
-#define INSERT_BEFORE(_Cursor, _New)            \
-        do {                                    \
-            (_New)->Blink = (_Cursor)->Blink;   \
-            (_Cursor)->Blink->Flink = (_New);   \
-                                                \
-            (_Cursor)->Blink = (_New);          \
-            (_New)->Flink = (_Cursor);          \
+#define INSERT_BEFORE(_ListEntry, _New)             \
+        do {                                        \
+            (_New)->Blink = (_ListEntry)->Blink;    \
+            (_ListEntry)->Blink->Flink = (_New);    \
+                                                    \
+            (_ListEntry)->Blink = (_New);           \
+            (_New)->Flink = (_ListEntry);           \
         } while (FALSE)
 
-    PLIST_ENTRY Cursor;
+    PLIST_ENTRY             ListEntry;
+
+    ASSERT(New->CurrentOccupancy < New->MaximumOccupancy);
 
-    for (Cursor = Cache->SlabList.Flink;
-         Cursor != &Cache->SlabList;
-         Cursor = Cursor->Flink) {
-        PXENBUS_CACHE_SLAB  Next;
+    Cache->Cursor = NULL;
+
+    for (ListEntry = Cache->SlabList.Flink;
+         ListEntry != &Cache->SlabList;
+         ListEntry = ListEntry->Flink) {
+        PXENBUS_CACHE_SLAB  Slab;
 
-        Next = CONTAINING_RECORD(Cursor, XENBUS_CACHE_SLAB, ListEntry);
+        Slab = CONTAINING_RECORD(ListEntry, XENBUS_CACHE_SLAB, ListEntry);
 
-        if (Next->CurrentOccupancy > Slab->CurrentOccupancy) {
-            INSERT_BEFORE(Cursor, &Slab->ListEntry);
-            return;
+        if (Slab->CurrentOccupancy < New->CurrentOccupancy) {
+            INSERT_BEFORE(ListEntry, &New->ListEntry);
+            goto done;
         }
+
+        if (Slab->CurrentOccupancy < Slab->MaximumOccupancy &&
+            Cache->Cursor == NULL)
+            Cache->Cursor = ListEntry;
     }
 
-    InsertTailList(&Cache->SlabList, &Slab->ListEntry);
+    InsertTailList(&Cache->SlabList, &New->ListEntry);
+
+done:
+    if (Cache->Cursor == NULL)
+        Cache->Cursor = &New->ListEntry;
 
 #undef  INSERT_BEFORE
 }
 
+#if DBG
+static VOID
+CacheAudit(
+    IN  PXENBUS_CACHE   Cache
+    )
+{
+    ULONG               CurrentOccupancy = ULONG_MAX;
+    PLIST_ENTRY         ListEntry;
+
+    //
+    // The cursror should point at the first slab that is not fully
+    // occupied.
+    //
+    for (ListEntry = Cache->SlabList.Flink;
+         ListEntry != &Cache->SlabList;
+         ListEntry = ListEntry->Flink) {
+        PXENBUS_CACHE_SLAB  Slab;
+
+        Slab = CONTAINING_RECORD(ListEntry, XENBUS_CACHE_SLAB, ListEntry);
+
+        if (Slab->CurrentOccupancy < Slab->MaximumOccupancy) {
+            ASSERT3P(Cache->Cursor, ==, ListEntry);
+            break;
+        }
+    }
+
+    // Slabs should be kept in order of maximum to minimum occupancy
+    for (ListEntry = Cache->SlabList.Flink;
+         ListEntry != &Cache->SlabList;
+         ListEntry = ListEntry->Flink) {
+        PXENBUS_CACHE_SLAB  Slab;
+
+        Slab = CONTAINING_RECORD(ListEntry, XENBUS_CACHE_SLAB, ListEntry);
+
+        ASSERT3U(Slab->CurrentOccupancy, <=, CurrentOccupancy);
+
+        CurrentOccupancy = Slab->CurrentOccupancy;
+    }
+}
+#else
+#define CacheAudit(_Cache) ((VOID)(_Cache))
+#endif
+
 // Must be called with lock held
-static PXENBUS_CACHE_SLAB
+static NTSTATUS
 CacheCreateSlab(
     IN  PXENBUS_CACHE   Cache
     )
@@ -294,7 +351,7 @@ CacheCreateSlab(
     CacheInsertSlab(Cache, Slab);
     Cache->Count += Count;
 
-    return Slab;
+    return STATUS_SUCCESS;
 
 fail4:
     Error("fail4\n");
@@ -318,7 +375,7 @@ fail2:
 fail1:
     Error("fail1 (%08x)\n", status);
 
-    return NULL;
+    return status;
 }
 
 // Must be called with lock held
@@ -334,6 +391,17 @@ CacheDestroySlab(
 
     ASSERT3U(Cache->Count, >=, Slab->MaximumOccupancy);
     Cache->Count -= Slab->MaximumOccupancy;
+
+    //
+    // The only reason the cursor should be pointing at this slab is
+    // if it is the only one in the list.
+    //
+    if (Cache->Cursor == &Slab->ListEntry) {
+        ASSERT(Slab->ListEntry.Flink == &Cache->SlabList);
+        ASSERT(Slab->ListEntry.Blink == &Cache->SlabList);
+        Cache->Cursor = &Cache->SlabList;
+    }
+
     RemoveEntryList(&Slab->ListEntry);
 
     Index = Slab->MaximumOccupancy;
@@ -471,7 +539,6 @@ CacheGet(
     ULONG                   Index;
     PXENBUS_CACHE_MAGAZINE  Magazine;
     PVOID                   Object;
-    PLIST_ENTRY             ListEntry;
 
     UNREFERENCED_PARAMETER(Interface);
 
@@ -488,26 +555,34 @@ CacheGet(
     if (!Locked)
         __CacheAcquireLock(Cache);
 
-    for (ListEntry = Cache->SlabList.Flink;
-         ListEntry != &Cache->SlabList;
-         ListEntry = ListEntry->Flink) {
+again:
+    if (Cache->Cursor != &Cache->SlabList) {
+        PLIST_ENTRY ListEntry = Cache->Cursor;
         PXENBUS_CACHE_SLAB  Slab;
 
         Slab = CONTAINING_RECORD(ListEntry, XENBUS_CACHE_SLAB, ListEntry);
 
         Object = CacheGetObjectFromSlab(Slab);
-        if (Object != NULL)
-            break;
+        ASSERT(Object != NULL);
+
+        if (Slab->CurrentOccupancy == Slab->MaximumOccupancy)
+            Cache->Cursor = Slab->ListEntry.Flink;
     }
 
     if (Object == NULL) {
-        PXENBUS_CACHE_SLAB  Slab;
+        NTSTATUS status;
 
-        Slab = CacheCreateSlab(Cache);
-        if (Slab != NULL)
-            Object = CacheGetObjectFromSlab(Slab);
+        ASSERT3P(Cache->Cursor, ==, &Cache->SlabList);
+
+        status = CacheCreateSlab(Cache);
+        if (NT_SUCCESS(status)) {
+            ASSERT(Cache->Cursor != &Cache->SlabList);
+            goto again;
+        }
     }
 
+    CacheAudit(Cache);
+
     if (!Locked)
         __CacheReleaseLock(Cache);
 
@@ -552,13 +627,11 @@ CachePut(
 
     CachePutObjectToSlab(Slab, Object);
 
-    if (Slab->CurrentOccupancy == 0) {
-        CacheDestroySlab(Cache, Slab);
-    } else {
-        /* Re-insert to keep slab list ordered */
-        RemoveEntryList(&Slab->ListEntry);
-        CacheInsertSlab(Cache, Slab);
-    }
+    /* Re-insert to keep slab list ordered */
+    RemoveEntryList(&Slab->ListEntry);
+    CacheInsertSlab(Cache, Slab);
+
+    CacheAudit(Cache);
 
     if (!Locked)
         __CacheReleaseLock(Cache);
@@ -567,9 +640,10 @@ done:
     KeLowerIrql(Irql);
 }
 
-static FORCEINLINE NTSTATUS
-__CacheFill(
-    IN  PXENBUS_CACHE   Cache
+static NTSTATUS
+CacheFill(
+    IN  PXENBUS_CACHE   Cache,
+    IN  ULONG           Count
     )
 {
     KIRQL               Irql;
@@ -578,33 +652,14 @@ __CacheFill(
     KeRaiseIrql(DISPATCH_LEVEL, &Irql);
     __CacheAcquireLock(Cache);
 
-    while (Cache->Count < Cache->Reservation) {
-        PXENBUS_CACHE_SLAB  Slab;
-
-        Slab = CacheCreateSlab(Cache);
-
-        status = STATUS_NO_MEMORY;
-        if (Slab == NULL)
-            goto fail1;
+    status = STATUS_SUCCESS;
+    while (Cache->Count < Count) {
+        status = CacheCreateSlab(Cache);
+        if (!NT_SUCCESS(status))
+            break;
     }
 
-    __CacheReleaseLock(Cache);
-    KeLowerIrql(Irql);
-
-    return STATUS_SUCCESS;
-
-fail1:
-    Error("fail1 (%08x)\n", status);
-
-    while (!IsListEmpty(&Cache->SlabList)) {
-        PLIST_ENTRY         ListEntry = Cache->SlabList.Flink;
-        PXENBUS_CACHE_SLAB  Slab;
-
-        Slab = CONTAINING_RECORD(ListEntry, XENBUS_CACHE_SLAB, ListEntry);
-
-        CacheDestroySlab(Cache, Slab);
-    }
-    ASSERT3U(Cache->Count, ==, 0);
+    CacheAudit(Cache);
 
     __CacheReleaseLock(Cache);
     KeLowerIrql(Irql);
@@ -612,26 +667,45 @@ fail1:
     return status;
 }
 
-static FORCEINLINE VOID
-__CacheEmpty(
-    IN  PXENBUS_CACHE   Cache
+static VOID
+CacheSpill(
+    IN  PXENBUS_CACHE   Cache,
+    IN  ULONG           Count
     )
 {
     KIRQL               Irql;
+    PLIST_ENTRY         ListEntry;
 
     KeRaiseIrql(DISPATCH_LEVEL, &Irql);
     __CacheAcquireLock(Cache);
 
-    while (!IsListEmpty(&Cache->SlabList)) {
-        PLIST_ENTRY         ListEntry = Cache->SlabList.Flink;
+    if (Cache->Count <= Count)
+        goto done;
+
+    ListEntry = Cache->SlabList.Blink;
+    while (ListEntry != &Cache->SlabList) {
+        PLIST_ENTRY         Prev = ListEntry->Blink;
         PXENBUS_CACHE_SLAB  Slab;
 
+        ASSERT(!IsListEmpty(&Cache->SlabList));
+
         Slab = CONTAINING_RECORD(ListEntry, XENBUS_CACHE_SLAB, ListEntry);
 
+        if (Slab->CurrentOccupancy != 0)
+            break;
+
+        ASSERT(Cache->Count >= Slab->MaximumOccupancy);
+        if (Cache->Count - Slab->MaximumOccupancy < Count)
+            break;
+
         CacheDestroySlab(Cache, Slab);
+
+        ListEntry = Prev;
     }
-    ASSERT3U(Cache->Count, ==, 0);
 
+    CacheAudit(Cache);
+
+done:
     __CacheReleaseLock(Cache);
     KeLowerIrql(Irql);
 }
@@ -715,16 +789,15 @@ CacheCreate(
     (*Cache)->Argument = Argument;
 
     InitializeListHead(&(*Cache)->SlabList);
+    (*Cache)->Cursor = &(*Cache)->SlabList;
 
     status = STATUS_INVALID_PARAMETER;
     if ((*Cache)->Reservation > (*Cache)->Cap)
         goto fail3;
 
-    if ((*Cache)->Reservation != 0) {
-        status = __CacheFill(*Cache);
-        if (!NT_SUCCESS(status))
-            goto fail4;
-    }
+    status = CacheFill(*Cache, (*Cache)->Reservation);
+    if (!NT_SUCCESS(status))
+        goto fail4;
 
     (*Cache)->MagazineCount = 
KeQueryMaximumProcessorCountEx(ALL_PROCESSOR_GROUPS);
     (*Cache)->Magazine = __CacheAllocate(sizeof (XENBUS_CACHE_MAGAZINE) * 
(*Cache)->MagazineCount);
@@ -746,7 +819,7 @@ fail5:
 
     (*Cache)->MagazineCount = 0;
 
-    __CacheEmpty(*Cache);
+    CacheSpill(*Cache, 0);
 
 fail4:
     Error("fail4\n");
@@ -754,6 +827,7 @@ fail4:
 fail3:
     Error("fail3\n");
 
+    (*Cache)->Cursor = NULL;
     ASSERT(IsListEmpty(&(*Cache)->SlabList));
     RtlZeroMemory(&(*Cache)->SlabList, sizeof (LIST_ENTRY));
 
@@ -831,9 +905,9 @@ CacheDestroy(
     Cache->Magazine = NULL;
     Cache->MagazineCount = 0;
 
-    __CacheEmpty(Cache);
-    Cache->Reservation = 0;
+    CacheSpill(Cache, 0);
 
+    Cache->Cursor = NULL;
     ASSERT(IsListEmpty(&Cache->SlabList));
     RtlZeroMemory(&Cache->SlabList, sizeof (LIST_ENTRY));
 
@@ -888,6 +962,71 @@ CacheDebugCallback(
     }
 }
 
+#define TIME_US(_us)        ((_us) * 10)
+#define TIME_MS(_ms)        (TIME_US((_ms) * 1000))
+#define TIME_S(_s)          (TIME_MS((_s) * 1000))
+#define TIME_RELATIVE(_t)   (-(_t))
+
+#define XENBUS_CACHE_MONITOR_PERIOD 5
+
+static NTSTATUS
+CacheMonitor(
+    IN  PXENBUS_THREAD      Self,
+    IN  PVOID               _Context
+    )
+{
+    PXENBUS_CACHE_CONTEXT   Context = _Context;
+    PKEVENT                 Event;
+    LARGE_INTEGER           Timeout;
+    PLIST_ENTRY             ListEntry;
+
+    Trace("====>\n");
+
+    Event = ThreadGetEvent(Self);
+
+    Timeout.QuadPart = TIME_RELATIVE(TIME_S(XENBUS_CACHE_MONITOR_PERIOD));
+
+    for (;;) {
+        KIRQL   Irql;
+
+        (VOID) KeWaitForSingleObject(Event,
+                                     Executive,
+                                     KernelMode,
+                                     FALSE,
+                                     &Timeout);
+        KeClearEvent(Event);
+
+        if (ThreadIsAlerted(Self))
+            break;
+
+        KeAcquireSpinLock(&Context->Lock, &Irql);
+
+        if (Context->References == 0)
+            goto loop;
+
+        for (ListEntry = Context->List.Flink;
+             ListEntry != &Context->List;
+             ListEntry = ListEntry->Flink) {
+            PXENBUS_CACHE   Cache;
+
+            Cache = CONTAINING_RECORD(ListEntry, XENBUS_CACHE, ListEntry);
+
+            if (Cache->Count < Cache->Reservation)
+                CacheFill(Cache, Cache->Reservation);
+            else if (Cache->Count > Cache->Reservation)
+                CacheSpill(Cache,
+                           __max(Cache->Reservation, (Cache->Count / 2)));
+        }
+
+loop:
+        KeReleaseSpinLock(&Context->Lock, Irql);
+    }
+
+    Trace("====>\n");
+
+    return STATUS_SUCCESS;
+}
+
 static NTSTATUS
 CacheAcquire(
     PINTERFACE              Interface
@@ -1016,12 +1155,25 @@ CacheInitialize(
     InitializeListHead(&(*Context)->List);
     KeInitializeSpinLock(&(*Context)->Lock);
 
+    status = ThreadCreate(CacheMonitor, *Context, &(*Context)->MonitorThread);
+    if (!NT_SUCCESS(status))
+        goto fail2;
+
     (*Context)->Fdo = Fdo;
 
     Trace("<====\n");
 
     return STATUS_SUCCESS;
 
+fail2:
+    Error("fail2\n");
+
+    RtlZeroMemory(&(*Context)->Lock, sizeof (KSPIN_LOCK));
+    RtlZeroMemory(&(*Context)->List, sizeof (LIST_ENTRY));
+
+    RtlZeroMemory(&(*Context)->DebugInterface,
+                  sizeof (XENBUS_DEBUG_INTERFACE));
+
 fail1:
     Error("fail1 (%08x)\n", status);
 
@@ -1100,6 +1252,10 @@ CacheTeardown(
 
     Context->Fdo = NULL;
 
+    ThreadAlert(Context->MonitorThread);
+    ThreadJoin(Context->MonitorThread);
+    Context->MonitorThread = NULL;
+
     RtlZeroMemory(&Context->Lock, sizeof (KSPIN_LOCK));
     RtlZeroMemory(&Context->List, sizeof (LIST_ENTRY));
 
-- 
2.5.3


_______________________________________________
win-pv-devel mailing list
win-pv-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/win-pv-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.