[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 4/5] Implement rundown-based MRSW lock


  • To: win-pv-devel@xxxxxxxxxxxxxxxxxxxx
  • From: "Tu Dinh" <ngoc-tu.dinh@xxxxxxxxxx>
  • Date: Thu, 16 Apr 2026 11:10:01 +0000
  • Authentication-results: eu.smtp.expurgate.cloud; dkim=pass header.s=mte1 header.d=mandrillapp.com header.i="@mandrillapp.com" header.h="From:Subject:To:Cc:Message-Id:In-Reply-To:References:Feedback-ID:Date:MIME-Version:Content-Type:Content-Transfer-Encoding"; dkim=pass header.s=mte1 header.d=vates.tech header.i="ngoc-tu.dinh@xxxxxxxxxx" header.h="From:Subject:To:Cc:Message-Id:In-Reply-To:References:Feedback-ID:Date:MIME-Version:Content-Type:Content-Transfer-Encoding"
  • Cc: "Tu Dinh" <ngoc-tu.dinh@xxxxxxxxxx>, "Owen Smith" <owen.smith@xxxxxxxxxx>
  • Delivery-date: Thu, 16 Apr 2026 11:56:32 +0000
  • Feedback-id: 30504962:30504962.20260416:md
  • List-id: Developer list for the Windows PV Drivers subproject <win-pv-devel.lists.xenproject.org>

The current MRSW lock has a few downsides:
* Large size (a 1KB holder list has to be stored in every lock)
* Long fast-path acquire (~1KB of code) due to having to scan the holder
  list on every acquire
* Limited to 64 holders
* Multiple IRQL operations per acquire
* Always raises the critical section to DISPATCH_LEVEL

It's currently only used as the cleanup lock for the VIF interface, so
replace it with something simpler.

Implement a read-write lock with the following properties:
* The reader section is protected by an EX_RUNDOWN_REF.
* Writer/cleanup section is implemented by running down the reference
  within a guarded mutex.
* It offers 3 read acquire flavors: try acquire, spin acquire (both of
  which can be used from DISPATCH_LEVEL) and sleeping acquire (using the
  guarded mutex).
* It makes the downgrade operation an explicit call instead of
  overloading ReleaseMrswLockExclusive with a Shared flag.
* Finally, the lock supports cache-aware mode, where EX_RUNDOWN_REF is
  replaced with EX_RUNDOWN_REF_CACHE_AWARE using the corresponding
  macros.

As the changes are only internal to vif.c, the VIF version doesn't need
to be increased.

Signed-off-by: Tu Dinh <ngoc-tu.dinh@xxxxxxxxxx>
---
 src/xenvif/mrsw.h | 530 +++++++++++++++++++++++++---------------------
 src/xenvif/vif.c  |  65 +++---
 2 files changed, 327 insertions(+), 268 deletions(-)

diff --git a/src/xenvif/mrsw.h b/src/xenvif/mrsw.h
index e1ff056..8bfa441 100644
--- a/src/xenvif/mrsw.h
+++ b/src/xenvif/mrsw.h
@@ -1,289 +1,347 @@
 /* Copyright (c) Xen Project.
  * Copyright (c) Cloud Software Group, Inc.
+ * Copyright (c) Vates.
  * All rights reserved.
- * 
- * Redistribution and use in source and binary forms, 
- * with or without modification, are permitted provided 
+ *
+ * Redistribution and use in source and binary forms,
+ * with or without modification, are permitted provided
  * that the following conditions are met:
- * 
- * *   Redistributions of source code must retain the above 
- *     copyright notice, this list of conditions and the 
+ *
+ * *   Redistributions of source code must retain the above
+ *     copyright notice, this list of conditions and the
  *     following disclaimer.
- * *   Redistributions in binary form must reproduce the above 
- *     copyright notice, this list of conditions and the 
- *     following disclaimer in the documentation and/or other 
+ * *   Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the
+ *     following disclaimer in the documentation and/or other
  *     materials provided with the distribution.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
-#ifndef _XENVIF_MRSW_H
-#define _XENVIF_MRSW_H
+#ifndef _MRSW_H
+#define _MRSW_H
 
-#include <ntddk.h>
+#include <wdm.h>
 
 #include "assert.h"
 #include "util.h"
 
-#pragma warning(disable:4127)   // conditional expression is constant
-
-typedef struct _XENVIF_MRSW_HOLDER {
-    PKTHREAD    Thread;
-    LONG        Level;
-} XENVIF_MRSW_HOLDER, *PXENVIF_MRSW_HOLDER;
-
-typedef struct _XENVIF_MRSW_LOCK {
-    volatile LONG64 Mask;
-    XENVIF_MRSW_HOLDER     Holder[64];
-    KEVENT          Event;
-} XENVIF_MRSW_LOCK, *PXENVIF_MRSW_LOCK;
-
-C_ASSERT(RTL_FIELD_SIZE(XENVIF_MRSW_LOCK, Holder) == 
RTL_FIELD_SIZE(XENVIF_MRSW_LOCK, Mask) * 8 * sizeof (XENVIF_MRSW_HOLDER));
-
-#define XENVIF_MRSW_EXCLUSIVE_SLOT  0
-
-static FORCEINLINE VOID
-InitializeMrswLock(
-    IN  PXENVIF_MRSW_LOCK   Lock
+#pragma warning(push)
+#pragma warning(disable:4201) // nameless struct/union
+struct _MRSW_LOCK {
+    KGUARDED_MUTEX                  Mutex;
+    union {
+        EX_RUNDOWN_REF              Rundown;
+        PEX_RUNDOWN_REF_CACHE_AWARE RundownCacheAware;
+    };
+};
+#pragma warning(pop)
+
+typedef struct _MRSW_LOCK   MRSW_LOCK, *PMRSW_LOCK;
+typedef struct _MRSW_LOCK   MRSW_CACHE_AWARE_LOCK, *PMRSW_CACHE_AWARE_LOCK;
+
+static FORCEINLINE NTSTATUS
+__MrswRundownInitialize(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware,
+    _In_ ULONG                  Tag
     )
 {
-    LONG                    Slot;
-
-    RtlZeroMemory(Lock, sizeof (XENVIF_MRSW_LOCK));
-
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++)
-        Lock->Holder[Slot].Level = -1;
+    if (CacheAware) {
+        Lock->RundownCacheAware = 
ExAllocateCacheAwareRundownProtection(NonPagedPoolNx,
+                                                                        Tag);
+        if (!Lock->RundownCacheAware)
+            return STATUS_NO_MEMORY;
+    } else {
+        ExInitializeRundownProtection(&Lock->Rundown);
+    }
 
-    KeInitializeEvent(&Lock->Event, NotificationEvent, FALSE);
+    return STATUS_SUCCESS;
 }
 
 static FORCEINLINE BOOLEAN
-__ClaimExclusive(
-    IN  PXENVIF_MRSW_LOCK   Lock
+__MrswRundownAcquire(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    LONG64                  Old;
-    LONG64                  New;
-
-    Old = 0;
-    New = 1ll << XENVIF_MRSW_EXCLUSIVE_SLOT;
-
-    return (InterlockedCompareExchange64(&Lock->Mask, New, Old) == Old) ? TRUE 
: FALSE;
+    if (CacheAware)
+        return ExAcquireRundownProtectionCacheAware(Lock->RundownCacheAware);
+    else
+        return ExAcquireRundownProtection(&Lock->Rundown);
 }
 
-static FORCEINLINE KIRQL
-__drv_maxIRQL(APC_LEVEL)
-__drv_raisesIRQL(DISPATCH_LEVEL)
-__drv_savesIRQL
-__AcquireMrswLockExclusive(
-    IN  PXENVIF_MRSW_LOCK   Lock
+static FORCEINLINE VOID
+__MrswRundownRelease(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    KIRQL                   Irql;
-    LONG                    Slot;
-    PKTHREAD                Self;
-    PXENVIF_MRSW_HOLDER     Holder;
-
-    ASSERT3U(KeGetCurrentIrql(), <, DISPATCH_LEVEL);
-    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
-    Self = KeGetCurrentThread();
-
-    // Make sure we do not already hold the lock
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++)
-        ASSERT(Lock->Holder[Slot].Thread != Self);
-
-    for (;;) {
-        if (__ClaimExclusive(Lock))
-            break;
-
-        KeLowerIrql(Irql);
-
-        (VOID) KeWaitForSingleObject(&Lock->Event,
-                                     Executive,
-                                     KernelMode,
-                                     FALSE,
-                                     NULL);
-        KeClearEvent(&Lock->Event);
-
-        KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-    }
-
-    Holder = &Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT];
-
-    ASSERT3P(Holder->Thread, ==, NULL);
-    Holder->Thread = Self;
-    Holder->Level = 0;
-
-    return Irql;
+    if (CacheAware)
+        ExReleaseRundownProtectionCacheAware(Lock->RundownCacheAware);
+    else
+        ExReleaseRundownProtection(&Lock->Rundown);
 }
 
-#define AcquireMrswLockExclusive(_Lock, _Irql)              \
-        do {                                                \
-            *(_Irql) = __AcquireMrswLockExclusive(_Lock);   \
-        } while (FALSE)
-
 static FORCEINLINE VOID
-__drv_maxIRQL(DISPATCH_LEVEL)
-__drv_requiresIRQL(DISPATCH_LEVEL)
-ReleaseMrswLockExclusive(
-    IN  PXENVIF_MRSW_LOCK           Lock,
-    IN  __drv_restoresIRQL KIRQL    Irql,
-    IN  BOOLEAN                     Shared
+__MrswRundownWait(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    LONG                            Slot;
-    PKTHREAD                        Self;
-    LONG64                          Old;
-    LONG64                          New;
-    PXENVIF_MRSW_HOLDER             Holder;
-
-    ASSERT3U(KeGetCurrentIrql(), ==, DISPATCH_LEVEL);
-
-    Slot = XENVIF_MRSW_EXCLUSIVE_SLOT + 1; // Choose any slot other than the 
exclusive slot
-
-    Old = 1ll << XENVIF_MRSW_EXCLUSIVE_SLOT;
-    New = (Shared) ? (1ll << Slot) : 0;
-
-    Old = InterlockedCompareExchange64(&Lock->Mask, New, Old);
-    ASSERT3U(Old, == , 1ll << XENVIF_MRSW_EXCLUSIVE_SLOT);
-
-    Self = KeGetCurrentThread();
-
-    ASSERT3P(Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT].Thread, ==, Self);
-
-    // If we are leaving the lock held shared then we need to transfer
-    // our identity information into the hew slot.
-    if (Shared)
-        Lock->Holder[Slot] = Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT];
-
-    Holder = &Lock->Holder[XENVIF_MRSW_EXCLUSIVE_SLOT];
-
-    Holder->Thread = NULL;
-    Holder->Level = -1;
-
-    KeLowerIrql(Irql);
+    if (CacheAware)
+        ExWaitForRundownProtectionReleaseCacheAware(Lock->RundownCacheAware);
+    else
+        ExWaitForRundownProtectionRelease(&Lock->Rundown);
 }
 
-static FORCEINLINE LONG
-__ClaimShared(
-    IN  PXENVIF_MRSW_LOCK   Lock
+static FORCEINLINE VOID
+__MrswRundownCompleted(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    LONG                    Slot;
-    LONG64                  Old;
-    LONG64                  New;
-
-    // Make sure the exclusive bit is set so that we don't find it
-    Old = Lock->Mask | (1ll << XENVIF_MRSW_EXCLUSIVE_SLOT);
-
-    Slot = __ffu((ULONG64)Old);
-    ASSERT(Slot >= 0);
-    ASSERT3U(Slot, != , XENVIF_MRSW_EXCLUSIVE_SLOT);
+    if (CacheAware)
+        ExRundownCompletedCacheAware(Lock->RundownCacheAware);
+    else
+        ExRundownCompleted(&Lock->Rundown);
+}
 
-    Old &= ~(1ll << XENVIF_MRSW_EXCLUSIVE_SLOT);
-    New = Old | (1ll << Slot);
+static FORCEINLINE VOID
+__MrswRundownReInitialize(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    if (CacheAware)
+        ExReInitializeRundownProtectionCacheAware(Lock->RundownCacheAware);
+    else
+        ExReInitializeRundownProtection(&Lock->Rundown);
+}
 
-    return (InterlockedCompareExchange64(&Lock->Mask, New, Old) == Old) ? Slot 
: -1;
+static FORCEINLINE VOID
+__MrswRundownTeardown(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    if (CacheAware)
+        ExFreeCacheAwareRundownProtection(Lock->RundownCacheAware);
+    else
+        RtlZeroMemory(&Lock->Rundown, sizeof(Lock->Rundown));
 }
 
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE NTSTATUS
+__InitializeMrswLock(
+    _Out_ struct _MRSW_LOCK     *Lock,
+    _In_ BOOLEAN                CacheAware,
+    _In_ ULONG                  Tag
+    )
+{
+    KeInitializeGuardedMutex(&Lock->Mutex);
+    return __MrswRundownInitialize(Lock, CacheAware, Tag);
+}
+#define InitializeMrswLock(Lock, Tag) \
+    __InitializeMrswLock(Lock, FALSE, Tag)
+#define InitializeMrswCacheAwareLock(Lock, Tag) \
+    __InitializeMrswLock(Lock, TRUE, Tag)
+
+_Requires_lock_not_held_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__TeardownMrswLock(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+#if DBG
+    BUG_ON(!KeTryToAcquireGuardedMutex(&Lock->Mutex));
+    BUG_ON(!__MrswRundownAcquire(Lock, CacheAware));
+    KeReleaseGuardedMutex(&Lock->Mutex);
+    __MrswRundownRelease(Lock, CacheAware);
+#endif
+
+    __MrswRundownTeardown(Lock, CacheAware);
+    RtlZeroMemory(Lock, sizeof(MRSW_LOCK));
+}
+#define TeardownMrswLock(Lock) \
+    __TeardownMrswLock(Lock, FALSE)
+#define TeardownMrswCacheAwareLock(Lock) \
+    __TeardownMrswLock(Lock, TRUE)
+
+_Acquires_lock_(_Global_critical_region_)
+_Requires_lock_not_held_(*Lock)
+_Acquires_exclusive_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
 static FORCEINLINE VOID
-AcquireMrswLockShared(
-    IN  PXENVIF_MRSW_LOCK   Lock
+__AcquireMrswLockExclusive(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    KIRQL                   Irql;
-    LONG                    Level;
-    LONG                    Slot;
-    PKTHREAD                Self;
-    PXENVIF_MRSW_HOLDER     Holder;
+    ASSERT3U(KeGetCurrentIrql(), <=, APC_LEVEL);
 
+    KeAcquireGuardedMutex(&Lock->Mutex);
+    __MrswRundownWait(Lock, CacheAware);
+    __MrswRundownCompleted(Lock, CacheAware);
+}
+#define AcquireMrswLockExclusive(Lock) \
+    __AcquireMrswLockExclusive(Lock, FALSE)
+#define AcquireMrswCacheAwareLockExclusive(Lock) \
+    __AcquireMrswLockExclusive(Lock, TRUE)
+
+_Releases_lock_(_Global_critical_region_)
+_Requires_exclusive_lock_held_(*Lock)
+_Releases_exclusive_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__ReleaseMrswLockExclusive(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    __MrswRundownReInitialize(Lock, CacheAware);
+    KeReleaseGuardedMutex(&Lock->Mutex);
+}
+#define ReleaseMrswLockExclusive(Lock) \
+    __ReleaseMrswLockExclusive(Lock, FALSE)
+#define ReleaseMrswCacheAwareLockExclusive(Lock) \
+    __ReleaseMrswLockExclusive(Lock, TRUE)
+
+_Releases_lock_(_Global_critical_region_)
+_Requires_exclusive_lock_held_(*Lock)
+_Releases_exclusive_lock_(*Lock)
+_Acquires_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__DowngradeMrswLockExclusive(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    __MrswRundownReInitialize(Lock, CacheAware);
+    BUG_ON(!__MrswRundownAcquire(Lock, CacheAware));
+    KeReleaseGuardedMutex(&Lock->Mutex);
+}
+#define DowngradeMrswLockExclusive(Lock) \
+    __DowngradeMrswLockExclusive(Lock, FALSE)
+#define DowngradeMrswCacheAwareLockExclusive(Lock) \
+    __DowngradeMrswLockExclusive(Lock, TRUE)
+
+_When_(return, _Acquires_shared_lock_(*Lock))
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(DISPATCH_LEVEL)
+static FORCEINLINE BOOLEAN
+__TryAcquireMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+#if DBG
     ASSERT3U(KeGetCurrentIrql(), <=, DISPATCH_LEVEL);
-    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
-    Self = KeGetCurrentThread();
-
-    // Do we already hold the lock? If so, get the nesting level
-    Level = -1;
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++) {
-        if (Lock->Holder[Slot].Thread == Self && Lock->Holder[Slot].Level > 
Level)
-            Level = Lock->Holder[Slot].Level;
-    }
-    Level++;
+#endif
 
-    for (;;) {
-        Slot = __ClaimShared(Lock);
-        if (Slot >= 0)
-            break;
-
-        _mm_pause();
-    }
-
-    Holder = &Lock->Holder[Slot];
-
-    Holder->Thread = Self;
-    Holder->Level = Level;
-
-    KeLowerIrql(Irql);
+    return __MrswRundownAcquire(Lock, CacheAware);
 }
-
+#define TryAcquireMrswLockShared(Lock) \
+    __TryAcquireMrswLockShared(Lock, FALSE)
+#define TryAcquireMrswCacheAwareLockShared(Lock) \
+    __TryAcquireMrswLockShared(Lock, TRUE)
+
+_Acquires_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(DISPATCH_LEVEL)
 static FORCEINLINE VOID
-ReleaseMrswLockShared(
-    IN  PXENVIF_MRSW_LOCK   Lock
+__SpinAcquireMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
     )
 {
-    KIRQL                   Irql;
-    PKTHREAD                Self;
-    LONG                    Level;
-    LONG                    Deepest;
-    LONG                    Slot;
-    LONG64                  Old;
-    LONG64                  New;
-    PXENVIF_MRSW_HOLDER     Holder;
-
+#if DBG
     ASSERT3U(KeGetCurrentIrql(), <=, DISPATCH_LEVEL);
-    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
-
-    Self = KeGetCurrentThread();
-
-    Level = -1;
-    Deepest = -1;
-    for (Slot = 0; Slot < (LONG) sizeof (Lock->Mask) * 8; Slot++) {
-        if (Lock->Holder[Slot].Thread == Self && Lock->Holder[Slot].Level > 
Level) {
-            Level = Lock->Holder[Slot].Level;
-            Deepest = Slot;
-        }
-    }
-    ASSERT(Level >= 0);
-
-    Slot = Deepest;
-    ASSERT3U(Slot, !=, XENVIF_MRSW_EXCLUSIVE_SLOT);
-
-    Holder = &Lock->Holder[Slot];
+#endif
 
-    Holder->Thread = NULL;
-    Holder->Level = -1;
-
-    do {
-        Old = Lock->Mask;
-        New = Old & ~(1ll << Slot);
-    } while (InterlockedCompareExchange64(&Lock->Mask, New, Old) != Old);
-
-    KeSetEvent(&Lock->Event, IO_NO_INCREMENT, FALSE);
-    KeLowerIrql(Irql);
+    while (!__MrswRundownAcquire(Lock, CacheAware))
+        YieldProcessor();
+}
+#define SpinAcquireMrswLockShared(Lock) \
+    __SpinAcquireMrswLockShared(Lock, FALSE)
+#define SpinAcquireMrswCacheAwareLockShared(Lock) \
+    __SpinAcquireMrswLockShared(Lock, TRUE)
+
+/*
+ * Unlike SpinAcquireMrswLockShared, AcquireMrswLockShared will
+ * sleep when the lock acquisition fails. Thus it cannot be used at
+ * DISPATCH_LEVEL.
+ */
+_Acquires_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(APC_LEVEL)
+static FORCEINLINE VOID
+__AcquireMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+#if DBG
+    ASSERT3U(KeGetCurrentIrql(), <=, APC_LEVEL);
+#endif
+
+    if (__MrswRundownAcquire(Lock, CacheAware))
+        return;
+
+    /*
+     * Don't bother retrying, since it's most likely that another writer 
section
+     * is cleaning up and not ending any time soon. Just jump straight into
+     * sleep.
+     */
+    KeAcquireGuardedMutex(&Lock->Mutex);
+    /*
+     * Since we have the write mutex, we know that there are no writers. So 
this
+     * acquire must succeed.
+     */
+    BUG_ON(!__MrswRundownAcquire(Lock, CacheAware));
+    KeReleaseGuardedMutex(&Lock->Mutex);
+}
+#define AcquireMrswLockShared(Lock) \
+    __AcquireMrswLockShared(Lock, FALSE)
+#define AcquireMrswCacheAwareLockShared(Lock) \
+    __AcquireMrswLockShared(Lock, TRUE)
+
+_Requires_shared_lock_held_(*Lock)
+_Releases_shared_lock_(*Lock)
+_IRQL_requires_min_(PASSIVE_LEVEL)
+_IRQL_requires_max_(DISPATCH_LEVEL)
+static FORCEINLINE VOID
+__ReleaseMrswLockShared(
+    _Inout_ struct _MRSW_LOCK   *Lock,
+    _In_ BOOLEAN                CacheAware
+    )
+{
+    __MrswRundownRelease(Lock, CacheAware);
 }
+#define ReleaseMrswLockShared(Lock) \
+    __ReleaseMrswLockShared(Lock, FALSE)
+#define ReleaseMrswCacheAwareLockShared(Lock) \
+    __ReleaseMrswLockShared(Lock, TRUE)
 
-#endif  // _XENVIF_MRSW_H
+#endif  // _MRSW_H
diff --git a/src/xenvif/vif.c b/src/xenvif/vif.c
index 6f468ee..cb4cf84 100644
--- a/src/xenvif/vif.c
+++ b/src/xenvif/vif.c
@@ -47,7 +47,7 @@
 
 struct _XENVIF_VIF_CONTEXT {
     PXENVIF_PDO                 Pdo;
-    XENVIF_MRSW_LOCK            Lock;
+    MRSW_LOCK                   Lock;
     LONG                        References;
     PXENVIF_FRONTEND            Frontend;
     BOOLEAN                     Enabled;
@@ -150,13 +150,12 @@ VifEnable(
     )
 {
     PXENVIF_VIF_CONTEXT     Context = Interface->Context;
-    KIRQL                   Irql;
     BOOLEAN                 Exclusive;
     NTSTATUS                status;
 
     Trace("====>\n");
 
-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
     Exclusive = TRUE;
 
     if (Context->Enabled)
@@ -188,7 +187,7 @@ VifEnable(
 
 done:
     ASSERT(Exclusive);
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);
 
     Trace("<====\n");
 
@@ -199,7 +198,7 @@ fail3:
 
     (VOID) FrontendSetState(Context->Frontend, FRONTEND_CONNECTED);
 
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, TRUE);
+    DowngradeMrswLockExclusive(&Context->Lock);
     Exclusive = FALSE;
 
     ReceiverWaitForPackets(FrontendGetReceiver(Context->Frontend));
@@ -234,7 +233,7 @@ fail1:
     Context->Callback = NULL;
 
     if (Exclusive)
-        ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+        ReleaseMrswLockExclusive(&Context->Lock);
     else
         ReleaseMrswLockShared(&Context->Lock);
 
@@ -389,17 +388,16 @@ VifEnableVersion9(
     )
 {
     PXENVIF_VIF_CONTEXT             Context = Interface->Context;
-    KIRQL                           Irql;
     NTSTATUS                        status;
 
     Trace("====>\n");
 
-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
 
     Context->CallbackVersion9 = Callback;
     Context->ArgumentVersion9 = Argument;
 
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);
 
     status = VifEnable(Interface, VifCallbackVersion9, Context);
 
@@ -416,17 +414,16 @@ VifEnableVersion8(
     )
 {
     PXENVIF_VIF_CONTEXT             Context = Interface->Context;
-    KIRQL                           Irql;
     NTSTATUS                        status;
 
     Trace("====>\n");
 
-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
 
     Context->CallbackVersion8 = Callback;
     Context->ArgumentVersion8 = Argument;
 
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);
 
     status = VifEnableVersion9(Interface, VifCallbackVersion8, Context);
 
@@ -441,14 +438,13 @@ VifDisable(
     )
 {
     PXENVIF_VIF_CONTEXT Context = Interface->Context;
-    KIRQL               Irql;
 
     Trace("====>\n");
 
-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
 
     if (!Context->Enabled) {
-        ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+        ReleaseMrswLockExclusive(&Context->Lock);
         goto done;
     }
 
@@ -463,7 +459,7 @@ VifDisable(
 
     (VOID) FrontendSetState(Context->Frontend, FRONTEND_CONNECTED);
 
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, TRUE);
+    DowngradeMrswLockExclusive(&Context->Lock);
 
     ReceiverWaitForPackets(FrontendGetReceiver(Context->Frontend));
     TransmitterAbortPackets(FrontendGetTransmitter(Context->Frontend));
@@ -511,7 +507,7 @@ VifQueryStatistic(
     status = STATUS_INVALID_PARAMETER;
     if (Index >= XENVIF_VIF_STATISTIC_COUNT)
         goto done;
-        
+
     AcquireMrswLockShared(&Context->Lock);
 
     FrontendQueryStatistic(Context->Frontend, Index, Value);
@@ -567,7 +563,8 @@ VifReceiverReturnPacket(
 {
     PXENVIF_VIF_CONTEXT Context = Interface->Context;
 
-    AcquireMrswLockShared(&Context->Lock);
+    // Called from MINIPORT_RETURN_NET_BUFFER_LISTS
+    SpinAcquireMrswLockShared(&Context->Lock);
 
     ReceiverReturnPacket(FrontendGetReceiver(Context->Frontend),
                          Cookie);
@@ -592,9 +589,10 @@ VifTransmitterQueuePacket(
     PXENVIF_VIF_CONTEXT             Context = Interface->Context;
     NTSTATUS                        status;
 
-    AcquireMrswLockShared(&Context->Lock);
-
     status = STATUS_UNSUCCESSFUL;
+    if (!TryAcquireMrswLockShared(&Context->Lock))
+        return status;
+
     if (!Context->Enabled)
         goto done;
 
@@ -948,9 +946,8 @@ VifAcquire(
     )
 {
     PXENVIF_VIF_CONTEXT     Context = Interface->Context;
-    KIRQL                   Irql;
 
-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
 
     if (Context->References++ != 0)
         goto done;
@@ -963,7 +960,7 @@ VifAcquire(
     Trace("<====\n");
 
 done:
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);
 
     return STATUS_SUCCESS;
 }
@@ -974,9 +971,8 @@ VifRelease(
     )
 {
     PXENVIF_VIF_CONTEXT     Context = Interface->Context;
-    KIRQL                   Irql;
 
-    AcquireMrswLockExclusive(&Context->Lock, &Irql);
+    AcquireMrswLockExclusive(&Context->Lock);
 
     if (--Context->References > 0)
         goto done;
@@ -991,7 +987,7 @@ VifRelease(
     Trace("<====\n");
 
 done:
-    ReleaseMrswLockExclusive(&Context->Lock, Irql, FALSE);
+    ReleaseMrswLockExclusive(&Context->Lock);
 }
 
 static struct _XENVIF_VIF_INTERFACE_V8 VifInterfaceVersion8 = {
@@ -1100,7 +1096,9 @@ VifInitialize(
     if (*Context == NULL)
         goto fail1;
 
-    InitializeMrswLock(&(*Context)->Lock);
+    status = InitializeMrswLock(&(*Context)->Lock, XENVIF_VIF_TAG);
+    if (!NT_SUCCESS(status))
+        goto fail2;
 
     FdoGetSuspendInterface(PdoGetFdo(Pdo),&(*Context)->SuspendInterface);
 
@@ -1110,7 +1108,7 @@ VifInitialize(
                           *Context,
                           &(*Context)->MacThread);
     if (!NT_SUCCESS(status))
-        goto fail2;
+        goto fail3;
 
     (*Context)->Pdo = Pdo;
 
@@ -1118,7 +1116,7 @@ VifInitialize(
 
     return STATUS_SUCCESS;
 
-fail2:
+fail3:
     Error("fail3\n");
 
     RtlZeroMemory(&(*Context)->MacEvent, sizeof (KEVENT));
@@ -1126,7 +1124,10 @@ fail2:
     RtlZeroMemory(&(*Context)->SuspendInterface,
                   sizeof (XENBUS_SUSPEND_INTERFACE));
 
-    RtlZeroMemory(&(*Context)->Lock, sizeof (XENVIF_MRSW_LOCK));
+    TeardownMrswLock(&(*Context)->Lock);
+
+fail2:
+    Error("fail2\n");
 
     ASSERT(IsZeroMemory(*Context, sizeof (XENVIF_VIF_CONTEXT)));
     __VifFree(*Context);
@@ -1205,7 +1206,7 @@ VifGetInterface(
     }
 
     return status;
-}   
+}
 
 VOID
 VifTeardown(
@@ -1228,7 +1229,7 @@ VifTeardown(
     RtlZeroMemory(&Context->SuspendInterface,
                   sizeof (XENBUS_SUSPEND_INTERFACE));
 
-    RtlZeroMemory(&Context->Lock, sizeof (XENVIF_MRSW_LOCK));
+    TeardownMrswLock(&Context->Lock);
 
     ASSERT(IsZeroMemory(Context, sizeof (XENVIF_VIF_CONTEXT)));
     __VifFree(Context);
-- 
2.53.0.windows.2



--
Ngoc Tu Dinh | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.