[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH xenbus 10/12] Add "XEN:WATCHDOG=" system start option to specify a watchdog time-out
From: Paul Durrant <pdurrant@xxxxxxxxxx>

When this option supplies a non-zero value (in seconds) for the time-out,
XENBUS will open a VIRQ_TIMER channel bound to each active processor
(unless prevented from doing so by the lack of a FIFO Xen event channel
ABI, in which case there will only be one VIRQ_TIMER on vCPU 0).

The Xen domain watchdog will be programmed to shut the domain down after
the time-out period has elapsed and each VIRQ_TIMER will be programmed to
fire at intervals of half the watchdog period in order that the last vCPU
to receive the event notification will pat the watchdog.

This option will therefore cause lengthy event delivery or processing
stalls to be fatal to the domain, which can be useful for debugging and in
some I/O fail-over cluster scenarios.

Signed-off-by: Paul Durrant <pdurrant@xxxxxxxxxx>
---
Review fixes relative to the version originally posted:
 - FdoSetWatchdog: pass Value to the "%us TOO SHORT" Warning(); the format
   string previously had a %u conversion with no matching argument.
 - FdoVirqInitialize: skip a CPU when __FdoVirqCreate() reports
   STATUS_NOT_SUPPORTED and fail on any other error, matching the
   fall-back described in the commit message (the test was inverted).

 src/xenbus/fdo.c | 171 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 167 insertions(+), 4 deletions(-)

diff --git a/src/xenbus/fdo.c b/src/xenbus/fdo.c
index 231748f14bdf..d164aab8a604 100644
--- a/src/xenbus/fdo.c
+++ b/src/xenbus/fdo.c
@@ -86,6 +86,7 @@ typedef struct _XENBUS_VIRQ {
     ULONG                   Type;
     PXENBUS_EVTCHN_CHANNEL  Channel;
     ULONG                   Cpu;
+    ULONG                   Count;
 } XENBUS_VIRQ, *PXENBUS_VIRQ;
 
 struct _XENBUS_FDO {
@@ -152,6 +153,10 @@ struct _XENBUS_FDO {
     LIST_ENTRY                      InterruptList;
 
     LIST_ENTRY                      VirqList;
+    HIGH_LOCK                       VirqLock;
+
+    ULONG                           Watchdog;
+
     PXENBUS_DEBUG_CALLBACK          DebugCallback;
     PXENBUS_SUSPEND_CALLBACK        SuspendCallbackLate;
     PLOG_DISPOSITION                LogDisposition;
@@ -2755,6 +2760,45 @@ FdoOutputBuffer(
                    (ULONG)(Cursor - FdoOutBuffer));
 }
 
+static FORCEINLINE BOOLEAN
+__FdoVirqPatWatchdog(
+    IN  PXENBUS_VIRQ    Virq
+    )
+{
+    PXENBUS_FDO         Fdo = Virq->Fdo;
+    ULONG               Cpu;
+    ULONG               Count;
+    BOOLEAN             Pat;
+    KIRQL               Irql;
+    PLIST_ENTRY         ListEntry;
+
+    AcquireHighLock(&Fdo->VirqLock, &Irql);
+
+    Cpu = Virq->Cpu;
+    Count = Virq->Count++;
+    Pat = TRUE;
+
+    if (Virq->Count == 0) // wrapped
+        goto out;
+
+    for (ListEntry = Fdo->VirqList.Flink;
+         ListEntry != &Fdo->VirqList;
+         ListEntry = ListEntry->Flink) {
+        Virq = CONTAINING_RECORD(ListEntry, XENBUS_VIRQ, ListEntry);
+
+        if (Virq->Type != VIRQ_TIMER || Virq->Cpu == Cpu)
+            continue;
+
+        if (Virq->Count <= Count)
+            Pat = FALSE;
+    }
+
+out:
+    ReleaseHighLock(&Fdo->VirqLock, Irql);
+
+    return Pat;
+}
+
 static
 _Function_class_(KSERVICE_ROUTINE)
 _IRQL_requires_(HIGH_LEVEL)
@@ -2773,8 +2817,22 @@ FdoVirqCallback(
     ASSERT(Virq != NULL);
     Fdo = Virq->Fdo;
 
-    ASSERT3U(Virq->Type, ==, VIRQ_DEBUG);
-    XENBUS_DEBUG(Trigger, &Fdo->DebugInterface, NULL);
+    switch (Virq->Type) {
+    case VIRQ_DEBUG:
+        Virq->Count++;
+        XENBUS_DEBUG(Trigger, &Fdo->DebugInterface, NULL);
+        break;
+
+    case VIRQ_TIMER:
+        if (__FdoVirqPatWatchdog(Virq))
+            SystemSetWatchdog(Fdo->Watchdog);
+
+        break;
+
+    default:
+        ASSERT(FALSE);
+        break;
+    }
 
     return TRUE;
 }
@@ -2788,6 +2846,16 @@ __FdoVirqDestroy(
 
     Info("%s\n", VirqName(Virq->Type));
 
+    if (Virq->Type == VIRQ_TIMER) {
+        unsigned int    vcpu_id;
+        NTSTATUS        status;
+
+        status = SystemVirtualCpuIndex(Virq->Cpu, &vcpu_id);
+        ASSERT(NT_SUCCESS(status));
+
+        (VOID) VcpuSetPeriodicTimer(vcpu_id, NULL);
+    }
+
     XENBUS_EVTCHN(Close,
                   &Fdo->EvtchnInterface,
                   Virq->Channel);
@@ -2804,6 +2872,7 @@ __FdoVirqCreate(
     )
 {
     PROCESSOR_NUMBER    ProcNumber;
+    unsigned int        vcpu_id;
     NTSTATUS            status;
 
     *Virq = __FdoAllocate(sizeof (XENBUS_VIRQ));
@@ -2832,6 +2901,20 @@ __FdoVirqCreate(
     if ((*Virq)->Channel == NULL)
         goto fail2;
 
+    if (Type == VIRQ_TIMER) {
+        LARGE_INTEGER   Period;
+
+        status = SystemVirtualCpuIndex(Cpu, &vcpu_id);
+        ASSERT(NT_SUCCESS(status));
+
+        BUG_ON(Fdo->Watchdog == 0);
+        Period.QuadPart = TIME_S(Fdo->Watchdog / 2);
+
+        status = VcpuSetPeriodicTimer(vcpu_id, &Period);
+        if (!NT_SUCCESS(status))
+            goto fail3;
+    }
+
     (VOID) XENBUS_EVTCHN(Unmask,
                          &Fdo->EvtchnInterface,
                          (*Virq)->Channel,
@@ -2843,6 +2926,13 @@ __FdoVirqCreate(
 
     return STATUS_SUCCESS;
 
+fail3:
+    Error("fail3\n");
+
+    XENBUS_EVTCHN(Close,
+                  &Fdo->EvtchnInterface,
+                  (*Virq)->Channel);
+
 fail2:
     Error("fail2\n");
 
@@ -2859,6 +2949,9 @@ FdoVirqTeardown(
     IN  PXENBUS_FDO Fdo
     )
 {
+    if (Fdo->Watchdog != 0)
+        SystemStopWatchdog();
+
     while (!IsListEmpty(&Fdo->VirqList)) {
         PLIST_ENTRY     ListEntry;
         PXENBUS_VIRQ    Virq;
@@ -2882,9 +2975,12 @@ FdoVirqInitialize(
     )
 {
     PXENBUS_VIRQ    Virq;
+    ULONG           Count;
+    ULONG           Index;
     NTSTATUS        status;
 
     InitializeListHead(&Fdo->VirqList);
+    InitializeHighLock(&Fdo->VirqLock);
 
     status = __FdoVirqCreate(Fdo, VIRQ_DEBUG, 0, &Virq);
     if (!NT_SUCCESS(status))
@@ -2892,8 +2988,38 @@ FdoVirqInitialize(
 
     InsertTailList(&Fdo->VirqList, &Virq->ListEntry);
 
+    if (Fdo->Watchdog == 0)
+        goto done;
+
+    Count = KeQueryActiveProcessorCountEx(ALL_PROCESSOR_GROUPS);
+
+    for (Index = 0; Index < Count; Index++) {
+        status = __FdoVirqCreate(Fdo, VIRQ_TIMER, Index, &Virq);
+        if (!NT_SUCCESS(status)) {
+            if (status == STATUS_NOT_SUPPORTED)
+                continue;
+
+            goto fail2;
+        }
+
+        InsertTailList(&Fdo->VirqList, &Virq->ListEntry);
+    }
+
+    status = SystemSetWatchdog(Fdo->Watchdog);
+    if (!NT_SUCCESS(status))
+        goto fail3;
+
+done:
     return STATUS_SUCCESS;
 
+fail3:
+    Error("fail3\n");
+
+fail2:
+    Error("fail2\n");
+
+    FdoVirqTeardown(Fdo);
+
 fail1:
     Error("fail1 (%08x)\n", status);
 
@@ -3278,10 +3404,11 @@ FdoDebugCallback(
 
                 XENBUS_DEBUG(Printf,
                              &Fdo->DebugInterface,
-                             "- %s: (%u:%u)\n",
+                             "- %s: (%u:%u) Count = %u\n",
                              VirqName(Virq->Type),
                              ProcNumber.Group,
-                             ProcNumber.Number);
+                             ProcNumber.Number,
+                             Virq->Count);
             }
         }
     }
@@ -5472,6 +5599,38 @@ FdoBalloonTeardown(
     BalloonTeardown(Fdo->BalloonContext);
     Fdo->BalloonContext = NULL;
 }
+
+static VOID
+FdoSetWatchdog(
+    IN  PXENBUS_FDO Fdo
+    )
+{
+    CHAR            Key[] = "XEN:WATCHDOG=";
+    PANSI_STRING    Option;
+    ULONG           Value;
+    NTSTATUS        status;
+
+    status = RegistryQuerySystemStartOption(Key, &Option);
+    if (!NT_SUCCESS(status))
+        return;
+
+    Value = strtoul(Option->Buffer + sizeof (Key) - 1, NULL, 0);
+
+    RegistryFreeSzValue(Option);
+
+    if (Value && Value < 10) {
+        Warning("%us TOO SHORT (ROUNDING UP TO 10s)\n", Value);
+        Value = 10;
+    }
+
+    Fdo->Watchdog = Value;
+
+    if (Fdo->Watchdog != 0)
+        Info("WATCHDOG ENABLED (%us)\n", Fdo->Watchdog);
+    else
+        Info("WATCHDOG DISABLED\n");
+}
+
 NTSTATUS
 FdoCreate(
     IN  PDEVICE_OBJECT          PhysicalDeviceObject
@@ -5651,6 +5810,8 @@ done:
 
     (VOID) FdoSetFriendlyName(Fdo, Header.DeviceID);
 
+    FdoSetWatchdog(Fdo);
+
     Info("%p (%s) %s\n",
          FunctionDeviceObject,
          __FdoGetName(Fdo),
@@ -5810,6 +5971,8 @@ FdoDestroy(
 
     Dx->Fdo = NULL;
 
+    Fdo->Watchdog = 0;
+
     RtlZeroMemory(&Fdo->List, sizeof (LIST_ENTRY));
     RtlZeroMemory(&Fdo->Mutex, sizeof (MUTEX));
 
-- 
2.17.1
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |