server(alarms): provider-mode gauge startup baseline; reconcile-lock comment; de-flake monitor test

This commit is contained in:
Joseph Doherty
2026-06-13 10:29:13 -04:00
parent ee459f43e1
commit bcc54ca56b
3 changed files with 31 additions and 3 deletions
@@ -167,6 +167,10 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
_providerSince = DateTimeOffset.UtcNow;
}
// Align the observable gauge with the Alarmmgr baseline without recording
// a switch — the gauge was 0 (unknown) from construction until now.
_metrics.SetAlarmProviderMode(ModeToInt(AlarmProviderMode.Alarmmgr));
GatewaySession session = await _sessionManager.OpenSessionAsync(
new SessionOpenRequest(BackendName, MonitorClientName, Guid.NewGuid().ToString("N"), CommandTimeout: null),
MonitorClientName,
@@ -405,6 +409,10 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
try
{
// Intentionally awaited OUTSIDE _sync: ReconcileAsync acquires _sync itself,
// so holding it across the await here would deadlock. Subscribers therefore
// see the ProviderStatus push (above) slightly before the cache is re-seeded
// by the reconcile — an accepted brief inconsistency.
await ReconcileAsync(sessionId, cancellationToken).ConfigureAwait(false);
}
catch (OperationCanceledException)
@@ -402,6 +402,12 @@ public sealed class GatewayMetrics : IDisposable
new KeyValuePair<string, object?>("reason", reason));
}
/// <summary>
/// Overwrites the current alarm provider-mode gauge value without recording a switch.
/// Intended for establishing a baseline value, e.g. at startup.
/// </summary>
/// <param name="mode">Integer value to store as the current provider mode.</param>
public void SetAlarmProviderMode(int mode)
{
    // The assignment happens under _syncRoot; no other state is touched here.
    lock (_syncRoot)
    {
        _alarmProviderMode = mode;
    }
}
/// <summary>
/// Returns a snapshot of all current metric values.
/// </summary>
@@ -56,8 +56,11 @@ public sealed class GatewayAlarmMonitorProviderModeTests
await monitor.StartAsync(cts.Token);
await sessions.WaitForSubscribeAsync(WaitTimeout);
// Subscribe a live feed reader, drain its first (provider status) message.
// Subscribe a live feed reader. Gate emitting the mode-change event until the
// reader has consumed its baseline ProviderStatus message, avoiding a race where
// the event arrives before the subscriber is registered and has drained its snapshot.
List<AlarmFeedMessage> received = [];
TaskCompletionSource baselineReceived = new(TaskCreationOptions.RunContinuationsAsynchronously);
using CancellationTokenSource streamCts = new();
Task reader = Task.Run(async () =>
{
@@ -65,7 +68,15 @@ public sealed class GatewayAlarmMonitorProviderModeTests
{
await foreach (AlarmFeedMessage message in monitor.StreamAsync(null, streamCts.Token))
{
lock (received) { received.Add(message); }
lock (received)
{
received.Add(message);
// Signal once the first message (baseline ProviderStatus) has arrived.
if (received.Count == 1)
{
baselineReceived.TrySetResult();
}
}
}
}
catch (OperationCanceledException)
@@ -74,10 +85,13 @@ public sealed class GatewayAlarmMonitorProviderModeTests
}
});
// Wait for the baseline ProviderStatus to arrive before emitting the mode change,
// so the subscriber is registered and the event is not dropped.
await baselineReceived.Task.WaitAsync(WaitTimeout);
// Emit the worker event that flips the provider into subtag mode.
sessions.EmitEvent(new MxEvent
{
Family = MxEventFamily.OnAlarmProviderModeChanged,
OnAlarmProviderModeChanged = new OnAlarmProviderModeChangedEvent
{
Mode = AlarmProviderMode.Subtag,