feat(centralui): operator Alarm Summary page + per-instance snapshot fan-out (T13)

This commit is contained in:
Joseph Doherty
2026-06-18 02:21:41 -04:00
parent 6a6f8949b9
commit 3c9122bc07
8 changed files with 872 additions and 0 deletions
@@ -0,0 +1,164 @@
using System.Collections.Concurrent;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
/// <summary>
/// Stock <see cref="IAlarmSummaryService"/> (M7 T13). For a single site it
/// looks up the instances in the Enabled state, requests one debug snapshot per
/// instance through <see cref="IInstanceSnapshotClient"/> — with at most
/// <see cref="MaxConcurrentFetches"/> requests in flight — and converts every
/// alarm state found in those snapshots into an <see cref="AlarmSummaryRow"/>.
/// </summary>
/// <remarks>
/// The fan-out is deliberately best-effort. When the fetch for one instance
/// throws, is cancelled by something other than the caller's token, or comes
/// back flagged <see cref="DebugViewSnapshot.InstanceNotFound"/>, that instance
/// is listed in <see cref="AlarmSummaryResult.NotReportingInstances"/> and the
/// remaining instances are still reported. Only cancellation of the token the
/// caller supplied (<see cref="OperationCanceledException"/>) is allowed to
/// escape.
/// </remarks>
public sealed class AlarmSummaryService : IAlarmSummaryService
{
    /// <summary>Upper bound on snapshot requests running at the same time.</summary>
    private const int MaxConcurrentFetches = 8;

    private readonly ITemplateEngineRepository _instanceRepo;
    private readonly ISiteRepository _siteRepo;
    private readonly IInstanceSnapshotClient _snapshotClient;

    /// <summary>
    /// Creates the service from its three collaborators.
    /// </summary>
    /// <param name="instanceRepo">Source of the instances that belong to a site.</param>
    /// <param name="siteRepo">Source of the site record (and its identifier string).</param>
    /// <param name="snapshotClient">Client that fetches one snapshot for one instance.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AlarmSummaryService(
        ITemplateEngineRepository instanceRepo,
        ISiteRepository siteRepo,
        IInstanceSnapshotClient snapshotClient)
    {
        ArgumentNullException.ThrowIfNull(instanceRepo);
        ArgumentNullException.ThrowIfNull(siteRepo);
        ArgumentNullException.ThrowIfNull(snapshotClient);

        _instanceRepo = instanceRepo;
        _siteRepo = siteRepo;
        _snapshotClient = snapshotClient;
    }

    /// <inheritdoc/>
    public async Task<AlarmSummaryResult> GetSiteAlarmsAsync(
        int siteId, CancellationToken cancellationToken = default)
    {
        // An unknown site yields an empty summary rather than an error.
        var site = await _siteRepo.GetSiteByIdAsync(siteId, cancellationToken);
        if (site is null)
        {
            return EmptyResult();
        }

        var siteInstances = await _instanceRepo.GetInstancesBySiteIdAsync(siteId, cancellationToken);
        var targets = siteInstances
            .Where(candidate => candidate.State == InstanceState.Enabled)
            .ToList();
        if (targets.Count == 0)
        {
            return EmptyResult();
        }

        // The fetches complete in arbitrary order, so results are gathered in
        // concurrent collections and sorted once everything has finished.
        var collectedRows = new ConcurrentBag<AlarmSummaryRow>();
        var silentInstances = new ConcurrentBag<string>();
        using var throttle = new SemaphoreSlim(MaxConcurrentFetches, MaxConcurrentFetches);

        var pending = new List<Task>(targets.Count);
        foreach (var target in targets)
        {
            pending.Add(FetchInstanceAsync(
                site.SiteIdentifier,
                target.UniqueName,
                throttle,
                collectedRows,
                silentInstances,
                cancellationToken));
        }

        await Task.WhenAll(pending);

        // Sort by instance name, then alarm name (both case-insensitive) so the
        // first render and test assertions see a stable order.
        var ignoreCase = StringComparer.OrdinalIgnoreCase;
        var sortedRows = collectedRows
            .OrderBy(row => row.InstanceUniqueName, ignoreCase)
            .ThenBy(row => row.Alarm.AlarmName, ignoreCase)
            .ToList();
        var sortedSilent = silentInstances
            .OrderBy(name => name, ignoreCase)
            .ToList();

        return new AlarmSummaryResult(sortedRows, sortedSilent);
    }

    /// <summary>Builds a fresh result that carries no rows and no not-reporting instances.</summary>
    private static AlarmSummaryResult EmptyResult()
    {
        return new AlarmSummaryResult(Array.Empty<AlarmSummaryRow>(), Array.Empty<string>());
    }

    /// <summary>
    /// Fetches the snapshot of one instance while holding a slot of
    /// <paramref name="gate"/>, then records either its alarm rows or its name
    /// as "not reporting".
    /// </summary>
    /// <param name="siteIdentifier">Identifier string of the owning site.</param>
    /// <param name="instanceUniqueName">Unique name of the instance to query.</param>
    /// <param name="gate">Semaphore limiting the number of concurrent fetches.</param>
    /// <param name="rows">Receives one row per alarm state of a successful snapshot.</param>
    /// <param name="notReporting">Receives the instance name when no snapshot was obtained.</param>
    /// <param name="cancellationToken">The caller's token; its cancellation is propagated.</param>
    private async Task FetchInstanceAsync(
        string siteIdentifier,
        string instanceUniqueName,
        SemaphoreSlim gate,
        ConcurrentBag<AlarmSummaryRow> rows,
        ConcurrentBag<string> notReporting,
        CancellationToken cancellationToken)
    {
        // The wait is outside the try block: if it is cancelled no slot was
        // taken, so the Release in the finally block must not run.
        await gate.WaitAsync(cancellationToken);
        try
        {
            var snapshot = await _snapshotClient.GetSnapshotAsync(
                siteIdentifier, instanceUniqueName, cancellationToken);

            if (!snapshot.InstanceNotFound)
            {
                foreach (var alarmState in snapshot.AlarmStates)
                {
                    rows.Add(new AlarmSummaryRow(instanceUniqueName, alarmState));
                }
            }
            else
            {
                notReporting.Add(instanceUniqueName);
            }
        }
        catch (Exception fault)
            when (fault is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            // Everything except a caller-initiated cancel (per-instance timeout,
            // transport error, the snapshot Ask throwing) downgrades just this
            // instance to "not reporting". A caller-initiated cancel fails the
            // filter and keeps propagating so the page can drop the response.
            notReporting.Add(instanceUniqueName);
        }
        finally
        {
            gate.Release();
        }
    }

    /// <inheritdoc/>
    public AlarmRollup ComputeRollup(IReadOnlyList<AlarmSummaryRow> rows)
    {
        ArgumentNullException.ThrowIfNull(rows);

        var perKind = new Dictionary<AlarmKind, int>();
        var activeCount = 0;
        var peakSeverity = 0;
        var unacknowledged = 0;

        foreach (var row in rows)
        {
            var alarm = row.Alarm;

            // Every row counts towards its kind, whatever its state.
            perKind.TryGetValue(alarm.Kind, out var seenSoFar);
            perKind[alarm.Kind] = seenSoFar + 1;

            if (alarm.State == AlarmState.Active)
            {
                activeCount++;
                peakSeverity = Math.Max(peakSeverity, alarm.Condition.Severity);
            }

            // Unacked = condition active, not acknowledged, and not a Computed alarm.
            var countsAsUnacked = alarm.Condition.Active
                && !alarm.Condition.Acknowledged
                && alarm.Kind != AlarmKind.Computed;
            if (countsAsUnacked)
            {
                unacknowledged++;
            }
        }

        return new AlarmRollup(activeCount, peakSeverity, unacknowledged, perKind);
    }
}
@@ -0,0 +1,36 @@
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
using ZB.MOM.WW.ScadaBridge.Communication;
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
/// <summary>
/// Production <see cref="IInstanceSnapshotClient"/>. It adds nothing of its own:
/// every call is forwarded to the existing single-shot
/// <see cref="CommunicationService.RequestDebugSnapshotAsync"/> Ask — the same
/// Deployer-gated snapshot path used by the CLI <c>debug snapshot</c> command
/// and the Debug View — as one <see cref="DebugSnapshotRequest"/> carrying a
/// newly generated correlation id.
/// </summary>
public sealed class CommunicationInstanceSnapshotClient : IInstanceSnapshotClient
{
    private readonly CommunicationService _comms;

    /// <summary>
    /// Creates the client on top of the given communication service.
    /// </summary>
    /// <param name="communication">Central-side cluster communication service.</param>
    /// <exception cref="ArgumentNullException"><paramref name="communication"/> is <c>null</c>.</exception>
    public CommunicationInstanceSnapshotClient(CommunicationService communication)
    {
        ArgumentNullException.ThrowIfNull(communication);
        _comms = communication;
    }

    /// <inheritdoc/>
    public Task<DebugViewSnapshot> GetSnapshotAsync(
        string siteIdentifier,
        string instanceUniqueName,
        CancellationToken cancellationToken = default)
    {
        // "N" formats the GUID as 32 hex digits without hyphens.
        var correlationId = Guid.NewGuid().ToString("N");
        var snapshotRequest = new DebugSnapshotRequest(instanceUniqueName, correlationId);

        return _comms.RequestDebugSnapshotAsync(siteIdentifier, snapshotRequest, cancellationToken);
    }
}
@@ -0,0 +1,86 @@
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
/// <summary>
/// Read-only operator service that aggregates the current alarm picture across
/// all Enabled instances of a single site (M7 T13 — Operator Alarm Summary).
/// </summary>
/// <remarks>
/// <para>
/// There is no central alarm store. The summary is assembled at query time by
/// fanning out one <see cref="DebugViewSnapshot"/> request per Enabled instance
/// (via the injected <see cref="IInstanceSnapshotClient"/>, which delegates to
/// the existing single-shot
/// <see cref="ZB.MOM.WW.ScadaBridge.Communication.CommunicationService.RequestDebugSnapshotAsync"/>
/// Ask) and flattening every snapshot's <see cref="DebugViewSnapshot.AlarmStates"/>
/// into <see cref="AlarmSummaryRow"/>s. The fan-out is best-effort: an instance
/// whose snapshot fetch throws, times out, or reports
/// <see cref="DebugViewSnapshot.InstanceNotFound"/> is recorded in
/// <see cref="AlarmSummaryResult.NotReportingInstances"/> and never aborts the
/// whole call.
/// </para>
/// <para>
/// The page is read-only — there are no ack / shelve / write operations. All
/// filtering and roll-up math happens client-side from the returned rows.
/// </para>
/// </remarks>
public interface IAlarmSummaryService
{
/// <summary>
/// Fetches and aggregates the current alarms across every Enabled instance of
/// the given site.
/// </summary>
/// <param name="siteId">The site primary key.</param>
/// <param name="cancellationToken">
/// Cancellation token. Cancelling it aborts the whole query; it is the one
/// failure that is not absorbed by the best-effort fan-out.
/// </param>
/// <returns>
/// An <see cref="AlarmSummaryResult"/> with one <see cref="AlarmSummaryRow"/>
/// per entry of <see cref="DebugViewSnapshot.AlarmStates"/> in each snapshot
/// that was obtained, plus the unique names of any instances whose snapshot
/// could not be obtained. <see cref="AlarmSummaryService"/> returns an empty
/// result (no rows, no not-reporting instances) when the site does not exist or
/// has no Enabled instances.
/// NOTE(review): <see cref="AlarmSummaryService"/> does not filter rows by alarm
/// state; whether snapshots only ever contain active/mirrored conditions depends
/// on the site side — confirm before relying on it.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown by <see cref="AlarmSummaryService"/> when
/// <paramref name="cancellationToken"/> is cancelled while the fan-out is running.
/// </exception>
Task<AlarmSummaryResult> GetSiteAlarmsAsync(int siteId, CancellationToken cancellationToken = default);
/// <summary>
/// Pure roll-up over a set of <see cref="AlarmSummaryRow"/>s. Exposed so the
/// page (and tests) can recompute the headline tiles without re-querying.
/// </summary>
/// <param name="rows">The alarm rows to summarize. An empty list is valid.</param>
/// <returns>The aggregated roll-up.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown by <see cref="AlarmSummaryService"/> when <paramref name="rows"/> is <c>null</c>.
/// </exception>
AlarmRollup ComputeRollup(IReadOnlyList<AlarmSummaryRow> rows);
}
/// <summary>The result of a site alarm-summary query.</summary>
/// <remarks>
/// <see cref="AlarmSummaryService"/> returns both lists already sorted with
/// <see cref="StringComparer.OrdinalIgnoreCase"/>: rows by instance unique name
/// and then alarm name, not-reporting instances by name.
/// </remarks>
/// <param name="Alarms">One row per alarm condition reported across the site's Enabled instances.</param>
/// <param name="NotReportingInstances">
/// Unique names of Enabled instances whose snapshot could not be obtained
/// (fetch threw, timed out, or returned <see cref="DebugViewSnapshot.InstanceNotFound"/>).
/// </param>
public sealed record AlarmSummaryResult(
IReadOnlyList<AlarmSummaryRow> Alarms,
IReadOnlyList<string> NotReportingInstances);
/// <summary>
/// One alarm condition paired with the instance it belongs to. The
/// <see cref="AlarmStateChanged"/> carries everything the
/// <c>AlarmStateBadges</c> component needs to render.
/// </summary>
/// <remarks>
/// <see cref="AlarmSummaryService"/> creates one row for each entry of
/// <see cref="DebugViewSnapshot.AlarmStates"/> in an instance's snapshot, so an
/// instance with several alarms contributes several rows.
/// </remarks>
/// <param name="InstanceUniqueName">Unique name of the owning instance.</param>
/// <param name="Alarm">The alarm condition (state / kind / severity / level / native sub-state).</param>
public sealed record AlarmSummaryRow(
string InstanceUniqueName,
AlarmStateChanged Alarm);
/// <summary>
/// Pure point-in-time roll-up over a set of <see cref="AlarmSummaryRow"/>s.
/// </summary>
/// <remarks>
/// The parameter descriptions below follow
/// <see cref="AlarmSummaryService.ComputeRollup"/>.
/// NOTE(review): <c>TotalActive</c> is keyed off
/// <see cref="AlarmStateChanged.State"/> while <c>UnackedCount</c> is keyed off
/// the condition's own <c>Active</c> flag — confirm that the two are meant to be
/// able to differ.
/// </remarks>
/// <param name="TotalActive">Count of rows whose <see cref="AlarmStateChanged.State"/> is <see cref="AlarmState.Active"/>.</param>
/// <param name="WorstSeverity">Highest <see cref="Commons.Types.Alarms.AlarmConditionState.Severity"/> among active rows; 0 when none active.</param>
/// <param name="UnackedCount">
/// Count of rows whose condition is flagged <c>Active</c> and not
/// <c>Acknowledged</c> and whose kind is not <see cref="AlarmKind.Computed"/>
/// (i.e. active, unacknowledged native conditions).
/// </param>
/// <param name="CountsByKind">
/// Per-<see cref="AlarmKind"/> row counts over all rows, whatever their state
/// (only kinds with at least one row appear).
/// </param>
public sealed record AlarmRollup(
int TotalActive,
int WorstSeverity,
int UnackedCount,
IReadOnlyDictionary<AlarmKind, int> CountsByKind);
@@ -0,0 +1,24 @@
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
/// <summary>
/// Single-shot per-instance debug-snapshot client. A thin seam over the existing
/// <see cref="ZB.MOM.WW.ScadaBridge.Communication.CommunicationService.RequestDebugSnapshotAsync"/>
/// Ask so <see cref="AlarmSummaryService"/> can fan out snapshot fetches while
/// staying unit-testable (the implementation is substituted in tests).
/// </summary>
/// <remarks>
/// <see cref="AlarmSummaryService"/> treats a snapshot flagged
/// <see cref="DebugViewSnapshot.InstanceNotFound"/>, and any exception other
/// than cancellation of the caller's own token, as "instance not reporting"
/// rather than as a failure of the whole query.
/// </remarks>
public interface IInstanceSnapshotClient
{
/// <summary>
/// Requests one debug snapshot for the given instance on the given site.
/// </summary>
/// <param name="siteIdentifier">The site's <c>SiteIdentifier</c> string (not the numeric site id).</param>
/// <param name="instanceUniqueName">The instance's unique name.</param>
/// <param name="cancellationToken">Cancellation token forwarded to the underlying request.</param>
/// <returns>
/// The instance's current debug snapshot, including its alarm states. The
/// snapshot may be flagged <see cref="DebugViewSnapshot.InstanceNotFound"/>;
/// callers should check that flag before reading the alarm states.
/// </returns>
Task<DebugViewSnapshot> GetSnapshotAsync(
string siteIdentifier,
string instanceUniqueName,
CancellationToken cancellationToken = default);
}