feat(centralui): operator Alarm Summary page + per-instance snapshot fan-out (T13)
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
|
||||
|
||||
/// <summary>
/// Standard <see cref="IAlarmSummaryService"/> implementation (M7 T13). For a
/// given site it loads the site's instances, keeps the ones whose state is
/// <see cref="InstanceState.Enabled"/>, requests one debug snapshot per kept
/// instance through <see cref="IInstanceSnapshotClient"/> (never more than
/// eight requests in flight at once), and turns every alarm state found in
/// those snapshots into an <see cref="AlarmSummaryRow"/>.
/// </summary>
/// <remarks>
/// The fan-out is deliberately best-effort. When a single instance's fetch
/// throws — including an <see cref="OperationCanceledException"/> that was not
/// caused by the caller's token, e.g. a timeout raised inside the client — or
/// its snapshot has <see cref="DebugViewSnapshot.InstanceNotFound"/> set, that
/// instance's unique name is reported in
/// <see cref="AlarmSummaryResult.NotReportingInstances"/> and the remaining
/// instances are still summarized. Only cancellation of the token supplied by
/// the caller surfaces as an <see cref="OperationCanceledException"/>.
/// </remarks>
public sealed class AlarmSummaryService : IAlarmSummaryService
{
    /// <summary>Upper bound on snapshot fetches that may be in flight simultaneously.</summary>
    private const int MaxConcurrentFetches = 8;

    private readonly ITemplateEngineRepository _instanceRepo;
    private readonly ISiteRepository _siteRepo;
    private readonly IInstanceSnapshotClient _snapshotClient;

    /// <summary>
    /// Creates the service from its three collaborators.
    /// </summary>
    /// <param name="instanceRepo">Source of the instances that belong to a site.</param>
    /// <param name="siteRepo">Source of the site record (and thereby its identifier string).</param>
    /// <param name="snapshotClient">Client that fetches one debug snapshot for one instance.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AlarmSummaryService(
        ITemplateEngineRepository instanceRepo,
        ISiteRepository siteRepo,
        IInstanceSnapshotClient snapshotClient)
    {
        ArgumentNullException.ThrowIfNull(instanceRepo);
        ArgumentNullException.ThrowIfNull(siteRepo);
        ArgumentNullException.ThrowIfNull(snapshotClient);

        _instanceRepo = instanceRepo;
        _siteRepo = siteRepo;
        _snapshotClient = snapshotClient;
    }

    /// <inheritdoc/>
    public async Task<AlarmSummaryResult> GetSiteAlarmsAsync(
        int siteId, CancellationToken cancellationToken = default)
    {
        // An unknown site yields an empty summary rather than an error.
        var site = await _siteRepo.GetSiteByIdAsync(siteId, cancellationToken);
        if (site is null)
        {
            return CreateEmptyResult();
        }

        // Only Enabled instances take part in the fan-out.
        var allInstances = await _instanceRepo.GetInstancesBySiteIdAsync(siteId, cancellationToken);
        var targets = allInstances
            .Where(candidate => candidate.State == InstanceState.Enabled)
            .ToArray();
        if (targets.Length == 0)
        {
            return CreateEmptyResult();
        }

        // Both collectors are written concurrently by the per-instance fetches.
        var collectedRows = new ConcurrentBag<AlarmSummaryRow>();
        var silentInstances = new ConcurrentBag<string>();

        using var throttle = new SemaphoreSlim(MaxConcurrentFetches, MaxConcurrentFetches);

        // Every fetch is started up front; the semaphore decides how many
        // actually talk to the snapshot client at the same time.
        var pending = new Task[targets.Length];
        for (var index = 0; index < targets.Length; index++)
        {
            pending[index] = FetchInstanceAsync(
                site.SiteIdentifier,
                targets[index].UniqueName,
                throttle,
                collectedRows,
                silentInstances,
                cancellationToken);
        }

        await Task.WhenAll(pending);

        // The bags carry no useful order, so sort case-insensitively by
        // instance name and then alarm name to give the page (and tests) a
        // stable starting order.
        var ignoreCase = StringComparer.OrdinalIgnoreCase;
        var sortedRows = collectedRows
            .OrderBy(row => row.InstanceUniqueName, ignoreCase)
            .ThenBy(row => row.Alarm.AlarmName, ignoreCase)
            .ToList();
        var sortedSilent = silentInstances
            .OrderBy(name => name, ignoreCase)
            .ToList();

        return new AlarmSummaryResult(sortedRows, sortedSilent);
    }

    /// <summary>Builds the result returned when there is nothing to query.</summary>
    private static AlarmSummaryResult CreateEmptyResult() =>
        new AlarmSummaryResult(Array.Empty<AlarmSummaryRow>(), Array.Empty<string>());

    /// <summary>
    /// Fetches the snapshot of one instance while holding a slot of
    /// <paramref name="gate"/>, then records either its alarm rows or its name
    /// as "not reporting".
    /// </summary>
    /// <param name="siteIdentifier">Identifier string of the site that hosts the instance.</param>
    /// <param name="instanceUniqueName">Unique name of the instance to query.</param>
    /// <param name="gate">Semaphore limiting the number of concurrent fetches.</param>
    /// <param name="rows">Shared collector receiving one row per alarm state.</param>
    /// <param name="notReporting">Shared collector receiving the names of instances without a usable snapshot.</param>
    /// <param name="cancellationToken">The caller's cancellation token.</param>
    private async Task FetchInstanceAsync(
        string siteIdentifier,
        string instanceUniqueName,
        SemaphoreSlim gate,
        ConcurrentBag<AlarmSummaryRow> rows,
        ConcurrentBag<string> notReporting,
        CancellationToken cancellationToken)
    {
        // Acquired outside the try block: if the wait itself is cancelled no
        // slot was taken, so nothing must be released.
        await gate.WaitAsync(cancellationToken);
        try
        {
            var snapshot = await _snapshotClient.GetSnapshotAsync(
                siteIdentifier, instanceUniqueName, cancellationToken);

            if (!snapshot.InstanceNotFound)
            {
                foreach (var alarmState in snapshot.AlarmStates)
                {
                    rows.Add(new AlarmSummaryRow(instanceUniqueName, alarmState));
                }
            }
            else
            {
                notReporting.Add(instanceUniqueName);
            }
        }
        catch (Exception fault) when (
            fault is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            // Everything except a caller-initiated cancel (which passes through
            // this filter untouched so the page can drop the response) only
            // downgrades this single instance to "not reporting"; the summary
            // as a whole keeps going.
            notReporting.Add(instanceUniqueName);
        }
        finally
        {
            gate.Release();
        }
    }

    /// <inheritdoc/>
    public AlarmRollup ComputeRollup(IReadOnlyList<AlarmSummaryRow> rows)
    {
        ArgumentNullException.ThrowIfNull(rows);

        var perKind = new Dictionary<AlarmKind, int>();
        int activeTotal = 0, highestSeverity = 0, unacknowledged = 0;

        foreach (var entry in rows)
        {
            var alarm = entry.Alarm;

            // Every row counts towards its kind, whatever its state.
            perKind.TryGetValue(alarm.Kind, out var seenSoFar);
            perKind[alarm.Kind] = seenSoFar + 1;

            // Severity is only tracked across rows in the Active state.
            if (alarm.State == AlarmState.Active)
            {
                activeTotal++;
                highestSeverity = Math.Max(highestSeverity, alarm.Condition.Severity);
            }

            // Unacknowledged tally: the condition must be active, not yet
            // acknowledged, and the alarm must not be of the Computed kind.
            if (!alarm.Condition.Active
                || alarm.Condition.Acknowledged
                || alarm.Kind == AlarmKind.Computed)
            {
                continue;
            }

            unacknowledged++;
        }

        return new AlarmRollup(activeTotal, highestSeverity, unacknowledged, perKind);
    }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
|
||||
using ZB.MOM.WW.ScadaBridge.Communication;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
|
||||
|
||||
/// <summary>
/// Production <see cref="IInstanceSnapshotClient"/>: a thin adapter that
/// forwards each call to the single-shot
/// <see cref="CommunicationService.RequestDebugSnapshotAsync"/> Ask — the same
/// Deployer-gated snapshot path used by the CLI <c>debug snapshot</c> command
/// and the Debug View. Every call sends exactly one
/// <see cref="DebugSnapshotRequest"/> carrying a newly generated correlation id.
/// </summary>
public sealed class CommunicationInstanceSnapshotClient : IInstanceSnapshotClient
{
    private readonly CommunicationService _communication;

    /// <summary>
    /// Creates the client on top of the given communication service.
    /// </summary>
    /// <param name="communication">Central-side cluster communication service that performs the Ask.</param>
    /// <exception cref="ArgumentNullException"><paramref name="communication"/> is <c>null</c>.</exception>
    public CommunicationInstanceSnapshotClient(CommunicationService communication)
    {
        ArgumentNullException.ThrowIfNull(communication);
        _communication = communication;
    }

    /// <inheritdoc/>
    public Task<DebugViewSnapshot> GetSnapshotAsync(
        string siteIdentifier,
        string instanceUniqueName,
        CancellationToken cancellationToken = default)
    {
        // A new 32-hex-digit GUID per call, so no two requests share an id.
        var correlationId = Guid.NewGuid().ToString("N");

        return _communication.RequestDebugSnapshotAsync(
            siteIdentifier,
            new DebugSnapshotRequest(instanceUniqueName, correlationId),
            cancellationToken);
    }
}
|
||||
@@ -0,0 +1,86 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
|
||||
|
||||
/// <summary>
/// Read-only service behind the Operator Alarm Summary page (M7 T13): it
/// gathers the current alarms of every Enabled instance of one site.
/// </summary>
/// <remarks>
/// <para>
/// Alarms are not stored centrally. Each query builds the summary on the spot:
/// one <see cref="DebugViewSnapshot"/> is requested per Enabled instance via
/// the injected <see cref="IInstanceSnapshotClient"/> (which in turn uses the
/// existing single-shot
/// <see cref="ZB.MOM.WW.ScadaBridge.Communication.CommunicationService.RequestDebugSnapshotAsync"/>
/// Ask), and the <see cref="DebugViewSnapshot.AlarmStates"/> of all snapshots
/// are flattened into <see cref="AlarmSummaryRow"/>s. A problem with one
/// instance does not abort the query — if its fetch throws, times out, or
/// comes back with <see cref="DebugViewSnapshot.InstanceNotFound"/>, the
/// instance is listed in <see cref="AlarmSummaryResult.NotReportingInstances"/>
/// instead.
/// </para>
/// <para>
/// Nothing here acknowledges, shelves, or writes anything; the page is purely
/// a viewer. Filtering and roll-up figures are derived client-side from the
/// rows this service returns.
/// </para>
/// </remarks>
public interface IAlarmSummaryService
{
    /// <summary>
    /// Queries every Enabled instance of the site and combines their current
    /// alarms into a single result.
    /// </summary>
    /// <param name="siteId">Primary key of the site to summarize.</param>
    /// <param name="cancellationToken">Token used to cancel the query.</param>
    /// <returns>
    /// An <see cref="AlarmSummaryResult"/> holding one <see cref="AlarmSummaryRow"/>
    /// for each alarm state reported by an instance's snapshot, together with
    /// the unique names of the instances for which no snapshot could be
    /// obtained.
    /// </returns>
    Task<AlarmSummaryResult> GetSiteAlarmsAsync(int siteId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Computes the headline figures for a set of <see cref="AlarmSummaryRow"/>s
    /// without issuing any query, so the page (and tests) can refresh the
    /// tiles from rows they already hold.
    /// </summary>
    /// <param name="rows">Rows to aggregate.</param>
    /// <returns>The roll-up computed from <paramref name="rows"/>.</returns>
    AlarmRollup ComputeRollup(IReadOnlyList<AlarmSummaryRow> rows);
}
|
||||
|
||||
/// <summary>Outcome of one site alarm-summary query.</summary>
/// <param name="Alarms">
/// The alarm rows collected from the site's Enabled instances, one per
/// reported alarm condition.
/// </param>
/// <param name="NotReportingInstances">
/// Unique names of the Enabled instances for which no snapshot was obtained —
/// the fetch threw, timed out, or answered with
/// <see cref="DebugViewSnapshot.InstanceNotFound"/>.
/// </param>
public sealed record AlarmSummaryResult(
    IReadOnlyList<AlarmSummaryRow> Alarms,
    IReadOnlyList<string> NotReportingInstances);
|
||||
|
||||
/// <summary>
/// A single alarm condition together with the instance that owns it. The
/// wrapped <see cref="AlarmStateChanged"/> holds all the data the
/// <c>AlarmStateBadges</c> component renders from.
/// </summary>
/// <param name="InstanceUniqueName">Unique name of the instance the alarm belongs to.</param>
/// <param name="Alarm">The alarm condition itself (state / kind / severity / level / native sub-state).</param>
public sealed record AlarmSummaryRow(
    string InstanceUniqueName,
    AlarmStateChanged Alarm);
|
||||
|
||||
/// <summary>
/// Headline figures computed from a set of <see cref="AlarmSummaryRow"/>s at a
/// single point in time.
/// </summary>
/// <param name="TotalActive">Number of rows with <see cref="AlarmStateChanged.State"/> equal to <see cref="AlarmState.Active"/>.</param>
/// <param name="WorstSeverity">Largest <see cref="Commons.Types.Alarms.AlarmConditionState.Severity"/> found among the active rows, or 0 if no row is active.</param>
/// <param name="UnackedCount">Number of native conditions (<c>Kind != Computed</c>) that are active and not acknowledged.</param>
/// <param name="CountsByKind">Row count for each <see cref="AlarmKind"/>; kinds without any row have no entry.</param>
public sealed record AlarmRollup(
    int TotalActive,
    int WorstSeverity,
    int UnackedCount,
    IReadOnlyDictionary<AlarmKind, int> CountsByKind);
|
||||
@@ -0,0 +1,24 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Services;
|
||||
|
||||
/// <summary>
/// Fetches one debug snapshot for one instance per call. It exists as a small
/// seam in front of the
/// <see cref="ZB.MOM.WW.ScadaBridge.Communication.CommunicationService.RequestDebugSnapshotAsync"/>
/// Ask: <see cref="AlarmSummaryService"/> fans out through this interface, and
/// tests replace the implementation with a substitute.
/// </summary>
public interface IInstanceSnapshotClient
{
    /// <summary>
    /// Asks the given site for a single debug snapshot of the given instance.
    /// </summary>
    /// <param name="siteIdentifier">The <c>SiteIdentifier</c> string of the site — not its numeric id.</param>
    /// <param name="instanceUniqueName">Unique name of the instance to snapshot.</param>
    /// <param name="cancellationToken">Token used to cancel the request.</param>
    /// <returns>The current debug snapshot of the instance, alarm states included.</returns>
    Task<DebugViewSnapshot> GetSnapshotAsync(
        string siteIdentifier,
        string instanceUniqueName,
        CancellationToken cancellationToken = default);
}
|
||||
Reference in New Issue
Block a user