refactor: rename ScadaLink → ZB.MOM.WW.ScadaBridge (code + projects + namespaces)
Solution + 23 src projects + 26 test projects renamed; folders, csproj, namespaces, and ScadaLinkDbContext/ScadaBridgeDbContext class updated. ActorSystem "scadalink" → "scadabridge", Akka seed-node URLs migrated. SQL roles/logins, LDAP domains, CLI command name, and CLI config dir (~/.scadalink → ~/.scadabridge) also renamed. Build green; 5 Host.Tests fail awaiting SQL login rename in next commit. Pre-existing StaleTagMonitor timing flakes unchanged. Rename script committed at tools/rename-to-scadabridge.sh.
This commit is contained in:
+57
@@ -0,0 +1,57 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// Bundle C (M5-T7) regression coverage for the <c>AuditRedactionFailure</c>
/// health metric. The Audit Log payload filter (<c>DefaultAuditPayloadFilter</c>)
/// bumps <c>IAuditRedactionFailureCounter</c> whenever a header/body/SQL-param
/// redactor stage throws and the field has to be over-redacted with the
/// <c>&lt;redacted: redactor error&gt;</c> marker. Bundle C forwards that counter
/// into the Site Health Monitoring report payload so a misconfigured or
/// catastrophic regex shows up on /monitoring/health instead of vanishing into
/// a NoOp sink. The metric follows the Bundle G <c>SiteAuditWriteFailures</c>
/// shape: incremented per interval, reset on collection, zero by default.
/// </summary>
public class AuditRedactionFailureMetricTests
{
    private const string SiteId = "site-1";

    private readonly SiteHealthCollector _collector = new SiteHealthCollector();

    /// <summary>Records <paramref name="times"/> redaction failures on the collector under test.</summary>
    private void RecordFailures(int times)
    {
        for (var i = 0; i < times; i++)
        {
            _collector.IncrementAuditRedactionFailure();
        }
    }

    [Fact]
    public void Increment_Three_Times_Counter_Reports_3()
    {
        RecordFailures(3);

        var collected = _collector.CollectReport(SiteId);

        Assert.Equal(3, collected.AuditRedactionFailure);
    }

    [Fact]
    public void Report_Payload_Includes_AuditRedactionFailure_AsZeroByDefault()
    {
        // No increments at all: the payload field must still be present and zero.
        var collected = _collector.CollectReport(SiteId);

        Assert.Equal(0, collected.AuditRedactionFailure);
    }

    /// <summary>
    /// AuditRedactionFailure is an interval count rather than a running total,
    /// matching the reset-on-collect behaviour of ScriptErrorCount /
    /// AlarmEvaluationErrorCount / DeadLetterCount / SiteAuditWriteFailures.
    /// </summary>
    [Fact]
    public void CollectReport_Resets_AuditRedactionFailure()
    {
        RecordFailures(2);

        // The first collection drains the two recorded failures...
        var first = _collector.CollectReport(SiteId);
        Assert.Equal(2, first.AuditRedactionFailure);

        // ...so the next interval starts from zero again.
        var second = _collector.CollectReport(SiteId);
        Assert.Equal(0, second.AuditRedactionFailure);
    }
}
|
||||
@@ -0,0 +1,426 @@
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// Minimal fake <see cref="TimeProvider"/> for tests: the reported UTC time
/// starts at the supplied instant and only moves when <see cref="Advance"/>
/// is called.
/// </summary>
/// <param name="startTime">The instant the clock reports until it is advanced.</param>
internal sealed class TestTimeProvider(DateTimeOffset startTime) : TimeProvider
{
    private DateTimeOffset _utcNow = startTime;

    /// <summary>Returns the current fake UTC time.</summary>
    public override DateTimeOffset GetUtcNow()
    {
        return _utcNow;
    }

    /// <summary>Moves the fake clock forward (or backward, for a negative value) by <paramref name="duration"/>.</summary>
    public void Advance(TimeSpan duration)
    {
        _utcNow = _utcNow.Add(duration);
    }
}
|
||||
|
||||
public class CentralHealthAggregatorTests
|
||||
{
|
||||
private readonly TestTimeProvider _timeProvider;
private readonly CentralHealthAggregator _aggregator;

/// <summary>
/// Builds a fresh aggregator for each test, wired to a manually advanced
/// clock and configured with a 60-second offline timeout.
/// </summary>
public CentralHealthAggregatorTests()
{
    _timeProvider = new TestTimeProvider(DateTimeOffset.UtcNow);

    var monitoringOptions = new HealthMonitoringOptions
    {
        OfflineTimeout = TimeSpan.FromSeconds(60)
    };

    _aggregator = new CentralHealthAggregator(
        Options.Create(monitoringOptions),
        NullLogger<CentralHealthAggregator>.Instance,
        _timeProvider);
}
|
||||
|
||||
/// <summary>
/// Creates an otherwise-empty <see cref="SiteHealthReport"/> for
/// <paramref name="siteId"/> carrying sequence number <paramref name="seq"/>.
/// All counters are zero, all dictionaries are empty, and the report
/// timestamp is the wall-clock time at the moment of the call.
/// </summary>
private static SiteHealthReport MakeReport(string siteId, long seq)
{
    return new SiteHealthReport(
        SiteId: siteId,
        SequenceNumber: seq,
        ReportTimestamp: DateTimeOffset.UtcNow,
        DataConnectionStatuses: new Dictionary<string, ConnectionHealth>(),
        TagResolutionCounts: new Dictionary<string, TagResolutionStatus>(),
        ScriptErrorCount: 0,
        AlarmEvaluationErrorCount: 0,
        StoreAndForwardBufferDepths: new Dictionary<string, int>(),
        DeadLetterCount: 0,
        DeployedInstanceCount: 0,
        EnabledInstanceCount: 0,
        DisabledInstanceCount: 0);
}
|
||||
|
||||
[Fact]
public void ProcessReport_StoresState_ForNewSite()
{
    // The first report for a site should create its state entry.
    var firstReport = MakeReport("site-1", 1);
    _aggregator.ProcessReport(firstReport);

    var stored = _aggregator.GetSiteState("site-1");

    Assert.NotNull(stored);
    Assert.True(stored.IsOnline);
    Assert.Equal(1, stored.LastSequenceNumber);
}
|
||||
|
||||
[Fact]
public void ProcessReport_UpdatesState_WhenSequenceIncreases()
{
    // Two reports in ascending sequence order: the later one wins.
    foreach (var seq in new long[] { 1, 2 })
    {
        _aggregator.ProcessReport(MakeReport("site-1", seq));
    }

    var stored = _aggregator.GetSiteState("site-1");

    Assert.Equal(2, stored!.LastSequenceNumber);
}
|
||||
|
||||
[Fact]
public void ProcessReport_RejectsStaleReport_WhenSequenceNotGreater()
{
    // Sequence 3 arrives after sequence 5 and must be ignored.
    foreach (var seq in new long[] { 5, 3 })
    {
        _aggregator.ProcessReport(MakeReport("site-1", seq));
    }

    var stored = _aggregator.GetSiteState("site-1");

    Assert.Equal(5, stored!.LastSequenceNumber);
}
|
||||
|
||||
[Fact]
public void ProcessReport_RejectsEqualSequence()
{
    // The same sequence number is delivered twice.
    for (var delivery = 0; delivery < 2; delivery++)
    {
        _aggregator.ProcessReport(MakeReport("site-1", 5));
    }

    var stored = _aggregator.GetSiteState("site-1");

    Assert.Equal(5, stored!.LastSequenceNumber);
}
|
||||
|
||||
[Fact]
public void OfflineDetection_SiteGoesOffline_WhenNoReportWithinTimeout()
{
    _aggregator.ProcessReport(MakeReport("site-1", 1));
    var beforeTimeout = _aggregator.GetSiteState("site-1");
    Assert.True(beforeTimeout!.IsOnline);

    // One second beyond the 60 s offline timeout configured in the constructor.
    _timeProvider.Advance(TimeSpan.FromSeconds(61));
    _aggregator.CheckForOfflineSites();

    var afterTimeout = _aggregator.GetSiteState("site-1");
    Assert.False(afterTimeout!.IsOnline);
}
|
||||
|
||||
[Fact]
public void OnlineRecovery_SiteComesBackOnline_WhenReportReceived()
{
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    // Let the site time out so that it is marked offline.
    _timeProvider.Advance(TimeSpan.FromSeconds(61));
    _aggregator.CheckForOfflineSites();
    var whileSilent = _aggregator.GetSiteState("site-1");
    Assert.False(whileSilent!.IsOnline);

    // A newer report must flip it back to online.
    _aggregator.ProcessReport(MakeReport("site-1", 2));
    var afterReport = _aggregator.GetSiteState("site-1");
    Assert.True(afterReport!.IsOnline);
}
|
||||
|
||||
[Fact]
public void OfflineDetection_SiteRemainsOnline_WhenReportWithinTimeout()
{
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    // Only half of the 60 s offline timeout elapses before the check runs.
    var halfTimeout = TimeSpan.FromSeconds(30);
    _timeProvider.Advance(halfTimeout);
    _aggregator.CheckForOfflineSites();

    var stored = _aggregator.GetSiteState("site-1");
    Assert.True(stored!.IsOnline);
}
|
||||
|
||||
[Fact]
public void GetAllSiteStates_ReturnsAllKnownSites()
{
    // Register two distinct sites with one report each.
    _aggregator.ProcessReport(MakeReport("site-1", 1));
    _aggregator.ProcessReport(MakeReport("site-2", 1));

    var allStates = _aggregator.GetAllSiteStates();

    Assert.Equal(2, allStates.Count);
    Assert.Contains("site-1", allStates.Keys);
    Assert.Contains("site-2", allStates.Keys);
}
|
||||
|
||||
[Fact]
public void GetSiteState_ReturnsNull_ForUnknownSite()
{
    // Nothing has been reported or heartbeated for this id.
    var missing = _aggregator.GetSiteState("nonexistent");

    Assert.Null(missing);
}
|
||||
|
||||
[Fact]
public void ProcessReport_StoresLatestReport()
{
    // Give the report a recognisable payload value to look for afterwards.
    var baseline = MakeReport("site-1", 1);
    var tagged = baseline with { ScriptErrorCount = 42 };

    _aggregator.ProcessReport(tagged);

    var stored = _aggregator.GetSiteState("site-1");
    Assert.Equal(42, stored!.LatestReport!.ScriptErrorCount);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-002 regression: SiteHealthState is mutated from multiple
/// threads (ProcessReport, MarkHeartbeat, CheckForOfflineSites). With a mutable
/// class and unsynchronized field writes, a snapshot read could observe a torn
/// or half-applied state. The state must be immutable and every transition an
/// atomic reference swap, so a snapshot is always internally consistent and the
/// monotonic sequence-number guard is never subverted by a lost update.
/// </summary>
/// <remarks>
/// Runs one writer (ascending sequence numbers), one heartbeater and one reader
/// concurrently against the same site. The reader checks that each snapshot is
/// internally consistent and that the observed sequence number never moves
/// backwards; the final state must carry the highest sequence number written.
/// </remarks>
[Fact]
public async Task ProcessReport_ConcurrentUpdates_NeverLoseSequenceOrTearState()
{
    const int iterations = 5_000;

    // SiteHealthState must be an immutable record so handing the reference to
    // UI callers (and reading it concurrently) is safe. Records expose a
    // compiler-generated "<Clone>$" method, which is what is probed here.
    Assert.True(typeof(SiteHealthState).GetMethod("<Clone>$") is not null,
        "SiteHealthState must be an immutable record for safe concurrent reads.");

    _aggregator.ProcessReport(MakeReport("site-1", 0));

    var writer = Task.Run(() =>
    {
        for (long seq = 1; seq <= iterations; seq++)
        {
            _aggregator.ProcessReport(MakeReport("site-1", seq));
        }
    });

    var heartbeater = Task.Run(() =>
    {
        for (var i = 0; i < iterations; i++)
        {
            _aggregator.MarkHeartbeat("site-1", _timeProvider.GetUtcNow());
        }
    });

    var reader = Task.Run(() =>
    {
        // Kept local to the reader task: it is only touched from this thread.
        long highestSeen = 0;

        for (var i = 0; i < iterations; i++)
        {
            var snapshot = _aggregator.GetSiteState("site-1");
            if (snapshot is null)
            {
                continue;
            }

            // A consistent snapshot: the stored report's sequence number must
            // always match the state's LastSequenceNumber (no half-applied update).
            Assert.Equal(snapshot.LastSequenceNumber, snapshot.LatestReport!.SequenceNumber);

            // The single writer only ever increases the sequence, so a reader
            // must never see it regress (that would indicate a lost update).
            Assert.True(
                snapshot.LastSequenceNumber >= highestSeen,
                $"Sequence regressed from {highestSeen} to {snapshot.LastSequenceNumber}");
            highestSeen = snapshot.LastSequenceNumber;
        }
    });

    // Assertion failures inside the tasks surface here as task exceptions.
    await Task.WhenAll(writer, heartbeater, reader);

    // The final state must reflect the highest sequence — no lost update.
    var final = _aggregator.GetSiteState("site-1");
    Assert.Equal(iterations, final!.LastSequenceNumber);
    Assert.Equal(iterations, final.LatestReport!.SequenceNumber);
    Assert.True(final.IsOnline);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-007 regression: when a heartbeat arrives for a site that
/// has not yet sent a full report (for example right after a central
/// restart/failover, when the aggregator's in-memory state is empty), the site
/// must be registered as online instead of the heartbeat being silently
/// dropped. Otherwise reachable sites show as "unknown" for up to a full
/// report interval during the failover window.
/// </summary>
[Fact]
public void MarkHeartbeat_RegistersUnknownSite_AsOnlineAwaitingReport()
{
    var heartbeatAt = _timeProvider.GetUtcNow();

    _aggregator.MarkHeartbeat("site-new", heartbeatAt);

    var registered = _aggregator.GetSiteState("site-new");
    Assert.NotNull(registered);
    Assert.True(registered.IsOnline);
    // No full report has been processed yet, so there is nothing to show.
    Assert.Null(registered.LatestReport);
    Assert.Equal(heartbeatAt, registered.LastHeartbeatAt);
}
|
||||
|
||||
/// <summary>
/// Regression test for HealthMonitoring-015. A site registered only through a
/// heartbeat has never processed a full report, so
/// <see cref="SiteHealthState.LastReportReceivedAt"/> must be <c>null</c>
/// rather than the <c>DateTimeOffset.MinValue</c> (year-0001) sentinel, which
/// the UI would render as a ~2000-year-stale timestamp. "No report yet" is an
/// explicit nullable state, consistent with
/// <see cref="SiteHealthState.LatestReport"/>.
/// </summary>
[Fact]
public void MarkHeartbeat_RegistersUnknownSite_WithNullLastReportReceivedAt()
{
    var heartbeatAt = _timeProvider.GetUtcNow();
    _aggregator.MarkHeartbeat("site-new", heartbeatAt);

    var registered = _aggregator.GetSiteState("site-new");

    Assert.NotNull(registered);
    Assert.Null(registered.LastReportReceivedAt);
}
|
||||
|
||||
/// <summary>
/// Regression test for HealthMonitoring-015. After a full report has been
/// processed for a site that was first registered via heartbeat,
/// <see cref="SiteHealthState.LastReportReceivedAt"/> holds a real (non-null)
/// instant.
/// </summary>
[Fact]
public void ProcessReport_SetsLastReportReceivedAt_ForHeartbeatRegisteredSite()
{
    // Register through a heartbeat, then move the fake clock on by 5 s.
    _aggregator.MarkHeartbeat("site-new", _timeProvider.GetUtcNow());
    _timeProvider.Advance(TimeSpan.FromSeconds(5));
    var expectedReceivedAt = _timeProvider.GetUtcNow();

    _aggregator.ProcessReport(MakeReport("site-new", 1));

    var updated = _aggregator.GetSiteState("site-new");
    Assert.NotNull(updated);
    Assert.Equal(expectedReceivedAt, updated.LastReportReceivedAt);
}
|
||||
|
||||
[Fact]
public void MarkHeartbeat_KeepsSiteOnline_BetweenReports()
{
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    // 90 s pass in total (beyond the 60 s offline timeout), but a heartbeat
    // lands after each 45 s step.
    for (var step = 0; step < 2; step++)
    {
        _timeProvider.Advance(TimeSpan.FromSeconds(45));
        _aggregator.MarkHeartbeat("site-1", _timeProvider.GetUtcNow());
    }

    _aggregator.CheckForOfflineSites();

    var stored = _aggregator.GetSiteState("site-1");
    Assert.True(stored!.IsOnline);
}
|
||||
|
||||
[Fact]
public void MarkHeartbeat_BringsOfflineSiteBackOnline()
{
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    // Time the site out first.
    _timeProvider.Advance(TimeSpan.FromSeconds(61));
    _aggregator.CheckForOfflineSites();
    var whileSilent = _aggregator.GetSiteState("site-1");
    Assert.False(whileSilent!.IsOnline);

    // A heartbeat alone (no new report) must be enough to revive it.
    _aggregator.MarkHeartbeat("site-1", _timeProvider.GetUtcNow());
    var afterHeartbeat = _aggregator.GetSiteState("site-1");
    Assert.True(afterHeartbeat!.IsOnline);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-020 regression: an offline-to-online transition has to be
/// backed by a fresh LastHeartbeatAt. MarkHeartbeat previously used
/// <c>max(receivedAt, existing.LastHeartbeatAt)</c>, so an out-of-order
/// heartbeat carrying an older timestamp brought the site online with a stale
/// heartbeat, and CheckForOfflineSites flapped it straight back to offline on
/// the next tick.
/// </summary>
[Fact]
public void MarkHeartbeat_OfflineToOnline_StampsFreshLastHeartbeatAt()
{
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    _timeProvider.Advance(TimeSpan.FromSeconds(61));
    _aggregator.CheckForOfflineSites();
    Assert.False(_aggregator.GetSiteState("site-1")!.IsOnline);

    // Simulate an out-of-order heartbeat whose timestamp is older than the
    // existing LastHeartbeatAt (e.g. clock skew on the originating node).
    var nowAfter = _timeProvider.GetUtcNow();
    var staleReceivedAt = nowAfter - TimeSpan.FromSeconds(120);
    _aggregator.MarkHeartbeat("site-1", staleReceivedAt);

    var revived = _aggregator.GetSiteState("site-1")!;
    Assert.True(revived.IsOnline);

    // The recorded LastHeartbeatAt must be ~"now", not the stale receivedAt.
    var heartbeatAge = nowAfter - revived.LastHeartbeatAt;
    Assert.InRange(heartbeatAge.TotalSeconds, 0, 5);

    // Surviving the very next offline check proves there is no flap.
    _aggregator.CheckForOfflineSites();
    Assert.True(_aggregator.GetSiteState("site-1")!.IsOnline);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-005 regression: the synthetic "central" site has no
/// heartbeat source; its LastHeartbeatAt only moves when the 30s
/// CentralHealthReportLoop self-report runs. One skipped or late self-report
/// (leader GC pause, brief stall, mid-failover) would leave it without a
/// signal for >60s and flap it offline although the central cluster is
/// healthy. The "central" keyspace entry therefore needs a longer offline
/// grace than real sites.
/// </summary>
[Fact]
public void OfflineDetection_CentralSite_HasLongerGraceThanRealSites()
{
    var centralId = CentralHealthReportLoop.CentralSiteId;
    _aggregator.ProcessReport(MakeReport(centralId, 1));
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    // 75 s = the normal 60 s site timeout plus room for one missed central
    // self-report. A real site is already offline at this point, but central
    // only self-reports every 30 s, so 60 s is barely two reports for it.
    _timeProvider.Advance(TimeSpan.FromSeconds(75));
    _aggregator.CheckForOfflineSites();

    var realSite = _aggregator.GetSiteState("site-1");
    Assert.False(realSite!.IsOnline);

    var central = _aggregator.GetSiteState(centralId);
    Assert.True(central!.IsOnline, "central must survive a single missed self-report");
}
|
||||
|
||||
[Fact]
public void OfflineDetection_CentralSite_StillGoesOfflineOnGenuineLoss()
{
    var centralId = CentralHealthReportLoop.CentralSiteId;
    _aggregator.ProcessReport(MakeReport(centralId, 1));

    // Ten silent minutes is far past even the central grace window, i.e. a
    // genuine total loss rather than a single missed self-report.
    _timeProvider.Advance(TimeSpan.FromMinutes(10));
    _aggregator.CheckForOfflineSites();

    var central = _aggregator.GetSiteState(centralId);
    Assert.False(central!.IsOnline);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-013 regression: the offline-check cadence is derived from
/// the *shorter* of <see cref="HealthMonitoringOptions.OfflineTimeout"/> and
/// <see cref="HealthMonitoringOptions.CentralOfflineTimeout"/>. If an operator
/// sets <c>CentralOfflineTimeout</c> below <c>OfflineTimeout</c>, central
/// offline detection stays timely instead of lagging by up to a full
/// <c>OfflineTimeout / 2</c>.
/// </summary>
[Fact]
public void CheckInterval_IsHalfTheShorterTimeout()
{
    // Default shape: OfflineTimeout (60s) is the shorter of the two.
    var siteTimeoutShorter = new HealthMonitoringOptions
    {
        OfflineTimeout = TimeSpan.FromSeconds(60),
        CentralOfflineTimeout = TimeSpan.FromMinutes(3)
    };
    var intervalForSiteTimeout = CentralHealthAggregator.ComputeCheckInterval(siteTimeoutShorter);
    Assert.Equal(TimeSpan.FromSeconds(30), intervalForSiteTimeout);

    // Operator configures CentralOfflineTimeout shorter — cadence must adapt.
    var centralTimeoutShorter = new HealthMonitoringOptions
    {
        OfflineTimeout = TimeSpan.FromSeconds(60),
        CentralOfflineTimeout = TimeSpan.FromSeconds(20)
    };
    var intervalForCentralTimeout = CentralHealthAggregator.ComputeCheckInterval(centralTimeoutShorter);
    Assert.Equal(TimeSpan.FromSeconds(10), intervalForCentralTimeout);
}
|
||||
|
||||
[Fact]
public void SequenceNumberReset_RejectedUntilExceedsPrevMax()
{
    // Scenario: the site sends seq 10, restarts, and starts again at seq 1.
    // Per design the sequence resets on singleton restart, and the aggregator
    // rejecting seq 1 < 10 is the expected behavior.
    _aggregator.ProcessReport(MakeReport("site-1", 10));
    _aggregator.ProcessReport(MakeReport("site-1", 1));

    var afterReset = _aggregator.GetSiteState("site-1");
    Assert.Equal(10, afterReset!.LastSequenceNumber);

    // Reports are accepted again once the sequence passes the old maximum.
    // SiteHealthState is an immutable snapshot, so the state is fetched anew
    // to observe the update.
    _aggregator.ProcessReport(MakeReport("site-1", 11));

    var afterCatchUp = _aggregator.GetSiteState("site-1");
    Assert.Equal(11, afterCatchUp!.LastSequenceNumber);
}
|
||||
}
|
||||
@@ -0,0 +1,282 @@
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// HealthMonitoring-009 regression: the central self-report loop had no test
|
||||
/// coverage at all. These tests exercise leader-only gating (SelfIsPrimary),
|
||||
/// self-report generation for siteId="central", and monotonic sequence
|
||||
/// assignment.
|
||||
/// </summary>
|
||||
public class CentralHealthReportLoopTests
|
||||
{
|
||||
/// <summary>
/// Test double for <see cref="IClusterNodeProvider"/> whose primary flag and
/// node list are set directly by the test.
/// </summary>
private sealed class FakeClusterNodeProvider : IClusterNodeProvider
{
    /// <summary>Value reported as this node's primary status.</summary>
    public bool SelfIsPrimary { get; set; }

    /// <summary>Node list handed back by <see cref="GetClusterNodes"/>; empty unless a test sets it.</summary>
    public IReadOnlyList<NodeStatus> Nodes { get; set; } = [];

    /// <summary>Returns the current <see cref="Nodes"/> value unchanged.</summary>
    public IReadOnlyList<NodeStatus> GetClusterNodes()
    {
        return Nodes;
    }
}
|
||||
|
||||
/// <summary>
/// <see cref="ICentralHealthAggregator"/> test double that records every
/// processed report and stubs out everything else.
/// </summary>
private sealed class RecordingAggregator : ICentralHealthAggregator
{
    /// <summary>Reports passed to <see cref="ProcessReport"/>, in call order.</summary>
    public List<SiteHealthReport> Processed { get; } = [];

    public void ProcessReport(SiteHealthReport report)
    {
        Processed.Add(report);
    }

    public void MarkHeartbeat(string siteId, DateTimeOffset receivedAt)
    {
        // Heartbeats are irrelevant to these tests; intentionally a no-op.
    }

    public IReadOnlyDictionary<string, SiteHealthState> GetAllSiteStates()
    {
        return new Dictionary<string, SiteHealthState>();
    }

    public SiteHealthState? GetSiteState(string siteId)
    {
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-022 de-flake helper. <see cref="CentralHealthReportLoop"/>
/// paces itself with a real <see cref="PeriodicTimer"/>, which makes it
/// timing-sensitive, and a virtual clock cannot drive it (PeriodicTimer does
/// not consume <see cref="TimeProvider"/>) without refactoring the production
/// loop. The helper therefore keeps wall-clock waits but with a *generous*
/// budget: it starts the loop, polls <paramref name="condition"/> every 25 ms
/// until it holds or the deadline passes, and then stops the loop. This
/// replaces a fixed <see cref="Task.Delay"/> that fails fast on a slow CI
/// runner. The tests set <c>ReportInterval</c> to 50 ms, so the condition is
/// normally met almost immediately; under heavy CI load the poll tolerates the
/// slow tick instead of asserting on a timed-out empty list.
/// </summary>
/// <param name="loop">The loop to start and stop.</param>
/// <param name="condition">Polled predicate; polling ends once it returns true.</param>
/// <param name="maxWait">Polling budget; 5 seconds when omitted.</param>
private static async Task RunLoopUntil(
    CentralHealthReportLoop loop,
    Func<bool> condition,
    TimeSpan? maxWait = null)
{
    var deadline = maxWait.HasValue ? maxWait.Value : TimeSpan.FromSeconds(5);

    // The token handed to StartAsync fires one second after the polling
    // deadline, as an outer cap on the run.
    using var cts = new CancellationTokenSource(deadline.Add(TimeSpan.FromSeconds(1)));
    try
    {
        await loop.StartAsync(cts.Token);

        var sw = System.Diagnostics.Stopwatch.StartNew();
        while (true)
        {
            // Same short-circuit order as "elapsed < deadline && !condition()":
            // the predicate is not evaluated once the deadline has passed.
            if (sw.Elapsed >= deadline || condition())
            {
                break;
            }

            await Task.Delay(25, CancellationToken.None);
        }

        await loop.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is deliberately ignored; callers assert on what was recorded.
    }
}
|
||||
|
||||
/// <summary>
/// For tests that need the loop to run for a bounded period without waiting
/// on a specific condition (e.g. asserting that <i>no</i> reports were
/// produced). The run lasts at least one second regardless of
/// <paramref name="runForMs"/> — see <see cref="RunLoopUntil"/> for why the
/// waits are generous.
/// </summary>
/// <param name="loop">The loop to start and stop.</param>
/// <param name="runForMs">Requested run time in milliseconds; values below 1000 are raised to 1000.</param>
private static async Task RunLoopBriefly(CentralHealthReportLoop loop, int runForMs)
{
    var totalMs = runForMs < 1000 ? 1000 : runForMs;

    // The token handed to StartAsync fires one second after the run window ends.
    var cancelAfter = TimeSpan.FromMilliseconds(totalMs + 1000);
    using var cts = new CancellationTokenSource(cancelAfter);
    try
    {
        await loop.StartAsync(cts.Token);
        await Task.Delay(totalMs, CancellationToken.None);
        await loop.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Cancellation is deliberately ignored; callers assert on what was recorded.
    }
}
|
||||
|
||||
[Fact]
public async Task GeneratesCentralReports_WhenSelfIsPrimary()
{
    var recorder = new RecordingAggregator();
    var fastOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        recorder,
        new FakeClusterNodeProvider { SelfIsPrimary = true },
        Options.Create(fastOptions),
        NullLogger<CentralHealthReportLoop>.Instance);

    // HealthMonitoring-022: poll for up to 5 s until at least one report has
    // fired instead of using a fixed-budget Task.Delay; tolerates slow CI runners.
    await RunLoopUntil(loop, () => recorder.Processed.Count >= 1);

    Assert.NotEmpty(recorder.Processed);
    Assert.All(
        recorder.Processed,
        report => Assert.Equal(CentralHealthReportLoop.CentralSiteId, report.SiteId));
}
|
||||
|
||||
[Fact]
public async Task GeneratesNoReports_WhenNotPrimary()
{
    var recorder = new RecordingAggregator();
    var fastOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    // This node is the standby, so nothing should reach the aggregator.
    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        recorder,
        new FakeClusterNodeProvider { SelfIsPrimary = false },
        Options.Create(fastOptions),
        NullLogger<CentralHealthReportLoop>.Instance);

    await RunLoopBriefly(loop, 250);

    Assert.Empty(recorder.Processed);
}
|
||||
|
||||
[Fact]
public async Task AssignsMonotonicSequenceNumbers()
{
    var recorder = new RecordingAggregator();
    var fastOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        recorder,
        new FakeClusterNodeProvider { SelfIsPrimary = true },
        Options.Create(fastOptions),
        NullLogger<CentralHealthReportLoop>.Instance);

    // HealthMonitoring-022: poll for up to 5 s until two reports exist. A
    // fixed 300 ms window could miss the second tick on a slow CI runner.
    await RunLoopUntil(loop, () => recorder.Processed.Count >= 2);

    var reports = recorder.Processed;
    Assert.True(reports.Count >= 2,
        $"Expected at least 2 reports, got {reports.Count}");

    // Every report must carry a strictly larger sequence number than its predecessor.
    for (var i = 1; i < reports.Count; i++)
    {
        var previous = reports[i - 1].SequenceNumber;
        var current = reports[i].SequenceNumber;
        Assert.True(
            current > previous,
            $"Sequence numbers not strictly increasing at index {i}");
    }
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-006 regression: the central loop must seed its sequence
/// number from the injected <see cref="TimeProvider"/> (Unix-ms) rather than
/// from <c>DateTimeOffset.UtcNow</c> read at field initialization, so that the
/// seeding strategy can be tested deterministically.
/// </summary>
[Fact]
public void SequenceNumberSeed_UsesInjectedTimeProvider()
{
    var fixedInstant = new DateTimeOffset(2026, 5, 16, 12, 0, 0, TimeSpan.Zero);
    var frozenClock = new TestTimeProvider(fixedInstant);
    var expectedSeed = fixedInstant.ToUnixTimeMilliseconds();

    var loop = new CentralHealthReportLoop(
        new SiteHealthCollector(),
        new RecordingAggregator(),
        new FakeClusterNodeProvider { SelfIsPrimary = true },
        Options.Create(new HealthMonitoringOptions()),
        NullLogger<CentralHealthReportLoop>.Instance,
        frozenClock);

    Assert.Equal(expectedSeed, loop.CurrentSequenceNumber);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-018 regression: if <see cref="ICentralHealthAggregator.ProcessReport"/>
/// throws, the per-interval counters just drained by
/// <see cref="SiteHealthCollector.CollectReport"/> must be put back into the
/// shared collector so that they roll forward into the next interval instead
/// of being silently lost. Same shape as the HealthMonitoring-017 fix in
/// <see cref="HealthReportSender"/>.
/// </summary>
[Fact]
public async Task ProcessReportFailure_PreservesIntervalCountersForNextReport()
{
    var collector = new SiteHealthCollector();

    // Seed every per-interval counter so that the restore path of each field
    // is exercised. The loop's first iteration reads-and-resets them through
    // CollectReport, ProcessReport then throws, and the restore has to put
    // them back.
    for (var i = 0; i < 2; i++)
    {
        collector.IncrementScriptError();
    }

    collector.IncrementAlarmError();

    for (var i = 0; i < 3; i++)
    {
        collector.IncrementDeadLetter();
    }

    collector.IncrementSiteAuditWriteFailures();

    for (var i = 0; i < 2; i++)
    {
        collector.IncrementAuditRedactionFailure();
    }

    var flakyAggregator = new FailingThenSucceedingAggregator();
    var fastOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var loop = new CentralHealthReportLoop(
        collector,
        flakyAggregator,
        new FakeClusterNodeProvider { SelfIsPrimary = true },
        Options.Create(fastOptions),
        NullLogger<CentralHealthReportLoop>.Instance);

    // HealthMonitoring-022: the first ProcessReport call throws (counters are
    // restored) and the second succeeds. Poll for up to 5 s for that
    // successful call rather than relying on a fixed 450 ms budget.
    await RunLoopUntil(loop, () => flakyAggregator.Processed.Count >= 1);

    // The first report that got through must carry the counts accumulated
    // during the interval whose delivery failed.
    Assert.NotEmpty(flakyAggregator.Processed);
    var firstSuccess = flakyAggregator.Processed[0];
    Assert.Equal(2, firstSuccess.ScriptErrorCount);
    Assert.Equal(1, firstSuccess.AlarmEvaluationErrorCount);
    Assert.Equal(3, firstSuccess.DeadLetterCount);
    Assert.Equal(1, firstSuccess.SiteAuditWriteFailures);
    Assert.Equal(2, firstSuccess.AuditRedactionFailure);
}
|
||||
|
||||
/// <summary>
/// <see cref="ICentralHealthAggregator"/> test double whose very first
/// <c>ProcessReport</c> call throws; every later call records the report.
/// <see cref="ProcessReportFailure_PreservesIntervalCountersForNextReport"/>
/// uses it to verify the HealthMonitoring-018 restore-on-failure path.
/// </summary>
private sealed class FailingThenSucceedingAggregator : ICentralHealthAggregator
{
    private int _callCount;

    /// <summary>Reports accepted by the calls that did not throw, in call order.</summary>
    public List<SiteHealthReport> Processed { get; } = [];

    public void ProcessReport(SiteHealthReport report)
    {
        // Only the first invocation fails; Interlocked keeps the count exact
        // even if calls were to overlap.
        if (Interlocked.Increment(ref _callCount) == 1)
        {
            throw new InvalidOperationException("aggregator temporarily unavailable");
        }

        Processed.Add(report);
    }

    public void MarkHeartbeat(string siteId, DateTimeOffset receivedAt)
    {
        // Heartbeats are irrelevant to this double; intentionally a no-op.
    }

    public IReadOnlyDictionary<string, SiteHealthState> GetAllSiteStates()
    {
        return new Dictionary<string, SiteHealthState>();
    }

    public SiteHealthState? GetSiteState(string siteId)
    {
        return null;
    }
}
|
||||
|
||||
[Fact]
public async Task SetsActiveNodeFlag_EvenWhenNotPrimary()
{
    // Even as the standby, the loop must still report the node's role to the
    // collector so that the standby's own node card shows the correct role.
    var collector = new SiteHealthCollector();
    var fastOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var loop = new CentralHealthReportLoop(
        collector,
        new RecordingAggregator(),
        new FakeClusterNodeProvider { SelfIsPrimary = false },
        Options.Create(fastOptions),
        NullLogger<CentralHealthReportLoop>.Instance);

    await RunLoopBriefly(loop, 150);

    Assert.False(collector.IsActiveNode);
}
|
||||
}
|
||||
+73
@@ -0,0 +1,73 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// HealthMonitoring-014 regression coverage. The intervals on
/// <see cref="HealthMonitoringOptions"/> are passed directly to
/// <c>new PeriodicTimer(...)</c>, which throws
/// <see cref="ArgumentOutOfRangeException"/> for a zero/negative period. A
/// misconfigured <c>appsettings.json</c> must therefore be rejected by an
/// <see cref="IValidateOptions{TOptions}"/> with a clear message naming the
/// offending key, instead of crashing the hosted service with an opaque
/// exception.
/// </summary>
public class HealthMonitoringOptionsValidatorTests
{
    /// <summary>Runs a fresh validator against <paramref name="options"/> under the default options name.</summary>
    private static ValidateOptionsResult Validate(HealthMonitoringOptions options)
    {
        var validator = new HealthMonitoringOptionsValidator();
        return validator.Validate(Options.DefaultName, options);
    }

    [Fact]
    public void DefaultOptions_AreValid()
    {
        var defaults = new HealthMonitoringOptions();

        var outcome = Validate(defaults);

        Assert.True(outcome.Succeeded, outcome.FailureMessage);
    }

    [Fact]
    public void ZeroReportInterval_IsRejected()
    {
        var candidate = new HealthMonitoringOptions
        {
            ReportInterval = TimeSpan.Zero
        };

        var outcome = Validate(candidate);

        Assert.True(outcome.Failed);
        Assert.Contains("ReportInterval", outcome.FailureMessage);
    }

    [Fact]
    public void NegativeReportInterval_IsRejected()
    {
        var candidate = new HealthMonitoringOptions
        {
            ReportInterval = TimeSpan.FromSeconds(-1)
        };

        var outcome = Validate(candidate);

        Assert.True(outcome.Failed);
        Assert.Contains("ReportInterval", outcome.FailureMessage);
    }

    [Fact]
    public void ZeroOfflineTimeout_IsRejected()
    {
        var candidate = new HealthMonitoringOptions
        {
            OfflineTimeout = TimeSpan.Zero
        };

        var outcome = Validate(candidate);

        Assert.True(outcome.Failed);
        Assert.Contains("OfflineTimeout", outcome.FailureMessage);
    }

    [Fact]
    public void ZeroCentralOfflineTimeout_IsRejected()
    {
        var candidate = new HealthMonitoringOptions
        {
            CentralOfflineTimeout = TimeSpan.Zero
        };

        var outcome = Validate(candidate);

        Assert.True(outcome.Failed);
        Assert.Contains("CentralOfflineTimeout", outcome.FailureMessage);
    }

    [Fact]
    public void CentralOfflineTimeout_ShorterThanOfflineTimeout_IsRejected()
    {
        // Central timeout (30 s) is deliberately shorter than the site timeout (60 s).
        var candidate = new HealthMonitoringOptions
        {
            OfflineTimeout = TimeSpan.FromSeconds(60),
            CentralOfflineTimeout = TimeSpan.FromSeconds(30)
        };

        var outcome = Validate(candidate);

        Assert.True(outcome.Failed);
        Assert.Contains("CentralOfflineTimeout", outcome.FailureMessage);
    }
}
|
||||
@@ -0,0 +1,423 @@
|
||||
using Microsoft.Data.Sqlite;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
using ZB.MOM.WW.ScadaBridge.StoreAndForward;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
public class HealthReportSenderTests
|
||||
{
|
||||
/// <summary>
/// In-memory <see cref="IHealthReportTransport"/> that records every report
/// handed to <see cref="Send"/> in <see cref="SentReports"/>.
/// </summary>
private class FakeTransport : IHealthReportTransport
{
    /// <summary>Reports received so far, in the order they were sent.</summary>
    public List<SiteHealthReport> SentReports { get; } = new();

    /// <summary>Appends <paramref name="report"/> to <see cref="SentReports"/>.</summary>
    public void Send(SiteHealthReport report)
    {
        SentReports.Add(report);
    }
}
|
||||
|
||||
/// <summary>
/// Settable <see cref="ISiteIdentityProvider"/> stub; the site id defaults to
/// <c>"test-site"</c> unless a test overrides it.
/// </summary>
private class FakeSiteIdentityProvider : ISiteIdentityProvider
{
    /// <summary>Site id returned to the code under test.</summary>
    public string SiteId { get; set; } = "test-site";
}
|
||||
|
||||
/// <summary>
/// <see cref="ILogger{T}"/> that stores every emitted entry so a test can
/// assert that non-fatal failures are surfaced (HealthMonitoring-010) instead
/// of being silently swallowed.
/// </summary>
private sealed class CapturingLogger<T> : ILogger<T>
{
    /// <summary>One captured log call: level, formatted message, and exception (if any).</summary>
    public sealed record Entry(LogLevel Level, string Message, Exception? Exception);

    /// <summary>
    /// Captured entries. Writes happen under <c>lock (Entries)</c>; readers
    /// take the same lock on this list when they enumerate it.
    /// </summary>
    public List<Entry> Entries { get; } = new();

    /// <summary>Scopes are not tracked; a shared no-op disposable is returned.</summary>
    public IDisposable BeginScope<TState>(TState state) where TState : notnull
    {
        return NullScope.Instance;
    }

    /// <summary>Every level is enabled so nothing is filtered out before capture.</summary>
    public bool IsEnabled(LogLevel logLevel)
    {
        return true;
    }

    /// <summary>Formats the message and appends the entry to <see cref="Entries"/>.</summary>
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
    {
        var captured = new Entry(logLevel, formatter(state, exception), exception);

        lock (Entries)
        {
            Entries.Add(captured);
        }
    }

    /// <summary>Disposable that does nothing; returned by <see cref="BeginScope{TState}"/>.</summary>
    private sealed class NullScope : IDisposable
    {
        public static readonly NullScope Instance = new();

        public void Dispose()
        {
        }
    }
}
|
||||
|
||||
/// <summary>
/// An <see cref="IClusterNodeProvider"/> that reports itself as primary but
/// throws from every <see cref="GetClusterNodes"/> query.
/// </summary>
private sealed class ThrowingClusterNodeProvider : IClusterNodeProvider
{
    public bool SelfIsPrimary => true;

    /// <summary>Always throws <see cref="InvalidOperationException"/>.</summary>
    public IReadOnlyList<NodeStatus> GetClusterNodes()
    {
        throw new InvalidOperationException("cluster query failed");
    }
}
|
||||
|
||||
/// <summary>
/// Runs the sender on a 50 ms interval for about 280 ms, then checks that at
/// least two reports were sent, that each sequence number is strictly greater
/// than the previous one, and that every report carries the configured site id.
/// </summary>
[Fact]
public async Task SendsReportsWithMonotonicSequenceNumbers()
{
    var transport = new FakeTransport();
    var collector = new SiteHealthCollector();
    collector.SetActiveNode(true);

    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };
    var identity = new FakeSiteIdentityProvider { SiteId = "site-A" };

    var sender = new HealthReportSender(
        collector,
        transport,
        Options.Create(monitoringOptions),
        NullLogger<HealthReportSender>.Instance,
        identity);

    using var lifetime = new CancellationTokenSource(TimeSpan.FromMilliseconds(300));
    try
    {
        await sender.StartAsync(lifetime.Token);
        await Task.Delay(280, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Cancellation while starting/stopping is tolerated; the assertions
        // below only look at what was sent.
    }

    // Should have sent several reports
    var sent = transport.SentReports;
    Assert.True(sent.Count >= 2,
        $"Expected at least 2 reports, got {sent.Count}");

    // Verify strictly-monotonic sequence numbers and matching site id
    for (var index = 0; index < sent.Count; index++)
    {
        var current = sent[index];

        if (index > 0)
        {
            var previous = sent[index - 1];
            Assert.True(
                current.SequenceNumber > previous.SequenceNumber,
                $"Sequence numbers not strictly increasing at index {index}");
        }

        Assert.Equal("site-A", current.SiteId);
    }
}
|
||||
|
||||
/// <summary>
/// Reports are seeded with Unix-ms at construction so a freshly-active node
/// always sorts after the prior active. Verifies the first emitted sequence
/// number is at least the Unix-ms instant captured just before construction.
/// </summary>
[Fact]
public async Task FirstReportSequenceExceedsStartupUnixMs()
{
    var transport = new FakeTransport();
    var collector = new SiteHealthCollector();
    collector.SetActiveNode(true);

    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    // Captured before the constructor runs so it is a lower bound on the seed.
    var startupEpochMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    var sender = new HealthReportSender(
        collector,
        transport,
        Options.Create(monitoringOptions),
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider());

    using var lifetime = new CancellationTokenSource(TimeSpan.FromMilliseconds(150));
    try
    {
        await sender.StartAsync(lifetime.Token);
        await Task.Delay(120, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Tolerated: only the sent reports matter below.
    }

    Assert.True(transport.SentReports.Count >= 1);

    var firstSequence = transport.SentReports[0].SequenceNumber;
    Assert.True(
        firstSequence >= startupEpochMs,
        $"First sequence {firstSequence} should be >= startup epoch {startupEpochMs}");
}
|
||||
|
||||
/// <summary>
/// Every report sent during the run must carry a timestamp that falls inside
/// the run's wall-clock window and has a zero UTC offset.
/// </summary>
[Fact]
public async Task ReportsIncludeUtcTimestamp()
{
    var transport = new FakeTransport();
    var collector = new SiteHealthCollector();
    collector.SetActiveNode(true);

    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var sender = new HealthReportSender(
        collector,
        transport,
        Options.Create(monitoringOptions),
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider());

    var windowStart = DateTimeOffset.UtcNow;
    using var lifetime = new CancellationTokenSource(TimeSpan.FromMilliseconds(150));
    try
    {
        await sender.StartAsync(lifetime.Token);
        await Task.Delay(120, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Tolerated: only the sent reports matter below.
    }
    var windowEnd = DateTimeOffset.UtcNow;

    var sent = transport.SentReports;
    Assert.True(sent.Count >= 1);

    for (var index = 0; index < sent.Count; index++)
    {
        var stamp = sent[index].ReportTimestamp;
        Assert.InRange(stamp, windowStart, windowEnd);
        Assert.Equal(TimeSpan.Zero, stamp.Offset);
    }
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-001 regression: the documented "store-and-forward buffer
/// depth" metric (pending messages by category) must actually appear in the
/// emitted report. Previously SetStoreAndForwardDepths had no callers, so
/// StoreAndForwardBufferDepths was always empty. The sender must query the
/// store-and-forward engine's per-category depth API and include the result
/// alongside the parked count.
/// </summary>
[Fact]
public async Task ReportsIncludeStoreAndForwardBufferDepthsFromStorage()
{
    var dbName = $"HealthSfDepth_{Guid.NewGuid():N}";
    var connStr = $"Data Source={dbName};Mode=Memory;Cache=Shared";

    // Keep one connection alive so the in-memory DB persists for the test.
    using var anchorConnection = new SqliteConnection(connStr);
    anchorConnection.Open();

    var storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
    await storage.InitializeAsync();

    // Two pending ExternalSystem messages and one pending Notification message.
    (string Id, StoreAndForwardCategory Category)[] seeded =
    [
        ("m1", StoreAndForwardCategory.ExternalSystem),
        ("m2", StoreAndForwardCategory.ExternalSystem),
        ("m3", StoreAndForwardCategory.Notification),
    ];
    foreach (var (id, category) in seeded)
    {
        await storage.EnqueueAsync(MakePendingMessage(id, category));
    }

    var transport = new FakeTransport();
    var collector = new SiteHealthCollector();
    collector.SetActiveNode(true);

    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var sender = new HealthReportSender(
        collector,
        transport,
        Options.Create(monitoringOptions),
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider(),
        sfStorage: storage);

    using var lifetime = new CancellationTokenSource(TimeSpan.FromMilliseconds(300));
    try
    {
        await sender.StartAsync(lifetime.Token);
        await Task.Delay(250, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Tolerated: only the sent reports matter below.
    }

    Assert.True(transport.SentReports.Count >= 1);

    // Inspect the most recent report.
    var latest = transport.SentReports[^1];
    var depths = latest.StoreAndForwardBufferDepths;

    Assert.Equal(2, depths[nameof(StoreAndForwardCategory.ExternalSystem)]);
    Assert.Equal(1, depths[nameof(StoreAndForwardCategory.Notification)]);
    // Nothing was enqueued for CachedDbWrite, so the key must be absent.
    Assert.False(depths.ContainsKey(nameof(StoreAndForwardCategory.CachedDbWrite)));
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="StoreAndForwardMessage"/> in the
/// <see cref="StoreAndForwardMessageStatus.Pending"/> state with the given id
/// and category; all other fields use fixed test values.
/// </summary>
private static StoreAndForwardMessage MakePendingMessage(string id, StoreAndForwardCategory category)
{
    var message = new StoreAndForwardMessage
    {
        Id = id,
        Category = category,
        Target = "target",
        PayloadJson = "{}",
        RetryCount = 0,
        MaxRetries = 50,
        RetryIntervalMs = 30_000,
        CreatedAt = DateTimeOffset.UtcNow,
        Status = StoreAndForwardMessageStatus.Pending
    };

    return message;
}
|
||||
|
||||
/// <summary>
/// The sender's initial sequence number must lie between the Unix-ms instants
/// sampled immediately before and after its constructor runs.
/// </summary>
[Fact]
public void InitialSequenceNumberSeededWithUnixMs()
{
    var defaultOptions = Options.Create(new HealthMonitoringOptions());

    var lowerBoundMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    var sender = new HealthReportSender(
        new SiteHealthCollector(),
        new FakeTransport(),
        defaultOptions,
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider());
    var upperBoundMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    Assert.InRange(sender.CurrentSequenceNumber, lowerBoundMs, upperBoundMs);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-010 regression: a failure while refreshing cluster nodes
/// is non-fatal (the report still ships), but it must no longer vanish into a
/// bare <c>catch {}</c>. It has to be logged as a warning that carries the
/// exception so persistent degradation is diagnosable.
/// </summary>
[Fact]
public async Task ClusterNodeRefreshFailure_IsLoggedNotSwallowed()
{
    var transport = new FakeTransport();
    var collector = new SiteHealthCollector();
    collector.SetActiveNode(true);

    var logger = new CapturingLogger<HealthReportSender>();
    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var sender = new HealthReportSender(
        collector,
        transport,
        Options.Create(monitoringOptions),
        logger,
        new FakeSiteIdentityProvider(),
        clusterNodeProvider: new ThrowingClusterNodeProvider());

    using var lifetime = new CancellationTokenSource(TimeSpan.FromMilliseconds(300));
    try
    {
        await sender.StartAsync(lifetime.Token);
        await Task.Delay(250, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Tolerated: only the sent reports and captured logs matter below.
    }

    // The report loop continues despite the failure...
    Assert.NotEmpty(transport.SentReports);

    // ...but the failure is surfaced as a warning carrying the exception.
    CapturingLogger<HealthReportSender>.Entry[] warnings;
    lock (logger.Entries)
    {
        // Snapshot under the same lock the logger uses when appending.
        warnings = logger.Entries
            .Where(entry => entry is { Level: LogLevel.Warning, Exception: InvalidOperationException })
            .ToArray();
    }

    Assert.NotEmpty(warnings);
    Assert.Contains(
        warnings,
        warning => warning.Message.Contains("cluster nodes", StringComparison.OrdinalIgnoreCase));
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-017 regression: when the transport's <c>Send</c> throws,
/// the per-interval counters that <see cref="SiteHealthCollector.CollectReport"/>
/// just drained via <c>Interlocked.Exchange</c> must be put back into the
/// collector so they roll forward into the next interval. Before the fix a
/// transport failure left the counts only in the un-sent report, and the next
/// successful report shipped with the counters at zero.
/// </summary>
[Fact]
public async Task SendFailure_PreservesIntervalCountersForNextReport()
{
    static void Repeat(int times, Action action)
    {
        for (var i = 0; i < times; i++)
        {
            action();
        }
    }

    var transport = new FailingThenSucceedingTransport();
    var collector = new SiteHealthCollector();
    collector.SetActiveNode(true);

    // Pre-populate every per-interval counter so the restore path on each
    // field is exercised — script error, alarm error, dead letter, site
    // audit write failure, audit redaction failure.
    Repeat(2, () => collector.IncrementScriptError());
    Repeat(1, () => collector.IncrementAlarmError());
    Repeat(3, () => collector.IncrementDeadLetter());
    Repeat(1, () => collector.IncrementSiteAuditWriteFailures());
    Repeat(2, () => collector.IncrementAuditRedactionFailure());

    var monitoringOptions = new HealthMonitoringOptions
    {
        ReportInterval = TimeSpan.FromMilliseconds(50)
    };

    var sender = new HealthReportSender(
        collector,
        transport,
        Options.Create(monitoringOptions),
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider());

    using var lifetime = new CancellationTokenSource(TimeSpan.FromMilliseconds(500));
    try
    {
        await sender.StartAsync(lifetime.Token);
        await Task.Delay(450, CancellationToken.None);
        await sender.StopAsync(CancellationToken.None);
    }
    catch (OperationCanceledException)
    {
        // Tolerated: only the sent reports matter below.
    }

    // The first interval's Send threw, then later intervals succeeded. The
    // first successful report must include the previously-failed interval's
    // accumulated counts.
    Assert.NotEmpty(transport.SentReports);

    var recovered = transport.SentReports[0];
    Assert.Equal(2, recovered.ScriptErrorCount);
    Assert.Equal(1, recovered.AlarmEvaluationErrorCount);
    Assert.Equal(3, recovered.DeadLetterCount);
    Assert.Equal(1, recovered.SiteAuditWriteFailures);
    Assert.Equal(2, recovered.AuditRedactionFailure);
}
|
||||
|
||||
/// <summary>
/// <see cref="IHealthReportTransport"/> whose very first <c>Send</c> call
/// throws; every later call records the report. Used by
/// <see cref="SendFailure_PreservesIntervalCountersForNextReport"/> to
/// verify the HealthMonitoring-017 restore-on-failure path.
/// </summary>
private sealed class FailingThenSucceedingTransport : IHealthReportTransport
{
    private int _sendAttempts;

    /// <summary>Reports accepted after the initial failure, in send order.</summary>
    public List<SiteHealthReport> SentReports { get; } = new();

    /// <summary>
    /// Throws <see cref="InvalidOperationException"/> on the first attempt and
    /// appends <paramref name="report"/> to <see cref="SentReports"/> afterwards.
    /// </summary>
    public void Send(SiteHealthReport report)
    {
        var isFirstAttempt = Interlocked.Increment(ref _sendAttempts) == 1;
        if (isFirstAttempt)
        {
            throw new InvalidOperationException("transport temporarily unavailable");
        }

        SentReports.Add(report);
    }
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-006 regression: the sequence-number seed must come from
/// the injected <see cref="TimeProvider"/>, which makes the Unix-ms seeding
/// strategy deterministically testable and the clock dependency explicit,
/// instead of reading <c>DateTimeOffset.UtcNow</c> directly at field
/// initialization.
/// </summary>
[Fact]
public void SequenceNumberSeed_UsesInjectedTimeProvider()
{
    var frozenNow = new DateTimeOffset(2026, 5, 16, 12, 0, 0, TimeSpan.Zero);
    var expectedSeed = frozenNow.ToUnixTimeMilliseconds();
    var clock = new TestTimeProvider(frozenNow);

    var sender = new HealthReportSender(
        new SiteHealthCollector(),
        new FakeTransport(),
        Options.Create(new HealthMonitoringOptions()),
        NullLogger<HealthReportSender>.Instance,
        new FakeSiteIdentityProvider(),
        timeProvider: clock);

    Assert.Equal(expectedSeed, sender.CurrentSequenceNumber);
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// Bundle E (M6-T6) regression coverage. The site-side audit-log SQLite writer
/// exposes a backlog snapshot (<c>SiteAuditBacklogSnapshot</c>) through
/// <c>ISiteAuditQueue.GetBacklogStatsAsync</c>. A periodic
/// <c>SiteAuditBacklogReporter</c> hosted service polls that snapshot and
/// pushes it into the collector via
/// <see cref="ISiteHealthCollector.UpdateSiteAuditBacklog"/>, so the next
/// <see cref="ISiteHealthCollector.CollectReport"/> carries it in the report
/// payload as <c>SiteAuditBacklog</c>. Unlike the SiteAuditWriteFailures /
/// AuditRedactionFailure interval counters, the backlog snapshot is not reset
/// on collect: the field keeps whatever the most recent refresh pushed in.
/// </summary>
public class SiteAuditBacklogMetricTests
{
    private readonly SiteHealthCollector _sut = new();

    [Fact]
    public void Update_Then_CollectReport_IncludesBacklog()
    {
        var oldestPending = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
        var pushed = new SiteAuditBacklogSnapshot(
            PendingCount: 42,
            OldestPendingUtc: oldestPending,
            OnDiskBytes: 1234567);
        _sut.UpdateSiteAuditBacklog(pushed);

        var collected = _sut.CollectReport("site-1");

        Assert.Equal(pushed, collected.SiteAuditBacklog);
    }

    [Fact]
    public void Report_Payload_Includes_SiteAuditBacklog_AsNullByDefault()
    {
        // No refresh has been pushed yet — the report carries null so the
        // central UI can distinguish "no data yet" from "queue empty".
        var collected = _sut.CollectReport("site-1");

        Assert.Null(collected.SiteAuditBacklog);
    }

    [Fact]
    public void CollectReport_DoesNotReset_SiteAuditBacklog()
    {
        // Backlog snapshot is a point-in-time reading, not a per-interval
        // counter — successive CollectReport calls before the next
        // SiteAuditBacklogReporter tick MUST keep returning the same snapshot
        // so a slow refresh cadence doesn't blank the central dashboard.
        var pushed = new SiteAuditBacklogSnapshot(
            PendingCount: 7,
            OldestPendingUtc: null,
            OnDiskBytes: 8192);
        _sut.UpdateSiteAuditBacklog(pushed);

        var firstCollect = _sut.CollectReport("site-1");
        var secondCollect = _sut.CollectReport("site-1");

        Assert.Equal(pushed, firstCollect.SiteAuditBacklog);
        Assert.Equal(pushed, secondCollect.SiteAuditBacklog);
    }

    [Fact]
    public void Update_With_Null_Throws_ArgumentNullException()
    {
        Action pushNull = () => _sut.UpdateSiteAuditBacklog(null!);

        Assert.Throws<ArgumentNullException>(pushNull);
    }
}
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
/// <summary>
/// Bundle G (M2-T11) regression coverage. The site-side Audit Log writer chain
/// (FallbackAuditWriter) bumps <see cref="IAuditWriteFailureCounter"/> each
/// time the primary SQLite writer throws. Bundle G bridges that counter into
/// the Site Health Monitoring report payload as <c>SiteAuditWriteFailures</c>,
/// so a sustained audit-write outage shows up on /monitoring/health instead of
/// disappearing into a NoOp sink.
/// </summary>
public class SiteAuditWriteFailuresMetricTests
{
    private readonly SiteHealthCollector _sut = new();

    [Fact]
    public void Increment_Three_Times_Counter_Reports_3()
    {
        for (var i = 0; i < 3; i++)
        {
            _sut.IncrementSiteAuditWriteFailures();
        }

        var collected = _sut.CollectReport("site-1");

        Assert.Equal(3, collected.SiteAuditWriteFailures);
    }

    [Fact]
    public void Report_Payload_Includes_SiteAuditWriteFailures_AsZeroByDefault()
    {
        var collected = _sut.CollectReport("site-1");

        Assert.Equal(0, collected.SiteAuditWriteFailures);
    }

    /// <summary>
    /// SiteAuditWriteFailures is an interval count, not a running total: it
    /// follows the same per-interval reset semantics as ScriptErrorCount /
    /// AlarmEvaluationErrorCount / DeadLetterCount.
    /// </summary>
    [Fact]
    public void CollectReport_Resets_SiteAuditWriteFailures()
    {
        for (var i = 0; i < 2; i++)
        {
            _sut.IncrementSiteAuditWriteFailures();
        }

        var firstCollect = _sut.CollectReport("site-1");
        Assert.Equal(2, firstCollect.SiteAuditWriteFailures);

        var secondCollect = _sut.CollectReport("site-1");
        Assert.Equal(0, secondCollect.SiteAuditWriteFailures);
    }
}
|
||||
@@ -0,0 +1,282 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.HealthMonitoring.Tests;
|
||||
|
||||
public class SiteHealthCollectorTests
|
||||
{
|
||||
private readonly SiteHealthCollector _collector = new();
|
||||
|
||||
/// <summary>
/// With nothing recorded, the report echoes the site id and all three error
/// counters are zero.
/// </summary>
[Fact]
public void CollectReport_ReturnsZeroCounters_WhenNoErrorsRecorded()
{
    const string siteId = "site-1";

    var collected = _collector.CollectReport(siteId);

    Assert.Equal("site-1", collected.SiteId);
    Assert.Equal(0, collected.ScriptErrorCount);
    Assert.Equal(0, collected.AlarmEvaluationErrorCount);
    Assert.Equal(0, collected.DeadLetterCount);
}
|
||||
|
||||
/// <summary>Three script-error increments before a collect yield a count of 3.</summary>
[Fact]
public void IncrementScriptError_AccumulatesBetweenReports()
{
    for (var i = 0; i < 3; i++)
    {
        _collector.IncrementScriptError();
    }

    var collected = _collector.CollectReport("site-1");

    Assert.Equal(3, collected.ScriptErrorCount);
}
|
||||
|
||||
/// <summary>Two alarm-error increments before a collect yield a count of 2.</summary>
[Fact]
public void IncrementAlarmError_AccumulatesBetweenReports()
{
    for (var i = 0; i < 2; i++)
    {
        _collector.IncrementAlarmError();
    }

    var collected = _collector.CollectReport("site-1");

    Assert.Equal(2, collected.AlarmEvaluationErrorCount);
}
|
||||
|
||||
/// <summary>A single dead-letter increment shows up as a count of 1 in the next report.</summary>
[Fact]
public void IncrementDeadLetter_AccumulatesBetweenReports()
{
    _collector.IncrementDeadLetter();

    var deadLetters = _collector.CollectReport("site-1").DeadLetterCount;

    Assert.Equal(1, deadLetters);
}
|
||||
|
||||
/// <summary>
/// Each error counter is reported once and then reset: the first collect sees
/// the recorded values, the second collect sees zeros.
/// </summary>
[Fact]
public void CollectReport_ResetsCounters_AfterCollection()
{
    _collector.IncrementScriptError();
    _collector.IncrementAlarmError();
    _collector.IncrementDeadLetter();

    // First collect drains the counters.
    var drained = _collector.CollectReport("site-1");
    Assert.Equal(1, drained.ScriptErrorCount);
    Assert.Equal(1, drained.AlarmEvaluationErrorCount);
    Assert.Equal(1, drained.DeadLetterCount);

    // Second collect, with nothing recorded in between, must be all zeros.
    var afterReset = _collector.CollectReport("site-1");
    Assert.Equal(0, afterReset.ScriptErrorCount);
    Assert.Equal(0, afterReset.AlarmEvaluationErrorCount);
    Assert.Equal(0, afterReset.DeadLetterCount);
}
|
||||
|
||||
/// <summary>Health values pushed per connection appear under the same keys in the report.</summary>
[Fact]
public void UpdateConnectionHealth_ReflectedInReport()
{
    _collector.UpdateConnectionHealth("opc-1", ConnectionHealth.Connected);
    _collector.UpdateConnectionHealth("opc-2", ConnectionHealth.Disconnected);

    var statuses = _collector.CollectReport("site-1").DataConnectionStatuses;

    Assert.Equal(2, statuses.Count);
    Assert.Equal(ConnectionHealth.Connected, statuses["opc-1"]);
    Assert.Equal(ConnectionHealth.Disconnected, statuses["opc-2"]);
}
|
||||
|
||||
/// <summary>
/// Connection health is not cleared by collecting: a second collect still
/// reports the connection with its last pushed value.
/// </summary>
[Fact]
public void ConnectionHealth_NotResetAfterCollect()
{
    _collector.UpdateConnectionHealth("opc-1", ConnectionHealth.Connected);

    // The first collect's result is intentionally discarded.
    _ = _collector.CollectReport("site-1");
    var statusesAfterSecondCollect = _collector.CollectReport("site-1").DataConnectionStatuses;

    Assert.Single(statusesAfterSecondCollect);
    Assert.Equal(ConnectionHealth.Connected, statusesAfterSecondCollect["opc-1"]);
}
|
||||
|
||||
/// <summary>
/// Removing a connection drops both its health status and its tag-resolution
/// counts from the next report.
/// </summary>
[Fact]
public void RemoveConnection_RemovesFromReport()
{
    const string connection = "opc-1";
    _collector.UpdateConnectionHealth(connection, ConnectionHealth.Connected);
    _collector.UpdateTagResolution(connection, 10, 8);

    _collector.RemoveConnection(connection);

    var collected = _collector.CollectReport("site-1");
    Assert.Empty(collected.DataConnectionStatuses);
    Assert.Empty(collected.TagResolutionCounts);
}
|
||||
|
||||
/// <summary>Tag-resolution totals pushed for a connection are reported for that connection.</summary>
[Fact]
public void UpdateTagResolution_ReflectedInReport()
{
    _collector.UpdateTagResolution("opc-1", 50, 45);

    var resolution = _collector.CollectReport("site-1").TagResolutionCounts;

    Assert.Single(resolution);
    var forConnection = resolution["opc-1"];
    Assert.Equal(50, forConnection.TotalSubscribed);
    Assert.Equal(45, forConnection.SuccessfullyResolved);
}
|
||||
|
||||
/// <summary>Without any depth update, the report's buffer-depth collection is empty.</summary>
[Fact]
public void StoreAndForwardBufferDepths_DefaultsToEmpty_WhenSetterNotCalled()
{
    var depths = _collector.CollectReport("site-1").StoreAndForwardBufferDepths;

    Assert.Empty(depths);
}
|
||||
|
||||
/// <summary>
/// The report timestamp must fall between the instants sampled immediately
/// before and after <c>CollectReport</c>, and must be expressed in UTC
/// (zero offset), matching what the sender-level timestamp test asserts.
/// </summary>
[Fact]
public void CollectReport_IncludesUtcTimestamp()
{
    var before = DateTimeOffset.UtcNow;
    var report = _collector.CollectReport("site-1");
    var after = DateTimeOffset.UtcNow;

    Assert.InRange(report.ReportTimestamp, before, after);
    // InRange compares instants only, so a local-offset timestamp would still
    // pass it; check the offset explicitly so the "Utc" in the name is verified.
    Assert.Equal(TimeSpan.Zero, report.ReportTimestamp.Offset);
}
|
||||
|
||||
/// <summary>
/// HealthMonitoring-016 regression: <see cref="SiteHealthCollector.CollectReport"/>
/// has to take <c>ReportTimestamp</c> from the injected <see cref="TimeProvider"/>
/// (as the rest of the module does) and not from <c>DateTimeOffset.UtcNow</c>,
/// so the timestamp can be checked against a known instant.
/// </summary>
[Fact]
public void CollectReport_StampsTimestamp_FromInjectedTimeProvider()
{
    var frozenNow = new DateTimeOffset(2026, 5, 17, 9, 30, 0, TimeSpan.Zero);
    var clock = new TestTimeProvider(frozenNow);
    var collectorWithClock = new SiteHealthCollector(clock);

    var stamped = collectorWithClock.CollectReport("site-1").ReportTimestamp;

    Assert.Equal(frozenNow, stamped);
}
|
||||
|
||||
/// <summary>
/// The collector leaves <c>SequenceNumber</c> at zero; per the test name,
/// assigning it is the caller's job.
/// </summary>
[Fact]
public void CollectReport_SequenceNumberIsZero_CallerAssignsIt()
{
    var sequence = _collector.CollectReport("site-1").SequenceNumber;

    Assert.Equal(0, sequence);
}
|
||||
|
||||
// HealthMonitoring-009 regression: the remaining collector setters had no
|
||||
// "reflected in report" coverage. The following tests verify each setter's
|
||||
// value reaches CollectReport output.
|
||||
|
||||
/// <summary>Cluster nodes pushed into the collector are reported in the same order.</summary>
[Fact]
public void SetClusterNodes_ReflectedInReport()
{
    var pushedNodes = new List<ZB.MOM.WW.ScadaBridge.Commons.Messages.Health.NodeStatus>
    {
        new("node-a", true, "Active"),
        new("node-b", true, "Standby")
    };

    _collector.SetClusterNodes(pushedNodes);
    var reportedNodes = _collector.CollectReport("site-1").ClusterNodes;

    Assert.NotNull(reportedNodes);
    Assert.Equal(2, reportedNodes!.Count);
    Assert.Equal("node-a", reportedNodes[0].Hostname);
}
|
||||
|
||||
/// <summary>Deployed / enabled / disabled instance counts reach the report unchanged.</summary>
[Fact]
public void SetInstanceCounts_ReflectedInReport()
{
    _collector.SetInstanceCounts(deployed: 10, enabled: 7, disabled: 3);

    var collected = _collector.CollectReport("site-1");

    Assert.Equal(10, collected.DeployedInstanceCount);
    Assert.Equal(7, collected.EnabledInstanceCount);
    Assert.Equal(3, collected.DisabledInstanceCount);
}
|
||||
|
||||
/// <summary>
/// The value passed to <c>SetParkedMessageCount</c> must be reported as
/// <c>ParkedMessageCount</c> on the next collected report.
/// </summary>
[Fact]
public void SetParkedMessageCount_ReflectedInReport()
{
    // Arrange
    const int parkedMessages = 42;
    _collector.SetParkedMessageCount(parkedMessages);

    // Act
    var reportedCount = _collector.CollectReport("site-1").ParkedMessageCount;

    // Assert
    Assert.Equal(parkedMessages, reportedCount);
}
|
||||
|
||||
/// <summary>
/// The hostname passed to <c>SetNodeHostname</c> must be reported as
/// <c>NodeHostname</c> on the next collected report.
/// </summary>
[Fact]
public void SetNodeHostname_ReflectedInReport()
{
    // Arrange
    const string hostname = "site-host-1";
    _collector.SetNodeHostname(hostname);

    // Act
    var reportedHostname = _collector.CollectReport("site-1").NodeHostname;

    // Assert
    Assert.Equal(hostname, reportedHostname);
}
|
||||
|
||||
/// <summary>
/// Toggling <c>SetActiveNode</c> must drive both the report's <c>NodeRole</c>
/// ("Active" for <c>true</c>, "Standby" for <c>false</c>) and the collector's
/// own <c>IsActiveNode</c> flag.
/// </summary>
[Fact]
public void SetActiveNode_ReflectedInNodeRole()
{
    // Promote to active and check both views of the state.
    _collector.SetActiveNode(true);
    var roleWhenActive = _collector.CollectReport("site-1").NodeRole;
    Assert.Equal("Active", roleWhenActive);
    Assert.True(_collector.IsActiveNode);

    // Demote to standby and check again.
    _collector.SetActiveNode(false);
    var roleWhenStandby = _collector.CollectReport("site-1").NodeRole;
    Assert.Equal("Standby", roleWhenStandby);
    Assert.False(_collector.IsActiveNode);
}
|
||||
|
||||
/// <summary>
/// Good / bad / uncertain tag counts recorded through
/// <c>UpdateTagQuality</c> must be retrievable from the report's
/// <c>DataConnectionTagQuality</c> map under the same connection name.
/// </summary>
[Fact]
public void UpdateTagQuality_ReflectedInReport()
{
    // Arrange
    const string connectionName = "opc-1";
    _collector.UpdateTagQuality(connectionName, good: 80, bad: 15, uncertain: 5);

    // Act
    var qualityByConnection = _collector.CollectReport("site-1").DataConnectionTagQuality;

    // Assert
    Assert.NotNull(qualityByConnection);
    var quality = qualityByConnection![connectionName];
    Assert.Equal(80, quality.Good);
    Assert.Equal(15, quality.Bad);
    Assert.Equal(5, quality.Uncertain);
}
|
||||
|
||||
/// <summary>
/// The endpoint registered through <c>UpdateConnectionEndpoint</c> must be
/// retrievable from the report's <c>DataConnectionEndpoints</c> map under the
/// same connection name.
/// </summary>
[Fact]
public void UpdateConnectionEndpoint_ReflectedInReport()
{
    // Arrange
    const string connectionName = "opc-1";
    const string endpoint = "opc.tcp://plc-1:4840";
    _collector.UpdateConnectionEndpoint(connectionName, endpoint);

    // Act
    var endpointsByConnection = _collector.CollectReport("site-1").DataConnectionEndpoints;

    // Assert
    Assert.NotNull(endpointsByConnection);
    Assert.Equal(endpoint, endpointsByConnection![connectionName]);
}
|
||||
|
||||
/// <summary>
/// Per-category buffer depths supplied to <c>SetStoreAndForwardDepths</c>
/// must be readable from the report's <c>StoreAndForwardBufferDepths</c> map
/// under the same keys.
/// </summary>
[Fact]
public void SetStoreAndForwardDepths_ReflectedInReport()
{
    // Arrange
    var depthsByCategory = new Dictionary<string, int>
    {
        { "ExternalSystem", 5 },
        { "Notification", 2 }
    };
    _collector.SetStoreAndForwardDepths(depthsByCategory);

    // Act
    var reportedDepths = _collector.CollectReport("site-1").StoreAndForwardBufferDepths;

    // Assert
    Assert.Equal(5, reportedDepths["ExternalSystem"]);
    Assert.Equal(2, reportedDepths["Notification"]);
}
|
||||
|
||||
/// <summary>
/// Hammers the script-error, alarm-error and dead-letter counters from
/// several tasks at once and verifies that no increment is lost.
/// </summary>
/// <remarks>
/// Each counter is driven by <c>writersPerCounter</c> concurrent tasks. With
/// only a single writer per counter, no two threads would ever touch the same
/// counter, so a non-atomic increment could still pass; multiple writers per
/// counter make lost updates observable as a total below the expected value.
/// </remarks>
[Fact]
public async Task ThreadSafety_ConcurrentIncrements()
{
    const int iterations = 10_000;
    const int writersPerCounter = 4;
    const int expectedTotal = iterations * writersPerCounter;

    // Starts one task that invokes the given increment `iterations` times.
    Task StartWriter(Action increment) => Task.Run(() =>
    {
        for (var i = 0; i < iterations; i++)
        {
            increment();
        }
    });

    var tasks = new List<Task>();
    for (var writer = 0; writer < writersPerCounter; writer++)
    {
        tasks.Add(StartWriter(() => _collector.IncrementScriptError()));
        tasks.Add(StartWriter(() => _collector.IncrementAlarmError()));
        tasks.Add(StartWriter(() => _collector.IncrementDeadLetter()));
    }

    await Task.WhenAll(tasks);

    var report = _collector.CollectReport("site-1");
    Assert.Equal(expectedTotal, report.ScriptErrorCount);
    Assert.Equal(expectedTotal, report.AlarmEvaluationErrorCount);
    Assert.Equal(expectedTotal, report.DeadLetterCount);
}
|
||||
}
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and packaging settings for this test project. -->
  <PropertyGroup>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <!-- Any compiler warning (including nullable warnings) fails the build. -->
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <!-- Test assemblies are never published as NuGet packages. -->
    <IsPackable>false</IsPackable>
  </PropertyGroup>

  <!--
    NuGet dependencies. No Version attributes are declared in this file;
    presumably versions are managed centrally (e.g. Directory.Packages.props) - confirm.
  -->
  <ItemGroup>
    <PackageReference Include="coverlet.collector" />
    <PackageReference Include="Microsoft.Data.Sqlite" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
    <PackageReference Include="Microsoft.Extensions.Options" />
    <PackageReference Include="Microsoft.NET.Test.Sdk" />
    <PackageReference Include="xunit" />
    <PackageReference Include="xunit.runner.visualstudio" />
  </ItemGroup>

  <!-- Project-wide global using, so test files need no explicit "using Xunit;". -->
  <ItemGroup>
    <Using Include="Xunit" />
  </ItemGroup>

  <!-- The production project exercised by these tests. -->
  <ItemGroup>
    <ProjectReference Include="../../src/ZB.MOM.WW.ScadaBridge.HealthMonitoring/ZB.MOM.WW.ScadaBridge.HealthMonitoring.csproj" />
  </ItemGroup>

</Project>
|
||||
Reference in New Issue
Block a user