Files
ScadaBridge/tests/ZB.MOM.WW.ScadaBridge.SiteCallAudit.Tests/SiteCallAuditReconciliationTests.cs
T
Joseph Doherty fd618cf1dc fix(review): full code-review remediation — 5 High + Medium/Low across 16 modules
Remediation from the full per-module code review at 4307c381 (findings recorded
separately in code-reviews/).

Highs fixed:
- DeploymentManager-025/SiteRuntime-031: stop broadcasting notification lists + SMTP
  configs (incl. credentials) to sites; site purges already-persisted rows on apply
  (enforces the central-only delivery design; clears plaintext SMTP creds at rest).
- DataConnectionLayer-023: guard the native-alarm subscribe path against the
  mid-flight-unsubscribe adapter-feed leak (mirrors the DCL-021 tag-path fix).
- SiteEventLogging-024: normalize From/To query bounds to UTC (the -016 fix the
  audit trail claimed but never committed).
- KpiHistory-001: add an in-flight guard to the recorder sample tick.
- ScriptAnalysis-001: harden the trust analyzer's TPA-absent fallback (resolve
  forbidden anchors in the minimal reference set; warn on degraded mode) — anchors
  added to validation references only, never the compile gate.
(InboundAPI-026 left to the feat/ipsen-movein effort per owner decision.)

Medium/Low: DM-026 deterministic deploy-status tiebreaker; SR-027/028/029/030
native-alarm leak/phantom-active/delete-during-redeploy fixes; AL-013/014/016;
TE-024 (folder-mutation audit rows now persisted)/025; SF-025 gauge-provider
clear-on-stop; ESG-025/026; SEC-023/024/025; SCA-007/008/009; plus doc/test
accuracy COM-023/024, HOST-025/026, HM-024/025, NS-027/028.

Full-solution build 0 warnings; ~3560 tests across 18 touched suites green.
2026-06-20 17:55:12 -04:00

440 lines
20 KiB
C#

using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
namespace ZB.MOM.WW.ScadaBridge.SiteCallAudit.Tests;
/// <summary>
/// Reconciliation-tick tests for <see cref="SiteCallAuditActor"/> (#22, Piece A).
/// These exercise the periodic per-site self-heal pull entirely in-memory —
/// fake <see cref="IPullSiteCallsClient"/> + <see cref="ISiteEnumerator"/> + a
/// recording <see cref="ISiteCallAuditRepository"/> — so they run in
/// milliseconds and do NOT depend on a live MSSQL fixture (unlike the
/// MSSQL-backed <see cref="SiteCallAuditActorTests"/>). The actor is built via
/// the internal test ctor that injects all three collaborators; the
/// repo-only test ctor used by the MSSQL tests passes no client/enumerator, so
/// the reconciliation tick is gated off there (see
/// <see cref="TestCtor_RepositoryOnly_DoesNotStartReconciliationTick"/>).
/// </summary>
public class SiteCallAuditReconciliationTests : TestKit
{
/// <summary>
/// Builds a <see cref="SiteCall"/> test row for the given operation id and
/// source site. A single instant — <paramref name="updatedAtUtc"/>, or the
/// current UTC time when it is omitted — is stamped on the created, updated
/// and ingested timestamps.
/// </summary>
/// <param name="id">Tracked operation id of the row.</param>
/// <param name="sourceSite">Site the row is attributed to.</param>
/// <param name="status">Status string stored on the row.</param>
/// <param name="updatedAtUtc">Timestamp to stamp on the row; defaults to now (UTC).</param>
/// <returns>A new row with no source node, error, HTTP status or terminal timestamp.</returns>
private static SiteCall NewRow(
    TrackedOperationId id,
    string sourceSite,
    string status = "Submitted",
    DateTime? updatedAtUtc = null)
{
    DateTime stamp = updatedAtUtc.HasValue ? updatedAtUtc.Value : DateTime.UtcNow;

    var call = new SiteCall
    {
        TrackedOperationId = id,
        Channel = "ApiOutbound",
        Target = "ERP.GetOrder",
        SourceSite = sourceSite,
        SourceNode = null,
        Status = status,
        RetryCount = 0,
        LastError = null,
        HttpStatus = null,
        CreatedAtUtc = stamp,
        UpdatedAtUtc = stamp,
        TerminalAtUtc = null,
        IngestedAtUtc = stamp,
    };

    return call;
}
/// <summary>
/// Options for tests that want the reconciliation tick to fire quickly: the
/// interval override is set to 100 ms while the regular interval stays at
/// five minutes.
/// </summary>
/// <param name="batchSize">Value assigned to the reconciliation batch size.</param>
private static SiteCallAuditOptions FastTickOptions(int batchSize = 500)
{
    // The 100 ms override keeps each test under a second; callers poll with
    // AwaitAssert, which absorbs scheduler jitter by giving the tick a few
    // seconds to fire.
    return new SiteCallAuditOptions
    {
        ReconciliationInterval = TimeSpan.FromMinutes(5),
        ReconciliationIntervalOverride = TimeSpan.FromMilliseconds(100),
        ReconciliationBatchSize = batchSize,
    };
}
/// <summary>
/// Site enumerator test double: every call yields the fixed list of sites it
/// was constructed with.
/// </summary>
private sealed class StaticEnumerator : ISiteEnumerator
{
    private readonly IReadOnlyList<SiteEntry> _sites;

    public StaticEnumerator(params SiteEntry[] sites)
    {
        _sites = sites;
    }

    /// <summary>Returns the constructor-supplied sites as an already-completed task.</summary>
    public Task<IReadOnlyList<SiteEntry>> EnumerateAsync(CancellationToken ct = default)
    {
        return Task.FromResult(_sites);
    }
}
/// <summary>
/// Scripted pull client — each call dequeues the next scripted response for
/// the site. Once a site's script is exhausted (or the site was never
/// scripted) it returns an EMPTY page with <c>MoreAvailable: false</c>; it
/// does not repeat the last entry. A site registered through
/// <see cref="ThrowFor"/> throws instead, and that takes precedence over any
/// script for the same site. Every invocation — including one that throws —
/// is recorded in <see cref="Calls"/> so tests can assert call counts + the
/// <c>since</c> cursor.
/// </summary>
private sealed class ScriptedPullClient : IPullSiteCallsClient
{
    /// <summary>Arguments of every <see cref="PullAsync"/> call, in call order.</summary>
    public List<(string SiteId, DateTime SinceUtc, int BatchSize)> Calls { get; } = new();

    private readonly Dictionary<string, Queue<PullSiteCallsResponse>> _scripted = new();
    private readonly Dictionary<string, Exception> _throwOnSite = new();

    /// <summary>
    /// Replaces the response script for <paramref name="siteId"/>; responses
    /// are handed out one per pull, in the order given.
    /// </summary>
    /// <returns>This client, for fluent chaining.</returns>
    public ScriptedPullClient Script(string siteId, params PullSiteCallsResponse[] responses)
    {
        _scripted[siteId] = new Queue<PullSiteCallsResponse>(responses);
        return this;
    }

    /// <summary>
    /// Makes every pull for <paramref name="siteId"/> throw
    /// <paramref name="ex"/>.
    /// </summary>
    /// <returns>This client, for fluent chaining.</returns>
    public ScriptedPullClient ThrowFor(string siteId, Exception ex)
    {
        _throwOnSite[siteId] = ex;
        return this;
    }

    /// <summary>
    /// Records the call, then throws, returns the next scripted response, or
    /// returns an empty final page — in that order of precedence.
    /// </summary>
    public Task<PullSiteCallsResponse> PullAsync(
        string siteId, DateTime sinceUtc, int batchSize, CancellationToken ct)
    {
        // Record first so a throwing site still shows up in Calls.
        Calls.Add((siteId, sinceUtc, batchSize));

        if (_throwOnSite.TryGetValue(siteId, out var ex))
        {
            // This method is not async, so the exception surfaces
            // synchronously from the call rather than as a faulted task.
            throw ex;
        }

        if (_scripted.TryGetValue(siteId, out var queue) && queue.Count > 0)
        {
            return Task.FromResult(queue.Dequeue());
        }

        // Script exhausted or site never scripted: nothing more to pull.
        return Task.FromResult(
            new PullSiteCallsResponse(Array.Empty<SiteCall>(), MoreAvailable: false));
    }
}
/// <summary>
/// Pull client whose every response is the same saturated page
/// (<c>MoreAvailable=true</c>) carrying the constructor-supplied rows; the
/// <c>since</c> cursor and all other arguments are ignored. It models the
/// SiteCallAudit-009 single-timestamp no-progress pin: a backlog larger than
/// the batch size whose rows all share one exact <c>UpdatedAtUtc</c>, so an
/// inclusive max-timestamp cursor can never move forward. The number of
/// calls is counted so a test can assert the within-tick drain is BOUNDED
/// (the actor must not spin the dispatcher forever on this input).
/// </summary>
private sealed class SaturatedPinPullClient : IPullSiteCallsClient
{
    private readonly IReadOnlyList<SiteCall> _rows;

    public SaturatedPinPullClient(IReadOnlyList<SiteCall> rows)
    {
        _rows = rows;
    }

    /// <summary>How many times <see cref="PullAsync"/> has been invoked.</summary>
    public int CallCount { get; private set; }

    public Task<PullSiteCallsResponse> PullAsync(
        string siteId, DateTime sinceUtc, int batchSize, CancellationToken ct)
    {
        CallCount += 1;

        var saturatedPage = new PullSiteCallsResponse(_rows, MoreAvailable: true);
        return Task.FromResult(saturatedPage);
    }
}
/// <summary>
/// Repository test double that records every <see cref="UpsertAsync"/> call:
/// the call is counted and the row is stored by id, with a later upsert for
/// the same id overwriting the earlier one. <see cref="GetAsync"/> answers
/// from those stored rows; the query, purge and KPI members return fixed
/// empty/zero results.
/// </summary>
private sealed class RecordingRepo : ISiteCallAuditRepository
{
    /// <summary>Most recent row upserted for each id.</summary>
    public Dictionary<TrackedOperationId, SiteCall> Upserted { get; } = new();

    /// <summary>Total number of <see cref="UpsertAsync"/> invocations.</summary>
    public int UpsertCallCount { get; private set; }

    public Task UpsertAsync(SiteCall siteCall, CancellationToken ct = default)
    {
        UpsertCallCount += 1;
        Upserted[siteCall.TrackedOperationId] = siteCall;
        return Task.CompletedTask;
    }

    public Task<SiteCall?> GetAsync(TrackedOperationId id, CancellationToken ct = default)
    {
        // A miss leaves `captured` null, which is the "not found" result.
        Upserted.TryGetValue(id, out var captured);
        return Task.FromResult(captured);
    }

    public Task<IReadOnlyList<SiteCall>> QueryAsync(
        SiteCallQueryFilter filter, SiteCallPaging paging, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCall> none = Array.Empty<SiteCall>();
        return Task.FromResult(none);
    }

    public Task<int> PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default)
    {
        return Task.FromResult(0);
    }

    public Task<SiteCallKpiSnapshot> ComputeKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        var emptySnapshot = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0);
        return Task.FromResult(emptySnapshot);
    }

    public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
        return Task.FromResult(none);
    }

    public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
        return Task.FromResult(none);
    }
}
/// <summary>
/// Spawns a <see cref="SiteCallAuditActor"/> in the test actor system through
/// the constructor that takes the repository, site enumerator and pull
/// client, using a null logger.
/// </summary>
/// <returns>Reference to the newly created actor.</returns>
private IActorRef CreateActor(
    ISiteEnumerator sites,
    IPullSiteCallsClient client,
    ISiteCallAuditRepository repo,
    SiteCallAuditOptions options)
{
    var props = Props.Create(() => new SiteCallAuditActor(
        repo,
        sites,
        client,
        NullLogger<SiteCallAuditActor>.Instance,
        options));

    return Sys.ActorOf(props);
}
// ---------------------------------------------------------------------
// 1. AbsentRow_PulledFromSite_IsUpserted
// ---------------------------------------------------------------------
/// <summary>
/// A row the site reports is upserted into the central repository by the
/// reconciliation tick, with its status and source site carried over.
/// </summary>
[Fact]
public void ReconciliationTick_AbsentRow_IsUpsertedFromSitePull()
{
    const string site = "siteA";
    var operationId = TrackedOperationId.New();
    var siteRow = NewRow(operationId, sourceSite: site, status: "Parked");

    var enumerator = new StaticEnumerator(new SiteEntry(site, "http://siteA:8083"));
    var pullClient = new ScriptedPullClient();
    pullClient.Script(site, new PullSiteCallsResponse(new[] { siteRow }, MoreAvailable: false));
    var recorder = new RecordingRepo();

    CreateActor(enumerator, pullClient, recorder, FastTickOptions());

    // Polled: the upsert lands asynchronously once the first tick fires.
    void AssertRowMirrored()
    {
        Assert.True(recorder.Upserted.ContainsKey(operationId),
            "reconciliation tick should upsert the row present at the site but absent centrally");

        var mirrored = recorder.Upserted[operationId];
        Assert.Equal("Parked", mirrored.Status);
        Assert.Equal(site, mirrored.SourceSite);
    }

    AwaitAssert(
        AssertRowMirrored,
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 2. Cursor_Advances_ToMaxUpdatedAtUtc_NoRePullOfOldRows
// ---------------------------------------------------------------------
/// <summary>
/// After a pull that returned rows, the next pull's <c>since</c> cursor is the
/// largest <c>UpdatedAtUtc</c> seen, and the configured batch size is passed
/// through to the client.
/// </summary>
[Fact]
public void ReconciliationTick_SecondTick_AdvancesCursorPastAlreadyPulledRows()
{
    const string site = "siteA";
    var oldest = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
    var middle = new DateTime(2026, 5, 20, 10, 1, 0, DateTimeKind.Utc);
    var newest = new DateTime(2026, 5, 20, 10, 2, 0, DateTimeKind.Utc);

    SiteCall[] firstPage =
    {
        NewRow(TrackedOperationId.New(), site, updatedAtUtc: oldest),
        NewRow(TrackedOperationId.New(), site, updatedAtUtc: middle),
        NewRow(TrackedOperationId.New(), site, updatedAtUtc: newest),
    };

    var enumerator = new StaticEnumerator(new SiteEntry(site, "http://siteA:8083"));

    // Only the first pull is scripted (three rows, max UpdatedAtUtc = newest);
    // every later pull gets an empty page. If the second pull's `since` is
    // `newest`, the cursor advanced and old rows are not re-pulled from the
    // start.
    var client = new ScriptedPullClient();
    client.Script(site, new PullSiteCallsResponse(firstPage, MoreAvailable: false));
    var recorder = new RecordingRepo();

    CreateActor(enumerator, client, recorder, FastTickOptions());

    AwaitAssert(
        () => Assert.True(client.Calls.Count >= 2,
            $"need at least 2 pulls to assert cursor advancement, got {client.Calls.Count}"),
        duration: TimeSpan.FromSeconds(5),
        interval: TimeSpan.FromMilliseconds(50));

    var firstPull = client.Calls[0];
    var secondPull = client.Calls[1];

    Assert.Equal(DateTime.MinValue, firstPull.SinceUtc);
    Assert.Equal(newest, secondPull.SinceUtc);

    // The batch size flows through from options.
    Assert.Equal(500, firstPull.BatchSize);
}
// ---------------------------------------------------------------------
// 3. OneSiteThrows_OtherSitesStillProcessed (failure isolation)
// ---------------------------------------------------------------------
/// <summary>
/// Failure isolation: a site whose pull throws does not prevent another
/// site in the same enumeration from being reconciled.
/// </summary>
[Fact]
public void ReconciliationTick_OneSiteThrows_OtherSitesStillReconciled()
{
    const string failingSite = "siteA";
    const string healthySite = "siteB";

    var healthyId = TrackedOperationId.New();
    var healthyRow = NewRow(healthyId, sourceSite: healthySite, status: "Delivered");

    var enumerator = new StaticEnumerator(
        new SiteEntry(failingSite, "http://siteA:8083"),
        new SiteEntry(healthySite, "http://siteB:8083"));

    var pullClient = new ScriptedPullClient();
    pullClient.ThrowFor(failingSite, new InvalidOperationException("simulated transport failure"));
    pullClient.Script(healthySite, new PullSiteCallsResponse(new[] { healthyRow }, MoreAvailable: false));

    var recorder = new RecordingRepo();

    CreateActor(enumerator, pullClient, recorder, FastTickOptions());

    // The failing site must have been attempted (its pull threw) and the
    // healthy site's row must still have landed — one offline site must not
    // sink the rest of the tick.
    void AssertHealthySiteReconciled()
    {
        Assert.Contains(pullClient.Calls, call => call.SiteId == "siteA");
        Assert.True(recorder.Upserted.ContainsKey(healthyId),
            "siteB must be reconciled even though siteA threw");
    }

    AwaitAssert(
        AssertHealthySiteReconciled,
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 4. RepoOnly test ctor does NOT start the reconciliation tick
// ---------------------------------------------------------------------
/// <summary>
/// An actor built through the repository-only test constructor performs no
/// reconciliation upserts, even after enough time has passed for a fully
/// wired actor on the same fast tick to have pulled.
/// </summary>
[Fact]
public void TestCtor_RepositoryOnly_DoesNotStartReconciliationTick()
{
    // The repo-only test ctor (used by the MSSQL-backed actor tests) injects
    // no client/enumerator, so the tick must be gated OFF — otherwise those
    // tests would fire phantom pulls. Build the actor via that ctor and
    // confirm it never reconciles. A non-event can't be observed directly,
    // so a second, fully wired actor on the same fast tick serves as a clock
    // (below). The repo-only actor has no pull client whose calls could be
    // inspected, so the check is made against its own repository instead.
    var repo = new RecordingRepo();
    Sys.ActorOf(Props.Create(() => new SiteCallAuditActor(
        repo,
        NullLogger<SiteCallAuditActor>.Instance,
        FastTickOptions())));

    // Run a parallel actor with the full reconciliation ctor and a fast
    // tick; once IT has pulled we know enough wall-clock elapsed that the
    // repo-only actor would have ticked too, had it been wired. It gets its
    // own client and repository so nothing it does can touch `repo`.
    var liveClient = new ScriptedPullClient();
    var liveRepo = new RecordingRepo();
    CreateActor(
        new StaticEnumerator(new SiteEntry("siteX", "http://siteX:8083")),
        liveClient,
        liveRepo,
        FastTickOptions());

    AwaitAssert(
        () => Assert.True(liveClient.Calls.Count >= 1),
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));

    // The repo-only actor never reconciles: it has no client to pull with,
    // so it upserts nothing on its own.
    Assert.Equal(0, repo.UpsertCallCount);
}
// ---------------------------------------------------------------------
// 5. SiteCallAudit-009: MoreAvailable drives a within-tick continuation
// drain — a multi-page backlog whose timestamps advance is fully drained
// in ONE tick rather than one page per tick.
// ---------------------------------------------------------------------
/// <summary>
/// A saturated first page (<c>MoreAvailable: true</c>) is followed by a
/// continuation pull whose <c>since</c> cursor is the first page's maximum
/// <c>UpdatedAtUtc</c>, and the rows of both pages are upserted.
/// </summary>
[Fact]
public void ReconciliationTick_MoreAvailable_DrainsMultiplePagesWithinOneTick()
{
    var siteId = "siteA";
    var t1 = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
    var t2 = new DateTime(2026, 5, 20, 10, 1, 0, DateTimeKind.Utc);
    var t3 = new DateTime(2026, 5, 20, 10, 2, 0, DateTimeKind.Utc);
    var p1a = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: t1);
    var p1b = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: t2);
    var p2 = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: t3);
    var sites = new StaticEnumerator(new SiteEntry(siteId, "http://siteA:8083"));

    // Page 1 saturates (MoreAvailable: true) → the actor continues pulling
    // within the SAME tick; page 2 is the final page (MoreAvailable: false).
    // The continuation pull's `since` must be t2 (page-1 max), proving the
    // cursor advanced page-to-page inside one tick rather than across ticks.
    var client = new ScriptedPullClient().Script(siteId,
        new PullSiteCallsResponse(new[] { p1a, p1b }, MoreAvailable: true),
        new PullSiteCallsResponse(new[] { p2 }, MoreAvailable: false));
    var repo = new RecordingRepo();

    // Slow tick so the multi-page drain CANNOT be the natural tick cadence —
    // it must be the within-tick continuation loop. Long enough that only the
    // first tick fires in the assert window.
    var options = new SiteCallAuditOptions
    {
        ReconciliationIntervalOverride = TimeSpan.FromSeconds(2),
        ReconciliationBatchSize = 2,
    };
    CreateActor(sites, client, repo, options);

    AwaitAssert(
        () =>
        {
            // All three rows reconciled — including the page-2 row that only a
            // within-tick continuation pull could have fetched.
            Assert.True(repo.Upserted.ContainsKey(p1a.TrackedOperationId));
            Assert.True(repo.Upserted.ContainsKey(p1b.TrackedOperationId));
            Assert.True(repo.Upserted.ContainsKey(p2.TrackedOperationId),
                "the page-2 row must be reconciled within the same tick via the MoreAvailable continuation drain");
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));

    // At least two pulls happened (page 1 + the continuation page 2) and the
    // second pull's `since` cursor advanced to the page-1 max (t2). The count
    // is a lower bound, not an exact match: the client records every call, so
    // a pull from a later tick would add to it without invalidating what the
    // first two calls prove.
    Assert.True(client.Calls.Count >= 2, $"expected >= 2 pulls within the tick, got {client.Calls.Count}");
    Assert.Equal(DateTime.MinValue, client.Calls[0].SinceUtc);
    Assert.Equal(t2, client.Calls[1].SinceUtc);
}
// ---------------------------------------------------------------------
// 6. SiteCallAudit-009: single-timestamp saturation pin does NOT spin —
// a saturated batch whose max UpdatedAtUtc never advances past `since`
// breaks the within-tick drain after one page (no unbounded re-pull),
// and still upserts the rows it saw.
// ---------------------------------------------------------------------
/// <summary>
/// A pull client that always returns the same saturated page (every row on
/// one timestamp) must not drive an unbounded within-tick re-pull: the rows
/// are upserted and the total pull count stays far below the page ceiling.
/// </summary>
[Fact]
public void ReconciliationTick_SingleTimestampSaturation_DoesNotSpin_MakesNoProgressGracefully()
{
    var siteId = "siteA";

    // A burst sharing ONE exact UpdatedAtUtc that saturates the batch — the
    // inclusive max-timestamp cursor cannot advance, so an unbounded
    // continuation loop would re-pull this identical window forever.
    var ts = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
    var r1 = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: ts);
    var r2 = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: ts);
    var sites = new StaticEnumerator(new SiteEntry(siteId, "http://siteA:8083"));
    var client = new SaturatedPinPullClient(new[] { r1, r2 });
    var repo = new RecordingRepo();

    // Long interval so AT MOST one tick fires in the assert window — lets us
    // bound the WITHIN-tick pull count. A no-progress pin must break after a
    // single page, NOT loop up to MaxReconciliationPagesPerTick (50).
    var options = new SiteCallAuditOptions
    {
        ReconciliationIntervalOverride = TimeSpan.FromSeconds(2),
        ReconciliationBatchSize = 2,
    };
    CreateActor(sites, client, repo, options);

    AwaitAssert(
        () =>
        {
            Assert.True(client.CallCount >= 1, "the first reconciliation tick should have pulled");

            // The rows it saw were still upserted (idempotent mirror refresh).
            // These are polled together with the pull count on purpose: the
            // fake bumps CallCount inside PullAsync, i.e. BEFORE the actor
            // has been handed the page it goes on to upsert, so a one-shot
            // check after the wait could run ahead of the actor and fail
            // spuriously.
            Assert.True(repo.Upserted.ContainsKey(r1.TrackedOperationId));
            Assert.True(repo.Upserted.ContainsKey(r2.TrackedOperationId));
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));

    // Critical SiteCallAudit-009 invariant: the within-tick drain BROKE on the
    // no-progress pin rather than looping to the 50-page ceiling. With a 2s
    // tick interval, only the first tick has fired in the window, so the pull
    // count reflects ONE tick's within-loop behaviour. A correct break yields
    // 1 pull for that tick; we allow a small margin for a possible second tick
    // edge, but it must be far below the 50-page within-tick ceiling.
    Assert.True(client.CallCount < 10,
        $"a single-timestamp saturation pin must break the within-tick drain, not spin to the "
        + $"page ceiling; got {client.CallCount} pulls (an unbounded within-tick loop would be 50+)");
}
}