Merge M1: stillpending.md Tier-1 runtime wiring

Closes the Tier-1 silent gaps from the stillpending.md audit (#3-#6):
- AuditLog 365-day purge actor + reconciliation self-heal now actually
  start and run on the central node (were dead code).
- SiteCall reconciliation pull (new PullSiteCalls RPC + plumbing) + daily
  terminal-row purge scheduler.
- Site Event Logging now emits all 5 previously-missing categories
  (alarm, deployment, instance_lifecycle, store_and_forward, notification,
  script started/completed).

14 commits, each implement->review->fix. Build 0/0; cluster verified
healthy with the new singletons starting cleanly (bash docker/deploy.sh).
This commit is contained in:
Joseph Doherty
2026-06-15 12:53:25 -04:00
48 changed files with 5100 additions and 73 deletions
@@ -17,8 +17,10 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// </para>
/// <para>
/// <see cref="IntervalOverride"/> exists for tests to drop the cadence to
/// milliseconds without polluting the production config surface; production
/// binds <see cref="IntervalHours"/> only.
/// milliseconds; production config is expected to set <see cref="IntervalHours"/>
/// only. Because this options class is <c>Bind</c>-ed wholesale, a config value
/// at <c>AuditLog:Purge:IntervalOverride</c> would bind if present (and would
/// bypass the <see cref="Interval"/> minimum clamp) — operators must not set it.
/// </para>
/// </remarks>
public sealed class AuditLogPurgeOptions
@@ -29,15 +31,44 @@ public sealed class AuditLogPurgeOptions
/// <summary>
/// Test-only override for finer control over the tick cadence than
/// whole-hour resolution allows. When non-null, takes precedence over
/// <see cref="IntervalHours"/>. Not bound from config — production
/// config exposes <see cref="IntervalHours"/> only.
/// <see cref="IntervalHours"/> AND bypasses the <see cref="Interval"/>
/// minimum clamp (so tests can use millisecond cadences). Production
/// config exposes <see cref="IntervalHours"/> only and never sets this
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
/// config value at <c>AuditLog:Purge:IntervalOverride</c> WOULD bind if
/// present; operators must not set it.
/// </summary>
public TimeSpan? IntervalOverride { get; set; }
/// <summary>
/// Resolves the effective tick interval, honouring the test override
/// when set. Falls back to <see cref="IntervalHours"/>.
/// Minimum interval the config-bound <see cref="IntervalHours"/> can
/// resolve to. Clamps a misconfigured <c>IntervalHours: 0</c> (or a
/// negative value) away from <see cref="TimeSpan.Zero"/> — a zero
/// interval would make Akka's <c>ScheduleTellRepeatedlyCancelable</c>
/// spin, looping the partition drop/rebuild dance into a sustained SQL
/// outage. The test-only <see cref="IntervalOverride"/> bypasses this
/// clamp so unit tests can still drop the cadence to milliseconds.
/// </summary>
public TimeSpan Interval =>
IntervalOverride ?? TimeSpan.FromHours(IntervalHours);
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromMinutes(1);
/// <summary>
/// Effective tick interval. A non-null <see cref="IntervalOverride"/> is
/// returned as-is, with no clamping applied. Otherwise
/// <see cref="IntervalHours"/> is converted to a <see cref="TimeSpan"/> and
/// raised to <see cref="MinConfiguredInterval"/> whenever it falls below that
/// floor, so a zero or negative configured value never resolves to
/// <see cref="TimeSpan.Zero"/> or less.
/// </summary>
public TimeSpan Interval
{
    get
    {
        // The override wins outright and deliberately skips the clamp.
        if (IntervalOverride.HasValue)
        {
            return IntervalOverride.Value;
        }

        var fromConfig = TimeSpan.FromHours(IntervalHours);
        if (fromConfig >= MinConfiguredInterval)
        {
            return fromConfig;
        }

        // Configured value is below the floor (zero/negative/too small).
        return MinConfiguredInterval;
    }
}
}
@@ -0,0 +1,289 @@
using System.Collections.Concurrent;
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;
using Grpc.Net.Client;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Communication;
using ZB.MOM.WW.ScadaBridge.Communication.Grpc;
using ProtoPullRequest = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest;
using ProtoPullResponse = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse;
using PullAuditEventsResponse = ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration.PullAuditEventsResponse;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <summary>
/// Production <see cref="IPullAuditEventsClient"/> (Audit Log #23, M6) that the
/// central <see cref="SiteAuditReconciliationActor"/> uses to pull the next
/// reconciliation batch from a site over the <c>PullAuditEvents</c> unary gRPC
/// RPC served by <c>SiteStreamGrpcServer</c>.
/// </summary>
/// <remarks>
/// <para>
/// <b>Endpoint resolution.</b> The actor passes only a <c>siteId</c>; this
/// client resolves it to a gRPC authority via <see cref="ISiteEnumerator"/>
/// (<see cref="SiteEntry.GrpcEndpoint"/>) on every call so a NodeA→NodeB
/// failover flip or an edited site address takes effect on the next tick — the
/// same liveness guarantee <c>SiteStreamGrpcClientFactory</c> gives the
/// real-time stream. A site with no registered endpoint yields an empty
/// response (no dial); reconciliation simply has nothing to pull from it.
/// Faults raised by the enumerator itself are not caught here and propagate
/// to the caller.
/// </para>
/// <para>
/// <b>Fault tolerance.</b> Per the <see cref="IPullAuditEventsClient"/>
/// contract, tolerable transport faults (connection refused / site offline =
/// <see cref="StatusCode.Unavailable"/>, slow site = <see cref="StatusCode.DeadlineExceeded"/>,
/// shutdown = <see cref="StatusCode.Cancelled"/>, plus bare
/// <see cref="HttpRequestException"/> / <c>SocketException</c> before a gRPC
/// status is established) are caught and collapsed to an empty response — one
/// offline site must never sink the rest of the reconciliation tick. Any other
/// fault raised by the invoker, and a reply that fails DTO mapping, is also
/// swallowed to empty (logged at Warning): audit reconciliation is best-effort
/// and a throw would only get re-caught by the actor's own per-site guard.
/// </para>
/// <para>
/// <b>Testability.</b> The unary call is reached through the
/// <see cref="IPullAuditEventsInvoker"/> seam. Production binds
/// <see cref="GrpcPullAuditEventsInvoker"/> (one cached <see cref="GrpcChannel"/>
/// per endpoint, keepalive from <see cref="CommunicationOptions"/>); unit tests
/// inject a fake invoker so no real HTTP/2 endpoint is required.
/// </para>
/// </remarks>
public sealed class GrpcPullAuditEventsClient : IPullAuditEventsClient
{
    private readonly ISiteEnumerator _sites;
    private readonly IPullAuditEventsInvoker _invoker;
    private readonly ILogger<GrpcPullAuditEventsClient> _logger;

    /// <summary>
    /// Creates the client over the given site enumerator and unary-call invoker.
    /// </summary>
    /// <param name="sites">Resolves a <c>siteId</c> to its gRPC endpoint.</param>
    /// <param name="invoker">Seam that issues the <c>PullAuditEvents</c> unary RPC against a resolved endpoint.</param>
    /// <param name="logger">Logger for transport-fault diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public GrpcPullAuditEventsClient(
        ISiteEnumerator sites,
        IPullAuditEventsInvoker invoker,
        ILogger<GrpcPullAuditEventsClient> logger)
    {
        _sites = sites ?? throw new ArgumentNullException(nameof(sites));
        _invoker = invoker ?? throw new ArgumentNullException(nameof(invoker));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<PullAuditEventsResponse> PullAsync(
        string siteId,
        DateTime sinceUtc,
        int batchSize,
        CancellationToken ct)
    {
        var endpoint = await ResolveEndpointAsync(siteId, ct).ConfigureAwait(false);
        if (endpoint is null)
        {
            // No gRPC address registered for the site — absence of an address is
            // a configuration decision (mirrors ISiteEnumerator's own contract),
            // not a runtime error, so there is simply nothing to pull.
            _logger.LogDebug(
                "PullAuditEvents skipped: no gRPC endpoint registered for site {SiteId}.", siteId);
            return Empty;
        }

        var request = new ProtoPullRequest
        {
            // ReadPendingSinceAsync treats DateTime.MinValue as "from the start";
            // EnsureUtc keeps Timestamp.FromDateTime happy (it requires UTC kind).
            SinceUtc = Timestamp.FromDateTime(EnsureUtc(sinceUtc)),
            BatchSize = batchSize,
        };

        ProtoPullResponse reply;
        try
        {
            reply = await _invoker.InvokeAsync(endpoint, request, ct).ConfigureAwait(false);
        }
        catch (RpcException ex) when (IsTolerable(ex.StatusCode))
        {
            _logger.LogDebug(ex,
                "PullAuditEvents tolerable transport fault for site {SiteId} ({Endpoint}): {Status}. Returning empty batch.",
                siteId, endpoint, ex.StatusCode);
            return Empty;
        }
        catch (Exception ex) when (ex is HttpRequestException or System.Net.Sockets.SocketException)
        {
            _logger.LogDebug(ex,
                "PullAuditEvents connection-layer fault for site {SiteId} ({Endpoint}). Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }
        catch (OperationCanceledException)
        {
            // Reconciliation tick was cancelled — either the caller's token
            // (host shutdown / scope dispose) or an internal gRPC deadline /
            // linked-CTS cancellation. Both are tolerable for a best-effort
            // pull; collapse to empty rather than letting an internal
            // cancellation land noisily in the catch-all below.
            return Empty;
        }
        catch (Exception ex)
        {
            // Any other invoker fault (an RpcException with a non-tolerable
            // status, or a non-RpcException transport fault wrapper). Audit
            // reconciliation is best-effort; swallow to empty rather than
            // throw — the actor's per-site guard would only re-catch it.
            _logger.LogWarning(ex,
                "PullAuditEvents unexpected fault for site {SiteId} ({Endpoint}). Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }

        // Map proto DTOs to canonical AuditEvent records and order oldest-first
        // (the wire is already ordered by the site queue, but the
        // IPullAuditEventsClient contract is explicit, so sort defensively).
        // The mapping is guarded so a malformed reply honours the documented
        // "swallowed to empty" contract instead of escaping PullAsync.
        try
        {
            var events = reply.Events
                .Select(AuditEventDtoMapper.FromDto)
                .OrderBy(e => e.OccurredAtUtc)
                .ToList();
            return new PullAuditEventsResponse(events, reply.MoreAvailable);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "PullAuditEvents reply from site {SiteId} ({Endpoint}) failed DTO mapping. Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }
    }

    /// <summary>
    /// Returns the <see cref="SiteEntry.GrpcEndpoint"/> of the first enumerated
    /// site whose id equals <paramref name="siteId"/> (ordinal comparison) and
    /// whose endpoint is non-blank, or <c>null</c> when there is none.
    /// </summary>
    private async Task<string?> ResolveEndpointAsync(string siteId, CancellationToken ct)
    {
        var sites = await _sites.EnumerateAsync(ct).ConfigureAwait(false);
        foreach (var site in sites)
        {
            if (string.Equals(site.SiteId, siteId, StringComparison.Ordinal) &&
                !string.IsNullOrWhiteSpace(site.GrpcEndpoint))
            {
                return site.GrpcEndpoint;
            }
        }

        return null;
    }

    // Shared "nothing to reconcile" response: no events, no further pages.
    private static readonly PullAuditEventsResponse Empty =
        new(Array.Empty<ZB.MOM.WW.Audit.AuditEvent>(), MoreAvailable: false);

    // gRPC statuses that are logged at Debug and collapsed to an empty batch.
    private static bool IsTolerable(StatusCode code) => code is
        StatusCode.Unavailable or
        StatusCode.DeadlineExceeded or
        StatusCode.Cancelled;

    // All ScadaBridge timestamps are UTC by invariant. A non-UTC cursor (the
    // reconciliation cursor starts at DateTime.MinValue, Kind=Unspecified) is
    // therefore treated AS UTC — never ToUniversalTime()-converted: on a host
    // with a positive UTC offset MinValue.ToUniversalTime() underflows and
    // Timestamp.FromDateTime throws, crashing the first pull for every site.
    private static DateTime EnsureUtc(DateTime value) =>
        value.Kind == DateTimeKind.Utc ? value : DateTime.SpecifyKind(value, DateTimeKind.Utc);

    /// <summary>
    /// Seam over the <c>PullAuditEvents</c> unary gRPC call against a resolved
    /// site endpoint. Extracted so <see cref="GrpcPullAuditEventsClient"/> can
    /// be unit-tested without a real <see cref="GrpcChannel"/>. Production binds
    /// <see cref="GrpcPullAuditEventsInvoker"/>.
    /// </summary>
    public interface IPullAuditEventsInvoker
    {
        /// <summary>
        /// Issues the <c>PullAuditEvents</c> unary RPC against <paramref name="endpoint"/>.
        /// May throw <see cref="RpcException"/> / <see cref="HttpRequestException"/>
        /// on transport faults — the caller classifies and swallows tolerable ones.
        /// </summary>
        /// <param name="endpoint">The site gRPC authority (e.g. <c>http://site-a:8083</c>).</param>
        /// <param name="request">The wire-format pull request.</param>
        /// <param name="ct">Cancellation token.</param>
        /// <returns>The wire-format pull response.</returns>
        Task<ProtoPullResponse> InvokeAsync(string endpoint, ProtoPullRequest request, CancellationToken ct);
    }
}
/// <summary>
/// Production <see cref="GrpcPullAuditEventsClient.IPullAuditEventsInvoker"/>:
/// caches one <see cref="GrpcChannel"/> per endpoint (keepalive from
/// <see cref="CommunicationOptions"/>, mirroring <c>SiteStreamGrpcClient</c>)
/// and issues the unary <c>PullAuditEventsAsync</c> call. The cache is keyed by
/// endpoint string, so a changed site address (NodeA→NodeB failover flip / an
/// edited gRPC address) is reached as soon as the resolver hands the new
/// endpoint to <see cref="InvokeAsync"/> — it creates a fresh channel for the
/// new address. Unlike <c>SiteStreamGrpcClientFactory</c> (keyed by siteId,
/// which actively evicts a re-keyed client), the channel for the previous
/// address is NOT actively evicted here; it lingers idle until
/// <see cref="Dispose"/>. Idle channels hold no streams, so this is a minor
/// cache footprint cost, not a correctness or liveness gap.
/// </summary>
/// <remarks>
/// Each channel owns the <see cref="SocketsHttpHandler"/> created for it
/// (<c>DisposeHttpClient = true</c>), so disposing a channel also releases
/// that handler and its connection pool.
/// </remarks>
public sealed class GrpcPullAuditEventsInvoker
    : GrpcPullAuditEventsClient.IPullAuditEventsInvoker, IDisposable
{
    private readonly ConcurrentDictionary<string, GrpcChannel> _channels = new(StringComparer.Ordinal);
    private readonly CommunicationOptions _options;

    /// <summary>
    /// Creates the invoker using default <see cref="CommunicationOptions"/>.
    /// </summary>
    public GrpcPullAuditEventsInvoker()
        : this(new CommunicationOptions())
    {
    }

    /// <summary>
    /// Creates the invoker, applying the configured gRPC keepalive settings to
    /// every channel it opens.
    /// </summary>
    /// <param name="options">Communication options supplying gRPC keepalive timings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public GrpcPullAuditEventsInvoker(CommunicationOptions options)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
    }

    /// <inheritdoc />
    public async Task<ProtoPullResponse> InvokeAsync(
        string endpoint, ProtoPullRequest request, CancellationToken ct)
    {
        var channel = GetOrCreateChannel(endpoint);
        var client = new SiteStreamService.SiteStreamServiceClient(channel);
        using var call = client.PullAuditEventsAsync(request, cancellationToken: ct);
        return await call.ResponseAsync.ConfigureAwait(false);
    }

    // Race-safe channel cache. ConcurrentDictionary.GetOrAdd(key, valueFactory)
    // does NOT serialize the factory, so two concurrent first dials of the same
    // endpoint can both build a GrpcChannel (each holds an HTTP/2 connection
    // pool) and the loser would leak. Create-then-GetOrAdd-then-dispose-if-lost
    // mirrors SiteStreamGrpcClientFactory: only the channel actually installed
    // survives; a channel that lost the race is disposed immediately.
    private GrpcChannel GetOrCreateChannel(string endpoint)
    {
        if (!_channels.TryGetValue(endpoint, out var channel))
        {
            var created = CreateChannel(endpoint);
            channel = _channels.GetOrAdd(endpoint, created);
            if (!ReferenceEquals(channel, created))
            {
                created.Dispose();
            }
        }

        return channel;
    }

    // Builds a channel with its own keepalive-configured handler.
    private GrpcChannel CreateChannel(string endpoint) =>
        GrpcChannel.ForAddress(endpoint, new GrpcChannelOptions
        {
            HttpHandler = new SocketsHttpHandler
            {
                KeepAlivePingDelay = _options.GrpcKeepAlivePingDelay,
                KeepAlivePingTimeout = _options.GrpcKeepAlivePingTimeout,
                KeepAlivePingPolicy = HttpKeepAlivePingPolicy.Always,
            },
            // The handler above is created solely for this channel. A
            // caller-supplied HttpHandler is NOT disposed with the channel
            // unless this flag is set (it defaults to false), so without it
            // every disposed channel would leak its handler/connection pool.
            DisposeHttpClient = true,
        });

    /// <summary>Disposes all cached channels and empties the cache.</summary>
    public void Dispose()
    {
        foreach (var channel in _channels.Values)
        {
            channel.Dispose();
        }

        _channels.Clear();
    }
}
@@ -0,0 +1,304 @@
using System.Collections.Concurrent;
using System.Linq;
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;
using Grpc.Net.Client;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Communication;
using ZB.MOM.WW.ScadaBridge.Communication.Grpc;
using ProtoPullRequest = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest;
using ProtoPullResponse = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse;
using PullSiteCallsResponse = ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration.PullSiteCallsResponse;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <summary>
/// Production <see cref="IPullSiteCallsClient"/> (Site Call Audit #22) that the
/// central reconciliation tick (a separate follow-up component) uses to pull the
/// next batch of cached-call operational rows from a site over the
/// <c>PullSiteCalls</c> unary gRPC RPC served by <c>SiteStreamGrpcServer</c>.
/// A near-exact sibling of <see cref="GrpcPullAuditEventsClient"/>.
/// </summary>
/// <remarks>
/// <para>
/// <b>Endpoint resolution.</b> The caller passes only a <c>siteId</c>; this
/// client resolves it to a gRPC authority via <see cref="ISiteEnumerator"/>
/// (<see cref="SiteEntry.GrpcEndpoint"/>) on every call so a NodeA→NodeB
/// failover flip or an edited site address takes effect on the next tick. A site
/// with no registered endpoint yields an empty response (no dial). Faults
/// raised by the enumerator itself are not caught here and propagate to the
/// caller.
/// </para>
/// <para>
/// <b>SourceSite re-stamp.</b> The site leaves
/// <c>SiteCallOperationalDto.SourceSite</c> empty (the tracking store has no
/// site-id column). This client is the authority that knows which site it
/// dialed, so it re-stamps the mapped <see cref="SiteCall.SourceSite"/> from
/// <c>siteId</c> — the same "re-stamp from the forwarder's own id" pattern the
/// site push path uses.
/// </para>
/// <para>
/// <b>Fault tolerance.</b> Per the <see cref="IPullSiteCallsClient"/> contract,
/// tolerable transport faults (<see cref="StatusCode.Unavailable"/>,
/// <see cref="StatusCode.DeadlineExceeded"/>, <see cref="StatusCode.Cancelled"/>,
/// bare <see cref="HttpRequestException"/> / <c>SocketException</c>) are caught
/// and collapsed to an empty response so one offline site never sinks the rest
/// of the reconciliation tick. Any other transport/protocol fault is also
/// swallowed to empty: reconciliation is best-effort. Per-row DTO mapping faults
/// (e.g. a single unparseable <c>TrackedOperationId</c>) are narrower still —
/// the offending row is skipped+logged and the rest of the batch is returned.
/// </para>
/// <para>
/// <b>Ordering.</b> Surviving rows are returned oldest-first by
/// <c>UpdatedAtUtc</c> using a stable sort, so rows that share a timestamp keep
/// the relative order in which the site sent them.
/// </para>
/// <para>
/// <b>Testability.</b> The unary call is reached through the
/// <see cref="IPullSiteCallsInvoker"/> seam. Production binds
/// <see cref="GrpcPullSiteCallsInvoker"/> (one cached <see cref="GrpcChannel"/>
/// per endpoint, keepalive from <see cref="CommunicationOptions"/>); unit tests
/// inject a fake invoker so no real HTTP/2 endpoint is required.
/// </para>
/// </remarks>
public sealed class GrpcPullSiteCallsClient : IPullSiteCallsClient
{
    private readonly ISiteEnumerator _sites;
    private readonly IPullSiteCallsInvoker _invoker;
    private readonly ILogger<GrpcPullSiteCallsClient> _logger;

    /// <summary>
    /// Creates the client over the given site enumerator and unary-call invoker.
    /// </summary>
    /// <param name="sites">Resolves a <c>siteId</c> to its gRPC endpoint.</param>
    /// <param name="invoker">Seam that issues the <c>PullSiteCalls</c> unary RPC against a resolved endpoint.</param>
    /// <param name="logger">Logger for transport-fault diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public GrpcPullSiteCallsClient(
        ISiteEnumerator sites,
        IPullSiteCallsInvoker invoker,
        ILogger<GrpcPullSiteCallsClient> logger)
    {
        _sites = sites ?? throw new ArgumentNullException(nameof(sites));
        _invoker = invoker ?? throw new ArgumentNullException(nameof(invoker));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public async Task<PullSiteCallsResponse> PullAsync(
        string siteId,
        DateTime sinceUtc,
        int batchSize,
        CancellationToken ct)
    {
        var endpoint = await ResolveEndpointAsync(siteId, ct).ConfigureAwait(false);
        if (endpoint is null)
        {
            // No gRPC address registered for the site — a configuration decision
            // (mirrors ISiteEnumerator's own contract), not a runtime error, so
            // there is simply nothing to pull.
            _logger.LogDebug(
                "PullSiteCalls skipped: no gRPC endpoint registered for site {SiteId}.", siteId);
            return Empty;
        }

        var request = new ProtoPullRequest
        {
            // ReadChangedSinceAsync treats DateTime.MinValue as "from the start";
            // EnsureUtc keeps Timestamp.FromDateTime happy (it requires UTC kind).
            SinceUtc = Timestamp.FromDateTime(EnsureUtc(sinceUtc)),
            BatchSize = batchSize,
        };

        ProtoPullResponse reply;
        try
        {
            reply = await _invoker.InvokeAsync(endpoint, request, ct).ConfigureAwait(false);
        }
        catch (RpcException ex) when (IsTolerable(ex.StatusCode))
        {
            _logger.LogDebug(ex,
                "PullSiteCalls tolerable transport fault for site {SiteId} ({Endpoint}): {Status}. Returning empty batch.",
                siteId, endpoint, ex.StatusCode);
            return Empty;
        }
        catch (Exception ex) when (ex is HttpRequestException or System.Net.Sockets.SocketException)
        {
            _logger.LogDebug(ex,
                "PullSiteCalls connection-layer fault for site {SiteId} ({Endpoint}). Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }
        catch (OperationCanceledException)
        {
            // Reconciliation tick cancelled — caller token (host shutdown) or an
            // internal gRPC deadline / linked-CTS cancellation. Both tolerable for
            // a best-effort pull; collapse to empty rather than landing noisily in
            // the catch-all below.
            return Empty;
        }
        catch (Exception ex)
        {
            // Any other fault. Reconciliation is best-effort; swallow to empty
            // rather than throw — the (future) actor's per-site guard would only
            // re-catch it.
            _logger.LogWarning(ex,
                "PullSiteCalls unexpected fault for site {SiteId} ({Endpoint}). Returning empty batch.",
                siteId, endpoint);
            return Empty;
        }

        // Map proto DTOs to central SiteCall entities PER-ROW so one malformed
        // operational (e.g. an unparseable TrackedOperationId) is skipped+logged
        // rather than failing the whole batch. Each survivor is re-stamped with
        // SourceSite from the dialed siteId (the site leaves it empty).
        var siteCalls = new List<SiteCall>(reply.Operationals.Count);
        foreach (var dto in reply.Operationals)
        {
            try
            {
                var sc = SiteCallDtoMapper.FromDto(dto) with { SourceSite = siteId };
                siteCalls.Add(sc);
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex,
                    "PullSiteCalls dropped a malformed operational row from site {SiteId} (id='{Id}'); continuing with the rest of the batch.",
                    siteId, dto.TrackedOperationId);
            }
        }

        // Order oldest-first by UpdatedAtUtc (the wire is already ordered by the
        // site read, but the contract is explicit, so sort defensively).
        // OrderBy is a STABLE sort, unlike List<T>.Sort: rows that share an
        // UpdatedAtUtc keep the relative order the site sent them in, so the
        // defensive sort can never perturb an already-correct wire order.
        var ordered = siteCalls.OrderBy(sc => sc.UpdatedAtUtc).ToList();
        return new PullSiteCallsResponse(ordered, reply.MoreAvailable);
    }

    /// <summary>
    /// Returns the <see cref="SiteEntry.GrpcEndpoint"/> of the first enumerated
    /// site whose id equals <paramref name="siteId"/> (ordinal comparison) and
    /// whose endpoint is non-blank, or <c>null</c> when there is none.
    /// </summary>
    private async Task<string?> ResolveEndpointAsync(string siteId, CancellationToken ct)
    {
        var sites = await _sites.EnumerateAsync(ct).ConfigureAwait(false);
        foreach (var site in sites)
        {
            if (string.Equals(site.SiteId, siteId, StringComparison.Ordinal) &&
                !string.IsNullOrWhiteSpace(site.GrpcEndpoint))
            {
                return site.GrpcEndpoint;
            }
        }

        return null;
    }

    // Shared "nothing to reconcile" response: no rows, no further pages.
    private static readonly PullSiteCallsResponse Empty =
        new(Array.Empty<SiteCall>(), MoreAvailable: false);

    // gRPC statuses that are logged at Debug and collapsed to an empty batch.
    private static bool IsTolerable(StatusCode code) => code is
        StatusCode.Unavailable or
        StatusCode.DeadlineExceeded or
        StatusCode.Cancelled;

    // All ScadaBridge timestamps are UTC by invariant. A non-UTC cursor (the
    // reconciliation cursor starts at DateTime.MinValue, Kind=Unspecified) is
    // treated AS UTC — never ToUniversalTime()-converted: on a host with a
    // positive UTC offset MinValue.ToUniversalTime() underflows and
    // Timestamp.FromDateTime throws, crashing the first pull for every site.
    private static DateTime EnsureUtc(DateTime value) =>
        value.Kind == DateTimeKind.Utc ? value : DateTime.SpecifyKind(value, DateTimeKind.Utc);

    /// <summary>
    /// Seam over the <c>PullSiteCalls</c> unary gRPC call against a resolved site
    /// endpoint. Extracted so <see cref="GrpcPullSiteCallsClient"/> can be
    /// unit-tested without a real <see cref="GrpcChannel"/>. Production binds
    /// <see cref="GrpcPullSiteCallsInvoker"/>.
    /// </summary>
    public interface IPullSiteCallsInvoker
    {
        /// <summary>
        /// Issues the <c>PullSiteCalls</c> unary RPC against <paramref name="endpoint"/>.
        /// May throw <see cref="RpcException"/> / <see cref="HttpRequestException"/>
        /// on transport faults — the caller classifies and swallows tolerable ones.
        /// </summary>
        /// <param name="endpoint">The site gRPC authority (e.g. <c>http://site-a:8083</c>).</param>
        /// <param name="request">The wire-format pull request.</param>
        /// <param name="ct">Cancellation token.</param>
        /// <returns>The wire-format pull response.</returns>
        Task<ProtoPullResponse> InvokeAsync(string endpoint, ProtoPullRequest request, CancellationToken ct);
    }
}
/// <summary>
/// Production <see cref="GrpcPullSiteCallsClient.IPullSiteCallsInvoker"/>: caches
/// one <see cref="GrpcChannel"/> per endpoint (keepalive from
/// <see cref="CommunicationOptions"/>, mirroring <c>SiteStreamGrpcClient</c>) and
/// issues the unary <c>PullSiteCallsAsync</c> call. The cache is keyed by
/// endpoint string, so a changed site address (NodeA→NodeB failover flip / an
/// edited gRPC address) is reached as soon as the resolver hands the new endpoint
/// to <see cref="InvokeAsync"/>. The channel for a previous address lingers idle
/// until <see cref="Dispose"/> (idle channels hold no streams — a minor cache
/// footprint cost, not a correctness or liveness gap). Sibling of
/// <see cref="GrpcPullAuditEventsInvoker"/>.
/// </summary>
/// <remarks>
/// Each channel owns the <see cref="SocketsHttpHandler"/> created for it
/// (<c>DisposeHttpClient = true</c>), so disposing a channel also releases
/// that handler and its connection pool.
/// </remarks>
public sealed class GrpcPullSiteCallsInvoker
    : GrpcPullSiteCallsClient.IPullSiteCallsInvoker, IDisposable
{
    private readonly ConcurrentDictionary<string, GrpcChannel> _channels = new(StringComparer.Ordinal);
    private readonly CommunicationOptions _options;

    /// <summary>Creates the invoker using default <see cref="CommunicationOptions"/>.</summary>
    public GrpcPullSiteCallsInvoker()
        : this(new CommunicationOptions())
    {
    }

    /// <summary>
    /// Creates the invoker, applying the configured gRPC keepalive settings to
    /// every channel it opens.
    /// </summary>
    /// <param name="options">Communication options supplying gRPC keepalive timings.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public GrpcPullSiteCallsInvoker(CommunicationOptions options)
    {
        _options = options ?? throw new ArgumentNullException(nameof(options));
    }

    /// <inheritdoc />
    public async Task<ProtoPullResponse> InvokeAsync(
        string endpoint, ProtoPullRequest request, CancellationToken ct)
    {
        var channel = GetOrCreateChannel(endpoint);
        var client = new SiteStreamService.SiteStreamServiceClient(channel);
        using var call = client.PullSiteCallsAsync(request, cancellationToken: ct);
        return await call.ResponseAsync.ConfigureAwait(false);
    }

    // Race-safe channel cache (create-then-GetOrAdd-then-dispose-if-lost): two
    // concurrent first dials of the same endpoint can both build a GrpcChannel;
    // only the channel actually installed survives, the loser is disposed.
    // Mirrors SiteStreamGrpcClientFactory / GrpcPullAuditEventsInvoker.
    private GrpcChannel GetOrCreateChannel(string endpoint)
    {
        if (!_channels.TryGetValue(endpoint, out var channel))
        {
            var created = CreateChannel(endpoint);
            channel = _channels.GetOrAdd(endpoint, created);
            if (!ReferenceEquals(channel, created))
            {
                created.Dispose();
            }
        }

        return channel;
    }

    // Builds a channel with its own keepalive-configured handler.
    private GrpcChannel CreateChannel(string endpoint) =>
        GrpcChannel.ForAddress(endpoint, new GrpcChannelOptions
        {
            HttpHandler = new SocketsHttpHandler
            {
                KeepAlivePingDelay = _options.GrpcKeepAlivePingDelay,
                KeepAlivePingTimeout = _options.GrpcKeepAlivePingTimeout,
                KeepAlivePingPolicy = HttpKeepAlivePingPolicy.Always,
            },
            // The handler above is created solely for this channel. A
            // caller-supplied HttpHandler is NOT disposed with the channel
            // unless this flag is set (it defaults to false), so without it
            // every disposed channel would leak its handler/connection pool.
            DisposeHttpClient = true,
        });

    /// <summary>Disposes all cached channels and empties the cache.</summary>
    public void Dispose()
    {
        foreach (var channel in _channels.Values)
        {
            channel.Dispose();
        }

        _channels.Clear();
    }
}
@@ -0,0 +1,57 @@
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <summary>
/// Mockable abstraction over the central-side <c>PullSiteCalls</c> gRPC client
/// surface used by the Site Call Audit (#22) reconciliation tick to fetch the
/// next batch of cached-call operational rows from a specific site — the
/// documented periodic self-heal pull that backfills the eventually-consistent
/// central <c>SiteCalls</c> mirror when best-effort push telemetry is lost.
/// Extracted so the (separate, follow-up) reconciliation actor can be
/// unit-tested against an in-memory stub without standing up a real
/// <c>GrpcChannel</c> per site.
/// </summary>
/// <remarks>
/// <para>
/// The home is <c>ZB.MOM.WW.ScadaBridge.AuditLog.Central</c> rather than the
/// <c>ZB.MOM.WW.ScadaBridge.SiteCallAudit</c> project so it can reuse the
/// <see cref="ISiteEnumerator"/> / <see cref="SiteEntry"/> endpoint-resolution
/// abstraction that already lives here (and that the sibling
/// <see cref="IPullAuditEventsClient"/> uses) — SiteCallAudit does not reference
/// AuditLog, so hosting the client there would mean duplicating the enumerator.
/// This mirrors the decision to keep <see cref="SiteCallDtoMapper"/> in
/// <c>ZB.MOM.WW.ScadaBridge.Communication</c>.
/// </para>
/// <para>
/// Implementations MUST NOT throw on transport faults the reconciliation tick
/// can tolerate (connection refused, deadline exceeded, cancellation) — one
/// offline site must never sink the rest of the tick. The
/// <see cref="PullSiteCallsResponse.SiteCalls"/> are returned oldest-first by
/// <c>UpdatedAtUtc</c> with the <c>SourceSite</c> re-stamped from the dialed
/// site id (the site leaves it empty, being unaware of its own id), and a
/// <c>MoreAvailable</c> flag the caller uses to decide whether to fire another
/// pull immediately.
/// </para>
/// </remarks>
public interface IPullSiteCallsClient
{
/// <summary>
/// Issues a <c>PullSiteCalls</c> RPC against the site whose gRPC endpoint is
/// registered against <paramref name="siteId"/>. Returns the next batch of
/// <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.SiteCall"/> rows
/// ordered oldest-first by <c>UpdatedAtUtc</c> (with <c>SourceSite</c>
/// re-stamped from <paramref name="siteId"/>) AND a <c>MoreAvailable</c> flag
/// the caller uses to decide whether to fire another pull immediately.
/// Tolerable transport faults (connection refused, deadline exceeded,
/// cancellation) must not surface as exceptions from this call.
/// </summary>
/// <param name="siteId">The identifier of the site to pull cached-call operational rows from; also the value stamped onto each returned row's <c>SourceSite</c>.</param>
/// <param name="sinceUtc">Only rows with an <c>UpdatedAtUtc</c> at or after this cursor time are returned.</param>
/// <param name="batchSize">Maximum number of rows to return per call.</param>
/// <param name="ct">Cancellation token.</param>
/// <returns>A task that resolves to the next reconciliation batch with a <c>MoreAvailable</c> flag.</returns>
Task<PullSiteCallsResponse> PullAsync(
string siteId,
DateTime sinceUtc,
int batchSize,
CancellationToken ct);
}
@@ -9,11 +9,12 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <remarks>
/// The production implementation wraps <c>ISiteRepository.GetAllSitesAsync</c>
/// and projects each <c>Site</c> to a <see cref="SiteEntry"/> using the
/// site's configured <c>GrpcNodeAAddress</c> (falling back to
/// <c>GrpcNodeBAddress</c> when NodeA is unset). Sites with NO gRPC address
/// configured are silently skipped — the reconciliation pull cannot reach
/// them, but absence of an address is a configuration decision, not a runtime
/// error.
/// site's configured <c>GrpcNodeAAddress</c>. This is a NodeA-only first cut:
/// sites with a blank <c>GrpcNodeAAddress</c> are silently SKIPPED — the
/// reconciliation pull cannot reach them, but absence of an address is a
/// configuration decision, not a runtime error. NodeB-fallback endpoint
/// selection (dial NodeB when NodeA is unset/unreachable) is a follow-up
/// (mirrors the comment in <c>SiteEnumerator.cs</c>).
/// </remarks>
public interface ISiteEnumerator
{
@@ -182,6 +182,10 @@ public class SiteAuditReconciliationActor : ReceiveActor
IReadOnlyList<SiteEntry> sites;
try
{
// No ambient CancellationToken in a ReceiveActor message handler —
// CancellationToken.None (the EnumerateAsync default) is intentional.
// The work is bounded by the 5-min reconciliation tick plus the
// 10s graceful-stop drain on PhaseClusterLeave.
sites = await _sites.EnumerateAsync().ConfigureAwait(false);
}
catch (Exception ex)
@@ -31,18 +31,45 @@ public sealed class SiteAuditReconciliationOptions
/// <summary>
/// Test-only override for finer control over the tick cadence than
/// whole-second resolution allows. When non-null, takes precedence over
/// <see cref="ReconciliationIntervalSeconds"/>. Not bound from config —
/// production config exposes <see cref="ReconciliationIntervalSeconds"/>
/// only.
/// <see cref="ReconciliationIntervalSeconds"/> AND bypasses the
/// <see cref="ReconciliationInterval"/> minimum clamp (so tests can use
/// millisecond cadences). Production config exposes
/// <see cref="ReconciliationIntervalSeconds"/> only and never sets this
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
/// config value at <c>AuditLog:Reconciliation:ReconciliationIntervalOverride</c>
/// WOULD bind if present; operators must not set it.
/// </summary>
public TimeSpan? ReconciliationIntervalOverride { get; set; }
/// <summary>
/// Resolves the effective tick interval, honouring the test override when
/// set. Falls back to <see cref="ReconciliationIntervalSeconds"/>.
/// Minimum interval the config-bound <see cref="ReconciliationIntervalSeconds"/>
/// can resolve to. Clamps a misconfigured <c>ReconciliationIntervalSeconds: 0</c>
/// (or a negative value) away from <see cref="TimeSpan.Zero"/>, which would make
/// Akka's <c>ScheduleTellRepeatedlyCancelable</c> spin. The test-only
/// <see cref="ReconciliationIntervalOverride"/> bypasses this clamp so unit tests
/// can still drop the cadence to milliseconds.
/// </summary>
public TimeSpan ReconciliationInterval =>
ReconciliationIntervalOverride ?? TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromSeconds(1);
/// <summary>
/// Effective reconciliation tick interval. A non-null
/// <see cref="ReconciliationIntervalOverride"/> is returned verbatim (no clamp is
/// applied to it). Otherwise <see cref="ReconciliationIntervalSeconds"/> is
/// converted to a <see cref="TimeSpan"/> and raised to
/// <see cref="MinConfiguredInterval"/> when it falls below that floor, so a zero
/// or negative configured value never resolves to <see cref="TimeSpan.Zero"/>.
/// </summary>
public TimeSpan ReconciliationInterval
{
    get
    {
        // The override wins outright and deliberately skips the minimum clamp.
        if (ReconciliationIntervalOverride.HasValue)
        {
            return ReconciliationIntervalOverride.Value;
        }

        // Config-bound path: never hand back less than the configured floor.
        var configured = TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
        return configured >= MinConfiguredInterval ? configured : MinConfiguredInterval;
    }
}
/// <summary>
/// Maximum number of <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.AuditEvent"/>
@@ -0,0 +1,77 @@
using Microsoft.Extensions.DependencyInjection;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <summary>
/// Default <see cref="ISiteEnumerator"/> for the central
/// <see cref="SiteAuditReconciliationActor"/>. Loads every site through
/// <see cref="ISiteRepository.GetAllSitesAsync"/> and turns each one that has a
/// NodeA gRPC address into a <see cref="SiteEntry"/> built from the site's
/// <c>SiteIdentifier</c> (cursor key) and <c>GrpcNodeAAddress</c> (dial target).
/// </summary>
/// <remarks>
/// <para>
/// <b>One DI scope per call.</b> <see cref="ISiteRepository"/> is a scoped EF Core
/// service (registered by <c>AddConfigurationDatabase</c>), so it cannot be
/// resolved from the root provider without failing scope validation. The
/// enumerator holds the root <see cref="IServiceProvider"/> and opens a fresh
/// <c>CreateAsyncScope</c> inside every <see cref="EnumerateAsync"/> call, the
/// same per-tick scope approach used by
/// <see cref="SiteAuditReconciliationActor.OnTickAsync"/>.
/// </para>
/// <para>
/// <b>Sites without an address are omitted.</b> A site whose
/// <c>GrpcNodeAAddress</c> is null, empty or whitespace is left out of the result
/// without any error: it cannot be dialed, and a missing address is treated as a
/// configuration choice rather than a runtime fault (per the
/// <see cref="ISiteEnumerator"/> contract).
/// </para>
/// <para>
/// <b>NodeA only, for now.</b> Only NodeA's address is ever used. Choosing NodeB
/// when NodeA is unreachable is a follow-up; since <see cref="SiteEntry"/> carries
/// a single endpoint, that failover is expected to live in the puller/client
/// rather than in this type.
/// </para>
/// </remarks>
public sealed class SiteEnumerator : ISiteEnumerator
{
    private readonly IServiceProvider _services;

    /// <summary>
    /// Creates the enumerator around the root service provider from which a new
    /// DI scope is opened on each enumeration.
    /// </summary>
    /// <param name="services">Root service provider used to resolve the scoped <see cref="ISiteRepository"/>.</param>
    public SiteEnumerator(IServiceProvider services)
    {
        _services = services ?? throw new ArgumentNullException(nameof(services));
    }

    /// <inheritdoc />
    public async Task<IReadOnlyList<SiteEntry>> EnumerateAsync(CancellationToken ct = default)
    {
        await using var scope = _services.CreateAsyncScope();

        var allSites = await scope.ServiceProvider
            .GetRequiredService<ISiteRepository>()
            .GetAllSitesAsync(ct)
            .ConfigureAwait(false);

        // Sized for the worst case where every site has a NodeA address.
        var reachable = new List<SiteEntry>(allSites.Count);
        foreach (var candidate in allSites)
        {
            // NodeA's gRPC address is the only dial target in this first cut;
            // sites without one are dropped from the result.
            if (!string.IsNullOrWhiteSpace(candidate.GrpcNodeAAddress))
            {
                // The whitespace check above rules out null; the null-forgiving
                // operator just makes that explicit.
                reachable.Add(new SiteEntry(candidate.SiteIdentifier, candidate.GrpcNodeAAddress!));
            }
        }

        return reachable;
    }
}
@@ -50,6 +50,12 @@ public static class ServiceCollectionExtensions
/// <summary>Configuration section bound to <see cref="AuditLogPartitionMaintenanceOptions"/>.</summary>
public const string PartitionMaintenanceSectionName = "AuditLog:PartitionMaintenance";
/// <summary>Configuration section bound to <see cref="ZB.MOM.WW.ScadaBridge.AuditLog.Central.AuditLogPurgeOptions"/>.</summary>
public const string PurgeSectionName = "AuditLog:Purge";
/// <summary>Configuration section bound to <see cref="ZB.MOM.WW.ScadaBridge.AuditLog.Central.SiteAuditReconciliationOptions"/>.</summary>
public const string ReconciliationSectionName = "AuditLog:Reconciliation";
/// <summary>
/// Registers the Audit Log (#23) component services: options, the site
/// SQLite writer chain (primary + ring fallback + failure-counter sink),
@@ -327,6 +333,24 @@ public static class ServiceCollectionExtensions
.Bind(config.GetSection(PartitionMaintenanceSectionName));
services.AddHostedService<AuditLogPartitionMaintenanceService>();
// I1 (review): bind the two central-singleton options HERE rather than in
// AddAuditLogCentralReconciliationClient. AkkaHostedService.RegisterCentralActors
// resolves IOptions<AuditLogPurgeOptions> / <SiteAuditReconciliationOptions>
// via GetRequiredService when it wires the AuditLogPurgeActor +
// SiteAuditReconciliationActor singletons; AddAuditLogCentralMaintenance is
// ALWAYS called on the central path (the reconciliation-client helper is the
// one that could in principle be dropped), so binding the options here means
// the singletons get a valid IOptions even if the gRPC-client helper is not
// wired — instead of a cryptic InvalidOperationException at GetRequiredService.
// Defaults are fine when the section is absent (24 h purge cadence /
// 5 min reconciliation tick); production exposes IntervalHours /
// ReconciliationIntervalSeconds only — the test-only *Override knobs are
// not intended to be set from config (see the options classes' remarks).
services.AddOptions<AuditLogPurgeOptions>()
.Bind(config.GetSection(PurgeSectionName));
services.AddOptions<SiteAuditReconciliationOptions>()
.Bind(config.GetSection(ReconciliationSectionName));
// M6 Bundle E (T8 + T9): central health snapshot — a single object
// that owns the CentralAuditWriteFailures + AuditRedactionFailure
// Interlocked counters AND surfaces them on
@@ -362,4 +386,118 @@ public static class ServiceCollectionExtensions
return services;
}
/// <summary>
/// Audit Log (#23) M6 — central-only registration of the production
/// <see cref="IPullAuditEventsClient"/> (<see cref="GrpcPullAuditEventsClient"/>)
/// and its unary-call invoker (<see cref="GrpcPullAuditEventsInvoker"/>) used
/// by <see cref="SiteAuditReconciliationActor"/> to pull reconciliation
/// batches from each site over the <c>PullAuditEvents</c> gRPC RPC.
/// </summary>
/// <remarks>
/// <para>
/// Kept out of <see cref="AddAuditLog"/> — which also runs on site
/// composition roots — because the client dials sites and resolves
/// <see cref="ISiteEnumerator"/> (a central-only collaborator wired
/// alongside the reconciliation singleton). Folding it into
/// <see cref="AddAuditLog"/> would register a site-dialing client on every
/// site host, violating the "every <c>Add*</c> call is safe from any
/// composition root" invariant. This helper is the central analogue of
/// <see cref="AddAuditLogCentralMaintenance"/>.
/// </para>
/// <para>
/// The <see cref="GrpcPullAuditEventsInvoker"/> binds with default
/// <see cref="ZB.MOM.WW.ScadaBridge.Communication.CommunicationOptions"/>
/// keepalive unless an <c>IOptions&lt;CommunicationOptions&gt;</c> is
/// already registered, in which case the configured timings flow through —
/// matching how <c>SiteStreamGrpcClientFactory</c> takes its keepalive from
/// the same options.
/// </para>
/// <para>
/// The production <see cref="ISiteEnumerator"/> (<see cref="SiteEnumerator"/>,
/// wrapping the scoped <c>ISiteRepository</c>) IS registered here — so the
/// <see cref="SiteAuditReconciliationActor"/> singleton wired in the Host can
/// resolve its enumerator + gRPC client from this central-only helper. Keeping
/// the enumerator on this central path preserves the "every <c>Add*</c> call is
/// safe from any composition root" invariant: a site host never calls this
/// helper, so it never registers a site-dialing enumerator. The
/// <see cref="AuditLogPurgeOptions"/> + <see cref="SiteAuditReconciliationOptions"/>
/// bindings live in <see cref="AddAuditLogCentralMaintenance"/> instead (I1
/// review fix) — that helper is unconditionally called on the central path, so
/// the two maintenance singletons get a valid <c>IOptions</c> even if this
/// gRPC-client helper is ever dropped.
/// </para>
/// </remarks>
/// <param name="services">The service collection to register into.</param>
/// <param name="config">Application configuration used to bind the gRPC client's communication options (purge + reconciliation options are bound by <see cref="AddAuditLogCentralMaintenance"/>).</param>
/// <returns>The same <see cref="IServiceCollection"/> for chaining.</returns>
public static IServiceCollection AddAuditLogCentralReconciliationClient(
    this IServiceCollection services,
    IConfiguration config)
{
    // Both arguments are validated up front; config is not otherwise read by
    // this method (the purge + reconciliation options are bound by
    // AddAuditLogCentralMaintenance, which always runs on the central path).
    ArgumentNullException.ThrowIfNull(services);
    ArgumentNullException.ThrowIfNull(config);

    // ISiteEnumerator: turns config-DB Site rows into the reconciliation
    // targets polled by SiteAuditReconciliationActor. It is a singleton that
    // receives the ROOT provider and opens its own scope per call to reach the
    // scoped ISiteRepository.
    services.TryAddSingleton<ISiteEnumerator>(provider => new SiteEnumerator(provider));

    // ---- Audit Log pull client -------------------------------------------
    // The invoker holds the per-endpoint GrpcChannel cache, so exactly one
    // instance must exist; a new invoker per resolution would leak channels.
    // CommunicationOptions are used when an IOptions<> for them is resolvable
    // (the central Host binds it); otherwise the invoker's defaults apply so
    // this helper works standalone.
    services.TryAddSingleton<GrpcPullAuditEventsInvoker>(provider =>
        provider.GetService<Microsoft.Extensions.Options.IOptions<
            ZB.MOM.WW.ScadaBridge.Communication.CommunicationOptions>>() is { } bound
            ? new GrpcPullAuditEventsInvoker(bound.Value)
            : new GrpcPullAuditEventsInvoker());

    // Expose the same singleton under the interface the client depends on.
    services.TryAddSingleton<GrpcPullAuditEventsClient.IPullAuditEventsInvoker>(
        provider => provider.GetRequiredService<GrpcPullAuditEventsInvoker>());

    services.TryAddSingleton<IPullAuditEventsClient>(provider =>
    {
        var enumerator = provider.GetRequiredService<ISiteEnumerator>();
        var invoker = provider.GetRequiredService<GrpcPullAuditEventsClient.IPullAuditEventsInvoker>();
        var logger = provider.GetRequiredService<ILogger<GrpcPullAuditEventsClient>>();
        return new GrpcPullAuditEventsClient(enumerator, invoker, logger);
    });

    // ---- Site Call Audit (#22) pull client -------------------------------
    // Central-only sibling of the audit pull client. It is registered here
    // rather than in the SiteCallAudit project so it can share the
    // ISiteEnumerator registered above (SiteCallAudit does not reference
    // AuditLog). Same singleton and options-fallback rules as the audit invoker.
    services.TryAddSingleton<GrpcPullSiteCallsInvoker>(provider =>
        provider.GetService<Microsoft.Extensions.Options.IOptions<
            ZB.MOM.WW.ScadaBridge.Communication.CommunicationOptions>>() is { } bound
            ? new GrpcPullSiteCallsInvoker(bound.Value)
            : new GrpcPullSiteCallsInvoker());

    services.TryAddSingleton<GrpcPullSiteCallsClient.IPullSiteCallsInvoker>(
        provider => provider.GetRequiredService<GrpcPullSiteCallsInvoker>());

    services.TryAddSingleton<IPullSiteCallsClient>(provider =>
    {
        var enumerator = provider.GetRequiredService<ISiteEnumerator>();
        var invoker = provider.GetRequiredService<GrpcPullSiteCallsClient.IPullSiteCallsInvoker>();
        var logger = provider.GetRequiredService<ILogger<GrpcPullSiteCallsClient>>();
        return new GrpcPullSiteCallsClient(enumerator, invoker, logger);
    });

    return services;
}
}
@@ -118,4 +118,40 @@ public interface IOperationTrackingStore
Task PurgeTerminalAsync(
DateTime olderThanUtc,
CancellationToken ct = default);
/// <summary>
/// Reconciliation read (Site Call Audit #22): return tracking rows whose
/// <c>UpdatedAtUtc</c> is at or after <paramref name="sinceUtc"/> as
/// <see cref="SiteCallOperational"/> projections, ordered by
/// <c>UpdatedAtUtc</c> ascending and capped at <paramref name="batchSize"/>.
/// This is the site-side feed for central's <c>PullSiteCalls</c> RPC — the
/// documented periodic self-heal pull that backfills the eventually-consistent
/// central <c>SiteCalls</c> mirror when best-effort push telemetry is lost.
/// </summary>
/// <remarks>
/// <para>
/// The lower bound is inclusive so a caller can resume from the last
/// returned <c>UpdatedAtUtc</c> without skipping a row that shares that
/// instant; central ingest is insert-if-not-exists then upsert-on-newer, so
/// re-reading the boundary row is a harmless no-op. The oldest-first cap lets
/// the caller advance the cursor monotonically across follow-up pulls.
/// </para>
/// <para>
/// <see cref="SiteCallOperational.SourceSite"/> is left as the empty string:
/// the site id is not a tracking-store column, and the central client re-stamps
/// it from the <c>siteId</c> it dialed (the only authority that knows which
/// site the rows came from). <see cref="SiteCallOperational.Channel"/> is
/// projected from the row's <c>Kind</c> (<c>DbWriteCached → DbOutbound</c>,
/// otherwise <c>ApiOutbound</c>) and <see cref="SiteCallOperational.Target"/>
/// from <c>TargetSummary</c>.
/// </para>
/// <para>
/// The <c>PullSiteCalls</c> gRPC handler reports <c>more_available</c> when the
/// number of rows returned here reaches the requested batch size, so returning
/// fewer rows than <paramref name="batchSize"/> signals "caught up" to central.
/// </para>
/// </remarks>
/// <param name="sinceUtc">Inclusive lower bound on <c>UpdatedAtUtc</c>; <see cref="DateTime.MinValue"/> reads from the start.</param>
/// <param name="batchSize">
/// Maximum number of rows to return (oldest first). The <c>PullSiteCalls</c> gRPC
/// handler rejects non-positive values with <c>InvalidArgument</c> before calling
/// this method; how an implementation treats a non-positive value from any other
/// caller is not specified by this contract.
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <returns>The matching rows projected to <see cref="SiteCallOperational"/>, oldest-first, capped at <paramref name="batchSize"/>.</returns>
Task<IReadOnlyList<SiteCallOperational>> ReadChangedSinceAsync(
DateTime sinceUtc,
int batchSize,
CancellationToken ct = default);
}
@@ -0,0 +1,17 @@
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
namespace ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
/// <summary>
/// Site Call Audit (#22) periodic reconciliation pull response: the next batch of
/// site cached-call operational rows (the eventually-consistent <c>SiteCalls</c>
/// mirror's self-heal feed) plus a <paramref name="MoreAvailable"/> flag signalling
/// the caller to advance the watermark and pull again. Mirrors
/// <see cref="PullAuditEventsResponse"/>; carries the central <see cref="SiteCall"/>
/// entity the ingest path upserts. See Component-SiteCallAudit.md.
/// </summary>
/// <remarks>
/// This record is the domain-level message in
/// <c>ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration</c>. The gRPC wire message
/// generated for <c>SiteStreamService.PullSiteCalls</c> has the same simple name
/// but lives in <c>ZB.MOM.WW.ScadaBridge.Communication.Grpc</c> and carries
/// <c>SiteCallOperationalDto</c> items rather than <see cref="SiteCall"/> entities;
/// qualify the name (or alias it) wherever both namespaces are imported.
/// </remarks>
/// <param name="SiteCalls">The next batch of operational rows, ordered oldest-first by <see cref="SiteCall.UpdatedAtUtc"/>.</param>
/// <param name="MoreAvailable">True when the site saturated the requested batch size — the caller should advance the cursor and pull again.</param>
public sealed record PullSiteCallsResponse(
IReadOnlyList<SiteCall> SiteCalls,
bool MoreAvailable);
@@ -1,5 +1,6 @@
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using Timestamp = Google.Protobuf.WellKnownTypes.Timestamp;
namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc;
@@ -20,10 +21,15 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc;
/// Mirrors the sibling <see cref="AuditEventDtoMapper"/>.
/// </para>
/// <para>
/// Only the DTO→entity direction is provided: nothing in the system maps a
/// <see cref="SiteCall"/> back onto the wire (sites emit the operational state
/// from <c>SiteCallOperational</c>, never from the central <see cref="SiteCall"/>
/// entity), so an entity→DTO method would be dead code.
/// Two directions are provided. <see cref="FromDto"/> rehydrates the central
/// <see cref="SiteCall"/> entity central writes into the <c>SiteCalls</c> table.
/// <see cref="ToDto"/> projects a site-local <see cref="SiteCallOperational"/>
/// onto the wire — used by the Site Call Audit (#22) <c>PullSiteCalls</c>
/// reconciliation handler (the central→site self-heal pull). The
/// <see cref="SiteCall"/> entity itself is never mapped back onto the wire:
/// sites emit operational state from <see cref="SiteCallOperational"/>, never
/// from the central <see cref="SiteCall"/>, so a <c>SiteCall</c>→DTO method
/// would be dead code.
/// </para>
/// <para>
/// String nullability convention: proto3 scalar strings cannot be absent, so the
@@ -70,4 +76,54 @@ public static class SiteCallDtoMapper
IngestedAtUtc = DateTime.UtcNow, // overwritten by AuditLogIngestActor
};
}
/// <summary>
/// Converts a site-local <see cref="SiteCallOperational"/> into the wire DTO
/// returned by the Site Call Audit (#22) <c>PullSiteCalls</c> reconciliation
/// RPC; the counterpart of <see cref="FromDto"/>.
/// </summary>
/// <remarks>
/// proto3 scalar strings cannot be absent, so a null
/// <see cref="SiteCallOperational.LastError"/> or
/// <see cref="SiteCallOperational.SourceNode"/> is sent as the empty string.
/// The nullable <c>HttpStatus</c> and <c>TerminalAtUtc</c> are only assigned
/// when they have a value, leaving them unset on the wire otherwise so that
/// null survives the round-trip back through <see cref="FromDto"/>.
/// </remarks>
/// <param name="operational">The site-local operational state to convert.</param>
/// <returns>A populated <see cref="SiteCallOperationalDto"/> ready to be sent.</returns>
public static SiteCallOperationalDto ToDto(SiteCallOperational operational)
{
    if (operational is null)
    {
        throw new ArgumentNullException(nameof(operational));
    }

    var wire = new SiteCallOperationalDto();

    // Always-present fields, assigned in declaration order.
    wire.TrackedOperationId = operational.TrackedOperationId.ToString();
    wire.Channel = operational.Channel;
    wire.Target = operational.Target;
    wire.SourceSite = operational.SourceSite;
    wire.SourceNode = operational.SourceNode ?? string.Empty;
    wire.Status = operational.Status;
    wire.RetryCount = operational.RetryCount;
    wire.LastError = operational.LastError ?? string.Empty;
    wire.CreatedAtUtc = Timestamp.FromDateTime(EnsureUtc(operational.CreatedAtUtc));
    wire.UpdatedAtUtc = Timestamp.FromDateTime(EnsureUtc(operational.UpdatedAtUtc));

    // Optional fields: left unset on the wire when the source value is null.
    if (operational.HttpStatus is { } httpStatus)
    {
        wire.HttpStatus = httpStatus;
    }

    if (operational.TerminalAtUtc is { } terminalAt)
    {
        wire.TerminalAtUtc = Timestamp.FromDateTime(EnsureUtc(terminalAt));
    }

    return wire;
}
// Timestamp.FromDateTime only accepts Kind=Utc, and every ScadaBridge timestamp
// is UTC by invariant. The value is therefore re-labelled as UTC and never
// converted: a value that already has Kind=Utc (e.g. a row read back from
// SQLite) is returned untouched, and any other kind keeps its ticks and simply
// gets Kind=Utc. Counterpart of AuditEventDtoMapper.EnsureUtc.
private static DateTime EnsureUtc(DateTime value)
{
    if (value.Kind != DateTimeKind.Utc)
    {
        return DateTime.SpecifyKind(value, DateTimeKind.Utc);
    }

    return value;
}
}
@@ -5,7 +5,9 @@ using Grpc.Core;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
using GrpcStatus = Grpc.Core.Status;
@@ -48,6 +50,14 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
// the missing queue as "nothing to ship" and returns an empty response so
// central retries on its next reconciliation cycle.
private ISiteAuditQueue? _siteAuditQueue;
// Site Call Audit (#22): site-local operation-tracking store handed in by
// AkkaHostedService on site roles so the central reconciliation puller's
// PullSiteCalls RPC can read tracking rows changed since a cursor. Null
// when not wired (central-only host or test composing the server in
// isolation) — the handler treats the missing store as "nothing to ship"
// and returns an empty response so central retries on its next cycle.
// Mirrors _siteAuditQueue.
private IOperationTrackingStore? _operationTrackingStore;
/// <summary>
/// Test-only constructor — kept <c>internal</c> so the DI container sees a
@@ -137,6 +147,21 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
_siteAuditQueue = queue;
}
/// <summary>
/// Supplies the site-local <see cref="IOperationTrackingStore"/> that the Site
/// Call Audit (#22) <see cref="PullSiteCalls"/> RPC reads from when serving
/// central's reconciliation pulls. This is the same <c>OperationTrackingStore</c>
/// singleton that backs <c>Tracking.Status(id)</c> on the script thread.
/// </summary>
/// <remarks>
/// Like <see cref="SetSiteAuditQueue"/>, the store is injected after
/// construction because the store and the gRPC server are both DI singletons
/// whose start-up order on a site is independent. The value is stored as given,
/// with no validation.
/// </remarks>
/// <param name="store">The site operation-tracking store used to serve reconciliation pulls.</param>
public void SetOperationTrackingStore(IOperationTrackingStore store) =>
    _operationTrackingStore = store;
/// <summary>
/// Host-017 / REQ-HOST-7: signals the gRPC server to begin its part of the
/// site shutdown sequence — refuse new <see cref="SubscribeInstance"/>
@@ -432,7 +457,9 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
// sinceUtc defaults to DateTime.MinValue when the wrapper is absent —
// i.e. "pull from the beginning of recorded history", which is the
// intended behaviour for the very first reconciliation cycle.
var since = request.SinceUtc?.ToDateTime().ToUniversalTime() ?? DateTime.MinValue;
var since = request.SinceUtc is not null
? DateTime.SpecifyKind(request.SinceUtc.ToDateTime(), DateTimeKind.Utc)
: DateTime.MinValue;
IReadOnlyList<AuditEvent> events;
try
@@ -488,6 +515,69 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
return response;
}
/// <inheritdoc />
public override async Task<PullSiteCallsResponse> PullSiteCalls(
    PullSiteCallsRequest request,
    ServerCallContext context)
{
    // Read the field once so the rest of the call works against one reference.
    // No store wired yet means "nothing to ship": answer with an empty response.
    if (_operationTrackingStore is not { } trackingStore)
    {
        _logger.LogWarning(
            "PullSiteCalls invoked before SetOperationTrackingStore was called; returning empty response.");
        return new PullSiteCallsResponse();
    }

    // Same contract as PullAuditEvents: a malformed batch size is rejected with
    // InvalidArgument here, instead of surfacing as a generic RpcException from
    // the underlying SQLite parameter validation.
    var batchSize = request.BatchSize;
    if (batchSize <= 0)
    {
        throw new RpcException(new GrpcStatus(
            StatusCode.InvalidArgument, "batch_size must be > 0"));
    }

    // An absent since_utc means "from the beginning of recorded history", which
    // is what the very first reconciliation cycle wants.
    var cursor = request.SinceUtc is { } sinceTimestamp
        ? DateTime.SpecifyKind(sinceTimestamp.ToDateTime(), DateTimeKind.Utc)
        : DateTime.MinValue;

    IReadOnlyList<SiteCallOperational> rows;
    try
    {
        rows = await trackingStore.ReadChangedSinceAsync(
            cursor, batchSize, context.CancellationToken);
    }
    catch (Exception ex)
    {
        // Best-effort, like PullAuditEvents: a failed read is logged and turned
        // into an empty response so central simply retries on its next cycle.
        _logger.LogError(ex,
            "ReadChangedSinceAsync failed for since={Since} batch={Batch}; returning empty response.",
            cursor, batchSize);
        return new PullSiteCallsResponse();
    }

    var reply = new PullSiteCallsResponse();
    for (var index = 0; index < rows.Count; index++)
    {
        reply.Operationals.Add(SiteCallDtoMapper.ToDto(rows[index]));
    }

    // A saturated batch tells central to pull again with an advanced cursor.
    // The site never computes that cursor: central walks it forward from the
    // last returned UpdatedAtUtc. There is no MarkReconciled step (unlike
    // PullAuditEvents) because the tracking store is the operational source of
    // truth and the central SiteCalls mirror is upsert-on-newer, so re-reading
    // rows is a harmless no-op.
    reply.MoreAvailable = rows.Count >= batchSize;

    return reply;
}
/// <summary>
/// Tracks a single active stream so cleanup only removes its own entry.
/// </summary>
@@ -10,6 +10,7 @@ service SiteStreamService {
rpc IngestAuditEvents(AuditEventBatch) returns (IngestAck);
rpc IngestCachedTelemetry(CachedTelemetryBatch) returns (IngestAck);
rpc PullAuditEvents(PullAuditEventsRequest) returns (PullAuditEventsResponse);
rpc PullSiteCalls(PullSiteCallsRequest) returns (PullSiteCallsResponse);
}
message InstanceStreamRequest {
@@ -157,3 +158,20 @@ message PullAuditEventsResponse {
repeated AuditEventDto events = 1;
bool more_available = 2;
}
// Site Call Audit (#22) reconciliation pull: central→site request for any
// site-local operation-tracking rows whose UpdatedAtUtc >= since_utc — the
// self-heal feed that backfills the eventually-consistent central SiteCalls
// mirror when best-effort push telemetry is lost. Mirrors PullAuditEvents
// but is a SEPARATE RPC (the tracking store is the operational source of
// truth, distinct from the site audit queue). more_available signals
// batch_size was saturated so the caller advances since_utc and pulls again.
message PullSiteCallsRequest {
// Inclusive lower bound on the tracking rows' UpdatedAtUtc. When unset, the
// site handler reads from DateTime.MinValue, i.e. the start of recorded history.
google.protobuf.Timestamp since_utc = 1;
// Maximum number of rows to return. The site handler rejects values <= 0
// with InvalidArgument ("batch_size must be > 0").
int32 batch_size = 2;
}
message PullSiteCallsResponse {
// Tracking rows changed since the requested cursor. Empty when the site has no
// tracking store wired or the read failed (central retries on its next cycle).
repeated SiteCallOperationalDto operationals = 1;
// True when the number of returned rows reached batch_size; the caller should
// advance since_utc and pull again.
bool more_available = 2;
}
@@ -81,23 +81,30 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
"dWVzdBItCglzaW5jZV91dGMYASABKAsyGi5nb29nbGUucHJvdG9idWYuVGlt",
"ZXN0YW1wEhIKCmJhdGNoX3NpemUYAiABKAUiXAoXUHVsbEF1ZGl0RXZlbnRz",
"UmVzcG9uc2USKQoGZXZlbnRzGAEgAygLMhkuc2l0ZXN0cmVhbS5BdWRpdEV2",
"ZW50RHRvEhYKDm1vcmVfYXZhaWxhYmxlGAIgASgIKlwKB1F1YWxpdHkSFwoT",
"UVVBTElUWV9VTlNQRUNJRklFRBAAEhAKDFFVQUxJVFlfR09PRBABEhUKEVFV",
"QUxJVFlfVU5DRVJUQUlOEAISDwoLUVVBTElUWV9CQUQQAypdCg5BbGFybVN0",
"YXRlRW51bRIbChdBTEFSTV9TVEFURV9VTlNQRUNJRklFRBAAEhYKEkFMQVJN",
"X1NUQVRFX05PUk1BTBABEhYKEkFMQVJNX1NUQVRFX0FDVElWRRACKoUBCg5B",
"bGFybUxldmVsRW51bRIUChBBTEFSTV9MRVZFTF9OT05FEAASEwoPQUxBUk1f",
"TEVWRUxfTE9XEAESFwoTQUxBUk1fTEVWRUxfTE9XX0xPVxACEhQKEEFMQVJN",
"X0xFVkVMX0hJR0gQAxIZChVBTEFSTV9MRVZFTF9ISUdIX0hJR0gQBDLhAgoR",
"U2l0ZVN0cmVhbVNlcnZpY2USVQoRU3Vic2NyaWJlSW5zdGFuY2USIS5zaXRl",
"c3RyZWFtLkluc3RhbmNlU3RyZWFtUmVxdWVzdBobLnNpdGVzdHJlYW0uU2l0",
"ZVN0cmVhbUV2ZW50MAESRwoRSW5nZXN0QXVkaXRFdmVudHMSGy5zaXRlc3Ry",
"ZWFtLkF1ZGl0RXZlbnRCYXRjaBoVLnNpdGVzdHJlYW0uSW5nZXN0QWNrElAK",
"FUluZ2VzdENhY2hlZFRlbGVtZXRyeRIgLnNpdGVzdHJlYW0uQ2FjaGVkVGVs",
"ZW1ldHJ5QmF0Y2gaFS5zaXRlc3RyZWFtLkluZ2VzdEFjaxJaCg9QdWxsQXVk",
"aXRFdmVudHMSIi5zaXRlc3RyZWFtLlB1bGxBdWRpdEV2ZW50c1JlcXVlc3Qa",
"Iy5zaXRlc3RyZWFtLlB1bGxBdWRpdEV2ZW50c1Jlc3BvbnNlQiuqAihaQi5N",
"T00uV1cuU2NhZGFCcmlkZ2UuQ29tbXVuaWNhdGlvbi5HcnBjYgZwcm90bzM="));
"ZW50RHRvEhYKDm1vcmVfYXZhaWxhYmxlGAIgASgIIlkKFFB1bGxTaXRlQ2Fs",
"bHNSZXF1ZXN0Ei0KCXNpbmNlX3V0YxgBIAEoCzIaLmdvb2dsZS5wcm90b2J1",
"Zi5UaW1lc3RhbXASEgoKYmF0Y2hfc2l6ZRgCIAEoBSJpChVQdWxsU2l0ZUNh",
"bGxzUmVzcG9uc2USOAoMb3BlcmF0aW9uYWxzGAEgAygLMiIuc2l0ZXN0cmVh",
"bS5TaXRlQ2FsbE9wZXJhdGlvbmFsRHRvEhYKDm1vcmVfYXZhaWxhYmxlGAIg",
"ASgIKlwKB1F1YWxpdHkSFwoTUVVBTElUWV9VTlNQRUNJRklFRBAAEhAKDFFV",
"QUxJVFlfR09PRBABEhUKEVFVQUxJVFlfVU5DRVJUQUlOEAISDwoLUVVBTElU",
"WV9CQUQQAypdCg5BbGFybVN0YXRlRW51bRIbChdBTEFSTV9TVEFURV9VTlNQ",
"RUNJRklFRBAAEhYKEkFMQVJNX1NUQVRFX05PUk1BTBABEhYKEkFMQVJNX1NU",
"QVRFX0FDVElWRRACKoUBCg5BbGFybUxldmVsRW51bRIUChBBTEFSTV9MRVZF",
"TF9OT05FEAASEwoPQUxBUk1fTEVWRUxfTE9XEAESFwoTQUxBUk1fTEVWRUxf",
"TE9XX0xPVxACEhQKEEFMQVJNX0xFVkVMX0hJR0gQAxIZChVBTEFSTV9MRVZF",
"TF9ISUdIX0hJR0gQBDK3AwoRU2l0ZVN0cmVhbVNlcnZpY2USVQoRU3Vic2Ny",
"aWJlSW5zdGFuY2USIS5zaXRlc3RyZWFtLkluc3RhbmNlU3RyZWFtUmVxdWVz",
"dBobLnNpdGVzdHJlYW0uU2l0ZVN0cmVhbUV2ZW50MAESRwoRSW5nZXN0QXVk",
"aXRFdmVudHMSGy5zaXRlc3RyZWFtLkF1ZGl0RXZlbnRCYXRjaBoVLnNpdGVz",
"dHJlYW0uSW5nZXN0QWNrElAKFUluZ2VzdENhY2hlZFRlbGVtZXRyeRIgLnNp",
"dGVzdHJlYW0uQ2FjaGVkVGVsZW1ldHJ5QmF0Y2gaFS5zaXRlc3RyZWFtLklu",
"Z2VzdEFjaxJaCg9QdWxsQXVkaXRFdmVudHMSIi5zaXRlc3RyZWFtLlB1bGxB",
"dWRpdEV2ZW50c1JlcXVlc3QaIy5zaXRlc3RyZWFtLlB1bGxBdWRpdEV2ZW50",
"c1Jlc3BvbnNlElQKDVB1bGxTaXRlQ2FsbHMSIC5zaXRlc3RyZWFtLlB1bGxT",
"aXRlQ2FsbHNSZXF1ZXN0GiEuc2l0ZXN0cmVhbS5QdWxsU2l0ZUNhbGxzUmVz",
"cG9uc2VCK6oCKFpCLk1PTS5XVy5TY2FkYUJyaWRnZS5Db21tdW5pY2F0aW9u",
"LkdycGNiBnByb3RvMw=="));
descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData,
new pbr::FileDescriptor[] { global::Google.Protobuf.WellKnownTypes.TimestampReflection.Descriptor, global::Google.Protobuf.WellKnownTypes.WrappersReflection.Descriptor, },
new pbr::GeneratedClrTypeInfo(new[] {typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.Quality), typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.AlarmStateEnum), typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.AlarmLevelEnum), }, null, new pbr::GeneratedClrTypeInfo[] {
@@ -112,7 +119,9 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.CachedTelemetryPacket), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.CachedTelemetryPacket.Parser, new[]{ "AuditEvent", "Operational" }, null, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.CachedTelemetryBatch), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.CachedTelemetryBatch.Parser, new[]{ "Packets" }, null, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest.Parser, new[]{ "SinceUtc", "BatchSize" }, null, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse.Parser, new[]{ "Events", "MoreAvailable" }, null, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse.Parser, new[]{ "Events", "MoreAvailable" }, null, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest.Parser, new[]{ "SinceUtc", "BatchSize" }, null, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse), global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse.Parser, new[]{ "Operationals", "MoreAvailable" }, null, null, null, null)
}));
}
#endregion
@@ -5064,6 +5073,483 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
}
/// <summary>
/// Request message of the Site Call Audit (#22) reconciliation pull, sent
/// central→site. It asks the site for every site-local operation-tracking row
/// whose UpdatedAtUtc >= since_utc. This is the self-heal feed that backfills
/// the eventually-consistent central SiteCalls mirror when best-effort push
/// telemetry is lost. The shape mirrors PullAuditEvents, but it is a SEPARATE
/// RPC because the tracking store is the operational source of truth, distinct
/// from the site audit queue. When the response sets more_available
/// (batch_size was saturated) the caller advances since_utc and pulls again.
/// </summary>
[global::System.Diagnostics.DebuggerDisplayAttribute("{ToString(),nq}")]
public sealed partial class PullSiteCallsRequest : pb::IMessage<PullSiteCallsRequest>
#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
    , pb::IBufferMessage
#endif
{
  private static readonly pb::MessageParser<PullSiteCallsRequest> _parser = new pb::MessageParser<PullSiteCallsRequest>(() => new PullSiteCallsRequest());
  private pb::UnknownFieldSet _unknownFields;

  /// <summary>Shared parser that materializes <see cref="PullSiteCallsRequest"/> instances.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public static pb::MessageParser<PullSiteCallsRequest> Parser => _parser;

  /// <summary>Reflection descriptor of this message: entry 12 of the sitestream message-type list.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public static pbr::MessageDescriptor Descriptor =>
      global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SitestreamReflection.Descriptor.MessageTypes[12];

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  pbr::MessageDescriptor pb::IMessage.Descriptor => Descriptor;

  /// <summary>Creates an empty request and runs the <c>OnConstruction</c> partial hook.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public PullSiteCallsRequest() {
    OnConstruction();
  }

  partial void OnConstruction();

  /// <summary>
  /// Copy constructor: copies batch_size and clones since_utc (when set) and
  /// the unknown-field set from <paramref name="other"/>.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public PullSiteCallsRequest(PullSiteCallsRequest other) : this() {
    batchSize_ = other.batchSize_;
    sinceUtc_ = other.sinceUtc_?.Clone();
    _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
  }

  /// <summary>Returns a copy of this message built through the copy constructor.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public PullSiteCallsRequest Clone() => new PullSiteCallsRequest(this);

  /// <summary>Field number for the "since_utc" field.</summary>
  public const int SinceUtcFieldNumber = 1;
  private global::Google.Protobuf.WellKnownTypes.Timestamp sinceUtc_;

  /// <summary>The since_utc cursor; <c>null</c> while the field is unset.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public global::Google.Protobuf.WellKnownTypes.Timestamp SinceUtc {
    get => sinceUtc_;
    set => sinceUtc_ = value;
  }

  /// <summary>Field number for the "batch_size" field.</summary>
  public const int BatchSizeFieldNumber = 2;
  private int batchSize_;

  /// <summary>The batch_size value; 0 is the default and is not written to the wire.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public int BatchSize {
    get => batchSize_;
    set => batchSize_ = value;
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public override bool Equals(object other) => Equals(other as PullSiteCallsRequest);

  /// <summary>
  /// Value equality: same since_utc, same batch_size and equal unknown fields.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public bool Equals(PullSiteCallsRequest other) {
    if (ReferenceEquals(other, null)) {
      return false;
    }
    if (ReferenceEquals(other, this)) {
      return true;
    }
    return object.Equals(SinceUtc, other.SinceUtc)
        && BatchSize == other.BatchSize
        && Equals(_unknownFields, other._unknownFields);
  }

  /// <summary>XOR-combines the hash codes of every field that is set / non-default.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public override int GetHashCode() {
    int result = 1;
    if (sinceUtc_ != null) {
      result ^= SinceUtc.GetHashCode();
    }
    if (BatchSize != 0) {
      result ^= BatchSize.GetHashCode();
    }
    if (_unknownFields != null) {
      result ^= _unknownFields.GetHashCode();
    }
    return result;
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public override string ToString() => pb::JsonFormatter.ToDiagnosticString(this);

  /// <summary>
  /// Writes this message to <paramref name="output"/>. Outside ref-struct
  /// compatibility mode the work is delegated to <c>WriteRawMessage</c>;
  /// otherwise the fields are written here directly.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public void WriteTo(pb::CodedOutputStream output) {
#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
    output.WriteRawMessage(this);
#else
    // Tag 10 = field 1 (since_utc), length-delimited.
    if (sinceUtc_ != null) {
      output.WriteRawTag(10);
      output.WriteMessage(SinceUtc);
    }
    // Tag 16 = field 2 (batch_size), varint.
    if (BatchSize != 0) {
      output.WriteRawTag(16);
      output.WriteInt32(BatchSize);
    }
    if (_unknownFields != null) {
      _unknownFields.WriteTo(output);
    }
#endif
  }

#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
  /// <summary>Buffer-based writer: emits since_utc, batch_size and any unknown fields.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  void pb::IBufferMessage.InternalWriteTo(ref pb::WriteContext output) {
    // Tag 10 = field 1 (since_utc), length-delimited.
    if (sinceUtc_ != null) {
      output.WriteRawTag(10);
      output.WriteMessage(SinceUtc);
    }
    // Tag 16 = field 2 (batch_size), varint.
    if (BatchSize != 0) {
      output.WriteRawTag(16);
      output.WriteInt32(BatchSize);
    }
    if (_unknownFields != null) {
      _unknownFields.WriteTo(ref output);
    }
  }
#endif

  /// <summary>Sums the encoded size of every field that would be written.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public int CalculateSize() {
    int total = 0;
    if (sinceUtc_ != null) {
      // 1 accounts for the tag preceding the nested message.
      total += 1 + pb::CodedOutputStream.ComputeMessageSize(SinceUtc);
    }
    if (BatchSize != 0) {
      total += 1 + pb::CodedOutputStream.ComputeInt32Size(BatchSize);
    }
    if (_unknownFields != null) {
      total += _unknownFields.CalculateSize();
    }
    return total;
  }

  /// <summary>
  /// Merges <paramref name="other"/> into this message: since_utc is merged
  /// field-wise (allocating it first when unset), a non-zero batch_size
  /// overwrites, and unknown fields are merged. A <c>null</c> argument is a no-op.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public void MergeFrom(PullSiteCallsRequest other) {
    if (other == null) {
      return;
    }
    if (other.sinceUtc_ != null) {
      if (sinceUtc_ == null) {
        SinceUtc = new global::Google.Protobuf.WellKnownTypes.Timestamp();
      }
      SinceUtc.MergeFrom(other.SinceUtc);
    }
    if (other.BatchSize != 0) {
      BatchSize = other.BatchSize;
    }
    _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
  }

  /// <summary>
  /// Parses fields from <paramref name="input"/> into this message. Outside
  /// ref-struct compatibility mode the work is delegated to <c>ReadRawMessage</c>.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public void MergeFrom(pb::CodedInputStream input) {
#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
    input.ReadRawMessage(this);
#else
    uint fieldTag;
    while ((fieldTag = input.ReadTag()) != 0) {
      if ((fieldTag & 7) == 4) {
        // Abort on any end group tag.
        return;
      }
      switch (fieldTag) {
        case 10: {
          if (sinceUtc_ == null) {
            SinceUtc = new global::Google.Protobuf.WellKnownTypes.Timestamp();
          }
          input.ReadMessage(SinceUtc);
          break;
        }
        case 16: {
          BatchSize = input.ReadInt32();
          break;
        }
        default:
          // Anything unrecognized is preserved in the unknown-field set.
          _unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
          break;
      }
    }
#endif
  }

#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
  /// <summary>Buffer-based parser: reads tags until the input is exhausted or an end-group tag appears.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  void pb::IBufferMessage.InternalMergeFrom(ref pb::ParseContext input) {
    uint fieldTag;
    while ((fieldTag = input.ReadTag()) != 0) {
      if ((fieldTag & 7) == 4) {
        // Abort on any end group tag.
        return;
      }
      switch (fieldTag) {
        case 10: {
          if (sinceUtc_ == null) {
            SinceUtc = new global::Google.Protobuf.WellKnownTypes.Timestamp();
          }
          input.ReadMessage(SinceUtc);
          break;
        }
        case 16: {
          BatchSize = input.ReadInt32();
          break;
        }
        default:
          // Anything unrecognized is preserved in the unknown-field set.
          _unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, ref input);
          break;
      }
    }
  }
#endif
}
/// <summary>
/// Response message of the PullSiteCalls RPC: a repeated list of
/// <c>SiteCallOperationalDto</c> entries ("operationals", field 1) plus the
/// "more_available" flag (field 2).
/// </summary>
[global::System.Diagnostics.DebuggerDisplayAttribute("{ToString(),nq}")]
public sealed partial class PullSiteCallsResponse : pb::IMessage<PullSiteCallsResponse>
#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
    , pb::IBufferMessage
#endif
{
  private static readonly pb::MessageParser<PullSiteCallsResponse> _parser = new pb::MessageParser<PullSiteCallsResponse>(() => new PullSiteCallsResponse());
  private pb::UnknownFieldSet _unknownFields;

  /// <summary>Shared parser that materializes <see cref="PullSiteCallsResponse"/> instances.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public static pb::MessageParser<PullSiteCallsResponse> Parser => _parser;

  /// <summary>Reflection descriptor of this message: entry 13 of the sitestream message-type list.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public static pbr::MessageDescriptor Descriptor =>
      global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SitestreamReflection.Descriptor.MessageTypes[13];

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  pbr::MessageDescriptor pb::IMessage.Descriptor => Descriptor;

  /// <summary>Creates an empty response and runs the <c>OnConstruction</c> partial hook.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public PullSiteCallsResponse() {
    OnConstruction();
  }

  partial void OnConstruction();

  /// <summary>
  /// Copy constructor: copies more_available and clones the operationals list
  /// and the unknown-field set from <paramref name="other"/>.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public PullSiteCallsResponse(PullSiteCallsResponse other) : this() {
    moreAvailable_ = other.moreAvailable_;
    operationals_ = other.operationals_.Clone();
    _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
  }

  /// <summary>Returns a copy of this message built through the copy constructor.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public PullSiteCallsResponse Clone() => new PullSiteCallsResponse(this);

  /// <summary>Field number for the "operationals" field.</summary>
  public const int OperationalsFieldNumber = 1;
  // Codec for the repeated message field; 10 is the tag of field 1, length-delimited.
  private static readonly pb::FieldCodec<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteCallOperationalDto> _repeated_operationals_codec
      = pb::FieldCodec.ForMessage(10, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteCallOperationalDto.Parser);
  private readonly pbc::RepeatedField<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteCallOperationalDto> operationals_ = new pbc::RepeatedField<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteCallOperationalDto>();

  /// <summary>The returned rows. Get-only: callers mutate the list in place.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public pbc::RepeatedField<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteCallOperationalDto> Operationals => operationals_;

  /// <summary>Field number for the "more_available" field.</summary>
  public const int MoreAvailableFieldNumber = 2;
  private bool moreAvailable_;

  /// <summary>The more_available flag; <c>false</c> is the default and is not written to the wire.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public bool MoreAvailable {
    get => moreAvailable_;
    set => moreAvailable_ = value;
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public override bool Equals(object other) => Equals(other as PullSiteCallsResponse);

  /// <summary>
  /// Value equality: equal operationals lists, same more_available and equal
  /// unknown fields.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public bool Equals(PullSiteCallsResponse other) {
    if (ReferenceEquals(other, null)) {
      return false;
    }
    if (ReferenceEquals(other, this)) {
      return true;
    }
    return operationals_.Equals(other.operationals_)
        && MoreAvailable == other.MoreAvailable
        && Equals(_unknownFields, other._unknownFields);
  }

  /// <summary>XOR-combines the list hash with the hash of every non-default field.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public override int GetHashCode() {
    int result = 1;
    result ^= operationals_.GetHashCode();
    if (MoreAvailable) {
      result ^= MoreAvailable.GetHashCode();
    }
    if (_unknownFields != null) {
      result ^= _unknownFields.GetHashCode();
    }
    return result;
  }

  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public override string ToString() => pb::JsonFormatter.ToDiagnosticString(this);

  /// <summary>
  /// Writes this message to <paramref name="output"/>. Outside ref-struct
  /// compatibility mode the work is delegated to <c>WriteRawMessage</c>;
  /// otherwise the fields are written here directly.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public void WriteTo(pb::CodedOutputStream output) {
#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
    output.WriteRawMessage(this);
#else
    operationals_.WriteTo(output, _repeated_operationals_codec);
    // Tag 16 = field 2 (more_available), varint.
    if (MoreAvailable) {
      output.WriteRawTag(16);
      output.WriteBool(MoreAvailable);
    }
    if (_unknownFields != null) {
      _unknownFields.WriteTo(output);
    }
#endif
  }

#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
  /// <summary>Buffer-based writer: emits operationals, more_available and any unknown fields.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  void pb::IBufferMessage.InternalWriteTo(ref pb::WriteContext output) {
    operationals_.WriteTo(ref output, _repeated_operationals_codec);
    // Tag 16 = field 2 (more_available), varint.
    if (MoreAvailable) {
      output.WriteRawTag(16);
      output.WriteBool(MoreAvailable);
    }
    if (_unknownFields != null) {
      _unknownFields.WriteTo(ref output);
    }
  }
#endif

  /// <summary>Sums the encoded size of every field that would be written.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public int CalculateSize() {
    int total = 0;
    total += operationals_.CalculateSize(_repeated_operationals_codec);
    if (MoreAvailable) {
      // One byte for the tag plus one byte for the bool payload.
      total += 2;
    }
    if (_unknownFields != null) {
      total += _unknownFields.CalculateSize();
    }
    return total;
  }

  /// <summary>
  /// Merges <paramref name="other"/> into this message: its operationals are
  /// appended, a <c>true</c> more_available overwrites, and unknown fields are
  /// merged. A <c>null</c> argument is a no-op.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public void MergeFrom(PullSiteCallsResponse other) {
    if (other == null) {
      return;
    }
    operationals_.Add(other.operationals_);
    if (other.MoreAvailable) {
      MoreAvailable = true;
    }
    _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
  }

  /// <summary>
  /// Parses fields from <paramref name="input"/> into this message. Outside
  /// ref-struct compatibility mode the work is delegated to <c>ReadRawMessage</c>.
  /// </summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  public void MergeFrom(pb::CodedInputStream input) {
#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
    input.ReadRawMessage(this);
#else
    uint fieldTag;
    while ((fieldTag = input.ReadTag()) != 0) {
      if ((fieldTag & 7) == 4) {
        // Abort on any end group tag.
        return;
      }
      switch (fieldTag) {
        case 10: {
          operationals_.AddEntriesFrom(input, _repeated_operationals_codec);
          break;
        }
        case 16: {
          MoreAvailable = input.ReadBool();
          break;
        }
        default:
          // Anything unrecognized is preserved in the unknown-field set.
          _unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
          break;
      }
    }
#endif
  }

#if !GOOGLE_PROTOBUF_REFSTRUCT_COMPATIBILITY_MODE
  /// <summary>Buffer-based parser: reads tags until the input is exhausted or an end-group tag appears.</summary>
  [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
  [global::System.CodeDom.Compiler.GeneratedCode("protoc", null)]
  void pb::IBufferMessage.InternalMergeFrom(ref pb::ParseContext input) {
    uint fieldTag;
    while ((fieldTag = input.ReadTag()) != 0) {
      if ((fieldTag & 7) == 4) {
        // Abort on any end group tag.
        return;
      }
      switch (fieldTag) {
        case 10: {
          operationals_.AddEntriesFrom(ref input, _repeated_operationals_codec);
          break;
        }
        case 16: {
          MoreAvailable = input.ReadBool();
          break;
        }
        default:
          // Anything unrecognized is preserved in the unknown-field set.
          _unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, ref input);
          break;
      }
    }
  }
#endif
}
#endregion
}
@@ -59,6 +59,10 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
static readonly grpc::Marshaller<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest> __Marshaller_sitestream_PullAuditEventsRequest = grpc::Marshallers.Create(__Helper_SerializeMessage, context => __Helper_DeserializeMessage(context, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest.Parser));
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
static readonly grpc::Marshaller<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse> __Marshaller_sitestream_PullAuditEventsResponse = grpc::Marshallers.Create(__Helper_SerializeMessage, context => __Helper_DeserializeMessage(context, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse.Parser));
// Marshaller for PullSiteCallsRequest: serializes via __Helper_SerializeMessage and
// deserializes via __Helper_DeserializeMessage using PullSiteCallsRequest.Parser.
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
static readonly grpc::Marshaller<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest> __Marshaller_sitestream_PullSiteCallsRequest = grpc::Marshallers.Create(__Helper_SerializeMessage, context => __Helper_DeserializeMessage(context, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest.Parser));
// Marshaller for PullSiteCallsResponse: same helpers, paired with PullSiteCallsResponse.Parser.
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
static readonly grpc::Marshaller<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse> __Marshaller_sitestream_PullSiteCallsResponse = grpc::Marshallers.Create(__Helper_SerializeMessage, context => __Helper_DeserializeMessage(context, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse.Parser));
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
static readonly grpc::Method<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.InstanceStreamRequest, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteStreamEvent> __Method_SubscribeInstance = new grpc::Method<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.InstanceStreamRequest, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.SiteStreamEvent>(
@@ -92,6 +96,14 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
__Marshaller_sitestream_PullAuditEventsRequest,
__Marshaller_sitestream_PullAuditEventsResponse);
// Method descriptor for the unary "PullSiteCalls" RPC on this service: it ties the
// method name to the PullSiteCallsRequest / PullSiteCallsResponse marshallers.
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
static readonly grpc::Method<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse> __Method_PullSiteCalls = new grpc::Method<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse>(
grpc::MethodType.Unary,
__ServiceName,
"PullSiteCalls",
__Marshaller_sitestream_PullSiteCallsRequest,
__Marshaller_sitestream_PullSiteCallsResponse);
/// <summary>Service descriptor</summary>
public static global::Google.Protobuf.Reflection.ServiceDescriptor Descriptor
{
@@ -126,6 +138,12 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
throw new grpc::RpcException(new grpc::Status(grpc::StatusCode.Unimplemented, ""));
}
/// <summary>
/// Server-side handler for the unary PullSiteCalls RPC. This base
/// implementation always fails with <c>StatusCode.Unimplemented</c>; the
/// method is virtual so a concrete service can supply the real behavior.
/// </summary>
/// <param name="request">The request received from the client.</param>
/// <param name="context">The context of the server-side call handler being invoked.</param>
/// <returns>Never returns normally in this base implementation.</returns>
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
public virtual global::System.Threading.Tasks.Task<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse> PullSiteCalls(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest request, grpc::ServerCallContext context)
{
    var unimplemented = new grpc::Status(grpc::StatusCode.Unimplemented, "");
    throw new grpc::RpcException(unimplemented);
}
}
/// <summary>Client for SiteStreamService</summary>
@@ -225,6 +243,26 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
{
return CallInvoker.AsyncUnaryCall(__Method_PullAuditEvents, null, options, request);
}
/// <summary>
/// Blocking PullSiteCalls call. Packs <paramref name="headers"/>,
/// <paramref name="deadline"/> and <paramref name="cancellationToken"/> into
/// <c>CallOptions</c> and forwards to the <c>CallOptions</c> overload.
/// </summary>
/// <param name="request">The request to send to the server.</param>
/// <param name="headers">Optional request metadata.</param>
/// <param name="deadline">Optional deadline for the call.</param>
/// <param name="cancellationToken">Optional token for cancelling the call.</param>
/// <returns>The response received from the server.</returns>
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
public virtual global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse PullSiteCalls(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest request, grpc::Metadata headers = null, global::System.DateTime? deadline = null, global::System.Threading.CancellationToken cancellationToken = default(global::System.Threading.CancellationToken))
{
    var callOptions = new grpc::CallOptions(headers, deadline, cancellationToken);
    return PullSiteCalls(request, callOptions);
}
/// <summary>
/// Blocking PullSiteCalls call issued through <c>CallInvoker.BlockingUnaryCall</c>
/// with the supplied <paramref name="options"/>.
/// </summary>
/// <param name="request">The request to send to the server.</param>
/// <param name="options">The options for the call.</param>
/// <returns>The response received from the server.</returns>
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
public virtual global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse PullSiteCalls(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest request, grpc::CallOptions options)
    => CallInvoker.BlockingUnaryCall(__Method_PullSiteCalls, null, options, request);
/// <summary>
/// Asynchronous PullSiteCalls call. Packs <paramref name="headers"/>,
/// <paramref name="deadline"/> and <paramref name="cancellationToken"/> into
/// <c>CallOptions</c> and forwards to the <c>CallOptions</c> overload.
/// </summary>
/// <param name="request">The request to send to the server.</param>
/// <param name="headers">Optional request metadata.</param>
/// <param name="deadline">Optional deadline for the call.</param>
/// <param name="cancellationToken">Optional token for cancelling the call.</param>
/// <returns>The call object.</returns>
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
public virtual grpc::AsyncUnaryCall<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse> PullSiteCallsAsync(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest request, grpc::Metadata headers = null, global::System.DateTime? deadline = null, global::System.Threading.CancellationToken cancellationToken = default(global::System.Threading.CancellationToken))
{
    var callOptions = new grpc::CallOptions(headers, deadline, cancellationToken);
    return PullSiteCallsAsync(request, callOptions);
}
/// <summary>
/// Asynchronous PullSiteCalls call issued through <c>CallInvoker.AsyncUnaryCall</c>
/// with the supplied <paramref name="options"/>.
/// </summary>
/// <param name="request">The request to send to the server.</param>
/// <param name="options">The options for the call.</param>
/// <returns>The call object.</returns>
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
public virtual grpc::AsyncUnaryCall<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse> PullSiteCallsAsync(global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest request, grpc::CallOptions options)
    => CallInvoker.AsyncUnaryCall(__Method_PullSiteCalls, null, options, request);
/// <summary>Creates a new instance of client from given <c>ClientBaseConfiguration</c>.</summary>
[global::System.CodeDom.Compiler.GeneratedCode("grpc_csharp_plugin", null)]
protected override SiteStreamServiceClient NewInstance(ClientBaseConfiguration configuration)
@@ -242,7 +280,8 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
.AddMethod(__Method_SubscribeInstance, serviceImpl.SubscribeInstance)
.AddMethod(__Method_IngestAuditEvents, serviceImpl.IngestAuditEvents)
.AddMethod(__Method_IngestCachedTelemetry, serviceImpl.IngestCachedTelemetry)
.AddMethod(__Method_PullAuditEvents, serviceImpl.PullAuditEvents).Build();
.AddMethod(__Method_PullAuditEvents, serviceImpl.PullAuditEvents)
.AddMethod(__Method_PullSiteCalls, serviceImpl.PullSiteCalls).Build();
}
/// <summary>Register service method with a service binder with or without implementation. Useful when customizing the service binding logic.
@@ -256,6 +295,7 @@ namespace ZB.MOM.WW.ScadaBridge.Communication.Grpc {
serviceBinder.AddMethod(__Method_IngestAuditEvents, serviceImpl == null ? null : new grpc::UnaryServerMethod<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.AuditEventBatch, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.IngestAck>(serviceImpl.IngestAuditEvents));
serviceBinder.AddMethod(__Method_IngestCachedTelemetry, serviceImpl == null ? null : new grpc::UnaryServerMethod<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.CachedTelemetryBatch, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.IngestAck>(serviceImpl.IngestCachedTelemetry));
serviceBinder.AddMethod(__Method_PullAuditEvents, serviceImpl == null ? null : new grpc::UnaryServerMethod<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse>(serviceImpl.PullAuditEvents));
serviceBinder.AddMethod(__Method_PullSiteCalls, serviceImpl == null ? null : new grpc::UnaryServerMethod<global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest, global::ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse>(serviceImpl.PullSiteCalls));
}
}
@@ -588,6 +588,117 @@ akka {{
_logger.LogInformation(
"SiteCallAuditActor singleton created and registered with CentralCommunicationActor");
// Audit Log (#23) M6 Bundle B/C — two central-only maintenance singletons were
// fully implemented but never instantiated, and are started from here: the
// daily AuditLog partition-switch purge (AuditLogPurgeActor) and the periodic
// per-site audit-event reconciliation pull (SiteAuditReconciliationActor).
// Each follows the SiteCallAudit / NotificationOutbox singleton pattern above:
//   * a ClusterSingletonManager pins the actor to the active central node,
//   * a ClusterSingletonProxy gives it a stable address,
//   * a PhaseClusterLeave graceful-stop task drains the in-flight tick before
//     handover.
// Options plus the production ISiteEnumerator + IPullAuditEventsClient come
// from AddAuditLogCentralReconciliationClient (central composition root only).
// Both actors receive the root IServiceProvider and open their own per-tick DI
// scope, because IAuditLogRepository / ISiteRepository are scoped EF Core
// services.

// --- AuditLogPurgeActor: resolve collaborators ---
var auditPurgeLogger = _serviceProvider
    .GetRequiredService<ILoggerFactory>()
    .CreateLogger<ZB.MOM.WW.ScadaBridge.AuditLog.Central.AuditLogPurgeActor>();
var auditPurgeOptions = _serviceProvider
    .GetRequiredService<IOptions<ZB.MOM.WW.ScadaBridge.AuditLog.Central.AuditLogPurgeOptions>>();
var auditLogOptions = _serviceProvider
    .GetRequiredService<IOptions<ZB.MOM.WW.ScadaBridge.AuditLog.Configuration.AuditLogOptions>>();

// --- singleton manager, named "audit-log-purge" ---
var auditPurgeActorProps = Props.Create(() => new ZB.MOM.WW.ScadaBridge.AuditLog.Central.AuditLogPurgeActor(
    _serviceProvider,
    auditPurgeOptions,
    auditLogOptions,
    auditPurgeLogger));
var auditPurgeManagerSettings = ClusterSingletonManagerSettings
    .Create(_actorSystem!)
    .WithSingletonName("audit-log-purge");
var auditPurgeSingletonManager = _actorSystem!.ActorOf(
    ClusterSingletonManager.Props(
        singletonProps: auditPurgeActorProps,
        terminationMessage: PoisonPill.Instance,
        settings: auditPurgeManagerSettings),
    "audit-log-purge-singleton");

// --- drain on cluster leave: give the manager up to 10 s to stop. A failed
// or timed-out stop is logged and swallowed so shutdown still proceeds. ---
Akka.Actor.CoordinatedShutdown.Get(_actorSystem).AddTask(
    Akka.Actor.CoordinatedShutdown.PhaseClusterLeave,
    "drain-audit-log-purge-singleton",
    async () =>
    {
        var auditPurgeDrainTimeout = TimeSpan.FromSeconds(10);
        try
        {
            await auditPurgeSingletonManager.GracefulStop(auditPurgeDrainTimeout);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "AuditLogPurge singleton did not drain within the graceful-stop "
                + "timeout; falling through to PoisonPill handover");
        }

        return Akka.Done.Instance;
    });

// --- proxy: stable address for the singleton ---
var auditPurgeProxySettings = ClusterSingletonProxySettings
    .Create(_actorSystem)
    .WithSingletonName("audit-log-purge");
_actorSystem.ActorOf(
    ClusterSingletonProxy.Props(
        singletonManagerPath: "/user/audit-log-purge-singleton",
        settings: auditPurgeProxySettings),
    "audit-log-purge-proxy");
_logger.LogInformation("AuditLogPurgeActor singleton created");
// SiteAuditReconciliationActor — the self-healing fallback puller, started as
// a cluster singleton with a proxy and a PhaseClusterLeave drain task. Its
// production ISiteEnumerator (config-DB Site projection) and
// IPullAuditEventsClient (gRPC) are resolved from the central
// reconciliation-client helper registered in Program.cs.

// --- resolve collaborators ---
var auditReconLogger = _serviceProvider
    .GetRequiredService<ILoggerFactory>()
    .CreateLogger<ZB.MOM.WW.ScadaBridge.AuditLog.Central.SiteAuditReconciliationActor>();
var auditReconOptions = _serviceProvider
    .GetRequiredService<IOptions<ZB.MOM.WW.ScadaBridge.AuditLog.Central.SiteAuditReconciliationOptions>>();
var auditReconSites = _serviceProvider
    .GetRequiredService<ZB.MOM.WW.ScadaBridge.AuditLog.Central.ISiteEnumerator>();
var auditReconClient = _serviceProvider
    .GetRequiredService<ZB.MOM.WW.ScadaBridge.AuditLog.Central.IPullAuditEventsClient>();

// --- singleton manager, named "site-audit-reconciliation" ---
var auditReconActorProps = Props.Create(() => new ZB.MOM.WW.ScadaBridge.AuditLog.Central.SiteAuditReconciliationActor(
    auditReconSites,
    auditReconClient,
    _serviceProvider,
    auditReconOptions,
    auditReconLogger));
var auditReconManagerSettings = ClusterSingletonManagerSettings
    .Create(_actorSystem!)
    .WithSingletonName("site-audit-reconciliation");
var auditReconSingletonManager = _actorSystem!.ActorOf(
    ClusterSingletonManager.Props(
        singletonProps: auditReconActorProps,
        terminationMessage: PoisonPill.Instance,
        settings: auditReconManagerSettings),
    "site-audit-reconciliation-singleton");

// --- drain on cluster leave: give the manager up to 10 s to stop. A failed
// or timed-out stop is logged and swallowed so shutdown still proceeds. ---
Akka.Actor.CoordinatedShutdown.Get(_actorSystem).AddTask(
    Akka.Actor.CoordinatedShutdown.PhaseClusterLeave,
    "drain-site-audit-reconciliation-singleton",
    async () =>
    {
        var auditReconDrainTimeout = TimeSpan.FromSeconds(10);
        try
        {
            await auditReconSingletonManager.GracefulStop(auditReconDrainTimeout);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "SiteAuditReconciliation singleton did not drain within the graceful-stop "
                + "timeout; falling through to PoisonPill handover");
        }

        return Akka.Done.Instance;
    });

// --- proxy: stable address for the singleton ---
var auditReconProxySettings = ClusterSingletonProxySettings
    .Create(_actorSystem)
    .WithSingletonName("site-audit-reconciliation");
_actorSystem.ActorOf(
    ClusterSingletonProxy.Props(
        singletonManagerPath: "/user/site-audit-reconciliation-singleton",
        settings: auditReconProxySettings),
    "site-audit-reconciliation-proxy");
_logger.LogInformation("SiteAuditReconciliationActor singleton created");
_logger.LogInformation("Central actors registered. CentralCommunicationActor created.");
}
@@ -898,6 +1009,18 @@ akka {{
// direction one-way (Host knows both; Communication doesn't reach back
// into AuditLog).
grpcServer?.SetSiteAuditQueue(siteAuditQueue);
// Site Call Audit (#22): hand the site-local OperationTrackingStore to
// the gRPC server so the PullSiteCalls reconciliation RPC can serve
// central's self-heal pulls. siteTrackingStore is resolved above with
// GetService — present on site composition roots, null on central — so
// wire the seam only when the store exists. Like SetSiteAuditQueue, both
// the store and the gRPC server are singletons; wiring here keeps the
// dependency direction one-way (Host knows both; Communication doesn't
// reach back into SiteRuntime).
if (siteTrackingStore is not null)
{
grpcServer?.SetOperationTrackingStore(siteTrackingStore);
}
grpcServer?.SetReady(_actorSystem!);
}
}
@@ -97,6 +97,13 @@ try
// pf_AuditLog_Month forward monthly. Depends on IPartitionMaintenance
// (registered below by AddConfigurationDatabase).
builder.Services.AddAuditLogCentralMaintenance(builder.Configuration);
// #23 M6 Bundle B/C — central-only registration backing the two
// maintenance singletons started in AkkaHostedService: the production
// ISiteEnumerator + IPullAuditEventsClient (gRPC) used by the
// SiteAuditReconciliationActor, plus the AuditLogPurgeOptions /
// SiteAuditReconciliationOptions bindings consumed by both singletons.
// Central-only by design (it dials sites), kept out of AddAuditLog.
builder.Services.AddAuditLogCentralReconciliationClient(builder.Configuration);
// Site Call Audit (#22) — central node owns the SiteCallAuditActor
// singleton (M3 Bundle F). The extension itself currently registers
// nothing — actor Props are constructed inline in AkkaHostedService —
@@ -1,6 +1,7 @@
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
@@ -24,13 +25,17 @@ namespace ZB.MOM.WW.ScadaBridge.SiteCallAudit;
/// <remarks>
/// <para>
/// Implemented: direct <see cref="UpsertSiteCallCommand"/> telemetry ingest,
/// query, detail and KPI handlers (Task 4), and the central→site Retry/Discard
/// relay (Task 5 — the relay handlers live in this actor). Deferred (per
/// CLAUDE.md scope discipline — both land in a later follow-up): the periodic
/// per-site reconciliation puller that backfills lost telemetry, and the daily
/// terminal-row purge scheduler (the repository exposes
/// <c>PurgeTerminalAsync</c> but nothing in this module currently invokes it
/// on a schedule).
/// query, detail and KPI handlers (Task 4), the central→site Retry/Discard
/// relay (Task 5 — the relay handlers live in this actor), the periodic
/// per-site reconciliation puller that backfills lost telemetry (Piece A —
/// <see cref="OnReconciliationTickAsync"/>, the documented self-heal pull), and
/// the daily terminal-row purge scheduler (Piece B —
/// <see cref="OnPurgeTickAsync"/>, which invokes
/// <see cref="ISiteCallAuditRepository.PurgeTerminalAsync"/> on a timer). Both
/// background timers are started in <see cref="PreStart"/> and gate on the
/// reconciliation collaborators (<see cref="IPullSiteCallsClient"/> +
/// <see cref="ISiteEnumerator"/>) being available — the repo-only test ctor
/// injects neither, so neither timer runs there.
/// </para>
/// <para>
/// Per CLAUDE.md "audit-write failure NEVER aborts the user-facing action" —
@@ -68,6 +73,36 @@ public class SiteCallAuditActor : ReceiveActor
private readonly SiteCallAuditOptions _options;
private readonly ILogger<SiteCallAuditActor> _logger;
/// <summary>
/// Reconciliation collaborators (Piece A). The per-site self-heal pull
/// (<see cref="IPullSiteCallsClient"/>) and the site list
/// (<see cref="ISiteEnumerator"/>). On the production path these are
/// resolved once from the root <see cref="IServiceProvider"/> (central
/// singletons registered by <c>AddAuditLogCentralReconciliationClient</c>);
/// in the test path they are injected directly. They are <c>null</c> when
/// the actor was built via the repo-only test ctor — in that case the
/// reconciliation tick is NOT started (see <see cref="StartReconciliationTimer"/>);
/// the purge tick gates on the same collaborators (see <see cref="StartPurgeTimer"/>).
/// </summary>
private readonly IPullSiteCallsClient? _pullClient;
private readonly ISiteEnumerator? _siteEnumerator;
/// <summary>
/// Per-site reconciliation watermark — the highest
/// <see cref="SiteCall.UpdatedAtUtc"/> seen for that site on a previous
/// tick. The next tick asks for rows at or after this cursor; idempotent
/// monotonic <see cref="ISiteCallAuditRepository.UpsertAsync"/> swallows any
/// duplicate-with-same-timestamp rows. In-memory for the singleton's
/// lifetime — a failover / restart resets every cursor to
/// <see cref="DateTime.MinValue"/>, which is conservative but correct
/// (the next tick re-pulls and idempotent upsert dedupes). Mirrors
/// <c>SiteAuditReconciliationActor</c>.
/// </summary>
private readonly Dictionary<string, DateTime> _reconciliationCursors = new();
private ICancelable? _reconciliationTimer;
private ICancelable? _purgeTimer;
/// <summary>
/// Task 5 (#22): the central→site command transport — the
/// <c>CentralCommunicationActor</c>, which owns the per-site
@@ -87,6 +122,11 @@ public class SiteCallAuditActor : ReceiveActor
/// across every message. Used by Bundle C's MSSQL-backed TestKit fixture.
/// An optional <paramref name="options"/> lets a test pin the stuck/KPI
/// windows; when omitted the production defaults apply.
/// <para>
/// This ctor injects NO reconciliation client/enumerator, so the
/// reconciliation tick is gated off (see <see cref="StartReconciliationTimer"/>)
/// — the MSSQL-backed read/upsert tests must not fire phantom pulls.
/// </para>
/// </summary>
/// <param name="repository">Concrete repository instance to use for all messages.</param>
/// <param name="logger">Logger for diagnostics and error reporting.</param>
@@ -106,6 +146,49 @@ public class SiteCallAuditActor : ReceiveActor
RegisterHandlers();
}
/// <summary>
/// Test-mode constructor used to exercise the reconciliation tick (Piece A)
/// in-memory: the repository and both reconciliation collaborators are handed
/// in directly, so no DI container or live gRPC channel is needed. With the
/// client and enumerator present, both the reconciliation timer and the purge
/// timer are started (see <see cref="StartReconciliationTimer"/> and
/// <see cref="StartPurgeTimer"/>, which gate on those collaborators).
/// </summary>
/// <param name="repository">Repository instance reused for every upsert and purge.</param>
/// <param name="siteEnumerator">Supplies the list of sites visited on each reconciliation tick.</param>
/// <param name="pullClient">Client that pulls changed rows from a single site.</param>
/// <param name="logger">Diagnostics / error logger.</param>
/// <param name="options">Optional option overrides; a default <see cref="SiteCallAuditOptions"/> is used when null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="repository"/>, <paramref name="siteEnumerator"/>,
/// <paramref name="pullClient"/> or <paramref name="logger"/> is null.
/// </exception>
/// <remarks>
/// Kept public rather than internal: Akka's default <c>ActivatorProducer</c>
/// creates the actor reflectively using public-only binding flags, and an
/// internal ctor surfaces as a <c>MissingMethodException</c> when the actor is
/// created. Its concrete-collaborator parameter list is what tells it apart
/// from the production <see cref="IServiceProvider"/> ctor; only the test
/// project (or a host resolving the collaborators by hand) uses it.
/// </remarks>
public SiteCallAuditActor(
    ISiteCallAuditRepository repository,
    ISiteEnumerator siteEnumerator,
    IPullSiteCallsClient pullClient,
    ILogger<SiteCallAuditActor> logger,
    SiteCallAuditOptions? options = null)
{
    // Guards run in declaration order so the first null argument is the one reported.
    _injectedRepository = repository ?? throw new ArgumentNullException(nameof(repository));
    _siteEnumerator = siteEnumerator ?? throw new ArgumentNullException(nameof(siteEnumerator));
    _pullClient = pullClient ?? throw new ArgumentNullException(nameof(pullClient));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));

    // Fall back to production defaults when the test does not pin any options.
    _options = options ?? new SiteCallAuditOptions();

    RegisterHandlers();
}
/// <summary>
/// Production constructor — resolves <see cref="ISiteCallAuditRepository"/>
/// from a fresh DI scope per message because the repository is a scoped EF
@@ -129,6 +212,17 @@ public class SiteCallAuditActor : ReceiveActor
_options = options;
_logger = logger;
// Reconciliation collaborators (Piece A) are central-only singletons
// registered by AddAuditLogCentralReconciliationClient — always on the
// central composition root (Program.cs). Resolve them once here (the
// actor itself is a long-lived singleton; the repository is the only
// scoped service and is still resolved per-tick/per-message). GetService
// (not GetRequiredService) so a host that somehow omits the helper
// degrades to "no reconciliation tick" rather than a startup crash —
// the tick startup gates on both being non-null.
_pullClient = serviceProvider.GetService<IPullSiteCallsClient>();
_siteEnumerator = serviceProvider.GetService<ISiteEnumerator>();
RegisterHandlers();
}
@@ -154,6 +248,75 @@ public class SiteCallAuditActor : ReceiveActor
});
Receive<RetrySiteCallRequest>(HandleRetrySiteCall);
Receive<DiscardSiteCallRequest>(HandleDiscardSiteCall);
// Piece A/B (#22): self-ticks for the periodic reconciliation pull and
// the daily terminal-row purge. Handlers stay alive across faults via
// their own per-site / per-tick try/catch (mirroring the ingest path);
// the timers are only started when their collaborators are available.
ReceiveAsync<ReconciliationTick>(_ => OnReconciliationTickAsync());
ReceiveAsync<PurgeTick>(_ => OnPurgeTickAsync());
}
/// <inheritdoc />
/// <remarks>
/// Kicks off the two background self-ticks. Each start helper decides for
/// itself whether its timer is actually scheduled (both gate on the
/// reconciliation collaborators being present).
/// </remarks>
protected override void PreStart()
{
base.PreStart();
// Periodic per-site reconciliation pull (Piece A).
StartReconciliationTimer();
// Terminal-row purge (Piece B).
StartPurgeTimer();
}
/// <inheritdoc />
/// <remarks>
/// Cancels whichever background timers were started so no further tick
/// messages are scheduled for this actor once it has stopped.
/// </remarks>
protected override void PostStop()
{
// Either timer may be null when its start helper gated it off.
_reconciliationTimer?.Cancel();
_purgeTimer?.Cancel();
base.PostStop();
}
/// <summary>
/// Starts the periodic reconciliation tick — but ONLY when both the pull
/// client and the site enumerator are available. The repo-only test ctor
/// injects neither, so the tick is gated off there (the MSSQL read/upsert
/// tests must not fire phantom pulls); the reconciliation test ctor and the
/// production ctor (which resolves both from the service provider) start it.
/// </summary>
/// <remarks>
/// The production ctor resolves the collaborators with <c>GetService</c>, so
/// a host that omits their registration degrades to "no reconciliation"
/// instead of crashing. That degradation is reported with a warning so the
/// self-heal pull cannot silently become dead code; the injected-repository
/// test ctors (no service provider) stay quiet.
/// </remarks>
private void StartReconciliationTimer()
{
    if (_pullClient is null || _siteEnumerator is null)
    {
        // A non-null service provider means the production ctor built this
        // actor, where missing collaborators are a wiring gap worth reporting.
        if (_serviceProvider is not null)
        {
            _logger.LogWarning(
                "SiteCallAudit reconciliation tick is disabled: IPullSiteCallsClient and/or ISiteEnumerator is not registered; lost site telemetry will not be backfilled.");
        }

        return;
    }

    // Same value for the initial delay and the period: the first pull happens
    // one full interval after the actor starts.
    var interval = _options.ResolvedReconciliationInterval;
    _reconciliationTimer = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable(
        initialDelay: interval,
        interval: interval,
        receiver: Self,
        message: ReconciliationTick.Instance,
        sender: Self);
}
/// <summary>
/// Starts the purge tick — gated on the same collaborator presence as the
/// reconciliation tick. The purge itself only needs the repository, but
/// gating both schedulers together keeps the repo-only test ctor (no
/// client/enumerator) free of BOTH background timers, so the MSSQL read/
/// upsert tests see no scheduled side effects.
/// </summary>
/// <remarks>
/// Because the gate is shared, a production host that fails to register the
/// reconciliation collaborators also loses the purge. That case is reported
/// with a warning so terminal rows cannot silently accumulate; the
/// injected-repository test ctors (no service provider) stay quiet.
/// </remarks>
private void StartPurgeTimer()
{
    if (_pullClient is null || _siteEnumerator is null)
    {
        // A non-null service provider means the production ctor built this
        // actor, where a disabled purge is a wiring gap worth reporting.
        if (_serviceProvider is not null)
        {
            _logger.LogWarning(
                "SiteCallAudit terminal-row purge tick is disabled: it is gated on IPullSiteCallsClient and ISiteEnumerator, and at least one is not registered.");
        }

        return;
    }

    // Same value for the initial delay and the period: the first purge happens
    // one full interval after the actor starts.
    var interval = _options.ResolvedPurgeInterval;
    _purgeTimer = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable(
        initialDelay: interval,
        interval: interval,
        receiver: Self,
        message: PurgeTick.Instance,
        sender: Self);
}
/// <inheritdoc />
@@ -212,6 +375,228 @@ public class SiteCallAuditActor : ReceiveActor
}
}
// ── Piece A: periodic per-site reconciliation pull (self-heal) ──
/// <summary>
/// One reconciliation pass: enumerate every known site and, per site, pull
/// changed <see cref="SiteCall"/> rows since that site's cursor and upsert
/// them idempotently — the documented self-heal when best-effort gRPC push
/// telemetry is lost. This is a mirror, NOT a dispatcher: cached-call
/// delivery stays site-local; upserting reconciled rows only refreshes the
/// eventually-consistent central <c>SiteCalls</c> mirror.
/// </summary>
/// <remarks>
/// <para>
/// Mirrors <c>SiteAuditReconciliationActor</c>'s structure (per-site cursor,
/// per-site try/catch failure isolation, advance the cursor by the max
/// observed <see cref="SiteCall.UpdatedAtUtc"/>) but is deliberately simpler:
/// no stalled-detection EventStream machinery — just cursor + pull + upsert
/// + advance. One DI scope per tick is opened and the same repository reused
/// across every site in that tick.
/// </para>
/// <para>
/// No fault is allowed to escape this handler: enumeration failures, scope or
/// repository resolution failures and scope disposal failures are all logged
/// and swallowed, so the actor keeps its in-memory cursors and simply retries
/// on the next tick.
/// </para>
/// </remarks>
private async Task OnReconciliationTickAsync()
{
    // The tick is only scheduled when both collaborators are present, but
    // guard explicitly so a tick that arrives without them (or a future
    // refactor that drops the gate) is reported clearly instead of surfacing
    // as a NullReferenceException.
    if (_siteEnumerator is not { } enumerator || _pullClient is not { } client)
    {
        _logger.LogError(
            "SiteCallAudit reconciliation tick received without a pull client / site enumerator; ignoring.");
        return;
    }

    IReadOnlyList<SiteEntry> sites;
    try
    {
        // No ambient CancellationToken in a ReceiveActor handler — None is
        // intentional; the work is bounded by the reconciliation interval
        // plus the singleton's graceful-stop drain on PhaseClusterLeave.
        sites = await enumerator.EnumerateAsync().ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "SiteCallAudit site enumeration failed; skipping reconciliation tick.");
        return;
    }

    if (sites.Count == 0)
    {
        return;
    }

    try
    {
        // Injected-repository test path: there is no scope to open and the
        // test repository is reused as-is.
        if (_injectedRepository is not null)
        {
            await ReconcileSitesAsync(sites, client, _injectedRepository).ConfigureAwait(false);
            return;
        }

        // AuditLog-003: open the scope INLINE with CreateAsyncScope + await using
        // so the scoped EF Core repository (an IAsyncDisposable DbContext) disposes
        // asynchronously at end of tick rather than blocking the Akka dispatcher
        // thread on a synchronous Dispose() — the tick holds the scope across many
        // awaited UpsertAsync calls. Mirrors the sibling
        // SiteAuditReconciliationActor.OnTickAsync.
        await using var scope = _serviceProvider!.CreateAsyncScope();
        var repository = scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>();
        await ReconcileSitesAsync(sites, client, repository).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // Per-site faults are already isolated inside ReconcileSitesAsync; this
        // catches what is left (scope creation, repository resolution, scope
        // disposal) so the singleton stays alive and retries next tick.
        _logger.LogError(
            ex,
            "SiteCallAudit reconciliation tick failed outside the per-site pulls; will retry next tick.");
    }
}
/// <summary>
/// Walks the sites of one tick sequentially against a single, already
/// resolved repository. Each site is reconciled inside its own fault
/// boundary, so a failure for one site is logged and the remaining sites are
/// still processed.
/// </summary>
/// <param name="sites">Sites to reconcile, visited in list order.</param>
/// <param name="client">Pull client used for every site in this tick.</param>
/// <param name="repository">Repository shared by every site in this tick.</param>
private async Task ReconcileSitesAsync(
    IReadOnlyList<SiteEntry> sites, IPullSiteCallsClient client, ISiteCallAuditRepository repository)
{
    foreach (var entry in sites)
    {
        await ReconcileIsolatedAsync(entry).ConfigureAwait(false);
    }

    // Failure-isolation boundary for a single site: any fault (transport,
    // repository write) is logged here and never reaches the loop above.
    // ReconcileSiteAsync only advances the site's cursor after a fully
    // successful pass, so a failed site retries the same window next tick.
    async Task ReconcileIsolatedAsync(SiteEntry entry)
    {
        try
        {
            await ReconcileSiteAsync(entry, client, repository).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "SiteCallAudit reconciliation pull failed for site {SiteId}; other sites continue.",
                entry.SiteId);
        }
    }
}
/// <summary>
/// Performs a single <c>PullSiteCalls</c> RPC for one site, upserts every
/// returned row idempotently, then moves that site's cursor forward to the
/// largest <see cref="SiteCall.UpdatedAtUtc"/> seen. The pull client returns
/// rows oldest-first with <c>SourceSite</c> already re-stamped from the dialed
/// site id, so rows are upserted verbatim apart from <c>IngestedAtUtc</c>,
/// which is re-stamped at central persist time as on the telemetry path.
/// </summary>
/// <remarks>
/// <para>
/// <b>Coarse per-site retry — a deliberate divergence from
/// <c>SiteAuditReconciliationActor</c>.</b> That sibling (AuditLog-004) keeps a
/// per-EventId attempt counter and permanently abandons a row after a
/// threshold so one un-insertable row cannot pin a site's cursor forever.
/// This actor does NOT: a throw anywhere in the loop propagates to the
/// per-site catch in <see cref="ReconcileSitesAsync"/> before the cursor is
/// written, so the cursor keeps its previous value and the next tick re-pulls
/// the whole batch from the same starting point. A persistently bad row
/// therefore holds the site's cursor and the batch is re-pulled every tick.
/// That is acceptable because
/// <see cref="ISiteCallAuditRepository.UpsertAsync"/> is monotonic and
/// idempotent — re-pulling already-ingested rows is a cheap no-op — and the
/// <c>SiteCalls</c> table is an eventually-consistent mirror rather than the
/// source of truth, so a slow site lags instead of corrupting data.
/// </para>
/// <para>
/// <b>Inclusive cursor boundary.</b> The pull asks for rows at or after the
/// cursor (<c>&gt;=</c>, not <c>&gt;</c>), and the cursor is set to the maximum
/// <see cref="SiteCall.UpdatedAtUtc"/> observed. The row sitting exactly on the
/// cursor is therefore pulled again next tick and deduplicated by the
/// idempotent monotonic upsert — the same contract as
/// <c>SiteAuditReconciliationActor</c>'s cursor.
/// </para>
/// </remarks>
private async Task ReconcileSiteAsync(
    SiteEntry site, IPullSiteCallsClient client, ISiteCallAuditRepository repository)
{
    // First visit for a site: start from the beginning of time.
    if (!_reconciliationCursors.TryGetValue(site.SiteId, out var cursor))
    {
        cursor = DateTime.MinValue;
    }

    var pulled = await client
        .PullAsync(site.SiteId, cursor, _options.ReconciliationBatchSize, CancellationToken.None)
        .ConfigureAwait(false);

    // One ingest stamp for the whole batch. IngestedAtUtc is the "central
    // ingested (or last refreshed) this row" marker and is owned by the
    // central actor, exactly as on the telemetry path.
    var ingestedAtUtc = DateTime.UtcNow;
    var newestSeen = cursor;

    foreach (var pulledRow in pulled.SiteCalls)
    {
        // Monotonic UpsertAsync turns a row already present (from an earlier
        // push) into a silent no-op.
        await repository
            .UpsertAsync(pulledRow with { IngestedAtUtc = ingestedAtUtc })
            .ConfigureAwait(false);

        newestSeen = pulledRow.UpdatedAtUtc > newestSeen ? pulledRow.UpdatedAtUtc : newestSeen;
    }

    // Only reached when every upsert succeeded. A MoreAvailable response means
    // the site saturated the batch; there is no immediate re-pull loop — the
    // regular tick cadence drains the backlog from the advanced cursor,
    // matching SiteAuditReconciliationActor.
    _reconciliationCursors[site.SiteId] = newestSeen;
}
// ── Piece B: daily terminal-row purge scheduler ──
/// <summary>
/// One purge pass: drops terminal <c>SiteCalls</c> rows whose
/// <see cref="SiteCall.TerminalAtUtc"/> is older than
/// <c>UtcNow - RetentionDays</c> via
/// <see cref="ISiteCallAuditRepository.PurgeTerminalAsync"/>. Non-terminal
/// rows are never purged (enforced in the repository). The threshold is
/// computed each tick so an operator who lowers <c>RetentionDays</c> sees it
/// applied on the next purge without an actor restart. Mirrors
/// <c>AuditLogPurgeActor</c>'s daily cadence + continue-on-error posture: a
/// purge fault is logged and swallowed so the singleton stays alive.
/// </summary>
/// <remarks>
/// The continue-on-error guarantee covers the whole tick, not just the
/// repository call: computing the threshold (which can throw for an extreme
/// <c>RetentionDays</c> value), opening the DI scope, resolving the repository
/// and disposing the scope are all inside the guard, so none of them can
/// escape the handler and fail the actor.
/// </remarks>
private async Task OnPurgeTickAsync()
{
    try
    {
        // TimeSpan.FromDays and the DateTime subtraction both throw for
        // out-of-range values, so the computation lives inside the guard.
        var threshold = DateTime.UtcNow - TimeSpan.FromDays(_options.RetentionDays);

        // Injected-repository test path: reuse the test repository, no scope.
        if (_injectedRepository is not null)
        {
            await PurgeWithRepositoryAsync(_injectedRepository, threshold).ConfigureAwait(false);
            return;
        }

        // AuditLog-003: open the scope INLINE with CreateAsyncScope + await using
        // so the scoped EF Core repository (an IAsyncDisposable DbContext) disposes
        // asynchronously rather than blocking the Akka dispatcher thread on a
        // synchronous Dispose(). Mirrors SiteAuditReconciliationActor.
        await using var scope = _serviceProvider!.CreateAsyncScope();
        var repository = scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>();
        await PurgeWithRepositoryAsync(repository, threshold).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // PurgeWithRepositoryAsync already swallows faults from the purge call
        // itself; this catches everything around it.
        _logger.LogError(
            ex,
            "SiteCallAudit purge tick failed outside the purge call (RetentionDays={RetentionDays}); will retry next tick.",
            _options.RetentionDays);
    }
}
/// <summary>
/// Executes a single terminal-row purge on the supplied repository. Any
/// exception is logged and swallowed (continue-on-error), so a transient SQL
/// failure or contention cannot take down the central singleton; the next
/// tick simply tries again.
/// </summary>
/// <param name="repository">Repository the purge is issued against.</param>
/// <param name="threshold">Cut-off passed to <c>PurgeTerminalAsync</c>.</param>
private async Task PurgeWithRepositoryAsync(ISiteCallAuditRepository repository, DateTime threshold)
{
    try
    {
        var purgedCount = await repository.PurgeTerminalAsync(threshold).ConfigureAwait(false);

        // Stay silent when nothing was removed.
        if (purgedCount <= 0)
        {
            return;
        }

        _logger.LogInformation(
            "SiteCallAudit purged {RowsDeleted} terminal SiteCalls rows older than {ThresholdUtc:o}.",
            purgedCount,
            threshold);
    }
    catch (Exception purgeFault)
    {
        // Continue-on-error: report the fault and let the next tick retry.
        _logger.LogError(
            purgeFault,
            "SiteCallAudit terminal-row purge failed (threshold {ThresholdUtc:o}); will retry next tick.",
            threshold);
    }
}
// ── Task 4: read-side (query / detail / KPI) ──
/// <summary>
@@ -693,6 +1078,20 @@ public class SiteCallAuditActor : ReceiveActor
{
return string.IsNullOrWhiteSpace(value) ? null : value;
}
/// <summary>
/// Self-tick triggering a reconciliation pass across all sites (Piece A).
/// Carries no data; the single shared <see cref="Instance"/> is the message
/// the reconciliation timer sends to the actor.
/// </summary>
internal sealed class ReconciliationTick
{
/// <summary>The only instance of this message.</summary>
public static readonly ReconciliationTick Instance = new();
// Private ctor: callers must use Instance.
private ReconciliationTick() { }
}
/// <summary>
/// Self-tick triggering a terminal-row purge pass (Piece B). Carries no data;
/// the single shared <see cref="Instance"/> is the message the purge timer
/// sends to the actor.
/// </summary>
internal sealed class PurgeTick
{
/// <summary>The only instance of this message.</summary>
public static readonly PurgeTick Instance = new();
// Private ctor: callers must use Instance.
private PurgeTick() { }
}
}
/// <summary>
@@ -1,11 +1,13 @@
namespace ZB.MOM.WW.ScadaBridge.SiteCallAudit;
/// <summary>
/// Configuration options for the Site Call Audit (#22) read-side: stuck-call
/// detection and KPI windowing. Mirrors the KPI-relevant subset of
/// <c>NotificationOutboxOptions</c> — the reconciliation, purge and dispatch
/// cadence options the Notification Outbox carries are not part of the Site
/// Call Audit read-side backend and are deliberately omitted here.
/// Configuration options for the Site Call Audit (#22): stuck-call detection +
/// KPI windowing for the read-side, plus the cadence/retention knobs for the
/// two central-singleton schedulers — the periodic per-site reconciliation
/// pull (self-heal for lost telemetry) and the daily terminal-row purge.
/// Mirrors the KPI-relevant subset of <c>NotificationOutboxOptions</c> and the
/// scheduler-cadence shape of <c>SiteAuditReconciliationOptions</c> /
/// <c>AuditLogPurgeOptions</c>.
/// </summary>
public class SiteCallAuditOptions
{
@@ -44,4 +46,99 @@ public class SiteCallAuditOptions
/// </para>
/// </summary>
public TimeSpan RelayTimeout { get; set; } = TimeSpan.FromSeconds(10);
// ── Reconciliation tick (#22): periodic per-site self-heal pull ──
/// <summary>
/// Period of the reconciliation tick. Each tick visits every known site
/// once, pulls changed <c>SiteCall</c> rows since a per-site cursor, and
/// upserts them idempotently — the documented self-heal when best-effort
/// push telemetry is lost. Default 5 minutes, matching the sibling
/// <c>SiteAuditReconciliationOptions</c> (#23) cadence. Clamped to at least
/// <see cref="MinReconciliationInterval"/> via <see cref="ResolvedReconciliationInterval"/>.
/// </summary>
public TimeSpan ReconciliationInterval { get; set; } = TimeSpan.FromMinutes(5);
/// <summary>
/// Test-only override for the reconciliation tick cadence — bypasses the
/// <see cref="MinReconciliationInterval"/> clamp so unit tests can drop the
/// cadence to milliseconds. Production config never sets this; leave null.
/// </summary>
public TimeSpan? ReconciliationIntervalOverride { get; set; }
/// <summary>
/// Maximum number of <c>SiteCall</c> rows requested per <c>PullSiteCalls</c>
/// RPC. Default 500. A <c>MoreAvailable=true</c> response signals the cursor
/// advanced and the next tick should keep draining the backlog.
/// </summary>
public int ReconciliationBatchSize { get; set; } = 500;
/// <summary>
/// Minimum interval the config-bound <see cref="ReconciliationInterval"/> can
/// resolve to. Clamps a misconfigured <c>0</c> (or negative) value away from
/// <see cref="TimeSpan.Zero"/>, which would make Akka's
/// <c>ScheduleTellRepeatedlyCancelable</c> spin — the exact footgun flagged in
/// a prior review of the sibling reconciliation options.
/// </summary>
private static readonly TimeSpan MinReconciliationInterval = TimeSpan.FromSeconds(1);
/// <summary>
/// Effective reconciliation tick interval. A non-null
/// <see cref="ReconciliationIntervalOverride"/> is returned as-is (no clamp);
/// otherwise <see cref="ReconciliationInterval"/> is returned, raised to
/// <see cref="MinReconciliationInterval"/> when it is smaller, so a zero or
/// negative configured value never produces <see cref="TimeSpan.Zero"/>.
/// </summary>
public TimeSpan ResolvedReconciliationInterval
{
    get
    {
        // Test override wins and deliberately bypasses the floor.
        if (ReconciliationIntervalOverride.HasValue)
        {
            return ReconciliationIntervalOverride.Value;
        }

        return ReconciliationInterval >= MinReconciliationInterval
            ? ReconciliationInterval
            : MinReconciliationInterval;
    }
}
// ── Purge scheduler (#22): daily terminal-row purge ──
/// <summary>
/// Period of the purge tick. Each tick drops terminal <c>SiteCalls</c> rows
/// older than the retention window via
/// <see cref="ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories.ISiteCallAuditRepository.PurgeTerminalAsync"/>.
/// Default 24 hours, matching <c>AuditLogPurgeOptions</c>. Clamped to at
/// least <see cref="MinPurgeInterval"/> via <see cref="ResolvedPurgeInterval"/>.
/// </summary>
public TimeSpan PurgeInterval { get; set; } = TimeSpan.FromHours(24);
/// <summary>
/// Test-only override for the purge tick cadence — bypasses the
/// <see cref="MinPurgeInterval"/> clamp so unit tests can drop the cadence
/// to milliseconds. Production config never sets this; leave null.
/// </summary>
public TimeSpan? PurgeIntervalOverride { get; set; }
/// <summary>
/// Retention window for terminal rows. On each purge tick a row whose
/// <c>TerminalAtUtc</c> is older than <c>UtcNow - RetentionDays</c> is
/// deleted; non-terminal rows are never purged. Default 365 days, matching
/// the central audit-store retention policy.
/// </summary>
public int RetentionDays { get; set; } = 365;
/// <summary>
/// Minimum interval the config-bound <see cref="PurgeInterval"/> can resolve
/// to. Clamps a misconfigured <c>0</c> (or negative) value away from
/// <see cref="TimeSpan.Zero"/> for the same scheduler-spin reason as
/// <see cref="MinReconciliationInterval"/>; the purge is daily so the floor
/// is a more generous 1 minute.
/// </summary>
private static readonly TimeSpan MinPurgeInterval = TimeSpan.FromMinutes(1);
/// <summary>
/// Effective purge tick interval. A non-null
/// <see cref="PurgeIntervalOverride"/> is returned as-is (no clamp);
/// otherwise <see cref="PurgeInterval"/> is returned, raised to
/// <see cref="MinPurgeInterval"/> when it is smaller.
/// </summary>
public TimeSpan ResolvedPurgeInterval
{
    get
    {
        // Test override wins and deliberately bypasses the floor.
        if (PurgeIntervalOverride.HasValue)
        {
            return PurgeIntervalOverride.Value;
        }

        return PurgeInterval >= MinPurgeInterval
            ? PurgeInterval
            : MinPurgeInterval;
    }
}
}
@@ -29,6 +29,15 @@
the same transport every other central→site command uses. SiteEnvelope is defined
in ZB.MOM.WW.ScadaBridge.Communication (no cycle: Communication does not reference SiteCallAudit). -->
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.Communication/ZB.MOM.WW.ScadaBridge.Communication.csproj" />
<!-- Reconciliation tick (#22): the per-site PullSiteCalls self-heal pull resolves
IPullSiteCallsClient + ISiteEnumerator (both central-only singletons registered by
AddAuditLogCentralReconciliationClient) from the actor's root IServiceProvider. They live
in ZB.MOM.WW.ScadaBridge.AuditLog.Central so the SiteCall pull client reuses the shared
SiteEntry enumerator the sibling IPullAuditEventsClient already uses. No cycle: AuditLog
references only Commons / ConfigurationDatabase / Communication — none of which reference
SiteCallAudit. Preferred over moving the interfaces into Commons (Commons has no Akka /
Communication dependency and would have to carry a Communication-adjacent message). -->
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.AuditLog/ZB.MOM.WW.ScadaBridge.AuditLog.csproj" />
</ItemGroup>
<ItemGroup>
@@ -11,7 +11,7 @@ public interface ISiteEventLogger
/// <see cref="Task"/> completes once the event is durably persisted and faults if
/// the write fails, so callers that <c>await</c> it observe success or failure.
/// </summary>
/// <param name="eventType">Category: script, alarm, deployment, connection, store_and_forward, instance_lifecycle</param>
/// <param name="eventType">Category: script, alarm, deployment, connection, store_and_forward, instance_lifecycle, notification</param>
/// <param name="severity">Info, Warning, or Error</param>
/// <param name="instanceId">Optional instance ID associated with the event</param>
/// <param name="source">Source identifier, e.g., "ScriptActor:MonitorSpeed"</param>
@@ -1,10 +1,12 @@
using Akka.Actor;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using System.Globalization;
using System.Text.Json;
@@ -37,6 +39,25 @@ public class AlarmActor : ReceiveActor
private readonly SiteRuntimeOptions _options;
private readonly ILogger _logger;
private readonly ISiteHealthCollector? _healthCollector;
private readonly IServiceProvider? _serviceProvider;
/// <summary>
/// M1.5: the optional site operational-event log, resolved once from
/// <see cref="_serviceProvider"/> at construction and cached. The
/// registration is process-lifetime (a singleton), so resolving once on
/// the actor's own thread is both correct and cheaper than a per-event
/// <c>GetService</c> on the hot path. <c>null</c> when no provider was
/// supplied (the test/no-logging path) — <see cref="LogAlarmEvent"/> then
/// no-ops.
/// </summary>
private readonly ISiteEventLogger? _siteEventLogger;
/// <summary>
/// M1.5: priority at or above which a computed-alarm raise is logged as
/// <c>Error</c> to the site event log; below it, raises log as <c>Warning</c>.
/// Mirrors the 0–1000 alarm-severity scale.
/// </summary>
private const int ErrorPriorityThreshold = 700;
private AlarmState _currentState = AlarmState.Normal;
/// <summary>
@@ -83,6 +104,9 @@ public class AlarmActor : ReceiveActor
/// <param name="compiledTriggerExpression">Pre-compiled trigger expression, or <c>null</c> for non-expression triggers.</param>
/// <param name="initialAttributes">Seed attribute snapshot so static attributes evaluate correctly at startup.</param>
/// <param name="healthCollector">Optional health collector for surfacing alarm execution metrics.</param>
/// <param name="serviceProvider">Optional DI service provider used to resolve the optional
/// <see cref="ISiteEventLogger"/> for M1.5 <c>alarm</c> operational events. Fire-and-forget;
/// a logging failure never affects alarm evaluation.</param>
public AlarmActor(
string alarmName,
string instanceName,
@@ -94,7 +118,8 @@ public class AlarmActor : ReceiveActor
ILogger logger,
Script<object?>? compiledTriggerExpression = null,
IReadOnlyDictionary<string, object?>? initialAttributes = null,
ISiteHealthCollector? healthCollector = null)
ISiteHealthCollector? healthCollector = null,
IServiceProvider? serviceProvider = null)
{
_alarmName = alarmName;
_instanceName = instanceName;
@@ -103,6 +128,10 @@ public class AlarmActor : ReceiveActor
_options = options;
_logger = logger;
_healthCollector = healthCollector;
_serviceProvider = serviceProvider;
// M1.5: resolve the optional site event logger once and cache it,
// rather than calling GetService on every alarm transition.
_siteEventLogger = serviceProvider?.GetService<ISiteEventLogger>();
_priority = alarmConfig.PriorityLevel;
_onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
_onTriggerCompiledScript = onTriggerCompiledScript;
@@ -208,6 +237,9 @@ public class AlarmActor : ReceiveActor
_instanceName, _alarmName, AlarmState.Active, _priority, DateTimeOffset.UtcNow);
_instanceActor.Tell(alarmChanged);
// M1.5: operational `alarm` event — raise. Severity by priority.
LogAlarmEvent(RaiseSeverity(_priority), $"Alarm {_alarmName} activated (priority {_priority})");
// Spawn AlarmExecutionActor if on-trigger script defined
if (_onTriggerCompiledScript != null)
{
@@ -225,6 +257,9 @@ public class AlarmActor : ReceiveActor
var alarmChanged = new AlarmStateChanged(
_instanceName, _alarmName, AlarmState.Normal, _priority, DateTimeOffset.UtcNow);
_instanceActor.Tell(alarmChanged);
// M1.5: operational `alarm` event — return to normal.
LogAlarmEvent("Info", $"Alarm {_alarmName} cleared");
}
}
catch (Exception ex)
@@ -265,6 +300,24 @@ public class AlarmActor : ReceiveActor
};
_instanceActor.Tell(alarmChanged);
// M1.5: operational `alarm` event. Entering a band from Normal is a raise
// (severity by the band's priority); returning to None is a clear; a
// level-to-level escalation/de-escalation is an informational transition.
if (newLevel == AlarmLevel.None)
{
LogAlarmEvent("Info", $"Alarm {_alarmName} cleared ({previousLevel} → Normal)");
}
else if (previousLevel == AlarmLevel.None)
{
LogAlarmEvent(RaiseSeverity(priority),
$"Alarm {_alarmName} activated at {newLevel} (priority {priority})");
}
else
{
LogAlarmEvent("Info",
$"Alarm {_alarmName} transitioned {previousLevel} → {newLevel} (priority {priority})");
}
if (previousLevel == AlarmLevel.None
&& newLevel != AlarmLevel.None
&& _onTriggerCompiledScript != null)
@@ -273,6 +326,28 @@ public class AlarmActor : ReceiveActor
}
}
/// <summary>
/// M1.5: picks the site-event severity for a <i>raise</i> transition from an
/// alarm priority (0–1000): <c>Error</c> when the priority is at or above
/// <see cref="ErrorPriorityThreshold"/>, <c>Warning</c> otherwise. Clears and
/// inter-band transitions do not use this mapping; they always log as
/// <c>Info</c>.
/// </summary>
/// <param name="priority">Priority of the alarm (or band) being raised.</param>
/// <returns><c>"Error"</c> or <c>"Warning"</c>.</returns>
private static string RaiseSeverity(int priority)
{
    if (priority >= ErrorPriorityThreshold)
    {
        return "Error";
    }

    return "Warning";
}
/// <summary>
/// M1.5: fire-and-forget an <c>alarm</c> operational event to the optional
/// <see cref="ISiteEventLogger"/> (resolved once at construction and cached
/// in <see cref="_siteEventLogger"/>). Never awaited, so alarm evaluation
/// does not wait on the event write.
/// </summary>
/// <param name="severity">Site-event severity: Info, Warning or Error.</param>
/// <param name="message">Human-readable description of the transition.</param>
/// <remarks>
/// A logging failure must never affect alarm evaluation. Two failure modes are
/// handled: a synchronous throw from <c>LogEventAsync</c> is caught here
/// instead of propagating into the caller, and an asynchronous fault of the
/// returned task is observed by a fault-only continuation and reported to the
/// diagnostic logger, so it neither goes unobserved nor disappears silently.
/// </remarks>
private void LogAlarmEvent(string severity, string message)
{
    // No logger registered (the test/no-logging path): nothing to do.
    if (_siteEventLogger is null)
    {
        return;
    }

    try
    {
        _ = _siteEventLogger
            .LogEventAsync("alarm", severity, _instanceName, $"AlarmActor:{_alarmName}", message)
            .ContinueWith(
                faulted => _logger.LogWarning(
                    faulted.Exception?.GetBaseException(),
                    "Failed to write alarm site event for {Instance}/{Alarm}.",
                    _instanceName,
                    _alarmName),
                System.Threading.CancellationToken.None,
                // Runs only when the write faulted; the continuation only
                // logs, so it never touches actor state.
                System.Threading.Tasks.TaskContinuationOptions.OnlyOnFaulted
                    | System.Threading.Tasks.TaskContinuationOptions.ExecuteSynchronously,
                System.Threading.Tasks.TaskScheduler.Default);
    }
    catch (Exception ex)
    {
        // Synchronous failure while starting the write: report and carry on.
        _logger.LogWarning(
            ex,
            "Failed to start alarm site event write for {Instance}/{Alarm}.",
            _instanceName,
            _alarmName);
    }
}
/// <summary>
/// Returns the per-setpoint priority for the given level. Falls back to
/// the alarm-level <see cref="_priority"/> when the HiLo config did not
@@ -1,4 +1,5 @@
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Artifacts;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
@@ -10,6 +11,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Messages.Management;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.ScriptExecution;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Messages;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
@@ -456,6 +458,10 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
{
if (result.Success)
{
// M1.6: operational `deployment` event — deploy succeeded.
LogDeploymentEvent("Info", result.InstanceName,
$"Instance {result.InstanceName} deployed (deploymentId={result.DeploymentId})");
result.OriginalSender.Tell(new DeploymentStatusResponse(
result.DeploymentId,
result.InstanceName,
@@ -469,6 +475,11 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
"Failed to persist deployment {DeploymentId} for {Instance}: {Error}",
result.DeploymentId, result.InstanceName, result.Error);
// M1.6: operational `deployment` event — deploy failed.
LogDeploymentEvent("Error", result.InstanceName,
$"Instance {result.InstanceName} deploy failed (deploymentId={result.DeploymentId})",
result.Error);
// Persistence failed — undo the optimistic actor creation and counter bump so
// the site does not advertise an instance it cannot durably recover.
if (_instanceActors.Remove(result.InstanceName, out var orphan))
@@ -504,7 +515,17 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
_storage.SetInstanceEnabledAsync(instanceName, false).ContinueWith(t =>
{
if (t.IsCompletedSuccessfully)
{
_replicationActor?.Tell(new ReplicateConfigSetEnabled(instanceName, false));
// M1.6: operational `deployment` event — disable succeeded.
LogDeploymentEvent("Info", instanceName, $"Instance {instanceName} disabled");
}
else
{
LogDeploymentEvent("Error", instanceName,
$"Instance {instanceName} disable failed",
t.Exception?.GetBaseException().Message);
}
return new InstanceLifecycleResponse(
command.CommandId,
@@ -551,6 +572,9 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
if (result.Error != null || result.Config == null)
{
var error = result.Error ?? $"No deployed config found for {instanceName}";
// M1.6: operational `deployment` event — enable failed.
LogDeploymentEvent("Error", instanceName,
$"Instance {instanceName} enable failed", error);
result.OriginalSender.Tell(new InstanceLifecycleResponse(
result.Command.CommandId, instanceName, false, error, DateTimeOffset.UtcNow));
return;
@@ -562,6 +586,9 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
}
UpdateInstanceCounts();
// M1.6: operational `deployment` event — enable succeeded.
LogDeploymentEvent("Info", instanceName, $"Instance {instanceName} enabled");
result.OriginalSender.Tell(new InstanceLifecycleResponse(
result.Command.CommandId, instanceName, true, null, DateTimeOffset.UtcNow));
@@ -588,7 +615,17 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
_storage.RemoveDeployedConfigAsync(instanceName).ContinueWith(t =>
{
if (t.IsCompletedSuccessfully)
{
_replicationActor?.Tell(new ReplicateConfigRemove(instanceName));
// M1.6: operational `deployment` event — delete succeeded.
LogDeploymentEvent("Info", instanceName, $"Instance {instanceName} deleted");
}
else
{
LogDeploymentEvent("Error", instanceName,
$"Instance {instanceName} delete failed",
t.Exception?.GetBaseException().Message);
}
return new InstanceLifecycleResponse(
command.CommandId,
@@ -601,6 +638,30 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
_logger.LogInformation("Instance {Instance} deleted", instanceName);
}
/// <summary>
/// M1.6: raises a <c>deployment</c> operational event for a
/// deploy/enable/disable/delete outcome. The <see cref="ISiteEventLogger"/> is
/// resolved optionally from the service provider on each call; when either the
/// provider or the logger is absent the call is a no-op. The returned task is
/// discarded rather than awaited, so the deployment pipeline never waits on
/// the event log (the same fire-and-forget pattern used by
/// ScriptActor/ScriptExecutionActor).
/// <para>
/// <b>Thread-safety:</b> the disable (<see cref="HandleDisable"/>) and delete
/// (<see cref="HandleDelete"/>) paths invoke this from a
/// <see cref="System.Threading.Tasks.Task.ContinueWith(System.Action{System.Threading.Tasks.Task})"/>
/// continuation, which runs on a thread-pool thread rather than the actor
/// thread. The method is safe to call from there because the only field it
/// reads is the <c>readonly _serviceProvider</c> captured at construction
/// (the resolved <see cref="ISiteEventLogger"/> is a process singleton); it
/// references no actor-private mutable state.
/// </para>
/// </summary>
/// <param name="severity">Severity label forwarded to the event log.</param>
/// <param name="instanceName">Instance the outcome applies to.</param>
/// <param name="message">Human-readable event text.</param>
/// <param name="details">Optional extra detail, e.g. an error message.</param>
private void LogDeploymentEvent(string severity, string instanceName, string message, string? details = null)
{
    var eventLog = _serviceProvider?.GetService<ISiteEventLogger>();
    if (eventLog is null)
    {
        return;
    }

    // Fire-and-forget: the task is deliberately discarded.
    _ = eventLog.LogEventAsync(
        "deployment",
        severity,
        instanceName,
        "DeploymentManagerActor",
        message,
        details);
}
/// <summary>
/// DeploymentManager-006: answers a central query for the instance's
/// currently-applied deployment identity. The site's deployed-config store
@@ -1,5 +1,6 @@
using Akka.Actor;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DataConnection;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DebugView;
@@ -9,6 +10,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Streaming;
@@ -164,6 +166,11 @@ public class InstanceActor : ReceiveActor
base.PreStart();
_logger.LogInformation("InstanceActor started for {Instance}", _instanceUniqueName);
// M1.6: operational `instance_lifecycle` event — instance started.
// An instance starts on deploy, on enable (DeploymentManager re-creates
// the actor), and on failover/restart; this single point covers them all.
LogLifecycleEvent($"Instance {_instanceUniqueName} started");
// Asynchronously load static overrides from SQLite and pipe to self
var self = Self;
_storage.GetStaticOverridesAsync(_instanceUniqueName).ContinueWith(t =>
@@ -180,6 +187,29 @@ public class InstanceActor : ReceiveActor
SubscribeToDcl();
}
/// <inheritdoc />
protected override void PostStop()
{
    // M1.6: operational `instance_lifecycle` event — instance stopped.
    // Disable, delete, redeployment and graceful shutdown all stop the
    // actor, so emitting here covers every one of those paths.
    var stoppedMessage = $"Instance {_instanceUniqueName} stopped";
    LogLifecycleEvent(stoppedMessage);

    base.PostStop();
}
/// <summary>
/// M1.6: raises an <c>instance_lifecycle</c> operational event (always at
/// <c>Info</c> severity) for this instance. The
/// <see cref="ISiteEventLogger"/> is resolved optionally from the service
/// provider; when either is absent the call is a no-op. The returned task is
/// discarded rather than awaited, so the instance lifecycle never waits on the
/// event log (the same fire-and-forget pattern used by
/// ScriptActor/ScriptExecutionActor).
/// </summary>
/// <param name="message">Human-readable event text.</param>
private void LogLifecycleEvent(string message)
{
    var eventLog = _serviceProvider?.GetService<ISiteEventLogger>();
    if (eventLog is null)
    {
        return;
    }

    var source = $"InstanceActor:{_instanceUniqueName}";

    // Fire-and-forget: the task is deliberately discarded.
    _ = eventLog.LogEventAsync("instance_lifecycle", "Info", _instanceUniqueName, source, message);
}
/// <inheritdoc />
protected override SupervisorStrategy SupervisorStrategy()
{
@@ -763,7 +793,8 @@ public class InstanceActor : ReceiveActor
_logger,
triggerExpression,
attributeSnapshot,
_healthCollector));
_healthCollector,
_serviceProvider));
var actorRef = Context.ActorOf(props, $"alarm-{alarm.CanonicalName}");
_alarmActors[alarm.CanonicalName] = actorRef;
@@ -793,7 +824,8 @@ public class InstanceActor : ReceiveActor
_storage,
_options,
_logger,
nativeKind));
nativeKind,
_serviceProvider));
var actorRef = Context.ActorOf(props, $"native-alarm-{nativeSource.CanonicalName}");
_nativeAlarmActors[nativeSource.CanonicalName] = actorRef;
@@ -1,11 +1,13 @@
using System.Text.Json;
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DataConnection;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Alarms;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
@@ -35,6 +37,14 @@ public class NativeAlarmActor : ReceiveActor
private readonly SiteRuntimeOptions _options;
private readonly ILogger _logger;
private readonly AlarmKind _nativeKind;
private readonly IServiceProvider? _serviceProvider;
/// <summary>
/// M1.5: severity at or above which a native-alarm raise is logged as
/// <c>Error</c> to the site event log; below it, raises log as <c>Warning</c>.
/// Mirrors the 0–1000 condition-severity scale.
/// </summary>
private const int ErrorSeverityThreshold = 700;
/// <summary>Current mirrored conditions, keyed by source reference.</summary>
private readonly Dictionary<string, NativeAlarmTransition> _alarms = new();
@@ -54,6 +64,9 @@ public class NativeAlarmActor : ReceiveActor
/// <param name="logger">Logger for diagnostics.</param>
/// <param name="nativeKind">Alarm kind to stamp on emitted events (OPC UA vs MxAccess); set by the
/// Instance Actor from the connection protocol. Defaults to <see cref="AlarmKind.NativeOpcUa"/>.</param>
/// <param name="serviceProvider">Optional DI service provider used to resolve the optional
/// <see cref="ISiteEventLogger"/> for M1.5 <c>alarm</c> operational events. Fire-and-forget;
/// a logging failure never affects the mirror.</param>
public NativeAlarmActor(
ResolvedNativeAlarmSource source,
string instanceName,
@@ -62,7 +75,8 @@ public class NativeAlarmActor : ReceiveActor
SiteStorageService storage,
SiteRuntimeOptions options,
ILogger logger,
AlarmKind nativeKind = AlarmKind.NativeOpcUa)
AlarmKind nativeKind = AlarmKind.NativeOpcUa,
IServiceProvider? serviceProvider = null)
{
_source = source;
_instanceName = instanceName;
@@ -72,6 +86,7 @@ public class NativeAlarmActor : ReceiveActor
_options = options;
_logger = logger;
_nativeKind = nativeKind;
_serviceProvider = serviceProvider;
Receive<RehydrationCompleted>(HandleRehydration);
Receive<NativeAlarmTransitionUpdate>(HandleTransition);
@@ -150,7 +165,10 @@ public class NativeAlarmActor : ReceiveActor
condition, string.Empty, string.Empty, string.Empty, string.Empty, string.Empty,
null, row.LastTransitionAt, string.Empty, string.Empty);
_alarms[row.SourceReference] = t;
Emit(t, t.Condition);
// M1.5: rehydration replays last-known state on (re)start — surface it
// upward for the DebugView but do NOT re-log it as a fresh operational
// event (it is not a live transition).
Emit(t, t.Condition, logSiteEvent: false);
}
}
@@ -194,7 +212,14 @@ public class NativeAlarmActor : ReceiveActor
{
_alarms[sourceRef] = t;
PersistUpsert(t);
Emit(t, t.Condition);
// M1.5: a snapshot replay is a re-sync of the source's current
// active set on (re)subscribe, NOT a live transition — surface it
// upward for the DebugView but do NOT re-log an `alarm` operational
// event. Otherwise every DCL reconnect would re-emit an `alarm`
// event for every already-active native condition (the
// synthesised return-to-normal above IS a real state change and
// keeps logSiteEvent: true).
Emit(t, t.Condition, logSiteEvent: false);
}
_snapshotBuffer.Clear();
@@ -277,8 +302,16 @@ public class NativeAlarmActor : ReceiveActor
}
}
/// <summary>Builds and tells the parent an enriched <see cref="AlarmStateChanged"/> for a condition.</summary>
private void Emit(NativeAlarmTransition t, AlarmConditionState condition)
/// <summary>
/// Builds and tells the parent an enriched <see cref="AlarmStateChanged"/> for a condition.
/// </summary>
/// <param name="t">The mirrored transition.</param>
/// <param name="condition">The condition state to surface (may differ from <paramref name="t"/>'s
/// own condition, e.g. a synthesised return-to-normal on snapshot swap).</param>
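/// <param name="logSiteEvent">M1.5: when <c>true</c> (live transitions, including a
/// return-to-normal synthesised during a snapshot swap), emit an <c>alarm</c> operational
/// event. Suppressed for SQLite rehydration and for snapshot replays so that a node restart
/// or a DCL reconnect does not re-log every last-known or already-active condition.</param>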
private void Emit(NativeAlarmTransition t, AlarmConditionState condition, bool logSiteEvent = true)
{
var change = new AlarmStateChanged(
_instanceName,
@@ -301,6 +334,49 @@ public class NativeAlarmActor : ReceiveActor
};
_instanceActor.Tell(change);
if (logSiteEvent)
{
LogAlarmEvent(t, condition);
}
}
/// <summary>
/// M1.5: raises an <c>alarm</c> operational event mirroring a native condition
/// transition. Classification, in order:
/// <list type="bullet">
/// <item><description>an acknowledge transition logs as <c>Info</c>;</description></item>
/// <item><description>an active condition is a raise, logged as <c>Error</c> when its
/// severity is at or above <see cref="ErrorSeverityThreshold"/> and as
/// <c>Warning</c> otherwise;</description></item>
/// <item><description>an inactive condition is a return-to-normal, logged as
/// <c>Info</c>.</description></item>
/// </list>
/// The <see cref="ISiteEventLogger"/> is resolved optionally; when it is absent
/// the call is a no-op. The returned task is discarded rather than awaited, so
/// the mirror never waits on the event log (the same fire-and-forget pattern
/// used by ScriptActor/ScriptExecutionActor).
/// </summary>
/// <param name="t">The mirrored transition (supplies the kind and source reference).</param>
/// <param name="condition">The condition state being surfaced (supplies active flag and severity).</param>
private void LogAlarmEvent(NativeAlarmTransition t, AlarmConditionState condition)
{
    if (_serviceProvider?.GetService<ISiteEventLogger>() is not { } eventLog)
    {
        return;
    }

    var (level, text) = Describe();

    // Fire-and-forget: the task is deliberately discarded.
    _ = eventLog.LogEventAsync(
        "alarm", level, _instanceName, $"NativeAlarmActor:{_source.CanonicalName}", text);

    // Maps the transition/condition pair onto a (severity, message) pair.
    (string Level, string Text) Describe()
    {
        if (t.Kind == AlarmTransitionKind.Acknowledge)
        {
            return ("Info", $"Native alarm {t.SourceReference} acknowledged");
        }

        if (!condition.Active)
        {
            return ("Info", $"Native alarm {t.SourceReference} returned to normal");
        }

        return (
            condition.Severity >= ErrorSeverityThreshold ? "Error" : "Warning",
            $"Native alarm {t.SourceReference} active (severity {condition.Severity})");
    }
}
private void PersistUpsert(NativeAlarmTransition t)
@@ -217,6 +217,13 @@ public class ScriptExecutionActor : ReceiveActor
Scope = scope
};
// M1.8: operational `script` event — execution started. Fire-and-forget
// (the `_ =` discards the task) so the event log can never block or
// fault the script's own run; mirrors the existing Error-path emit.
_ = siteEventLogger?.LogEventAsync(
"script", "Info", instanceName, $"ScriptActor:{scriptName}",
$"Script '{scriptName}' on instance '{instanceName}' started");
var state = await compiledScript.RunAsync(globals, cts.Token);
// Send result to requester if this was an Ask-based call
@@ -225,6 +232,11 @@ public class ScriptExecutionActor : ReceiveActor
replyTo.Tell(new ScriptCallResult(correlationId, true, state.ReturnValue, null));
}
// M1.8: operational `script` event — execution completed successfully.
_ = siteEventLogger?.LogEventAsync(
"script", "Info", instanceName, $"ScriptActor:{scriptName}",
$"Script '{scriptName}' on instance '{instanceName}' completed");
// Notify parent of completion
parent.Tell(new ScriptActor.ScriptExecutionCompleted(scriptName, true, null));
}
@@ -91,6 +91,8 @@ public class OperationTrackingStore : IOperationTrackingStore, IAsyncDisposable,
);
CREATE INDEX IF NOT EXISTS IX_OperationTracking_Status_Updated
ON OperationTracking (Status, UpdatedAtUtc);
CREATE INDEX IF NOT EXISTS IX_OperationTracking_UpdatedAt
ON OperationTracking (UpdatedAtUtc);
""";
cmd.ExecuteNonQuery();
@@ -360,6 +362,84 @@ public class OperationTrackingStore : IOperationTrackingStore, IAsyncDisposable,
}
}
/// <inheritdoc/>
/// <remarks>
/// A non-positive <paramref name="batchSize"/> yields an empty list without
/// touching the database. SQLite interprets a negative <c>LIMIT</c> as "no
/// upper bound", so binding such a value directly would silently turn this
/// bounded scan into an unbounded one.
/// </remarks>
public async Task<IReadOnlyList<SiteCallOperational>> ReadChangedSinceAsync(
    DateTime sinceUtc,
    int batchSize,
    CancellationToken ct = default)
{
    ObjectDisposedException.ThrowIf(Volatile.Read(ref _disposeState) != 0, this);

    // Guard the LIMIT: zero already returned no rows, and a negative value
    // would remove the row cap entirely (SQLite semantics). Either way there
    // is nothing bounded to read, so answer with an empty batch.
    if (batchSize <= 0)
    {
        return Array.Empty<SiteCallOperational>();
    }

    // SiteRuntime-024: like GetStatusAsync, the reconciliation pull opens a
    // fresh, ungated read connection so a long-running write never blocks
    // central's PullSiteCalls. The query is a bounded, ordered scan served by
    // the standalone IX_OperationTracking_UpdatedAt index — UpdatedAtUtc is
    // the cursor. (The composite (Status, UpdatedAtUtc) index cannot satisfy a
    // status-less UpdatedAtUtc range scan; this dedicated index does.)
    await using var readConnection = new SqliteConnection(_connectionString);
    await readConnection.OpenAsync(ct).ConfigureAwait(false);
    await using var cmd = readConnection.CreateCommand();

    // Inclusive lower bound on UpdatedAtUtc (>=) so a caller resuming from
    // the last returned timestamp does not skip a row sharing that instant;
    // central ingest is insert-if-not-exists + upsert-on-newer, so the
    // boundary row re-read is a no-op. ORDER BY ... ASC + LIMIT yields the
    // OLDEST matching rows so the cursor advances monotonically.
    cmd.CommandText = """
        SELECT TrackedOperationId, Kind, TargetSummary, Status,
        RetryCount, LastError, HttpStatus,
        CreatedAtUtc, UpdatedAtUtc, TerminalAtUtc, SourceNode
        FROM OperationTracking
        WHERE UpdatedAtUtc >= $since
        ORDER BY UpdatedAtUtc ASC
        LIMIT $batchSize;
        """;

    // Force UTC kind before formatting so the cursor's "o" text matches the
    // 'Z'-suffixed round-trip form the write path persists (DateTime.UtcNow
    // .ToString("o")). A first-cycle DateTime.MinValue arrives Unspecified —
    // without this its "o" rendering would lack the 'Z', and the SQLite text
    // compare against 'Z'-suffixed stored values would be subtly inconsistent.
    var sinceText = DateTime
        .SpecifyKind(sinceUtc, DateTimeKind.Utc)
        .ToString("o", CultureInfo.InvariantCulture);
    cmd.Parameters.AddWithValue("$since", sinceText);
    cmd.Parameters.AddWithValue("$batchSize", batchSize);

    var rows = new List<SiteCallOperational>();
    await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
    while (await reader.ReadAsync(ct).ConfigureAwait(false))
    {
        // Column ordinals follow the SELECT list above.
        var kind = reader.GetString(1);
        rows.Add(new SiteCallOperational(
            TrackedOperationId: TrackedOperationId.Parse(reader.GetString(0)),
            Channel: KindToChannel(kind),
            Target: reader.IsDBNull(2) ? string.Empty : reader.GetString(2),
            // The site id is not a tracking-store column; the central client
            // re-stamps SourceSite from the siteId it dialed.
            SourceSite: string.Empty,
            SourceNode: reader.IsDBNull(10) ? null : reader.GetString(10),
            Status: reader.GetString(3),
            RetryCount: reader.GetInt32(4),
            LastError: reader.IsDBNull(5) ? null : reader.GetString(5),
            HttpStatus: reader.IsDBNull(6) ? null : reader.GetInt32(6),
            CreatedAtUtc: ParseUtc(reader.GetString(7)),
            UpdatedAtUtc: ParseUtc(reader.GetString(8)),
            TerminalAtUtc: reader.IsDBNull(9) ? null : ParseUtc(reader.GetString(9))));
    }

    return rows;
}
// Translates a cached-call Kind string into its SiteCalls Channel string.
// Only ApiCallCached / DbWriteCached ever reach the tracking store
// (RecordEnqueueAsync is the cached-call entry point): DbWriteCached becomes
// DbOutbound and every other value — including null — falls back to
// ApiOutbound. Mirrors CachedCallLifecycleBridge's channel handling.
private static string KindToChannel(string kind) =>
    kind == nameof(Commons.Types.Enums.AuditKind.DbWriteCached)
        ? nameof(Commons.Types.Enums.AuditChannel.DbOutbound)
        : nameof(Commons.Types.Enums.AuditChannel.ApiOutbound);
private static DateTime ParseUtc(string raw)
{
return DateTime.Parse(
@@ -2,6 +2,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward;
@@ -49,13 +50,19 @@ public static class ServiceCollectionExtensions
// observable in the central audit log instead of producing a
// silent empty-string SourceSite.
var siteId = siteContext?.SiteId ?? string.Empty;
// M1.7: optional site operational-event log. Resolved through
// GetService so a host (or test) that has not called
// AddSiteEventLogging simply gets null and the S&F activity stays
// a no-op for site-event purposes.
var siteEventLogger = sp.GetService<ISiteEventLogger>();
return new StoreAndForwardService(
storage,
options,
logger,
replication,
cachedCallObserver,
siteId);
siteId,
siteEventLogger);
});
services.AddSingleton<ReplicationService>(sp =>
@@ -3,6 +3,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Observability;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward;
@@ -44,6 +45,15 @@ public class StoreAndForwardService
/// </summary>
private readonly ICachedCallLifecycleObserver? _cachedCallObserver;
/// <summary>
/// M1.7: optional site operational-event log. When non-null the service maps
/// its own buffer/retry/park activity (the same activity that drives
/// <see cref="OnActivity"/>) onto site events — <c>store_and_forward</c> for the
/// cached-call categories and <c>notification</c> for the site's
/// forward-to-central notification path. Best-effort and fire-and-forget so a
/// failing logger never affects delivery bookkeeping.
/// </summary>
private readonly ISiteEventLogger? _siteEventLogger;
/// <summary>
/// Audit Log #23 (M3 Bundle E — Task E4): site id stamped onto the
/// cached-call attempt context so the audit bridge can build the
/// <see cref="SiteCallOperational"/> half of the telemetry packet.
@@ -72,6 +82,18 @@ public class StoreAndForwardService
/// recognisable instead of an unattributable empty string.
/// </summary>
public const string UnknownSiteSentinel = "$unknown-site";
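/// <summary>
/// M1.7: the detail-string prefix written by <see cref="EnqueueAsync"/>
/// when a message is buffered for the retry sweep — either because an
/// immediate forward attempt threw, or because the message was buffered
/// without an immediate delivery attempt. <see cref="EmitSiteEvent"/> matches
/// on this same prefix to distinguish a forward <i>failure</i> (logged) from a
/// routine no-handler enqueue (not logged), so both the construction sites and
/// the check reference this single constant rather than duplicating the
/// literal — keeping the two ends from drifting apart.
/// NOTE(review): the prefix alone does not tell a thrown forward attempt apart
/// from a buffer-without-attempt enqueue; confirm that logging both as a
/// forward failure is intended.
/// </summary>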
private const string BufferedForRetryDetailPrefix = "Buffered for retry";
private Timer? _retryTimer;
private int _retryInProgress;
@@ -173,13 +195,20 @@ public class StoreAndForwardService
/// <param name="replication">Optional replication service for standby synchronization.</param>
/// <param name="cachedCallObserver">Optional observer for cached call lifecycle events.</param>
/// <param name="siteId">The site identifier this service belongs to.</param>
/// <param name="siteEventLogger">
/// M1.7: optional site operational-event log. When non-null, buffer/retry/park
/// activity is mirrored to site events (<c>store_and_forward</c> /
/// <c>notification</c> by category). Optional with a <c>null</c> default so the
/// many direct-construction tests still compile unchanged.
/// </param>
public StoreAndForwardService(
StoreAndForwardStorage storage,
StoreAndForwardOptions options,
ILogger<StoreAndForwardService> logger,
ReplicationService? replication = null,
ICachedCallLifecycleObserver? cachedCallObserver = null,
string siteId = "")
string siteId = "",
ISiteEventLogger? siteEventLogger = null)
{
_storage = storage;
_options = options;
@@ -191,6 +220,92 @@ public class StoreAndForwardService
// audit pipeline keying off SourceSite) never see an empty string and
// a misconfigured host is recognisable in the central log.
_siteId = string.IsNullOrWhiteSpace(siteId) ? UnknownSiteSentinel : siteId;
_siteEventLogger = siteEventLogger;
// M1.7: ride the existing activity hook to emit site operational events.
// RaiseActivity already isolates a throwing subscriber, so a failing
// event log can never be misclassified as a transient delivery failure
// (StoreAndForward-009). Only subscribe when a logger is wired so the
// legacy (test/central) construction path stays a no-op.
if (_siteEventLogger != null)
{
OnActivity += EmitSiteEvent;
}
}
/// <summary>
/// M1.7: translates a single store-and-forward activity into a site
/// operational event, following the Site Event Logging spec's per-category
/// scope (Component-SiteEventLogging.md §"Events Logged"):
/// <list type="bullet">
/// <item><description>An immediate <c>Delivered</c> (detail starting with
/// <c>Immediate</c>) is the normal hot path and is never logged, for any
/// category.</description></item>
/// <item><description>The notification forward-to-central path
/// (<see cref="StoreAndForwardCategory.Notification"/>) logs under
/// <c>notification</c> only for a <c>Queued</c> activity whose detail starts
/// with <see cref="BufferedForRetryDetailPrefix"/> (severity <c>Warning</c>)
/// or for a <c>Parked</c> activity (severity <c>Error</c>). Routine enqueue and
/// forward-success are deliberately not logged — central's
/// <c>Notifications</c> table is the record of audit; the site only fills the
/// in-transit blind spot when central is unreachable.</description></item>
/// <item><description>Every other category (the cached-call categories
/// <see cref="StoreAndForwardCategory.ExternalSystem"/> /
/// <see cref="StoreAndForwardCategory.CachedDbWrite"/>) logs under
/// <c>store_and_forward</c>: <c>Parked</c> as <c>Error</c>, a retry-loop
/// <c>Delivered</c> as <c>Info</c>, anything else as <c>Warning</c>.</description></item>
/// </list>
/// The event-log task is discarded rather than awaited (fire-and-forget).
/// NOTE(review): the forward-failure test keys on the detail prefix only, so
/// any <c>Queued</c> notification carrying that prefix is treated as a forward
/// failure — confirm every producer of the prefix really is one.
/// </summary>
/// <param name="action">Activity verb, e.g. <c>Queued</c>, <c>Parked</c>, <c>Delivered</c>.</param>
/// <param name="category">Store-and-forward category of the message.</param>
/// <param name="detail">Free-text activity detail; its prefix drives classification.</param>
private void EmitSiteEvent(string action, StoreAndForwardCategory category, string detail)
{
    if (_siteEventLogger is not { } eventLog)
    {
        return;
    }

    // An immediate-delivery success is routine; only a retry-loop success
    // (detail "Delivered to … after N retries") records a recovery.
    var isImmediateDelivery = action == "Delivered"
        && detail.StartsWith("Immediate", StringComparison.Ordinal);
    if (isImmediateDelivery)
    {
        return;
    }

    var isPark = action == "Parked";

    if (category != StoreAndForwardCategory.Notification)
    {
        // Cached-call categories: parking abandons delivery for retry
        // purposes (Error); a retry-loop Delivered is Info; queue / retry /
        // requeue are Warning.
        string level;
        if (isPark)
        {
            level = "Error";
        }
        else if (action == "Delivered")
        {
            level = "Info";
        }
        else
        {
            level = "Warning";
        }

        _ = eventLog.LogEventAsync(
            "store_and_forward", level, instanceId: null,
            source: "StoreAndForwardService",
            message: $"Operation {action.ToLowerInvariant()}: {detail}");
        return;
    }

    // Notification path: only a forward failure (buffered after the immediate
    // forward threw) or a park is worth an event. A routine
    // "No handler registered, buffered" enqueue and a forward-success
    // "Delivered" are intentionally dropped.
    var forwardFailed = action == "Queued"
        && detail.StartsWith(BufferedForRetryDetailPrefix, StringComparison.Ordinal);
    if (!forwardFailed && !isPark)
    {
        return;
    }

    _ = eventLog.LogEventAsync(
        "notification", isPark ? "Error" : "Warning", instanceId: null,
        source: "StoreAndForwardService",
        message: $"Notification {action.ToLowerInvariant()}: {detail}");
}
/// <summary>
@@ -434,7 +549,7 @@ public class StoreAndForwardService
message.LastError = ex.Message;
await BufferAsync(message);
RaiseActivity("Queued", category, $"Buffered for retry: {target} ({ex.Message})");
RaiseActivity("Queued", category, $"{BufferedForRetryDetailPrefix}: {target} ({ex.Message})");
return new StoreAndForwardResult(true, message.Id, true);
}
}
@@ -451,7 +566,7 @@ public class StoreAndForwardService
await BufferAsync(message);
RaiseActivity("Queued", category, attemptImmediateDelivery
? $"No handler registered, buffered: {target}"
: $"Buffered for retry: {target}");
: $"{BufferedForRetryDetailPrefix}: {target}");
return new StoreAndForwardResult(true, message.Id, true);
}
@@ -17,6 +17,7 @@
<ItemGroup>
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.Commons/ZB.MOM.WW.ScadaBridge.Commons.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.SiteEventLogging/ZB.MOM.WW.ScadaBridge.SiteEventLogging.csproj" />
</ItemGroup>
<ItemGroup>
@@ -0,0 +1,215 @@
using Grpc.Core;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.Audit;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Communication.Grpc;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using Google.Protobuf.WellKnownTypes;
using ProtoPullRequest = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsRequest;
using ProtoPullResponse = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullAuditEventsResponse;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Tests.Central;
/// <summary>
/// Bundle (M6) tests for <see cref="GrpcPullAuditEventsClient"/> — the
/// production <see cref="IPullAuditEventsClient"/> that dials a site over gRPC
/// and issues the <c>PullAuditEvents</c> unary RPC for the reconciliation loop.
/// The real <c>GrpcChannel</c> is replaced by an injected
/// <see cref="GrpcPullAuditEventsClient.IPullAuditEventsInvoker"/> seam so the
/// client's mapping / ordering / fault-swallowing behaviour can be asserted
/// without standing up a Kestrel HTTP/2 endpoint.
/// </summary>
public class GrpcPullAuditEventsClientTests
{
private static readonly DateTime BaseTime =
new(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
/// <summary>
/// Site enumerator stub: always reports the fixed site→endpoint entries it
/// was constructed with.
/// </summary>
private sealed class StaticEnumerator : ISiteEnumerator
{
    private readonly IReadOnlyList<SiteEntry> _sites;

    public StaticEnumerator(params SiteEntry[] sites)
    {
        _sites = sites;
    }

    public Task<IReadOnlyList<SiteEntry>> EnumerateAsync(CancellationToken ct = default)
    {
        return Task.FromResult(_sites);
    }
}
/// <summary>
/// Scripted invoker double. Each call bumps <see cref="CallCount"/> and
/// captures the endpoint and request it was handed; it then either returns the
/// canned proto response or throws the canned exception, which lets tests
/// drive the client's fault-swallowing path.
/// </summary>
private sealed class FakeInvoker : GrpcPullAuditEventsClient.IPullAuditEventsInvoker
{
    private readonly ProtoPullResponse? _cannedResponse;
    private readonly Exception? _cannedFault;

    private FakeInvoker(ProtoPullResponse? cannedResponse, Exception? cannedFault)
    {
        _cannedResponse = cannedResponse;
        _cannedFault = cannedFault;
    }

    /// <summary>Endpoint passed to the most recent call, if any.</summary>
    public string? Endpoint { get; private set; }

    /// <summary>Request passed to the most recent call, if any.</summary>
    public ProtoPullRequest? Request { get; private set; }

    /// <summary>Number of times <see cref="InvokeAsync"/> has been called.</summary>
    public int CallCount { get; private set; }

    /// <summary>Creates an invoker that answers every call with <paramref name="response"/>.</summary>
    public static FakeInvoker Returning(ProtoPullResponse response)
    {
        return new FakeInvoker(response, null);
    }

    /// <summary>Creates an invoker that throws <paramref name="ex"/> on every call.</summary>
    public static FakeInvoker Throwing(Exception ex)
    {
        return new FakeInvoker(null, ex);
    }

    public Task<ProtoPullResponse> InvokeAsync(
        string endpoint, ProtoPullRequest request, CancellationToken ct)
    {
        // Record the call before deciding the outcome so a throwing invoker
        // still exposes what it was asked to dial.
        CallCount++;
        Endpoint = endpoint;
        Request = request;

        return _cannedFault is null
            ? Task.FromResult(_cannedResponse!)
            : throw _cannedFault;
    }
}
/// <summary>
/// Builds a wire DTO for a delivered outbound API-call audit event from
/// <c>site-a</c> with the given id and occurrence time.
/// </summary>
private static AuditEventDto Dto(Guid id, DateTime occurredAtUtc)
{
    var auditEvent = ScadaBridgeAuditEventFactory.Create(
        eventId: id,
        occurredAtUtc: occurredAtUtc,
        channel: AuditChannel.ApiOutbound,
        kind: AuditKind.ApiCall,
        status: AuditStatus.Delivered,
        sourceSiteId: "site-a");

    return AuditEventDtoMapper.ToDto(auditEvent);
}
[Fact]
public async Task PullAsync_dials_the_resolved_endpoint_and_maps_events_oldest_first()
{
    // Arrange: the wire response deliberately lists the newer event first so
    // the test proves the client re-sorts.
    var olderId = Guid.NewGuid();
    var newerId = Guid.NewGuid();
    var wireResponse = new ProtoPullResponse { MoreAvailable = true };
    wireResponse.Events.Add(Dto(newerId, BaseTime.AddMinutes(5)));
    wireResponse.Events.Add(Dto(olderId, BaseTime));

    var invoker = FakeInvoker.Returning(wireResponse);
    var sites = new StaticEnumerator(new SiteEntry("site-a", "http://site-a:8083"));
    var client = new GrpcPullAuditEventsClient(
        sites,
        invoker,
        NullLogger<GrpcPullAuditEventsClient>.Instance);

    // Act
    var pulled = await client.PullAsync("site-a", BaseTime, batchSize: 256, CancellationToken.None);

    // Assert: the site's endpoint was dialled with a correctly shaped request.
    Assert.Equal("http://site-a:8083", invoker.Endpoint);
    Assert.NotNull(invoker.Request);
    Assert.Equal(256, invoker.Request!.BatchSize);
    Assert.Equal(BaseTime, invoker.Request.SinceUtc.ToDateTime());

    // Assert: events come back oldest-first and MoreAvailable is surfaced.
    Assert.True(pulled.MoreAvailable);
    Assert.Equal(2, pulled.Events.Count);
    Assert.Equal(olderId, pulled.Events[0].EventId);
    Assert.Equal(newerId, pulled.Events[1].EventId);
}
[Fact]
public async Task PullAsync_returns_empty_when_site_endpoint_is_unknown()
{
    // Arrange: an enumerator with no registered sites, so "site-a" cannot
    // be resolved to an endpoint.
    var noSites = new StaticEnumerator();
    var invoker = FakeInvoker.Returning(new ProtoPullResponse());
    var client = new GrpcPullAuditEventsClient(
        noSites,
        invoker,
        NullLogger<GrpcPullAuditEventsClient>.Instance);

    // Act
    var pulled = await client.PullAsync("site-a", BaseTime, batchSize: 256, CancellationToken.None);

    // Assert: an empty response, and the invoker was never dialled.
    Assert.Empty(pulled.Events);
    Assert.False(pulled.MoreAvailable);
    Assert.Equal(0, invoker.CallCount);
}
[Theory]
[InlineData(StatusCode.Unavailable)] // connection refused / site offline
[InlineData(StatusCode.DeadlineExceeded)] // slow site / network blip
[InlineData(StatusCode.Cancelled)]
public async Task PullAsync_swallows_tolerable_transport_faults_to_empty_response(StatusCode code)
{
    // Arrange: the invoker fails with a gRPC status for the given code.
    var transportFault = new RpcException(new Status(code, "transport fault"));
    var invoker = FakeInvoker.Throwing(transportFault);
    var sites = new StaticEnumerator(new SiteEntry("site-a", "http://site-a:8083"));
    var client = new GrpcPullAuditEventsClient(
        sites,
        invoker,
        NullLogger<GrpcPullAuditEventsClient>.Instance);

    // Act: per the IPullAuditEventsClient contract this MUST NOT throw.
    var pulled = await client.PullAsync("site-a", BaseTime, batchSize: 256, CancellationToken.None);

    // Assert: the fault collapses to an empty response.
    Assert.Empty(pulled.Events);
    Assert.False(pulled.MoreAvailable);
}
[Fact]
public async Task PullAsync_swallows_connection_layer_faults_to_empty_response()
{
    // Arrange: the invoker fails with a bare HttpRequestException — the kind
    // of fault raised (e.g. DNS / refused socket) before any gRPC status is
    // established. It is treated as tolerable too.
    var connectionFault = new HttpRequestException("connection refused");
    var failingInvoker = FakeInvoker.Throwing(connectionFault);
    var knownSite = new SiteEntry("site-a", "http://site-a:8083");
    var client = new GrpcPullAuditEventsClient(
        new StaticEnumerator(knownSite),
        failingInvoker,
        NullLogger<GrpcPullAuditEventsClient>.Instance);

    // Act
    var pulled = await client.PullAsync(
        "site-a", BaseTime, batchSize: 256, CancellationToken.None);

    // Assert
    Assert.Empty(pulled.Events);
    Assert.False(pulled.MoreAvailable);
}
[Fact]
public async Task PullAsync_swallows_unexpected_faults_to_empty_response()
{
    // I3(a): exercises the catch-all path. Even a non-transport fault (for
    // example a mapping/protocol error that surfaces as an
    // InvalidOperationException) has to be swallowed to an empty response:
    // audit reconciliation is best-effort, and a throw would only be
    // re-caught by the actor's per-site guard anyway.
    var unexpectedFault = new InvalidOperationException("boom");
    var failingInvoker = FakeInvoker.Throwing(unexpectedFault);
    var knownSite = new SiteEntry("site-a", "http://site-a:8083");
    var client = new GrpcPullAuditEventsClient(
        new StaticEnumerator(knownSite),
        failingInvoker,
        NullLogger<GrpcPullAuditEventsClient>.Instance);

    // Act
    var pulled = await client.PullAsync(
        "site-a", BaseTime, batchSize: 256, CancellationToken.None);

    // Assert
    Assert.Empty(pulled.Events);
    Assert.False(pulled.MoreAvailable);
}
[Fact]
public async Task PullAsync_with_minvalue_unspecified_cursor_does_not_throw_and_dials()
{
    // I3(b) / guards I2: the reconciliation cursor begins life as
    // DateTime.MinValue with Kind=Unspecified. EnsureUtc has to interpret it
    // AS UTC (system-wide "all timestamps are UTC" invariant) instead of
    // calling ToUniversalTime(): on a host with a positive UTC offset that
    // conversion underflows and Timestamp.FromDateTime throws
    // ArgumentOutOfRangeException, crashing the FIRST pull for every site.
    DateTime unspecifiedMin = default; // DateTime.MinValue, Kind=Unspecified
    Assert.Equal(DateTimeKind.Unspecified, unspecifiedMin.Kind);

    var recorder = FakeInvoker.Returning(new ProtoPullResponse());
    var knownSite = new SiteEntry("site-a", "http://site-a:8083");
    var client = new GrpcPullAuditEventsClient(
        new StaticEnumerator(knownSite),
        recorder,
        NullLogger<GrpcPullAuditEventsClient>.Instance);

    // Act: MUST NOT throw, and must actually dial the site.
    var pulled = await client.PullAsync(
        "site-a", unspecifiedMin, batchSize: 256, CancellationToken.None);

    // Assert: exactly one call, aimed at the resolved endpoint.
    Assert.Equal(1, recorder.CallCount);
    Assert.Equal("http://site-a:8083", recorder.Endpoint);
    Assert.NotNull(recorder.Request);
    // The unspecified-MinValue cursor travels verbatim as UTC MinValue, with
    // no local-time-zone conversion applied.
    Assert.Equal(DateTime.MinValue, recorder.Request!.SinceUtc.ToDateTime());
    Assert.Empty(pulled.Events);
    Assert.False(pulled.MoreAvailable);
}
}
@@ -0,0 +1,251 @@
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Communication.Grpc;
using ProtoPullRequest = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsRequest;
using ProtoPullResponse = ZB.MOM.WW.ScadaBridge.Communication.Grpc.PullSiteCallsResponse;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Tests.Central;
/// <summary>
/// Exercises <see cref="GrpcPullSiteCallsClient"/>, the production
/// <see cref="IPullSiteCallsClient"/> that dials a site over gRPC and issues the
/// <c>PullSiteCalls</c> unary RPC on behalf of the Site Call Audit (#22)
/// reconciliation loop. Rather than a real <c>GrpcChannel</c>, the tests inject a
/// fake <see cref="GrpcPullSiteCallsClient.IPullSiteCallsInvoker"/>, so mapping,
/// ordering, SourceSite re-stamping and fault swallowing can all be asserted
/// without hosting a Kestrel HTTP/2 endpoint. Laid out as a mirror of
/// <see cref="GrpcPullAuditEventsClientTests"/>.
/// </summary>
public class GrpcPullSiteCallsClientTests
{
    private const string SiteId = "site-a";
    private const string SiteEndpoint = "http://site-a:8083";

    private static readonly DateTime BaseTime =
        new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);

    /// <summary>Site enumerator that always hands back the fixed list it was built with.</summary>
    private sealed class StaticEnumerator : ISiteEnumerator
    {
        private readonly IReadOnlyList<SiteEntry> _sites;

        public StaticEnumerator(params SiteEntry[] sites)
        {
            _sites = sites;
        }

        public Task<IReadOnlyList<SiteEntry>> EnumerateAsync(CancellationToken ct = default)
        {
            return Task.FromResult(_sites);
        }
    }

    /// <summary>
    /// Invoker fake that records the endpoint, the request and the number of
    /// calls, then either returns a canned response or throws a canned exception.
    /// </summary>
    private sealed class FakeInvoker : GrpcPullSiteCallsClient.IPullSiteCallsInvoker
    {
        private readonly ProtoPullResponse? _response;
        private readonly Exception? _throw;

        private FakeInvoker(ProtoPullResponse? response, Exception? toThrow)
        {
            _response = response;
            _throw = toThrow;
        }

        public string? Endpoint { get; private set; }

        public ProtoPullRequest? Request { get; private set; }

        public int CallCount { get; private set; }

        public static FakeInvoker Returning(ProtoPullResponse response)
        {
            return new FakeInvoker(response, null);
        }

        public static FakeInvoker Throwing(Exception ex)
        {
            return new FakeInvoker(null, ex);
        }

        public Task<ProtoPullResponse> InvokeAsync(
            string endpoint, ProtoPullRequest request, CancellationToken ct)
        {
            // Record the call first so a throwing invocation is still observable.
            CallCount++;
            Endpoint = endpoint;
            Request = request;

            // The canned exception is thrown synchronously (not as a faulted Task).
            return _throw is null
                ? Task.FromResult(_response!)
                : throw _throw;
        }
    }

    /// <summary>The single site entry the "known site" tests register.</summary>
    private static SiteEntry KnownSite() => new SiteEntry(SiteId, SiteEndpoint);

    /// <summary>Builds the client under test over the given invoker and site list.</summary>
    private static GrpcPullSiteCallsClient ClientFor(FakeInvoker invoker, params SiteEntry[] sites) =>
        new GrpcPullSiteCallsClient(
            new StaticEnumerator(sites),
            invoker,
            NullLogger<GrpcPullSiteCallsClient>.Instance);

    // The site leaves SourceSite empty (it is not a tracking-store column) and
    // the client re-stamps it from the dialed siteId. DTOs are therefore minted
    // with an empty SourceSite so the tests can prove that re-stamp.
    private static SiteCallOperationalDto Dto(Guid id, DateTime updatedAtUtc)
    {
        var createdStamp = Timestamp.FromDateTime(BaseTime);
        var updatedStamp = Timestamp.FromDateTime(updatedAtUtc);

        return new SiteCallOperationalDto
        {
            TrackedOperationId = id.ToString(),
            Channel = "ApiOutbound",
            Target = "ERP.GetOrder",
            SourceSite = string.Empty,
            SourceNode = "node-a",
            Status = "Attempted",
            RetryCount = 1,
            LastError = string.Empty,
            CreatedAtUtc = createdStamp,
            UpdatedAtUtc = updatedStamp,
        };
    }

    [Fact]
    public async Task PullAsync_dials_resolved_endpoint_maps_oldest_first_and_restamps_source_site()
    {
        var older = Guid.NewGuid();
        var newer = Guid.NewGuid();

        // The wire payload is deliberately newest-first to prove the client sorts.
        var wire = new ProtoPullResponse
        {
            MoreAvailable = true,
            Operationals =
            {
                Dto(newer, BaseTime.AddMinutes(5)),
                Dto(older, BaseTime),
            },
        };
        var recorder = FakeInvoker.Returning(wire);
        var client = ClientFor(recorder, KnownSite());

        var pulled = await client.PullAsync(
            SiteId, BaseTime, batchSize: 256, CancellationToken.None);

        // Endpoint resolution + request shaping.
        Assert.Equal(SiteEndpoint, recorder.Endpoint);
        Assert.NotNull(recorder.Request);
        Assert.Equal(256, recorder.Request!.BatchSize);
        Assert.Equal(BaseTime, recorder.Request.SinceUtc.ToDateTime());

        // Mapping + ordering + MoreAvailable surface.
        Assert.True(pulled.MoreAvailable);
        Assert.Equal(2, pulled.SiteCalls.Count);
        var first = pulled.SiteCalls[0];
        var second = pulled.SiteCalls[1];
        Assert.Equal(older, first.TrackedOperationId.Value);
        Assert.Equal(newer, second.TrackedOperationId.Value);

        // SourceSite is re-stamped from the dialed siteId (the DTO carried empty).
        Assert.Equal(SiteId, first.SourceSite);
        Assert.Equal(SiteId, second.SourceSite);

        // Round-tripped fields survive FromDto.
        Assert.Equal("ApiOutbound", first.Channel);
        Assert.Equal("node-a", first.SourceNode);
        Assert.Equal(1, first.RetryCount);
    }

    [Fact]
    public async Task PullAsync_returns_empty_when_site_endpoint_is_unknown()
    {
        var recorder = FakeInvoker.Returning(new ProtoPullResponse());
        var client = ClientFor(recorder); // no sites registered

        var pulled = await client.PullAsync(
            SiteId, BaseTime, batchSize: 256, CancellationToken.None);

        Assert.Empty(pulled.SiteCalls);
        Assert.False(pulled.MoreAvailable);
        Assert.Equal(0, recorder.CallCount); // never dialled — nothing to dial
    }

    [Theory]
    [InlineData(StatusCode.Unavailable)]
    [InlineData(StatusCode.DeadlineExceeded)]
    [InlineData(StatusCode.Cancelled)]
    public async Task PullAsync_swallows_tolerable_transport_faults_to_empty_response(StatusCode code)
    {
        var transportFault = new RpcException(new Status(code, "transport fault"));
        var client = ClientFor(FakeInvoker.Throwing(transportFault), KnownSite());

        var pulled = await client.PullAsync(
            SiteId, BaseTime, batchSize: 256, CancellationToken.None);

        Assert.Empty(pulled.SiteCalls);
        Assert.False(pulled.MoreAvailable);
    }

    [Fact]
    public async Task PullAsync_swallows_connection_layer_faults_to_empty_response()
    {
        var connectionFault = new HttpRequestException("connection refused");
        var client = ClientFor(FakeInvoker.Throwing(connectionFault), KnownSite());

        var pulled = await client.PullAsync(
            SiteId, BaseTime, batchSize: 256, CancellationToken.None);

        Assert.Empty(pulled.SiteCalls);
        Assert.False(pulled.MoreAvailable);
    }

    [Fact]
    public async Task PullAsync_swallows_unexpected_faults_to_empty_response()
    {
        var unexpectedFault = new InvalidOperationException("boom");
        var client = ClientFor(FakeInvoker.Throwing(unexpectedFault), KnownSite());

        var pulled = await client.PullAsync(
            SiteId, BaseTime, batchSize: 256, CancellationToken.None);

        Assert.Empty(pulled.SiteCalls);
        Assert.False(pulled.MoreAvailable);
    }

    [Fact]
    public async Task PullAsync_skips_poison_row_and_returns_the_good_rows()
    {
        // Poison-row resilience: a single malformed operational (its
        // unparseable TrackedOperationId fails SiteCallDtoMapper.FromDto →
        // Guid.Parse) has to be skipped+logged PER ROW instead of sinking the
        // whole batch through the outer catch-all. The two good rows survive,
        // re-stamped and ordered oldest-first.
        var older = Guid.NewGuid();
        var newer = Guid.NewGuid();

        // The malformed row sits in the middle of the batch.
        var poison = Dto(Guid.NewGuid(), BaseTime.AddMinutes(2));
        poison.TrackedOperationId = "not-a-guid";

        var wire = new ProtoPullResponse
        {
            MoreAvailable = false,
            Operationals =
            {
                Dto(newer, BaseTime.AddMinutes(5)),
                poison,
                Dto(older, BaseTime),
            },
        };
        var client = ClientFor(FakeInvoker.Returning(wire), KnownSite());

        // Must NOT throw — the bad row is dropped and the good rows come back.
        var pulled = await client.PullAsync(
            SiteId, BaseTime, batchSize: 256, CancellationToken.None);

        Assert.Equal(2, pulled.SiteCalls.Count);
        // Survivors are oldest-first with SourceSite re-stamped from the dialed siteId.
        Assert.Equal(older, pulled.SiteCalls[0].TrackedOperationId.Value);
        Assert.Equal(newer, pulled.SiteCalls[1].TrackedOperationId.Value);
        Assert.Equal(SiteId, pulled.SiteCalls[0].SourceSite);
        Assert.Equal(SiteId, pulled.SiteCalls[1].SourceSite);
        Assert.False(pulled.MoreAvailable);
    }

    [Fact]
    public async Task PullAsync_with_minvalue_unspecified_cursor_does_not_throw_and_dials()
    {
        // The reconciliation cursor begins as DateTime.MinValue with
        // Kind=Unspecified. EnsureUtc has to interpret it AS UTC (system-wide
        // invariant) instead of calling ToUniversalTime(): on a host with a
        // positive UTC offset that underflows and Timestamp.FromDateTime
        // throws, crashing the FIRST pull for every site.
        DateTime unspecifiedMin = default;
        Assert.Equal(DateTimeKind.Unspecified, unspecifiedMin.Kind);

        var recorder = FakeInvoker.Returning(new ProtoPullResponse());
        var client = ClientFor(recorder, KnownSite());

        var pulled = await client.PullAsync(
            SiteId, unspecifiedMin, batchSize: 256, CancellationToken.None);

        Assert.Equal(1, recorder.CallCount);
        Assert.Equal(SiteEndpoint, recorder.Endpoint);
        Assert.NotNull(recorder.Request);
        Assert.Equal(DateTime.MinValue, recorder.Request!.SinceUtc.ToDateTime());
        Assert.Empty(pulled.SiteCalls);
        Assert.False(pulled.MoreAvailable);
    }
}
@@ -0,0 +1,91 @@
using Microsoft.Extensions.DependencyInjection;
using NSubstitute;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using SiteEntity = ZB.MOM.WW.ScadaBridge.Commons.Entities.Sites.Site;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Tests.Central;
/// <summary>
/// Unit tests for the production <see cref="SiteEnumerator"/>: the central
/// reconciliation-singleton collaborator that turns config-DB
/// <see cref="SiteEntity"/> rows into the <see cref="SiteEntry"/> targets polled
/// by the <see cref="SiteAuditReconciliationActor"/>.
/// </summary>
/// <remarks>
/// <see cref="ISiteRepository"/> is a SCOPED EF Core service, so the enumerator
/// opens a fresh DI scope on every <see cref="SiteEnumerator.EnumerateAsync"/>
/// call (the same per-tick scope pattern the reconciliation actor uses). These
/// tests register a substituted repository with scoped lifetime; the
/// enumerator's <c>CreateAsyncScope</c> then resolves it, letting the
/// projection and blank-address filtering run without an MSSQL container.
/// </remarks>
public class SiteEnumeratorTests
{
    /// <summary>Builds a site entity with the given identifier and gRPC node addresses.</summary>
    private static SiteEntity SiteWith(string identifier, string? grpcNodeA, string? grpcNodeB = null) =>
        new SiteEntity($"Display {identifier}", identifier)
        {
            GrpcNodeAAddress = grpcNodeA,
            GrpcNodeBAddress = grpcNodeB,
        };

    /// <summary>Builds a provider that exposes <paramref name="repository"/> as a scoped service.</summary>
    private static IServiceProvider BuildProvider(ISiteRepository repository)
    {
        // Registered as scoped to match the production (EF Core) lifetime; the
        // enumerator therefore has to open a scope before it can resolve it.
        return new ServiceCollection()
            .AddScoped(_ => repository)
            .BuildServiceProvider();
    }

    /// <summary>Creates a repository substitute whose site listing returns <paramref name="sites"/>.</summary>
    private static ISiteRepository RepositoryReturning(params SiteEntity[] sites)
    {
        var repository = Substitute.For<ISiteRepository>();
        repository
            .GetAllSitesAsync(Arg.Any<CancellationToken>())
            .Returns(new List<SiteEntity>(sites));
        return repository;
    }

    [Fact]
    public async Task EnumerateAsync_ProjectsSitesWithNodeAAddress_AndSkipsBlankOnes()
    {
        var repository = RepositoryReturning(
            SiteWith("site-a", "http://site-a:8083"),
            SiteWith("site-b", grpcNodeA: " ")); // blank NodeA -> skipped
        var sut = new SiteEnumerator(BuildProvider(repository));

        var entries = await sut.EnumerateAsync();

        var only = Assert.Single(entries);
        Assert.Equal("site-a", only.SiteId);
        Assert.Equal("http://site-a:8083", only.GrpcEndpoint);
    }

    [Fact]
    public async Task EnumerateAsync_SkipsNullNodeAAddress()
    {
        var repository = RepositoryReturning(SiteWith("site-null", grpcNodeA: null));
        var sut = new SiteEnumerator(BuildProvider(repository));

        var entries = await sut.EnumerateAsync();

        Assert.Empty(entries);
    }

    [Fact]
    public async Task EnumerateAsync_ReturnsEmpty_WhenNoSites()
    {
        var repository = RepositoryReturning();
        var sut = new SiteEnumerator(BuildProvider(repository));

        var entries = await sut.EnumerateAsync();

        Assert.Empty(entries);
    }
}
@@ -0,0 +1,221 @@
using Akka.TestKit.Xunit2;
using Google.Protobuf.WellKnownTypes;
using Grpc.Core;
using Microsoft.Extensions.Logging.Abstractions;
using NSubstitute;
using NSubstitute.ExceptionExtensions;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Communication.Grpc;
namespace ZB.MOM.WW.ScadaBridge.Communication.Tests;
/// <summary>
/// Covers <see cref="SiteStreamGrpcServer.PullSiteCalls"/>, the Site Call Audit
/// (#22) reconciliation handler, by checking the request →
/// <see cref="IOperationTrackingStore.ReadChangedSinceAsync"/> → response
/// round-trip through the gRPC handler. The store is an NSubstitute stub, so no
/// test touches SQLite. Mirrors <see cref="SiteStreamPullAuditEventsTests"/>,
/// except that there is no MarkReconciled step (the tracking store is the
/// operational source of truth; the central SiteCalls mirror is upsert-on-newer).
/// </summary>
public class SiteStreamPullSiteCallsTests : TestKit
{
    private readonly ISiteStreamSubscriber _subscriber = Substitute.For<ISiteStreamSubscriber>();

    /// <summary>Creates a server with no operation-tracking store wired.</summary>
    private SiteStreamGrpcServer CreateServer()
    {
        return new SiteStreamGrpcServer(_subscriber, NullLogger<SiteStreamGrpcServer>.Instance);
    }

    /// <summary>Creates a server and wires <paramref name="store"/> into it.</summary>
    private SiteStreamGrpcServer CreateServerWith(IOperationTrackingStore store)
    {
        var server = CreateServer();
        server.SetOperationTrackingStore(store);
        return server;
    }

    /// <summary>Substituted call context whose cancellation token is <paramref name="ct"/>.</summary>
    private static ServerCallContext NewContext(CancellationToken ct = default)
    {
        var callContext = Substitute.For<ServerCallContext>();
        callContext.CancellationToken.Returns(ct);
        return callContext;
    }

    /// <summary>Mints an operational row with a fresh id and fixed field values.</summary>
    private static SiteCallOperational NewOperational()
    {
        var createdAt = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
        var updatedAt = new DateTime(2026, 5, 20, 10, 1, 0, DateTimeKind.Utc);

        return new SiteCallOperational(
            TrackedOperationId: TrackedOperationId.New(),
            Channel: "ApiOutbound",
            Target: "ERP.GetOrder",
            SourceSite: string.Empty,
            SourceNode: "node-a",
            Status: "Attempted",
            RetryCount: 1,
            LastError: null,
            HttpStatus: 503,
            CreatedAtUtc: createdAt,
            UpdatedAtUtc: updatedAt,
            TerminalAtUtc: null);
    }

    /// <summary>Store stub whose read returns <paramref name="rows"/> for any arguments.</summary>
    private static IOperationTrackingStore StoreReturning(IReadOnlyList<SiteCallOperational> rows)
    {
        var store = Substitute.For<IOperationTrackingStore>();
        store.ReadChangedSinceAsync(Arg.Any<DateTime>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
            .Returns(rows);
        return store;
    }

    /// <summary>
    /// Store stub that reports the <c>since</c> argument of each read to
    /// <paramref name="onSince"/> and then returns no rows.
    /// </summary>
    private static IOperationTrackingStore StoreCapturingSince(Action<DateTime> onSince)
    {
        var store = Substitute.For<IOperationTrackingStore>();
        store.ReadChangedSinceAsync(Arg.Any<DateTime>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
            .Returns(call =>
            {
                onSince(call.ArgAt<DateTime>(0));
                return (IReadOnlyList<SiteCallOperational>)Array.Empty<SiteCallOperational>();
            });
        return store;
    }

    /// <summary>Builds a pull request with an explicit cursor and batch size.</summary>
    private static PullSiteCallsRequest RequestSince(DateTime sinceUtc, int batchSize) =>
        new PullSiteCallsRequest
        {
            SinceUtc = Timestamp.FromDateTime(sinceUtc),
            BatchSize = batchSize,
        };

    [Fact]
    public async Task PullSiteCalls_NoStoreWired_ReturnsEmptyResponse()
    {
        // SetOperationTrackingStore is deliberately NOT called: this simulates
        // a central-only host or a wiring-incomplete startup window.
        var server = CreateServer();
        var request = RequestSince(DateTime.UtcNow.AddMinutes(-5), batchSize: 100);

        var response = await server.PullSiteCalls(request, NewContext());

        Assert.Empty(response.Operationals);
        Assert.False(response.MoreAvailable);
    }

    [Fact]
    public async Task PullSiteCalls_With5Rows_ReturnsAllFiveDtos()
    {
        var rows = Enumerable.Range(0, 5).Select(_ => NewOperational()).ToList();
        var server = CreateServerWith(StoreReturning(rows));
        // The batch size exceeds the returned count, so MoreAvailable should be false.
        var request = RequestSince(DateTime.UtcNow.AddHours(-1), batchSize: 100);

        var response = await server.PullSiteCalls(request, NewContext());

        Assert.Equal(5, response.Operationals.Count);
        Assert.False(response.MoreAvailable); // 5 < 100

        var expectedIds = rows.Select(r => r.TrackedOperationId.ToString()).ToHashSet();
        var returnedIds = response.Operationals.Select(d => d.TrackedOperationId).ToHashSet();
        Assert.True(expectedIds.SetEquals(returnedIds));
    }

    [Fact]
    public async Task PullSiteCalls_PassesSinceUtcThroughVerbatim()
    {
        var capturedSince = DateTime.MinValue;
        var server = CreateServerWith(StoreCapturingSince(seen => capturedSince = seen));
        var since = new DateTime(2026, 5, 20, 9, 30, 0, DateTimeKind.Utc);
        var request = RequestSince(since, batchSize: 50);

        var response = await server.PullSiteCalls(request, NewContext());

        Assert.Empty(response.Operationals);
        Assert.False(response.MoreAvailable);
        Assert.Equal(since, capturedSince);
    }

    [Fact]
    public async Task PullSiteCalls_SinceUtcUnset_PassesDateTimeMinValue()
    {
        // First reconciliation cycle: central has no cursor yet, so the
        // request's SinceUtc wrapper is absent (null). The handler has to fall
        // back to DateTime.MinValue ("pull from the beginning of recorded
        // history") without a null-deref — proving the very first cycle
        // doesn't crash.
        var captured = new DateTime(2099, 1, 1, 0, 0, 0, DateTimeKind.Utc); // sentinel
        var server = CreateServerWith(StoreCapturingSince(seen => captured = seen));

        // SinceUtc is intentionally left unset (null): the proto wrapper is absent.
        var request = new PullSiteCallsRequest { BatchSize = 100 };

        var response = await server.PullSiteCalls(request, NewContext());

        Assert.Empty(response.Operationals);
        Assert.False(response.MoreAvailable);
        Assert.Equal(DateTime.MinValue, captured);
    }

    [Fact]
    public async Task PullSiteCalls_BatchSize3_Returns3Rows_MoreAvailableTrue()
    {
        var rows = Enumerable.Range(0, 3).Select(_ => NewOperational()).ToList();
        var server = CreateServerWith(StoreReturning(rows));
        var request = RequestSince(DateTime.UtcNow.AddHours(-1), batchSize: 3);

        var response = await server.PullSiteCalls(request, NewContext());

        Assert.Equal(3, response.Operationals.Count);
        // A saturated batch tells central that a follow-up pull is needed.
        Assert.True(response.MoreAvailable);
    }

    [Fact]
    public async Task PullSiteCalls_NonPositiveBatchSize_ThrowsInvalidArgument()
    {
        var server = CreateServerWith(Substitute.For<IOperationTrackingStore>());
        var request = RequestSince(DateTime.UtcNow.AddHours(-1), batchSize: 0);

        var rpcFault = await Assert.ThrowsAsync<RpcException>(
            () => server.PullSiteCalls(request, NewContext()));

        Assert.Equal(StatusCode.InvalidArgument, rpcFault.StatusCode);
    }

    [Fact]
    public async Task PullSiteCalls_ReadThrows_ReturnsEmptyResponse()
    {
        // Best-effort: a read fault must never abort the reconciliation tick.
        var store = Substitute.For<IOperationTrackingStore>();
        store.ReadChangedSinceAsync(Arg.Any<DateTime>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
            .ThrowsAsync(new InvalidOperationException("SQLite disposed mid-call"));
        var server = CreateServerWith(store);
        var request = RequestSince(DateTime.UtcNow.AddHours(-1), batchSize: 100);

        // Must NOT throw: the handler swallows the fault into an empty response.
        var response = await server.PullSiteCalls(request, NewContext());

        Assert.Empty(response.Operationals);
        Assert.False(response.MoreAvailable);
    }
}
@@ -117,6 +117,22 @@ public class CentralActorPathTests : IAsyncLifetime
public async Task CentralActors_NotificationOutboxProxy_Exists()
{
    // Hands the notification-outbox proxy path to the shared existence check.
    await AssertActorExists("/user/notification-outbox-proxy");
}
/// <summary>Runs the shared existence check for the audit-log purge singleton path.</summary>
[Fact]
public async Task CentralActors_AuditLogPurgeSingleton_Exists()
{
    await AssertActorExists("/user/audit-log-purge-singleton");
}
/// <summary>Runs the shared existence check for the audit-log purge proxy path.</summary>
[Fact]
public async Task CentralActors_AuditLogPurgeProxy_Exists()
{
    await AssertActorExists("/user/audit-log-purge-proxy");
}
/// <summary>Runs the shared existence check for the site-audit reconciliation singleton path.</summary>
[Fact]
public async Task CentralActors_SiteAuditReconciliationSingleton_Exists()
{
    await AssertActorExists("/user/site-audit-reconciliation-singleton");
}
/// <summary>Runs the shared existence check for the site-audit reconciliation proxy path.</summary>
[Fact]
public async Task CentralActors_SiteAuditReconciliationProxy_Exists()
{
    await AssertActorExists("/user/site-audit-reconciliation-proxy");
}
private async Task AssertActorExists(string path)
{
Assert.NotNull(_actorSystem);
@@ -7,6 +7,7 @@ using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.ScadaBridge.AuditLog;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.AuditLog.Site;
using ZB.MOM.WW.ScadaBridge.AuditLog.Site.Telemetry;
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
@@ -238,6 +239,36 @@ public class CentralAuditWiringTests : IDisposable
Assert.NotNull(forwarder);
Assert.IsType<CachedCallTelemetryForwarder>(forwarder);
}
/// <summary>
/// I4 (review): the central composition root has to register the production
/// reconciliation collaborators through
/// <c>AddAuditLogCentralReconciliationClient</c>. Checking here that the
/// concrete implementation resolves gives a faster, clearer signal than a
/// runtime "actor not found" / cryptic <c>GetRequiredService</c> throw in
/// <c>AkkaHostedService.RegisterCentralActors</c>, should that helper ever be
/// dropped from <c>Program.cs</c>.
/// </summary>
[Fact]
public void Central_Resolves_ISiteEnumerator_AsSiteEnumerator()
{
    var services = _factory.Services;

    var resolved = services.GetService<ISiteEnumerator>();

    Assert.NotNull(resolved);
    Assert.IsType<SiteEnumerator>(resolved);
}
/// <summary>
/// I4 (review): companion to <see cref="Central_Resolves_ISiteEnumerator_AsSiteEnumerator"/>.
/// The production gRPC pull client must resolve on the central composition
/// root so that the SiteAuditReconciliationActor singleton can dial sites.
/// </summary>
[Fact]
public void Central_Resolves_IPullAuditEventsClient_AsGrpcClient()
{
    var services = _factory.Services;

    var resolved = services.GetService<IPullAuditEventsClient>();

    Assert.NotNull(resolved);
    Assert.IsType<GrpcPullAuditEventsClient>(resolved);
}
}
/// <summary>
@@ -11,5 +11,108 @@ public class SiteCallAuditOptionsTests
Assert.Equal(TimeSpan.FromMinutes(10), options.StuckAgeThreshold);
// KPI interval mirrors NotificationOutboxOptions.DeliveredKpiWindow.
Assert.Equal(TimeSpan.FromMinutes(1), options.KpiInterval);
// Reconciliation tick cadence mirrors SiteAuditReconciliationOptions (#23).
Assert.Equal(TimeSpan.FromMinutes(5), options.ReconciliationInterval);
// Purge tick cadence mirrors AuditLogPurgeOptions.
Assert.Equal(TimeSpan.FromHours(24), options.PurgeInterval);
// Retention window mirrors the central audit-store retention policy.
Assert.Equal(365, options.RetentionDays);
}
[Fact]
public void ResolvedReconciliationInterval_DefaultsToConfiguredValue()
{
    // With default options the resolved interval equals the configured one.
    var defaults = new SiteCallAuditOptions();

    var configured = defaults.ReconciliationInterval;

    Assert.Equal(configured, defaults.ResolvedReconciliationInterval);
}
[Theory]
[InlineData(0)]
[InlineData(-5)]
public void ResolvedReconciliationInterval_ClampsZeroOrNegativeToMinimum(int configuredSeconds)
{
    // A misconfigured 0 / negative interval may never resolve to
    // TimeSpan.Zero, which would make Akka's
    // ScheduleTellRepeatedlyCancelable spin. The documented floor is >= 1 second.
    var floor = TimeSpan.FromSeconds(1);
    var misconfigured = new SiteCallAuditOptions
    {
        ReconciliationInterval = TimeSpan.FromSeconds(configuredSeconds),
    };

    var resolved = misconfigured.ResolvedReconciliationInterval;

    Assert.True(
        resolved >= floor,
        $"expected the resolved interval to clamp to >= 1s, got {resolved}");
    Assert.Equal(floor, resolved);
}
[Fact]
public void ResolvedReconciliationInterval_OverrideBypassesClamp()
{
    // The test-only override takes the cadence below the clamp floor so unit
    // tests can drive the tick at millisecond cadence.
    var fiftyMillis = TimeSpan.FromMilliseconds(50);
    var overridden = new SiteCallAuditOptions
    {
        ReconciliationInterval = TimeSpan.FromMinutes(5),
        ReconciliationIntervalOverride = fiftyMillis,
    };

    var resolved = overridden.ResolvedReconciliationInterval;

    Assert.Equal(fiftyMillis, resolved);
}
[Fact]
public void ResolvedPurgeInterval_DefaultsToConfiguredValue()
{
    // With default options the resolved purge interval equals the configured one.
    var defaults = new SiteCallAuditOptions();

    var configured = defaults.PurgeInterval;

    Assert.Equal(configured, defaults.ResolvedPurgeInterval);
}
[Theory]
[InlineData(0)]
[InlineData(-30)]
public void ResolvedPurgeInterval_ClampsZeroOrNegativeToMinimum(int configuredSeconds)
{
    // A misconfigured 0 / negative purge interval is clamped to the
    // documented >= 1 minute floor (the purge is daily, hence a more generous
    // floor than the reconciliation tick's).
    var floor = TimeSpan.FromMinutes(1);
    var misconfigured = new SiteCallAuditOptions
    {
        PurgeInterval = TimeSpan.FromSeconds(configuredSeconds),
    };

    var resolved = misconfigured.ResolvedPurgeInterval;

    Assert.True(
        resolved >= floor,
        $"expected the resolved interval to clamp to >= 1min, got {resolved}");
    Assert.Equal(floor, resolved);
}
[Fact]
public void ResolvedPurgeInterval_BelowMinuteFloorClampsToMinimum()
{
    // Even a positive config value is clamped when it is under one minute.
    var subMinuteConfig = new SiteCallAuditOptions
    {
        PurgeInterval = TimeSpan.FromSeconds(5),
    };

    var resolved = subMinuteConfig.ResolvedPurgeInterval;

    Assert.Equal(TimeSpan.FromMinutes(1), resolved);
}
[Fact]
public void ResolvedPurgeInterval_OverrideBypassesClamp()
{
    // The test-only override takes the cadence below the clamp floor so unit
    // tests can drive the purge tick at millisecond cadence.
    var fiftyMillis = TimeSpan.FromMilliseconds(50);
    var overridden = new SiteCallAuditOptions
    {
        PurgeInterval = TimeSpan.FromHours(24),
        PurgeIntervalOverride = fiftyMillis,
    };

    var resolved = overridden.ResolvedPurgeInterval;

    Assert.Equal(fiftyMillis, resolved);
}
}
@@ -0,0 +1,175 @@
using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
namespace ZB.MOM.WW.ScadaBridge.SiteCallAudit.Tests;
/// <summary>
/// Purge-scheduler tests for <see cref="SiteCallAuditActor"/> (#22, Piece B).
/// Exercises the daily terminal-row purge tick in-memory — a recording
/// <see cref="ISiteCallAuditRepository"/> captures the
/// <see cref="ISiteCallAuditRepository.PurgeTerminalAsync"/> threshold the actor
/// computes, with no live MSSQL fixture. The reconciliation collaborators are
/// inert stubs (the purge tick doesn't use them, but they must be present to
/// arm the scheduler — both timers gate on the collaborators together).
/// </summary>
public class SiteCallAuditPurgeTests : TestKit
{
/// <summary>
/// Options for purge tests: the purge tick runs every 100 ms via its override
/// while the reconciliation tick is held at 5 minutes.
/// </summary>
private static SiteCallAuditOptions FastPurgeOptions(int retentionDays = 365)
{
    return new SiteCallAuditOptions
    {
        // The reconciliation tick stays slow so it doesn't compete with the
        // purge tick inside the test window; the purge tick is dropped to
        // 100 ms through its override.
        ReconciliationIntervalOverride = TimeSpan.FromMinutes(5),
        PurgeIntervalOverride = TimeSpan.FromMilliseconds(100),
        RetentionDays = retentionDays,
    };
}
/// <summary>
/// Enumerator that always reports zero sites. The purge path never touches
/// it, but one must be supplied to arm the scheduler.
/// </summary>
private sealed class EmptyEnumerator : ISiteEnumerator
{
    public Task<IReadOnlyList<SiteEntry>> EnumerateAsync(CancellationToken ct = default)
    {
        IReadOnlyList<SiteEntry> none = Array.Empty<SiteEntry>();
        return Task.FromResult(none);
    }
}
/// <summary>
/// Pull client that returns an empty, non-continuing response for every
/// call. It exists only so the scheduler can be armed.
/// </summary>
private sealed class NoOpPullClient : IPullSiteCallsClient
{
    public Task<PullSiteCallsResponse> PullAsync(
        string siteId, DateTime sinceUtc, int batchSize, CancellationToken ct)
    {
        var nothing = new PullSiteCallsResponse(Array.Empty<SiteCall>(), MoreAvailable: false);
        return Task.FromResult(nothing);
    }
}
/// <summary>
/// Repository double that records each threshold handed to
/// <see cref="PurgeTerminalAsync"/> and answers with the configured
/// deleted-row count. Every other member is an inert stub.
/// </summary>
private sealed class RecordingRepo : ISiteCallAuditRepository
{
    public List<DateTime> PurgeThresholds { get; } = new List<DateTime>();

    public int RowsDeletedPerCall { get; set; }

    public Task<int> PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default)
    {
        // Remember the cutoff, then report the configured number of deleted rows.
        PurgeThresholds.Add(olderThanUtc);
        return Task.FromResult(RowsDeletedPerCall);
    }

    public Task UpsertAsync(SiteCall siteCall, CancellationToken ct = default)
    {
        return Task.CompletedTask;
    }

    public Task<SiteCall?> GetAsync(TrackedOperationId id, CancellationToken ct = default)
    {
        return Task.FromResult<SiteCall?>(null);
    }

    public Task<IReadOnlyList<SiteCall>> QueryAsync(
        SiteCallQueryFilter filter, SiteCallPaging paging, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCall> none = Array.Empty<SiteCall>();
        return Task.FromResult(none);
    }

    public Task<SiteCallKpiSnapshot> ComputeKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        var zeroed = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0);
        return Task.FromResult(zeroed);
    }

    public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
        return Task.FromResult(none);
    }
}
/// <summary>
/// Repository fake whose <see cref="PurgeTerminalAsync"/> counts the attempt and
/// then always throws — used to prove continue-on-error keeps the singleton alive.
/// Every other member is an inert stub.
/// </summary>
private sealed class PurgeThrowingRepo : ISiteCallAuditRepository
{
    /// <summary>Number of purge attempts observed; incremented atomically before the throw.</summary>
    public int PurgeCallCount;

    public Task<int> PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default)
    {
        Interlocked.Increment(ref PurgeCallCount);
        throw new InvalidOperationException("simulated purge failure");
    }

    public Task UpsertAsync(SiteCall siteCall, CancellationToken ct = default)
    {
        return Task.CompletedTask;
    }

    public Task<SiteCall?> GetAsync(TrackedOperationId id, CancellationToken ct = default)
    {
        return Task.FromResult<SiteCall?>(null);
    }

    public Task<IReadOnlyList<SiteCall>> QueryAsync(
        SiteCallQueryFilter f, SiteCallPaging p, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCall> none = Array.Empty<SiteCall>();
        return Task.FromResult(none);
    }

    public Task<SiteCallKpiSnapshot> ComputeKpisAsync(
        DateTime a, DateTime b, CancellationToken ct = default)
    {
        var zeroed = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0);
        return Task.FromResult(zeroed);
    }

    public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
        DateTime a, DateTime b, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
        return Task.FromResult(none);
    }
}
/// <summary>
/// Spawns a <see cref="SiteCallAuditActor"/> over the supplied repository and
/// options, paired with an empty site enumerator and a no-op pull client.
/// </summary>
private IActorRef CreateActor(ISiteCallAuditRepository repo, SiteCallAuditOptions options)
{
    var props = Props.Create(() => new SiteCallAuditActor(
        repo,
        new EmptyEnumerator(),
        new NoOpPullClient(),
        NullLogger<SiteCallAuditActor>.Instance,
        options));

    return Sys.ActorOf(props);
}
// ---------------------------------------------------------------------
// 1. PurgeTick_CallsPurgeTerminalAsync_WithRetentionThreshold
// ---------------------------------------------------------------------
[Fact]
public void PurgeTick_CallsPurgeTerminalAsync_WithRetentionThreshold()
{
    // 30 days is deliberately NOT the 365-day default, so a threshold
    // derived from the default cannot satisfy the check below by accident.
    var recorder = new RecordingRepo { RowsDeletedPerCall = 7 };
    CreateActor(recorder, FastPurgeOptions(retentionDays: 30));

    AwaitAssert(
        () =>
        {
            var calls = recorder.PurgeThresholds.Count;
            Assert.True(calls >= 1, $"expected >= 1 PurgeTerminalAsync call, got {calls}");
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));

    // The first recorded cutoff must sit at roughly UtcNow - 30 days; the
    // one-minute tolerance absorbs the gap between the tick firing and this check.
    var actual = recorder.PurgeThresholds[0];
    var wanted = DateTime.UtcNow - TimeSpan.FromDays(30);
    var driftMinutes = Math.Abs((actual - wanted).TotalMinutes);
    Assert.True(
        driftMinutes < 1.0,
        $"purge threshold {actual:o} should be within 1 minute of {wanted:o}");
}
// ---------------------------------------------------------------------
// 2. PurgeTick_DefaultRetention_Uses365DayThreshold
// ---------------------------------------------------------------------
[Fact]
public void PurgeTick_DefaultRetention_Uses365DayThreshold()
{
    var recorder = new RecordingRepo();

    // No retention argument: the options helper falls back to its default (365 days).
    CreateActor(recorder, FastPurgeOptions());

    AwaitAssert(
        () =>
        {
            var calls = recorder.PurgeThresholds.Count;
            Assert.True(calls >= 1);
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));

    // Same one-minute tolerance as the explicit-retention test.
    var actual = recorder.PurgeThresholds[0];
    var wanted = DateTime.UtcNow - TimeSpan.FromDays(365);
    var driftMinutes = Math.Abs((actual - wanted).TotalMinutes);
    Assert.True(
        driftMinutes < 1.0,
        $"purge threshold {actual:o} should be within 1 minute of {wanted:o}");
}
// ---------------------------------------------------------------------
// 3. PurgeTick_PurgeThrows_ActorStaysAlive_RetriesNextTick (continue-on-error)
// ---------------------------------------------------------------------
[Fact]
public void PurgeTick_PurgeThrows_ActorStaysAlive_RetriesNextTick()
{
    var faultingRepo = new PurgeThrowingRepo();
    CreateActor(faultingRepo, FastPurgeOptions());

    // A purge fault must NOT kill the singleton. Seeing a second purge
    // attempt proves both that the actor outlived the first throw and that
    // the timer is still ticking (continue-on-error).
    AwaitAssert(
        () =>
        {
            var attempts = faultingRepo.PurgeCallCount;
            Assert.True(
                attempts >= 2,
                $"expected >= 2 purge attempts (actor survived the throw), got {attempts}");
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));
}
}
@@ -0,0 +1,300 @@
using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
namespace ZB.MOM.WW.ScadaBridge.SiteCallAudit.Tests;
/// <summary>
/// Reconciliation-tick tests for <see cref="SiteCallAuditActor"/> (#22, Piece A).
/// These exercise the periodic per-site self-heal pull entirely in-memory —
/// fake <see cref="IPullSiteCallsClient"/> + <see cref="ISiteEnumerator"/> + a
/// recording <see cref="ISiteCallAuditRepository"/> — so they run in
/// milliseconds and do NOT depend on a live MSSQL fixture (unlike the
/// MSSQL-backed <see cref="SiteCallAuditActorTests"/>). The actor is built via
/// the internal test ctor that injects all three collaborators; the
/// repo-only test ctor used by the MSSQL tests passes no client/enumerator, so
/// the reconciliation tick is gated off there (see
/// <see cref="TestCtor_RepositoryOnly_DoesNotStartReconciliationTick"/>).
/// </summary>
public class SiteCallAuditReconciliationTests : TestKit
{
/// <summary>
/// Builds an <c>ApiOutbound</c> / <c>ERP.GetOrder</c> <see cref="SiteCall"/> for the
/// given site. <paramref name="updatedAtUtc"/> (or <see cref="DateTime.UtcNow"/> when
/// omitted) is stamped onto the created, updated and ingested timestamps alike.
/// </summary>
private static SiteCall NewRow(
    TrackedOperationId id,
    string sourceSite,
    string status = "Submitted",
    DateTime? updatedAtUtc = null)
{
    DateTime stamp = updatedAtUtc.HasValue ? updatedAtUtc.Value : DateTime.UtcNow;

    var row = new SiteCall
    {
        TrackedOperationId = id,
        Channel = "ApiOutbound",
        Target = "ERP.GetOrder",
        SourceSite = sourceSite,
        SourceNode = null,
        Status = status,
        RetryCount = 0,
        LastError = null,
        HttpStatus = null,
        CreatedAtUtc = stamp,
        UpdatedAtUtc = stamp,
        TerminalAtUtc = null,
        IngestedAtUtc = stamp,
    };

    return row;
}
/// <summary>
/// Options with a 100 ms reconciliation override so each test finishes in
/// well under a second; <c>AwaitAssert</c> in the tests absorbs scheduler
/// jitter by giving the tick a few seconds to fire.
/// </summary>
private static SiteCallAuditOptions FastTickOptions(int batchSize = 500)
{
    var options = new SiteCallAuditOptions
    {
        ReconciliationInterval = TimeSpan.FromMinutes(5),
        ReconciliationIntervalOverride = TimeSpan.FromMilliseconds(100),
        ReconciliationBatchSize = batchSize,
    };

    return options;
}
/// <summary>Enumerator fake that hands back the fixed site list given at construction.</summary>
private sealed class StaticEnumerator : ISiteEnumerator
{
    private readonly IReadOnlyList<SiteEntry> _sites;

    public StaticEnumerator(params SiteEntry[] sites)
    {
        _sites = sites;
    }

    public Task<IReadOnlyList<SiteEntry>> EnumerateAsync(CancellationToken ct = default)
    {
        return Task.FromResult(_sites);
    }
}
/// <summary>
/// Scripted pull client. Each call for a site returns that site's next queued
/// response; once the queue is drained — or if the site was never scripted —
/// it returns an empty page with <c>MoreAvailable: false</c> (it does NOT
/// repeat the last scripted entry). A site registered through
/// <see cref="ThrowFor"/> throws the configured exception on every call,
/// directly from <see cref="PullAsync"/> rather than via a faulted task.
/// Every invocation, including a throwing one, is appended to
/// <see cref="Calls"/> first so tests can assert call counts and the
/// <c>since</c> cursor.
/// </summary>
private sealed class ScriptedPullClient : IPullSiteCallsClient
{
    /// <summary>Arguments of every <see cref="PullAsync"/> invocation, in call order.</summary>
    public List<(string SiteId, DateTime SinceUtc, int BatchSize)> Calls { get; } = new();

    private readonly Dictionary<string, Queue<PullSiteCallsResponse>> _scripted = new();
    private readonly Dictionary<string, Exception> _throwOnSite = new();

    /// <summary>Queues <paramref name="responses"/> for <paramref name="siteId"/>, replacing any earlier script for that site.</summary>
    public ScriptedPullClient Script(string siteId, params PullSiteCallsResponse[] responses)
    {
        _scripted[siteId] = new Queue<PullSiteCallsResponse>(responses);
        return this;
    }

    /// <summary>Makes every pull for <paramref name="siteId"/> throw <paramref name="ex"/>.</summary>
    public ScriptedPullClient ThrowFor(string siteId, Exception ex)
    {
        _throwOnSite[siteId] = ex;
        return this;
    }

    public Task<PullSiteCallsResponse> PullAsync(
        string siteId, DateTime sinceUtc, int batchSize, CancellationToken ct)
    {
        // Record before anything else so a throwing call is still visible to assertions.
        Calls.Add((siteId, sinceUtc, batchSize));

        // A configured failure takes precedence over any scripted responses.
        if (_throwOnSite.TryGetValue(siteId, out var ex))
        {
            throw ex;
        }

        if (_scripted.TryGetValue(siteId, out var queue) && queue.Count > 0)
        {
            return Task.FromResult(queue.Dequeue());
        }

        // Script exhausted or absent: report "nothing more to pull".
        return Task.FromResult(
            new PullSiteCallsResponse(Array.Empty<SiteCall>(), MoreAvailable: false));
    }
}
/// <summary>
/// Repository fake that records every <see cref="UpsertAsync"/> call: the row
/// is stored under its id (a later write for the same id replaces the earlier
/// one) and a running call count is kept. <see cref="GetAsync"/> reads back
/// from that store; the query, purge and KPI members are inert stubs.
/// </summary>
private sealed class RecordingRepo : ISiteCallAuditRepository
{
    /// <summary>Most recently upserted row per tracked-operation id.</summary>
    public Dictionary<TrackedOperationId, SiteCall> Upserted { get; } = new();

    /// <summary>Total number of <see cref="UpsertAsync"/> invocations.</summary>
    public int UpsertCallCount { get; private set; }

    public Task UpsertAsync(SiteCall siteCall, CancellationToken ct = default)
    {
        UpsertCallCount += 1;
        Upserted[siteCall.TrackedOperationId] = siteCall;
        return Task.CompletedTask;
    }

    public Task<SiteCall?> GetAsync(TrackedOperationId id, CancellationToken ct = default)
    {
        if (Upserted.TryGetValue(id, out var stored))
        {
            return Task.FromResult<SiteCall?>(stored);
        }

        return Task.FromResult<SiteCall?>(null);
    }

    public Task<IReadOnlyList<SiteCall>> QueryAsync(
        SiteCallQueryFilter filter, SiteCallPaging paging, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCall> none = Array.Empty<SiteCall>();
        return Task.FromResult(none);
    }

    public Task<int> PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default)
    {
        return Task.FromResult(0);
    }

    public Task<SiteCallKpiSnapshot> ComputeKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        var zeroed = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0);
        return Task.FromResult(zeroed);
    }

    public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
        DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
    {
        IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
        return Task.FromResult(none);
    }
}
/// <summary>
/// Spawns a <see cref="SiteCallAuditActor"/> through the ctor that takes the
/// repository, site enumerator and pull client together with the options.
/// </summary>
private IActorRef CreateActor(
    ISiteEnumerator sites,
    IPullSiteCallsClient client,
    ISiteCallAuditRepository repo,
    SiteCallAuditOptions options)
{
    var props = Props.Create(() => new SiteCallAuditActor(
        repo,
        sites,
        client,
        NullLogger<SiteCallAuditActor>.Instance,
        options));

    return Sys.ActorOf(props);
}
// ---------------------------------------------------------------------
// 1. ReconciliationTick_AbsentRow_IsUpsertedFromSitePull
// ---------------------------------------------------------------------
[Fact]
public void ReconciliationTick_AbsentRow_IsUpsertedFromSitePull()
{
    var siteId = "siteA";
    var operationId = TrackedOperationId.New();
    var siteRow = NewRow(operationId, sourceSite: siteId, status: "Parked");

    var enumerator = new StaticEnumerator(new SiteEntry(siteId, "http://siteA:8083"));
    var firstPage = new PullSiteCallsResponse(new[] { siteRow }, MoreAvailable: false);
    var pullClient = new ScriptedPullClient().Script(siteId, firstPage);
    var central = new RecordingRepo();

    CreateActor(enumerator, pullClient, central, FastTickOptions());

    // The row exists only at the site; a tick must copy it into the central repo unchanged.
    AwaitAssert(
        () =>
        {
            Assert.True(central.Upserted.ContainsKey(operationId),
                "reconciliation tick should upsert the row present at the site but absent centrally");

            var landed = central.Upserted[operationId];
            Assert.Equal("Parked", landed.Status);
            Assert.Equal(siteId, landed.SourceSite);
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 2. ReconciliationTick_SecondTick_AdvancesCursorPastAlreadyPulledRows
// ---------------------------------------------------------------------
[Fact]
public void ReconciliationTick_SecondTick_AdvancesCursorPastAlreadyPulledRows()
{
    var siteId = "siteA";

    // Three rows one minute apart; the newest timestamp is the expected cursor.
    var oldest = new DateTime(2026, 5, 20, 10, 0, 0, DateTimeKind.Utc);
    var middle = new DateTime(2026, 5, 20, 10, 1, 0, DateTimeKind.Utc);
    var newest = new DateTime(2026, 5, 20, 10, 2, 0, DateTimeKind.Utc);
    var rowA = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: oldest);
    var rowB = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: middle);
    var rowC = NewRow(TrackedOperationId.New(), siteId, updatedAtUtc: newest);

    var enumerator = new StaticEnumerator(new SiteEntry(siteId, "http://siteA:8083"));

    // Only the first pull is scripted (all three rows); every later pull gets
    // an empty page. The second pull's `since` must therefore equal the
    // newest UpdatedAtUtc — evidence the cursor moved forward instead of
    // restarting from the beginning.
    var firstPage = new PullSiteCallsResponse(new[] { rowA, rowB, rowC }, MoreAvailable: false);
    var pullClient = new ScriptedPullClient().Script(siteId, firstPage);
    var central = new RecordingRepo();

    CreateActor(enumerator, pullClient, central, FastTickOptions());

    AwaitAssert(
        () =>
        {
            var pulls = pullClient.Calls.Count;
            Assert.True(pulls >= 2,
                $"need at least 2 pulls to assert cursor advancement, got {pulls}");
        },
        duration: TimeSpan.FromSeconds(5),
        interval: TimeSpan.FromMilliseconds(50));

    var firstPull = pullClient.Calls[0];
    var secondPull = pullClient.Calls[1];
    Assert.Equal(DateTime.MinValue, firstPull.SinceUtc);
    Assert.Equal(newest, secondPull.SinceUtc);

    // The batch size flows through from options.
    Assert.Equal(500, firstPull.BatchSize);
}
// ---------------------------------------------------------------------
// 3. ReconciliationTick_OneSiteThrows_OtherSitesStillReconciled (failure isolation)
// ---------------------------------------------------------------------
[Fact]
public void ReconciliationTick_OneSiteThrows_OtherSitesStillReconciled()
{
    var siteB = "siteB";
    var healthyId = TrackedOperationId.New();
    var healthyRow = NewRow(healthyId, sourceSite: siteB, status: "Delivered");

    var enumerator = new StaticEnumerator(
        new SiteEntry("siteA", "http://siteA:8083"),
        new SiteEntry(siteB, "http://siteB:8083"));

    // siteA always fails; siteB serves one row.
    var pullClient = new ScriptedPullClient();
    pullClient.ThrowFor("siteA", new InvalidOperationException("simulated transport failure"));
    pullClient.Script(siteB, new PullSiteCallsResponse(new[] { healthyRow }, MoreAvailable: false));

    var central = new RecordingRepo();
    CreateActor(enumerator, pullClient, central, FastTickOptions());

    AwaitAssert(
        () =>
        {
            // The failing site was attempted, and the healthy site's row
            // landed anyway: one offline site must not sink the whole tick.
            Assert.Contains(pullClient.Calls, c => c.SiteId == "siteA");
            Assert.True(central.Upserted.ContainsKey(healthyId),
                "siteB must be reconciled even though siteA threw");
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 4. RepoOnly test ctor does NOT start the reconciliation tick
// ---------------------------------------------------------------------
[Fact]
public void TestCtor_RepositoryOnly_DoesNotStartReconciliationTick()
{
    // The repo-only test ctor (used by the MSSQL-backed actor tests) is given
    // no client/enumerator, so the tick must be gated OFF — otherwise those
    // tests would fire phantom pulls. A non-event cannot be awaited directly,
    // so a second, fully wired actor with the same fast tick runs alongside
    // purely as a clock: once it has pulled, enough time has passed that the
    // repo-only actor would have ticked as well. The check itself is made
    // against the repo-only actor's own repository.
    var repoOnlyRepo = new RecordingRepo();
    var repoOnlyProps = Props.Create(() => new SiteCallAuditActor(
        repoOnlyRepo,
        NullLogger<SiteCallAuditActor>.Instance,
        FastTickOptions()));
    Sys.ActorOf(repoOnlyProps);

    // Reference actor: full reconciliation ctor, one site, separate client and repo.
    var referenceClient = new ScriptedPullClient();
    var referenceRepo = new RecordingRepo();
    var referenceSites = new StaticEnumerator(new SiteEntry("siteX", "http://siteX:8083"));
    CreateActor(referenceSites, referenceClient, referenceRepo, FastTickOptions());

    AwaitAssert(
        () =>
        {
            var pulls = referenceClient.Calls.Count;
            Assert.True(pulls >= 1);
        },
        duration: TimeSpan.FromSeconds(3),
        interval: TimeSpan.FromMilliseconds(50));

    // The repo-only actor never reconciles: with no client to pull from it
    // has upserted nothing on its own.
    Assert.Equal(0, repoOnlyRepo.UpsertCallCount);
}
}
@@ -7,6 +7,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.TestSupport;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
@@ -877,6 +878,112 @@ public class AlarmActorTests : TestKit, IDisposable
Assert.Equal(AlarmLevel.HighHigh, escalated.Level);
}
// ── M1.5: site event log `alarm` category ──────────────────────────────
/// <summary>A priority-800 alarm raise must log exactly one Error-severity <c>alarm</c> site event.</summary>
[Fact]
public void AlarmActor_Raise_EmitsAlarmSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var services = new SingleServiceProvider(eventLog);
    var highTemp = new ResolvedAlarm
    {
        CanonicalName = "HighTemp",
        TriggerType = "ValueMatch",
        TriggerConfiguration = "{\"attributeName\":\"Status\",\"matchValue\":\"Critical\"}",
        PriorityLevel = 800
    };
    var parentProbe = CreateTestProbe();

    var props = Props.Create(() => new AlarmActor(
        "HighTemp", "Pump1", parentProbe.Ref, highTemp,
        null, _sharedLibrary, _options,
        NullLogger<AlarmActor>.Instance, null, null, null, services));
    var alarmActor = ActorOf(props);

    // Status flips to the match value, which raises the alarm.
    alarmActor.Tell(new AttributeValueChanged(
        "Pump1", "Status", "Status", "Critical", "Good", DateTimeOffset.UtcNow));
    parentProbe.ExpectMsg<AlarmStateChanged>(TimeSpan.FromSeconds(5));

    // The site event is written in the background, so poll until it lands.
    AwaitAssert(
        () =>
        {
            var alarmRows = eventLog.OfType("alarm");
            Assert.Single(alarmRows);

            var logged = alarmRows[0];
            Assert.Equal("Error", logged.Severity); // priority 800 → Error
            Assert.Equal("Pump1", logged.InstanceId);
            Assert.Equal("AlarmActor:HighTemp", logged.Source);
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>A priority-100 alarm raise must log exactly one Warning-severity <c>alarm</c> site event.</summary>
[Fact]
public void AlarmActor_RaiseLowPriority_EmitsWarningAlarmSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var services = new SingleServiceProvider(eventLog);
    var minorTemp = new ResolvedAlarm
    {
        CanonicalName = "MinorTemp",
        TriggerType = "ValueMatch",
        TriggerConfiguration = "{\"attributeName\":\"Status\",\"matchValue\":\"Warn\"}",
        PriorityLevel = 100
    };
    var parentProbe = CreateTestProbe();

    var props = Props.Create(() => new AlarmActor(
        "MinorTemp", "Pump1", parentProbe.Ref, minorTemp,
        null, _sharedLibrary, _options,
        NullLogger<AlarmActor>.Instance, null, null, null, services));
    var alarmActor = ActorOf(props);

    alarmActor.Tell(new AttributeValueChanged(
        "Pump1", "Status", "Status", "Warn", "Good", DateTimeOffset.UtcNow));
    parentProbe.ExpectMsg<AlarmStateChanged>(TimeSpan.FromSeconds(5));

    AwaitAssert(
        () =>
        {
            var alarmRows = eventLog.OfType("alarm");
            Assert.Single(alarmRows);
            Assert.Equal("Warning", alarmRows[0].Severity); // priority 100 → Warning
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>Raise followed by clear must log two <c>alarm</c> site events: Error, then Info.</summary>
[Fact]
public void AlarmActor_Clear_EmitsInfoAlarmSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var services = new SingleServiceProvider(eventLog);
    var highTemp = new ResolvedAlarm
    {
        CanonicalName = "HighTemp",
        TriggerType = "ValueMatch",
        TriggerConfiguration = "{\"attributeName\":\"Status\",\"matchValue\":\"Critical\"}",
        PriorityLevel = 800
    };
    var parentProbe = CreateTestProbe();

    var props = Props.Create(() => new AlarmActor(
        "HighTemp", "Pump1", parentProbe.Ref, highTemp,
        null, _sharedLibrary, _options,
        NullLogger<AlarmActor>.Instance, null, null, null, services));
    var alarmActor = ActorOf(props);

    // Raise: Status moves to the match value.
    alarmActor.Tell(new AttributeValueChanged(
        "Pump1", "Status", "Status", "Critical", "Good", DateTimeOffset.UtcNow));
    parentProbe.ExpectMsg<AlarmStateChanged>(TimeSpan.FromSeconds(5));

    // Clear: Status moves away from the match value.
    alarmActor.Tell(new AttributeValueChanged(
        "Pump1", "Status", "Status", "Normal", "Critical", DateTimeOffset.UtcNow));
    parentProbe.ExpectMsg<AlarmStateChanged>(TimeSpan.FromSeconds(5));

    AwaitAssert(
        () =>
        {
            var alarmRows = eventLog.OfType("alarm");
            Assert.Equal(2, alarmRows.Count); // raise + clear

            var raised = alarmRows[0];
            var cleared = alarmRows[1];
            Assert.Equal("Error", raised.Severity);
            Assert.Equal("Info", cleared.Severity); // clear → Info
        },
        TimeSpan.FromSeconds(2));
}
[Fact]
public void AlarmActor_MalformedTriggerConfig_DoesNotCrash()
{
@@ -10,6 +10,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.TestSupport;
using System.Text.Json;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
@@ -44,7 +45,8 @@ public class DeploymentManagerActorTests : TestKit, IDisposable
try { File.Delete(_dbFile); } catch { /* cleanup */ }
}
private IActorRef CreateDeploymentManager(SiteRuntimeOptions? options = null)
private IActorRef CreateDeploymentManager(
SiteRuntimeOptions? options = null, IServiceProvider? serviceProvider = null)
{
options ??= new SiteRuntimeOptions();
return ActorOf(Props.Create(() => new DeploymentManagerActor(
@@ -53,7 +55,12 @@ public class DeploymentManagerActorTests : TestKit, IDisposable
_sharedScriptLibrary,
null, // no stream manager in tests
options,
NullLogger<DeploymentManagerActor>.Instance)));
NullLogger<DeploymentManagerActor>.Instance,
null,
null,
null,
serviceProvider,
null)));
}
private static string MakeConfigJson(string instanceName)
@@ -171,6 +178,70 @@ public class DeploymentManagerActorTests : TestKit, IDisposable
Assert.Equal("NewPump", response.InstanceUniqueName);
}
// ── M1.6: site event log `deployment` category ─────────────────────────
/// <summary>A successful deploy must log an Info <c>deployment</c> site event for the deployed instance.</summary>
[Fact]
public async Task DeploymentManager_DeploySuccess_EmitsDeploymentSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var services = new SingleServiceProvider(eventLog);
    var manager = CreateDeploymentManager(serviceProvider: services);
    await Task.Delay(500); // wait for empty startup

    var deploy = new DeployInstanceCommand(
        "dep-evt-1", "AuditedPump", "sha256:xyz",
        MakeConfigJson("AuditedPump"), "admin", DateTimeOffset.UtcNow);
    manager.Tell(deploy);

    var reply = ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(5));
    Assert.Equal(DeploymentStatus.Success, reply.Status);

    // Poll: the event may land after the status reply.
    AwaitAssert(
        () =>
        {
            var deploymentRows = eventLog.OfType("deployment");
            Assert.Contains(
                deploymentRows,
                r => r.Severity == "Info"
                     && r.InstanceId == "AuditedPump"
                     && r.Source == "DeploymentManagerActor"
                     && r.Message.Contains("deploy", StringComparison.OrdinalIgnoreCase));
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>Disable, enable and delete must each log a matching <c>deployment</c> site event.</summary>
[Fact]
public async Task DeploymentManager_DisableEnableDelete_EmitDeploymentSiteEvents()
{
    var eventLog = new FakeSiteEventLogger();
    var manager = CreateDeploymentManager(serviceProvider: new SingleServiceProvider(eventLog));
    await Task.Delay(500);

    manager.Tell(new DeployInstanceCommand(
        "dep-evt-2", "EvtPump", "sha256:abc",
        MakeConfigJson("EvtPump"), "admin", DateTimeOffset.UtcNow));
    ExpectMsg<DeploymentStatusResponse>(TimeSpan.FromSeconds(5));
    await Task.Delay(1000);

    // Deployment site events are emitted fire-and-forget off the actor thread
    // (LogDeploymentEvent runs in a ContinueWith continuation), so each one is
    // polled for with AwaitAssert instead of a bare Task.Delay — a fixed sleep
    // is racy under CI load.
    void AwaitDeploymentEvent(string fragment) =>
        AwaitAssert(
            () => Assert.Contains(
                eventLog.OfType("deployment"),
                r => r.Message.Contains(fragment, StringComparison.OrdinalIgnoreCase)),
            TimeSpan.FromSeconds(2));

    manager.Tell(new DisableInstanceCommand("cmd-de1", "EvtPump", DateTimeOffset.UtcNow));
    var disableReply = ExpectMsg<InstanceLifecycleResponse>(TimeSpan.FromSeconds(5));
    Assert.True(disableReply.Success);
    AwaitDeploymentEvent("disabled");

    manager.Tell(new EnableInstanceCommand("cmd-en1", "EvtPump", DateTimeOffset.UtcNow));
    var enableReply = ExpectMsg<InstanceLifecycleResponse>(TimeSpan.FromSeconds(5));
    Assert.True(enableReply.Success);
    AwaitDeploymentEvent("enabled");

    manager.Tell(new DeleteInstanceCommand("cmd-del-evt", "EvtPump", DateTimeOffset.UtcNow));
    var deleteReply = ExpectMsg<InstanceLifecycleResponse>(TimeSpan.FromSeconds(5));
    Assert.True(deleteReply.Success);
    AwaitDeploymentEvent("deleted");
}
[Fact]
public async Task DeploymentManager_Lifecycle_DisableEnableDelete()
{
@@ -8,6 +8,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Scripts;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.TestSupport;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
@@ -71,6 +72,71 @@ public class ExecutionActorTests : TestKit, IDisposable
ExpectTerminated(exec, TimeSpan.FromSeconds(5));
}
// ── M1.8: site event log `script` started/completed ────────────────────
/// <summary>A successful script run must log two Info <c>script</c> site events: started, then completed.</summary>
[Fact]
public void ScriptExecutionActor_Success_EmitsScriptStartedAndCompletedInfoEvents()
{
    var script = CompileScript("return 7 * 6;");
    var replyProbe = CreateTestProbe();
    var instanceProbe = CreateTestProbe();
    var eventLog = new FakeSiteEventLogger();

    var props = Props.Create(() => new ScriptExecutionActor(
        "Answer", "Inst1", script, null, 0,
        instanceProbe.Ref, _sharedLibrary, Options(),
        replyProbe.Ref, "corr-evt-1", NullLogger.Instance,
        ScriptScope.Root, null, new SingleServiceProvider(eventLog)));
    var executor = ActorOf(props);
    Watch(executor);

    replyProbe.ExpectMsg<ScriptCallResult>(TimeSpan.FromSeconds(10));
    ExpectTerminated(executor, TimeSpan.FromSeconds(5));

    AwaitAssert(
        () =>
        {
            var scriptRows = eventLog.OfType("script");

            // started + completed, both Info, in order.
            Assert.Equal(2, scriptRows.Count);
            Assert.All(scriptRows, logged =>
            {
                Assert.Equal("Info", logged.Severity);
                Assert.Equal("Inst1", logged.InstanceId);
                Assert.Equal("ScriptActor:Answer", logged.Source);
            });

            var first = scriptRows[0];
            var second = scriptRows[1];
            Assert.Contains("started", first.Message, StringComparison.OrdinalIgnoreCase);
            Assert.Contains("completed", second.Message, StringComparison.OrdinalIgnoreCase);
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>A throwing script must log a started (Info) event followed by an Error event, and nothing else.</summary>
[Fact]
public void ScriptExecutionActor_Failure_EmitsStartedInfoThenErrorEvent()
{
    var script = CompileScript("throw new InvalidOperationException(\"boom\");");
    var replyProbe = CreateTestProbe();
    var instanceProbe = CreateTestProbe();
    var eventLog = new FakeSiteEventLogger();

    var props = Props.Create(() => new ScriptExecutionActor(
        "Bad", "Inst1", script, null, 0,
        instanceProbe.Ref, _sharedLibrary, Options(),
        replyProbe.Ref, "corr-evt-2", NullLogger.Instance,
        ScriptScope.Root, null, new SingleServiceProvider(eventLog)));
    var executor = ActorOf(props);
    Watch(executor);

    replyProbe.ExpectMsg<ScriptCallResult>(TimeSpan.FromSeconds(10));
    ExpectTerminated(executor, TimeSpan.FromSeconds(5));

    AwaitAssert(
        () =>
        {
            var scriptRows = eventLog.OfType("script");

            // started (Info) + failed (Error) — no completed.
            Assert.Equal(2, scriptRows.Count);

            var started = scriptRows[0];
            var failed = scriptRows[1];
            Assert.Equal("Info", started.Severity);
            Assert.Contains("started", started.Message, StringComparison.OrdinalIgnoreCase);
            Assert.Equal("Error", failed.Severity);
        },
        TimeSpan.FromSeconds(2));
}
[Fact]
public void ScriptExecutionActor_ScriptThrows_RepliesFailureAndStops()
{
@@ -10,6 +10,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.TestSupport;
using System.Text.Json;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
@@ -58,6 +59,82 @@ public class InstanceActorTests : TestKit, IDisposable
try { File.Delete(_dbFile); } catch { /* cleanup */ }
}
// ── M1.6: site event log `instance_lifecycle` category ──────────────────
/// <summary>Spawning an instance actor must log an Info <c>instance_lifecycle</c> "started" site event.</summary>
[Fact]
public void InstanceActor_Start_EmitsInstanceLifecycleSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var flattened = new FlattenedConfiguration
    {
        InstanceUniqueName = "LifecyclePump",
        Attributes = [new ResolvedAttribute { CanonicalName = "T", Value = "1", DataType = "Int32" }]
    };
    var configJson = JsonSerializer.Serialize(flattened);

    var props = Props.Create(() => new InstanceActor(
        "LifecyclePump",
        configJson,
        _storage,
        _compilationService,
        _sharedScriptLibrary,
        null,
        _options,
        NullLogger<InstanceActor>.Instance,
        null,
        null,
        new SingleServiceProvider(eventLog)));
    ActorOf(props);

    AwaitAssert(
        () =>
        {
            var lifecycleRows = eventLog.OfType("instance_lifecycle");
            Assert.Contains(
                lifecycleRows,
                r => r.Severity == "Info"
                     && r.InstanceId == "LifecyclePump"
                     && r.Source == "InstanceActor:LifecyclePump"
                     && r.Message.Contains("started", StringComparison.OrdinalIgnoreCase));
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>Stopping an instance actor must log an Info <c>instance_lifecycle</c> "stopped" site event.</summary>
[Fact]
public void InstanceActor_Stop_EmitsInstanceLifecycleSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var flattened = new FlattenedConfiguration
    {
        InstanceUniqueName = "StoppedPump",
        Attributes = [new ResolvedAttribute { CanonicalName = "T", Value = "1", DataType = "Int32" }]
    };
    var configJson = JsonSerializer.Serialize(flattened);

    var props = Props.Create(() => new InstanceActor(
        "StoppedPump",
        configJson,
        _storage,
        _compilationService,
        _sharedScriptLibrary,
        null,
        _options,
        NullLogger<InstanceActor>.Instance,
        null,
        null,
        new SingleServiceProvider(eventLog)));
    var instanceActor = ActorOf(props);

    // Wait until the first lifecycle event (the start) has landed, then stop the actor.
    AwaitAssert(
        () => Assert.NotEmpty(eventLog.OfType("instance_lifecycle")),
        TimeSpan.FromSeconds(2));

    Watch(instanceActor);
    instanceActor.Tell(PoisonPill.Instance);
    ExpectTerminated(instanceActor, TimeSpan.FromSeconds(5));

    AwaitAssert(
        () =>
        {
            var lifecycleRows = eventLog.OfType("instance_lifecycle");
            Assert.Contains(
                lifecycleRows,
                r => r.Severity == "Info"
                     && r.InstanceId == "StoppedPump"
                     && r.Message.Contains("stopped", StringComparison.OrdinalIgnoreCase));
        },
        TimeSpan.FromSeconds(2));
}
[Fact]
public void InstanceActor_LoadsAttributesFromConfig()
{
@@ -1,3 +1,4 @@
using System.Text.Json;
using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.Logging.Abstractions;
@@ -9,6 +10,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.SiteRuntime;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.TestSupport;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Actors;
@@ -41,9 +43,10 @@ public class NativeAlarmActorTests : TestKit, IDisposable
new(sourceRef, "T01", "AnalogLimit.Hi", kind, condition,
"Process", "hi", "hi", "", "", null, time ?? DateTimeOffset.UtcNow, "92", "90");
private IActorRef Spawn(IActorRef instanceActor, IActorRef dclManager) =>
private IActorRef Spawn(IActorRef instanceActor, IActorRef dclManager, IServiceProvider? serviceProvider = null) =>
ActorOf(Props.Create(() => new NativeAlarmActor(
Source(), "inst", instanceActor, dclManager, _storage, _options, NullLogger<NativeAlarmActor>.Instance)));
Source(), "inst", instanceActor, dclManager, _storage, _options,
NullLogger<NativeAlarmActor>.Instance, AlarmKind.NativeOpcUa, serviceProvider)));
[Fact]
public void SubscribeOnStart_SendsRequestForSourceBinding()
@@ -121,6 +124,158 @@ public class NativeAlarmActorTests : TestKit, IDisposable
instance.ExpectNoMsg(TimeSpan.FromMilliseconds(300));
}
// ── M1.5: site event log `alarm` category ──────────────────────────────
/// <summary>A live severity-800 raise must log exactly one Error-severity <c>alarm</c> site event.</summary>
[Fact]
public void Raise_EmitsAlarmSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var instanceProbe = CreateTestProbe();
    var dclProbe = CreateTestProbe();
    var nativeAlarm = Spawn(instanceProbe.Ref, dclProbe.Ref, new SingleServiceProvider(eventLog));
    dclProbe.ExpectMsg<SubscribeAlarmsRequest>();

    var activeCondition = new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800);
    var raise = Transition("T01.Hi", AlarmTransitionKind.Raise, activeCondition);
    nativeAlarm.Tell(new NativeAlarmTransitionUpdate("Opc", raise));
    instanceProbe.ExpectMsg<AlarmStateChanged>(m => m.State == AlarmState.Active);

    AwaitAssert(
        () =>
        {
            var alarmRows = eventLog.OfType("alarm");
            Assert.Single(alarmRows);

            var logged = alarmRows[0];
            Assert.Equal("Error", logged.Severity); // severity 800 → Error
            Assert.Equal("inst", logged.InstanceId);
            Assert.Equal("NativeAlarmActor:Pressure", logged.Source);
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>A live raise followed by a clear must log two <c>alarm</c> site events: Error, then Info.</summary>
[Fact]
public void Clear_EmitsInfoAlarmSiteEvent()
{
    var eventLog = new FakeSiteEventLogger();
    var instanceProbe = CreateTestProbe();
    var dclProbe = CreateTestProbe();
    var nativeAlarm = Spawn(instanceProbe.Ref, dclProbe.Ref, new SingleServiceProvider(eventLog));
    dclProbe.ExpectMsg<SubscribeAlarmsRequest>();

    var raisedAt = DateTimeOffset.UtcNow;
    var clearedAt = raisedAt.AddSeconds(5);

    var activeCondition = new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800);
    nativeAlarm.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
        "T01.Hi", AlarmTransitionKind.Raise, activeCondition, raisedAt)));
    instanceProbe.ExpectMsg<AlarmStateChanged>(m => m.State == AlarmState.Active);

    // Clear (inactive but not yet acked → stays mirrored, return-to-normal emit).
    var inactiveCondition = new AlarmConditionState(false, false, null, AlarmShelveState.Unshelved, false, 0);
    nativeAlarm.Tell(new NativeAlarmTransitionUpdate("Opc", Transition(
        "T01.Hi", AlarmTransitionKind.Clear, inactiveCondition, clearedAt)));
    instanceProbe.ExpectMsg<AlarmStateChanged>(m => m.State == AlarmState.Normal);

    AwaitAssert(
        () =>
        {
            var alarmRows = eventLog.OfType("alarm");
            Assert.Equal(2, alarmRows.Count); // raise + clear

            var raised = alarmRows[0];
            var cleared = alarmRows[1];
            Assert.Equal("Error", raised.Severity);
            Assert.Equal("Info", cleared.Severity); // return-to-normal → Info
        },
        TimeSpan.FromSeconds(2));
}
/// <summary>
/// A condition rehydrated from SQLite on start-up must be surfaced upward as an
/// <see cref="AlarmStateChanged"/> but must NOT be logged as an <c>alarm</c>
/// site event, because it is a replay of last-known state, not a live transition.
/// </summary>
[Fact]
public async Task Rehydration_DoesNotEmitSiteEvent()
{
    // Pre-populate SQLite with an active condition so the actor rehydrates
    // it on PreStart. Rehydration replays last-known state — it is NOT a
    // live transition, so it must surface upward (for the DebugView) but
    // must NOT re-log an `alarm` operational event.
    var condition = new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800);
    await _storage.UpsertNativeAlarmAsync(
        "inst", "Pressure", "T01.Hi",
        JsonSerializer.Serialize(condition), DateTimeOffset.UtcNow);

    var siteLog = new FakeSiteEventLogger();
    var instance = CreateTestProbe();
    var dcl = CreateTestProbe();
    Spawn(instance.Ref, dcl.Ref, new SingleServiceProvider(siteLog));

    // The rehydrated condition is surfaced upward...
    var emitted = instance.ExpectMsg<AlarmStateChanged>(TimeSpan.FromSeconds(2));
    Assert.Equal("T01.Hi", emitted.SourceReference);
    Assert.Equal(AlarmState.Active, emitted.State);
    dcl.ExpectMsg<SubscribeAlarmsRequest>();

    // ...but no `alarm` operational event is logged for it. AwaitAssert is the
    // wrong tool for a negative: it returns on the FIRST passing evaluation,
    // and against a log that starts empty that is immediately — before a
    // stray write could have landed. The positive tests in this class have to
    // poll for their row, so the write can trail the upward message. Hold a
    // settle window first so a regression has time to show, then assert once.
    await Task.Delay(TimeSpan.FromMilliseconds(500));
    Assert.Empty(siteLog.OfType("alarm"));
}
/// <summary>
/// Regression for the spurious-reconnect-event bug: a reconnect snapshot
/// that re-includes an already-active condition is a re-sync, so the
/// <c>alarm</c> site event count must stay at one.
/// </summary>
[Fact]
public void SnapshotSwap_ExistingActiveCondition_DoesNotReEmit()
{
    // Each message gets its own (value-identical) condition instance.
    AlarmConditionState StillActive() =>
        new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800);

    var eventLog = new FakeSiteEventLogger();
    var instanceProbe = CreateTestProbe();
    var dclProbe = CreateTestProbe();
    var sut = Spawn(instanceProbe.Ref, dclProbe.Ref, new SingleServiceProvider(eventLog));
    dclProbe.ExpectMsg<SubscribeAlarmsRequest>();

    // Live raise — the one and only `alarm` event we expect.
    sut.Tell(new NativeAlarmTransitionUpdate(
        "Opc",
        Transition("T01.Hi", AlarmTransitionKind.Raise, StillActive())));
    instanceProbe.ExpectMsg<AlarmStateChanged>(m => m.State == AlarmState.Active);
    AwaitAssert(() => Assert.Single(eventLog.OfType("alarm")), TimeSpan.FromSeconds(2));

    // Reconnect snapshot carrying the same still-active condition, followed
    // by the snapshot-complete marker. This is a re-sync, not a transition.
    sut.Tell(new NativeAlarmTransitionUpdate(
        "Opc",
        Transition("T01.Hi", AlarmTransitionKind.Snapshot, StillActive())));
    sut.Tell(new NativeAlarmTransitionUpdate(
        "Opc",
        Transition("T01.Hi", AlarmTransitionKind.SnapshotComplete, StillActive())));

    // The snapshot still surfaces the condition upward (DebugView re-sync)...
    instanceProbe.ExpectMsg<AlarmStateChanged>(
        m => m.SourceReference == "T01.Hi" && m.State == AlarmState.Active);

    // ...but the `alarm` event count stays at exactly 1 — no re-emit.
    Thread.Sleep(200); // give any spurious fire-and-forget log time to land
    Assert.Single(eventLog.OfType("alarm"));
}
/// <summary>
/// Acknowledging a still-active condition must log a second <c>alarm</c>
/// site event at Info severity whose message mentions "acknowledged".
/// </summary>
[Fact]
public void Acknowledge_EmitsInfoAlarmSiteEventMentioningAcknowledged()
{
    var eventLog = new FakeSiteEventLogger();
    var instanceProbe = CreateTestProbe();
    var dclProbe = CreateTestProbe();
    var sut = Spawn(instanceProbe.Ref, dclProbe.Ref, new SingleServiceProvider(eventLog));
    dclProbe.ExpectMsg<SubscribeAlarmsRequest>();

    var raisedAt = DateTimeOffset.UtcNow;

    // Step 1: raise the condition (active, not acknowledged).
    var unackedCondition = new AlarmConditionState(true, false, null, AlarmShelveState.Unshelved, false, 800);
    sut.Tell(new NativeAlarmTransitionUpdate(
        "Opc",
        Transition("T01.Hi", AlarmTransitionKind.Raise, unackedCondition, raisedAt)));
    instanceProbe.ExpectMsg<AlarmStateChanged>(m => m.State == AlarmState.Active);

    // Step 2: operator acknowledges the still-active condition. The Acknowledge
    // branch of LogAlarmEvent logs Info and mentions "acknowledged".
    var ackedCondition = new AlarmConditionState(true, true, null, AlarmShelveState.Unshelved, false, 800);
    sut.Tell(new NativeAlarmTransitionUpdate(
        "Opc",
        Transition("T01.Hi", AlarmTransitionKind.Acknowledge, ackedCondition, raisedAt.AddSeconds(5))));
    instanceProbe.ExpectMsg<AlarmStateChanged>();

    var timeout = TimeSpan.FromSeconds(2);
    AwaitAssert(() =>
    {
        var alarmEvents = eventLog.OfType("alarm");
        Assert.Equal(2, alarmEvents.Count); // raise + acknowledge
        var acknowledgement = alarmEvents[1];
        Assert.Equal("Info", acknowledgement.Severity);
        Assert.Contains("acknowledged", acknowledgement.Message, StringComparison.OrdinalIgnoreCase);
    }, timeout);
}
void IDisposable.Dispose()
{
Shutdown();
@@ -0,0 +1,83 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.DependencyInjection;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.TestSupport;
/// <summary>
/// Capturing test double for <see cref="ISiteEventLogger"/> (M1 Site Event
/// Logging categories). Every <c>LogEventAsync</c> call is recorded so actor
/// tests can assert which operational events were emitted. Captures are kept
/// in a <see cref="ConcurrentQueue{T}"/>, so concurrent calls from background
/// tasks are recorded safely.
/// </summary>
public sealed class FakeSiteEventLogger : ISiteEventLogger
{
    /// <summary>The arguments of one recorded <see cref="ISiteEventLogger.LogEventAsync"/> call.</summary>
    public sealed record Entry(
        string EventType,
        string Severity,
        string? InstanceId,
        string Source,
        string Message,
        string? Details);

    private readonly ConcurrentQueue<Entry> _entries = new();

    /// <summary>Snapshot of every captured event, in the order they were enqueued.</summary>
    public IReadOnlyList<Entry> Entries
    {
        get { return _entries.ToArray(); }
    }

    /// <inheritdoc />
    public long FailedWriteCount
    {
        get { return 0; }
    }

    /// <summary>Snapshot of the captured events whose category equals <paramref name="eventType"/>.</summary>
    public IReadOnlyList<Entry> OfType(string eventType)
    {
        var matches = new List<Entry>();
        foreach (var captured in _entries)
        {
            if (captured.EventType == eventType)
            {
                matches.Add(captured);
            }
        }

        return matches.ToArray();
    }

    /// <inheritdoc />
    public Task LogEventAsync(
        string eventType,
        string severity,
        string? instanceId,
        string source,
        string message,
        string? details = null)
    {
        // Record synchronously; there is no real I/O, so return an already-completed task.
        var captured = new Entry(eventType, severity, instanceId, source, message, details);
        _entries.Enqueue(captured);
        return Task.CompletedTask;
    }
}
/// <summary>
/// Bare-bones <see cref="IServiceProvider"/> for actor tests: it resolves one
/// <see cref="ISiteEventLogger"/> and nothing else, so tests avoid building a
/// full DI container.
/// <para>
/// It also answers requests for <see cref="IServiceScopeFactory"/> with itself,
/// and the scope it creates is again this same object. Callers that invoke
/// <c>serviceProvider.CreateScope()</c> (e.g. <c>ScriptExecutionActor</c>)
/// therefore get a working scope instead of an exception.
/// </para>
/// </summary>
public sealed class SingleServiceProvider(ISiteEventLogger logger)
    : IServiceProvider, IServiceScopeFactory, IServiceScope
{
    private readonly ISiteEventLogger _logger = logger;

    /// <inheritdoc />
    public IServiceProvider ServiceProvider
    {
        get { return this; }
    }

    /// <inheritdoc />
    public object? GetService(Type serviceType)
    {
        if (serviceType == typeof(ISiteEventLogger))
        {
            return _logger;
        }

        // The scope factory is this same object; every other service is unknown.
        return serviceType == typeof(IServiceScopeFactory) ? this : null;
    }

    /// <inheritdoc />
    public IServiceScope CreateScope()
    {
        return this;
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Nothing is owned, so there is nothing to release.
    }
}
@@ -439,6 +439,138 @@ public class OperationTrackingStoreTests
Assert.NotNull(await store.GetStatusAsync(cId)); // kept (non-terminal)
}
// ── Site Call Audit #22: ReadChangedSinceAsync (reconciliation pull) ───
/// <summary>
/// <c>ReadChangedSinceAsync</c> must treat the cursor as an inclusive lower
/// bound on <c>UpdatedAtUtc</c> and return the matching rows oldest-first,
/// regardless of the order in which they were inserted.
/// </summary>
[Fact]
public async Task ReadChangedSinceAsync_ReturnsRowsAtOrAfterCursor_OldestFirst()
{
    var (store, dataSource) = CreateStore(nameof(ReadChangedSinceAsync_ReturnsRowsAtOrAfterCursor_OldestFirst));
    await using var _store = store;

    var older = TrackedOperationId.New();
    var middle = TrackedOperationId.New();
    var newer = TrackedOperationId.New();

    // Three rows with distinct UpdatedAtUtc, written out of chronological
    // order (newer, older, middle) so that insertion order differs from
    // timestamp order. A read that merely returned rows in insertion order
    // would fail the ordering assertions below.
    await store.RecordEnqueueAsync(newer, nameof(AuditKind.ApiCallCached), "ERP.C", null, null, null);
    await store.RecordEnqueueAsync(older, nameof(AuditKind.ApiCallCached), "ERP.A", null, null, "node-a");
    await store.RecordEnqueueAsync(middle, nameof(AuditKind.DbWriteCached), "DB.B", null, null, "node-b");

    // Backdate UpdatedAtUtc so the ordering is deterministic and a cursor
    // can be placed cleanly between rows. (Enqueue stamps DateTime.UtcNow;
    // we cannot inject the clock, so set the timestamps directly.)
    var t0 = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    SetUpdatedAt(dataSource, older, t0);
    SetUpdatedAt(dataSource, middle, t0.AddMinutes(10));
    SetUpdatedAt(dataSource, newer, t0.AddMinutes(20));

    // Cursor at the middle row's UpdatedAtUtc: inclusive lower bound, so
    // middle + newer come back, older is excluded.
    var result = await store.ReadChangedSinceAsync(t0.AddMinutes(10), batchSize: 100, CancellationToken.None);

    Assert.Equal(2, result.Count);
    Assert.Equal(middle, result[0].TrackedOperationId);
    Assert.Equal(newer, result[1].TrackedOperationId);
    Assert.True(result[0].UpdatedAtUtc <= result[1].UpdatedAtUtc);
}
/// <summary>
/// A cursor of <see cref="DateTime.MinValue"/> must return every tracked row.
/// </summary>
[Fact]
public async Task ReadChangedSinceAsync_FromMinValue_ReturnsAllRows()
{
    var (store, _) = CreateStore(nameof(ReadChangedSinceAsync_FromMinValue_ReturnsAllRows));
    await using var _store = store;

    // Two rows, enqueued one after the other.
    var firstId = TrackedOperationId.New();
    await store.RecordEnqueueAsync(firstId, nameof(AuditKind.ApiCallCached), "A", null, null, null);
    var secondId = TrackedOperationId.New();
    await store.RecordEnqueueAsync(secondId, nameof(AuditKind.ApiCallCached), "B", null, null, null);

    var everything = await store.ReadChangedSinceAsync(
        DateTime.MinValue,
        batchSize: 100,
        CancellationToken.None);

    Assert.Equal(2, everything.Count);
}
/// <summary>
/// With five rows and <c>batchSize: 3</c>, the read must return exactly the
/// three oldest rows in ascending <c>UpdatedAtUtc</c> order.
/// </summary>
[Fact]
public async Task ReadChangedSinceAsync_IsBatchCapped()
{
    const int totalRows = 5;
    const int cap = 3;

    var (store, dataSource) = CreateStore(nameof(ReadChangedSinceAsync_IsBatchCapped));
    await using var _store = store;

    var baseline = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
    var ids = new List<TrackedOperationId>();
    foreach (var i in Enumerable.Range(0, totalRows))
    {
        // Row i is stamped i minutes after the baseline.
        var id = TrackedOperationId.New();
        ids.Add(id);
        await store.RecordEnqueueAsync(id, nameof(AuditKind.ApiCallCached), $"T{i}", null, null, null);
        SetUpdatedAt(dataSource, id, baseline.AddMinutes(i));
    }

    var page = await store.ReadChangedSinceAsync(DateTime.MinValue, batchSize: cap, CancellationToken.None);

    // Capped to 3 — and the cap takes the OLDEST 3 (asc order) so the
    // caller can advance the cursor monotonically across follow-up pulls.
    Assert.Equal(cap, page.Count);
    for (var position = 0; position < cap; position++)
    {
        Assert.Equal(ids[position], page[position].TrackedOperationId);
    }
}
/// <summary>
/// Checks the field-by-field projection of tracking rows in the result of
/// <c>ReadChangedSinceAsync</c>: channel, target, source node, status, retry
/// count, last error, HTTP status, terminal timestamp and UTC timestamp kinds.
/// </summary>
[Fact]
public async Task ReadChangedSinceAsync_MapsTrackingRowOntoSiteCallOperational()
{
    var (store, _) = CreateStore(nameof(ReadChangedSinceAsync_MapsTrackingRowOntoSiteCallOperational));
    await using var _store = store;

    // One API call that has been attempted (still active) and one DB write
    // that has been parked (terminal).
    var apiCallId = TrackedOperationId.New();
    var dbWriteId = TrackedOperationId.New();
    await store.RecordEnqueueAsync(
        apiCallId, nameof(AuditKind.ApiCallCached), "ERP.GetOrder", "inst-1", "ScriptActor:OnTick", "node-a");
    await store.RecordEnqueueAsync(
        dbWriteId, nameof(AuditKind.DbWriteCached), "Historian.Write", null, null, "node-b");
    await store.RecordAttemptAsync(apiCallId, nameof(AuditStatus.Attempted), 2, "HTTP 503", 503);
    await store.RecordTerminalAsync(dbWriteId, nameof(AuditStatus.Parked), "max retries", null);

    var rows = await store.ReadChangedSinceAsync(DateTime.MinValue, batchSize: 100, CancellationToken.None);
    var apiRow = rows.Single(r => r.TrackedOperationId == apiCallId);
    var dbRow = rows.Single(r => r.TrackedOperationId == dbWriteId);

    // Kind → Channel projection.
    Assert.Equal("ApiOutbound", apiRow.Channel);
    Assert.Equal("DbOutbound", dbRow.Channel);

    // TargetSummary → Target; SourceNode carried verbatim.
    Assert.Equal("ERP.GetOrder", apiRow.Target);
    Assert.Equal("node-a", apiRow.SourceNode);
    Assert.Equal("node-b", dbRow.SourceNode);

    // Status / RetryCount / LastError / HttpStatus carried through.
    Assert.Equal(nameof(AuditStatus.Attempted), apiRow.Status);
    Assert.Equal(2, apiRow.RetryCount);
    Assert.Equal("HTTP 503", apiRow.LastError);
    Assert.Equal(503, apiRow.HttpStatus);

    // SourceSite is left empty by the store (the site id is not a tracking
    // column); the central client re-stamps it from the dialed siteId.
    Assert.Equal(string.Empty, apiRow.SourceSite);

    // Terminal row carries TerminalAtUtc (UTC kind); active row leaves it null.
    Assert.Null(apiRow.TerminalAtUtc);
    Assert.NotNull(dbRow.TerminalAtUtc);
    Assert.Equal(DateTimeKind.Utc, dbRow.TerminalAtUtc!.Value.Kind);

    // Timestamps round-trip as UTC.
    Assert.Equal(DateTimeKind.Utc, apiRow.CreatedAtUtc.Kind);
    Assert.Equal(DateTimeKind.Utc, apiRow.UpdatedAtUtc.Kind);
}
/// <summary>
/// Overwrites the <c>UpdatedAtUtc</c> column of one <c>OperationTracking</c>
/// row so that cursor and ordering tests do not depend on the wall clock.
/// </summary>
/// <param name="dataSource">Data source handed to <c>OpenVerifierConnection</c>.</param>
/// <param name="id">Identifies the row to update.</param>
/// <param name="updatedAtUtc">Timestamp to store, written in round-trip ("o") format.</param>
private static void SetUpdatedAt(string dataSource, TrackedOperationId id, DateTime updatedAtUtc)
{
    var roundTripStamp = updatedAtUtc.ToString("o", System.Globalization.CultureInfo.InvariantCulture);

    using (var verifier = OpenVerifierConnection(dataSource))
    using (var update = verifier.CreateCommand())
    {
        update.CommandText = "UPDATE OperationTracking SET UpdatedAtUtc = $u WHERE TrackedOperationId = $id;";
        update.Parameters.AddWithValue("$u", roundTripStamp);
        update.Parameters.AddWithValue("$id", id.ToString());
        update.ExecuteNonQuery();
    }
}
// ── SiteRuntime-024: read/write split + sync-safe Dispose ──────────────
[Fact]
@@ -0,0 +1,168 @@
using System.Collections.Concurrent;
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
namespace ZB.MOM.WW.ScadaBridge.StoreAndForward.Tests;
/// <summary>
/// M1.7: verifies the site operational events that
/// <see cref="StoreAndForwardService"/> emits for its own buffer/park activity.
/// Cached-call categories (here: ExternalSystem) are expected under the
/// <c>store_and_forward</c> event type; the notification forward-to-central
/// path is expected under <c>notification</c>. Successful immediate deliveries
/// are expected to emit nothing.
/// </summary>
public class StoreAndForwardSiteEventTests : IAsyncLifetime, IDisposable
{
    private const string StoreAndForwardEvent = "store_and_forward";
    private const string NotificationEvent = "notification";
    private const string EmptyPayload = "{}";

    /// <summary>The subset of a logged site event that these tests inspect.</summary>
    private sealed record Entry(string EventType, string Severity, string Source, string Message);

    /// <summary>Records each logged event in memory; never reports failed writes.</summary>
    private sealed class FakeSiteEventLogger : ISiteEventLogger
    {
        private readonly ConcurrentQueue<Entry> _entries = new();

        public IReadOnlyList<Entry> Entries
        {
            get { return _entries.ToArray(); }
        }

        public long FailedWriteCount
        {
            get { return 0; }
        }

        public IReadOnlyList<Entry> OfType(string t)
        {
            return _entries.Where(entry => entry.EventType == t).ToArray();
        }

        public Task LogEventAsync(string eventType, string severity, string? instanceId,
            string source, string message, string? details = null)
        {
            // instanceId and details are not captured; no test here asserts on them.
            var captured = new Entry(eventType, severity, source, message);
            _entries.Enqueue(captured);
            return Task.CompletedTask;
        }
    }

    private readonly SqliteConnection _keepAlive;
    private readonly StoreAndForwardStorage _storage;
    private readonly StoreAndForwardOptions _options;
    private readonly FakeSiteEventLogger _siteLog = new();
    private readonly StoreAndForwardService _service;

    /// <summary>
    /// Builds a service over a uniquely named shared in-memory SQLite database.
    /// A connection is opened here and only disposed in <see cref="Dispose"/>,
    /// so it stays open for the lifetime of the test instance.
    /// </summary>
    public StoreAndForwardSiteEventTests()
    {
        var memoryDbName = $"SiteEvt_{Guid.NewGuid():N}";
        var connectionString = $"Data Source={memoryDbName};Mode=Memory;Cache=Shared";

        _keepAlive = new SqliteConnection(connectionString);
        _keepAlive.Open();

        _storage = new StoreAndForwardStorage(connectionString, NullLogger<StoreAndForwardStorage>.Instance);

        _options = new StoreAndForwardOptions
        {
            DefaultRetryInterval = TimeSpan.Zero,
            DefaultMaxRetries = 1,
            RetryTimerInterval = TimeSpan.FromMinutes(10)
        };

        _service = new StoreAndForwardService(
            _storage,
            _options,
            NullLogger<StoreAndForwardService>.Instance,
            replication: null,
            cachedCallObserver: null,
            siteId: "site-a",
            siteEventLogger: _siteLog);
    }

    /// <summary>Initialises the storage before each test.</summary>
    public async Task InitializeAsync()
    {
        await _storage.InitializeAsync();
    }

    /// <summary>No asynchronous teardown is needed.</summary>
    public Task DisposeAsync()
    {
        return Task.CompletedTask;
    }

    /// <summary>Disposes the keep-alive connection opened in the constructor.</summary>
    public void Dispose()
    {
        _keepAlive.Dispose();
    }

    [Fact]
    public async Task BufferForRetry_ExternalSystem_EmitsStoreAndForwardSiteEvent()
    {
        _service.RegisterDeliveryHandler(
            StoreAndForwardCategory.ExternalSystem,
            _ => throw new HttpRequestException("transient"));

        await _service.EnqueueAsync(
            StoreAndForwardCategory.ExternalSystem, "api.example.com", EmptyPayload, "Pump1");

        var cachedCallEvents = _siteLog.OfType(StoreAndForwardEvent);
        Assert.Contains(
            cachedCallEvents,
            e => e is { Severity: "Warning", Source: "StoreAndForwardService" }
                 && e.Message.Contains("queued", StringComparison.OrdinalIgnoreCase));

        // The cached-call categories must NOT surface as notification events.
        Assert.Empty(_siteLog.OfType(NotificationEvent));
    }

    [Fact]
    public async Task ForwardFailure_Notification_EmitsNotificationSiteEvent()
    {
        // The site's notification role is forward-to-central. When the immediate
        // forward to central throws (central unreachable), the notification is
        // buffered for retry — a forward FAILURE, which the spec says to log as a
        // `notification` site event (filling the in-transit blind spot).
        _service.RegisterDeliveryHandler(
            StoreAndForwardCategory.Notification,
            _ => throw new HttpRequestException("central unreachable"));

        await _service.EnqueueAsync(
            StoreAndForwardCategory.Notification, "list-a", EmptyPayload, "Pump1");

        var notificationEvents = _siteLog.OfType(NotificationEvent);
        Assert.Contains(
            notificationEvents,
            e => e is { Severity: "Warning", Source: "StoreAndForwardService" }
                 && e.Message.Contains("queued", StringComparison.OrdinalIgnoreCase));

        // A notification forward-failure is not a store_and_forward (cached-call) event.
        Assert.Empty(_siteLog.OfType(StoreAndForwardEvent));
    }

    [Fact]
    public async Task RoutineEnqueue_Notification_DoesNotEmitSiteEvent()
    {
        // Spec: routine enqueue / forward-success on the notification path are
        // deliberately NOT logged — central's Notifications table is the audit
        // record of record. A successful immediate forward emits no site event.
        _service.RegisterDeliveryHandler(
            StoreAndForwardCategory.Notification,
            _ => Task.FromResult(true));

        await _service.EnqueueAsync(
            StoreAndForwardCategory.Notification, "list-a", EmptyPayload, "Pump1");

        Assert.Empty(_siteLog.OfType(NotificationEvent));
    }

    [Fact]
    public async Task Park_Notification_EmitsErrorNotificationSiteEvent()
    {
        // A long-buffered notification that exhausts retries is parked — the spec
        // logs this as a `notification` event (Error severity).
        _service.RegisterDeliveryHandler(
            StoreAndForwardCategory.Notification,
            _ => throw new HttpRequestException("central unreachable"));

        await _service.EnqueueAsync(
            StoreAndForwardCategory.Notification, "list-a", EmptyPayload, "Pump1",
            attemptImmediateDelivery: false, maxRetries: 1);
        await _service.RetryPendingMessagesAsync();

        var notificationEvents = _siteLog.OfType(NotificationEvent);
        Assert.Contains(
            notificationEvents,
            e => e is { Severity: "Error" }
                 && e.Message.Contains("parked", StringComparison.OrdinalIgnoreCase));
    }

    [Fact]
    public async Task Park_ExternalSystem_EmitsErrorStoreAndForwardSiteEvent()
    {
        // MaxRetries = 1 → the first sweep retry parks the message.
        _service.RegisterDeliveryHandler(
            StoreAndForwardCategory.ExternalSystem,
            _ => throw new HttpRequestException("transient"));

        await _service.EnqueueAsync(
            StoreAndForwardCategory.ExternalSystem, "api.example.com", EmptyPayload, "Pump1",
            attemptImmediateDelivery: false, maxRetries: 1);
        await _service.RetryPendingMessagesAsync();

        var cachedCallEvents = _siteLog.OfType(StoreAndForwardEvent);
        Assert.Contains(
            cachedCallEvents,
            e => e is { Severity: "Error" }
                 && e.Message.Contains("parked", StringComparison.OrdinalIgnoreCase));
    }

    [Fact]
    public async Task DeliveredImmediately_DoesNotEmitSiteEvent()
    {
        // A successful immediate delivery is the normal hot path — it is not a
        // store-and-forward buffering event, so no operational event is logged.
        _service.RegisterDeliveryHandler(
            StoreAndForwardCategory.ExternalSystem,
            _ => Task.FromResult(true));

        await _service.EnqueueAsync(
            StoreAndForwardCategory.ExternalSystem, "api", EmptyPayload, "Pump1");

        Assert.Empty(_siteLog.OfType(StoreAndForwardEvent));
        Assert.Empty(_siteLog.OfType(NotificationEvent));
    }
}