refactor: rename ScadaLink → ZB.MOM.WW.ScadaBridge (code + projects + namespaces)
Solution + 23 src projects + 26 test projects renamed; folders, csproj, namespaces, and ScadaLinkDbContext/ScadaBridgeDbContext class updated. ActorSystem "scadalink" → "scadabridge", Akka seed-node URLs migrated. SQL roles/logins, LDAP domains, CLI command name, and CLI config dir (~/.scadalink → ~/.scadabridge) also renamed. Build green; 5 Host.Tests fail awaiting SQL login rename in next commit. Pre-existing StaleTagMonitor timing flakes unchanged. Rename script committed at tools/rename-to-scadabridge.sh.
This commit is contained in:
@@ -0,0 +1,717 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.RemoteQuery;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Communication;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.SiteCallAudit;
|
||||
|
||||
/// <summary>
|
||||
/// Central singleton for Site Call Audit (#22). Receives
|
||||
/// <see cref="UpsertSiteCallCommand"/> messages and persists each
|
||||
/// <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.SiteCall"/> row via
|
||||
/// <see cref="ISiteCallAuditRepository.UpsertAsync"/> — idempotent monotonic
|
||||
/// upsert. Out-of-order or duplicate updates are silent no-ops at the
|
||||
/// repository layer; the actor always replies <see cref="UpsertSiteCallReply"/>
|
||||
/// with <c>Accepted=true</c> in that case because storage state is consistent
|
||||
/// and the site is free to consider its packet acked.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Implemented: direct <see cref="UpsertSiteCallCommand"/> telemetry ingest,
|
||||
/// query, detail and KPI handlers (Task 4), and the central→site Retry/Discard
|
||||
/// relay (Task 5 — the relay handlers live in this actor). Deferred (per
|
||||
/// CLAUDE.md scope discipline — both land in a later follow-up): the periodic
|
||||
/// per-site reconciliation puller that backfills lost telemetry, and the daily
|
||||
/// terminal-row purge scheduler (the repository exposes
|
||||
/// <c>PurgeTerminalAsync</c> but nothing in this module currently invokes it
|
||||
/// on a schedule).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Per CLAUDE.md "audit-write failure NEVER aborts the user-facing action" —
|
||||
/// the actor catches every exception from the repository call and replies
|
||||
/// <c>Accepted=false</c> without rethrowing, so the central singleton stays
|
||||
/// alive. The <see cref="SupervisorStrategy"/> override governs the actor's
|
||||
/// <em>children</em>, not the actor itself; this actor has no children today,
|
||||
/// so the override is currently inert. It returns a one-for-one strategy with
|
||||
/// <see cref="Akka.Actor.SupervisorStrategy.DefaultDecider"/> (Restart on most
|
||||
/// exceptions, Stop on <see cref="ActorInitializationException"/> /
|
||||
/// <see cref="ActorKilledException"/>) and <c>maxNrOfRetries: 0</c>, so any
|
||||
/// future child that throws is Stopped on the first failure — a deliberate
|
||||
/// "fail loudly" posture for the central singleton's eventual sub-actors
|
||||
/// (reconciliation puller, purge scheduler). Self-supervision of this actor
|
||||
/// is whatever the parent <see cref="Akka.Cluster.Tools.Singleton.ClusterSingletonManager"/>
|
||||
/// supplies; the in-handler <c>try/catch</c> in <see cref="OnUpsertAsync"/>
|
||||
/// is what actually keeps the singleton alive across repository faults.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Two constructors exist for the same reason as
|
||||
/// <c>AuditLogIngestActor</c>: production wiring (Bundle F) resolves the
|
||||
/// scoped EF repository from a fresh DI scope per message because the actor
|
||||
/// is a long-lived cluster singleton, while tests inject a concrete
|
||||
/// <see cref="ISiteCallAuditRepository"/> against a per-test MSSQL fixture
|
||||
/// so the actor exercises the real monotonic upsert SQL end to end.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public class SiteCallAuditActor : ReceiveActor
|
||||
{
|
||||
/// <summary>Maximum page size honoured by a <see cref="SiteCallQueryRequest"/>.</summary>
|
||||
private const int MaxPageSize = 200;
|
||||
|
||||
/// <summary>
/// Root DI provider. Assigned only by the production constructor; used to open a
/// fresh scope per message when no repository was injected.
/// </summary>
private readonly IServiceProvider? _serviceProvider;
|
||||
/// <summary>
/// Repository supplied by the test-mode constructor. When non-null it is used for
/// every message and no DI scope is created.
/// </summary>
private readonly ISiteCallAuditRepository? _injectedRepository;
|
||||
/// <summary>
/// Actor options; this class reads <c>StuckAgeThreshold</c>, <c>KpiInterval</c> and
/// <c>RelayTimeout</c> from it.
/// </summary>
private readonly SiteCallAuditOptions _options;
|
||||
/// <summary>Logger used for upsert failures, relay warnings and registration notices.</summary>
private readonly ILogger<SiteCallAuditActor> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// Task 5 (#22): the central→site command transport — the
|
||||
/// <c>CentralCommunicationActor</c>, which owns the per-site
|
||||
/// <c>ClusterClient</c> map and routes a <see cref="SiteEnvelope"/> to the
|
||||
/// owning site. Set via <see cref="RegisterCentralCommunication"/> by the
|
||||
/// Host after both actors exist (this actor is a cluster singleton; the
|
||||
/// transport actor is created separately). Null until registration
|
||||
/// completes — a relay arriving before then is answered with a
|
||||
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/> outcome, because there
|
||||
/// is genuinely no route to any site yet.
|
||||
/// </summary>
|
||||
private IActorRef? _centralCommunication;
|
||||
|
||||
/// <summary>
/// Test-mode constructor. Binds the actor to one caller-owned repository
/// instance that is reused for every message, so no DI scope is ever opened.
/// When <paramref name="options"/> is omitted a default
/// <see cref="SiteCallAuditOptions"/> instance is used.
/// </summary>
/// <param name="repository">Repository used for all messages; must not be null.</param>
/// <param name="logger">Logger for diagnostics and error reporting; must not be null.</param>
/// <param name="options">Optional option overrides; a default instance is created when null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="repository"/> or <paramref name="logger"/> is null.
/// </exception>
public SiteCallAuditActor(
    ISiteCallAuditRepository repository,
    ILogger<SiteCallAuditActor> logger,
    SiteCallAuditOptions? options = null)
{
    // Validation order (repository, then logger) matches the parameter order.
    _injectedRepository = repository ?? throw new ArgumentNullException(nameof(repository));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _options = options is null ? new SiteCallAuditOptions() : options;

    RegisterHandlers();
}
|
||||
|
||||
/// <summary>
/// Production constructor. Stores the root <see cref="IServiceProvider"/> so
/// each message can resolve <see cref="ISiteCallAuditRepository"/> from its own
/// short-lived DI scope instead of holding one scope for the actor's lifetime.
/// </summary>
/// <param name="serviceProvider">Provider from which a scope is created per message; must not be null.</param>
/// <param name="options">Actor options (stuck threshold, KPI interval, relay timeout); must not be null.</param>
/// <param name="logger">Logger for diagnostics and error reporting; must not be null.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public SiteCallAuditActor(
    IServiceProvider serviceProvider,
    SiteCallAuditOptions options,
    ILogger<SiteCallAuditActor> logger)
{
    // Validation order (provider, options, logger) matches the parameter order.
    _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
    _options = options ?? throw new ArgumentNullException(nameof(options));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));

    RegisterHandlers();
}
|
||||
|
||||
/// <summary>
/// Installs every message handler; called by both constructors. Three groups
/// are registered: the upsert ingest path, the read-side requests (query,
/// detail, global KPI, per-site KPI), and the central→site relay messages
/// (transport registration, Retry, Discard).
/// </summary>
private void RegisterHandlers()
{
    // Ingest: the only handler awaited inside the actor's message loop.
    ReceiveAsync<UpsertSiteCallCommand>(OnUpsertAsync);

    // Read side: each handler starts a task and pipes the result to the sender.
    Receive<SiteCallQueryRequest>(HandleQuery);
    Receive<SiteCallDetailRequest>(HandleDetail);
    Receive<SiteCallKpiRequest>(HandleKpi);
    Receive<PerSiteSiteCallKpiRequest>(HandlePerSiteKpi);

    // Task 5 (#22): central→site Retry/Discard relay for parked cached calls.
    Receive<RegisterCentralCommunication>(OnRegisterCentralCommunication);
    Receive<RetrySiteCallRequest>(HandleRetrySiteCall);
    Receive<DiscardSiteCallRequest>(HandleDiscardSiteCall);
}

/// <summary>
/// Stores the transport actor carried by <paramref name="registration"/> so the
/// Retry/Discard handlers have something to Ask, and logs the registration.
/// </summary>
/// <param name="registration">Message carrying the transport actor reference.</param>
private void OnRegisterCentralCommunication(RegisterCentralCommunication registration)
{
    _centralCommunication = registration.CentralCommunication;
    _logger.LogInformation("SiteCallAudit registered central→site communication transport");
}
|
||||
|
||||
/// <summary>
/// SiteCallAudit-001: supervision strategy applied to this actor's children
/// (it does not supervise the actor itself). Returns a one-for-one strategy
/// built from the framework's
/// <see cref="Akka.Actor.SupervisorStrategy.DefaultDecider"/> with
/// <c>maxNrOfRetries: 0</c> and a zero time range.
/// </summary>
/// <remarks>
/// Nothing in this class creates a child actor, so the strategy has no effect
/// until one is added. The zero retry budget is intended to stop a failing
/// child on its first fault — see the Akka.NET supervision documentation for
/// the exact decider and retry semantics.
/// </remarks>
/// <returns>The strategy used for any child of this actor.</returns>
protected override SupervisorStrategy SupervisorStrategy()
{
    var decider = Akka.Actor.SupervisorStrategy.DefaultDecider;

    return new OneForOneStrategy(
        maxNrOfRetries: 0,
        withinTimeRange: TimeSpan.Zero,
        decider: decider);
}
|
||||
|
||||
/// <summary>
/// Persists the <c>SiteCall</c> carried by one <see cref="UpsertSiteCallCommand"/>
/// and replies to the sender with an <see cref="UpsertSiteCallReply"/>:
/// <c>Accepted=true</c> when the repository upsert completes,
/// <c>Accepted=false</c> when anything on the persist path throws.
/// </summary>
/// <remarks>
/// Repository resolution (DI scope creation and service lookup) runs inside the
/// <c>try</c>, so a DI failure is treated exactly like a repository fault: it is
/// logged, answered with <c>Accepted=false</c>, and any scope that was already
/// created is disposed. No exception from the persist path is rethrown.
/// </remarks>
/// <param name="cmd">The upsert command; its <c>SiteCall</c> is stamped and persisted.</param>
/// <returns>A task that completes once the reply has been sent.</returns>
private async Task OnUpsertAsync(UpsertSiteCallCommand cmd)
{
    // Capture the sender before the first await; the reply is sent from the
    // continuation, where the actor's Sender can no longer be relied on.
    var replyTo = Sender;
    var id = cmd.SiteCall.TrackedOperationId;

    // Stays null in injected-repository (test) mode; assigned before the
    // service lookup so the finally block can dispose it even if lookup throws.
    IServiceScope? scope = null;

    try
    {
        ISiteCallAuditRepository repository;
        if (_injectedRepository is not null)
        {
            repository = _injectedRepository;
        }
        else
        {
            scope = _serviceProvider!.CreateScope();
            repository = scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>();
        }

        // SiteCallAudit-003: the actor owns the IngestedAtUtc stamp, set at
        // central-side persist time on every upsert regardless of the caller.
        var siteCall = cmd.SiteCall with { IngestedAtUtc = DateTime.UtcNow };
        await repository.UpsertAsync(siteCall).ConfigureAwait(false);
        replyTo.Tell(new UpsertSiteCallReply(id, Accepted: true));
    }
    catch (Exception ex)
    {
        // Audit-write failure must never abort the user-facing action: log,
        // reply Accepted=false, and do NOT rethrow so the singleton stays alive.
        _logger.LogError(ex, "SiteCallAudit upsert failed for {TrackedOperationId}", id);
        replyTo.Tell(new UpsertSiteCallReply(id, Accepted: false));
    }
    finally
    {
        scope?.Dispose();
    }
}
|
||||
|
||||
// ── Task 4: read-side (query / detail / KPI) ──
|
||||
|
||||
/// <summary>
/// Entry point for a paginated, filtered <c>SiteCalls</c> query. Starts
/// <see cref="QueryAsync"/> with the current UTC time and pipes its result to
/// the requesting actor. If the task faults, the requester instead receives a
/// <c>Success=false</c> response carrying the base exception's message, an
/// empty list and no next-page cursor.
/// </summary>
/// <param name="request">The query request (filters, page size, keyset cursor).</param>
private void HandleQuery(SiteCallQueryRequest request)
{
    // Read Sender synchronously; the piped reply is delivered later.
    var replyTo = Sender;
    var utcNow = DateTime.UtcNow;

    SiteCallQueryResponse Faulted(Exception fault) =>
        new SiteCallQueryResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            SiteCalls: Array.Empty<SiteCallSummary>(),
            NextAfterCreatedAtUtc: null,
            NextAfterId: null);

    QueryAsync(request, utcNow).PipeTo(
        replyTo,
        success: page => page,
        failure: fault => Faulted(fault));
}
|
||||
|
||||
/// <summary>
/// Runs one page of the <c>SiteCalls</c> query and maps it to a response.
/// Blank string filters are collapsed to <c>null</c>, the page size is clamped
/// to <c>1..MaxPageSize</c>, and the next-page cursor is taken from the last
/// row of the returned page (or is null when the page is empty).
/// </summary>
/// <param name="request">The query request (filters, page size, keyset cursor).</param>
/// <param name="now">Reference time from which the stuck cutoff is derived.</param>
/// <returns>A <c>Success=true</c> response; repository exceptions propagate to the caller.</returns>
private async Task<SiteCallQueryResponse> QueryAsync(SiteCallQueryRequest request, DateTime now)
{
    // Used twice: as the optional StuckOnly filter bound and to flag each
    // returned row as stuck or not.
    var stuckBefore = now - _options.StuckAgeThreshold;

    // StuckOnly is expressed as a cutoff on the filter rather than applied to
    // the returned rows, so it is evaluated together with the keyset cursor.
    var criteria = new SiteCallQueryFilter(
        Channel: NullIfBlank(request.ChannelFilter),
        SourceSite: NullIfBlank(request.SourceSiteFilter),
        Status: NullIfBlank(request.StatusFilter),
        Target: NullIfBlank(request.TargetKeyword),
        FromUtc: request.FromUtc,
        ToUtc: request.ToUtc,
        StuckCutoffUtc: request.StuckOnly ? stuckBefore : null,
        SourceNode: NullIfBlank(request.SourceNodeFilter));

    var cursor = new SiteCallPaging(
        PageSize: Math.Clamp(request.PageSize, 1, MaxPageSize),
        AfterCreatedAtUtc: request.AfterCreatedAtUtc,
        AfterId: request.AfterId is { } id ? new TrackedOperationId(id) : null);

    var (scope, repository) = ResolveRepository();

    // scope is null in injected-repository mode; using tolerates a null resource.
    using (scope)
    {
        var page = await repository.QueryAsync(criteria, cursor).ConfigureAwait(false);

        var summaries = page
            .Select(row => ToSummary(row, stuckBefore))
            .ToList();

        // The next-page cursor is the last row of the materialised page.
        var lastRow = page.Count > 0 ? page[^1] : null;

        return new SiteCallQueryResponse(
            request.CorrelationId,
            Success: true,
            ErrorMessage: null,
            SiteCalls: summaries,
            NextAfterCreatedAtUtc: lastRow?.CreatedAtUtc,
            NextAfterId: lastRow?.TrackedOperationId.Value);
    }
}
|
||||
|
||||
/// <summary>
/// Entry point for a single-call detail lookup. Starts
/// <see cref="DetailAsync"/> and pipes its result to the requesting actor. If
/// the task faults, the requester instead receives a <c>Success=false</c>
/// response carrying the base exception's message and a null detail.
/// </summary>
/// <param name="request">The detail request identifying the tracked operation.</param>
private void HandleDetail(SiteCallDetailRequest request)
{
    // Read Sender synchronously; the piped reply is delivered later.
    var replyTo = Sender;

    SiteCallDetailResponse Faulted(Exception fault) =>
        new SiteCallDetailResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            Detail: null);

    DetailAsync(request).PipeTo(
        replyTo,
        success: found => found,
        failure: fault => Faulted(fault));
}
|
||||
|
||||
/// <summary>
/// Loads one <c>SiteCall</c> row by tracked-operation id and maps it to a
/// detail response. A missing row produces <c>Success=false</c> with the
/// message <c>"site call not found"</c>.
/// </summary>
/// <param name="request">The detail request identifying the tracked operation.</param>
/// <returns>The detail response; repository exceptions propagate to the caller.</returns>
private async Task<SiteCallDetailResponse> DetailAsync(SiteCallDetailRequest request)
{
    var (scope, repository) = ResolveRepository();

    // scope is null in injected-repository mode; using tolerates a null resource.
    using (scope)
    {
        var key = new TrackedOperationId(request.TrackedOperationId);
        var row = await repository.GetAsync(key).ConfigureAwait(false);

        return row is null
            ? new SiteCallDetailResponse(
                request.CorrelationId,
                Success: false,
                ErrorMessage: "site call not found",
                Detail: null)
            : new SiteCallDetailResponse(
                request.CorrelationId,
                Success: true,
                ErrorMessage: null,
                Detail: ToDetail(row));
    }
}
|
||||
|
||||
/// <summary>
/// Entry point for the global KPI snapshot. Derives the stuck cutoff
/// (<c>now - StuckAgeThreshold</c>) and the interval lower bound
/// (<c>now - KpiInterval</c>) from the options, starts <see cref="KpiAsync"/>
/// and pipes its result to the requesting actor. If the task faults, the
/// requester receives a <c>Success=false</c> response with zeroed counters and
/// the base exception's message.
/// </summary>
/// <param name="request">The KPI request; only its correlation id is used.</param>
private void HandleKpi(SiteCallKpiRequest request)
{
    // Read Sender synchronously; the piped reply is delivered later.
    var replyTo = Sender;

    var utcNow = DateTime.UtcNow;
    var stuckBefore = utcNow - _options.StuckAgeThreshold;
    var windowStart = utcNow - _options.KpiInterval;

    SiteCallKpiResponse Faulted(Exception fault) =>
        new SiteCallKpiResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            BufferedCount: 0,
            ParkedCount: 0,
            FailedLastInterval: 0,
            DeliveredLastInterval: 0,
            OldestPendingAge: null,
            StuckCount: 0);

    KpiAsync(request.CorrelationId, stuckBefore, windowStart).PipeTo(
        replyTo,
        success: kpis => kpis,
        failure: fault => Faulted(fault));
}
|
||||
|
||||
/// <summary>
/// Asks the repository for the global KPI snapshot and copies its fields into
/// a <c>Success=true</c> <see cref="SiteCallKpiResponse"/>.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <param name="stuckCutoff">Cutoff passed to the repository for the stuck count.</param>
/// <param name="intervalSince">Lower bound passed to the repository for the interval counts.</param>
/// <returns>The KPI response; repository exceptions propagate to the caller.</returns>
private async Task<SiteCallKpiResponse> KpiAsync(
    string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
    var (scope, repository) = ResolveRepository();

    // scope is null in injected-repository mode; using tolerates a null resource.
    using (scope)
    {
        var kpis = await repository
            .ComputeKpisAsync(stuckCutoff, intervalSince)
            .ConfigureAwait(false);

        return new SiteCallKpiResponse(
            correlationId,
            Success: true,
            ErrorMessage: null,
            BufferedCount: kpis.BufferedCount,
            ParkedCount: kpis.ParkedCount,
            FailedLastInterval: kpis.FailedLastInterval,
            DeliveredLastInterval: kpis.DeliveredLastInterval,
            OldestPendingAge: kpis.OldestPendingAge,
            StuckCount: kpis.StuckCount);
    }
}
|
||||
|
||||
/// <summary>
/// Entry point for the per-source-site KPI request. Derives the stuck cutoff
/// and interval lower bound from the options in the same way as
/// <see cref="HandleKpi"/>, starts <see cref="PerSiteKpiAsync"/> and pipes its
/// result to the requesting actor. If the task faults, the requester receives
/// a <c>Success=false</c> response with an empty site list and the base
/// exception's message.
/// </summary>
/// <param name="request">The per-site KPI request; only its correlation id is used.</param>
private void HandlePerSiteKpi(PerSiteSiteCallKpiRequest request)
{
    // Read Sender synchronously; the piped reply is delivered later.
    var replyTo = Sender;

    var utcNow = DateTime.UtcNow;
    var stuckBefore = utcNow - _options.StuckAgeThreshold;
    var windowStart = utcNow - _options.KpiInterval;

    PerSiteSiteCallKpiResponse Faulted(Exception fault) =>
        new PerSiteSiteCallKpiResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: fault.GetBaseException().Message,
            Sites: Array.Empty<SiteCallSiteKpiSnapshot>());

    PerSiteKpiAsync(request.CorrelationId, stuckBefore, windowStart).PipeTo(
        replyTo,
        success: perSite => perSite,
        failure: fault => Faulted(fault));
}
|
||||
|
||||
/// <summary>
/// Asks the repository for the per-site KPI snapshots and wraps them in a
/// <c>Success=true</c> <see cref="PerSiteSiteCallKpiResponse"/>.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <param name="stuckCutoff">Cutoff passed to the repository for the stuck counts.</param>
/// <param name="intervalSince">Lower bound passed to the repository for the interval counts.</param>
/// <returns>The per-site response; repository exceptions propagate to the caller.</returns>
private async Task<PerSiteSiteCallKpiResponse> PerSiteKpiAsync(
    string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
    var (scope, repository) = ResolveRepository();

    // scope is null in injected-repository mode; using tolerates a null resource.
    using (scope)
    {
        var perSite = await repository
            .ComputePerSiteKpisAsync(stuckCutoff, intervalSince)
            .ConfigureAwait(false);

        return new PerSiteSiteCallKpiResponse(
            correlationId,
            Success: true,
            ErrorMessage: null,
            Sites: perSite);
    }
}
|
||||
|
||||
// ── Task 5: central→site Retry/Discard relay ──
|
||||
|
||||
/// <summary>
/// Relays an operator Retry of a parked cached call to its owning site. This
/// handler performs no repository write: it wraps a
/// <see cref="RetryParkedOperation"/> in a <see cref="SiteEnvelope"/> addressed
/// to the request's <c>SourceSite</c>, Asks the registered transport actor for
/// a <see cref="ParkedOperationActionAck"/> within <c>RelayTimeout</c>, and
/// pipes the mapped <see cref="RetrySiteCallResponse"/> to the requester.
/// </summary>
/// <remarks>
/// If no transport has been registered yet, or the Ask fails for any reason
/// (including timeout), the requester receives the
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/> response rather than a
/// generic failure.
/// </remarks>
/// <param name="request">The retry request (correlation id, tracked operation id, source site).</param>
private void HandleRetrySiteCall(RetrySiteCallRequest request)
{
    // Read Sender synchronously; the piped reply is delivered later.
    var replyTo = Sender;
    var transport = _centralCommunication;

    if (transport is not null)
    {
        var operationId = new TrackedOperationId(request.TrackedOperationId);
        var command = new RetryParkedOperation(request.CorrelationId, operationId);
        var envelope = new SiteEnvelope(request.SourceSite, command);

        transport
            .Ask<ParkedOperationActionAck>(envelope, _options.RelayTimeout)
            .PipeTo(
                replyTo,
                success: ack => MapRetryResponse(request.CorrelationId, ack),
                failure: fault => MapRetryFailure(request.CorrelationId, request.SourceSite, fault));
        return;
    }

    // No transport registered yet, so there is no route to any site and the
    // only honest answer is "unreachable".
    _logger.LogWarning(
        "RetrySiteCall {TrackedOperationId} for site {SourceSite} arrived before the central→site transport was registered; reporting site unreachable",
        request.TrackedOperationId, request.SourceSite);
    replyTo.Tell(UnreachableRetry(request.CorrelationId));
}
|
||||
|
||||
/// <summary>
/// Relays an operator Discard of a parked cached call to its owning site. It
/// wraps a <see cref="DiscardParkedOperation"/> in a <see cref="SiteEnvelope"/>
/// addressed to the request's <c>SourceSite</c>, Asks the registered transport
/// actor for a <see cref="ParkedOperationActionAck"/> within
/// <c>RelayTimeout</c>, and pipes the mapped
/// <see cref="DiscardSiteCallResponse"/> to the requester.
/// </summary>
/// <remarks>
/// If no transport has been registered yet, or the Ask fails for any reason
/// (including timeout), the requester receives the
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/> response.
/// </remarks>
/// <param name="request">The discard request (correlation id, tracked operation id, source site).</param>
private void HandleDiscardSiteCall(DiscardSiteCallRequest request)
{
    // Read Sender synchronously; the piped reply is delivered later.
    var replyTo = Sender;
    var transport = _centralCommunication;

    if (transport is not null)
    {
        var operationId = new TrackedOperationId(request.TrackedOperationId);
        var command = new DiscardParkedOperation(request.CorrelationId, operationId);
        var envelope = new SiteEnvelope(request.SourceSite, command);

        transport
            .Ask<ParkedOperationActionAck>(envelope, _options.RelayTimeout)
            .PipeTo(
                replyTo,
                success: ack => MapDiscardResponse(request.CorrelationId, ack),
                failure: fault => MapDiscardFailure(request.CorrelationId, request.SourceSite, fault));
        return;
    }

    // No transport registered yet, so there is no route to any site.
    _logger.LogWarning(
        "DiscardSiteCall {TrackedOperationId} for site {SourceSite} arrived before the central→site transport was registered; reporting site unreachable",
        request.TrackedOperationId, request.SourceSite);
    replyTo.Tell(UnreachableDiscard(request.CorrelationId));
}
|
||||
|
||||
/// <summary>
/// Converts a site's <see cref="ParkedOperationActionAck"/> for a Retry into a
/// <see cref="RetrySiteCallResponse"/>. The outcome comes from
/// <see cref="ClassifyAck"/>; <c>Success</c> is true only for
/// <see cref="SiteCallRelayOutcome.Applied"/>; <c>SiteReachable</c> is always
/// true because an ack was received.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <param name="ack">The ack returned by the site.</param>
/// <returns>The mapped retry response.</returns>
private static RetrySiteCallResponse MapRetryResponse(string correlationId, ParkedOperationActionAck ack)
{
    var outcome = ClassifyAck(ack);
    var applied = outcome == SiteCallRelayOutcome.Applied;
    var detail = AckErrorMessage(outcome, ack);

    return new RetrySiteCallResponse(
        correlationId,
        outcome,
        Success: applied,
        SiteReachable: true,
        ErrorMessage: detail);
}
|
||||
|
||||
/// <summary>
/// Converts a site's <see cref="ParkedOperationActionAck"/> for a Discard into
/// a <see cref="DiscardSiteCallResponse"/>. The outcome comes from
/// <see cref="ClassifyAck"/>; <c>Success</c> is true only for
/// <see cref="SiteCallRelayOutcome.Applied"/>; <c>SiteReachable</c> is always
/// true because an ack was received.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <param name="ack">The ack returned by the site.</param>
/// <returns>The mapped discard response.</returns>
private static DiscardSiteCallResponse MapDiscardResponse(string correlationId, ParkedOperationActionAck ack)
{
    var outcome = ClassifyAck(ack);
    var applied = outcome == SiteCallRelayOutcome.Applied;
    var detail = AckErrorMessage(outcome, ack);

    return new DiscardSiteCallResponse(
        correlationId,
        outcome,
        Success: applied,
        SiteReachable: true,
        ErrorMessage: detail);
}
|
||||
|
||||
/// <summary>
/// Failure mapping for a Retry relay whose Ask did not produce an ack: logs a
/// warning with the exception and returns the site-unreachable response.
/// Every exception is mapped the same way.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <param name="sourceSite">Site the relay was addressed to; used in the log only.</param>
/// <param name="ex">The exception that faulted the Ask.</param>
/// <returns>The <see cref="SiteCallRelayOutcome.SiteUnreachable"/> retry response.</returns>
private RetrySiteCallResponse MapRetryFailure(string correlationId, string sourceSite, Exception ex)
{
    var unreachable = UnreachableRetry(correlationId);

    _logger.LogWarning(
        ex,
        "Retry relay to site {SourceSite} did not complete; reporting site unreachable",
        sourceSite);

    return unreachable;
}
|
||||
|
||||
/// <summary>
/// Failure mapping for a Discard relay whose Ask did not produce an ack: logs
/// a warning with the exception and returns the site-unreachable response.
/// Every exception is mapped the same way.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <param name="sourceSite">Site the relay was addressed to; used in the log only.</param>
/// <param name="ex">The exception that faulted the Ask.</param>
/// <returns>The <see cref="SiteCallRelayOutcome.SiteUnreachable"/> discard response.</returns>
private DiscardSiteCallResponse MapDiscardFailure(string correlationId, string sourceSite, Exception ex)
{
    var unreachable = UnreachableDiscard(correlationId);

    _logger.LogWarning(
        ex,
        "Discard relay to site {SourceSite} did not complete; reporting site unreachable",
        sourceSite);

    return unreachable;
}
|
||||
|
||||
/// <summary>
/// Maps a site ack to a relay outcome:
/// <c>Applied=true</c> → <see cref="SiteCallRelayOutcome.Applied"/>;
/// <c>Applied=false</c> with a null error message →
/// <see cref="SiteCallRelayOutcome.NotParked"/>;
/// <c>Applied=false</c> with a non-null error message →
/// <see cref="SiteCallRelayOutcome.OperationFailed"/>.
/// </summary>
/// <param name="ack">The ack returned by the site.</param>
/// <returns>One of the three outcomes above; never <c>SiteUnreachable</c>.</returns>
private static SiteCallRelayOutcome ClassifyAck(ParkedOperationActionAck ack)
{
    return (ack.Applied, ack.ErrorMessage) switch
    {
        (true, _) => SiteCallRelayOutcome.Applied,
        (false, null) => SiteCallRelayOutcome.NotParked,
        _ => SiteCallRelayOutcome.OperationFailed,
    };
}
|
||||
|
||||
/// <summary>
/// Chooses the error text that accompanies a classified ack: none for
/// <c>Applied</c>, a fixed explanatory sentence for <c>NotParked</c>, and the
/// ack's own <c>ErrorMessage</c> for <c>OperationFailed</c>.
/// </summary>
/// <param name="outcome">The outcome previously derived from <paramref name="ack"/>.</param>
/// <param name="ack">The ack returned by the site.</param>
/// <returns>The error text, or null when there is none.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="outcome"/> is not a known <see cref="SiteCallRelayOutcome"/> value.
/// </exception>
private static string? AckErrorMessage(SiteCallRelayOutcome outcome, ParkedOperationActionAck ack)
{
    switch (outcome)
    {
        case SiteCallRelayOutcome.Applied:
            return null;

        case SiteCallRelayOutcome.NotParked:
            return "The operation is no longer parked at the site (already delivered, discarded, or retrying).";

        case SiteCallRelayOutcome.OperationFailed:
            return ack.ErrorMessage;

        // ClassifyAck never yields SiteUnreachable (those responses are built by
        // UnreachableRetry/UnreachableDiscard without an ack), so this arm is
        // purely defensive. It deliberately returns the ack's text instead of
        // throwing so a broken invariant cannot crash the relay path.
        case SiteCallRelayOutcome.SiteUnreachable:
            return ack.ErrorMessage;

        default:
            throw new ArgumentOutOfRangeException(
                nameof(outcome), outcome, "unknown SiteCallRelayOutcome");
    }
}
|
||||
|
||||
/// <summary>Shared "site unreachable" detail text for both relay directions.</summary>
|
||||
private const string SiteUnreachableMessage =
|
||||
"The owning site is unreachable; the action was not applied. Retry when the site is back online.";
|
||||
|
||||
/// <summary>
/// Builds the Retry response used whenever the site could not be reached:
/// outcome <see cref="SiteCallRelayOutcome.SiteUnreachable"/>,
/// <c>Success=false</c>, <c>SiteReachable=false</c>, and the shared
/// <c>SiteUnreachableMessage</c> text.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <returns>The site-unreachable retry response.</returns>
private static RetrySiteCallResponse UnreachableRetry(string correlationId) =>
    new RetrySiteCallResponse(
        correlationId,
        SiteCallRelayOutcome.SiteUnreachable,
        Success: false,
        SiteReachable: false,
        ErrorMessage: SiteUnreachableMessage);
|
||||
|
||||
/// <summary>
/// Builds the Discard response used whenever the site could not be reached:
/// outcome <see cref="SiteCallRelayOutcome.SiteUnreachable"/>,
/// <c>Success=false</c>, <c>SiteReachable=false</c>, and the shared
/// <c>SiteUnreachableMessage</c> text.
/// </summary>
/// <param name="correlationId">Correlation id echoed on the response.</param>
/// <returns>The site-unreachable discard response.</returns>
private static DiscardSiteCallResponse UnreachableDiscard(string correlationId) =>
    new DiscardSiteCallResponse(
        correlationId,
        SiteCallRelayOutcome.SiteUnreachable,
        Success: false,
        SiteReachable: false,
        ErrorMessage: SiteUnreachableMessage);
|
||||
|
||||
/// <summary>
/// Resolves an <see cref="ISiteCallAuditRepository"/> for one message. When a
/// repository was injected (test mode) it is returned with a null scope;
/// otherwise a fresh DI scope is created and returned together with the
/// repository resolved from it, and the caller must dispose that scope once
/// the work completes.
/// </summary>
/// <remarks>
/// If resolving the repository from the new scope throws, the scope is
/// disposed here before the exception propagates, because the caller never
/// receives it and so could not dispose it.
/// </remarks>
/// <returns>
/// The scope to dispose (null in injected-repository mode) and the repository to use.
/// </returns>
private (IServiceScope? Scope, ISiteCallAuditRepository Repository) ResolveRepository()
{
    if (_injectedRepository is not null)
    {
        return (null, _injectedRepository);
    }

    var scope = _serviceProvider!.CreateScope();
    try
    {
        return (scope, scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>());
    }
    catch
    {
        // Ownership has not been handed to the caller yet, so release it here.
        scope.Dispose();
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Reports whether a cached call is stuck: its
/// <see cref="SiteCall.TerminalAtUtc"/> is <c>null</c> (still non-terminal)
/// and it was created strictly before <paramref name="stuckCutoff"/>. The
/// check is keyed on <c>TerminalAtUtc</c> rather than on the row's status
/// string.
/// </summary>
/// <param name="row">The row to test.</param>
/// <param name="stuckCutoff">Creation time before which a non-terminal row counts as stuck.</param>
/// <returns><c>true</c> when the row is non-terminal and older than the cutoff.</returns>
private static bool IsStuck(SiteCall row, DateTime stuckCutoff)
{
    if (row.TerminalAtUtc is not null)
    {
        // Terminal rows are never stuck, however old they are.
        return false;
    }

    return row.CreatedAtUtc < stuckCutoff;
}
|
||||
|
||||
/// <summary>
/// Projects a <see cref="SiteCall"/> row onto the list-view
/// <see cref="SiteCallSummary"/>, copying its fields one-to-one and adding the
/// computed <c>IsStuck</c> flag.
/// </summary>
/// <param name="row">The row to project.</param>
/// <param name="stuckCutoff">Cutoff forwarded to <see cref="IsStuck"/>.</param>
/// <returns>The summary for <paramref name="row"/>.</returns>
private static SiteCallSummary ToSummary(SiteCall row, DateTime stuckCutoff) =>
    new SiteCallSummary(
        TrackedOperationId: row.TrackedOperationId.Value,
        SourceSite: row.SourceSite,
        Channel: row.Channel,
        Target: row.Target,
        Status: row.Status,
        RetryCount: row.RetryCount,
        LastError: row.LastError,
        HttpStatus: row.HttpStatus,
        CreatedAtUtc: row.CreatedAtUtc,
        UpdatedAtUtc: row.UpdatedAtUtc,
        TerminalAtUtc: row.TerminalAtUtc,
        IsStuck: IsStuck(row, stuckCutoff),
        SourceNode: row.SourceNode);
|
||||
|
||||
/// <summary>
/// Projects a <see cref="SiteCall"/> row onto the detail-view
/// <see cref="SiteCallDetail"/>, copying its fields one-to-one (including
/// <c>IngestedAtUtc</c>).
/// </summary>
/// <param name="row">The row to project.</param>
/// <returns>The detail record for <paramref name="row"/>.</returns>
private static SiteCallDetail ToDetail(SiteCall row) =>
    new SiteCallDetail(
        TrackedOperationId: row.TrackedOperationId.Value,
        SourceSite: row.SourceSite,
        Channel: row.Channel,
        Target: row.Target,
        Status: row.Status,
        RetryCount: row.RetryCount,
        LastError: row.LastError,
        HttpStatus: row.HttpStatus,
        CreatedAtUtc: row.CreatedAtUtc,
        UpdatedAtUtc: row.UpdatedAtUtc,
        TerminalAtUtc: row.TerminalAtUtc,
        IngestedAtUtc: row.IngestedAtUtc,
        SourceNode: row.SourceNode);
|
||||
|
||||
/// <summary>
/// Collapses a null, empty or whitespace-only filter string to <c>null</c> so
/// a blank UI filter is passed on as "no constraint". Any other value is
/// returned unchanged (it is not trimmed).
/// </summary>
/// <param name="value">The raw filter text, possibly null.</param>
/// <returns><c>null</c> for blank input; otherwise <paramref name="value"/> itself.</returns>
private static string? NullIfBlank(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    return value;
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the central→site command transport (the <c>CentralCommunicationActor</c>)
|
||||
/// with the <see cref="SiteCallAuditActor"/> so it can relay Retry/Discard
|
||||
/// actions on parked cached calls to their owning sites. Sent by the Host after
|
||||
/// both actors exist. Lives here (not in Commons) because it carries an
|
||||
/// <see cref="IActorRef"/> and <c>ZB.MOM.WW.ScadaBridge.Commons</c> has no Akka reference —
|
||||
/// the same rationale as <c>RegisterAuditIngest</c>.
|
||||
/// </summary>
/// <param name="CentralCommunication">
/// Transport actor reference; <see cref="SiteCallAuditActor"/> stores it and uses it
/// as the target of its Retry/Discard relay Asks.
/// </param>
|
||||
public sealed record RegisterCentralCommunication(IActorRef CentralCommunication);
|
||||
Reference in New Issue
Block a user