feat(sitecallaudit): central→site Retry/Discard relay for parked operations
This commit is contained in:
@@ -24,6 +24,11 @@
|
||||
project reference is documented here so the actor's scope-per-message
|
||||
GetRequiredService<ISiteCallAuditRepository>() compiles. -->
|
||||
<ProjectReference Include="../ScadaLink.ConfigurationDatabase/ScadaLink.ConfigurationDatabase.csproj" />
|
||||
<!-- Task 5 (#22): the central→site Retry/Discard relay routes RetryParkedOperation /
|
||||
DiscardParkedOperation to the owning site via SiteEnvelope + CentralCommunicationActor,
|
||||
the same transport every other central→site command uses. SiteEnvelope is defined
|
||||
in ScadaLink.Communication (no cycle: Communication does not reference SiteCallAudit). -->
|
||||
<ProjectReference Include="../ScadaLink.Communication/ScadaLink.Communication.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -4,8 +4,10 @@ using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
using ScadaLink.Commons.Messages.RemoteQuery;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Audit;
|
||||
using ScadaLink.Communication;
|
||||
|
||||
namespace ScadaLink.SiteCallAudit;
|
||||
|
||||
@@ -52,6 +54,19 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
private readonly SiteCallAuditOptions _options;
|
||||
private readonly ILogger<SiteCallAuditActor> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// Task 5 (#22): the central→site command transport — the
|
||||
/// <c>CentralCommunicationActor</c>, which owns the per-site
|
||||
/// <c>ClusterClient</c> map and routes a <see cref="SiteEnvelope"/> to the
|
||||
/// owning site. Set via <see cref="RegisterCentralCommunication"/> by the
|
||||
/// Host after both actors exist (this actor is a cluster singleton; the
|
||||
/// transport actor is created separately). Null until registration
|
||||
/// completes — a relay arriving before then is answered with a
|
||||
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/> outcome, because there
|
||||
/// is genuinely no route to any site yet.
|
||||
/// </summary>
|
||||
private IActorRef? _centralCommunication;
|
||||
|
||||
/// <summary>
|
||||
/// Test-mode constructor — injects a concrete repository instance whose
|
||||
/// lifetime exceeds the test, so the actor reuses the same instance
|
||||
@@ -110,6 +125,15 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
Receive<SiteCallDetailRequest>(HandleDetail);
|
||||
Receive<SiteCallKpiRequest>(HandleKpi);
|
||||
Receive<PerSiteSiteCallKpiRequest>(HandlePerSiteKpi);
|
||||
|
||||
// Task 5 (#22): central→site Retry/Discard relay for parked cached calls.
|
||||
Receive<RegisterCentralCommunication>(msg =>
|
||||
{
|
||||
_centralCommunication = msg.CentralCommunication;
|
||||
_logger.LogInformation("SiteCallAudit registered central→site communication transport");
|
||||
});
|
||||
Receive<RetrySiteCallRequest>(HandleRetrySiteCall);
|
||||
Receive<DiscardSiteCallRequest>(HandleDiscardSiteCall);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -385,6 +409,175 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
}
|
||||
}
|
||||
|
||||
// ── Task 5: central→site Retry/Discard relay ──
|
||||
|
||||
/// <summary>
|
||||
/// Relays an operator Retry of a parked cached call to its owning site. The
|
||||
/// site is the source of truth — this handler NEVER writes the central
|
||||
/// <c>SiteCalls</c> mirror row. It wraps a <see cref="RetryParkedOperation"/>
|
||||
/// in a <see cref="SiteEnvelope"/> addressed to <c>SourceSite</c>, Asks the
|
||||
/// <c>CentralCommunicationActor</c> (which routes it over the per-site
|
||||
/// <c>ClusterClient</c>), and maps the site's
|
||||
/// <see cref="ParkedOperationActionAck"/> — or an Ask timeout — onto a
|
||||
/// <see cref="RetrySiteCallResponse"/>. A timeout / no-route is reported as
|
||||
/// the distinct <see cref="SiteCallRelayOutcome.SiteUnreachable"/> outcome,
|
||||
/// not a generic failure, so the Central UI can tell "site offline" from
|
||||
/// "operation failed".
|
||||
/// </summary>
|
||||
private void HandleRetrySiteCall(RetrySiteCallRequest request)
{
    // Capture the requester before starting the asynchronous Ask so the
    // piped reply is delivered to whoever sent this request.
    var replyTo = Sender;
    var transport = _centralCommunication;

    if (transport is not null)
    {
        // Wrap the retry command in an envelope addressed to the owning
        // site and pipe the mapped ack (or the mapped failure) to the requester.
        var command = new RetryParkedOperation(
            request.CorrelationId, new TrackedOperationId(request.TrackedOperationId));

        transport
            .Ask<ParkedOperationActionAck>(new SiteEnvelope(request.SourceSite, command), _options.RelayTimeout)
            .PipeTo(
                replyTo,
                success: ack => MapRetryResponse(request.CorrelationId, ack),
                failure: ex => MapRetryFailure(request.CorrelationId, request.SourceSite, ex));
        return;
    }

    // The transport has not been registered yet, so nothing can be routed
    // to a site: answer immediately with the unreachable outcome.
    _logger.LogWarning(
        "RetrySiteCall {TrackedOperationId} for site {SourceSite} arrived before the "
        + "central→site transport was registered; reporting site unreachable",
        request.TrackedOperationId, request.SourceSite);
    replyTo.Tell(UnreachableRetry(request.CorrelationId));
}
|
||||
|
||||
/// <summary>
|
||||
/// Relays an operator Discard of a parked cached call to its owning site.
|
||||
/// Mirrors <see cref="HandleRetrySiteCall"/> — see that method for the
|
||||
/// source-of-truth and site-unreachable rationale.
|
||||
/// </summary>
|
||||
private void HandleDiscardSiteCall(DiscardSiteCallRequest request)
{
    // Capture the requester before starting the asynchronous Ask so the
    // piped reply is delivered to whoever sent this request.
    var replyTo = Sender;
    var transport = _centralCommunication;

    if (transport is not null)
    {
        // Wrap the discard command in an envelope addressed to the owning
        // site and pipe the mapped ack (or the mapped failure) to the requester.
        var command = new DiscardParkedOperation(
            request.CorrelationId, new TrackedOperationId(request.TrackedOperationId));

        transport
            .Ask<ParkedOperationActionAck>(new SiteEnvelope(request.SourceSite, command), _options.RelayTimeout)
            .PipeTo(
                replyTo,
                success: ack => MapDiscardResponse(request.CorrelationId, ack),
                failure: ex => MapDiscardFailure(request.CorrelationId, request.SourceSite, ex));
        return;
    }

    // The transport has not been registered yet, so nothing can be routed
    // to a site: answer immediately with the unreachable outcome.
    _logger.LogWarning(
        "DiscardSiteCall {TrackedOperationId} for site {SourceSite} arrived before the "
        + "central→site transport was registered; reporting site unreachable",
        request.TrackedOperationId, request.SourceSite);
    replyTo.Tell(UnreachableDiscard(request.CorrelationId));
}
|
||||
|
||||
/// <summary>
|
||||
/// Maps the site's <see cref="ParkedOperationActionAck"/> for a Retry onto a
|
||||
/// <see cref="RetrySiteCallResponse"/>: an applied action is
|
||||
/// <see cref="SiteCallRelayOutcome.Applied"/>; a clean no-op
|
||||
/// (<c>Applied=false</c>, no error) is <see cref="SiteCallRelayOutcome.NotParked"/>;
|
||||
/// an ack carrying an error is <see cref="SiteCallRelayOutcome.OperationFailed"/>
|
||||
/// — in every case the site WAS reached.
|
||||
/// </summary>
|
||||
private static RetrySiteCallResponse MapRetryResponse(string correlationId, ParkedOperationActionAck ack)
{
    // Classify once, then derive the success flag and the detail text
    // from that single classification.
    var relayOutcome = ClassifyAck(ack);
    var wasApplied = relayOutcome == SiteCallRelayOutcome.Applied;
    var detail = AckErrorMessage(relayOutcome, ack);

    // An ack was received, so the response always reports the site as reachable.
    return new RetrySiteCallResponse(
        correlationId,
        relayOutcome,
        Success: wasApplied,
        SiteReachable: true,
        ErrorMessage: detail);
}
|
||||
|
||||
/// <summary>
/// Maps the site's <see cref="ParkedOperationActionAck"/> for a Discard onto a
/// <see cref="DiscardSiteCallResponse"/>. The outcome comes from
/// <see cref="ClassifyAck"/>; <c>Success</c> is true only for
/// <see cref="SiteCallRelayOutcome.Applied"/>, and <c>SiteReachable</c> is
/// always true because an ack was received.
/// </summary>
private static DiscardSiteCallResponse MapDiscardResponse(string correlationId, ParkedOperationActionAck ack)
{
    var relayOutcome = ClassifyAck(ack);
    var wasApplied = relayOutcome == SiteCallRelayOutcome.Applied;
    var detail = AckErrorMessage(relayOutcome, ack);

    return new DiscardSiteCallResponse(
        correlationId,
        relayOutcome,
        Success: wasApplied,
        SiteReachable: true,
        ErrorMessage: detail);
}
|
||||
|
||||
/// <summary>
/// Failure path of the Retry relay: logs the exception that ended the Ask
/// as a warning and answers with the site-unreachable Retry response.
/// </summary>
private RetrySiteCallResponse MapRetryFailure(string correlationId, string sourceSite, Exception ex)
{
    const string template =
        "Retry relay to site {SourceSite} did not complete; reporting site unreachable";

    _logger.LogWarning(ex, template, sourceSite);

    var unreachable = UnreachableRetry(correlationId);
    return unreachable;
}
|
||||
|
||||
/// <summary>
/// Failure path of the Discard relay: logs the exception that ended the Ask
/// as a warning and answers with the site-unreachable Discard response.
/// </summary>
private DiscardSiteCallResponse MapDiscardFailure(string correlationId, string sourceSite, Exception ex)
{
    const string template =
        "Discard relay to site {SourceSite} did not complete; reporting site unreachable";

    _logger.LogWarning(ex, template, sourceSite);

    var unreachable = UnreachableDiscard(correlationId);
    return unreachable;
}
|
||||
|
||||
/// <summary>
|
||||
/// Classifies a site ack: <c>Applied=true</c> → applied; <c>Applied=false</c>
|
||||
/// with no error → the site definitively had nothing parked; <c>Applied=false</c>
|
||||
/// with an error → the site could not apply the action.
|
||||
/// </summary>
|
||||
private static SiteCallRelayOutcome ClassifyAck(ParkedOperationActionAck ack)
{
    // An applied action wins regardless of any message carried on the ack.
    if (ack.Applied)
    {
        return SiteCallRelayOutcome.Applied;
    }

    // Treat an empty or whitespace-only error the same as a missing one: a
    // blank string carries no failure detail, so classifying it as
    // OperationFailed would surface an empty error message. Only a
    // non-blank error means the site tried and could not apply the action.
    return string.IsNullOrWhiteSpace(ack.ErrorMessage)
        ? SiteCallRelayOutcome.NotParked
        : SiteCallRelayOutcome.OperationFailed;
}
|
||||
|
||||
/// <summary>
/// Chooses the detail text for a relay response built from a site ack:
/// nothing for an applied action, a fixed explanation when the operation
/// was not parked, and the ack's own error message for every other outcome.
/// </summary>
private static string? AckErrorMessage(SiteCallRelayOutcome outcome, ParkedOperationActionAck ack)
{
    if (outcome == SiteCallRelayOutcome.Applied)
    {
        return null;
    }

    if (outcome == SiteCallRelayOutcome.NotParked)
    {
        return "The operation is no longer parked at the site (already delivered, discarded, or retrying).";
    }

    // OperationFailed and any other outcome pass the site's message through.
    return ack.ErrorMessage;
}
|
||||
|
||||
/// <summary>Shared "site unreachable" detail text for both relay directions.</summary>
|
||||
private const string SiteUnreachableMessage =
|
||||
"The owning site is unreachable; the action was not applied. Retry when the site is back online.";
|
||||
|
||||
/// <summary>
/// Builds the Retry response for the site-unreachable case: not successful,
/// site not reachable, carrying the shared unreachable detail text.
/// </summary>
private static RetrySiteCallResponse UnreachableRetry(string correlationId) =>
    new RetrySiteCallResponse(
        correlationId,
        SiteCallRelayOutcome.SiteUnreachable,
        Success: false,
        SiteReachable: false,
        ErrorMessage: SiteUnreachableMessage);
|
||||
|
||||
/// <summary>
/// Builds the Discard response for the site-unreachable case: not successful,
/// site not reachable, carrying the shared unreachable detail text.
/// </summary>
private static DiscardSiteCallResponse UnreachableDiscard(string correlationId) =>
    new DiscardSiteCallResponse(
        correlationId,
        SiteCallRelayOutcome.SiteUnreachable,
        Success: false,
        SiteReachable: false,
        ErrorMessage: SiteUnreachableMessage);
|
||||
|
||||
/// <summary>
|
||||
/// Resolves an <see cref="ISiteCallAuditRepository"/> for one read message.
|
||||
/// In test mode the injected instance is returned with a null scope; in
|
||||
@@ -464,3 +657,13 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
return string.IsNullOrWhiteSpace(value) ? null : value;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the central→site command transport (the <c>CentralCommunicationActor</c>)
|
||||
/// with the <see cref="SiteCallAuditActor"/> so it can relay Retry/Discard
|
||||
/// actions on parked cached calls to their owning sites. Sent by the Host after
|
||||
/// both actors exist. Lives here (not in Commons) because it carries an
|
||||
/// <see cref="IActorRef"/> and <c>ScadaLink.Commons</c> has no Akka reference —
|
||||
/// the same rationale as <c>RegisterAuditIngest</c>.
|
||||
/// </summary>
|
||||
/// <param name="CentralCommunication">
/// Actor reference that the <see cref="SiteCallAuditActor"/> stores as its
/// central→site transport when it receives this message.
/// </param>
public sealed record RegisterCentralCommunication(IActorRef CentralCommunication);
|
||||
|
||||
@@ -23,4 +23,15 @@ public class SiteCallAuditOptions
|
||||
/// <c>NotificationOutboxOptions.DeliveredKpiWindow</c>.
|
||||
/// </summary>
|
||||
public TimeSpan KpiInterval { get; set; } = TimeSpan.FromMinutes(1);
|
||||
|
||||
/// <summary>
|
||||
/// Task 5 (#22): Ask timeout for the central→site Retry/Discard relay. When
|
||||
/// the owning site does not ack a <c>RetryParkedOperation</c> /
|
||||
/// <c>DiscardParkedOperation</c> within this window — site offline, no
|
||||
/// ClusterClient route, or central buffering deliberately absent — the relay
|
||||
/// reports a <c>SiteUnreachable</c> outcome. Default 10 seconds: long enough
|
||||
/// to absorb a healthy cross-cluster round-trip, short enough that an
|
||||
/// operator clicking Retry on an offline site gets a fast, honest answer.
|
||||
/// </summary>
|
||||
public TimeSpan RelayTimeout { get; set; } = TimeSpan.FromSeconds(10);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user