feat(sitecallaudit): central→site Retry/Discard relay for parked operations
This commit is contained in:
113
src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs
Normal file
113
src/ScadaLink.Commons/Messages/Audit/SiteCallRelayMessages.cs
Normal file
@@ -0,0 +1,113 @@
|
||||
namespace ScadaLink.Commons.Messages.Audit;
|
||||
|
||||
/// <summary>
/// Result classification for a Site Call Audit (#22) Retry/Discard relay. Each
/// member is a case the Central UI Site Calls page presents differently.
/// </summary>
/// <remarks>
/// <see cref="SiteUnreachable"/> is intentionally kept apart from
/// <see cref="OperationFailed"/>. Central only holds an eventually-consistent
/// mirror and the site is the source of truth, so a relay that never gets
/// through to the owning site is a transient transport condition the operator
/// can retry — it is not a failed operation. The UI reports it as
/// "site unreachable" instead of a generic error.
/// </remarks>
public enum SiteCallRelayOutcome
{
    /// <summary>
    /// The relay command reached the owning site and the action was applied to
    /// that site's Store-and-Forward buffer (the parked cached call was either
    /// reset for retry or discarded). Central learns the corrected state later,
    /// through telemetry.
    /// </summary>
    Applied = 0,

    /// <summary>
    /// The relay command reached the owning site, but there was nothing to act
    /// on: no parked row matched the tracked id (already delivered/discarded,
    /// or no longer <c>Parked</c>). This is a definitive answer from the site
    /// and not a failure.
    /// </summary>
    NotParked = 1,

    /// <summary>
    /// The owning site could not be reached (offline / no ClusterClient route /
    /// relay timed out). The action was NOT applied; the operator may retry
    /// once the site is back online.
    /// </summary>
    SiteUnreachable = 2,

    /// <summary>
    /// The owning site was reached but reported that it could not apply the
    /// action (its parked-message handler was unavailable or its store
    /// faulted).
    /// </summary>
    OperationFailed = 3,
}
|
||||
|
||||
/// <summary>
/// Central UI → Site Call Audit: relay a Retry of a parked cached call to its
/// owning site. The owning site performs the actual retry on its
/// Store-and-Forward buffer — central never mutates the central <c>SiteCalls</c>
/// mirror row. Mirrors
/// <see cref="ScadaLink.Commons.Messages.Notification.RetryNotificationRequest"/>
/// but carries <see cref="SourceSite"/> (the relay target) and answers with a
/// distinct site-unreachable outcome.
/// </summary>
/// <param name="CorrelationId">Request correlation id, echoed on the response.</param>
/// <param name="TrackedOperationId">
/// The cached operation to retry — the PK of the central <c>SiteCalls</c> row
/// and the S&amp;F buffer message id at the owning site.
/// </param>
/// <param name="SourceSite">
/// The owning site (<c>SiteCall.SourceSite</c>) the relay is routed to.
/// </param>
public sealed record RetrySiteCallRequest(
    string CorrelationId,
    Guid TrackedOperationId,
    string SourceSite);
|
||||
|
||||
/// <summary>
/// Site Call Audit → Central UI: the answer to a <see cref="RetrySiteCallRequest"/>.
/// </summary>
/// <param name="CorrelationId">Correlation id copied from the request.</param>
/// <param name="Outcome">
/// How the relay ended: <see cref="SiteCallRelayOutcome.Applied"/>,
/// <see cref="SiteCallRelayOutcome.NotParked"/>,
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/> or
/// <see cref="SiteCallRelayOutcome.OperationFailed"/>.
/// </param>
/// <param name="Success">
/// Shortcut flag: <c>true</c> only when <paramref name="Outcome"/> is
/// <see cref="SiteCallRelayOutcome.Applied"/>.
/// </param>
/// <param name="SiteReachable">
/// Shortcut flag: <c>false</c> only when <paramref name="Outcome"/> is
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/>, so the UI can tell
/// "site offline" from "operation failed" without switching on the enum.
/// </param>
/// <param name="ErrorMessage">
/// Human-readable explanation when the action was not applied; <c>null</c> on
/// success.
/// </param>
public sealed record RetrySiteCallResponse(
    string CorrelationId,
    SiteCallRelayOutcome Outcome,
    bool Success,
    bool SiteReachable,
    string? ErrorMessage);
|
||||
|
||||
/// <summary>
/// Central UI → Site Call Audit: asks for a Discard of a parked cached call to
/// be relayed to the site that owns it. The source-of-truth and routing
/// rationale is the same as for <see cref="RetrySiteCallRequest"/>.
/// </summary>
/// <param name="CorrelationId">Request correlation id.</param>
/// <param name="TrackedOperationId">Id of the cached operation to discard.</param>
/// <param name="SourceSite">The owning site the relay is routed to.</param>
public sealed record DiscardSiteCallRequest(
    string CorrelationId,
    Guid TrackedOperationId,
    string SourceSite);
|
||||
|
||||
/// <summary>
/// Site Call Audit → Central UI: the answer to a <see cref="DiscardSiteCallRequest"/>.
/// Carries the same fields as <see cref="RetrySiteCallResponse"/>.
/// </summary>
/// <param name="CorrelationId">Correlation id copied from the request.</param>
/// <param name="Outcome">How the relay ended.</param>
/// <param name="Success">Shortcut flag accompanying <paramref name="Outcome"/>.</param>
/// <param name="SiteReachable">Shortcut flag accompanying <paramref name="Outcome"/>.</param>
/// <param name="ErrorMessage">Human-readable detail, or <c>null</c>.</param>
public sealed record DiscardSiteCallResponse(
    string CorrelationId,
    SiteCallRelayOutcome Outcome,
    bool Success,
    bool SiteReachable,
    string? ErrorMessage);
|
||||
@@ -0,0 +1,75 @@
|
||||
using ScadaLink.Commons.Types;
|
||||
|
||||
namespace ScadaLink.Commons.Messages.RemoteQuery;
|
||||
|
||||
/// <summary>
/// Central → site relay command: retry a parked cached operation
/// (<c>ExternalSystem.CachedCall</c> / <c>Database.CachedWrite</c>) on the
/// owning site's Store-and-Forward buffer. Sent over the command/control
/// channel by <c>SiteCallAuditActor</c> when an operator clicks Retry on a
/// <c>Parked</c> Site Call row in the Central UI.
/// </summary>
/// <remarks>
/// <para>
/// The site is the source of truth for cached-call status — central never
/// mutates the central <c>SiteCalls</c> mirror row directly. This command asks
/// the site to reset its own parked row back to <c>Pending</c> so the S&amp;F
/// retry sweep attempts delivery again; the corrected state then flows back to
/// central via the normal cached-call telemetry path.
/// </para>
/// <para>
/// The cached call's S&amp;F buffer message id is the
/// <see cref="TrackedOperationId"/> itself (the tracked id is supplied as the
/// buffered row's id at enqueue time), so the site can resolve the parked row
/// directly from <see cref="TrackedOperationId"/>. A retry on a row that is not
/// actually <c>Parked</c> is a safe no-op at the site — the ack reports
/// <c>Applied=false</c> rather than corrupting a non-parked row.
/// </para>
/// <para>
/// This is a plain record carrying only ids, so it lives in Commons (no
/// <c>IActorRef</c> field). It mirrors <see cref="ParkedMessageRetryRequest"/>
/// but keys on <see cref="TrackedOperationId"/> rather than the opaque S&amp;F
/// message-id string.
/// </para>
/// </remarks>
/// <param name="CorrelationId">Correlation id of the relay; echoed on the ack.</param>
/// <param name="TrackedOperationId">Tracked id of the parked operation to retry.</param>
public sealed record RetryParkedOperation(
    string CorrelationId,
    TrackedOperationId TrackedOperationId);
|
||||
|
||||
/// <summary>
/// Central → site relay command: discard a parked cached operation on the
/// owning site's Store-and-Forward buffer. Sent over the command/control
/// channel by <c>SiteCallAuditActor</c> when an operator clicks Discard on a
/// <c>Parked</c> Site Call row in the Central UI. See
/// <see cref="RetryParkedOperation"/> for the source-of-truth and message-id
/// rationale; Discard marks the operation terminally <c>Discarded</c> at the
/// site by removing the parked S&amp;F buffer row.
/// </summary>
/// <param name="CorrelationId">Correlation id of the relay; echoed on the ack.</param>
/// <param name="TrackedOperationId">Tracked id of the parked operation to discard.</param>
public sealed record DiscardParkedOperation(
    string CorrelationId,
    TrackedOperationId TrackedOperationId);
|
||||
|
||||
/// <summary>
/// Site → central ack for a <see cref="RetryParkedOperation"/> /
/// <see cref="DiscardParkedOperation"/> relay command. The site replies this
/// after applying (or safely no-op-ing) the action against its own
/// Store-and-Forward buffer.
/// </summary>
/// <param name="CorrelationId">Correlation id of the originating relay command.</param>
/// <param name="Applied">
/// <c>true</c> when the parked operation was found and the action was applied.
/// <c>false</c> with a <c>null</c> <paramref name="ErrorMessage"/> means no
/// parked row matched the <see cref="RetryParkedOperation.TrackedOperationId"/>
/// (already delivered, discarded, never cached, or not in a <c>Parked</c>
/// state) — a definitive "nothing to do" answer from the site, NOT a transport
/// failure, so the relay must distinguish it from a site-unreachable timeout.
/// <c>false</c> with a non-null <paramref name="ErrorMessage"/> means the site
/// was reached but could not apply the action.
/// </param>
/// <param name="ErrorMessage">
/// Populated only when the site could not apply the action (e.g. the parked
/// message handler is not available, or the S&amp;F store faulted); <c>null</c>
/// on a clean <c>Applied=true</c>/<c>Applied=false</c> outcome.
/// </param>
public sealed record ParkedOperationActionAck(
    string CorrelationId,
    bool Applied,
    string? ErrorMessage = null);
|
||||
@@ -167,6 +167,33 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
});
|
||||
|
||||
// Task 5 (#22): Retry/Discard relay commands for parked cached operations,
// sent from central over the command/control channel. When a parked-message
// handler is set, the command is forwarded to it (Forward keeps the
// original sender, so the handler's ParkedOperationActionAck goes back to
// the relaying Ask). Without a handler, this actor answers directly with a
// not-applied ack that carries an error message.
Receive<RetryParkedOperation>(msg =>
{
    if (_parkedMessageHandler is null)
    {
        Sender.Tell(new ParkedOperationActionAck(
            msg.CorrelationId, Applied: false, "Parked message handler not available"));
        return;
    }

    _parkedMessageHandler.Forward(msg);
});

Receive<DiscardParkedOperation>(msg =>
{
    if (_parkedMessageHandler is null)
    {
        Sender.Tell(new ParkedOperationActionAck(
            msg.CorrelationId, Applied: false, "Parked message handler not available"));
        return;
    }

    _parkedMessageHandler.Forward(msg);
});
|
||||
|
||||
// Notification Outbox: forward a buffered notification submitted by the site
|
||||
// Store-and-Forward Engine to the central cluster. The original Sender (the
|
||||
// S&F forwarder's Ask) is forwarded as the ClusterClient.Send sender so the
|
||||
|
||||
@@ -347,6 +347,33 @@ public class CommunicationService
|
||||
return await GetSiteCallAudit().Ask<PerSiteSiteCallKpiResponse>(
|
||||
request, _options.QueryTimeout, cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Task 5 (#22): sends an operator Retry of a parked cached call to the
/// Site Call Audit actor and awaits its <see cref="RetrySiteCallResponse"/>.
/// The actor is Asked directly (it is central-local); it relays a
/// <c>RetryParkedOperation</c> to the owning site and answers with an
/// outcome that keeps "site unreachable" distinct. Central never mutates
/// the central <c>SiteCalls</c> mirror row.
/// </summary>
/// <remarks>
/// NOTE(review): this Ask is bounded by <c>_options.QueryTimeout</c>, while
/// the actor bounds its own site relay separately. If the query timeout is
/// not the longer of the two, this call could time out (and throw) before
/// the actor can answer "site unreachable" — confirm the configured values.
/// </remarks>
/// <param name="request">The retry request to relay.</param>
/// <param name="cancellationToken">Cancels the pending Ask.</param>
/// <returns>The actor's response for this request.</returns>
public async Task<RetrySiteCallResponse> RetrySiteCallAsync(
    RetrySiteCallRequest request, CancellationToken cancellationToken = default)
{
    var siteCallAudit = GetSiteCallAudit();
    var response = await siteCallAudit.Ask<RetrySiteCallResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
|
||||
/// <summary>
/// Task 5 (#22): sends an operator Discard of a parked cached call to the
/// Site Call Audit actor and awaits its <see cref="DiscardSiteCallResponse"/>.
/// Routing and source-of-truth rationale are the same as for
/// <see cref="RetrySiteCallAsync"/>.
/// </summary>
/// <param name="request">The discard request to relay.</param>
/// <param name="cancellationToken">Cancels the pending Ask.</param>
/// <returns>The actor's response for this request.</returns>
public async Task<DiscardSiteCallResponse> DiscardSiteCallAsync(
    DiscardSiteCallRequest request, CancellationToken cancellationToken = default)
{
    var siteCallAudit = GetSiteCallAudit();
    var response = await siteCallAudit.Ask<DiscardSiteCallResponse>(
        request, _options.QueryTimeout, cancellationToken);
    return response;
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -446,7 +446,17 @@ akka {{
|
||||
// the Site Call Audit actor directly (query, KPIs, detail) — mirrors the
|
||||
// SetNotificationOutbox wiring above.
|
||||
commService?.SetSiteCallAudit(siteCallAuditProxy);
|
||||
_logger.LogInformation("SiteCallAuditActor singleton created");
|
||||
|
||||
// Task 5 (#22): hand the CentralCommunicationActor to the SiteCallAudit
|
||||
// actor so it can relay operator Retry/Discard on parked cached calls to
|
||||
// the owning site (over the per-site ClusterClient via SiteEnvelope).
|
||||
// Mirrors the RegisterAuditIngest / RegisterNotificationOutbox wiring;
|
||||
// the message is sent to the singleton proxy so it reaches whichever
|
||||
// central node currently hosts the singleton.
|
||||
siteCallAuditProxy.Tell(
|
||||
new ScadaLink.SiteCallAudit.RegisterCentralCommunication(centralCommActor));
|
||||
_logger.LogInformation(
|
||||
"SiteCallAuditActor singleton created and registered with CentralCommunicationActor");
|
||||
|
||||
_logger.LogInformation("Central actors registered. CentralCommunicationActor created.");
|
||||
}
|
||||
|
||||
@@ -24,6 +24,11 @@
|
||||
project reference is documented here so the actor's scope-per-message
|
||||
GetRequiredService<ISiteCallAuditRepository>() compiles. -->
|
||||
<ProjectReference Include="../ScadaLink.ConfigurationDatabase/ScadaLink.ConfigurationDatabase.csproj" />
|
||||
<!-- Task 5 (#22): the central→site Retry/Discard relay routes RetryParkedOperation /
|
||||
DiscardParkedOperation to the owning site via SiteEnvelope + CentralCommunicationActor,
|
||||
the same transport every other central→site command uses. SiteEnvelope is defined
|
||||
in ScadaLink.Communication (no cycle: Communication does not reference SiteCallAudit). -->
|
||||
<ProjectReference Include="../ScadaLink.Communication/ScadaLink.Communication.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -4,8 +4,10 @@ using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Messages.Audit;
|
||||
using ScadaLink.Commons.Messages.RemoteQuery;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.Commons.Types.Audit;
|
||||
using ScadaLink.Communication;
|
||||
|
||||
namespace ScadaLink.SiteCallAudit;
|
||||
|
||||
@@ -52,6 +54,19 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
private readonly SiteCallAuditOptions _options;
|
||||
private readonly ILogger<SiteCallAuditActor> _logger;
|
||||
|
||||
/// <summary>
/// Task 5 (#22): reference to the central→site command transport (the
/// <c>CentralCommunicationActor</c>), which owns the per-site
/// <c>ClusterClient</c> map and routes a <see cref="SiteEnvelope"/> to the
/// owning site. It is assigned when a <see cref="RegisterCentralCommunication"/>
/// message arrives; the Host sends that once both actors exist (this actor
/// is a cluster singleton, the transport actor is created separately).
/// While it is still null, relay requests are answered with
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/>, since no route to any
/// site exists yet.
/// </summary>
private IActorRef? _centralCommunication;
|
||||
|
||||
/// <summary>
|
||||
/// Test-mode constructor — injects a concrete repository instance whose
|
||||
/// lifetime exceeds the test, so the actor reuses the same instance
|
||||
@@ -110,6 +125,15 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
Receive<SiteCallDetailRequest>(HandleDetail);
|
||||
Receive<SiteCallKpiRequest>(HandleKpi);
|
||||
Receive<PerSiteSiteCallKpiRequest>(HandlePerSiteKpi);
|
||||
|
||||
// Task 5 (#22): central→site Retry/Discard relay for parked cached calls.
// The registration message supplies the transport actor; the two request
// handlers relay through it.
Receive<RegisterCentralCommunication>(registration =>
{
    _centralCommunication = registration.CentralCommunication;
    _logger.LogInformation("SiteCallAudit registered central→site communication transport");
});
Receive<RetrySiteCallRequest>(request => HandleRetrySiteCall(request));
Receive<DiscardSiteCallRequest>(request => HandleDiscardSiteCall(request));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -385,6 +409,175 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
}
|
||||
}
|
||||
|
||||
// ── Task 5: central→site Retry/Discard relay ──
|
||||
|
||||
/// <summary>
/// Relays an operator Retry of a parked cached call to the site that owns
/// it. This handler never writes the central <c>SiteCalls</c> mirror row —
/// the site is the source of truth. It builds a
/// <see cref="RetryParkedOperation"/>, wraps it in a
/// <see cref="SiteEnvelope"/> addressed to <c>SourceSite</c>, Asks the
/// registered transport actor with the relay timeout, and pipes the mapped
/// <see cref="RetrySiteCallResponse"/> back to the requester. A site
/// <see cref="ParkedOperationActionAck"/> goes through
/// <see cref="MapRetryResponse"/>; a failed Ask (e.g. a timeout) goes
/// through <see cref="MapRetryFailure"/> and is reported as
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/> so the Central UI can
/// tell "site offline" from "operation failed".
/// </summary>
/// <param name="request">The retry request received from the Central UI.</param>
private void HandleRetrySiteCall(RetrySiteCallRequest request)
{
    // Capture the reply target up front; the piped continuations run after
    // this handler has returned.
    var replyTo = Sender;
    var transport = _centralCommunication;

    if (transport is not null)
    {
        var trackedId = new TrackedOperationId(request.TrackedOperationId);
        var command = new RetryParkedOperation(request.CorrelationId, trackedId);

        transport
            .Ask<ParkedOperationActionAck>(
                new SiteEnvelope(request.SourceSite, command), _options.RelayTimeout)
            .PipeTo(
                replyTo,
                success: ack => MapRetryResponse(request.CorrelationId, ack),
                failure: ex => MapRetryFailure(request.CorrelationId, request.SourceSite, ex));
        return;
    }

    // No transport registered yet, so there is no route to any site:
    // answer "unreachable" immediately.
    _logger.LogWarning(
        "RetrySiteCall {TrackedOperationId} for site {SourceSite} arrived before the "
        + "central→site transport was registered; reporting site unreachable",
        request.TrackedOperationId, request.SourceSite);
    replyTo.Tell(UnreachableRetry(request.CorrelationId));
}
|
||||
|
||||
/// <summary>
/// Relays an operator Discard of a parked cached call to the site that owns
/// it. Same flow as <see cref="HandleRetrySiteCall"/>: wrap a
/// <see cref="DiscardParkedOperation"/> in a <see cref="SiteEnvelope"/>, Ask
/// the registered transport actor with the relay timeout, and pipe the
/// mapped <see cref="DiscardSiteCallResponse"/> back to the requester.
/// </summary>
/// <param name="request">The discard request received from the Central UI.</param>
private void HandleDiscardSiteCall(DiscardSiteCallRequest request)
{
    // Capture the reply target up front; the piped continuations run after
    // this handler has returned.
    var replyTo = Sender;
    var transport = _centralCommunication;

    if (transport is not null)
    {
        var trackedId = new TrackedOperationId(request.TrackedOperationId);
        var command = new DiscardParkedOperation(request.CorrelationId, trackedId);

        transport
            .Ask<ParkedOperationActionAck>(
                new SiteEnvelope(request.SourceSite, command), _options.RelayTimeout)
            .PipeTo(
                replyTo,
                success: ack => MapDiscardResponse(request.CorrelationId, ack),
                failure: ex => MapDiscardFailure(request.CorrelationId, request.SourceSite, ex));
        return;
    }

    // No transport registered yet: answer "unreachable" immediately.
    _logger.LogWarning(
        "DiscardSiteCall {TrackedOperationId} for site {SourceSite} arrived before the "
        + "central→site transport was registered; reporting site unreachable",
        request.TrackedOperationId, request.SourceSite);
    replyTo.Tell(UnreachableDiscard(request.CorrelationId));
}
|
||||
|
||||
/// <summary>
/// Builds the <see cref="RetrySiteCallResponse"/> for a site
/// <see cref="ParkedOperationActionAck"/>. The outcome comes from
/// <see cref="ClassifyAck"/> and the detail text from
/// <see cref="AckErrorMessage"/>. <c>SiteReachable</c> is always
/// <c>true</c> here, because an ack means the site answered.
/// </summary>
/// <param name="correlationId">Correlation id to echo on the response.</param>
/// <param name="ack">The ack received from the site.</param>
/// <returns>The response to send to the requester.</returns>
private static RetrySiteCallResponse MapRetryResponse(string correlationId, ParkedOperationActionAck ack)
{
    var outcome = ClassifyAck(ack);
    var detail = AckErrorMessage(outcome, ack);
    var applied = outcome == SiteCallRelayOutcome.Applied;

    return new RetrySiteCallResponse(
        CorrelationId: correlationId,
        Outcome: outcome,
        Success: applied,
        SiteReachable: true,
        ErrorMessage: detail);
}
|
||||
|
||||
/// <summary>
/// Builds the <see cref="DiscardSiteCallResponse"/> for a site
/// <see cref="ParkedOperationActionAck"/>, using the same classification
/// and detail-text helpers as the Retry mapping. <c>SiteReachable</c> is
/// always <c>true</c> here, because an ack means the site answered.
/// </summary>
/// <param name="correlationId">Correlation id to echo on the response.</param>
/// <param name="ack">The ack received from the site.</param>
/// <returns>The response to send to the requester.</returns>
private static DiscardSiteCallResponse MapDiscardResponse(string correlationId, ParkedOperationActionAck ack)
{
    var outcome = ClassifyAck(ack);
    var detail = AckErrorMessage(outcome, ack);
    var applied = outcome == SiteCallRelayOutcome.Applied;

    return new DiscardSiteCallResponse(
        CorrelationId: correlationId,
        Outcome: outcome,
        Success: applied,
        SiteReachable: true,
        ErrorMessage: detail);
}
|
||||
|
||||
/// <summary>
/// Failure mapping for the Retry relay: logs the exception as a warning
/// and returns the "site unreachable" response. Every exception type is
/// mapped the same way.
/// </summary>
/// <param name="correlationId">Correlation id to echo on the response.</param>
/// <param name="sourceSite">Site the relay was addressed to (logged only).</param>
/// <param name="ex">The exception that ended the Ask.</param>
/// <returns>A response with outcome <see cref="SiteCallRelayOutcome.SiteUnreachable"/>.</returns>
private RetrySiteCallResponse MapRetryFailure(string correlationId, string sourceSite, Exception ex)
{
    const string template =
        "Retry relay to site {SourceSite} did not complete; reporting site unreachable";

    _logger.LogWarning(ex, template, sourceSite);
    return UnreachableRetry(correlationId);
}
|
||||
|
||||
/// <summary>
/// Failure mapping for the Discard relay: logs the exception as a warning
/// and returns the "site unreachable" response. Every exception type is
/// mapped the same way.
/// </summary>
/// <param name="correlationId">Correlation id to echo on the response.</param>
/// <param name="sourceSite">Site the relay was addressed to (logged only).</param>
/// <param name="ex">The exception that ended the Ask.</param>
/// <returns>A response with outcome <see cref="SiteCallRelayOutcome.SiteUnreachable"/>.</returns>
private DiscardSiteCallResponse MapDiscardFailure(string correlationId, string sourceSite, Exception ex)
{
    const string template =
        "Discard relay to site {SourceSite} did not complete; reporting site unreachable";

    _logger.LogWarning(ex, template, sourceSite);
    return UnreachableDiscard(correlationId);
}
|
||||
|
||||
/// <summary>
/// Turns a site ack into a relay outcome:
/// <list type="bullet">
/// <item><c>Applied=true</c> → <see cref="SiteCallRelayOutcome.Applied"/>.</item>
/// <item><c>Applied=false</c>, no error message → <see cref="SiteCallRelayOutcome.NotParked"/>.</item>
/// <item><c>Applied=false</c>, with an error message → <see cref="SiteCallRelayOutcome.OperationFailed"/>.</item>
/// </list>
/// </summary>
/// <param name="ack">The ack received from the site.</param>
/// <returns>The outcome for the ack; never <see cref="SiteCallRelayOutcome.SiteUnreachable"/>.</returns>
private static SiteCallRelayOutcome ClassifyAck(ParkedOperationActionAck ack)
{
    if (!ack.Applied && ack.ErrorMessage is not null)
    {
        return SiteCallRelayOutcome.OperationFailed;
    }

    return ack.Applied
        ? SiteCallRelayOutcome.Applied
        : SiteCallRelayOutcome.NotParked;
}
|
||||
|
||||
/// <summary>
/// Chooses the detail text for an ack-derived response: none for an applied
/// action, a fixed explanation when nothing was parked, and otherwise the
/// error message reported on the ack.
/// </summary>
/// <param name="outcome">The outcome already derived from <paramref name="ack"/>.</param>
/// <param name="ack">The ack received from the site.</param>
/// <returns>The detail text, or <c>null</c>.</returns>
private static string? AckErrorMessage(SiteCallRelayOutcome outcome, ParkedOperationActionAck ack) =>
    outcome switch
    {
        SiteCallRelayOutcome.Applied => null,
        SiteCallRelayOutcome.NotParked =>
            "The operation is no longer parked at the site (already delivered, discarded, or retrying).",
        _ => ack.ErrorMessage,
    };
|
||||
|
||||
/// <summary>
/// Detail text used by both the Retry and the Discard "site unreachable"
/// responses.
/// </summary>
/// <remarks>
/// NOTE(review): the relay also returns this text when the Ask to the site
/// fails, which includes a timeout. A timeout alone may not prove the site
/// never processed the command — confirm that "was not applied" is the
/// intended wording for that case.
/// </remarks>
private const string SiteUnreachableMessage =
    "The owning site is unreachable; the action was not applied. Retry when the site is back online.";
|
||||
|
||||
/// <summary>
/// Creates the Retry response for an unreachable site: outcome
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/>, both convenience
/// flags <c>false</c>, and the shared unreachable detail text.
/// </summary>
/// <param name="correlationId">Correlation id to echo on the response.</param>
/// <returns>The "site unreachable" response.</returns>
private static RetrySiteCallResponse UnreachableRetry(string correlationId) =>
    new(
        CorrelationId: correlationId,
        Outcome: SiteCallRelayOutcome.SiteUnreachable,
        Success: false,
        SiteReachable: false,
        ErrorMessage: SiteUnreachableMessage);
|
||||
|
||||
/// <summary>
/// Creates the Discard response for an unreachable site: outcome
/// <see cref="SiteCallRelayOutcome.SiteUnreachable"/>, both convenience
/// flags <c>false</c>, and the shared unreachable detail text.
/// </summary>
/// <param name="correlationId">Correlation id to echo on the response.</param>
/// <returns>The "site unreachable" response.</returns>
private static DiscardSiteCallResponse UnreachableDiscard(string correlationId) =>
    new(
        CorrelationId: correlationId,
        Outcome: SiteCallRelayOutcome.SiteUnreachable,
        Success: false,
        SiteReachable: false,
        ErrorMessage: SiteUnreachableMessage);
|
||||
|
||||
/// <summary>
|
||||
/// Resolves an <see cref="ISiteCallAuditRepository"/> for one read message.
|
||||
/// In test mode the injected instance is returned with a null scope; in
|
||||
@@ -464,3 +657,13 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
return string.IsNullOrWhiteSpace(value) ? null : value;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Message that hands the central→site command transport (the
/// <c>CentralCommunicationActor</c>) to the <see cref="SiteCallAuditActor"/>,
/// enabling it to relay Retry/Discard actions on parked cached calls to the
/// owning sites. The Host sends it once both actors exist. It is declared in
/// this project, not in Commons, because it holds an <see cref="IActorRef"/>
/// and <c>ScadaLink.Commons</c> has no Akka reference — the same reasoning as
/// for <c>RegisterAuditIngest</c>.
/// </summary>
/// <param name="CentralCommunication">The transport actor to relay through.</param>
public sealed record RegisterCentralCommunication(IActorRef CentralCommunication);
|
||||
|
||||
@@ -23,4 +23,15 @@ public class SiteCallAuditOptions
|
||||
/// <c>NotificationOutboxOptions.DeliveredKpiWindow</c>.
|
||||
/// </summary>
|
||||
public TimeSpan KpiInterval { get; set; } = TimeSpan.FromMinutes(1);
|
||||
|
||||
/// <summary>
/// Task 5 (#22): how long the central→site Retry/Discard relay waits for the
/// owning site to ack a <c>RetryParkedOperation</c> /
/// <c>DiscardParkedOperation</c>. If no ack arrives in that time (for
/// example the site is offline or there is no ClusterClient route), the
/// relay reports a <c>SiteUnreachable</c> outcome. The default is 10
/// seconds: enough for a healthy cross-cluster round-trip, yet short enough
/// that an operator clicking Retry on an offline site gets a quick answer.
/// </summary>
public TimeSpan RelayTimeout { get; set; } = TimeSpan.FromSeconds(10);
|
||||
}
|
||||
|
||||
@@ -24,6 +24,13 @@ public class ParkedMessageHandlerActor : ReceiveActor
|
||||
Receive<ParkedMessageQueryRequest>(HandleQuery);
|
||||
Receive<ParkedMessageRetryRequest>(HandleRetry);
|
||||
Receive<ParkedMessageDiscardRequest>(HandleDiscard);
|
||||
|
||||
// Task 5 (#22): central→site Retry/Discard relay for parked cached
|
||||
// operations. The cached call's S&F buffer message id is the
|
||||
// TrackedOperationId, so these reuse the same parked-message primitive
|
||||
// as HandleRetry/HandleDiscard, keyed off the tracked id.
|
||||
Receive<RetryParkedOperation>(HandleRetryParkedOperation);
|
||||
Receive<DiscardParkedOperation>(HandleDiscardParkedOperation);
|
||||
}
|
||||
|
||||
private void HandleQuery(ParkedMessageQueryRequest msg)
|
||||
@@ -90,6 +97,46 @@ public class ParkedMessageHandlerActor : ReceiveActor
|
||||
msg.CorrelationId, false, ex.GetBaseException().Message));
|
||||
}
|
||||
|
||||
/// <summary>
/// Task 5 (#22): executes a central-relayed Retry of a parked cached call.
/// The tracked id is the S&amp;F buffer message id, so this reuses
/// <see cref="StoreAndForwardService.RetryParkedMessageAsync"/> — which only
/// touches rows that are actually <c>Parked</c> (a non-parked or unknown
/// operation yields <c>false</c>, a safe no-op). Central never mutates the
/// central <c>SiteCalls</c> mirror; the reset row's corrected state flows
/// back via the normal cached-call telemetry path.
/// </summary>
/// <param name="msg">The relay command carrying the tracked operation id.</param>
private void HandleRetryParkedOperation(RetryParkedOperation msg)
{
    // Capture the reply target before the asynchronous continuation runs.
    var sender = Sender;

    _service.RetryParkedMessageAsync(msg.TrackedOperationId.ToString())
        .PipeTo(
            sender,
            // The service result becomes the Applied flag of a clean ack.
            success: applied => new ParkedOperationActionAck(
                msg.CorrelationId, applied, ErrorMessage: null),
            // A faulted call is acked as not applied, with the root cause text.
            failure: ex => new ParkedOperationActionAck(
                msg.CorrelationId, Applied: false, ex.GetBaseException().Message));
}
|
||||
|
||||
/// <summary>
/// Task 5 (#22): executes a central-relayed Discard of a parked cached call.
/// Mirrors <see cref="HandleRetryParkedOperation"/>; Discard removes the
/// parked S&amp;F buffer row (only when it is actually <c>Parked</c>).
/// </summary>
/// <param name="msg">The relay command carrying the tracked operation id.</param>
private void HandleDiscardParkedOperation(DiscardParkedOperation msg)
{
    // Capture the reply target before the asynchronous continuation runs.
    var sender = Sender;

    _service.DiscardParkedMessageAsync(msg.TrackedOperationId.ToString())
        .PipeTo(
            sender,
            // The service result becomes the Applied flag of a clean ack.
            success: applied => new ParkedOperationActionAck(
                msg.CorrelationId, applied, ErrorMessage: null),
            // A faulted call is acked as not applied, with the root cause text.
            failure: ex => new ParkedOperationActionAck(
                msg.CorrelationId, Applied: false, ex.GetBaseException().Message));
}
|
||||
|
||||
private static string ExtractMethodName(string payloadJson, Commons.Types.Enums.StoreAndForwardCategory category)
|
||||
{
|
||||
if (string.IsNullOrEmpty(payloadJson))
|
||||
|
||||
Reference in New Issue
Block a user