233 lines
11 KiB
C#
233 lines
11 KiB
C#
using Microsoft.Extensions.Logging;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
|
|
|
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Site.Telemetry;
|
|
|
|
/// <summary>
/// Audit Log #23 (M3 Bundle E — Tasks E4/E5): translates per-attempt
/// notifications from the store-and-forward retry loop into one (or two)
/// <see cref="CachedCallTelemetry"/> packets and pushes them through
/// <see cref="ICachedCallTelemetryForwarder"/>.
/// </summary>
/// <remarks>
/// <para>
/// The S&amp;F loop's <see cref="ICachedCallLifecycleObserver"/> reports a
/// single coarse outcome per attempt; the audit pipeline however models the
/// lifecycle as TWO rows on terminal outcomes — a per-attempt row
/// (<see cref="AuditKind.ApiCallCached"/> / <see cref="AuditKind.DbWriteCached"/>)
/// capturing the per-attempt mechanics, plus a <see cref="AuditKind.CachedResolve"/>
/// row marking the terminal state for downstream consumers. The per-attempt
/// row always carries <see cref="AuditStatus.Attempted"/>; whether the attempt
/// succeeded is conveyed by its HTTP status / error message fields. The bridge
/// fans out per outcome:
/// </para>
/// <list type="bullet">
/// <item><description><c>TransientFailure</c> -> one per-attempt row; no CachedResolve row.</description></item>
/// <item><description><c>Delivered</c> -> per-attempt row + CachedResolve(Delivered).</description></item>
/// <item><description><c>PermanentFailure</c> -> per-attempt row + CachedResolve(Parked).</description></item>
/// <item><description><c>ParkedMaxRetries</c> -> per-attempt row + CachedResolve(Parked).</description></item>
/// </list>
/// <para>
/// <b>Best-effort emission (alog.md §7):</b> the bridge never propagates an
/// emission failure. Each row is emitted under its own guard, so a failure
/// while producing the per-attempt row cannot suppress the terminal
/// CachedResolve row. The underlying forwarder is expected to swallow + log
/// its own failures; the guards here are purely defensive.
/// </para>
/// </remarks>
public sealed class CachedCallLifecycleBridge : ICachedCallLifecycleObserver
{
    private readonly ICachedCallTelemetryForwarder _forwarder;
    private readonly ILogger<CachedCallLifecycleBridge> _logger;

    /// <summary>
    /// SourceNode-stamping (Task 14): the local node identity provider used to
    /// stamp <c>SiteCallOperational.SourceNode</c> on every cached-call
    /// lifecycle row this bridge emits. Optional — when null (legacy hosts /
    /// tests that don't register the provider) SourceNode stays null and
    /// central persists the <c>SiteCalls</c> row with SourceNode NULL.
    /// </summary>
    private readonly INodeIdentityProvider? _nodeIdentity;

    /// <summary>Initializes a new <see cref="CachedCallLifecycleBridge"/> with the given telemetry forwarder, logger, and optional node identity provider.</summary>
    /// <param name="forwarder">The telemetry forwarder used to ship cached-call lifecycle events to central.</param>
    /// <param name="logger">Logger for bridge diagnostics.</param>
    /// <param name="nodeIdentity">Optional node identity provider used to stamp <c>SourceNode</c> on emitted telemetry rows.</param>
    /// <exception cref="ArgumentNullException"><paramref name="forwarder"/> or <paramref name="logger"/> is null.</exception>
    public CachedCallLifecycleBridge(
        ICachedCallTelemetryForwarder forwarder,
        ILogger<CachedCallLifecycleBridge> logger,
        INodeIdentityProvider? nodeIdentity = null)
    {
        _forwarder = forwarder ?? throw new ArgumentNullException(nameof(forwarder));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _nodeIdentity = nodeIdentity;
    }

    /// <inheritdoc/>
    public async Task OnAttemptCompletedAsync(
        CachedCallAttemptContext context, CancellationToken ct = default)
    {
        // Argument validation is the only throw that escapes this method;
        // everything past this point is best-effort.
        ArgumentNullException.ThrowIfNull(context);

        // Every attempt, terminal or not, produces the per-attempt row.
        await TryEmitAsync(EmitAttemptedAsync, context, ct).ConfigureAwait(false);

        // Terminal outcomes additionally produce the CachedResolve row. It is
        // guarded separately so that a failure on the per-attempt row above
        // does not cost us the terminal marker.
        if (IsTerminal(context.Outcome))
        {
            await TryEmitAsync(EmitResolveAsync, context, ct).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Runs a single emission step and converts any exception into a warning
    /// log entry so that nothing propagates to the caller.
    /// </summary>
    /// <param name="emit">The emission step to run.</param>
    /// <param name="context">The attempt being reported; also used for log correlation.</param>
    /// <param name="ct">Cancellation token handed to the emission step.</param>
    private async Task TryEmitAsync(
        Func<CachedCallAttemptContext, CancellationToken, Task> emit,
        CachedCallAttemptContext context,
        CancellationToken ct)
    {
        try
        {
            await emit(context, ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Defensive — the emit paths call the forwarder which is itself
            // best-effort. A throw here is unexpected, but the alog.md §7
            // contract requires we never propagate.
            _logger.LogWarning(ex,
                "CachedCallLifecycleBridge: unexpected throw for {TrackedOperationId} (Outcome {Outcome})",
                context.TrackedOperationId, context.Outcome);
        }
    }

    /// <summary>
    /// Builds and forwards the per-attempt row. The kind discriminates the
    /// channel; the status is always <see cref="AuditStatus.Attempted"/>
    /// regardless of outcome (the terminal status is carried by the
    /// CachedResolve row instead). Per the M3 brief and alog.md §4.
    /// </summary>
    private async Task EmitAttemptedAsync(CachedCallAttemptContext context, CancellationToken ct)
    {
        var packet = BuildPacket(
            context,
            kind: ChannelToAttemptKind(context.Channel),
            status: AuditStatus.Attempted,
            // Operational status mirror — for the per-attempt row the
            // operational state is the running status; the bridge always
            // writes "Attempted" so reconciliation can't roll back.
            operationalStatus: "Attempted",
            terminalAtUtc: null);

        await _forwarder.ForwardAsync(packet, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Builds and forwards the terminal <see cref="AuditKind.CachedResolve"/>
    /// row, stamping the attempt's occurrence time as the terminal time.
    /// </summary>
    private async Task EmitResolveAsync(CachedCallAttemptContext context, CancellationToken ct)
    {
        var (auditStatus, operationalStatus) = TerminalOutcomeToStatuses(context.Outcome);

        var packet = BuildPacket(
            context,
            kind: AuditKind.CachedResolve,
            status: auditStatus,
            operationalStatus: operationalStatus,
            terminalAtUtc: context.OccurredAtUtc);

        await _forwarder.ForwardAsync(packet, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Assembles the telemetry packet (audit event + operational mirror) for
    /// one row. Error message and HTTP status are taken from
    /// <paramref name="context"/> for both halves of the packet.
    /// </summary>
    /// <param name="context">The attempt being reported.</param>
    /// <param name="kind">Audit kind to stamp on the audit event.</param>
    /// <param name="status">Audit status to stamp on the audit event.</param>
    /// <param name="operationalStatus">Status string written to the operational mirror.</param>
    /// <param name="terminalAtUtc">Terminal timestamp, or null for a non-terminal row.</param>
    private CachedCallTelemetry BuildPacket(
        CachedCallAttemptContext context,
        AuditKind kind,
        AuditStatus status,
        string operationalStatus,
        DateTime? terminalAtUtc)
    {
        // All timestamps are re-tagged (not converted) as UTC before emission.
        var occurredAtUtc = AsUtc(context.OccurredAtUtc);

        var audit = ScadaBridgeAuditEventFactory.Create(
            channel: ChannelStringToEnum(context.Channel),
            kind: kind,
            status: status,
            occurredAtUtc: occurredAtUtc,
            target: context.Target,
            correlationId: context.TrackedOperationId.Value,
            // Audit Log #23 (ExecutionId Task 4): the originating script
            // execution's per-run correlation id, threaded through the S&F
            // buffer; null on rows buffered before Task 4 (back-compat).
            executionId: context.ExecutionId,
            // Audit Log #23 (ParentExecutionId Task 6): the spawning
            // inbound-API request's ExecutionId, threaded through the S&F
            // buffer alongside ExecutionId so the retry-loop cached rows
            // correlate back to the cross-execution chain. Null for a
            // non-routed run and on rows buffered before Task 6.
            parentExecutionId: context.ParentExecutionId,
            // An empty site id is normalised to null on the audit event.
            sourceSiteId: string.IsNullOrEmpty(context.SourceSite) ? null : context.SourceSite,
            sourceInstanceId: context.SourceInstanceId,
            // Audit Log #23 (ExecutionId Task 4): SourceScript is now
            // threaded through the S&F buffer alongside ExecutionId — the
            // retry-loop cached rows carry the same provenance the
            // script-side cached rows do. Null on pre-Task-4 buffered rows.
            sourceScript: context.SourceScript,
            httpStatus: context.HttpStatus,
            durationMs: context.DurationMs,
            errorMessage: context.LastError);

        var operational = new SiteCallOperational(
            TrackedOperationId: context.TrackedOperationId,
            Channel: context.Channel,
            Target: context.Target,
            SourceSite: context.SourceSite,
            // SourceNode-stamping (Task 14): the local cluster node name
            // (node-a/node-b on a site). Stamped from the injected
            // INodeIdentityProvider; null when no provider was wired so
            // central persists SiteCalls.SourceNode as NULL.
            SourceNode: _nodeIdentity?.NodeName,
            Status: operationalStatus,
            RetryCount: context.RetryCount,
            LastError: context.LastError,
            HttpStatus: context.HttpStatus,
            CreatedAtUtc: AsUtc(context.CreatedAtUtc),
            UpdatedAtUtc: occurredAtUtc,
            TerminalAtUtc: terminalAtUtc.HasValue ? AsUtc(terminalAtUtc.Value) : null);

        return new CachedCallTelemetry(Audit: audit, Operational: operational);
    }

    /// <summary>Re-tags <paramref name="value"/> as <see cref="DateTimeKind.Utc"/> without shifting its ticks.</summary>
    private static DateTime AsUtc(DateTime value) => DateTime.SpecifyKind(value, DateTimeKind.Utc);

    /// <summary>Maps the channel string to the audit kind used for the per-attempt row.</summary>
    private static AuditKind ChannelToAttemptKind(string channel) => channel switch
    {
        "DbOutbound" => AuditKind.DbWriteCached,
        // "ApiOutbound" and, defensively, anything unrecognised — the S&F
        // observer is filtered to cached-call categories so the fallback
        // shouldn't fire in practice.
        _ => AuditKind.ApiCallCached,
    };

    /// <summary>Maps the channel string to <see cref="AuditChannel"/>; unrecognised values fall back to <see cref="AuditChannel.ApiOutbound"/>.</summary>
    private static AuditChannel ChannelStringToEnum(string channel) => channel switch
    {
        "DbOutbound" => AuditChannel.DbOutbound,
        _ => AuditChannel.ApiOutbound,
    };

    /// <summary>
    /// Maps an outcome to the audit status and operational status string
    /// written on the CachedResolve row.
    /// </summary>
    private static (AuditStatus auditStatus, string operationalStatus) TerminalOutcomeToStatuses(
        CachedCallAttemptOutcome outcome) => outcome switch
    {
        CachedCallAttemptOutcome.Delivered =>
            (AuditStatus.Delivered, "Delivered"),
        CachedCallAttemptOutcome.PermanentFailure or CachedCallAttemptOutcome.ParkedMaxRetries =>
            (AuditStatus.Parked, "Parked"),
        // TransientFailure isn't terminal — see IsTerminal — so it should
        // never reach here; it and any unknown value route through Failed
        // for safety.
        _ => (AuditStatus.Failed, "Failed"),
    };

    /// <summary>
    /// True for outcomes that end the cached call's lifecycle (delivered or
    /// parked); false for a transient failure and for any unknown value.
    /// </summary>
    private static bool IsTerminal(CachedCallAttemptOutcome outcome) =>
        outcome is CachedCallAttemptOutcome.Delivered
            or CachedCallAttemptOutcome.PermanentFailure
            or CachedCallAttemptOutcome.ParkedMaxRetries;
}
|