Files
scadalink-design/src/ScadaLink.AuditLog/Site/Telemetry/CachedCallTelemetryForwarder.cs
Joseph Doherty 42430dd10a feat(siteruntime): ExternalSystem.CachedCall emits CachedSubmit telemetry (#23 M3)
Rework ScriptRuntimeContext.ExternalSystem.CachedCall to fit the M3
combined-telemetry model:

* Mints a fresh TrackedOperationId and emits one CachedSubmit packet
  via ICachedCallTelemetryForwarder BEFORE handing the call off — the
  SiteCalls row is materialised before the first delivery attempt so
  Tracking.Status(id) can observe a Submitted row even if immediate
  delivery resolves before the helper returns.
* Threads the TrackedOperationId into IExternalSystemClient.CachedCallAsync
  as a new optional parameter (and into IDatabaseGateway.CachedWriteAsync
  for the Database mirror set up here for E6). The gateway uses the id
  as the StoreAndForward messageId so the retry loop (Tasks E4/E5) can
  recover it from StoreAndForwardMessage.Id.
* Returns the TrackedOperationId rather than ExternalCallResult — the
  script's contract is now "get a tracking handle, observe outcome via
  Tracking.Status". Best-effort emission: a thrown forwarder is logged
  + swallowed; the original call still runs and the id is still returned.

DatabaseHelper gets the matching siteId / sourceScript / forwarder
fields and a parallel CachedSubmit emitter (Channel=DbOutbound) so Task
E6's Database.CachedWrite mirror plugs in without further runtime
wiring.

New ICachedCallTelemetryForwarder seam in Commons.Interfaces.Services
so SiteRuntime depends on Commons (existing arrow) rather than
ScadaLink.AuditLog (would have introduced a new dependency).

Bundle E task E3 (and helper-shape work for E6).
2026-05-20 14:48:05 -04:00

162 lines
7.6 KiB
C#

using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Messages.Integration;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.AuditLog.Site.Telemetry;
/// <summary>
/// Site-side dual emitter for cached-call lifecycle telemetry (Audit Log #23 /
/// M3). Sister to <see cref="SiteAuditTelemetryActor"/>: where the M2 actor
/// drains audit-only events, this forwarder takes a combined
/// <see cref="CachedCallTelemetry"/> packet and fans it out to the two
/// site-local stores in a single call:
/// <list type="bullet">
/// <item><description>The <see cref="AuditEvent"/> row is written via
/// <see cref="IAuditWriter"/> (the site <c>FallbackAuditWriter</c> +
/// <c>SqliteAuditWriter</c> chain established in M2).</description></item>
/// <item><description>The operational <see cref="SiteCallOperational"/> half
/// updates the site-local <c>OperationTracking</c> SQLite store via
/// <see cref="IOperationTrackingStore"/>, with the per-lifecycle method
/// (<c>Enqueue</c> / <c>Attempt</c> / <c>Terminal</c>) selected from the
/// audit row's <see cref="AuditKind"/>.</description></item>
/// </list>
/// </summary>
/// <remarks>
/// <para>
/// <b>Best-effort contract (alog.md §7):</b> a thrown writer OR a thrown
/// tracking store must never propagate to the calling script. Both emission
/// halves are wrapped in independent try/catch blocks so a SQLite outage on
/// one side cannot starve the other — the failure is logged and the call
/// returns normally. The failure-logging paths read the packet through
/// null-conditional access so that a malformed packet (a missing audit or
/// operational half) cannot make the handler itself throw.
/// </para>
/// <para>
/// <b>Wire push deferred to M6.</b> M3 keeps this forwarder synchronous
/// against the local stores: there is no site→central gRPC channel yet, so
/// the <see cref="ISiteStreamAuditClient.IngestCachedTelemetryAsync"/> RPC
/// is registered on the interface (Bundle E1) but the production binding
/// remains <c>NoOpSiteStreamAuditClient</c>. Once M6 wires a real client the
/// drain pattern from <c>SiteAuditTelemetryActor</c> can be reused — the
/// <c>AuditEvent</c> rows already live in SQLite tagged
/// <see cref="AuditForwardState.Pending"/>, so a single drain loop sweeps
/// both M2 and M3 emissions.
/// </para>
/// </remarks>
public sealed class CachedCallTelemetryForwarder : ICachedCallTelemetryForwarder
{
    private readonly IAuditWriter _auditWriter;
    private readonly IOperationTrackingStore _trackingStore;
    private readonly ILogger<CachedCallTelemetryForwarder> _logger;

    /// <summary>
    /// Creates a forwarder over the given audit writer, tracking store and logger.
    /// </summary>
    /// <param name="auditWriter">Receives the audit half of every packet.</param>
    /// <param name="trackingStore">Receives the operational half of every packet.</param>
    /// <param name="logger">Receives a warning for every swallowed emission failure.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public CachedCallTelemetryForwarder(
        IAuditWriter auditWriter,
        IOperationTrackingStore trackingStore,
        ILogger<CachedCallTelemetryForwarder> logger)
    {
        _auditWriter = auditWriter ?? throw new ArgumentNullException(nameof(auditWriter));
        _trackingStore = trackingStore ?? throw new ArgumentNullException(nameof(trackingStore));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Fan out one combined-telemetry packet to the audit writer and the
    /// tracking store. Returns once both halves have been attempted (success
    /// OR logged failure). Emission failures never propagate — exceptions are
    /// caught per-half and logged at warning level so the calling script's
    /// outbound action is not disturbed. The only exception surfaced to the
    /// caller is the argument check on <paramref name="telemetry"/>.
    /// </summary>
    /// <param name="telemetry">The combined audit + operational packet to emit.</param>
    /// <param name="ct">Cancellation token passed through to both stores.</param>
    /// <exception cref="ArgumentNullException"><paramref name="telemetry"/> is <c>null</c>.</exception>
    public async Task ForwardAsync(CachedCallTelemetry telemetry, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(telemetry);

        // Independent try/catch — a thrown audit writer must not prevent the
        // tracking-store update from running (and vice-versa). Both halves
        // are best-effort.
        await TryEmitAuditAsync(telemetry, ct).ConfigureAwait(false);
        await TryEmitTrackingAsync(telemetry, ct).ConfigureAwait(false);
    }

    /// <summary>
    /// Writes the audit half of the packet through the audit writer; any
    /// exception is logged at warning level and swallowed.
    /// </summary>
    private async Task TryEmitAuditAsync(CachedCallTelemetry telemetry, CancellationToken ct)
    {
        try
        {
            await _auditWriter.WriteAsync(telemetry.Audit, ct).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // alog.md §7 best-effort contract — log and swallow. The audit
            // pipeline's own retry/recovery (RingBufferFallback in the
            // FallbackAuditWriter) handles transient writer failures upstream;
            // a throw bubbling up here means the writer's own swallow contract
            // failed, which is itself best-effort-handled.
            //
            // Null-conditional access: the handler must not throw even when
            // the audit half is missing, otherwise the exception would escape
            // ForwardAsync and the tracking emission would be skipped.
            var audit = telemetry.Audit;
            _logger.LogWarning(ex,
                "CachedCallTelemetryForwarder: audit emission threw for EventId {EventId} (Kind {Kind}, Status {Status})",
                audit?.EventId, audit?.Kind, audit?.Status);
        }
    }

    /// <summary>
    /// Applies the operational half of the packet to the tracking store,
    /// choosing the store method from the audit row's <see cref="AuditKind"/>;
    /// any exception is logged at warning level and swallowed.
    /// </summary>
    private async Task TryEmitTrackingAsync(CachedCallTelemetry telemetry, CancellationToken ct)
    {
        try
        {
            switch (telemetry.Audit.Kind)
            {
                case AuditKind.CachedSubmit:
                    // Enqueue — insert-if-not-exists with the operational
                    // channel as the kind discriminator. RetryCount is fixed
                    // at 0 by the tracking store's INSERT contract.
                    await _trackingStore.RecordEnqueueAsync(
                        telemetry.Operational.TrackedOperationId,
                        telemetry.Operational.Channel,
                        telemetry.Operational.Target,
                        telemetry.Audit.SourceInstanceId,
                        telemetry.Audit.SourceScript,
                        ct).ConfigureAwait(false);
                    break;

                case AuditKind.ApiCallCached:
                case AuditKind.DbWriteCached:
                    // Attempt — advance retry counter + last-error/HTTP-status.
                    // Terminal rows are guarded by the store's WHERE clause.
                    await _trackingStore.RecordAttemptAsync(
                        telemetry.Operational.TrackedOperationId,
                        telemetry.Operational.Status,
                        telemetry.Operational.RetryCount,
                        telemetry.Operational.LastError,
                        telemetry.Operational.HttpStatus,
                        ct).ConfigureAwait(false);
                    break;

                case AuditKind.CachedResolve:
                    // Terminal — first-write-wins on the resolve flip.
                    await _trackingStore.RecordTerminalAsync(
                        telemetry.Operational.TrackedOperationId,
                        telemetry.Operational.Status,
                        telemetry.Operational.LastError,
                        telemetry.Operational.HttpStatus,
                        ct).ConfigureAwait(false);
                    break;

                default:
                    // Defensive — only the four cached-lifecycle kinds are
                    // expected on this path. Anything else is logged so a
                    // mis-routed packet is visible but never crashes the
                    // forwarder.
                    _logger.LogWarning(
                        "CachedCallTelemetryForwarder: unexpected audit kind {Kind} on tracking emission for EventId {EventId}",
                        telemetry.Audit.Kind, telemetry.Audit.EventId);
                    break;
            }
        }
        catch (Exception ex)
        {
            // A missing operational (or audit) half surfaces above as a
            // NullReferenceException and lands here; read the log fields
            // null-safely so the handler cannot throw in turn and break the
            // best-effort contract.
            var operational = telemetry.Operational;
            _logger.LogWarning(ex,
                "CachedCallTelemetryForwarder: tracking-store emission threw for TrackedOperationId {Id} (Status {Status})",
                operational?.TrackedOperationId, operational?.Status);
        }
    }
}