75ffa09b8f
Adds InstanceActor one-shot waiter registry (fast-path + change-match + scheduled timeout self-eviction), threads per-script timeout token through ScriptRuntimeContext, and exposes Attributes.WaitAsync(value|predicate, timeout). Replaces handshake busy-poll. Implements spec docs/plans/2026-06-17-waitfor-attribute-change-helper-spec.md §3-§5; §6 routed variant + WaitForAsync + quality-only mode deferred.
2279 lines
111 KiB
C#
2279 lines
111 KiB
C#
using System.Diagnostics;
|
|
using System.Text.Json;
|
|
using System.Text.RegularExpressions;
|
|
using Akka.Actor;
|
|
using Microsoft.Extensions.Logging;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Instance;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.ScriptExecution;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
|
using AuditEvent = ZB.MOM.WW.Audit.AuditEvent;
|
|
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
|
|
using ZB.MOM.WW.ScadaBridge.StoreAndForward;
|
|
|
|
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
|
|
|
|
/// <summary>
|
|
/// WP-18: Script Runtime API — injected into Script/Alarm Execution Actors.
|
|
/// Provides the API surface that user scripts interact with:
|
|
/// Instance.GetAttribute("name")
|
|
/// Instance.SetAttribute("name", value)
|
|
/// Instance.CallScript("scriptName", params)
|
|
/// Scripts.CallShared("scriptName", params)
|
|
///
|
|
/// WP-13 (Phase 7): Integration surface APIs:
|
|
/// ExternalSystem.Call("systemName", "methodName", params)
|
|
/// ExternalSystem.CachedCall("systemName", "methodName", params)
|
|
/// Database.Connection("name")
|
|
/// Database.CachedWrite("name", "sql", params)
|
|
/// Notify.To("listName").Send("subject", "message")
|
|
///
|
|
/// WP-20: Recursion Limit — call depth tracked and enforced.
|
|
/// </summary>
|
|
public class ScriptRuntimeContext
|
|
{
|
|
/// <summary>The Instance Actor that manages this instance's state.</summary>
private readonly IActorRef _instanceActor;

/// <summary>The script actor executing on behalf of this context.</summary>
private readonly IActorRef _self;

/// <summary>Library holding the shared scripts available to all instances.</summary>
private readonly SharedScriptLibrary _sharedScriptLibrary;

/// <summary>WP-20: recursion depth of the script call this context belongs to.</summary>
private readonly int _currentCallDepth;

/// <summary>WP-20: deepest call depth allowed before a nested call is rejected.</summary>
private readonly int _maxCallDepth;

/// <summary>Timeout applied to Ask-pattern messages sent to the Instance Actor.</summary>
private readonly TimeSpan _askTimeout;

/// <summary>Logger for diagnostics and warnings.</summary>
private readonly ILogger _logger;

/// <summary>Unique name of the instance being executed.</summary>
private readonly string _instanceName;

/// <summary>
/// WaitForAttribute (spec §4.3): the per-script execution-timeout token
/// (<c>cts.Token</c>) handed down by the owning <c>ScriptExecutionActor</c> /
/// <c>AlarmExecutionActor</c>. It bounds the <c>Attributes.WaitAsync</c> Ask, so
/// a script that reaches its own <c>ExecutionTimeoutSeconds</c> abandons the
/// wait. It is <see cref="CancellationToken.None"/> for contexts that do not
/// thread a token (legacy callers, tests, the alarm path without a CTS).
/// </summary>
private readonly CancellationToken _scriptTimeoutToken;

/// <summary>
/// WP-13: client behind <c>ExternalSystem.Call</c> / <c>ExternalSystem.CachedCall</c>.
/// </summary>
private readonly IExternalSystemClient? _externalSystemClient;

/// <summary>
/// WP-13: gateway behind <c>Database.Connection</c> / <c>Database.CachedWrite</c>.
/// </summary>
private readonly IDatabaseGateway? _databaseGateway;

/// <summary>
/// Notification Outbox: the site store-and-forward engine into which
/// <c>Notify.Send</c> enqueues notifications; the engine forwards them to central.
/// </summary>
private readonly StoreAndForwardService? _storeAndForward;

/// <summary>
/// Notification Outbox: the site communication actor through which
/// <c>Notify.Status</c> queries central (ClusterClient command/control transport).
/// </summary>
private readonly ICanTell? _siteCommunicationActor;

/// <summary>
/// Notification Outbox: identifier of this site, stamped on enqueued notifications.
/// </summary>
private readonly string _siteId;

/// <summary>
/// SourceNode-stamping (Task 13/14): cluster node name of the local host as
/// supplied by <c>INodeIdentityProvider</c> (<c>node-a</c>/<c>node-b</c> for
/// site nodes). <see cref="NotifyTarget.Send"/> stamps it onto
/// <c>NotificationSubmit.SourceNode</c>, and the four cached-call telemetry
/// construction sites in <see cref="ExternalSystemHelper"/> /
/// <see cref="DatabaseHelper"/> stamp it onto
/// <c>SiteCallOperational.SourceNode</c>, so central can persist it on the
/// <c>Notifications</c> / <c>SiteCalls</c> rows. Null when no provider is wired
/// (legacy hosts, tests); the null is passed through verbatim and the central
/// row's SourceNode stays NULL as well.
/// </summary>
private readonly string? _sourceNode;

/// <summary>
/// Notification Outbox (FU3): identifier of the script running in this context,
/// stamped onto <c>NotificationSubmit.SourceScript</c> for the central audit
/// trail. Follows the Site Event Logging "Source" convention
/// (<c>"ScriptActor:&lt;scriptName&gt;"</c>). Null when no single script owns
/// the context (e.g. alarm on-trigger paths that do not wire the Notify outbox).
/// </summary>
private readonly string? _sourceScript;

/// <summary>
/// M2.12 (#25): site event logger used to record recursion-limit violations in
/// the local SQLite event log. Optional: when null the emission is skipped and
/// the existing <c>_logger.LogError</c> + throw path is unchanged.
/// </summary>
private readonly ISiteEventLogger? _siteEventLogger;

/// <summary>
/// Audit Log #23: best-effort emitter for the boundary-crossing actions a script
/// performs. Optional: when null the helpers fall back to a no-op audit path so
/// tests and contexts without the audit pipeline keep working.
/// </summary>
private readonly IAuditWriter? _auditWriter;

/// <summary>
/// Audit Log #23 (M3): site-local tracking store consulted by
/// <c>Tracking.Status(TrackedOperationId)</c>. Optional: when null the helper
/// throws on access, matching the "service-not-wired" behaviour of the other
/// integration helpers.
/// </summary>
private readonly IOperationTrackingStore? _operationTrackingStore;

/// <summary>
/// Audit Log #23 (M3 Bundle E — Task E3): site-side dual emitter for cached-call
/// lifecycle telemetry. Optional: when null, <c>ExternalSystem.CachedCall</c> /
/// <c>Database.CachedWrite</c> still return a <see cref="TrackedOperationId"/>
/// and invoke the underlying store-and-forward path, but emit no audit /
/// SiteCalls telemetry (tests, minimal hosts without the audit pipeline).
/// </summary>
private readonly ICachedCallTelemetryForwarder? _cachedForwarder;

/// <summary>
/// Audit Log #23: per-execution id of this script run. Every trust-boundary
/// audit row the run emits (sync <c>ApiCall</c>/<c>DbWrite</c>, cached-call
/// lifecycle rows, <c>NotifySend</c>) carries it in
/// <c>AuditEvent.ExecutionId</c>, so all rows of one run can be correlated
/// independently of the per-operation <c>AuditEvent.CorrelationId</c>.
/// </summary>
private readonly Guid _executionId;

/// <summary>
/// Audit Log #23 (ParentExecutionId): the <see cref="_executionId"/> of the
/// execution that spawned this run; for an inbound-API-routed call this is the
/// inbound request's per-request execution id. <c>null</c> for normal
/// (tag-change / timer-triggered) runs and nested <c>CallScript</c> invocations.
/// The routed script still mints its OWN <see cref="_executionId"/>; this field
/// only records the spawner so the spawned run's audit rows can point back at
/// it. (Task 5 wires the emitter that stamps this onto
/// <c>AuditEvent.ParentExecutionId</c>.)
/// </summary>
private readonly Guid? _parentExecutionId;
|
|
|
|
/// <summary>
/// Creates a script runtime context and captures every dependency a script run
/// needs: instance state access, nested script calls, external system calls,
/// database access, notifications, and audit / telemetry emission.
/// </summary>
/// <param name="instanceActor">The Instance Actor managing this instance's state.</param>
/// <param name="self">The executing script actor.</param>
/// <param name="sharedScriptLibrary">Library of shared scripts available to all instances.</param>
/// <param name="currentCallDepth">Recursion depth of the current script call.</param>
/// <param name="maxCallDepth">Deepest recursion depth allowed before a call is rejected.</param>
/// <param name="askTimeout">Timeout for Ask-pattern messages to the Instance Actor.</param>
/// <param name="instanceName">Unique name of the instance being executed.</param>
/// <param name="logger">Logger for diagnostics and warnings.</param>
/// <param name="externalSystemClient">Optional client for external system API calls.</param>
/// <param name="databaseGateway">Optional gateway for database connections and cached writes.</param>
/// <param name="storeAndForward">Optional store-and-forward service used for notification delivery.</param>
/// <param name="siteCommunicationActor">Optional actor for site-to-central communication (ClusterClient).</param>
/// <param name="siteId">Identifier of the site this instance runs on.</param>
/// <param name="sourceScript">Optional source-script name used for audit trail identification.</param>
/// <param name="auditWriter">Optional writer for audit log entries.</param>
/// <param name="operationTrackingStore">Optional store for the status of cached operations.</param>
/// <param name="cachedForwarder">Optional forwarder for cached-call telemetry.</param>
/// <param name="executionId">
/// Audit Log #23: per-execution id of this run. A fresh id is generated when it
/// is omitted (tag-change / timer-triggered executions); an inbound caller may
/// pass one to tie the execution to an upstream request. The value is stamped
/// into <c>AuditEvent.ExecutionId</c> on every trust-boundary audit row the run
/// emits.
/// </param>
/// <param name="parentExecutionId">
/// Audit Log #23 (ParentExecutionId): the <c>ExecutionId</c> of the spawning
/// execution. Supplied for an inbound-API-routed call (the inbound request's
/// per-request id) and <c>null</c> for normal (tag-change / timer-triggered)
/// runs. It only records the spawner; the routed script still gets its own
/// <paramref name="executionId"/>.
/// </param>
/// <param name="sourceNode">Optional cluster node identifier (node-a/node-b) for audit trail stamping.</param>
/// <param name="siteEventLogger">
/// M2.12 (#25): optional site event logger. When present, recursion-limit
/// violations in <c>CallScript</c> and <c>CallShared</c> additionally emit a
/// <c>script</c> Error event next to the existing <c>ILogger.LogError</c> +
/// throw. When null, behaviour is unchanged, so existing callers and tests stay
/// source-compatible.
/// </param>
/// <param name="scriptTimeoutToken">
/// WaitForAttribute (spec §4.3): the per-script execution-timeout token
/// (<c>cts.Token</c> of the owning execution actor) that bounds
/// <c>Attributes.WaitAsync</c>. Defaults to
/// <see cref="CancellationToken.None"/> for callers / tests that do not thread
/// one; such waits are bounded only by their own timeout.
/// </param>
public ScriptRuntimeContext(
    IActorRef instanceActor,
    IActorRef self,
    SharedScriptLibrary sharedScriptLibrary,
    int currentCallDepth,
    int maxCallDepth,
    TimeSpan askTimeout,
    string instanceName,
    ILogger logger,
    IExternalSystemClient? externalSystemClient = null,
    IDatabaseGateway? databaseGateway = null,
    StoreAndForwardService? storeAndForward = null,
    ICanTell? siteCommunicationActor = null,
    string siteId = "",
    string? sourceScript = null,
    IAuditWriter? auditWriter = null,
    IOperationTrackingStore? operationTrackingStore = null,
    ICachedCallTelemetryForwarder? cachedForwarder = null,
    Guid? executionId = null,
    Guid? parentExecutionId = null,
    string? sourceNode = null,
    ISiteEventLogger? siteEventLogger = null,
    CancellationToken scriptTimeoutToken = default)
{
    // Actors, identity and diagnostics.
    _instanceActor = instanceActor;
    _self = self;
    _instanceName = instanceName;
    _siteId = siteId;
    _logger = logger;

    // Script calling: shared library, recursion bookkeeping, Ask timeout.
    _sharedScriptLibrary = sharedScriptLibrary;
    _currentCallDepth = currentCallDepth;
    _maxCallDepth = maxCallDepth;
    _askTimeout = askTimeout;

    // Optional integration services (null when the host does not wire them).
    _externalSystemClient = externalSystemClient;
    _databaseGateway = databaseGateway;
    _storeAndForward = storeAndForward;
    _siteCommunicationActor = siteCommunicationActor;

    // Audit / telemetry plumbing.
    _sourceScript = sourceScript;
    _auditWriter = auditWriter;
    _operationTrackingStore = operationTrackingStore;
    _cachedForwarder = cachedForwarder;

    // SourceNode-stamping (Task 13/14): node name read from INodeIdentityProvider
    // at the ScriptExecutionActor. Stays null when no provider was wired, so the
    // downstream call sites pass null through and central SourceNode remains NULL.
    _sourceNode = sourceNode;

    // A run without a caller-supplied id mints its own.
    _executionId = executionId ?? Guid.NewGuid();

    // Audit Log #23 (ParentExecutionId): deliberately no NewGuid() fallback here;
    // a non-routed run has no parent and must stay null.
    _parentExecutionId = parentExecutionId;

    // M2.12 (#25): null when not wired (tests / AlarmExecutionActor).
    _siteEventLogger = siteEventLogger;

    // WaitForAttribute (spec §4.3): default(CancellationToken) equals None, in
    // which case the WaitAsync Ask is bounded only by its own timeout.
    _scriptTimeoutToken = scriptTimeoutToken;
}
|
|
|
|
/// <summary>
/// Audit Log #23 (M5.4): the per-execution id of this run. It is exposed so that
/// a nested <c>Scripts.CallShared</c> can use it as the spawned shared script's
/// <c>ParentExecutionId</c>, which yields a true execution tree.
/// </summary>
internal Guid ExecutionId
{
    get { return _executionId; }
}
|
|
|
|
/// <summary>
/// Audit Log #23 (M5.4): id of the execution that spawned this run, or
/// <c>null</c> for a root run. Exposed so tests can assert on the execution tree.
/// </summary>
internal Guid? ParentExecutionId
{
    get { return _parentExecutionId; }
}
|
|
|
|
/// <summary>
/// Audit Log #23 (M5.4 — ParentExecutionId tag-cascade): creates the child
/// <see cref="ScriptRuntimeContext"/> used by an inline <c>Scripts.CallShared</c>
/// invocation. Although the shared script runs inline (no actor hop), it is its
/// OWN node in the audit tree: the child gets a fresh <see cref="_executionId"/>
/// and THIS run's <see cref="_executionId"/> as its <c>ParentExecutionId</c>, so
/// <c>B → CallShared(C)</c> gives <c>C.ParentExecutionId == B.ExecutionId</c>.
/// All remaining dependencies (actors, gateways, audit writer, site id, source
/// node, script timeout token) are copied unchanged from this context.
/// </summary>
/// <param name="childCallDepth">Recursion depth of the shared-script call.</param>
/// <returns>A new context for the shared-script run.</returns>
internal ScriptRuntimeContext CreateChildContextForSharedScript(int childCallDepth) =>
    new ScriptRuntimeContext(
        instanceActor: _instanceActor,
        self: _self,
        sharedScriptLibrary: _sharedScriptLibrary,
        currentCallDepth: childCallDepth,
        maxCallDepth: _maxCallDepth,
        askTimeout: _askTimeout,
        instanceName: _instanceName,
        logger: _logger,
        externalSystemClient: _externalSystemClient,
        databaseGateway: _databaseGateway,
        storeAndForward: _storeAndForward,
        siteCommunicationActor: _siteCommunicationActor,
        siteId: _siteId,
        sourceScript: _sourceScript,
        auditWriter: _auditWriter,
        operationTrackingStore: _operationTrackingStore,
        cachedForwarder: _cachedForwarder,
        // Null on purpose: the constructor then mints a fresh execution id for
        // the shared-script run…
        executionId: null,
        // …whose parent is THIS run (the spawner).
        parentExecutionId: _executionId,
        sourceNode: _sourceNode,
        siteEventLogger: _siteEventLogger,
        // WaitForAttribute (spec §4.3): the inline shared script reuses the parent
        // run's execution-timeout token, so a WaitAsync inside it is bounded by
        // the SAME script deadline.
        scriptTimeoutToken: _scriptTimeoutToken);
|
|
|
|
/// <summary>
/// M2.12 (#25): fire-and-forget emission of a <c>script</c> Error site event for
/// a recursion-limit violation. Mirrors the call shape used by
/// <c>ScriptExecutionActor</c>'s catch blocks (WP-32 / M1.8). The emission is
/// strictly best-effort: a fault from the site-event logger is logged as a
/// warning and dropped, whether it surfaces asynchronously (faulted task,
/// observed via <c>ContinueWith(OnlyOnFaulted)</c>), as an already-faulted task,
/// or as an exception thrown synchronously by <c>LogEventAsync</c> itself. It
/// therefore never blocks or replaces the <c>_logger.LogError</c> + throw path
/// of the caller. A null site event logger makes this a no-op.
/// </summary>
/// <param name="msg">The recursion-limit violation message to record.</param>
private void EmitRecursionLimitEventAsync(string msg)
{
    if (_siteEventLogger == null)
    {
        return;
    }

    var source = string.IsNullOrEmpty(_instanceName)
        ? "recursion-guard"
        : $"InstanceScript:{_instanceName}";

    Task logTask;
    try
    {
        logTask = _siteEventLogger.LogEventAsync("script", "Error", _instanceName, source, msg);
    }
    catch (Exception ex)
    {
        // A synchronous throw must not escape: the caller is about to throw the
        // recursion-limit InvalidOperationException, and that is the error the
        // script has to see, not a site-event-log failure.
        _logger.LogWarning(ex,
            "Site event log write failed for recursion-limit violation on instance '{Instance}'",
            _instanceName);
        return;
    }

    if (!logTask.IsCompleted)
    {
        // Still running: observe a later fault without awaiting the write.
        logTask.ContinueWith(
            t => _logger.LogWarning(t.Exception,
                "Site event log write failed for recursion-limit violation on instance '{Instance}'",
                _instanceName),
            CancellationToken.None,
            TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);
    }
    else if (logTask.IsFaulted)
    {
        // Already finished with a fault: report it right away.
        _logger.LogWarning(logTask.Exception,
            "Site event log write failed for recursion-limit violation on instance '{Instance}'",
            _instanceName);
    }
}
|
|
|
|
/// <summary>
/// Reads the current value of an attribute by asking the Instance Actor (Ask
/// pattern: the system boundary between script execution and instance state).
/// An unknown attribute is logged as a warning; the response value is returned
/// either way.
/// </summary>
/// <param name="attributeName">Name of the attribute to read.</param>
/// <returns>The current attribute value, or null if not found.</returns>
public async Task<object?> GetAttribute(string attributeName)
{
    var query = new GetAttributeRequest(
        Guid.NewGuid().ToString(), _instanceName, attributeName, DateTimeOffset.UtcNow);

    var reply = await _instanceActor.Ask<GetAttributeResponse>(query, _askTimeout);

    if (reply.Found)
    {
        return reply.Value;
    }

    _logger.LogWarning(
        "GetAttribute: attribute '{Attribute}' not found on instance '{Instance}'",
        attributeName, _instanceName);

    return reply.Value;
}
|
|
|
|
/// <summary>
/// WaitForAttribute (spec §3-§5): waits event-driven for an attribute to reach
/// a value (encoded-equality), satisfy a site-local predicate, or change at all,
/// bounded by <paramref name="timeout"/>. Returns <c>true</c> if matched within
/// the timeout, <c>false</c> on timeout — NEVER throws on timeout. This is the
/// backing call for the <c>Attributes.WaitAsync</c> accessor.
///
/// <para>
/// The Ask is bounded by the script's own execution-timeout token (§4.3): a
/// script that hits its <c>ExecutionTimeoutSeconds</c> abandons the wait, and
/// that cancellation propagates to the caller as an exception. The Ask deadline
/// is the wait timeout plus <see cref="_askTimeout"/> as slack, so the
/// InstanceActor's own scheduled timeout reply is the authoritative path for the
/// false/timed-out outcome. If that reply never arrives and the Ask deadline
/// elapses instead, the resulting <c>AskTimeoutException</c> is logged and
/// reported as <c>false</c>, keeping the "never throws on timeout" contract.
/// </para>
/// </summary>
/// <param name="name">The scope-resolved attribute name to wait on.</param>
/// <param name="targetValueEncoded">
/// The codec-encoded target value; null (with null <paramref name="predicate"/>)
/// means "any change".
/// </param>
/// <param name="predicate">Site-local predicate; null when the encoded target is used.</param>
/// <param name="timeout">How long to wait before returning false.</param>
/// <returns><c>true</c> on match within the timeout; <c>false</c> on timeout.</returns>
public async Task<bool> WaitAttribute(
    string name, string? targetValueEncoded, Func<object?, bool>? predicate, TimeSpan timeout)
{
    var correlationId = Guid.NewGuid().ToString();
    var request = new WaitForAttributeRequest(
        correlationId, _instanceName, name, targetValueEncoded, predicate, timeout, DateTimeOffset.UtcNow);

    try
    {
        var response = await _instanceActor.Ask<WaitForAttributeResponse>(
            request, timeout + _askTimeout, _scriptTimeoutToken);

        return response.Matched;
    }
    catch (AskTimeoutException)
    {
        // The InstanceActor did not answer within timeout + slack. For the script
        // this is still "the wait timed out", so report false instead of throwing.
        // Cancellation through _scriptTimeoutToken is NOT caught here.
        _logger.LogWarning(
            "WaitAttribute: no reply for attribute '{Attribute}' on instance '{Instance}' within the Ask deadline; treating as timed out",
            name, _instanceName);

        return false;
    }
}
|
|
|
|
/// <summary>
/// Writes an attribute value through the Instance Actor. For a data-connected
/// attribute the Instance Actor forwards the write to the DCL, which writes the
/// physical device; the in-memory value is not updated optimistically. For a
/// static attribute the Instance Actor updates the in-memory value and persists
/// the override to SQLite. Every mutation is serialized through the Instance
/// Actor mailbox.
///
/// The reply is awaited, so a failed device write on a data-connected attribute
/// reaches the calling script synchronously as an
/// <see cref="InvalidOperationException"/>.
/// </summary>
/// <param name="attributeName">Name of the attribute to set.</param>
/// <param name="value">String value to set for the attribute.</param>
/// <returns>A task that completes once the Instance Actor has replied.</returns>
/// <exception cref="InvalidOperationException">The Instance Actor reported the write as failed.</exception>
public async Task SetAttribute(string attributeName, string value)
{
    var write = new SetStaticAttributeCommand(
        Guid.NewGuid().ToString(), _instanceName, attributeName, value, DateTimeOffset.UtcNow);

    // Ask rather than Tell: the reply carries the device-write outcome for
    // data-connected attributes.
    var outcome = await _instanceActor.Ask<SetStaticAttributeResponse>(write, _askTimeout);

    if (outcome.Success)
    {
        return;
    }

    throw new InvalidOperationException(
        $"SetAttribute('{attributeName}') failed: {outcome.ErrorMessage}");
}
|
|
|
|
/// <summary>
/// Invokes a sibling script of the same instance by name.
/// WP-20: rejects the call when it would exceed the recursion limit.
/// WP-22: the call goes through the Instance Actor using the Ask pattern.
/// <paramref name="parameters"/> may be a dictionary or an anonymous object
/// (<c>new { name = "Bob" }</c>) — see <see cref="ScriptArgs"/>.
/// </summary>
/// <param name="scriptName">Name of the script to call.</param>
/// <param name="parameters">Optional parameters for the script (dictionary or anonymous object).</param>
/// <returns>The return value from the called script.</returns>
/// <exception cref="InvalidOperationException">
/// The recursion limit would be exceeded, or the called script reported failure.
/// </exception>
public async Task<object?> CallScript(string scriptName, object? parameters = null)
{
    var childDepth = _currentCallDepth + 1;
    if (childDepth > _maxCallDepth)
    {
        var msg =
            $"Script call depth exceeded maximum of {_maxCallDepth}. CallScript('{scriptName}') rejected at depth {childDepth}.";
        _logger.LogError(msg);
        // M2.12 (#25): also record the violation in the site event log (fire-and-forget).
        EmitRecursionLimitEventAsync(msg);
        throw new InvalidOperationException(msg);
    }

    var correlationId = Guid.NewGuid().ToString();

    // Audit Log #23 (M5.4 — ParentExecutionId tag-cascade): the called script is a
    // NEW execution spawned BY this run, so its parent is this run's own
    // ExecutionId and NOT the inherited _parentExecutionId. A → CallScript(B)
    // therefore gives B.ParentExecutionId == A.ExecutionId, producing a real
    // multi-level execution tree instead of hanging every nested call under the
    // original inbound spawner.
    var call = new ScriptCallRequest(
        scriptName,
        ScriptArgs.Normalize(parameters),
        childDepth,
        correlationId,
        ParentExecutionId: _executionId);

    // The Instance Actor routes the request to the matching Script Actor.
    var outcome = await _instanceActor.Ask<ScriptCallResult>(call, _askTimeout);

    if (outcome.Success)
    {
        return outcome.ReturnValue;
    }

    throw new InvalidOperationException(
        $"CallScript('{scriptName}') failed: {outcome.ErrorMessage}");
}
|
|
|
|
/// <summary>
/// Entry point for shared script execution (<c>Scripts.CallShared(...)</c>).
/// Each access returns a new helper bound to this context and its call-depth limits.
/// </summary>
public ScriptCallHelper Scripts
{
    get
    {
        return new ScriptCallHelper(_sharedScriptLibrary, this, _currentCallDepth, _maxCallDepth, _logger);
    }
}
|
|
|
|
/// <summary>
/// WP-13: entry point for external system calls.
/// ExternalSystem.Call("systemName", "methodName", params)
/// ExternalSystem.CachedCall("systemName", "methodName", params)
/// Each access returns a new helper built from this context's dependencies.
/// </summary>
public ExternalSystemHelper ExternalSystem
{
    get
    {
        return new ExternalSystemHelper(
            _externalSystemClient,
            _instanceName,
            _logger,
            _executionId,
            _auditWriter,
            _siteId,
            _sourceScript,
            // Audit Log #23 (M3 Bundle E — Task E3): CachedSubmit telemetry is
            // emitted on every ExternalSystem.CachedCall enqueue.
            _cachedForwarder,
            // Audit Log #23 (ParentExecutionId): id of the spawning execution,
            // passed along with _executionId; null for non-routed runs.
            _parentExecutionId,
            // SourceNode-stamping (Task 14): local node name (node-a/node-b), so
            // the cached-call telemetry construction sites can stamp it onto
            // SiteCallOperational.SourceNode.
            _sourceNode);
    }
}
|
|
|
|
/// <summary>
/// WP-13: entry point for database operations.
/// Database.Connection("name")
/// Database.CachedWrite("name", "sql", params)
/// Each access returns a new helper built from this context's dependencies.
/// </summary>
public DatabaseHelper Database
{
    get
    {
        return new DatabaseHelper(
            _databaseGateway,
            _instanceName,
            _logger,
            _executionId,
            // Audit Log #23 (M4 Bundle A): with the IAuditWriter wired,
            // Database.Connection(name) returns an auditing decorator that emits
            // one DbOutbound/DbWrite row per script-initiated
            // Execute / ExecuteScalar / ExecuteReader.
            _auditWriter,
            _siteId,
            _sourceScript,
            // Audit Log #23 (M3 Bundle E — Task E6): CachedSubmit telemetry is
            // emitted on every Database.CachedWrite enqueue.
            _cachedForwarder,
            // Audit Log #23 (ParentExecutionId): id of the spawning execution,
            // passed along with _executionId; null for non-routed runs.
            _parentExecutionId,
            // SourceNode-stamping (Task 14): local node name (node-a/node-b), so
            // Database.CachedWrite's CachedSubmit telemetry can stamp it onto
            // SiteCallOperational.SourceNode.
            _sourceNode);
    }
}
|
|
|
|
/// <summary>
/// Entry point for the Notification Outbox API.
/// <c>Notify.To("listName").Send("subject", "message")</c> enqueues a
/// notification for central delivery and returns its <c>NotificationId</c>;
/// <c>Notify.Status(id)</c> queries the delivery status of that notification.
/// </summary>
/// <remarks>
/// Audit Log #23 (M4 Bundle C): the <see cref="IAuditWriter"/> is passed on so
/// that <c>Notify.To(list).Send(...)</c> emits one
/// <c>Notification</c>/<c>NotifySend</c> audit row per accepted submission.
/// This is best-effort per alog.md §7: a throwing writer never aborts the
/// script's <c>Send</c>.
/// </remarks>
public NotifyHelper Notify
{
    get
    {
        return new NotifyHelper(
            _storeAndForward,
            _siteCommunicationActor,
            _siteId,
            _instanceName,
            _sourceScript,
            _askTimeout,
            _logger,
            _executionId,
            _auditWriter,
            // Audit Log #23 (ParentExecutionId): id of the spawning execution,
            // passed along with _executionId; null for non-routed runs.
            _parentExecutionId,
            // SourceNode-stamping (Task 13): local node name (node-a/node-b), so
            // NotifyTarget.Send can stamp it onto NotificationSubmit.
            _sourceNode);
    }
}
|
|
|
|
/// <summary>
/// Audit Log #23 (M3): site-local tracking-status API for cached operations.
/// <c>Tracking.Status(trackedOperationId)</c> reads the site SQLite tracking row
/// directly (the authoritative source of truth, with no central round-trip) and
/// returns a <see cref="TrackingStatusSnapshot"/>, or <c>null</c> when the id is
/// unknown or has already been purged.
/// </summary>
public TrackingHelper Tracking
{
    get { return new TrackingHelper(_operationTrackingStore, _logger); }
}
|
|
|
|
/// <summary>
/// Helper behind the <c>Scripts</c> property; provides the
/// <c>Scripts.CallShared()</c> syntax for running shared scripts.
/// </summary>
public class ScriptCallHelper
{
    private readonly SharedScriptLibrary _library;
    private readonly ScriptRuntimeContext _context;
    private readonly int _currentCallDepth;
    private readonly int _maxCallDepth;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates a shared-script call helper bound to the calling script's context.
    /// </summary>
    /// <param name="library">The shared script library holding the available scripts.</param>
    /// <param name="context">The runtime context of the calling script.</param>
    /// <param name="currentCallDepth">Recursion depth of the calling script.</param>
    /// <param name="maxCallDepth">Deepest recursion depth allowed.</param>
    /// <param name="logger">Logger for diagnostics.</param>
    internal ScriptCallHelper(
        SharedScriptLibrary library,
        ScriptRuntimeContext context,
        int currentCallDepth,
        int maxCallDepth,
        ILogger logger)
    {
        _logger = logger;
        _library = library;
        _context = context;
        _maxCallDepth = maxCallDepth;
        _currentCallDepth = currentCallDepth;
    }

    /// <summary>
    /// WP-17: runs a shared script inline (a direct method call, not an actor message).
    /// WP-20: rejects the call when it would exceed the recursion limit.
    /// <paramref name="parameters"/> may be a dictionary or an anonymous
    /// object (<c>new { name = "Bob" }</c>) — see <see cref="ScriptArgs"/>.
    /// </summary>
    /// <param name="scriptName">Name of the shared script to execute.</param>
    /// <param name="parameters">Optional parameters for the script (dictionary or anonymous object).</param>
    /// <param name="cancellationToken">Cancellation token passed to the shared script execution.</param>
    /// <returns>The return value from the shared script.</returns>
    /// <exception cref="InvalidOperationException">The recursion limit would be exceeded.</exception>
    public async Task<object?> CallShared(
        string scriptName,
        object? parameters = null,
        CancellationToken cancellationToken = default)
    {
        var childDepth = _currentCallDepth + 1;
        if (childDepth > _maxCallDepth)
        {
            var msg =
                $"Script call depth exceeded maximum of {_maxCallDepth}. CallShared('{scriptName}') rejected at depth {childDepth}.";
            _logger.LogError(msg);
            // M2.12 (#25): the site event is emitted through the parent context's
            // helper so there is a single emission path (fire-and-forget).
            _context.EmitRecursionLimitEventAsync(msg);
            throw new InvalidOperationException(msg);
        }

        // Audit Log #23 (M5.4 — ParentExecutionId tag-cascade): although the shared
        // script runs inline, it is its OWN execution node. The child context gets
        // a fresh ExecutionId whose parent is the caller's ExecutionId, so the
        // shared script's audit rows chain under the calling run.
        var sharedRunContext = _context.CreateChildContextForSharedScript(childDepth);
        var normalizedArgs = ScriptArgs.Normalize(parameters);

        return await _library.ExecuteAsync(scriptName, sharedRunContext, normalizedArgs, cancellationToken);
    }
}
|
|
|
|
/// <summary>
/// WP-13: Helper for ExternalSystem.Call/CachedCall syntax — the external
/// system API surface exposed to scripts.
/// </summary>
/// <remarks>
/// Audit Log #23 (M2 Bundle F): every <see cref="Call"/> invocation emits
/// one <c>ApiOutbound</c>/<c>ApiCall</c> audit row via <see cref="IAuditWriter"/>.
/// The audit emission is wrapped in a try/catch that swallows every exception
/// — the audit pipeline is best-effort and must NEVER abort the script's
/// outbound call (alog.md §7). The original <see cref="ExternalCallResult"/>
/// (or the original thrown exception) flows back to the caller unchanged.
/// </remarks>
|
|
public class ExternalSystemHelper
|
|
{
|
|
private static readonly Regex HttpStatusRegex = new(
|
|
@"HTTP\s+(?<code>\d{3})",
|
|
RegexOptions.Compiled | RegexOptions.CultureInvariant);
|
|
|
|
private readonly IExternalSystemClient? _client;
|
|
private readonly string _instanceName;
|
|
private readonly ILogger _logger;
|
|
private readonly Guid _executionId;
|
|
|
|
/// <summary>
/// Audit Log #23 (ParentExecutionId): the spawning execution's id when
/// this run was inbound-API-routed; <c>null</c> for non-routed runs.
/// Threaded alongside <see cref="_executionId"/> and stamped as
/// <c>parentExecutionId</c> on every audit event and cached-call
/// telemetry row this helper builds.
/// </summary>
|
|
private readonly Guid? _parentExecutionId;
|
|
|
|
private readonly IAuditWriter? _auditWriter;
|
|
private readonly string _siteId;
|
|
private readonly string? _sourceScript;
|
|
private readonly ICachedCallTelemetryForwarder? _cachedForwarder;
|
|
|
|
/// <summary>
|
|
/// SourceNode-stamping (Task 14): the local cluster node name on
|
|
/// which this script is executing (<c>node-a</c>/<c>node-b</c>).
|
|
/// Stamped onto <c>SiteCallOperational.SourceNode</c> on the three
|
|
/// cached-call telemetry construction sites (CachedSubmit + the two
|
|
/// immediate-completion rows) so central can persist it on the
|
|
/// <c>SiteCalls</c> row.
|
|
/// </summary>
|
|
private readonly string? _sourceNode;
|
|
|
|
/// <summary>
|
|
/// Initializes a new external system helper for script API calls.
|
|
/// </summary>
|
|
/// <param name="client">Optional client for external system API calls.</param>
|
|
/// <param name="instanceName">Unique name of the instance making the call.</param>
|
|
/// <param name="logger">Logger for diagnostics and warnings.</param>
|
|
/// <param name="executionId">Unique identifier for this script execution.</param>
|
|
/// <param name="auditWriter">Optional writer for audit log entries.</param>
|
|
/// <param name="siteId">Identifier of the site where this call originates.</param>
|
|
/// <param name="sourceScript">Optional name of the source script for audit trail.</param>
|
|
/// <param name="cachedForwarder">Optional forwarder for cached call telemetry.</param>
|
|
/// <param name="parentExecutionId">Optional identifier of the parent execution (for routed calls).</param>
|
|
/// <param name="sourceNode">Optional cluster node identifier (node-a/node-b) for audit stamping.</param>
|
|
// Internal constructor for tests living in ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests
|
|
// (via InternalsVisibleTo). Production sites resolve the helper through
|
|
// ScriptRuntimeContext.ExternalSystem.
|
|
//
|
|
// Parameter ordering: executionId sits immediately after the
|
|
// ILogger across all four audit-threaded ctors (ExternalSystemHelper,
|
|
// DatabaseHelper, AuditingDbConnection, AuditingDbCommand) — a required
|
|
// Guid cannot follow the optional provenance params without a
|
|
// required-after-optional compile error, so the post-logger slot is the
|
|
// one consistent position that compiles cleanly everywhere. The nullable
|
|
// parentExecutionId is a trailing optional param so existing positional
|
|
// callers stay source-compatible.
|
|
internal ExternalSystemHelper(
    IExternalSystemClient? client,
    string instanceName,
    ILogger logger,
    Guid executionId,
    IAuditWriter? auditWriter = null,
    string siteId = "",
    string? sourceScript = null,
    ICachedCallTelemetryForwarder? cachedForwarder = null,
    Guid? parentExecutionId = null,
    string? sourceNode = null)
{
    // Collaborators the helper calls out to (any of the optional ones may be null).
    (_client, _logger, _auditWriter, _cachedForwarder) =
        (client, logger, auditWriter, cachedForwarder);

    // Execution identity stamped on emitted audit / telemetry rows.
    (_executionId, _parentExecutionId) = (executionId, parentExecutionId);

    // Provenance: where the call originates from.
    (_instanceName, _siteId, _sourceScript, _sourceNode) =
        (instanceName, siteId, sourceScript, sourceNode);
}
|
|
|
|
/// <summary>
/// Invokes <paramref name="methodName"/> on the external system
/// <paramref name="systemName"/> and awaits the client's response.
/// </summary>
/// <remarks>
/// Audit Log #23 (M2 Bundle F): once the client has been invoked, the
/// outcome — the returned result or the thrown exception — is handed to
/// <c>EmitCallAudit</c> together with the start time and elapsed
/// milliseconds. The result or exception then reaches the caller untouched.
/// </remarks>
/// <param name="systemName">Name of the external system to call.</param>
/// <param name="methodName">Name of the method to invoke on the external system.</param>
/// <param name="parameters">Optional parameters to pass to the method.</param>
/// <param name="cancellationToken">Cancellation token forwarded to the client call.</param>
/// <returns>The result returned by the external system client.</returns>
/// <exception cref="InvalidOperationException">No external system client was supplied to this helper.</exception>
public async Task<ExternalCallResult> Call(
    string systemName,
    string methodName,
    IReadOnlyDictionary<string, object?>? parameters = null,
    CancellationToken cancellationToken = default)
{
    if (_client is null)
    {
        throw new InvalidOperationException("External system client not available");
    }

    // Capture wall-clock start for the audit row and a monotonic stamp
    // for the duration measurement.
    var startedAtUtc = DateTime.UtcNow;
    var startStamp = Stopwatch.GetTimestamp();
    ExternalCallResult? outcome = null;
    Exception? failure = null;

    try
    {
        outcome = await _client.CallAsync(systemName, methodName, parameters, cancellationToken);
        return outcome;
    }
    catch (Exception ex)
    {
        // Remember the exception for the audit row, then let it propagate
        // with its original stack trace.
        failure = ex;
        throw;
    }
    finally
    {
        // Runs on both the success and the failure path, so exactly one
        // audit emission is attempted per invocation.
        var elapsedStamp = Stopwatch.GetTimestamp() - startStamp;
        var elapsedMs = (int)(elapsedStamp * 1000d / Stopwatch.Frequency);
        EmitCallAudit(systemName, methodName, startedAtUtc, elapsedMs, outcome, failure, parameters);
    }
}
|
|
|
|
/// <summary>
/// Submits a cached outbound API call (Audit Log #23 / M3). A fresh
/// <see cref="TrackedOperationId"/> is minted, the <c>CachedSubmit</c>
/// telemetry packet is emitted, the call is handed to the client's
/// cached-call path under that id, and the id is returned so the script
/// can later query <c>Tracking.Status(id)</c>.
/// </summary>
/// <remarks>
/// <b>Best-effort emission (alog.md §7):</b> telemetry failures are logged
/// and swallowed by the emit helpers; they never abort the script. An
/// exception thrown by the cached-call path itself is logged and rethrown.
/// </remarks>
/// <param name="systemName">Name of the external system to call.</param>
/// <param name="methodName">Name of the method to invoke on the external system.</param>
/// <param name="parameters">Optional parameters to pass to the method.</param>
/// <param name="cancellationToken">Cancellation token for the async operation.</param>
/// <returns>Tracked operation identifier for status queries.</returns>
/// <exception cref="InvalidOperationException">No external system client was supplied to this helper.</exception>
public async Task<TrackedOperationId> CachedCall(
    string systemName,
    string methodName,
    IReadOnlyDictionary<string, object?>? parameters = null,
    CancellationToken cancellationToken = default)
{
    if (_client is null)
    {
        throw new InvalidOperationException("External system client not available");
    }

    var operationId = TrackedOperationId.New();
    var submittedAtUtc = DateTime.UtcNow;
    var target = string.Concat(systemName, ".", methodName);

    // CachedSubmit goes out BEFORE the hand-off so the SiteCalls row
    // exists ahead of the first delivery attempt: Tracking.Status(id) can
    // then observe a Submitted row even when the immediate attempt
    // resolves before this method returns.
    await EmitCachedSubmitTelemetryAsync(
            systemName, methodName, target, operationId, submittedAtUtc, parameters, cancellationToken)
        .ConfigureAwait(false);

    // Hand off to the existing cached-call path. The TrackedOperationId
    // becomes the store-and-forward message id so the retry loop (Bundle E
    // Tasks E4/E5) can read it back via StoreAndForwardMessage.Id.
    //
    // M3 Bundle F (F2): the result is retained because the
    // immediate-completion path (WasBuffered=false) bypasses
    // store-and-forward entirely — no retry loop and no
    // ICachedCallLifecycleObserver callback — so this helper has to emit
    // the Attempted + CachedResolve rows itself. When WasBuffered=true the
    // retry loop owns those emissions.
    ExternalCallResult? callResult;
    try
    {
        callResult = await _client.CachedCallAsync(
            systemName,
            methodName,
            parameters,
            _instanceName,
            cancellationToken,
            operationId,
            // Audit Log #23 (ExecutionId Task 4): ExecutionId + SourceScript
            // travel with the call so a buffered call's retry-loop audit
            // rows carry them.
            executionId: _executionId,
            sourceScript: _sourceScript,
            // Audit Log #23 (ParentExecutionId Task 6): likewise for the
            // spawning inbound-API request's ExecutionId; null for a
            // non-routed run.
            parentExecutionId: _parentExecutionId).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // The cached-call surface reports permanent failure through the
        // returned ExternalCallResult, so an exception here is exceptional
        // (e.g. cancellation, resolver outage). Log and rethrow — the
        // script needs to learn about it. The id already went out on the
        // CachedSubmit telemetry above.
        _logger.LogWarning(ex,
            "ExternalSystem.CachedCall threw for {System}.{Method} (TrackedOperationId {Id})",
            systemName, methodName, operationId);
        throw;
    }

    // M3 Bundle F (F2): immediate-completion lifecycle — supply the
    // Attempted + CachedResolve rows the retry loop never produced.
    if (callResult is { WasBuffered: false })
    {
        await EmitImmediateTerminalTelemetryAsync(
                systemName, methodName, target, operationId, callResult, parameters, cancellationToken)
            .ConfigureAwait(false);
    }

    return operationId;
}
|
|
|
|
/// <summary>
/// Builds and forwards the <c>CachedSubmit</c> lifecycle packet for a
/// cached call. Does nothing when no telemetry forwarder is wired. A
/// failure while building or forwarding the packet is logged as a warning
/// and swallowed, so the calling script's enqueue is never disturbed.
/// </summary>
private async Task EmitCachedSubmitTelemetryAsync(
    string systemName,
    string methodName,
    string target,
    TrackedOperationId trackedId,
    DateTime occurredAtUtc,
    IReadOnlyDictionary<string, object?>? parameters,
    CancellationToken cancellationToken)
{
    if (_cachedForwarder is null)
    {
        return;
    }

    CachedCallTelemetry packet;
    try
    {
        var audit = ScadaBridgeAuditEventFactory.Create(
            channel: AuditChannel.ApiOutbound,
            kind: AuditKind.CachedSubmit,
            status: AuditStatus.Submitted,
            occurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
            target: target,
            // CorrelationId is the per-operation lifecycle id
            // (TrackedOperationId); ExecutionId is the per-execution id
            // shared across this script run.
            correlationId: trackedId.Value,
            executionId: _executionId,
            // Audit Log #23 (ParentExecutionId): the spawning execution's
            // id; null for non-routed runs.
            parentExecutionId: _parentExecutionId,
            sourceSiteId: string.IsNullOrEmpty(_siteId) ? null : _siteId,
            sourceInstanceId: _instanceName,
            sourceScript: _sourceScript,
            // Submit precedes the call — request args only, no response yet.
            requestSummary: SerializeRequest(parameters));

        var operational = new SiteCallOperational(
            TrackedOperationId: trackedId,
            Channel: "ApiOutbound",
            Target: target,
            SourceSite: _siteId,
            // SourceNode-stamping (Task 14): the local node name
            // (node-a/node-b); null when none was supplied to this helper.
            SourceNode: _sourceNode,
            Status: "Submitted",
            RetryCount: 0,
            LastError: null,
            HttpStatus: null,
            CreatedAtUtc: occurredAtUtc,
            UpdatedAtUtc: occurredAtUtc,
            TerminalAtUtc: null);

        packet = new CachedCallTelemetry(Audit: audit, Operational: operational);
    }
    catch (Exception buildEx)
    {
        _logger.LogWarning(buildEx,
            "Failed to build CachedSubmit telemetry for {System}.{Method} (TrackedOperationId {Id}) — skipping emission",
            systemName, methodName, trackedId);
        return;
    }

    try
    {
        await _cachedForwarder.ForwardAsync(packet, cancellationToken)
            .ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "CachedSubmit telemetry forward failed for {System}.{Method} (TrackedOperationId {Id})",
            systemName, methodName, trackedId);
    }
}
|
|
|
|
/// <summary>
/// M3 Bundle F (F2): emits the Attempted + CachedResolve lifecycle rows
/// for an immediate-completion <c>CachedCall</c> (WasBuffered=false). The
/// store-and-forward retry loop never engaged, so the
/// <c>ICachedCallLifecycleObserver</c> never fires — the helper must
/// produce both rows itself to keep the M3 audit contract whole
/// (Submit → Attempted → Resolve under one TrackedOperationId).
/// </summary>
/// <remarks>
/// Best-effort emission: a failure while building or forwarding a row is
/// logged and swallowed per alog.md §7. The two rows are built and
/// forwarded INDEPENDENTLY, so a fault on the Attempted row does not
/// prevent the CachedResolve row from being emitted. Does nothing when no
/// telemetry forwarder is wired.
/// </remarks>
/// <param name="systemName">External system name (used in log messages).</param>
/// <param name="methodName">External method name (used in log messages).</param>
/// <param name="target">Target string stamped on both rows.</param>
/// <param name="trackedId">Lifecycle id shared by both rows.</param>
/// <param name="result">Outcome of the immediate attempt.</param>
/// <param name="parameters">Request arguments, serialised into the request summary.</param>
/// <param name="cancellationToken">Cancellation token passed to the forwarder.</param>
private async Task EmitImmediateTerminalTelemetryAsync(
    string systemName,
    string methodName,
    string target,
    TrackedOperationId trackedId,
    ExternalCallResult result,
    IReadOnlyDictionary<string, object?>? parameters,
    CancellationToken cancellationToken)
{
    if (_cachedForwarder is null)
    {
        return;
    }

    var occurredAtUtc = DateTime.UtcNow;

    // Extract an HTTP status from the error message when present (same
    // HttpStatusRegex the synchronous Call() audit row uses) so the
    // immediate-failure rows carry the same structured HttpStatus value.
    int? httpStatus = null;
    if (!result.Success && !string.IsNullOrEmpty(result.ErrorMessage))
    {
        var match = HttpStatusRegex.Match(result.ErrorMessage);
        if (match.Success && int.TryParse(match.Groups["code"].Value, out var code))
        {
            httpStatus = code;
        }
    }

    // Status mapping for immediate completion:
    //   Success=true  -> Delivered (audit) / "Delivered" (operational)
    //   Success=false -> Failed    (audit) / "Failed"    (operational)
    // Permanent vs transient is not relevant here: either way the terminal
    // state is "the immediate attempt failed and the operation is done".
    var auditTerminalStatus = result.Success
        ? AuditStatus.Delivered
        : AuditStatus.Failed;
    var operationalTerminalStatus = result.Success ? "Delivered" : "Failed";

    // Serialise the request once; both rows carry the same summary.
    // SerializeRequest swallows its own failures, so it is safe outside
    // the per-row try blocks.
    var requestSummary = SerializeRequest(parameters);

    // The two rows differ only in audit kind, audit status, operational
    // status and TerminalAtUtc; everything else is shared.
    CachedCallTelemetry BuildRow(
        AuditKind kind,
        AuditStatus auditStatus,
        string operationalStatus,
        DateTime? terminalAtUtc)
    {
        return new CachedCallTelemetry(
            Audit: ScadaBridgeAuditEventFactory.Create(
                channel: AuditChannel.ApiOutbound,
                kind: kind,
                status: auditStatus,
                occurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
                target: target,
                // CorrelationId = per-operation lifecycle id;
                // ExecutionId = per-execution id for this script run.
                correlationId: trackedId.Value,
                executionId: _executionId,
                // Audit Log #23 (ParentExecutionId): the spawning
                // execution's id; null for non-routed runs.
                parentExecutionId: _parentExecutionId,
                sourceSiteId: string.IsNullOrEmpty(_siteId) ? null : _siteId,
                sourceInstanceId: _instanceName,
                sourceScript: _sourceScript,
                httpStatus: httpStatus,
                errorMessage: result.Success ? null : result.ErrorMessage,
                requestSummary: requestSummary,
                responseSummary: result.ResponseJson),
            Operational: new SiteCallOperational(
                TrackedOperationId: trackedId,
                Channel: "ApiOutbound",
                Target: target,
                SourceSite: _siteId,
                // SourceNode-stamping (Task 14): the local node name
                // (node-a/node-b); null when none was supplied.
                SourceNode: _sourceNode,
                Status: operationalStatus,
                // RetryCount stays 0 — the operation never reached the
                // retry sweep, so no retries were performed.
                RetryCount: 0,
                LastError: result.Success ? null : result.ErrorMessage,
                HttpStatus: httpStatus,
                CreatedAtUtc: occurredAtUtc,
                UpdatedAtUtc: occurredAtUtc,
                TerminalAtUtc: terminalAtUtc));
    }

    // --- Attempted row -------------------------------------------------
    // A build failure leaves this null and skips only the Attempted
    // forward; the CachedResolve row below is still attempted.
    CachedCallTelemetry? attempted = null;
    try
    {
        attempted = BuildRow(
            AuditKind.ApiCallCached, AuditStatus.Attempted, "Attempted", terminalAtUtc: null);
    }
    catch (Exception buildEx)
    {
        _logger.LogWarning(buildEx,
            "Failed to build immediate-Attempted telemetry for {System}.{Method} (TrackedOperationId {Id}) — skipping emission",
            systemName, methodName, trackedId);
    }

    if (attempted is not null)
    {
        try
        {
            await _cachedForwarder.ForwardAsync(attempted, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Immediate-Attempted telemetry forward failed for {System}.{Method} (TrackedOperationId {Id})",
                systemName, methodName, trackedId);
        }
    }

    // --- CachedResolve row --------------------------------------------
    CachedCallTelemetry resolve;
    try
    {
        // Immediate-completion terminal — TerminalAtUtc is set so
        // SiteCallAudit can move the row to its purge-eligible set.
        resolve = BuildRow(
            AuditKind.CachedResolve, auditTerminalStatus, operationalTerminalStatus, occurredAtUtc);
    }
    catch (Exception buildEx)
    {
        _logger.LogWarning(buildEx,
            "Failed to build immediate-CachedResolve telemetry for {System}.{Method} (TrackedOperationId {Id}) — skipping emission",
            systemName, methodName, trackedId);
        return;
    }

    try
    {
        await _cachedForwarder.ForwardAsync(resolve, cancellationToken)
            .ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Immediate-CachedResolve telemetry forward failed for {System}.{Method} (TrackedOperationId {Id})",
            systemName, methodName, trackedId);
    }
}
|
|
|
|
/// <summary>
/// Best-effort emission of one <c>ApiOutbound</c>/<c>ApiCall</c> audit
/// row for a synchronous <c>Call</c>. Does nothing when no audit writer is
/// wired. Failures while building the event, a synchronous throw from the
/// writer, and a faulted write task are each logged as a warning and
/// swallowed — audit-write failures must never abort the user-facing action.
/// </summary>
private void EmitCallAudit(
    string systemName,
    string methodName,
    DateTime occurredAtUtc,
    int durationMs,
    ExternalCallResult? result,
    Exception? thrown,
    IReadOnlyDictionary<string, object?>? parameters)
{
    if (_auditWriter is null)
    {
        return;
    }

    AuditEvent auditEvent;
    try
    {
        auditEvent = BuildCallAuditEvent(
            systemName, methodName, occurredAtUtc, durationMs, result, thrown, parameters);
    }
    catch (Exception buildEx)
    {
        // Defensive guard: building the event must never propagate, per
        // the alog.md §7 best-effort contract.
        _logger.LogWarning(buildEx,
            "Failed to build Audit Log #23 event for {System}.{Method} — skipping emission",
            systemName, methodName);
        return;
    }

    try
    {
        // Fire-and-forget: the script is never blocked on the audit
        // writer. The write is issued with CancellationToken.None.
        var pendingWrite = _auditWriter.WriteAsync(auditEvent, CancellationToken.None);

        if (pendingWrite.IsCompleted)
        {
            // Already finished: report a fault right away.
            if (pendingWrite.IsFaulted)
            {
                _logger.LogWarning(pendingWrite.Exception,
                    "Audit Log #23 write failed for EventId {EventId} ({System}.{Method})",
                    auditEvent.EventId, systemName, methodName);
            }
        }
        else
        {
            // Still in flight: observe a later fault through a
            // continuation so it is logged rather than left unobserved.
            pendingWrite.ContinueWith(
                faulted => _logger.LogWarning(faulted.Exception,
                    "Audit Log #23 write failed for EventId {EventId} ({System}.{Method})",
                    auditEvent.EventId, systemName, methodName),
                CancellationToken.None,
                TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously,
                TaskScheduler.Default);
        }
    }
    catch (Exception writeEx)
    {
        // Synchronous throw from WriteAsync itself. Swallow + log per the
        // alog.md §7 contract.
        _logger.LogWarning(writeEx,
            "Audit Log #23 write threw synchronously for EventId {EventId} ({System}.{Method})",
            auditEvent.EventId, systemName, methodName);
    }
}
|
|
|
|
/// <summary>
/// Builds the <c>ApiOutbound</c>/<c>ApiCall</c> audit event describing one
/// synchronous <c>Call</c> outcome.
/// </summary>
/// <param name="systemName">External system name; combined with <paramref name="methodName"/> into the target.</param>
/// <param name="methodName">External method name.</param>
/// <param name="occurredAtUtc">Start time of the call; stamped with <see cref="DateTimeKind.Utc"/>.</param>
/// <param name="durationMs">Elapsed time of the call in milliseconds.</param>
/// <param name="result">Result returned by the client, or <c>null</c> when the call threw.</param>
/// <param name="thrown">Exception raised by the call, or <c>null</c> when it returned.</param>
/// <param name="parameters">Request arguments, serialised into the request summary.</param>
/// <returns>The populated audit event.</returns>
private AuditEvent BuildCallAuditEvent(
    string systemName,
    string methodName,
    DateTime occurredAtUtc,
    int durationMs,
    ExternalCallResult? result,
    Exception? thrown,
    IReadOnlyDictionary<string, object?>? parameters)
{
    // Delivered only when nothing was thrown AND the result reports
    // success; every other combination is Failed. M2 makes no distinction
    // between transient and permanent failure on the sync path.
    var delivered = thrown is null && result is { Success: true };
    var status = delivered ? AuditStatus.Delivered : AuditStatus.Failed;

    string? errorMessage = null;
    string? errorDetail = null;
    int? httpStatus = null;

    if (thrown is not null)
    {
        errorMessage = thrown.Message;
        errorDetail = thrown.ToString();
    }
    else if (result is { Success: false })
    {
        errorMessage = result.ErrorMessage;

        // Recover a structured status code from an "HTTP {code}" fragment
        // in the error message, when one is present.
        if (!string.IsNullOrEmpty(errorMessage))
        {
            var statusMatch = HttpStatusRegex.Match(errorMessage);
            if (statusMatch.Success
                && int.TryParse(statusMatch.Groups["code"].Value, out var parsedCode))
            {
                httpStatus = parsedCode;
            }
        }
    }

    var occurredUtc = DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc);
    var sourceSiteId = string.IsNullOrEmpty(_siteId) ? null : _siteId;

    return ScadaBridgeAuditEventFactory.Create(
        channel: AuditChannel.ApiOutbound,
        kind: AuditKind.ApiCall,
        status: status,
        occurredAtUtc: occurredUtc,
        // Outbound channel: per the Audit Log Actor-column spec the actor
        // is the calling script; null when no source script was supplied.
        actor: _sourceScript,
        target: $"{systemName}.{methodName}",
        // Audit Log #23: a sync one-shot call has no operation lifecycle,
        // so CorrelationId is null. ExecutionId lets all sync rows from
        // one script run be correlated together.
        correlationId: null,
        executionId: _executionId,
        // Audit Log #23 (ParentExecutionId): the spawning execution's id;
        // null for non-routed runs.
        parentExecutionId: _parentExecutionId,
        sourceSiteId: sourceSiteId,
        sourceInstanceId: _instanceName,
        sourceScript: _sourceScript,
        httpStatus: httpStatus,
        durationMs: durationMs,
        errorMessage: errorMessage,
        errorDetail: errorDetail,
        // Payload capture: raw request arguments and response body. Size
        // capping and redaction are left to the audit writer downstream.
        requestSummary: SerializeRequest(parameters),
        responseSummary: result?.ResponseJson,
        payloadTruncated: false,
        extra: null);
}
|
|
|
|
/// <summary>
/// Turns the outbound-call argument dictionary into the JSON text used as
/// the <c>RequestSummary</c> on <c>ApiOutbound</c> audit rows.
/// </summary>
/// <param name="parameters">Arguments of the outbound call; may be <c>null</c>.</param>
/// <returns>
/// The JSON text, or <c>null</c> when the dictionary is <c>null</c>, empty,
/// or cannot be serialised — a payload that cannot be summarised must
/// never abort the best-effort audit emission.
/// </returns>
private static string? SerializeRequest(IReadOnlyDictionary<string, object?>? parameters)
{
    // Nothing to summarise.
    if (parameters is null or { Count: 0 })
    {
        return null;
    }

    try
    {
        return JsonSerializer.Serialize(parameters);
    }
    catch (Exception)
    {
        // Deliberate swallow: an unserialisable payload yields no summary.
        return null;
    }
}
|
|
}
|
|
|
|
/// <summary>
|
|
/// WP-13: Helper for Database.Connection/CachedWrite syntax.
|
|
/// </summary>
|
|
/// <remarks>
|
|
/// Audit Log #23 (M3 Bundle E — Task E6): <see cref="CachedWrite"/> mirrors
|
|
/// <see cref="ExternalSystemHelper.CachedCall"/> — mints a
|
|
/// <see cref="TrackedOperationId"/>, emits the lifecycle's first
|
|
/// CachedSubmit packet (Channel <c>DbOutbound</c>), hands off to the S&amp;F
|
|
/// retry loop, and returns the id. Per-attempt + terminal telemetry is
|
|
/// emitted by the retry loop (Tasks E4/E5).
|
|
/// </remarks>
|
|
public class DatabaseHelper
|
|
{
|
|
private readonly IDatabaseGateway? _gateway;
|
|
private readonly string _instanceName;
|
|
private readonly ILogger _logger;
|
|
private readonly Guid _executionId;
|
|
|
|
/// <summary>
|
|
/// Audit Log #23 (ParentExecutionId): the spawning execution's id when
|
|
/// this run was inbound-API-routed; <c>null</c> for non-routed runs.
|
|
/// Threaded alongside <see cref="_executionId"/> ready for the Task 5
|
|
/// emitter — no audit row carries it yet.
|
|
/// </summary>
|
|
private readonly Guid? _parentExecutionId;
|
|
|
|
private readonly string _siteId;
|
|
private readonly string? _sourceScript;
|
|
private readonly ICachedCallTelemetryForwarder? _cachedForwarder;
|
|
|
|
/// <summary>
|
|
/// Audit Log #23 (M4 Bundle A): best-effort emitter for synchronous
|
|
/// <c>Database.Connection</c>-routed Execute / ExecuteScalar /
|
|
/// ExecuteReader calls. When wired, <see cref="Connection"/> returns
|
|
/// an <see cref="AuditingDbConnection"/> that intercepts each command
|
|
/// execution and writes one <c>DbOutbound</c>/<c>DbWrite</c> audit
|
|
/// row. Optional — when null the helper falls back to the raw
|
|
/// inner <see cref="System.Data.Common.DbConnection"/> the gateway
|
|
/// returns (tests / minimal hosts that don't wire audit).
|
|
/// </summary>
|
|
private readonly IAuditWriter? _auditWriter;
|
|
|
|
/// <summary>
|
|
/// SourceNode-stamping (Task 14): the local cluster node name on
|
|
/// which this script is executing (<c>node-a</c>/<c>node-b</c>).
|
|
/// Stamped onto <c>SiteCallOperational.SourceNode</c> at the
|
|
/// <c>Database.CachedWrite</c> CachedSubmit telemetry construction
|
|
/// site so central can persist it on the <c>SiteCalls</c> row.
|
|
/// </summary>
|
|
private readonly string? _sourceNode;
|
|
|
|
/// <summary>
|
|
/// Initializes a new database helper for script database access.
|
|
/// </summary>
|
|
/// <param name="gateway">Optional gateway for database connection and cached write access.</param>
|
|
/// <param name="instanceName">Unique name of the instance making the call.</param>
|
|
/// <param name="logger">Logger for diagnostics and warnings.</param>
|
|
/// <param name="executionId">Unique identifier for this script execution.</param>
|
|
/// <param name="auditWriter">Optional writer for audit log entries.</param>
|
|
/// <param name="siteId">Identifier of the site where this call originates.</param>
|
|
/// <param name="sourceScript">Optional name of the source script for audit trail.</param>
|
|
/// <param name="cachedForwarder">Optional forwarder for cached call telemetry.</param>
|
|
/// <param name="parentExecutionId">Optional identifier of the parent execution (for routed calls).</param>
|
|
/// <param name="sourceNode">Optional cluster node identifier (node-a/node-b) for audit stamping.</param>
|
|
// Parameter ordering: executionId sits immediately after the
|
|
// ILogger — see the note on ExternalSystemHelper's ctor for why the
|
|
// post-logger slot is the one consistent position across all four
|
|
// audit-threaded ctors. parentExecutionId is a trailing optional param.
|
|
internal DatabaseHelper(
    IDatabaseGateway? gateway,
    string instanceName,
    ILogger logger,
    Guid executionId,
    IAuditWriter? auditWriter = null,
    string siteId = "",
    string? sourceScript = null,
    ICachedCallTelemetryForwarder? cachedForwarder = null,
    Guid? parentExecutionId = null,
    string? sourceNode = null)
{
    // Collaborators the helper calls out to (any of the optional ones may be null).
    (_gateway, _logger, _auditWriter, _cachedForwarder) =
        (gateway, logger, auditWriter, cachedForwarder);

    // Execution identity threaded into audit / telemetry emission.
    (_executionId, _parentExecutionId) = (executionId, parentExecutionId);

    // Provenance: where the call originates from.
    (_instanceName, _siteId, _sourceScript, _sourceNode) =
        (instanceName, siteId, sourceScript, sourceNode);
}
|
|
|
|
/// <summary>
/// Obtains the named database connection from the gateway and, when an
/// audit writer is wired, wraps it in an <see cref="AuditingDbConnection"/>.
/// </summary>
/// <param name="name">Name of the database connection to retrieve.</param>
/// <param name="cancellationToken">Cancellation token passed to the gateway.</param>
/// <returns>
/// The gateway's connection as-is when no audit writer is configured;
/// otherwise an <see cref="AuditingDbConnection"/> around it.
/// </returns>
/// <exception cref="InvalidOperationException">No database gateway was supplied to this helper.</exception>
public async Task<System.Data.Common.DbConnection> Connection(
    string name,
    CancellationToken cancellationToken = default)
{
    if (_gateway is null)
    {
        throw new InvalidOperationException("Database gateway not available");
    }

    var inner = await _gateway.GetConnectionAsync(name, cancellationToken);

    if (_auditWriter is not null)
    {
        // Audit Log #23 (M4 Bundle A): the decorator is what emits the
        // DbOutbound/DbWrite audit rows for commands the script executes
        // on the returned connection; it is handed the inner connection
        // plus the provenance and execution ids to stamp.
        return new AuditingDbConnection(
            inner,
            _auditWriter,
            connectionName: name,
            siteId: _siteId,
            instanceName: _instanceName,
            sourceScript: _sourceScript,
            logger: _logger,
            executionId: _executionId,
            // Audit Log #23 (ParentExecutionId): the spawning execution's
            // id, threaded alongside _executionId. Null for non-routed runs.
            parentExecutionId: _parentExecutionId);
    }

    // No audit writer wired (tests / minimal hosts): hand back the raw
    // connection from the gateway.
    return inner;
}
|
|
|
|
/// <summary>
/// Submits a cached outbound database write and returns the id that tracks it.
/// A fresh <see cref="TrackedOperationId"/> is minted, a <c>CachedSubmit</c>
/// telemetry packet is emitted on <c>DbOutbound</c>, and the write is handed
/// to the gateway's cached-write path. When the gateway reports that the
/// write was not buffered, the <c>Attempted</c> and <c>CachedResolve</c>
/// packets are emitted from here as well. Telemetry emission is best-effort
/// (alog.md §7).
/// </summary>
/// <param name="name">Name of the database connection to use; also used as the telemetry target.</param>
/// <param name="sql">SQL statement to execute.</param>
/// <param name="parameters">Optional parameters to pass to the SQL statement.</param>
/// <param name="cancellationToken">Token forwarded to the gateway and to the telemetry forwarder.</param>
/// <returns>Tracked operation identifier for status queries.</returns>
/// <exception cref="InvalidOperationException">No database gateway was supplied to this helper.</exception>
public async Task<TrackedOperationId> CachedWrite(
    string name,
    string sql,
    IReadOnlyDictionary<string, object?>? parameters = null,
    CancellationToken cancellationToken = default)
{
    if (_gateway is null)
    {
        throw new InvalidOperationException("Database gateway not available");
    }

    var operationId = TrackedOperationId.New();
    var submittedAtUtc = DateTime.UtcNow;

    // The connection name doubles as the telemetry target: it is the only
    // human-readable handle the gateway carries on the buffered row.
    await EmitCachedDbSubmitTelemetryAsync(
            name, operationId, name, submittedAtUtc, cancellationToken)
        .ConfigureAwait(false);

    // M2.3 (#7): the gateway attempts the write immediately and classifies
    // the outcome (mirroring ExternalSystem.CachedCall). The outcome is kept
    // because a non-buffered completion (immediate success OR a synchronous
    // permanent failure) never enters the store-and-forward retry loop, so
    // the retry loop's telemetry never fires for it.
    ExternalCallResult? outcome;
    try
    {
        outcome = await _gateway.CachedWriteAsync(
                name, sql, parameters, _instanceName, cancellationToken, operationId,
                // Audit Log #23 (ExecutionId Task 4): lets a buffered
                // write's retry-loop audit rows carry the execution id
                // and the source script.
                executionId: _executionId,
                sourceScript: _sourceScript,
                // Audit Log #23 (ParentExecutionId Task 6): the spawning
                // inbound-API request's execution id; null when not routed.
                parentExecutionId: _parentExecutionId)
            .ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // Log for diagnostics, then let the failure reach the script.
        _logger.LogWarning(ex,
            "Database.CachedWrite threw for {Connection} (TrackedOperationId {Id})",
            name, operationId);
        throw;
    }

    // Buffered writes (or a missing result) need nothing more from here:
    // the retry loop owns the Attempted + Resolve emissions on that path.
    if (outcome is not { WasBuffered: false })
    {
        return operationId;
    }

    // M2.3 (#7): immediate completion. Emit the Attempted + CachedResolve
    // rows here, otherwise Tracking.Status(id) would stay Submitted forever
    // and the audit log would lack the terminal lifecycle.
    await EmitImmediateDbTerminalTelemetryAsync(
            name, name, operationId, outcome, cancellationToken)
        .ConfigureAwait(false);

    return operationId;
}
|
|
|
|
/// <summary>
/// M2.3 (#7): best-effort emission of the immediate-completion lifecycle for
/// a <c>Database.CachedWrite</c> that resolved without the store-and-forward
/// retry loop. Forwards an <c>Attempted</c> packet and then a terminal
/// <c>CachedResolve</c> packet (<c>Delivered</c> when the result succeeded,
/// <c>Failed</c> otherwise). Does nothing when no telemetry forwarder is
/// configured. A forwarder failure is logged and swallowed (alog.md §7).
/// Described by its author as the DB parallel of
/// <c>EmitImmediateTerminalTelemetryAsync</c>, which is not visible in this
/// part of the file.
/// </summary>
/// <param name="connectionName">Connection name used in log messages.</param>
/// <param name="target">Target value stamped on both packets.</param>
/// <param name="trackedId">Tracked operation id shared by both packets.</param>
/// <param name="result">Outcome of the immediate write attempt.</param>
/// <param name="cancellationToken">Token passed to the forwarder.</param>
private async Task EmitImmediateDbTerminalTelemetryAsync(
    string connectionName,
    string target,
    TrackedOperationId trackedId,
    ExternalCallResult result,
    CancellationToken cancellationToken)
{
    var forwarder = _cachedForwarder;
    if (forwarder is null)
    {
        return;
    }

    // One timestamp is shared by both packets.
    var stampUtc = DateTime.UtcNow;

    // Status mapping mirrors the API path: success -> Delivered, anything
    // else -> Failed. Per the caller's contract a transient failure is
    // buffered and never reaches this method, so a failure here is permanent.
    var (terminalAuditStatus, terminalOperationalStatus) = result.Success
        ? (AuditStatus.Delivered, "Delivered")
        : (AuditStatus.Failed, "Failed");

    // --- Attempted row -------------------------------------------------
    if (TryBuildDbTerminalTelemetry(
            connectionName, target, trackedId, stampUtc,
            AuditKind.DbWriteCached, AuditStatus.Attempted, "Attempted",
            result, isTerminal: false) is { } attemptedPacket)
    {
        try
        {
            await forwarder.ForwardAsync(attemptedPacket, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Immediate-Attempted telemetry forward failed for Database.CachedWrite {Connection} (TrackedOperationId {Id})",
                connectionName, trackedId);
        }
    }

    // --- CachedResolve row --------------------------------------------
    // Built only after the Attempted forward has finished, and attempted
    // whether or not that forward succeeded.
    if (TryBuildDbTerminalTelemetry(
            connectionName, target, trackedId, stampUtc,
            AuditKind.CachedResolve, terminalAuditStatus, terminalOperationalStatus,
            result, isTerminal: true) is { } resolvePacket)
    {
        try
        {
            await forwarder.ForwardAsync(resolvePacket, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Immediate-CachedResolve telemetry forward failed for Database.CachedWrite {Connection} (TrackedOperationId {Id})",
                connectionName, trackedId);
        }
    }
}
|
|
|
|
/// <summary>
/// Assembles one immediate-completion <c>DbOutbound</c> telemetry packet
/// (audit event plus operational row). If anything thrown during assembly
/// is caught, a warning is logged and <c>null</c> is returned, so a build
/// failure skips emission instead of aborting the script.
/// </summary>
/// <param name="connectionName">Connection name, used only in the failure log message.</param>
/// <param name="target">Target stamped on the audit event and the operational row.</param>
/// <param name="trackedId">Tracked operation id; its value is the audit correlation id.</param>
/// <param name="occurredAtUtc">Timestamp used for the audit event and every operational timestamp.</param>
/// <param name="kind">Audit kind of the packet.</param>
/// <param name="auditStatus">Status written on the audit event.</param>
/// <param name="operationalStatus">Status string written on the operational row.</param>
/// <param name="result">Write outcome; its error message is recorded only when it did not succeed.</param>
/// <param name="isTerminal">When true the operational row's terminal timestamp is set; otherwise it is null.</param>
/// <returns>The assembled packet, or <c>null</c> when assembly threw.</returns>
private CachedCallTelemetry? TryBuildDbTerminalTelemetry(
    string connectionName,
    string target,
    TrackedOperationId trackedId,
    DateTime occurredAtUtc,
    AuditKind kind,
    AuditStatus auditStatus,
    string operationalStatus,
    ExternalCallResult result,
    bool isTerminal)
{
    try
    {
        // Error text is carried only for an unsuccessful result and is
        // shared by the audit event and the operational row.
        var failureText = result.Success ? null : result.ErrorMessage;

        var auditEvent = ScadaBridgeAuditEventFactory.Create(
            channel: AuditChannel.DbOutbound,
            kind: kind,
            status: auditStatus,
            occurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
            target: target,
            correlationId: trackedId.Value,
            executionId: _executionId,
            parentExecutionId: _parentExecutionId,
            // An empty site id is recorded as null on the audit event.
            sourceSiteId: string.IsNullOrEmpty(_siteId) ? null : _siteId,
            sourceInstanceId: _instanceName,
            sourceScript: _sourceScript,
            errorMessage: failureText);

        var operationalRow = new SiteCallOperational(
            TrackedOperationId: trackedId,
            Channel: "DbOutbound",
            Target: target,
            SourceSite: _siteId,
            SourceNode: _sourceNode,
            Status: operationalStatus,
            RetryCount: 0,
            LastError: failureText,
            HttpStatus: null,
            CreatedAtUtc: occurredAtUtc,
            UpdatedAtUtc: occurredAtUtc,
            TerminalAtUtc: isTerminal ? occurredAtUtc : null);

        return new CachedCallTelemetry(Audit: auditEvent, Operational: operationalRow);
    }
    catch (Exception buildEx)
    {
        _logger.LogWarning(buildEx,
            "Failed to build immediate-{Kind} telemetry for Database.CachedWrite {Connection} (TrackedOperationId {Id}) — skipping emission",
            kind, connectionName, trackedId);
        return null;
    }
}
|
|
|
|
/// <summary>
/// Best-effort emission of the <c>CachedSubmit</c> / <c>Submitted</c>
/// telemetry packet for a <c>Database.CachedWrite</c>. Does nothing when no
/// telemetry forwarder is configured. A failure while building the packet
/// or while forwarding it is logged as a warning and swallowed.
/// </summary>
/// <param name="connectionName">Connection name, used only in log messages.</param>
/// <param name="trackedId">Tracked operation id; its value is the audit correlation id.</param>
/// <param name="target">Target stamped on the audit event and the operational row.</param>
/// <param name="occurredAtUtc">Timestamp for the audit event and the operational created/updated fields.</param>
/// <param name="cancellationToken">Token passed to the forwarder.</param>
private async Task EmitCachedDbSubmitTelemetryAsync(
    string connectionName,
    TrackedOperationId trackedId,
    string target,
    DateTime occurredAtUtc,
    CancellationToken cancellationToken)
{
    var forwarder = _cachedForwarder;
    if (forwarder is null)
    {
        return;
    }

    CachedCallTelemetry submitPacket;
    try
    {
        var auditEvent = ScadaBridgeAuditEventFactory.Create(
            channel: AuditChannel.DbOutbound,
            kind: AuditKind.CachedSubmit,
            status: AuditStatus.Submitted,
            occurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
            target: target,
            // CorrelationId = per-operation lifecycle id
            // (TrackedOperationId); ExecutionId = per-execution id.
            correlationId: trackedId.Value,
            executionId: _executionId,
            // Audit Log #23 (ParentExecutionId): the spawning
            // execution's id; null for non-routed runs.
            parentExecutionId: _parentExecutionId,
            // An empty site id is recorded as null on the audit event.
            sourceSiteId: string.IsNullOrEmpty(_siteId) ? null : _siteId,
            sourceInstanceId: _instanceName,
            sourceScript: _sourceScript);

        var operationalRow = new SiteCallOperational(
            TrackedOperationId: trackedId,
            Channel: "DbOutbound",
            Target: target,
            SourceSite: _siteId,
            // SourceNode-stamping (Task 14): the local node name
            // (node-a/node-b); null when none was supplied, so central
            // persists SiteCalls.SourceNode as NULL.
            SourceNode: _sourceNode,
            Status: "Submitted",
            RetryCount: 0,
            LastError: null,
            HttpStatus: null,
            CreatedAtUtc: occurredAtUtc,
            UpdatedAtUtc: occurredAtUtc,
            TerminalAtUtc: null);

        submitPacket = new CachedCallTelemetry(Audit: auditEvent, Operational: operationalRow);
    }
    catch (Exception buildEx)
    {
        // A packet that cannot be built is skipped; the write proceeds.
        _logger.LogWarning(buildEx,
            "Failed to build CachedSubmit telemetry for Database.CachedWrite {Connection} (TrackedOperationId {Id}) — skipping emission",
            connectionName, trackedId);
        return;
    }

    try
    {
        await forwarder.ForwardAsync(submitPacket, cancellationToken)
            .ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "CachedSubmit telemetry forward failed for Database.CachedWrite {Connection} (TrackedOperationId {Id})",
            connectionName, trackedId);
    }
}
|
|
}
|
|
|
|
/// <summary>
/// Notification Outbox: helper behind the <c>Notify</c> script API.
///
/// In the outbox design the site does not deliver notification email inline.
/// <c>Notify.To("listName").Send(...)</c> enqueues the notification into the
/// site store-and-forward engine, which forwards it to central, and returns
/// a <c>NotificationId</c> handle immediately. <c>Notify.Status(id)</c> later
/// queries the delivery status of that notification.
/// </summary>
public class NotifyHelper
{
    private readonly StoreAndForwardService? _storeAndForward;
    private readonly ICanTell? _siteCommunicationActor;
    private readonly string _siteId;
    private readonly string _instanceName;
    private readonly string? _sourceScript;
    private readonly TimeSpan _askTimeout;
    private readonly ILogger _logger;

    /// <summary>
    /// Audit Log #23: the per-execution id for this script run. Handed to
    /// every <see cref="NotifyTarget"/> created by <see cref="To"/>.
    /// </summary>
    private readonly Guid _executionId;

    /// <summary>
    /// Audit Log #23 (ParentExecutionId): the spawning execution's id when
    /// this run was inbound-API-routed; <c>null</c> for non-routed runs.
    /// Handed to every <see cref="NotifyTarget"/> alongside
    /// <see cref="_executionId"/>.
    /// </summary>
    private readonly Guid? _parentExecutionId;

    /// <summary>
    /// Audit Log #23 (M4 Bundle C): optional audit writer handed to each
    /// <see cref="NotifyTarget"/>. When null the target emits no audit row,
    /// so tests and minimal hosts that do not wire the audit log still work.
    /// </summary>
    private readonly IAuditWriter? _auditWriter;

    /// <summary>
    /// SourceNode-stamping (Task 13): name of the cluster node this script
    /// runs on (<c>node-a</c>/<c>node-b</c>). Handed to each
    /// <see cref="NotifyTarget"/>, which stamps it onto the submitted
    /// notification payload.
    /// </summary>
    private readonly string? _sourceNode;

    /// <summary>
    /// Initializes a new notification helper for script notification delivery.
    /// </summary>
    /// <param name="storeAndForward">Optional store-and-forward service for notification delivery.</param>
    /// <param name="siteCommunicationActor">Optional actor for site-to-central communication.</param>
    /// <param name="siteId">Identifier of the site where this notification originates.</param>
    /// <param name="instanceName">Unique name of the instance sending the notification.</param>
    /// <param name="sourceScript">Optional name of the source script for audit trail.</param>
    /// <param name="askTimeout">Timeout for Ask pattern messages.</param>
    /// <param name="logger">Logger for diagnostics and warnings.</param>
    /// <param name="executionId">Unique identifier for this script execution.</param>
    /// <param name="auditWriter">Optional writer for audit log entries.</param>
    /// <param name="parentExecutionId">Optional identifier of the parent execution (for routed calls).</param>
    /// <param name="sourceNode">Optional cluster node identifier (node-a/node-b) for audit stamping.</param>
    // Parameter ordering: executionId sits immediately after the ILogger,
    // consistent with the other audit-threaded ctors. parentExecutionId is
    // a trailing optional param.
    internal NotifyHelper(
        StoreAndForwardService? storeAndForward,
        ICanTell? siteCommunicationActor,
        string siteId,
        string instanceName,
        string? sourceScript,
        TimeSpan askTimeout,
        ILogger logger,
        Guid executionId,
        IAuditWriter? auditWriter = null,
        Guid? parentExecutionId = null,
        string? sourceNode = null)
    {
        // Delivery plumbing.
        (_storeAndForward, _siteCommunicationActor, _askTimeout, _logger) =
            (storeAndForward, siteCommunicationActor, askTimeout, logger);

        // Provenance carried on payloads and audit rows.
        (_siteId, _instanceName, _sourceScript, _sourceNode) =
            (siteId, instanceName, sourceScript, sourceNode);

        // Audit threading.
        (_executionId, _parentExecutionId, _auditWriter) =
            (executionId, parentExecutionId, auditWriter);
    }

    /// <summary>
    /// Selects the notification list to send to.
    /// </summary>
    /// <param name="listName">Name of the notification list to target.</param>
    /// <returns>A new target bound to <paramref name="listName"/> and to this helper's context.</returns>
    public NotifyTarget To(string listName) =>
        new NotifyTarget(
            listName,
            _storeAndForward,
            _siteId,
            _instanceName,
            _sourceScript,
            _logger,
            // Audit Log #23: per-execution id for the NotifySend row.
            _executionId,
            // Audit Log #23 (M4 Bundle C): writer used by Send() to emit one
            // NotifySend(Submitted) row per accepted submission.
            auditWriter: _auditWriter,
            // Audit Log #23 (ParentExecutionId): null for non-routed runs.
            parentExecutionId: _parentExecutionId,
            // SourceNode-stamping (Task 13): local node name for the payload.
            sourceNode: _sourceNode);

    /// <summary>
    /// Queries the delivery status of a previously-sent notification.
    ///
    /// Central is asked first, through the site communication actor. If
    /// central reports a row, its status is passed through verbatim. If
    /// central has no row (or could not be reached) and the id is still held
    /// in the local store-and-forward buffer, the site-local
    /// <c>Forwarding</c> state is reported. Otherwise the status is
    /// <c>Unknown</c>.
    /// </summary>
    /// <param name="notificationId">The notification identifier returned from Send.</param>
    /// <returns>The delivery status of the notification.</returns>
    /// <exception cref="InvalidOperationException">No site communication actor was supplied to this helper.</exception>
    public async Task<NotificationDeliveryStatus> Status(string notificationId)
    {
        if (_siteCommunicationActor is null)
        {
            throw new InvalidOperationException(
                "Notification status query is not available — site communication actor not wired");
        }

        var correlationId = Guid.NewGuid().ToString();
        var statusQuery = new NotificationStatusQuery(correlationId, notificationId);

        NotificationStatusResponse centralAnswer;
        try
        {
            centralAnswer = await _siteCommunicationActor
                .Ask<NotificationStatusResponse>(statusQuery, _askTimeout);
        }
        catch (Exception ex)
        {
            // Central could not be reached. Substitute a "not found" answer
            // so the local buffer check below still decides between
            // Forwarding and Unknown.
            _logger.LogWarning(ex,
                "Notification status query for {NotificationId} did not reach central",
                notificationId);
            centralAnswer = new NotificationStatusResponse(
                correlationId, Found: false, Status: "Unknown",
                RetryCount: 0, LastError: null, DeliveredAt: null);
        }

        if (centralAnswer.Found)
        {
            return new NotificationDeliveryStatus(
                centralAnswer.Status,
                centralAnswer.RetryCount,
                centralAnswer.LastError,
                centralAnswer.DeliveredAt);
        }

        // Central has no row. A notification still buffered at the site is
        // in transit, so report the site-local Forwarding state.
        if (_storeAndForward is not null
            && await _storeAndForward.GetMessageByIdAsync(notificationId) is { } bufferedMessage)
        {
            return new NotificationDeliveryStatus(
                "Forwarding", bufferedMessage.RetryCount, bufferedMessage.LastError, DeliveredAt: null);
        }

        // Neither central nor the local buffer knows the id: never sent, or
        // already forwarded and central lost it.
        return new NotificationDeliveryStatus("Unknown", 0, null, null);
    }
}
|
|
|
|
/// <summary>
/// Notification Outbox: target for <c>Notify.To("listName").Send(...)</c>.
/// Bound to one notification list plus the script-execution context that is
/// stamped onto the submitted payload and the <c>NotifySend</c> audit row.
/// </summary>
public class NotifyTarget
{
    private readonly string _listName;
    private readonly StoreAndForwardService? _storeAndForward;
    private readonly string _siteId;
    private readonly string _instanceName;
    private readonly string? _sourceScript;
    private readonly ILogger _logger;

    /// <summary>
    /// Audit Log #23: the per-execution id for this script run, stamped
    /// into the <c>NotifySend</c> audit row and into the payload's
    /// <c>OriginExecutionId</c>.
    /// </summary>
    private readonly Guid _executionId;

    /// <summary>
    /// Audit Log #23 (ParentExecutionId): the spawning execution's id when
    /// this run was inbound-API-routed; <c>null</c> for non-routed runs.
    /// Stamped into the <c>NotifySend</c> audit row and into the payload's
    /// <c>OriginParentExecutionId</c> by <see cref="Send"/>.
    /// </summary>
    private readonly Guid? _parentExecutionId;

    /// <summary>
    /// Audit Log #23 (M4 Bundle C): best-effort emitter for the
    /// <c>Notification</c>/<c>NotifySend</c> row written immediately after
    /// the store-and-forward enqueue accepts the submission. Optional —
    /// when null no audit row is emitted (no-op path).
    /// </summary>
    private readonly IAuditWriter? _auditWriter;

    /// <summary>
    /// SourceNode-stamping (Task 13): the cluster node name on which this
    /// script is executing (<c>node-a</c>/<c>node-b</c>). Stamped onto the
    /// <c>NotificationSubmit.SourceNode</c> field in <see cref="Send"/>.
    /// </summary>
    private readonly string? _sourceNode;

    /// <summary>
    /// Initializes a new notification target for a specific notification list.
    /// </summary>
    /// <param name="listName">Name of the notification list to target.</param>
    /// <param name="storeAndForward">Optional store-and-forward service for notification delivery.</param>
    /// <param name="siteId">Identifier of the site where this notification originates.</param>
    /// <param name="instanceName">Unique name of the instance sending the notification.</param>
    /// <param name="sourceScript">Optional name of the source script for audit trail.</param>
    /// <param name="logger">Logger for diagnostics and warnings.</param>
    /// <param name="executionId">Unique identifier for this script execution.</param>
    /// <param name="auditWriter">Optional writer for audit log entries.</param>
    /// <param name="parentExecutionId">Optional identifier of the parent execution (for routed calls).</param>
    /// <param name="sourceNode">Optional cluster node identifier (node-a/node-b) for audit stamping.</param>
    internal NotifyTarget(
        string listName,
        StoreAndForwardService? storeAndForward,
        string siteId,
        string instanceName,
        string? sourceScript,
        ILogger logger,
        Guid executionId,
        IAuditWriter? auditWriter = null,
        Guid? parentExecutionId = null,
        string? sourceNode = null)
    {
        _listName = listName;
        _storeAndForward = storeAndForward;
        _siteId = siteId;
        _instanceName = instanceName;
        _sourceScript = sourceScript;
        _logger = logger;
        _executionId = executionId;
        _auditWriter = auditWriter;
        _parentExecutionId = parentExecutionId;
        _sourceNode = sourceNode;
    }

    /// <summary>
    /// Enqueues a notification for central delivery and returns its
    /// <c>NotificationId</c> immediately.
    ///
    /// The notification is buffered into the site store-and-forward engine
    /// under the <see cref="StoreAndForwardCategory.Notification"/> category.
    /// The returned <c>NotificationId</c> is the single idempotency key: it
    /// is used as the buffered message id and is carried inside the buffered
    /// payload. Pass it to <see cref="NotifyHelper.Status"/> to track delivery.
    /// </summary>
    /// <param name="subject">Subject line for the notification.</param>
    /// <param name="message">Message body for the notification.</param>
    /// <param name="cancellationToken">
    /// Checked once on entry: when cancellation has already been requested
    /// the call throws before anything is enqueued or audited.
    /// </param>
    /// <returns>The notification identifier for status tracking.</returns>
    /// <exception cref="InvalidOperationException">No store-and-forward service was supplied.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was already cancelled on entry.</exception>
    public async Task<string> Send(
        string subject,
        string message,
        CancellationToken cancellationToken = default)
    {
        if (_storeAndForward is null)
        {
            throw new InvalidOperationException(
                "Notification store-and-forward engine not available");
        }

        // Honour the caller's token. It was previously accepted but never
        // read, so a script that had already been cancelled or timed out
        // could still enqueue a notification.
        cancellationToken.ThrowIfCancellationRequested();

        // The script controls the idempotency key: generate the NotificationId
        // here, use it as the buffered message id, and carry it inside the
        // payload so the same id is submitted to central on every retry.
        var notificationId = Guid.NewGuid().ToString("N");

        var payload = new NotificationSubmit(
            NotificationId: notificationId,
            ListName: _listName,
            Subject: subject,
            Body: message,
            // SourceSiteId is re-stamped by the forwarder from its own site
            // id; this value is the best-effort site id known to the script
            // runtime.
            SourceSiteId: _siteId,
            SourceInstanceId: _instanceName,
            // SourceScript (FU3): the script that raised this notification;
            // null when no single script owns the context.
            SourceScript: _sourceScript,
            SiteEnqueuedAt: DateTimeOffset.UtcNow,
            // OriginExecutionId (Audit Log #23): the SAME per-execution id
            // stamped onto this run's NotifySend audit row, carried in the
            // payload so central can echo it onto its NotifyDeliver rows.
            OriginExecutionId: _executionId,
            // OriginParentExecutionId (Audit Log #23): the spawning run's id
            // for an inbound-API-routed execution, null otherwise.
            OriginParentExecutionId: _parentExecutionId,
            // SourceNode-stamping (Task 13): node the notification was
            // emitted on (node-a/node-b).
            SourceNode: _sourceNode);

        var payloadJson = JsonSerializer.Serialize(payload);

        // Timestamp for the audit row, taken just before the enqueue.
        var occurredAtUtc = DateTime.UtcNow;

        // Pin the buffered message id to the NotificationId so the buffer
        // can be queried by it (Notify.Status) and the forwarder's
        // idempotency key matches the buffered row.
        await _storeAndForward.EnqueueAsync(
            StoreAndForwardCategory.Notification,
            target: _listName,
            payloadJson: payloadJson,
            originInstanceName: _instanceName,
            messageId: notificationId);

        _logger.LogDebug(
            "Notify enqueued notification {NotificationId} to list '{List}' for central delivery",
            notificationId, _listName);

        // Audit Log #23 (M4 Bundle C): one Notification/NotifySend
        // (Submitted) row per accepted submission. Emitted only AFTER
        // EnqueueAsync returns, so a failed enqueue (which throws) never
        // produces an audit row. Best-effort per alog.md §7: failures are
        // logged and swallowed, and Send still returns the NotificationId.
        EmitNotifySendAudit(notificationId, subject, message, occurredAtUtc);

        return notificationId;
    }

    /// <summary>
    /// Best-effort emission of one <c>Notification</c>/<c>NotifySend</c>
    /// (Status <c>Submitted</c>) audit row. Does nothing when no audit
    /// writer is configured. Failures while building or writing the event
    /// are logged and swallowed — audit-write failures must never abort the
    /// user-facing <c>Notify.Send</c> call (alog.md §7).
    /// </summary>
    /// <param name="notificationId">Notification id; parsed as a Guid for the correlation id.</param>
    /// <param name="subject">Subject captured in the request summary.</param>
    /// <param name="body">Body captured in the request summary.</param>
    /// <param name="occurredAtUtc">Timestamp recorded on the audit event.</param>
    private void EmitNotifySendAudit(
        string notificationId,
        string subject,
        string body,
        DateTime occurredAtUtc)
    {
        if (_auditWriter is null)
        {
            return;
        }

        AuditEvent evt;
        try
        {
            // CorrelationId is the NotificationId parsed as a Guid. Send
            // mints the id via Guid.NewGuid().ToString("N"), so the parse
            // is expected to succeed; an unparseable value leaves the
            // correlation id null rather than failing the emission.
            Guid? correlationId = Guid.TryParse(notificationId, out var parsed) ? parsed : null;

            // Request summary — {"subject": "...", "body": "..."}. Per the
            // original note, payload cap and per-target body redaction are
            // applied at write time by the audit redactor.
            var requestSummary = JsonSerializer.Serialize(new { subject, body });

            evt = ScadaBridgeAuditEventFactory.Create(
                channel: AuditChannel.Notification,
                kind: AuditKind.NotifySend,
                status: AuditStatus.Submitted,
                occurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
                // Outbound channel: the actor is the calling script; null
                // when no single script owns the call.
                actor: _sourceScript,
                target: _listName,
                // CorrelationId = per-operation lifecycle id;
                // ExecutionId = per-execution id.
                correlationId: correlationId,
                executionId: _executionId,
                // Audit Log #23 (ParentExecutionId): null for non-routed runs.
                parentExecutionId: _parentExecutionId,
                // An empty site id is recorded as null on the audit event.
                sourceSiteId: string.IsNullOrEmpty(_siteId) ? null : _siteId,
                sourceInstanceId: _instanceName,
                sourceScript: _sourceScript,
                httpStatus: null,
                // Send does not time anything itself, so no duration is
                // recorded on this row.
                durationMs: null,
                errorMessage: null,
                errorDetail: null,
                requestSummary: requestSummary,
                responseSummary: null,
                payloadTruncated: false,
                extra: null);
        }
        catch (Exception buildEx)
        {
            // Defensive: building the event itself must never propagate.
            _logger.LogWarning(buildEx,
                "Failed to build Audit Log #23 NotifySend event for NotificationId {NotificationId} list '{List}' — skipping emission",
                notificationId, _listName);
            return;
        }

        // Shared by the already-faulted and the faults-later paths below.
        void LogWriteFailure(Exception? failure) =>
            _logger.LogWarning(failure,
                "Audit Log #23 write failed for EventId {EventId} (NotifySend NotificationId {NotificationId})",
                evt.EventId, notificationId);

        try
        {
            // Fire-and-forget so the script is never blocked on the audit
            // writer. A fault is still observed and logged, either right
            // here or through a continuation, instead of surfacing as an
            // unobserved task exception.
            var writeTask = _auditWriter.WriteAsync(evt, CancellationToken.None);
            if (!writeTask.IsCompleted)
            {
                writeTask.ContinueWith(
                    t => LogWriteFailure(t.Exception),
                    CancellationToken.None,
                    TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously,
                    TaskScheduler.Default);
            }
            else if (writeTask.IsFaulted)
            {
                LogWriteFailure(writeTask.Exception);
            }
        }
        catch (Exception writeEx)
        {
            // Synchronous throw from WriteAsync itself. Swallow + log per
            // alog.md §7.
            _logger.LogWarning(writeEx,
                "Audit Log #23 write threw synchronously for EventId {EventId} (NotifySend NotificationId {NotificationId})",
                evt.EventId, notificationId);
        }
    }
}
|
|
|
|
/// <summary>
/// Audit Log #23 (M3): gives scripts access to cached-operation tracking.
/// <c>Tracking.Status(trackedOperationId)</c> is answered from the
/// site-local SQLite row through
/// <see cref="IOperationTrackingStore.GetStatusAsync"/>. Because the site is
/// the single source of truth for cached-call status, the lookup needs no
/// central round-trip and its answer is authoritative.
/// </summary>
public class TrackingHelper
{
    private readonly IOperationTrackingStore? _store;

    // Captured at construction; no member of this helper reads it at present.
    private readonly ILogger _logger;

    /// <summary>
    /// Creates the tracking helper handed to scripts.
    /// </summary>
    /// <param name="store">
    /// Store holding cached-operation status; may be <c>null</c> when the
    /// runtime was built without one, in which case <see cref="Status"/> throws.
    /// </param>
    /// <param name="logger">Logger for diagnostics and warnings.</param>
    internal TrackingHelper(IOperationTrackingStore? store, ILogger logger)
        => (_store, _logger) = (store, logger);

    /// <summary>
    /// Looks up the most recent tracking snapshot for the given id. The
    /// result is <c>null</c> when the id is unknown — either it was never
    /// recorded or it was purged once the retention window elapsed.
    /// </summary>
    /// <param name="trackedOperationId">Identifier of the tracked operation to look up.</param>
    /// <param name="cancellationToken">Token forwarded to the store's asynchronous lookup.</param>
    /// <returns>The tracking status snapshot, or <c>null</c> when no such operation is found.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown (synchronously, before any task is returned) when the script
    /// runtime was constructed without an
    /// <see cref="IOperationTrackingStore"/> — the same "service-not-wired"
    /// failure mode used by the other integration helpers.
    /// </exception>
    public Task<TrackingStatusSnapshot?> Status(
        TrackedOperationId trackedOperationId,
        CancellationToken cancellationToken = default)
    {
        // Guard first: without a wired store there is nothing to query.
        var store = _store
            ?? throw new InvalidOperationException("Operation tracking store not available");

        // Pass the store's task straight through; no extra async state machine.
        return store.GetStatusAsync(trackedOperationId, cancellationToken);
    }
}
|
|
}
|