feat(siteruntime): OperationTrackingStore site-local SQLite (#23 M3)

This commit is contained in:
Joseph Doherty
2026-05-20 13:51:09 -04:00
parent 1c38dd540f
commit b86d7c61ab
5 changed files with 767 additions and 0 deletions

View File

@@ -0,0 +1,87 @@
using ScadaLink.Commons.Types;
namespace ScadaLink.Commons.Interfaces;
/// <summary>
/// Site-local source of truth for cached-operation tracking
/// (<c>ExternalSystem.CachedCall</c> / <c>Database.CachedWrite</c>) — alongside the
/// Store-and-Forward buffer, this is the row that <c>Tracking.Status(id)</c>
/// reads (Audit Log #23 / M3). One row per <see cref="TrackedOperationId"/>;
/// terminal rows are purged after a configurable retention window
/// (default 7 days).
/// </summary>
/// <remarks>
/// <para>
/// The store is intentionally a thin write-API on top of SQLite — not a
/// dispatcher. Status transitions follow
/// <c>Submitted → Retrying → Delivered / Parked / Failed / Discarded</c>; rows
/// in a terminal state never roll back. Implementations must:
/// <list type="bullet">
/// <item><description><see cref="RecordEnqueueAsync"/> is insert-if-not-exists
/// (caller-supplied id is the idempotency key — duplicate enqueues are no-ops).</description></item>
/// <item><description><see cref="RecordAttemptAsync"/> only updates non-terminal rows.</description></item>
/// <item><description><see cref="RecordTerminalAsync"/> only flips a non-terminal row to terminal.</description></item>
/// <item><description><see cref="PurgeTerminalAsync"/> deletes terminal rows whose
/// <c>TerminalAtUtc</c> is strictly older than the supplied threshold.</description></item>
/// </list>
/// </para>
/// </remarks>
public interface IOperationTrackingStore
{
/// <summary>
/// Insert a new tracking row in <c>Submitted</c> state with <c>RetryCount = 0</c>.
/// Idempotent — a duplicate id is silently ignored (the existing row is left
/// untouched), matching the at-least-once semantics of the calling site
/// store-and-forward path.
/// </summary>
Task RecordEnqueueAsync(
TrackedOperationId id,
string kind,
string? targetSummary,
string? sourceInstanceId,
string? sourceScript,
CancellationToken ct = default);
/// <summary>
/// Advance an in-flight tracking row's status, retry counter, and most-
/// recent error/HTTP-status. Terminal rows (<see cref="RecordTerminalAsync"/>
/// already applied) are NOT mutated — the operation has reached its final
/// outcome and any late-arriving attempt telemetry is dropped on the floor.
/// </summary>
Task RecordAttemptAsync(
TrackedOperationId id,
string status,
int retryCount,
string? lastError,
int? httpStatus,
CancellationToken ct = default);
/// <summary>
/// Flip a non-terminal tracking row to terminal — sets
/// <c>TerminalAtUtc = now</c> and writes the final status / error. A row
/// already in terminal state is left untouched (first-write-wins).
/// </summary>
Task RecordTerminalAsync(
TrackedOperationId id,
string status,
string? lastError,
int? httpStatus,
CancellationToken ct = default);
/// <summary>
/// Return the latest snapshot for the supplied id, or <c>null</c> when no
/// tracking row exists (purged or never recorded).
/// </summary>
Task<TrackingStatusSnapshot?> GetStatusAsync(
TrackedOperationId id,
CancellationToken ct = default);
/// <summary>
/// Delete terminal rows whose <c>TerminalAtUtc</c> is strictly older than
/// <paramref name="olderThanUtc"/>. Non-terminal rows are kept regardless
/// of age (the operation is still in flight).
/// </summary>
Task PurgeTerminalAsync(
DateTime olderThanUtc,
CancellationToken ct = default);
}

View File

@@ -0,0 +1,40 @@
namespace ScadaLink.Commons.Types;
/// <summary>
/// Site-local snapshot of a cached operation's tracking state, returned by the
/// <c>Tracking.Status(TrackedOperationId)</c> script API (Audit Log #23 / M3).
/// </summary>
/// <param name="Id">Tracking handle returned by <c>CachedCall</c>/<c>CachedWrite</c>.</param>
/// <param name="Kind">
/// Operation category — <c>"ApiCallCached"</c> or <c>"DbWriteCached"</c> — mirroring
/// the <see cref="ScadaLink.Commons.Types.Enums.AuditKind"/> per-attempt vocabulary.
/// </param>
/// <param name="TargetSummary">
/// Human-readable target (e.g. <c>"ERP.GetOrder"</c> or <c>"WarehouseDb"</c>); may be
/// null for early-lifecycle rows recorded before the target was resolved.
/// </param>
/// <param name="Status">
/// Lifecycle status — one of <c>Submitted</c>, <c>Forwarded</c>, <c>Retrying</c>,
/// <c>Attempted</c>, <c>Delivered</c>, <c>Failed</c>, <c>Parked</c>, <c>Discarded</c>.
/// </param>
/// <param name="RetryCount">Number of attempts made; 0 prior to first dispatch.</param>
/// <param name="LastError">Most recent error message; null while non-terminal-and-no-failures.</param>
/// <param name="HttpStatus">Most recent HTTP status code where applicable; null otherwise.</param>
/// <param name="CreatedAtUtc">UTC timestamp the tracking row was first recorded.</param>
/// <param name="UpdatedAtUtc">UTC timestamp of the latest status mutation.</param>
/// <param name="TerminalAtUtc">UTC timestamp the row reached a terminal status; null while still active.</param>
/// <param name="SourceInstanceId">Instance id that issued the cached call, when known.</param>
/// <param name="SourceScript">Script that issued the cached call, when known.</param>
public sealed record TrackingStatusSnapshot(
TrackedOperationId Id,
string Kind,
string? TargetSummary,
string Status,
int RetryCount,
string? LastError,
int? HttpStatus,
DateTime CreatedAtUtc,
DateTime UpdatedAtUtc,
DateTime? TerminalAtUtc,
string? SourceInstanceId,
string? SourceScript);

View File

@@ -0,0 +1,21 @@
namespace ScadaLink.SiteRuntime.Tracking;
/// <summary>
/// Options for <see cref="OperationTrackingStore"/> — site-local cached-call
/// tracking SQLite store (Audit Log #23 / M3).
/// </summary>
public class OperationTrackingOptions
{
/// <summary>
/// Full ADO.NET connection string for the SQLite database (e.g.
/// <c>"Data Source=site-tracking.db"</c>). Tests use the
/// <c>Mode=Memory;Cache=Shared</c> form to keep the database in-memory.
/// </summary>
public string ConnectionString { get; set; } = "Data Source=site-tracking.db";
/// <summary>
/// Retention window for terminal tracking rows. The default purge cadence
/// (driven by the host) deletes terminal rows older than this many days.
/// </summary>
public int RetentionDays { get; set; } = 7;
}

View File

@@ -0,0 +1,333 @@
using System.Globalization;
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Interfaces;
using ScadaLink.Commons.Types;
namespace ScadaLink.SiteRuntime.Tracking;
/// <summary>
/// Site-local SQLite source-of-truth for cached-operation tracking — the row
/// that <c>Tracking.Status(TrackedOperationId)</c> reads (Audit Log #23 / M3).
/// </summary>
/// <remarks>
/// <para>
/// One row per <see cref="TrackedOperationId"/>; lifecycle is
/// <c>Submitted → Retrying → Delivered / Parked / Failed / Discarded</c>; terminal
/// rows are purged after the configured retention window
/// (<see cref="OperationTrackingOptions.RetentionDays"/>). Volume is bounded —
/// only cached calls produce rows, and only a handful of lifecycle events per
/// call — so we keep the implementation deliberately simple: a single owned
/// <see cref="SqliteConnection"/> serialised behind a <see cref="SemaphoreSlim"/>
/// (one async writer at a time). This is the pattern the M3 brief calls out as
/// "cleaner than the M2 Channel&lt;T&gt; pipeline given the volume"; the M2
/// audit-writer's batched-channel design is reserved for the high-volume audit
/// hot-path.
/// </para>
/// <para>
/// All mutations are idempotent / monotonic: <see cref="RecordEnqueueAsync"/> is
/// <c>INSERT OR IGNORE</c>, <see cref="RecordAttemptAsync"/> filters out terminal
/// rows in the <c>WHERE</c> clause, and <see cref="RecordTerminalAsync"/> only
/// fires on rows that haven't terminated yet (first-write-wins). This makes the
/// store safe under the at-least-once semantics of the site→central telemetry
/// path.
/// </para>
/// </remarks>
public class OperationTrackingStore : IOperationTrackingStore, IAsyncDisposable, IDisposable
{
private readonly SqliteConnection _connection;
private readonly SemaphoreSlim _gate = new(1, 1);
private readonly ILogger<OperationTrackingStore> _logger;
private bool _disposed;
public OperationTrackingStore(
IOptions<OperationTrackingOptions> options,
ILogger<OperationTrackingStore> logger)
{
ArgumentNullException.ThrowIfNull(options);
ArgumentNullException.ThrowIfNull(logger);
_logger = logger;
_connection = new SqliteConnection(options.Value.ConnectionString);
_connection.Open();
InitializeSchema();
}
private void InitializeSchema()
{
using var cmd = _connection.CreateCommand();
cmd.CommandText = """
CREATE TABLE IF NOT EXISTS OperationTracking (
TrackedOperationId TEXT NOT NULL PRIMARY KEY,
Kind TEXT NOT NULL,
TargetSummary TEXT NULL,
Status TEXT NOT NULL,
RetryCount INTEGER NOT NULL DEFAULT 0,
LastError TEXT NULL,
HttpStatus INTEGER NULL,
CreatedAtUtc TEXT NOT NULL,
UpdatedAtUtc TEXT NOT NULL,
TerminalAtUtc TEXT NULL,
SourceInstanceId TEXT NULL,
SourceScript TEXT NULL
);
CREATE INDEX IF NOT EXISTS IX_OperationTracking_Status_Updated
ON OperationTracking (Status, UpdatedAtUtc);
""";
cmd.ExecuteNonQuery();
}
/// <inheritdoc/>
public async Task RecordEnqueueAsync(
TrackedOperationId id,
string kind,
string? targetSummary,
string? sourceInstanceId,
string? sourceScript,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(kind);
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
ObjectDisposedException.ThrowIf(_disposed, this);
var now = DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture);
using var cmd = _connection.CreateCommand();
// INSERT OR IGNORE: duplicate ids are no-ops (first-write-wins) —
// matches the at-least-once semantics the site emits under.
cmd.CommandText = """
INSERT OR IGNORE INTO OperationTracking (
TrackedOperationId, Kind, TargetSummary, Status,
RetryCount, LastError, HttpStatus,
CreatedAtUtc, UpdatedAtUtc, TerminalAtUtc,
SourceInstanceId, SourceScript
) VALUES (
$id, $kind, $targetSummary, $status,
0, NULL, NULL,
$now, $now, NULL,
$sourceInstanceId, $sourceScript
);
""";
cmd.Parameters.AddWithValue("$id", id.ToString());
cmd.Parameters.AddWithValue("$kind", kind);
cmd.Parameters.AddWithValue("$targetSummary", (object?)targetSummary ?? DBNull.Value);
cmd.Parameters.AddWithValue("$status", "Submitted");
cmd.Parameters.AddWithValue("$now", now);
cmd.Parameters.AddWithValue("$sourceInstanceId", (object?)sourceInstanceId ?? DBNull.Value);
cmd.Parameters.AddWithValue("$sourceScript", (object?)sourceScript ?? DBNull.Value);
cmd.ExecuteNonQuery();
}
finally
{
_gate.Release();
}
}
/// <inheritdoc/>
public async Task RecordAttemptAsync(
TrackedOperationId id,
string status,
int retryCount,
string? lastError,
int? httpStatus,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(status);
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
ObjectDisposedException.ThrowIf(_disposed, this);
var now = DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture);
using var cmd = _connection.CreateCommand();
// Terminal rows are immutable — the WHERE clause filters them out so
// late-arriving attempt telemetry never overwrites a resolved row.
cmd.CommandText = """
UPDATE OperationTracking
SET Status = $status,
RetryCount = $retryCount,
LastError = $lastError,
HttpStatus = $httpStatus,
UpdatedAtUtc = $now
WHERE TrackedOperationId = $id
AND TerminalAtUtc IS NULL;
""";
cmd.Parameters.AddWithValue("$id", id.ToString());
cmd.Parameters.AddWithValue("$status", status);
cmd.Parameters.AddWithValue("$retryCount", retryCount);
cmd.Parameters.AddWithValue("$lastError", (object?)lastError ?? DBNull.Value);
cmd.Parameters.AddWithValue("$httpStatus", (object?)httpStatus ?? DBNull.Value);
cmd.Parameters.AddWithValue("$now", now);
cmd.ExecuteNonQuery();
}
finally
{
_gate.Release();
}
}
/// <inheritdoc/>
public async Task RecordTerminalAsync(
TrackedOperationId id,
string status,
string? lastError,
int? httpStatus,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(status);
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
ObjectDisposedException.ThrowIf(_disposed, this);
var now = DateTime.UtcNow.ToString("o", CultureInfo.InvariantCulture);
using var cmd = _connection.CreateCommand();
// First-write-wins on the terminal flip: only update rows that
// haven't already terminated.
cmd.CommandText = """
UPDATE OperationTracking
SET Status = $status,
LastError = $lastError,
HttpStatus = $httpStatus,
UpdatedAtUtc = $now,
TerminalAtUtc = $now
WHERE TrackedOperationId = $id
AND TerminalAtUtc IS NULL;
""";
cmd.Parameters.AddWithValue("$id", id.ToString());
cmd.Parameters.AddWithValue("$status", status);
cmd.Parameters.AddWithValue("$lastError", (object?)lastError ?? DBNull.Value);
cmd.Parameters.AddWithValue("$httpStatus", (object?)httpStatus ?? DBNull.Value);
cmd.Parameters.AddWithValue("$now", now);
cmd.ExecuteNonQuery();
}
finally
{
_gate.Release();
}
}
/// <inheritdoc/>
public async Task<TrackingStatusSnapshot?> GetStatusAsync(
TrackedOperationId id,
CancellationToken ct = default)
{
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
ObjectDisposedException.ThrowIf(_disposed, this);
using var cmd = _connection.CreateCommand();
cmd.CommandText = """
SELECT TrackedOperationId, Kind, TargetSummary, Status,
RetryCount, LastError, HttpStatus,
CreatedAtUtc, UpdatedAtUtc, TerminalAtUtc,
SourceInstanceId, SourceScript
FROM OperationTracking
WHERE TrackedOperationId = $id;
""";
cmd.Parameters.AddWithValue("$id", id.ToString());
using var reader = cmd.ExecuteReader();
if (!reader.Read())
{
return null;
}
return new TrackingStatusSnapshot(
Id: TrackedOperationId.Parse(reader.GetString(0)),
Kind: reader.GetString(1),
TargetSummary: reader.IsDBNull(2) ? null : reader.GetString(2),
Status: reader.GetString(3),
RetryCount: reader.GetInt32(4),
LastError: reader.IsDBNull(5) ? null : reader.GetString(5),
HttpStatus: reader.IsDBNull(6) ? null : reader.GetInt32(6),
CreatedAtUtc: ParseUtc(reader.GetString(7)),
UpdatedAtUtc: ParseUtc(reader.GetString(8)),
TerminalAtUtc: reader.IsDBNull(9) ? null : ParseUtc(reader.GetString(9)),
SourceInstanceId: reader.IsDBNull(10) ? null : reader.GetString(10),
SourceScript: reader.IsDBNull(11) ? null : reader.GetString(11));
}
finally
{
_gate.Release();
}
}
/// <inheritdoc/>
public async Task PurgeTerminalAsync(
DateTime olderThanUtc,
CancellationToken ct = default)
{
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
ObjectDisposedException.ThrowIf(_disposed, this);
using var cmd = _connection.CreateCommand();
// Non-terminal rows (TerminalAtUtc IS NULL) are kept regardless of
// age — the operation is still in flight.
cmd.CommandText = """
DELETE FROM OperationTracking
WHERE TerminalAtUtc IS NOT NULL
AND TerminalAtUtc < $threshold;
""";
cmd.Parameters.AddWithValue(
"$threshold",
olderThanUtc.ToString("o", CultureInfo.InvariantCulture));
cmd.ExecuteNonQuery();
}
finally
{
_gate.Release();
}
}
private static DateTime ParseUtc(string raw)
{
return DateTime.Parse(
raw,
CultureInfo.InvariantCulture,
DateTimeStyles.RoundtripKind);
}
public void Dispose()
{
DisposeAsyncCore().AsTask().GetAwaiter().GetResult();
GC.SuppressFinalize(this);
}
public async ValueTask DisposeAsync()
{
await DisposeAsyncCore().ConfigureAwait(false);
GC.SuppressFinalize(this);
}
private async ValueTask DisposeAsyncCore()
{
await _gate.WaitAsync().ConfigureAwait(false);
try
{
if (_disposed) return;
_disposed = true;
_connection.Dispose();
}
finally
{
_gate.Release();
_gate.Dispose();
}
}
}