Files
ScadaBridge/src/ScadaLink.Commons/Interfaces/Repositories/IAuditLogRepository.cs
T
Joseph Doherty 1eb6e972b0 docs: add XML doc comments across src + Sister Projects section in CLAUDE.md
Bulk CommentChecker pass: fills in <param>/<inheritdoc> tags on public
APIs across all 23 src/ projects so the doc-coverage gate is green. Also
adds a Sister Projects section to CLAUDE.md pointing at the MxAccess
Gateway and OtOpcUa sibling repos, and gitignores local credential
captures (*login*.txt) and the wonder-app-vd03 deploy/ artifacts.
2026-05-28 01:55:24 -04:00

199 lines
10 KiB
C#

using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Audit;
namespace ScadaLink.Commons.Interfaces.Repositories;
/// <summary>
/// Append-only repository contract for the central <c>AuditLog</c> table
/// (Audit Log #23).
/// </summary>
/// <remarks>
/// <para>
/// Two layers uphold the append-only invariant. At the SQL level, the
/// <c>scadalink_audit_writer</c> role is granted INSERT + SELECT only — neither
/// UPDATE nor DELETE. At the API level, this interface intentionally offers no
/// Update member and no single-row Delete member. The sole bulk-purge path is
/// the monthly partition switch-out (<see cref="SwitchOutPartitionAsync"/>).
/// </para>
/// <para>
/// Ingest is idempotent, keyed on <c>EventId</c>. Because
/// <see cref="InsertIfNotExistsAsync"/> is first-write-wins, telemetry retries
/// and reconciliation pulls may both feed the same writer without creating
/// duplicate rows.
/// </para>
/// </remarks>
public interface IAuditLogRepository
{
    /// <summary>
    /// Idempotent insert with first-write-wins semantics:
    /// <paramref name="evt"/> is written only when no stored row already
    /// carries the same <see cref="AuditEvent.EventId"/>; when one does, the
    /// stored row is left untouched and the call completes silently. The write
    /// bypasses the EF change tracker, so the row never enters a tracked state.
    /// </summary>
    /// <param name="evt">Audit event to insert.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>A task representing the asynchronous insert-or-skip operation.</returns>
    Task InsertIfNotExistsAsync(AuditEvent evt, CancellationToken ct = default);

    /// <summary>
    /// Fetches one page of rows that satisfy <paramref name="filter"/> — at
    /// most <see cref="AuditLogPaging.PageSize"/> of them — sorted by
    /// <c>(OccurredAtUtc DESC, EventId DESC)</c>. Paging is keyset-based: to
    /// read the next page, feed the last returned row's <c>OccurredAtUtc</c>
    /// and <c>EventId</c> back in through
    /// <see cref="AuditLogPaging.AfterOccurredAtUtc"/> and
    /// <see cref="AuditLogPaging.AfterEventId"/>.
    /// </summary>
    /// <param name="filter">Criteria the returned rows must match.</param>
    /// <param name="paging">Keyset cursor and page size.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// The matching rows for the requested page, in
    /// <c>(OccurredAtUtc DESC, EventId DESC)</c> order.
    /// </returns>
    Task<IReadOnlyList<AuditEvent>> QueryAsync(AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default);

    /// <summary>
    /// Purges one monthly partition — the one whose lower bound is
    /// <paramref name="monthBoundary"/> — by switching it out of the table.
    /// The returned row count is approximate and is sampled inside the
    /// transaction BEFORE the switch, so it describes what the switch removed
    /// rather than a post-purge scan of a table that no longer exists.
    /// </summary>
    /// <remarks>
    /// <para>
    /// <b>Drop-and-rebuild dance.</b> The <c>UX_AuditLog_EventId</c> index is
    /// deliberately not partition-aligned: it lives on <c>[PRIMARY]</c> so that
    /// the single-column EventId uniqueness required by
    /// <see cref="InsertIfNotExistsAsync"/> can be enforced cheaply. SQL Server
    /// rejects <c>ALTER TABLE … SWITCH PARTITION</c> while a non-aligned unique
    /// index is present, so the M6 implementation proceeds in this order: drop
    /// the index; create a staging table with a byte-identical schema; switch
    /// the partition's data into staging; drop staging (which discards the
    /// rows); rebuild the unique index. The CATCH branch guarantees the index
    /// is rebuilt even on partial failure, so the table never returns to live
    /// traffic without its idempotency-supporting index.
    /// </para>
    /// <para>
    /// <b>Outage window.</b> While the unique index is briefly absent,
    /// concurrent <see cref="InsertIfNotExistsAsync"/> calls could in principle
    /// race past the IF NOT EXISTS check with no index left to catch the
    /// duplicate. For the daily purge cadence this is acceptable: the inserts
    /// guarded by the IF NOT EXISTS check are themselves rare enough that a
    /// sub-second collision window is operationally negligible, and the
    /// composite PK still rejects same-(EventId, OccurredAtUtc) rows.
    /// </para>
    /// </remarks>
    /// <param name="monthBoundary">Lower-bound datetime identifying the monthly partition to switch out.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The approximate number of rows discarded by the switch.</returns>
    Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default);

    /// <summary>
    /// Lists the <c>pf_AuditLog_Month</c> partition lower-bound boundaries
    /// whose partitions hold only rows with
    /// <see cref="AuditEvent.OccurredAtUtc"/> strictly older than
    /// <paramref name="threshold"/>. A boundary whose partition is empty is
    /// left out, because switching it would be a wasted no-op. The M6 purge
    /// actor calls this on every tick to enumerate retention-eligible months.
    /// </summary>
    /// <param name="threshold">UTC cutoff; a partition qualifies only when all of its data is older than this value.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The lower-bound boundaries of the non-empty partitions that qualify.</returns>
    Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(DateTime threshold, CancellationToken ct = default);

    /// <summary>
    /// Audit Log (#23) M7 Bundle E (T13) — computes the aggregate counts over
    /// the trailing <paramref name="window"/> that drive the Audit KPI tiles
    /// on the central Health dashboard.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The counts come from a single aggregate query
    /// (<c>SELECT COUNT_BIG(*) AS Total, SUM(CASE …) AS Errors</c>) instead of
    /// two round trips, so the volume and error-rate tiles read a consistent
    /// snapshot — denominator and numerator are produced by the same scan.
    /// </para>
    /// <para>
    /// A row counts as an error when its status is
    /// <see cref="ScadaLink.Commons.Types.Enums.AuditStatus.Failed"/>,
    /// <see cref="ScadaLink.Commons.Types.Enums.AuditStatus.Parked"/>, or
    /// <see cref="ScadaLink.Commons.Types.Enums.AuditStatus.Discarded"/> —
    /// that is, every non-success terminal lifecycle state. <c>Submitted</c>,
    /// <c>Forwarded</c> and <c>Attempted</c> are in-flight and are NOT errors;
    /// <c>Delivered</c> is success; <c>Skipped</c> is an intentional no-op.
    /// </para>
    /// </remarks>
    /// <param name="window">
    /// Length of the trailing window (e.g. <c>TimeSpan.FromHours(1)</c>). Rows
    /// with <c>OccurredAtUtc &gt;= nowUtc - window</c> are counted, and
    /// <paramref name="nowUtc"/> is the upper bound.
    /// </param>
    /// <param name="nowUtc">
    /// Optional explicit "now" that anchors the trailing window. When null it
    /// defaults to <see cref="DateTime.UtcNow"/> at call time. Production
    /// callers should leave it null; tests pin a deterministic value so the
    /// window is reproducible across runs.
    /// </param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// A snapshot whose <c>TotalEventsLastHour</c> and
    /// <c>ErrorEventsLastHour</c> are populated. <c>BacklogTotal</c> stays at
    /// zero because this method cannot see per-site backlogs — the service
    /// layer composes that value in from
    /// <see cref="ScadaLink.HealthMonitoring.ICentralHealthAggregator"/>.
    /// <c>AsOfUtc</c> is set to the server-side <c>UtcNow</c> at the time of
    /// the query.
    /// </returns>
    Task<AuditLogKpiSnapshot> GetKpiSnapshotAsync(TimeSpan window, DateTime? nowUtc = null, CancellationToken ct = default);

    /// <summary>
    /// Audit Log ParentExecutionId feature (Task 8) — resolves the complete
    /// execution chain that contains <paramref name="executionId"/>, rooted at
    /// its topmost ancestor. The result holds one
    /// <see cref="ExecutionTreeNode"/> per distinct execution, each summarising
    /// that execution's <c>AuditLog</c> rows; the Central UI renders it as a
    /// tree.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Any node of the chain is a valid entry point — leaf, root, or a node in
    /// the middle. The implementation first climbs <em>up</em> through
    /// <c>ParentExecutionId</c> until it finds the root, then descends
    /// <em>down</em> from that root with a recursive CTE, so the whole chain
    /// comes back regardless of where the walk started.
    /// </para>
    /// <para>
    /// The <c>ParentExecutionId</c> graph is a tree (acyclic by construction —
    /// each execution is minted fresh and its parent always pre-exists). Even
    /// so, both the upward walk and the downward CTE are capped at 32 levels
    /// as a guard against corrupt/pathological data: exceeding the cap raises
    /// an error rather than hanging the server. Chains are shallow (1-2 levels
    /// typical), so the guard is never reached in practice.
    /// </para>
    /// <para>
    /// A "stub" node still appears in the result, with
    /// <see cref="ExecutionTreeNode.RowCount"/> = 0. A stub is an execution
    /// that emitted no rows of its own yet is referenced by a child via
    /// <c>ParentExecutionId</c>, or one whose rows have been purged. A
    /// purged/missing parent simply ends the upward walk.
    /// </para>
    /// <para>
    /// If no <c>AuditLog</c> row carries <paramref name="executionId"/> in
    /// either <c>ExecutionId</c> or <c>ParentExecutionId</c>, the result is a
    /// single stub node for <paramref name="executionId"/> itself
    /// (<see cref="ExecutionTreeNode.RowCount"/> = 0) — consistent with the
    /// stub-node treatment of any other row-less execution.
    /// </para>
    /// </remarks>
    /// <param name="executionId">Id of any execution in the chain; the implementation walks up to the root and back down.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>One <see cref="ExecutionTreeNode"/> per distinct execution in the chain.</returns>
    Task<IReadOnlyList<ExecutionTreeNode>> GetExecutionTreeAsync(Guid executionId, CancellationToken ct = default);

    /// <summary>
    /// Lists the distinct, non-null <c>SourceNode</c> values found in the
    /// <c>AuditLog</c> table, sorted ascending. This feeds the "Node"
    /// multi-select filter dropdown on the Audit Log page; the Central UI
    /// caches the result for ~60s, so the repository is hit at most once per
    /// minute per circuit.
    /// </summary>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The distinct, non-null <c>SourceNode</c> values in ascending order.</returns>
    Task<IReadOnlyList<string>> GetDistinctSourceNodesAsync(CancellationToken ct = default);
}