1eb6e972b0
Bulk CommentChecker pass: fills in <param>/<inheritdoc> tags on public APIs across all 23 src/ projects so the doc-coverage gate is green. Also adds a Sister Projects section to CLAUDE.md pointing at the MxAccess Gateway and OtOpcUa sibling repos, and gitignores local credential captures (*login*.txt) and the wonder-app-vd03 deploy/ artifacts.
139 lines
5.5 KiB
C#
139 lines
5.5 KiB
C#
using Microsoft.Extensions.Hosting;
|
|
using Microsoft.Extensions.Logging;
|
|
using ScadaLink.Commons.Interfaces.Services;
|
|
using ScadaLink.HealthMonitoring;
|
|
|
|
namespace ScadaLink.AuditLog.Site;
|
|
|
|
/// <summary>
/// Audit Log (#23) M6 Bundle E (T6) — site-side hosted service that
/// periodically pulls a backlog snapshot from <see cref="ISiteAuditQueue"/>
/// and pushes it into <see cref="ISiteHealthCollector"/> so the next
/// <see cref="ISiteHealthCollector.CollectReport"/> emits a fresh
/// <c>SiteAuditBacklog</c> field on the site health report.
/// </summary>
/// <remarks>
/// <para>
/// <b>Why a hosted service, not the report sender.</b> Querying SQLite for the
/// backlog requires the queue's write lock; doing it inline in
/// <see cref="ISiteHealthCollector.CollectReport"/> would couple the collector
/// to <see cref="ISiteAuditQueue"/> and turn an in-memory snapshot read into
/// a synchronous I/O call on the report path. The hosted-service pattern keeps
/// the report path pure and the SQL probe off the report timing budget.
/// </para>
/// <para>
/// <b>Cadence.</b> 30 s by default — coarse enough to amortise the SQL probe
/// across many reports, fine enough that the central dashboard never lags by
/// more than one health-report interval. Tunable via
/// <see cref="ScadaLink.AuditLog.Site.SqliteAuditWriterOptions"/> in a follow-up
/// if ops needs a different cadence; for M6 we hard-code the value because the
/// brief calls it out explicitly.
/// </para>
/// <para>
/// <b>Failure containment.</b> The probe call is wrapped in a try/catch so a
/// transient SQLite error never tears down the hosted service — the next tick
/// retries. Mirrors <see cref="ScadaLink.AuditLog.Central.AuditLogPartitionMaintenanceService"/>'s
/// "exception logged, not propagated" contract.
/// </para>
/// <para>
/// <b>Shutdown.</b> Cancellation of the loop token is treated as a normal exit:
/// the loop task completes successfully rather than in the Canceled state, so
/// <see cref="StopAsync"/> does not surface a <see cref="TaskCanceledException"/>.
/// <see cref="StopAsync"/> also stops waiting for the loop once its own token
/// is cancelled.
/// </para>
/// </remarks>
public sealed class SiteAuditBacklogReporter : IHostedService, IDisposable
{
    /// <summary>
    /// Default poll cadence. Half a typical 60 s health-report interval keeps
    /// the snapshot fresh without spinning the SQL probe more often than
    /// necessary.
    /// </summary>
    internal static readonly TimeSpan DefaultRefreshInterval = TimeSpan.FromSeconds(30);

    private readonly ISiteAuditQueue _queue;
    private readonly ISiteHealthCollector _collector;
    private readonly ILogger<SiteAuditBacklogReporter> _logger;
    private readonly TimeSpan _refreshInterval;

    // Owned by this instance: created in StartAsync, cancelled by StopAsync or
    // Dispose, disposed (and nulled) by Dispose.
    private CancellationTokenSource? _cts;

    // The background polling loop; null until StartAsync has run.
    private Task? _loop;

    /// <summary>Initializes a new instance of <see cref="SiteAuditBacklogReporter"/>.</summary>
    /// <param name="queue">The site audit queue used to probe the backlog count.</param>
    /// <param name="collector">The site health collector that receives the backlog snapshot.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="refreshInterval">Poll interval override; defaults to <see cref="DefaultRefreshInterval"/> (30 s).</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="queue"/>, <paramref name="collector"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public SiteAuditBacklogReporter(
        ISiteAuditQueue queue,
        ISiteHealthCollector collector,
        ILogger<SiteAuditBacklogReporter> logger,
        TimeSpan? refreshInterval = null)
    {
        _queue = queue ?? throw new ArgumentNullException(nameof(queue));
        _collector = collector ?? throw new ArgumentNullException(nameof(collector));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _refreshInterval = refreshInterval ?? DefaultRefreshInterval;
    }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken ct)
    {
        // Linked CTS lets StopAsync's cancellation AND the token handed to
        // StartAsync both terminate the loop; either side firing aborts the
        // pending Task.Delay.
        var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        _cts = cts;

        // Capture the token up front so the loop never dereferences the
        // mutable _cts field (which Dispose may null out) when it starts.
        var loopToken = cts.Token;
        _loop = Task.Run(() => RunLoopAsync(loopToken));
        return Task.CompletedTask;
    }

    /// <summary>
    /// Probes once immediately, then once per refresh interval, until
    /// <paramref name="ct"/> is cancelled.
    /// </summary>
    /// <param name="ct">Token that ends the loop.</param>
    /// <returns>A task that completes successfully once the loop has been cancelled.</returns>
    private async Task RunLoopAsync(CancellationToken ct)
    {
        try
        {
            // First tick runs immediately so the very first health report after
            // process start carries a real backlog snapshot — without this the
            // dashboard would show null for the first 30 s after a deploy.
            await SafeProbeAsync(ct).ConfigureAwait(false);

            while (!ct.IsCancellationRequested)
            {
                await Task.Delay(_refreshInterval, ct).ConfigureAwait(false);
                await SafeProbeAsync(ct).ConfigureAwait(false);
            }
        }
        catch (OperationCanceledException)
        {
            // Shutdown: cancellation observed either while waiting for the next
            // tick or inside a probe. Swallow it here so the loop task finishes
            // in the RanToCompletion state and StopAsync does not throw.
        }
    }

    /// <summary>
    /// Runs a single backlog probe and forwards the snapshot to the collector,
    /// logging (not propagating) any failure other than loop cancellation.
    /// </summary>
    /// <param name="ct">Loop token, passed through to the queue probe.</param>
    /// <exception cref="OperationCanceledException">
    /// The probe was interrupted because <paramref name="ct"/> was cancelled.
    /// </exception>
    private async Task SafeProbeAsync(CancellationToken ct)
    {
        try
        {
            var snapshot = await _queue.GetBacklogStatsAsync(ct).ConfigureAwait(false);
            _collector.UpdateSiteAuditBacklog(snapshot);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown — let the outer loop exit cleanly. An
            // OperationCanceledException that is NOT caused by our token
            // falls through to the catch-all below and is retried like any
            // other probe failure.
            throw;
        }
        catch (Exception ex)
        {
            // Catch-all is deliberate: the hosted service must survive every
            // class of probe failure (transient SQLite lock contention, disk
            // I/O hiccup, …) so the next tick gets a chance.
            _logger.LogWarning(ex, "SiteAuditBacklogReporter probe failed; next tick will retry.");
        }
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken ct)
    {
        var loop = _loop;
        if (loop is null)
        {
            // Never started — nothing to stop.
            return;
        }

        _cts?.Cancel();

        // Wait for the loop to drain, but give up as soon as the caller's
        // token fires so a stuck probe cannot block shutdown indefinitely.
        var finished = await Task.WhenAny(loop, Task.Delay(Timeout.Infinite, ct)).ConfigureAwait(false);
        if (ReferenceEquals(finished, loop))
        {
            // Surface an unexpected loop fault to the caller; a cancelled loop
            // completes successfully, so this does not throw on normal shutdown.
            await loop.ConfigureAwait(false);
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Swap the field out first so repeated Dispose calls (or a StopAsync
        // after Dispose) never touch an already-disposed source.
        var cts = Interlocked.Exchange(ref _cts, null);
        if (cts is null)
        {
            return;
        }

        try
        {
            // Cancel before disposing so a loop that was never stopped via
            // StopAsync still terminates instead of polling forever.
            cts.Cancel();
        }
        finally
        {
            cts.Dispose();
        }
    }
}
|