Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging

Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 20:57:25 -04:00
parent a3bf0c43f3
commit 389f5a0378
97 changed files with 8308 additions and 127 deletions

View File

@@ -0,0 +1,120 @@
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ScadaLink.SiteEventLogging;
/// <summary>
/// Background service that periodically purges old events from the SQLite event log.
/// Enforces both time-based retention (default 30 days) and storage cap (default 1GB).
/// </summary>
/// <remarks>
/// A purge pass runs once when the service starts and then once per
/// <see cref="SiteEventLogOptions.PurgeInterval"/>. Errors inside a pass are logged and
/// swallowed so that a failing pass does not stop the schedule.
/// NOTE(review): the purge uses the event logger's connection directly and does not take the
/// logger's write lock — confirm that sharing the connection across threads is acceptable.
/// </remarks>
public class EventLogPurgeService : BackgroundService
{
    /// <summary>Number of oldest rows deleted per iteration while enforcing the storage cap.</summary>
    private const int StorageCapBatchSize = 1000;

    private readonly SiteEventLogger _eventLogger;
    private readonly SiteEventLogOptions _options;
    private readonly ILogger<EventLogPurgeService> _logger;

    /// <summary>
    /// Creates the purge service.
    /// </summary>
    /// <param name="eventLogger">Event logger; must be the concrete <see cref="SiteEventLogger"/>.</param>
    /// <param name="options">Retention, storage cap and purge interval settings.</param>
    /// <param name="logger">Diagnostic logger.</param>
    /// <exception cref="InvalidCastException">
    /// <paramref name="eventLogger"/> is not a <see cref="SiteEventLogger"/>.
    /// </exception>
    public EventLogPurgeService(
        ISiteEventLogger eventLogger,
        IOptions<SiteEventLogOptions> options,
        ILogger<EventLogPurgeService> logger)
    {
        // We need the concrete type to access the connection
        _eventLogger = (SiteEventLogger)eventLogger;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Runs one purge immediately, then one purge per timer tick until the host stops.
    /// </summary>
    /// <param name="stoppingToken">Signalled when the host is shutting down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation(
            "Event log purge service started — retention: {Days} days, cap: {Cap} MB, interval: {Interval}",
            _options.RetentionDays, _options.MaxStorageMb, _options.PurgeInterval);

        using var timer = new PeriodicTimer(_options.PurgeInterval);

        // Run an initial purge on startup
        RunPurge();

        while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
        {
            RunPurge();
        }
    }

    /// <summary>
    /// Executes one purge pass: retention first, then the storage cap.
    /// Any exception is logged and not rethrown.
    /// </summary>
    internal void RunPurge()
    {
        try
        {
            PurgeByRetention();
            PurgeByStorageCap();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error during event log purge");
        }
    }

    /// <summary>
    /// Deletes every event whose timestamp is older than now minus the configured retention.
    /// </summary>
    private void PurgeByRetention()
    {
        // Timestamps are compared as text, so the cutoff uses the same round-trip ("o")
        // format that SiteEventLogger writes.
        var cutoff = DateTimeOffset.UtcNow.AddDays(-_options.RetentionDays).ToString("o");

        using var cmd = _eventLogger.Connection.CreateCommand();
        cmd.CommandText = "DELETE FROM site_events WHERE timestamp < $cutoff";
        cmd.Parameters.AddWithValue("$cutoff", cutoff);

        var deleted = cmd.ExecuteNonQuery();
        if (deleted > 0)
        {
            _logger.LogInformation("Purged {Count} events older than {Days} days", deleted, _options.RetentionDays);
        }
    }

    /// <summary>
    /// Deletes the oldest events in batches until the space occupied by live data is at or
    /// below the configured cap.
    /// </summary>
    /// <remarks>
    /// The comparison uses live pages (page_count minus freelist_count) rather than the raw
    /// file size. Deleting rows only moves pages onto SQLite's free list; the file itself does
    /// not shrink unless auto_vacuum is enabled, so a loop keyed on file size would keep
    /// deleting until the table was empty. Free pages are reused by later inserts, so capping
    /// live data also bounds file growth.
    /// </remarks>
    private void PurgeByStorageCap()
    {
        var capBytes = (long)_options.MaxStorageMb * 1024 * 1024;
        var usedBytes = GetUsedSizeBytes();
        if (usedBytes <= capBytes)
            return;

        _logger.LogWarning(
            "Event log size {Size:F1} MB exceeds cap {Cap} MB — purging oldest events",
            usedBytes / (1024.0 * 1024.0), _options.MaxStorageMb);

        // Delete oldest events in batches until under the cap
        while (usedBytes > capBytes)
        {
            using var cmd = _eventLogger.Connection.CreateCommand();
            cmd.CommandText =
                "DELETE FROM site_events WHERE id IN (" +
                "SELECT id FROM site_events ORDER BY id ASC LIMIT $batchSize)";
            cmd.Parameters.AddWithValue("$batchSize", StorageCapBatchSize);

            var deleted = cmd.ExecuteNonQuery();
            if (deleted == 0) break; // table is empty; nothing more can be reclaimed

            // Hands free pages back to the file system when the database was created with
            // auto_vacuum = INCREMENTAL; harmless no-op otherwise.
            using var vacuumCmd = _eventLogger.Connection.CreateCommand();
            vacuumCmd.CommandText = "PRAGMA incremental_vacuum";
            vacuumCmd.ExecuteNonQuery();

            usedBytes = GetUsedSizeBytes();
        }
    }

    /// <summary>
    /// Returns the database size in bytes as page_count multiplied by page_size.
    /// This includes pages on the free list.
    /// </summary>
    internal long GetDatabaseSizeBytes()
    {
        return ReadPragma("page_count") * ReadPragma("page_size");
    }

    /// <summary>
    /// Returns the bytes occupied by pages that are not on the free list.
    /// </summary>
    private long GetUsedSizeBytes()
    {
        var livePages = ReadPragma("page_count") - ReadPragma("freelist_count");
        return Math.Max(livePages, 0) * ReadPragma("page_size");
    }

    /// <summary>
    /// Reads a single integer-valued PRAGMA from the event log connection.
    /// Only called with fixed pragma names from this class.
    /// </summary>
    private long ReadPragma(string pragmaName)
    {
        using var cmd = _eventLogger.Connection.CreateCommand();
        cmd.CommandText = "PRAGMA " + pragmaName;
        return (long)cmd.ExecuteScalar()!;
    }
}

View File

@@ -0,0 +1,146 @@
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.RemoteQuery;
namespace ScadaLink.SiteEventLogging;
/// <summary>
/// Processes event log queries locally against SQLite.
/// Supports filtering by event_type, time range, instance_id, severity,
/// and keyword search (LIKE on message and source).
/// Uses keyset pagination with continuation token (last event ID).
/// </summary>
/// <remarks>
/// NOTE(review): queries run on the event logger's connection without taking the logger's
/// write lock — confirm that sharing the connection across threads is acceptable.
/// </remarks>
public class EventLogQueryService : IEventLogQueryService
{
    private readonly SiteEventLogger _eventLogger;
    private readonly SiteEventLogOptions _options;
    private readonly ILogger<EventLogQueryService> _logger;

    /// <summary>
    /// Creates the query service.
    /// </summary>
    /// <param name="eventLogger">Event logger; must be the concrete <see cref="SiteEventLogger"/>.</param>
    /// <param name="options">Supplies the default page size.</param>
    /// <param name="logger">Diagnostic logger.</param>
    /// <exception cref="InvalidCastException">
    /// <paramref name="eventLogger"/> is not a <see cref="SiteEventLogger"/>.
    /// </exception>
    public EventLogQueryService(
        ISiteEventLogger eventLogger,
        IOptions<SiteEventLogOptions> options,
        ILogger<EventLogQueryService> logger)
    {
        _eventLogger = (SiteEventLogger)eventLogger;
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>
    /// Runs one page of an event log query.
    /// </summary>
    /// <param name="request">Filters, page size and optional continuation token.</param>
    /// <returns>
    /// A response with the matching entries in ascending id order. On failure the exception is
    /// logged and a response with <c>Success = false</c> and the exception message is returned
    /// instead of throwing.
    /// </returns>
    public EventLogQueryResponse ExecuteQuery(EventLogQueryRequest request)
    {
        try
        {
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            var pageSize = request.PageSize > 0 ? request.PageSize : _options.QueryPageSize;

            using var cmd = _eventLogger.Connection.CreateCommand();
            var whereClauses = new List<string>();

            // Keyset pagination: only return events with id > continuation token
            if (request.ContinuationToken.HasValue)
            {
                whereClauses.Add("id > $afterId");
                cmd.Parameters.AddWithValue("$afterId", request.ContinuationToken.Value);
            }

            // Timestamps are stored as UTC round-trip strings and compared as text, so the
            // bounds must be normalised to UTC before formatting; otherwise a bound carrying
            // a non-zero offset would compare against the wrong instant.
            if (request.From.HasValue)
            {
                whereClauses.Add("timestamp >= $from");
                cmd.Parameters.AddWithValue(
                    "$from", request.From.Value.ToUniversalTime().ToString("o", invariant));
            }

            if (request.To.HasValue)
            {
                whereClauses.Add("timestamp <= $to");
                cmd.Parameters.AddWithValue(
                    "$to", request.To.Value.ToUniversalTime().ToString("o", invariant));
            }

            if (!string.IsNullOrWhiteSpace(request.EventType))
            {
                whereClauses.Add("event_type = $eventType");
                cmd.Parameters.AddWithValue("$eventType", request.EventType);
            }

            if (!string.IsNullOrWhiteSpace(request.Severity))
            {
                whereClauses.Add("severity = $severity");
                cmd.Parameters.AddWithValue("$severity", request.Severity);
            }

            if (!string.IsNullOrWhiteSpace(request.InstanceId))
            {
                whereClauses.Add("instance_id = $instanceId");
                cmd.Parameters.AddWithValue("$instanceId", request.InstanceId);
            }

            if (!string.IsNullOrWhiteSpace(request.KeywordFilter))
            {
                // The keyword is a literal substring: escape LIKE metacharacters so that
                // '%' and '_' typed by the user do not act as wildcards.
                whereClauses.Add(
                    "(message LIKE $keyword ESCAPE '\\' OR source LIKE $keyword ESCAPE '\\')");
                cmd.Parameters.AddWithValue(
                    "$keyword", "%" + EscapeLikePattern(request.KeywordFilter) + "%");
            }

            var whereClause = whereClauses.Count > 0
                ? " WHERE " + string.Join(" AND ", whereClauses)
                : "";

            // Fetch pageSize + 1 to determine if there are more results
            cmd.CommandText =
                "SELECT id, timestamp, event_type, severity, instance_id, source, message, details" +
                " FROM site_events" +
                whereClause +
                " ORDER BY id ASC" +
                " LIMIT $limit";

            // Widen before adding one so a page size of int.MaxValue cannot wrap negative
            // (a negative LIMIT means "no limit" in SQLite).
            cmd.Parameters.AddWithValue("$limit", (long)pageSize + 1);

            var entries = new List<EventLogEntry>();
            using var reader = cmd.ExecuteReader();
            while (reader.Read())
            {
                entries.Add(new EventLogEntry(
                    Id: reader.GetInt64(0),
                    // Culture-independent parse of the stored round-trip timestamp.
                    Timestamp: DateTimeOffset.Parse(
                        reader.GetString(1),
                        invariant,
                        System.Globalization.DateTimeStyles.RoundtripKind),
                    EventType: reader.GetString(2),
                    Severity: reader.GetString(3),
                    InstanceId: reader.IsDBNull(4) ? null : reader.GetString(4),
                    Source: reader.GetString(5),
                    Message: reader.GetString(6),
                    Details: reader.IsDBNull(7) ? null : reader.GetString(7)));
            }

            // The extra row only signals that another page exists; it is not returned.
            var hasMore = entries.Count > pageSize;
            if (hasMore)
            {
                entries.RemoveAt(entries.Count - 1);
            }

            var continuationToken = entries.Count > 0 ? entries[^1].Id : (long?)null;

            return new EventLogQueryResponse(
                CorrelationId: request.CorrelationId,
                SiteId: request.SiteId,
                Entries: entries,
                ContinuationToken: continuationToken,
                HasMore: hasMore,
                Success: true,
                ErrorMessage: null,
                Timestamp: DateTimeOffset.UtcNow);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to execute event log query: {CorrelationId}", request.CorrelationId);
            return new EventLogQueryResponse(
                CorrelationId: request.CorrelationId,
                SiteId: request.SiteId,
                Entries: [],
                ContinuationToken: null,
                HasMore: false,
                Success: false,
                ErrorMessage: ex.Message,
                Timestamp: DateTimeOffset.UtcNow);
        }
    }

    /// <summary>
    /// Escapes the LIKE metacharacters (<c>\</c>, <c>%</c>, <c>_</c>) with a backslash so the
    /// text matches literally when used with <c>ESCAPE '\'</c>.
    /// </summary>
    private static string EscapeLikePattern(string text)
    {
        // The escape character itself must be doubled first, before it is introduced
        // in front of the wildcards.
        return text
            .Replace("\\", "\\\\")
            .Replace("%", "\\%")
            .Replace("_", "\\_");
    }
}

View File

@@ -0,0 +1,12 @@
using ScadaLink.Commons.Messages.RemoteQuery;
namespace ScadaLink.SiteEventLogging;
/// <summary>
/// Interface for querying site event logs.
/// Used by Communication Layer to process remote queries from central.
/// </summary>
public interface IEventLogQueryService
{
/// <summary>
/// Executes an event log query and returns the result synchronously.
/// </summary>
/// <param name="request">The query to run.</param>
/// <returns>
/// The query response. The <c>EventLogQueryService</c> implementation reports failures through
/// the response's <c>Success</c> and <c>ErrorMessage</c> members rather than by throwing;
/// NOTE(review): confirm whether other implementations are expected to follow the same contract.
/// </returns>
EventLogQueryResponse ExecuteQuery(EventLogQueryRequest request);
}

View File

@@ -0,0 +1,24 @@
namespace ScadaLink.SiteEventLogging;
/// <summary>
/// Interface for recording operational events to the local SQLite event log.
/// </summary>
public interface ISiteEventLogger
{
/// <summary>
/// Record an event asynchronously.
/// </summary>
/// <param name="eventType">Category: script, alarm, deployment, connection, store_and_forward, instance_lifecycle</param>
/// <param name="severity">Info, Warning, or Error</param>
/// <param name="instanceId">Optional instance ID associated with the event</param>
/// <param name="source">Source identifier, e.g., "ScriptActor:MonitorSpeed"</param>
/// <param name="message">Human-readable event description</param>
/// <param name="details">Optional JSON details (stack traces, compilation errors, etc.)</param>
/// <returns>
/// A task representing the logging operation. In the <c>SiteEventLogger</c> implementation the
/// insert is performed before the method returns and the returned task is already completed;
/// database errors are logged there and not surfaced through the task.
/// </returns>
/// <exception cref="System.ArgumentException">
/// Thrown by the <c>SiteEventLogger</c> implementation when <paramref name="eventType"/>,
/// <paramref name="severity"/>, <paramref name="source"/> or <paramref name="message"/>
/// is null, empty or whitespace.
/// </exception>
Task LogEventAsync(
string eventType,
string severity,
string? instanceId,
string source,
string message,
string? details = null);
}

View File

@@ -8,7 +8,10 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Data.Sqlite" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.5" />
<PackageReference Include="Microsoft.Extensions.Options" Version="10.0.5" />
</ItemGroup>
@@ -16,4 +19,8 @@
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
</ItemGroup>
<ItemGroup>
<InternalsVisibleTo Include="ScadaLink.SiteEventLogging.Tests" />
</ItemGroup>
</Project>

View File

@@ -4,15 +4,20 @@ namespace ScadaLink.SiteEventLogging;
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Wires up the site event logging components: the singleton event recorder, the
    /// singleton query service, and the hosted purge service.
    /// </summary>
    /// <param name="services">The service collection to add registrations to.</param>
    /// <returns>The same service collection, to allow call chaining.</returns>
    public static IServiceCollection AddSiteEventLogging(this IServiceCollection services)
    {
        return services
            .AddSingleton<ISiteEventLogger, SiteEventLogger>()
            .AddSingleton<IEventLogQueryService, EventLogQueryService>()
            .AddHostedService<EventLogPurgeService>();
    }

    /// <summary>
    /// Reserved hook for Akka actor registration (Phase 4+); currently registers nothing.
    /// </summary>
    /// <param name="services">The service collection, returned unchanged.</param>
    /// <returns>The same service collection, to allow call chaining.</returns>
    public static IServiceCollection AddSiteEventLoggingActors(this IServiceCollection services)
        => services;
}

View File

@@ -4,5 +4,7 @@ public class SiteEventLogOptions
{
/// <summary>
/// Age limit for events, in days (default 30). EventLogPurgeService deletes events whose
/// timestamp is older than the current UTC time minus this many days.
/// </summary>
public int RetentionDays { get; set; } = 30;
/// <summary>
/// Storage cap in megabytes (default 1024). EventLogPurgeService deletes the oldest events
/// when the database size exceeds this value times 1024 * 1024 bytes.
/// </summary>
public int MaxStorageMb { get; set; } = 1024;
/// <summary>
/// Cron expression for the purge schedule (default "0 2 * * *").
/// NOTE(review): no code visible alongside this class reads this property — the purge
/// service schedules itself from <see cref="PurgeInterval"/>; confirm whether it is still needed.
/// </summary>
public string PurgeScheduleCron { get; set; } = "0 2 * * *";
/// <summary>
/// SQLite database location (default "site_events.db"). SiteEventLogger uses it as the
/// Data Source of its connection string unless a connection string override is supplied.
/// </summary>
public string DatabasePath { get; set; } = "site_events.db";
/// <summary>
/// Page size (default 500) used by EventLogQueryService when a query request does not
/// specify a positive page size.
/// </summary>
public int QueryPageSize { get; set; } = 500;
/// <summary>
/// Time between purge passes (default 24 hours). EventLogPurgeService uses it as the
/// period of its timer; one additional pass runs at service start.
/// </summary>
public TimeSpan PurgeInterval { get; set; } = TimeSpan.FromHours(24);
}

View File

@@ -0,0 +1,107 @@
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ScadaLink.SiteEventLogging;
/// <summary>
/// Records operational events to a local SQLite database.
/// Only the active node generates events. Not replicated to standby.
/// On failover, the new active node starts a fresh log.
/// </summary>
/// <remarks>
/// A single connection is opened in the constructor and kept for the lifetime of the
/// instance. Inserts and disposal are serialised through a private lock.
/// </remarks>
public class SiteEventLogger : ISiteEventLogger, IDisposable
{
    private readonly SqliteConnection _connection;
    private readonly ILogger<SiteEventLogger> _logger;
    private readonly object _writeLock = new();
    private bool _disposed;

    /// <summary>
    /// Opens the event database and creates the schema if it does not exist yet.
    /// </summary>
    /// <param name="options">Supplies the database path used when no override is given.</param>
    /// <param name="logger">Diagnostic logger used to report failed inserts.</param>
    /// <param name="connectionStringOverride">
    /// Full connection string to use instead of the one derived from the options.
    /// </param>
    public SiteEventLogger(
        IOptions<SiteEventLogOptions> options,
        ILogger<SiteEventLogger> logger,
        string? connectionStringOverride = null)
    {
        _logger = logger;

        var connectionString = connectionStringOverride
            ?? $"Data Source={options.Value.DatabasePath};Cache=Shared";

        _connection = new SqliteConnection(connectionString);
        try
        {
            _connection.Open();
            InitializeSchema();
        }
        catch
        {
            // Dispose() will never be called on a half-constructed instance, so release
            // the connection here before letting the failure propagate.
            _connection.Dispose();
            throw;
        }
    }

    /// <summary>The underlying connection, shared with the purge and query services.</summary>
    internal SqliteConnection Connection => _connection;

    /// <summary>
    /// Creates the site_events table and its indexes when they are missing.
    /// </summary>
    private void InitializeSchema()
    {
        // Must run before the first table is created to take effect; on a database that
        // already has tables it only records the setting. It lets
        // "PRAGMA incremental_vacuum" return freed pages to the file system.
        using (var pragmaCmd = _connection.CreateCommand())
        {
            pragmaCmd.CommandText = "PRAGMA auto_vacuum = INCREMENTAL";
            pragmaCmd.ExecuteNonQuery();
        }

        using var cmd = _connection.CreateCommand();
        cmd.CommandText = """
            CREATE TABLE IF NOT EXISTS site_events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT NOT NULL,
            event_type TEXT NOT NULL,
            severity TEXT NOT NULL,
            instance_id TEXT,
            source TEXT NOT NULL,
            message TEXT NOT NULL,
            details TEXT
            );
            CREATE INDEX IF NOT EXISTS idx_events_timestamp ON site_events(timestamp);
            CREATE INDEX IF NOT EXISTS idx_events_type ON site_events(event_type);
            CREATE INDEX IF NOT EXISTS idx_events_instance ON site_events(instance_id);
            """;
        cmd.ExecuteNonQuery();
    }

    /// <summary>
    /// Inserts one event row stamped with the current UTC time.
    /// </summary>
    /// <param name="eventType">Event category; must not be null or whitespace.</param>
    /// <param name="severity">Event severity; must not be null or whitespace.</param>
    /// <param name="instanceId">Optional instance ID; stored as NULL when absent.</param>
    /// <param name="source">Source identifier; must not be null or whitespace.</param>
    /// <param name="message">Event description; must not be null or whitespace.</param>
    /// <param name="details">Optional details; stored as NULL when absent.</param>
    /// <returns>
    /// An already-completed task: the insert runs synchronously under the write lock.
    /// Database errors are logged and swallowed; calls made after disposal are ignored.
    /// </returns>
    /// <exception cref="ArgumentException">A required argument is null, empty or whitespace.</exception>
    public Task LogEventAsync(
        string eventType,
        string severity,
        string? instanceId,
        string source,
        string message,
        string? details = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(eventType);
        ArgumentException.ThrowIfNullOrWhiteSpace(severity);
        ArgumentException.ThrowIfNullOrWhiteSpace(source);
        ArgumentException.ThrowIfNullOrWhiteSpace(message);

        // Round-trip format keeps timestamps sortable as text for range queries.
        var timestamp = DateTimeOffset.UtcNow.ToString("o");

        lock (_writeLock)
        {
            if (_disposed) return Task.CompletedTask;

            try
            {
                using var cmd = _connection.CreateCommand();
                cmd.CommandText = """
                    INSERT INTO site_events (timestamp, event_type, severity, instance_id, source, message, details)
                    VALUES ($timestamp, $event_type, $severity, $instance_id, $source, $message, $details)
                    """;
                cmd.Parameters.AddWithValue("$timestamp", timestamp);
                cmd.Parameters.AddWithValue("$event_type", eventType);
                cmd.Parameters.AddWithValue("$severity", severity);
                cmd.Parameters.AddWithValue("$instance_id", (object?)instanceId ?? DBNull.Value);
                cmd.Parameters.AddWithValue("$source", source);
                cmd.Parameters.AddWithValue("$message", message);
                cmd.Parameters.AddWithValue("$details", (object?)details ?? DBNull.Value);
                cmd.ExecuteNonQuery();
            }
            catch (Exception ex)
            {
                // Event logging is best-effort: a failed insert must not break the caller.
                _logger.LogError(ex, "Failed to record event: {EventType} from {Source}", eventType, source);
            }
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Closes the database connection. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        // Take the write lock so the connection is never disposed underneath an in-flight
        // insert and the disposed flag is published to writers consistently.
        lock (_writeLock)
        {
            if (_disposed) return;
            _disposed = true;
            _connection.Dispose();
        }

        GC.SuppressFinalize(this);
    }
}