refactor: rename ScadaLink → ZB.MOM.WW.ScadaBridge (code + projects + namespaces)

Solution + 23 src projects + 26 test projects renamed; folders, csproj, namespaces, and ScadaLinkDbContext/ScadaBridgeDbContext class updated. ActorSystem "scadalink" → "scadabridge", Akka seed-node URLs migrated. SQL roles/logins, LDAP domains, CLI command name, and CLI config dir (~/.scadalink → ~/.scadabridge) also renamed. Build green; 5 Host.Tests fail awaiting SQL login rename in next commit. Pre-existing StaleTagMonitor timing flakes unchanged. Rename script committed at tools/rename-to-scadabridge.sh.
2026-05-28 09:37:45 -04:00
parent 6d87ee3c3b
commit 7b0b9c7365
1531 changed files with 11180 additions and 11054 deletions
@@ -0,0 +1,223 @@
+using Microsoft.Extensions.Hosting;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+
+namespace ZB.MOM.WW.ScadaBridge.SiteEventLogging;
+
+/// <summary>
+/// SiteEventLogging-019: predicate the <see cref="EventLogPurgeService"/>
+/// consults at the top of every purge tick to decide whether THIS node should
+/// run the purge. The design states "a daily background job runs on the active
+/// node and deletes all events older than 30 days"; the standby's local SQLite
+/// receives no writes, so purging there is harmless but diverges from the design.
+/// Registration is the Host's responsibility (it knows the cluster topology).
+/// When none is supplied the purge service treats every node as active, and
+/// it also falls back to "active" if the predicate throws.
+/// </summary>
+/// <returns>
+/// <c>true</c> to run the purge on this tick; <c>false</c> to skip it.
+/// </returns>
+public delegate bool SiteEventLogActiveNodeCheck();
+
+/// <summary>
+/// Background service that periodically purges old events from the SQLite event log.
+/// Every purge cycle enforces time-based retention first
+/// (<see cref="SiteEventLogOptions.RetentionDays"/>) and then the storage cap
+/// (<see cref="SiteEventLogOptions.MaxStorageMb"/>). The purge work is moved off
+/// the host's startup path and stops between cap-purge batches when the host
+/// is shutting down.
+/// </summary>
+public class EventLogPurgeService : BackgroundService
+{
+    /// <summary>Number of events deleted per cap-purge batch.</summary>
+    private const int CapPurgeBatchSize = 1000;
+
+    private readonly SiteEventLogger _eventLogger;
+    private readonly SiteEventLogOptions _options;
+    private readonly ILogger<EventLogPurgeService> _logger;
+    private readonly SiteEventLogActiveNodeCheck _isActiveNode;
+
+    /// <summary>Initializes a new instance of <see cref="EventLogPurgeService"/>.</summary>
+    /// <param name="eventLogger">The concrete event logger providing lock-guarded database access.</param>
+    /// <param name="options">Site event log options (retention days, storage cap, purge interval).</param>
+    /// <param name="logger">Logger instance.</param>
+    /// <param name="isActiveNode">
+    /// SiteEventLogging-019: optional active-node check. When <c>null</c>, the
+    /// service runs the purge on every tick (preserves the pre-fix behaviour
+    /// for non-clustered hosts and existing tests). When supplied — e.g. by
+    /// the Host on a site node — each tick early-exits on the standby so the
+    /// daily purge runs only on the active node, matching the design.
+    /// </param>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="eventLogger"/>, <paramref name="options"/> or
+    /// <paramref name="logger"/> is <c>null</c>.
+    /// </exception>
+    public EventLogPurgeService(
+        SiteEventLogger eventLogger,
+        IOptions<SiteEventLogOptions> options,
+        ILogger<EventLogPurgeService> logger,
+        SiteEventLogActiveNodeCheck? isActiveNode = null)
+    {
+        // Fail at construction with a named argument instead of a late
+        // NullReferenceException inside the purge loop.
+        ArgumentNullException.ThrowIfNull(eventLogger);
+        ArgumentNullException.ThrowIfNull(options);
+        ArgumentNullException.ThrowIfNull(logger);
+
+        // Depend on the concrete recorder directly: purge must funnel database access
+        // through its lock-guarded WithConnection. Taking ISiteEventLogger and
+        // downcasting would throw InvalidCastException for any other implementation.
+        _eventLogger = eventLogger;
+        _options = options.Value;
+        _logger = logger;
+
+        // No check registered means "every node is active".
+        _isActiveNode = isActiveNode ?? (static () => true);
+    }
+
+    /// <inheritdoc />
+    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
+    {
+        // Yield before doing any work. The purge is synchronous and can be slow;
+        // where BackgroundService.StartAsync invokes ExecuteAsync inline, everything
+        // before the first incomplete await would otherwise run on the host's
+        // startup path and delay the remaining hosted services.
+        await Task.Yield();
+
+        _logger.LogInformation(
+            "Event log purge service started — retention: {Days} days, cap: {Cap} MB, interval: {Interval}",
+            _options.RetentionDays, _options.MaxStorageMb, _options.PurgeInterval);
+
+        using var timer = new PeriodicTimer(_options.PurgeInterval);
+
+        // Run an initial purge on startup, then one per timer tick.
+        RunPurge(stoppingToken);
+
+        while (await timer.WaitForNextTickAsync(stoppingToken).ConfigureAwait(false))
+        {
+            RunPurge(stoppingToken);
+        }
+    }
+
+    /// <summary>Runs a single purge cycle: first by retention age, then by storage cap.</summary>
+    /// <remarks>
+    /// Exceptions raised by the purge steps are caught and logged so that one
+    /// failed cycle does not terminate the purge loop.
+    /// </remarks>
+    internal void RunPurge() => RunPurge(CancellationToken.None);
+
+    /// <summary>
+    /// Purge cycle implementation. <paramref name="cancellationToken"/> is observed
+    /// before the cycle starts and between cap-purge batches.
+    /// </summary>
+    /// <param name="cancellationToken">Signals that the host is stopping.</param>
+    private void RunPurge(CancellationToken cancellationToken)
+    {
+        try
+        {
+            if (cancellationToken.IsCancellationRequested)
+            {
+                return;
+            }
+
+            if (!ShouldPurgeOnThisNode())
+            {
+                _logger.LogDebug("Skipping event log purge tick — this node is not the active site member");
+                return;
+            }
+
+            PurgeByRetention();
+            PurgeByStorageCap(cancellationToken);
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Error during event log purge");
+        }
+    }
+
+    /// <summary>
+    /// SiteEventLogging-019: evaluates the active-node check for the current tick.
+    /// </summary>
+    /// <returns>
+    /// The predicate's answer, or <c>true</c> when the predicate throws.
+    /// </returns>
+    private bool ShouldPurgeOnThisNode()
+    {
+        // The standby's local SQLite receives no writes, so purging there is
+        // harmless but unnecessary; the design (Component-SiteEventLogging
+        // → Storage) states the purge runs on the active node.
+        // Defensive try/catch: a transient cluster-state read failure must not
+        // stop the purge loop — fall back to running the purge (the pre-fix
+        // behaviour was "always run", which is harmless on standby).
+        try
+        {
+            return _isActiveNode();
+        }
+        catch (Exception checkEx)
+        {
+            _logger.LogDebug(checkEx,
+                "Active-node check threw during purge tick; running purge to be safe");
+            return true;
+        }
+    }
+
+    /// <summary>
+    /// Deletes every <c>site_events</c> row whose <c>timestamp</c> compares below
+    /// the retention cutoff (now, in UTC, minus the configured retention days).
+    /// </summary>
+    private void PurgeByRetention()
+    {
+        // The cutoff is bound as a round-trip ("o") string.
+        // NOTE(review): this assumes stored timestamps use the same format so the
+        // comparison orders correctly — confirm against the event writer.
+        var cutoff = DateTimeOffset.UtcNow.AddDays(-_options.RetentionDays).ToString("o");
+
+        var deleted = _eventLogger.WithConnection(connection =>
+        {
+            using var cmd = connection.CreateCommand();
+            cmd.CommandText = "DELETE FROM site_events WHERE timestamp < $cutoff";
+            cmd.Parameters.AddWithValue("$cutoff", cutoff);
+            return cmd.ExecuteNonQuery();
+        });
+
+        if (deleted > 0)
+        {
+            _logger.LogInformation("Purged {Count} events older than {Days} days", deleted, _options.RetentionDays);
+        }
+    }
+
+    /// <summary>
+    /// Deletes the oldest events (lowest <c>id</c> first) in batches of
+    /// <see cref="CapPurgeBatchSize"/> until the logical database size is at or
+    /// below the configured cap.
+    /// </summary>
+    /// <param name="cancellationToken">
+    /// Checked before each batch so host shutdown is not held up by a long purge.
+    /// </param>
+    private void PurgeByStorageCap(CancellationToken cancellationToken)
+    {
+        // Widen before multiplying so a large cap cannot overflow 32-bit arithmetic.
+        var capBytes = (long)_options.MaxStorageMb * 1024 * 1024;
+        var currentSizeBytes = GetDatabaseSizeBytes();
+
+        if (currentSizeBytes <= capBytes)
+        {
+            return;
+        }
+
+        _logger.LogWarning(
+            "Event log size {Size:F1} MB exceeds cap {Cap} MB — purging oldest events",
+            currentSizeBytes / (1024.0 * 1024.0), _options.MaxStorageMb);
+
+        // Delete the oldest events in batches until the database is under the cap.
+        // The loop also stops if the measured size fails to decrease across an
+        // iteration, so a cap that can never be met does not silently empty the
+        // entire table.
+        while (currentSizeBytes > capBytes)
+        {
+            if (cancellationToken.IsCancellationRequested)
+            {
+                _logger.LogInformation(
+                    "Event log cap purge interrupted by shutdown before reaching the cap");
+                return;
+            }
+
+            var previousSizeBytes = currentSizeBytes;
+
+            var deleted = _eventLogger.WithConnection(connection =>
+            {
+                using var cmd = connection.CreateCommand();
+                cmd.CommandText = $"""
+                    DELETE FROM site_events WHERE id IN (
+                        SELECT id FROM site_events ORDER BY id ASC LIMIT {CapPurgeBatchSize}
+                    )
+                    """;
+                var rows = cmd.ExecuteNonQuery();
+
+                // Ask SQLite to release freelist pages. The loop's size measurement
+                // already excludes freelist pages (see GetDatabaseSizeBytes), so
+                // termination does not depend on this step. Per the original note
+                // it is effective because auto_vacuum = INCREMENTAL is set at schema
+                // creation (not visible here); harmless otherwise.
+                using var vacuumCmd = connection.CreateCommand();
+                vacuumCmd.CommandText = "PRAGMA incremental_vacuum";
+                vacuumCmd.ExecuteNonQuery();
+
+                return rows;
+            });
+
+            // Nothing left to delete: the table is empty and the cap still is not met.
+            if (deleted == 0)
+            {
+                break;
+            }
+
+            currentSizeBytes = GetDatabaseSizeBytes();
+
+            if (currentSizeBytes >= previousSizeBytes)
+            {
+                // Size is not shrinking despite deletes — stop rather than wipe the
+                // whole table. This should not happen now that logical size is
+                // measured, but guards against any future regression.
+                _logger.LogWarning(
+                    "Event log size did not decrease after a cap-purge batch ({Size:F1} MB); " +
+                    "stopping to avoid emptying the log",
+                    currentSizeBytes / (1024.0 * 1024.0));
+                break;
+            }
+        }
+    }
+
+    /// <summary>
+    /// Returns the logical size of the database in bytes — only pages that hold live
+    /// data, excluding free pages on the freelist. Measuring logical size (rather than
+    /// the raw file size from <c>page_count</c>) means the storage-cap loop observes
+    /// space being reclaimed even if free pages have not yet been returned to the OS.
+    /// </summary>
+    /// <returns><c>(page_count - freelist_count) * page_size</c>, never negative.</returns>
+    internal long GetDatabaseSizeBytes()
+    {
+        return _eventLogger.WithConnection(connection =>
+        {
+            // Reads one integer-valued PRAGMA on the shared connection.
+            long ReadPragma(string pragmaName)
+            {
+                using var pragmaCmd = connection.CreateCommand();
+                pragmaCmd.CommandText = $"PRAGMA {pragmaName}";
+                return (long)pragmaCmd.ExecuteScalar()!;
+            }
+
+            var pageCount = ReadPragma("page_count");
+            var freeListCount = ReadPragma("freelist_count");
+            var pageSize = ReadPragma("page_size");
+
+            // Clamp defensively so an inconsistent reading cannot yield a negative size.
+            var usedPages = Math.Max(0L, pageCount - freeListCount);
+            return usedPages * pageSize;
+        });
+    }
+}