From 73fe61895356eff3b826a4d61ba16bf2a1b1bc4d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 24 Mar 2026 12:20:05 -0400 Subject: [PATCH] fix(lmxproxy): protect probe subscription from ReadAsync teardown, add instance configs ReadAsync internally subscribes/unsubscribes the same ScanTime tag used by the persistent probe, which was tearing down the probe subscription and triggering false reconnects every ~5s. Guard UnsubscribeInternal and stored subscription state so the probe tag is never removed by other callers. Also removes DetailedHealthCheckService (redundant with the persistent probe), adds per-instance config files (appsettings.v2.json, appsettings.v2b.json) loaded via LMXPROXY_INSTANCE env var so deploys no longer overwrite port settings. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Health/DetailedHealthCheckService.cs | 90 --------------- .../LmxProxyService.cs | 5 +- .../MxAccess/MxAccessClient.EventHandlers.cs | 1 - .../MxAccess/MxAccessClient.Subscription.cs | 24 +++- .../src/ZB.MOM.WW.LmxProxy.Host/Program.cs | 4 +- .../Status/StatusModels.cs | 1 - .../Status/StatusReportService.cs | 36 +----- .../appsettings.v2.json | 6 + .../appsettings.v2b.json | 6 + .../Health/HealthCheckServiceTests.cs | 105 ------------------ .../Status/StatusReportServiceTests.cs | 3 +- 11 files changed, 39 insertions(+), 242 deletions(-) delete mode 100644 lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Health/DetailedHealthCheckService.cs create mode 100644 lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2.json create mode 100644 lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2b.json diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Health/DetailedHealthCheckService.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Health/DetailedHealthCheckService.cs deleted file mode 100644 index 36eb342..0000000 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Health/DetailedHealthCheckService.cs +++ /dev/null @@ -1,90 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Threading; -using System.Threading.Tasks; -using Microsoft.Extensions.Diagnostics.HealthChecks; -using Serilog; -using ZB.MOM.WW.LmxProxy.Host.Domain; - -namespace ZB.MOM.WW.LmxProxy.Host.Health -{ - /// - /// Detailed health check: reads a test tag, checks quality and timestamp staleness. - /// - public class DetailedHealthCheckService : IHealthCheck - { - private static readonly ILogger Logger = Log.ForContext(); - - private readonly IScadaClient _scadaClient; - private readonly string _testTagAddress; - - public DetailedHealthCheckService( - IScadaClient scadaClient, - string testTagAddress = "DevPlatform.Scheduler.ScanTime") - { - _scadaClient = scadaClient; - _testTagAddress = testTagAddress; - } - - public async Task CheckHealthAsync( - HealthCheckContext context, - CancellationToken cancellationToken = default) - { - try - { - if (!_scadaClient.IsConnected) - { - return HealthCheckResult.Unhealthy("SCADA client is not connected"); - } - - Vtq vtq; - try - { - vtq = await _scadaClient.ReadAsync(_testTagAddress, cancellationToken); - } - catch (Exception ex) - { - Logger.Warning(ex, "Could not read test tag {Tag}", _testTagAddress); - return HealthCheckResult.Degraded( - "Could not read test tag: " + ex.Message, - data: new Dictionary - { - { "test_tag", _testTagAddress }, - { "error", ex.Message } - }); - } - - var data = new Dictionary - { - { "test_tag", _testTagAddress }, - { "quality", vtq.Quality.ToString() }, - { "timestamp", vtq.Timestamp.ToString("o") } - }; - - if (!vtq.Quality.IsGood()) - { - return HealthCheckResult.Degraded( - "Test tag quality is not Good: " + vtq.Quality, - data: data); - } - - if (DateTime.UtcNow - vtq.Timestamp > TimeSpan.FromMinutes(5)) - { - return HealthCheckResult.Degraded( - "Test tag data is stale (older than 5 minutes)", - data: data); - } - - return HealthCheckResult.Healthy( - "Test tag read successful with good quality", - data: data); - } - catch (Exception ex) - { - Logger.Error(ex, "Detailed health check failed"); - return HealthCheckResult.Unhealthy( - "Detailed health check failed: " + ex.Message, ex); - } - } - } -} diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/LmxProxyService.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/LmxProxyService.cs index 970a2d4..2e5beed 100644 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/LmxProxyService.cs +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/LmxProxyService.cs @@ -30,7 +30,6 @@ namespace ZB.MOM.WW.LmxProxy.Host private ApiKeyService? _apiKeyService; private PerformanceMetrics? _performanceMetrics; private HealthCheckService? _healthCheckService; - private DetailedHealthCheckService? _detailedHealthCheckService; private StatusReportService? _statusReportService; private StatusWebServer? _statusWebServer; private Server? _grpcServer; @@ -119,13 +118,11 @@ namespace ZB.MOM.WW.LmxProxy.Host // 10. Create health check services _healthCheckService = new HealthCheckService(_mxAccessClient, _subscriptionManager, _performanceMetrics); - _detailedHealthCheckService = new DetailedHealthCheckService( - _mxAccessClient, _config.HealthCheck.TestTagAddress); // 11. Create status report service _statusReportService = new StatusReportService( _mxAccessClient, _subscriptionManager, _performanceMetrics, - _healthCheckService, _detailedHealthCheckService); + _healthCheckService); // 12. Start status web server _statusWebServer = new StatusWebServer(_config.WebServer, _statusReportService); diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.EventHandlers.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.EventHandlers.cs index c43a72e..9c05593 100644 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.EventHandlers.cs +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.EventHandlers.cs @@ -28,7 +28,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess { try { - Log.Information("OnDataChange FIRED: handle={Handle}", phItemHandle); var quality = MapQuality(pwItemQuality); var timestamp = ConvertTimestamp(pftItemTimeStamp); diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.Subscription.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.Subscription.cs index 741fff9..ba2a7cc 100644 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.Subscription.cs +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/MxAccess/MxAccessClient.Subscription.cs @@ -36,8 +36,12 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess { SubscribeInternal(address); - // Store for reconnect replay - _storedSubscriptions[address] = callback; + // Store for reconnect replay (but don't overwrite the probe tag's callback) + if (_probeTestTagAddress == null || + !string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase)) + { + _storedSubscriptions[address] = callback; + } } } }); @@ -70,7 +74,13 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess foreach (var address in addressList) { UnsubscribeInternal(address); - _storedSubscriptions.Remove(address); + + // Don't remove probe tag from stored subscriptions — it's permanent + if (_probeTestTagAddress == null || + !string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase)) + { + _storedSubscriptions.Remove(address); + } } } }); @@ -149,6 +159,14 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess /// private void UnsubscribeInternal(string address) { + // Never unsubscribe the probe tag — it's a permanent connection health monitor + if (_probeTestTagAddress != null && + string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase)) + { + Log.Debug("Skipping unsubscribe for probe tag {Address}", address); + return; + } + if (!_addressToHandle.TryGetValue(address, out int itemHandle)) { Log.Debug("No active subscription for {Address}, skipping unsubscribe", address); diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Program.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Program.cs index 76bbb6f..bcbe790 100644 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Program.cs +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Program.cs @@ -10,10 +10,12 @@ namespace ZB.MOM.WW.LmxProxy.Host { static int Main(string[] args) { - // 1. Build configuration + // 1. Build configuration (instance override file loaded from LMXPROXY_INSTANCE env var) + var instance = Environment.GetEnvironmentVariable("LMXPROXY_INSTANCE"); var configuration = new ConfigurationBuilder() .SetBasePath(AppDomain.CurrentDomain.BaseDirectory) .AddJsonFile("appsettings.json", optional: false, reloadOnChange: false) + .AddJsonFile($"appsettings.{instance}.json", optional: true, reloadOnChange: false) .AddEnvironmentVariables() .Build(); diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusModels.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusModels.cs index 05b9db1..ee3a748 100644 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusModels.cs +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusModels.cs @@ -12,7 +12,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status public SubscriptionStatus Subscriptions { get; set; } = new SubscriptionStatus(); public PerformanceStatus Performance { get; set; } = new PerformanceStatus(); public HealthInfo Health { get; set; } = new HealthInfo(); - public HealthInfo? DetailedHealth { get; set; } } public class ConnectionStatus diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusReportService.cs b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusReportService.cs index 695361a..dcaf454 100644 --- a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusReportService.cs +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/Status/StatusReportService.cs @@ -9,7 +9,6 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using Serilog; using ZB.MOM.WW.LmxProxy.Host.Domain; -using ZB.MOM.WW.LmxProxy.Host.Health; using HealthCheckService = ZB.MOM.WW.LmxProxy.Host.Health.HealthCheckService; using ZB.MOM.WW.LmxProxy.Host.Metrics; using ZB.MOM.WW.LmxProxy.Host.Subscriptions; @@ -27,20 +26,17 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status private readonly SubscriptionManager _subscriptionManager; private readonly PerformanceMetrics _performanceMetrics; private readonly HealthCheckService _healthCheckService; - private readonly DetailedHealthCheckService? _detailedHealthCheckService; public StatusReportService( IScadaClient scadaClient, SubscriptionManager subscriptionManager, PerformanceMetrics performanceMetrics, - HealthCheckService healthCheckService, - DetailedHealthCheckService? detailedHealthCheckService = null) + HealthCheckService healthCheckService) { _scadaClient = scadaClient; _subscriptionManager = subscriptionManager; _performanceMetrics = performanceMetrics; _healthCheckService = healthCheckService; - _detailedHealthCheckService = detailedHealthCheckService; } public async Task GenerateHtmlReportAsync() @@ -144,24 +140,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status } } - // Detailed health check (optional) - if (_detailedHealthCheckService != null) - { - var detailedResult = await _detailedHealthCheckService.CheckHealthAsync(new HealthCheckContext()); - statusData.DetailedHealth = new HealthInfo - { - Status = detailedResult.Status.ToString(), - Description = detailedResult.Description ?? "" - }; - if (detailedResult.Data != null) - { - foreach (var kvp in detailedResult.Data) - { - statusData.DetailedHealth.Data[kvp.Key] = kvp.Value?.ToString() ?? ""; - } - } - } - return statusData; } @@ -264,18 +242,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status sb.AppendLine(" "); sb.AppendLine(" "); - // Detailed health (if available) - if (statusData.DetailedHealth != null) - { - var detailedClass = GetHealthCardClass(statusData.DetailedHealth.Status); - var detailedCss = GetHealthStatusCss(statusData.DetailedHealth.Status); - sb.AppendLine($"
"); - sb.AppendLine("

Detailed Health Check

"); - sb.AppendLine($"

{statusData.DetailedHealth.Status}

"); - sb.AppendLine($"

{statusData.DetailedHealth.Description}

"); - sb.AppendLine("
"); - } - sb.AppendLine($"
Last updated: {statusData.Timestamp:yyyy-MM-dd HH:mm:ss} UTC | Service: {statusData.ServiceName} v{statusData.Version}
"); sb.AppendLine(""); sb.AppendLine(""); diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2.json b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2.json new file mode 100644 index 0000000..91d60e2 --- /dev/null +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2.json @@ -0,0 +1,6 @@ +{ + "GrpcPort": 50100, + "WebServer": { + "Port": 8081 + } +} diff --git a/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2b.json b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2b.json new file mode 100644 index 0000000..99be187 --- /dev/null +++ b/lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2b.json @@ -0,0 +1,6 @@ +{ + "GrpcPort": 50101, + "WebServer": { + "Port": 8082 + } +} diff --git a/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Health/HealthCheckServiceTests.cs b/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Health/HealthCheckServiceTests.cs index a5b2ea6..fc6102c 100644 --- a/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Health/HealthCheckServiceTests.cs +++ b/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Health/HealthCheckServiceTests.cs @@ -132,109 +132,4 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Health } } - public class DetailedHealthCheckServiceTests - { - private class FakeScadaClient : IScadaClient - { - public bool IsConnected { get; set; } = true; - public ConnectionState ConnectionState { get; set; } = ConnectionState.Connected; - public Vtq? ReadResult { get; set; } - public Exception? ReadException { get; set; } - public event EventHandler? ConnectionStateChanged; - public Task ConnectAsync(CancellationToken ct = default) => Task.CompletedTask; - public Task DisconnectAsync(CancellationToken ct = default) => Task.CompletedTask; - public Task ReadAsync(string address, CancellationToken ct = default) - { - if (ReadException != null) throw ReadException; - return Task.FromResult(ReadResult ?? Vtq.Good(true)); - } - public Task> ReadBatchAsync(IEnumerable addresses, CancellationToken ct = default) => - Task.FromResult>(new Dictionary()); - public Task WriteAsync(string address, object value, CancellationToken ct = default) => Task.CompletedTask; - public Task WriteBatchAsync(IReadOnlyDictionary values, CancellationToken ct = default) => Task.CompletedTask; - public Task<(bool flagReached, int elapsedMs)> WriteBatchAndWaitAsync( - IReadOnlyDictionary values, string flagTag, object flagValue, - int timeoutMs, int pollIntervalMs, CancellationToken ct = default) => - Task.FromResult((false, 0)); - public Task UnsubscribeByAddressAsync(IEnumerable addresses) => Task.CompletedTask; - public Task SubscribeAsync(IEnumerable addresses, Action callback, CancellationToken ct = default) => - Task.FromResult(new FakeHandle()); - public ValueTask DisposeAsync() => default; - internal void FireEvent() => ConnectionStateChanged?.Invoke(this, null!); - private class FakeHandle : IAsyncDisposable { public ValueTask DisposeAsync() => default; } - } - - [Fact] - public async Task ReturnsUnhealthy_WhenNotConnected() - { - var client = new FakeScadaClient { IsConnected = false }; - var svc = new DetailedHealthCheckService(client); - - var result = await svc.CheckHealthAsync(new HealthCheckContext()); - - result.Status.Should().Be(HealthStatus.Unhealthy); - } - - [Fact] - public async Task ReturnsHealthy_WhenTestTagGoodAndRecent() - { - var client = new FakeScadaClient - { - IsConnected = true, - ReadResult = Vtq.New(true, DateTime.UtcNow, Quality.Good) - }; - var svc = new DetailedHealthCheckService(client); - - var result = await svc.CheckHealthAsync(new HealthCheckContext()); - - result.Status.Should().Be(HealthStatus.Healthy); - } - - [Fact] - public async Task ReturnsDegraded_WhenTestTagQualityNotGood() - { - var client = new FakeScadaClient - { - IsConnected = true, - ReadResult = Vtq.New(true, DateTime.UtcNow, Quality.Uncertain) - }; - var svc = new DetailedHealthCheckService(client); - - var result = await svc.CheckHealthAsync(new HealthCheckContext()); - - result.Status.Should().Be(HealthStatus.Degraded); - } - - [Fact] - public async Task ReturnsDegraded_WhenTestTagTimestampStale() - { - var client = new FakeScadaClient - { - IsConnected = true, - ReadResult = Vtq.New(true, DateTime.UtcNow.AddMinutes(-10), Quality.Good) - }; - var svc = new DetailedHealthCheckService(client); - - var result = await svc.CheckHealthAsync(new HealthCheckContext()); - - result.Status.Should().Be(HealthStatus.Degraded); - result.Description.Should().Contain("stale"); - } - - [Fact] - public async Task ReturnsDegraded_WhenTestTagReadThrows() - { - var client = new FakeScadaClient - { - IsConnected = true, - ReadException = new InvalidOperationException("COM error") - }; - var svc = new DetailedHealthCheckService(client); - - var result = await svc.CheckHealthAsync(new HealthCheckContext()); - - result.Status.Should().Be(HealthStatus.Degraded); - result.Description.Should().Contain("Could not read test tag"); - } - } } diff --git a/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Status/StatusReportServiceTests.cs b/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Status/StatusReportServiceTests.cs index aee7e84..b1a12f3 100644 --- a/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Status/StatusReportServiceTests.cs +++ b/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Status/StatusReportServiceTests.cs @@ -52,8 +52,7 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Status var sm = new SubscriptionManager(client); var pm = new PerformanceMetrics(); var health = new HealthCheckService(client, sm, pm); - var detailed = new DetailedHealthCheckService(client); - var svc = new StatusReportService(client, sm, pm, health, detailed); + var svc = new StatusReportService(client, sm, pm, health); return (svc, pm, sm); }