fix(lmxproxy): protect probe subscription from ReadAsync teardown, add instance configs

ReadAsync internally subscribes/unsubscribes the same ScanTime tag used
by the persistent probe, which was tearing down the probe subscription
and triggering false reconnects every ~5s. Guard UnsubscribeInternal and
stored subscription state so the probe tag is never removed by other
callers. Also removes DetailedHealthCheckService (redundant with the
persistent probe) and adds per-instance config files (appsettings.v2.json,
appsettings.v2b.json), loaded via the LMXPROXY_INSTANCE env var, so deploys
no longer overwrite port settings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-03-24 12:20:05 -04:00
parent 95168253fc
commit 73fe618953
11 changed files with 39 additions and 242 deletions

View File

@@ -1,90 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Serilog;
using ZB.MOM.WW.LmxProxy.Host.Domain;
namespace ZB.MOM.WW.LmxProxy.Host.Health
{
    /// <summary>
    /// Detailed health check: reads a test tag through the SCADA client and
    /// checks the returned quality and timestamp staleness.
    /// </summary>
    public class DetailedHealthCheckService : IHealthCheck
    {
        /// <summary>
        /// Maximum age of the test tag's timestamp before the check reports Degraded.
        /// </summary>
        private static readonly TimeSpan StaleThreshold = TimeSpan.FromMinutes(5);

        private static readonly ILogger Logger = Log.ForContext<DetailedHealthCheckService>();

        private readonly IScadaClient _scadaClient;
        private readonly string _testTagAddress;

        /// <summary>
        /// Creates the health check.
        /// </summary>
        /// <param name="scadaClient">Client used to test connectivity and read the test tag.</param>
        /// <param name="testTagAddress">Address of the tag that is read on every check.</param>
        public DetailedHealthCheckService(
            IScadaClient scadaClient,
            string testTagAddress = "DevPlatform.Scheduler.ScanTime")
        {
            _scadaClient = scadaClient;
            _testTagAddress = testTagAddress;
        }

        /// <summary>
        /// Runs the check: Unhealthy when the client is not connected or an unexpected
        /// error occurs; Degraded when the test tag cannot be read, its quality is not
        /// good, or its timestamp is older than <see cref="StaleThreshold"/>; Healthy otherwise.
        /// </summary>
        /// <param name="context">Health check context (not used by this check).</param>
        /// <param name="cancellationToken">Token forwarded to the tag read.</param>
        /// <returns>The health result, with test tag details in its data dictionary when a read was attempted.</returns>
        /// <exception cref="OperationCanceledException">
        /// Propagated when <paramref name="cancellationToken"/> was cancelled by the caller,
        /// so that a cancelled check is not misreported as Degraded or Unhealthy.
        /// </exception>
        public async Task<HealthCheckResult> CheckHealthAsync(
            HealthCheckContext context,
            CancellationToken cancellationToken = default)
        {
            try
            {
                if (!_scadaClient.IsConnected)
                {
                    return HealthCheckResult.Unhealthy("SCADA client is not connected");
                }

                Vtq vtq;
                try
                {
                    vtq = await _scadaClient.ReadAsync(_testTagAddress, cancellationToken);
                }
                catch (Exception ex) when (!IsCallerCancellation(ex, cancellationToken))
                {
                    // A failed read is a degradation, not an outage: the connection
                    // itself was reported as up by IsConnected above.
                    Logger.Warning(ex, "Could not read test tag {Tag}", _testTagAddress);
                    return HealthCheckResult.Degraded(
                        "Could not read test tag: " + ex.Message,
                        data: new Dictionary<string, object>
                        {
                            { "test_tag", _testTagAddress },
                            { "error", ex.Message }
                        });
                }

                var data = new Dictionary<string, object>
                {
                    { "test_tag", _testTagAddress },
                    { "quality", vtq.Quality.ToString() },
                    { "timestamp", vtq.Timestamp.ToString("o") }
                };

                if (!vtq.Quality.IsGood())
                {
                    return HealthCheckResult.Degraded(
                        "Test tag quality is not Good: " + vtq.Quality,
                        data: data);
                }

                // NOTE(review): assumes vtq.Timestamp is expressed in UTC — confirm against Vtq.
                if (DateTime.UtcNow - vtq.Timestamp > StaleThreshold)
                {
                    return HealthCheckResult.Degraded(
                        "Test tag data is stale (older than 5 minutes)",
                        data: data);
                }

                return HealthCheckResult.Healthy(
                    "Test tag read successful with good quality",
                    data: data);
            }
            catch (Exception ex) when (!IsCallerCancellation(ex, cancellationToken))
            {
                Logger.Error(ex, "Detailed health check failed");
                return HealthCheckResult.Unhealthy(
                    "Detailed health check failed: " + ex.Message, ex);
            }
        }

        /// <summary>
        /// True when <paramref name="ex"/> is a cancellation exception and the caller's
        /// token has actually been cancelled. Cancellation exceptions raised for other
        /// reasons are still reported as health results.
        /// </summary>
        private static bool IsCallerCancellation(Exception ex, CancellationToken cancellationToken)
        {
            return ex is OperationCanceledException && cancellationToken.IsCancellationRequested;
        }
    }
}

View File

@@ -30,7 +30,6 @@ namespace ZB.MOM.WW.LmxProxy.Host
private ApiKeyService? _apiKeyService;
private PerformanceMetrics? _performanceMetrics;
private HealthCheckService? _healthCheckService;
private DetailedHealthCheckService? _detailedHealthCheckService;
private StatusReportService? _statusReportService;
private StatusWebServer? _statusWebServer;
private Server? _grpcServer;
@@ -119,13 +118,11 @@ namespace ZB.MOM.WW.LmxProxy.Host
// 10. Create health check services
_healthCheckService = new HealthCheckService(_mxAccessClient, _subscriptionManager, _performanceMetrics);
_detailedHealthCheckService = new DetailedHealthCheckService(
_mxAccessClient, _config.HealthCheck.TestTagAddress);
// 11. Create status report service
_statusReportService = new StatusReportService(
_mxAccessClient, _subscriptionManager, _performanceMetrics,
_healthCheckService, _detailedHealthCheckService);
_healthCheckService);
// 12. Start status web server
_statusWebServer = new StatusWebServer(_config.WebServer, _statusReportService);

View File

@@ -28,7 +28,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
{
try
{
Log.Information("OnDataChange FIRED: handle={Handle}", phItemHandle);
var quality = MapQuality(pwItemQuality);
var timestamp = ConvertTimestamp(pftItemTimeStamp);

View File

@@ -36,8 +36,12 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
{
SubscribeInternal(address);
// Store for reconnect replay
_storedSubscriptions[address] = callback;
// Store for reconnect replay (but don't overwrite the probe tag's callback)
if (_probeTestTagAddress == null ||
!string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase))
{
_storedSubscriptions[address] = callback;
}
}
}
});
@@ -70,7 +74,13 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
foreach (var address in addressList)
{
UnsubscribeInternal(address);
_storedSubscriptions.Remove(address);
// Don't remove probe tag from stored subscriptions — it's permanent
if (_probeTestTagAddress == null ||
!string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase))
{
_storedSubscriptions.Remove(address);
}
}
}
});
@@ -149,6 +159,14 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
/// </summary>
private void UnsubscribeInternal(string address)
{
// Never unsubscribe the probe tag — it's a permanent connection health monitor
if (_probeTestTagAddress != null &&
string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase))
{
Log.Debug("Skipping unsubscribe for probe tag {Address}", address);
return;
}
if (!_addressToHandle.TryGetValue(address, out int itemHandle))
{
Log.Debug("No active subscription for {Address}, skipping unsubscribe", address);

View File

@@ -10,10 +10,12 @@ namespace ZB.MOM.WW.LmxProxy.Host
{
static int Main(string[] args)
{
// 1. Build configuration
// 1. Build configuration (instance override file loaded from LMXPROXY_INSTANCE env var)
var instance = Environment.GetEnvironmentVariable("LMXPROXY_INSTANCE");
var configuration = new ConfigurationBuilder()
.SetBasePath(AppDomain.CurrentDomain.BaseDirectory)
.AddJsonFile("appsettings.json", optional: false, reloadOnChange: false)
.AddJsonFile($"appsettings.{instance}.json", optional: true, reloadOnChange: false)
.AddEnvironmentVariables()
.Build();

View File

@@ -12,7 +12,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
public SubscriptionStatus Subscriptions { get; set; } = new SubscriptionStatus();
public PerformanceStatus Performance { get; set; } = new PerformanceStatus();
public HealthInfo Health { get; set; } = new HealthInfo();
public HealthInfo? DetailedHealth { get; set; }
}
public class ConnectionStatus

View File

@@ -9,7 +9,6 @@ using Newtonsoft.Json;
using Newtonsoft.Json.Serialization;
using Serilog;
using ZB.MOM.WW.LmxProxy.Host.Domain;
using ZB.MOM.WW.LmxProxy.Host.Health;
using HealthCheckService = ZB.MOM.WW.LmxProxy.Host.Health.HealthCheckService;
using ZB.MOM.WW.LmxProxy.Host.Metrics;
using ZB.MOM.WW.LmxProxy.Host.Subscriptions;
@@ -27,20 +26,17 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
private readonly SubscriptionManager _subscriptionManager;
private readonly PerformanceMetrics _performanceMetrics;
private readonly HealthCheckService _healthCheckService;
private readonly DetailedHealthCheckService? _detailedHealthCheckService;
public StatusReportService(
IScadaClient scadaClient,
SubscriptionManager subscriptionManager,
PerformanceMetrics performanceMetrics,
HealthCheckService healthCheckService,
DetailedHealthCheckService? detailedHealthCheckService = null)
HealthCheckService healthCheckService)
{
_scadaClient = scadaClient;
_subscriptionManager = subscriptionManager;
_performanceMetrics = performanceMetrics;
_healthCheckService = healthCheckService;
_detailedHealthCheckService = detailedHealthCheckService;
}
public async Task<string> GenerateHtmlReportAsync()
@@ -144,24 +140,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
}
}
// Detailed health check (optional)
if (_detailedHealthCheckService != null)
{
var detailedResult = await _detailedHealthCheckService.CheckHealthAsync(new HealthCheckContext());
statusData.DetailedHealth = new HealthInfo
{
Status = detailedResult.Status.ToString(),
Description = detailedResult.Description ?? ""
};
if (detailedResult.Data != null)
{
foreach (var kvp in detailedResult.Data)
{
statusData.DetailedHealth.Data[kvp.Key] = kvp.Value?.ToString() ?? "";
}
}
}
return statusData;
}
@@ -264,18 +242,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
sb.AppendLine(" </table>");
sb.AppendLine(" </div>");
// Detailed health (if available)
if (statusData.DetailedHealth != null)
{
var detailedClass = GetHealthCardClass(statusData.DetailedHealth.Status);
var detailedCss = GetHealthStatusCss(statusData.DetailedHealth.Status);
sb.AppendLine($" <div class=\"card {detailedClass}\">");
sb.AppendLine(" <h3>Detailed Health Check</h3>");
sb.AppendLine($" <p class=\"{detailedCss}\">{statusData.DetailedHealth.Status}</p>");
sb.AppendLine($" <p>{statusData.DetailedHealth.Description}</p>");
sb.AppendLine(" </div>");
}
sb.AppendLine($" <div class=\"footer\">Last updated: {statusData.Timestamp:yyyy-MM-dd HH:mm:ss} UTC | Service: {statusData.ServiceName} v{statusData.Version}</div>");
sb.AppendLine("</body>");
sb.AppendLine("</html>");

View File

@@ -0,0 +1,6 @@
{
"GrpcPort": 50100,
"WebServer": {
"Port": 8081
}
}

View File

@@ -0,0 +1,6 @@
{
"GrpcPort": 50101,
"WebServer": {
"Port": 8082
}
}