fix(lmxproxy): protect probe subscription from ReadAsync teardown, add instance configs
ReadAsync internally subscribes to and then unsubscribes from the same ScanTime tag used by the persistent probe, which was tearing down the probe subscription and triggering false reconnects every ~5s. Guard UnsubscribeInternal and the stored subscription state so the probe tag is never removed by other callers.

Also removes DetailedHealthCheckService (redundant with the persistent probe) and adds per-instance config files (appsettings.v2.json, appsettings.v2b.json), loaded via the LMXPROXY_INSTANCE env var, so deploys no longer overwrite port settings.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,90 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.LmxProxy.Host.Domain;
|
||||
|
||||
namespace ZB.MOM.WW.LmxProxy.Host.Health
{
    /// <summary>
    /// Detailed health check: reads a test tag, checks quality and timestamp staleness.
    /// </summary>
    /// <remarks>
    /// Result mapping:
    /// <list type="bullet">
    /// <item><description>Unhealthy — the SCADA client reports it is not connected, or an unexpected exception escapes the check.</description></item>
    /// <item><description>Degraded — the test tag read throws, the returned quality is not good, or the returned timestamp is older than the staleness threshold.</description></item>
    /// <item><description>Healthy — the read succeeds with good quality and a fresh timestamp.</description></item>
    /// </list>
    /// A cancellation requested through the supplied token is not converted into a health
    /// result; the <see cref="OperationCanceledException"/> is allowed to propagate.
    /// </remarks>
    public class DetailedHealthCheckService : IHealthCheck
    {
        /// <summary>Tag address read when the caller supplies none (or a blank one).</summary>
        private const string DefaultTestTagAddress = "DevPlatform.Scheduler.ScanTime";

        /// <summary>Maximum age of the test tag timestamp before the check reports Degraded.</summary>
        private static readonly TimeSpan StaleThreshold = TimeSpan.FromMinutes(5);

        private static readonly ILogger Logger = Log.ForContext<DetailedHealthCheckService>();

        private readonly IScadaClient _scadaClient;
        private readonly string _testTagAddress;

        /// <summary>
        /// Creates the health check.
        /// </summary>
        /// <param name="scadaClient">Client used to test connectivity and read the test tag.</param>
        /// <param name="testTagAddress">
        /// Address of the tag to read. A null, empty or whitespace value falls back to the default address.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="scadaClient"/> is null.</exception>
        public DetailedHealthCheckService(
            IScadaClient scadaClient,
            string testTagAddress = DefaultTestTagAddress)
        {
            _scadaClient = scadaClient ?? throw new ArgumentNullException(nameof(scadaClient));

            // A missing configuration value would otherwise reach ReadAsync as a null/blank address.
            _testTagAddress = string.IsNullOrWhiteSpace(testTagAddress)
                ? DefaultTestTagAddress
                : testTagAddress;
        }

        /// <summary>
        /// Reads the test tag and translates the outcome into a <see cref="HealthCheckResult"/>.
        /// </summary>
        /// <param name="context">Health check context; not used by this implementation.</param>
        /// <param name="cancellationToken">Token passed through to the tag read.</param>
        /// <returns>Healthy, Degraded or Unhealthy as described in the class remarks.</returns>
        /// <exception cref="OperationCanceledException">
        /// The read was cancelled and <paramref name="cancellationToken"/> has cancellation requested.
        /// </exception>
        public async Task<HealthCheckResult> CheckHealthAsync(
            HealthCheckContext context,
            CancellationToken cancellationToken = default)
        {
            try
            {
                if (!_scadaClient.IsConnected)
                {
                    return HealthCheckResult.Unhealthy("SCADA client is not connected");
                }

                Vtq vtq;
                try
                {
                    vtq = await _scadaClient.ReadAsync(_testTagAddress, cancellationToken);
                }
                catch (Exception ex) when (!IsRequestedCancellation(ex, cancellationToken))
                {
                    // A failed read is only "Degraded": the connection itself is still up.
                    Logger.Warning(ex, "Could not read test tag {Tag}", _testTagAddress);
                    return HealthCheckResult.Degraded(
                        "Could not read test tag: " + ex.Message,
                        data: new Dictionary<string, object>
                        {
                            { "test_tag", _testTagAddress },
                            { "error", ex.Message }
                        });
                }

                var data = new Dictionary<string, object>
                {
                    { "test_tag", _testTagAddress },
                    { "quality", vtq.Quality.ToString() },
                    { "timestamp", vtq.Timestamp.ToString("o") }
                };

                if (!vtq.Quality.IsGood())
                {
                    return HealthCheckResult.Degraded(
                        "Test tag quality is not Good: " + vtq.Quality,
                        data: data);
                }

                // NOTE(review): assumes vtq.Timestamp is expressed in UTC — confirm against Vtq.
                if (DateTime.UtcNow - vtq.Timestamp > StaleThreshold)
                {
                    return HealthCheckResult.Degraded(
                        "Test tag data is stale (older than 5 minutes)",
                        data: data);
                }

                return HealthCheckResult.Healthy(
                    "Test tag read successful with good quality",
                    data: data);
            }
            catch (Exception ex) when (!IsRequestedCancellation(ex, cancellationToken))
            {
                Logger.Error(ex, "Detailed health check failed");
                return HealthCheckResult.Unhealthy(
                    "Detailed health check failed: " + ex.Message, ex);
            }
        }

        /// <summary>
        /// True when <paramref name="ex"/> is a cancellation exception and the caller's token
        /// has actually been cancelled, i.e. the caller asked for the check to stop.
        /// </summary>
        private static bool IsRequestedCancellation(Exception ex, CancellationToken token)
        {
            return ex is OperationCanceledException && token.IsCancellationRequested;
        }
    }
}
|
||||
@@ -30,7 +30,6 @@ namespace ZB.MOM.WW.LmxProxy.Host
|
||||
private ApiKeyService? _apiKeyService;
|
||||
private PerformanceMetrics? _performanceMetrics;
|
||||
private HealthCheckService? _healthCheckService;
|
||||
private DetailedHealthCheckService? _detailedHealthCheckService;
|
||||
private StatusReportService? _statusReportService;
|
||||
private StatusWebServer? _statusWebServer;
|
||||
private Server? _grpcServer;
|
||||
@@ -119,13 +118,11 @@ namespace ZB.MOM.WW.LmxProxy.Host
|
||||
|
||||
// 10. Create health check services
|
||||
_healthCheckService = new HealthCheckService(_mxAccessClient, _subscriptionManager, _performanceMetrics);
|
||||
_detailedHealthCheckService = new DetailedHealthCheckService(
|
||||
_mxAccessClient, _config.HealthCheck.TestTagAddress);
|
||||
|
||||
// 11. Create status report service
|
||||
_statusReportService = new StatusReportService(
|
||||
_mxAccessClient, _subscriptionManager, _performanceMetrics,
|
||||
_healthCheckService, _detailedHealthCheckService);
|
||||
_healthCheckService);
|
||||
|
||||
// 12. Start status web server
|
||||
_statusWebServer = new StatusWebServer(_config.WebServer, _statusReportService);
|
||||
|
||||
@@ -28,7 +28,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
|
||||
{
|
||||
try
|
||||
{
|
||||
Log.Information("OnDataChange FIRED: handle={Handle}", phItemHandle);
|
||||
var quality = MapQuality(pwItemQuality);
|
||||
var timestamp = ConvertTimestamp(pftItemTimeStamp);
|
||||
|
||||
|
||||
@@ -36,8 +36,12 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
|
||||
{
|
||||
SubscribeInternal(address);
|
||||
|
||||
// Store for reconnect replay
|
||||
_storedSubscriptions[address] = callback;
|
||||
// Store for reconnect replay (but don't overwrite the probe tag's callback)
|
||||
if (_probeTestTagAddress == null ||
|
||||
!string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
_storedSubscriptions[address] = callback;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -70,7 +74,13 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
|
||||
foreach (var address in addressList)
|
||||
{
|
||||
UnsubscribeInternal(address);
|
||||
_storedSubscriptions.Remove(address);
|
||||
|
||||
// Don't remove probe tag from stored subscriptions — it's permanent
|
||||
if (_probeTestTagAddress == null ||
|
||||
!string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
_storedSubscriptions.Remove(address);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -149,6 +159,14 @@ namespace ZB.MOM.WW.LmxProxy.Host.MxAccess
|
||||
/// </summary>
|
||||
private void UnsubscribeInternal(string address)
|
||||
{
|
||||
// Never unsubscribe the probe tag — it's a permanent connection health monitor
|
||||
if (_probeTestTagAddress != null &&
|
||||
string.Equals(address, _probeTestTagAddress, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
Log.Debug("Skipping unsubscribe for probe tag {Address}", address);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!_addressToHandle.TryGetValue(address, out int itemHandle))
|
||||
{
|
||||
Log.Debug("No active subscription for {Address}, skipping unsubscribe", address);
|
||||
|
||||
@@ -10,10 +10,12 @@ namespace ZB.MOM.WW.LmxProxy.Host
|
||||
{
|
||||
static int Main(string[] args)
|
||||
{
|
||||
// 1. Build configuration
|
||||
// 1. Build configuration (instance override file loaded from LMXPROXY_INSTANCE env var)
|
||||
var instance = Environment.GetEnvironmentVariable("LMXPROXY_INSTANCE");
|
||||
var configuration = new ConfigurationBuilder()
|
||||
.SetBasePath(AppDomain.CurrentDomain.BaseDirectory)
|
||||
.AddJsonFile("appsettings.json", optional: false, reloadOnChange: false)
|
||||
.AddJsonFile($"appsettings.{instance}.json", optional: true, reloadOnChange: false)
|
||||
.AddEnvironmentVariables()
|
||||
.Build();
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
|
||||
public SubscriptionStatus Subscriptions { get; set; } = new SubscriptionStatus();
|
||||
public PerformanceStatus Performance { get; set; } = new PerformanceStatus();
|
||||
public HealthInfo Health { get; set; } = new HealthInfo();
|
||||
public HealthInfo? DetailedHealth { get; set; }
|
||||
}
|
||||
|
||||
public class ConnectionStatus
|
||||
|
||||
@@ -9,7 +9,6 @@ using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Serialization;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.LmxProxy.Host.Domain;
|
||||
using ZB.MOM.WW.LmxProxy.Host.Health;
|
||||
using HealthCheckService = ZB.MOM.WW.LmxProxy.Host.Health.HealthCheckService;
|
||||
using ZB.MOM.WW.LmxProxy.Host.Metrics;
|
||||
using ZB.MOM.WW.LmxProxy.Host.Subscriptions;
|
||||
@@ -27,20 +26,17 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
|
||||
private readonly SubscriptionManager _subscriptionManager;
|
||||
private readonly PerformanceMetrics _performanceMetrics;
|
||||
private readonly HealthCheckService _healthCheckService;
|
||||
private readonly DetailedHealthCheckService? _detailedHealthCheckService;
|
||||
|
||||
public StatusReportService(
|
||||
IScadaClient scadaClient,
|
||||
SubscriptionManager subscriptionManager,
|
||||
PerformanceMetrics performanceMetrics,
|
||||
HealthCheckService healthCheckService,
|
||||
DetailedHealthCheckService? detailedHealthCheckService = null)
|
||||
HealthCheckService healthCheckService)
|
||||
{
|
||||
_scadaClient = scadaClient;
|
||||
_subscriptionManager = subscriptionManager;
|
||||
_performanceMetrics = performanceMetrics;
|
||||
_healthCheckService = healthCheckService;
|
||||
_detailedHealthCheckService = detailedHealthCheckService;
|
||||
}
|
||||
|
||||
public async Task<string> GenerateHtmlReportAsync()
|
||||
@@ -144,24 +140,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
|
||||
}
|
||||
}
|
||||
|
||||
// Detailed health check (optional)
|
||||
if (_detailedHealthCheckService != null)
|
||||
{
|
||||
var detailedResult = await _detailedHealthCheckService.CheckHealthAsync(new HealthCheckContext());
|
||||
statusData.DetailedHealth = new HealthInfo
|
||||
{
|
||||
Status = detailedResult.Status.ToString(),
|
||||
Description = detailedResult.Description ?? ""
|
||||
};
|
||||
if (detailedResult.Data != null)
|
||||
{
|
||||
foreach (var kvp in detailedResult.Data)
|
||||
{
|
||||
statusData.DetailedHealth.Data[kvp.Key] = kvp.Value?.ToString() ?? "";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return statusData;
|
||||
}
|
||||
|
||||
@@ -264,18 +242,6 @@ namespace ZB.MOM.WW.LmxProxy.Host.Status
|
||||
sb.AppendLine(" </table>");
|
||||
sb.AppendLine(" </div>");
|
||||
|
||||
// Detailed health (if available)
|
||||
if (statusData.DetailedHealth != null)
|
||||
{
|
||||
var detailedClass = GetHealthCardClass(statusData.DetailedHealth.Status);
|
||||
var detailedCss = GetHealthStatusCss(statusData.DetailedHealth.Status);
|
||||
sb.AppendLine($" <div class=\"card {detailedClass}\">");
|
||||
sb.AppendLine(" <h3>Detailed Health Check</h3>");
|
||||
sb.AppendLine($" <p class=\"{detailedCss}\">{statusData.DetailedHealth.Status}</p>");
|
||||
sb.AppendLine($" <p>{statusData.DetailedHealth.Description}</p>");
|
||||
sb.AppendLine(" </div>");
|
||||
}
|
||||
|
||||
sb.AppendLine($" <div class=\"footer\">Last updated: {statusData.Timestamp:yyyy-MM-dd HH:mm:ss} UTC | Service: {statusData.ServiceName} v{statusData.Version}</div>");
|
||||
sb.AppendLine("</body>");
|
||||
sb.AppendLine("</html>");
|
||||
|
||||
6
lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2.json
Normal file
6
lmxproxy/src/ZB.MOM.WW.LmxProxy.Host/appsettings.v2.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"GrpcPort": 50100,
|
||||
"WebServer": {
|
||||
"Port": 8081
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"GrpcPort": 50101,
|
||||
"WebServer": {
|
||||
"Port": 8082
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user