using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Serilog;
using ZB.MOM.WW.LmxProxy.Host.Domain;

namespace ZB.MOM.WW.LmxProxy.Host.Services
{
    /// <summary>
    ///     Health check service for monitoring LmxProxy health.
    ///     Combines the SCADA connection flag, subscription statistics and the
    ///     per-operation performance metrics into a single <see cref="HealthCheckResult" />.
    /// </summary>
    public class HealthCheckService : IHealthCheck
    {
        /// <summary>Average success rate below which the proxy is reported as degraded.</summary>
        private const double MinimumSuccessRate = 0.5;

        /// <summary>The success-rate rule only applies once more than this many operations were recorded.</summary>
        private const long MinimumOperationsForRateCheck = 100;

        /// <summary>Client count above which the proxy is reported as degraded.</summary>
        private const int MaximumClientCount = 100;

        private static readonly ILogger Logger = Log.ForContext<HealthCheckService>();

        private readonly PerformanceMetrics _performanceMetrics;
        private readonly IScadaClient _scadaClient;
        private readonly SubscriptionManager _subscriptionManager;

        /// <summary>
        ///     Creates the health check.
        /// </summary>
        /// <param name="scadaClient">Client whose connection state is reported.</param>
        /// <param name="subscriptionManager">Source of the client/tag subscription counts.</param>
        /// <param name="performanceMetrics">Source of the per-operation statistics.</param>
        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
        public HealthCheckService(
            IScadaClient scadaClient,
            SubscriptionManager subscriptionManager,
            PerformanceMetrics performanceMetrics)
        {
            _scadaClient = scadaClient ?? throw new ArgumentNullException(nameof(scadaClient));
            _subscriptionManager = subscriptionManager ?? throw new ArgumentNullException(nameof(subscriptionManager));
            _performanceMetrics = performanceMetrics ?? throw new ArgumentNullException(nameof(performanceMetrics));
        }

        /// <summary>
        ///     Evaluates the current health of the proxy.
        /// </summary>
        /// <param name="context">Health check context; not read by this implementation.</param>
        /// <param name="cancellationToken">Cancellation token; not observed because the check runs synchronously.</param>
        /// <returns>
        ///     A completed task holding: Unhealthy when the SCADA client is not connected or the check itself
        ///     threw; Degraded when the average success rate is below 50% with more than 100 recorded operations,
        ///     or when more than 100 clients are subscribed; Healthy otherwise. Every result carries the
        ///     collected data dictionary.
        /// </returns>
        public Task<HealthCheckResult> CheckHealthAsync(
            HealthCheckContext context,
            CancellationToken cancellationToken = default)
        {
            var data = new Dictionary<string, object>();

            try
            {
                // Check SCADA connection
                bool isConnected = _scadaClient.IsConnected;
                ConnectionState connectionState = _scadaClient.ConnectionState;
                data["scada_connected"] = isConnected;
                data["scada_connection_state"] = connectionState.ToString();

                // Get subscription statistics
                SubscriptionStats subscriptionStats = _subscriptionManager.GetSubscriptionStats();
                data["total_clients"] = subscriptionStats.TotalClients;
                data["total_tags"] = subscriptionStats.TotalTags;

                // Get performance metrics
                var metrics = _performanceMetrics.GetAllMetrics();
                long totalOperations = 0L;
                double averageSuccessRate = 0.0;
                int sampledOperations = 0;

                foreach (OperationMetrics? metric in metrics.Values)
                {
                    // The entry is declared nullable, so skip missing ones instead of dereferencing null.
                    if (metric is null)
                    {
                        continue;
                    }

                    MetricsStatistics stats = metric.GetStatistics();
                    totalOperations += stats.TotalCount;
                    averageSuccessRate += stats.SuccessRate;
                    sampledOperations++;
                }

                // Unweighted mean across operations; divide only by the entries that were actually sampled.
                if (sampledOperations > 0)
                {
                    averageSuccessRate /= sampledOperations;
                }

                data["total_operations"] = totalOperations;
                data["average_success_rate"] = averageSuccessRate;

                // Determine health status; the first matching rule wins.
                if (!isConnected)
                {
                    return Task.FromResult(HealthCheckResult.Unhealthy(
                        "SCADA client is not connected",
                        data: data));
                }

                if (averageSuccessRate < MinimumSuccessRate && totalOperations > MinimumOperationsForRateCheck)
                {
                    return Task.FromResult(HealthCheckResult.Degraded(
                        $"Low success rate: {averageSuccessRate:P}",
                        data: data));
                }

                if (subscriptionStats.TotalClients > MaximumClientCount)
                {
                    return Task.FromResult(HealthCheckResult.Degraded(
                        $"High client count: {subscriptionStats.TotalClients}",
                        data: data));
                }

                return Task.FromResult(HealthCheckResult.Healthy(
                    "LmxProxy is healthy",
                    data));
            }
            catch (Exception ex)
            {
                // A failing probe must not escape the health endpoint; report it as Unhealthy instead.
                Logger.Error(ex, "Health check failed");
                data["error"] = ex.Message;
                return Task.FromResult(HealthCheckResult.Unhealthy(
                    "Health check threw an exception",
                    ex,
                    data));
            }
        }
    }

    /// <summary>
    ///     Detailed health check that performs additional connectivity tests
    ///     by reading a test tag through the SCADA client.
    /// </summary>
    public class DetailedHealthCheckService : IHealthCheck
    {
        /// <summary>Maximum age of the test tag timestamp before the check reports Degraded.</summary>
        private static readonly TimeSpan MaximumTestTagAge = TimeSpan.FromMinutes(5);

        private static readonly ILogger Logger = Log.ForContext<DetailedHealthCheckService>();

        private readonly IScadaClient _scadaClient;
        private readonly string _testTagAddress;

        /// <summary>
        ///     Creates the detailed health check.
        /// </summary>
        /// <param name="scadaClient">Client used for the connectivity test.</param>
        /// <param name="testTagAddress">Address of the tag read during the check.</param>
        /// <exception cref="ArgumentNullException"><paramref name="scadaClient" /> is <c>null</c>.</exception>
        public DetailedHealthCheckService(IScadaClient scadaClient, string testTagAddress = "System.Heartbeat")
        {
            _scadaClient = scadaClient ?? throw new ArgumentNullException(nameof(scadaClient));
            _testTagAddress = testTagAddress;
        }

        /// <summary>
        ///     Checks the connection and then reads the test tag to verify end-to-end connectivity.
        /// </summary>
        /// <param name="context">Health check context; not read by this implementation.</param>
        /// <param name="cancellationToken">Token forwarded to the test tag read.</param>
        /// <returns>
        ///     Unhealthy when the client is not connected or an unexpected exception occurs; Degraded when
        ///     the test tag cannot be read, its quality is not <c>Quality.Good</c>, or its timestamp is more
        ///     than five minutes old; Healthy otherwise.
        /// </returns>
        public async Task<HealthCheckResult> CheckHealthAsync(
            HealthCheckContext context,
            CancellationToken cancellationToken = default)
        {
            var data = new Dictionary<string, object>();

            try
            {
                // Basic connectivity check
                if (!_scadaClient.IsConnected)
                {
                    data["connected"] = false;
                    return HealthCheckResult.Unhealthy("SCADA client is not connected", data: data);
                }

                data["connected"] = true;

                // Try to read a test tag
                try
                {
                    Vtq vtq = await _scadaClient.ReadAsync(_testTagAddress, cancellationToken);
                    data["test_tag_quality"] = vtq.Quality.ToString();
                    data["test_tag_timestamp"] = vtq.Timestamp;

                    if (vtq.Quality != Quality.Good)
                    {
                        return HealthCheckResult.Degraded(
                            $"Test tag quality is {vtq.Quality}",
                            data: data);
                    }

                    // Check if timestamp is recent.
                    // NOTE(review): compared against DateTime.UtcNow, so this assumes Vtq.Timestamp is UTC - confirm.
                    TimeSpan age = DateTime.UtcNow - vtq.Timestamp;
                    if (age > MaximumTestTagAge)
                    {
                        data["timestamp_age_minutes"] = age.TotalMinutes;
                        return HealthCheckResult.Degraded(
                            $"Test tag timestamp is stale ({age.TotalMinutes:F1} minutes old)",
                            data: data);
                    }
                }
                catch (Exception readEx)
                {
                    // A failed read while connected is reported as Degraded rather than Unhealthy.
                    // NOTE(review): this also turns a cancelled read into Degraded - confirm that is intended.
                    data["test_tag_error"] = readEx.Message;
                    return HealthCheckResult.Degraded(
                        "Could not read test tag",
                        data: data);
                }

                return HealthCheckResult.Healthy("All checks passed", data);
            }
            catch (Exception ex)
            {
                // A failing probe must not escape the health endpoint; report it as Unhealthy instead.
                Logger.Error(ex, "Detailed health check failed");
                data["error"] = ex.Message;
                return HealthCheckResult.Unhealthy(
                    "Health check threw an exception",
                    ex,
                    data);
            }
        }
    }
}