190 lines
7.1 KiB
C#
190 lines
7.1 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
|
using Serilog;
|
|
using ZB.MOM.WW.LmxProxy.Host.Domain;
|
|
|
|
namespace ZB.MOM.WW.LmxProxy.Host.Services
|
|
{
|
|
/// <summary>
/// Health check that summarizes LmxProxy state from the SCADA connection,
/// the subscription statistics and the recorded performance metrics.
/// </summary>
/// <remarks>
/// Result rules, evaluated in this order:
/// <list type="bullet">
/// <item><description>Unhealthy when the SCADA client is not connected, or when gathering the data throws.</description></item>
/// <item><description>Degraded when more than 100 operations were recorded and fewer than 50% of them succeeded.</description></item>
/// <item><description>Degraded when more than 100 clients are subscribed.</description></item>
/// <item><description>Healthy otherwise.</description></item>
/// </list>
/// </remarks>
public class HealthCheckService : IHealthCheck
{
    /// <summary>Success rate (as a fraction) below which the proxy is reported as degraded.</summary>
    private const double MinimumSuccessRate = 0.5;

    /// <summary>Number of recorded operations that must be exceeded before the success rate is evaluated.</summary>
    private const long MinimumOperationsForRateCheck = 100;

    /// <summary>Number of subscribed clients above which the proxy is reported as degraded.</summary>
    private const int MaximumHealthyClientCount = 100;

    private static readonly ILogger Logger = Log.ForContext<HealthCheckService>();
    private readonly PerformanceMetrics _performanceMetrics;

    private readonly IScadaClient _scadaClient;
    private readonly SubscriptionManager _subscriptionManager;

    /// <summary>
    /// Creates the health check.
    /// </summary>
    /// <param name="scadaClient">Client whose connection state is reported.</param>
    /// <param name="subscriptionManager">Source of the client and tag counts.</param>
    /// <param name="performanceMetrics">Source of the per-operation statistics.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public HealthCheckService(
        IScadaClient scadaClient,
        SubscriptionManager subscriptionManager,
        PerformanceMetrics performanceMetrics)
    {
        _scadaClient = scadaClient ?? throw new ArgumentNullException(nameof(scadaClient));
        _subscriptionManager = subscriptionManager ?? throw new ArgumentNullException(nameof(subscriptionManager));
        _performanceMetrics = performanceMetrics ?? throw new ArgumentNullException(nameof(performanceMetrics));
    }

    /// <summary>
    /// Runs the health check synchronously and returns an already completed task.
    /// </summary>
    /// <param name="context">Health check context; not used by this check.</param>
    /// <param name="cancellationToken">Cancellation token; not used because no asynchronous work is performed.</param>
    /// <returns>
    /// The health result. Exceptions raised while collecting the data are logged and
    /// converted into an Unhealthy result instead of being thrown.
    /// </returns>
    public Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var data = new Dictionary<string, object>();

        try
        {
            return Task.FromResult(Evaluate(data));
        }
        catch (Exception ex)
        {
            Logger.Error(ex, "Health check failed");

            // 'data' keeps whatever Evaluate managed to record before it failed.
            data["error"] = ex.Message;

            return Task.FromResult(HealthCheckResult.Unhealthy(
                "Health check threw an exception",
                ex,
                data));
        }
    }

    /// <summary>
    /// Collects the diagnostic values into <paramref name="data"/> and derives the health status from them.
    /// </summary>
    /// <param name="data">Dictionary that is filled in place and attached to the returned result.</param>
    /// <returns>The health result for the collected values.</returns>
    private HealthCheckResult Evaluate(Dictionary<string, object> data)
    {
        // SCADA connection
        bool isConnected = _scadaClient.IsConnected;
        data["scada_connected"] = isConnected;
        data["scada_connection_state"] = _scadaClient.ConnectionState.ToString();

        // Subscription statistics
        SubscriptionStats subscriptionStats = _subscriptionManager.GetSubscriptionStats();
        data["total_clients"] = subscriptionStats.TotalClients;
        data["total_tags"] = subscriptionStats.TotalTags;

        // Performance metrics
        IReadOnlyDictionary<string, OperationMetrics> metrics = _performanceMetrics.GetAllMetrics();
        long totalOperations = 0L;
        double weightedSuccess = 0.0;

        foreach (OperationMetrics metric in metrics.Values)
        {
            MetricsStatistics stats = metric.GetStatistics();
            if (stats.TotalCount <= 0)
            {
                // An operation that never ran has no meaningful success rate
                // and must not influence the overall figure.
                continue;
            }

            totalOperations += stats.TotalCount;

            // Weight each rate by its operation count: a plain average of the
            // per-operation rates lets a rarely used operation outweigh a busy one.
            weightedSuccess += stats.SuccessRate * stats.TotalCount;
        }

        double averageSuccessRate = totalOperations > 0
            ? weightedSuccess / totalOperations
            : 0.0;

        data["total_operations"] = totalOperations;
        data["average_success_rate"] = averageSuccessRate;

        // Determine health status
        if (!isConnected)
        {
            return HealthCheckResult.Unhealthy(
                "SCADA client is not connected",
                data: data);
        }

        if (averageSuccessRate < MinimumSuccessRate && totalOperations > MinimumOperationsForRateCheck)
        {
            return HealthCheckResult.Degraded(
                $"Low success rate: {averageSuccessRate:P}",
                data: data);
        }

        if (subscriptionStats.TotalClients > MaximumHealthyClientCount)
        {
            return HealthCheckResult.Degraded(
                $"High client count: {subscriptionStats.TotalClients}",
                data: data);
        }

        return HealthCheckResult.Healthy(
            "LmxProxy is healthy",
            data);
    }
}
|
|
|
|
/// <summary>
/// Detailed health check that, besides the connection state, reads a test tag
/// from the SCADA client and inspects its quality and timestamp.
/// </summary>
/// <remarks>
/// Result rules, evaluated in this order:
/// <list type="bullet">
/// <item><description>Unhealthy when the SCADA client is not connected, or on an unexpected exception.</description></item>
/// <item><description>Degraded when the test tag cannot be read, its quality is not Good, or its timestamp is older than five minutes.</description></item>
/// <item><description>Healthy otherwise.</description></item>
/// </list>
/// </remarks>
public class DetailedHealthCheckService : IHealthCheck
{
    /// <summary>Tag that is read when the caller supplies no usable test tag address.</summary>
    private const string DefaultTestTagAddress = "System.Heartbeat";

    /// <summary>Maximum age of the test tag timestamp before the value is considered stale.</summary>
    private static readonly TimeSpan MaxTestTagAge = TimeSpan.FromMinutes(5);

    private static readonly ILogger Logger = Log.ForContext<DetailedHealthCheckService>();

    private readonly IScadaClient _scadaClient;
    private readonly string _testTagAddress;

    /// <summary>
    /// Creates the detailed health check.
    /// </summary>
    /// <param name="scadaClient">Client used for the connectivity test.</param>
    /// <param name="testTagAddress">
    /// Address of the tag to read. A <c>null</c>, empty or whitespace-only value
    /// falls back to <c>System.Heartbeat</c>.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="scadaClient"/> is <c>null</c>.</exception>
    public DetailedHealthCheckService(IScadaClient scadaClient, string testTagAddress = DefaultTestTagAddress)
    {
        _scadaClient = scadaClient ?? throw new ArgumentNullException(nameof(scadaClient));

        // A blank address can never be read successfully; use the default tag
        // instead of reporting a permanently degraded check.
        _testTagAddress = string.IsNullOrWhiteSpace(testTagAddress)
            ? DefaultTestTagAddress
            : testTagAddress;
    }

    /// <summary>
    /// Runs the connectivity test and the test tag read.
    /// </summary>
    /// <param name="context">Health check context; not used by this check.</param>
    /// <param name="cancellationToken">Token forwarded to the test tag read.</param>
    /// <returns>
    /// The health result. No exception escapes this method: every failure of the
    /// tag read (including a cancelled read) is reported as Degraded, and any other
    /// exception is reported as Unhealthy.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        var data = new Dictionary<string, object>();

        try
        {
            // Basic connectivity check
            if (!_scadaClient.IsConnected)
            {
                data["connected"] = false;
                return HealthCheckResult.Unhealthy("SCADA client is not connected", data: data);
            }

            data["connected"] = true;

            // Try to read a test tag
            try
            {
                Vtq vtq = await _scadaClient.ReadAsync(_testTagAddress, cancellationToken);
                data["test_tag_quality"] = vtq.Quality.ToString();
                data["test_tag_timestamp"] = vtq.Timestamp;

                if (vtq.Quality != Quality.Good)
                {
                    return HealthCheckResult.Degraded(
                        $"Test tag quality is {vtq.Quality}",
                        data: data);
                }

                // Check that the timestamp is recent.
                // NOTE(review): the comparison assumes vtq.Timestamp is expressed in UTC - confirm.
                TimeSpan age = DateTime.UtcNow - vtq.Timestamp;
                if (age > MaxTestTagAge)
                {
                    data["timestamp_age_minutes"] = age.TotalMinutes;
                    return HealthCheckResult.Degraded(
                        $"Test tag timestamp is stale ({age.TotalMinutes:F1} minutes old)",
                        data: data);
                }
            }
            catch (Exception readEx)
            {
                // A failed read only degrades the check, but the cause must stay
                // diagnosable: log it and attach the exception to the result.
                Logger.Warning(readEx, "Health check could not read test tag {TestTagAddress}", _testTagAddress);
                data["test_tag_error"] = readEx.Message;

                return HealthCheckResult.Degraded(
                    "Could not read test tag",
                    readEx,
                    data);
            }

            return HealthCheckResult.Healthy("All checks passed", data);
        }
        catch (Exception ex)
        {
            Logger.Error(ex, "Detailed health check failed");
            data["error"] = ex.Message;

            return HealthCheckResult.Unhealthy(
                "Health check threw an exception",
                ex,
                data);
        }
    }
}
|
|
}
|