feat(lmxproxy): phase 4 — host health monitoring, metrics, status web server

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-03-22 00:14:40 -04:00
parent 16d1b95e9a
commit 9eb81180c0
12 changed files with 1546 additions and 12 deletions

View File

@@ -0,0 +1,238 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using FluentAssertions;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Xunit;
using ZB.MOM.WW.LmxProxy.Host.Domain;
using ZB.MOM.WW.LmxProxy.Host.Health;
using HealthCheckService = ZB.MOM.WW.LmxProxy.Host.Health.HealthCheckService;
using ZB.MOM.WW.LmxProxy.Host.Metrics;
using ZB.MOM.WW.LmxProxy.Host.Subscriptions;
namespace ZB.MOM.WW.LmxProxy.Host.Tests.Health
{
/// <summary>
/// Tests for <see cref="HealthCheckService"/>: the health status it reports for different
/// combinations of connection state, recorded operation metrics and subscribed clients.
/// </summary>
public class HealthCheckServiceTests
{
    /// <summary>
    /// In-memory <see cref="IScadaClient"/> stand-in. The test sets the connection flags;
    /// every operation completes immediately: reads yield <c>Vtq.Good(42.0)</c>, batch reads
    /// yield an empty dictionary, writes are no-ops, <c>WriteBatchAndWaitAsync</c> reports
    /// <c>(false, 0)</c>, and subscriptions return a handle whose disposal does nothing.
    /// </summary>
    private class FakeScadaClient : IScadaClient
    {
        public bool IsConnected { get; set; } = true;
        public ConnectionState ConnectionState { get; set; } = ConnectionState.Connected;
        public event EventHandler<ConnectionStateChangedEventArgs>? ConnectionStateChanged;

        public Task ConnectAsync(CancellationToken ct = default) => Task.CompletedTask;
        public Task DisconnectAsync(CancellationToken ct = default) => Task.CompletedTask;

        public Task<Vtq> ReadAsync(string address, CancellationToken ct = default) =>
            Task.FromResult(Vtq.Good(42.0));

        public Task<IReadOnlyDictionary<string, Vtq>> ReadBatchAsync(IEnumerable<string> addresses, CancellationToken ct = default) =>
            Task.FromResult<IReadOnlyDictionary<string, Vtq>>(new Dictionary<string, Vtq>());

        public Task WriteAsync(string address, object value, CancellationToken ct = default) => Task.CompletedTask;
        public Task WriteBatchAsync(IReadOnlyDictionary<string, object> values, CancellationToken ct = default) => Task.CompletedTask;

        public Task<(bool flagReached, int elapsedMs)> WriteBatchAndWaitAsync(
            IReadOnlyDictionary<string, object> values, string flagTag, object flagValue,
            int timeoutMs, int pollIntervalMs, CancellationToken ct = default) =>
            Task.FromResult((false, 0));

        public Task<IAsyncDisposable> SubscribeAsync(IEnumerable<string> addresses, Action<string, Vtq> callback, CancellationToken ct = default) =>
            Task.FromResult<IAsyncDisposable>(new FakeHandle());

        public ValueTask DisposeAsync() => default;

        // Raises the event with null args. Not called by the tests in this class;
        // presumably kept so the event has at least one reference — confirm before removing.
        internal void FireEvent() => ConnectionStateChanged?.Invoke(this, null!);

        private class FakeHandle : IAsyncDisposable { public ValueTask DisposeAsync() => default; }
    }

    /// <summary>Expects Healthy for a connected client with one successful recorded operation.</summary>
    [Fact]
    public async Task ReturnsHealthy_WhenConnectedAndNormalMetrics()
    {
        var client = new FakeScadaClient { IsConnected = true, ConnectionState = ConnectionState.Connected };
        using var sm = new SubscriptionManager(client);
        using var pm = new PerformanceMetrics();
        pm.RecordOperation("Read", TimeSpan.FromMilliseconds(10), true);

        var svc = new HealthCheckService(client, sm, pm);
        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Healthy);
    }

    /// <summary>Expects Unhealthy, with a description mentioning "not connected", for a disconnected client.</summary>
    [Fact]
    public async Task ReturnsUnhealthy_WhenNotConnected()
    {
        var client = new FakeScadaClient { IsConnected = false, ConnectionState = ConnectionState.Disconnected };
        using var sm = new SubscriptionManager(client);
        using var pm = new PerformanceMetrics();

        var svc = new HealthCheckService(client, sm, pm);
        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Unhealthy);
        result.Description.Should().Contain("not connected");
    }

    /// <summary>Expects Degraded, with a description mentioning "success rate", after 200 operations of which 40% succeeded.</summary>
    [Fact]
    public async Task ReturnsDegraded_WhenSuccessRateBelow50Percent()
    {
        var client = new FakeScadaClient { IsConnected = true };
        using var sm = new SubscriptionManager(client);
        using var pm = new PerformanceMetrics();

        // Record 200 operations with 40% success rate
        for (int i = 0; i < 80; i++)
        {
            pm.RecordOperation("Read", TimeSpan.FromMilliseconds(10), true);
        }
        for (int i = 0; i < 120; i++)
        {
            pm.RecordOperation("Read", TimeSpan.FromMilliseconds(10), false);
        }

        var svc = new HealthCheckService(client, sm, pm);
        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Degraded);
        result.Description.Should().Contain("success rate");
    }

    /// <summary>Expects Degraded, with a description mentioning "client count", once 101 clients are subscribed.</summary>
    [Fact]
    public async Task ReturnsDegraded_WhenClientCountOver100()
    {
        var client = new FakeScadaClient { IsConnected = true };
        using var sm = new SubscriptionManager(client);
        using var pm = new PerformanceMetrics();

        // The token sources must outlive the health check: disposing each one right after
        // Subscribe would leave live subscriptions holding tokens of already-disposed sources.
        var tokenSources = new List<CancellationTokenSource>();
        try
        {
            // Create 101 subscriptions to exceed the threshold
            for (int i = 0; i < 101; i++)
            {
                var cts = new CancellationTokenSource();
                tokenSources.Add(cts);
                sm.Subscribe("client-" + i, new[] { "tag1" }, cts.Token);
            }

            var svc = new HealthCheckService(client, sm, pm);
            var result = await svc.CheckHealthAsync(new HealthCheckContext());

            result.Status.Should().Be(HealthStatus.Degraded);
            result.Description.Should().Contain("client count");
        }
        finally
        {
            foreach (var cts in tokenSources)
            {
                cts.Dispose();
            }
        }
    }

    /// <summary>Expects Healthy when all 50 recorded operations failed, i.e. the sample is too small to flag.</summary>
    [Fact]
    public async Task DoesNotFlagLowSuccessRate_Under100Operations()
    {
        var client = new FakeScadaClient { IsConnected = true };
        using var sm = new SubscriptionManager(client);
        using var pm = new PerformanceMetrics();

        // Record 50 operations with 0% success rate (under 100 threshold)
        for (int i = 0; i < 50; i++)
        {
            pm.RecordOperation("Read", TimeSpan.FromMilliseconds(10), false);
        }

        var svc = new HealthCheckService(client, sm, pm);
        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Healthy);
    }
}
/// <summary>
/// Tests for <see cref="DetailedHealthCheckService"/>: the health status it reports based on
/// the connection flag and on the outcome of the read performed against the fake client.
/// </summary>
public class DetailedHealthCheckServiceTests
{
    /// <summary>
    /// In-memory <see cref="IScadaClient"/> stand-in whose read outcome is configurable:
    /// <see cref="ReadException"/> (when set) faults the read, otherwise <see cref="ReadResult"/>
    /// is returned, falling back to <c>Vtq.Good(true)</c>. All other operations complete immediately.
    /// </summary>
    private class FakeScadaClient : IScadaClient
    {
        public bool IsConnected { get; set; } = true;
        public ConnectionState ConnectionState { get; set; } = ConnectionState.Connected;

        /// <summary>Value returned by <see cref="ReadAsync"/>; when unset, <c>Vtq.Good(true)</c> is used.</summary>
        public Vtq? ReadResult { get; set; }

        /// <summary>When set, <see cref="ReadAsync"/> fails with this exception instead of returning a value.</summary>
        public Exception? ReadException { get; set; }

        public event EventHandler<ConnectionStateChangedEventArgs>? ConnectionStateChanged;

        public Task ConnectAsync(CancellationToken ct = default) => Task.CompletedTask;
        public Task DisconnectAsync(CancellationToken ct = default) => Task.CompletedTask;

        public Task<Vtq> ReadAsync(string address, CancellationToken ct = default)
        {
            // Report the configured failure through the returned task, the way an
            // asynchronous read surfaces errors, rather than throwing before a task exists.
            if (ReadException != null)
            {
                return Task.FromException<Vtq>(ReadException);
            }

            return Task.FromResult(ReadResult ?? Vtq.Good(true));
        }

        public Task<IReadOnlyDictionary<string, Vtq>> ReadBatchAsync(IEnumerable<string> addresses, CancellationToken ct = default) =>
            Task.FromResult<IReadOnlyDictionary<string, Vtq>>(new Dictionary<string, Vtq>());

        public Task WriteAsync(string address, object value, CancellationToken ct = default) => Task.CompletedTask;
        public Task WriteBatchAsync(IReadOnlyDictionary<string, object> values, CancellationToken ct = default) => Task.CompletedTask;

        public Task<(bool flagReached, int elapsedMs)> WriteBatchAndWaitAsync(
            IReadOnlyDictionary<string, object> values, string flagTag, object flagValue,
            int timeoutMs, int pollIntervalMs, CancellationToken ct = default) =>
            Task.FromResult((false, 0));

        public Task<IAsyncDisposable> SubscribeAsync(IEnumerable<string> addresses, Action<string, Vtq> callback, CancellationToken ct = default) =>
            Task.FromResult<IAsyncDisposable>(new FakeHandle());

        public ValueTask DisposeAsync() => default;

        // Raises the event with null args. Not called by the tests in this class;
        // presumably kept so the event has at least one reference — confirm before removing.
        internal void FireEvent() => ConnectionStateChanged?.Invoke(this, null!);

        private class FakeHandle : IAsyncDisposable { public ValueTask DisposeAsync() => default; }
    }

    /// <summary>Expects Unhealthy for a disconnected client.</summary>
    [Fact]
    public async Task ReturnsUnhealthy_WhenNotConnected()
    {
        // Keep both connection indicators in agreement so the fake models one coherent state.
        var client = new FakeScadaClient
        {
            IsConnected = false,
            ConnectionState = ConnectionState.Disconnected
        };
        var svc = new DetailedHealthCheckService(client);

        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Unhealthy);
    }

    /// <summary>Expects Healthy when the read yields a Good-quality value timestamped now.</summary>
    [Fact]
    public async Task ReturnsHealthy_WhenTestTagGoodAndRecent()
    {
        var client = new FakeScadaClient
        {
            IsConnected = true,
            ReadResult = Vtq.New(true, DateTime.UtcNow, Quality.Good)
        };
        var svc = new DetailedHealthCheckService(client);

        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Healthy);
    }

    /// <summary>Expects Degraded when the read yields a value with Uncertain quality.</summary>
    [Fact]
    public async Task ReturnsDegraded_WhenTestTagQualityNotGood()
    {
        var client = new FakeScadaClient
        {
            IsConnected = true,
            ReadResult = Vtq.New(true, DateTime.UtcNow, Quality.Uncertain)
        };
        var svc = new DetailedHealthCheckService(client);

        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Degraded);
    }

    /// <summary>Expects Degraded, with a description mentioning "stale", for a Good value timestamped ten minutes ago.</summary>
    [Fact]
    public async Task ReturnsDegraded_WhenTestTagTimestampStale()
    {
        var client = new FakeScadaClient
        {
            IsConnected = true,
            ReadResult = Vtq.New(true, DateTime.UtcNow.AddMinutes(-10), Quality.Good)
        };
        var svc = new DetailedHealthCheckService(client);

        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Degraded);
        result.Description.Should().Contain("stale");
    }

    /// <summary>Expects Degraded, with a description containing "Could not read test tag", when the read fails.</summary>
    [Fact]
    public async Task ReturnsDegraded_WhenTestTagReadThrows()
    {
        var client = new FakeScadaClient
        {
            IsConnected = true,
            ReadException = new InvalidOperationException("COM error")
        };
        var svc = new DetailedHealthCheckService(client);

        var result = await svc.CheckHealthAsync(new HealthCheckContext());

        result.Status.Should().Be(HealthStatus.Degraded);
        result.Description.Should().Contain("Could not read test tag");
    }
}
}

View File

@@ -0,0 +1,147 @@
using System;
using System.Threading.Tasks;
using FluentAssertions;
using Xunit;
using ZB.MOM.WW.LmxProxy.Host.Metrics;
namespace ZB.MOM.WW.LmxProxy.Host.Tests.Metrics
{
/// <summary>
/// Tests for <see cref="PerformanceMetrics"/>: counting, success tracking, percentile
/// statistics, timing scopes and metric lookup.
/// </summary>
public class PerformanceMetricsTests
{
    // Operation name shared by the single-operation tests.
    private const string OperationName = "TestOp";

    /// <summary>Records <paramref name="count"/> samples of 10 ms each with the given outcome.</summary>
    private static void RecordFixed(PerformanceMetrics target, int count, bool success)
    {
        var remaining = count;
        while (remaining-- > 0)
        {
            target.RecordOperation(OperationName, TimeSpan.FromMilliseconds(10), success);
        }
    }

    /// <summary>
    /// Records one successful sample per whole-millisecond duration from
    /// <paramref name="firstMs"/> to <paramref name="lastMs"/>, both inclusive.
    /// </summary>
    private static void RecordRamp(PerformanceMetrics target, int firstMs, int lastMs)
    {
        for (var ms = firstMs; ms <= lastMs; ms++)
        {
            target.RecordOperation(OperationName, TimeSpan.FromMilliseconds(ms), true);
        }
    }

    /// <summary>Five recorded samples must show up as a total count of five.</summary>
    [Fact]
    public void RecordOperation_TracksCountAndDuration()
    {
        using var sut = new PerformanceMetrics();
        RecordFixed(sut, 5, true);

        var snapshot = sut.GetStatistics();

        snapshot.Should().ContainKey(OperationName);
        snapshot[OperationName].TotalCount.Should().Be(5);
    }

    /// <summary>Three successes and two failures must yield a success rate of about 0.6.</summary>
    [Fact]
    public void RecordOperation_TracksSuccessAndFailure()
    {
        using var sut = new PerformanceMetrics();
        RecordFixed(sut, 3, true);
        RecordFixed(sut, 2, false);

        var snapshot = sut.GetStatistics();

        snapshot[OperationName].SuccessRate.Should().BeApproximately(0.6, 0.001);
    }

    /// <summary>With durations 1..100 ms the 95th percentile must land within 1 ms of 95.</summary>
    [Fact]
    public void GetStatistics_CalculatesP95Correctly()
    {
        using var sut = new PerformanceMetrics();
        RecordRamp(sut, 1, 100);

        var snapshot = sut.GetStatistics();

        snapshot[OperationName].Percentile95Milliseconds.Should().BeApproximately(95.0, 1.0);
    }

    /// <summary>
    /// After 1500 samples the total count covers all of them, while the percentile is
    /// expected to reflect only the most recent 1000.
    /// </summary>
    [Fact]
    public void RollingBuffer_CapsAt1000Samples()
    {
        using var sut = new PerformanceMetrics();
        RecordRamp(sut, 0, 1499);

        var entry = sut.GetStatistics()[OperationName];

        // Every sample is counted, even those that have left the rolling window.
        entry.TotalCount.Should().Be(1500);
        // The window should hold durations 500..1499, whose P95 is around 1449 —
        // comfortably above 1000.
        entry.Percentile95Milliseconds.Should().BeGreaterThan(1000);
    }

    /// <summary>Disposing a timing scope must record one sample whose duration covers the work done inside it.</summary>
    [Fact]
    public void BeginOperation_RecordsDurationOnDispose()
    {
        using var sut = new PerformanceMetrics();

        using (sut.BeginOperation(OperationName))
        {
            System.Threading.Thread.Sleep(50);
        }

        var snapshot = sut.GetStatistics();
        snapshot.Should().ContainKey(OperationName);

        var entry = snapshot[OperationName];
        entry.TotalCount.Should().Be(1);
        // Lower bound is below the 50 ms sleep to tolerate timer granularity.
        entry.AverageMilliseconds.Should().BeGreaterOrEqualTo(40);
    }

    /// <summary>A timing scope that is never told otherwise must count as a success.</summary>
    [Fact]
    public void TimingScope_DefaultsToSuccess()
    {
        using var sut = new PerformanceMetrics();

        using (sut.BeginOperation(OperationName))
        {
            // Intentionally empty — the outcome is left at its default.
        }

        sut.GetStatistics()[OperationName].SuccessCount.Should().Be(1);
    }

    /// <summary>A timing scope marked as failed must be counted, but not as a success.</summary>
    [Fact]
    public void TimingScope_RespectsSetSuccessFalse()
    {
        using var sut = new PerformanceMetrics();

        using (var timing = sut.BeginOperation(OperationName))
        {
            timing.SetSuccess(false);
        }

        var entry = sut.GetStatistics()[OperationName];
        entry.SuccessCount.Should().Be(0);
        entry.TotalCount.Should().Be(1);
    }

    /// <summary>Looking up an operation that was never recorded must yield null.</summary>
    [Fact]
    public void GetMetrics_ReturnsNullForUnknownOperation()
    {
        using var sut = new PerformanceMetrics();

        sut.GetMetrics("DoesNotExist").Should().BeNull();
    }

    /// <summary>Every distinct recorded operation name must appear exactly once in the full metrics map.</summary>
    [Fact]
    public void GetAllMetrics_ReturnsAllTrackedOperations()
    {
        using var sut = new PerformanceMetrics();
        sut.RecordOperation("Read", TimeSpan.FromMilliseconds(10), true);
        sut.RecordOperation("Write", TimeSpan.FromMilliseconds(20), true);
        sut.RecordOperation("Subscribe", TimeSpan.FromMilliseconds(5), true);

        var tracked = sut.GetAllMetrics();

        tracked.Should().ContainKey("Read");
        tracked.Should().ContainKey("Write");
        tracked.Should().ContainKey("Subscribe");
        tracked.Count.Should().Be(3);
    }
}
}

View File

@@ -0,0 +1,130 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using FluentAssertions;
using Newtonsoft.Json.Linq;
using Xunit;
using ZB.MOM.WW.LmxProxy.Host.Domain;
using ZB.MOM.WW.LmxProxy.Host.Health;
using HealthCheckService = ZB.MOM.WW.LmxProxy.Host.Health.HealthCheckService;
using ZB.MOM.WW.LmxProxy.Host.Metrics;
using ZB.MOM.WW.LmxProxy.Host.Status;
using ZB.MOM.WW.LmxProxy.Host.Subscriptions;
namespace ZB.MOM.WW.LmxProxy.Host.Tests.Status
{
/// <summary>
/// Tests for <see cref="StatusReportService"/>: JSON and HTML report generation and the
/// overall health flag, driven by a fake SCADA client.
/// </summary>
public class StatusReportServiceTests
{
    /// <summary>
    /// In-memory <see cref="IScadaClient"/> stand-in with settable connection flags.
    /// Reads yield <c>Vtq.Good(42.0)</c>; every other operation completes immediately.
    /// </summary>
    private class FakeScadaClient : IScadaClient
    {
        public event EventHandler<ConnectionStateChangedEventArgs>? ConnectionStateChanged;

        public bool IsConnected { get; set; } = true;

        public ConnectionState ConnectionState { get; set; } = ConnectionState.Connected;

        public Task ConnectAsync(CancellationToken ct = default)
        {
            return Task.CompletedTask;
        }

        public Task DisconnectAsync(CancellationToken ct = default)
        {
            return Task.CompletedTask;
        }

        public Task<Vtq> ReadAsync(string address, CancellationToken ct = default)
        {
            return Task.FromResult(Vtq.Good(42.0));
        }

        public Task<IReadOnlyDictionary<string, Vtq>> ReadBatchAsync(IEnumerable<string> addresses, CancellationToken ct = default)
        {
            IReadOnlyDictionary<string, Vtq> nothing = new Dictionary<string, Vtq>();
            return Task.FromResult(nothing);
        }

        public Task WriteAsync(string address, object value, CancellationToken ct = default)
        {
            return Task.CompletedTask;
        }

        public Task WriteBatchAsync(IReadOnlyDictionary<string, object> values, CancellationToken ct = default)
        {
            return Task.CompletedTask;
        }

        public Task<(bool flagReached, int elapsedMs)> WriteBatchAndWaitAsync(
            IReadOnlyDictionary<string, object> values, string flagTag, object flagValue,
            int timeoutMs, int pollIntervalMs, CancellationToken ct = default)
        {
            return Task.FromResult((false, 0));
        }

        public Task<IAsyncDisposable> SubscribeAsync(IEnumerable<string> addresses, Action<string, Vtq> callback, CancellationToken ct = default)
        {
            IAsyncDisposable handle = new FakeHandle();
            return Task.FromResult(handle);
        }

        public ValueTask DisposeAsync()
        {
            return default;
        }

        // Raises the event with null args; not invoked by any test in this class.
        internal void FireEvent()
        {
            ConnectionStateChanged?.Invoke(this, null!);
        }

        private class FakeHandle : IAsyncDisposable
        {
            public ValueTask DisposeAsync()
            {
                return default;
            }
        }
    }

    /// <summary>
    /// Builds a <see cref="StatusReportService"/> wired to a fake client, along with the
    /// metrics and subscription manager it shares. The caller owns the two returned
    /// disposables and is expected to dispose them.
    /// </summary>
    /// <param name="connected">Whether the fake client reports itself as connected.</param>
    /// <returns>The service under test plus the metrics and subscription manager backing it.</returns>
    private (StatusReportService svc, PerformanceMetrics pm, SubscriptionManager sm) CreateService(
        bool connected = true)
    {
        var scada = new FakeScadaClient
        {
            IsConnected = connected,
            ConnectionState = connected ? ConnectionState.Connected : ConnectionState.Disconnected
        };
        var subscriptions = new SubscriptionManager(scada);
        var metrics = new PerformanceMetrics();

        var report = new StatusReportService(
            scada,
            subscriptions,
            metrics,
            new HealthCheckService(scada, subscriptions, metrics),
            new DetailedHealthCheckService(scada));

        return (report, metrics, subscriptions);
    }

    /// <summary>The JSON report must expose its well-known properties in camelCase.</summary>
    [Fact]
    public async Task GenerateJsonReportAsync_ReturnsCamelCaseJson()
    {
        var (report, metrics, subscriptions) = CreateService();
        using (metrics)
        using (subscriptions)
        {
            var body = await report.GenerateJsonReportAsync();

            body.Should().Contain("\"serviceName\"");
            body.Should().Contain("\"connection\"");
            body.Should().Contain("\"isConnected\"");
        }
    }

    /// <summary>The HTML report must carry a 30-second meta refresh tag.</summary>
    [Fact]
    public async Task GenerateHtmlReportAsync_ContainsAutoRefresh()
    {
        var (report, metrics, subscriptions) = CreateService();
        using (metrics)
        using (subscriptions)
        {
            var page = await report.GenerateHtmlReportAsync();

            page.Should().Contain("<meta http-equiv=\"refresh\" content=\"30\">");
        }
    }

    /// <summary>A connected client must make the service report itself healthy.</summary>
    [Fact]
    public async Task IsHealthyAsync_ReturnsTrueWhenHealthy()
    {
        var (report, metrics, subscriptions) = CreateService(connected: true);
        using (metrics)
        using (subscriptions)
        {
            var healthy = await report.IsHealthyAsync();

            healthy.Should().BeTrue();
        }
    }

    /// <summary>A disconnected client must make the service report itself unhealthy.</summary>
    [Fact]
    public async Task IsHealthyAsync_ReturnsFalseWhenUnhealthy()
    {
        var (report, metrics, subscriptions) = CreateService(connected: false);
        using (metrics)
        using (subscriptions)
        {
            var healthy = await report.IsHealthyAsync();

            healthy.Should().BeFalse();
        }
    }

    /// <summary>Recorded operations must appear under <c>performance.operations</c> in the JSON report.</summary>
    [Fact]
    public async Task GenerateJsonReportAsync_IncludesPerformanceMetrics()
    {
        var (report, metrics, subscriptions) = CreateService();
        using (metrics)
        using (subscriptions)
        {
            metrics.RecordOperation("Read", TimeSpan.FromMilliseconds(15), true);
            metrics.RecordOperation("Write", TimeSpan.FromMilliseconds(25), true);

            var document = JObject.Parse(await report.GenerateJsonReportAsync());
            var ops = document["performance"]?["operations"];

            ops.Should().NotBeNull();
            // Dictionary keys are camel-cased by Newtonsoft's
            // CamelCasePropertyNamesContractResolver, hence the lower-case lookups.
            ops!["read"].Should().NotBeNull();
            ops!["write"].Should().NotBeNull();

            long readTotal = (long)ops!["read"]!["totalCount"]!;
            readTotal.Should().Be(1);
        }
    }
}
}