feat(lmxproxy): phase 4 — host health monitoring, metrics, status web server

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-03-22 00:14:40 -04:00
parent 16d1b95e9a
commit 9eb81180c0
12 changed files with 1546 additions and 12 deletions

View File

@@ -0,0 +1,205 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using Serilog;
namespace ZB.MOM.WW.LmxProxy.Host.Metrics
{
/// <summary>
/// Disposable scope returned by <see cref="PerformanceMetrics.BeginOperation"/>.
/// </summary>
/// <remarks>
/// The scope implementation in this file starts a stopwatch when it is created and, on its
/// first <c>Dispose</c> call, records the elapsed time and the success flag for the operation
/// name it was created with. Later <c>Dispose</c> calls on that implementation do nothing.
/// </remarks>
public interface ITimingScope : IDisposable
{
/// <summary>
/// Sets the outcome that is recorded together with the timing when the scope is disposed.
/// The scope implementation in this file records success if this method is never called.
/// </summary>
/// <param name="success"><c>true</c> to record the operation as successful; <c>false</c> to record it as failed.</param>
void SetSuccess(bool success);
}
/// <summary>
/// Statistics snapshot for a single operation type.
/// </summary>
/// <remarks>
/// Instances are produced by <c>OperationMetrics.GetStatistics</c>. When nothing has been
/// recorded yet that method returns a default instance, so every property is zero.
/// </remarks>
public class MetricsStatistics
{
/// <summary>Number of operations recorded so far.</summary>
public long TotalCount { get; set; }
/// <summary>Number of recorded operations that were flagged as successful.</summary>
public long SuccessCount { get; set; }
/// <summary>
/// <see cref="SuccessCount"/> divided by <see cref="TotalCount"/>, i.e. a fraction between 0 and 1
/// (not a percentage).
/// </summary>
public double SuccessRate { get; set; }
/// <summary>Mean duration in milliseconds over every recorded operation.</summary>
public double AverageMilliseconds { get; set; }
/// <summary>Shortest recorded duration in milliseconds over every recorded operation.</summary>
public double MinMilliseconds { get; set; }
/// <summary>Longest recorded duration in milliseconds over every recorded operation.</summary>
public double MaxMilliseconds { get; set; }
/// <summary>
/// 95th-percentile duration in milliseconds. Unlike the other values, this is computed only
/// over the rolling buffer of the most recent durations kept by <c>OperationMetrics</c>
/// (at most 1000 samples).
/// </summary>
public double Percentile95Milliseconds { get; set; }
}
/// <summary>
/// Per-operation timing and success tracking with a rolling buffer for percentile computation.
/// </summary>
/// <remarks>
/// Total count, success count, average, minimum and maximum cover every call ever recorded.
/// The 95th percentile is computed only over the most recent <c>MaxSamples</c> durations.
/// <see cref="Record"/> and <see cref="GetStatistics"/> synchronize on a private lock object.
/// </remarks>
public class OperationMetrics
{
    /// <summary>Maximum number of recent durations retained for the percentile computation.</summary>
    private const int MaxSamples = 1000;

    // A queue evicts the oldest sample in O(1); removing the head of a List<T> would shift
    // every remaining element on each call once the window is full.
    private readonly Queue<double> _durations = new Queue<double>();
    private readonly object _lock = new object();
    private long _totalCount;
    private long _successCount;
    private double _totalMilliseconds;
    private double _minMilliseconds = double.MaxValue;
    private double _maxMilliseconds;

    /// <summary>
    /// Records one completed operation.
    /// </summary>
    /// <param name="duration">How long the operation took.</param>
    /// <param name="success">Whether the operation is counted as successful.</param>
    public void Record(TimeSpan duration, bool success)
    {
        var ms = duration.TotalMilliseconds;

        lock (_lock)
        {
            _totalCount++;
            if (success)
            {
                _successCount++;
            }

            _totalMilliseconds += ms;
            if (ms < _minMilliseconds)
            {
                _minMilliseconds = ms;
            }

            if (ms > _maxMilliseconds)
            {
                _maxMilliseconds = ms;
            }

            // Make room first so the window never exceeds MaxSamples entries.
            if (_durations.Count == MaxSamples)
            {
                _durations.Dequeue();
            }

            _durations.Enqueue(ms);
        }
    }

    /// <summary>
    /// Builds a snapshot of the statistics gathered so far.
    /// </summary>
    /// <returns>
    /// A populated <see cref="MetricsStatistics"/>, or a default (all-zero) instance when
    /// nothing has been recorded yet.
    /// </returns>
    public MetricsStatistics GetStatistics()
    {
        double[] window;
        long totalCount;
        long successCount;
        double totalMilliseconds;
        double minMilliseconds;
        double maxMilliseconds;

        // Copy the state under the lock, then sort outside it so that concurrent
        // Record calls are not blocked for the duration of the sort.
        lock (_lock)
        {
            if (_totalCount == 0)
            {
                return new MetricsStatistics();
            }

            window = _durations.ToArray();
            totalCount = _totalCount;
            successCount = _successCount;
            totalMilliseconds = _totalMilliseconds;
            minMilliseconds = _minMilliseconds;
            maxMilliseconds = _maxMilliseconds;
        }

        Array.Sort(window);

        // Nearest-rank percentile: the ceil(0.95 * n)-th smallest sample (1-based).
        var p95Index = Math.Max(0, (int)Math.Ceiling(window.Length * 0.95) - 1);

        return new MetricsStatistics
        {
            TotalCount = totalCount,
            SuccessCount = successCount,
            SuccessRate = (double)successCount / totalCount,
            AverageMilliseconds = totalMilliseconds / totalCount,
            MinMilliseconds = minMilliseconds,
            MaxMilliseconds = maxMilliseconds,
            Percentile95Milliseconds = window[p95Index]
        };
    }
}
/// <summary>
/// Collects timing and success metrics keyed by operation name (case-insensitive) and
/// writes a summary line per operation to the log every 60 seconds.
/// </summary>
public class PerformanceMetrics : IDisposable
{
    private static readonly ILogger Logger = Log.ForContext<PerformanceMetrics>();

    private readonly ConcurrentDictionary<string, OperationMetrics> _operations
        = new ConcurrentDictionary<string, OperationMetrics>(StringComparer.OrdinalIgnoreCase);
    private readonly Timer _reportTimer;
    private bool _disposed;

    /// <summary>
    /// Creates the collector and starts the reporting timer; the first report is written
    /// after 60 seconds and then every 60 seconds.
    /// </summary>
    public PerformanceMetrics()
    {
        var interval = TimeSpan.FromSeconds(60);
        _reportTimer = new Timer(ReportMetrics, null, interval, interval);
    }

    /// <summary>
    /// Adds one measurement to the named operation, creating its entry on first use.
    /// </summary>
    /// <param name="operationName">Key under which the measurement is stored.</param>
    /// <param name="duration">How long the operation took.</param>
    /// <param name="success">Whether the operation is counted as successful.</param>
    public void RecordOperation(string operationName, TimeSpan duration, bool success = true)
        => _operations.GetOrAdd(operationName, _ => new OperationMetrics()).Record(duration, success);

    /// <summary>
    /// Starts timing an operation; the measurement is recorded when the returned scope is disposed.
    /// </summary>
    /// <param name="operationName">Key under which the measurement will be stored.</param>
    /// <returns>A scope that records its elapsed time on dispose.</returns>
    public ITimingScope BeginOperation(string operationName)
        => new TimingScope(this, operationName);

    /// <summary>
    /// Looks up the metrics for one operation.
    /// </summary>
    /// <param name="operationName">Operation to look up.</param>
    /// <returns>The metrics for the operation, or <c>null</c> if none have been recorded under that name.</returns>
    public OperationMetrics? GetMetrics(string operationName)
    {
        if (_operations.TryGetValue(operationName, out var found))
        {
            return found;
        }

        return null;
    }

    /// <summary>
    /// Returns a read-only view over the underlying metrics dictionary (not a copy).
    /// </summary>
    public IReadOnlyDictionary<string, OperationMetrics> GetAllMetrics() => _operations;

    /// <summary>
    /// Computes a statistics snapshot for every known operation.
    /// </summary>
    /// <returns>A new case-insensitive dictionary mapping operation name to its statistics.</returns>
    public Dictionary<string, MetricsStatistics> GetStatistics()
    {
        var snapshot = new Dictionary<string, MetricsStatistics>(StringComparer.OrdinalIgnoreCase);
        foreach (var entry in _operations)
        {
            snapshot[entry.Key] = entry.Value.GetStatistics();
        }

        return snapshot;
    }

    /// <summary>
    /// Timer callback: logs one information line per operation that has at least one measurement.
    /// </summary>
    /// <param name="state">Unused timer state.</param>
    private void ReportMetrics(object? state)
    {
        foreach (var entry in _operations)
        {
            var summary = entry.Value.GetStatistics();
            if (summary.TotalCount != 0)
            {
                Logger.Information(
                    "Metrics: {Operation} — Count={Count}, SuccessRate={SuccessRate:P1}, " +
                    "AvgMs={AverageMs:F1}, MinMs={MinMs:F1}, MaxMs={MaxMs:F1}, P95Ms={P95Ms:F1}",
                    entry.Key, summary.TotalCount, summary.SuccessRate,
                    summary.AverageMilliseconds, summary.MinMilliseconds,
                    summary.MaxMilliseconds, summary.Percentile95Milliseconds);
            }
        }
    }

    /// <summary>
    /// Stops the reporting timer and writes one final report. Calls after the first do nothing.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _disposed = true;
            _reportTimer.Dispose();
            ReportMetrics(null);
        }
    }

    /// <summary>
    /// Timing scope that measures from construction until the first dispose and then
    /// reports the elapsed time and success flag to its owning <see cref="PerformanceMetrics"/>.
    /// </summary>
    private class TimingScope : ITimingScope
    {
        private readonly PerformanceMetrics _owner;
        private readonly string _name;
        private readonly Stopwatch _watch;
        private bool _success = true;
        private bool _disposed;

        public TimingScope(PerformanceMetrics metrics, string operationName)
        {
            _owner = metrics;
            _name = operationName;
            _watch = Stopwatch.StartNew();
        }

        /// <summary>Overrides the outcome reported on dispose (success by default).</summary>
        public void SetSuccess(bool success) => _success = success;

        /// <summary>Stops the stopwatch and records the measurement; only the first call has an effect.</summary>
        public void Dispose()
        {
            if (!_disposed)
            {
                _disposed = true;
                _watch.Stop();
                _owner.RecordOperation(_name, _watch.Elapsed, _success);
            }
        }
    }
}
}