feat(lmxproxy): phase 4 — host health monitoring, metrics, status web server
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,205 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using Serilog;
|
||||
|
||||
namespace ZB.MOM.WW.LmxProxy.Host.Metrics
|
||||
{
|
||||
/// <summary>
/// Handle for a single timed operation, as handed out by
/// <see cref="PerformanceMetrics.BeginOperation"/>. Dispose the handle to end the scope.
/// </summary>
public interface ITimingScope : IDisposable
{
    /// <summary>
    /// Sets the outcome flag associated with this scope.
    /// </summary>
    /// <param name="success">
    /// <c>true</c> when the operation succeeded; <c>false</c> when it failed.
    /// </param>
    void SetSuccess(bool success);
}
|
||||
|
||||
/// <summary>
/// Point-in-time statistics for one operation type. All values default to zero
/// on a freshly constructed instance.
/// </summary>
public class MetricsStatistics
{
    /// <summary>Number of recorded invocations.</summary>
    public long TotalCount { get; set; }

    /// <summary>Number of recorded invocations that were flagged as successful.</summary>
    public long SuccessCount { get; set; }

    /// <summary>Ratio of successful invocations to total invocations.</summary>
    public double SuccessRate { get; set; }

    /// <summary>Mean duration, in milliseconds.</summary>
    public double AverageMilliseconds { get; set; }

    /// <summary>Shortest duration, in milliseconds.</summary>
    public double MinMilliseconds { get; set; }

    /// <summary>Longest duration, in milliseconds.</summary>
    public double MaxMilliseconds { get; set; }

    /// <summary>95th-percentile duration, in milliseconds.</summary>
    public double Percentile95Milliseconds { get; set; }
}
|
||||
|
||||
/// <summary>
/// Per-operation timing and success tracking with a rolling buffer for percentile computation.
/// </summary>
/// <remarks>
/// Count, success count, average, minimum and maximum cover every sample recorded on this
/// instance. The 95th percentile is computed only over the most recent samples (at most
/// 1000). Mutable state is guarded by a private lock.
/// </remarks>
public class OperationMetrics
{
    /// <summary>Maximum number of recent samples retained for the percentile window.</summary>
    private const int MaxSamples = 1000;

    // FIFO window of the most recent durations (ms). A queue evicts the oldest sample in
    // O(1); the previous List-based buffer shifted every element on each eviction.
    private readonly Queue<double> _durations = new Queue<double>();
    private readonly object _lock = new object();
    private long _totalCount;
    private long _successCount;
    private double _totalMilliseconds;
    private double _minMilliseconds = double.MaxValue;
    // Sentinel mirrors the min field so the first sample always wins, even if it is negative.
    private double _maxMilliseconds = double.MinValue;

    /// <summary>
    /// Records one completed invocation.
    /// </summary>
    /// <param name="duration">How long the invocation took.</param>
    /// <param name="success">Whether the invocation succeeded.</param>
    public void Record(TimeSpan duration, bool success)
    {
        var ms = duration.TotalMilliseconds;

        lock (_lock)
        {
            _totalCount++;
            if (success)
            {
                _successCount++;
            }

            _totalMilliseconds += ms;

            if (ms < _minMilliseconds)
            {
                _minMilliseconds = ms;
            }

            if (ms > _maxMilliseconds)
            {
                _maxMilliseconds = ms;
            }

            _durations.Enqueue(ms);
            if (_durations.Count > MaxSamples)
            {
                _durations.Dequeue();
            }
        }
    }

    /// <summary>
    /// Builds a snapshot of the statistics accumulated so far.
    /// </summary>
    /// <returns>
    /// A new <see cref="MetricsStatistics"/>. When nothing has been recorded yet, every
    /// value in the snapshot is zero.
    /// </returns>
    public MetricsStatistics GetStatistics()
    {
        long total;
        long succeeded;
        double sumMs;
        double minMs;
        double maxMs;
        double[] window;

        // Copy everything needed while holding the lock; the sort below runs outside it
        // so concurrent Record calls are not blocked while the window is being ordered.
        lock (_lock)
        {
            if (_totalCount == 0)
            {
                return new MetricsStatistics();
            }

            total = _totalCount;
            succeeded = _successCount;
            sumMs = _totalMilliseconds;
            minMs = _minMilliseconds;
            maxMs = _maxMilliseconds;
            window = _durations.ToArray();
        }

        Array.Sort(window);

        // Nearest-rank percentile: ceil(0.95 * n) is the 1-based rank, hence the -1.
        var p95Index = Math.Max(0, (int)Math.Ceiling(window.Length * 0.95) - 1);

        return new MetricsStatistics
        {
            TotalCount = total,
            SuccessCount = succeeded,
            SuccessRate = (double)succeeded / total,
            AverageMilliseconds = sumMs / total,
            MinMilliseconds = minMs,
            MaxMilliseconds = maxMs,
            Percentile95Milliseconds = window[p95Index]
        };
    }
}
|
||||
|
||||
/// <summary>
/// Collects timing and success metrics keyed by operation name (case-insensitive) and
/// writes a summary line per operation to the log every 60 seconds.
/// </summary>
public class PerformanceMetrics : IDisposable
{
    private const string ReportTemplate =
        "Metrics: {Operation} — Count={Count}, SuccessRate={SuccessRate:P1}, " +
        "AvgMs={AverageMs:F1}, MinMs={MinMs:F1}, MaxMs={MaxMs:F1}, P95Ms={P95Ms:F1}";

    private static readonly ILogger Logger = Log.ForContext<PerformanceMetrics>();

    private readonly ConcurrentDictionary<string, OperationMetrics> _metrics
        = new ConcurrentDictionary<string, OperationMetrics>(StringComparer.OrdinalIgnoreCase);

    private readonly Timer _reportingTimer;
    private bool _disposed;

    /// <summary>
    /// Creates the collector and starts the periodic report timer; the first report
    /// fires after 60 seconds and then repeats at the same interval.
    /// </summary>
    public PerformanceMetrics()
    {
        var interval = TimeSpan.FromSeconds(60);
        _reportingTimer = new Timer(ReportMetrics, null, interval, interval);
    }

    /// <summary>
    /// Adds one sample to the named operation, creating its tracker on first use.
    /// </summary>
    /// <param name="operationName">Key identifying the operation.</param>
    /// <param name="duration">How long the operation took.</param>
    /// <param name="success">Whether the operation succeeded; defaults to <c>true</c>.</param>
    public void RecordOperation(string operationName, TimeSpan duration, bool success = true)
    {
        _metrics.GetOrAdd(operationName, _ => new OperationMetrics())
                .Record(duration, success);
    }

    /// <summary>
    /// Starts timing an operation. The elapsed time is recorded when the returned
    /// scope is disposed.
    /// </summary>
    /// <param name="operationName">Key identifying the operation.</param>
    /// <returns>A scope that records the sample on dispose.</returns>
    public ITimingScope BeginOperation(string operationName) =>
        new TimingScope(this, operationName);

    /// <summary>
    /// Looks up the tracker for a single operation.
    /// </summary>
    /// <param name="operationName">Key identifying the operation.</param>
    /// <returns>The tracker, or <c>null</c> when nothing was recorded under that name.</returns>
    public OperationMetrics? GetMetrics(string operationName)
    {
        if (_metrics.TryGetValue(operationName, out var found))
        {
            return found;
        }

        return null;
    }

    /// <summary>
    /// Exposes the underlying tracker map. This is the live dictionary, not a copy.
    /// </summary>
    public IReadOnlyDictionary<string, OperationMetrics> GetAllMetrics() => _metrics;

    /// <summary>
    /// Takes a statistics snapshot of every tracked operation.
    /// </summary>
    /// <returns>A new case-insensitive dictionary from operation name to its snapshot.</returns>
    public Dictionary<string, MetricsStatistics> GetStatistics()
    {
        var snapshots = new Dictionary<string, MetricsStatistics>(StringComparer.OrdinalIgnoreCase);

        foreach (var entry in _metrics)
        {
            snapshots[entry.Key] = entry.Value.GetStatistics();
        }

        return snapshots;
    }

    /// <summary>
    /// Timer callback: logs one information-level line per operation that has samples.
    /// </summary>
    /// <param name="state">Unused timer state.</param>
    private void ReportMetrics(object? state)
    {
        foreach (var entry in _metrics)
        {
            var snapshot = entry.Value.GetStatistics();
            if (snapshot.TotalCount != 0)
            {
                Logger.Information(
                    ReportTemplate,
                    entry.Key,
                    snapshot.TotalCount,
                    snapshot.SuccessRate,
                    snapshot.AverageMilliseconds,
                    snapshot.MinMilliseconds,
                    snapshot.MaxMilliseconds,
                    snapshot.Percentile95Milliseconds);
            }
        }
    }

    /// <summary>
    /// Stops the report timer and writes one final report. Repeated calls are no-ops.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _disposed = true;
            _reportingTimer.Dispose();
            ReportMetrics(null);
        }
    }

    /// <summary>
    /// Stopwatch-backed scope that reports its elapsed time to the owning
    /// <see cref="PerformanceMetrics"/> the first time it is disposed.
    /// </summary>
    private class TimingScope : ITimingScope
    {
        private readonly PerformanceMetrics _metrics;
        private readonly string _operationName;
        private readonly Stopwatch _stopwatch;

        // Operations count as successful unless the caller says otherwise.
        private bool _success = true;
        private bool _disposed;

        public TimingScope(PerformanceMetrics metrics, string operationName)
        {
            _metrics = metrics;
            _operationName = operationName;
            _stopwatch = Stopwatch.StartNew();
        }

        /// <summary>Overrides the outcome that will be recorded on dispose.</summary>
        public void SetSuccess(bool success) => _success = success;

        /// <summary>Stops the stopwatch and records the sample; later calls do nothing.</summary>
        public void Dispose()
        {
            if (!_disposed)
            {
                _disposed = true;
                _stopwatch.Stop();
                _metrics.RecordOperation(_operationName, _stopwatch.Elapsed, _success);
            }
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user