using System;
using System.Collections.Generic;
using System.Linq;
using ZB.MOM.WW.OtOpcUa.Host.Configuration;
using ZB.MOM.WW.OtOpcUa.Host.Historian;
namespace ZB.MOM.WW.OtOpcUa.Historian.Aveva
{
///
/// Thread-safe, pure-logic endpoint picker for the Wonderware Historian cluster. Tracks which
/// configured nodes are healthy, places failed nodes in a time-bounded cooldown, and hands
/// out an ordered list of eligible candidates for the data source to try in sequence.
///
///
/// Design notes:
///
/// - No SDK dependency — fully unit-testable with an injected clock.
/// - Per-node state is guarded by a single lock; operations are microsecond-scale
/// so contention is a non-issue.
/// - Cooldown is purely passive: a node re-enters the healthy pool the next time
/// it is queried after its cooldown window elapses. There is no background probe.
/// - Nodes are returned in configuration order so operators can express a
/// preference (primary first, fallback second).
/// - When is empty, the picker is
/// initialized with a single entry from
/// so legacy deployments continue to work unchanged.
///
///
internal sealed class HistorianClusterEndpointPicker
{
private readonly Func _clock;
private readonly TimeSpan _cooldown;
private readonly object _lock = new object();
private readonly List _nodes;
public HistorianClusterEndpointPicker(HistorianConfiguration config)
: this(config, () => DateTime.UtcNow) { }
internal HistorianClusterEndpointPicker(HistorianConfiguration config, Func clock)
{
_clock = clock ?? throw new ArgumentNullException(nameof(clock));
_cooldown = TimeSpan.FromSeconds(Math.Max(0, config.FailureCooldownSeconds));
var names = (config.ServerNames != null && config.ServerNames.Count > 0)
? config.ServerNames
: new List { config.ServerName };
_nodes = names
.Where(n => !string.IsNullOrWhiteSpace(n))
.Select(n => n.Trim())
.Distinct(StringComparer.OrdinalIgnoreCase)
.Select(n => new NodeEntry { Name = n })
.ToList();
}
///
/// Gets the total number of configured cluster nodes. Stable — nodes are never added
/// or removed after construction.
///
public int NodeCount
{
get
{
lock (_lock)
return _nodes.Count;
}
}
///
/// Returns an ordered snapshot of nodes currently eligible for a connection attempt,
/// with any node whose cooldown has elapsed automatically restored to the pool.
/// An empty list means all nodes are in active cooldown.
///
public IReadOnlyList GetHealthyNodes()
{
lock (_lock)
{
var now = _clock();
return _nodes
.Where(n => IsHealthyAt(n, now))
.Select(n => n.Name)
.ToList();
}
}
///
/// Gets the count of nodes currently eligible for a connection attempt (i.e., not in cooldown).
///
public int HealthyNodeCount
{
get
{
lock (_lock)
{
var now = _clock();
return _nodes.Count(n => IsHealthyAt(n, now));
}
}
}
///
/// Places into cooldown starting at the current clock time.
/// Increments the node's failure counter and stores the latest error message for
/// surfacing on the dashboard. Unknown node names are ignored.
///
public void MarkFailed(string node, string? error)
{
lock (_lock)
{
var entry = FindEntry(node);
if (entry == null)
return;
var now = _clock();
entry.FailureCount++;
entry.LastError = error;
entry.LastFailureTime = now;
entry.CooldownUntil = _cooldown.TotalMilliseconds > 0 ? now + _cooldown : (DateTime?)null;
}
}
///
/// Marks as healthy immediately — clears any active cooldown but
/// leaves the cumulative failure counter intact for operator diagnostics. Unknown node
/// names are ignored.
///
public void MarkHealthy(string node)
{
lock (_lock)
{
var entry = FindEntry(node);
if (entry == null)
return;
entry.CooldownUntil = null;
}
}
///
/// Captures the current per-node state for the health dashboard. Freshly computed from
/// so recently-expired cooldowns are reported as healthy.
///
public List SnapshotNodeStates()
{
lock (_lock)
{
var now = _clock();
return _nodes.Select(n => new HistorianClusterNodeState
{
Name = n.Name,
IsHealthy = IsHealthyAt(n, now),
CooldownUntil = IsHealthyAt(n, now) ? null : n.CooldownUntil,
FailureCount = n.FailureCount,
LastError = n.LastError,
LastFailureTime = n.LastFailureTime
}).ToList();
}
}
private static bool IsHealthyAt(NodeEntry entry, DateTime now)
{
return entry.CooldownUntil == null || entry.CooldownUntil <= now;
}
private NodeEntry? FindEntry(string node)
{
for (var i = 0; i < _nodes.Count; i++)
if (string.Equals(_nodes[i].Name, node, StringComparison.OrdinalIgnoreCase))
return _nodes[i];
return null;
}
private sealed class NodeEntry
{
public string Name { get; set; } = "";
public DateTime? CooldownUntil { get; set; }
public int FailureCount { get; set; }
public string? LastError { get; set; }
public DateTime? LastFailureTime { get; set; }
}
}
}