using System;
using System.Collections.Generic;
using System.Linq;
using ZB.MOM.WW.OtOpcUa.Host.Configuration;
using ZB.MOM.WW.OtOpcUa.Host.Historian;

namespace ZB.MOM.WW.OtOpcUa.Historian.Aveva
{
    /// <summary>
    /// Thread-safe, pure-logic endpoint picker for the Wonderware Historian cluster. Tracks which
    /// configured nodes are healthy, places failed nodes in a time-bounded cooldown, and hands
    /// out an ordered list of eligible candidates for the data source to try in sequence.
    /// </summary>
    /// <remarks>
    /// Design notes:
    /// <list type="bullet">
    /// <item><description>No SDK dependency — fully unit-testable with an injected clock.</description></item>
    /// <item><description>Per-node state is guarded by a single lock; operations are short,
    /// so contention is not expected to matter.</description></item>
    /// <item><description>Cooldown is purely passive: a node re-enters the healthy pool the next time
    /// it is queried after its cooldown window elapses. There is no background probe.</description></item>
    /// <item><description>Nodes are returned in configuration order so operators can express a
    /// preference (primary first, fallback second).</description></item>
    /// <item><description>When <see cref="HistorianConfiguration.ServerNames"/> is null or empty, the
    /// picker is initialized with a single entry from <see cref="HistorianConfiguration.ServerName"/>
    /// so legacy deployments continue to work unchanged.</description></item>
    /// </list>
    /// </remarks>
    internal sealed class HistorianClusterEndpointPicker
    {
        private readonly Func<DateTime> _clock;
        private readonly TimeSpan _cooldown;
        private readonly object _lock = new object();
        private readonly List<NodeEntry> _nodes;

        /// <summary>
        /// Creates a picker that reads the current time from <see cref="DateTime.UtcNow"/>.
        /// </summary>
        /// <param name="config">Historian configuration supplying node names and the failure cooldown.</param>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        public HistorianClusterEndpointPicker(HistorianConfiguration config)
            : this(config, () => DateTime.UtcNow)
        {
        }

        /// <summary>
        /// Creates a picker with an injected clock (used by tests to control cooldown expiry).
        /// </summary>
        /// <param name="config">Historian configuration supplying node names and the failure cooldown.</param>
        /// <param name="clock">Delegate returning the current time; called on every query and failure mark.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="clock"/> or <paramref name="config"/> is null.
        /// </exception>
        internal HistorianClusterEndpointPicker(HistorianConfiguration config, Func<DateTime> clock)
        {
            _clock = clock ?? throw new ArgumentNullException(nameof(clock));
            if (config == null)
            {
                throw new ArgumentNullException(nameof(config));
            }

            // Negative cooldown values are clamped to zero, which disables cooldown entirely.
            _cooldown = TimeSpan.FromSeconds(Math.Max(0, config.FailureCooldownSeconds));

            // Fall back to the single legacy server name when no cluster list is configured.
            var names = (config.ServerNames != null && config.ServerNames.Count > 0)
                ? config.ServerNames
                : new List<string> { config.ServerName };

            // Blank entries are dropped, names are trimmed, and case-insensitive duplicates
            // collapse to their first occurrence so configuration order is preserved.
            _nodes = names
                .Where(n => !string.IsNullOrWhiteSpace(n))
                .Select(n => n.Trim())
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .Select(n => new NodeEntry { Name = n })
                .ToList();
        }

        /// <summary>
        /// Gets the total number of configured cluster nodes. Stable — nodes are never added
        /// or removed after construction.
        /// </summary>
        public int NodeCount
        {
            get
            {
                lock (_lock)
                {
                    return _nodes.Count;
                }
            }
        }

        /// <summary>
        /// Returns an ordered snapshot of nodes currently eligible for a connection attempt,
        /// with any node whose cooldown has elapsed automatically treated as back in the pool.
        /// An empty list means every node is in active cooldown, or that no usable node names
        /// were configured.
        /// </summary>
        /// <returns>Node names in configuration order.</returns>
        public IReadOnlyList<string> GetHealthyNodes()
        {
            lock (_lock)
            {
                var now = _clock();
                return _nodes
                    .Where(n => IsHealthyAt(n, now))
                    .Select(n => n.Name)
                    .ToList();
            }
        }

        /// <summary>
        /// Gets the count of nodes currently eligible for a connection attempt (i.e., not in cooldown).
        /// </summary>
        public int HealthyNodeCount
        {
            get
            {
                lock (_lock)
                {
                    var now = _clock();
                    return _nodes.Count(n => IsHealthyAt(n, now));
                }
            }
        }

        /// <summary>
        /// Places <paramref name="node"/> into cooldown starting at the current clock time.
        /// Increments the node's failure counter and stores the latest error message for
        /// surfacing on the dashboard. Unknown node names are ignored.
        /// </summary>
        /// <param name="node">Node name; matched case-insensitively.</param>
        /// <param name="error">Latest error text, or null when none is available.</param>
        public void MarkFailed(string node, string? error)
        {
            lock (_lock)
            {
                var entry = FindEntry(node);
                if (entry == null)
                {
                    return;
                }

                var now = _clock();
                entry.FailureCount++;
                entry.LastError = error;
                entry.LastFailureTime = now;

                // With a zero cooldown the failure is recorded but the node stays eligible.
                entry.CooldownUntil = _cooldown.TotalMilliseconds > 0
                    ? now + _cooldown
                    : (DateTime?)null;
            }
        }

        /// <summary>
        /// Marks <paramref name="node"/> as healthy immediately — clears any active cooldown but
        /// leaves the cumulative failure counter intact for operator diagnostics. Unknown node
        /// names are ignored.
        /// </summary>
        /// <param name="node">Node name; matched case-insensitively.</param>
        public void MarkHealthy(string node)
        {
            lock (_lock)
            {
                var entry = FindEntry(node);
                if (entry == null)
                {
                    return;
                }

                entry.CooldownUntil = null;
            }
        }

        /// <summary>
        /// Captures the current per-node state for the health dashboard. Health is freshly
        /// computed from the clock so recently-expired cooldowns are reported as healthy.
        /// </summary>
        /// <returns>One state object per configured node, in configuration order.</returns>
        public List<HistorianClusterNodeState> SnapshotNodeStates()
        {
            lock (_lock)
            {
                var now = _clock();
                return _nodes
                    .Select(n =>
                    {
                        var healthy = IsHealthyAt(n, now);
                        return new HistorianClusterNodeState
                        {
                            Name = n.Name,
                            IsHealthy = healthy,
                            // An expired cooldown is reported as cleared rather than as a past timestamp.
                            CooldownUntil = healthy ? null : n.CooldownUntil,
                            FailureCount = n.FailureCount,
                            LastError = n.LastError,
                            LastFailureTime = n.LastFailureTime
                        };
                    })
                    .ToList();
            }
        }

        /// <summary>
        /// A node is healthy when it has no cooldown or its cooldown deadline is at or before
        /// <paramref name="now"/>.
        /// </summary>
        private static bool IsHealthyAt(NodeEntry entry, DateTime now)
        {
            return entry.CooldownUntil == null || entry.CooldownUntil <= now;
        }

        /// <summary>
        /// Looks up a node by name (case-insensitive). Callers must hold <c>_lock</c>.
        /// </summary>
        /// <returns>The matching entry, or null when the name is not configured.</returns>
        private NodeEntry? FindEntry(string node)
        {
            for (var i = 0; i < _nodes.Count; i++)
            {
                if (string.Equals(_nodes[i].Name, node, StringComparison.OrdinalIgnoreCase))
                {
                    return _nodes[i];
                }
            }

            return null;
        }

        /// <summary>Mutable per-node bookkeeping; only touched while <c>_lock</c> is held.</summary>
        private sealed class NodeEntry
        {
            public string Name { get; set; } = "";

            public DateTime? CooldownUntil { get; set; }

            public int FailureCount { get; set; }

            public string? LastError { get; set; }

            public DateTime? LastFailureTime { get; set; }
        }
    }
}