Renames all 11 projects (5 src + 6 tests), the .slnx solution file, all source-file namespaces, all axaml namespace references, and all v1 documentation references in CLAUDE.md and docs/*.md (excluding docs/v2/ which is already in OtOpcUa form). Also updates the TopShelf service registration name from "LmxOpcUa" to "OtOpcUa" per Phase 0 Task 0.6.
Preserves runtime identifiers per Phase 0 Out-of-Scope rules to avoid breaking v1/v2 client trust during coexistence: OPC UA `ApplicationUri` defaults (`urn:{GalaxyName}:LmxOpcUa`), server `EndpointPath` (`/LmxOpcUa`), `ServerName` default (feeds cert subject CN), `MxAccessConfiguration.ClientName` default (defensive — stays "LmxOpcUa" for MxAccess audit-trail consistency), client OPC UA identifiers (`ApplicationName = "LmxOpcUaClient"`, `ApplicationUri = "urn:localhost:LmxOpcUaClient"`, cert directory `%LocalAppData%\LmxOpcUaClient\pki\`), and the `LmxOpcUaServer` class name (class rename out of Phase 0 scope per Task 0.5 sed pattern; happens in Phase 1 alongside `LmxNodeManager → GenericDriverNodeManager` Core extraction). 23 LmxOpcUa references retained, all enumerated and justified in `docs/v2/implementation/exit-gate-phase-0.md`.
Build clean: 0 errors, 30 warnings (lower than baseline 167). Tests at strict improvement over baseline: 821 passing / 1 failing vs baseline 820 / 2 (one flaky pre-existing failure passed this run; the other still fails — both pre-existing and unrelated to the rename). `Client.UI.Tests`, `Historian.Aveva.Tests`, `Client.Shared.Tests`, `IntegrationTests` all match baseline exactly. Exit gate compliance results recorded in `docs/v2/implementation/exit-gate-phase-0.md` with all 7 checks PASS or DEFERRED-to-PR-review (#7 service install verification needs Windows service permissions on the reviewer's box).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
182 lines
6.9 KiB
C#
182 lines
6.9 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using ZB.MOM.WW.OtOpcUa.Host.Configuration;
|
|
using ZB.MOM.WW.OtOpcUa.Host.Historian;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Historian.Aveva
{
    /// <summary>
    /// Thread-safe, pure-logic endpoint picker for the Wonderware Historian cluster. Tracks which
    /// configured nodes are healthy, places failed nodes in a time-bounded cooldown, and hands
    /// out an ordered list of eligible candidates for the data source to try in sequence.
    /// </summary>
    /// <remarks>
    /// Design notes:
    /// <list type="bullet">
    /// <item>No SDK dependency — fully unit-testable with an injected clock.</item>
    /// <item>Per-node state is guarded by a single lock; operations are microsecond-scale
    /// so contention is a non-issue.</item>
    /// <item>Cooldown is purely passive: a node re-enters the healthy pool the next time
    /// it is queried after its cooldown window elapses. There is no background probe.</item>
    /// <item>Nodes are returned in configuration order so operators can express a
    /// preference (primary first, fallback second).</item>
    /// <item>When <see cref="HistorianConfiguration.ServerNames"/> is empty, the picker is
    /// initialized with a single entry from <see cref="HistorianConfiguration.ServerName"/>
    /// so legacy deployments continue to work unchanged.</item>
    /// </list>
    /// </remarks>
    internal sealed class HistorianClusterEndpointPicker
    {
        // Injected time source; production uses DateTime.UtcNow, tests inject a fake clock.
        private readonly Func<DateTime> _clock;
        // How long a failed node is kept out of the healthy pool. TimeSpan.Zero disables cooldown.
        private readonly TimeSpan _cooldown;
        private readonly object _lock = new object();
        // Fixed after construction; only per-entry fields mutate (always under _lock).
        private readonly List<NodeEntry> _nodes;

        /// <summary>
        /// Creates a picker backed by the real UTC clock.
        /// </summary>
        /// <param name="config">Historian configuration supplying node names and cooldown.</param>
        public HistorianClusterEndpointPicker(HistorianConfiguration config)
            : this(config, () => DateTime.UtcNow) { }

        /// <summary>
        /// Test-visible constructor with an injectable clock.
        /// </summary>
        /// <param name="config">Historian configuration supplying node names and cooldown.</param>
        /// <param name="clock">Time source; must return UTC-comparable values.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="config"/> or <paramref name="clock"/> is null.
        /// </exception>
        internal HistorianClusterEndpointPicker(HistorianConfiguration config, Func<DateTime> clock)
        {
            // Guard config explicitly — previously a null config surfaced as a
            // NullReferenceException from the member accesses below.
            if (config == null)
                throw new ArgumentNullException(nameof(config));
            _clock = clock ?? throw new ArgumentNullException(nameof(clock));
            // Negative configured values are clamped to zero (cooldown disabled).
            _cooldown = TimeSpan.FromSeconds(Math.Max(0, config.FailureCooldownSeconds));

            // Legacy fallback: no ServerNames list → single-node cluster from ServerName.
            var names = (config.ServerNames != null && config.ServerNames.Count > 0)
                ? config.ServerNames
                : new List<string> { config.ServerName };

            // Normalize: drop blanks, trim, de-duplicate case-insensitively,
            // preserving configuration order (Distinct keeps first occurrence).
            _nodes = names
                .Where(n => !string.IsNullOrWhiteSpace(n))
                .Select(n => n.Trim())
                .Distinct(StringComparer.OrdinalIgnoreCase)
                .Select(n => new NodeEntry { Name = n })
                .ToList();
        }

        /// <summary>
        /// Gets the total number of configured cluster nodes. Stable — nodes are never added
        /// or removed after construction.
        /// </summary>
        public int NodeCount
        {
            get
            {
                lock (_lock)
                    return _nodes.Count;
            }
        }

        /// <summary>
        /// Returns an ordered snapshot of nodes currently eligible for a connection attempt,
        /// with any node whose cooldown has elapsed automatically restored to the pool.
        /// An empty list means all nodes are in active cooldown.
        /// </summary>
        public IReadOnlyList<string> GetHealthyNodes()
        {
            lock (_lock)
            {
                var now = _clock();
                return _nodes
                    .Where(n => IsHealthyAt(n, now))
                    .Select(n => n.Name)
                    .ToList();
            }
        }

        /// <summary>
        /// Gets the count of nodes currently eligible for a connection attempt (i.e., not in cooldown).
        /// </summary>
        public int HealthyNodeCount
        {
            get
            {
                lock (_lock)
                {
                    var now = _clock();
                    return _nodes.Count(n => IsHealthyAt(n, now));
                }
            }
        }

        /// <summary>
        /// Places <paramref name="node"/> into cooldown starting at the current clock time.
        /// Increments the node's failure counter and stores the latest error message for
        /// surfacing on the dashboard. Unknown node names are ignored.
        /// </summary>
        /// <param name="node">Node name; matched case-insensitively against configured nodes.</param>
        /// <param name="error">Optional error text recorded for diagnostics.</param>
        public void MarkFailed(string node, string? error)
        {
            lock (_lock)
            {
                var entry = FindEntry(node);
                if (entry == null)
                    return;

                var now = _clock();
                entry.FailureCount++;
                entry.LastError = error;
                entry.LastFailureTime = now;
                // Zero cooldown means the node never leaves the healthy pool on failure.
                entry.CooldownUntil = _cooldown.TotalMilliseconds > 0 ? now + _cooldown : (DateTime?)null;
            }
        }

        /// <summary>
        /// Marks <paramref name="node"/> as healthy immediately — clears any active cooldown but
        /// leaves the cumulative failure counter intact for operator diagnostics. Unknown node
        /// names are ignored.
        /// </summary>
        /// <param name="node">Node name; matched case-insensitively against configured nodes.</param>
        public void MarkHealthy(string node)
        {
            lock (_lock)
            {
                var entry = FindEntry(node);
                if (entry == null)
                    return;
                entry.CooldownUntil = null;
            }
        }

        /// <summary>
        /// Captures the current per-node state for the health dashboard. Freshly computed from
        /// <see cref="_clock"/> so recently-expired cooldowns are reported as healthy.
        /// </summary>
        public List<HistorianClusterNodeState> SnapshotNodeStates()
        {
            lock (_lock)
            {
                var now = _clock();
                return _nodes.Select(n =>
                {
                    // Evaluate health once per node (was computed twice per entry).
                    var healthy = IsHealthyAt(n, now);
                    return new HistorianClusterNodeState
                    {
                        Name = n.Name,
                        IsHealthy = healthy,
                        // Report CooldownUntil only while the cooldown is still active.
                        CooldownUntil = healthy ? null : n.CooldownUntil,
                        FailureCount = n.FailureCount,
                        LastError = n.LastError,
                        LastFailureTime = n.LastFailureTime
                    };
                }).ToList();
            }
        }

        // Healthy when no cooldown is set, or the cooldown deadline has been reached.
        // Lifted Nullable<DateTime> comparison: the null branch is handled explicitly first.
        private static bool IsHealthyAt(NodeEntry entry, DateTime now)
        {
            return entry.CooldownUntil == null || entry.CooldownUntil <= now;
        }

        // Linear scan is fine: node counts are tiny (typically 1-2 configured servers).
        private NodeEntry? FindEntry(string node)
        {
            for (var i = 0; i < _nodes.Count; i++)
                if (string.Equals(_nodes[i].Name, node, StringComparison.OrdinalIgnoreCase))
                    return _nodes[i];
            return null;
        }

        // Mutable per-node record; all mutation happens under _lock.
        private sealed class NodeEntry
        {
            public string Name { get; set; } = "";
            // Null = healthy; otherwise the instant at which the node re-enters the pool.
            public DateTime? CooldownUntil { get; set; }
            // Cumulative since construction; deliberately never reset by MarkHealthy.
            public int FailureCount { get; set; }
            public string? LastError { get; set; }
            public DateTime? LastFailureTime { get; set; }
        }
    }
}
|