chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)
Group all 69 projects into category subfolders under src/ and tests/ so the Rider Solution Explorer mirrors the module structure. Folders: Core, Server, Drivers (with a nested Driver CLIs subfolder), Client, Tooling. - Move every project folder on disk with git mv (history preserved as renames). - Recompute relative paths in 57 .csproj files: cross-category ProjectReferences, the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external mxaccessgw refs in Driver.Galaxy and its test project. - Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders. - Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL, integration, install). Build green (0 errors); unit tests pass. Docs left for a separate pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,48 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Tri-state result of an <see cref="IPermissionEvaluator.Authorize"/> call, per decision
|
||||
/// #149. Phase 6.2 only produces <see cref="AuthorizationVerdict.Allow"/> and
|
||||
/// <see cref="AuthorizationVerdict.NotGranted"/>; the <see cref="AuthorizationVerdict.Denied"/>
|
||||
/// variant exists in the model so v2.1 Explicit Deny lands without an API break. Provenance
|
||||
/// carries the matched grants (or empty when not granted) for audit + the Admin UI "Probe
|
||||
/// this permission" diagnostic.
|
||||
/// </summary>
|
||||
public sealed record AuthorizationDecision(
|
||||
AuthorizationVerdict Verdict,
|
||||
IReadOnlyList<MatchedGrant> Provenance)
|
||||
{
|
||||
public bool IsAllowed => Verdict == AuthorizationVerdict.Allow;
|
||||
|
||||
/// <summary>Convenience constructor for the common "no grants matched" outcome.</summary>
|
||||
public static AuthorizationDecision NotGranted() => new(AuthorizationVerdict.NotGranted, []);
|
||||
|
||||
/// <summary>Allow with the list of grants that matched.</summary>
|
||||
public static AuthorizationDecision Allowed(IReadOnlyList<MatchedGrant> provenance)
|
||||
=> new(AuthorizationVerdict.Allow, provenance);
|
||||
}
|
||||
|
||||
/// <summary>Three-valued authorization outcome.</summary>
|
||||
public enum AuthorizationVerdict
|
||||
{
|
||||
/// <summary>At least one grant matches the requested (operation, scope) pair.</summary>
|
||||
Allow,
|
||||
|
||||
/// <summary>No grant matches. Phase 6.2 default — treated as deny at the OPC UA surface.</summary>
|
||||
NotGranted,
|
||||
|
||||
/// <summary>Explicit deny grant matched. Reserved for v2.1; never produced by Phase 6.2.</summary>
|
||||
Denied,
|
||||
}
|
||||
|
||||
/// <summary>One grant that contributed to an Allow verdict — for audit / UI diagnostics.</summary>
|
||||
/// <param name="LdapGroup">LDAP group the matched grant belongs to.</param>
|
||||
/// <param name="Scope">Where in the hierarchy the grant was anchored.</param>
|
||||
/// <param name="PermissionFlags">The bitmask the grant contributed.</param>
|
||||
public sealed record MatchedGrant(
|
||||
string LdapGroup,
|
||||
NodeAclScopeKind Scope,
|
||||
NodePermissions PermissionFlags);
|
||||
@@ -0,0 +1,23 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Evaluates whether a session is authorized to perform an OPC UA <see cref="OpcUaOperation"/>
|
||||
/// on the node addressed by a <see cref="NodeScope"/>. Phase 6.2 Stream B central surface.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Data-plane only. Reads <c>NodeAcl</c> rows joined against the session's resolved LDAP
|
||||
/// groups (via <see cref="UserAuthorizationState"/>). Must not depend on the control-plane
|
||||
/// admin-role mapping table per decision #150 — the two concerns share zero runtime code.
|
||||
/// </remarks>
|
||||
public interface IPermissionEvaluator
|
||||
{
|
||||
/// <summary>
|
||||
/// Authorize the requested operation for the session. Callers (<c>DriverNodeManager</c>
|
||||
/// Read / Write / HistoryRead / Subscribe / Browse / Call dispatch) map their native
|
||||
/// failure to <c>BadUserAccessDenied</c> per OPC UA Part 4 when the result is not
|
||||
/// <see cref="AuthorizationVerdict.Allow"/>.
|
||||
/// </summary>
|
||||
AuthorizationDecision Authorize(UserAuthorizationState session, OpcUaOperation operation, NodeScope scope);
|
||||
}
|
||||
58
src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/NodeScope.cs
Normal file
58
src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/NodeScope.cs
Normal file
@@ -0,0 +1,58 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Address of a node in the 6-level scope hierarchy the Phase 6.2 evaluator walks.
|
||||
/// Assembled by the dispatch layer from the node's namespace + UNS path + tag; passed
|
||||
/// to <see cref="IPermissionEvaluator"/> which walks the matching trie path.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Per decision #129 and the Phase 6.2 Stream B plan the hierarchy is
|
||||
/// <c>Cluster → Namespace → UnsArea → UnsLine → Equipment → Tag</c> for UNS
|
||||
/// (Equipment-kind) namespaces. Galaxy (SystemPlatform-kind) namespaces instead use
|
||||
/// <c>Cluster → Namespace → FolderSegment(s) → Tag</c>, and each folder segment takes
|
||||
/// one trie level — so a deeply-nested Galaxy folder implicitly reaches the same
|
||||
/// depth as a full UNS path.</para>
|
||||
///
|
||||
/// <para>Unset mid-path levels (e.g. a Cluster-scoped request with no UnsArea) leave
|
||||
/// the corresponding id <c>null</c>. The evaluator walks as far as the scope goes +
|
||||
/// stops at the first null.</para>
|
||||
/// </remarks>
|
||||
public sealed record NodeScope
|
||||
{
|
||||
/// <summary>Cluster the node belongs to. Required.</summary>
|
||||
public required string ClusterId { get; init; }
|
||||
|
||||
/// <summary>Namespace within the cluster. Null is not allowed for a request against a real node.</summary>
|
||||
public string? NamespaceId { get; init; }
|
||||
|
||||
/// <summary>For Equipment-kind namespaces: UNS area (e.g. "warsaw-west"). Null on Galaxy.</summary>
|
||||
public string? UnsAreaId { get; init; }
|
||||
|
||||
/// <summary>For Equipment-kind namespaces: UNS line below the area. Null on Galaxy.</summary>
|
||||
public string? UnsLineId { get; init; }
|
||||
|
||||
/// <summary>For Equipment-kind namespaces: equipment row below the line. Null on Galaxy.</summary>
|
||||
public string? EquipmentId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// For Galaxy (SystemPlatform-kind) namespaces only: the folder path segments from
|
||||
/// namespace root to the target tag, in order. Empty on Equipment namespaces.
|
||||
/// </summary>
|
||||
public IReadOnlyList<string> FolderSegments { get; init; } = [];
|
||||
|
||||
/// <summary>Target tag id when the scope addresses a specific tag; null for folder / equipment-level scopes.</summary>
|
||||
public string? TagId { get; init; }
|
||||
|
||||
/// <summary>Which hierarchy applies — Equipment-kind (UNS) or SystemPlatform-kind (Galaxy).</summary>
|
||||
public required NodeHierarchyKind Kind { get; init; }
|
||||
}
|
||||
|
||||
/// <summary>Selector between the two scope-hierarchy shapes.</summary>
|
||||
public enum NodeHierarchyKind
|
||||
{
|
||||
/// <summary><c>Cluster → Namespace → UnsArea → UnsLine → Equipment → Tag</c> — UNS / Equipment kind.</summary>
|
||||
Equipment,
|
||||
|
||||
/// <summary><c>Cluster → Namespace → FolderSegment(s) → Tag</c> — Galaxy / SystemPlatform kind.</summary>
|
||||
SystemPlatform,
|
||||
}
|
||||
125
src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/PermissionTrie.cs
Normal file
125
src/Core/ZB.MOM.WW.OtOpcUa.Core/Authorization/PermissionTrie.cs
Normal file
@@ -0,0 +1,125 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// In-memory permission trie for one <c>(ClusterId, GenerationId)</c>. Walk from the cluster
|
||||
/// root down through namespace → UNS levels (or folder segments) → tag, OR-ing the
|
||||
/// <see cref="TrieGrant.PermissionFlags"/> granted at each visited level for each of the session's
|
||||
/// LDAP groups. The accumulated bitmask is compared to the permission required by the
|
||||
/// requested <see cref="Abstractions.OpcUaOperation"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per decision #129 (additive grants, no explicit Deny in v2.0) the walk is pure union:
|
||||
/// encountering a grant at any level contributes its flags, never revokes them. A grant at
|
||||
/// the Cluster root therefore cascades to every tag below it; a grant at a deep equipment
|
||||
/// leaf is visible only on that equipment subtree.
|
||||
/// </remarks>
|
||||
public sealed class PermissionTrie
|
||||
{
|
||||
/// <summary>Cluster this trie belongs to.</summary>
|
||||
public required string ClusterId { get; init; }
|
||||
|
||||
/// <summary>Config generation the trie was built from — used by the cache for invalidation.</summary>
|
||||
public required long GenerationId { get; init; }
|
||||
|
||||
/// <summary>Root of the trie. Level 0 (cluster-level grants) live directly here.</summary>
|
||||
public PermissionTrieNode Root { get; init; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Walk the trie collecting grants that apply to <paramref name="scope"/> for any of the
|
||||
/// session's <paramref name="ldapGroups"/>. Returns the matched-grant list; the caller
|
||||
/// OR-s the flag bits to decide whether the requested permission is carried.
|
||||
/// </summary>
|
||||
public IReadOnlyList<MatchedGrant> CollectMatches(NodeScope scope, IEnumerable<string> ldapGroups)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(scope);
|
||||
ArgumentNullException.ThrowIfNull(ldapGroups);
|
||||
|
||||
var groups = ldapGroups.ToHashSet(StringComparer.OrdinalIgnoreCase);
|
||||
if (groups.Count == 0) return [];
|
||||
|
||||
var matches = new List<MatchedGrant>();
|
||||
|
||||
// Level 0 — cluster-scoped grants.
|
||||
CollectAtLevel(Root, NodeAclScopeKind.Cluster, groups, matches);
|
||||
|
||||
// Level 1 — namespace.
|
||||
if (scope.NamespaceId is null) return matches;
|
||||
if (!Root.Children.TryGetValue(scope.NamespaceId, out var ns)) return matches;
|
||||
CollectAtLevel(ns, NodeAclScopeKind.Namespace, groups, matches);
|
||||
|
||||
// Two hierarchies diverge below the namespace.
|
||||
if (scope.Kind == NodeHierarchyKind.Equipment)
|
||||
WalkEquipment(ns, scope, groups, matches);
|
||||
else
|
||||
WalkSystemPlatform(ns, scope, groups, matches);
|
||||
|
||||
return matches;
|
||||
}
|
||||
|
||||
private static void WalkEquipment(PermissionTrieNode ns, NodeScope scope, HashSet<string> groups, List<MatchedGrant> matches)
|
||||
{
|
||||
if (scope.UnsAreaId is null) return;
|
||||
if (!ns.Children.TryGetValue(scope.UnsAreaId, out var area)) return;
|
||||
CollectAtLevel(area, NodeAclScopeKind.UnsArea, groups, matches);
|
||||
|
||||
if (scope.UnsLineId is null) return;
|
||||
if (!area.Children.TryGetValue(scope.UnsLineId, out var line)) return;
|
||||
CollectAtLevel(line, NodeAclScopeKind.UnsLine, groups, matches);
|
||||
|
||||
if (scope.EquipmentId is null) return;
|
||||
if (!line.Children.TryGetValue(scope.EquipmentId, out var eq)) return;
|
||||
CollectAtLevel(eq, NodeAclScopeKind.Equipment, groups, matches);
|
||||
|
||||
if (scope.TagId is null) return;
|
||||
if (!eq.Children.TryGetValue(scope.TagId, out var tag)) return;
|
||||
CollectAtLevel(tag, NodeAclScopeKind.Tag, groups, matches);
|
||||
}
|
||||
|
||||
private static void WalkSystemPlatform(PermissionTrieNode ns, NodeScope scope, HashSet<string> groups, List<MatchedGrant> matches)
|
||||
{
|
||||
// FolderSegments are nested under the namespace; each is its own trie level. Reuse the
|
||||
// UnsArea scope kind for the flags — NodeAcl rows for Galaxy tags carry ScopeKind.Tag
|
||||
// for leaf grants and ScopeKind.Namespace for folder-root grants; deeper folder grants
|
||||
// are modeled as Equipment-level rows today since NodeAclScopeKind doesn't enumerate
|
||||
// a dedicated FolderSegment kind. Future-proof TODO tracked in Stream B follow-up.
|
||||
var current = ns;
|
||||
foreach (var segment in scope.FolderSegments)
|
||||
{
|
||||
if (!current.Children.TryGetValue(segment, out var child)) return;
|
||||
CollectAtLevel(child, NodeAclScopeKind.Equipment, groups, matches);
|
||||
current = child;
|
||||
}
|
||||
|
||||
if (scope.TagId is null) return;
|
||||
if (!current.Children.TryGetValue(scope.TagId, out var tag)) return;
|
||||
CollectAtLevel(tag, NodeAclScopeKind.Tag, groups, matches);
|
||||
}
|
||||
|
||||
private static void CollectAtLevel(PermissionTrieNode node, NodeAclScopeKind level, HashSet<string> groups, List<MatchedGrant> matches)
|
||||
{
|
||||
foreach (var grant in node.Grants)
|
||||
{
|
||||
if (groups.Contains(grant.LdapGroup))
|
||||
matches.Add(new MatchedGrant(grant.LdapGroup, level, grant.PermissionFlags));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>One node in a <see cref="PermissionTrie"/>.</summary>
|
||||
public sealed class PermissionTrieNode
|
||||
{
|
||||
/// <summary>Grants anchored at this trie level.</summary>
|
||||
public List<TrieGrant> Grants { get; } = [];
|
||||
|
||||
/// <summary>
|
||||
/// Children keyed by the next level's id — namespace id under cluster; UnsAreaId or
|
||||
/// folder-segment name under namespace; etc. Comparer is OrdinalIgnoreCase so the walk
|
||||
/// tolerates case drift between the NodeAcl row and the requested scope.
|
||||
/// </summary>
|
||||
public Dictionary<string, PermissionTrieNode> Children { get; } = new(StringComparer.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
/// <summary>Projection of a <see cref="Configuration.Entities.NodeAcl"/> row into the trie.</summary>
|
||||
public sealed record TrieGrant(string LdapGroup, NodePermissions PermissionFlags);
|
||||
@@ -0,0 +1,97 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Builds a <see cref="PermissionTrie"/> from a set of <see cref="NodeAcl"/> rows anchored
|
||||
/// in one generation. The trie is keyed on the rows' scope hierarchy — rows with
|
||||
/// <see cref="NodeAclScopeKind.Cluster"/> land at the trie root, rows with
|
||||
/// <see cref="NodeAclScopeKind.Tag"/> land at a leaf, etc.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Intended to be called by <see cref="PermissionTrieCache"/> once per published
|
||||
/// generation; the resulting trie is immutable for the life of the cache entry. Idempotent —
|
||||
/// two builds from the same rows produce equal tries (grant lists may be in insertion order;
|
||||
/// evaluators don't depend on order).</para>
|
||||
///
|
||||
/// <para>The builder deliberately does not know about the node-row metadata the trie path
|
||||
/// will be walked with. The caller assembles <see cref="NodeScope"/> values from the live
|
||||
/// config (UnsArea parent of UnsLine, etc.); this class only honors the <c>ScopeId</c>
|
||||
/// each row carries.</para>
|
||||
/// </remarks>
|
||||
public static class PermissionTrieBuilder
|
||||
{
|
||||
/// <summary>
|
||||
/// Build a trie for one cluster/generation from the supplied rows. The caller is
|
||||
/// responsible for pre-filtering rows to the target generation + cluster.
|
||||
/// </summary>
|
||||
public static PermissionTrie Build(
|
||||
string clusterId,
|
||||
long generationId,
|
||||
IReadOnlyList<NodeAcl> rows,
|
||||
IReadOnlyDictionary<string, NodeAclPath>? scopePaths = null)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
|
||||
ArgumentNullException.ThrowIfNull(rows);
|
||||
|
||||
var trie = new PermissionTrie { ClusterId = clusterId, GenerationId = generationId };
|
||||
|
||||
foreach (var row in rows)
|
||||
{
|
||||
if (!string.Equals(row.ClusterId, clusterId, StringComparison.OrdinalIgnoreCase)) continue;
|
||||
var grant = new TrieGrant(row.LdapGroup, row.PermissionFlags);
|
||||
|
||||
var node = row.ScopeKind switch
|
||||
{
|
||||
NodeAclScopeKind.Cluster => trie.Root,
|
||||
_ => Descend(trie.Root, row, scopePaths),
|
||||
};
|
||||
|
||||
if (node is not null)
|
||||
node.Grants.Add(grant);
|
||||
}
|
||||
|
||||
return trie;
|
||||
}
|
||||
|
||||
private static PermissionTrieNode? Descend(PermissionTrieNode root, NodeAcl row, IReadOnlyDictionary<string, NodeAclPath>? scopePaths)
|
||||
{
|
||||
if (string.IsNullOrEmpty(row.ScopeId)) return null;
|
||||
|
||||
// For sub-cluster scopes the caller supplies a path lookup so we know the containing
|
||||
// namespace / UnsArea / UnsLine ids. Without a path lookup we fall back to putting the
|
||||
// row directly under the root using its ScopeId — works for deterministic tests, not
|
||||
// for production where the hierarchy must be honored.
|
||||
if (scopePaths is null || !scopePaths.TryGetValue(row.ScopeId, out var path))
|
||||
{
|
||||
return EnsureChild(root, row.ScopeId);
|
||||
}
|
||||
|
||||
var node = root;
|
||||
foreach (var segment in path.Segments)
|
||||
node = EnsureChild(node, segment);
|
||||
return node;
|
||||
}
|
||||
|
||||
private static PermissionTrieNode EnsureChild(PermissionTrieNode parent, string key)
|
||||
{
|
||||
if (!parent.Children.TryGetValue(key, out var child))
|
||||
{
|
||||
child = new PermissionTrieNode();
|
||||
parent.Children[key] = child;
|
||||
}
|
||||
return child;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Ordered list of trie-path segments from root to the target node. Supplied to
|
||||
/// <see cref="PermissionTrieBuilder.Build"/> so the builder knows where a
|
||||
/// <see cref="NodeAclScopeKind.UnsLine"/>-scoped row sits in the hierarchy.
|
||||
/// </summary>
|
||||
/// <param name="Segments">
|
||||
/// Namespace id, then (for Equipment kind) UnsAreaId / UnsLineId / EquipmentId / TagId as
|
||||
/// applicable; or (for SystemPlatform kind) NamespaceId / FolderSegment / .../TagId.
|
||||
/// </param>
|
||||
public sealed record NodeAclPath(IReadOnlyList<string> Segments);
|
||||
@@ -0,0 +1,88 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Process-singleton cache of <see cref="PermissionTrie"/> instances keyed on
|
||||
/// <c>(ClusterId, GenerationId)</c>. Hot-path evaluation reads
|
||||
/// <see cref="GetTrie(string)"/> without awaiting DB access; the cache is populated
|
||||
/// out-of-band on publish + on first reference via
|
||||
/// <see cref="Install(PermissionTrie)"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per decision #148 and Phase 6.2 Stream B.4 the cache is generation-sealed: once a
|
||||
/// trie is installed for <c>(ClusterId, GenerationId)</c> the entry is immutable. When a
|
||||
/// new generation publishes, the caller calls <see cref="Install"/> with the new trie
|
||||
/// + the cache atomically updates its "current generation" pointer for that cluster.
|
||||
/// Older generations are retained so an in-flight request evaluating the prior generation
|
||||
/// still succeeds — GC via <see cref="Prune(string, int)"/>.
|
||||
/// </remarks>
|
||||
public sealed class PermissionTrieCache
|
||||
{
|
||||
private readonly ConcurrentDictionary<string, ClusterEntry> _byCluster =
|
||||
new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
/// <summary>Install a trie for a cluster + make it the current generation.</summary>
|
||||
public void Install(PermissionTrie trie)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(trie);
|
||||
_byCluster.AddOrUpdate(trie.ClusterId,
|
||||
_ => ClusterEntry.FromSingle(trie),
|
||||
(_, existing) => existing.WithAdditional(trie));
|
||||
}
|
||||
|
||||
/// <summary>Get the current-generation trie for a cluster; null when nothing installed.</summary>
|
||||
public PermissionTrie? GetTrie(string clusterId)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
|
||||
return _byCluster.TryGetValue(clusterId, out var entry) ? entry.Current : null;
|
||||
}
|
||||
|
||||
/// <summary>Get a specific (cluster, generation) trie; null if that pair isn't cached.</summary>
|
||||
public PermissionTrie? GetTrie(string clusterId, long generationId)
|
||||
{
|
||||
if (!_byCluster.TryGetValue(clusterId, out var entry)) return null;
|
||||
return entry.Tries.TryGetValue(generationId, out var trie) ? trie : null;
|
||||
}
|
||||
|
||||
/// <summary>The generation id the <see cref="GetTrie(string)"/> shortcut currently serves for a cluster.</summary>
|
||||
public long? CurrentGenerationId(string clusterId)
|
||||
=> _byCluster.TryGetValue(clusterId, out var entry) ? entry.Current.GenerationId : null;
|
||||
|
||||
/// <summary>Drop every cached trie for one cluster.</summary>
|
||||
public void Invalidate(string clusterId) => _byCluster.TryRemove(clusterId, out _);
|
||||
|
||||
/// <summary>
|
||||
/// Retain only the most-recent <paramref name="keepLatest"/> generations for a cluster.
|
||||
/// No-op when there's nothing to drop.
|
||||
/// </summary>
|
||||
public void Prune(string clusterId, int keepLatest = 3)
|
||||
{
|
||||
if (keepLatest < 1) throw new ArgumentOutOfRangeException(nameof(keepLatest), keepLatest, "keepLatest must be >= 1");
|
||||
if (!_byCluster.TryGetValue(clusterId, out var entry)) return;
|
||||
|
||||
if (entry.Tries.Count <= keepLatest) return;
|
||||
var keep = entry.Tries
|
||||
.OrderByDescending(kvp => kvp.Key)
|
||||
.Take(keepLatest)
|
||||
.ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
|
||||
_byCluster[clusterId] = new ClusterEntry(entry.Current, keep);
|
||||
}
|
||||
|
||||
/// <summary>Diagnostics counter: number of cached (cluster, generation) tries.</summary>
|
||||
public int CachedTrieCount => _byCluster.Values.Sum(e => e.Tries.Count);
|
||||
|
||||
private sealed record ClusterEntry(PermissionTrie Current, IReadOnlyDictionary<long, PermissionTrie> Tries)
|
||||
{
|
||||
public static ClusterEntry FromSingle(PermissionTrie trie) =>
|
||||
new(trie, new Dictionary<long, PermissionTrie> { [trie.GenerationId] = trie });
|
||||
|
||||
public ClusterEntry WithAdditional(PermissionTrie trie)
|
||||
{
|
||||
var next = new Dictionary<long, PermissionTrie>(Tries) { [trie.GenerationId] = trie };
|
||||
// The highest generation wins as "current" — handles out-of-order installs.
|
||||
var current = trie.GenerationId >= Current.GenerationId ? trie : Current;
|
||||
return new ClusterEntry(current, next);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Default <see cref="IPermissionEvaluator"/> implementation. Resolves the
|
||||
/// <see cref="PermissionTrie"/> for the session's cluster (via
|
||||
/// <see cref="PermissionTrieCache"/>), walks it collecting matched grants, OR-s the
|
||||
/// permission flags, and maps against the operation-specific required permission.
|
||||
/// </summary>
|
||||
public sealed class TriePermissionEvaluator : IPermissionEvaluator
|
||||
{
|
||||
private readonly PermissionTrieCache _cache;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
|
||||
public TriePermissionEvaluator(PermissionTrieCache cache, TimeProvider? timeProvider = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(cache);
|
||||
_cache = cache;
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
}
|
||||
|
||||
public AuthorizationDecision Authorize(UserAuthorizationState session, OpcUaOperation operation, NodeScope scope)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(session);
|
||||
ArgumentNullException.ThrowIfNull(scope);
|
||||
|
||||
// Decision #152 — beyond the staleness ceiling every call fails closed regardless of
|
||||
// cache warmth elsewhere in the process.
|
||||
if (session.IsStale(_timeProvider.GetUtcNow().UtcDateTime))
|
||||
return AuthorizationDecision.NotGranted();
|
||||
|
||||
if (!string.Equals(session.ClusterId, scope.ClusterId, StringComparison.OrdinalIgnoreCase))
|
||||
return AuthorizationDecision.NotGranted();
|
||||
|
||||
var trie = _cache.GetTrie(scope.ClusterId);
|
||||
if (trie is null) return AuthorizationDecision.NotGranted();
|
||||
|
||||
var matches = trie.CollectMatches(scope, session.LdapGroups);
|
||||
if (matches.Count == 0) return AuthorizationDecision.NotGranted();
|
||||
|
||||
var required = MapOperationToPermission(operation);
|
||||
var granted = NodePermissions.None;
|
||||
foreach (var m in matches) granted |= m.PermissionFlags;
|
||||
|
||||
return (granted & required) == required
|
||||
? AuthorizationDecision.Allowed(matches)
|
||||
: AuthorizationDecision.NotGranted();
|
||||
}
|
||||
|
||||
/// <summary>Maps each <see cref="OpcUaOperation"/> to the <see cref="NodePermissions"/> bit required to grant it.</summary>
|
||||
public static NodePermissions MapOperationToPermission(OpcUaOperation op) => op switch
|
||||
{
|
||||
OpcUaOperation.Browse => NodePermissions.Browse,
|
||||
OpcUaOperation.Read => NodePermissions.Read,
|
||||
OpcUaOperation.WriteOperate => NodePermissions.WriteOperate,
|
||||
OpcUaOperation.WriteTune => NodePermissions.WriteTune,
|
||||
OpcUaOperation.WriteConfigure => NodePermissions.WriteConfigure,
|
||||
OpcUaOperation.HistoryRead => NodePermissions.HistoryRead,
|
||||
OpcUaOperation.HistoryUpdate => NodePermissions.HistoryRead, // HistoryUpdate bit not yet in NodePermissions; TODO Stream C follow-up
|
||||
OpcUaOperation.CreateMonitoredItems => NodePermissions.Subscribe,
|
||||
OpcUaOperation.TransferSubscriptions=> NodePermissions.Subscribe,
|
||||
OpcUaOperation.Call => NodePermissions.MethodCall,
|
||||
OpcUaOperation.AlarmAcknowledge => NodePermissions.AlarmAcknowledge,
|
||||
OpcUaOperation.AlarmConfirm => NodePermissions.AlarmConfirm,
|
||||
OpcUaOperation.AlarmShelve => NodePermissions.AlarmShelve,
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(op), op, $"No permission mapping defined for operation {op}."),
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Authorization;
|
||||
|
||||
/// <summary>
|
||||
/// Per-session authorization state cached on the OPC UA session object + keyed on the
|
||||
/// session id. Captures the LDAP group memberships resolved at sign-in, the generation
|
||||
/// the membership was resolved against, and the bounded freshness window.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per decision #151 the membership is bounded by <see cref="MembershipFreshnessInterval"/>
|
||||
/// (default 15 min). After that, the next hot-path authz call re-resolves LDAP group
|
||||
/// memberships; failure to re-resolve (LDAP unreachable) flips the session to fail-closed
|
||||
/// until a refresh succeeds.
|
||||
///
|
||||
/// Per decision #152 <see cref="AuthCacheMaxStaleness"/> (default 5 min) is separate from
|
||||
/// Phase 6.1's availability-oriented 24h cache — beyond this window the evaluator returns
|
||||
/// <see cref="AuthorizationVerdict.NotGranted"/> regardless of config-cache warmth.
|
||||
/// </remarks>
|
||||
public sealed record UserAuthorizationState
|
||||
{
|
||||
/// <summary>Opaque session id (reuse OPC UA session handle when possible).</summary>
|
||||
public required string SessionId { get; init; }
|
||||
|
||||
/// <summary>Cluster the session is scoped to — every request targets nodes in this cluster.</summary>
|
||||
public required string ClusterId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// LDAP groups the user is a member of as resolved at sign-in / last membership refresh.
|
||||
/// Case comparison is handled downstream by the evaluator (OrdinalIgnoreCase).
|
||||
/// </summary>
|
||||
public required IReadOnlyList<string> LdapGroups { get; init; }
|
||||
|
||||
/// <summary>Timestamp when <see cref="LdapGroups"/> was last resolved from the directory.</summary>
|
||||
public required DateTime MembershipResolvedUtc { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Trie generation the session is currently bound to. When
|
||||
/// <see cref="PermissionTrieCache"/> moves to a new generation, the session's
|
||||
/// <c>(AuthGenerationId, MembershipVersion)</c> stamp no longer matches its
|
||||
/// MonitoredItems and they re-evaluate on next publish (decision #153).
|
||||
/// </summary>
|
||||
public required long AuthGenerationId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Monotonic counter incremented every time membership is re-resolved. Combined with
|
||||
/// <see cref="AuthGenerationId"/> into the subscription stamp per decision #153.
|
||||
/// </summary>
|
||||
public required long MembershipVersion { get; init; }
|
||||
|
||||
/// <summary>Bounded membership freshness window; past this the next authz call refreshes.</summary>
|
||||
public TimeSpan MembershipFreshnessInterval { get; init; } = TimeSpan.FromMinutes(15);
|
||||
|
||||
/// <summary>Hard staleness ceiling — beyond this, the evaluator fails closed.</summary>
|
||||
public TimeSpan AuthCacheMaxStaleness { get; init; } = TimeSpan.FromMinutes(5);
|
||||
|
||||
/// <summary>
|
||||
/// True when <paramref name="utcNow"/> - <see cref="MembershipResolvedUtc"/> exceeds
|
||||
/// <see cref="AuthCacheMaxStaleness"/>. The evaluator short-circuits to NotGranted
|
||||
/// whenever this is true.
|
||||
/// </summary>
|
||||
public bool IsStale(DateTime utcNow) => utcNow - MembershipResolvedUtc > AuthCacheMaxStaleness;
|
||||
|
||||
/// <summary>
|
||||
/// True when membership is past its freshness interval but still within the staleness
|
||||
/// ceiling — a signal to the caller to kick off an async refresh, while the current
|
||||
/// call still evaluates against the cached memberships.
|
||||
/// </summary>
|
||||
public bool NeedsRefresh(DateTime utcNow) =>
|
||||
!IsStale(utcNow) && utcNow - MembershipResolvedUtc > MembershipFreshnessInterval;
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Process-singleton registry of <see cref="IDriver"/> factories keyed by
|
||||
/// <c>DriverInstance.DriverType</c> string. Each driver project ships a DI
|
||||
/// extension (e.g. <c>services.AddGalaxyProxyDriverFactory()</c>) that registers
|
||||
/// its factory at startup; the bootstrapper looks up the factory by
|
||||
/// <c>DriverInstance.DriverType</c> + invokes it with the row's
|
||||
/// <c>DriverInstanceId</c> + <c>DriverConfig</c> JSON.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Closes the gap surfaced by task #240 live smoke — DriverInstance rows in
|
||||
/// the central config DB had no path to materialise as registered <see cref="IDriver"/>
|
||||
/// instances. The factory registry is the seam.
|
||||
/// </remarks>
|
||||
public sealed class DriverFactoryRegistry
|
||||
{
|
||||
private readonly Dictionary<string, Func<string, string, IDriver>> _factories
|
||||
= new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly Dictionary<string, DriverTier> _tiers
|
||||
= new(StringComparer.OrdinalIgnoreCase);
|
||||
private readonly object _lock = new();
|
||||
|
||||
/// <summary>
|
||||
/// Register a factory for <paramref name="driverType"/>. Throws if a factory is
|
||||
/// already registered for that type — drivers are singletons by type-name in
|
||||
/// this process.
|
||||
/// </summary>
|
||||
/// <param name="driverType">Matches <c>DriverInstance.DriverType</c>.</param>
|
||||
/// <param name="factory">
|
||||
/// Receives <c>(driverInstanceId, driverConfigJson)</c>; returns a new
|
||||
/// <see cref="IDriver"/>. Must NOT call <see cref="IDriver.InitializeAsync"/>
|
||||
/// itself — the bootstrapper calls it via <see cref="DriverHost.RegisterAsync"/>
|
||||
/// so the host's per-driver retry semantics apply uniformly.
|
||||
/// </param>
|
||||
/// <param name="tier">
|
||||
/// Stability tier per <c>docs/v2/driver-stability.md</c>. Defaults to
|
||||
/// <see cref="DriverTier.A"/> (in-process, lowest recycle footprint); Tier C
|
||||
/// drivers SHOULD pass this explicitly so the scheduled-recycle wiring in
|
||||
/// <c>DriverInstanceBootstrapper</c> can skip in-process drivers by tier check
|
||||
/// rather than by driver-type allow-list.
|
||||
/// </param>
|
||||
public void Register(string driverType, Func<string, string, IDriver> factory,
|
||||
DriverTier tier = DriverTier.A)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(driverType);
|
||||
ArgumentNullException.ThrowIfNull(factory);
|
||||
lock (_lock)
|
||||
{
|
||||
if (_factories.ContainsKey(driverType))
|
||||
throw new InvalidOperationException(
|
||||
$"DriverType '{driverType}' factory already registered for this process");
|
||||
_factories[driverType] = factory;
|
||||
_tiers[driverType] = tier;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Try to look up the factory for <paramref name="driverType"/>. Returns null
|
||||
/// if no driver assembly registered one — bootstrapper logs + skips so a
|
||||
/// missing-assembly deployment doesn't take down the whole server.
|
||||
/// </summary>
|
||||
public Func<string, string, IDriver>? TryGet(string driverType)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(driverType);
|
||||
lock (_lock) return _factories.GetValueOrDefault(driverType);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Look up the tier recorded alongside the factory. Returns <see cref="DriverTier.A"/>
|
||||
/// for unknown driver types — a missing registration is already a skipped-bootstrap
|
||||
/// case upstream; we don't double-surface that failure here.
|
||||
/// </summary>
|
||||
public DriverTier GetTier(string driverType)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(driverType);
|
||||
lock (_lock) return _tiers.GetValueOrDefault(driverType, DriverTier.A);
|
||||
}
|
||||
|
||||
public IReadOnlyCollection<string> RegisteredTypes
|
||||
{
|
||||
get { lock (_lock) return [.. _factories.Keys]; }
|
||||
}
|
||||
}
|
||||
91
src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs
Normal file
91
src/Core/ZB.MOM.WW.OtOpcUa.Core/Hosting/DriverHost.cs
Normal file
@@ -0,0 +1,91 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Process-local registry + lifecycle manager for loaded <see cref="IDriver"/> instances
|
||||
/// (decision #65). Phase 1 scaffold — per-process isolation for Tier C drivers (Galaxy, FOCAS)
|
||||
/// is implemented in Phase 2 via named-pipe RPC; this class handles in-process drivers today
|
||||
/// and exposes the same registration interface so the Tier C wrapper can slot in later.
|
||||
/// </summary>
|
||||
public sealed class DriverHost : IAsyncDisposable
|
||||
{
|
||||
private readonly Dictionary<string, IDriver> _drivers = new();
|
||||
private readonly object _lock = new();
|
||||
|
||||
public IReadOnlyCollection<string> RegisteredDriverIds
|
||||
{
|
||||
get { lock (_lock) return [.. _drivers.Keys]; }
|
||||
}
|
||||
|
||||
public DriverHealth? GetHealth(string driverInstanceId)
|
||||
{
|
||||
lock (_lock)
|
||||
return _drivers.TryGetValue(driverInstanceId, out var d) ? d.GetHealth() : null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Look up a registered driver by instance id. Used by the OPC UA server runtime
|
||||
/// (<c>OtOpcUaServer</c>) to instantiate one <c>DriverNodeManager</c> per driver at
|
||||
/// startup. Returns null when the driver is not registered.
|
||||
/// </summary>
|
||||
public IDriver? GetDriver(string driverInstanceId)
|
||||
{
|
||||
lock (_lock)
|
||||
return _drivers.TryGetValue(driverInstanceId, out var d) ? d : null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Registers the driver and calls <see cref="IDriver.InitializeAsync"/>. If initialization
|
||||
/// throws, the driver is kept in the registry so the operator can retry; quality on its
|
||||
/// nodes will reflect <see cref="DriverState.Faulted"/> until <c>Reinitialize</c> succeeds.
|
||||
/// </summary>
|
||||
public async Task RegisterAsync(IDriver driver, string driverConfigJson, CancellationToken ct)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(driver);
|
||||
var id = driver.DriverInstanceId;
|
||||
|
||||
lock (_lock)
|
||||
{
|
||||
if (_drivers.ContainsKey(id))
|
||||
throw new InvalidOperationException($"Driver '{id}' is already registered.");
|
||||
_drivers[id] = driver;
|
||||
}
|
||||
|
||||
try { await driver.InitializeAsync(driverConfigJson, ct); }
|
||||
catch
|
||||
{
|
||||
// Keep the driver registered — operator will see Faulted state and can reinitialize.
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
public async Task UnregisterAsync(string driverInstanceId, CancellationToken ct)
|
||||
{
|
||||
IDriver? driver;
|
||||
lock (_lock)
|
||||
{
|
||||
if (!_drivers.TryGetValue(driverInstanceId, out driver)) return;
|
||||
_drivers.Remove(driverInstanceId);
|
||||
}
|
||||
|
||||
try { await driver.ShutdownAsync(ct); }
|
||||
catch { /* shutdown is best-effort; logs elsewhere */ }
|
||||
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
List<IDriver> snapshot;
|
||||
lock (_lock)
|
||||
{
|
||||
snapshot = [.. _drivers.Values];
|
||||
_drivers.Clear();
|
||||
}
|
||||
|
||||
foreach (var driver in snapshot)
|
||||
{
|
||||
try { await driver.ShutdownAsync(CancellationToken.None); } catch { /* ignore */ }
|
||||
(driver as IDisposable)?.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Observability;
|
||||
|
||||
/// <summary>
|
||||
/// Domain-layer health aggregation for Phase 6.1 Stream C. Pure functions over the driver
|
||||
/// fleet — given each driver's <see cref="DriverState"/>, produce a <see cref="ReadinessVerdict"/>
|
||||
/// that maps to HTTP status codes at the endpoint layer.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// State matrix per <c>docs/v2/implementation/phase-6-1-resilience-and-observability.md</c>
|
||||
/// §Stream C.1:
|
||||
/// <list type="bullet">
|
||||
/// <item><see cref="DriverState.Unknown"/> / <see cref="DriverState.Initializing"/>
|
||||
/// → /readyz 503 (not yet ready).</item>
|
||||
/// <item><see cref="DriverState.Healthy"/> → /readyz 200.</item>
|
||||
/// <item><see cref="DriverState.Degraded"/> → /readyz 200 with flagged driver IDs.</item>
|
||||
/// <item><see cref="DriverState.Faulted"/> → /readyz 503.</item>
|
||||
/// </list>
|
||||
/// The overall verdict is computed across the fleet: any Faulted → Faulted; any
|
||||
/// Unknown/Initializing → NotReady; any Degraded → Degraded; else Healthy. An empty fleet
|
||||
/// is Healthy (nothing to degrade).
|
||||
/// </remarks>
|
||||
public static class DriverHealthReport
|
||||
{
|
||||
/// <summary>Compute the fleet-wide readiness verdict from per-driver states.</summary>
|
||||
public static ReadinessVerdict Aggregate(IReadOnlyList<DriverHealthSnapshot> drivers)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(drivers);
|
||||
if (drivers.Count == 0) return ReadinessVerdict.Healthy;
|
||||
|
||||
var anyFaulted = drivers.Any(d => d.State == DriverState.Faulted);
|
||||
if (anyFaulted) return ReadinessVerdict.Faulted;
|
||||
|
||||
var anyInitializing = drivers.Any(d =>
|
||||
d.State == DriverState.Unknown || d.State == DriverState.Initializing);
|
||||
if (anyInitializing) return ReadinessVerdict.NotReady;
|
||||
|
||||
// Reconnecting = driver alive but not serving live data; report as Degraded so /readyz
|
||||
// stays 200 (the fleet can still serve cached / last-good data) while operators see the
|
||||
// affected driver in the body.
|
||||
var anyDegraded = drivers.Any(d =>
|
||||
d.State == DriverState.Degraded || d.State == DriverState.Reconnecting);
|
||||
if (anyDegraded) return ReadinessVerdict.Degraded;
|
||||
|
||||
return ReadinessVerdict.Healthy;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Map a <see cref="ReadinessVerdict"/> to the HTTP status the /readyz endpoint should
|
||||
/// return per the Stream C.1 state matrix.
|
||||
/// </summary>
|
||||
public static int HttpStatus(ReadinessVerdict verdict) => verdict switch
|
||||
{
|
||||
ReadinessVerdict.Healthy => 200,
|
||||
ReadinessVerdict.Degraded => 200,
|
||||
ReadinessVerdict.NotReady => 503,
|
||||
ReadinessVerdict.Faulted => 503,
|
||||
_ => 500,
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>Per-driver snapshot fed into <see cref="DriverHealthReport.Aggregate"/>.</summary>
|
||||
/// <param name="DriverInstanceId">Driver instance identifier (from <c>IDriver.DriverInstanceId</c>).</param>
|
||||
/// <param name="State">Current <see cref="DriverState"/> from <c>IDriver.GetHealth</c>.</param>
|
||||
/// <param name="DetailMessage">Optional driver-supplied detail (e.g. "primary PLC unreachable").</param>
|
||||
public sealed record DriverHealthSnapshot(
|
||||
string DriverInstanceId,
|
||||
DriverState State,
|
||||
string? DetailMessage = null);
|
||||
|
||||
/// <summary>Overall fleet readiness — derived from driver states by <see cref="DriverHealthReport.Aggregate"/>.</summary>
|
||||
public enum ReadinessVerdict
|
||||
{
|
||||
/// <summary>All drivers Healthy (or fleet is empty).</summary>
|
||||
Healthy,
|
||||
|
||||
/// <summary>At least one driver Degraded; none Faulted / NotReady.</summary>
|
||||
Degraded,
|
||||
|
||||
/// <summary>At least one driver Unknown / Initializing; none Faulted.</summary>
|
||||
NotReady,
|
||||
|
||||
/// <summary>At least one driver Faulted.</summary>
|
||||
Faulted,
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
using Serilog.Context;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Observability;
|
||||
|
||||
/// <summary>
|
||||
/// Convenience wrapper around Serilog <see cref="LogContext"/> — attaches the set of
|
||||
/// structured properties a capability call should carry (DriverInstanceId, DriverType,
|
||||
/// CapabilityName, CorrelationId). Callers wrap their call-site body in a <c>using</c>
|
||||
/// block; inner <c>Log.Information</c> / <c>Log.Warning</c> calls emit the context
|
||||
/// automatically via the Serilog enricher chain.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/implementation/phase-6-1-resilience-and-observability.md</c> §Stream C.2.
|
||||
/// The correlation ID should be the OPC UA <c>RequestHeader.RequestHandle</c> when in-flight;
|
||||
/// otherwise a short random GUID. Callers supply whichever is available.
|
||||
/// </remarks>
|
||||
public static class LogContextEnricher
|
||||
{
|
||||
/// <summary>Attach the capability-call property set. Dispose the returned scope to pop.</summary>
|
||||
public static IDisposable Push(string driverInstanceId, string driverType, DriverCapability capability, string correlationId)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(driverInstanceId);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(driverType);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(correlationId);
|
||||
|
||||
var a = LogContext.PushProperty("DriverInstanceId", driverInstanceId);
|
||||
var b = LogContext.PushProperty("DriverType", driverType);
|
||||
var c = LogContext.PushProperty("CapabilityName", capability.ToString());
|
||||
var d = LogContext.PushProperty("CorrelationId", correlationId);
|
||||
return new CompositeScope(a, b, c, d);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Generate a short correlation ID when no OPC UA RequestHandle is available.
|
||||
/// 12-hex-char slice of a GUID — long enough for log correlation, short enough to
|
||||
/// scan visually.
|
||||
/// </summary>
|
||||
public static string NewCorrelationId() => Guid.NewGuid().ToString("N")[..12];
|
||||
|
||||
private sealed class CompositeScope : IDisposable
|
||||
{
|
||||
private readonly IDisposable[] _inner;
|
||||
public CompositeScope(params IDisposable[] inner) => _inner = inner;
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
// Reverse-order disposal matches Serilog's stack semantics.
|
||||
for (var i = _inner.Length - 1; i >= 0; i--)
|
||||
_inner[i].Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
275
src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/EquipmentNodeWalker.cs
Normal file
275
src/Core/ZB.MOM.WW.OtOpcUa.Core/OpcUa/EquipmentNodeWalker.cs
Normal file
@@ -0,0 +1,275 @@
|
||||
using System.Text.Json;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Materializes the canonical Unified Namespace browse tree for an Equipment-kind
|
||||
/// <see cref="Configuration.Entities.Namespace"/> from the Config DB's
|
||||
/// <c>UnsArea</c> / <c>UnsLine</c> / <c>Equipment</c> / <c>Tag</c> rows. Runs during
|
||||
/// address-space build per <see cref="IDriver"/> whose
|
||||
/// <c>Namespace.Kind = Equipment</c>; SystemPlatform-kind namespaces (Galaxy) are
|
||||
/// exempt per decision #120 and reach this walker only indirectly through
|
||||
/// <see cref="ITagDiscovery.DiscoverAsync"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <b>Composition strategy.</b> ADR-001 (2026-04-20) accepted Option A — Config
|
||||
/// primary. The walker treats the supplied <see cref="EquipmentNamespaceContent"/>
|
||||
/// snapshot as the authoritative published surface. Every Equipment row becomes a
|
||||
/// folder node at the UNS level-5 segment; every <see cref="Tag"/> bound to an
|
||||
/// Equipment (non-null <see cref="Tag.EquipmentId"/>) becomes a variable node under
|
||||
/// it. Driver-discovered tags that have no Config-DB row are not added by this
|
||||
/// walker — the ITagDiscovery path continues to exist for the SystemPlatform case +
|
||||
/// for enrichment, but Equipment-kind composition is fully Tag-row-driven.
|
||||
/// </para>
|
||||
///
|
||||
/// <para>
|
||||
/// <b>Under each Equipment node.</b> Five identifier properties per decision #121
|
||||
/// (<c>EquipmentId</c>, <c>EquipmentUuid</c>, <c>MachineCode</c>, <c>ZTag</c>,
|
||||
/// <c>SAPID</c>) are added as OPC UA properties — external systems (ERP, SAP PM)
|
||||
/// resolve equipment by whichever identifier they natively use without a sidecar.
|
||||
/// <see cref="IdentificationFolderBuilder.Build"/> materializes the OPC 40010
|
||||
/// Identification sub-folder with the nine decision-#139 fields when at least one
|
||||
/// is non-null; when all nine are null the sub-folder is omitted rather than
|
||||
/// appearing empty.
|
||||
/// </para>
|
||||
///
|
||||
/// <para>
|
||||
/// <b>Address resolution.</b> Variable nodes carry the driver-side full reference
|
||||
/// in <see cref="DriverAttributeInfo.FullName"/> copied from <c>Tag.TagConfig</c>
|
||||
/// (the wire-level address JSON blob whose interpretation is driver-specific). At
|
||||
/// runtime the dispatch layer routes Read/Write calls through the configured
|
||||
/// capability invoker; an unreachable address surfaces as an OPC UA Bad status via
|
||||
/// the natural driver-read failure path, NOT as a build-time reject. The ADR calls
|
||||
/// this "BadNotFound placeholder" behavior — legible to operators via their Admin
|
||||
/// UI + OPC UA client inspection of node status.
|
||||
/// </para>
|
||||
///
|
||||
/// <para>
|
||||
/// <b>Pure function.</b> This class has no dependency on the OPC UA SDK, no
|
||||
/// Config-DB access, no state. It consumes pre-loaded EF Core rows + streams calls
|
||||
/// into the supplied <see cref="IAddressSpaceBuilder"/>. The server-side wiring
|
||||
/// (load snapshot → invoke walker → per-tag capability probe) lives in the Task B
|
||||
/// PR alongside <c>NodeScopeResolver</c>'s Config-DB join.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public static class EquipmentNodeWalker
|
||||
{
|
||||
/// <summary>
|
||||
/// Walk <paramref name="content"/> into <paramref name="namespaceBuilder"/>.
|
||||
/// The builder is scoped to the Equipment-kind namespace root; the walker emits
|
||||
/// Area → Line → Equipment folders under it, then identifier properties + the
|
||||
/// Identification sub-folder + variable nodes per bound Tag under each Equipment.
|
||||
/// </summary>
|
||||
/// <param name="namespaceBuilder">
|
||||
/// The builder scoped to the Equipment-kind namespace root. Caller is responsible for
|
||||
/// creating this (e.g. <c>rootBuilder.Folder(namespace.NamespaceId, namespace.NamespaceUri)</c>).
|
||||
/// </param>
|
||||
/// <param name="content">Pre-loaded + pre-filtered rows for a single published generation.</param>
|
||||
public static void Walk(IAddressSpaceBuilder namespaceBuilder, EquipmentNamespaceContent content)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(namespaceBuilder);
|
||||
ArgumentNullException.ThrowIfNull(content);
|
||||
|
||||
// Group lines by area + equipment by line + tags by equipment up-front. Avoids an
|
||||
// O(N·M) re-scan at each UNS level on large fleets.
|
||||
var linesByArea = content.Lines
|
||||
.GroupBy(l => l.UnsAreaId, StringComparer.OrdinalIgnoreCase)
|
||||
.ToDictionary(g => g.Key, g => g.OrderBy(l => l.Name, StringComparer.Ordinal).ToList(), StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
var equipmentByLine = content.Equipment
|
||||
.GroupBy(e => e.UnsLineId, StringComparer.OrdinalIgnoreCase)
|
||||
.ToDictionary(g => g.Key, g => g.OrderBy(e => e.Name, StringComparer.Ordinal).ToList(), StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
var tagsByEquipment = content.Tags
|
||||
.Where(t => !string.IsNullOrEmpty(t.EquipmentId))
|
||||
.GroupBy(t => t.EquipmentId!, StringComparer.OrdinalIgnoreCase)
|
||||
.ToDictionary(g => g.Key, g => g.OrderBy(t => t.Name, StringComparer.Ordinal).ToList(), StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
var virtualTagsByEquipment = (content.VirtualTags ?? [])
|
||||
.Where(v => v.Enabled)
|
||||
.GroupBy(v => v.EquipmentId, StringComparer.OrdinalIgnoreCase)
|
||||
.ToDictionary(g => g.Key, g => g.OrderBy(v => v.Name, StringComparer.Ordinal).ToList(), StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
var scriptedAlarmsByEquipment = (content.ScriptedAlarms ?? [])
|
||||
.Where(a => a.Enabled)
|
||||
.GroupBy(a => a.EquipmentId, StringComparer.OrdinalIgnoreCase)
|
||||
.ToDictionary(g => g.Key, g => g.OrderBy(a => a.Name, StringComparer.Ordinal).ToList(), StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
foreach (var area in content.Areas.OrderBy(a => a.Name, StringComparer.Ordinal))
|
||||
{
|
||||
var areaBuilder = namespaceBuilder.Folder(area.Name, area.Name);
|
||||
if (!linesByArea.TryGetValue(area.UnsAreaId, out var areaLines)) continue;
|
||||
|
||||
foreach (var line in areaLines)
|
||||
{
|
||||
var lineBuilder = areaBuilder.Folder(line.Name, line.Name);
|
||||
if (!equipmentByLine.TryGetValue(line.UnsLineId, out var lineEquipment)) continue;
|
||||
|
||||
foreach (var equipment in lineEquipment)
|
||||
{
|
||||
var equipmentBuilder = lineBuilder.Folder(equipment.Name, equipment.Name);
|
||||
AddIdentifierProperties(equipmentBuilder, equipment);
|
||||
IdentificationFolderBuilder.Build(equipmentBuilder, equipment);
|
||||
|
||||
if (tagsByEquipment.TryGetValue(equipment.EquipmentId, out var equipmentTags))
|
||||
foreach (var tag in equipmentTags)
|
||||
AddTagVariable(equipmentBuilder, tag);
|
||||
|
||||
if (virtualTagsByEquipment.TryGetValue(equipment.EquipmentId, out var vTags))
|
||||
foreach (var vtag in vTags)
|
||||
AddVirtualTagVariable(equipmentBuilder, vtag);
|
||||
|
||||
if (scriptedAlarmsByEquipment.TryGetValue(equipment.EquipmentId, out var alarms))
|
||||
foreach (var alarm in alarms)
|
||||
AddScriptedAlarmVariable(equipmentBuilder, alarm);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Adds the five operator-facing identifiers from decision #121 as OPC UA properties
|
||||
/// on the Equipment node. EquipmentId + EquipmentUuid are always populated;
|
||||
/// MachineCode is required per <see cref="Equipment"/>; ZTag + SAPID are nullable in
|
||||
/// the data model so they're skipped when null to avoid empty-string noise in the
|
||||
/// browse tree.
|
||||
/// </summary>
|
||||
private static void AddIdentifierProperties(IAddressSpaceBuilder equipmentBuilder, Equipment equipment)
|
||||
{
|
||||
equipmentBuilder.AddProperty("EquipmentId", DriverDataType.String, equipment.EquipmentId);
|
||||
equipmentBuilder.AddProperty("EquipmentUuid", DriverDataType.String, equipment.EquipmentUuid.ToString());
|
||||
equipmentBuilder.AddProperty("MachineCode", DriverDataType.String, equipment.MachineCode);
|
||||
if (!string.IsNullOrEmpty(equipment.ZTag))
|
||||
equipmentBuilder.AddProperty("ZTag", DriverDataType.String, equipment.ZTag);
|
||||
if (!string.IsNullOrEmpty(equipment.SAPID))
|
||||
equipmentBuilder.AddProperty("SAPID", DriverDataType.String, equipment.SAPID);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Emit a single Tag row as an <see cref="IAddressSpaceBuilder.Variable"/>. The driver
|
||||
/// full reference lives in <c>Tag.TagConfig</c> (wire-level address, driver-specific
|
||||
/// JSON blob); the variable node's data type derives from <c>Tag.DataType</c>.
|
||||
/// Unreachable-address behavior per ADR-001 Option A: the variable is created; the
|
||||
/// driver's natural Read failure surfaces an OPC UA Bad status at runtime.
|
||||
/// </summary>
|
||||
private static void AddTagVariable(IAddressSpaceBuilder equipmentBuilder, Tag tag)
|
||||
{
|
||||
var attr = new DriverAttributeInfo(
|
||||
FullName: ExtractFullName(tag.TagConfig),
|
||||
DriverDataType: ParseDriverDataType(tag.DataType),
|
||||
IsArray: false,
|
||||
ArrayDim: null,
|
||||
SecurityClass: SecurityClassification.FreeAccess,
|
||||
IsHistorized: false);
|
||||
equipmentBuilder.Variable(tag.Name, tag.Name, attr);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Cross-driver TagConfig convention — the Config DB's <c>CK_Tag_TagConfig_IsJson</c>
|
||||
/// check constraint requires TagConfig to be a JSON object, and every shipped driver
|
||||
/// (Galaxy / Modbus / AB CIP / S7 / FOCAS / TwinCAT / ABLegacy) stores the wire-level
|
||||
/// address in a top-level <c>FullName</c> field. Extracting it here keeps the walker
|
||||
/// driver-agnostic while giving the driver the plain address string its backend
|
||||
/// expects at read-time — the raw JSON would otherwise be passed verbatim to
|
||||
/// <c>IReadable.ReadAsync</c> and the driver would fail to resolve the tag.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Falls back to the raw <paramref name="tagConfig"/> if it doesn't parse as JSON or
|
||||
/// the <c>FullName</c> field is absent. This preserves the pre-refactor behaviour for
|
||||
/// any legacy row that slipped past the check constraint or any future driver that
|
||||
/// wants an opaque non-JSON reference.
|
||||
/// </remarks>
|
||||
internal static string ExtractFullName(string tagConfig)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(tagConfig)) return tagConfig;
|
||||
try
|
||||
{
|
||||
using var doc = JsonDocument.Parse(tagConfig);
|
||||
if (doc.RootElement.ValueKind == JsonValueKind.Object
|
||||
&& doc.RootElement.TryGetProperty("FullName", out var fullName)
|
||||
&& fullName.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
return fullName.GetString() ?? tagConfig;
|
||||
}
|
||||
}
|
||||
catch (JsonException) { /* fall through */ }
|
||||
return tagConfig;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse <see cref="Tag.DataType"/> (stored as the <see cref="DriverDataType"/> enum
|
||||
/// name string, decision #138) into the enum value. Unknown names fall back to
|
||||
/// <see cref="DriverDataType.String"/> so a one-off driver-specific type doesn't
|
||||
/// abort the whole walk; the underlying driver still sees the original TagConfig
|
||||
/// address + can surface its own typed value via the OPC UA variant at read time.
|
||||
/// </summary>
|
||||
private static DriverDataType ParseDriverDataType(string raw) =>
|
||||
Enum.TryParse<DriverDataType>(raw, ignoreCase: true, out var parsed) ? parsed : DriverDataType.String;
|
||||
|
||||
/// <summary>
|
||||
/// Emit a <see cref="VirtualTag"/> row as a <see cref="NodeSourceKind.Virtual"/>
|
||||
/// variable node. <c>FullName</c> doubles as the UNS path Phase 7's VirtualTagEngine
|
||||
/// addresses its engine-side entries by. The <c>VirtualTagId</c> discriminator lets
|
||||
/// the DriverNodeManager dispatch Reads/Subscribes to the engine rather than any
|
||||
/// driver.
|
||||
/// </summary>
|
||||
private static void AddVirtualTagVariable(IAddressSpaceBuilder equipmentBuilder, VirtualTag vtag)
|
||||
{
|
||||
var attr = new DriverAttributeInfo(
|
||||
FullName: vtag.VirtualTagId,
|
||||
DriverDataType: ParseDriverDataType(vtag.DataType),
|
||||
IsArray: false,
|
||||
ArrayDim: null,
|
||||
SecurityClass: SecurityClassification.FreeAccess,
|
||||
IsHistorized: vtag.Historize,
|
||||
IsAlarm: false,
|
||||
WriteIdempotent: false,
|
||||
Source: NodeSourceKind.Virtual,
|
||||
VirtualTagId: vtag.VirtualTagId,
|
||||
ScriptedAlarmId: null);
|
||||
equipmentBuilder.Variable(vtag.Name, vtag.Name, attr);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Emit a <see cref="ScriptedAlarm"/> row as a <see cref="NodeSourceKind.ScriptedAlarm"/>
|
||||
/// variable node. The OPC UA Part 9 alarm-condition materialization happens at the
|
||||
/// node-manager level (which wires the concrete <c>AlarmConditionState</c> subclass
|
||||
/// per <see cref="ScriptedAlarm.AlarmType"/>); this walker provides the browse-level
|
||||
/// anchor + the <see cref="DriverAttributeInfo.IsAlarm"/> flag that triggers that
|
||||
/// materialization path.
|
||||
/// </summary>
|
||||
private static void AddScriptedAlarmVariable(IAddressSpaceBuilder equipmentBuilder, ScriptedAlarm alarm)
|
||||
{
|
||||
var attr = new DriverAttributeInfo(
|
||||
FullName: alarm.ScriptedAlarmId,
|
||||
DriverDataType: DriverDataType.Boolean,
|
||||
IsArray: false,
|
||||
ArrayDim: null,
|
||||
SecurityClass: SecurityClassification.FreeAccess,
|
||||
IsHistorized: false,
|
||||
IsAlarm: true,
|
||||
WriteIdempotent: false,
|
||||
Source: NodeSourceKind.ScriptedAlarm,
|
||||
VirtualTagId: null,
|
||||
ScriptedAlarmId: alarm.ScriptedAlarmId);
|
||||
equipmentBuilder.Variable(alarm.Name, alarm.Name, attr);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Pre-loaded + pre-filtered snapshot of one Equipment-kind namespace's worth of Config
|
||||
/// DB rows. All four collections are scoped to the same
|
||||
/// <see cref="Configuration.Entities.ConfigGeneration"/> + the same
|
||||
/// <see cref="Configuration.Entities.Namespace"/> row. The walker assumes this filter
|
||||
/// was applied by the caller + does no cross-generation or cross-namespace validation.
|
||||
/// </summary>
|
||||
public sealed record EquipmentNamespaceContent(
|
||||
IReadOnlyList<UnsArea> Areas,
|
||||
IReadOnlyList<UnsLine> Lines,
|
||||
IReadOnlyList<Equipment> Equipment,
|
||||
IReadOnlyList<Tag> Tags,
|
||||
IReadOnlyList<VirtualTag>? VirtualTags = null,
|
||||
IReadOnlyList<ScriptedAlarm>? ScriptedAlarms = null);
|
||||
@@ -0,0 +1,118 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Generic, driver-agnostic backbone for populating the OPC UA address space from an
|
||||
/// <see cref="IDriver"/>. Walks the driver's discovery, wires the alarm + data-change +
|
||||
/// rediscovery subscription events, and hands each variable to the supplied
|
||||
/// <see cref="IAddressSpaceBuilder"/>. Concrete OPC UA server implementations provide the
|
||||
/// builder — see the Server project's <c>OpcUaAddressSpaceBuilder</c> for the materialization
|
||||
/// against <c>CustomNodeManager2</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/plan.md</c> decision #52 + #62 — Core owns the node tree, drivers stream
|
||||
/// <c>Folder</c>/<c>Variable</c> calls, alarm-bearing variables are annotated via
|
||||
/// <see cref="IVariableHandle.MarkAsAlarmCondition"/> and subsequent
|
||||
/// <see cref="IAlarmSource.OnAlarmEvent"/> payloads route to the sink the builder returned.
|
||||
/// </remarks>
|
||||
public class GenericDriverNodeManager(IDriver driver) : IDisposable
|
||||
{
|
||||
protected IDriver Driver { get; } = driver ?? throw new ArgumentNullException(nameof(driver));
|
||||
|
||||
public string DriverInstanceId => Driver.DriverInstanceId;
|
||||
|
||||
// Source tag (DriverAttributeInfo.FullName) → alarm-condition sink. Populated during
|
||||
// BuildAddressSpaceAsync by a recording IAddressSpaceBuilder implementation that captures the
|
||||
// IVariableHandle per attr.IsAlarm=true variable and calls MarkAsAlarmCondition.
|
||||
private readonly ConcurrentDictionary<string, IAlarmConditionSink> _alarmSinks =
|
||||
new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
private EventHandler<AlarmEventArgs>? _alarmForwarder;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>
|
||||
/// Populates the address space by streaming nodes from the driver into the supplied builder,
|
||||
/// wraps the builder so alarm-condition sinks are captured, subscribes to the driver's
|
||||
/// alarm event stream, and routes each transition to the matching sink by <c>SourceNodeId</c>.
|
||||
/// Driver exceptions are isolated per decision #12 — the driver's subtree is marked Faulted,
|
||||
/// but other drivers remain available.
|
||||
/// </summary>
|
||||
public async Task BuildAddressSpaceAsync(IAddressSpaceBuilder builder, CancellationToken ct)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(builder);
|
||||
|
||||
if (Driver is not ITagDiscovery discovery)
|
||||
throw new NotSupportedException($"Driver '{Driver.DriverInstanceId}' does not implement ITagDiscovery.");
|
||||
|
||||
var capturing = new CapturingBuilder(builder, _alarmSinks);
|
||||
await discovery.DiscoverAsync(capturing, ct);
|
||||
|
||||
if (Driver is IAlarmSource alarmSource)
|
||||
{
|
||||
_alarmForwarder = (_, e) =>
|
||||
{
|
||||
// Route the alarm to the sink registered for the originating variable, if any.
|
||||
// Unknown source ids are dropped silently — they may belong to another driver or
|
||||
// to a variable the builder chose not to flag as an alarm condition.
|
||||
if (_alarmSinks.TryGetValue(e.SourceNodeId, out var sink))
|
||||
sink.OnTransition(e);
|
||||
};
|
||||
alarmSource.OnAlarmEvent += _alarmForwarder;
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
if (_alarmForwarder is not null && Driver is IAlarmSource alarmSource)
|
||||
{
|
||||
alarmSource.OnAlarmEvent -= _alarmForwarder;
|
||||
}
|
||||
_alarmSinks.Clear();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Snapshot the current alarm-sink registry by source node id. Diagnostic + test hook;
|
||||
/// not part of the hot path.
|
||||
/// </summary>
|
||||
internal IReadOnlyCollection<string> TrackedAlarmSources => _alarmSinks.Keys.ToList();
|
||||
|
||||
/// <summary>
|
||||
/// Wraps the caller-supplied <see cref="IAddressSpaceBuilder"/> so every
|
||||
/// <see cref="IVariableHandle.MarkAsAlarmCondition"/> call registers the returned sink in
|
||||
/// the node manager's source-node-id map. The builder itself drives materialization;
|
||||
/// this wrapper only observes.
|
||||
/// </summary>
|
||||
private sealed class CapturingBuilder(
|
||||
IAddressSpaceBuilder inner,
|
||||
ConcurrentDictionary<string, IAlarmConditionSink> sinks) : IAddressSpaceBuilder
|
||||
{
|
||||
public IAddressSpaceBuilder Folder(string browseName, string displayName)
|
||||
=> new CapturingBuilder(inner.Folder(browseName, displayName), sinks);
|
||||
|
||||
public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo)
|
||||
=> new CapturingHandle(inner.Variable(browseName, displayName, attributeInfo), sinks);
|
||||
|
||||
public void AddProperty(string browseName, DriverDataType dataType, object? value)
|
||||
=> inner.AddProperty(browseName, dataType, value);
|
||||
}
|
||||
|
||||
private sealed class CapturingHandle(
|
||||
IVariableHandle inner,
|
||||
ConcurrentDictionary<string, IAlarmConditionSink> sinks) : IVariableHandle
|
||||
{
|
||||
public string FullReference => inner.FullReference;
|
||||
|
||||
public IAlarmConditionSink MarkAsAlarmCondition(AlarmConditionInfo info)
|
||||
{
|
||||
var sink = inner.MarkAsAlarmCondition(info);
|
||||
// Register by the driver-side full reference so the alarm forwarder can look it up
|
||||
// using AlarmEventArgs.SourceNodeId (which the driver populates with the same tag).
|
||||
sinks[inner.FullReference] = sink;
|
||||
return sink;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.OpcUa;
|
||||
|
||||
/// <summary>
|
||||
/// Phase 6.4 Stream D: materializes the OPC 40010 Machinery companion-spec Identification
|
||||
/// sub-folder under an Equipment node. Reads the nine decision-#139 columns off the
|
||||
/// <see cref="Equipment"/> row and emits one property per non-null field.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Pure-function shape — testable without a real OPC UA node manager. The caller
|
||||
/// passes the builder scoped to the Equipment node; this class handles the Identification
|
||||
/// sub-folder creation + per-field <see cref="IAddressSpaceBuilder.AddProperty"/> calls.</para>
|
||||
///
|
||||
/// <para>ACL binding: the sub-folder + its properties inherit the Equipment scope's
|
||||
/// grants (no new scope level). Phase 6.2's trie treats them as part of the Equipment
|
||||
/// ScopeId — a user with Equipment-level grant reads Identification; a user without the
|
||||
/// grant gets BadUserAccessDenied on both the Equipment node + its Identification variables.
|
||||
/// See <c>docs/v2/acl-design.md</c> §Identification cross-reference.</para>
|
||||
///
|
||||
/// <para>The nine fields per decision #139 are exposed exactly when they carry a non-null
|
||||
/// value. A row with all nine null produces no Identification sub-folder at all — the
|
||||
/// caller can use <see cref="HasAnyFields(Equipment)"/> to skip the Folder call entirely
|
||||
/// and avoid a pointless empty folder appearing in browse trees.</para>
|
||||
/// </remarks>
|
||||
public static class IdentificationFolderBuilder
|
||||
{
|
||||
/// <summary>Browse + display name of the sub-folder — fixed per OPC 40010 convention.</summary>
|
||||
public const string FolderName = "Identification";
|
||||
|
||||
/// <summary>
|
||||
/// Canonical decision #139 field set exposed in the Identification sub-folder. Order
|
||||
/// matches the decision-log entry so any browse-order reader can cross-reference
|
||||
/// without re-sorting.
|
||||
/// </summary>
|
||||
public static IReadOnlyList<string> FieldNames { get; } = new[]
|
||||
{
|
||||
"Manufacturer", "Model", "SerialNumber",
|
||||
"HardwareRevision", "SoftwareRevision",
|
||||
"YearOfConstruction", "AssetLocation",
|
||||
"ManufacturerUri", "DeviceManualUri",
|
||||
};
|
||||
|
||||
/// <summary>True when the equipment row has at least one non-null Identification field.</summary>
|
||||
public static bool HasAnyFields(Equipment equipment)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(equipment);
|
||||
return equipment.Manufacturer is not null
|
||||
|| equipment.Model is not null
|
||||
|| equipment.SerialNumber is not null
|
||||
|| equipment.HardwareRevision is not null
|
||||
|| equipment.SoftwareRevision is not null
|
||||
|| equipment.YearOfConstruction is not null
|
||||
|| equipment.AssetLocation is not null
|
||||
|| equipment.ManufacturerUri is not null
|
||||
|| equipment.DeviceManualUri is not null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Build the Identification sub-folder under <paramref name="equipmentBuilder"/>. No-op
|
||||
/// when every field is null. Returns the sub-folder builder (or null when no-op) so
|
||||
/// callers can attach additional nodes underneath if needed.
|
||||
/// </summary>
|
||||
public static IAddressSpaceBuilder? Build(IAddressSpaceBuilder equipmentBuilder, Equipment equipment)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(equipmentBuilder);
|
||||
ArgumentNullException.ThrowIfNull(equipment);
|
||||
|
||||
if (!HasAnyFields(equipment)) return null;
|
||||
|
||||
var folder = equipmentBuilder.Folder(FolderName, FolderName);
|
||||
AddIfPresent(folder, "Manufacturer", DriverDataType.String, equipment.Manufacturer);
|
||||
AddIfPresent(folder, "Model", DriverDataType.String, equipment.Model);
|
||||
AddIfPresent(folder, "SerialNumber", DriverDataType.String, equipment.SerialNumber);
|
||||
AddIfPresent(folder, "HardwareRevision", DriverDataType.String, equipment.HardwareRevision);
|
||||
AddIfPresent(folder, "SoftwareRevision", DriverDataType.String, equipment.SoftwareRevision);
|
||||
AddIfPresent(folder, "YearOfConstruction", DriverDataType.Int32,
|
||||
equipment.YearOfConstruction is null ? null : (object)(int)equipment.YearOfConstruction.Value);
|
||||
AddIfPresent(folder, "AssetLocation", DriverDataType.String, equipment.AssetLocation);
|
||||
AddIfPresent(folder, "ManufacturerUri", DriverDataType.String, equipment.ManufacturerUri);
|
||||
AddIfPresent(folder, "DeviceManualUri", DriverDataType.String, equipment.DeviceManualUri);
|
||||
return folder;
|
||||
}
|
||||
|
||||
private static void AddIfPresent(IAddressSpaceBuilder folder, string name, DriverDataType dataType, object? value)
|
||||
{
|
||||
if (value is null) return;
|
||||
folder.AddProperty(name, dataType, value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps the three mutating surfaces of <see cref="IAlarmSource"/>
|
||||
/// (<see cref="IAlarmSource.SubscribeAlarmsAsync"/>, <see cref="IAlarmSource.UnsubscribeAlarmsAsync"/>,
|
||||
/// <see cref="IAlarmSource.AcknowledgeAsync"/>) through <see cref="CapabilityInvoker"/> so the
|
||||
/// Phase 6.1 resilience pipeline runs — retry semantics match
|
||||
/// <see cref="DriverCapability.AlarmSubscribe"/> (retries by default) and
|
||||
/// <see cref="DriverCapability.AlarmAcknowledge"/> (does NOT retry per decision #143).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Multi-host dispatch: when the driver implements <see cref="IPerCallHostResolver"/>,
|
||||
/// each source-node-id is resolved individually + grouped by host so a dead PLC inside a
|
||||
/// multi-device driver doesn't poison the sibling hosts' breakers. Drivers with a single
|
||||
/// host fall back to <see cref="IDriver.DriverInstanceId"/> as the single-host key.</para>
|
||||
///
|
||||
/// <para>Why this lives here + not on <see cref="CapabilityInvoker"/>: alarm surfaces have a
|
||||
/// handle-returning shape (SubscribeAlarmsAsync returns <see cref="IAlarmSubscriptionHandle"/>)
|
||||
/// + a per-call fan-out (AcknowledgeAsync gets a batch of
|
||||
/// <see cref="AlarmAcknowledgeRequest"/>s that may span multiple hosts). Keeping the fan-out
|
||||
/// logic here keeps the invoker's execute-overloads narrow.</para>
|
||||
/// </remarks>
|
||||
public sealed class AlarmSurfaceInvoker
|
||||
{
|
||||
private readonly CapabilityInvoker _invoker;
|
||||
private readonly IAlarmSource _alarmSource;
|
||||
private readonly IPerCallHostResolver? _hostResolver;
|
||||
private readonly string _defaultHost;
|
||||
|
||||
public AlarmSurfaceInvoker(
|
||||
CapabilityInvoker invoker,
|
||||
IAlarmSource alarmSource,
|
||||
string defaultHost,
|
||||
IPerCallHostResolver? hostResolver = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(invoker);
|
||||
ArgumentNullException.ThrowIfNull(alarmSource);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(defaultHost);
|
||||
|
||||
_invoker = invoker;
|
||||
_alarmSource = alarmSource;
|
||||
_defaultHost = defaultHost;
|
||||
_hostResolver = hostResolver;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Subscribe to alarm events for a set of source node ids, fanning out by resolved host
|
||||
/// so per-host breakers / bulkheads apply. Returns one handle per host — callers that
|
||||
/// don't care about per-host separation may concatenate them.
|
||||
/// </summary>
|
||||
public async Task<IReadOnlyList<IAlarmSubscriptionHandle>> SubscribeAsync(
|
||||
IReadOnlyList<string> sourceNodeIds,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(sourceNodeIds);
|
||||
if (sourceNodeIds.Count == 0) return [];
|
||||
|
||||
var byHost = GroupByHost(sourceNodeIds);
|
||||
var handles = new List<IAlarmSubscriptionHandle>(byHost.Count);
|
||||
foreach (var (host, ids) in byHost)
|
||||
{
|
||||
var handle = await _invoker.ExecuteAsync(
|
||||
DriverCapability.AlarmSubscribe,
|
||||
host,
|
||||
async ct => await _alarmSource.SubscribeAlarmsAsync(ids, ct).ConfigureAwait(false),
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
handles.Add(handle);
|
||||
}
|
||||
return handles;
|
||||
}
|
||||
|
||||
/// <summary>Cancel an alarm subscription. Routes through the AlarmSubscribe pipeline for parity.</summary>
|
||||
public ValueTask UnsubscribeAsync(IAlarmSubscriptionHandle handle, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(handle);
|
||||
return _invoker.ExecuteAsync(
|
||||
DriverCapability.AlarmSubscribe,
|
||||
_defaultHost,
|
||||
async ct => await _alarmSource.UnsubscribeAlarmsAsync(handle, ct).ConfigureAwait(false),
|
||||
cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Acknowledge alarms. Fans out by resolved host; each host's batch runs through the
|
||||
/// AlarmAcknowledge pipeline (no-retry per decision #143 — an alarm-ack is not idempotent
|
||||
/// at the plant-floor acknowledgement level even if the OPC UA spec permits re-issue).
|
||||
/// </summary>
|
||||
public async Task AcknowledgeAsync(
|
||||
IReadOnlyList<AlarmAcknowledgeRequest> acknowledgements,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(acknowledgements);
|
||||
if (acknowledgements.Count == 0) return;
|
||||
|
||||
var byHost = _hostResolver is null
|
||||
? new Dictionary<string, List<AlarmAcknowledgeRequest>> { [_defaultHost] = acknowledgements.ToList() }
|
||||
: acknowledgements
|
||||
.GroupBy(a => _hostResolver.ResolveHost(a.SourceNodeId))
|
||||
.ToDictionary(g => g.Key, g => g.ToList());
|
||||
|
||||
foreach (var (host, batch) in byHost)
|
||||
{
|
||||
var batchSnapshot = batch; // capture for the lambda
|
||||
await _invoker.ExecuteAsync(
|
||||
DriverCapability.AlarmAcknowledge,
|
||||
host,
|
||||
async ct => await _alarmSource.AcknowledgeAsync(batchSnapshot, ct).ConfigureAwait(false),
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
private Dictionary<string, List<string>> GroupByHost(IReadOnlyList<string> sourceNodeIds)
|
||||
{
|
||||
if (_hostResolver is null)
|
||||
return new Dictionary<string, List<string>> { [_defaultHost] = sourceNodeIds.ToList() };
|
||||
|
||||
var result = new Dictionary<string, List<string>>(StringComparer.Ordinal);
|
||||
foreach (var id in sourceNodeIds)
|
||||
{
|
||||
var host = _hostResolver.ResolveHost(id);
|
||||
if (!result.TryGetValue(host, out var list))
|
||||
result[host] = list = new List<string>();
|
||||
list.Add(id);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
140
src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs
Normal file
140
src/Core/ZB.MOM.WW.OtOpcUa.Core/Resilience/CapabilityInvoker.cs
Normal file
@@ -0,0 +1,140 @@
|
||||
using Polly;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Observability;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Executes driver-capability calls through a shared Polly pipeline. One invoker per
|
||||
/// <c>(DriverInstance, IDriver)</c> pair; the underlying <see cref="DriverResiliencePipelineBuilder"/>
|
||||
/// is process-singleton so all invokers share its cache.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/plan.md</c> decisions #143-144 and Phase 6.1 Stream A.3. The server's dispatch
|
||||
/// layer routes every capability call (<c>IReadable.ReadAsync</c>, <c>IWritable.WriteAsync</c>,
|
||||
/// <c>ITagDiscovery.DiscoverAsync</c>, <c>ISubscribable.SubscribeAsync/UnsubscribeAsync</c>,
|
||||
/// <c>IHostConnectivityProbe</c> probe loop, <c>IAlarmSource.SubscribeAlarmsAsync/AcknowledgeAsync</c>,
|
||||
/// and all four <c>IHistoryProvider</c> reads) through this invoker.
|
||||
/// </remarks>
|
||||
public sealed class CapabilityInvoker
|
||||
{
|
||||
private readonly DriverResiliencePipelineBuilder _builder;
|
||||
private readonly string _driverInstanceId;
|
||||
private readonly string _driverType;
|
||||
private readonly Func<DriverResilienceOptions> _optionsAccessor;
|
||||
private readonly DriverResilienceStatusTracker? _statusTracker;
|
||||
|
||||
/// <summary>
|
||||
/// Construct an invoker for one driver instance.
|
||||
/// </summary>
|
||||
/// <param name="builder">Shared, process-singleton pipeline builder.</param>
|
||||
/// <param name="driverInstanceId">The <c>DriverInstance.Id</c> column value.</param>
|
||||
/// <param name="optionsAccessor">
|
||||
/// Snapshot accessor for the current resilience options. Invoked per call so Admin-edit +
|
||||
/// pipeline-invalidate can take effect without restarting the invoker.
|
||||
/// </param>
|
||||
/// <param name="driverType">Driver type name for structured-log enrichment (e.g. <c>"Modbus"</c>).</param>
|
||||
/// <param name="statusTracker">Optional resilience-status tracker. When wired, every capability call records start/complete so Admin <c>/hosts</c> can surface <see cref="ResilienceStatusSnapshot.CurrentInFlight"/> as the bulkhead-depth proxy.</param>
|
||||
public CapabilityInvoker(
|
||||
DriverResiliencePipelineBuilder builder,
|
||||
string driverInstanceId,
|
||||
Func<DriverResilienceOptions> optionsAccessor,
|
||||
string driverType = "Unknown",
|
||||
DriverResilienceStatusTracker? statusTracker = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(builder);
|
||||
ArgumentNullException.ThrowIfNull(optionsAccessor);
|
||||
|
||||
_builder = builder;
|
||||
_driverInstanceId = driverInstanceId;
|
||||
_driverType = driverType;
|
||||
_optionsAccessor = optionsAccessor;
|
||||
_statusTracker = statusTracker;
|
||||
}
|
||||
|
||||
/// <summary>Execute a capability call returning a value, honoring the per-capability pipeline.</summary>
|
||||
/// <typeparam name="TResult">Return type of the underlying driver call.</typeparam>
|
||||
public async ValueTask<TResult> ExecuteAsync<TResult>(
|
||||
DriverCapability capability,
|
||||
string hostName,
|
||||
Func<CancellationToken, ValueTask<TResult>> callSite,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(callSite);
|
||||
|
||||
var pipeline = ResolvePipeline(capability, hostName);
|
||||
_statusTracker?.RecordCallStart(_driverInstanceId, hostName);
|
||||
try
|
||||
{
|
||||
using (LogContextEnricher.Push(_driverInstanceId, _driverType, capability, LogContextEnricher.NewCorrelationId()))
|
||||
{
|
||||
return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
_statusTracker?.RecordCallComplete(_driverInstanceId, hostName);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Execute a void-returning capability call, honoring the per-capability pipeline.</summary>
|
||||
public async ValueTask ExecuteAsync(
|
||||
DriverCapability capability,
|
||||
string hostName,
|
||||
Func<CancellationToken, ValueTask> callSite,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(callSite);
|
||||
|
||||
var pipeline = ResolvePipeline(capability, hostName);
|
||||
_statusTracker?.RecordCallStart(_driverInstanceId, hostName);
|
||||
try
|
||||
{
|
||||
using (LogContextEnricher.Push(_driverInstanceId, _driverType, capability, LogContextEnricher.NewCorrelationId()))
|
||||
{
|
||||
await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
_statusTracker?.RecordCallComplete(_driverInstanceId, hostName);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Execute a <see cref="DriverCapability.Write"/> call honoring <see cref="WriteIdempotentAttribute"/>
|
||||
/// semantics — if <paramref name="isIdempotent"/> is <c>false</c>, retries are disabled regardless
|
||||
/// of the tag-level configuration (the pipeline for a non-idempotent write never retries per
|
||||
/// decisions #44-45). If <c>true</c>, the call runs through the capability's pipeline which may
|
||||
/// retry when the tier configuration permits.
|
||||
/// </summary>
|
||||
public async ValueTask<TResult> ExecuteWriteAsync<TResult>(
|
||||
string hostName,
|
||||
bool isIdempotent,
|
||||
Func<CancellationToken, ValueTask<TResult>> callSite,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(callSite);
|
||||
|
||||
if (!isIdempotent)
|
||||
{
|
||||
var noRetryOptions = _optionsAccessor() with
|
||||
{
|
||||
CapabilityPolicies = new Dictionary<DriverCapability, CapabilityPolicy>
|
||||
{
|
||||
[DriverCapability.Write] = _optionsAccessor().Resolve(DriverCapability.Write) with { RetryCount = 0 },
|
||||
},
|
||||
};
|
||||
var pipeline = _builder.GetOrCreate(_driverInstanceId, $"{hostName}::non-idempotent", DriverCapability.Write, noRetryOptions);
|
||||
using (LogContextEnricher.Push(_driverInstanceId, _driverType, DriverCapability.Write, LogContextEnricher.NewCorrelationId()))
|
||||
{
|
||||
return await pipeline.ExecuteAsync(callSite, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
return await ExecuteAsync(DriverCapability.Write, hostName, callSite, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
private ResiliencePipeline ResolvePipeline(DriverCapability capability, string hostName) =>
|
||||
_builder.GetOrCreate(_driverInstanceId, hostName, capability, _optionsAccessor());
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Per-tier × per-capability resilience policy configuration for a driver instance.
|
||||
/// Bound from <c>DriverInstance.ResilienceConfig</c> JSON (nullable column; null = tier defaults).
|
||||
/// Per <c>docs/v2/plan.md</c> decisions #143 and #144.
|
||||
/// </summary>
|
||||
public sealed record DriverResilienceOptions
|
||||
{
|
||||
/// <summary>Tier the owning driver type is registered as; drives the default map.</summary>
|
||||
public required DriverTier Tier { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Per-capability policy overrides. Capabilities absent from this map fall back to
|
||||
/// <see cref="GetTierDefaults(DriverTier)"/> for the configured <see cref="Tier"/>.
|
||||
/// </summary>
|
||||
public IReadOnlyDictionary<DriverCapability, CapabilityPolicy> CapabilityPolicies { get; init; }
|
||||
= new Dictionary<DriverCapability, CapabilityPolicy>();
|
||||
|
||||
/// <summary>Bulkhead (max concurrent in-flight calls) for every capability. Default 32.</summary>
|
||||
public int BulkheadMaxConcurrent { get; init; } = 32;
|
||||
|
||||
/// <summary>
|
||||
/// Bulkhead queue depth. Zero = no queueing; overflow fails fast with
|
||||
/// <c>BulkheadRejectedException</c>. Default 64.
|
||||
/// </summary>
|
||||
public int BulkheadMaxQueue { get; init; } = 64;
|
||||
|
||||
/// <summary>
|
||||
/// Periodic scheduled recycle interval for Tier C out-of-process hosts, in seconds.
|
||||
/// Null (the default) = no scheduled recycle; the driver's Host process keeps running
|
||||
/// indefinitely unless a memory breach or operator action triggers a recycle. Only
|
||||
/// respected for <see cref="DriverTier.C"/>; Tier A/B recycle would tear down every
|
||||
/// OPC UA session, so the loader ignores non-null values for those tiers + logs a
|
||||
/// warning (per decisions #74 / #145).
|
||||
/// </summary>
|
||||
public int? RecycleIntervalSeconds { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Look up the effective policy for a capability, falling back to tier defaults when no
|
||||
/// override is configured. Never returns null.
|
||||
/// </summary>
|
||||
public CapabilityPolicy Resolve(DriverCapability capability)
|
||||
{
|
||||
if (CapabilityPolicies.TryGetValue(capability, out var policy))
|
||||
return policy;
|
||||
|
||||
var defaults = GetTierDefaults(Tier);
|
||||
return defaults[capability];
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Per-tier per-capability default policy table, per decisions #143-144 and the Phase 6.1
|
||||
/// Stream A.2 specification. Retries skipped on <see cref="DriverCapability.Write"/> and
|
||||
/// <see cref="DriverCapability.AlarmAcknowledge"/> regardless of tier.
|
||||
/// </summary>
|
||||
public static IReadOnlyDictionary<DriverCapability, CapabilityPolicy> GetTierDefaults(DriverTier tier) =>
|
||||
tier switch
|
||||
{
|
||||
DriverTier.A => new Dictionary<DriverCapability, CapabilityPolicy>
|
||||
{
|
||||
[DriverCapability.Read] = new(TimeoutSeconds: 2, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.Write] = new(TimeoutSeconds: 2, RetryCount: 0, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.Discover] = new(TimeoutSeconds: 30, RetryCount: 2, BreakerFailureThreshold: 3),
|
||||
[DriverCapability.Subscribe] = new(TimeoutSeconds: 5, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.Probe] = new(TimeoutSeconds: 2, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.AlarmSubscribe] = new(TimeoutSeconds: 5, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.AlarmAcknowledge] = new(TimeoutSeconds: 5, RetryCount: 0, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.HistoryRead] = new(TimeoutSeconds: 30, RetryCount: 2, BreakerFailureThreshold: 5),
|
||||
},
|
||||
DriverTier.B => new Dictionary<DriverCapability, CapabilityPolicy>
|
||||
{
|
||||
[DriverCapability.Read] = new(TimeoutSeconds: 4, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.Write] = new(TimeoutSeconds: 4, RetryCount: 0, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.Discover] = new(TimeoutSeconds: 60, RetryCount: 2, BreakerFailureThreshold: 3),
|
||||
[DriverCapability.Subscribe] = new(TimeoutSeconds: 8, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.Probe] = new(TimeoutSeconds: 4, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.AlarmSubscribe] = new(TimeoutSeconds: 8, RetryCount: 3, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.AlarmAcknowledge] = new(TimeoutSeconds: 8, RetryCount: 0, BreakerFailureThreshold: 5),
|
||||
[DriverCapability.HistoryRead] = new(TimeoutSeconds: 60, RetryCount: 2, BreakerFailureThreshold: 5),
|
||||
},
|
||||
DriverTier.C => new Dictionary<DriverCapability, CapabilityPolicy>
|
||||
{
|
||||
[DriverCapability.Read] = new(TimeoutSeconds: 10, RetryCount: 1, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.Write] = new(TimeoutSeconds: 10, RetryCount: 0, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.Discover] = new(TimeoutSeconds: 120, RetryCount: 1, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.Subscribe] = new(TimeoutSeconds: 15, RetryCount: 1, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.Probe] = new(TimeoutSeconds: 10, RetryCount: 1, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.AlarmSubscribe] = new(TimeoutSeconds: 15, RetryCount: 1, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.AlarmAcknowledge] = new(TimeoutSeconds: 15, RetryCount: 0, BreakerFailureThreshold: 0),
|
||||
[DriverCapability.HistoryRead] = new(TimeoutSeconds: 120, RetryCount: 1, BreakerFailureThreshold: 0),
|
||||
},
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(tier), tier, $"No default policy table defined for tier {tier}."),
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>Policy for one capability on one driver instance.</summary>
|
||||
/// <param name="TimeoutSeconds">Per-call timeout (wraps the inner Polly execution).</param>
|
||||
/// <param name="RetryCount">Number of retry attempts after the first failure; zero = no retry.</param>
|
||||
/// <param name="BreakerFailureThreshold">
|
||||
/// Consecutive-failure count that opens the circuit breaker; zero = no breaker
|
||||
/// (Tier C uses the supervisor's process-level breaker instead, per decision #68).
|
||||
/// </param>
|
||||
public sealed record CapabilityPolicy(int TimeoutSeconds, int RetryCount, int BreakerFailureThreshold);
|
||||
@@ -0,0 +1,132 @@
|
||||
using System.Text.Json;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Parses the <c>DriverInstance.ResilienceConfig</c> JSON column into a
|
||||
/// <see cref="DriverResilienceOptions"/> instance layered on top of the tier defaults.
|
||||
/// Every key in the JSON is optional; missing keys fall back to the tier defaults from
|
||||
/// <see cref="DriverResilienceOptions.GetTierDefaults(DriverTier)"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Example JSON shape per Phase 6.1 Stream A.2:</para>
|
||||
/// <code>
|
||||
/// {
|
||||
/// "bulkheadMaxConcurrent": 16,
|
||||
/// "bulkheadMaxQueue": 64,
|
||||
/// "capabilityPolicies": {
|
||||
/// "Read": { "timeoutSeconds": 5, "retryCount": 5, "breakerFailureThreshold": 3 },
|
||||
/// "Write": { "timeoutSeconds": 5, "retryCount": 0, "breakerFailureThreshold": 5 }
|
||||
/// }
|
||||
/// }
|
||||
/// </code>
|
||||
///
|
||||
/// <para>Unrecognised keys + values are ignored so future shapes land without a migration.
|
||||
/// Per-capability overrides are layered on top of tier defaults — a partial policy (only
|
||||
/// some of TimeoutSeconds/RetryCount/BreakerFailureThreshold) fills in the other fields
|
||||
/// from the tier default for that capability.</para>
|
||||
///
|
||||
/// <para>Parser failures (malformed JSON, type mismatches) fall back to pure tier defaults
|
||||
/// + surface through an out-parameter diagnostic. Callers may log the diagnostic but should
|
||||
/// NOT fail driver startup — a misconfigured ResilienceConfig should never brick a
|
||||
/// working driver.</para>
|
||||
/// </remarks>
|
||||
public static class DriverResilienceOptionsParser
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOpts = new()
|
||||
{
|
||||
PropertyNameCaseInsensitive = true,
|
||||
AllowTrailingCommas = true,
|
||||
ReadCommentHandling = JsonCommentHandling.Skip,
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Parse the JSON payload layered on <paramref name="tier"/>'s defaults. Returns the
|
||||
/// effective options; <paramref name="parseDiagnostic"/> is null on success, or a
|
||||
/// human-readable error message when the JSON was malformed (options still returned
|
||||
/// = tier defaults).
|
||||
/// </summary>
|
||||
public static DriverResilienceOptions ParseOrDefaults(
|
||||
DriverTier tier,
|
||||
string? resilienceConfigJson,
|
||||
out string? parseDiagnostic)
|
||||
{
|
||||
parseDiagnostic = null;
|
||||
var baseDefaults = DriverResilienceOptions.GetTierDefaults(tier);
|
||||
var baseOptions = new DriverResilienceOptions { Tier = tier, CapabilityPolicies = baseDefaults };
|
||||
|
||||
if (string.IsNullOrWhiteSpace(resilienceConfigJson))
|
||||
return baseOptions;
|
||||
|
||||
ResilienceConfigShape? shape;
|
||||
try
|
||||
{
|
||||
shape = JsonSerializer.Deserialize<ResilienceConfigShape>(resilienceConfigJson, JsonOpts);
|
||||
}
|
||||
catch (JsonException ex)
|
||||
{
|
||||
parseDiagnostic = $"ResilienceConfig JSON malformed; falling back to tier {tier} defaults. Detail: {ex.Message}";
|
||||
return baseOptions;
|
||||
}
|
||||
|
||||
if (shape is null) return baseOptions;
|
||||
|
||||
var merged = new Dictionary<DriverCapability, CapabilityPolicy>(baseDefaults);
|
||||
if (shape.CapabilityPolicies is not null)
|
||||
{
|
||||
foreach (var (capName, overridePolicy) in shape.CapabilityPolicies)
|
||||
{
|
||||
if (!Enum.TryParse<DriverCapability>(capName, ignoreCase: true, out var capability))
|
||||
{
|
||||
parseDiagnostic ??= $"Unknown capability '{capName}' in ResilienceConfig; skipped.";
|
||||
continue;
|
||||
}
|
||||
|
||||
var basePolicy = merged[capability];
|
||||
merged[capability] = new CapabilityPolicy(
|
||||
TimeoutSeconds: overridePolicy.TimeoutSeconds ?? basePolicy.TimeoutSeconds,
|
||||
RetryCount: overridePolicy.RetryCount ?? basePolicy.RetryCount,
|
||||
BreakerFailureThreshold: overridePolicy.BreakerFailureThreshold ?? basePolicy.BreakerFailureThreshold);
|
||||
}
|
||||
}
|
||||
|
||||
// Scheduled recycle is Tier C only — reject a configured interval on Tier A/B as a
|
||||
// misconfiguration surface rather than silently honouring it (recycling an in-process
|
||||
// driver would kill every OPC UA session + every co-hosted driver, per decision #74).
|
||||
int? recycleIntervalSeconds = null;
|
||||
if (shape.RecycleIntervalSeconds is int secs)
|
||||
{
|
||||
if (secs <= 0)
|
||||
parseDiagnostic ??= $"RecycleIntervalSeconds must be positive; got {secs} — ignoring.";
|
||||
else if (tier != DriverTier.C)
|
||||
parseDiagnostic ??= $"RecycleIntervalSeconds is Tier C only; Tier {tier} driver will not scheduled-recycle.";
|
||||
else
|
||||
recycleIntervalSeconds = secs;
|
||||
}
|
||||
|
||||
return new DriverResilienceOptions
|
||||
{
|
||||
Tier = tier,
|
||||
CapabilityPolicies = merged,
|
||||
BulkheadMaxConcurrent = shape.BulkheadMaxConcurrent ?? baseOptions.BulkheadMaxConcurrent,
|
||||
BulkheadMaxQueue = shape.BulkheadMaxQueue ?? baseOptions.BulkheadMaxQueue,
|
||||
RecycleIntervalSeconds = recycleIntervalSeconds,
|
||||
};
|
||||
}
|
||||
|
||||
private sealed class ResilienceConfigShape
|
||||
{
|
||||
public int? BulkheadMaxConcurrent { get; set; }
|
||||
public int? BulkheadMaxQueue { get; set; }
|
||||
public int? RecycleIntervalSeconds { get; set; }
|
||||
public Dictionary<string, CapabilityPolicyShape>? CapabilityPolicies { get; set; }
|
||||
}
|
||||
|
||||
private sealed class CapabilityPolicyShape
|
||||
{
|
||||
public int? TimeoutSeconds { get; set; }
|
||||
public int? RetryCount { get; set; }
|
||||
public int? BreakerFailureThreshold { get; set; }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,157 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Polly;
|
||||
using Polly.CircuitBreaker;
|
||||
using Polly.Retry;
|
||||
using Polly.Timeout;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Builds and caches Polly resilience pipelines keyed on
|
||||
/// <c>(DriverInstanceId, HostName, DriverCapability)</c>. One dead PLC behind a multi-device
|
||||
/// driver cannot open the circuit breaker for healthy sibling hosts.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/plan.md</c> decision #144 (per-device isolation). Composition from outside-in:
|
||||
/// <b>Timeout → Retry (when capability permits) → Circuit Breaker (when tier permits) → Bulkhead</b>.
|
||||
///
|
||||
/// <para>Pipeline resolution is lock-free on the hot path: the inner
|
||||
/// <see cref="ConcurrentDictionary{TKey,TValue}"/> caches a <see cref="ResiliencePipeline"/> per key;
|
||||
/// first-call cost is one <see cref="ResiliencePipelineBuilder"/>.Build. Thereafter reads are O(1).</para>
|
||||
/// </remarks>
|
||||
public sealed class DriverResiliencePipelineBuilder
|
||||
{
|
||||
private readonly ConcurrentDictionary<PipelineKey, ResiliencePipeline> _pipelines = new();
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly DriverResilienceStatusTracker? _statusTracker;
|
||||
|
||||
/// <summary>Construct with the ambient clock (use <see cref="TimeProvider.System"/> in prod).</summary>
|
||||
/// <param name="timeProvider">Clock source for pipeline timeouts + breaker sampling. Defaults to system.</param>
|
||||
/// <param name="statusTracker">When non-null, every built pipeline wires Polly telemetry into
|
||||
/// the tracker — retries increment <c>ConsecutiveFailures</c>, breaker-open stamps
|
||||
/// <c>LastBreakerOpenUtc</c>, breaker-close resets failures. Feeds Admin <c>/hosts</c> +
|
||||
/// the Polly bulkhead-depth column. Absent tracker means no telemetry (unit tests +
|
||||
/// deployments that don't care about resilience observability).</param>
|
||||
public DriverResiliencePipelineBuilder(
|
||||
TimeProvider? timeProvider = null,
|
||||
DriverResilienceStatusTracker? statusTracker = null)
|
||||
{
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_statusTracker = statusTracker;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get or build the pipeline for a given <c>(driver instance, host, capability)</c> triple.
|
||||
/// Calls with the same key + same options reuse the same pipeline instance; the first caller
|
||||
/// wins if a race occurs (both pipelines would be behaviourally identical).
|
||||
/// </summary>
|
||||
/// <param name="driverInstanceId">DriverInstance primary key — opaque to this layer.</param>
|
||||
/// <param name="hostName">
|
||||
/// Host the call targets. For single-host drivers (Galaxy, some OPC UA Client configs) pass the
|
||||
/// driver's canonical host string. For multi-host drivers (Modbus with N PLCs), pass the
|
||||
/// specific PLC so one dead PLC doesn't poison healthy siblings.
|
||||
/// </param>
|
||||
/// <param name="capability">Which capability surface is being called.</param>
|
||||
/// <param name="options">Per-driver-instance options (tier + per-capability overrides).</param>
|
||||
public ResiliencePipeline GetOrCreate(
|
||||
string driverInstanceId,
|
||||
string hostName,
|
||||
DriverCapability capability,
|
||||
DriverResilienceOptions options)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(hostName);
|
||||
|
||||
var key = new PipelineKey(driverInstanceId, hostName, capability);
|
||||
return _pipelines.GetOrAdd(key, static (k, state) => Build(
|
||||
k.DriverInstanceId, k.HostName, state.capability, state.options, state.timeProvider, state.tracker),
|
||||
(capability, options, timeProvider: _timeProvider, tracker: _statusTracker));
|
||||
}
|
||||
|
||||
/// <summary>Drop cached pipelines for one driver instance (e.g. on ResilienceConfig change). Test + Admin-reload use.</summary>
|
||||
public int Invalidate(string driverInstanceId)
|
||||
{
|
||||
var removed = 0;
|
||||
foreach (var key in _pipelines.Keys)
|
||||
{
|
||||
if (key.DriverInstanceId == driverInstanceId && _pipelines.TryRemove(key, out _))
|
||||
removed++;
|
||||
}
|
||||
return removed;
|
||||
}
|
||||
|
||||
/// <summary>Snapshot of the current number of cached pipelines. For diagnostics only.</summary>
|
||||
public int CachedPipelineCount => _pipelines.Count;
|
||||
|
||||
private static ResiliencePipeline Build(
|
||||
string driverInstanceId,
|
||||
string hostName,
|
||||
DriverCapability capability,
|
||||
DriverResilienceOptions options,
|
||||
TimeProvider timeProvider,
|
||||
DriverResilienceStatusTracker? tracker)
|
||||
{
|
||||
var policy = options.Resolve(capability);
|
||||
var builder = new ResiliencePipelineBuilder { TimeProvider = timeProvider };
|
||||
|
||||
builder.AddTimeout(new TimeoutStrategyOptions
|
||||
{
|
||||
Timeout = TimeSpan.FromSeconds(policy.TimeoutSeconds),
|
||||
});
|
||||
|
||||
if (policy.RetryCount > 0)
|
||||
{
|
||||
var retryOptions = new RetryStrategyOptions
|
||||
{
|
||||
MaxRetryAttempts = policy.RetryCount,
|
||||
BackoffType = DelayBackoffType.Exponential,
|
||||
UseJitter = true,
|
||||
Delay = TimeSpan.FromMilliseconds(100),
|
||||
MaxDelay = TimeSpan.FromSeconds(5),
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex => ex is not OperationCanceledException),
|
||||
};
|
||||
if (tracker is not null)
|
||||
{
|
||||
retryOptions.OnRetry = args =>
|
||||
{
|
||||
tracker.RecordFailure(driverInstanceId, hostName, timeProvider.GetUtcNow().UtcDateTime);
|
||||
return default;
|
||||
};
|
||||
}
|
||||
builder.AddRetry(retryOptions);
|
||||
}
|
||||
|
||||
if (policy.BreakerFailureThreshold > 0)
|
||||
{
|
||||
var breakerOptions = new CircuitBreakerStrategyOptions
|
||||
{
|
||||
FailureRatio = 1.0,
|
||||
MinimumThroughput = policy.BreakerFailureThreshold,
|
||||
SamplingDuration = TimeSpan.FromSeconds(30),
|
||||
BreakDuration = TimeSpan.FromSeconds(15),
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex => ex is not OperationCanceledException),
|
||||
};
|
||||
if (tracker is not null)
|
||||
{
|
||||
breakerOptions.OnOpened = args =>
|
||||
{
|
||||
tracker.RecordBreakerOpen(driverInstanceId, hostName, timeProvider.GetUtcNow().UtcDateTime);
|
||||
return default;
|
||||
};
|
||||
breakerOptions.OnClosed = args =>
|
||||
{
|
||||
// Closing the breaker means the target recovered — reset the consecutive-
|
||||
// failure counter so Admin UI stops flashing red for this host.
|
||||
tracker.RecordSuccess(driverInstanceId, hostName, timeProvider.GetUtcNow().UtcDateTime);
|
||||
return default;
|
||||
};
|
||||
}
|
||||
builder.AddCircuitBreaker(breakerOptions);
|
||||
}
|
||||
|
||||
return builder.Build();
|
||||
}
|
||||
|
||||
private readonly record struct PipelineKey(string DriverInstanceId, string HostName, DriverCapability Capability);
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Process-singleton tracker of live resilience counters per
|
||||
/// <c>(DriverInstanceId, HostName)</c>. Populated by the CapabilityInvoker and the
|
||||
/// MemoryTracking layer; consumed by a HostedService that periodically persists a
|
||||
/// snapshot to the <c>DriverInstanceResilienceStatus</c> table for Admin <c>/hosts</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per Phase 6.1 Stream E. No DB dependency here — the tracker is pure in-memory so
|
||||
/// tests can exercise it without EF Core or SQL Server. The HostedService that writes
|
||||
/// snapshots lives in the Server project (Stream E.2); the actual SignalR push + Blazor
|
||||
/// page refresh (E.3) lands in a follow-up visual-review PR.
|
||||
/// </remarks>
|
||||
public sealed class DriverResilienceStatusTracker
|
||||
{
|
||||
private readonly ConcurrentDictionary<StatusKey, ResilienceStatusSnapshot> _status = new();
|
||||
|
||||
/// <summary>Record a Polly pipeline failure for <paramref name="hostName"/>.</summary>
|
||||
public void RecordFailure(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { ConsecutiveFailures = 1, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with
|
||||
{
|
||||
ConsecutiveFailures = existing.ConsecutiveFailures + 1,
|
||||
LastSampledUtc = utcNow,
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>Reset the consecutive-failure count on a successful pipeline execution.</summary>
|
||||
public void RecordSuccess(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { ConsecutiveFailures = 0, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with
|
||||
{
|
||||
ConsecutiveFailures = 0,
|
||||
LastSampledUtc = utcNow,
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>Record a circuit-breaker open event.</summary>
|
||||
public void RecordBreakerOpen(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { LastBreakerOpenUtc = utcNow, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with { LastBreakerOpenUtc = utcNow, LastSampledUtc = utcNow });
|
||||
}
|
||||
|
||||
/// <summary>Record a process recycle event (Tier C only).</summary>
|
||||
public void RecordRecycle(string driverInstanceId, string hostName, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { LastRecycleUtc = utcNow, LastSampledUtc = utcNow },
|
||||
(_, existing) => existing with { LastRecycleUtc = utcNow, LastSampledUtc = utcNow });
|
||||
}
|
||||
|
||||
/// <summary>Capture / update the MemoryTracking-supplied baseline + current footprint.</summary>
|
||||
public void RecordFootprint(string driverInstanceId, string hostName, long baselineBytes, long currentBytes, DateTime utcNow)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot
|
||||
{
|
||||
BaselineFootprintBytes = baselineBytes,
|
||||
CurrentFootprintBytes = currentBytes,
|
||||
LastSampledUtc = utcNow,
|
||||
},
|
||||
(_, existing) => existing with
|
||||
{
|
||||
BaselineFootprintBytes = baselineBytes,
|
||||
CurrentFootprintBytes = currentBytes,
|
||||
LastSampledUtc = utcNow,
|
||||
});
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Record the entry of a capability call for this (instance, host). Increments the
|
||||
/// in-flight counter used as the <see cref="ResilienceStatusSnapshot.CurrentInFlight"/>
|
||||
/// surface (a cheap stand-in for Polly bulkhead depth). Paired with
|
||||
/// <see cref="RecordCallComplete"/>; callers use try/finally.
|
||||
/// </summary>
|
||||
public void RecordCallStart(string driverInstanceId, string hostName)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { CurrentInFlight = 1 },
|
||||
(_, existing) => existing with { CurrentInFlight = existing.CurrentInFlight + 1 });
|
||||
}
|
||||
|
||||
/// <summary>Paired with <see cref="RecordCallStart"/> — decrements the in-flight counter.</summary>
|
||||
public void RecordCallComplete(string driverInstanceId, string hostName)
|
||||
{
|
||||
var key = new StatusKey(driverInstanceId, hostName);
|
||||
_status.AddOrUpdate(key,
|
||||
_ => new ResilienceStatusSnapshot { CurrentInFlight = 0 }, // start-without-complete shouldn't happen; clamp to 0
|
||||
(_, existing) => existing with { CurrentInFlight = Math.Max(0, existing.CurrentInFlight - 1) });
|
||||
}
|
||||
|
||||
/// <summary>Snapshot of a specific (instance, host) pair; null if no counters recorded yet.</summary>
|
||||
public ResilienceStatusSnapshot? TryGet(string driverInstanceId, string hostName) =>
|
||||
_status.TryGetValue(new StatusKey(driverInstanceId, hostName), out var snapshot) ? snapshot : null;
|
||||
|
||||
/// <summary>Copy of every currently-tracked (instance, host, snapshot) triple. Safe under concurrent writes.</summary>
|
||||
public IReadOnlyList<(string DriverInstanceId, string HostName, ResilienceStatusSnapshot Snapshot)> Snapshot() =>
|
||||
_status.Select(kvp => (kvp.Key.DriverInstanceId, kvp.Key.HostName, kvp.Value)).ToList();
|
||||
|
||||
private readonly record struct StatusKey(string DriverInstanceId, string HostName);
|
||||
}
|
||||
|
||||
/// <summary>Snapshot of the resilience counters for one <c>(DriverInstanceId, HostName)</c> pair.</summary>
|
||||
public sealed record ResilienceStatusSnapshot
|
||||
{
|
||||
public int ConsecutiveFailures { get; init; }
|
||||
public DateTime? LastBreakerOpenUtc { get; init; }
|
||||
public DateTime? LastRecycleUtc { get; init; }
|
||||
public long BaselineFootprintBytes { get; init; }
|
||||
public long CurrentFootprintBytes { get; init; }
|
||||
public DateTime LastSampledUtc { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// In-flight capability calls against this (instance, host). Bumped on call entry +
|
||||
/// decremented on completion. Feeds <c>DriverInstanceResilienceStatus.CurrentBulkheadDepth</c>
|
||||
/// for Admin <c>/hosts</c> — a cheap proxy for the Polly bulkhead depth until the full
|
||||
/// telemetry observer lands.
|
||||
/// </summary>
|
||||
public int CurrentInFlight { get; init; }
|
||||
}
|
||||
65
src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs
Normal file
65
src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryRecycle.cs
Normal file
@@ -0,0 +1,65 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
|
||||
|
||||
/// <summary>
|
||||
/// Tier C only process-recycle companion to <see cref="MemoryTracking"/>. On a
|
||||
/// <see cref="MemoryTrackingAction.HardBreach"/> signal, invokes the supplied
|
||||
/// <see cref="IDriverSupervisor"/> to restart the out-of-process Host.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/plan.md</c> decisions #74 and #145. Tier A/B hard-breach on an in-process
|
||||
/// driver would kill every OPC UA session and every co-hosted driver, so for Tier A/B this
|
||||
/// class logs a <b>promotion-to-Tier-C recommendation</b> and does NOT invoke any supervisor.
|
||||
/// A future tier-migration workflow acts on the recommendation.
|
||||
/// </remarks>
|
||||
public sealed class MemoryRecycle
|
||||
{
|
||||
private readonly DriverTier _tier;
|
||||
private readonly IDriverSupervisor? _supervisor;
|
||||
private readonly ILogger<MemoryRecycle> _logger;
|
||||
|
||||
public MemoryRecycle(DriverTier tier, IDriverSupervisor? supervisor, ILogger<MemoryRecycle> logger)
|
||||
{
|
||||
_tier = tier;
|
||||
_supervisor = supervisor;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Handle a <see cref="MemoryTracking"/> classification for the driver. For Tier C with a
|
||||
/// wired supervisor, <c>HardBreach</c> triggers <see cref="IDriverSupervisor.RecycleAsync"/>.
|
||||
/// All other combinations are no-ops with respect to process state (soft breaches + Tier A/B
|
||||
/// hard breaches just log).
|
||||
/// </summary>
|
||||
/// <returns>True when a recycle was requested; false otherwise.</returns>
|
||||
public async Task<bool> HandleAsync(MemoryTrackingAction action, long footprintBytes, CancellationToken cancellationToken)
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case MemoryTrackingAction.SoftBreach:
|
||||
_logger.LogWarning(
|
||||
"Memory soft-breach on driver {DriverId}: footprint={Footprint:N0} bytes, tier={Tier}. Surfaced to Admin; no action.",
|
||||
_supervisor?.DriverInstanceId ?? "(unknown)", footprintBytes, _tier);
|
||||
return false;
|
||||
|
||||
case MemoryTrackingAction.HardBreach when _tier == DriverTier.C && _supervisor is not null:
|
||||
_logger.LogError(
|
||||
"Memory hard-breach on Tier C driver {DriverId}: footprint={Footprint:N0} bytes. Requesting supervisor recycle.",
|
||||
_supervisor.DriverInstanceId, footprintBytes);
|
||||
await _supervisor.RecycleAsync($"Memory hard-breach: {footprintBytes} bytes", cancellationToken).ConfigureAwait(false);
|
||||
return true;
|
||||
|
||||
case MemoryTrackingAction.HardBreach:
|
||||
_logger.LogError(
|
||||
"Memory hard-breach on Tier {Tier} in-process driver {DriverId}: footprint={Footprint:N0} bytes. " +
|
||||
"Recommending promotion to Tier C; NOT auto-killing (decisions #74, #145).",
|
||||
_tier, _supervisor?.DriverInstanceId ?? "(unknown)", footprintBytes);
|
||||
return false;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
136
src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs
Normal file
136
src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/MemoryTracking.cs
Normal file
@@ -0,0 +1,136 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
|
||||
|
||||
/// <summary>
|
||||
/// Tier-agnostic memory-footprint tracker. Captures the post-initialize <b>baseline</b>
|
||||
/// from the first samples after <c>IDriver.InitializeAsync</c>, then classifies each
|
||||
/// subsequent sample against a hybrid soft/hard threshold per
|
||||
/// <c>docs/v2/plan.md</c> decision #146 — <c>soft = max(multiplier × baseline, baseline + floor)</c>,
|
||||
/// <c>hard = 2 × soft</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Per decision #145, this tracker <b>never kills a process</b>. Soft and hard breaches
|
||||
/// log + surface to the Admin UI via <c>DriverInstanceResilienceStatus</c>. The matching
|
||||
/// process-level recycle protection lives in a separate <c>MemoryRecycle</c> that activates
|
||||
/// for Tier C drivers only (where the driver runs out-of-process behind a supervisor that
|
||||
/// can safely restart it without tearing down the OPC UA session or co-hosted in-proc
|
||||
/// drivers).</para>
|
||||
///
|
||||
/// <para>Baseline capture: the tracker starts in <see cref="TrackingPhase.WarmingUp"/> for
|
||||
/// <see cref="BaselineWindow"/> (default 5 min). During that window samples are collected;
|
||||
/// the baseline is computed as the median once the window elapses. Before that point every
|
||||
/// classification returns <see cref="MemoryTrackingAction.Warming"/>.</para>
|
||||
/// </remarks>
|
||||
public sealed class MemoryTracking
|
||||
{
|
||||
private readonly DriverTier _tier;
|
||||
private readonly TimeSpan _baselineWindow;
|
||||
private readonly List<long> _warmupSamples = [];
|
||||
private long _baselineBytes;
|
||||
private TrackingPhase _phase = TrackingPhase.WarmingUp;
|
||||
private DateTime? _warmupStartUtc;
|
||||
|
||||
/// <summary>Tier-default multiplier/floor constants per decision #146.</summary>
|
||||
public static (int Multiplier, long FloorBytes) GetTierConstants(DriverTier tier) => tier switch
|
||||
{
|
||||
DriverTier.A => (Multiplier: 3, FloorBytes: 50L * 1024 * 1024),
|
||||
DriverTier.B => (Multiplier: 3, FloorBytes: 100L * 1024 * 1024),
|
||||
DriverTier.C => (Multiplier: 2, FloorBytes: 500L * 1024 * 1024),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(tier), tier, $"No memory-tracking constants defined for tier {tier}."),
|
||||
};
|
||||
|
||||
/// <summary>Window over which post-init samples are collected to compute the baseline.</summary>
|
||||
public TimeSpan BaselineWindow => _baselineWindow;
|
||||
|
||||
/// <summary>Current phase: <see cref="TrackingPhase.WarmingUp"/> or <see cref="TrackingPhase.Steady"/>.</summary>
|
||||
public TrackingPhase Phase => _phase;
|
||||
|
||||
/// <summary>Captured baseline; 0 until warmup completes.</summary>
|
||||
public long BaselineBytes => _baselineBytes;
|
||||
|
||||
/// <summary>Effective soft threshold (zero while warming up).</summary>
|
||||
public long SoftThresholdBytes => _baselineBytes == 0 ? 0 : ComputeSoft(_tier, _baselineBytes);
|
||||
|
||||
/// <summary>Effective hard threshold = 2 × soft (zero while warming up).</summary>
|
||||
public long HardThresholdBytes => _baselineBytes == 0 ? 0 : ComputeSoft(_tier, _baselineBytes) * 2;
|
||||
|
||||
public MemoryTracking(DriverTier tier, TimeSpan? baselineWindow = null)
|
||||
{
|
||||
_tier = tier;
|
||||
_baselineWindow = baselineWindow ?? TimeSpan.FromMinutes(5);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Submit a memory-footprint sample. Returns the action the caller should surface.
|
||||
/// During warmup, always returns <see cref="MemoryTrackingAction.Warming"/> and accumulates
|
||||
/// samples; once the window elapses the first steady-phase sample triggers baseline capture
|
||||
/// (median of warmup samples).
|
||||
/// </summary>
|
||||
public MemoryTrackingAction Sample(long footprintBytes, DateTime utcNow)
|
||||
{
|
||||
if (_phase == TrackingPhase.WarmingUp)
|
||||
{
|
||||
_warmupStartUtc ??= utcNow;
|
||||
_warmupSamples.Add(footprintBytes);
|
||||
if (utcNow - _warmupStartUtc.Value >= _baselineWindow && _warmupSamples.Count > 0)
|
||||
{
|
||||
_baselineBytes = ComputeMedian(_warmupSamples);
|
||||
_phase = TrackingPhase.Steady;
|
||||
}
|
||||
else
|
||||
{
|
||||
return MemoryTrackingAction.Warming;
|
||||
}
|
||||
}
|
||||
|
||||
if (footprintBytes >= HardThresholdBytes) return MemoryTrackingAction.HardBreach;
|
||||
if (footprintBytes >= SoftThresholdBytes) return MemoryTrackingAction.SoftBreach;
|
||||
return MemoryTrackingAction.None;
|
||||
}
|
||||
|
||||
private static long ComputeSoft(DriverTier tier, long baseline)
|
||||
{
|
||||
var (multiplier, floor) = GetTierConstants(tier);
|
||||
return Math.Max(multiplier * baseline, baseline + floor);
|
||||
}
|
||||
|
||||
private static long ComputeMedian(List<long> samples)
|
||||
{
|
||||
var sorted = samples.Order().ToArray();
|
||||
var mid = sorted.Length / 2;
|
||||
return sorted.Length % 2 == 1
|
||||
? sorted[mid]
|
||||
: (sorted[mid - 1] + sorted[mid]) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Phase of a <see cref="MemoryTracking"/> lifecycle.</summary>
|
||||
public enum TrackingPhase
|
||||
{
|
||||
/// <summary>Collecting post-init samples; baseline not yet computed.</summary>
|
||||
WarmingUp,
|
||||
|
||||
/// <summary>Baseline captured; every sample classified against soft/hard thresholds.</summary>
|
||||
Steady,
|
||||
}
|
||||
|
||||
/// <summary>Classification the tracker returns per sample.</summary>
|
||||
public enum MemoryTrackingAction
|
||||
{
|
||||
/// <summary>Baseline not yet captured; sample collected, no threshold check.</summary>
|
||||
Warming,
|
||||
|
||||
/// <summary>Below soft threshold.</summary>
|
||||
None,
|
||||
|
||||
/// <summary>Between soft and hard thresholds — log + surface, no action.</summary>
|
||||
SoftBreach,
|
||||
|
||||
/// <summary>
|
||||
/// ≥ hard threshold. Log + surface + (Tier C only, via <c>MemoryRecycle</c>) request
|
||||
/// process recycle via the driver supervisor. Tier A/B breach never invokes any
|
||||
/// kill path per decisions #145 and #74.
|
||||
/// </summary>
|
||||
HardBreach,
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
|
||||
|
||||
/// <summary>
|
||||
/// Tier C opt-in periodic-recycle driver per <c>docs/v2/plan.md</c> decision #67.
|
||||
/// A tick method advanced by the caller (fed by a background timer in prod; by test clock
|
||||
/// in unit tests) decides whether the configured interval has elapsed and, if so, drives the
|
||||
/// supplied <see cref="IDriverSupervisor"/> to recycle the Host.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Tier A/B drivers MUST NOT use this class — scheduled recycle for in-process drivers would
|
||||
/// kill every OPC UA session and every co-hosted driver. The ctor throws when constructed
|
||||
/// with any tier other than C to make the misuse structurally impossible.
|
||||
///
|
||||
/// <para>Keeps no background thread of its own — callers invoke <see cref="TickAsync"/> on
|
||||
/// their ambient scheduler tick (Phase 6.1 Stream C's health-endpoint host runs one). That
|
||||
/// decouples the unit under test from wall-clock time and thread-pool scheduling.</para>
|
||||
/// </remarks>
|
||||
public sealed class ScheduledRecycleScheduler
|
||||
{
|
||||
private readonly TimeSpan _recycleInterval;
|
||||
private readonly IDriverSupervisor _supervisor;
|
||||
private readonly ILogger<ScheduledRecycleScheduler> _logger;
|
||||
private DateTime _nextRecycleUtc;
|
||||
|
||||
/// <summary>
|
||||
/// Construct the scheduler for a Tier C driver. Throws if <paramref name="tier"/> isn't C.
|
||||
/// </summary>
|
||||
/// <param name="tier">Driver tier; must be <see cref="DriverTier.C"/>.</param>
|
||||
/// <param name="recycleInterval">Interval between recycles (e.g. 7 days).</param>
|
||||
/// <param name="startUtc">Anchor time; next recycle fires at <paramref name="startUtc"/> + <paramref name="recycleInterval"/>.</param>
|
||||
/// <param name="supervisor">Supervisor that performs the actual recycle.</param>
|
||||
/// <param name="logger">Diagnostic sink.</param>
|
||||
public ScheduledRecycleScheduler(
|
||||
DriverTier tier,
|
||||
TimeSpan recycleInterval,
|
||||
DateTime startUtc,
|
||||
IDriverSupervisor supervisor,
|
||||
ILogger<ScheduledRecycleScheduler> logger)
|
||||
{
|
||||
if (tier != DriverTier.C)
|
||||
throw new ArgumentException(
|
||||
$"ScheduledRecycleScheduler is Tier C only (got {tier}). " +
|
||||
"In-process drivers must not use scheduled recycle; see decisions #74 and #145.",
|
||||
nameof(tier));
|
||||
|
||||
if (recycleInterval <= TimeSpan.Zero)
|
||||
throw new ArgumentException("RecycleInterval must be positive.", nameof(recycleInterval));
|
||||
|
||||
_recycleInterval = recycleInterval;
|
||||
_supervisor = supervisor;
|
||||
_logger = logger;
|
||||
_nextRecycleUtc = startUtc + recycleInterval;
|
||||
}
|
||||
|
||||
/// <summary>Next scheduled recycle UTC. Advances by <see cref="RecycleInterval"/> on each fire.</summary>
|
||||
public DateTime NextRecycleUtc => _nextRecycleUtc;
|
||||
|
||||
/// <summary>Recycle interval this scheduler was constructed with.</summary>
|
||||
public TimeSpan RecycleInterval => _recycleInterval;
|
||||
|
||||
/// <summary>
|
||||
/// Tick the scheduler forward. If <paramref name="utcNow"/> is past
|
||||
/// <see cref="NextRecycleUtc"/>, requests a recycle from the supervisor and advances
|
||||
/// <see cref="NextRecycleUtc"/> by exactly one interval. Returns true when a recycle fired.
|
||||
/// </summary>
|
||||
public async Task<bool> TickAsync(DateTime utcNow, CancellationToken cancellationToken)
|
||||
{
|
||||
if (utcNow < _nextRecycleUtc)
|
||||
return false;
|
||||
|
||||
_logger.LogInformation(
|
||||
"Scheduled recycle due for Tier C driver {DriverId} at {Now:o}; advancing next to {Next:o}.",
|
||||
_supervisor.DriverInstanceId, utcNow, _nextRecycleUtc + _recycleInterval);
|
||||
|
||||
await _supervisor.RecycleAsync("Scheduled periodic recycle", cancellationToken).ConfigureAwait(false);
|
||||
_nextRecycleUtc += _recycleInterval;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>Request an immediate recycle outside the schedule (e.g. MemoryRecycle hard-breach escalation).</summary>
|
||||
public Task RequestRecycleNowAsync(string reason, CancellationToken cancellationToken) =>
|
||||
_supervisor.RecycleAsync(reason, cancellationToken);
|
||||
}
|
||||
81
src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs
Normal file
81
src/Core/ZB.MOM.WW.OtOpcUa.Core/Stability/WedgeDetector.cs
Normal file
@@ -0,0 +1,81 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Stability;
|
||||
|
||||
/// <summary>
|
||||
/// Demand-aware driver-wedge detector per <c>docs/v2/plan.md</c> decision #147.
|
||||
/// Flips a driver to <see cref="WedgeVerdict.Faulted"/> only when BOTH of the following hold:
|
||||
/// (a) there is pending work outstanding, AND (b) no progress has been observed for longer
|
||||
/// than <see cref="Threshold"/>. Idle drivers, write-only burst drivers, and subscription-only
|
||||
/// drivers whose signals don't arrive regularly all stay Healthy.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Pending work signal is supplied by the caller via <see cref="DemandSignal"/>:
|
||||
/// non-zero Polly bulkhead depth, ≥1 active MonitoredItem, or ≥1 queued historian read
|
||||
/// each qualifies. The detector itself is state-light: all it remembers is the last
|
||||
/// <c>LastProgressUtc</c> it saw and the last wedge verdict. No history buffer.</para>
|
||||
///
|
||||
/// <para>Default threshold per plan: <c>5 × PublishingInterval</c>, with a minimum of 60 s.
|
||||
/// Concrete values are driver-agnostic and configured per-instance by the caller.</para>
|
||||
/// </remarks>
|
||||
public sealed class WedgeDetector
|
||||
{
|
||||
/// <summary>Wedge-detection threshold; pass < 60 s and the detector clamps to 60 s.</summary>
|
||||
public TimeSpan Threshold { get; }
|
||||
|
||||
/// <summary>Whether the driver reported itself <see cref="DriverState.Healthy"/> at construction.</summary>
|
||||
public WedgeDetector(TimeSpan threshold)
|
||||
{
|
||||
Threshold = threshold < TimeSpan.FromSeconds(60) ? TimeSpan.FromSeconds(60) : threshold;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Classify the current state against the demand signal. Does not retain state across
|
||||
/// calls — each call is self-contained; the caller owns the <c>LastProgressUtc</c> clock.
|
||||
/// </summary>
|
||||
public WedgeVerdict Classify(DriverState state, DemandSignal demand, DateTime utcNow)
|
||||
{
|
||||
if (state != DriverState.Healthy)
|
||||
return WedgeVerdict.NotApplicable;
|
||||
|
||||
if (!demand.HasPendingWork)
|
||||
return WedgeVerdict.Idle;
|
||||
|
||||
var sinceProgress = utcNow - demand.LastProgressUtc;
|
||||
return sinceProgress > Threshold ? WedgeVerdict.Faulted : WedgeVerdict.Healthy;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Caller-supplied demand snapshot. All three counters are OR'd — any non-zero means work
|
||||
/// is outstanding, which is the trigger for checking the <see cref="LastProgressUtc"/> clock.
|
||||
/// </summary>
|
||||
/// <param name="BulkheadDepth">Polly bulkhead depth (in-flight capability calls).</param>
|
||||
/// <param name="ActiveMonitoredItems">Number of live OPC UA MonitoredItems bound to this driver.</param>
|
||||
/// <param name="QueuedHistoryReads">Pending historian-read requests the driver owes the server.</param>
|
||||
/// <param name="LastProgressUtc">Last time the driver reported a successful unit of work (read, subscribe-ack, publish).</param>
|
||||
public readonly record struct DemandSignal(
|
||||
int BulkheadDepth,
|
||||
int ActiveMonitoredItems,
|
||||
int QueuedHistoryReads,
|
||||
DateTime LastProgressUtc)
|
||||
{
|
||||
/// <summary>True when any of the three counters is > 0.</summary>
|
||||
public bool HasPendingWork => BulkheadDepth > 0 || ActiveMonitoredItems > 0 || QueuedHistoryReads > 0;
|
||||
}
|
||||
|
||||
/// <summary>Outcome of a single <see cref="WedgeDetector.Classify"/> call.</summary>
|
||||
public enum WedgeVerdict
|
||||
{
|
||||
/// <summary>Driver wasn't Healthy to begin with — wedge detection doesn't apply.</summary>
|
||||
NotApplicable,
|
||||
|
||||
/// <summary>Driver claims Healthy + no pending work → stays Healthy.</summary>
|
||||
Idle,
|
||||
|
||||
/// <summary>Driver claims Healthy + has pending work + has made progress within the threshold → stays Healthy.</summary>
|
||||
Healthy,
|
||||
|
||||
/// <summary>Driver claims Healthy + has pending work + has NOT made progress within the threshold → wedged.</summary>
|
||||
Faulted,
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<LangVersion>latest</LangVersion>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<GenerateDocumentationFile>true</GenerateDocumentationFile>
|
||||
<NoWarn>$(NoWarn);CS1591</NoWarn>
|
||||
<RootNamespace>ZB.MOM.WW.OtOpcUa.Core</RootNamespace>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Core.Abstractions\ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj"/>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.OtOpcUa.Configuration\ZB.MOM.WW.OtOpcUa.Configuration.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Polly.Core" Version="8.6.6"/>
|
||||
<PackageReference Include="Serilog" Version="4.3.0"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<InternalsVisibleTo Include="ZB.MOM.WW.OtOpcUa.Core.Tests"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-37gx-xxp4-5rgx"/>
|
||||
<NuGetAuditSuppress Include="https://github.com/advisories/GHSA-w3x6-4m5h-cxqf"/>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
Reference in New Issue
Block a user