Admin RedundancyTab — per-cluster read-only topology view. Closes the UI slice of task #149 (Phase 6.3 Stream E — Admin UI RedundancyTab + OpenTelemetry metrics + SignalR); the OpenTelemetry metrics + RoleChanged SignalR push are split into new follow-up task #198 because each is a structural add that deserves its own test matrix + NuGet-dep decision rather than riding this UI PR. New /clusters/{ClusterId} Redundancy tab slotted between ACLs and Audit in the existing ClusterDetail tab bar. Shows each ClusterNode row in the cluster with columns Node / Role (Primary green, Secondary blue, Standalone primary-blue badge) / Host / OPC UA port / ServiceLevel base / ApplicationUri (text-break so the long urn: doesn't blow out the table) / Enabled badge / Last seen (relative age via the same FormatAge helper as Hosts.razor, with a yellow "Stale" chip once LastSeenAt crosses the 30s threshold shared with HostStatusService.StaleThreshold — a missed heartbeat plus clock-skew buffer). Four summary cards above the table — total Nodes, Primary count, Secondary count, Stale count. Two guard-rail alerts: (a) red "No Primary or Standalone" when the cluster has no authoritative write target (all rows are Secondaries — read-only until one is promoted by the server-side RedundancyCoordinator apply-lease flow); (b) red "Split-brain" when >1 Primary exists — apply-lease enforcement at the coordinator level should have made this impossible, so the alert implies a hand-edited DB row + an investigation. New ClusterNodeService with ListByClusterAsync (ordered by ServiceLevelBase descending so Primary rows with higher base float to the top) + a static IsStale predicate matching HostStatusService's 30s convention. DI-registered alongside the existing scoped services in Program.cs. Writes (role swap, enable/disable) are deliberately absent from the service — they go through the RedundancyCoordinator apply-lease flow on the server side + direct DB mutation from Admin would race with it. New ClusterNodeServiceTests covering IsStale across null/recent/old LastSeenAt + ListByClusterAsync ordering + cluster filter. 4/4 new tests passing; full Admin.Tests suite 76/76 (was 72 before this PR, +4). Admin project builds 0 errors. Task #198 captures the deferred work: (1) OpenTelemetry Meter for primary/secondary/stale counts + role_transition counter with from/to/node tags + OTLP exporter config; (2) RoleChanged SignalR push — extend FleetStatusPoller to detect RedundancyRole changes on ClusterNode rows + emit a RoleChanged hub message so the RedundancyTab refreshes instantly instead of on-page-load polling.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -52,6 +52,7 @@ else
|
||||
<li class="nav-item"><button class="nav-link @Tab("namespaces")" @onclick='() => _tab = "namespaces"'>Namespaces</button></li>
|
||||
<li class="nav-item"><button class="nav-link @Tab("drivers")" @onclick='() => _tab = "drivers"'>Drivers</button></li>
|
||||
<li class="nav-item"><button class="nav-link @Tab("acls")" @onclick='() => _tab = "acls"'>ACLs</button></li>
|
||||
<li class="nav-item"><button class="nav-link @Tab("redundancy")" @onclick='() => _tab = "redundancy"'>Redundancy</button></li>
|
||||
<li class="nav-item"><button class="nav-link @Tab("audit")" @onclick='() => _tab = "audit"'>Audit</button></li>
|
||||
</ul>
|
||||
|
||||
@@ -92,6 +93,10 @@ else
|
||||
{
|
||||
<AclsTab GenerationId="@_currentDraft.GenerationId" ClusterId="@ClusterId"/>
|
||||
}
|
||||
else if (_tab == "redundancy")
|
||||
{
|
||||
<RedundancyTab ClusterId="@ClusterId"/>
|
||||
}
|
||||
else if (_tab == "audit")
|
||||
{
|
||||
<AuditTab ClusterId="@ClusterId"/>
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
@using ZB.MOM.WW.OtOpcUa.Admin.Services
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration.Entities
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration.Enums
|
||||
@inject ClusterNodeService NodeSvc
|
||||
|
||||
<h4>Redundancy topology</h4>
|
||||
<p class="text-muted small">
|
||||
One row per <code>ClusterNode</code> in this cluster. Role, <code>ApplicationUri</code>,
|
||||
and <code>ServiceLevelBase</code> are authored separately; the Admin UI shows them read-only
|
||||
here so operators can confirm the published topology without touching it. LastSeen older than
|
||||
@((int)ClusterNodeService.StaleThreshold.TotalSeconds)s is flagged Stale — the node has
|
||||
stopped heart-beating and is likely down. Role swap goes through the server-side
|
||||
<code>RedundancyCoordinator</code> apply-lease flow, not direct DB edits.
|
||||
</p>
|
||||
|
||||
@if (_nodes is null)
|
||||
{
|
||||
<p>Loading…</p>
|
||||
}
|
||||
else if (_nodes.Count == 0)
|
||||
{
|
||||
<div class="alert alert-warning">
|
||||
No ClusterNode rows for this cluster. The server process needs at least one entry
|
||||
(with a non-blank <code>ApplicationUri</code>) before it can start up per OPC UA spec.
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
var primaries = _nodes.Count(n => n.RedundancyRole == RedundancyRole.Primary);
|
||||
var secondaries = _nodes.Count(n => n.RedundancyRole == RedundancyRole.Secondary);
|
||||
var standalone = _nodes.Count(n => n.RedundancyRole == RedundancyRole.Standalone);
|
||||
var staleCount = _nodes.Count(ClusterNodeService.IsStale);
|
||||
|
||||
<div class="row g-3 mb-4">
|
||||
<div class="col-md-3"><div class="card"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Nodes</h6>
|
||||
<div class="fs-3">@_nodes.Count</div>
|
||||
</div></div></div>
|
||||
<div class="col-md-3"><div class="card border-success"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Primary</h6>
|
||||
<div class="fs-3 text-success">@primaries</div>
|
||||
</div></div></div>
|
||||
<div class="col-md-3"><div class="card border-info"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Secondary</h6>
|
||||
<div class="fs-3 text-info">@secondaries</div>
|
||||
</div></div></div>
|
||||
<div class="col-md-3"><div class="card @(staleCount > 0 ? "border-warning" : "")"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Stale</h6>
|
||||
<div class="fs-3 @(staleCount > 0 ? "text-warning" : "")">@staleCount</div>
|
||||
</div></div></div>
|
||||
</div>
|
||||
|
||||
@if (primaries == 0 && standalone == 0)
|
||||
{
|
||||
<div class="alert alert-danger small mb-3">
|
||||
No Primary or Standalone node — the cluster has no authoritative write target. Secondaries
|
||||
stay read-only until one of them gets promoted via <code>RedundancyCoordinator</code>.
|
||||
</div>
|
||||
}
|
||||
else if (primaries > 1)
|
||||
{
|
||||
<div class="alert alert-danger small mb-3">
|
||||
<strong>Split-brain:</strong> @primaries nodes claim the Primary role. Apply-lease
|
||||
enforcement should have made this impossible at the coordinator level. Investigate
|
||||
immediately — one of the rows was likely hand-edited.
|
||||
</div>
|
||||
}
|
||||
|
||||
<table class="table table-sm table-hover align-middle">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Node</th>
|
||||
<th>Role</th>
|
||||
<th>Host</th>
|
||||
<th class="text-end">OPC UA port</th>
|
||||
<th class="text-end">ServiceLevel base</th>
|
||||
<th>ApplicationUri</th>
|
||||
<th>Enabled</th>
|
||||
<th>Last seen</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var n in _nodes)
|
||||
{
|
||||
<tr class="@RowClass(n)">
|
||||
<td><code>@n.NodeId</code></td>
|
||||
<td><span class="badge @RoleBadge(n.RedundancyRole)">@n.RedundancyRole</span></td>
|
||||
<td>@n.Host</td>
|
||||
<td class="text-end"><code>@n.OpcUaPort</code></td>
|
||||
<td class="text-end">@n.ServiceLevelBase</td>
|
||||
<td class="small text-break"><code>@n.ApplicationUri</code></td>
|
||||
<td>
|
||||
@if (n.Enabled) { <span class="badge bg-success">Enabled</span> }
|
||||
else { <span class="badge bg-secondary">Disabled</span> }
|
||||
</td>
|
||||
<td class="small @(ClusterNodeService.IsStale(n) ? "text-warning fw-bold" : "")">
|
||||
@(n.LastSeenAt is null ? "never" : FormatAge(n.LastSeenAt.Value))
|
||||
@if (ClusterNodeService.IsStale(n)) { <span class="badge bg-warning text-dark ms-1">Stale</span> }
|
||||
</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
}
|
||||
|
||||
@code {
|
||||
[Parameter] public string ClusterId { get; set; } = string.Empty;
|
||||
|
||||
private List<ClusterNode>? _nodes;
|
||||
|
||||
protected override async Task OnParametersSetAsync()
|
||||
{
|
||||
_nodes = await NodeSvc.ListByClusterAsync(ClusterId, CancellationToken.None);
|
||||
}
|
||||
|
||||
private static string RowClass(ClusterNode n) =>
|
||||
ClusterNodeService.IsStale(n) ? "table-warning" :
|
||||
!n.Enabled ? "table-secondary" : "";
|
||||
|
||||
private static string RoleBadge(RedundancyRole r) => r switch
|
||||
{
|
||||
RedundancyRole.Primary => "bg-success",
|
||||
RedundancyRole.Secondary => "bg-info",
|
||||
RedundancyRole.Standalone => "bg-primary",
|
||||
_ => "bg-secondary",
|
||||
};
|
||||
|
||||
private static string FormatAge(DateTime t)
|
||||
{
|
||||
var age = DateTime.UtcNow - t;
|
||||
if (age.TotalSeconds < 60) return $"{(int)age.TotalSeconds}s ago";
|
||||
if (age.TotalMinutes < 60) return $"{(int)age.TotalMinutes}m ago";
|
||||
if (age.TotalHours < 24) return $"{(int)age.TotalHours}h ago";
|
||||
return t.ToString("yyyy-MM-dd HH:mm 'UTC'");
|
||||
}
|
||||
}
|
||||
@@ -48,6 +48,7 @@ builder.Services.AddScoped<ReservationService>();
|
||||
builder.Services.AddScoped<DraftValidationService>();
|
||||
builder.Services.AddScoped<AuditLogService>();
|
||||
builder.Services.AddScoped<HostStatusService>();
|
||||
builder.Services.AddScoped<ClusterNodeService>();
|
||||
builder.Services.AddScoped<EquipmentImportBatchService>();
|
||||
builder.Services.AddScoped<ZB.MOM.WW.OtOpcUa.Configuration.Services.ILdapGroupRoleMappingService,
|
||||
ZB.MOM.WW.OtOpcUa.Configuration.Services.LdapGroupRoleMappingService>();
|
||||
|
||||
28
src/ZB.MOM.WW.OtOpcUa.Admin/Services/ClusterNodeService.cs
Normal file
28
src/ZB.MOM.WW.OtOpcUa.Admin/Services/ClusterNodeService.cs
Normal file
@@ -0,0 +1,28 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Admin.Services;
|
||||
|
||||
/// <summary>
|
||||
/// Read-side service for ClusterNode rows + their cluster-scoped redundancy view. Consumed
|
||||
/// by the RedundancyTab on the cluster detail page. Writes (role swap, node enable/disable)
|
||||
/// are not supported here — role swap happens through the RedundancyCoordinator apply-lease
|
||||
/// flow on the server side and would conflict with any direct DB mutation from Admin.
|
||||
/// </summary>
|
||||
public sealed class ClusterNodeService(OtOpcUaConfigDbContext db)
|
||||
{
|
||||
/// <summary>Stale-threshold matching <c>HostStatusService.StaleThreshold</c> — 30s of clock
|
||||
/// tolerance covers a missed heartbeat plus publisher GC pauses.</summary>
|
||||
public static readonly TimeSpan StaleThreshold = TimeSpan.FromSeconds(30);
|
||||
|
||||
public Task<List<ClusterNode>> ListByClusterAsync(string clusterId, CancellationToken ct) =>
|
||||
db.ClusterNodes.AsNoTracking()
|
||||
.Where(n => n.ClusterId == clusterId)
|
||||
.OrderByDescending(n => n.ServiceLevelBase)
|
||||
.ThenBy(n => n.NodeId)
|
||||
.ToListAsync(ct);
|
||||
|
||||
public static bool IsStale(ClusterNode node) =>
|
||||
node.LastSeenAt is null || DateTime.UtcNow - node.LastSeenAt.Value > StaleThreshold;
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Admin.Services;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Admin.Tests;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class ClusterNodeServiceTests
|
||||
{
|
||||
[Fact]
|
||||
public void IsStale_NullLastSeen_Returns_True()
|
||||
{
|
||||
var node = NewNode("A", RedundancyRole.Primary, lastSeenAt: null);
|
||||
ClusterNodeService.IsStale(node).ShouldBeTrue();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void IsStale_RecentLastSeen_Returns_False()
|
||||
{
|
||||
var node = NewNode("A", RedundancyRole.Primary, lastSeenAt: DateTime.UtcNow.AddSeconds(-5));
|
||||
ClusterNodeService.IsStale(node).ShouldBeFalse();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void IsStale_Old_LastSeen_Returns_True()
|
||||
{
|
||||
var node = NewNode("A", RedundancyRole.Primary,
|
||||
lastSeenAt: DateTime.UtcNow - ClusterNodeService.StaleThreshold - TimeSpan.FromSeconds(1));
|
||||
ClusterNodeService.IsStale(node).ShouldBeTrue();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task ListByClusterAsync_OrdersByServiceLevelBase_Descending_Then_NodeId()
|
||||
{
|
||||
using var ctx = NewContext();
|
||||
ctx.ClusterNodes.AddRange(
|
||||
NewNode("B-low", RedundancyRole.Secondary, serviceLevelBase: 150, clusterId: "c1"),
|
||||
NewNode("A-high", RedundancyRole.Primary, serviceLevelBase: 200, clusterId: "c1"),
|
||||
NewNode("other-cluster", RedundancyRole.Primary, serviceLevelBase: 200, clusterId: "c2"));
|
||||
await ctx.SaveChangesAsync();
|
||||
|
||||
var svc = new ClusterNodeService(ctx);
|
||||
var rows = await svc.ListByClusterAsync("c1", CancellationToken.None);
|
||||
|
||||
rows.Count.ShouldBe(2);
|
||||
rows[0].NodeId.ShouldBe("A-high"); // higher ServiceLevelBase first
|
||||
rows[1].NodeId.ShouldBe("B-low");
|
||||
}
|
||||
|
||||
private static ClusterNode NewNode(
|
||||
string nodeId,
|
||||
RedundancyRole role,
|
||||
DateTime? lastSeenAt = null,
|
||||
int serviceLevelBase = 200,
|
||||
string clusterId = "c1") => new()
|
||||
{
|
||||
NodeId = nodeId,
|
||||
ClusterId = clusterId,
|
||||
RedundancyRole = role,
|
||||
Host = $"{nodeId}.example",
|
||||
ApplicationUri = $"urn:{nodeId}",
|
||||
ServiceLevelBase = (byte)serviceLevelBase,
|
||||
LastSeenAt = lastSeenAt,
|
||||
CreatedBy = "test",
|
||||
};
|
||||
|
||||
private static OtOpcUaConfigDbContext NewContext()
|
||||
{
|
||||
var opts = new DbContextOptionsBuilder<OtOpcUaConfigDbContext>()
|
||||
.UseInMemoryDatabase(Guid.NewGuid().ToString())
|
||||
.Options;
|
||||
return new OtOpcUaConfigDbContext(opts);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user