365 lines
15 KiB
Plaintext
365 lines
15 KiB
Plaintext
@page "/hosts"
|
|
@* Two views, top to bottom. (1) Akka cluster topology: each member's NodeId (host:port), roles,
|
|
leader status — the cluster-membership view. (2) Driver instances: live driver-health grouped by
|
|
cluster. The health feed (DriverHealthChanged) carries no per-Akka-member identity, so the rows
|
|
are cluster-scoped (keyed per driver instance across the cluster, not per member). The section
|
|
reads the in-process driver-health snapshot store directly + reloads its config from the ConfigDB. *@
|
|
@attribute [Microsoft.AspNetCore.Authorization.Authorize]
|
|
@rendermode RenderMode.InteractiveServer
|
|
@using Akka.Actor
|
|
@using Akka.Cluster
|
|
@using Microsoft.EntityFrameworkCore
|
|
@using Microsoft.Extensions.Logging
|
|
@using ZB.MOM.WW.OtOpcUa.Configuration
|
|
@using ZB.MOM.WW.OtOpcUa.AdminUI.Hosts
|
|
@using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs
|
|
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Drivers
|
|
@inject ActorSystem ActorSystem
|
|
@inject IDriverStatusSnapshotStore DriverStore
|
|
@inject IDbContextFactory<OtOpcUaConfigDbContext> DbFactory
|
|
@inject Microsoft.Extensions.Logging.ILogger<Hosts> Logger
|
|
@implements IAsyncDisposable
|
|
|
|
<div class="d-flex justify-content-between align-items-center mb-3">
|
|
<h4 class="mb-0">Cluster hosts</h4>
|
|
</div>
|
|
|
|
<div class="d-flex align-items-center mb-3 gap-2">
|
|
<button class="btn btn-sm btn-outline-primary" @onclick="RefreshAsync" disabled="@_refreshing">
|
|
@if (_refreshing) { <span class="spinner-border spinner-border-sm me-1" /> }
|
|
Refresh
|
|
</button>
|
|
<span class="text-muted small">
|
|
Auto-refresh every @RefreshIntervalSeconds s. Last updated: @(_lastRefreshUtc?.ToString("HH:mm:ss 'UTC'") ?? "—")
|
|
</span>
|
|
</div>
|
|
|
|
<section class="panel notice rise" style="animation-delay:.02s">
|
|
Each row is one Akka cluster member identified by <span class="mono">host:port</span>. Roles
|
|
drive which actors run on which node — <span class="mono">admin</span> nodes host the
|
|
control-plane singletons, <span class="mono">driver</span> nodes host the per-node runtime
|
|
actors. The leader columns identify which member currently owns each role's singletons.
|
|
</section>
|
|
|
|
@if (_rows is null)
|
|
{
|
|
<p>Loading…</p>
|
|
}
|
|
else if (_rows.Count == 0)
|
|
{
|
|
<section class="panel notice rise" style="animation-delay:.08s">
|
|
No cluster members visible. The local node may still be joining.
|
|
</section>
|
|
}
|
|
else
|
|
{
|
|
<section class="agg-grid rise" style="animation-delay:.08s">
|
|
<div class="agg-card">
|
|
<div class="agg-label">Members</div>
|
|
<div class="agg-value numeric">@_rows.Count</div>
|
|
</div>
|
|
<div class="agg-card">
|
|
<div class="agg-label">Up</div>
|
|
<div class="agg-value numeric">@_rows.Count(r => r.Status == "Up")</div>
|
|
</div>
|
|
<div class="agg-card caution">
|
|
<div class="agg-label">Joining/Leaving</div>
|
|
<div class="agg-value numeric">@_rows.Count(r => r.Status is "Joining" or "Leaving" or "Exiting")</div>
|
|
</div>
|
|
<div class="agg-card alert">
|
|
<div class="agg-label">Unreachable</div>
|
|
<div class="agg-value numeric">@_rows.Count(r => r.Unreachable)</div>
|
|
</div>
|
|
</section>
|
|
|
|
<section class="panel rise" style="animation-delay:.14s">
|
|
<div class="panel-head">Members</div>
|
|
<div class="table-wrap">
|
|
<table class="data-table">
|
|
<thead>
|
|
<tr>
|
|
<th>Address</th>
|
|
<th>Status</th>
|
|
<th>Roles</th>
|
|
<th>Leader for</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
@foreach (var r in _rows)
|
|
{
|
|
<tr>
|
|
<td>
|
|
<span class="mono">@r.Address</span>
|
|
@if (r.IsSelf) { <span class="chip chip-idle ms-2">self</span> }
|
|
</td>
|
|
<td>
|
|
<span class="chip @StatusChipClass(r.Status, r.Unreachable)">
|
|
@(r.Unreachable ? $"{r.Status} (unreachable)" : r.Status)
|
|
</span>
|
|
</td>
|
|
<td>
|
|
@foreach (var role in r.Roles)
|
|
{
|
|
<span class="chip chip-idle me-1">@role</span>
|
|
}
|
|
</td>
|
|
<td>
|
|
@if (r.LeaderRoles.Count == 0)
|
|
{
|
|
<span class="text-muted">—</span>
|
|
}
|
|
else
|
|
{
|
|
@foreach (var role in r.LeaderRoles)
|
|
{
|
|
<span class="chip chip-ok me-1">@role</span>
|
|
}
|
|
}
|
|
</td>
|
|
</tr>
|
|
}
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</section>
|
|
}
|
|
|
|
<section class="panel notice rise" style="animation-delay:.16s">
|
|
Driver health is keyed per driver instance <em>across the cluster</em>, not per Akka member —
|
|
the health feed carries no per-member identity. These rows are therefore <strong>cluster-scoped</strong>:
|
|
one entry per configured driver instance, grouped under its cluster.
|
|
</section>
|
|
|
|
@if (_driverGroups is null)
|
|
{
|
|
<p>Loading…</p>
|
|
}
|
|
else if (_driverGroups.Count == 0)
|
|
{
|
|
<section class="panel notice rise" style="animation-delay:.2s">
|
|
No driver instances reporting yet.
|
|
</section>
|
|
}
|
|
else
|
|
{
|
|
<section class="panel rise" style="animation-delay:.2s">
|
|
<div class="panel-head">Driver instances</div>
|
|
@foreach (var g in _driverGroups)
|
|
{
|
|
<div class="mb-3">
|
|
<div class="panel-head">Cluster <span class="mono">@g.ClusterId</span></div>
|
|
<div class="mb-2">
|
|
@if (g.Nodes.Count == 0)
|
|
{
|
|
<span class="text-muted">—</span>
|
|
}
|
|
else
|
|
{
|
|
@foreach (var n in g.Nodes)
|
|
{
|
|
<span class="chip chip-idle me-1">@n.NodeId (@n.Host:@n.OpcUaPort)</span>
|
|
}
|
|
}
|
|
</div>
|
|
<div class="table-wrap">
|
|
<table class="data-table">
|
|
<thead>
|
|
<tr>
|
|
<th>Driver</th>
|
|
<th>Type</th>
|
|
<th>Status</th>
|
|
<th>Last read</th>
|
|
<th>Errors/5 min</th>
|
|
<th>Last error</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
@if (g.Drivers.Count == 0)
|
|
{
|
|
<tr>
|
|
<td colspan="6"><span class="text-muted">No drivers.</span></td>
|
|
</tr>
|
|
}
|
|
else
|
|
{
|
|
@foreach (var d in g.Drivers)
|
|
{
|
|
<tr>
|
|
<td>
|
|
@(d.Name ?? d.DriverInstanceId)
|
|
@if (d.Name is not null)
|
|
{
|
|
<span class="mono small">@d.DriverInstanceId</span>
|
|
}
|
|
</td>
|
|
<td>@(d.DriverType ?? "—")</td>
|
|
<td><span class="chip @DriverChipClass(d.State)">@d.State</span></td>
|
|
<td>@(d.LastSuccessfulReadUtc?.ToString("HH:mm:ss 'UTC'") ?? "—")</td>
|
|
<td class="numeric">@d.ErrorCount5Min</td>
|
|
<td><span class="text-muted small">@(d.LastError ?? "—")</span></td>
|
|
</tr>
|
|
}
|
|
}
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
}
|
|
</section>
|
|
}
|
|
|
|
@code {
|
|
// Auto-refresh period in seconds; also rendered in the page header text.
private const int RefreshIntervalSeconds = 5;

// --- Cluster-membership view state ---

// One row per cluster member; null until the first Refresh() has run (page shows "Loading…").
private List<MemberRow>? _rows;

// True while a manual refresh is running; disables the Refresh button and shows the spinner.
private bool _refreshing;

// UTC time of the last completed Refresh(); null before the first one.
private DateTime? _lastRefreshUtc;

// Periodic refresh timer, created in OnInitializedAsync and disposed in DisposeAsync.
private Timer? _timer;

// --- Driver-instance view state ---

// Cluster-grouped driver rows; null until RebuildDriverGroups() has run (page shows "Loading…").
private IReadOnlyList<HostsClusterGroup>? _driverGroups;

// Config cached by LoadConfigAsync(); reset to empty when a load fails.
private List<HostsNodeInfo> _nodes = new List<HostsNodeInfo>();
private List<HostsDriverInstanceInfo> _instances = new List<HostsDriverInstanceInfo>();
|
|
|
|
/// <summary>
/// Populates both views once, subscribes to live driver-health changes, and starts the
/// periodic auto-refresh timer (first tick after <see cref="RefreshIntervalSeconds"/> seconds).
/// </summary>
protected override async Task OnInitializedAsync()
{
    // Initial load so the first render already has data.
    Refresh();
    await LoadConfigAsync();
    RebuildDriverGroups();

    DriverStore.SnapshotChanged += OnSnapshotChanged;

    var interval = TimeSpan.FromSeconds(RefreshIntervalSeconds);

    // The timer callback only dispatches the work through InvokeAsync and discards the
    // returned task, so every failure has to be handled inside the dispatched delegate —
    // nothing outside it will ever observe the exception.
    _timer = new Timer(timerState => _ = InvokeAsync(async () =>
    {
        try
        {
            Refresh();
            await LoadConfigAsync();
            RebuildDriverGroups();
            StateHasChanged();
        }
        catch (Exception ex) when (ex is ObjectDisposedException or OperationCanceledException)
        {
            // Component/circuit went away while a tick was in flight — nothing to update.
        }
        catch (Exception ex)
        {
            // Any other failure would vanish with the discarded task; log it and let the
            // next tick try again instead of failing silently.
            Logger.LogWarning(ex, "/hosts: periodic refresh failed; will retry on the next tick.");
        }
    }), null, interval, interval);
}
|
|
|
|
/// <summary>
/// Handler for the manual Refresh button: reloads the cluster rows, the cached config and
/// the driver groups, toggling <c>_refreshing</c> around the work so the button is disabled
/// and the spinner shows while it runs.
/// </summary>
private async Task RefreshAsync()
{
    // Flip the busy flag and request a render before doing any work.
    _refreshing = true;
    StateHasChanged();

    try
    {
        // Yield once before the reload so the method continues asynchronously.
        await Task.Yield();

        Refresh();
        await LoadConfigAsync();
        RebuildDriverGroups();
    }
    finally
    {
        // Always clear the busy flag, even when the reload throws.
        _refreshing = false;
        StateHasChanged();
    }
}
|
|
|
|
// Reads the cluster-node and driver-instance configuration from the ConfigDB into the
// _nodes / _instances caches. Failures are deliberately non-fatal: the error is logged as a
// warning and both caches are emptied, so the driver rows fall back to bare ids / "—"
// instead of the page crashing. The live snapshot store is not touched here.
private async Task LoadConfigAsync()
{
    try
    {
        await using var context = await DbFactory.CreateDbContextAsync();

        var nodeQuery = context.ClusterNodes
            .AsNoTracking()
            .Select(node => new HostsNodeInfo(node.ClusterId, node.NodeId, node.Host, node.OpcUaPort));
        _nodes = await nodeQuery.ToListAsync();

        var instanceQuery = context.DriverInstances
            .AsNoTracking()
            .Select(instance => new HostsDriverInstanceInfo(
                instance.DriverInstanceId, instance.ClusterId, instance.Name, instance.DriverType));
        _instances = await instanceQuery.ToListAsync();
    }
    catch (Exception ex)
    {
        Logger.LogWarning(ex, "/hosts: failed to load driver-instance config; rows show ids only.");

        // Drop any partially loaded config so both caches stay consistent with each other.
        _nodes = new List<HostsNodeInfo>();
        _instances = new List<HostsDriverInstanceInfo>();
    }
}
|
|
|
|
// Recomputes _driverGroups from the current live snapshots in DriverStore combined with the
// cached node / driver-instance config. Does not touch the database.
private void RebuildDriverGroups()
{
    var snapshots = DriverStore.GetAll();
    _driverGroups = HostsDriverView.Build(snapshots, _nodes, _instances);
}
|
|
|
|
// SnapshotChanged handler. The original note says the store raises this on the bridge
// actor's thread, so the work is marshalled through InvokeAsync before touching component
// state. Only the cached config is used (no DB access); a driver that is not in the cache
// yet is shown by id until the next config reload (timer tick or manual Refresh).
private void OnSnapshotChanged(DriverHealthChanged _)
{
    InvokeAsync(() =>
    {
        RebuildDriverGroups();
        StateHasChanged();
    });
}
|
|
|
|
/// <summary>
/// Rebuilds <c>_rows</c> (one row per cluster member, sorted by host:port) from the current
/// Akka cluster state and stamps <c>_lastRefreshUtc</c>.
/// </summary>
private void Refresh()
{
    var cluster = Akka.Cluster.Cluster.Get(ActorSystem);

    // Capture the state once and derive members, unreachable set AND role leaders from this
    // same snapshot, so a row can never mix data from two different cluster states.
    var state = cluster.State;
    var unreachable = state.Unreachable
        .Select(m => m.Address.ToString())
        .ToHashSet();
    var selfAddress = cluster.SelfAddress.ToString();

    _rows = state.Members
        .Select(m =>
        {
            // Full address string is the identity used for comparisons; the shorter
            // host:port form is what the table displays.
            var address = m.Address.ToString();
            var hostPort = $"{m.Address.Host ?? "?"}:{m.Address.Port ?? 0}";

            // Roles for which this member is the leader according to the captured snapshot.
            var leaderRoles = m.Roles
                .Where(role => state.RoleLeader(role)?.ToString() == address)
                .OrderBy(s => s, StringComparer.OrdinalIgnoreCase)
                .ToList();

            return new MemberRow(
                Address: hostPort,
                Status: m.Status.ToString(),
                Roles: m.Roles.OrderBy(s => s, StringComparer.OrdinalIgnoreCase).ToList(),
                LeaderRoles: leaderRoles,
                Unreachable: unreachable.Contains(address),
                IsSelf: address == selfAddress);
        })
        .OrderBy(r => r.Address, StringComparer.OrdinalIgnoreCase)
        .ToList();

    _lastRefreshUtc = DateTime.UtcNow;
}
|
|
|
|
/// <summary>
/// Maps a member status string plus its unreachable flag to the chip CSS class used in the
/// Members table. An unreachable member is always "chip-alert", whatever its status.
/// </summary>
private static string StatusChipClass(string status, bool unreachable)
{
    // Unreachable wins over every status value.
    if (unreachable)
    {
        return "chip-alert";
    }

    return status switch
    {
        "Up" => "chip-ok",
        "Joining" or "Leaving" or "Exiting" or "WeaklyUp" => "chip-caution",
        "Down" or "Removed" => "chip-alert",
        _ => "chip-idle",
    };
}
|
|
|
|
// Translates a driver state string into the chip CSS class for the Driver instances table.
// Unknown or null states fall back to "chip-idle". Per the original note this is meant to
// mirror DriverStatusPanel.ChipClass (not visible from this file).
// NOTE(review): this uses "chip-warn"/"chip-bad" while StatusChipClass uses
// "chip-caution"/"chip-alert" — confirm both naming sets exist in the stylesheet.
private static string DriverChipClass(string? state)
{
    switch (state)
    {
        case "Healthy":
            return "chip-ok";

        case "Degraded":
        case "Connecting":
        case "Reconnecting":
            return "chip-warn";

        case "Faulted":
            return "chip-bad";

        default:
            return "chip-idle";
    }
}
|
|
|
|
/// <summary>
/// Detaches from the driver snapshot store and stops the auto-refresh timer.
/// </summary>
public async ValueTask DisposeAsync()
{
    // Detach first: DriverStore is injected and may outlive this component, so it must not
    // keep a handler that points at a disposed instance.
    DriverStore.SnapshotChanged -= OnSnapshotChanged;

    // Asynchronously dispose the timer; per the original note (.NET 6+) this awaits any
    // in-flight timer callback.
    // NOTE(review): the callback only dispatches work via InvokeAsync and discards the task,
    // so the dispatched refresh itself is presumably not awaited here — confirm.
    if (_timer is { } timer)
    {
        await timer.DisposeAsync();
    }
}
|
|
|
|
/// <summary>One row of the Members table, built by <c>Refresh()</c>.</summary>
/// <param name="Address">Member address rendered as <c>host:port</c>.</param>
/// <param name="Status">Member status as text (e.g. "Up", "Joining").</param>
/// <param name="Roles">The member's roles, sorted case-insensitively.</param>
/// <param name="LeaderRoles">Roles for which this member is the current role leader.</param>
/// <param name="Unreachable">True when the member is in the cluster's unreachable set.</param>
/// <param name="IsSelf">True when the member is the local node.</param>
private sealed record MemberRow(
    string Address,
    string Status,
    IReadOnlyCollection<string> Roles,
    IReadOnlyCollection<string> LeaderRoles,
    bool Unreachable,
    bool IsSelf);
|
|
}
|