Files
lmxopcua/src/ZB.MOM.WW.OtOpcUa.Admin/Hubs/FleetStatusPoller.cs
Joseph Doherty 18f93d72bb Phase 1 LDAP auth + SignalR real-time: closes the last two open Admin UI TODOs.

LDAP: Admin/Security/ gains SecurityOptions (bound from the appsettings.json Authentication:Ldap section), an LdapAuthResult record, and ILdapAuthService + LdapAuthService ported from scadalink-design's LdapAuthService (TLS guard; search-then-bind when a service account is configured, with direct-bind fallback; service-account re-bind after the user bind so attribute lookup uses the service principal's read rights; LdapException-to-friendly-message translation; OperationCanceledException pass-through). RoleMapper is a pure function: a case-insensitive group-name match against LdapOptions.GroupToRole that returns the distinct set of mapped Admin roles. EscapeLdapFilter escapes the five LDAP filter control characters (\, *, (, ), \0); ExtractFirstRdnValue pulls the value portion of a DN's leading RDN for memberOf parsing; ExtractOuSegment is a GLAuth-specific fallback for directories that don't populate memberOf but do embed ou=PrimaryGroup into user DNs (the actual GLAuth config in C:\publish\glauth\glauth.cfg uses nameformat=cn and groupformat=ou, so direct bind is enough).

Login page rewritten: EditForm → ILdapAuthService.AuthenticateAsync → cookie sign-in with claims (Name = displayName, NameIdentifier = username, Role for each mapped role, ldap_group for each raw group). A failed bind surfaces the service's error message; an empty role map returns an explicit "no Admin role mapped" message rather than silently succeeding. appsettings.json gains an Authentication:Ldap section with dev-GLAuth defaults (localhost:3893, UseTls=false, AllowInsecureLdap=true for dev; GroupToRole maps GLAuth's ReadOnly/WriteOperate/AlarmAck → ConfigViewer/ConfigEditor/FleetAdmin).

SignalR: two hubs plus a BackgroundService poller. FleetStatusHub routes per-cluster NodeStateChanged pushes (SubscribeCluster/UnsubscribeCluster per connection; FleetGroup for dashboard-wide) with a typed NodeStateChangedMessage payload. AlertHub auto-subscribes every connection to the AllAlertsGroup and exposes AcknowledgeAsync (ack persistence deferred to v2.1). FleetStatusPoller (IHostedService, 5s default cadence) scans ClusterNodeGenerationState joined with ClusterNode, caches the prior snapshot per NodeId, pushes NodeStateChanged on any delta, and raises AlertMessage("apply-failed") on a transition INTO Failed (sticky; the hub client acks later). Program.cs registers HttpContextAccessor (sign-in needs it), SignalR, LdapOptions + ILdapAuthService, and the poller as a hosted service, and maps the /hubs/fleet and /hubs/alerts endpoints.

ClusterDetail adds @rendermode RenderMode.InteractiveServer, @implements IAsyncDisposable, and a HubConnectionBuilder subscription that calls LoadAsync() on each NodeStateChanged for its cluster, so the "current published" card refreshes without a page reload; a dismissible "Live update" info banner surfaces the most recent event. Microsoft.AspNetCore.SignalR.Client 10.0.0 and Novell.Directory.Ldap.NETStandard 3.6.0 added.
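For illustration, a minimal sketch of the two LdapAuthService string helpers named above, assuming the standard RFC 4515 hex escapes; the real implementations are internal to LdapAuthService and may differ in detail (the naive RDN split below, for instance, does not handle escaped commas).

using System.Text;

// Sketch only; actual internals are not shown in this commit view.
internal static class LdapHelperSketches
{
    // Escapes the five LDAP filter control characters per RFC 4515.
    internal static string EscapeLdapFilter(string value)
    {
        var sb = new StringBuilder(value.Length);
        foreach (var c in value)
        {
            sb.Append(c switch
            {
                '\\' => @"\5c",
                '*'  => @"\2a",
                '('  => @"\28",
                ')'  => @"\29",
                '\0' => @"\00",
                _    => c.ToString()
            });
        }
        return sb.ToString();
    }

    // Pulls the value of a DN's leading RDN, e.g. "cn=WriteOperate,ou=groups,..."
    // -> "WriteOperate". Naive: assumes no escaped commas in the RDN value.
    internal static string? ExtractFirstRdnValue(string dn)
    {
        var first = dn.Split(',')[0];
        var eq = first.IndexOf('=');
        return eq < 0 ? null : first[(eq + 1)..].Trim();
    }
}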
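A sketch of the RoleMapper contract as the message describes it (pure, case-insensitive, distinct); the exact signature is an assumption, since only the behavior is stated above.

using System;
using System.Collections.Generic;
using System.Linq;

// Sketch only: the real RoleMapper lives in Admin/Security/.
internal static class RoleMapper
{
    public static IReadOnlyCollection<string> MapRoles(
        IEnumerable<string> ldapGroups,
        IReadOnlyDictionary<string, string> groupToRole)
    {
        // Case-insensitive group-name match against LdapOptions.GroupToRole.
        var lookup = new Dictionary<string, string>(groupToRole, StringComparer.OrdinalIgnoreCase);

        return ldapGroups
            .Where(lookup.ContainsKey)
            .Select(g => lookup[g])
            .Distinct()           // distinct set of mapped Admin roles
            .ToArray();
    }
}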
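The hub surface the poller publishes to looks roughly like this; the method, group, and helper names (SubscribeCluster, UnsubscribeCluster, FleetGroup, GroupName) come from the commit message, while the group-name strings and method bodies are assumptions.

using Microsoft.AspNetCore.SignalR;

// Sketch: per-cluster group routing as the commit message describes it.
public sealed class FleetStatusHub : Hub
{
    public const string FleetGroup = "fleet";                       // dashboard-wide group
    public static string GroupName(string clusterId) => $"cluster:{clusterId}";

    public Task SubscribeCluster(string clusterId) =>
        Groups.AddToGroupAsync(Context.ConnectionId, GroupName(clusterId));

    public Task UnsubscribeCluster(string clusterId) =>
        Groups.RemoveFromGroupAsync(Context.ConnectionId, GroupName(clusterId));
}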
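And the shape of the ClusterDetail subscription; the hub URL and message name match the commit text, while field names (_hub, _lastEvent) and the injected Navigation manager are illustrative.

// Inside ClusterDetail's OnInitializedAsync — sketch only.
_hub = new HubConnectionBuilder()
    .WithUrl(Navigation.ToAbsoluteUri("/hubs/fleet"))
    .WithAutomaticReconnect()
    .Build();

_hub.On<NodeStateChangedMessage>("NodeStateChanged", async msg =>
{
    if (msg.ClusterId != ClusterId) return;   // only this cluster's events
    _lastEvent = msg;                         // feeds the "Live update" banner
    await InvokeAsync(LoadAsync);             // refresh the "current published" card
});

await _hub.StartAsync();
await _hub.SendAsync("SubscribeCluster", ClusterId);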
Tests: 13 new — RoleMapperTests (single group, case-insensitive match, multi-group distinct-roles, unknown-group ignored, empty-map); LdapAuthServiceTests (EscapeLdapFilter with 4 inputs, ExtractFirstRdnValue with 4 inputs — all via reflection against internals); LdapLiveBindTests (skip when localhost:3893 unreachable; valid-credentials-bind-succeeds; wrong-password-fails-with-recognizable-error; empty-username-rejected-before-hitting-directory); FleetStatusPollerTests (throwaway DB, seeds cluster+node+generation+apply-state, runs PollOnceAsync, asserts NodeStateChanged hit the recorder; second test seeds a Failed state and asserts AlertRaised fired) — backed by RecordingHubContext/RecordingHubClients/RecordingClientProxy that capture SendCoreAsync invocations while throwing NotImplementedException for the IHubClients methods the poller doesn't call (fail-fast if evolution adds new dependencies). InternalsVisibleTo added so the test project can call FleetStatusPoller.PollOnceAsync directly. Full solution 946 pass / 1 pre-existing Phase 0 baseline failure.
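The recording layer reduces to very little code at the proxy end; a sketch, assuming the poller only ever reaches IClientProxy.SendCoreAsync (which is the interface's single method).

using Microsoft.AspNetCore.SignalR;

// Captures every SendCoreAsync the poller issues; the companion
// RecordingHubContext/RecordingHubClients route Clients.Group(...) here and
// throw NotImplementedException for everything else (fail-fast by design).
internal sealed class RecordingClientProxy : IClientProxy
{
    public List<(string Method, object?[] Args)> Sent { get; } = new();

    public Task SendCoreAsync(string method, object?[] args, CancellationToken cancellationToken = default)
    {
        Sent.Add((method, args));
        return Task.CompletedTask;
    }
}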
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 22:28:49 -04:00

94 lines
3.9 KiB
C#

using Microsoft.AspNetCore.SignalR;
using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;

namespace ZB.MOM.WW.OtOpcUa.Admin.Hubs;

/// <summary>
/// Polls <c>ClusterNodeGenerationState</c> every <see cref="PollInterval"/> and publishes
/// per-node deltas to <see cref="FleetStatusHub"/>. Also raises sticky
/// <see cref="AlertMessage"/>s on transitions into <c>Failed</c>.
/// </summary>
public sealed class FleetStatusPoller(
    IServiceScopeFactory scopeFactory,
    IHubContext<FleetStatusHub> fleetHub,
    IHubContext<AlertHub> alertHub,
    ILogger<FleetStatusPoller> logger) : BackgroundService
{
    public TimeSpan PollInterval { get; init; } = TimeSpan.FromSeconds(5);

    private readonly Dictionary<string, NodeStateSnapshot> _last = new();

    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        logger.LogInformation("FleetStatusPoller starting — interval {Interval}s", PollInterval.TotalSeconds);
        while (!stoppingToken.IsCancellationRequested)
        {
            // A failed tick is logged and retried on the next interval rather
            // than crashing the host; cancellation passes through.
            try { await PollOnceAsync(stoppingToken); }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                logger.LogWarning(ex, "FleetStatusPoller tick failed");
            }

            try { await Task.Delay(PollInterval, stoppingToken); }
            catch (OperationCanceledException) { break; }
        }
    }

    internal async Task PollOnceAsync(CancellationToken ct)
    {
        using var scope = scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();

        var rows = await db.ClusterNodeGenerationStates.AsNoTracking()
            .Join(db.ClusterNodes.AsNoTracking(), s => s.NodeId, n => n.NodeId, (s, n) => new { s, n.ClusterId })
            .ToListAsync(ct);

        foreach (var r in rows)
        {
            var snapshot = new NodeStateSnapshot(
                r.s.NodeId, r.ClusterId, r.s.CurrentGenerationId,
                r.s.LastAppliedStatus?.ToString(), r.s.LastAppliedError,
                r.s.LastAppliedAt, r.s.LastSeenAt);

            // Record-struct value equality: any field delta counts as a change.
            var hadPrior = _last.TryGetValue(r.s.NodeId, out var prior);
            if (!hadPrior || prior != snapshot)
            {
                _last[r.s.NodeId] = snapshot;
                var msg = new NodeStateChangedMessage(
                    snapshot.NodeId, snapshot.ClusterId, snapshot.GenerationId,
                    snapshot.Status, snapshot.Error, snapshot.AppliedAt, snapshot.SeenAt);

                await fleetHub.Clients.Group(FleetStatusHub.GroupName(snapshot.ClusterId))
                    .SendAsync("NodeStateChanged", msg, ct);
                await fleetHub.Clients.Group(FleetStatusHub.FleetGroup)
                    .SendAsync("NodeStateChanged", msg, ct);

                // Alert only on the transition INTO Failed, not on every Failed tick.
                if (snapshot.Status == "Failed" && (!hadPrior || prior.Status != "Failed"))
                {
                    var alert = new AlertMessage(
                        AlertId: $"{snapshot.NodeId}:apply-failed",
                        Severity: "error",
                        Title: $"Apply failed on {snapshot.NodeId}",
                        Detail: snapshot.Error ?? "(no detail)",
                        RaisedAtUtc: DateTime.UtcNow,
                        ClusterId: snapshot.ClusterId,
                        NodeId: snapshot.NodeId);

                    await alertHub.Clients.Group(AlertHub.AllAlertsGroup)
                        .SendAsync("AlertRaised", alert, ct);
                }
            }
        }
    }

    /// <summary>Exposed for tests — forces a snapshot reset so stub data re-seeds.</summary>
    internal void ResetCache() => _last.Clear();

    private readonly record struct NodeStateSnapshot(
        string NodeId, string ClusterId, long? GenerationId,
        string? Status, string? Error, DateTime? AppliedAt, DateTime? SeenAt);
}