Compare commits
4 Commits
phase-3-pr
...
phase-3-pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef2a810b2d | ||
| a7764e50f3 | |||
|
|
8464e3f376 | ||
| a9357600e7 |
@@ -108,13 +108,30 @@ condition node). Alarm tracking already has its own integration test
|
||||
(`AlarmSubscription*`); the multi-driver alarm case would need a stub
|
||||
`IAlarmSource` that's worth its own focused PR.
|
||||
|
||||
## 7. Host-status per-AppEngine granularity → Admin UI dashboard
|
||||
## 7. Host-status per-AppEngine granularity → Admin UI dashboard — **DONE (PRs 33 + 34)**
|
||||
|
||||
**Status**: PR 13 ships per-platform/per-AppEngine `ScanState` probing; PR 17
|
||||
surfaces the resulting `OnHostStatusChanged` events through OPC UA. Admin
|
||||
UI doesn't render a per-host dashboard yet.
|
||||
**PR 33** landed the data layer: `DriverHostStatus` entity + migration with
|
||||
composite key `(NodeId, DriverInstanceId, HostName)` and two query-supporting
|
||||
indexes (per-cluster drill-down on `NodeId`, stale-row detection on
|
||||
`LastSeenUtc`).
|
||||
|
||||
**To do**:
|
||||
- SignalR hub push of `HostStatusChangedEventArgs` to the Admin UI.
|
||||
- Dashboard page showing each tracked host, current state, last transition
|
||||
time, failure count.
|
||||
**PR 34** wired the publisher + consumer. `HostStatusPublisher` is a
|
||||
`BackgroundService` in the Server process that walks every registered
|
||||
`IHostConnectivityProbe`-capable driver every 10s, calls
|
||||
`GetHostStatuses()`, and upserts rows (`LastSeenUtc` advances each tick;
|
||||
`State` + `StateChangedUtc` update on transitions). Admin UI `/hosts` page
|
||||
groups by cluster, shows four summary cards (Hosts / Running / Stale /
|
||||
Faulted), and flags rows whose `LastSeenUtc` is older than 30s as Stale so
|
||||
operators see crashed Servers without waiting for a state change.
|
||||
|
||||
Deferred as follow-ups:
|
||||
|
||||
- Event-driven push (subscribe to `OnHostStatusChanged` per driver for
|
||||
sub-heartbeat latency). Adds DriverHost lifecycle-event plumbing;
|
||||
10s polling is fine for operator-scale use.
|
||||
- Failure-count column — needs the publisher to track a transition history
|
||||
per host, not just current-state.
|
||||
- SignalR fan-out to the Admin page (currently the page polls the DB, not
|
||||
a hub). The DB-polled version is fine at current cadence but a hub push
|
||||
would eliminate the 10s race where a new row sits in the DB before the
|
||||
Admin page notices.
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
<ul class="nav flex-column">
|
||||
<li class="nav-item"><a class="nav-link text-light" href="/">Overview</a></li>
|
||||
<li class="nav-item"><a class="nav-link text-light" href="/fleet">Fleet status</a></li>
|
||||
<li class="nav-item"><a class="nav-link text-light" href="/hosts">Host status</a></li>
|
||||
<li class="nav-item"><a class="nav-link text-light" href="/clusters">Clusters</a></li>
|
||||
<li class="nav-item"><a class="nav-link text-light" href="/reservations">Reservations</a></li>
|
||||
<li class="nav-item"><a class="nav-link text-light" href="/certificates">Certificates</a></li>
|
||||
|
||||
160
src/ZB.MOM.WW.OtOpcUa.Admin/Components/Pages/Hosts.razor
Normal file
160
src/ZB.MOM.WW.OtOpcUa.Admin/Components/Pages/Hosts.razor
Normal file
@@ -0,0 +1,160 @@
|
||||
@page "/hosts"
|
||||
@using Microsoft.EntityFrameworkCore
|
||||
@using ZB.MOM.WW.OtOpcUa.Admin.Services
|
||||
@using ZB.MOM.WW.OtOpcUa.Configuration.Enums
|
||||
@inject IServiceScopeFactory ScopeFactory
|
||||
@implements IDisposable
|
||||
|
||||
<h1 class="mb-4">Driver host status</h1>
|
||||
|
||||
<div class="d-flex align-items-center mb-3 gap-2">
|
||||
<button class="btn btn-sm btn-outline-primary" @onclick="RefreshAsync" disabled="@_refreshing">
|
||||
@if (_refreshing) { <span class="spinner-border spinner-border-sm me-1" /> }
|
||||
Refresh
|
||||
</button>
|
||||
<span class="text-muted small">
|
||||
Auto-refresh every @RefreshIntervalSeconds s. Last updated: @(_lastRefreshUtc?.ToString("HH:mm:ss 'UTC'") ?? "—")
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="alert alert-info small mb-4">
|
||||
Each row is one host reported by a driver instance on a server node. Galaxy drivers report
|
||||
per-Platform / per-AppEngine entries; Modbus drivers report the PLC endpoint. Rows age out
|
||||
of the Server's publisher on every 10-second heartbeat — rows whose LastSeen is older than
|
||||
30s are flagged Stale, which usually means the owning Server process has crashed or lost
|
||||
its DB connection.
|
||||
</div>
|
||||
|
||||
@if (_rows is null)
|
||||
{
|
||||
<p>Loading…</p>
|
||||
}
|
||||
else if (_rows.Count == 0)
|
||||
{
|
||||
<div class="alert alert-secondary">
|
||||
No host-status rows yet. The Server publishes its first tick 2s after startup; if this list stays empty, check that the Server is running and the driver implements <code>IHostConnectivityProbe</code>.
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="row g-3 mb-4">
|
||||
<div class="col-md-3"><div class="card"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Hosts</h6>
|
||||
<div class="fs-3">@_rows.Count</div>
|
||||
</div></div></div>
|
||||
<div class="col-md-3"><div class="card border-success"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Running</h6>
|
||||
<div class="fs-3 text-success">@_rows.Count(r => r.State == DriverHostState.Running && !HostStatusService.IsStale(r))</div>
|
||||
</div></div></div>
|
||||
<div class="col-md-3"><div class="card border-warning"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Stale</h6>
|
||||
<div class="fs-3 text-warning">@_rows.Count(HostStatusService.IsStale)</div>
|
||||
</div></div></div>
|
||||
<div class="col-md-3"><div class="card border-danger"><div class="card-body">
|
||||
<h6 class="text-muted mb-1">Faulted</h6>
|
||||
<div class="fs-3 text-danger">@_rows.Count(r => r.State == DriverHostState.Faulted)</div>
|
||||
</div></div></div>
|
||||
</div>
|
||||
|
||||
@foreach (var cluster in _rows.GroupBy(r => r.ClusterId ?? "(unassigned)").OrderBy(g => g.Key))
|
||||
{
|
||||
<h2 class="h5 mt-4">Cluster: <code>@cluster.Key</code></h2>
|
||||
<table class="table table-sm table-hover align-middle">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Node</th>
|
||||
<th>Driver</th>
|
||||
<th>Host</th>
|
||||
<th>State</th>
|
||||
<th>Last transition</th>
|
||||
<th>Last seen</th>
|
||||
<th>Detail</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var r in cluster)
|
||||
{
|
||||
<tr class="@RowClass(r)">
|
||||
<td><code>@r.NodeId</code></td>
|
||||
<td><code>@r.DriverInstanceId</code></td>
|
||||
<td>@r.HostName</td>
|
||||
<td>
|
||||
<span class="badge @StateBadge(r.State)">@r.State</span>
|
||||
@if (HostStatusService.IsStale(r))
|
||||
{
|
||||
<span class="badge bg-warning text-dark ms-1">Stale</span>
|
||||
}
|
||||
</td>
|
||||
<td class="small">@FormatAge(r.StateChangedUtc)</td>
|
||||
<td class="small @(HostStatusService.IsStale(r) ? "text-warning" : "")">@FormatAge(r.LastSeenUtc)</td>
|
||||
<td class="text-truncate small" style="max-width: 320px;" title="@r.Detail">@r.Detail</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
}
|
||||
}
|
||||
|
||||
@code {
|
||||
// Mirrors HostStatusPublisher.HeartbeatInterval — polling ahead of the broadcaster
|
||||
// produces stale-looking rows mid-cycle.
|
||||
private const int RefreshIntervalSeconds = 10;
|
||||
|
||||
private List<HostStatusRow>? _rows;
|
||||
private bool _refreshing;
|
||||
private DateTime? _lastRefreshUtc;
|
||||
private Timer? _timer;
|
||||
|
||||
protected override async Task OnInitializedAsync()
|
||||
{
|
||||
await RefreshAsync();
|
||||
_timer = new Timer(async _ => await InvokeAsync(RefreshAsync),
|
||||
state: null,
|
||||
dueTime: TimeSpan.FromSeconds(RefreshIntervalSeconds),
|
||||
period: TimeSpan.FromSeconds(RefreshIntervalSeconds));
|
||||
}
|
||||
|
||||
private async Task RefreshAsync()
|
||||
{
|
||||
if (_refreshing) return;
|
||||
_refreshing = true;
|
||||
try
|
||||
{
|
||||
using var scope = ScopeFactory.CreateScope();
|
||||
var svc = scope.ServiceProvider.GetRequiredService<HostStatusService>();
|
||||
_rows = (await svc.ListAsync()).ToList();
|
||||
_lastRefreshUtc = DateTime.UtcNow;
|
||||
}
|
||||
finally
|
||||
{
|
||||
_refreshing = false;
|
||||
StateHasChanged();
|
||||
}
|
||||
}
|
||||
|
||||
private static string RowClass(HostStatusRow r) => r.State switch
|
||||
{
|
||||
DriverHostState.Faulted => "table-danger",
|
||||
_ when HostStatusService.IsStale(r) => "table-warning",
|
||||
_ => "",
|
||||
};
|
||||
|
||||
private static string StateBadge(DriverHostState s) => s switch
|
||||
{
|
||||
DriverHostState.Running => "bg-success",
|
||||
DriverHostState.Stopped => "bg-secondary",
|
||||
DriverHostState.Faulted => "bg-danger",
|
||||
_ => "bg-secondary",
|
||||
};
|
||||
|
||||
private static string FormatAge(DateTime t)
|
||||
{
|
||||
var age = DateTime.UtcNow - t;
|
||||
if (age.TotalSeconds < 60) return $"{(int)age.TotalSeconds}s ago";
|
||||
if (age.TotalMinutes < 60) return $"{(int)age.TotalMinutes}m ago";
|
||||
if (age.TotalHours < 24) return $"{(int)age.TotalHours}h ago";
|
||||
return t.ToString("yyyy-MM-dd HH:mm 'UTC'");
|
||||
}
|
||||
|
||||
public void Dispose() => _timer?.Dispose();
|
||||
}
|
||||
@@ -47,6 +47,7 @@ builder.Services.AddScoped<NodeAclService>();
|
||||
builder.Services.AddScoped<ReservationService>();
|
||||
builder.Services.AddScoped<DraftValidationService>();
|
||||
builder.Services.AddScoped<AuditLogService>();
|
||||
builder.Services.AddScoped<HostStatusService>();
|
||||
|
||||
// Cert-trust management — reads the OPC UA server's PKI store root so rejected client certs
|
||||
// can be promoted to trusted via the Admin UI. Singleton: no per-request state, just
|
||||
|
||||
63
src/ZB.MOM.WW.OtOpcUa.Admin/Services/HostStatusService.cs
Normal file
63
src/ZB.MOM.WW.OtOpcUa.Admin/Services/HostStatusService.cs
Normal file
@@ -0,0 +1,63 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Admin.Services;
|
||||
|
||||
/// <summary>
|
||||
/// One row per <see cref="DriverHostStatus"/> record, enriched with the owning
|
||||
/// <c>ClusterNode.ClusterId</c> when available (left-join). The Admin <c>/hosts</c> page
|
||||
/// groups by cluster and renders a per-node → per-driver → per-host tree.
|
||||
/// </summary>
|
||||
public sealed record HostStatusRow(
|
||||
string NodeId,
|
||||
string? ClusterId,
|
||||
string DriverInstanceId,
|
||||
string HostName,
|
||||
DriverHostState State,
|
||||
DateTime StateChangedUtc,
|
||||
DateTime LastSeenUtc,
|
||||
string? Detail);
|
||||
|
||||
/// <summary>
|
||||
/// Read-side service for the Admin UI's per-host drill-down. Loads
|
||||
/// <see cref="DriverHostStatus"/> rows (written by the Server process's
|
||||
/// <c>HostStatusPublisher</c>) and left-joins <c>ClusterNode</c> so each row knows which
|
||||
/// cluster it belongs to — the Admin UI groups by cluster for the fleet-wide view.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The publisher heartbeat is 10s (<c>HostStatusPublisher.HeartbeatInterval</c>). The
|
||||
/// Admin page also polls every ~10s and treats rows with <c>LastSeenUtc</c> older than
|
||||
/// <c>StaleThreshold</c> (30s) as stale — covers a missed heartbeat tolerance plus
|
||||
/// a generous buffer for clock skew and publisher GC pauses.
|
||||
/// </remarks>
|
||||
public sealed class HostStatusService(OtOpcUaConfigDbContext db)
|
||||
{
|
||||
public static readonly TimeSpan StaleThreshold = TimeSpan.FromSeconds(30);
|
||||
|
||||
public async Task<IReadOnlyList<HostStatusRow>> ListAsync(CancellationToken ct = default)
|
||||
{
|
||||
// LEFT JOIN on NodeId so a row persists even when its owning ClusterNode row hasn't
|
||||
// been created yet (first-boot bootstrap case — keeps the UI from losing sight of
|
||||
// the reporting server).
|
||||
var rows = await (from s in db.DriverHostStatuses.AsNoTracking()
|
||||
join n in db.ClusterNodes.AsNoTracking()
|
||||
on s.NodeId equals n.NodeId into nodeJoin
|
||||
from n in nodeJoin.DefaultIfEmpty()
|
||||
orderby s.NodeId, s.DriverInstanceId, s.HostName
|
||||
select new HostStatusRow(
|
||||
s.NodeId,
|
||||
n != null ? n.ClusterId : null,
|
||||
s.DriverInstanceId,
|
||||
s.HostName,
|
||||
s.State,
|
||||
s.StateChangedUtc,
|
||||
s.LastSeenUtc,
|
||||
s.Detail)).ToListAsync(ct);
|
||||
return rows;
|
||||
}
|
||||
|
||||
public static bool IsStale(HostStatusRow row) =>
|
||||
DateTime.UtcNow - row.LastSeenUtc > StaleThreshold;
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
|
||||
/// <summary>
|
||||
/// Per-host connectivity snapshot the Server publishes for each driver's
|
||||
/// <c>IHostConnectivityProbe.GetHostStatuses</c> entry. One row per
|
||||
/// (<see cref="NodeId"/>, <see cref="DriverInstanceId"/>, <see cref="HostName"/>) triple —
|
||||
/// a redundant 2-node cluster with one Galaxy driver reporting 3 platforms produces 6
|
||||
/// rows, not 3, because each server node owns its own runtime view.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Closes the data-layer piece of LMX follow-up #7 (per-AppEngine Admin dashboard
|
||||
/// drill-down). The publisher hosted service on the Server side subscribes to every
|
||||
/// registered driver's <c>OnHostStatusChanged</c> and upserts rows on transitions +
|
||||
/// periodic liveness heartbeats. <see cref="LastSeenUtc"/> advances on every
|
||||
/// heartbeat so the Admin UI can flag stale rows from a crashed Server.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// No foreign-key to <see cref="ClusterNode"/> — a Server may start reporting host
|
||||
/// status before its ClusterNode row exists (e.g. first-boot bootstrap), and we'd
|
||||
/// rather keep the status row than drop it. The Admin-side service left-joins on
|
||||
/// NodeId when presenting rows.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class DriverHostStatus
|
||||
{
|
||||
/// <summary>Server node that's running the driver.</summary>
|
||||
public required string NodeId { get; set; }
|
||||
|
||||
/// <summary>Driver instance's stable id (matches <c>IDriver.DriverInstanceId</c>).</summary>
|
||||
public required string DriverInstanceId { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side host identifier — Galaxy Platform / AppEngine name, Modbus
|
||||
/// <c>host:port</c>, whatever the probe returns. Opaque to the Admin UI except as
|
||||
/// a display string.
|
||||
/// </summary>
|
||||
public required string HostName { get; set; }
|
||||
|
||||
public DriverHostState State { get; set; } = DriverHostState.Unknown;
|
||||
|
||||
/// <summary>Timestamp of the last state transition (not of the most recent heartbeat).</summary>
|
||||
public DateTime StateChangedUtc { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Advances on every publisher heartbeat — the Admin UI uses
|
||||
/// <c>now - LastSeenUtc > threshold</c> to flag rows whose owning Server has
|
||||
/// stopped reporting (crashed, network-partitioned, etc.), independent of
|
||||
/// <see cref="State"/>.
|
||||
/// </summary>
|
||||
public DateTime LastSeenUtc { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Optional human-readable detail populated when <see cref="State"/> is
|
||||
/// <see cref="DriverHostState.Faulted"/> — e.g. the exception message from the
|
||||
/// driver's probe. Null for Running / Stopped / Unknown transitions.
|
||||
/// </summary>
|
||||
public string? Detail { get; set; }
|
||||
}
|
||||
21
src/ZB.MOM.WW.OtOpcUa.Configuration/Enums/DriverHostState.cs
Normal file
21
src/ZB.MOM.WW.OtOpcUa.Configuration/Enums/DriverHostState.cs
Normal file
@@ -0,0 +1,21 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
/// <summary>
|
||||
/// Persisted mirror of <c>Core.Abstractions.HostState</c> — the lifecycle state each
|
||||
/// <c>IHostConnectivityProbe</c>-capable driver reports for its per-host topology
|
||||
/// (Galaxy Platforms / AppEngines, Modbus PLC endpoints, future OPC UA gateway upstreams).
|
||||
/// Defined here instead of re-using <c>Core.Abstractions.HostState</c> so the
|
||||
/// Configuration project stays free of driver-runtime dependencies.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The server-side publisher (follow-up PR) translates
|
||||
/// <c>HostStatusChangedEventArgs.NewState</c> to this enum on every transition and
|
||||
/// upserts into <see cref="Entities.DriverHostStatus"/>. Admin UI reads from the DB.
|
||||
/// </remarks>
|
||||
public enum DriverHostState
|
||||
{
|
||||
Unknown,
|
||||
Running,
|
||||
Stopped,
|
||||
Faulted,
|
||||
}
|
||||
1248
src/ZB.MOM.WW.OtOpcUa.Configuration/Migrations/20260418193608_AddDriverHostStatus.Designer.cs
generated
Normal file
1248
src/ZB.MOM.WW.OtOpcUa.Configuration/Migrations/20260418193608_AddDriverHostStatus.Designer.cs
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,49 @@
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
|
||||
{
|
||||
/// <inheritdoc />
|
||||
public partial class AddDriverHostStatus : Migration
|
||||
{
|
||||
/// <inheritdoc />
|
||||
protected override void Up(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.CreateTable(
|
||||
name: "DriverHostStatus",
|
||||
columns: table => new
|
||||
{
|
||||
NodeId = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
|
||||
DriverInstanceId = table.Column<string>(type: "nvarchar(64)", maxLength: 64, nullable: false),
|
||||
HostName = table.Column<string>(type: "nvarchar(256)", maxLength: 256, nullable: false),
|
||||
State = table.Column<string>(type: "nvarchar(16)", maxLength: 16, nullable: false),
|
||||
StateChangedUtc = table.Column<DateTime>(type: "datetime2(3)", nullable: false),
|
||||
LastSeenUtc = table.Column<DateTime>(type: "datetime2(3)", nullable: false),
|
||||
Detail = table.Column<string>(type: "nvarchar(1024)", maxLength: 1024, nullable: true)
|
||||
},
|
||||
constraints: table =>
|
||||
{
|
||||
table.PrimaryKey("PK_DriverHostStatus", x => new { x.NodeId, x.DriverInstanceId, x.HostName });
|
||||
});
|
||||
|
||||
migrationBuilder.CreateIndex(
|
||||
name: "IX_DriverHostStatus_LastSeen",
|
||||
table: "DriverHostStatus",
|
||||
column: "LastSeenUtc");
|
||||
|
||||
migrationBuilder.CreateIndex(
|
||||
name: "IX_DriverHostStatus_Node",
|
||||
table: "DriverHostStatus",
|
||||
column: "NodeId");
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override void Down(MigrationBuilder migrationBuilder)
|
||||
{
|
||||
migrationBuilder.DropTable(
|
||||
name: "DriverHostStatus");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -332,6 +332,46 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.Migrations
|
||||
});
|
||||
});
|
||||
|
||||
modelBuilder.Entity("ZB.MOM.WW.OtOpcUa.Configuration.Entities.DriverHostStatus", b =>
|
||||
{
|
||||
b.Property<string>("NodeId")
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("DriverInstanceId")
|
||||
.HasMaxLength(64)
|
||||
.HasColumnType("nvarchar(64)");
|
||||
|
||||
b.Property<string>("HostName")
|
||||
.HasMaxLength(256)
|
||||
.HasColumnType("nvarchar(256)");
|
||||
|
||||
b.Property<string>("Detail")
|
||||
.HasMaxLength(1024)
|
||||
.HasColumnType("nvarchar(1024)");
|
||||
|
||||
b.Property<DateTime>("LastSeenUtc")
|
||||
.HasColumnType("datetime2(3)");
|
||||
|
||||
b.Property<string>("State")
|
||||
.IsRequired()
|
||||
.HasMaxLength(16)
|
||||
.HasColumnType("nvarchar(16)");
|
||||
|
||||
b.Property<DateTime>("StateChangedUtc")
|
||||
.HasColumnType("datetime2(3)");
|
||||
|
||||
b.HasKey("NodeId", "DriverInstanceId", "HostName");
|
||||
|
||||
b.HasIndex("LastSeenUtc")
|
||||
.HasDatabaseName("IX_DriverHostStatus_LastSeen");
|
||||
|
||||
b.HasIndex("NodeId")
|
||||
.HasDatabaseName("IX_DriverHostStatus_Node");
|
||||
|
||||
b.ToTable("DriverHostStatus", (string)null);
|
||||
});
|
||||
|
||||
modelBuilder.Entity("ZB.MOM.WW.OtOpcUa.Configuration.Entities.DriverInstance", b =>
|
||||
{
|
||||
b.Property<Guid>("DriverInstanceRowId")
|
||||
|
||||
@@ -27,6 +27,7 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
public DbSet<ClusterNodeGenerationState> ClusterNodeGenerationStates => Set<ClusterNodeGenerationState>();
|
||||
public DbSet<ConfigAuditLog> ConfigAuditLogs => Set<ConfigAuditLog>();
|
||||
public DbSet<ExternalIdReservation> ExternalIdReservations => Set<ExternalIdReservation>();
|
||||
public DbSet<DriverHostStatus> DriverHostStatuses => Set<DriverHostStatus>();
|
||||
|
||||
protected override void OnModelCreating(ModelBuilder modelBuilder)
|
||||
{
|
||||
@@ -47,6 +48,7 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
ConfigureClusterNodeGenerationState(modelBuilder);
|
||||
ConfigureConfigAuditLog(modelBuilder);
|
||||
ConfigureExternalIdReservation(modelBuilder);
|
||||
ConfigureDriverHostStatus(modelBuilder);
|
||||
}
|
||||
|
||||
private static void ConfigureServerCluster(ModelBuilder modelBuilder)
|
||||
@@ -484,4 +486,30 @@ public sealed class OtOpcUaConfigDbContext(DbContextOptions<OtOpcUaConfigDbConte
|
||||
e.HasIndex(x => x.EquipmentUuid).HasDatabaseName("IX_ExternalIdReservation_Equipment");
|
||||
});
|
||||
}
|
||||
|
||||
private static void ConfigureDriverHostStatus(ModelBuilder modelBuilder)
|
||||
{
|
||||
modelBuilder.Entity<DriverHostStatus>(e =>
|
||||
{
|
||||
e.ToTable("DriverHostStatus");
|
||||
// Composite key — one row per (server node, driver instance, probe-reported host).
|
||||
// A redundant 2-node cluster with one Galaxy driver reporting 3 platforms produces
|
||||
// 6 rows because each server node owns its own runtime view; the composite key is
|
||||
// what lets both views coexist without shadowing each other.
|
||||
e.HasKey(x => new { x.NodeId, x.DriverInstanceId, x.HostName });
|
||||
e.Property(x => x.NodeId).HasMaxLength(64);
|
||||
e.Property(x => x.DriverInstanceId).HasMaxLength(64);
|
||||
e.Property(x => x.HostName).HasMaxLength(256);
|
||||
e.Property(x => x.State).HasConversion<string>().HasMaxLength(16);
|
||||
e.Property(x => x.StateChangedUtc).HasColumnType("datetime2(3)");
|
||||
e.Property(x => x.LastSeenUtc).HasColumnType("datetime2(3)");
|
||||
e.Property(x => x.Detail).HasMaxLength(1024);
|
||||
|
||||
// NodeId-only index drives the Admin UI's per-cluster drill-down (select all host
|
||||
// statuses for the nodes of a specific cluster via join on ClusterNode.ClusterId).
|
||||
e.HasIndex(x => x.NodeId).HasDatabaseName("IX_DriverHostStatus_Node");
|
||||
// LastSeenUtc index powers the Admin UI's stale-row query (now - LastSeen > N).
|
||||
e.HasIndex(x => x.LastSeenUtc).HasDatabaseName("IX_DriverHostStatus_LastSeen");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
143
src/ZB.MOM.WW.OtOpcUa.Server/HostStatusPublisher.cs
Normal file
143
src/ZB.MOM.WW.OtOpcUa.Server/HostStatusPublisher.cs
Normal file
@@ -0,0 +1,143 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
/// <summary>
|
||||
/// Walks every registered driver once per heartbeat interval, asks each
|
||||
/// <see cref="IHostConnectivityProbe"/>-capable driver for its current
|
||||
/// <see cref="HostConnectivityStatus"/> list, and upserts one
|
||||
/// <see cref="DriverHostStatus"/> row per (NodeId, DriverInstanceId, HostName) into the
|
||||
/// central config DB. Powers the Admin UI's per-host drill-down page (LMX follow-up #7).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Polling rather than event-driven: simpler, and matches the cadence the Admin UI
|
||||
/// consumes. An event-subscription optimization (push on <c>OnHostStatusChanged</c> for
|
||||
/// immediate reflection) is a straightforward follow-up but adds lifecycle complexity
|
||||
/// — drivers can be registered after the publisher starts, and subscribing to each
|
||||
/// one's event on register + unsubscribing on unregister requires DriverHost to expose
|
||||
/// lifecycle events it doesn't today.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <see cref="DriverHostStatus.LastSeenUtc"/> advances every heartbeat so the Admin UI
|
||||
/// can flag stale rows from a crashed Server process independent of
|
||||
/// <see cref="DriverHostStatus.State"/> — a Faulted publisher that stops heartbeating
|
||||
/// stays Faulted in the DB but its LastSeenUtc ages out, which is the signal
|
||||
/// operators actually want.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// If the DB is unreachable on a given tick, the publisher logs and moves on — it
|
||||
/// does not retry or buffer. The next heartbeat picks up the current-state snapshot,
|
||||
/// which is more useful than replaying stale transitions after a long outage.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class HostStatusPublisher(
|
||||
DriverHost driverHost,
|
||||
NodeOptions nodeOptions,
|
||||
IServiceScopeFactory scopeFactory,
|
||||
ILogger<HostStatusPublisher> logger) : BackgroundService
|
||||
{
|
||||
internal static readonly TimeSpan HeartbeatInterval = TimeSpan.FromSeconds(10);
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
// Wait a short moment at startup so NodeBootstrap's RegisterAsync calls have had a
|
||||
// chance to land. First tick runs immediately after so a freshly-started Server
|
||||
// surfaces its host topology in the Admin UI without waiting a full interval.
|
||||
try { await Task.Delay(TimeSpan.FromSeconds(2), stoppingToken); }
|
||||
catch (OperationCanceledException) { return; }
|
||||
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
try { await PublishOnceAsync(stoppingToken); }
|
||||
catch (OperationCanceledException) { return; }
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Never take down the Server on a publisher failure. Log and continue —
|
||||
// stale-row detection on the Admin side will surface the outage.
|
||||
logger.LogWarning(ex, "Host-status publisher tick failed — will retry next heartbeat");
|
||||
}
|
||||
|
||||
try { await Task.Delay(HeartbeatInterval, stoppingToken); }
|
||||
catch (OperationCanceledException) { return; }
|
||||
}
|
||||
}
|
||||
|
||||
internal async Task PublishOnceAsync(CancellationToken ct)
|
||||
{
|
||||
var driverIds = driverHost.RegisteredDriverIds;
|
||||
if (driverIds.Count == 0) return;
|
||||
|
||||
var now = DateTime.UtcNow;
|
||||
using var scope = scopeFactory.CreateScope();
|
||||
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
|
||||
|
||||
foreach (var driverId in driverIds)
|
||||
{
|
||||
var driver = driverHost.GetDriver(driverId);
|
||||
if (driver is not IHostConnectivityProbe probe) continue;
|
||||
|
||||
IReadOnlyList<HostConnectivityStatus> statuses;
|
||||
try { statuses = probe.GetHostStatuses(); }
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.LogWarning(ex, "Driver {DriverId} GetHostStatuses threw — skipping this tick", driverId);
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach (var status in statuses)
|
||||
{
|
||||
await UpsertAsync(db, driverId, status, now, ct);
|
||||
}
|
||||
}
|
||||
|
||||
await db.SaveChangesAsync(ct);
|
||||
}
|
||||
|
||||
private async Task UpsertAsync(OtOpcUaConfigDbContext db, string driverId,
|
||||
HostConnectivityStatus status, DateTime now, CancellationToken ct)
|
||||
{
|
||||
var mapped = MapState(status.State);
|
||||
var existing = await db.DriverHostStatuses.SingleOrDefaultAsync(r =>
|
||||
r.NodeId == nodeOptions.NodeId
|
||||
&& r.DriverInstanceId == driverId
|
||||
&& r.HostName == status.HostName, ct);
|
||||
|
||||
if (existing is null)
|
||||
{
|
||||
db.DriverHostStatuses.Add(new DriverHostStatus
|
||||
{
|
||||
NodeId = nodeOptions.NodeId,
|
||||
DriverInstanceId = driverId,
|
||||
HostName = status.HostName,
|
||||
State = mapped,
|
||||
StateChangedUtc = status.LastChangedUtc,
|
||||
LastSeenUtc = now,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
existing.LastSeenUtc = now;
|
||||
if (existing.State != mapped)
|
||||
{
|
||||
existing.State = mapped;
|
||||
existing.StateChangedUtc = status.LastChangedUtc;
|
||||
}
|
||||
}
|
||||
|
||||
internal static DriverHostState MapState(HostState state) => state switch
|
||||
{
|
||||
HostState.Running => DriverHostState.Running,
|
||||
HostState.Stopped => DriverHostState.Stopped,
|
||||
HostState.Faulted => DriverHostState.Faulted,
|
||||
_ => DriverHostState.Unknown,
|
||||
};
|
||||
}
|
||||
@@ -1,8 +1,10 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Serilog;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Server;
|
||||
@@ -72,5 +74,11 @@ builder.Services.AddSingleton<NodeBootstrap>();
|
||||
builder.Services.AddSingleton<OpcUaApplicationHost>();
|
||||
builder.Services.AddHostedService<OpcUaServerService>();
|
||||
|
||||
// Central-config DB access for the host-status publisher (LMX follow-up #7). Scoped context
|
||||
// so per-heartbeat change-tracking stays isolated; publisher opens one scope per tick.
|
||||
builder.Services.AddDbContext<OtOpcUaConfigDbContext>(opt =>
|
||||
opt.UseSqlServer(options.ConfigDbConnectionString));
|
||||
builder.Services.AddHostedService<HostStatusPublisher>();
|
||||
|
||||
var host = builder.Build();
|
||||
await host.RunAsync();
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Server" Version="1.5.374.126"/>
|
||||
<PackageReference Include="OPCFoundation.NetStandard.Opc.Ua.Configuration" Version="1.5.374.126"/>
|
||||
<PackageReference Include="Novell.Directory.Ldap.NETStandard" Version="3.6.0"/>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore.SqlServer" Version="10.0.0"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.Tests;
|
||||
|
||||
/// <summary>
|
||||
/// End-to-end round-trip through the DB for the <see cref="DriverHostStatus"/> entity
|
||||
/// added in PR 33 — exercises the composite primary key (NodeId, DriverInstanceId,
|
||||
/// HostName), string-backed <c>DriverHostState</c> conversion, and the two indexes the
|
||||
/// Admin UI's drill-down queries will scan (NodeId, LastSeenUtc).
|
||||
/// </summary>
|
||||
[Trait("Category", "SchemaCompliance")]
|
||||
[Collection(nameof(SchemaComplianceCollection))]
|
||||
public sealed class DriverHostStatusTests(SchemaComplianceFixture fixture)
|
||||
{
|
||||
[Fact]
|
||||
public async Task Composite_key_allows_same_host_across_different_nodes_or_drivers()
|
||||
{
|
||||
await using var ctx = NewContext();
|
||||
|
||||
// Same HostName + DriverInstanceId across two different server nodes — classic 2-node
|
||||
// redundancy case. Both rows must be insertable because each server node owns its own
|
||||
// runtime view of the shared host.
|
||||
var now = DateTime.UtcNow;
|
||||
ctx.DriverHostStatuses.Add(new DriverHostStatus
|
||||
{
|
||||
NodeId = "node-a", DriverInstanceId = "galaxy-1", HostName = "GRPlatform",
|
||||
State = DriverHostState.Running,
|
||||
StateChangedUtc = now, LastSeenUtc = now,
|
||||
});
|
||||
ctx.DriverHostStatuses.Add(new DriverHostStatus
|
||||
{
|
||||
NodeId = "node-b", DriverInstanceId = "galaxy-1", HostName = "GRPlatform",
|
||||
State = DriverHostState.Stopped,
|
||||
StateChangedUtc = now, LastSeenUtc = now,
|
||||
Detail = "secondary hasn't taken over yet",
|
||||
});
|
||||
// Same server node + host, different driver instance — second driver doesn't clobber.
|
||||
ctx.DriverHostStatuses.Add(new DriverHostStatus
|
||||
{
|
||||
NodeId = "node-a", DriverInstanceId = "modbus-plc1", HostName = "GRPlatform",
|
||||
State = DriverHostState.Running,
|
||||
StateChangedUtc = now, LastSeenUtc = now,
|
||||
});
|
||||
await ctx.SaveChangesAsync();
|
||||
|
||||
var rows = await ctx.DriverHostStatuses.AsNoTracking()
|
||||
.Where(r => r.HostName == "GRPlatform").ToListAsync();
|
||||
|
||||
rows.Count.ShouldBe(3);
|
||||
rows.ShouldContain(r => r.NodeId == "node-a" && r.DriverInstanceId == "galaxy-1");
|
||||
rows.ShouldContain(r => r.NodeId == "node-b" && r.State == DriverHostState.Stopped && r.Detail == "secondary hasn't taken over yet");
|
||||
rows.ShouldContain(r => r.NodeId == "node-a" && r.DriverInstanceId == "modbus-plc1");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Upsert_pattern_for_same_key_updates_in_place()
|
||||
{
|
||||
// The publisher hosted service (follow-up PR) upserts on every transition +
|
||||
// heartbeat. This test pins the two-step pattern it will use: check-then-add-or-update
|
||||
// keyed on the composite PK. If the composite key ever changes, this test breaks
|
||||
// loudly so the publisher gets a synchronized update.
|
||||
await using var ctx = NewContext();
|
||||
var t0 = DateTime.UtcNow;
|
||||
ctx.DriverHostStatuses.Add(new DriverHostStatus
|
||||
{
|
||||
NodeId = "upsert-node", DriverInstanceId = "upsert-driver", HostName = "upsert-host",
|
||||
State = DriverHostState.Running,
|
||||
StateChangedUtc = t0, LastSeenUtc = t0,
|
||||
});
|
||||
await ctx.SaveChangesAsync();
|
||||
|
||||
var t1 = t0.AddSeconds(30);
|
||||
await using (var ctx2 = NewContext())
|
||||
{
|
||||
var existing = await ctx2.DriverHostStatuses.SingleAsync(r =>
|
||||
r.NodeId == "upsert-node" && r.DriverInstanceId == "upsert-driver" && r.HostName == "upsert-host");
|
||||
existing.State = DriverHostState.Faulted;
|
||||
existing.StateChangedUtc = t1;
|
||||
existing.LastSeenUtc = t1;
|
||||
existing.Detail = "transport reset by peer";
|
||||
await ctx2.SaveChangesAsync();
|
||||
}
|
||||
|
||||
await using var ctx3 = NewContext();
|
||||
var final = await ctx3.DriverHostStatuses.AsNoTracking().SingleAsync(r =>
|
||||
r.NodeId == "upsert-node" && r.HostName == "upsert-host");
|
||||
final.State.ShouldBe(DriverHostState.Faulted);
|
||||
final.Detail.ShouldBe("transport reset by peer");
|
||||
// Only one row — a naive "always insert" would have created a duplicate PK and thrown.
|
||||
(await ctx3.DriverHostStatuses.CountAsync(r => r.NodeId == "upsert-node")).ShouldBe(1);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Enum_persists_as_string_not_int()
|
||||
{
|
||||
// Fluent config sets HasConversion<string>() on State — the DB stores 'Running' /
|
||||
// 'Stopped' / 'Faulted' / 'Unknown' as nvarchar(16). Verify by reading the raw
|
||||
// string back via ADO; if someone drops the conversion the column will contain '1'
|
||||
// / '2' / '3' and this assertion fails. Matters because DBAs inspecting the table
|
||||
// directly should see readable state names, not enum ordinals.
|
||||
await using var ctx = NewContext();
|
||||
ctx.DriverHostStatuses.Add(new DriverHostStatus
|
||||
{
|
||||
NodeId = "enum-node", DriverInstanceId = "enum-driver", HostName = "enum-host",
|
||||
State = DriverHostState.Faulted,
|
||||
StateChangedUtc = DateTime.UtcNow, LastSeenUtc = DateTime.UtcNow,
|
||||
});
|
||||
await ctx.SaveChangesAsync();
|
||||
|
||||
await using var conn = fixture.OpenConnection();
|
||||
using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = "SELECT [State] FROM DriverHostStatus WHERE NodeId = 'enum-node'";
|
||||
var rawValue = (string?)await cmd.ExecuteScalarAsync();
|
||||
rawValue.ShouldBe("Faulted");
|
||||
}
|
||||
|
||||
private OtOpcUaConfigDbContext NewContext()
|
||||
{
|
||||
var options = new DbContextOptionsBuilder<OtOpcUaConfigDbContext>()
|
||||
.UseSqlServer(fixture.ConnectionString)
|
||||
.Options;
|
||||
return new OtOpcUaConfigDbContext(options);
|
||||
}
|
||||
}
|
||||
@@ -28,6 +28,7 @@ public sealed class SchemaComplianceTests
|
||||
"Namespace", "UnsArea", "UnsLine",
|
||||
"DriverInstance", "Device", "Equipment", "Tag", "PollGroup",
|
||||
"NodeAcl", "ExternalIdReservation",
|
||||
"DriverHostStatus",
|
||||
};
|
||||
|
||||
var actual = QueryStrings(@"
|
||||
|
||||
197
tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HostStatusPublisherTests.cs
Normal file
197
tests/ZB.MOM.WW.OtOpcUa.Server.Tests/HostStatusPublisherTests.cs
Normal file
@@ -0,0 +1,197 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Hosting;
|
||||
using ZB.MOM.WW.OtOpcUa.Server;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Server.Tests;
|
||||
|
||||
[Trait("Category", "Integration")]
|
||||
public sealed class HostStatusPublisherTests : IDisposable
|
||||
{
|
||||
private const string DefaultServer = "localhost,14330";
|
||||
private const string DefaultSaPassword = "OtOpcUaDev_2026!";
|
||||
|
||||
private readonly string _databaseName = $"OtOpcUaPublisher_{Guid.NewGuid():N}";
|
||||
private readonly string _connectionString;
|
||||
private readonly ServiceProvider _sp;
|
||||
|
||||
public HostStatusPublisherTests()
|
||||
{
|
||||
var server = Environment.GetEnvironmentVariable("OTOPCUA_CONFIG_TEST_SERVER") ?? DefaultServer;
|
||||
var password = Environment.GetEnvironmentVariable("OTOPCUA_CONFIG_TEST_SA_PASSWORD") ?? DefaultSaPassword;
|
||||
_connectionString =
|
||||
$"Server={server};Database={_databaseName};User Id=sa;Password={password};TrustServerCertificate=True;Encrypt=False;";
|
||||
|
||||
var services = new ServiceCollection();
|
||||
services.AddLogging();
|
||||
services.AddDbContext<OtOpcUaConfigDbContext>(o => o.UseSqlServer(_connectionString));
|
||||
_sp = services.BuildServiceProvider();
|
||||
|
||||
using var scope = _sp.CreateScope();
|
||||
scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>().Database.Migrate();
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_sp.Dispose();
|
||||
using var conn = new Microsoft.Data.SqlClient.SqlConnection(
|
||||
new Microsoft.Data.SqlClient.SqlConnectionStringBuilder(_connectionString) { InitialCatalog = "master" }.ConnectionString);
|
||||
conn.Open();
|
||||
using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = $@"
|
||||
IF DB_ID(N'{_databaseName}') IS NOT NULL
|
||||
BEGIN
|
||||
ALTER DATABASE [{_databaseName}] SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
|
||||
DROP DATABASE [{_databaseName}];
|
||||
END";
|
||||
cmd.ExecuteNonQuery();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Publisher_upserts_one_row_per_host_reported_by_each_probe_driver()
|
||||
{
|
||||
var driverHost = new DriverHost();
|
||||
await driverHost.RegisterAsync(new ProbeStubDriver("driver-a",
|
||||
new HostConnectivityStatus("HostA1", HostState.Running, DateTime.UtcNow),
|
||||
new HostConnectivityStatus("HostA2", HostState.Stopped, DateTime.UtcNow)),
|
||||
"{}", CancellationToken.None);
|
||||
await driverHost.RegisterAsync(new NonProbeStubDriver("driver-no-probe"), "{}", CancellationToken.None);
|
||||
|
||||
var nodeOptions = NewNodeOptions("node-a");
|
||||
var publisher = new HostStatusPublisher(driverHost, nodeOptions, _sp.GetRequiredService<IServiceScopeFactory>(),
|
||||
NullLogger<HostStatusPublisher>.Instance);
|
||||
|
||||
await publisher.PublishOnceAsync(CancellationToken.None);
|
||||
|
||||
using var scope = _sp.CreateScope();
|
||||
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
|
||||
var rows = await db.DriverHostStatuses.AsNoTracking().ToListAsync();
|
||||
|
||||
rows.Count.ShouldBe(2, "driver-no-probe doesn't implement IHostConnectivityProbe — no rows for it");
|
||||
rows.ShouldContain(r => r.HostName == "HostA1" && r.State == DriverHostState.Running && r.DriverInstanceId == "driver-a");
|
||||
rows.ShouldContain(r => r.HostName == "HostA2" && r.State == DriverHostState.Stopped && r.DriverInstanceId == "driver-a");
|
||||
rows.ShouldAllBe(r => r.NodeId == "node-a");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Second_tick_updates_LastSeenUtc_without_creating_duplicate_rows()
|
||||
{
|
||||
var driver = new ProbeStubDriver("driver-x",
|
||||
new HostConnectivityStatus("HostX", HostState.Running, DateTime.UtcNow));
|
||||
var driverHost = new DriverHost();
|
||||
await driverHost.RegisterAsync(driver, "{}", CancellationToken.None);
|
||||
|
||||
var publisher = new HostStatusPublisher(driverHost, NewNodeOptions("node-x"),
|
||||
_sp.GetRequiredService<IServiceScopeFactory>(),
|
||||
NullLogger<HostStatusPublisher>.Instance);
|
||||
|
||||
await publisher.PublishOnceAsync(CancellationToken.None);
|
||||
var firstSeen = await SingleRowAsync("node-x", "driver-x", "HostX");
|
||||
await Task.Delay(50); // guarantee a later wall-clock value so LastSeenUtc advances
|
||||
await publisher.PublishOnceAsync(CancellationToken.None);
|
||||
var secondSeen = await SingleRowAsync("node-x", "driver-x", "HostX");
|
||||
|
||||
secondSeen.LastSeenUtc.ShouldBeGreaterThan(firstSeen.LastSeenUtc,
|
||||
"heartbeat advances LastSeenUtc so Admin can stale-flag rows from crashed Servers");
|
||||
|
||||
// Still exactly one row — a naive Add-every-tick would have thrown or duplicated.
|
||||
using var scope = _sp.CreateScope();
|
||||
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
|
||||
(await db.DriverHostStatuses.CountAsync(r => r.NodeId == "node-x")).ShouldBe(1);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task State_change_between_ticks_updates_State_and_StateChangedUtc()
|
||||
{
|
||||
var driver = new ProbeStubDriver("driver-y",
|
||||
new HostConnectivityStatus("HostY", HostState.Running, DateTime.UtcNow.AddSeconds(-10)));
|
||||
var driverHost = new DriverHost();
|
||||
await driverHost.RegisterAsync(driver, "{}", CancellationToken.None);
|
||||
|
||||
var publisher = new HostStatusPublisher(driverHost, NewNodeOptions("node-y"),
|
||||
_sp.GetRequiredService<IServiceScopeFactory>(),
|
||||
NullLogger<HostStatusPublisher>.Instance);
|
||||
|
||||
await publisher.PublishOnceAsync(CancellationToken.None);
|
||||
var before = await SingleRowAsync("node-y", "driver-y", "HostY");
|
||||
|
||||
// Swap the driver's reported state to Faulted with a newer transition timestamp.
|
||||
var newChange = DateTime.UtcNow;
|
||||
driver.Statuses = [new HostConnectivityStatus("HostY", HostState.Faulted, newChange)];
|
||||
await publisher.PublishOnceAsync(CancellationToken.None);
|
||||
|
||||
var after = await SingleRowAsync("node-y", "driver-y", "HostY");
|
||||
after.State.ShouldBe(DriverHostState.Faulted);
|
||||
// datetime2(3) has millisecond precision — DateTime.UtcNow carries up to 100ns ticks,
|
||||
// so the stored value rounds down. Compare at millisecond granularity to stay clean.
|
||||
after.StateChangedUtc.ShouldBe(newChange, tolerance: TimeSpan.FromMilliseconds(1));
|
||||
after.StateChangedUtc.ShouldBeGreaterThan(before.StateChangedUtc,
|
||||
"StateChangedUtc must advance when the state actually changed");
|
||||
before.State.ShouldBe(DriverHostState.Running);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void MapState_translates_every_HostState_member()
|
||||
{
|
||||
HostStatusPublisher.MapState(HostState.Running).ShouldBe(DriverHostState.Running);
|
||||
HostStatusPublisher.MapState(HostState.Stopped).ShouldBe(DriverHostState.Stopped);
|
||||
HostStatusPublisher.MapState(HostState.Faulted).ShouldBe(DriverHostState.Faulted);
|
||||
HostStatusPublisher.MapState(HostState.Unknown).ShouldBe(DriverHostState.Unknown);
|
||||
}
|
||||
|
||||
private async Task<Configuration.Entities.DriverHostStatus> SingleRowAsync(string node, string driver, string host)
|
||||
{
|
||||
using var scope = _sp.CreateScope();
|
||||
var db = scope.ServiceProvider.GetRequiredService<OtOpcUaConfigDbContext>();
|
||||
return await db.DriverHostStatuses.AsNoTracking()
|
||||
.SingleAsync(r => r.NodeId == node && r.DriverInstanceId == driver && r.HostName == host);
|
||||
}
|
||||
|
||||
private static NodeOptions NewNodeOptions(string nodeId) => new()
|
||||
{
|
||||
NodeId = nodeId,
|
||||
ClusterId = "cluster-t",
|
||||
ConfigDbConnectionString = "unused-publisher-gets-db-from-scope",
|
||||
};
|
||||
|
||||
private sealed class ProbeStubDriver(string id, params HostConnectivityStatus[] initial)
|
||||
: IDriver, IHostConnectivityProbe
|
||||
{
|
||||
public HostConnectivityStatus[] Statuses { get; set; } = initial;
|
||||
public string DriverInstanceId => id;
|
||||
public string DriverType => "ProbeStub";
|
||||
|
||||
public event EventHandler<HostStatusChangedEventArgs>? OnHostStatusChanged;
|
||||
|
||||
public Task InitializeAsync(string driverConfigJson, CancellationToken ct) => Task.CompletedTask;
|
||||
public Task ReinitializeAsync(string driverConfigJson, CancellationToken ct) => Task.CompletedTask;
|
||||
public Task ShutdownAsync(CancellationToken ct) => Task.CompletedTask;
|
||||
public DriverHealth GetHealth() => new(DriverState.Healthy, DateTime.UtcNow, null);
|
||||
public long GetMemoryFootprint() => 0;
|
||||
public Task FlushOptionalCachesAsync(CancellationToken ct) => Task.CompletedTask;
|
||||
|
||||
public IReadOnlyList<HostConnectivityStatus> GetHostStatuses() => Statuses;
|
||||
|
||||
// Keeps the compiler happy — event is part of the interface contract even if unused here.
|
||||
internal void Raise(HostStatusChangedEventArgs e) => OnHostStatusChanged?.Invoke(this, e);
|
||||
}
|
||||
|
||||
private sealed class NonProbeStubDriver(string id) : IDriver
|
||||
{
|
||||
public string DriverInstanceId => id;
|
||||
public string DriverType => "NonProbeStub";
|
||||
|
||||
public Task InitializeAsync(string driverConfigJson, CancellationToken ct) => Task.CompletedTask;
|
||||
public Task ReinitializeAsync(string driverConfigJson, CancellationToken ct) => Task.CompletedTask;
|
||||
public Task ShutdownAsync(CancellationToken ct) => Task.CompletedTask;
|
||||
public DriverHealth GetHealth() => new(DriverState.Healthy, DateTime.UtcNow, null);
|
||||
public long GetMemoryFootprint() => 0;
|
||||
public Task FlushOptionalCachesAsync(CancellationToken ct) => Task.CompletedTask;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user