mbproxy: fix the dashboard's C2/M-series review findings

Closes the on-demand-capture leak cluster from the code review. The capture's armed state was driven off SignalR's ConnectionId, which changes on every transport reconnect, so a reconnect-during-view leaked a subscriber and left the capture armed forever with no viewer. PlcSubscriptionTracker now keys on a stable per-page-load tabId, and StatusBroadcaster reconciles capture arm state from the live viewer set each push cycle — making arming single-threaded and reconnect-safe. Also fixes the TagValueCapture disarm-vs-record race, the bind-failure broadcaster/listener leak, removes dead JSON-context code, and reworks the frontend cold-start retry plus an unknown-PLC watchdog. Adds tracker / broadcaster-loop / race / wire-shape test coverage.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-16 16:12:43 -04:00
parent 554b05d28c
commit 374eecd205
16 changed files with 580 additions and 212 deletions
@@ -0,0 +1,43 @@
using System.Text.Json;
using Mbproxy.Admin;
using Shouldly;
using Xunit;
namespace Mbproxy.Tests.Admin;
/// <summary>
/// Pins the JSON wire shape of the SignalR payloads. The hub serialises detail / fleet
/// payloads with a camelCase property policy (see <c>AdminEndpointHost</c>'s
/// <c>AddJsonProtocol</c>) and the dashboard JS reads camelCase field names, so a change
/// to the naming policy would silently break every field on the live feed without any
/// other test failing.
/// </summary>
[Trait("Category", "Unit")]
public sealed class DebugDtoSerializationTests
{
    // Mirrors the policy AdminEndpointHost configures on the hub's PayloadSerializerOptions.
    private static readonly JsonSerializerOptions HubOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    };

    /// <summary>
    /// Serialises a fully populated detail payload with the hub's options and checks
    /// that multi-word properties come out camelCase and never PascalCase.
    /// </summary>
    [Fact]
    public void PlcDetailResponse_SerializesWithCamelCaseFieldNames()
    {
        var tag = new TagValueDto(
            Address: 100, Width: 16, Name: "Left AirSP", HasValue: true,
            Direction: "read", RawHex: "0x1234", DecodedValue: 1234,
            UpdatedAtUtc: "2026-05-16T00:00:00Z", AgeSeconds: 1.5);
        var snapshot = new PlcDebugSnapshot(CaptureArmed: true, Tags: [tag]);
        var detail = new PlcDetailResponse(Plc: null, Debug: snapshot);

        string json = JsonSerializer.Serialize(detail, HubOptions);

        // Every check is Case.Sensitive: Shouldly's string contains defaults to
        // case-insensitive, which cannot tell camelCase from PascalCase.
        string[] expectedCamelCase = ["\"captureArmed\"", "\"decodedValue\"", "\"updatedAtUtc\""];
        foreach (string fieldName in expectedCamelCase)
        {
            json.ShouldContain(fieldName, Case.Sensitive);
        }

        string[] forbiddenPascalCase = ["\"CaptureArmed\"", "\"DecodedValue\""];
        foreach (string fieldName in forbiddenPascalCase)
        {
            json.ShouldNotContain(fieldName, Case.Sensitive);
        }
    }
}
@@ -0,0 +1,110 @@
using Mbproxy.Admin;
using Shouldly;
using Xunit;
namespace Mbproxy.Tests.Admin;
/// <summary>
/// Unit tests for <see cref="PlcSubscriptionTracker"/>, which records which PLC detail
/// pages are open keyed by tab so that the record survives a reconnect. A concurrency
/// stress test is included because the tracker is mutated from multiple SignalR
/// hub-dispatch threads.
/// </summary>
[Trait("Category", "Unit")]
public sealed class PlcSubscriptionTrackerTests
{
    /// <summary>A single connection subscribing and then going away leaves no viewer.</summary>
    [Fact]
    public void Subscribe_ThenRemoveLastConnection_ClearsViewer()
    {
        var tracker = new PlcSubscriptionTracker();

        tracker.SubscribePlc("c1", "tab", "plc");
        tracker.ActivePlcs().ShouldBe(["plc"]);

        tracker.RemoveConnection("c1");
        tracker.ActivePlcs().ShouldBeEmpty();
    }

    /// <summary>
    /// Reconnect overlap: one tab briefly holds two connections. Removing the old
    /// connection must not release the tab; this is the leak C2 guards against.
    /// </summary>
    [Fact]
    public void SameTab_TwoConnections_StaysActiveUntilLastConnectionGone()
    {
        var tracker = new PlcSubscriptionTracker();
        tracker.SubscribePlc("c-old", "tab", "plc");
        tracker.SubscribePlc("c-new", "tab", "plc");

        tracker.RemoveConnection("c-old");
        tracker.ActivePlcs().ShouldContain("plc", "the tab is still alive on the second connection");

        tracker.RemoveConnection("c-new");
        tracker.ActivePlcs().ShouldBeEmpty();
    }

    /// <summary>Two different tabs viewing the same PLC each keep it active on their own.</summary>
    [Fact]
    public void DistinctTabs_AreCountedSeparately()
    {
        var tracker = new PlcSubscriptionTracker();
        tracker.SubscribePlc("c1", "tab-A", "plc");
        tracker.SubscribePlc("c2", "tab-B", "plc");

        tracker.RemoveConnection("c1");
        tracker.ActivePlcs().ShouldContain("plc", "the second tab still views the PLC");

        tracker.RemoveConnection("c2");
        tracker.ActivePlcs().ShouldBeEmpty();
    }

    /// <summary>Subscribing twice with identical arguments must behave like subscribing once.</summary>
    [Fact]
    public void RepeatedSubscribe_SameTabSamePlc_IsIdempotent()
    {
        var tracker = new PlcSubscriptionTracker();

        // The second call is a deliberate, redundant repeat of the first.
        tracker.SubscribePlc("c1", "tab", "plc");
        tracker.SubscribePlc("c1", "tab", "plc");
        tracker.ActivePlcs().ShouldBe(["plc"]);

        tracker.RemoveConnection("c1");
        tracker.ActivePlcs().ShouldBeEmpty("a repeated subscribe must not inflate the viewer count");
    }

    /// <summary>Removing a connection releases every PLC it was subscribed to.</summary>
    [Fact]
    public void OneConnection_MultiplePlcs_AllReleasedTogether()
    {
        var tracker = new PlcSubscriptionTracker();
        tracker.SubscribePlc("c1", "tab", "plc-a");
        tracker.SubscribePlc("c1", "tab", "plc-b");
        tracker.ActivePlcs().Count.ShouldBe(2);

        tracker.RemoveConnection("c1");
        tracker.ActivePlcs().ShouldBeEmpty();
    }

    /// <summary>Removing a connection that never subscribed neither throws nor adds state.</summary>
    [Fact]
    public void RemoveConnection_Unknown_IsNoOp()
    {
        var tracker = new PlcSubscriptionTracker();

        Should.NotThrow(() => tracker.RemoveConnection("never-seen"));

        tracker.ActivePlcs().ShouldBeEmpty();
    }

    /// <summary>
    /// Stress test: many workers subscribe and immediately remove unique connections
    /// against the same PLC in parallel. Every subscribe is paired with a remove, so
    /// the tracker must end up empty.
    /// </summary>
    [Fact]
    public async Task ConcurrentSubscribeAndRemove_NeverLeaksOrThrows()
    {
        const int workerCount = 16;
        const int roundsPerWorker = 5_000;
        var tracker = new PlcSubscriptionTracker();
        var token = TestContext.Current.CancellationToken;

        // One subscribe/remove churn loop; ids are unique per worker and round.
        void Churn(int workerNo)
        {
            for (int round = 0; round < roundsPerWorker; round++)
            {
                string conn = $"c{workerNo}-{round}";
                string tab = $"tab{workerNo}-{round}";
                tracker.SubscribePlc(conn, tab, "plc");
                tracker.RemoveConnection(conn);
            }
        }

        var workers = new Task[workerCount];
        for (int index = 0; index < workerCount; index++)
        {
            int workerNo = index; // per-iteration copy so each closure sees its own number
            workers[index] = Task.Run(() => Churn(workerNo), token);
        }

        await Task.WhenAll(workers);

        tracker.ActivePlcs().ShouldBeEmpty(
            "every subscribe was paired with a remove — no viewer count may leak");
    }
}
@@ -45,6 +45,8 @@ public sealed class StatusBroadcasterTests
hostBuilder.Configuration.AddInMemoryCollection(new Dictionary<string, string?>
{
["Mbproxy:AdminPort"] = "0",
// Fast tick so the LoopAsync test observes several cycles quickly.
["Mbproxy:AdminPushIntervalMs"] = "100",
});
hostBuilder.Services.AddSerilog(
new LoggerConfiguration().MinimumLevel.Fatal().CreateLogger(), dispose: false);
@@ -96,7 +98,7 @@ public sealed class StatusBroadcasterTests
{
await using var h = await BuildAsync();
h.Registry.GetOrCreate("plc-x", BcdTagMap.Empty);
h.Tracker.Add("conn-1", "plc-x");
h.Tracker.SubscribePlc("conn-1", "tab-1", "plc-x");
await h.Broadcaster.PushOnceAsync(TestContext.Current.CancellationToken);
@@ -105,16 +107,60 @@ public sealed class StatusBroadcasterTests
push.Detail.Debug.ShouldNotBeNull();
}
[Fact]
public async Task PushOnce_ReconcilesCaptureArmState_FromActiveViewers()
{
    // Walks one PLC through "no viewer -> viewer -> no viewer" and checks that each
    // push brings the capture's armed flag in line with the tracker's viewer set.
    var ct = TestContext.Current.CancellationToken;
    await using var harness = await BuildAsync();
    harness.Registry.GetOrCreate("plc-x", BcdTagMap.Empty);

    // Phase 1: nobody is viewing, so a push must leave the capture disarmed.
    await harness.Broadcaster.PushOnceAsync(ct);
    harness.Registry.TryGet("plc-x", out var capture).ShouldBeTrue();
    capture.IsArmed.ShouldBeFalse("no detail page open — capture stays disarmed");

    // Phase 2: a detail page opens; the following push arms the capture.
    harness.Tracker.SubscribePlc("conn-1", "tab-1", "plc-x");
    await harness.Broadcaster.PushOnceAsync(ct);
    capture.IsArmed.ShouldBeTrue("the broadcaster reconciles the capture armed for a viewed PLC");

    // Phase 3: the viewer goes away; the following push disarms the capture again.
    harness.Tracker.RemoveConnection("conn-1");
    await harness.Broadcaster.PushOnceAsync(ct);
    capture.IsArmed.ShouldBeFalse("the broadcaster disarms a capture once its last viewer leaves");
}
[Fact]
public async Task StopAsync_DisarmsEveryCapture()
{
await using var h = await BuildAsync();
h.Registry.GetOrCreate("plc-x", BcdTagMap.Empty);
h.Registry.Arm("plc-x");
h.Registry.ReconcileArmed(["plc-x"]);
await h.Broadcaster.StopAsync();
h.Registry.TryGet("plc-x", out var capture).ShouldBeTrue();
capture.IsArmed.ShouldBeFalse();
}
[Fact]
public async Task Loop_PushesRepeatedly_ThenStopsAfterStopAsync()
{
    // Verifies the background loop: it must push the fleet snapshot repeatedly while
    // running, and push nothing further once StopAsync has completed.
    var ct = TestContext.Current.CancellationToken;
    await using var h = await BuildAsync();
    h.Broadcaster.Start();

    // The harness runs at AdminPushIntervalMs = 100 ms; wait (generously) for the
    // background loop to complete several cycles. The deadline is measured with a
    // Stopwatch rather than DateTime.UtcNow so a wall-clock adjustment during the
    // test can neither cut the wait short nor stretch it.
    var timeout = TimeSpan.FromSeconds(10);
    var waited = System.Diagnostics.Stopwatch.StartNew();
    while (h.Sink.FleetPushes.Count < 3 && waited.Elapsed < timeout)
    {
        await Task.Delay(50, ct);
    }

    h.Sink.FleetPushes.Count.ShouldBeGreaterThanOrEqualTo(3,
        "the background loop must push the fleet snapshot every interval");

    await h.Broadcaster.StopAsync();

    // Sample the count after the stop, then wait several push intervals: the count
    // must not move.
    int afterStop = h.Sink.FleetPushes.Count;
    await Task.Delay(400, ct);
    h.Sink.FleetPushes.Count.ShouldBe(afterStop, "no pushes may occur after StopAsync");
}
}
@@ -1,27 +1,23 @@
using Mbproxy.Admin;
using Mbproxy.Bcd;
using Mbproxy.Proxy;
using Shouldly;
using Xunit;
namespace Mbproxy.Tests.Admin;
/// <summary>
/// Unit tests for <see cref="StatusHub"/> — group joins and on-demand capture
/// arming. Uses hand-written SignalR test doubles (see <see cref="SignalRFakes"/>);
/// no SignalR host is started.
/// Unit tests for <see cref="StatusHub"/> — group joins and subscription tracking.
/// Capture arming is the broadcaster's job; the hub only mutates the
/// <see cref="PlcSubscriptionTracker"/>. Uses hand-written SignalR test doubles
/// (see <see cref="SignalRFakes"/>); no SignalR host is started.
/// </summary>
[Trait("Category", "Unit")]
public sealed class StatusHubTests
{
private static StatusHub MakeHub(
string connectionId,
PlcSubscriptionTracker tracker,
TagCaptureRegistry registry,
out FakeGroupManager groups)
string connectionId, PlcSubscriptionTracker tracker, out FakeGroupManager groups)
{
groups = new FakeGroupManager();
return new StatusHub(tracker, registry)
return new StatusHub(tracker)
{
Context = new FakeHubCallerContext(connectionId),
Groups = groups,
@@ -31,7 +27,7 @@ public sealed class StatusHubTests
[Fact]
public async Task SubscribeFleet_JoinsFleetGroup()
{
var hub = MakeHub("conn-1", new PlcSubscriptionTracker(), new TagCaptureRegistry(), out var groups);
var hub = MakeHub("conn-1", new PlcSubscriptionTracker(), out var groups);
await hub.SubscribeFleet();
@@ -39,53 +35,63 @@ public sealed class StatusHubTests
}
[Fact]
public async Task SubscribePlc_JoinsPlcGroup_AndArmsCapture()
public async Task SubscribePlc_JoinsPlcGroup_AndTracksViewer()
{
var registry = new TagCaptureRegistry();
registry.GetOrCreate("plc-1", BcdTagMap.Empty);
var hub = MakeHub("conn-1", new PlcSubscriptionTracker(), registry, out var groups);
var tracker = new PlcSubscriptionTracker();
var hub = MakeHub("conn-1", tracker, out var groups);
await hub.SubscribePlc("plc-1");
await hub.SubscribePlc("plc-1", "tab-A");
groups.Added.ShouldContain(("conn-1", StatusHub.PlcGroup("plc-1")));
registry.TryGet("plc-1", out var capture).ShouldBeTrue();
capture.IsArmed.ShouldBeTrue();
tracker.ActivePlcs().ShouldContain("plc-1");
}
[Fact]
public async Task SecondSubscriber_FirstLeaveKeepsArmed_LastLeaveDisarms()
public async Task Reconnect_SameTab_NewConnection_DoesNotLeakViewer()
{
var tracker = new PlcSubscriptionTracker();
var registry = new TagCaptureRegistry();
registry.GetOrCreate("plc-1", BcdTagMap.Empty);
// A transport reconnect: the same browser tab acquires a new ConnectionId and
// re-subscribes; the old connection's OnDisconnectedAsync then fires late. The
// PLC must not be left with a stranded viewer once the tab finally closes.
var tracker = new PlcSubscriptionTracker();
var hub1 = MakeHub("conn-1", tracker, registry, out _);
var hub2 = MakeHub("conn-2", tracker, registry, out _);
var first = MakeHub("conn-old", tracker, out _);
await first.SubscribePlc("plc-1", "tab-A");
await hub1.SubscribePlc("plc-1");
await hub2.SubscribePlc("plc-1");
var second = MakeHub("conn-new", tracker, out _);
await second.SubscribePlc("plc-1", "tab-A");
registry.TryGet("plc-1", out var capture).ShouldBeTrue();
capture.IsArmed.ShouldBeTrue();
await first.OnDisconnectedAsync(null); // late disconnect of the old connection
tracker.ActivePlcs().ShouldContain("plc-1",
"the tab is still open on the reconnected connection");
// First viewer leaves — a second viewer remains, so capture stays armed.
await hub1.OnDisconnectedAsync(null);
capture.IsArmed.ShouldBeTrue("capture must stay armed while another detail page is open");
// Last viewer leaves — capture disarms.
await hub2.OnDisconnectedAsync(null);
capture.IsArmed.ShouldBeFalse("capture must disarm when the last viewer leaves");
await second.OnDisconnectedAsync(null); // the tab finally closes
tracker.ActivePlcs().ShouldBeEmpty("no viewer may be stranded after the tab closes");
}
[Fact]
public async Task SubscribePlc_UnknownPlc_DoesNotThrow_AndArmsNothing()
public async Task TwoTabs_FirstCloseKeepsActive_LastCloseClears()
{
var registry = new TagCaptureRegistry(); // no captures registered
var hub = MakeHub("conn-1", new PlcSubscriptionTracker(), registry, out var groups);
var tracker = new PlcSubscriptionTracker();
await Should.NotThrowAsync(async () => await hub.SubscribePlc("ghost"));
var tabA = MakeHub("conn-a", tracker, out _);
var tabB = MakeHub("conn-b", tracker, out _);
await tabA.SubscribePlc("plc-1", "tab-A");
await tabB.SubscribePlc("plc-1", "tab-B");
await tabA.OnDisconnectedAsync(null);
tracker.ActivePlcs().ShouldContain("plc-1", "a second tab is still viewing the PLC");
await tabB.OnDisconnectedAsync(null);
tracker.ActivePlcs().ShouldBeEmpty();
}
[Fact]
public async Task SubscribePlc_UnknownPlc_DoesNotThrow()
{
var hub = MakeHub("conn-1", new PlcSubscriptionTracker(), out var groups);
await Should.NotThrowAsync(async () => await hub.SubscribePlc("ghost", "tab-A"));
groups.Added.ShouldContain(("conn-1", StatusHub.PlcGroup("ghost")));
registry.TryGet("ghost", out _).ShouldBeFalse();
}
}
@@ -7,8 +7,9 @@ using Xunit;
namespace Mbproxy.Tests.Proxy;
/// <summary>
/// Unit tests for <see cref="TagCaptureRegistry"/> — the shared seam that arms and
/// disarms per-PLC <see cref="TagValueCapture"/> instances.
/// Unit tests for <see cref="TagCaptureRegistry"/> — the shared seam holding per-PLC
/// <see cref="TagValueCapture"/> instances. Arm state is reconciled in bulk against the
/// live viewer set (not toggled per PLC) so the broadcaster is the single authority.
/// </summary>
[Trait("Category", "Unit")]
public sealed class TagCaptureRegistryTests
@@ -25,48 +26,69 @@ public sealed class TagCaptureRegistryTests
}
[Fact]
public void GetOrCreate_ReturnsSameInstance_OnRepeatCall_WhenTagSetUnchanged()
public void GetOrCreate_ReturnsLiveInstance_OnRepeatCall()
{
var registry = new TagCaptureRegistry();
var first = registry.GetOrCreate("plc-1", Map((100, 16)));
registry.GetOrCreate("plc-1", Map((100, 16)));
var second = registry.GetOrCreate("plc-1", Map((100, 16)));
// AddOrUpdate's update path rebuilds; both must be live and consistent.
second.TagCount.ShouldBe(1);
registry.TryGet("plc-1", out var current).ShouldBeTrue();
current.ShouldBeSameAs(second);
}
[Fact]
public void GetOrCreate_Rebuild_PreservesArmedFlag()
public void GetOrCreate_Rebuild_ProducesDisarmedCapture_AndReconcileReArms()
{
// The rebuilt capture is intentionally disarmed: ReconcileArmed re-arms it within
// one push cycle if the PLC still has a viewer, so arm state is never carried
// across the rebuild — which removes any arm-vs-rebuild race.
var registry = new TagCaptureRegistry();
var capture = registry.GetOrCreate("plc-1", Map((100, 16)));
capture.Arm();
registry.GetOrCreate("plc-1", Map((100, 16)));
registry.ReconcileArmed(["plc-1"]);
registry.TryGet("plc-1", out var armed).ShouldBeTrue();
armed.IsArmed.ShouldBeTrue();
// Hot-reload reseat: same PLC, changed tag set.
var rebuilt = registry.GetOrCreate("plc-1", Map((100, 16), (200, 32)));
rebuilt.ShouldNotBeSameAs(capture);
rebuilt.IsArmed.ShouldBeTrue("a rebuilt capture must keep capturing for an open detail page");
rebuilt.ShouldNotBeSameAs(armed);
rebuilt.IsArmed.ShouldBeFalse("a rebuilt capture starts disarmed");
rebuilt.TagCount.ShouldBe(2);
// The next reconcile re-arms it because the PLC is still viewed.
registry.ReconcileArmed(["plc-1"]);
rebuilt.IsArmed.ShouldBeTrue();
}
[Fact]
public void Arm_And_Disarm_ReachTheRightCapture()
public void ReconcileArmed_ArmsActivePlcs_DisarmsTheRest()
{
var registry = new TagCaptureRegistry();
registry.GetOrCreate("plc-1", Map((100, 16)));
registry.GetOrCreate("plc-2", Map((100, 16)));
registry.Arm("plc-1");
registry.ReconcileArmed(["plc-1"]);
registry.TryGet("plc-1", out var c1).ShouldBeTrue();
registry.TryGet("plc-2", out var c2).ShouldBeTrue();
c1.IsArmed.ShouldBeTrue();
c2.IsArmed.ShouldBeFalse();
registry.Disarm("plc-1");
// plc-1's viewer leaves, plc-2 gains one.
registry.ReconcileArmed(["plc-2"]);
c1.IsArmed.ShouldBeFalse();
c2.IsArmed.ShouldBeTrue();
}
[Fact]
public void ReconcileArmed_EmptyActiveSet_DisarmsEverything()
{
    // Arm a capture through a reconcile that lists it, then reconcile against an
    // empty viewer set: the capture must end up disarmed.
    var captures = new TagCaptureRegistry();
    captures.GetOrCreate("plc-1", Map((100, 16)));
    captures.ReconcileArmed(["plc-1"]);

    captures.ReconcileArmed(Array.Empty<string>());

    bool found = captures.TryGet("plc-1", out var capture);
    found.ShouldBeTrue();
    capture.IsArmed.ShouldBeFalse();
}
@@ -76,8 +98,7 @@ public sealed class TagCaptureRegistryTests
var registry = new TagCaptureRegistry();
registry.GetOrCreate("plc-1", Map((100, 16)));
registry.GetOrCreate("plc-2", Map((100, 16)));
registry.Arm("plc-1");
registry.Arm("plc-2");
registry.ReconcileArmed(["plc-1", "plc-2"]);
registry.DisarmAll();
@@ -92,8 +113,7 @@ public sealed class TagCaptureRegistryTests
{
var registry = new TagCaptureRegistry();
Should.NotThrow(() => registry.Arm("ghost"));
Should.NotThrow(() => registry.Disarm("ghost"));
Should.NotThrow(() => registry.ReconcileArmed(["ghost"]));
Should.NotThrow(() => registry.Remove("ghost"));
registry.TryGet("ghost", out _).ShouldBeFalse();
}
@@ -174,4 +174,38 @@ public sealed class TagValueCaptureTests
await Task.WhenAll([.. writers, reader]);
tornObserved.ShouldBeFalse("Snapshot must never observe a torn (half-updated) slot");
}
[Fact]
public async Task ConcurrentRecordAndDisarm_LeavesNoStaleObservation()
{
    // M7 regression. Record() checks _armed and then writes; Disarm() flips _armed and
    // then clears the slots. If a Record passes the armed check, Disarm then runs, and
    // the Record write lands afterwards, a stale observation is stranded on a disarmed
    // capture, which breaks the "reopened page shows no stale data" contract. Record's
    // re-check after the write must undo that. The toggler's final operation is
    // Disarm, so the capture ends disarmed and a clean Snapshot is a deterministic
    // post-condition of the fix.
    const int recordCalls = 400_000;
    const int toggleRounds = 80_000;
    var token = TestContext.Current.CancellationToken;
    var capture = Make((100, 16));

    // Hammers the same address with identical observations.
    void RecordRepeatedly()
    {
        for (int call = 0; call < recordCalls; call++)
        {
            capture.Record(100, 0x1234, 0, 1234, CaptureDirection.Read);
        }
    }

    // Flips the capture armed and disarmed; always finishes on Disarm.
    void ToggleRepeatedly()
    {
        for (int round = 0; round < toggleRounds; round++)
        {
            capture.Arm();
            capture.Disarm();
        }
    }

    await Task.WhenAll(
        Task.Run(() => RecordRepeatedly(), token),
        Task.Run(() => ToggleRepeatedly(), token));

    capture.IsArmed.ShouldBeFalse();
    capture.Snapshot().ShouldAllBe(s => s.UpdatedAtUtc == null,
        "a disarmed capture must never retain a recorded observation");
}
}