fix(historian-sidecar): don't wedge the TCP listener when Start() bind fails
v2-ci / build (push) Failing after 46s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped
v2-ci / build (push) Failing after 46s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped
Live verification on a Windows VM surfaced a crash loop: TcpFrameServer.EnsureListening assigned _listener = new TcpListener(...) BEFORE calling Start(). When Start() throws — e.g. the port is in a Windows excluded/reserved range (WSAEACCES) or already in use — the field was left non-null-but-unstarted, so the `if (_listener is not null) return` guard permanently skipped re-Start() and every subsequent AcceptTcpClientAsync() threw the misleading InvalidOperationException "Not listening" → 20 failures → exit 2 → NSSM restart → loop. Now _listener is assigned only after Start() succeeds, so a transient bind failure is retried and a permanent one surfaces the real bind error each iteration. Adds a regression test that forces a bind conflict and asserts the SocketException persists.
This commit is contained in:
@@ -48,8 +48,16 @@ public sealed class TcpFrameServer : IDisposable
|
||||
private void EnsureListening()
|
||||
{
|
||||
if (_listener is not null) return;
|
||||
_listener = new TcpListener(_bind, _port);
|
||||
_listener.Start();
|
||||
|
||||
// Assign _listener ONLY after Start() succeeds. If Start() throws (e.g. the port is in
|
||||
// a Windows excluded/reserved range → WSAEACCES "access forbidden", or already in use),
|
||||
// _listener must stay null so the next RunAsync iteration retries the full create+Start.
|
||||
// Assigning before Start() leaves a non-null-but-unstarted listener that the
|
||||
// `if (_listener is not null) return` guard would never re-Start, turning a one-time
|
||||
// bind error into a permanent misleading "Not listening" crash loop.
|
||||
var listener = new TcpListener(_bind, _port);
|
||||
listener.Start();
|
||||
_listener = listener;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
+30
@@ -264,4 +264,34 @@ public sealed class TcpRoundTripTests
|
||||
cts.Cancel();
|
||||
try { await serverLoop; } catch (OperationCanceledException) { /* expected */ }
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task BindFailure_SurfacesBindError_NotPermanentNotListening()
|
||||
{
|
||||
// Regression (live-caught 2026-06-12): when TcpFrameServer's listener bind fails (port in a
|
||||
// Windows excluded range → WSAEACCES, or already in use), the failure must surface as the
|
||||
// bind SocketException on EVERY accept attempt — NOT a one-time bind error followed by a
|
||||
// permanent InvalidOperationException "Not listening". The latter is the assign-before-Start
|
||||
// wedge: a non-null-but-unstarted listener that EnsureListening's guard never re-Starts,
|
||||
// which crash-looped the live sidecar on the reserved port 32569.
|
||||
using var cts = new CancellationTokenSource(Timeout);
|
||||
|
||||
// Occupy a loopback port exclusively so the server's Start() bind is forbidden.
|
||||
var blocker = new TcpListener(IPAddress.Loopback, 0) { ExclusiveAddressUse = true };
|
||||
blocker.Start();
|
||||
try
|
||||
{
|
||||
var takenPort = ((IPEndPoint)blocker.LocalEndpoint).Port;
|
||||
using var server = new TcpFrameServer(IPAddress.Loopback, takenPort, "shh", tlsCert: null, Quiet);
|
||||
|
||||
// First accept attempt: the bind fails with a SocketException.
|
||||
await Should.ThrowAsync<SocketException>(() => server.RunOneConnectionAsync(new EchoHandler(), cts.Token));
|
||||
|
||||
// Second attempt MUST also be the bind SocketException — not InvalidOperationException
|
||||
// "Not listening". This is the assertion that fails against the assign-before-Start bug.
|
||||
var second = await Should.ThrowAsync<Exception>(() => server.RunOneConnectionAsync(new EchoHandler(), cts.Token));
|
||||
second.ShouldBeOfType<SocketException>();
|
||||
}
|
||||
finally { blocker.Stop(); }
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user