Phase 3 PR 72 -- Multi-endpoint failover for OPC UA Client driver. Adds OpcUaClientDriverOptions.EndpointUrls ordered list + PerEndpointConnectTimeout knob. On InitializeAsync the driver walks the candidate list in order via ResolveEndpointCandidates and returns the session from the first endpoint that successfully connects. Captures per-URL failure reasons in a List<string> and, if every candidate fails, throws AggregateException whose message names every URL + its failure class (e.g. 'opc.tcp://primary:4840 -> TimeoutException: ...'). That's critical diag for field debugging -- without it 'failover picked the wrong one' surfaces as a mystery. Single-URL backwards compat: EndpointUrl field retained as a one-URL shortcut. When EndpointUrls is null or empty the driver falls through to a single-candidate list of [EndpointUrl], so every existing single-endpoint config keeps working without migration. When both are provided, EndpointUrls wins + EndpointUrl is ignored -- documented on the field xml-doc. Per-endpoint connect budget: PerEndpointConnectTimeout (default 3s) caps each attempt so a sweep over several dead servers can't blow the overall init budget. Applied via CancellationTokenSource.CreateLinkedTokenSource + CancelAfter inside OpenSessionOnEndpointAsync (the extracted single-endpoint connect helper) so the cap is independent of the outer Options.Timeout which governs steady-state ops. BuildUserIdentity extracted out of InitializeAsync so the failover loop builds the UserIdentity ONCE and reuses it across every endpoint attempt -- generating it N times would re-unlock the user cert's private key N times, wasteful + keeps the password in memory longer. HostName now reflects the endpoint that actually connected via _connectedEndpointUrl instead of always returning opts.EndpointUrl -- so the Admin /hosts dashboard shows which of the configured endpoints is currently serving traffic (primary vs backup). Falls back to the first candidate pre-connect so the dashboard has a sensible identity before the first connect, and resets to null on ShutdownAsync. Use case: an OPC UA hot-standby server pair (primary 4840 + backup 4841) where either can serve the same address space. Operator configures EndpointUrls=[primary, backup]; driver tries primary first, falls over to backup on primary failure with a clean AggregateException describing both attempts if both are down. Unit tests (OpcUaClientFailoverTests, 5 facts): ResolveEndpointCandidates_prefers_EndpointUrls_when_provided (list trumps single), ResolveEndpointCandidates_falls_back_to_single_EndpointUrl_when_list_empty (legacy config compat), ResolveEndpointCandidates_empty_list_treated_as_fallback (explicit empty list also falls back -- otherwise we'd produce a zero-candidate sweep that throws with nothing tried), HostName_uses_first_candidate_before_connect (dashboard rendering pre-connect), Initialize_against_all_unreachable_endpoints_throws_AggregateException_listing_each (three loopback dead ports, asserts each URL appears in the aggregate message + driver flips to Faulted). 31/31 OpcUaClient.Tests pass. dotnet build clean. OPC UA Client driver security/auth/availability feature set now complete per driver-specs.md \u00A78: policy-filtered endpoint selection (PR 70), Anonymous+Username+Certificate auth (PR 71), multi-endpoint failover (this PR).
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class OpcUaClientFailoverTests
|
||||
{
|
||||
[Fact]
|
||||
public void ResolveEndpointCandidates_prefers_EndpointUrls_when_provided()
|
||||
{
|
||||
var opts = new OpcUaClientDriverOptions
|
||||
{
|
||||
EndpointUrl = "opc.tcp://fallback:4840",
|
||||
EndpointUrls = ["opc.tcp://primary:4840", "opc.tcp://backup:4841"],
|
||||
};
|
||||
var list = OpcUaClientDriver.ResolveEndpointCandidates(opts);
|
||||
list.Count.ShouldBe(2);
|
||||
list[0].ShouldBe("opc.tcp://primary:4840");
|
||||
list[1].ShouldBe("opc.tcp://backup:4841");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveEndpointCandidates_falls_back_to_single_EndpointUrl_when_list_empty()
|
||||
{
|
||||
var opts = new OpcUaClientDriverOptions { EndpointUrl = "opc.tcp://only:4840" };
|
||||
var list = OpcUaClientDriver.ResolveEndpointCandidates(opts);
|
||||
list.Count.ShouldBe(1);
|
||||
list[0].ShouldBe("opc.tcp://only:4840");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ResolveEndpointCandidates_empty_list_treated_as_fallback_to_EndpointUrl()
|
||||
{
|
||||
// Explicit empty list should still fall back to the single-URL shortcut rather than
|
||||
// producing a zero-candidate sweep that would immediately throw with no URLs tried.
|
||||
var opts = new OpcUaClientDriverOptions
|
||||
{
|
||||
EndpointUrl = "opc.tcp://single:4840",
|
||||
EndpointUrls = [],
|
||||
};
|
||||
OpcUaClientDriver.ResolveEndpointCandidates(opts).Count.ShouldBe(1);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void HostName_uses_first_candidate_before_connect()
|
||||
{
|
||||
var opts = new OpcUaClientDriverOptions
|
||||
{
|
||||
EndpointUrls = ["opc.tcp://primary:4840", "opc.tcp://backup:4841"],
|
||||
};
|
||||
using var drv = new OpcUaClientDriver(opts, "opcua-host");
|
||||
drv.HostName.ShouldBe("opc.tcp://primary:4840",
|
||||
"pre-connect the dashboard should show the first candidate URL so operators can link back");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Initialize_against_all_unreachable_endpoints_throws_AggregateException_listing_each()
|
||||
{
|
||||
// Port 1 + port 2 + port 3 on loopback are all guaranteed closed (TCP RST immediate).
|
||||
// Failover sweep should attempt all three and throw AggregateException naming each URL
|
||||
// so operators see exactly which candidates were tried.
|
||||
var opts = new OpcUaClientDriverOptions
|
||||
{
|
||||
EndpointUrls = ["opc.tcp://127.0.0.1:1", "opc.tcp://127.0.0.1:2", "opc.tcp://127.0.0.1:3"],
|
||||
PerEndpointConnectTimeout = TimeSpan.FromMilliseconds(500),
|
||||
Timeout = TimeSpan.FromMilliseconds(500),
|
||||
AutoAcceptCertificates = true,
|
||||
};
|
||||
using var drv = new OpcUaClientDriver(opts, "opcua-failover");
|
||||
|
||||
var ex = await Should.ThrowAsync<AggregateException>(async () =>
|
||||
await drv.InitializeAsync("{}", TestContext.Current.CancellationToken));
|
||||
|
||||
ex.Message.ShouldContain("127.0.0.1:1");
|
||||
ex.Message.ShouldContain("127.0.0.1:2");
|
||||
ex.Message.ShouldContain("127.0.0.1:3");
|
||||
drv.GetHealth().State.ShouldBe(DriverState.Faulted);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user