Files
lmxopcua/tests/ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests/AbLegacyAutoDemoteTests.cs
2026-04-26 08:44:53 -04:00

381 lines
16 KiB
C#

using System.Text.Json;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy;
using ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.PlcFamilies;
namespace ZB.MOM.WW.OtOpcUa.Driver.AbLegacy.Tests;
/// <summary>
/// PR ablegacy-12 / #255 — auto-demote on consecutive comm failure. After
/// <c>FailureThreshold</c> consecutive read or probe failures the driver
/// marks the device <c>Demoted</c> for <c>DemoteFor</c>; subsequent reads
/// short-circuit with <c>BadCommunicationError</c> without invoking
/// libplctag, so one slow PLC sharing the driver thread can't starve faster
/// peers. Probe success clears the demote early; read success resets the
/// consecutive-failure tally without leaving the demote window.
/// </summary>
[Trait("Category", "Unit")]
public sealed class AbLegacyAutoDemoteTests
{
/// <summary>
/// Primary device address. The default device and the default <c>X</c> tag
/// built by <c>BaseOptions</c> both target this host.
/// </summary>
private const string Host = "ab://10.0.0.5/1,0";
/// <summary>
/// Second device address. Only the two-device test uses it, where its
/// gateway (<c>10.0.0.6</c>) is the one whose tag is made to throw.
/// </summary>
private const string SecondHost = "ab://10.0.0.6/1,0";
/// <summary>
/// Builds the driver options shared by these tests. The probe is always
/// switched off so that no background loop can race the read path and
/// disturb the failure tally a test is counting.
/// </summary>
/// <param name="demote">
/// Demote settings attached to the default device. Not applied when
/// <paramref name="devices"/> is supplied.
/// </param>
/// <param name="devices">
/// Device list override; when <see langword="null"/> a single Slc500 device
/// at <c>Host</c> is used.
/// </param>
/// <param name="tags">
/// Tag list override; when <see langword="null"/> a single Int tag named
/// <c>X</c> at address <c>N7:0</c> on <c>Host</c> is used.
/// </param>
/// <returns>Driver options with the probe disabled.</returns>
private static AbLegacyDriverOptions BaseOptions(
    AbLegacyDemoteOptions? demote = null,
    IReadOnlyList<AbLegacyDeviceOptions>? devices = null,
    IReadOnlyList<AbLegacyTagDefinition>? tags = null)
{
    IReadOnlyList<AbLegacyDeviceOptions> deviceList =
        devices ?? [new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500, Demote: demote)];
    IReadOnlyList<AbLegacyTagDefinition> tagList =
        tags ?? [new AbLegacyTagDefinition("X", Host, "N7:0", AbLegacyDataType.Int)];

    return new AbLegacyDriverOptions
    {
        Devices = deviceList,
        Tags = tagList,
        Probe = new AbLegacyProbeOptions { Enabled = false },
    };
}
/// <summary>
/// Creates a driver named <c>drv-demote</c> wired to a fresh fake tag
/// factory, using the options produced by <c>BaseOptions</c>.
/// </summary>
/// <param name="demote">Forwarded to <c>BaseOptions</c>.</param>
/// <param name="devices">Forwarded to <c>BaseOptions</c>.</param>
/// <param name="tags">Forwarded to <c>BaseOptions</c>.</param>
/// <returns>The driver together with the fake factory it was constructed with.</returns>
private static (AbLegacyDriver drv, FakeAbLegacyTagFactory factory) NewDriver(
    AbLegacyDemoteOptions? demote = null,
    IReadOnlyList<AbLegacyDeviceOptions>? devices = null,
    IReadOnlyList<AbLegacyTagDefinition>? tags = null)
{
    FakeAbLegacyTagFactory tagFactory = new();
    var options = BaseOptions(demote, devices, tags);
    AbLegacyDriver driver = new(options, "drv-demote", tagFactory);
    return (driver, tagFactory);
}
/// <summary>
/// Replaces the <c>Customise</c> hook on <paramref name="factory"/> so that
/// every tag built through it throws a <see cref="TimeoutException"/> on read.
/// </summary>
/// <param name="factory">Fake tag factory whose <c>Customise</c> hook is overwritten.</param>
/// <returns>
/// Always <see langword="null"/>. This method only assigns the hook and
/// never constructs a tag itself, so there is no instance to hand back.
/// To flip the failure off later, fetch the tag via
/// <c>factory.Tags["N7:0"]</c>, as the tests in this class do.
/// </returns>
private static FakeAbLegacyTag? SeedFailingTag(FakeAbLegacyTagFactory factory)
{
    // Cause every read to throw — exception-driven failures count as
    // BadCommunicationError per RecordError(commFailure:true).
    factory.Customise = p => new FakeAbLegacyTag(p)
    {
        ThrowOnRead = true,
        Exception = new TimeoutException("simulated comm failure"),
    };

    // The return type is annotated nullable rather than hiding the null
    // behind the null-forgiving operator; every caller discards it.
    return null;
}
/// <summary>
/// With no explicit demote options, three back-to-back failing reads must
/// leave the device demoted, bump the demote counter to one and report the
/// host as <c>Demoted</c>. (The test assumes the driver's default failure
/// threshold is three.)
/// </summary>
[Fact]
public async Task Three_consecutive_failures_demote_the_device()
{
    var (driver, tagFactory) = NewDriver();
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    for (var attempt = 0; attempt < 3; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    var deviceState = driver.GetDeviceState(Host).ShouldNotBeNull();
    deviceState.DemotedUntilUtc.ShouldNotBeNull();

    var diagnostics = driver.DiagnosticTags.Snapshot(Host);
    diagnostics.DemoteCount.ShouldBe(1);
    diagnostics.LastDemotedUtc.ShouldNotBeNull();

    var onlyHost = driver.GetHostStatuses().Single();
    onlyHost.State.ShouldBe(HostState.Demoted);
}
/// <summary>
/// Once the device is demoted, further reads must come back as
/// <c>BadCommunicationError</c> while the fake tag's read counter stays
/// where it was when the demotion tripped.
/// </summary>
[Fact]
public async Task Reads_while_demoted_short_circuit_without_invoking_libplctag()
{
    var demoteOptions = new AbLegacyDemoteOptions(
        FailureThreshold: 3,
        DemoteFor: TimeSpan.FromMinutes(5));
    var (driver, tagFactory) = NewDriver(demoteOptions);
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    // Looked up afresh on every call, exactly as a direct indexer access would be.
    int CurrentReadCount() => tagFactory.Tags["N7:0"].ReadCount;

    // Three failures reach the threshold and trip the demotion.
    for (var attempt = 0; attempt < 3; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    var readsAtDemotion = CurrentReadCount();

    // From here on the tag must not be touched: the driver is expected to
    // answer BadCommunicationError before it reaches EnsureTagRuntimeAsync.
    var firstBlocked = await driver.ReadAsync(["X"], CancellationToken.None);
    firstBlocked[0].StatusCode.ShouldBe(AbLegacyStatusMapper.BadCommunicationError);
    CurrentReadCount().ShouldBe(readsAtDemotion);

    var secondBlocked = await driver.ReadAsync(["X"], CancellationToken.None);
    secondBlocked[0].StatusCode.ShouldBe(AbLegacyStatusMapper.BadCommunicationError);
    CurrentReadCount().ShouldBe(readsAtDemotion);
}
/// <summary>
/// After the demote window has elapsed, the next read must reach the tag
/// again and return its (now healthy) value.
/// </summary>
[Fact]
public async Task After_DemoteFor_expires_next_read_dispatches_through()
{
    // A 50 ms window keeps the cool-down short enough to expire inside the test.
    var demoteOptions = new AbLegacyDemoteOptions(
        FailureThreshold: 2,
        DemoteFor: TimeSpan.FromMilliseconds(50));
    var (driver, tagFactory) = NewDriver(demoteOptions);
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    // Two failures are enough to trip this threshold.
    for (var attempt = 0; attempt < 2; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    var deviceState = driver.GetDeviceState(Host).ShouldNotBeNull();
    deviceState.DemotedUntilUtc.ShouldNotBeNull();

    var tag = tagFactory.Tags["N7:0"];
    var readsBeforeWait = tag.ReadCount;

    // Make the fake healthy, then sit out the demote window.
    tag.ThrowOnRead = false;
    tag.Value = 42;
    tag.Status = 0;
    var pastTheWindow = TimeSpan.FromMilliseconds(120);
    await Task.Delay(pastTheWindow);

    var afterExpiry = await driver.ReadAsync(["X"], CancellationToken.None);
    afterExpiry[0].StatusCode.ShouldBe(AbLegacyStatusMapper.Good);
    afterExpiry[0].Value.ShouldBe(42);

    // A higher read count proves the read went through to the tag.
    tagFactory.Tags["N7:0"].ReadCount.ShouldBeGreaterThan(readsBeforeWait);
}
/// <summary>
/// Two failures followed by one good read must bring
/// <c>ConsecutiveFailures</c> back to zero without any demotion being set.
/// </summary>
[Fact]
public async Task Successful_read_resets_consecutive_failure_counter()
{
    var (driver, tagFactory) = NewDriver();

    // Start out with a tag whose every read fails.
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    for (var attempt = 0; attempt < 2; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    // The same state object is inspected again after the successful read.
    var deviceState = driver.GetDeviceState(Host).ShouldNotBeNull();
    deviceState.ConsecutiveFailures.ShouldBe(2);

    // Turn the existing fake healthy and read once more.
    var tag = tagFactory.Tags["N7:0"];
    tag.ThrowOnRead = false;
    tag.Value = 99;
    tag.Status = 0;
    await driver.ReadAsync(["X"], CancellationToken.None);

    deviceState.ConsecutiveFailures.ShouldBe(0);
    deviceState.DemotedUntilUtc.ShouldBeNull();
}
/// <summary>
/// Two failures, one success, then two more failures must not demote a
/// device whose threshold is three: the success in the middle breaks the
/// streak.
/// </summary>
[Fact]
public async Task Failure_success_failure_does_not_demote_at_threshold_three()
{
    var demoteOptions = new AbLegacyDemoteOptions(FailureThreshold: 3);
    var (driver, tagFactory) = NewDriver(demoteOptions);
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    // First streak: two failures.
    for (var attempt = 0; attempt < 2; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    // One good read, which should reset the tally.
    var tag = tagFactory.Tags["N7:0"];
    tag.ThrowOnRead = false;
    tag.Status = 0;
    await driver.ReadAsync(["X"], CancellationToken.None);

    // Second streak: two failures again, still short of three in a row.
    tag = tagFactory.Tags["N7:0"];
    tag.ThrowOnRead = true;
    tag.Exception = new TimeoutException("flap");
    for (var attempt = 0; attempt < 2; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    var deviceState = driver.GetDeviceState(Host).ShouldNotBeNull();
    deviceState.DemotedUntilUtc.ShouldBeNull();

    var diagnostics = driver.DiagnosticTags.Snapshot(Host);
    diagnostics.DemoteCount.ShouldBe(0);
}
/// <summary>
/// After a demotion, reading the synthetic diagnostics references for
/// <c>DemoteCount</c> and <c>LastDemotedUtc</c> must return Good values:
/// a count of one and a non-empty string stamp.
/// </summary>
[Fact]
public async Task DemoteCount_and_LastDemotedUtc_surface_via_diagnostic_short_circuit()
{
    var (driver, tagFactory) = NewDriver();
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    for (var attempt = 0; attempt < 3; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    // Address the synthetic _Diagnostics counters for this host.
    var diagnosticsRoot = $"{AbLegacyDiagnosticTags.DiagnosticsFolderPrefix}{Host}";
    var demoteCountRef = $"{diagnosticsRoot}/DemoteCount";
    var lastDemotedRef = $"{diagnosticsRoot}/LastDemotedUtc";

    var diagnosticReads = await driver.ReadAsync([demoteCountRef, lastDemotedRef], CancellationToken.None);

    var demoteCount = diagnosticReads[0];
    demoteCount.StatusCode.ShouldBe(AbLegacyStatusMapper.Good);
    demoteCount.Value.ShouldBe(1L);

    var lastDemoted = diagnosticReads[1];
    lastDemoted.StatusCode.ShouldBe(AbLegacyStatusMapper.Good);
    var stamp = lastDemoted.Value.ShouldBeOfType<string>();
    stamp.Length.ShouldBeGreaterThan(0); // ISO-8601 stamp
}
/// <summary>
/// With <c>Enabled: false</c>, even a threshold of one must never demote:
/// all five failing reads reach the tag and are recorded as comm failures.
/// </summary>
[Fact]
public async Task Demote_disabled_never_short_circuits_reads()
{
    const int failingReads = 5;

    var demoteOptions = new AbLegacyDemoteOptions(FailureThreshold: 1, Enabled: false);
    var (driver, tagFactory) = NewDriver(demoteOptions);
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    // A threshold of one would trip on the first failure if demotion were enabled.
    for (var attempt = 0; attempt < failingReads; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    var deviceState = driver.GetDeviceState(Host).ShouldNotBeNull();
    deviceState.DemotedUntilUtc.ShouldBeNull();

    var diagnostics = driver.DiagnosticTags.Snapshot(Host);
    diagnostics.DemoteCount.ShouldBe(0);

    // The failures are still counted as comm errors; only the throttle is off.
    diagnostics.CommFailures.ShouldBe(5);

    // Opting out means every read really was dispatched to the tag.
    tagFactory.Tags["N7:0"].ReadCount.ShouldBe(5);
}
/// <summary>
/// Reinitializing a demoted driver must clear the active demotion and the
/// consecutive-failure tally while the cumulative <c>DemoteCount</c> stays
/// at one.
/// </summary>
[Fact]
public async Task Reinit_preserves_DemoteCount_but_clears_active_demotion()
{
    var (driver, tagFactory) = NewDriver();
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    for (var attempt = 0; attempt < 3; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    driver.DiagnosticTags.Snapshot(Host).DemoteCount.ShouldBe(1);
    var stateBeforeReinit = driver.GetDeviceState(Host)!;
    stateBeforeReinit.DemotedUntilUtc.ShouldNotBeNull();

    await driver.ReinitializeAsync("{}", CancellationToken.None);

    // Fetch the state again after reinit rather than reusing the earlier
    // reference; the demotion must be gone but the running total must remain.
    var stateAfterReinit = driver.GetDeviceState(Host)!;
    stateAfterReinit.DemotedUntilUtc.ShouldBeNull();
    driver.GetDeviceState(Host)!.ConsecutiveFailures.ShouldBe(0);
    driver.DiagnosticTags.Snapshot(Host).DemoteCount.ShouldBe(1);
}
/// <summary>
/// Disposing a driver after three failing reads must complete without
/// throwing; the test passes simply by reaching the end.
/// </summary>
[Fact]
public async Task Disposing_driver_after_demotion_does_not_throw()
{
    var (driver, tagFactory) = NewDriver();
    SeedFailingTag(tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    for (var attempt = 0; attempt < 3; attempt++)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    // No assertion needed: any exception thrown here fails the test.
    await driver.DisposeAsync();
}
/// <summary>
/// Creates a driver from JSON via <c>AbLegacyDriverFactoryExtensions.CreateInstance</c>
/// and checks that the per-device <c>Demote</c> section (threshold 5,
/// 60000 ms, enabled) arrives on the device state's options.
/// </summary>
[Fact]
public async Task Demote_options_dto_round_trips_through_factory_extensions()
{
    // Content lines share the closing delimiter's indent, so the literal's
    // value carries no leading whitespace on any line.
    const string json = """
        {
        "Devices": [
        {
        "HostAddress": "ab://10.0.0.5/1,0",
        "PlcFamily": "Slc500",
        "Demote": {
        "FailureThreshold": 5,
        "DemoteForMs": 60000,
        "Enabled": true
        }
        }
        ],
        "Probe": { "Enabled": false },
        "Tags": [
        { "Name": "X", "DeviceHostAddress": "ab://10.0.0.5/1,0", "Address": "N7:0", "DataType": "Int" }
        ]
        }
        """;
    var drv = AbLegacyDriverFactoryExtensions.CreateInstance("drv-demote-roundtrip", json);
    await drv.InitializeAsync(json, CancellationToken.None);
    try
    {
        var state = drv.GetDeviceState(Host).ShouldNotBeNull();
        state.Options.Demote.ShouldNotBeNull();
        state.Options.Demote!.FailureThreshold.ShouldBe(5);
        // 60000 ms in the JSON is expected to surface as one minute.
        state.Options.Demote.EffectiveDemoteFor.ShouldBe(TimeSpan.FromMinutes(1));
        state.Options.Demote.Enabled.ShouldBeTrue();
    }
    finally
    {
        // Shut down even when an assertion above fails, so a failing run
        // does not leave an initialized driver behind.
        await drv.ShutdownAsync(CancellationToken.None);
    }
}
/// <summary>
/// With two devices on one driver, demoting the faulty host must leave the
/// healthy host untouched: its read stays Good while the faulty host's
/// read comes back as <c>BadCommunicationError</c>.
/// </summary>
[Fact]
public async Task Two_devices_one_faulty_does_not_starve_the_healthy_one()
{
    // One factory serves both hosts; the tag behaviour is chosen per host.
    var tagFactory = new FakeAbLegacyTagFactory();
    tagFactory.Customise = createParams =>
    {
        // The Gateway part of the create params tells the two hosts apart.
        var isFaultyHost = createParams.Gateway == "10.0.0.6";
        return new FakeAbLegacyTag(createParams)
        {
            ThrowOnRead = isFaultyHost,
            Exception = isFaultyHost ? new TimeoutException("faulty") : null,
            Value = 42,
            Status = 0,
        };
    };

    var options = new AbLegacyDriverOptions
    {
        Devices =
        [
            new AbLegacyDeviceOptions(Host, AbLegacyPlcFamily.Slc500),
            new AbLegacyDeviceOptions(SecondHost, AbLegacyPlcFamily.Slc500),
        ],
        Tags =
        [
            new AbLegacyTagDefinition("Healthy", Host, "N7:0", AbLegacyDataType.Int),
            new AbLegacyTagDefinition("Faulty", SecondHost, "N7:0", AbLegacyDataType.Int),
        ],
        Probe = new AbLegacyProbeOptions { Enabled = false },
    };
    var driver = new AbLegacyDriver(options, "drv-mix", tagFactory);
    await driver.InitializeAsync("{}", CancellationToken.None);

    // Three failures on the faulty host trip its demotion.
    for (var attempt = 0; attempt < 3; attempt++)
    {
        await driver.ReadAsync(["Faulty"], CancellationToken.None);
    }

    // The healthy host must still answer Good while its sibling is demoted.
    var healthyRead = await driver.ReadAsync(["Healthy"], CancellationToken.None);
    healthyRead[0].StatusCode.ShouldBe(AbLegacyStatusMapper.Good);
    healthyRead[0].Value.ShouldBe(42);

    // The faulty host is now answered with the short-circuit status.
    var faultyRead = await driver.ReadAsync(["Faulty"], CancellationToken.None);
    faultyRead[0].StatusCode.ShouldBe(AbLegacyStatusMapper.BadCommunicationError);

    var healthyState = driver.GetDeviceState(Host)!;
    healthyState.DemotedUntilUtc.ShouldBeNull();
    var faultyState = driver.GetDeviceState(SecondHost)!;
    faultyState.DemotedUntilUtc.ShouldNotBeNull();
}
/// <summary>
/// Five reads that end with tag status -14 must not demote the device or
/// bump <c>DemoteCount</c>.
/// </summary>
[Fact]
public async Task BadNodeIdUnknown_does_not_count_toward_demote_tally()
{
    var (driver, tagFactory) = NewDriver();

    // Status -14 maps to BadNodeIdUnknown: a terminal error, not a comm failure.
    tagFactory.Customise = createParams => new FakeAbLegacyTag(createParams) { Status = -14 };
    await driver.InitializeAsync("{}", CancellationToken.None);

    var remaining = 5;
    while (remaining-- > 0)
    {
        await driver.ReadAsync(["X"], CancellationToken.None);
    }

    // Terminal failures point at a config / decoder mismatch rather than a
    // flapping link, so they must not trip the demote threshold.
    var deviceState = driver.GetDeviceState(Host).ShouldNotBeNull();
    deviceState.DemotedUntilUtc.ShouldBeNull();

    var diagnostics = driver.DiagnosticTags.Snapshot(Host);
    diagnostics.DemoteCount.ShouldBe(0);
}
/// <summary>
/// Guards the abstraction surface: <c>HostState.Demoted</c> must be a
/// defined member whose numeric value is greater than
/// <c>HostState.Faulted</c>.
/// </summary>
[Fact]
public void HostState_enum_has_Demoted_value()
{
    // Downstream consumers (HostStatusPublisher, Admin UI, …) can only see
    // and route the new state if the enum really carries it.
    const HostState demoted = HostState.Demoted;
    var isDeclared = Enum.IsDefined(typeof(HostState), demoted);
    isDeclared.ShouldBeTrue();

    var demotedOrdinal = (int)demoted;
    var faultedOrdinal = (int)HostState.Faulted;
    demotedOrdinal.ShouldBeGreaterThan(faultedOrdinal);
}
}