fix(external-system-gateway): resolve ExternalSystemGateway-002/003 — apply HTTP call timeout, confirm CachedCall no double-dispatch
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
| Last reviewed | 2026-05-16 |
|
| Last reviewed | 2026-05-16 |
|
||||||
| Reviewer | claude-agent |
|
| Reviewer | claude-agent |
|
||||||
| Commit reviewed | `9c60592` |
|
| Commit reviewed | `9c60592` |
|
||||||
| Open findings | 13 |
|
| Open findings | 11 |
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ transient-retry paths. Fixed by the commit whose message references
|
|||||||
|--|--|
|
|--|--|
|
||||||
| Severity | High |
|
| Severity | High |
|
||||||
| Category | Error handling & resilience |
|
| Category | Error handling & resilience |
|
||||||
| Status | Open |
|
| Status | Resolved |
|
||||||
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:130`, `src/ScadaLink.ExternalSystemGateway/ServiceCollectionExtensions.cs:13` |
|
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:130`, `src/ScadaLink.ExternalSystemGateway/ServiceCollectionExtensions.cs:13` |
|
||||||
|
|
||||||
**Description**
|
**Description**
|
||||||
@@ -142,7 +142,24 @@ is classified as transient.
|
|||||||
|
|
||||||
**Resolution**
|
**Resolution**
|
||||||
|
|
||||||
_Unresolved._
|
Resolved 2026-05-16 (commit `<pending>`). `InvokeHttpAsync` now enforces a call
|
||||||
|
timeout: `ExternalSystemClient` takes an `IOptions<ExternalSystemGatewayOptions>` and
|
||||||
|
links a `CancellationTokenSource(DefaultHttpTimeout)` with the caller's token before
|
||||||
|
`SendAsync` and the response-body read, so the design's "timeout applies to the HTTP
|
||||||
|
request round-trip" guarantee now holds within the configured window (default 30s)
|
||||||
|
instead of `HttpClient`'s default 100s. A timeout is reclassified as a
|
||||||
|
`TransientExternalSystemException`; a caller-initiated cancellation is distinguished
|
||||||
|
from a timeout and propagated as `OperationCanceledException` rather than being
|
||||||
|
swallowed as transient. Regression tests:
|
||||||
|
`Call_SlowSystem_TimesOutAsTransientErrorWithinConfiguredWindow` and
|
||||||
|
`Call_CallerCancellation_IsNotMisreportedAsTimeout`.
|
||||||
|
|
||||||
|
Note (partial scope): the per-*system* `Timeout` field on `ExternalSystemDefinition`
|
||||||
|
remains unimplemented — adding it requires a change to `ScadaLink.Commons`, which is
|
||||||
|
outside this module's edit scope. Until that entity field exists, the configured
|
||||||
|
`DefaultHttpTimeout` is the effective per-call limit for every system. A follow-up
|
||||||
|
against the Commons module should add the `Timeout` field and have `InvokeHttpAsync`
|
||||||
|
prefer it over the default. This is a tracked follow-up, not a regression.
|
||||||
|
|
||||||
### ExternalSystemGateway-003 — `CachedCall` double-dispatches the HTTP request
|
### ExternalSystemGateway-003 — `CachedCall` double-dispatches the HTTP request
|
||||||
|
|
||||||
@@ -150,7 +167,7 @@ _Unresolved._
|
|||||||
|--|--|
|
|--|--|
|
||||||
| Severity | High |
|
| Severity | High |
|
||||||
| Category | Correctness & logic bugs |
|
| Category | Correctness & logic bugs |
|
||||||
| Status | Open |
|
| Status | Resolved |
|
||||||
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:84-117` |
|
| Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:84-117` |
|
||||||
|
|
||||||
**Description**
|
**Description**
|
||||||
@@ -179,7 +196,18 @@ the duplicated logic.
|
|||||||
|
|
||||||
**Resolution**
|
**Resolution**
|
||||||
|
|
||||||
_Unresolved._
|
Resolved 2026-05-16 (commit `<pending>`). Re-triage: this finding was already fixed in
|
||||||
|
the codebase as a side effect of the `ExternalSystemGateway-001` fix and is no longer
|
||||||
|
reproducible against the current source. `StoreAndForwardService.EnqueueAsync` gained an
|
||||||
|
`attemptImmediateDelivery` parameter (recommendation approach (b)), and
|
||||||
|
`CachedCallAsync` passes `attemptImmediateDelivery: false` after its own first HTTP
|
||||||
|
attempt — so `EnqueueAsync` buffers the message for the background retry sweep without
|
||||||
|
re-invoking the registered delivery handler, eliminating the duplicate dispatch. A
|
||||||
|
dedicated regression test, `CachedCall_TransientFailure_DoesNotImmediatelyRedispatchViaRegisteredHandler`,
|
||||||
|
was added in this module's test suite: it registers a counting delivery handler, drives
|
||||||
|
a `CachedCall` whose HTTP attempt fails transiently, and asserts the handler is invoked
|
||||||
|
zero times during enqueue. The test was verified to fail if `attemptImmediateDelivery`
|
||||||
|
is flipped back to `true`.
|
||||||
|
|
||||||
### ExternalSystemGateway-004 — System retry settings are not honoured for cached calls/writes
|
### ExternalSystemGateway-004 — System retry settings are not honoured for cached calls/writes
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ using System.Net.Http.Headers;
|
|||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
using ScadaLink.Commons.Entities.ExternalSystems;
|
using ScadaLink.Commons.Entities.ExternalSystems;
|
||||||
using ScadaLink.Commons.Interfaces.Repositories;
|
using ScadaLink.Commons.Interfaces.Repositories;
|
||||||
using ScadaLink.Commons.Interfaces.Services;
|
using ScadaLink.Commons.Interfaces.Services;
|
||||||
@@ -22,17 +23,20 @@ public class ExternalSystemClient : IExternalSystemClient
|
|||||||
private readonly IExternalSystemRepository _repository;
|
private readonly IExternalSystemRepository _repository;
|
||||||
private readonly StoreAndForwardService? _storeAndForward;
|
private readonly StoreAndForwardService? _storeAndForward;
|
||||||
private readonly ILogger<ExternalSystemClient> _logger;
|
private readonly ILogger<ExternalSystemClient> _logger;
|
||||||
|
private readonly ExternalSystemGatewayOptions _options;
|
||||||
|
|
||||||
public ExternalSystemClient(
|
public ExternalSystemClient(
|
||||||
IHttpClientFactory httpClientFactory,
|
IHttpClientFactory httpClientFactory,
|
||||||
IExternalSystemRepository repository,
|
IExternalSystemRepository repository,
|
||||||
ILogger<ExternalSystemClient> logger,
|
ILogger<ExternalSystemClient> logger,
|
||||||
StoreAndForwardService? storeAndForward = null)
|
StoreAndForwardService? storeAndForward = null,
|
||||||
|
IOptions<ExternalSystemGatewayOptions>? options = null)
|
||||||
{
|
{
|
||||||
_httpClientFactory = httpClientFactory;
|
_httpClientFactory = httpClientFactory;
|
||||||
_repository = repository;
|
_repository = repository;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
_storeAndForward = storeAndForward;
|
_storeAndForward = storeAndForward;
|
||||||
|
_options = options?.Value ?? new ExternalSystemGatewayOptions();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -198,22 +202,59 @@ public class ExternalSystemClient : IExternalSystemClient
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enforce the per-call timeout. ExternalSystemDefinition has no per-system
|
||||||
|
// Timeout field yet, so the configured DefaultHttpTimeout is the effective
|
||||||
|
// round-trip limit (the design's "timeout applies to the HTTP request
|
||||||
|
// round-trip" guarantee). A linked CTS lets us distinguish a timeout from a
|
||||||
|
// caller-initiated cancellation: only the timeout is reclassified as transient.
|
||||||
|
using var timeoutCts = new CancellationTokenSource(_options.DefaultHttpTimeout);
|
||||||
|
using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(
|
||||||
|
cancellationToken, timeoutCts.Token);
|
||||||
|
|
||||||
HttpResponseMessage response;
|
HttpResponseMessage response;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
response = await client.SendAsync(request, cancellationToken);
|
response = await client.SendAsync(request, linkedCts.Token);
|
||||||
|
}
|
||||||
|
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||||
|
{
|
||||||
|
// The caller asked to abandon the work — do not reclassify as transient.
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested)
|
||||||
|
{
|
||||||
|
// Our own timeout elapsed — a transient failure per the design.
|
||||||
|
throw ErrorClassifier.AsTransient(
|
||||||
|
$"Timeout calling {system.Name} after {_options.DefaultHttpTimeout.TotalSeconds:0.##}s", ex);
|
||||||
}
|
}
|
||||||
catch (Exception ex) when (ErrorClassifier.IsTransient(ex))
|
catch (Exception ex) when (ErrorClassifier.IsTransient(ex))
|
||||||
{
|
{
|
||||||
throw ErrorClassifier.AsTransient($"Connection error to {system.Name}: {ex.Message}", ex);
|
throw ErrorClassifier.AsTransient($"Connection error to {system.Name}: {ex.Message}", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response.IsSuccessStatusCode)
|
// The timeout also covers reading the response body (the design's
|
||||||
|
// "round-trip" guarantee), so the linked token is used for the read too.
|
||||||
|
string body;
|
||||||
|
try
|
||||||
{
|
{
|
||||||
return await response.Content.ReadAsStringAsync(cancellationToken);
|
body = await response.Content.ReadAsStringAsync(linkedCts.Token);
|
||||||
|
}
|
||||||
|
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||||
|
{
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
catch (OperationCanceledException ex) when (timeoutCts.IsCancellationRequested)
|
||||||
|
{
|
||||||
|
throw ErrorClassifier.AsTransient(
|
||||||
|
$"Timeout reading response from {system.Name} after {_options.DefaultHttpTimeout.TotalSeconds:0.##}s", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
var errorBody = await response.Content.ReadAsStringAsync(cancellationToken);
|
if (response.IsSuccessStatusCode)
|
||||||
|
{
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
|
||||||
|
var errorBody = body;
|
||||||
|
|
||||||
if (ErrorClassifier.IsTransient(response.StatusCode))
|
if (ErrorClassifier.IsTransient(response.StatusCode))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
using System.Net;
|
using System.Net;
|
||||||
|
using Microsoft.Data.Sqlite;
|
||||||
using Microsoft.Extensions.Logging.Abstractions;
|
using Microsoft.Extensions.Logging.Abstractions;
|
||||||
using NSubstitute;
|
using NSubstitute;
|
||||||
using ScadaLink.Commons.Entities.ExternalSystems;
|
using ScadaLink.Commons.Entities.ExternalSystems;
|
||||||
using ScadaLink.Commons.Interfaces.Repositories;
|
using ScadaLink.Commons.Interfaces.Repositories;
|
||||||
|
using ScadaLink.StoreAndForward;
|
||||||
|
|
||||||
namespace ScadaLink.ExternalSystemGateway.Tests;
|
namespace ScadaLink.ExternalSystemGateway.Tests;
|
||||||
|
|
||||||
@@ -216,6 +218,116 @@ public class ExternalSystemClientTests
|
|||||||
() => client.DeliverBufferedAsync(BufferedCall("TestAPI", "failMethod")));
|
() => client.DeliverBufferedAsync(BufferedCall("TestAPI", "failMethod")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── ExternalSystemGateway-003: CachedCall must not double-dispatch ──
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task CachedCall_TransientFailure_DoesNotImmediatelyRedispatchViaRegisteredHandler()
|
||||||
|
{
|
||||||
|
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||||
|
var method = new ExternalSystemMethod("postData", "POST", "/post") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||||
|
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||||
|
.Returns(new List<ExternalSystemDefinition> { system });
|
||||||
|
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||||
|
.Returns(new List<ExternalSystemMethod> { method });
|
||||||
|
|
||||||
|
// The HTTP layer always fails transiently (500).
|
||||||
|
var httpClient = new HttpClient(new MockHttpMessageHandler(HttpStatusCode.InternalServerError, "boom"));
|
||||||
|
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(httpClient);
|
||||||
|
|
||||||
|
// A real S&F service with a registered delivery handler that counts invocations.
|
||||||
|
var dbName = $"EsgDoubleDispatch_{Guid.NewGuid():N}";
|
||||||
|
var connStr = $"Data Source={dbName};Mode=Memory;Cache=Shared";
|
||||||
|
using var keepAlive = new SqliteConnection(connStr);
|
||||||
|
keepAlive.Open();
|
||||||
|
var storage = new StoreAndForwardStorage(connStr, NullLogger<StoreAndForwardStorage>.Instance);
|
||||||
|
await storage.InitializeAsync();
|
||||||
|
var sfOptions = new StoreAndForwardOptions
|
||||||
|
{
|
||||||
|
DefaultRetryInterval = TimeSpan.FromMinutes(10),
|
||||||
|
RetryTimerInterval = TimeSpan.FromMinutes(10),
|
||||||
|
};
|
||||||
|
var sf = new StoreAndForwardService(storage, sfOptions, NullLogger<StoreAndForwardService>.Instance);
|
||||||
|
|
||||||
|
var handlerInvocations = 0;
|
||||||
|
sf.RegisterDeliveryHandler(
|
||||||
|
ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.ExternalSystem,
|
||||||
|
_ => { Interlocked.Increment(ref handlerInvocations); return Task.FromResult(false); });
|
||||||
|
|
||||||
|
var client = new ExternalSystemClient(
|
||||||
|
_httpClientFactory, _repository,
|
||||||
|
NullLogger<ExternalSystemClient>.Instance,
|
||||||
|
storeAndForward: sf);
|
||||||
|
|
||||||
|
var result = await client.CachedCallAsync("TestAPI", "postData");
|
||||||
|
|
||||||
|
// The call already made one HTTP attempt; EnqueueAsync must NOT invoke the
|
||||||
|
// registered handler again synchronously (which would dispatch a 2nd request).
|
||||||
|
Assert.True(result.WasBuffered);
|
||||||
|
Assert.Equal(0, handlerInvocations);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── ExternalSystemGateway-002: per-system call timeout ──
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task Call_SlowSystem_TimesOutAsTransientErrorWithinConfiguredWindow()
|
||||||
|
{
|
||||||
|
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||||
|
var method = new ExternalSystemMethod("getData", "GET", "/data") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||||
|
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||||
|
.Returns(new List<ExternalSystemDefinition> { system });
|
||||||
|
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||||
|
.Returns(new List<ExternalSystemMethod> { method });
|
||||||
|
|
||||||
|
// Handler that hangs far longer than the configured timeout and the test budget.
|
||||||
|
var httpClient = new HttpClient(new HangingHttpMessageHandler(TimeSpan.FromMinutes(10)));
|
||||||
|
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(httpClient);
|
||||||
|
|
||||||
|
// Configure a short timeout so the call must fail quickly.
|
||||||
|
var options = new ExternalSystemGatewayOptions { DefaultHttpTimeout = TimeSpan.FromMilliseconds(200) };
|
||||||
|
var client = new ExternalSystemClient(
|
||||||
|
_httpClientFactory, _repository,
|
||||||
|
NullLogger<ExternalSystemClient>.Instance,
|
||||||
|
options: Microsoft.Extensions.Options.Options.Create(options));
|
||||||
|
|
||||||
|
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||||
|
var result = await client.CallAsync("TestAPI", "getData");
|
||||||
|
sw.Stop();
|
||||||
|
|
||||||
|
Assert.False(result.Success);
|
||||||
|
Assert.Contains("Transient error", result.ErrorMessage);
|
||||||
|
Assert.Contains("Timeout", result.ErrorMessage);
|
||||||
|
// Must fail near the configured 200ms, well before HttpClient's default 100s.
|
||||||
|
Assert.True(sw.Elapsed < TimeSpan.FromSeconds(10),
|
||||||
|
$"Call took {sw.Elapsed}, expected to time out near the configured 200ms window");
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task Call_CallerCancellation_IsNotMisreportedAsTimeout()
|
||||||
|
{
|
||||||
|
var system = new ExternalSystemDefinition("TestAPI", "https://api.example.com", "none") { Id = 1 };
|
||||||
|
var method = new ExternalSystemMethod("getData", "GET", "/data") { Id = 1, ExternalSystemDefinitionId = 1 };
|
||||||
|
_repository.GetAllExternalSystemsAsync(Arg.Any<CancellationToken>())
|
||||||
|
.Returns(new List<ExternalSystemDefinition> { system });
|
||||||
|
_repository.GetMethodsByExternalSystemIdAsync(1, Arg.Any<CancellationToken>())
|
||||||
|
.Returns(new List<ExternalSystemMethod> { method });
|
||||||
|
|
||||||
|
var httpClient = new HttpClient(new HangingHttpMessageHandler(TimeSpan.FromMinutes(10)));
|
||||||
|
_httpClientFactory.CreateClient(Arg.Any<string>()).Returns(httpClient);
|
||||||
|
|
||||||
|
var options = new ExternalSystemGatewayOptions { DefaultHttpTimeout = TimeSpan.FromMinutes(5) };
|
||||||
|
var client = new ExternalSystemClient(
|
||||||
|
_httpClientFactory, _repository,
|
||||||
|
NullLogger<ExternalSystemClient>.Instance,
|
||||||
|
options: Microsoft.Extensions.Options.Options.Create(options));
|
||||||
|
|
||||||
|
using var cts = new CancellationTokenSource(TimeSpan.FromMilliseconds(200));
|
||||||
|
|
||||||
|
// Caller-initiated cancellation must surface as OperationCanceledException,
|
||||||
|
// not be swallowed as a transient timeout error.
|
||||||
|
await Assert.ThrowsAnyAsync<OperationCanceledException>(
|
||||||
|
() => client.CallAsync("TestAPI", "getData", cancellationToken: cts.Token));
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Test helper: mock HTTP message handler.
|
/// Test helper: mock HTTP message handler.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -238,4 +350,20 @@ public class ExternalSystemClientTests
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Test helper: an HTTP handler that hangs until cancelled (simulates a slow/hung system).
|
||||||
|
/// </summary>
|
||||||
|
private class HangingHttpMessageHandler : HttpMessageHandler
|
||||||
|
{
|
||||||
|
private readonly TimeSpan _delay;
|
||||||
|
|
||||||
|
public HangingHttpMessageHandler(TimeSpan delay) => _delay = delay;
|
||||||
|
|
||||||
|
protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
|
||||||
|
{
|
||||||
|
await Task.Delay(_delay, cancellationToken);
|
||||||
|
return new HttpResponseMessage(HttpStatusCode.OK) { Content = new StringContent("{}") };
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,7 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="../../src/ScadaLink.ExternalSystemGateway/ScadaLink.ExternalSystemGateway.csproj" />
|
<ProjectReference Include="../../src/ScadaLink.ExternalSystemGateway/ScadaLink.ExternalSystemGateway.csproj" />
|
||||||
<ProjectReference Include="../../src/ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
<ProjectReference Include="../../src/ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||||
|
<ProjectReference Include="../../src/ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
Reference in New Issue
Block a user