chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)
Group all 69 projects into category subfolders under src/ and tests/ so the Rider Solution Explorer mirrors the module structure. Folders: Core, Server, Drivers (with a nested Driver CLIs subfolder), Client, Tooling. - Move every project folder on disk with git mv (history preserved as renames). - Recompute relative paths in 57 .csproj files: cross-category ProjectReferences, the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external mxaccessgw refs in Driver.Galaxy and its test project. - Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders. - Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL, integration, install). Build green (0 errors); unit tests pass. Docs left for a separate pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,157 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Polly;
|
||||
using Polly.CircuitBreaker;
|
||||
using Polly.Retry;
|
||||
using Polly.Timeout;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Core.Resilience;
|
||||
|
||||
/// <summary>
|
||||
/// Builds and caches Polly resilience pipelines keyed on
|
||||
/// <c>(DriverInstanceId, HostName, DriverCapability)</c>. One dead PLC behind a multi-device
|
||||
/// driver cannot open the circuit breaker for healthy sibling hosts.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Per <c>docs/v2/plan.md</c> decision #144 (per-device isolation). Composition from outside-in:
|
||||
/// <b>Timeout → Retry (when capability permits) → Circuit Breaker (when tier permits) → Bulkhead</b>.
|
||||
///
|
||||
/// <para>Pipeline resolution is lock-free on the hot path: the inner
|
||||
/// <see cref="ConcurrentDictionary{TKey,TValue}"/> caches a <see cref="ResiliencePipeline"/> per key;
|
||||
/// first-call cost is one <see cref="ResiliencePipelineBuilder"/>.Build. Thereafter reads are O(1).</para>
|
||||
/// </remarks>
|
||||
public sealed class DriverResiliencePipelineBuilder
|
||||
{
|
||||
private readonly ConcurrentDictionary<PipelineKey, ResiliencePipeline> _pipelines = new();
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly DriverResilienceStatusTracker? _statusTracker;
|
||||
|
||||
/// <summary>Construct with the ambient clock (use <see cref="TimeProvider.System"/> in prod).</summary>
|
||||
/// <param name="timeProvider">Clock source for pipeline timeouts + breaker sampling. Defaults to system.</param>
|
||||
/// <param name="statusTracker">When non-null, every built pipeline wires Polly telemetry into
|
||||
/// the tracker — retries increment <c>ConsecutiveFailures</c>, breaker-open stamps
|
||||
/// <c>LastBreakerOpenUtc</c>, breaker-close resets failures. Feeds Admin <c>/hosts</c> +
|
||||
/// the Polly bulkhead-depth column. Absent tracker means no telemetry (unit tests +
|
||||
/// deployments that don't care about resilience observability).</param>
|
||||
public DriverResiliencePipelineBuilder(
|
||||
TimeProvider? timeProvider = null,
|
||||
DriverResilienceStatusTracker? statusTracker = null)
|
||||
{
|
||||
_timeProvider = timeProvider ?? TimeProvider.System;
|
||||
_statusTracker = statusTracker;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Get or build the pipeline for a given <c>(driver instance, host, capability)</c> triple.
|
||||
/// Calls with the same key + same options reuse the same pipeline instance; the first caller
|
||||
/// wins if a race occurs (both pipelines would be behaviourally identical).
|
||||
/// </summary>
|
||||
/// <param name="driverInstanceId">DriverInstance primary key — opaque to this layer.</param>
|
||||
/// <param name="hostName">
|
||||
/// Host the call targets. For single-host drivers (Galaxy, some OPC UA Client configs) pass the
|
||||
/// driver's canonical host string. For multi-host drivers (Modbus with N PLCs), pass the
|
||||
/// specific PLC so one dead PLC doesn't poison healthy siblings.
|
||||
/// </param>
|
||||
/// <param name="capability">Which capability surface is being called.</param>
|
||||
/// <param name="options">Per-driver-instance options (tier + per-capability overrides).</param>
|
||||
public ResiliencePipeline GetOrCreate(
|
||||
string driverInstanceId,
|
||||
string hostName,
|
||||
DriverCapability capability,
|
||||
DriverResilienceOptions options)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(options);
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(hostName);
|
||||
|
||||
var key = new PipelineKey(driverInstanceId, hostName, capability);
|
||||
return _pipelines.GetOrAdd(key, static (k, state) => Build(
|
||||
k.DriverInstanceId, k.HostName, state.capability, state.options, state.timeProvider, state.tracker),
|
||||
(capability, options, timeProvider: _timeProvider, tracker: _statusTracker));
|
||||
}
|
||||
|
||||
/// <summary>Drop cached pipelines for one driver instance (e.g. on ResilienceConfig change). Test + Admin-reload use.</summary>
|
||||
public int Invalidate(string driverInstanceId)
|
||||
{
|
||||
var removed = 0;
|
||||
foreach (var key in _pipelines.Keys)
|
||||
{
|
||||
if (key.DriverInstanceId == driverInstanceId && _pipelines.TryRemove(key, out _))
|
||||
removed++;
|
||||
}
|
||||
return removed;
|
||||
}
|
||||
|
||||
/// <summary>Snapshot of the current number of cached pipelines. For diagnostics only.</summary>
|
||||
public int CachedPipelineCount => _pipelines.Count;
|
||||
|
||||
private static ResiliencePipeline Build(
|
||||
string driverInstanceId,
|
||||
string hostName,
|
||||
DriverCapability capability,
|
||||
DriverResilienceOptions options,
|
||||
TimeProvider timeProvider,
|
||||
DriverResilienceStatusTracker? tracker)
|
||||
{
|
||||
var policy = options.Resolve(capability);
|
||||
var builder = new ResiliencePipelineBuilder { TimeProvider = timeProvider };
|
||||
|
||||
builder.AddTimeout(new TimeoutStrategyOptions
|
||||
{
|
||||
Timeout = TimeSpan.FromSeconds(policy.TimeoutSeconds),
|
||||
});
|
||||
|
||||
if (policy.RetryCount > 0)
|
||||
{
|
||||
var retryOptions = new RetryStrategyOptions
|
||||
{
|
||||
MaxRetryAttempts = policy.RetryCount,
|
||||
BackoffType = DelayBackoffType.Exponential,
|
||||
UseJitter = true,
|
||||
Delay = TimeSpan.FromMilliseconds(100),
|
||||
MaxDelay = TimeSpan.FromSeconds(5),
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex => ex is not OperationCanceledException),
|
||||
};
|
||||
if (tracker is not null)
|
||||
{
|
||||
retryOptions.OnRetry = args =>
|
||||
{
|
||||
tracker.RecordFailure(driverInstanceId, hostName, timeProvider.GetUtcNow().UtcDateTime);
|
||||
return default;
|
||||
};
|
||||
}
|
||||
builder.AddRetry(retryOptions);
|
||||
}
|
||||
|
||||
if (policy.BreakerFailureThreshold > 0)
|
||||
{
|
||||
var breakerOptions = new CircuitBreakerStrategyOptions
|
||||
{
|
||||
FailureRatio = 1.0,
|
||||
MinimumThroughput = policy.BreakerFailureThreshold,
|
||||
SamplingDuration = TimeSpan.FromSeconds(30),
|
||||
BreakDuration = TimeSpan.FromSeconds(15),
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex => ex is not OperationCanceledException),
|
||||
};
|
||||
if (tracker is not null)
|
||||
{
|
||||
breakerOptions.OnOpened = args =>
|
||||
{
|
||||
tracker.RecordBreakerOpen(driverInstanceId, hostName, timeProvider.GetUtcNow().UtcDateTime);
|
||||
return default;
|
||||
};
|
||||
breakerOptions.OnClosed = args =>
|
||||
{
|
||||
// Closing the breaker means the target recovered — reset the consecutive-
|
||||
// failure counter so Admin UI stops flashing red for this host.
|
||||
tracker.RecordSuccess(driverInstanceId, hostName, timeProvider.GetUtcNow().UtcDateTime);
|
||||
return default;
|
||||
};
|
||||
}
|
||||
builder.AddCircuitBreaker(breakerOptions);
|
||||
}
|
||||
|
||||
return builder.Build();
|
||||
}
|
||||
|
||||
private readonly record struct PipelineKey(string DriverInstanceId, string HostName, DriverCapability Capability);
|
||||
}
|
||||
Reference in New Issue
Block a user