fix(review): remediate re-review findings — DCL-029/InboundAPI-031/SiteRuntime-032/StoreAndForward-028 + Low doc/test
Fixes the 8 findings from the 2026-06-24 re-review (commit c42bb485), with a
regression test per Medium finding:
- DataConnectionLayer-029 (Med): HandleAlarmSubscribeCompleted now mirrors the
tag-path re-check — if a feed is already stored for the source, release the
redundant just-created subscription instead of overwriting + leaking the first
one (the double-subscribe window DCL-023 reopened). +regression test.
- InboundAPI-031 (Med): remove WaitForAttribute's local 5s grace backstop. It was
tighter than the timeout+IntegrationTimeout round-trip budget of the
CommunicationService Ask, so a slow-but-valid timed-out 'false' response was
cancelled and surfaced as a 500. Link only the client-abort + explicit caller
tokens; the lower layer owns the backstop. +test.
- SiteRuntime-032 (Med): derive the deployed count from an authoritative set of
deployed config names (HashSet) instead of a map-presence-gated int, so deleting
a DISABLED instance decrements correctly (SiteRuntime-029's gate leaked it).
+deploy->disable->delete regression test.
- StoreAndForward-028 (Med): reset _bufferedCount in StopAsync alongside the
register-guard so a same-instance Stop->Start re-seeds from a clean base (no ~2N
gauge double-count). +restart regression test.
- AuditLog-017 (Low): add a test that pins the OnIngestAsync scope-resolution guard
(the actor survives, replies empty, and counts the failure); the guard was
previously not covered by any test.
- CentralUI-037 / ScriptAnalysis-009 / SiteRuntime-033 (Low): doc-comment + spec
fixes (Database-throws in the inbound sandbox; baseReferences param wording;
native-alarm cap return-to-normal + per-condition NativeAlarmDropped eviction).
Targeted suites green: SiteRuntime 5, StoreAndForward 6, InboundAPI 31,
DataConnectionLayer 10, AuditLog 5, ScriptAnalysis 40, CentralUI ScriptAnalysis 52.
This commit is contained in:
@@ -161,7 +161,9 @@ public class ScriptAnalysisService
 /// <see cref="SandboxMaxCallSharedDepth"/>. <c>CallScript</c> still throws
 /// because a shared script has no template siblings in this context.
 /// For the SandboxInboundScriptHost surface, every <c>Route</c> call throws
-/// because cross-site routing needs a deployed site.
+/// because cross-site routing needs a deployed site, and every <c>Database</c>
+/// call (<c>QuerySingleAsync</c>/<c>QueryAsync</c>/<c>ExecuteAsync</c>) throws
+/// because a Test Run has no configured central database connection.
 /// Console.Out / Console.Error are captured per-call via an AsyncLocal
 /// scope (see <see cref="SandboxConsoleCapture"/>) so writes from the script
 /// land in the result without mutating process-global Console state — two
@@ -1822,8 +1822,29 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
 
 if (msg.Success && msg.SubscriptionId != null)
 {
-    _alarmSubscriptionIds[msg.SourceReference] = msg.SubscriptionId;
-    _log.Info("[{0}] Alarm feed subscribed for source {1}", _connectionName, msg.SourceReference);
+    // DataConnectionLayer-029: a concurrent unsubscribe clears the in-flight
+    // marker (DCL-023), so a fresh subscribe for the same source can issue a
+    // SECOND adapter feed before this completion fires — yielding two completions
+    // for one source. Mirror the tag-path re-check (see HandleTagSubscribeCompleted,
+    // the `_subscriptionIds.ContainsKey` guard): if a feed is already stored, THIS
+    // completion is the redundant one — release its feed rather than overwriting
+    // the stored id and leaking the already-tracked subscription.
+    if (_alarmSubscriptionIds.ContainsKey(msg.SourceReference))
+    {
+        if (_adapter is IAlarmSubscribableConnection alarmable)
+        {
+            _log.Warning(
+                "[{0}] Duplicate alarm feed for source {1}; releasing the redundant " +
+                "subscription instead of overwriting the stored one.",
+                _connectionName, msg.SourceReference);
+            _ = alarmable.UnsubscribeAlarmsAsync(msg.SubscriptionId);
+        }
+    }
+    else
+    {
+        _alarmSubscriptionIds[msg.SourceReference] = msg.SubscriptionId;
+        _log.Info("[{0}] Alarm feed subscribed for source {1}", _connectionName, msg.SourceReference);
+    }
 }
 else if (!msg.Success)
 {
@@ -104,13 +104,6 @@ public class RouteHelper
 /// </summary>
 public class RouteTarget
 {
-    // InboundAPI-029: a small grace past the wait timeout. The SITE enforces the wait
-    // timeout and returns Matched=false when it elapses; the local backstop fires only
-    // if the site fails to respond, so it must sit slightly LATER than the wait timeout
-    // (it must not pre-empt the site's own timed-out response and turn a clean `false`
-    // into a cancellation).
-    private static readonly TimeSpan WaitResponseGrace = TimeSpan.FromSeconds(5);
-
     private readonly string _instanceCode;
     private readonly IInstanceLocator _instanceLocator;
     private readonly IInstanceRouter _instanceRouter;
@@ -261,11 +254,18 @@ public class RouteTarget
     TimeSpan timeout,
     CancellationToken cancellationToken = default)
 {
-    // InboundAPI-029: bound the wait by the WAIT timeout (+ grace backstop), the
-    // client-disconnect token, and an explicit caller token — NOT the method deadline.
-    using var waitCts = new CancellationTokenSource(timeout + WaitResponseGrace);
+    // InboundAPI-031: do NOT impose a local wait-timeout backstop here. The site
+    // enforces the wait `timeout` and returns Matched=false when it elapses, and the
+    // cluster Ask in CommunicationService.RouteToWaitForAttributeAsync already bounds
+    // the round trip by `timeout + IntegrationTimeout` — the authoritative backstop
+    // for a missing site response. A local CTS of `timeout + small grace` (the prior
+    // InboundAPI-029 approach) was TIGHTER than that round-trip budget, so a
+    // slow-but-valid timed-out response could be cancelled into an exception instead
+    // of the spec-mandated `false`. Link ONLY the client-disconnect token and an
+    // explicit caller token — NOT the method deadline — so a client abort still
+    // cancels the wait while the wait timeout itself governs the duration.
     using var linked = CancellationTokenSource.CreateLinkedTokenSource(
-        waitCts.Token, _requestAbortedToken, cancellationToken);
+        _requestAbortedToken, cancellationToken);
     var token = linked.Token;
     var siteId = await ResolveSiteAsync(token);
 
@@ -104,11 +104,13 @@ public static class ScriptTrustValidator
 var violations = new SortedSet<string>(StringComparer.Ordinal);
 
 // ---- Pass 1: semantic symbol analysis (ported from SiteRuntime) ----
-// Use the full trusted-platform reference set (not the minimal
-// runtime-fidelity DefaultReferences) so EVERY type a script names
+// Resolve against the supplied baseReferences so EVERY type a script names
 // resolves and is judged by its true namespace — closing the
 // forbidden-type-in-allowed-namespace blind spot (e.g. a bare
-// System.Diagnostics.Process via `using System.Diagnostics;`).
+// System.Diagnostics.Process via `using System.Diagnostics;`). The public
+// entry point passes the full trusted-platform reference set; a caller on the
+// degraded/test path may instead pass the minimal anchor-enriched fallback
+// (BuildMinimalFallbackReferences()).
 var references = baseReferences.ToList();
 if (extraReferences != null)
     references.AddRange(extraReferences);
@@ -73,7 +73,14 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
 /// Cleared in <see cref="HandleTerminated"/> alongside <see cref="_pendingRedeploys"/>.
 /// </summary>
 private readonly Dictionary<string, IActorRef> _terminatingActorsByName = new();
-private int _totalDeployedCount;
+/// <summary>
+/// SiteRuntime-032: authoritative set of deployed instance config names (enabled
+/// AND disabled). The deployed/disabled health counts are derived from this set's
+/// size, so add-on-deploy / remove-on-delete keeps the count correct for every
+/// path — including deleting a DISABLED instance, which has a config row but is
+/// absent from both <see cref="_instanceActors"/> and <see cref="_terminatingActorsByName"/>.
+/// </summary>
+private readonly HashSet<string> _deployedInstanceNames = new();
 
 /// <summary>Akka timer scheduler injected by the framework via <see cref="IWithTimers"/>.</summary>
 public ITimerScheduler Timers { get; set; } = null!;
@@ -268,7 +275,9 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
 }
 
 var enabledConfigs = msg.Configs.Where(c => c.IsEnabled).ToList();
-_totalDeployedCount = msg.Configs.Count;
+_deployedInstanceNames.Clear();
+foreach (var c in msg.Configs)
+    _deployedInstanceNames.Add(c.InstanceUniqueName);
 _logger.LogInformation(
     "Loaded {Total} deployed configs ({Enabled} enabled) from SQLite",
     msg.Configs.Count, enabledConfigs.Count);
@@ -436,7 +445,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
 // Create the Instance Actor immediately
 CreateInstanceActor(instanceName, command.FlattenedConfigurationJson);
 if (!isRedeploy)
-    _totalDeployedCount++;
+    _deployedInstanceNames.Add(instanceName);
 UpdateInstanceCounts();
 
 // Persist to SQLite and clear static overrides asynchronously
@@ -510,7 +519,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
 if (_instanceActors.Remove(result.InstanceName, out var orphan))
     Context.Stop(orphan);
 if (!result.IsRedeploy)
-    _totalDeployedCount = Math.Max(0, _totalDeployedCount - 1);
+    _deployedInstanceNames.Remove(result.InstanceName);
 UpdateInstanceCounts();
 
 result.OriginalSender.Tell(new DeploymentStatusResponse(
@@ -657,7 +666,6 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
 // left intact — so when Terminated fires, HandleTerminated calls
 // ApplyDeployment(isRedeploy: true) and RESURRECTS the just-deleted instance,
 // with the counter now inconsistent. Cancel the pending redeploy first.
-var wasPresent = false;
 if (_terminatingActorsByName.TryGetValue(instanceName, out var terminatingRef))
 {
     // Drop the buffered command so HandleTerminated's _pendingRedeploys.Remove
@@ -674,20 +682,20 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
     _terminatingActorsByName.Remove(instanceName);
     // The terminating predecessor is already being stopped by HandleDeploy;
     // no Context.Stop needed here.
-    wasPresent = true;
 }
 else if (_instanceActors.TryGetValue(instanceName, out var actor))
 {
     Context.Stop(actor);
     _instanceActors.Remove(instanceName);
-    wasPresent = true;
 }
 
-// SiteRuntime-029: only decrement when the instance was actually present
-// (live in _instanceActors OR mid-redeploy in _terminatingActorsByName).
-// A delete for a wholly-unknown instance must not drive the count negative.
-if (wasPresent)
-    _totalDeployedCount = Math.Max(0, _totalDeployedCount - 1);
+// SiteRuntime-032: the deployed count is derived from the authoritative set of
+// deployed config names, so removing the name here decrements it. Correct for a
+// live, mid-redeploy, OR DISABLED instance (a disabled instance has a config row
+// but is absent from both in-memory maps); a delete for a never-deployed instance
+// removes nothing and leaves the count unchanged. Supersedes SiteRuntime-029's
+// map-presence gate, which leaked the count on disabled-instance deletes.
+_deployedInstanceNames.Remove(instanceName);
 UpdateInstanceCounts();
 
 var sender = Sender;
@@ -1547,14 +1555,14 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
 
 /// <summary>
 /// Updates the health collector with current instance counts.
-/// Total deployed = _totalDeployedCount, enabled = running actors, disabled = difference.
+/// Total deployed = _deployedInstanceNames.Count, enabled = running actors, disabled = difference.
 /// </summary>
 private void UpdateInstanceCounts()
 {
     _healthCollector?.SetInstanceCounts(
-        deployed: _totalDeployedCount,
+        deployed: _deployedInstanceNames.Count,
         enabled: _instanceActors.Count,
-        disabled: _totalDeployedCount - _instanceActors.Count);
+        disabled: _deployedInstanceNames.Count - _instanceActors.Count);
 }
 
 // ── Internal messages ──
@@ -456,6 +456,13 @@ public class StoreAndForwardService
         ScadaBridgeTelemetry.ClearQueueDepthProvider(provider);
         _queueDepthProvider = null;
     }
+    // StoreAndForward-028: reset the cached depth alongside the registration guard.
+    // StartAsync re-seeds _bufferedCount from the durable Pending count under this
+    // same guard; without resetting here, a same-instance Stop->Start would ADD the
+    // re-read count on top of the leftover in-memory value, double-counting the gauge
+    // (~2N). Reset to zero so the restart seeds from a clean base. Buffered rows
+    // remain durable in SQLite, so the re-seed restores the true count.
+    Interlocked.Exchange(ref _bufferedCount, 0);
     Interlocked.Exchange(ref _queueDepthProviderRegistered, 0);
 }
 
Reference in New Issue
Block a user