fix(scripted-alarms): resolve High code-review finding (Core.ScriptedAlarms-001)
_alarms was a plain Dictionary<string, AlarmState> mutated under the _evalGate semaphore, but four read paths (GetState, GetAllStates, the LoadedAlarmIds property, and RunShelvingCheck) touched it from arbitrary threads with no synchronisation. A Dictionary read concurrent with a writer's entry reassignment can throw InvalidOperationException or return torn state. Switched _alarms to ConcurrentDictionary<string, AlarmState>. The only write shapes are indexer-set and Clear, both atomic on ConcurrentDictionary, so all mutations stay correct without further change; reads now get safe snapshot semantics. LoadedAlarmIds materialises the key snapshot to keep its IReadOnlyCollection<string> return type. This matches _valueCache, which is already a ConcurrentDictionary. Added a regression test (Concurrent_reads_during_mutation_do_not_throw) that hammers the engine with state mutations while four reader threads continuously call the three unguarded read paths. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
| Review date | 2026-05-22 |
|
||||
| Commit reviewed | `76d35d1` |
|
||||
| Status | Reviewed |
|
||||
| Open findings | 12 |
|
||||
| Open findings | 11 |
|
||||
|
||||
## Checklist coverage
|
||||
|
||||
@@ -36,13 +36,13 @@ a category produced nothing rather than leaving it blank.
|
||||
| Severity | High |
|
||||
| Category | Concurrency & thread safety |
|
||||
| Location | `ScriptedAlarmEngine.cs:175`, `ScriptedAlarmEngine.cs:178`, `ScriptedAlarmEngine.cs:73`, `ScriptedAlarmEngine.cs:368` |
|
||||
| Status | Open |
|
||||
| Status | Resolved |
|
||||
|
||||
**Description:** `_alarms` is a plain `Dictionary<string, AlarmState>` (line 42). Every mutation of it (`LoadAsync`, `ApplyAsync`, `ReevaluateAsync`, `ShelvingCheckAsync`) correctly happens under the `_evalGate` semaphore, but four read paths touch it with no synchronisation: `GetState` (line 175), `GetAllStates` (line 178-179), the `LoadedAlarmIds` property (line 73), and `RunShelvingCheck` (line 368, `_alarms.Keys.ToArray()`). `RunShelvingCheck` fires from a `Timer` thread-pool callback and can run concurrently with an `ApplyAsync`/`ReevaluateAsync` that is reassigning a dictionary entry. `Dictionary` is not safe for concurrent read while another thread writes — even a value reassignment can be observed mid-rehash and throw `InvalidOperationException` or return torn state. `GetState`/`GetAllStates` are documented as being used by the Admin UI status page, so these reads come from arbitrary request threads.
|
||||
|
||||
**Recommendation:** Either switch `_alarms` to `ConcurrentDictionary<string, AlarmState>` (entry reassignment via `_alarms[id] = ...` is already the only write shape, which a `ConcurrentDictionary` supports atomically), or acquire `_evalGate` in every reader. A `ConcurrentDictionary` is the lighter change and matches `_valueCache`, which is already concurrent.
|
||||
|
||||
**Resolution:** _(open)_
|
||||
**Resolution:** Resolved 2026-05-22 — switched `_alarms` to `ConcurrentDictionary<string, AlarmState>` so the four unguarded read paths are safe against concurrent under-gate entry reassignment; added a concurrency regression test.
|
||||
|
||||
### Core.ScriptedAlarms-002
|
||||
|
||||
|
||||
@@ -39,7 +39,15 @@ public sealed class ScriptedAlarmEngine : IDisposable
|
||||
private readonly Func<DateTime> _clock;
|
||||
private readonly TimeSpan _scriptTimeout;
|
||||
|
||||
private readonly Dictionary<string, AlarmState> _alarms = new(StringComparer.Ordinal);
|
||||
// ConcurrentDictionary, not a plain Dictionary: every mutation happens under
|
||||
// _evalGate, but four read paths (GetState, GetAllStates, LoadedAlarmIds,
|
||||
// RunShelvingCheck) touch _alarms from arbitrary threads (Admin UI request
|
||||
// threads, the shelving Timer thread-pool callback) without holding the gate.
|
||||
// A plain Dictionary read concurrent with a writer's entry reassignment can
|
||||
// throw or return torn state; ConcurrentDictionary makes entry assignment and
|
||||
// snapshot enumeration safe. The only write shapes are indexer-set and Clear,
|
||||
// both of which ConcurrentDictionary supports atomically. (Core.ScriptedAlarms-001)
|
||||
private readonly ConcurrentDictionary<string, AlarmState> _alarms = new(StringComparer.Ordinal);
|
||||
private readonly ConcurrentDictionary<string, DataValueSnapshot> _valueCache
|
||||
= new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, HashSet<string>> _alarmsReferencing
|
||||
@@ -70,7 +78,7 @@ public sealed class ScriptedAlarmEngine : IDisposable
|
||||
/// <summary>Raised for every emission the Part9StateMachine produces that the engine should publish.</summary>
|
||||
public event EventHandler<ScriptedAlarmEvent>? OnEvent;
|
||||
|
||||
public IReadOnlyCollection<string> LoadedAlarmIds => _alarms.Keys;
|
||||
public IReadOnlyCollection<string> LoadedAlarmIds => _alarms.Keys.ToArray();
|
||||
|
||||
/// <summary>
|
||||
/// Load a batch of alarm definitions. Compiles every predicate, aggregates any
|
||||
|
||||
@@ -303,6 +303,67 @@ public sealed class ScriptedAlarmEngineTests
|
||||
up.ActiveSubscriptionCount.ShouldBe(0);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Concurrent_reads_during_mutation_do_not_throw(/* Core.ScriptedAlarms-001 */)
|
||||
{
|
||||
// Regression for Core.ScriptedAlarms-001: GetState / GetAllStates /
|
||||
// LoadedAlarmIds touch _alarms from arbitrary threads with no lock while
|
||||
// ApplyAsync / ReevaluateAsync reassign dictionary entries under _evalGate.
|
||||
// With a plain Dictionary this race throws InvalidOperationException or
|
||||
// returns torn state; with a ConcurrentDictionary the reads are safe.
|
||||
var up = new FakeUpstream();
|
||||
const int alarmCount = 40;
|
||||
var defs = new List<ScriptedAlarmDefinition>();
|
||||
for (var i = 0; i < alarmCount; i++)
|
||||
{
|
||||
up.Set($"Temp{i}", 50);
|
||||
defs.Add(Alarm($"A{i}", $$"""return (int)ctx.GetTag("Temp{{i}}").Value > 100;"""));
|
||||
}
|
||||
|
||||
using var eng = Build(up, out _);
|
||||
await eng.LoadAsync(defs, TestContext.Current.CancellationToken);
|
||||
|
||||
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
|
||||
Exception? readerFailure = null;
|
||||
|
||||
// Writer: hammer the engine with state mutations that reassign _alarms entries.
|
||||
var writer = Task.Run(async () =>
|
||||
{
|
||||
var round = 0;
|
||||
while (!cts.IsCancellationRequested)
|
||||
{
|
||||
var id = $"A{round % alarmCount}";
|
||||
await eng.AddCommentAsync(id, "tester", $"r{round}", CancellationToken.None);
|
||||
up.Push($"Temp{round % alarmCount}", round % 2 == 0 ? 150 : 50);
|
||||
round++;
|
||||
}
|
||||
});
|
||||
|
||||
// Readers: continuously touch the three unguarded read paths.
|
||||
var readers = Enumerable.Range(0, 4).Select(_reader => Task.Run(() =>
|
||||
{
|
||||
try
|
||||
{
|
||||
while (!cts.IsCancellationRequested)
|
||||
{
|
||||
_ = eng.LoadedAlarmIds.Count;
|
||||
_ = eng.GetAllStates().Count;
|
||||
for (var i = 0; i < alarmCount; i++)
|
||||
_ = eng.GetState($"A{i}");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
readerFailure = ex;
|
||||
}
|
||||
})).ToArray();
|
||||
|
||||
await Task.WhenAll([writer, .. readers]);
|
||||
|
||||
readerFailure.ShouldBeNull(
|
||||
"concurrent reads of _alarms while it is being mutated must not throw");
|
||||
}
|
||||
|
||||
private static async Task WaitForAsync(Func<bool> cond, int timeoutMs = 2000)
|
||||
{
|
||||
var deadline = DateTime.UtcNow.AddMilliseconds(timeoutMs);
|
||||
|
||||
Reference in New Issue
Block a user