diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Scripting/CompiledScriptCache.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Scripting/CompiledScriptCache.cs new file mode 100644 index 0000000..4b59278 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core.Scripting/CompiledScriptCache.cs @@ -0,0 +1,83 @@ +using System.Collections.Concurrent; +using System.Security.Cryptography; +using System.Text; + +namespace ZB.MOM.WW.OtOpcUa.Core.Scripting; + +/// +/// Source-hash-keyed compile cache for user scripts. Roslyn compilation is the most +/// expensive step in the evaluator pipeline (5-20ms per script depending on size); +/// re-compiling on every value-change event would starve the virtual-tag engine. +/// The cache is generic on the subclass + result type so +/// different engines (virtual-tag / alarm-predicate / future alarm-action) each get +/// their own cache instance — there's no cross-type pollution. +/// +/// +/// +/// Concurrent-safe: of +/// means a miss on two threads compiles exactly once. +/// guarantees other +/// threads block on the in-flight compile rather than racing to duplicate work. +/// +/// +/// Cache is keyed on SHA-256 of the UTF-8 bytes of the source — collision-free in +/// practice. Whitespace changes therefore miss the cache on purpose; operators +/// see re-compile time on their first evaluation after a format-only edit which +/// is rare and benign. +/// +/// +/// No capacity bound. Virtual-tag + alarm scripts are operator-authored and +/// bounded by config DB (typically low thousands). If that changes in v3, add an +/// LRU eviction policy — the API stays the same. +/// +/// +public sealed class CompiledScriptCache + where TContext : ScriptContext +{ + private readonly ConcurrentDictionary>> _cache = new(); + + /// + /// Return the compiled evaluator for , compiling + /// on first sight + reusing thereafter. If the source fails to compile, the + /// original Roslyn / sandbox exception propagates; the cache entry is removed so + /// the next call retries (useful during Admin UI authoring when the operator is + /// still fixing syntax). + /// + public ScriptEvaluator GetOrCompile(string scriptSource) + { + if (scriptSource is null) throw new ArgumentNullException(nameof(scriptSource)); + + var key = HashSource(scriptSource); + var lazy = _cache.GetOrAdd(key, _ => new Lazy>( + () => ScriptEvaluator.Compile(scriptSource), + LazyThreadSafetyMode.ExecutionAndPublication)); + + try + { + return lazy.Value; + } + catch + { + // Failed compile — evict so a retry with corrected source can succeed. + _cache.TryRemove(key, out _); + throw; + } + } + + /// Current entry count. Exposed for Admin UI diagnostics / tests. + public int Count => _cache.Count; + + /// Drop every cached compile. Used on config generation publish + tests. + public void Clear() => _cache.Clear(); + + /// True when the exact source has been compiled at least once + is still cached. + public bool Contains(string scriptSource) + => _cache.ContainsKey(HashSource(scriptSource)); + + private static string HashSource(string source) + { + var bytes = Encoding.UTF8.GetBytes(source); + var hash = SHA256.HashData(bytes); + return Convert.ToHexString(hash); + } +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Scripting/TimedScriptEvaluator.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Scripting/TimedScriptEvaluator.cs new file mode 100644 index 0000000..237435f --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core.Scripting/TimedScriptEvaluator.cs @@ -0,0 +1,102 @@ +namespace ZB.MOM.WW.OtOpcUa.Core.Scripting; + +/// +/// Wraps a with a per-evaluation +/// wall-clock timeout. Default is 250ms per Phase 7 plan Stream A.4; configurable +/// per tag so deployments with slower backends can widen it. +/// +/// +/// +/// Implemented with +/// rather than a cancellation-token-only approach because Roslyn-compiled +/// scripts don't internally poll the cancellation token unless the user code +/// does async work. A CPU-bound infinite loop in a script won't honor a +/// cooperative cancel — WaitAsync returns control when the timeout fires +/// regardless of whether the inner task completes. +/// +/// +/// Known limitation: when a script times out, the underlying ScriptRunner +/// task continues running on a thread-pool thread until the Roslyn runtime +/// returns. In the CPU-bound-infinite-loop case that's effectively "leaked" — +/// the thread is tied up until the runtime decides to return, which it may +/// never do. Phase 7 plan Stream A.4 accepts this as a known trade-off; tighter +/// CPU budgeting would require an out-of-process script runner, which is a v3 +/// concern. In practice, the timeout + structured warning log surfaces the +/// offending script so the operator can fix it; the orphan thread is rare. +/// +/// +/// Caller-supplied is honored — if the caller +/// cancels before the timeout fires, the caller's cancel wins and the +/// propagates (not wrapped as +/// ). That distinction matters: the +/// virtual-tag engine's shutdown path cancels scripts on dispose; it shouldn't +/// see those as timeouts. +/// +/// +public sealed class TimedScriptEvaluator + where TContext : ScriptContext +{ + /// Default timeout per Phase 7 plan Stream A.4 — 250ms. + public static readonly TimeSpan DefaultTimeout = TimeSpan.FromMilliseconds(250); + + private readonly ScriptEvaluator _inner; + + /// Wall-clock budget per evaluation. Script exceeding this throws . + public TimeSpan Timeout { get; } + + public TimedScriptEvaluator(ScriptEvaluator inner) + : this(inner, DefaultTimeout) + { + } + + public TimedScriptEvaluator(ScriptEvaluator inner, TimeSpan timeout) + { + _inner = inner ?? throw new ArgumentNullException(nameof(inner)); + if (timeout <= TimeSpan.Zero) + throw new ArgumentOutOfRangeException(nameof(timeout), "Timeout must be positive."); + Timeout = timeout; + } + + public async Task RunAsync(TContext context, CancellationToken ct = default) + { + if (context is null) throw new ArgumentNullException(nameof(context)); + + // Push evaluation to a thread-pool thread so a CPU-bound script (e.g. a tight + // loop with no async work) doesn't hog the caller's thread before WaitAsync + // gets to register its timeout. Without this, Roslyn's ScriptRunner executes + // synchronously on the calling thread and returns an already-completed Task, + // so WaitAsync sees a completed task and never fires the timeout. + var runTask = Task.Run(() => _inner.RunAsync(context, ct), ct); + try + { + return await runTask.WaitAsync(Timeout, ct).ConfigureAwait(false); + } + catch (TimeoutException) + { + // WaitAsync's synthesized timeout — the inner task may still be running + // on its thread-pool thread (known leak documented in the class summary). + // Wrap so callers can distinguish from user-written timeout logic. + throw new ScriptTimeoutException(Timeout); + } + } +} + +/// +/// Thrown when a script evaluation exceeds its configured timeout. The virtual-tag +/// engine (Stream B) catches this + maps the owning tag's quality to +/// BadInternalError per Phase 7 plan decision #11, logging a structured +/// warning with the offending script name so operators can locate + fix it. +/// +public sealed class ScriptTimeoutException : Exception +{ + public TimeSpan Timeout { get; } + + public ScriptTimeoutException(TimeSpan timeout) + : base($"Script evaluation exceeded the configured timeout of {timeout.TotalMilliseconds:F1} ms. " + + "The script was either CPU-bound or blocked on a slow operation; check ctx.Logger output " + + "around the timeout and consider widening the timeout per tag, simplifying the script, or " + + "moving heavy work out of the evaluation path.") + { + Timeout = timeout; + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/CompiledScriptCacheTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/CompiledScriptCacheTests.cs new file mode 100644 index 0000000..3fa9ce7 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/CompiledScriptCacheTests.cs @@ -0,0 +1,151 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Scripting; + +namespace ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests; + +/// +/// Exercises the source-hash keyed compile cache. Roslyn compilation is the most +/// expensive step in the evaluator pipeline; this cache collapses redundant +/// compiles of unchanged scripts to zero-cost lookups + makes sure concurrent +/// callers never double-compile. +/// +[Trait("Category", "Unit")] +public sealed class CompiledScriptCacheTests +{ + private sealed class CompileCountingGate + { + public int Count; + } + + [Fact] + public void First_call_compiles_and_caches() + { + var cache = new CompiledScriptCache(); + cache.Count.ShouldBe(0); + + var e = cache.GetOrCompile("""return 42;"""); + e.ShouldNotBeNull(); + cache.Count.ShouldBe(1); + cache.Contains("""return 42;""").ShouldBeTrue(); + } + + [Fact] + public void Identical_source_returns_the_same_compiled_evaluator() + { + var cache = new CompiledScriptCache(); + var first = cache.GetOrCompile("""return 1;"""); + var second = cache.GetOrCompile("""return 1;"""); + ReferenceEquals(first, second).ShouldBeTrue(); + cache.Count.ShouldBe(1); + } + + [Fact] + public void Different_source_produces_different_evaluator() + { + var cache = new CompiledScriptCache(); + var a = cache.GetOrCompile("""return 1;"""); + var b = cache.GetOrCompile("""return 2;"""); + ReferenceEquals(a, b).ShouldBeFalse(); + cache.Count.ShouldBe(2); + } + + [Fact] + public void Whitespace_difference_misses_cache() + { + // Documented behavior: reformatting a script recompiles. Simpler + cheaper + // than the alternative (AST-canonicalize then hash) and doesn't happen often. + var cache = new CompiledScriptCache(); + cache.GetOrCompile("""return 1;"""); + cache.GetOrCompile("return 1; "); // trailing whitespace — different hash + cache.Count.ShouldBe(2); + } + + [Fact] + public async Task Cached_evaluator_still_runs_correctly() + { + var cache = new CompiledScriptCache(); + var e = cache.GetOrCompile("""return (double)ctx.GetTag("In").Value * 3.0;"""); + var ctx = new FakeScriptContext().Seed("In", 7.0); + + // Run twice through the cache — both must return the same correct value. + var first = await e.RunAsync(ctx, TestContext.Current.CancellationToken); + var second = await cache.GetOrCompile("""return (double)ctx.GetTag("In").Value * 3.0;""") + .RunAsync(ctx, TestContext.Current.CancellationToken); + first.ShouldBe(21.0); + second.ShouldBe(21.0); + } + + [Fact] + public void Failed_compile_is_evicted_so_retry_with_corrected_source_works() + { + var cache = new CompiledScriptCache(); + + // First attempt — undefined identifier, compile throws. + Should.Throw(() => cache.GetOrCompile("""return unknownIdentifier + 1;""")); + cache.Count.ShouldBe(0, "failed compile must be evicted so retry can re-attempt"); + + // Retry with corrected source succeeds + caches. + cache.GetOrCompile("""return 42;""").ShouldNotBeNull(); + cache.Count.ShouldBe(1); + } + + [Fact] + public void Clear_drops_every_entry() + { + var cache = new CompiledScriptCache(); + cache.GetOrCompile("""return 1;"""); + cache.GetOrCompile("""return 2;"""); + cache.Count.ShouldBe(2); + + cache.Clear(); + cache.Count.ShouldBe(0); + cache.Contains("""return 1;""").ShouldBeFalse(); + } + + [Fact] + public void Concurrent_compiles_of_the_same_source_deduplicate() + { + // LazyThreadSafetyMode.ExecutionAndPublication guarantees only one compile + // even when multiple threads race GetOrCompile against an empty cache. + // We can't directly count Roslyn compilations — but we can assert all + // concurrent callers see the same evaluator instance. + var cache = new CompiledScriptCache(); + const string src = """return 99;"""; + + var tasks = Enumerable.Range(0, 20) + .Select(_ => Task.Run(() => cache.GetOrCompile(src))) + .ToArray(); + Task.WhenAll(tasks).GetAwaiter().GetResult(); + + var firstInstance = tasks[0].Result; + foreach (var t in tasks) + ReferenceEquals(t.Result, firstInstance).ShouldBeTrue(); + cache.Count.ShouldBe(1); + } + + [Fact] + public void Different_TContext_TResult_pairs_use_separate_cache_instances() + { + // Documented: each engine (virtual-tag / alarm-predicate / alarm-action) owns + // its own cache. The type-parametric design makes this the default without + // cross-contamination at the dictionary level. + var intCache = new CompiledScriptCache(); + var boolCache = new CompiledScriptCache(); + + intCache.GetOrCompile("""return 1;"""); + boolCache.GetOrCompile("""return true;"""); + + intCache.Count.ShouldBe(1); + boolCache.Count.ShouldBe(1); + intCache.Contains("""return true;""").ShouldBeFalse(); + boolCache.Contains("""return 1;""").ShouldBeFalse(); + } + + [Fact] + public void Null_source_throws_ArgumentNullException() + { + var cache = new CompiledScriptCache(); + Should.Throw(() => cache.GetOrCompile(null!)); + } +} diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/TimedScriptEvaluatorTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/TimedScriptEvaluatorTests.cs new file mode 100644 index 0000000..765f0f1 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests/TimedScriptEvaluatorTests.cs @@ -0,0 +1,134 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Scripting; + +namespace ZB.MOM.WW.OtOpcUa.Core.Scripting.Tests; + +/// +/// Verifies the per-evaluation timeout wrapper. Fast scripts complete normally; +/// CPU-bound or hung scripts throw instead of +/// starving the engine. Caller-supplied cancellation tokens take precedence over the +/// timeout so driver-shutdown paths see a clean cancel rather than a timeout. +/// +[Trait("Category", "Unit")] +public sealed class TimedScriptEvaluatorTests +{ + [Fact] + public async Task Fast_script_completes_under_timeout_and_returns_value() + { + var inner = ScriptEvaluator.Compile( + """return (double)ctx.GetTag("In").Value + 1.0;"""); + var timed = new TimedScriptEvaluator( + inner, TimeSpan.FromSeconds(1)); + + var ctx = new FakeScriptContext().Seed("In", 41.0); + var result = await timed.RunAsync(ctx, TestContext.Current.CancellationToken); + result.ShouldBe(42.0); + } + + [Fact] + public async Task Script_longer_than_timeout_throws_ScriptTimeoutException() + { + // Scripts can't easily do Thread.Sleep in the sandbox (System.Threading.Thread + // is denied). But a tight CPU loop exceeds any short timeout. + var inner = ScriptEvaluator.Compile( + """ + var end = Environment.TickCount64 + 5000; + while (Environment.TickCount64 < end) { } + return 1; + """); + var timed = new TimedScriptEvaluator( + inner, TimeSpan.FromMilliseconds(50)); + + var ex = await Should.ThrowAsync(async () => + await timed.RunAsync(new FakeScriptContext(), TestContext.Current.CancellationToken)); + ex.Timeout.ShouldBe(TimeSpan.FromMilliseconds(50)); + ex.Message.ShouldContain("50.0"); + } + + [Fact] + public async Task Caller_cancellation_takes_precedence_over_timeout() + { + // A CPU-bound script that would otherwise timeout; external ct fires first. + // Expected: OperationCanceledException (not ScriptTimeoutException) so shutdown + // paths aren't misclassified as timeouts. + var inner = ScriptEvaluator.Compile( + """ + var end = Environment.TickCount64 + 10000; + while (Environment.TickCount64 < end) { } + return 1; + """); + var timed = new TimedScriptEvaluator( + inner, TimeSpan.FromSeconds(5)); + + using var cts = new CancellationTokenSource(TimeSpan.FromMilliseconds(80)); + await Should.ThrowAsync(async () => + await timed.RunAsync(new FakeScriptContext(), cts.Token)); + } + + [Fact] + public void Default_timeout_is_250ms_per_plan() + { + TimedScriptEvaluator.DefaultTimeout + .ShouldBe(TimeSpan.FromMilliseconds(250)); + } + + [Fact] + public void Zero_or_negative_timeout_is_rejected_at_construction() + { + var inner = ScriptEvaluator.Compile("""return 1;"""); + Should.Throw(() => + new TimedScriptEvaluator(inner, TimeSpan.Zero)); + Should.Throw(() => + new TimedScriptEvaluator(inner, TimeSpan.FromMilliseconds(-1))); + } + + [Fact] + public void Null_inner_is_rejected() + { + Should.Throw(() => + new TimedScriptEvaluator(null!)); + } + + [Fact] + public void Null_context_is_rejected() + { + var inner = ScriptEvaluator.Compile("""return 1;"""); + var timed = new TimedScriptEvaluator(inner); + Should.ThrowAsync(async () => + await timed.RunAsync(null!, TestContext.Current.CancellationToken)); + } + + [Fact] + public async Task Script_exception_propagates_unwrapped() + { + // User-thrown exceptions must come through as-is — NOT wrapped in + // ScriptTimeoutException. The virtual-tag engine catches them per-tag and + // maps to BadInternalError; conflating with timeout would lose that info. + var inner = ScriptEvaluator.Compile( + """throw new InvalidOperationException("script boom");"""); + var timed = new TimedScriptEvaluator(inner, TimeSpan.FromSeconds(1)); + + var ex = await Should.ThrowAsync(async () => + await timed.RunAsync(new FakeScriptContext(), TestContext.Current.CancellationToken)); + ex.Message.ShouldBe("script boom"); + } + + [Fact] + public async Task ScriptTimeoutException_message_points_at_diagnostic_path() + { + var inner = ScriptEvaluator.Compile( + """ + var end = Environment.TickCount64 + 5000; + while (Environment.TickCount64 < end) { } + return 1; + """); + var timed = new TimedScriptEvaluator( + inner, TimeSpan.FromMilliseconds(30)); + + var ex = await Should.ThrowAsync(async () => + await timed.RunAsync(new FakeScriptContext(), TestContext.Current.CancellationToken)); + ex.Message.ShouldContain("ctx.Logger"); + ex.Message.ShouldContain("widening the timeout"); + } +}