refactor(siteruntime): M3.3 ValidateTrustModel delegates to shared ScriptAnalysis + compile-surface parity test

This commit is contained in:
Joseph Doherty
2026-06-16 19:37:50 -04:00
parent 14bd25196a
commit 64d6ac7288
6 changed files with 195 additions and 185 deletions
@@ -1,49 +1,23 @@
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Scripting;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.ScriptAnalysis;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
/// <summary>
/// WP-19: Script Trust Model — compiles C# scripts using Roslyn with restricted API access.
/// Forbidden APIs: System.IO, Process, Threading (except async/await), Reflection,
/// System.Net.Sockets, System.Net.Http.
/// The forbidden-API verdict is delegated to the shared authoritative
/// <see cref="ScriptTrustValidator"/> (M3.1 consolidation); this service keeps the real
/// execution-path compile of the script against <see cref="ScriptGlobals"/> /
/// <see cref="TriggerExpressionGlobals"/>.
/// </summary>
public class ScriptCompilationService
{
private readonly ILogger<ScriptCompilationService> _logger;
/// <summary>
/// Forbidden API roots. Each entry is matched as a prefix against both the resolved
/// symbol's containing namespace and its fully-qualified containing type name, so an
/// entry may name a whole namespace ("System.IO") or a single type
/// ("System.Diagnostics.Process").
/// </summary>
private static readonly string[] ForbiddenNamespaces =
[
"System.IO",
"System.Diagnostics.Process",
"System.Threading",
"System.Reflection",
"System.Net.Sockets",
"System.Net.Http"
];
/// <summary>
/// Specific namespaces/types allowed even though they sit under a forbidden root.
/// async/await and cancellation tokens are OK despite System.Threading being blocked.
/// </summary>
private static readonly string[] AllowedExceptions =
[
"System.Threading.Tasks",
"System.Threading.CancellationToken",
"System.Threading.CancellationTokenSource"
];
/// <summary>Initializes a new instance of the ScriptCompilationService class.</summary>
/// <param name="logger">Logger instance.</param>
public ScriptCompilationService(ILogger<ScriptCompilationService> logger)
@@ -54,160 +28,24 @@ public class ScriptCompilationService
/// <summary>
/// SiteRuntime-011: validates that the script does not reference forbidden APIs.
///
/// Validation is performed with Roslyn semantic analysis rather than a raw substring
/// scan of the source text. The script is parsed and a semantic model is built; every
/// identifier, type reference, member access, and object creation is resolved to its
/// symbol and the symbol's containing namespace is checked against the forbidden list.
///
/// This is reliable in both directions a textual scan was not:
/// - it catches forbidden types regardless of how they are written (<c>global::</c>
/// prefixes, aliases, transitively-imported namespaces) because it inspects the
/// resolved symbol, not the spelling;
/// - it does not raise false positives for the namespace string appearing in a
/// comment, a string literal, or an unrelated identifier.
/// As of the M3.1 script-analysis consolidation this delegates to the shared
/// authoritative <see cref="ScriptTrustValidator.FindViolations(string, System.Collections.Generic.IEnumerable{MetadataReference})"/>,
/// which is the same Roslyn semantic-symbol analysis this service previously hosted
/// plus reflection-gateway / <c>dynamic</c> / <c>Activator</c> hardening ported from
/// the InboundAPI checker. The shared validator is the single source of truth for the
/// forbidden-API deny-list; SiteRuntime retains only the real execution-path compile
/// in <see cref="CompileCore"/>.
///
/// Returns a list of violation messages, empty if clean.
/// </summary>
/// <param name="code">The script code to validate.</param>
/// <returns>A list of trust-model violation messages; empty if the script is clean.</returns>
public IReadOnlyList<string> ValidateTrustModel(string code)
{
// Parse in script mode (SourceCodeKind.Script) rather than as a regular compilation unit.
var tree = CSharpSyntaxTree.ParseText(
code, new CSharpParseOptions(kind: SourceCodeKind.Script));
// Analysis-only compilation built over ScriptReferences; this method only queries its
// semantic model and never emits or executes anything.
var compilation = CSharpCompilation.CreateScriptCompilation(
"TrustValidation",
tree,
ScriptReferences,
new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary));
var model = compilation.GetSemanticModel(tree);
var root = tree.GetRoot();
// Deduplicate so a forbidden symbol used many times is reported once but
// distinct forbidden symbols are all reported. The ordinal comparer also makes
// the returned message order deterministic (ordinal-sorted).
var violations = new SortedSet<string>(StringComparer.Ordinal);
foreach (var node in root.DescendantNodes())
{
// Only inspect nodes that name a type or member; every other node kind
// (literals, statements, declarations as such) is passed over. Member-access
// and qualified-name nodes are evaluated as a whole.
// NOTE(review): their nested name parts are NOT skipped by this loop —
// DescendantNodes() also yields them and each one is checked in its own right.
if (node is not (SimpleNameSyntax or MemberAccessExpressionSyntax
or QualifiedNameSyntax or ObjectCreationExpressionSyntax))
{
continue;
}
// Prefer the bound symbol; when binding produced only candidates, fall back
// to the first candidate so the reference is still checked.
var info = model.GetSymbolInfo(node);
var symbol = info.Symbol ?? info.CandidateSymbols.FirstOrDefault();
// The set of fully-qualified scopes this reference touches: the resolved
// symbol's containing namespace and type, or — when the symbol could not
// be resolved (a type from an unreferenced assembly) — the syntactic
// fully-qualified name written in source as a safe fallback.
var scopes = symbol != null
? GetSymbolScopes(symbol)
: GetSyntacticScopes(node);
if (scopes.Count == 0)
continue;
// First forbidden root that any of the scopes falls under; null means clean.
var forbidden = ForbiddenNamespaces.FirstOrDefault(
f => scopes.Any(s => IsUnderScope(s, f)));
if (forbidden == null)
continue;
// Allow specific exception namespaces/types (async/await, cancellation).
if (scopes.Any(s => AllowedExceptions.Any(a => IsUnderScope(s, a))))
continue;
// Unresolved references have no symbol, so the node's source text is reported instead.
var name = symbol?.Name ?? node.ToString();
violations.Add($"Forbidden API reference: '{forbidden}' ({scopes[0]}.{name})");
}
return violations.ToList();
}
=> ScriptTrustValidator.FindViolations(code);
/// <summary>
/// Collects the fully-qualified scopes that a resolved symbol lives in: the display
/// name of the owning type (the symbol itself when it is a type, otherwise its
/// containing type) followed by that type's non-global containing namespace. A
/// symbol with no owning type contributes only its non-global containing namespace.
/// Namespace symbols yield no scopes at all, because naming a namespace performs no
/// action by itself.
/// </summary>
/// <param name="symbol">The symbol a syntax node was bound to.</param>
/// <returns>Zero, one or two scope names; never null.</returns>
private static List<string> GetSymbolScopes(ISymbol symbol)
{
    var result = new List<string>();

    // A bare namespace reference is harmless, and skipping it avoids flagging the
    // "System.Threading" qualifier of the allowed "System.Threading.Tasks.Task".
    if (symbol is INamespaceSymbol)
    {
        return result;
    }

    // A type is its own owner; a member is owned by the type that declares it.
    var owningType = symbol as ITypeSymbol ?? symbol.ContainingType;

    var enclosingNamespace = owningType is not null
        ? owningType.ContainingNamespace
        : symbol.ContainingNamespace;

    if (owningType is not null)
    {
        result.Add(owningType.ToDisplayString());
    }

    if (enclosingNamespace is { IsGlobalNamespace: false })
    {
        result.Add(enclosingNamespace.ToDisplayString());
    }

    return result;
}
/// <summary>
/// Syntactic fallback for a name that did not bind to any symbol (for example a type
/// from an assembly the script is not allowed to reference). The dotted name exactly
/// as written in source is treated as the scope, so a script spelling out
/// <c>System.Net.Http.HttpClient</c> is still rejected even though that assembly is
/// deliberately missing from the script's metadata references.
/// </summary>
/// <param name="node">The unresolved syntax node.</param>
/// <returns>
/// A single-element list holding the whitespace-free dotted name, or an empty list
/// when the node is not a qualified name / member access or contains no dot.
/// </returns>
private static List<string> GetSyntacticScopes(SyntaxNode node)
{
    // Only dotted forms can spell a fully-qualified scope; a bare identifier cannot
    // reach a forbidden namespace.
    if (node is not (QualifiedNameSyntax or MemberAccessExpressionSyntax))
    {
        return [];
    }

    // A member-access chain may be split across lines; drop all whitespace so the
    // name compares cleanly against the forbidden roots.
    var compact = string.Concat(node.ToString().Where(ch => !char.IsWhiteSpace(ch)));

    if (!compact.Contains('.'))
    {
        return [];
    }

    return [compact];
}
/// <summary>
/// Tests whether <paramref name="actual"/> equals <paramref name="root"/> or is a
/// dotted descendant of it. For example "System.IO.Compression" is under "System.IO",
/// and "System.Diagnostics.Process" is under itself, while "System.IOx" is not under
/// "System.IO". All comparisons are ordinal.
/// </summary>
/// <param name="actual">The fully-qualified scope being tested.</param>
/// <param name="root">The namespace or type name acting as the root.</param>
/// <returns>True when <paramref name="actual"/> is the root or nested within it.</returns>
private static bool IsUnderScope(string actual, string root)
{
    if (actual.Equals(root, StringComparison.Ordinal))
    {
        return true;
    }

    // Requiring the trailing dot stops sibling names that merely share a textual
    // prefix from matching.
    var nestedPrefix = root + ".";
    return actual.StartsWith(nestedPrefix, StringComparison.Ordinal);
}
/// <summary>
/// Assemblies referenced by compiled scripts. Shared between the Roslyn scripting
/// options and the semantic-analysis compilation built for trust validation
/// (SiteRuntime-011), so the validator resolves symbols against exactly the same
/// metadata the script is compiled against.
/// Assemblies referenced by compiled scripts, used to build the Roslyn scripting
/// options for the real execution-path compile.
/// </summary>
private static readonly System.Reflection.Assembly[] ScriptAssemblies =
[
@@ -218,14 +56,6 @@ public class ScriptCompilationService
typeof(Commons.Types.DynamicJsonElement).Assembly
];
/// <summary>
/// Metadata references for the trust-validation semantic compilation.
/// </summary>
private static readonly MetadataReference[] ScriptReferences =
ScriptAssemblies
.Select(a => (MetadataReference)MetadataReference.CreateFromFile(a.Location))
.ToArray();
/// <summary>
/// Shared Roslyn scripting options (references + imports) used by both full
/// script compilation and trigger-expression compilation.
@@ -35,6 +35,7 @@
<ItemGroup>
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.Commons/ZB.MOM.WW.ScadaBridge.Commons.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.Communication/ZB.MOM.WW.ScadaBridge.Communication.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.ScriptAnalysis/ZB.MOM.WW.ScadaBridge.ScriptAnalysis.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.HealthMonitoring/ZB.MOM.WW.ScadaBridge.HealthMonitoring.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.SiteEventLogging/ZB.MOM.WW.ScadaBridge.SiteEventLogging.csproj" />
<ProjectReference Include="../ZB.MOM.WW.ScadaBridge.StoreAndForward/ZB.MOM.WW.ScadaBridge.StoreAndForward.csproj" />
@@ -0,0 +1,105 @@
using System.Reflection;
using ZB.MOM.WW.ScadaBridge.ScriptAnalysis;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Scripts;
/// <summary>
/// M3.3: compile-surface parity guard. The shared
/// <see cref="ScriptCompileSurface"/> / <see cref="TriggerCompileSurface"/> are
/// compile-only stubs the design-time deploy gate binds candidate scripts
/// against; they must mirror the real SiteRuntime <see cref="ScriptGlobals"/> /
/// <see cref="TriggerExpressionGlobals"/> bind surfaces. If a member a script can
/// reference on the real globals is missing from the compile surface, a script
/// that uses it would pass the design-time gate but fail at the site — so the
/// compile surface's public top-level member NAMES must be a SUPERSET of the
/// real globals' names. This test fails loudly (listing the missing names) when
/// the surface drifts behind the runtime globals.
///
/// <para>
/// Top-level member-name parity is sufficient: the design-time compile against
/// the surface catches deeper signature mismatches itself; this guard only
/// ensures every entry point a script can name on the real globals exists on the
/// stub.
/// </para>
///
/// <para>
/// Properties, methods AND public fields are compared: a public field on a
/// globals type can be named by a script just like a property, so leaving fields
/// out would let that kind of drift slip past the guard.
/// </para>
/// </summary>
public class CompileSurfaceParityTests
{
    [Fact]
    public void ScriptCompileSurface_MemberNames_AreSupersetOf_ScriptGlobals()
    {
        AssertSurfaceCoversGlobals(
            surface: typeof(ScriptCompileSurface),
            globals: typeof(ScriptGlobals));
    }

    [Fact]
    public void TriggerCompileSurface_MemberNames_AreSupersetOf_TriggerExpressionGlobals()
    {
        AssertSurfaceCoversGlobals(
            surface: typeof(TriggerCompileSurface),
            globals: typeof(TriggerExpressionGlobals));
    }

    /// <summary>
    /// Asserts that the public instance property, field and method member names of
    /// <paramref name="surface"/> are a superset of those of
    /// <paramref name="globals"/>. Inherited <see cref="object"/> members
    /// (ToString/GetHashCode/Equals/GetType) are excluded. Fails with the exact
    /// list of missing names when the surface does not cover the globals.
    /// </summary>
    /// <param name="surface">The compile-only stub type scripts are bound against at design time.</param>
    /// <param name="globals">The real runtime globals type the stub must mirror.</param>
    private static void AssertSurfaceCoversGlobals(Type surface, Type globals)
    {
        var surfaceNames = PublicInstanceMemberNames(surface);
        var globalsNames = PublicInstanceMemberNames(globals);

        // Ordinal ordering keeps the failure message stable from run to run.
        var missing = globalsNames
            .Where(name => !surfaceNames.Contains(name))
            .OrderBy(name => name, StringComparer.Ordinal)
            .ToList();

        Assert.True(
            missing.Count == 0,
            $"Compile surface '{surface.Name}' is missing {missing.Count} member name(s) "
            + $"present on '{globals.Name}': {string.Join(", ", missing)}. "
            + "The compile-only surface must mirror the runtime globals — add the "
            + "missing member(s) to the ScriptAnalysis surface type.");
    }

    /// <summary>
    /// Public instance property, field and method member names declared on or
    /// inherited by <paramref name="type"/>, excluding the inherited
    /// <see cref="object"/> members (ToString, GetHashCode, Equals, GetType) so only
    /// script-reachable API names remain. Names are compared (no signatures).
    /// </summary>
    /// <param name="type">The type whose public instance surface is enumerated.</param>
    /// <returns>An ordinal, case-sensitive set of member names.</returns>
    private static HashSet<string> PublicInstanceMemberNames(Type type)
    {
        const BindingFlags flags = BindingFlags.Public | BindingFlags.Instance;

        var objectMembers = new HashSet<string>(StringComparer.Ordinal)
        {
            nameof(ToString),
            nameof(GetHashCode),
            nameof(Equals),
            nameof(GetType),
        };

        var names = new HashSet<string>(StringComparer.Ordinal);

        foreach (var property in type.GetProperties(flags))
        {
            names.Add(property.Name);
        }

        // Public fields are as reachable from a script as properties are, so they
        // must take part in the parity check too.
        foreach (var field in type.GetFields(flags))
        {
            names.Add(field.Name);
        }

        foreach (var method in type.GetMethods(flags))
        {
            // Skip compiler-generated accessors (get_X / set_X) — the property
            // itself is already counted by name above.
            if (method.IsSpecialName)
            {
                continue;
            }

            if (objectMembers.Contains(method.Name))
            {
                continue;
            }

            names.Add(method.Name);
        }

        return names;
    }
}
@@ -5,6 +5,16 @@ namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Scripts;
/// <summary>
/// WP-19: Script Trust Model tests — validates forbidden API detection and compilation.
///
/// As of the M3.3 consolidation, <c>ScriptCompilationService.ValidateTrustModel</c>
/// delegates its forbidden-API verdict to the shared authoritative
/// <c>ScriptAnalysis.ScriptTrustValidator</c>, which is stricter than SiteRuntime's
/// original deny-list: ALL of <c>System.Net</c> is forbidden (not just Sockets/Http),
/// plus reflection gateways, <c>dynamic</c>, <c>Activator</c>,
/// <c>System.Runtime.InteropServices</c> and <c>Microsoft.Win32</c>. Only
/// <c>System.Diagnostics.Process</c> is blocked under System.Diagnostics —
/// <c>Stopwatch</c> stays allowed. The real execution-path compile against
/// <c>ScriptGlobals</c> / <c>TriggerExpressionGlobals</c> is unchanged.
/// </summary>
public class ScriptCompilationServiceTests
{
@@ -108,4 +118,61 @@ public class ScriptCompilationServiceTests
Assert.False(result.IsSuccess);
Assert.NotEmpty(result.Errors);
}
// ── M3.3: stricter shared-validator behavior ──
[Fact]
public void ValidateTrustModel_SystemNetDns_Forbidden()
{
    // Arrange: System.Net.Dns sits outside Sockets/Http, which the old SiteRuntime
    // list allowed; the shared validator forbids ALL of System.Net.
    const string script = "System.Net.Dns.GetHostName()";

    // Act
    var violations = _service.ValidateTrustModel(script);

    // Assert: a violation is reported and it names the System.Net root.
    Assert.NotEmpty(violations);
    Assert.Contains(violations, v => v.Contains("System.Net"));
}
[Fact]
public void ValidateTrustModel_ReflectionGatewayViaPermittedType_Forbidden()
{
    // Arrange: the expression starts from a permitted type and never spells a
    // forbidden namespace in code (only inside a string literal). The old
    // SiteRuntime list did NOT catch this; the shared validator rejects the
    // reflection-gateway members regardless of receiver.
    const string script = "typeof(string).Assembly.GetType(\"System.IO.File\")";

    // Act
    var violations = _service.ValidateTrustModel(script);

    // Assert
    Assert.NotEmpty(violations);
}
[Fact]
public void ValidateTrustModel_Dynamic_Forbidden()
{
    // Arrange: a script whose only questionable construct is the dynamic keyword.
    const string script = "dynamic d = 1; return d;";

    // Act
    var violations = _service.ValidateTrustModel(script);

    // Assert
    Assert.NotEmpty(violations);
}
[Fact]
public void ValidateTrustModel_Activator_Forbidden()
{
    // Arrange: Activator is reached through its simple name, without a namespace qualifier.
    const string script = "Activator.CreateInstance(typeof(string))";

    // Act
    var violations = _service.ValidateTrustModel(script);

    // Assert
    Assert.NotEmpty(violations);
}
[Fact]
public void ValidateTrustModel_InteropServices_Forbidden()
{
    // Arrange: a fully-qualified call into System.Runtime.InteropServices.
    const string script = "System.Runtime.InteropServices.Marshal.SizeOf<int>()";

    // Act
    var violations = _service.ValidateTrustModel(script);

    // Assert
    Assert.NotEmpty(violations);
}
[Fact]
public void ValidateTrustModel_Stopwatch_Allowed()
{
    // Arrange: under System.Diagnostics only Process is blocked, so a Stopwatch
    // script must come back clean.
    const string script =
        "var sw = System.Diagnostics.Stopwatch.StartNew(); return sw.ElapsedMilliseconds;";

    // Act
    var violations = _service.ValidateTrustModel(script);

    // Assert
    Assert.Empty(violations);
}
}
@@ -8,6 +8,12 @@ namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Tests.Scripts;
/// The previous implementation was a raw substring scan of the source text — it both
/// missed forbidden APIs (no literal namespace string) and raised false positives on
/// the namespace string appearing in comments, string literals or unrelated identifiers.
///
/// As of M3.3, <c>ValidateTrustModel</c> delegates to the shared authoritative
/// <c>ScriptAnalysis.ScriptTrustValidator</c>, which retains this same Roslyn
/// semantic-symbol analysis (plus reflection-gateway hardening), so these bypass /
/// false-positive / allowed-exception regressions continue to hold through the
/// delegating service.
/// </summary>
public class TrustModelSemanticTests
{
@@ -25,6 +25,7 @@
<ItemGroup>
<ProjectReference Include="../../src/ZB.MOM.WW.ScadaBridge.SiteRuntime/ZB.MOM.WW.ScadaBridge.SiteRuntime.csproj" />
<ProjectReference Include="../../src/ZB.MOM.WW.ScadaBridge.ScriptAnalysis/ZB.MOM.WW.ScadaBridge.ScriptAnalysis.csproj" />
<ProjectReference Include="../../src/ZB.MOM.WW.ScadaBridge.Commons/ZB.MOM.WW.ScadaBridge.Commons.csproj" />
<ProjectReference Include="../../src/ZB.MOM.WW.ScadaBridge.HealthMonitoring/ZB.MOM.WW.ScadaBridge.HealthMonitoring.csproj" />
</ItemGroup>