fix(template-engine): resolve TemplateEngine-006..010 — code-region-aware API/brace scanning, composed-alarm override validation, N+1 fix, doc correction

This commit is contained in:
Joseph Doherty
2026-05-16 21:44:11 -04:00
parent 5672502d83
commit 804697f873
11 changed files with 832 additions and 160 deletions

View File

@@ -0,0 +1,413 @@
namespace ScadaLink.TemplateEngine.Validation;
/// <summary>
/// String/comment-aware scanner for the balanced-delimiter ("does it look like
/// valid C#") checks used by <see cref="ScriptCompiler"/> and
/// <c>SharedScriptService.ValidateSyntax</c>.
///
/// <para>
/// This is <b>not</b> a compiler. It is an interim structural check that walks
/// the source once and tracks <c>{}</c>, <c>[]</c> and <c>()</c> depth while
/// correctly skipping over the C# lexical constructs in which a delimiter is
/// inert: line/block comments, regular string literals (with <c>\</c> escapes),
/// verbatim strings (<c>@"..."</c>, where <c>""</c> escapes a quote and <c>\</c>
/// is literal), interpolated strings (<c>$"..."</c> / <c>$@"..."</c> — the holes
/// <c>{...}</c> are code and <c>{{</c>/<c>}}</c> are escaped braces), raw string
/// literals (<c>"""..."""</c>), and char literals (<c>'}'</c>).
/// </para>
///
/// <para>
/// It is intentionally conservative: when the real Roslyn-based compiler is
/// wired in (see <see cref="ScriptCompiler"/>) this hand-rolled scan should be
/// replaced by <c>CSharpSyntaxTree.ParseText</c> diagnostics. Until then this
/// scanner removes the false positives that a naive character count produced
/// for valid scripts containing a delimiter inside a string or comment.
/// </para>
/// </summary>
internal static class CSharpDelimiterScanner
{
/// <summary>The kind of delimiter mismatch found, if any.</summary>
internal enum Mismatch
{
None,
UnexpectedCloseBrace,
UnexpectedCloseBracket,
UnexpectedCloseParen,
UnclosedBrace,
UnclosedBracket,
UnclosedParen,
UnclosedBlockComment,
UnterminatedString,
UnterminatedChar,
}
/// <summary>
/// Returns true when <paramref name="pattern"/> occurs in a <b>code</b>
/// region of <paramref name="code"/> — i.e. not wholly inside a string
/// literal, char literal, or comment. Used by the interim forbidden-API
/// scan so that the inert text <c>System.IO.</c> in a comment or string
/// literal is not flagged as a forbidden API call (TemplateEngine-006).
///
/// <para>
/// This removes the false-positive half of the substring scan. It does
/// <b>not</b> close the bypass half: namespace aliases, <c>using static</c>,
/// and <c>global::</c>-qualified references still evade a pure text match.
/// Authoritative forbidden-API enforcement requires Roslyn semantic symbol
/// analysis and is deferred to the real script compiler / Site Runtime
/// sandbox; this check is advisory only.
/// </para>
/// </summary>
internal static bool ContainsInCode(string code, string pattern)
{
if (string.IsNullOrEmpty(pattern))
return false;
// Blank out every string/char-literal/comment span, then do an ordinary
// substring search over what remains (the code regions).
var codeOnly = BlankNonCodeSpans(code);
return codeOnly.Contains(pattern, StringComparison.Ordinal);
}
/// <summary>
/// Replaces the content of every comment, string literal, and char literal
/// with spaces (newlines preserved), leaving only code regions intact.
/// Delimiter characters themselves are also blanked so a pattern cannot
/// straddle a literal boundary.
/// </summary>
private static string BlankNonCodeSpans(string code)
{
var buffer = code.ToCharArray();
int n = code.Length;
int i = 0;
void Blank(int from, int to)
{
for (int k = from; k < to && k < n; k++)
if (buffer[k] != '\n' && buffer[k] != '\r')
buffer[k] = ' ';
}
while (i < n)
{
char c = code[i];
char next = i + 1 < n ? code[i + 1] : '\0';
int start = i;
if (c == '/' && next == '/')
{
i += 2;
while (i < n && code[i] != '\n') i++;
Blank(start, i);
continue;
}
if (c == '/' && next == '*')
{
i += 2;
while (i < n && !(code[i] == '*' && i + 1 < n && code[i + 1] == '/')) i++;
if (i < n) i += 2;
Blank(start, i);
continue;
}
if (c == '"' && next == '"' && i + 2 < n && code[i + 2] == '"')
{
SkipRawString(code, ref i);
Blank(start, i);
continue;
}
if (c == '$')
{
int j = i + 1;
bool verbatim = false;
if (j < n && code[j] == '@') { verbatim = true; j++; }
if (j < n && code[j] == '"')
{
i = j;
SkipInterpolatedString(code, ref i, verbatim);
Blank(start, i);
continue;
}
}
if (c == '@' && next == '"')
{
i++;
SkipVerbatimString(code, ref i);
Blank(start, i);
continue;
}
if (c == '"')
{
SkipRegularString(code, ref i);
Blank(start, i);
continue;
}
if (c == '\'')
{
SkipCharLiteral(code, ref i);
Blank(start, i);
continue;
}
i++;
}
return new string(buffer);
}
/// <summary>
/// Walks <paramref name="code"/> once and reports the first structural
/// delimiter problem, or <see cref="Mismatch.None"/> when the source is
/// balanced. Delimiters inside comments, strings, and char literals are
/// ignored.
/// </summary>
internal static Mismatch Scan(string code)
{
int brace = 0, bracket = 0, paren = 0;
int i = 0;
int n = code.Length;
while (i < n)
{
char c = code[i];
char next = i + 1 < n ? code[i + 1] : '\0';
// Line comment.
if (c == '/' && next == '/')
{
i += 2;
while (i < n && code[i] != '\n') i++;
continue;
}
// Block comment.
if (c == '/' && next == '*')
{
i += 2;
bool closed = false;
while (i < n)
{
if (code[i] == '*' && i + 1 < n && code[i + 1] == '/')
{
i += 2;
closed = true;
break;
}
i++;
}
if (!closed) return Mismatch.UnclosedBlockComment;
continue;
}
// Raw string literal: three or more consecutive quotes open it; the
// same number of quotes closes it. Detected before $/@-prefixed and
// plain strings.
if (c == '"' && next == '"' && i + 2 < n && code[i + 2] == '"')
{
if (!SkipRawString(code, ref i)) return Mismatch.UnterminatedString;
continue;
}
// Interpolated string ($"..." or $@"..." / @$"...").
if (c == '$')
{
int j = i + 1;
bool verbatim = false;
if (j < n && code[j] == '@') { verbatim = true; j++; }
if (j < n && code[j] == '"')
{
i = j;
if (!SkipInterpolatedString(code, ref i, verbatim)) return Mismatch.UnterminatedString;
continue;
}
}
// Verbatim string (@"...").
if (c == '@' && next == '"')
{
i++; // now on the opening quote
if (!SkipVerbatimString(code, ref i)) return Mismatch.UnterminatedString;
continue;
}
// Regular string literal.
if (c == '"')
{
if (!SkipRegularString(code, ref i)) return Mismatch.UnterminatedString;
continue;
}
// Char literal.
if (c == '\'')
{
if (!SkipCharLiteral(code, ref i)) return Mismatch.UnterminatedChar;
continue;
}
switch (c)
{
case '{': brace++; break;
case '}':
brace--;
if (brace < 0) return Mismatch.UnexpectedCloseBrace;
break;
case '[': bracket++; break;
case ']':
bracket--;
if (bracket < 0) return Mismatch.UnexpectedCloseBracket;
break;
case '(': paren++; break;
case ')':
paren--;
if (paren < 0) return Mismatch.UnexpectedCloseParen;
break;
}
i++;
}
if (brace != 0) return Mismatch.UnclosedBrace;
if (bracket != 0) return Mismatch.UnclosedBracket;
if (paren != 0) return Mismatch.UnclosedParen;
return Mismatch.None;
}
/// <summary>
/// Advances <paramref name="i"/> past a regular <c>"..."</c> string literal.
/// On entry <c>code[i] == '"'</c>. Returns false if the string is unterminated.
/// </summary>
private static bool SkipRegularString(string code, ref int i)
{
int n = code.Length;
i++; // past opening quote
while (i < n)
{
char c = code[i];
if (c == '\\') { i += 2; continue; } // escape — skip next char
if (c == '\n') return false; // unterminated (no multi-line)
if (c == '"') { i++; return true; }
i++;
}
return false;
}
/// <summary>
/// Advances past a verbatim <c>@"..."</c> string. On entry <c>code[i] == '"'</c>.
/// Inside, <c>\</c> is literal and <c>""</c> is an escaped quote.
/// </summary>
private static bool SkipVerbatimString(string code, ref int i)
{
int n = code.Length;
i++; // past opening quote
while (i < n)
{
if (code[i] == '"')
{
if (i + 1 < n && code[i + 1] == '"') { i += 2; continue; } // escaped quote
i++;
return true;
}
i++;
}
return false;
}
/// <summary>
/// Advances past an interpolated string. <paramref name="verbatim"/> selects
/// the <c>$@"..."</c> escaping rules. Interpolation holes <c>{...}</c> are
/// skipped over (their braces are code, not literal text); <c>{{</c>/<c>}}</c>
/// are escaped braces. On entry <c>code[i] == '"'</c>.
/// </summary>
private static bool SkipInterpolatedString(string code, ref int i, bool verbatim)
{
int n = code.Length;
i++; // past opening quote
while (i < n)
{
char c = code[i];
if (!verbatim && c == '\\') { i += 2; continue; }
if (c == '"')
{
if (verbatim && i + 1 < n && code[i + 1] == '"') { i += 2; continue; }
i++;
return true;
}
if (c == '{')
{
if (i + 1 < n && code[i + 1] == '{') { i += 2; continue; } // escaped brace
// Interpolation hole — skip to the matching '}', tracking nested
// braces so a hole containing an object initializer is handled.
i++;
int depth = 1;
while (i < n && depth > 0)
{
char h = code[i];
if (h == '{') depth++;
else if (h == '}') depth--;
else if (h == '"')
{
// A nested string inside the hole.
if (!SkipRegularString(code, ref i)) return false;
continue;
}
i++;
}
continue;
}
if (c == '}' && i + 1 < n && code[i + 1] == '}') { i += 2; continue; } // escaped brace
i++;
}
return false;
}
/// <summary>
/// Advances past a raw string literal <c>"""..."""</c> (C# 11). On entry
/// <c>code[i]</c> is the first of three or more opening quotes.
/// </summary>
private static bool SkipRawString(string code, ref int i)
{
int n = code.Length;
int openCount = 0;
while (i < n && code[i] == '"') { openCount++; i++; }
// Look for a run of the same number of quotes.
while (i < n)
{
if (code[i] == '"')
{
int closeCount = 0;
int start = i;
while (i < n && code[i] == '"') { closeCount++; i++; }
if (closeCount >= openCount) return true;
// Fewer quotes than the opener — they are literal content; keep scanning.
if (closeCount == 0) i = start + 1;
}
else
{
i++;
}
}
return false;
}
/// <summary>
/// Advances past a <c>'x'</c> char literal. On entry <c>code[i] == '\''</c>.
/// </summary>
private static bool SkipCharLiteral(string code, ref int i)
{
int n = code.Length;
i++; // past opening quote
while (i < n)
{
char c = code[i];
if (c == '\\') { i += 2; continue; }
if (c == '\n') return false;
if (c == '\'') { i++; return true; }
i++;
}
return false;
}
}

View File

@@ -9,6 +9,18 @@ namespace ScadaLink.TemplateEngine.Validation;
/// and enforces the forbidden API list (System.IO, Process, Threading, Reflection, raw network).
///
/// For now, this implementation performs basic syntax validation.
///
/// <para>
/// <b>SECURITY LIMITATION (TemplateEngine-006):</b> the forbidden-API check below
/// is an interim, <i>advisory</i> text scan — it is NOT an authoritative trust-model
/// boundary. <see cref="CSharpDelimiterScanner.ContainsInCode"/> removes the
/// false-positive half (forbidden text inside a string/comment is ignored), but a
/// determined script can still bypass the literal patterns via namespace aliases,
/// <c>using static</c>, or <c>global::</c>-qualified references. Authoritative
/// enforcement requires Roslyn semantic symbol analysis of the referenced
/// types/namespaces and is the responsibility of the real script compiler and the
/// Site Runtime sandbox. Do not rely on this class as the sole trust-model gate.
/// </para>
/// </summary>
public class ScriptCompiler
{
@@ -17,6 +29,13 @@ public class ScriptCompiler
/// <see cref="ValidationService"/>) must not use these. Trigger expressions run
/// under the same trust model as scripts, so the list is shared from here rather
/// than duplicated.
///
/// <para>
/// Matched with <see cref="CSharpDelimiterScanner.ContainsInCode"/> against code
/// regions only. This is advisory — see the class summary's SECURITY LIMITATION
/// note; the substring patterns are bypassable and the authoritative check is
/// deferred to Roslyn semantic analysis.
/// </para>
/// </summary>
internal static readonly string[] ForbiddenPatterns =
[
@@ -39,10 +58,12 @@ public class ScriptCompiler
if (string.IsNullOrWhiteSpace(code))
return Result<bool>.Failure($"Script '{scriptName}' has empty code.");
// Check for forbidden APIs
// Check for forbidden APIs. Advisory only (see class summary): the scan is
// code-region-aware so forbidden text inside a string/comment is ignored,
// but it remains a substring match and is not an authoritative boundary.
foreach (var pattern in ForbiddenPatterns)
{
if (code.Contains(pattern, StringComparison.Ordinal))
if (CSharpDelimiterScanner.ContainsInCode(code, pattern))
{
return Result<bool>.Failure(
$"Script '{scriptName}' uses forbidden API: '{pattern.TrimEnd('.')}'. " +
@@ -50,76 +71,35 @@ public class ScriptCompiler
}
}
// Basic brace matching validation
var braceDepth = 0;
var inString = false;
var inLineComment = false;
var inBlockComment = false;
for (int i = 0; i < code.Length; i++)
// Basic structural validation: balanced braces/brackets/parens. The scan
// is string- and comment-aware (see CSharpDelimiterScanner) so a delimiter
// inside a regular/verbatim/interpolated/raw string, a char literal, or a
// comment does not produce a false mismatch. This remains an interim check
// until the Roslyn-based compiler is wired in.
var mismatch = CSharpDelimiterScanner.Scan(code);
return mismatch switch
{
var c = code[i];
var next = i + 1 < code.Length ? code[i + 1] : '\0';
if (inLineComment)
{
if (c == '\n') inLineComment = false;
continue;
}
if (inBlockComment)
{
if (c == '*' && next == '/')
{
inBlockComment = false;
i++;
}
continue;
}
if (c == '/' && next == '/')
{
inLineComment = true;
i++;
continue;
}
if (c == '/' && next == '*')
{
inBlockComment = true;
i++;
continue;
}
if (c == '"' && !inString)
{
inString = true;
continue;
}
if (c == '"' && inString)
{
// Check for escaped quote
if (i > 0 && code[i - 1] != '\\')
inString = false;
continue;
}
if (inString) continue;
if (c == '{') braceDepth++;
else if (c == '}') braceDepth--;
if (braceDepth < 0)
return Result<bool>.Failure($"Script '{scriptName}' has mismatched braces (unexpected closing brace).");
}
if (braceDepth != 0)
return Result<bool>.Failure($"Script '{scriptName}' has mismatched braces ({braceDepth} unclosed).");
if (inBlockComment)
return Result<bool>.Failure($"Script '{scriptName}' has an unclosed block comment.");
return Result<bool>.Success(true);
CSharpDelimiterScanner.Mismatch.None =>
Result<bool>.Success(true),
CSharpDelimiterScanner.Mismatch.UnexpectedCloseBrace =>
Result<bool>.Failure($"Script '{scriptName}' has mismatched braces (unexpected closing brace)."),
CSharpDelimiterScanner.Mismatch.UnclosedBrace =>
Result<bool>.Failure($"Script '{scriptName}' has mismatched braces (unclosed opening brace)."),
CSharpDelimiterScanner.Mismatch.UnexpectedCloseBracket =>
Result<bool>.Failure($"Script '{scriptName}' has mismatched brackets (unexpected closing bracket)."),
CSharpDelimiterScanner.Mismatch.UnclosedBracket =>
Result<bool>.Failure($"Script '{scriptName}' has mismatched brackets (unclosed opening bracket)."),
CSharpDelimiterScanner.Mismatch.UnexpectedCloseParen =>
Result<bool>.Failure($"Script '{scriptName}' has mismatched parentheses (unexpected closing parenthesis)."),
CSharpDelimiterScanner.Mismatch.UnclosedParen =>
Result<bool>.Failure($"Script '{scriptName}' has mismatched parentheses (unclosed opening parenthesis)."),
CSharpDelimiterScanner.Mismatch.UnclosedBlockComment =>
Result<bool>.Failure($"Script '{scriptName}' has an unclosed block comment."),
CSharpDelimiterScanner.Mismatch.UnterminatedString =>
Result<bool>.Failure($"Script '{scriptName}' has an unterminated string literal."),
CSharpDelimiterScanner.Mismatch.UnterminatedChar =>
Result<bool>.Failure($"Script '{scriptName}' has an unterminated character literal."),
_ => Result<bool>.Success(true),
};
}
}

View File

@@ -317,9 +317,12 @@ public class ValidationService
/// </summary>
internal static string? CheckExpressionSyntax(string expression)
{
// Advisory forbidden-API scan (TemplateEngine-006): code-region-aware so
// the inert text inside a string/comment is not flagged, but still a
// substring match — not an authoritative boundary. See ScriptCompiler.
foreach (var pattern in ScriptCompiler.ForbiddenPatterns)
{
if (expression.Contains(pattern, StringComparison.Ordinal))
if (CSharpDelimiterScanner.ContainsInCode(expression, pattern))
{
return $"uses forbidden API '{pattern.TrimEnd('.')}'. " +
"Trigger expressions cannot use System.IO, Process, Threading, Reflection, or raw network APIs.";