414 lines
14 KiB
C#
414 lines
14 KiB
C#
namespace ScadaLink.TemplateEngine.Validation;
|
|
|
|
/// <summary>
/// String/comment-aware scanner for the balanced-delimiter ("does it look like
/// valid C#") checks used by <see cref="ScriptCompiler"/> and
/// <c>SharedScriptService.ValidateSyntax</c>.
///
/// <para>
/// This is <b>not</b> a compiler. It is an interim structural check that walks
/// the source once and tracks <c>{}</c>, <c>[]</c> and <c>()</c> depth while
/// skipping over the C# lexical constructs in which a delimiter is inert:
/// line/block comments, regular string literals (with <c>\</c> escapes),
/// verbatim strings (<c>@"..."</c>, where <c>""</c> escapes a quote and <c>\</c>
/// is literal), interpolated strings (<c>$"..."</c>, <c>$@"..."</c>,
/// <c>@$"..."</c> — the holes <c>{...}</c> are code and <c>{{</c>/<c>}}</c> are
/// escaped braces), raw string literals (<c>"""..."""</c>, including the
/// interpolated forms <c>$"""..."""</c> and <c>$$"""..."""</c>), and char
/// literals (<c>'}'</c>).
/// </para>
///
/// <para>
/// It is intentionally conservative: when the real Roslyn-based compiler is
/// wired in (see <see cref="ScriptCompiler"/>) this hand-rolled scan should be
/// replaced by <c>CSharpSyntaxTree.ParseText</c> diagnostics. Until then this
/// scanner removes the false positives that a naive character count produced
/// for valid scripts containing a delimiter inside a string or comment.
/// </para>
///
/// <para>
/// Known limitations: comments inside interpolation holes are not recognised,
/// preprocessor directives are not interpreted, and the three delimiter kinds
/// are counted independently (interleaving such as <c>([)]</c> is not reported).
/// </para>
/// </summary>
internal static class CSharpDelimiterScanner
{
    /// <summary>The kind of delimiter mismatch found, if any.</summary>
    internal enum Mismatch
    {
        /// <summary>No structural problem was found.</summary>
        None,

        /// <summary>A <c>}</c> appeared with no open <c>{</c>.</summary>
        UnexpectedCloseBrace,

        /// <summary>A <c>]</c> appeared with no open <c>[</c>.</summary>
        UnexpectedCloseBracket,

        /// <summary>A <c>)</c> appeared with no open <c>(</c>.</summary>
        UnexpectedCloseParen,

        /// <summary>End of input was reached with a <c>{</c> still open.</summary>
        UnclosedBrace,

        /// <summary>End of input was reached with a <c>[</c> still open.</summary>
        UnclosedBracket,

        /// <summary>End of input was reached with a <c>(</c> still open.</summary>
        UnclosedParen,

        /// <summary>A <c>/*</c> comment was never closed by <c>*/</c>.</summary>
        UnclosedBlockComment,

        /// <summary>A string literal (of any flavour) was never closed.</summary>
        UnterminatedString,

        /// <summary>A char literal was never closed.</summary>
        UnterminatedChar,
    }

    /// <summary>
    /// Span sink for callers that only need the cursor advanced and do not care
    /// which character ranges were inert (used by <see cref="Scan"/>).
    /// </summary>
    private static readonly Action<int, int> IgnoreSpan = (start, end) => { };

    /// <summary>
    /// Returns true when <paramref name="pattern"/> occurs in a <b>code</b>
    /// region of <paramref name="code"/> — i.e. not wholly inside a string
    /// literal, char literal, or comment. Used by the interim forbidden-API
    /// scan so that the inert text <c>System.IO.</c> in a comment or string
    /// literal is not flagged as a forbidden API call (TemplateEngine-006).
    /// The expressions inside interpolation holes (<c>$"{...}"</c>) are code
    /// and are searched.
    ///
    /// <para>
    /// This removes the false-positive half of the substring scan. It does
    /// <b>not</b> close the bypass half: namespace aliases, <c>using static</c>,
    /// and <c>global::</c>-qualified references still evade a pure text match.
    /// Authoritative forbidden-API enforcement requires Roslyn semantic symbol
    /// analysis and is deferred to the real script compiler / Site Runtime
    /// sandbox; this check is advisory only.
    /// </para>
    /// </summary>
    /// <param name="code">Script source to search; null or empty never matches.</param>
    /// <param name="pattern">Ordinal text to look for; null or empty never matches.</param>
    /// <returns>True when the pattern is found outside every inert span.</returns>
    internal static bool ContainsInCode(string code, string pattern)
    {
        if (string.IsNullOrEmpty(code) || string.IsNullOrEmpty(pattern))
        {
            return false;
        }

        // Blank out every string/char-literal/comment span, then do an ordinary
        // substring search over what remains (the code regions).
        return BlankNonCodeSpans(code).Contains(pattern, StringComparison.Ordinal);
    }

    /// <summary>
    /// Replaces the content of every comment, string literal, and char literal
    /// with spaces (newlines preserved), leaving only code regions intact.
    /// Delimiter characters themselves (quotes, prefixes, hole braces) are also
    /// blanked so a pattern cannot straddle a literal boundary. Interpolation
    /// hole contents are left in place because they are code.
    /// </summary>
    private static string BlankNonCodeSpans(string code)
    {
        char[] buffer = code.ToCharArray();

        void Blank(int start, int end)
        {
            int stop = Math.Min(end, buffer.Length);
            for (int k = start; k < stop; k++)
            {
                if (buffer[k] != '\n' && buffer[k] != '\r')
                {
                    buffer[k] = ' ';
                }
            }
        }

        int i = 0;
        while (i < code.Length)
        {
            // An unterminated construct is blanked as far as the skip got and
            // scanning simply resumes from there; failures are not reported here.
            if (!TrySkipInert(code, ref i, Blank, out _))
            {
                i++;
            }
        }

        return new string(buffer);
    }

    /// <summary>
    /// Walks <paramref name="code"/> once and reports the first structural
    /// delimiter problem, or <see cref="Mismatch.None"/> when the source is
    /// balanced. Delimiters inside comments, strings, and char literals are
    /// ignored. Delimiters inside interpolation holes are not counted either;
    /// hole braces are tracked only to find where the hole ends.
    /// </summary>
    /// <param name="code">Script source to check; null or empty is balanced.</param>
    /// <returns>The first problem encountered, or <see cref="Mismatch.None"/>.</returns>
    internal static Mismatch Scan(string code)
    {
        if (string.IsNullOrEmpty(code))
        {
            return Mismatch.None;
        }

        int brace = 0, bracket = 0, paren = 0;
        int i = 0;

        while (i < code.Length)
        {
            if (TrySkipInert(code, ref i, IgnoreSpan, out Mismatch failure))
            {
                if (failure != Mismatch.None)
                {
                    return failure;
                }

                continue;
            }

            switch (code[i])
            {
                case '{':
                    brace++;
                    break;
                case '}':
                    if (--brace < 0)
                    {
                        return Mismatch.UnexpectedCloseBrace;
                    }

                    break;
                case '[':
                    bracket++;
                    break;
                case ']':
                    if (--bracket < 0)
                    {
                        return Mismatch.UnexpectedCloseBracket;
                    }

                    break;
                case '(':
                    paren++;
                    break;
                case ')':
                    if (--paren < 0)
                    {
                        return Mismatch.UnexpectedCloseParen;
                    }

                    break;
            }

            i++;
        }

        if (brace != 0)
        {
            return Mismatch.UnclosedBrace;
        }

        if (bracket != 0)
        {
            return Mismatch.UnclosedBracket;
        }

        return paren != 0 ? Mismatch.UnclosedParen : Mismatch.None;
    }

    /// <summary>
    /// If a comment, string literal, or char literal starts at <paramref name="i"/>,
    /// advances <paramref name="i"/> past it and returns true; otherwise returns
    /// false and leaves <paramref name="i"/> unchanged. Shared by
    /// <see cref="Scan"/> and <see cref="BlankNonCodeSpans"/> so both agree on
    /// what counts as inert text.
    /// </summary>
    /// <param name="code">Source being scanned.</param>
    /// <param name="i">Cursor; must be a valid index on entry.</param>
    /// <param name="markInert">Receives each inert <c>[start, end)</c> range.</param>
    /// <param name="failure">
    /// <see cref="Mismatch.None"/>, or the reason the construct was unterminated
    /// (the method still returns true and the cursor has still advanced).
    /// </param>
    private static bool TrySkipInert(string code, ref int i, Action<int, int> markInert, out Mismatch failure)
    {
        return TrySkipComment(code, ref i, markInert, out failure)
            || TrySkipLiteral(code, ref i, markInert, out failure);
    }

    /// <summary>
    /// Skips a <c>//</c> line comment (up to, not including, the newline) or a
    /// <c>/* ... */</c> block comment starting at <paramref name="i"/>.
    /// Returns false when no comment starts there.
    /// </summary>
    private static bool TrySkipComment(string code, ref int i, Action<int, int> markInert, out Mismatch failure)
    {
        failure = Mismatch.None;
        int n = code.Length;
        if (code[i] != '/' || i + 1 >= n)
        {
            return false;
        }

        int start = i;
        char kind = code[i + 1];
        if (kind == '/')
        {
            int eol = code.IndexOf('\n', i + 2);
            i = eol < 0 ? n : eol;
        }
        else if (kind == '*')
        {
            // Search starts after "/*" so that "/*/" is not treated as closed.
            int close = code.IndexOf("*/", i + 2, StringComparison.Ordinal);
            if (close < 0)
            {
                i = n;
                failure = Mismatch.UnclosedBlockComment;
            }
            else
            {
                i = close + 2;
            }
        }
        else
        {
            return false;
        }

        markInert(start, i);
        return true;
    }

    /// <summary>
    /// Skips a char literal or any flavour of string literal starting at
    /// <paramref name="i"/>: <c>"..."</c>, <c>@"..."</c>, <c>$"..."</c>,
    /// <c>$@"..."</c>, <c>@$"..."</c>, <c>"""..."""</c> and the
    /// <c>$</c>-prefixed raw forms. Returns false (cursor unchanged) when no
    /// literal starts there, e.g. for <c>@identifier</c> or a stray <c>$</c>.
    /// </summary>
    private static bool TrySkipLiteral(string code, ref int i, Action<int, int> markInert, out Mismatch failure)
    {
        failure = Mismatch.None;
        int n = code.Length;
        int start = i;
        char c = code[i];

        if (c == '\'')
        {
            if (!SkipCharLiteral(code, ref i))
            {
                failure = Mismatch.UnterminatedChar;
            }

            markInert(start, i);
            return true;
        }

        if (c != '"' && c != '$' && c != '@')
        {
            return false;
        }

        // Read the optional prefix to find the opening quote.
        int quote = i;
        int dollars = 0;
        bool verbatim = false;
        if (c == '@')
        {
            verbatim = true;
            quote++;
            if (quote < n && code[quote] == '$')
            {
                dollars = 1;
                quote++;
            }
        }
        else if (c == '$')
        {
            while (quote < n && code[quote] == '$')
            {
                dollars++;
                quote++;
            }

            if (dollars == 1 && quote < n && code[quote] == '@')
            {
                verbatim = true;
                quote++;
            }
        }

        if (quote >= n || code[quote] != '"')
        {
            return false;
        }

        i = quote;

        // In a verbatim string a leading "" is an escaped quote, so three
        // quotes only open a raw string when there is no '@' prefix.
        bool raw = !verbatim && quote + 2 < n && code[quote + 1] == '"' && code[quote + 2] == '"';

        bool terminated;
        if (raw)
        {
            terminated = SkipRawString(code, ref i, start, dollars, markInert);
        }
        else if (dollars > 0)
        {
            terminated = SkipInterpolatedString(code, ref i, start, verbatim, markInert);
        }
        else
        {
            terminated = verbatim ? SkipVerbatimString(code, ref i) : SkipRegularString(code, ref i);
            markInert(start, i);
        }

        if (!terminated)
        {
            failure = Mismatch.UnterminatedString;
        }

        return true;
    }

    /// <summary>
    /// Advances <paramref name="i"/> past a regular <c>"..."</c> string literal.
    /// On entry <c>code[i] == '"'</c>. Returns false if the string is
    /// unterminated (end of input or a newline before the closing quote).
    /// </summary>
    private static bool SkipRegularString(string code, ref int i)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            char c = code[i];
            if (c == '\\')
            {
                // Escape: the next character is never a terminator.
                i = Math.Min(i + 2, n);
                continue;
            }

            if (c == '\n')
            {
                return false; // regular strings cannot span lines
            }

            i++;
            if (c == '"')
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Advances past a verbatim <c>@"..."</c> string. On entry <c>code[i] == '"'</c>.
    /// Inside, <c>\</c> is literal and <c>""</c> is an escaped quote. Returns
    /// false if end of input is reached before the closing quote.
    /// </summary>
    private static bool SkipVerbatimString(string code, ref int i)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            if (code[i] != '"')
            {
                i++;
                continue;
            }

            if (i + 1 < n && code[i + 1] == '"')
            {
                i += 2; // escaped quote
                continue;
            }

            i++;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Advances past a non-raw interpolated string. On entry
    /// <c>code[i] == '"'</c> and <paramref name="start"/> is the index of the
    /// first prefix character. <paramref name="verbatim"/> selects the
    /// <c>$@"..."</c> escaping rules. Text segments (including the prefix,
    /// quotes and hole braces) are reported to <paramref name="markInert"/>;
    /// hole contents are scanned as code by <see cref="SkipHole"/>.
    /// <c>{{</c>/<c>}}</c> are escaped braces. Returns false if the string or
    /// one of its holes is unterminated.
    /// </summary>
    private static bool SkipInterpolatedString(string code, ref int i, int start, bool verbatim, Action<int, int> markInert)
    {
        int n = code.Length;
        int segment = start; // start of the text segment currently being consumed
        i++; // past opening quote

        while (i < n)
        {
            char c = code[i];

            if (c == '\\' && !verbatim)
            {
                i = Math.Min(i + 2, n);
                continue;
            }

            if (c == '"')
            {
                if (verbatim && i + 1 < n && code[i + 1] == '"')
                {
                    i += 2; // escaped quote
                    continue;
                }

                i++;
                markInert(segment, i);
                return true;
            }

            if (c == '{')
            {
                if (i + 1 < n && code[i + 1] == '{')
                {
                    i += 2; // escaped brace
                    continue;
                }

                i++; // past the hole opener
                markInert(segment, i);
                if (!SkipHole(code, ref i, markInert))
                {
                    return false;
                }

                segment = i; // the closing '}' belongs to the next text segment
                i++;
                continue;
            }

            // Everything else, including an escaped "}}", is plain text.
            i++;
        }

        markInert(segment, i);
        return false;
    }

    /// <summary>
    /// Scans the code inside an interpolation hole. On entry
    /// <paramref name="i"/> is the first character after the hole opener; on
    /// success it is left on the <c>}</c> that closes the hole. Nested braces
    /// (e.g. an object initializer) are tracked, and nested string and char
    /// literals are skipped so that <c>'}'</c> or <c>"}"</c> inside the hole
    /// cannot end it early. Returns false on end of input or when a nested
    /// literal is unterminated.
    /// </summary>
    private static bool SkipHole(string code, ref int i, Action<int, int> markInert)
    {
        int n = code.Length;
        int depth = 0;
        while (i < n)
        {
            if (TrySkipLiteral(code, ref i, markInert, out Mismatch failure))
            {
                if (failure != Mismatch.None)
                {
                    return false;
                }

                continue;
            }

            char c = code[i];
            if (c == '{')
            {
                depth++;
            }
            else if (c == '}')
            {
                if (depth == 0)
                {
                    return true;
                }

                depth--;
            }

            i++;
        }

        return false;
    }

    /// <summary>
    /// Advances past a raw string literal <c>"""..."""</c> (C# 11). On entry
    /// <c>code[i]</c> is the first of three or more opening quotes and
    /// <paramref name="start"/> is the index of the first prefix character.
    /// The literal ends at the first run of at least as many quotes as opened
    /// it. When <paramref name="dollars"/> is positive the literal is
    /// interpolated: a run of at least that many <c>{</c> opens a hole, whose
    /// contents are scanned as code. Returns false if the literal or one of its
    /// holes is unterminated.
    /// </summary>
    private static bool SkipRawString(string code, ref int i, int start, int dollars, Action<int, int> markInert)
    {
        int n = code.Length;
        int segment = start;

        int openCount = 0;
        while (i < n && code[i] == '"')
        {
            openCount++;
            i++;
        }

        while (i < n)
        {
            char c = code[i];

            if (c == '"')
            {
                int closeCount = 0;
                while (i < n && code[i] == '"')
                {
                    closeCount++;
                    i++;
                }

                if (closeCount >= openCount)
                {
                    markInert(segment, i);
                    return true;
                }

                // Fewer quotes than the opener — literal content; keep scanning.
                continue;
            }

            if (dollars > 0 && c == '{')
            {
                int openers = 0;
                while (i < n && code[i] == '{')
                {
                    openers++;
                    i++;
                }

                if (openers < dollars)
                {
                    continue; // too few braces to open a hole: literal text
                }

                markInert(segment, i);
                if (!SkipHole(code, ref i, markInert))
                {
                    return false;
                }

                segment = i;
                int closers = 0;
                while (i < n && code[i] == '}' && closers < dollars)
                {
                    closers++;
                    i++;
                }

                continue;
            }

            i++;
        }

        markInert(segment, i);
        return false;
    }

    /// <summary>
    /// Advances past a <c>'x'</c> char literal. On entry <c>code[i] == '\''</c>.
    /// Returns false if end of input or a newline is reached before the closing
    /// quote.
    /// </summary>
    private static bool SkipCharLiteral(string code, ref int i)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            char c = code[i];
            if (c == '\\')
            {
                i = Math.Min(i + 2, n);
                continue;
            }

            if (c == '\n')
            {
                return false;
            }

            i++;
            if (c == '\'')
            {
                return true;
            }
        }

        return false;
    }
}
|