namespace ScadaLink.TemplateEngine.Validation;

/// <summary>
/// String/comment-aware scanner for the balanced-delimiter ("does it look like
/// valid C#") checks used by script validation such as
/// <c>SharedScriptService.ValidateSyntax</c>.
/// </summary>
/// <remarks>
/// <para>
/// This is <b>not</b> a compiler. It is an interim structural check that walks
/// the source once and tracks <c>{}</c>, <c>[]</c> and <c>()</c> depth while
/// skipping over the C# lexical constructs in which a delimiter is inert:
/// line/block comments, regular string literals (with <c>\</c> escapes),
/// verbatim strings (<c>@"..."</c>, where <c>""</c> escapes a quote and <c>\</c>
/// is literal), interpolated strings (<c>$"..."</c>, <c>$@"..."</c> and
/// <c>@$"..."</c> — the holes <c>{...}</c> are code and <c>{{</c>/<c>}}</c> are
/// escaped braces), raw string literals (<c>"""..."""</c>, optionally prefixed
/// with one or more <c>$</c>), and char literals (<c>'}'</c>).
/// </para>
/// <para>
/// It is intentionally conservative: when a real Roslyn-based compiler is wired
/// in, this hand-rolled scan should be replaced by
/// <c>CSharpSyntaxTree.ParseText</c> diagnostics. Until then this scanner removes
/// the false positives that a naive character count produces for valid scripts
/// containing a delimiter inside a string or comment.
/// </para>
/// </remarks>
internal static class CSharpDelimiterScanner
{
    /// <summary>The kind of delimiter mismatch found, if any.</summary>
    internal enum Mismatch
    {
        None,
        UnexpectedCloseBrace,
        UnexpectedCloseBracket,
        UnexpectedCloseParen,
        UnclosedBrace,
        UnclosedBracket,
        UnclosedParen,
        UnclosedBlockComment,
        UnterminatedString,
        UnterminatedChar,
    }

    /// <summary>
    /// Returns true when <paramref name="pattern"/> occurs in a code region of
    /// <paramref name="code"/> — i.e. not wholly inside a string literal, char
    /// literal, or comment. Used by the interim forbidden-API scan so that the
    /// inert text <c>System.IO.</c> in a comment or string literal is not flagged
    /// as a forbidden API call (TemplateEngine-006).
    /// </summary>
    /// <param name="code">The script source to search.</param>
    /// <param name="pattern">The ordinal substring to look for; null or empty never matches.</param>
    /// <returns>True when the pattern is found in what remains after blanking non-code spans.</returns>
    /// <remarks>
    /// This removes the false-positive half of the substring scan. It does
    /// <b>not</b> close the bypass half: namespace aliases, <c>using static</c>,
    /// and <c>global::</c>-qualified references still evade a pure text match.
    /// Authoritative forbidden-API enforcement requires Roslyn semantic symbol
    /// analysis and is deferred to the real script compiler / Site Runtime
    /// sandbox; this check is advisory only.
    /// </remarks>
    internal static bool ContainsInCode(string code, string pattern)
    {
        if (string.IsNullOrEmpty(pattern))
        {
            return false;
        }

        // Blank out every string/char-literal/comment span, then do an ordinary
        // substring search over what remains (the code regions).
        var codeOnly = BlankNonCodeSpans(code);
        return codeOnly.Contains(pattern, StringComparison.Ordinal);
    }

    /// <summary>
    /// Replaces the content of every comment, string literal, and char literal
    /// with spaces (CR/LF preserved), leaving only code regions intact.
    /// Delimiter characters and string prefixes (<c>$</c>, <c>@</c>) are blanked
    /// too, so a pattern cannot straddle a literal boundary.
    /// </summary>
    private static string BlankNonCodeSpans(string code)
    {
        var buffer = code.ToCharArray();
        int n = code.Length;
        int i = 0;

        while (i < n)
        {
            int start = i;
            if (!TrySkipNonCodeSpan(code, ref i, out _))
            {
                i++;
                continue;
            }

            // An unterminated span is blanked up to wherever the skipper stopped.
            // A skipper may overshoot the end by one after a trailing backslash,
            // hence the additional bound on n.
            for (int k = start; k < i && k < n; k++)
            {
                if (buffer[k] != '\n' && buffer[k] != '\r')
                {
                    buffer[k] = ' ';
                }
            }
        }

        return new string(buffer);
    }

    /// <summary>
    /// Walks <paramref name="code"/> once and reports the first structural
    /// delimiter problem, or <see cref="Mismatch.None"/> when the source is
    /// balanced. Delimiters inside comments, strings, and char literals are
    /// ignored.
    /// </summary>
    /// <param name="code">The script source to check.</param>
    /// <returns>The first mismatch encountered, or <see cref="Mismatch.None"/>.</returns>
    internal static Mismatch Scan(string code)
    {
        int brace = 0, bracket = 0, paren = 0;
        int i = 0;
        int n = code.Length;

        while (i < n)
        {
            if (TrySkipNonCodeSpan(code, ref i, out Mismatch problem))
            {
                if (problem != Mismatch.None)
                {
                    return problem;
                }

                continue;
            }

            switch (code[i])
            {
                case '{':
                    brace++;
                    break;
                case '}':
                    if (--brace < 0)
                    {
                        return Mismatch.UnexpectedCloseBrace;
                    }

                    break;
                case '[':
                    bracket++;
                    break;
                case ']':
                    if (--bracket < 0)
                    {
                        return Mismatch.UnexpectedCloseBracket;
                    }

                    break;
                case '(':
                    paren++;
                    break;
                case ')':
                    if (--paren < 0)
                    {
                        return Mismatch.UnexpectedCloseParen;
                    }

                    break;
            }

            i++;
        }

        if (brace != 0)
        {
            return Mismatch.UnclosedBrace;
        }

        if (bracket != 0)
        {
            return Mismatch.UnclosedBracket;
        }

        if (paren != 0)
        {
            return Mismatch.UnclosedParen;
        }

        return Mismatch.None;
    }

    /// <summary>
    /// Shared lexical dispatch for <see cref="Scan"/> and
    /// <see cref="BlankNonCodeSpans"/>. If a comment, string literal, or char
    /// literal starts at <c>code[i]</c>, advances <paramref name="i"/> past it and
    /// returns true; otherwise leaves <paramref name="i"/> untouched and returns
    /// false.
    /// </summary>
    /// <param name="code">The source being scanned.</param>
    /// <param name="i">Current position; must be a valid index on entry.</param>
    /// <param name="problem">
    /// <see cref="Mismatch.None"/> when the span was properly terminated, otherwise
    /// the matching unterminated/unclosed kind (with <paramref name="i"/> left where
    /// the skipper gave up).
    /// </param>
    private static bool TrySkipNonCodeSpan(string code, ref int i, out Mismatch problem)
    {
        int n = code.Length;
        problem = Mismatch.None;
        char c = code[i];
        char next = i + 1 < n ? code[i + 1] : '\0';

        // Line comment: runs up to (not including) the next '\n'.
        if (c == '/' && next == '/')
        {
            i += 2;
            while (i < n && code[i] != '\n')
            {
                i++;
            }

            return true;
        }

        // Block comment.
        if (c == '/' && next == '*')
        {
            i += 2;
            while (i < n)
            {
                if (code[i] == '*' && i + 1 < n && code[i + 1] == '/')
                {
                    i += 2;
                    return true;
                }

                i++;
            }

            problem = Mismatch.UnclosedBlockComment;
            return true;
        }

        if (TrySkipStringLiteral(code, ref i, out bool terminated))
        {
            if (!terminated)
            {
                problem = Mismatch.UnterminatedString;
            }

            return true;
        }

        if (c == '\'')
        {
            if (!SkipCharLiteral(code, ref i))
            {
                problem = Mismatch.UnterminatedChar;
            }

            return true;
        }

        return false;
    }

    /// <summary>
    /// If any flavour of string literal starts at <c>code[i]</c> — raw, regular,
    /// verbatim, interpolated (<c>$"</c>, <c>$@"</c>, <c>@$"</c>) or interpolated
    /// raw (<c>$"""</c>, <c>$$"""</c>, ...) — advances <paramref name="i"/> past it
    /// and returns true. Returns false, leaving <paramref name="i"/> unchanged,
    /// when no string starts here.
    /// </summary>
    /// <param name="code">The source being scanned.</param>
    /// <param name="i">Current position; must be a valid index on entry.</param>
    /// <param name="terminated">False when the literal has no closing delimiter.</param>
    private static bool TrySkipStringLiteral(string code, ref int i, out bool terminated)
    {
        int n = code.Length;
        terminated = true;
        char c = code[i];

        if (c == '"')
        {
            // Three or more consecutive quotes open a raw string; check that
            // before treating the quote as a plain string opener.
            terminated = IsTripleQuote(code, i)
                ? SkipRawString(code, ref i)
                : SkipRegularString(code, ref i);
            return true;
        }

        if (c == '$')
        {
            int j = i;
            while (j < n && code[j] == '$')
            {
                j++;
            }

            // Interpolated raw string. Its content (holes included) is treated as
            // inert: holes in valid code are balanced, so skipping them cannot
            // produce a false mismatch.
            if (IsTripleQuote(code, j))
            {
                i = j;
                terminated = SkipRawString(code, ref i);
                return true;
            }

            // Only a single '$' may prefix a non-raw interpolated string.
            if (j - i != 1)
            {
                return false;
            }

            bool verbatim = j < n && code[j] == '@';
            if (verbatim)
            {
                j++;
            }

            if (j < n && code[j] == '"')
            {
                i = j;
                terminated = SkipInterpolatedString(code, ref i, verbatim);
                return true;
            }

            return false;
        }

        if (c == '@')
        {
            if (i + 1 < n && code[i + 1] == '"')
            {
                i++; // now on the opening quote
                terminated = SkipVerbatimString(code, ref i);
                return true;
            }

            // @$"..." is the same literal as $@"...".
            if (i + 2 < n && code[i + 1] == '$' && code[i + 2] == '"')
            {
                i += 2; // now on the opening quote
                terminated = SkipInterpolatedString(code, ref i, verbatim: true);
                return true;
            }
        }

        return false;
    }

    /// <summary>True when three consecutive <c>"</c> characters start at <paramref name="at"/>.</summary>
    private static bool IsTripleQuote(string code, int at) =>
        at + 2 < code.Length && code[at] == '"' && code[at + 1] == '"' && code[at + 2] == '"';

    /// <summary>
    /// Advances <paramref name="i"/> past a regular <c>"..."</c> string literal.
    /// On entry <c>code[i] == '"'</c>. Returns false if the string is unterminated
    /// (end of input or a newline before the closing quote).
    /// </summary>
    private static bool SkipRegularString(string code, ref int i)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            char c = code[i];
            if (c == '\\')
            {
                i += 2; // escape — skip the escaped char as well
                continue;
            }

            if (c == '\n')
            {
                return false; // regular strings cannot span lines
            }

            i++;
            if (c == '"')
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Advances past a verbatim <c>@"..."</c> string. On entry <c>code[i] == '"'</c>.
    /// Inside, <c>\</c> is literal and <c>""</c> is an escaped quote. Returns false
    /// if the string is unterminated.
    /// </summary>
    private static bool SkipVerbatimString(string code, ref int i)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            if (code[i] != '"')
            {
                i++;
                continue;
            }

            if (i + 1 < n && code[i + 1] == '"')
            {
                i += 2; // escaped quote
                continue;
            }

            i++;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Advances past an interpolated string. <paramref name="verbatim"/> selects
    /// the <c>$@"..."</c> escaping rules. Interpolation holes <c>{...}</c> are
    /// skipped over (their braces are code, not literal text); <c>{{</c>/<c>}}</c>
    /// are escaped braces. On entry <c>code[i] == '"'</c>. Returns false if the
    /// string, or a string nested in one of its holes, is unterminated.
    /// </summary>
    private static bool SkipInterpolatedString(string code, ref int i, bool verbatim)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            char c = code[i];
            if (!verbatim && c == '\\')
            {
                i += 2;
                continue;
            }

            if (c == '"')
            {
                if (verbatim && i + 1 < n && code[i + 1] == '"')
                {
                    i += 2; // escaped quote
                    continue;
                }

                i++;
                return true;
            }

            if (c == '{')
            {
                if (i + 1 < n && code[i + 1] == '{')
                {
                    i += 2; // escaped brace
                    continue;
                }

                // Interpolation hole — skip to the matching '}', tracking nested
                // braces so a hole containing an object initializer is handled.
                i++;
                int depth = 1;
                while (i < n && depth > 0)
                {
                    char h = code[i];
                    if (h == '{')
                    {
                        depth++;
                        i++;
                        continue;
                    }

                    if (h == '}')
                    {
                        depth--;
                        i++;
                        continue;
                    }

                    // Any kind of string nested inside the hole.
                    if (TrySkipStringLiteral(code, ref i, out bool nestedTerminated))
                    {
                        if (!nestedTerminated)
                        {
                            return false;
                        }

                        continue;
                    }

                    if (h == '\'')
                    {
                        // A char literal such as '"' or '{' must not be read as a
                        // string opener or a brace. Stay tolerant: a lone
                        // apostrophe (e.g. in a format specifier) is stepped over
                        // as an ordinary character.
                        int probe = i;
                        i = SkipCharLiteral(code, ref probe) ? probe : i + 1;
                        continue;
                    }

                    i++;
                }

                continue;
            }

            if (c == '}' && i + 1 < n && code[i + 1] == '}')
            {
                i += 2; // escaped brace
                continue;
            }

            i++;
        }

        return false;
    }

    /// <summary>
    /// Advances past a raw string literal <c>"""..."""</c> (C# 11). On entry
    /// <c>code[i]</c> is the first of three or more opening quotes. The literal
    /// ends at the first run of at least as many quotes as the opener; shorter
    /// runs are literal content. Returns false if no such run exists.
    /// </summary>
    private static bool SkipRawString(string code, ref int i)
    {
        int n = code.Length;
        int openCount = 0;
        while (i < n && code[i] == '"')
        {
            openCount++;
            i++;
        }

        while (i < n)
        {
            if (code[i] != '"')
            {
                i++;
                continue;
            }

            int closeCount = 0;
            while (i < n && code[i] == '"')
            {
                closeCount++;
                i++;
            }

            if (closeCount >= openCount)
            {
                return true;
            }

            // Fewer quotes than the opener — literal content; keep scanning.
        }

        return false;
    }

    /// <summary>
    /// Advances past a <c>'x'</c> char literal. On entry <c>code[i] == '\''</c>.
    /// Returns false if the literal is unterminated (end of input or a newline
    /// before the closing apostrophe).
    /// </summary>
    private static bool SkipCharLiteral(string code, ref int i)
    {
        int n = code.Length;
        i++; // past opening quote
        while (i < n)
        {
            char c = code[i];
            if (c == '\\')
            {
                i += 2; // escape — skip the escaped char as well
                continue;
            }

            if (c == '\n')
            {
                return false;
            }

            i++;
            if (c == '\'')
            {
                return true;
            }
        }

        return false;
    }
}