feat(commons): quote-aware OverrideCsvParser (T16 CSV)
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.Commons.Types;

/// <summary>
/// One parsed instance-attribute-override CSV row: an attribute name plus its
/// (canonical-string) value and optional list element type.
/// </summary>
/// <param name="AttributeName">Content of the first column; never blank for rows produced by <see cref="OverrideCsvParser"/>.</param>
/// <param name="Value">Content of the second column, or <c>null</c> when that field is empty ("clear the override").</param>
/// <param name="ElementType">Content of the third column when the header declares it and the field is non-empty; otherwise <c>null</c>.</param>
/// <param name="LineNumber">
/// 1-based physical line number in the source text (blank lines are counted), so
/// downstream errors can point back at the operator's file.
/// </param>
public sealed record OverrideCsvRow(string AttributeName, string? Value, string? ElementType, int LineNumber);

/// <summary>
/// Outcome of parsing an override CSV: the successfully-parsed <see cref="Rows"/>
/// plus per-line <see cref="Errors"/>. Malformed rows are reported and excluded,
/// valid rows still flow through. Downstream callers validate names/types against
/// the instance schema; the parser is purely syntactic.
/// </summary>
/// <param name="Rows">Rows that parsed cleanly, in source order.</param>
/// <param name="Errors">Human-readable error messages, in source order.</param>
public sealed record OverrideCsvParseResult(IReadOnlyList<OverrideCsvRow> Rows, IReadOnlyList<string> Errors);

/// <summary>
/// Pure, dependency-free, quote-aware parser turning instance-attribute-override
/// CSV text into structured rows plus per-line errors. Callers supply the text
/// (no file I/O). The header row is required and case-insensitive
/// (<c>AttributeName,Value,ElementType</c>); the <c>ElementType</c> column is
/// optional. Fields follow RFC-4180 quoting: a double-quoted field may embed
/// commas and doubled quotes (<c>""</c> → <c>"</c>). Whitespace inside the quotes
/// is preserved; whitespace around an unquoted field, or padding outside the
/// quotes of a quoted field, is dropped.
/// </summary>
/// <remarks>
/// The text is split into physical lines before fields are parsed, so a quoted
/// field cannot span a line break; such a line is reported as having an
/// unterminated quoted field.
/// </remarks>
public static class OverrideCsvParser
{
    private const string HeaderError =
        "Missing or invalid header row. Expected 'AttributeName,Value,ElementType' " +
        "(ElementType column optional).";

    /// <summary>
    /// Parses override CSV <paramref name="csvText"/> into rows and per-line errors.
    /// </summary>
    /// <param name="csvText">The CSV text; <c>null</c> is treated as empty text.</param>
    /// <returns>
    /// The parsed rows and any per-line errors. When the first non-blank line is not
    /// a recognized header (or there is no such line) the result has zero rows and a
    /// single header error.
    /// </returns>
    public static OverrideCsvParseResult Parse(string csvText)
    {
        var rows = new List<OverrideCsvRow>();
        var errors = new List<string>();

        var text = csvText ?? string.Empty;

        // A leading byte-order mark (U+FEFF) is neither whitespace nor removed by
        // Trim(), so it would otherwise glue itself to the first header name and
        // make an otherwise valid file fail the header match.
        if (text.Length > 0 && text[0] == '\uFEFF')
        {
            text = text[1..];
        }

        // Split into physical lines; \r\n and \r are normalized to \n boundaries.
        var lines = text.Replace("\r\n", "\n").Replace('\r', '\n').Split('\n');

        var headerSeen = false;
        var hasElementTypeColumn = false;

        for (var i = 0; i < lines.Length; i++)
        {
            var lineNumber = i + 1;
            var rawLine = lines[i];

            // Skip fully-blank lines (whitespace-only included) without error.
            if (string.IsNullOrWhiteSpace(rawLine))
            {
                continue;
            }

            var fields = SplitFields(rawLine, out var unterminatedQuote);

            if (!headerSeen)
            {
                // The first non-blank line must be the header; anything else aborts the parse.
                if (unterminatedQuote || !TryMatchHeader(fields, out hasElementTypeColumn))
                {
                    errors.Add(HeaderError);
                    return new OverrideCsvParseResult(rows, errors);
                }

                headerSeen = true;
                continue;
            }

            // A quote that never closes swallows the rest of the line, so the field
            // boundaries cannot be trusted; report the row instead of guessing.
            if (unterminatedQuote)
            {
                errors.Add($"Line {lineNumber}: unterminated quoted field.");
                continue;
            }

            var expectedColumns = hasElementTypeColumn ? 3 : 2;
            if (fields.Count != expectedColumns)
            {
                errors.Add(
                    $"Line {lineNumber}: expected {expectedColumns} columns but found {fields.Count}.");
                continue;
            }

            var attributeName = fields[0];
            if (string.IsNullOrWhiteSpace(attributeName))
            {
                errors.Add($"Line {lineNumber}: AttributeName must not be blank.");
                continue;
            }

            var value = NullIfEmpty(fields[1]);
            var elementType = hasElementTypeColumn ? NullIfEmpty(fields[2]) : null;

            rows.Add(new OverrideCsvRow(attributeName, value, elementType, lineNumber));
        }

        if (!headerSeen)
        {
            errors.Add(HeaderError);
        }

        return new OverrideCsvParseResult(rows, errors);
    }

    /// <summary>Matches the required header (case-insensitive); reports whether the optional ElementType column is present.</summary>
    private static bool TryMatchHeader(IReadOnlyList<string> fields, out bool hasElementTypeColumn)
    {
        hasElementTypeColumn = false;

        // Both accepted shapes start with AttributeName,Value.
        if (fields.Count is not (2 or 3) ||
            !HeaderEquals(fields[0], "AttributeName") ||
            !HeaderEquals(fields[1], "Value"))
        {
            return false;
        }

        if (fields.Count == 2)
        {
            return true;
        }

        hasElementTypeColumn = HeaderEquals(fields[2], "ElementType");
        return hasElementTypeColumn;
    }

    private static bool HeaderEquals(string field, string expected) =>
        string.Equals(field, expected, StringComparison.OrdinalIgnoreCase);

    private static string? NullIfEmpty(string field) => field.Length == 0 ? null : field;

    /// <summary>
    /// RFC-4180-ish field splitter for a single physical line: quoted fields may
    /// embed commas and doubled quotes (<c>""</c> → <c>"</c>). Unquoted fields are
    /// whitespace-trimmed; for quoted fields only whitespace-only padding outside
    /// the quotes is dropped.
    /// </summary>
    /// <param name="line">One physical line without its line terminator.</param>
    /// <param name="unterminatedQuote">
    /// Set to <c>true</c> when the line ends while still inside a quoted section.
    /// </param>
    /// <returns>The fields of the line; always at least one element.</returns>
    private static List<string> SplitFields(string line, out bool unterminatedQuote)
    {
        var fields = new List<string>();
        var field = new System.Text.StringBuilder();
        var inQuotes = false;
        var quoted = false; // this field was (at least partly) quoted → preserve its inner whitespace
        var quotedEnd = 0;  // builder length at the moment the latest quoted section closed

        for (var i = 0; i < line.Length; i++)
        {
            var c = line[i];

            if (inQuotes)
            {
                if (c != '"')
                {
                    field.Append(c);
                }
                else if (i + 1 < line.Length && line[i + 1] == '"')
                {
                    // Doubled quote inside a quoted field → a single literal quote.
                    field.Append('"');
                    i++;
                }
                else
                {
                    inQuotes = false;
                    quotedEnd = field.Length;
                }

                continue;
            }

            switch (c)
            {
                case '"':
                    // Padding in front of the opening quote is not part of the value.
                    if (!quoted && IsWhiteSpaceOnly(field))
                    {
                        field.Clear();
                    }

                    inQuotes = true;
                    quoted = true;
                    break;
                case ',':
                    fields.Add(FinishField(field, quoted, quotedEnd));
                    field.Clear();
                    quoted = false;
                    quotedEnd = 0;
                    break;
                default:
                    field.Append(c);
                    break;
            }
        }

        unterminatedQuote = inQuotes;
        if (inQuotes)
        {
            // Nothing follows an unclosed quote, so there is no outside padding to drop.
            quotedEnd = field.Length;
        }

        fields.Add(FinishField(field, quoted, quotedEnd));
        return fields;
    }

    /// <summary>
    /// Produces the final text of one field. Unquoted fields are trimmed. Quoted
    /// fields keep everything up to the closing quote; text after the closing quote
    /// is dropped only when it is pure whitespace, otherwise it is kept verbatim.
    /// </summary>
    private static string FinishField(System.Text.StringBuilder field, bool quoted, int quotedEnd)
    {
        if (!quoted)
        {
            return field.ToString().Trim();
        }

        for (var i = quotedEnd; i < field.Length; i++)
        {
            if (!char.IsWhiteSpace(field[i]))
            {
                return field.ToString();
            }
        }

        return field.ToString(0, quotedEnd);
    }

    /// <summary>Returns <c>true</c> when the builder is empty or holds only whitespace.</summary>
    private static bool IsWhiteSpaceOnly(System.Text.StringBuilder text)
    {
        for (var i = 0; i < text.Length; i++)
        {
            if (!char.IsWhiteSpace(text[i]))
            {
                return false;
            }
        }

        return true;
    }
}
|
||||
Reference in New Issue
Block a user