fix(commons): OverrideCsvParser — preserve literal mid-field quotes, error on unterminated quoted field (T16 CSV)
This commit is contained in:
@@ -58,7 +58,11 @@ public static class OverrideCsvParser
|
||||
if (string.IsNullOrWhiteSpace(rawLine))
|
||||
continue;
|
||||
|
||||
var fields = SplitFields(rawLine);
|
||||
if (!SplitFields(rawLine, out var fields))
|
||||
{
|
||||
errors.Add($"Line {lineNumber}: Unterminated quoted field.");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!headerSeen)
|
||||
{
|
||||
@@ -130,16 +134,30 @@ public static class OverrideCsvParser
|
||||
/// <summary>
/// Maps a zero-length field to <c>null</c>; any other value (including
/// whitespace-only text) is returned unchanged.
/// </summary>
private static string? NullIfEmpty(string field)
{
    if (field.Length == 0)
    {
        return null;
    }

    return field;
}
|
||||
|
||||
/// <summary>
|
||||
/// RFC-4180-ish field splitter for a single physical line: quoted fields may
|
||||
/// embed commas and doubled quotes (<c>""</c> → <c>"</c>); unquoted fields are
|
||||
/// whitespace-trimmed.
|
||||
/// RFC-4180-ish field splitter for a single physical line. Quoting rules:
|
||||
/// <list type="bullet">
|
||||
/// <item>A field is <i>quoted</i> only if its first non-whitespace char is a
|
||||
/// <c>"</c> (leading whitespace before the opening quote is allowed and
|
||||
/// ignored). Inside a quoted field, commas are literal and <c>""</c> is an
|
||||
/// escaped literal <c>"</c>; the closing <c>"</c> must be the last non-whitespace
|
||||
/// char of the field (trailing whitespace after the close is allowed and
|
||||
/// ignored).</item>
|
||||
/// <item>A <c>"</c> appearing anywhere else in an unquoted field (i.e. after
|
||||
/// non-whitespace content) is a <b>literal</b> character and is preserved.</item>
|
||||
/// <item>Unquoted fields are whitespace-trimmed; quoted field values are kept
|
||||
/// verbatim.</item>
|
||||
/// </list>
|
||||
/// Returns <c>true</c> with the split <paramref name="fields"/> on success;
|
||||
/// returns <c>false</c> when a quoted field is opened but never closed before
|
||||
/// end-of-line (the caller emits a per-line "unterminated" error).
|
||||
/// </summary>
|
||||
private static List<string> SplitFields(string line)
|
||||
private static bool SplitFields(string line, out List<string> fields)
|
||||
{
|
||||
var fields = new List<string>();
|
||||
fields = new List<string>();
|
||||
var field = new System.Text.StringBuilder();
|
||||
var inQuotes = false;
|
||||
var quoted = false; // this field was (at least partly) quoted → preserve whitespace
|
||||
var inQuotes = false; // currently between an opening and closing quote
|
||||
var quoted = false; // this field opened with a quote → keep value verbatim
|
||||
var sawContent = false; // any non-whitespace char seen in the current field yet
|
||||
|
||||
for (var i = 0; i < line.Length; i++)
|
||||
{
|
||||
@@ -157,7 +175,7 @@ public static class OverrideCsvParser
|
||||
}
|
||||
else
|
||||
{
|
||||
inQuotes = false;
|
||||
inQuotes = false; // closing quote; only trailing whitespace may follow
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -170,23 +188,40 @@ public static class OverrideCsvParser
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '"':
|
||||
inQuotes = true;
|
||||
quoted = true;
|
||||
break;
|
||||
case ',':
|
||||
fields.Add(Finalize(field, quoted));
|
||||
field.Clear();
|
||||
inQuotes = false;
|
||||
quoted = false;
|
||||
sawContent = false;
|
||||
break;
|
||||
case '"' when !sawContent:
|
||||
// Opening quote: first non-whitespace char of the field. Any
|
||||
// leading whitespace seen so far is part of the (ignored) prefix.
|
||||
field.Clear();
|
||||
inQuotes = true;
|
||||
quoted = true;
|
||||
sawContent = true;
|
||||
break;
|
||||
default:
|
||||
// After a quoted field has closed, any character reaching this branch is
|
||||
// skipped rather than appended; only whitespace is expected there.
|
||||
if (quoted)
|
||||
break;
|
||||
|
||||
// A '"' here (sawContent already true) falls through as a literal.
|
||||
if (!char.IsWhiteSpace(c))
|
||||
sawContent = true;
|
||||
field.Append(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (inQuotes)
|
||||
return false; // opened a quoted field that was never closed
|
||||
|
||||
fields.Add(Finalize(field, quoted));
|
||||
return fields;
|
||||
return true;
|
||||
}
|
||||
|
||||
private static string Finalize(System.Text.StringBuilder field, bool quoted)
|
||||
|
||||
Reference in New Issue
Block a user