feat(transport): pure Myers LineDiffer helper (M8 A3, T20)

This commit is contained in:
Joseph Doherty
2026-06-18 05:41:10 -04:00
parent 4f1925870f
commit c1282e06a2
2 changed files with 491 additions and 0 deletions
@@ -0,0 +1,299 @@
namespace ZB.MOM.WW.ScadaBridge.Transport.Import;
/// <summary>
/// Classifies a single <see cref="LineDiffLine"/> of a per-line diff: the line is either
/// shared by both sides (<see cref="Context"/>), exists only in the new text
/// (<see cref="Add"/>), or exists only in the old text (<see cref="Remove"/>).
/// </summary>
public enum LineDiffOp
{
    /// <summary>The line is unchanged and appears in both the old and the new text.</summary>
    Context = 0,

    /// <summary>The line appears only in the new text.</summary>
    Add = 1,

    /// <summary>The line appears only in the old text.</summary>
    Remove = 2,
}
/// <summary>
/// A single entry of a per-line diff. Line numbers are 1-based, and which of them is
/// populated depends on <paramref name="Op"/>: <see cref="LineDiffOp.Context"/> entries
/// have both, <see cref="LineDiffOp.Add"/> entries have only <paramref name="NewLineNo"/>,
/// and <see cref="LineDiffOp.Remove"/> entries have only <paramref name="OldLineNo"/>.
/// </summary>
/// <param name="Op">The kind of edit this entry represents.</param>
/// <param name="Text">The text of the line.</param>
/// <param name="OldLineNo">1-based line number in the old text, or <see langword="null"/> for added lines.</param>
/// <param name="NewLineNo">1-based line number in the new text, or <see langword="null"/> for removed lines.</param>
public sealed record LineDiffLine(
    LineDiffOp Op,
    string Text,
    int? OldLineNo,
    int? NewLineNo);
/// <summary>
/// Outcome of a per-line diff. The entries are listed in source order and may have been
/// cut short, in which case <paramref name="Truncated"/> is set. The two counters always
/// describe the complete diff, whether or not the entry list was truncated.
/// </summary>
/// <param name="Lines">The diff entries in source order; possibly a truncated prefix of the full diff.</param>
/// <param name="Truncated"><see langword="true"/> when <paramref name="Lines"/> omits entries of the full diff.</param>
/// <param name="AddedCount">Total number of added lines in the full diff.</param>
/// <param name="RemovedCount">Total number of removed lines in the full diff.</param>
public sealed record LineDiffResult(
    IReadOnlyList<LineDiffLine> Lines,
    bool Truncated,
    int AddedCount,
    int RemovedCount);
/// <summary>
/// Pure, dependency-free per-line text diff using the Myers O(ND) shortest-edit-script
/// algorithm. Line endings are normalized (<c>\r\n</c> and lone <c>\r</c> become <c>\n</c>)
/// before splitting, so otherwise-identical text with mixed endings produces no changes.
/// </summary>
/// <remarks>
/// Hand-rolled deliberately — the project forbids third-party diff libraries (mirroring
/// the custom-SVG <c>KpiTrendChart</c> precedent). The forward pass records, for every edit
/// distance <c>d</c>, the slice of the search frontier that round is able to read; the
/// backward pass replays those slices to recover the edit script, which is then emitted in
/// source order.
/// </remarks>
public static class LineDiffer
{
    /// <summary>
    /// Computes a minimal per-line diff between <paramref name="oldText"/> and
    /// <paramref name="newText"/> (either may be <see langword="null"/>, treated as empty).
    /// </summary>
    /// <param name="oldText">The original text, or <see langword="null"/> (empty).</param>
    /// <param name="newText">The new text, or <see langword="null"/> (empty).</param>
    /// <param name="maxLines">
    /// Maximum number of entries to retain in <see cref="LineDiffResult.Lines"/>. If the full
    /// diff would emit more lines, <see cref="LineDiffResult.Lines"/> is truncated to this many
    /// entries and <see cref="LineDiffResult.Truncated"/> is set; the add/remove totals are
    /// unaffected. Values &lt;= 0 are treated as 0 and yield an empty
    /// <see cref="LineDiffResult.Lines"/>; <see cref="LineDiffResult.Truncated"/> is then set
    /// only when the full diff actually contained entries.
    /// </param>
    /// <returns>
    /// The (possibly truncated) diff entries in source order together with the add/remove
    /// totals of the full diff.
    /// </returns>
    public static LineDiffResult Diff(string? oldText, string? newText, int maxLines = 400)
    {
        string[] oldLines = SplitLines(oldText);
        string[] newLines = SplitLines(newText);
        List<LineDiffLine> full = BuildDiff(oldLines, newLines, out int added, out int removed);

        // Clamp before comparing: an empty diff (Count == 0) is "greater than" a negative
        // cap, which would otherwise flag a result as truncated although nothing was dropped.
        int cap = maxLines > 0 ? maxLines : 0;
        if (full.Count <= cap)
        {
            return new LineDiffResult(full, false, added, removed);
        }

        return new LineDiffResult(full.GetRange(0, cap), true, added, removed);
    }

    /// <summary>
    /// Normalizes line endings (<c>\r\n</c> and lone <c>\r</c> to <c>\n</c>) and splits on <c>\n</c>.
    /// A <see langword="null"/> or empty input yields an empty array (no lines), so the diff of two
    /// empties is empty rather than a single empty-string match. Because the split keeps empty
    /// entries, text ending in a line break produces a final empty-string line.
    /// </summary>
    private static string[] SplitLines(string? text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return [];
        }

        // Order matters: collapse CRLF pairs first so the lone-CR pass does not double them.
        string normalized = text.Replace("\r\n", "\n").Replace('\r', '\n');
        return normalized.Split('\n');
    }

    /// <summary>
    /// Runs Myers O(ND), backtracks the recorded frontier into a source-ordered edit script,
    /// and reports the full add/remove totals via the out parameters.
    /// </summary>
    /// <param name="a">Lines of the old text.</param>
    /// <param name="b">Lines of the new text.</param>
    /// <param name="added">Receives the number of <see cref="LineDiffOp.Add"/> entries.</param>
    /// <param name="removed">Receives the number of <see cref="LineDiffOp.Remove"/> entries.</param>
    private static List<LineDiffLine> BuildDiff(
        string[] a,
        string[] b,
        out int added,
        out int removed)
    {
        int n = a.Length;
        int m = b.Length;
        added = 0;
        removed = 0;

        // Fast path: at least one side is empty, so the script is all adds and/or all removes
        // (and empty when both sides are empty). At most one of the two loops does any work.
        if (n == 0 || m == 0)
        {
            var oneSided = new List<LineDiffLine>(n + m);
            for (int j = 0; j < m; j++)
            {
                oneSided.Add(new LineDiffLine(LineDiffOp.Add, b[j], OldLineNo: null, NewLineNo: j + 1));
            }

            for (int i = 0; i < n; i++)
            {
                oneSided.Add(new LineDiffLine(LineDiffOp.Remove, a[i], OldLineNo: i + 1, NewLineNo: null));
            }

            added = m;
            removed = n;
            return oneSided;
        }

        // Myers shortest-edit-script. v[offset + k] holds the furthest x reached on diagonal
        // k (= x - y). The diagonal index ranges over [-d, d]; offsetting by `max` keeps the
        // array index non-negative.
        int max = n + m;
        int offset = max;
        var v = new int[(2 * max) + 1];
        var trace = new List<int[]>();
        int foundD = -1;

        for (int d = 0; d <= max && foundD < 0; d++)
        {
            // Record what this round is about to read, before it overwrites anything, so the
            // backward pass can repeat exactly the same predecessor decisions.
            trace.Add(SnapshotFrontier(v, offset, d));

            for (int k = -d; k <= d; k += 2)
            {
                // Down move (insertion) continues from diagonal k+1; right move (deletion)
                // continues from diagonal k-1 and advances x by one.
                bool down = k == -d || (k != d && v[offset + k - 1] < v[offset + k + 1]);
                int x = down ? v[offset + k + 1] : v[offset + k - 1] + 1;
                int y = x - k;

                // Follow the diagonal (matching lines) as far as possible.
                while (x < n && y < m && a[x] == b[y])
                {
                    x++;
                    y++;
                }

                v[offset + k] = x;
                if (x >= n && y >= m)
                {
                    foundD = d;
                    break;
                }
            }
        }

        List<LineDiffLine> script = Backtrack(a, b, trace, foundD);
        foreach (LineDiffLine line in script)
        {
            if (line.Op == LineDiffOp.Add)
            {
                added++;
            }
            else if (line.Op == LineDiffOp.Remove)
            {
                removed++;
            }
        }

        return script;
    }

    /// <summary>
    /// Copies the part of the frontier that round <paramref name="d"/> can read: diagonals
    /// <c>-(d-1)</c> through <c>d-1</c>. In the returned array diagonal <c>k</c> lives at index
    /// <c>k + (d - 1)</c>. Round 0 reads nothing that the backward pass needs, so it gets an
    /// empty snapshot. Storing only this slice keeps the trace at O(D²) integers instead of
    /// one full-width frontier copy per round.
    /// </summary>
    private static int[] SnapshotFrontier(int[] v, int offset, int d)
    {
        if (d == 0)
        {
            return [];
        }

        int radius = d - 1;
        return v[(offset - radius)..(offset + radius + 1)];
    }

    /// <summary>
    /// Walks the per-distance frontier snapshots backwards from the end point, emitting matched
    /// (context), removed, and added lines. The walk produces lines in reverse source order;
    /// the returned list is reversed back into forward source order.
    /// </summary>
    /// <param name="a">Lines of the old text.</param>
    /// <param name="b">Lines of the new text.</param>
    /// <param name="trace">Snapshots produced by <see cref="SnapshotFrontier"/>, indexed by <c>d</c>.</param>
    /// <param name="foundD">The edit distance at which the forward pass reached the end point.</param>
    private static List<LineDiffLine> Backtrack(
        string[] a,
        string[] b,
        List<int[]> trace,
        int foundD)
    {
        var reverse = new List<LineDiffLine>(a.Length + b.Length);
        int x = a.Length;
        int y = b.Length;

        for (int d = foundD; d > 0; d--)
        {
            int[] prev = trace[d];
            int center = d - 1; // index of diagonal 0 inside this snapshot
            int k = x - y;

            // Same predecessor rule the forward pass applied in round d.
            bool cameFromInsertion =
                k == -d || (k != d && prev[center + k - 1] < prev[center + k + 1]);
            int prevK = cameFromInsertion ? k + 1 : k - 1;
            int prevX = prev[center + prevK];
            int prevY = prevX - prevK;

            // Unwind the snake: matched lines that followed the single edit of this round.
            while (x > prevX && y > prevY)
            {
                x--;
                y--;
                reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
            }

            if (cameFromInsertion)
            {
                // Insertion: a new-side line (b[prevY]) with no old counterpart.
                y--;
                reverse.Add(new LineDiffLine(LineDiffOp.Add, b[y], OldLineNo: null, NewLineNo: y + 1));
            }
            else
            {
                // Deletion: an old-side line (a[prevX]) with no new counterpart.
                x--;
                reverse.Add(new LineDiffLine(LineDiffOp.Remove, a[x], OldLineNo: x + 1, NewLineNo: null));
            }
        }

        // d == 0 leg: any remaining leading diagonal is all matched context.
        while (x > 0 && y > 0)
        {
            x--;
            y--;
            reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
        }

        reverse.Reverse();
        return reverse;
    }
}