feat(transport): pure Myers LineDiffer helper (M8 A3, T20)
This commit is contained in:
@@ -0,0 +1,299 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.Transport.Import;
|
||||
|
||||
/// <summary>
/// Classifies a single <see cref="LineDiffLine"/> within a per-line diff: the line is
/// either shared by both sides (<see cref="Context"/>), exists only in the new text
/// (<see cref="Add"/>), or exists only in the old text (<see cref="Remove"/>).
/// </summary>
public enum LineDiffOp
{
    /// <summary>The line appears in both the old and the new text, unchanged.</summary>
    Context = 0,

    /// <summary>The line exists only in the new text.</summary>
    Add = 1,

    /// <summary>The line exists only in the old text.</summary>
    Remove = 2,
}
|
||||
|
||||
/// <summary>
/// A single entry of a per-line diff. Line numbers are 1-based. Which of the two numbers
/// is populated depends on <see cref="Op"/>: <see cref="LineDiffOp.Context"/> entries carry
/// both, <see cref="LineDiffOp.Add"/> entries carry only <see cref="NewLineNo"/>, and
/// <see cref="LineDiffOp.Remove"/> entries carry only <see cref="OldLineNo"/>.
/// </summary>
/// <param name="Op">The kind of edit this entry represents.</param>
/// <param name="Text">The content of the line.</param>
/// <param name="OldLineNo">
/// 1-based position of the line in the old text, or <see langword="null"/> when the line
/// has no old-side counterpart.
/// </param>
/// <param name="NewLineNo">
/// 1-based position of the line in the new text, or <see langword="null"/> when the line
/// has no new-side counterpart.
/// </param>
public sealed record LineDiffLine(
    LineDiffOp Op,
    string Text,
    int? OldLineNo,
    int? NewLineNo);
|
||||
|
||||
/// <summary>
/// Outcome of a per-line diff. <see cref="Lines"/> is listed in source order and may have
/// been cut short (signalled by <see cref="Truncated"/>). <see cref="AddedCount"/> and
/// <see cref="RemovedCount"/> always describe the complete diff, whether or not
/// <see cref="Lines"/> was truncated.
/// </summary>
/// <param name="Lines">The diff entries in source order, possibly truncated.</param>
/// <param name="Truncated">Whether <paramref name="Lines"/> omits entries of the full diff.</param>
/// <param name="AddedCount">Total number of added lines in the full diff.</param>
/// <param name="RemovedCount">Total number of removed lines in the full diff.</param>
public sealed record LineDiffResult(
    IReadOnlyList<LineDiffLine> Lines,
    bool Truncated,
    int AddedCount,
    int RemovedCount);
|
||||
|
||||
/// <summary>
/// Pure, dependency-free per-line text diff using the Myers O(ND) shortest-edit-script
/// algorithm. Line endings are normalized (<c>\r\n</c> and lone <c>\r</c> become <c>\n</c>)
/// before splitting, so otherwise-identical text with mixed endings produces no changes.
/// </summary>
/// <remarks>
/// Hand-rolled deliberately — the project forbids third-party diff libraries (mirroring
/// the custom-SVG <c>KpiTrendChart</c> precedent). The forward pass snapshots the search
/// frontier once per edit distance <c>d</c>; the backward pass replays those snapshots to
/// recover a minimal edit script, which is then emitted in source order. Each snapshot is a
/// full copy of the frontier array (length <c>2(n+m)+1</c>), so memory use grows with the
/// edit distance times the combined line count.
/// </remarks>
public static class LineDiffer
{
    /// <summary>
    /// Computes a minimal per-line diff between <paramref name="oldText"/> and
    /// <paramref name="newText"/> (either may be <see langword="null"/>, treated as empty).
    /// </summary>
    /// <param name="oldText">The original text, or <see langword="null"/> (empty).</param>
    /// <param name="newText">The new text, or <see langword="null"/> (empty).</param>
    /// <param name="maxLines">
    /// Maximum number of entries to retain in <see cref="LineDiffResult.Lines"/>. If the full
    /// diff would emit more lines, <see cref="LineDiffResult.Lines"/> is truncated to this many
    /// entries and <see cref="LineDiffResult.Truncated"/> is set; the add/remove totals are
    /// unaffected. Values &lt;= 0 are treated as 0 and yield an empty
    /// <see cref="LineDiffResult.Lines"/>.
    /// </param>
    /// <returns>
    /// The (possibly truncated) diff lines in source order together with the full add/remove
    /// totals. <see cref="LineDiffResult.Truncated"/> is <see langword="true"/> only when at
    /// least one diff line was actually dropped.
    /// </returns>
    public static LineDiffResult Diff(string? oldText, string? newText, int maxLines = 400)
    {
        string[] oldLines = SplitLines(oldText);
        string[] newLines = SplitLines(newText);

        List<LineDiffLine> full = BuildDiff(oldLines, newLines, out int added, out int removed);

        // Clamp before comparing: a negative cap must behave exactly like zero, otherwise an
        // empty diff would be reported as truncated even though nothing was dropped.
        int cap = maxLines > 0 ? maxLines : 0;
        if (full.Count <= cap)
        {
            return new LineDiffResult(full, false, added, removed);
        }

        var capped = new LineDiffLine[cap];
        full.CopyTo(0, capped, 0, cap);
        return new LineDiffResult(capped, true, added, removed);
    }

    /// <summary>
    /// Normalizes line endings (<c>\r\n</c> and lone <c>\r</c> to <c>\n</c>) and splits on <c>\n</c>.
    /// A <see langword="null"/> or empty input yields an empty array (no lines), so the diff of two
    /// empties is empty rather than a single empty-string match. Because the split keeps empty
    /// entries, text ending in a line terminator produces a final empty-string element.
    /// </summary>
    private static string[] SplitLines(string? text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return [];
        }

        // Replace the two-character sequence first so "\r\n" collapses to one "\n", not two.
        string normalized = text.Replace("\r\n", "\n").Replace('\r', '\n');
        return normalized.Split('\n');
    }

    /// <summary>
    /// Runs Myers O(ND), backtracks the recorded frontier into a source-ordered edit script,
    /// and reports the full add/remove totals via the out parameters.
    /// </summary>
    /// <param name="a">Lines of the old text.</param>
    /// <param name="b">Lines of the new text.</param>
    /// <param name="added">Receives the number of <see cref="LineDiffOp.Add"/> entries.</param>
    /// <param name="removed">Receives the number of <see cref="LineDiffOp.Remove"/> entries.</param>
    /// <returns>The complete, untruncated edit script in source order.</returns>
    private static List<LineDiffLine> BuildDiff(
        string[] a,
        string[] b,
        out int added,
        out int removed)
    {
        int n = a.Length;
        int m = b.Length;
        added = 0;
        removed = 0;

        // Fast path: when either side is empty, every line of the other side is a pure
        // add or remove (and two empties produce an empty script).
        if (n == 0 || m == 0)
        {
            var oneSided = new List<LineDiffLine>(n + m);

            for (int j = 0; j < m; j++)
            {
                oneSided.Add(new LineDiffLine(LineDiffOp.Add, b[j], OldLineNo: null, NewLineNo: j + 1));
            }

            for (int i = 0; i < n; i++)
            {
                oneSided.Add(new LineDiffLine(LineDiffOp.Remove, a[i], OldLineNo: i + 1, NewLineNo: null));
            }

            added = m;
            removed = n;
            return oneSided;
        }

        // Myers shortest-edit-script: V[k] holds the furthest x reached on diagonal k = x - y.
        // The diagonal index k ranges over [-d, d]; we offset by `max` to index a
        // non-negative array.
        int max = n + m;
        int offset = max;
        var v = new int[(2 * max) + 1];
        var trace = new List<int[]>();

        int foundD = -1;
        for (int d = 0; d <= max && foundD < 0; d++)
        {
            // Snapshot V before this round's overwrites: trace[d] is the frontier left by
            // round d - 1, which is exactly what backtracking round d needs to consult.
            trace.Add((int[])v.Clone());

            for (int k = -d; k <= d; k += 2)
            {
                // Down move (insertion) takes x from diagonal k + 1; right move (deletion)
                // advances x from diagonal k - 1. The boundary diagonals have only one option.
                bool moveDown = k == -d || (k != d && v[offset + k - 1] < v[offset + k + 1]);
                int x = moveDown ? v[offset + k + 1] : v[offset + k - 1] + 1;
                int y = x - k;

                // Follow the diagonal (matching lines) as far as possible.
                while (x < n && y < m && a[x] == b[y])
                {
                    x++;
                    y++;
                }

                v[offset + k] = x;

                if (x >= n && y >= m)
                {
                    foundD = d;
                    break;
                }
            }
        }

        List<LineDiffLine> script = Backtrack(a, b, trace, foundD, offset, n, m);

        foreach (LineDiffLine line in script)
        {
            switch (line.Op)
            {
                case LineDiffOp.Add:
                    added++;
                    break;
                case LineDiffOp.Remove:
                    removed++;
                    break;
            }
        }

        return script;
    }

    /// <summary>
    /// Walks the per-distance V snapshots backwards from the end point, emitting matched
    /// (context), removed, and added lines. The walk produces lines in reverse source order;
    /// the returned list is reversed back into forward source order.
    /// </summary>
    /// <param name="a">Lines of the old text.</param>
    /// <param name="b">Lines of the new text.</param>
    /// <param name="trace">V snapshots; entry <c>d</c> is V as it stood before round <c>d</c>.</param>
    /// <param name="foundD">Edit distance at which the forward pass reached the end point.</param>
    /// <param name="offset">Index offset that maps diagonal <c>k</c> to <c>v[offset + k]</c>.</param>
    /// <param name="n">Length of <paramref name="a"/>.</param>
    /// <param name="m">Length of <paramref name="b"/>.</param>
    /// <returns>The edit script in forward source order.</returns>
    private static List<LineDiffLine> Backtrack(
        string[] a,
        string[] b,
        List<int[]> trace,
        int foundD,
        int offset,
        int n,
        int m)
    {
        var reverse = new List<LineDiffLine>(n + m);
        int x = n;
        int y = m;

        for (int d = foundD; d > 0; d--)
        {
            int[] v = trace[d];
            int k = x - y;

            // Same decision rule as the forward pass, evaluated against the same snapshot,
            // so it reproduces the move that was actually taken in round d.
            bool cameDown = k == -d || (k != d && v[offset + k - 1] < v[offset + k + 1]);
            int prevK = cameDown ? k + 1 : k - 1;
            int prevX = v[offset + prevK];
            int prevY = prevX - prevK;

            // Unwind the diagonal (matched lines) that followed this round's single edit.
            while (x > prevX && y > prevY)
            {
                x--;
                y--;
                reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
            }

            if (x == prevX)
            {
                // Insertion: a new-side line (b[prevY]) with no old counterpart.
                y--;
                reverse.Add(new LineDiffLine(LineDiffOp.Add, b[y], OldLineNo: null, NewLineNo: y + 1));
            }
            else
            {
                // Deletion: an old-side line (a[prevX]) with no new counterpart.
                x--;
                reverse.Add(new LineDiffLine(LineDiffOp.Remove, a[x], OldLineNo: x + 1, NewLineNo: null));
            }
        }

        // d == 0 leg: any remaining leading diagonal is all matched context.
        while (x > 0 && y > 0)
        {
            x--;
            y--;
            reverse.Add(new LineDiffLine(LineDiffOp.Context, a[x], OldLineNo: x + 1, NewLineNo: y + 1));
        }

        reverse.Reverse();
        return reverse;
    }
}
|
||||
Reference in New Issue
Block a user