feat(etl): implement ColumnDropTransformer

Add a data transformer that removes specified columns from the data stream.
Columns are matched by name (case-insensitive) and multiple columns can be
dropped in a single transformer. Includes comprehensive tests that use
NSubstitute to mock IDataReader.
This commit is contained in:
Joseph Doherty
2026-01-03 09:10:54 -05:00
parent 6e7bcadf68
commit f1b7809a45
2 changed files with 153 additions and 0 deletions
@@ -0,0 +1,66 @@
using System.Data;
namespace JdeScoping.DataSync.Etl.Transformers;
/// <summary>
/// A data transformer that removes specified columns from the data stream.
/// Columns are matched by name (case-insensitive). Retained columns keep their
/// relative order and are exposed under consecutive ordinals starting at zero.
/// </summary>
public class ColumnDropTransformer : DataTransformerBase
{
    // Names of the columns to hide; compared with OrdinalIgnoreCase.
    private readonly HashSet<string> _columnsToDrop;

    // Output ordinal -> source ordinal for every retained column. Null until OnInitialize has run.
    private int[]? _ordinalMap;

    // Retained column name -> output ordinal (case-insensitive). Null until OnInitialize has run.
    private Dictionary<string, int>? _nameToOrdinal;

    /// <inheritdoc />
    public override string TransformerName => $"DropColumns:{string.Join(",", _columnsToDrop)}";

    /// <summary>
    /// Creates a new ColumnDropTransformer that removes the specified columns.
    /// </summary>
    /// <param name="columnsToDrop">The names of columns to drop (case-insensitive).</param>
    /// <exception cref="ArgumentNullException"><paramref name="columnsToDrop"/> is <c>null</c>.</exception>
    public ColumnDropTransformer(params string[] columnsToDrop)
    {
        ArgumentNullException.ThrowIfNull(columnsToDrop);
        _columnsToDrop = new HashSet<string>(columnsToDrop, StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Scans the schema of <paramref name="source"/> and builds the lookup tables that
    /// translate output ordinals and names to the ordinals of the retained source columns.
    /// </summary>
    /// <param name="source">The reader whose columns are being filtered.</param>
    /// <remarks>
    /// NOTE(review): presumably invoked by the base class before any accessor is used - confirm
    /// against <c>DataTransformerBase</c>.
    /// </remarks>
    protected override void OnInitialize(IDataReader source)
    {
        var ordinalList = new List<int>();
        var nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        for (int i = 0; i < source.FieldCount; i++)
        {
            var name = source.GetName(i);
            if (_columnsToDrop.Contains(name))
            {
                continue;
            }

            // When the source exposes several retained columns whose names are equal
            // (ignoring case), name lookup resolves to the first one, matching the usual
            // ADO.NET GetOrdinal behavior; the later duplicates stay reachable by ordinal.
            nameToOrdinal.TryAdd(name, ordinalList.Count);
            ordinalList.Add(i);
        }

        // Publish only after the scan completed so a failure part-way through
        // never leaves the two lookup tables out of sync.
        _nameToOrdinal = nameToOrdinal;
        _ordinalMap = ordinalList.ToArray();
    }

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">The transformer has not been initialized.</exception>
    public override int GetFieldCount(IDataReader source) => RequireInitialized(_ordinalMap).Length;

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">The transformer has not been initialized.</exception>
    public override string GetName(int ordinal, IDataReader source) => source.GetName(ToSourceOrdinal(ordinal));

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">The transformer has not been initialized.</exception>
    public override Type GetFieldType(int ordinal, IDataReader source) => source.GetFieldType(ToSourceOrdinal(ordinal));

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">The transformer has not been initialized.</exception>
    public override object GetValue(int ordinal, IDataReader source) => source.GetValue(ToSourceOrdinal(ordinal));

    /// <inheritdoc />
    /// <exception cref="IndexOutOfRangeException">
    /// No retained column is called <paramref name="name"/> (it does not exist or was dropped).
    /// </exception>
    /// <exception cref="InvalidOperationException">The transformer has not been initialized.</exception>
    public override int GetOrdinal(string name, IDataReader source)
    {
        if (RequireInitialized(_nameToOrdinal).TryGetValue(name, out var ordinal))
        {
            return ordinal;
        }

        throw new IndexOutOfRangeException($"Column '{name}' not found or was dropped.");
    }

    /// <inheritdoc />
    /// <exception cref="InvalidOperationException">The transformer has not been initialized.</exception>
    public override bool IsDBNull(int ordinal, IDataReader source) => source.IsDBNull(ToSourceOrdinal(ordinal));

    // Translates an output ordinal to the ordinal of the same column in the source reader.
    // An ordinal outside the retained range surfaces as IndexOutOfRangeException from the array access.
    private int ToSourceOrdinal(int ordinal) => RequireInitialized(_ordinalMap)[ordinal];

    // Returns the lookup state, or fails with a descriptive error instead of a
    // NullReferenceException when an accessor is used before OnInitialize has run.
    private static T RequireInitialized<T>(T? state) where T : class =>
        state ?? throw new InvalidOperationException(
            $"{nameof(ColumnDropTransformer)} has not been initialized with a source reader.");
}
@@ -0,0 +1,87 @@
using System.Data;
using JdeScoping.DataSync.Etl.Transformers;
using NSubstitute;
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
/// <summary>
/// Unit tests for <see cref="ColumnDropTransformer"/>, driven through the reader returned by
/// <c>Transform</c> with an NSubstitute mock standing in for the source <see cref="IDataReader"/>.
/// </summary>
public class ColumnDropTransformerTests
{
    [Fact]
    public void Constructor_NullColumns_ThrowsArgumentNull()
    {
        Assert.Throws<ArgumentNullException>(() => new ColumnDropTransformer(null!));
    }

    [Fact]
    public void TransformerName_ListsDroppedColumn()
    {
        var transformer = new ColumnDropTransformer("DropMe");

        Assert.Equal("DropColumns:DropMe", transformer.TransformerName);
    }

    [Fact]
    public void FieldCount_ExcludesDroppedColumns()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);

        Assert.Equal(3, reader.FieldCount);
    }

    [Fact]
    public void FieldCount_NoMatchingColumn_LeavesSchemaUnchanged()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("Missing");

        var reader = transformer.Transform(source);

        Assert.Equal(4, reader.FieldCount);
        Assert.Equal("DropMe", reader.GetName(2));
        Assert.Equal("Value", reader.GetName(3));
    }

    [Fact]
    public void DropColumn_DifferentCasing_IsStillDropped()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("dropme");

        var reader = transformer.Transform(source);

        Assert.Equal(3, reader.FieldCount);
        Assert.Equal("Id", reader.GetName(0));
        Assert.Equal("Name", reader.GetName(1));
        Assert.Equal("Value", reader.GetName(2));
    }

    [Fact]
    public void GetName_SkipsDroppedColumns()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);

        Assert.Equal("Id", reader.GetName(0));
        Assert.Equal("Name", reader.GetName(1));
        Assert.Equal("Value", reader.GetName(2));
    }

    [Fact]
    public void GetOrdinal_ReturnsRemappedOrdinal()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);

        Assert.Equal(0, reader.GetOrdinal("Id"));
        Assert.Equal(1, reader.GetOrdinal("Name"));
        Assert.Equal(2, reader.GetOrdinal("Value"));
    }

    [Fact]
    public void GetOrdinal_DifferentCasing_ReturnsRemappedOrdinal()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);

        Assert.Equal(0, reader.GetOrdinal("ID"));
        Assert.Equal(2, reader.GetOrdinal("value"));
    }

    [Fact]
    public void GetOrdinal_DroppedColumn_ThrowsIndexOutOfRange()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);

        Assert.Throws<IndexOutOfRangeException>(() => reader.GetOrdinal("DropMe"));
    }

    [Fact]
    public void GetValue_ReturnsCorrectValues()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" }, new object[] { 1, "Test", "Dropped", 42 });
        source.Read().Returns(true);
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);
        reader.Read();

        Assert.Equal(1, reader.GetValue(0));
        Assert.Equal("Test", reader.GetValue(1));
        Assert.Equal(42, reader.GetValue(2));
    }

    [Fact]
    public void GetFieldType_ReadsFromRemappedSourceColumn()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        // Source ordinal 3 ("Value") must surface as output ordinal 2.
        source.GetFieldType(3).Returns(typeof(int));
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);

        Assert.Equal(typeof(int), reader.GetFieldType(2));
    }

    [Fact]
    public void IsDBNull_ReadsFromRemappedSourceColumn()
    {
        var source = CreateMockReader(new[] { "Id", "Name", "DropMe", "Value" });
        source.Read().Returns(true);
        // Only source ordinal 3 ("Value") reports null; the mock answers false for the rest.
        source.IsDBNull(3).Returns(true);
        var transformer = new ColumnDropTransformer("DropMe");

        var reader = transformer.Transform(source);
        reader.Read();

        Assert.False(reader.IsDBNull(0));
        Assert.True(reader.IsDBNull(2));
    }

    [Fact]
    public void MultipleDroppedColumns_AllExcluded()
    {
        var source = CreateMockReader(new[] { "Id", "Drop1", "Name", "Drop2", "Value" });
        var transformer = new ColumnDropTransformer("Drop1", "Drop2");

        var reader = transformer.Transform(source);

        Assert.Equal(3, reader.FieldCount);
        Assert.Equal("Id", reader.GetName(0));
        Assert.Equal("Name", reader.GetName(1));
        Assert.Equal("Value", reader.GetName(2));
    }

    /// <summary>
    /// Builds a substitute <see cref="IDataReader"/> exposing the given column names and,
    /// optionally, one value per column for <c>GetValue</c>.
    /// </summary>
    /// <param name="columns">Column names, in source ordinal order.</param>
    /// <param name="values">Optional values, indexed in parallel with <paramref name="columns"/>.</param>
    private static IDataReader CreateMockReader(string[] columns, object[]? values = null)
    {
        var reader = Substitute.For<IDataReader>();
        reader.FieldCount.Returns(columns.Length);

        for (int i = 0; i < columns.Length; i++)
        {
            var index = i;
            reader.GetName(index).Returns(columns[index]);
            reader.GetOrdinal(columns[index]).Returns(index);
            if (values != null)
            {
                reader.GetValue(index).Returns(values[index]);
            }
        }

        return reader;
    }
}