feat(etl): implement ColumnDropTransformer

Add a data transformer that removes specified columns from the data stream.
Columns are matched by name (case-insensitive) and multiple columns can be
dropped in a single transformer. Includes comprehensive tests that use
NSubstitute to mock IDataReader.
This commit is contained in:
Joseph Doherty
2026-01-03 09:10:54 -05:00
parent 6e7bcadf68
commit f1b7809a45
2 changed files with 153 additions and 0 deletions
@@ -0,0 +1,66 @@
using System.Data;
namespace JdeScoping.DataSync.Etl.Transformers;
/// <summary>
/// A data transformer that removes specified columns from the data stream.
/// Columns are matched by name (case-insensitive).
/// </summary>
/// <remarks>
/// Surviving columns keep their relative source order and are renumbered
/// contiguously from zero. Every ordinal-based accessor translates the output
/// ordinal back to the source ordinal before delegating to the source reader.
/// </remarks>
public class ColumnDropTransformer : DataTransformerBase
{
    private readonly HashSet<string> _columnsToDrop;

    // Index = output ordinal, value = ordinal of the same column in the source reader.
    private int[]? _ordinalMap;

    // Surviving column name (case-insensitive) -> output ordinal.
    private Dictionary<string, int>? _nameToOrdinal;

    /// <inheritdoc />
    public override string TransformerName => $"DropColumns:{string.Join(",", _columnsToDrop)}";

    /// <summary>
    /// Creates a new ColumnDropTransformer that removes the specified columns.
    /// </summary>
    /// <param name="columnsToDrop">The names of columns to drop (case-insensitive).</param>
    /// <exception cref="ArgumentNullException"><paramref name="columnsToDrop"/> is <c>null</c>.</exception>
    public ColumnDropTransformer(params string[] columnsToDrop)
    {
        ArgumentNullException.ThrowIfNull(columnsToDrop);
        _columnsToDrop = new HashSet<string>(columnsToDrop, StringComparer.OrdinalIgnoreCase);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Builds the output-to-source ordinal map and the name lookup from the
    /// source schema, skipping every column whose name is in the drop set.
    /// </remarks>
    protected override void OnInitialize(IDataReader source)
    {
        int fieldCount = source.FieldCount;
        var keptOrdinals = new List<int>(fieldCount);
        var nameLookup = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        for (int i = 0; i < fieldCount; i++)
        {
            var name = source.GetName(i);
            if (_columnsToDrop.Contains(name))
            {
                continue;
            }

            // When several surviving columns share a name (e.g. "Id" twice from a
            // join), keep the FIRST occurrence so name lookups resolve to the
            // earliest matching column instead of silently switching to the last.
            nameLookup.TryAdd(name, keptOrdinals.Count);
            keptOrdinals.Add(i);
        }

        // Publish both maps together once they are fully built.
        _ordinalMap = keptOrdinals.ToArray();
        _nameToOrdinal = nameLookup;
    }

    /// <inheritdoc />
    public override int GetFieldCount(IDataReader source) => OrdinalMap.Length;

    /// <inheritdoc />
    public override string GetName(int ordinal, IDataReader source) => source.GetName(OrdinalMap[ordinal]);

    /// <inheritdoc />
    public override Type GetFieldType(int ordinal, IDataReader source) => source.GetFieldType(OrdinalMap[ordinal]);

    /// <inheritdoc />
    public override object GetValue(int ordinal, IDataReader source) => source.GetValue(OrdinalMap[ordinal]);

    /// <inheritdoc />
    /// <exception cref="IndexOutOfRangeException">
    /// No surviving column has the given name (it never existed or was dropped).
    /// </exception>
    public override int GetOrdinal(string name, IDataReader source)
    {
        ArgumentNullException.ThrowIfNull(name);

        if (NameToOrdinal.TryGetValue(name, out var ordinal))
        {
            return ordinal;
        }

        throw new IndexOutOfRangeException($"Column '{name}' not found or was dropped.");
    }

    /// <inheritdoc />
    public override bool IsDBNull(int ordinal, IDataReader source) => source.IsDBNull(OrdinalMap[ordinal]);

    // The maps only exist after OnInitialize has run (presumably triggered by the
    // base class before any accessor is used -- confirm against DataTransformerBase).
    // Fail with a descriptive error instead of a bare NullReferenceException.
    private int[] OrdinalMap =>
        _ordinalMap ?? throw new InvalidOperationException(
            $"{nameof(ColumnDropTransformer)} has not been initialized with a source reader.");

    private Dictionary<string, int> NameToOrdinal =>
        _nameToOrdinal ?? throw new InvalidOperationException(
            $"{nameof(ColumnDropTransformer)} has not been initialized with a source reader.");
}