Files
jdescopingtool/NEW/src/JdeScoping.DataSync/Etl/Transformers/RegexTransformer.cs
T
Joseph Doherty cb41e42bb7 feat(datasync): add RegexTransformer with Find & Replace mode
Initial implementation supporting:
- Find & Replace mode with regex pattern and replacement string
- Case-insensitive option
- NonMatchBehavior enum for handling non-matches
2026-01-22 07:14:08 -05:00

121 lines
3.9 KiB
C#

using System.Data;
using System.Text.RegularExpressions;
namespace JdeScoping.DataSync.Etl.Transformers;
/// <summary>
/// Selects the result a regex transformation produces for an input value
/// that the pattern fails to match.
/// </summary>
public enum NonMatchBehavior
{
    /// <summary>Keep the original value.</summary>
    KeepOriginal = 0,

    /// <summary>Produce null/DBNull.</summary>
    ReturnNull = 1,

    /// <summary>Produce an empty string.</summary>
    ReturnEmpty = 2
}
/// <summary>
/// A data transformer that applies a regular expression to the string form of one column's values.
/// All other columns are passed through untouched.
/// <para>
/// Find &amp; Replace mode (replacement is not null): every match is replaced with the replacement
/// string. Input that does not match comes back as its unchanged string form; the configured
/// <see cref="NonMatchBehavior"/> is not consulted in this mode.
/// </para>
/// <para>
/// Match &amp; Extract mode (replacement is null): the value of the first capture group of the first
/// match is returned. The configured <see cref="NonMatchBehavior"/> decides the result when the
/// pattern does not match or declares no capture group.
/// </para>
/// </summary>
public class RegexTransformer : DataTransformerBase
{
    private readonly string _columnName;
    private readonly string _pattern;
    private readonly string? _replacement;
    private readonly bool _ignoreCase;
    private readonly NonMatchBehavior _nonMatchBehavior;

    // Both are populated together by OnInitialize; -1 means "not initialized yet".
    private Regex? _regex;
    private int _columnOrdinal = -1;

    /// <inheritdoc />
    public override string TransformerName => $"Regex:{_columnName}";

    /// <summary>
    /// Creates a new RegexTransformer.
    /// </summary>
    /// <param name="columnName">The column to transform.</param>
    /// <param name="pattern">The regex pattern.</param>
    /// <param name="replacement">Replacement string for Find &amp; Replace mode, or null for Match &amp; Extract mode.</param>
    /// <param name="ignoreCase">Whether to use case-insensitive matching.</param>
    /// <param name="nonMatchBehavior">Behavior when the pattern does not match (Match &amp; Extract mode).</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="columnName"/> or <paramref name="pattern"/> is null, empty, or whitespace.
    /// </exception>
    public RegexTransformer(
        string columnName,
        string pattern,
        string? replacement = null,
        bool ignoreCase = false,
        NonMatchBehavior nonMatchBehavior = NonMatchBehavior.KeepOriginal)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(columnName);
        ArgumentException.ThrowIfNullOrWhiteSpace(pattern);

        _columnName = columnName;
        _pattern = pattern;
        _replacement = replacement;
        _ignoreCase = ignoreCase;
        _nonMatchBehavior = nonMatchBehavior;
    }

    /// <inheritdoc />
    protected override void OnInitialize(IDataReader source)
    {
        // Resolve everything into locals first so that a failure in either step
        // (unknown column, invalid pattern) never leaves the transformer half-initialized
        // with a target ordinal but no regex.
        var targetOrdinal = source.GetOrdinal(_columnName);

        var options = RegexOptions.Compiled;
        if (_ignoreCase)
        {
            options |= RegexOptions.IgnoreCase;
        }

        var regex = new Regex(_pattern, options);

        _regex = regex;
        _columnOrdinal = targetOrdinal;
    }

    /// <inheritdoc />
    public override object GetValue(int ordinal, IDataReader source)
    {
        var value = source.GetValue(ordinal);

        // Only transform the target column
        if (ordinal != _columnOrdinal)
        {
            return value;
        }

        // Pass through null/DBNull
        if (value is null or DBNull)
        {
            return DBNull.Value;
        }

        var regex = _regex
            ?? throw new InvalidOperationException($"{TransformerName} has not been initialized.");

        var stringValue = value.ToString() ?? string.Empty;

        // Find & Replace mode (replacement is not null)
        if (_replacement != null)
        {
            return regex.Replace(stringValue, _replacement);
        }

        // Match & Extract mode (replacement is null).
        // Groups[0] is the whole match, so Count > 1 means the pattern declares a capture group.
        var match = regex.Match(stringValue);
        if (match.Success && match.Groups.Count > 1)
        {
            return match.Groups[1].Value;
        }

        // No match - apply NonMatchBehavior
        return _nonMatchBehavior switch
        {
            NonMatchBehavior.ReturnNull => DBNull.Value,
            NonMatchBehavior.ReturnEmpty => string.Empty,
            // KeepOriginal: hand back the string form rather than the raw object, because
            // GetFieldType reports this column as string (and Find & Replace mode already
            // returns the string form for non-matching input).
            _ => stringValue
        };
    }

    /// <inheritdoc />
    public override Type GetFieldType(int ordinal, IDataReader source)
    {
        // Target column always returns string
        if (ordinal == _columnOrdinal)
        {
            return typeof(string);
        }

        return source.GetFieldType(ordinal);
    }
}