using System.Data;
using System.Text.RegularExpressions;

namespace JdeScoping.DataSync.Etl.Transformers;

/// <summary>
/// Specifies behavior when a regex pattern does not match the input value.
/// </summary>
public enum NonMatchBehavior
{
    /// <summary>Keep the original value (returned in its string form by <see cref="RegexTransformer"/>).</summary>
    KeepOriginal,

    /// <summary>Return null/DBNull.</summary>
    ReturnNull,

    /// <summary>Return an empty string.</summary>
    ReturnEmpty
}

/// <summary>
/// A data transformer that applies regex transformations to string values in a column.
/// Supports two modes: Find-and-Replace (when a replacement is provided) and
/// Match-and-Extract (when the replacement is null; extracts the first capture group).
/// </summary>
/// <remarks>
/// <see cref="NonMatchBehavior"/> is only consulted in Match-and-Extract mode. In
/// Find-and-Replace mode the result of <see cref="Regex.Replace(string, string)"/> is
/// returned as-is, so a non-matching input comes back as its unchanged string form.
/// </remarks>
public class RegexTransformer : DataTransformerBase
{
    private readonly string _columnName;
    private readonly string _pattern;
    private readonly string? _replacement;
    private readonly bool _ignoreCase;
    private readonly NonMatchBehavior _nonMatchBehavior;

    // Both are populated by OnInitialize. Until then the ordinal stays at -1, so
    // GetValue/GetFieldType pass every column through untouched.
    private Regex? _regex;
    private int _columnOrdinal = -1;

    /// <inheritdoc />
    public override string TransformerName => $"Regex:{_columnName}";

    /// <summary>
    /// Creates a new RegexTransformer.
    /// </summary>
    /// <param name="columnName">The column to transform.</param>
    /// <param name="pattern">The regex pattern.</param>
    /// <param name="replacement">
    /// Replacement string for Find-and-Replace mode, or null for Match-and-Extract mode.
    /// </param>
    /// <param name="ignoreCase">Whether to use case-insensitive matching.</param>
    /// <param name="nonMatchBehavior">
    /// Behavior when the pattern does not match (Match-and-Extract mode only).
    /// </param>
    /// <exception cref="ArgumentException">
    /// <paramref name="columnName"/> or <paramref name="pattern"/> is null, empty, or whitespace.
    /// </exception>
    public RegexTransformer(
        string columnName,
        string pattern,
        string? replacement = null,
        bool ignoreCase = false,
        NonMatchBehavior nonMatchBehavior = NonMatchBehavior.KeepOriginal)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(columnName);
        ArgumentException.ThrowIfNullOrWhiteSpace(pattern);

        _columnName = columnName;
        _pattern = pattern;
        _replacement = replacement;
        _ignoreCase = ignoreCase;
        _nonMatchBehavior = nonMatchBehavior;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Resolves the ordinal of the target column on <paramref name="source"/> and
    /// builds the compiled regex. An invalid pattern surfaces here, not in the constructor.
    /// </remarks>
    protected override void OnInitialize(IDataReader source)
    {
        _columnOrdinal = source.GetOrdinal(_columnName);

        var options = RegexOptions.Compiled;
        if (_ignoreCase)
        {
            options |= RegexOptions.IgnoreCase;
        }

        _regex = new Regex(_pattern, options);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Columns other than the target are returned unchanged, and null/DBNull in the
    /// target column is returned as <see cref="DBNull.Value"/>. Every other result for
    /// the target column is a string or <see cref="DBNull.Value"/>, matching the type
    /// reported by <see cref="GetFieldType"/>.
    /// </remarks>
    public override object GetValue(int ordinal, IDataReader source)
    {
        var value = source.GetValue(ordinal);

        // Only transform the target column.
        if (ordinal != _columnOrdinal)
        {
            return value;
        }

        // Pass through null/DBNull.
        if (value is null or DBNull)
        {
            return DBNull.Value;
        }

        var stringValue = value.ToString() ?? string.Empty;

        // Find-and-Replace mode (replacement is not null).
        if (_replacement is not null)
        {
            return _regex!.Replace(stringValue, _replacement);
        }

        // Match-and-Extract mode (replacement is null): take the first capture group.
        // A pattern with no capture group is treated as "no match". A group that exists
        // but did not participate in the match yields an empty string.
        var match = _regex!.Match(stringValue);
        if (match.Success && match.Groups.Count > 1)
        {
            return match.Groups[1].Value;
        }

        // No match - apply NonMatchBehavior.
        return _nonMatchBehavior switch
        {
            NonMatchBehavior.ReturnNull => DBNull.Value,
            NonMatchBehavior.ReturnEmpty => string.Empty,
            // KeepOriginal: return the string form, not the raw source object, so the
            // value agrees with the string field type reported by GetFieldType even when
            // the source column is not a string. For string columns this is the same instance.
            _ => stringValue
        };
    }

    /// <inheritdoc />
    /// <remarks>
    /// The target column is always reported as <see cref="string"/>; all other
    /// columns report the type given by <paramref name="source"/>.
    /// </remarks>
    public override Type GetFieldType(int ordinal, IDataReader source)
    {
        // Target column always returns string.
        if (ordinal == _columnOrdinal)
        {
            return typeof(string);
        }

        return source.GetFieldType(ordinal);
    }
}