Files
jdescopingtool/NEW/src/JdeScoping.DataSync/Services/EtlPipelineFactory.cs
T
Joseph Doherty c814a7294b refactor(datasync): remove deprecated SyncMode and SyncModeConfig
- Delete SyncMode.cs enum file
- Remove SyncModes property from PipelineConfig
- Remove SyncModeConfig and DestinationOverride records
- Remove WithMode(SyncMode) from IEtlPipelineBuilder
- Remove BuildWithSyncModes() and related methods from EtlPipelineFactory
- Remove syncModes sections from all pipelines in pipelines.json
- Update tests to use schedules-only configuration

All pipelines now require 'schedules' format (mass/daily/hourly).
WithUpdateType(UpdateTypes) is the only way to set update type.
2026-01-07 05:16:20 -05:00

327 lines
13 KiB
C#

using System.Text.Json;
using JdeScoping.Core.Models.Enums;
using JdeScoping.DataAccess.Interfaces;
using JdeScoping.DataSync.Configuration;
using JdeScoping.DataSync.Contracts;
using JdeScoping.DataSync.Etl.Contracts;
using JdeScoping.DataSync.Etl.Destinations;
using JdeScoping.DataSync.Etl.Pipeline;
using JdeScoping.DataSync.Etl.Scripts;
using JdeScoping.DataSync.Etl.Sources;
using JdeScoping.DataSync.Options;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace JdeScoping.DataSync.Services;
/// <summary>
/// Factory for creating ETL pipelines from JSON configuration.
/// </summary>
public class EtlPipelineFactory : IEtlPipelineFactory
{
// Serializer settings used when reading the pipeline config file: property names are matched
// case-insensitively, and comments and trailing commas in the JSON are tolerated.
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNameCaseInsensitive = true,
ReadCommentHandling = JsonCommentHandling.Skip,
AllowTrailingCommas = true
};
// Connection factory handed to every pipeline builder created by ForTable.
private readonly IDbConnectionFactory _connectionFactory;
// Logger passed through to each pipeline builder created by ForTable.
private readonly ILogger<EtlPipeline> _logger;
// Validated pipeline configuration; assigned once by whichever constructor runs.
private readonly PipelinesRoot _config;
/// <summary>
/// Initializes the factory by loading and validating the pipeline configuration file
/// located by the <c>ConfigPath</c> option.
/// </summary>
/// <param name="connectionFactory">Factory for the database connections used by built pipelines.</param>
/// <param name="options">Options whose <c>ConfigPath</c> identifies the pipeline configuration file.</param>
/// <param name="logger">Logger handed to every pipeline built by this factory.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public EtlPipelineFactory(
    IDbConnectionFactory connectionFactory,
    IOptions<PipelineOptions> options,
    ILogger<EtlPipeline> logger)
{
    // Arguments are checked in declaration order, before the file system is touched.
    _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
    ArgumentNullException.ThrowIfNull(options);
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));

    _config = LoadPipelineConfigs(options.Value.ConfigPath);
}
/// <summary>
/// Initializes the factory from an already-deserialized configuration (intended for tests).
/// The configuration is validated the same way as one loaded from disk.
/// </summary>
/// <param name="connectionFactory">Factory for the database connections used by built pipelines.</param>
/// <param name="config">Pre-loaded pipeline configuration.</param>
/// <param name="logger">Logger handed to every pipeline built by this factory.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
internal EtlPipelineFactory(
    IDbConnectionFactory connectionFactory,
    PipelinesRoot config,
    ILogger<EtlPipeline> logger)
{
    // Null checks run in declaration order; validation only starts once all three passed.
    _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
    _config = config ?? throw new ArgumentNullException(nameof(config));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));

    ValidateConfig(_config);
}
/// <inheritdoc />
public IEtlPipelineBuilder ForTable(string tableName)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(tableName);

    // Happy path first: a configured table gets a fresh builder bound to its settings.
    if (_config.Pipelines.TryGetValue(tableName, out var pipelineConfig))
    {
        return new PipelineBuilder(
            _connectionFactory,
            tableName,
            pipelineConfig,
            _config.EffectiveSettings,
            _config.EffectiveScheduleDefaults,
            _logger);
    }

    // Unknown table: list what is configured so the caller can spot a typo.
    var knownTables = string.Join(", ", _config.Pipelines.Keys);
    throw new InvalidOperationException(
        $"No pipeline configured for table: {tableName}. Available tables: {knownTables}");
}
/// <summary>
/// Reads, deserializes and validates the pipeline configuration file.
/// </summary>
/// <param name="configPath">
/// Path of the configuration file, combined with the directory that contains this assembly.
/// </param>
/// <returns>The validated configuration root.</returns>
/// <exception cref="FileNotFoundException">The resolved file does not exist.</exception>
/// <exception cref="InvalidOperationException">
/// The file deserialized to <c>null</c>, or validation rejected its content.
/// </exception>
private static PipelinesRoot LoadPipelineConfigs(string configPath)
{
    // Resolve relative to the assembly location (handles both debug and publish).
    // Assembly.Location is an empty string when the assembly is bundled into a single-file
    // app, so fall back to the application base directory instead of combining with null.
    var assemblyLocation = typeof(EtlPipelineFactory).Assembly.Location;
    var baseDirectory = string.IsNullOrEmpty(assemblyLocation)
        ? AppContext.BaseDirectory
        : Path.GetDirectoryName(assemblyLocation) ?? AppContext.BaseDirectory;

    var fullPath = Path.Combine(baseDirectory, configPath);
    if (!File.Exists(fullPath))
    {
        throw new FileNotFoundException(
            $"Pipeline config not found: {fullPath}. " +
            "Ensure the config file is included in the build output.",
            fullPath);
    }

    var json = File.ReadAllText(fullPath);
    var root = JsonSerializer.Deserialize<PipelinesRoot>(json, JsonOptions)
        ?? throw new InvalidOperationException("Failed to deserialize pipeline config: result was null.");

    ValidateConfig(root);
    return root;
}
/// <summary>
/// Checks a deserialized configuration and reports problems with the name of the offending
/// pipeline, instead of letting missing sections surface as a <see cref="NullReferenceException"/>.
/// </summary>
/// <param name="root">The configuration to check.</param>
/// <exception cref="InvalidOperationException">
/// The pipelines map is missing, or a pipeline lacks its entry, 'schedules', 'source',
/// or a parameter lacks its 'source'.
/// </exception>
/// <exception cref="NotSupportedException">A parameter uses the 'runtime' source.</exception>
private static void ValidateConfig(PipelinesRoot root)
{
    if (root.Pipelines is null)
    {
        throw new InvalidOperationException("Pipeline config does not define any pipelines.");
    }

    foreach (var (name, config) in root.Pipelines)
    {
        // A JSON null for the whole pipeline entry deserializes to a null value.
        if (config is null)
        {
            throw new InvalidOperationException(
                $"Pipeline '{name}' has no configuration.");
        }

        // Schedules are now required
        if (config.Schedules == null)
        {
            throw new InvalidOperationException(
                $"Pipeline '{name}' must define 'schedules'.");
        }

        if (config.Source is null)
        {
            throw new InvalidOperationException(
                $"Pipeline '{name}' must define 'source'.");
        }

        if (config.Source.Parameters is null)
        {
            continue;
        }

        foreach (var (paramName, paramConfig) in config.Source.Parameters)
        {
            if (paramConfig?.Source is null)
            {
                throw new InvalidOperationException(
                    $"Pipeline '{name}' parameter '{paramName}' must define 'source'.");
            }

            // Validate no runtime parameters (not yet supported)
            if (paramConfig.Source.Equals("runtime", StringComparison.OrdinalIgnoreCase))
            {
                throw new NotSupportedException(
                    $"Pipeline '{name}' parameter '{paramName}': " +
                    "runtime parameter source is not yet supported.");
            }
        }
    }
}
private sealed class PipelineBuilder : IEtlPipelineBuilder
{
// Passed to the source, the destination and every SQL script runner created by this builder.
private readonly IDbConnectionFactory _connectionFactory;
// Key the pipeline was looked up by; used as the built pipeline's name.
private readonly string _tableName;
// Configuration of this one pipeline (source, destination, scripts, schedules).
private readonly PipelineConfig _config;
// Shared settings; the Timezone value is handed to the parameter format converter.
private readonly PipelineSettings _settings;
// Per-update-type defaults that pipeline-level schedule settings are merged with.
private readonly ScheduleDefaults _scheduleDefaults;
private readonly ILogger<EtlPipeline> _logger;
// Update type used by Build() unless WithUpdateType is called; starts as Hourly.
private UpdateTypes _updateType = UpdateTypes.Hourly;
// Optional minimum date set via WithMinimumDate; null means none was supplied.
private DateTime? _minDtOverride;
/// <summary>
/// Captures everything needed to build one table's pipeline. No validation is performed here.
/// </summary>
public PipelineBuilder(
    IDbConnectionFactory connectionFactory,
    string tableName,
    PipelineConfig config,
    PipelineSettings settings,
    ScheduleDefaults scheduleDefaults,
    ILogger<EtlPipeline> logger)
{
    (_connectionFactory, _tableName, _config) = (connectionFactory, tableName, config);
    (_settings, _scheduleDefaults, _logger) = (settings, scheduleDefaults, logger);
}
/// <summary>
/// Selects the update type the next <see cref="Build"/> call uses; returns this builder for chaining.
/// </summary>
public IEtlPipelineBuilder WithUpdateType(UpdateTypes updateType)
{
    // Only stored here; schedule settings and query selection are resolved in Build().
    _updateType = updateType;

    return this;
}
/// <summary>
/// Sets (or clears, when <c>null</c>) the minimum date used for date-driven source parameters;
/// returns this builder for chaining.
/// </summary>
public IEtlPipelineBuilder WithMinimumDate(DateTime? minDt)
{
    // Only stored here; it is converted into query parameters when the source is created.
    _minDtOverride = minDt;

    return this;
}
/// <summary>
/// Builds the pipeline from the schedule-based configuration and the values set on this builder.
/// </summary>
public EtlPipeline Build() => BuildWithSchedules();
/// <summary>
/// Assembles the pipeline for the selected update type: source, destination, and the
/// SQL scripts registered before and after the load.
/// </summary>
/// <remarks>
/// Pre-scripts are registered in this order: configured pre-scripts, then a TRUNCATE of the
/// destination table when the effective schedule has <c>PrePurge</c> set. Post-scripts are
/// registered as: an index rebuild when <c>ReIndex</c> is set, then configured post-scripts.
/// </remarks>
/// <returns>The built pipeline.</returns>
private EtlPipeline BuildWithSchedules()
{
    var scheduleConfig = GetEffectiveScheduleConfig(_updateType);

    // The mass query is used only for Mass runs, and only when the pipeline defines one;
    // otherwise the regular query is used.
    var useMassQuery = _updateType == UpdateTypes.Mass && !string.IsNullOrEmpty(_config.Source.MassQuery);
    var source = CreateSource(_config.Source, _minDtOverride, useMassQuery);

    // A purged table is refilled with a plain bulk import; without a purge rows are merged.
    var destType = scheduleConfig.PrePurge ? "bulkImport" : "bulkMerge";
    var destination = CreateDestination(destType, _config.Destination, scheduleConfig);

    var builder = new EtlPipelineBuilder()
        .WithName(_tableName)
        .WithSource(source)
        .WithDestination(destination)
        .WithLogger(_logger);

    // Add pre-scripts: config scripts first, then prePurge
    foreach (var script in _config.PreScripts ?? [])
    {
        builder.WithPreScript(new SqlScriptRunner(_connectionFactory, script, ScriptLabel("PreScript", script)));
    }

    if (scheduleConfig.PrePurge)
    {
        var truncateSql = $"TRUNCATE TABLE {QuoteIdentifier(_config.Destination.Table)}";
        builder.WithPreScript(new SqlScriptRunner(_connectionFactory, truncateSql, "PrePurge"));
    }

    // Add post-scripts: reIndex first, then config scripts
    if (scheduleConfig.ReIndex)
    {
        var reindexSql = $"ALTER INDEX ALL ON {QuoteIdentifier(_config.Destination.Table)} REBUILD";
        builder.WithPostScript(new SqlScriptRunner(_connectionFactory, reindexSql, "ReIndex"));
    }

    foreach (var script in _config.PostScripts ?? [])
    {
        builder.WithPostScript(new SqlScriptRunner(_connectionFactory, script, ScriptLabel("PostScript", script)));
    }

    return builder.Build();

    // Wraps the name in brackets, doubling any ']' so the name cannot terminate the delimiter
    // early. NOTE(review): the whole value is quoted as a single identifier, so a
    // schema-qualified name such as "dbo.Table" is not split — confirm configs use bare names.
    static string QuoteIdentifier(string identifier) =>
        "[" + identifier.Replace("]", "]]", StringComparison.Ordinal) + "]";

    // Script runner name: the prefix plus at most the first 30 characters of the script text.
    static string ScriptLabel(string prefix, string script) =>
        $"{prefix}:{script[..Math.Min(30, script.Length)]}";
}
/// <summary>
/// Resolves the schedule settings for an update type by merging the pipeline's own settings
/// for that type (when present) with the shared defaults.
/// </summary>
/// <param name="updateType">The update type being built.</param>
/// <returns>
/// The merged settings, or the defaults when the pipeline declares nothing for this type.
/// </returns>
private Configuration.ScheduleConfig GetEffectiveScheduleConfig(UpdateTypes updateType)
{
    // Pipeline-level settings for this update type; unrecognized types have none.
    var overrides = updateType switch
    {
        UpdateTypes.Mass => _config.Schedules?.Mass,
        UpdateTypes.Daily => _config.Schedules?.Daily,
        UpdateTypes.Hourly => _config.Schedules?.Hourly,
        _ => null
    };

    // Shared defaults; anything other than Mass or Daily uses the hourly defaults.
    var fallback = updateType switch
    {
        UpdateTypes.Mass => _scheduleDefaults.Mass,
        UpdateTypes.Daily => _scheduleDefaults.Daily,
        _ => _scheduleDefaults.Hourly
    };

    // Pipeline settings take part in the merge only when present.
    return overrides?.MergeWith(fallback) ?? fallback;
}
/// <summary>
/// Creates the query source for this pipeline, binding the configured parameters.
/// </summary>
/// <param name="sourceConfig">Source section of the pipeline configuration.</param>
/// <param name="minDt">Minimum date used to compute 'offset' parameters; may be <c>null</c>.</param>
/// <param name="useMassQuery">
/// When <c>true</c> the mass query is used and no parameters are bound at all.
/// </param>
/// <returns>A source that runs the selected query on the configured connection.</returns>
/// <exception cref="InvalidOperationException">A 'static' parameter has no value.</exception>
/// <exception cref="NotSupportedException">
/// A parameter names a source other than 'offset' or 'static' while a minimum date is present.
/// </exception>
private IImportSource CreateSource(SourceConfig sourceConfig, DateTime? minDt, bool useMassQuery)
{
    // Use massQuery if specified, otherwise use the default query
    var query = useMassQuery ? sourceConfig.MassQuery! : sourceConfig.Query;
    var parameters = new Dictionary<string, object>();
    var converter = new ParameterFormatConverter(_settings.Timezone);

    // Mass queries get no parameters (they typically don't need date parameters).
    if (!useMassQuery && sourceConfig.Parameters is not null)
    {
        foreach (var (_, paramConfig) in sourceConfig.Parameters)
        {
            object paramValue;
            if (string.Equals(paramConfig.Source, "static", StringComparison.OrdinalIgnoreCase))
            {
                // Static values do not depend on the minimum date, so they are bound even
                // when none was supplied (previously they were silently dropped in that case).
                paramValue = paramConfig.Value
                    ?? throw new InvalidOperationException(
                        $"Static parameter '{paramConfig.Name}' requires a value.");
            }
            else if (!minDt.HasValue)
            {
                // Without a minimum date the remaining entries cannot be resolved and are
                // left out, exactly as before.
                continue;
            }
            else if (string.Equals(paramConfig.Source, "offset", StringComparison.OrdinalIgnoreCase))
            {
                paramValue = converter.Convert(minDt.Value, paramConfig.Format);
            }
            else
            {
                throw new NotSupportedException(
                    $"Parameter source '{paramConfig.Source}' is not supported.");
            }

            // Use the parameter name exactly as configured (provider-specific)
            parameters[paramConfig.Name] = paramValue;
        }
    }

    return new DbQuerySource(
        _connectionFactory,
        sourceConfig.Connection,
        query,
        parameters);
}
/// <summary>
/// Creates the destination that writes rows into the configured table.
/// </summary>
/// <param name="destType">Either 'bulkImport' or 'bulkMerge' (matched ignoring case).</param>
/// <param name="baseConfig">Destination section of the pipeline configuration.</param>
/// <param name="scheduleConfig">Effective schedule; its <c>UpdateWhen</c> becomes the merge update condition.</param>
/// <returns>A bulk-import or bulk-merge destination for the table.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="destType"/> is unknown, or a merge was requested without match columns.
/// </exception>
private IImportDestination CreateDestination(
    string destType,
    DestinationConfig baseConfig,
    Configuration.ScheduleConfig scheduleConfig)
{
    var targetTable = baseConfig.Table;

    // Column lists always come from the pipeline's base destination config.
    var keyColumns = baseConfig.MatchColumns?.ToArray();
    var columnsKeptOnUpdate = baseConfig.ExcludeFromUpdate?.ToArray();

    switch (destType.ToLowerInvariant())
    {
        case "bulkimport":
            return new DbBulkImportDestination(_connectionFactory, targetTable);

        case "bulkmerge":
            // A merge cannot be built without match columns.
            if (keyColumns is null)
            {
                throw new InvalidOperationException(
                    $"matchColumns required for bulkMerge destination on table '{targetTable}'.");
            }

            return new DbBulkMergeDestination(
                _connectionFactory,
                targetTable,
                keyColumns,
                updateColumns: null,
                excludeFromUpdate: columnsKeptOnUpdate,
                updateCondition: scheduleConfig.UpdateWhen);

        default:
            throw new InvalidOperationException(
                $"Unknown destination type: '{destType}'. Expected 'bulkImport' or 'bulkMerge'.");
    }
}
}
}