feat(etl): implement EtlPipeline and EtlPipelineBuilder
Add pipeline orchestration for ETL operations: - EtlPipeline: executes source -> transform -> destination flow - EtlPipelineBuilder: fluent builder for pipeline configuration - Supports pre/post scripts, multiple transformers - Returns PipelineResult with step-by-step timing
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using JdeScoping.DataSync.Etl.Results;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Pipeline;
|
||||
|
||||
public class EtlPipeline
|
||||
{
|
||||
private readonly IImportSource _source;
|
||||
private readonly IReadOnlyList<IDataTransformer> _transformers;
|
||||
private readonly IImportDestination _destination;
|
||||
private readonly IReadOnlyList<IScriptRunner> _preScripts;
|
||||
private readonly IReadOnlyList<IScriptRunner> _postScripts;
|
||||
private readonly ILogger<EtlPipeline> _logger;
|
||||
|
||||
public string PipelineName { get; }
|
||||
|
||||
internal EtlPipeline(
|
||||
string name,
|
||||
IImportSource source,
|
||||
IReadOnlyList<IDataTransformer> transformers,
|
||||
IImportDestination destination,
|
||||
IReadOnlyList<IScriptRunner> preScripts,
|
||||
IReadOnlyList<IScriptRunner> postScripts,
|
||||
ILogger<EtlPipeline> logger)
|
||||
{
|
||||
PipelineName = name;
|
||||
_source = source;
|
||||
_transformers = transformers;
|
||||
_destination = destination;
|
||||
_preScripts = preScripts;
|
||||
_postScripts = postScripts;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public async Task<PipelineResult> ExecuteAsync(CancellationToken cancellationToken = default)
|
||||
{
|
||||
var steps = new List<StepResult>();
|
||||
var totalStopwatch = Stopwatch.StartNew();
|
||||
|
||||
_logger.LogInformation("Starting pipeline {PipelineName}", PipelineName);
|
||||
|
||||
try
|
||||
{
|
||||
// 1. Run pre-scripts
|
||||
foreach (var script in _preScripts)
|
||||
{
|
||||
var stepResult = await RunScriptAsync(script, cancellationToken);
|
||||
steps.Add(stepResult);
|
||||
}
|
||||
|
||||
// 2. Open source
|
||||
var sourceStopwatch = Stopwatch.StartNew();
|
||||
await using (_source)
|
||||
{
|
||||
var reader = await _source.ReadDataAsync(cancellationToken);
|
||||
sourceStopwatch.Stop();
|
||||
steps.Add(new StepResult(_source.SourceName, "Source", 0, sourceStopwatch.Elapsed));
|
||||
|
||||
// 3. Apply transformers
|
||||
foreach (var transformer in _transformers)
|
||||
{
|
||||
var transformStopwatch = Stopwatch.StartNew();
|
||||
reader = transformer.Transform(reader);
|
||||
transformStopwatch.Stop();
|
||||
steps.Add(new StepResult(transformer.TransformerName, "Transform", 0, transformStopwatch.Elapsed));
|
||||
}
|
||||
|
||||
// 4. Write to destination
|
||||
var destResult = await _destination.WriteAsync(reader, cancellationToken);
|
||||
steps.Add(new StepResult(_destination.DestinationName, "Destination", destResult.RowsProcessed, destResult.Elapsed));
|
||||
}
|
||||
|
||||
// 5. Run post-scripts
|
||||
foreach (var script in _postScripts)
|
||||
{
|
||||
var stepResult = await RunScriptAsync(script, cancellationToken);
|
||||
steps.Add(stepResult);
|
||||
}
|
||||
|
||||
totalStopwatch.Stop();
|
||||
var totalRows = steps.Sum(s => s.RowsAffected);
|
||||
|
||||
_logger.LogInformation("Pipeline {PipelineName} completed. Rows={Rows}, Elapsed={Elapsed}ms",
|
||||
PipelineName, totalRows, totalStopwatch.ElapsedMilliseconds);
|
||||
|
||||
return PipelineResult.Succeeded(totalRows, totalStopwatch.Elapsed, steps);
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
totalStopwatch.Stop();
|
||||
var totalRows = steps.Sum(s => s.RowsAffected);
|
||||
|
||||
_logger.LogError(ex, "Pipeline {PipelineName} failed at step {Step}",
|
||||
PipelineName, steps.LastOrDefault()?.StepName ?? "Unknown");
|
||||
|
||||
return PipelineResult.Failed(totalRows, totalStopwatch.Elapsed, steps, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<StepResult> RunScriptAsync(IScriptRunner script, CancellationToken ct)
|
||||
{
|
||||
var stopwatch = Stopwatch.StartNew();
|
||||
_logger.LogDebug("Running script {ScriptName}", script.ScriptName);
|
||||
await script.ExecuteAsync(ct);
|
||||
stopwatch.Stop();
|
||||
return new StepResult(script.ScriptName, "Script", 0, stopwatch.Elapsed);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
using JdeScoping.DataSync.Etl.Contracts;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace JdeScoping.DataSync.Etl.Pipeline;
|
||||
|
||||
public class EtlPipelineBuilder
|
||||
{
|
||||
private string _name = "Unnamed";
|
||||
private IImportSource? _source;
|
||||
private readonly List<IDataTransformer> _transformers = new();
|
||||
private IImportDestination? _destination;
|
||||
private readonly List<IScriptRunner> _preScripts = new();
|
||||
private readonly List<IScriptRunner> _postScripts = new();
|
||||
private ILogger<EtlPipeline>? _logger;
|
||||
|
||||
public EtlPipelineBuilder WithName(string name)
|
||||
{
|
||||
_name = name ?? throw new ArgumentNullException(nameof(name));
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipelineBuilder WithSource(IImportSource source)
|
||||
{
|
||||
_source = source ?? throw new ArgumentNullException(nameof(source));
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipelineBuilder WithTransformer(IDataTransformer transformer)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(transformer);
|
||||
_transformers.Add(transformer);
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipelineBuilder WithDestination(IImportDestination destination)
|
||||
{
|
||||
_destination = destination ?? throw new ArgumentNullException(nameof(destination));
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipelineBuilder WithPreScript(IScriptRunner script)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(script);
|
||||
_preScripts.Add(script);
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipelineBuilder WithPostScript(IScriptRunner script)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(script);
|
||||
_postScripts.Add(script);
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipelineBuilder WithLogger(ILogger<EtlPipeline> logger)
|
||||
{
|
||||
_logger = logger;
|
||||
return this;
|
||||
}
|
||||
|
||||
public EtlPipeline Build()
|
||||
{
|
||||
if (_source == null)
|
||||
throw new InvalidOperationException("Source is required. Call WithSource() before Build().");
|
||||
if (_destination == null)
|
||||
throw new InvalidOperationException("Destination is required. Call WithDestination() before Build().");
|
||||
|
||||
return new EtlPipeline(_name, _source, _transformers, _destination, _preScripts, _postScripts,
|
||||
_logger ?? NullLogger<EtlPipeline>.Instance);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user