ec4c8fab87
Move configuration options from Core/DataAccess/DataSync/ExcelIO to dedicated Options folders within each project for better organization. Update all references and tests accordingly.
2512 lines
80 KiB
Markdown
2512 lines
80 KiB
Markdown
# ETL Pipeline Implementation Plan
|
|
|
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
|
|
|
**Goal:** Implement a flexible, configuration-driven ETL pipeline for DataSync that replaces the strongly-typed fetcher + source-generated DataReader pattern.
|
|
|
|
**Architecture:** Pipeline components (Source → Transformer → Destination) connected via `IDataReader`. Transformers are decorators that wrap readers. Destinations own their connections. Pipeline orchestrates with fail-fast error handling and step-by-step metrics.
|
|
|
|
**Tech Stack:** .NET 10, Microsoft.Data.SqlClient, xUnit, NSubstitute
|
|
|
|
---
|
|
|
|
## Phase 1: Core Interfaces and Result Models
|
|
|
|
### Task 1: Create Result Models
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Results/DestinationResult.cs`
|
|
- Create: `src/JdeScoping.DataSync/Etl/Results/StepResult.cs`
|
|
- Create: `src/JdeScoping.DataSync/Etl/Results/PipelineResult.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Results/PipelineResultTests.cs`
|
|
|
|
**Step 1: Create DestinationResult record**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Results/DestinationResult.cs
|
|
namespace JdeScoping.DataSync.Etl.Results;
|
|
|
|
/// <summary>
/// Outcome reported by a destination once a write operation has finished.
/// </summary>
/// <param name="RowsProcessed">Row count reported for the write.</param>
/// <param name="BatchCount">Batch count reported for the write.</param>
/// <param name="Elapsed">Duration reported for the write.</param>
public record DestinationResult(long RowsProcessed, int BatchCount, TimeSpan Elapsed);
|
|
```
|
|
|
|
**Step 2: Create StepResult record**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Results/StepResult.cs
|
|
namespace JdeScoping.DataSync.Etl.Results;
|
|
|
|
/// <summary>
/// Outcome of one individual step within a pipeline run.
/// </summary>
/// <param name="StepName">Name identifying the step.</param>
/// <param name="StepType">Category label for the step (the accompanying tests use values such as "Source" and "Destination").</param>
/// <param name="RowsAffected">Row count attributed to the step.</param>
/// <param name="Elapsed">Duration attributed to the step.</param>
public record StepResult(string StepName, string StepType, long RowsAffected, TimeSpan Elapsed);
|
|
```
|
|
|
|
**Step 3: Create PipelineResult record**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Results/PipelineResult.cs
|
|
namespace JdeScoping.DataSync.Etl.Results;
|
|
|
|
/// <summary>
/// Complete result from pipeline execution.
/// </summary>
/// <param name="Success">Whether the run is reported as successful.</param>
/// <param name="TotalRows">Total row count reported for the run.</param>
/// <param name="Elapsed">Duration reported for the run.</param>
/// <param name="Steps">Per-step results recorded for the run.</param>
/// <param name="Error">The exception associated with a failed run; <c>null</c> when none is supplied.</param>
public record PipelineResult(
    bool Success,
    long TotalRows,
    TimeSpan Elapsed,
    IReadOnlyList<StepResult> Steps,
    Exception? Error = null)
{
    /// <summary>
    /// Creates a successful result (<see cref="Success"/> is <c>true</c>, <see cref="Error"/> is <c>null</c>).
    /// </summary>
    /// <param name="totalRows">Total row count to report.</param>
    /// <param name="elapsed">Duration to report.</param>
    /// <param name="steps">Per-step results to attach.</param>
    /// <returns>A new <see cref="PipelineResult"/> marked as successful.</returns>
    public static PipelineResult Succeeded(long totalRows, TimeSpan elapsed, IReadOnlyList<StepResult> steps)
        => new(true, totalRows, elapsed, steps);

    /// <summary>
    /// Creates a failed result (<see cref="Success"/> is <c>false</c>) carrying the supplied error.
    /// </summary>
    /// <param name="totalRows">Total row count to report.</param>
    /// <param name="elapsed">Duration to report.</param>
    /// <param name="steps">Per-step results to attach.</param>
    /// <param name="error">The exception that caused the failure.</param>
    /// <returns>A new <see cref="PipelineResult"/> marked as failed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="error"/> is <c>null</c>.</exception>
    public static PipelineResult Failed(long totalRows, TimeSpan elapsed, IReadOnlyList<StepResult> steps, Exception error)
    {
        // A failed result without an error gives consumers nothing to report or log,
        // so reject it here instead of letting it surface later as a null Error.
        ArgumentNullException.ThrowIfNull(error);

        return new(false, totalRows, elapsed, steps, error);
    }
}
|
|
```
|
|
|
|
**Step 4: Write test for PipelineResult factory methods**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Results/PipelineResultTests.cs
|
|
namespace JdeScoping.DataSync.Tests.Etl.Results;
|
|
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
|
|
/// <summary>
/// Verifies the <c>Succeeded</c> and <c>Failed</c> factory methods on <c>PipelineResult</c>.
/// </summary>
public class PipelineResultTests
{
    [Fact]
    public void Succeeded_CreatesSuccessfulResult()
    {
        // Arrange
        StepResult sourceStep = new("Source", "Source", 0, TimeSpan.FromSeconds(1));
        StepResult destinationStep = new("Destination", "Destination", 100, TimeSpan.FromSeconds(5));
        List<StepResult> recordedSteps = new() { sourceStep, destinationStep };

        // Act
        PipelineResult outcome = PipelineResult.Succeeded(100, TimeSpan.FromSeconds(6), recordedSteps);

        // Assert
        Assert.True(outcome.Success);
        Assert.Equal(100, outcome.TotalRows);
        Assert.Null(outcome.Error);
        Assert.Equal(2, outcome.Steps.Count);
    }

    [Fact]
    public void Failed_CreatesFailedResult()
    {
        // Arrange
        List<StepResult> noSteps = new();
        InvalidOperationException failure = new("Test error");

        // Act
        PipelineResult outcome = PipelineResult.Failed(0, TimeSpan.FromSeconds(1), noSteps, failure);

        // Assert
        Assert.False(outcome.Success);
        Assert.Equal(0, outcome.TotalRows);
        Assert.Same(failure, outcome.Error);
    }
}
|
|
```
|
|
|
|
**Step 5: Run tests**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~PipelineResultTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 6: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Results/ tests/JdeScoping.DataSync.Tests/Etl/Results/
|
|
git commit -m "feat(etl): add result models for pipeline execution"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 2: Create Core Interfaces
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Contracts/IImportSource.cs`
|
|
- Create: `src/JdeScoping.DataSync/Etl/Contracts/IDataTransformer.cs`
|
|
- Create: `src/JdeScoping.DataSync/Etl/Contracts/IImportDestination.cs`
|
|
- Create: `src/JdeScoping.DataSync/Etl/Contracts/IScriptRunner.cs`
|
|
|
|
**Step 1: Create IImportSource interface**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Contracts/IImportSource.cs
|
|
using System.Data;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
/// <summary>
/// A pipeline source: opens its underlying data and exposes it as an
/// <see cref="IDataReader"/> so rows can be streamed.
/// </summary>
public interface IImportSource : IAsyncDisposable
{
    /// <summary>
    /// Gets the name used for this source in logging and metrics.
    /// </summary>
    string SourceName { get; }

    /// <summary>
    /// Opens the source and hands back a reader over its rows.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task producing the reader to stream rows from.</returns>
    Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default);
}
|
|
```
|
|
|
|
**Step 2: Create IDataTransformer interface**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Contracts/IDataTransformer.cs
|
|
using System.Data;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
/// <summary>
/// A pipeline transformation expressed as a wrapper around an <see cref="IDataReader"/>.
/// </summary>
public interface IDataTransformer
{
    /// <summary>
    /// Gets the name used for this transformer in logging and metrics.
    /// </summary>
    string TransformerName { get; }

    /// <summary>
    /// Wraps <paramref name="source"/> in a reader that applies this transformation.
    /// </summary>
    /// <param name="source">The reader to wrap.</param>
    /// <returns>A reader exposing the transformed view of <paramref name="source"/>.</returns>
    IDataReader Transform(IDataReader source);
}
|
|
```
|
|
|
|
**Step 3: Create IImportDestination interface**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Contracts/IImportDestination.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
/// <summary>
/// A pipeline destination: consumes rows from an <see cref="IDataReader"/> and writes them out.
/// </summary>
public interface IImportDestination
{
    /// <summary>
    /// Gets the name used for this destination in logging and metrics.
    /// </summary>
    string DestinationName { get; }

    /// <summary>
    /// Writes every row supplied by <paramref name="source"/> to the destination.
    /// </summary>
    /// <param name="source">The reader supplying the rows to write.</param>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task producing the <see cref="DestinationResult"/> for the write.</returns>
    Task<DestinationResult> WriteAsync(IDataReader source, CancellationToken cancellationToken = default);
}
|
|
```
|
|
|
|
**Step 4: Create IScriptRunner interface**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Contracts/IScriptRunner.cs
|
|
namespace JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
/// <summary>
/// A SQL script executed as part of a pipeline's pre- or post-processing.
/// </summary>
public interface IScriptRunner
{
    /// <summary>
    /// Gets the name used for this script in logging and metrics.
    /// </summary>
    string ScriptName { get; }

    /// <summary>
    /// Runs the script.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the operation.</param>
    /// <returns>A task representing the execution.</returns>
    Task ExecuteAsync(CancellationToken cancellationToken = default);
}
|
|
```
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Contracts/
|
|
git commit -m "feat(etl): add core ETL pipeline interfaces"
|
|
```
|
|
|
|
---
|
|
|
|
## Phase 2: Script Runner Implementation
|
|
|
|
### Task 3: Implement SqlScriptRunner
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Scripts/SqlScriptRunner.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Scripts/SqlScriptRunnerTests.cs`
|
|
|
|
**Step 1: Write failing test for SqlScriptRunner**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Scripts/SqlScriptRunnerTests.cs
|
|
using System.Data.Common;
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Scripts;
|
|
using Microsoft.Data.SqlClient;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Scripts;
|
|
|
|
/// <summary>
/// Constructor-level tests for <c>SqlScriptRunner</c>: script naming and argument validation.
/// </summary>
public class SqlScriptRunnerTests
{
    [Fact]
    public void Constructor_SetsScriptName()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var runner = new SqlScriptRunner(factory, "SELECT 1", "TestScript");

        Assert.Equal("TestScript", runner.ScriptName);
    }

    [Fact]
    public void Constructor_WithNullName_DefaultsToSqlScript()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var runner = new SqlScriptRunner(factory, "SELECT 1");

        Assert.Equal("SqlScript", runner.ScriptName);
    }

    [Fact]
    public void Constructor_NullFactory_ThrowsArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(() =>
            new SqlScriptRunner(null!, "SELECT 1"));
    }

    [Fact]
    public void Constructor_NullSql_ThrowsArgumentNullException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        // Assert.Throws<T> matches the exact exception type only. The runner validates
        // with ArgumentException.ThrowIfNullOrWhiteSpace, which throws the derived
        // ArgumentNullException for a null argument, so that is the type to expect.
        Assert.Throws<ArgumentNullException>(() =>
            new SqlScriptRunner(factory, null!));
    }

    [Fact]
    public void Constructor_EmptySql_ThrowsArgumentException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        // An empty (non-null) string yields a plain ArgumentException.
        Assert.Throws<ArgumentException>(() =>
            new SqlScriptRunner(factory, ""));
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~SqlScriptRunnerTests" --verbosity normal`
|
|
Expected: FAIL - SqlScriptRunner class does not exist
|
|
|
|
**Step 3: Implement SqlScriptRunner**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Scripts/SqlScriptRunner.cs
|
|
using System.Data;
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Scripts;
|
|
|
|
/// <summary>
/// Runs a SQL script against the database.
/// </summary>
public class SqlScriptRunner : IScriptRunner
{
    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _sql;
    private readonly int _timeoutSeconds;

    /// <summary>
    /// Gets the name of this script; "SqlScript" when no name was supplied.
    /// </summary>
    public string ScriptName { get; }

    /// <summary>
    /// Creates a runner for the given SQL text.
    /// </summary>
    /// <param name="connectionFactory">Factory used to open the connection the script runs on.</param>
    /// <param name="sql">The SQL text to execute.</param>
    /// <param name="name">Optional script name; defaults to "SqlScript" when <c>null</c>.</param>
    /// <param name="timeoutSeconds">Value assigned to the command's <c>CommandTimeout</c>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="connectionFactory"/> or <paramref name="sql"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException"><paramref name="sql"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="timeoutSeconds"/> is negative.</exception>
    public SqlScriptRunner(
        IDbConnectionFactory connectionFactory,
        string sql,
        string? name = null,
        int timeoutSeconds = 3600)
    {
        ArgumentNullException.ThrowIfNull(connectionFactory);
        ArgumentException.ThrowIfNullOrWhiteSpace(sql);

        // Fail fast: a negative value would otherwise only be rejected later, when it is
        // assigned to CommandTimeout inside ExecuteAsync (SqlClient rejects negatives there).
        ArgumentOutOfRangeException.ThrowIfNegative(timeoutSeconds);

        _connectionFactory = connectionFactory;
        _sql = sql;
        _timeoutSeconds = timeoutSeconds;
        ScriptName = name ?? "SqlScript";
    }

    /// <summary>
    /// Opens a connection through the factory, executes the SQL as a non-query command
    /// with the configured timeout, and disposes the command and connection afterwards.
    /// </summary>
    /// <param name="cancellationToken">Token passed to connection creation and command execution.</param>
    /// <returns>A task that completes when the command has finished executing.</returns>
    public async Task ExecuteAsync(CancellationToken cancellationToken = default)
    {
        await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);
        await using var command = connection.CreateCommand();
        command.CommandText = _sql;
        command.CommandTimeout = _timeoutSeconds;
        await command.ExecuteNonQueryAsync(cancellationToken);
    }
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~SqlScriptRunnerTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Scripts/SqlScriptRunner.cs tests/JdeScoping.DataSync.Tests/Etl/Scripts/
|
|
git commit -m "feat(etl): implement SqlScriptRunner"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 4: Implement CommonScripts Factory
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Scripts/CommonScripts.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Scripts/CommonScriptsTests.cs`
|
|
|
|
**Step 1: Write failing test for CommonScripts**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Scripts/CommonScriptsTests.cs
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Scripts;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Scripts;
|
|
|
|
/// <summary>
/// Checks that each <c>CommonScripts</c> factory method names its runner as expected.
/// </summary>
public class CommonScriptsTests
{
    private const string TableName = "WorkOrder";

    private readonly IDbConnectionFactory _factory = Substitute.For<IDbConnectionFactory>();

    [Fact]
    public void DisableIndexes_ReturnsRunnerWithCorrectName()
    {
        var scriptName = CommonScripts.DisableIndexes(_factory, TableName).ScriptName;

        Assert.Equal("DisableIndexes:WorkOrder", scriptName);
    }

    [Fact]
    public void RebuildIndexes_ReturnsRunnerWithCorrectName()
    {
        var scriptName = CommonScripts.RebuildIndexes(_factory, TableName).ScriptName;

        Assert.Equal("RebuildIndexes:WorkOrder", scriptName);
    }

    [Fact]
    public void UpdateStatistics_ReturnsRunnerWithCorrectName()
    {
        var scriptName = CommonScripts.UpdateStatistics(_factory, TableName).ScriptName;

        Assert.Equal("UpdateStats:WorkOrder", scriptName);
    }

    [Fact]
    public void CustomSql_ReturnsRunnerWithProvidedName()
    {
        var scriptName = CommonScripts.CustomSql(_factory, "SELECT 1", "MyCustomScript").ScriptName;

        Assert.Equal("MyCustomScript", scriptName);
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~CommonScriptsTests" --verbosity normal`
|
|
Expected: FAIL - CommonScripts class does not exist
|
|
|
|
**Step 3: Implement CommonScripts**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Scripts/CommonScripts.cs
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Scripts;
|
|
|
|
/// <summary>
/// Factory methods for common SQL scripts used in ETL pipelines.
/// </summary>
/// <remarks>
/// Table names are embedded into the generated SQL text, so they are escaped before
/// use: <c>]</c> is doubled inside bracketed identifiers and <c>'</c> is doubled inside
/// string literals. The table name is always treated as a single identifier part.
/// </remarks>
public static class CommonScripts
{
    /// <summary>
    /// Creates a script that disables non-clustered indexes on a table.
    /// </summary>
    /// <param name="factory">Connection factory handed to the created runner.</param>
    /// <param name="tableName">Name of the table whose indexes are disabled.</param>
    /// <returns>A runner named <c>DisableIndexes:{tableName}</c> with a 300 second timeout.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tableName"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="tableName"/> is empty or whitespace.</exception>
    public static IScriptRunner DisableIndexes(IDbConnectionFactory factory, string tableName)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);

        // The bracketed table name sits inside a T-SQL string literal (the dynamic
        // ALTER INDEX statement), so it needs identifier escaping first and literal
        // escaping second. The WHERE clause compares the raw name as a literal.
        var tableInDynamicSql = EscapeLiteral(QuoteIdentifier(tableName));
        var tableNameLiteral = EscapeLiteral(tableName);

        // QUOTENAME guards index names that themselves contain ']'.
        var sql = $@"
DECLARE @sql NVARCHAR(MAX) = '';
SELECT @sql = @sql + 'ALTER INDEX ' + QUOTENAME(i.name) + ' ON {tableInDynamicSql} DISABLE;' + CHAR(13)
FROM sys.indexes i
INNER JOIN sys.tables t ON i.object_id = t.object_id
WHERE t.name = '{tableNameLiteral}'
  AND i.type = 2
  AND i.is_disabled = 0;
IF LEN(@sql) > 0 EXEC sp_executesql @sql;";

        return new SqlScriptRunner(factory, sql, $"DisableIndexes:{tableName}", timeoutSeconds: 300);
    }

    /// <summary>
    /// Creates a script that rebuilds all indexes on a table.
    /// </summary>
    /// <param name="factory">Connection factory handed to the created runner.</param>
    /// <param name="tableName">Name of the table whose indexes are rebuilt.</param>
    /// <returns>A runner named <c>RebuildIndexes:{tableName}</c> with a 3600 second timeout.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tableName"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="tableName"/> is empty or whitespace.</exception>
    public static IScriptRunner RebuildIndexes(IDbConnectionFactory factory, string tableName)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);

        var sql = $"ALTER INDEX ALL ON {QuoteIdentifier(tableName)} REBUILD WITH (FILLFACTOR = 95)";
        return new SqlScriptRunner(factory, sql, $"RebuildIndexes:{tableName}", timeoutSeconds: 3600);
    }

    /// <summary>
    /// Creates a script that updates statistics on a table.
    /// </summary>
    /// <param name="factory">Connection factory handed to the created runner.</param>
    /// <param name="tableName">Name of the table whose statistics are updated.</param>
    /// <returns>A runner named <c>UpdateStats:{tableName}</c> with a 600 second timeout.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tableName"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="tableName"/> is empty or whitespace.</exception>
    public static IScriptRunner UpdateStatistics(IDbConnectionFactory factory, string tableName)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);

        var sql = $"UPDATE STATISTICS {QuoteIdentifier(tableName)}";
        return new SqlScriptRunner(factory, sql, $"UpdateStats:{tableName}", timeoutSeconds: 600);
    }

    /// <summary>
    /// Creates a script runner for custom SQL.
    /// </summary>
    /// <param name="factory">Connection factory handed to the created runner.</param>
    /// <param name="sql">The SQL text to run; passed through unchanged.</param>
    /// <param name="name">Name given to the created runner.</param>
    /// <returns>A runner for <paramref name="sql"/> using the runner's default timeout.</returns>
    public static IScriptRunner CustomSql(IDbConnectionFactory factory, string sql, string name)
    {
        return new SqlScriptRunner(factory, sql, name);
    }

    // Wraps a single identifier part in brackets, doubling any ']' it contains.
    private static string QuoteIdentifier(string identifier)
        => "[" + identifier.Replace("]", "]]") + "]";

    // Escapes text for placement between single quotes in a T-SQL string literal.
    private static string EscapeLiteral(string value)
        => value.Replace("'", "''");
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~CommonScriptsTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Scripts/CommonScripts.cs tests/JdeScoping.DataSync.Tests/Etl/Scripts/CommonScriptsTests.cs
|
|
git commit -m "feat(etl): add CommonScripts factory for index and statistics scripts"
|
|
```
|
|
|
|
---
|
|
|
|
## Phase 3: Transformer Infrastructure
|
|
|
|
### Task 5: Implement TransformingDataReader Base
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Transformers/TransformingDataReader.cs`
|
|
- Create: `src/JdeScoping.DataSync/Etl/Transformers/DataTransformerBase.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Transformers/TransformingDataReaderTests.cs`
|
|
|
|
**Step 1: Write failing test for TransformingDataReader**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Transformers/TransformingDataReaderTests.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Transformers;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
|
|
|
|
/// <summary>
/// Exercises the reader returned by <c>DataTransformerBase.Transform</c> using a
/// transformer that overrides nothing, so every call should reach the source reader.
/// </summary>
public class TransformingDataReaderTests
{
    private readonly IDataReader _source = Substitute.For<IDataReader>();

    [Fact]
    public void Read_DelegatesToSourceReader()
    {
        _source.Read().Returns(true, true, false);

        var wrapped = Wrap(_source);

        Assert.True(wrapped.Read());
        Assert.True(wrapped.Read());
        Assert.False(wrapped.Read());
    }

    [Fact]
    public void FieldCount_DelegatesToTransformer()
    {
        _source.FieldCount.Returns(5);

        var wrapped = Wrap(_source);

        Assert.Equal(5, wrapped.FieldCount);
    }

    [Fact]
    public void GetName_DelegatesToTransformer()
    {
        _source.GetName(0).Returns("OriginalName");

        var wrapped = Wrap(_source);

        Assert.Equal("OriginalName", wrapped.GetName(0));
    }

    [Fact]
    public void GetValue_DelegatesToTransformer()
    {
        _source.GetValue(0).Returns("TestValue");

        var wrapped = Wrap(_source);

        Assert.Equal("TestValue", wrapped.GetValue(0));
    }

    [Fact]
    public void Dispose_DisposesSourceReader()
    {
        var wrapped = Wrap(_source);

        wrapped.Dispose();

        _source.Received(1).Dispose();
    }

    // Wraps the given reader with a fresh pass-through transformer.
    private static IDataReader Wrap(IDataReader inner)
        => new PassThroughTransformer().Transform(inner);

    // Test helper - transformer that keeps every base-class default.
    private class PassThroughTransformer : DataTransformerBase
    {
        public override string TransformerName => "PassThrough";
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~TransformingDataReaderTests" --verbosity normal`
|
|
Expected: FAIL - classes do not exist
|
|
|
|
**Step 3: Implement DataTransformerBase**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Transformers/DataTransformerBase.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Transformers;
|
|
|
|
/// <summary>
/// Starting point for transformers that decorate an <see cref="IDataReader"/>.
/// Every schema/value hook defaults to forwarding to the source reader; subclasses
/// override only the hooks they need to change.
/// </summary>
public abstract class DataTransformerBase : IDataTransformer
{
    /// <summary>
    /// Gets the name of this transformer.
    /// </summary>
    public abstract string TransformerName { get; }

    /// <summary>
    /// Runs <see cref="OnInitialize"/> against <paramref name="source"/> and then returns
    /// a reader that routes schema and value calls through this transformer.
    /// </summary>
    /// <param name="source">The reader to wrap.</param>
    /// <returns>The wrapping reader.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public IDataReader Transform(IDataReader source)
    {
        ArgumentNullException.ThrowIfNull(source);

        OnInitialize(source);
        var wrapped = new TransformingDataReader(source, this);
        return wrapped;
    }

    /// <summary>
    /// Hook invoked once per <see cref="Transform"/> call, before the wrapping reader is
    /// created. Intended for one-time setup such as caching ordinals. Does nothing by default.
    /// </summary>
    /// <param name="source">The reader about to be wrapped.</param>
    protected virtual void OnInitialize(IDataReader source)
    {
    }

    /// <summary>
    /// Field count exposed after transformation; defaults to the source's field count.
    /// </summary>
    public virtual int GetFieldCount(IDataReader source)
    {
        return source.FieldCount;
    }

    /// <summary>
    /// Field name exposed at <paramref name="ordinal"/>; defaults to the source's name.
    /// </summary>
    public virtual string GetName(int ordinal, IDataReader source)
    {
        return source.GetName(ordinal);
    }

    /// <summary>
    /// Field type exposed at <paramref name="ordinal"/>; defaults to the source's type.
    /// </summary>
    public virtual Type GetFieldType(int ordinal, IDataReader source)
    {
        return source.GetFieldType(ordinal);
    }

    /// <summary>
    /// Value exposed at <paramref name="ordinal"/>; defaults to the source's value.
    /// </summary>
    public virtual object GetValue(int ordinal, IDataReader source)
    {
        return source.GetValue(ordinal);
    }

    /// <summary>
    /// Ordinal exposed for <paramref name="name"/>; defaults to the source's lookup.
    /// </summary>
    public virtual int GetOrdinal(string name, IDataReader source)
    {
        return source.GetOrdinal(name);
    }

    /// <summary>
    /// Whether the value at <paramref name="ordinal"/> is null; defaults to the source's answer.
    /// </summary>
    public virtual bool IsDBNull(int ordinal, IDataReader source)
    {
        return source.IsDBNull(ordinal);
    }
}
|
|
```
|
|
|
|
**Step 4: Implement TransformingDataReader**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Transformers/TransformingDataReader.cs
|
|
using System.Data;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Transformers;
|
|
|
|
/// <summary>
/// <see cref="IDataReader"/> decorator: schema and value lookups go through a
/// <see cref="DataTransformerBase"/>, while navigation and lifetime calls go to the
/// wrapped reader.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="GetDataTypeName"/>, <see cref="GetBytes"/>,
/// <see cref="GetChars"/>, <see cref="GetData"/> and <see cref="GetSchemaTable"/> hand the
/// caller's ordinal (or the schema) straight to the wrapped reader without consulting the
/// transformer. For a transformer that remaps ordinals this may not line up with
/// <see cref="GetName"/>/<see cref="GetValue"/> - confirm whether any consumer relies on these members.
/// </remarks>
internal sealed class TransformingDataReader : IDataReader
{
    private readonly IDataReader _source;
    private readonly DataTransformerBase _transformer;

    /// <summary>
    /// Creates the decorator.
    /// </summary>
    /// <param name="source">The reader being wrapped.</param>
    /// <param name="transformer">The transformer consulted for schema and value calls.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public TransformingDataReader(IDataReader source, DataTransformerBase transformer)
    {
        if (source is null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        if (transformer is null)
        {
            throw new ArgumentNullException(nameof(transformer));
        }

        _source = source;
        _transformer = transformer;
    }

    // ----- Lifetime and navigation: forwarded to the wrapped reader -----

    public int Depth => _source.Depth;

    public bool IsClosed => _source.IsClosed;

    public int RecordsAffected => _source.RecordsAffected;

    public bool Read() => _source.Read();

    public bool NextResult() => _source.NextResult();

    public void Close() => _source.Close();

    public void Dispose() => _source.Dispose();

    // ----- Schema: answered by the transformer -----

    public int FieldCount => _transformer.GetFieldCount(_source);

    public string GetName(int i) => _transformer.GetName(i, _source);

    public int GetOrdinal(string name) => _transformer.GetOrdinal(name, _source);

    public Type GetFieldType(int i) => _transformer.GetFieldType(i, _source);

    // ----- Values: answered by the transformer -----

    public object this[int i] => GetValue(i);

    public object this[string name] => GetValue(GetOrdinal(name));

    public object GetValue(int i) => _transformer.GetValue(i, _source);

    public bool IsDBNull(int i) => _transformer.IsDBNull(i, _source);

    /// <summary>
    /// Copies transformed values into <paramref name="values"/>, stopping at whichever is
    /// smaller: the array length or <see cref="FieldCount"/>.
    /// </summary>
    /// <returns>The number of values copied.</returns>
    public int GetValues(object[] values)
    {
        var limit = Math.Min(values.Length, FieldCount);
        var index = 0;
        while (index < limit)
        {
            values[index] = GetValue(index);
            index++;
        }

        return limit;
    }

    // ----- Typed getters: a direct cast of the transformed value -----

    public bool GetBoolean(int i) => Cast<bool>(i);

    public byte GetByte(int i) => Cast<byte>(i);

    public char GetChar(int i) => Cast<char>(i);

    public DateTime GetDateTime(int i) => Cast<DateTime>(i);

    public decimal GetDecimal(int i) => Cast<decimal>(i);

    public double GetDouble(int i) => Cast<double>(i);

    public float GetFloat(int i) => Cast<float>(i);

    public Guid GetGuid(int i) => Cast<Guid>(i);

    public short GetInt16(int i) => Cast<short>(i);

    public int GetInt32(int i) => Cast<int>(i);

    public long GetInt64(int i) => Cast<long>(i);

    public string GetString(int i) => Cast<string>(i);

    // ----- Forwarded to the wrapped reader with the ordinal unchanged -----

    public string GetDataTypeName(int i) => _source.GetDataTypeName(i);

    public long GetBytes(int i, long fieldOffset, byte[]? buffer, int bufferoffset, int length)
    {
        return _source.GetBytes(i, fieldOffset, buffer, bufferoffset, length);
    }

    public long GetChars(int i, long fieldoffset, char[]? buffer, int bufferoffset, int length)
    {
        return _source.GetChars(i, fieldoffset, buffer, bufferoffset, length);
    }

    public IDataReader GetData(int i) => _source.GetData(i);

    public DataTable? GetSchemaTable() => _source.GetSchemaTable();

    // Plain cast (no conversion) of the transformed value to T.
    private T Cast<T>(int i) => (T)GetValue(i);
}
|
|
```
|
|
|
|
**Step 5: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~TransformingDataReaderTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 6: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Transformers/ tests/JdeScoping.DataSync.Tests/Etl/Transformers/
|
|
git commit -m "feat(etl): implement TransformingDataReader and DataTransformerBase"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 6: Implement ColumnDropTransformer
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Transformers/ColumnDropTransformer.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Transformers/ColumnDropTransformerTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Transformers/ColumnDropTransformerTests.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Transformers;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
|
|
|
|
/// <summary>
/// Tests for <c>ColumnDropTransformer</c>: dropped columns must disappear from the
/// field count, names, ordinals and values exposed by the wrapped reader.
/// </summary>
public class ColumnDropTransformerTests
{
    private static readonly string[] FourColumns = { "Id", "Name", "DropMe", "Value" };

    [Fact]
    public void FieldCount_ExcludesDroppedColumns()
    {
        var wrapped = Drop(CreateMockReader(FourColumns), "DropMe");

        Assert.Equal(3, wrapped.FieldCount);
    }

    [Fact]
    public void GetName_SkipsDroppedColumns()
    {
        var wrapped = Drop(CreateMockReader(FourColumns), "DropMe");

        Assert.Equal("Id", wrapped.GetName(0));
        Assert.Equal("Name", wrapped.GetName(1));
        Assert.Equal("Value", wrapped.GetName(2));
    }

    [Fact]
    public void GetOrdinal_ReturnsRemappedOrdinal()
    {
        var wrapped = Drop(CreateMockReader(FourColumns), "DropMe");

        Assert.Equal(0, wrapped.GetOrdinal("Id"));
        Assert.Equal(1, wrapped.GetOrdinal("Name"));
        Assert.Equal(2, wrapped.GetOrdinal("Value"));
    }

    [Fact]
    public void GetOrdinal_DroppedColumn_ThrowsIndexOutOfRange()
    {
        var wrapped = Drop(CreateMockReader(FourColumns), "DropMe");

        Assert.Throws<IndexOutOfRangeException>(() => wrapped.GetOrdinal("DropMe"));
    }

    [Fact]
    public void GetValue_ReturnsCorrectValues()
    {
        var rowValues = new object[] { 1, "Test", "Dropped", 42 };
        var inner = CreateMockReader(FourColumns, rowValues);

        var wrapped = Drop(inner, "DropMe");
        inner.Read().Returns(true);
        wrapped.Read();

        Assert.Equal(1, wrapped.GetValue(0));
        Assert.Equal("Test", wrapped.GetValue(1));
        Assert.Equal(42, wrapped.GetValue(2));
    }

    [Fact]
    public void MultipleDroppedColumns_AllExcluded()
    {
        var inner = CreateMockReader(new[] { "Id", "Drop1", "Name", "Drop2", "Value" });

        var wrapped = Drop(inner, "Drop1", "Drop2");

        Assert.Equal(3, wrapped.FieldCount);
        Assert.Equal("Id", wrapped.GetName(0));
        Assert.Equal("Name", wrapped.GetName(1));
        Assert.Equal("Value", wrapped.GetName(2));
    }

    // Wraps the reader with a transformer that drops the named columns.
    private static IDataReader Drop(IDataReader inner, params string[] dropped)
        => new ColumnDropTransformer(dropped).Transform(inner);

    // Builds a substitute reader that reports the given column names (and, when supplied,
    // one value per column) at matching ordinals.
    private static IDataReader CreateMockReader(string[] columns, object[]? values = null)
    {
        var mock = Substitute.For<IDataReader>();
        mock.FieldCount.Returns(columns.Length);

        var next = 0;
        foreach (var columnName in columns)
        {
            var position = next++;
            mock.GetName(position).Returns(columnName);
            mock.GetOrdinal(columnName).Returns(position);

            if (values is not null)
            {
                mock.GetValue(position).Returns(values[position]);
            }
        }

        return mock;
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~ColumnDropTransformerTests" --verbosity normal`
|
|
Expected: FAIL - ColumnDropTransformer does not exist
|
|
|
|
**Step 3: Implement ColumnDropTransformer**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Transformers/ColumnDropTransformer.cs
|
|
using System.Data;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Transformers;
|
|
|
|
/// <summary>
/// Transformer that hides a set of columns, matched by name without regard to case,
/// from the reader it wraps.
/// </summary>
/// <remarks>
/// The ordinal mapping is rebuilt on every <c>Transform</c> call and stored on this
/// instance. NOTE(review): reusing one instance for several readers at once would share
/// that mapping - confirm instances are not shared that way.
/// </remarks>
public class ColumnDropTransformer : DataTransformerBase
{
    private readonly HashSet<string> _columnsToDrop;

    // Index = ordinal exposed to consumers, value = ordinal in the source reader.
    private int[]? _ordinalMap;

    // Kept column name -> ordinal exposed to consumers.
    private Dictionary<string, int>? _nameToOrdinal;

    /// <summary>
    /// Creates a transformer that drops the given columns.
    /// </summary>
    /// <param name="columnsToDrop">Names of the columns to hide.</param>
    /// <exception cref="ArgumentNullException"><paramref name="columnsToDrop"/> is <c>null</c>.</exception>
    public ColumnDropTransformer(params string[] columnsToDrop)
    {
        ArgumentNullException.ThrowIfNull(columnsToDrop);

        _columnsToDrop = new HashSet<string>(columnsToDrop, StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Gets "DropColumns:" followed by the comma-joined names of the dropped columns.
    /// </summary>
    public override string TransformerName
    {
        get { return $"DropColumns:{string.Join(",", _columnsToDrop)}"; }
    }

    /// <summary>
    /// Scans the source's columns in order and records, for each one that is kept,
    /// its new ordinal and the source ordinal it maps back to.
    /// </summary>
    protected override void OnInitialize(IDataReader source)
    {
        var keptSourceOrdinals = new List<int>();
        _nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        var sourceOrdinal = 0;
        while (sourceOrdinal < source.FieldCount)
        {
            var columnName = source.GetName(sourceOrdinal);
            var isDropped = _columnsToDrop.Contains(columnName);

            if (!isDropped)
            {
                // The next free output ordinal is the number of columns kept so far.
                _nameToOrdinal[columnName] = keptSourceOrdinals.Count;
                keptSourceOrdinals.Add(sourceOrdinal);
            }

            sourceOrdinal++;
        }

        _ordinalMap = keptSourceOrdinals.ToArray();
    }

    /// <summary>Number of columns that remain after dropping.</summary>
    public override int GetFieldCount(IDataReader source)
    {
        return _ordinalMap!.Length;
    }

    /// <summary>Name of the kept column at the given output ordinal.</summary>
    public override string GetName(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];
        return source.GetName(sourceOrdinal);
    }

    /// <summary>Type of the kept column at the given output ordinal.</summary>
    public override Type GetFieldType(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];
        return source.GetFieldType(sourceOrdinal);
    }

    /// <summary>Value of the kept column at the given output ordinal.</summary>
    public override object GetValue(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];
        return source.GetValue(sourceOrdinal);
    }

    /// <summary>Output ordinal of the kept column with the given name.</summary>
    /// <exception cref="IndexOutOfRangeException">
    /// No kept column has that name (it is unknown or was dropped).
    /// </exception>
    public override int GetOrdinal(string name, IDataReader source)
    {
        if (!_nameToOrdinal!.TryGetValue(name, out var outputOrdinal))
        {
            throw new IndexOutOfRangeException($"Column '{name}' not found or was dropped.");
        }

        return outputOrdinal;
    }

    /// <summary>Whether the kept column at the given output ordinal holds a null.</summary>
    public override bool IsDBNull(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];
        return source.IsDBNull(sourceOrdinal);
    }
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~ColumnDropTransformerTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Transformers/ColumnDropTransformer.cs tests/JdeScoping.DataSync.Tests/Etl/Transformers/ColumnDropTransformerTests.cs
|
|
git commit -m "feat(etl): implement ColumnDropTransformer"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 7: Implement ColumnRenameTransformer
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Transformers/ColumnRenameTransformer.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Transformers/ColumnRenameTransformerTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Transformers/ColumnRenameTransformerTests.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Transformers;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
|
|
|
|
/// <summary>
/// Verifies that <see cref="ColumnRenameTransformer"/> exposes renamed column
/// names, resolves ordinals by the new names only, and leaves the column
/// count untouched.
/// </summary>
public class ColumnRenameTransformerTests
{
    [Fact]
    public void GetName_ReturnsRenamedColumn()
    {
        var renamed = RenameOldToNew(StubReader(new[] { "OldName", "Other" }));

        Assert.Equal("NewName", renamed.GetName(0));
        Assert.Equal("Other", renamed.GetName(1));
    }

    [Fact]
    public void GetOrdinal_FindsByNewName()
    {
        var renamed = RenameOldToNew(StubReader(new[] { "OldName", "Other" }));

        Assert.Equal(0, renamed.GetOrdinal("NewName"));
    }

    [Fact]
    public void GetOrdinal_OldName_ThrowsIndexOutOfRange()
    {
        var renamed = RenameOldToNew(StubReader(new[] { "OldName", "Other" }));

        // The original name must no longer resolve once it has been renamed.
        Assert.Throws<IndexOutOfRangeException>(() => renamed.GetOrdinal("OldName"));
    }

    [Fact]
    public void FieldCount_Unchanged()
    {
        var renamed = RenameOldToNew(StubReader(new[] { "OldName", "Other" }));

        Assert.Equal(2, renamed.FieldCount);
    }

    [Fact]
    public void MultipleRenames_AllApplied()
    {
        var input = StubReader(new[] { "A", "B", "C" });
        var sut = new ColumnRenameTransformer(("A", "Alpha"), ("C", "Charlie"));

        var renamed = sut.Transform(input);

        Assert.Equal("Alpha", renamed.GetName(0));
        Assert.Equal("B", renamed.GetName(1));
        Assert.Equal("Charlie", renamed.GetName(2));
    }

    /// <summary>
    /// Applies the single OldName -> NewName rename shared by most tests.
    /// </summary>
    private static IDataReader RenameOldToNew(IDataReader input)
    {
        var sut = new ColumnRenameTransformer(("OldName", "NewName"));
        return sut.Transform(input);
    }

    /// <summary>
    /// Builds a substitute reader that reports the given column names, both
    /// by ordinal (GetName) and by name (GetOrdinal).
    /// </summary>
    private static IDataReader StubReader(string[] columnNames)
    {
        var stub = Substitute.For<IDataReader>();
        stub.FieldCount.Returns(columnNames.Length);

        var position = 0;
        foreach (var columnName in columnNames)
        {
            stub.GetName(position).Returns(columnName);
            stub.GetOrdinal(columnName).Returns(position);
            position++;
        }

        return stub;
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~ColumnRenameTransformerTests" --verbosity normal`
|
|
Expected: FAIL - ColumnRenameTransformer does not exist
|
|
|
|
**Step 3: Implement ColumnRenameTransformer**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Transformers/ColumnRenameTransformer.cs
|
|
using System.Data;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Transformers;
|
|
|
|
/// <summary>
/// Transformer that renames columns in the output.
/// Old names are matched case-insensitively against the source column names;
/// columns without a rename entry keep their original name. Column count,
/// order and values are not touched by this class.
/// </summary>
public class ColumnRenameTransformer : DataTransformerBase
{
    private readonly Dictionary<string, string> _renames; // old -> new
    private string[]? _outputNames;
    private Dictionary<string, int>? _nameToOrdinal;

    public override string TransformerName => $"RenameColumns:{_renames.Count}";

    /// <summary>
    /// Creates a transformer from a list of (old name, new name) pairs.
    /// </summary>
    /// <param name="renames">Rename pairs; old names must be unique ignoring case.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="renames"/> is null, or an old name is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// An old or new name is blank, or the same old name appears more than once.
    /// </exception>
    public ColumnRenameTransformer(params (string OldName, string NewName)[] renames)
    {
        ArgumentNullException.ThrowIfNull(renames);

        _renames = new Dictionary<string, string>(renames.Length, StringComparer.OrdinalIgnoreCase);

        foreach (var (oldName, newName) in renames)
        {
            // Validate up front: a null/blank new name would otherwise only surface
            // later, during initialization, as an opaque dictionary-key failure or
            // as a column with an empty name.
            if (oldName is null)
                throw new ArgumentNullException(nameof(renames), "Rename entries must specify the old column name.");

            if (string.IsNullOrWhiteSpace(oldName) || string.IsNullOrWhiteSpace(newName))
                throw new ArgumentException("Rename entries must have non-blank old and new column names.", nameof(renames));

            if (!_renames.TryAdd(oldName, newName))
                throw new ArgumentException($"Column '{oldName}' is renamed more than once.", nameof(renames));
        }
    }

    /// <summary>
    /// Computes the output name of every source column and the reverse
    /// name-to-ordinal lookup. If two columns end up with the same output
    /// name, the lookup keeps the ordinal of the last one.
    /// </summary>
    protected override void OnInitialize(IDataReader source)
    {
        var fieldCount = source.FieldCount;
        _outputNames = new string[fieldCount];
        _nameToOrdinal = new Dictionary<string, int>(fieldCount, StringComparer.OrdinalIgnoreCase);

        for (int i = 0; i < fieldCount; i++)
        {
            var originalName = source.GetName(i);
            var outputName = _renames.TryGetValue(originalName, out var newName)
                ? newName
                : originalName;

            _outputNames[i] = outputName;
            _nameToOrdinal[outputName] = i;
        }
    }

    /// <summary>Returns the (possibly renamed) output name of the column at <paramref name="ordinal"/>.</summary>
    public override string GetName(int ordinal, IDataReader source)
        => _outputNames![ordinal];

    /// <summary>
    /// Resolves an output column name to its ordinal. Renamed columns are
    /// only reachable through their new name.
    /// </summary>
    /// <exception cref="IndexOutOfRangeException">No output column has that name.</exception>
    public override int GetOrdinal(string name, IDataReader source)
    {
        if (_nameToOrdinal!.TryGetValue(name, out var ordinal))
            return ordinal;

        throw new IndexOutOfRangeException($"Column '{name}' not found.");
    }
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~ColumnRenameTransformerTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Transformers/ColumnRenameTransformer.cs tests/JdeScoping.DataSync.Tests/Etl/Transformers/ColumnRenameTransformerTests.cs
|
|
git commit -m "feat(etl): implement ColumnRenameTransformer"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 8: Implement JdeDateTransformer
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Transformers/JdeDateTransformer.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Transformers/JdeDateTransformerTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Transformers/JdeDateTransformerTests.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Transformers;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Transformers;
|
|
|
|
/// <summary>
/// Verifies that <see cref="JdeDateTransformer"/> folds the UPMJ/TDAY column
/// pair into a single DateTime column: schema (count, names, type, ordinal)
/// and value conversion, including the null-date case.
/// </summary>
public class JdeDateTransformerTests
{
    [Fact]
    public void FieldCount_ReducedByOne()
    {
        var combined = Combine(StubReader(SampleColumns(), SampleValues()));

        Assert.Equal(3, combined.FieldCount);
    }

    [Fact]
    public void GetName_DateColumnRenamed_TimeColumnRemoved()
    {
        var combined = Combine(StubReader(SampleColumns(), SampleValues()));

        Assert.Equal("Id", combined.GetName(0));
        Assert.Equal("UpdatedAt", combined.GetName(1));
        Assert.Equal("Name", combined.GetName(2));
    }

    [Fact]
    public void GetValue_ParsesJulianDateAndTime()
    {
        // 124001 -> century digit 1 (2000s), year 24, day 001 => 1 Jan 2024.
        // 120000 -> 12:00:00.
        var input = StubReader(SampleColumns(), SampleValues());
        input.Read().Returns(true);

        var combined = Combine(input);
        combined.Read();

        var expectedDate = new DateTime(2024, 1, 1, 12, 0, 0);
        Assert.Equal(expectedDate, combined.GetValue(1));
    }

    [Fact]
    public void GetValue_NullDate_ReturnsDbNull()
    {
        var input = StubReader(
            SampleColumns(),
            new object[] { 1, DBNull.Value, DBNull.Value, "Test" });
        input.Read().Returns(true);

        var combined = Combine(input);
        combined.Read();

        Assert.Equal(DBNull.Value, combined.GetValue(1));
    }

    [Fact]
    public void GetFieldType_DateColumn_ReturnsDateTime()
    {
        var combined = Combine(StubReader(SampleColumns(), SampleValues()));

        Assert.Equal(typeof(DateTime), combined.GetFieldType(1));
    }

    [Fact]
    public void GetOrdinal_NewDateColumn_ReturnsCorrectOrdinal()
    {
        var combined = Combine(StubReader(SampleColumns(), SampleValues()));

        Assert.Equal(1, combined.GetOrdinal("UpdatedAt"));
    }

    /// <summary>Column layout shared by every test.</summary>
    private static string[] SampleColumns() => new[] { "Id", "UPMJ", "TDAY", "Name" };

    /// <summary>A single row with a populated Julian date and time.</summary>
    private static object[] SampleValues() => new object[] { 1, 124001m, 120000m, "Test" };

    /// <summary>Wraps the reader with the UPMJ + TDAY -> UpdatedAt transformer.</summary>
    private static IDataReader Combine(IDataReader input)
    {
        var sut = new JdeDateTransformer("UPMJ", "TDAY", "UpdatedAt");
        return sut.Transform(input);
    }

    /// <summary>
    /// Builds a substitute reader exposing one row: names, ordinals, values,
    /// null flags and field types are all derived from the two arrays.
    /// </summary>
    private static IDataReader StubReader(string[] columns, object[] values)
    {
        var stub = Substitute.For<IDataReader>();
        stub.FieldCount.Returns(columns.Length);

        for (var position = 0; position < columns.Length; position++)
        {
            var columnName = columns[position];
            var cell = values[position];

            stub.GetName(position).Returns(columnName);
            stub.GetOrdinal(columnName).Returns(position);
            stub.GetValue(position).Returns(cell);
            stub.IsDBNull(position).Returns(cell == DBNull.Value);
            stub.GetFieldType(position).Returns(cell?.GetType() ?? typeof(object));
        }

        return stub;
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~JdeDateTransformerTests" --verbosity normal`
|
|
Expected: FAIL - JdeDateTransformer does not exist
|
|
|
|
**Step 3: Implement JdeDateTransformer**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Transformers/JdeDateTransformer.cs
|
|
using System.Data;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Transformers;
|
|
|
|
/// <summary>
/// Transforms JDE Julian date (UPMJ) and time (TDAY) columns into a single DateTime column.
/// JDE Julian date format: CYYDDD where C=centuries after 1900 (0=1900s, 1=2000s, 2=2100s, ...),
/// YY=year, DDD=day of year.
/// JDE time format: HHMMSS as decimal.
/// The combined column takes the position of the date column; the time column is removed.
/// A date that is database-null, zero or negative is exposed as a database null.
/// </summary>
public class JdeDateTransformer : DataTransformerBase
{
    private readonly string _dateColumn;
    private readonly string _timeColumn;
    private readonly string _outputColumn;

    private int _dateOrdinal;
    private int _timeOrdinal;
    private int[]? _ordinalMap;      // output ordinal -> source ordinal
    private string[]? _outputNames;  // output ordinal -> output column name
    private Dictionary<string, int>? _nameToOrdinal;

    public override string TransformerName => $"JdeDate:{_outputColumn}";

    /// <summary>
    /// Creates the transformer.
    /// </summary>
    /// <param name="dateColumn">Source column holding the CYYDDD Julian date.</param>
    /// <param name="timeColumn">Source column holding the HHMMSS time.</param>
    /// <param name="outputColumn">Name of the combined DateTime column.</param>
    /// <exception cref="ArgumentException">
    /// A name is null/blank, or the date and time columns are the same column.
    /// </exception>
    public JdeDateTransformer(string dateColumn, string timeColumn, string outputColumn)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(dateColumn);
        ArgumentException.ThrowIfNullOrWhiteSpace(timeColumn);
        ArgumentException.ThrowIfNullOrWhiteSpace(outputColumn);

        // With identical names both roles resolve to one ordinal, which is then
        // skipped as "the time column" and the output column never appears.
        if (string.Equals(dateColumn, timeColumn, StringComparison.OrdinalIgnoreCase))
            throw new ArgumentException("The date and time columns must be different columns.", nameof(timeColumn));

        _dateColumn = dateColumn;
        _timeColumn = timeColumn;
        _outputColumn = outputColumn;
    }

    /// <summary>
    /// Resolves the date/time ordinals and builds the output schema: every
    /// source column except the time column, with the date column renamed to
    /// the output column name.
    /// </summary>
    protected override void OnInitialize(IDataReader source)
    {
        _dateOrdinal = source.GetOrdinal(_dateColumn);
        _timeOrdinal = source.GetOrdinal(_timeColumn);

        var ordinalList = new List<int>();
        var nameList = new List<string>();
        _nameToOrdinal = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        for (int i = 0; i < source.FieldCount; i++)
        {
            if (i == _timeOrdinal)
                continue; // Time is folded into the date column, so it is not exposed.

            var name = i == _dateOrdinal ? _outputColumn : source.GetName(i);

            _nameToOrdinal[name] = ordinalList.Count;
            nameList.Add(name);
            ordinalList.Add(i);
        }

        _ordinalMap = ordinalList.ToArray();
        _outputNames = nameList.ToArray();
    }

    /// <summary>Number of output columns (source columns minus the time column).</summary>
    public override int GetFieldCount(IDataReader source) => _ordinalMap!.Length;

    /// <summary>Output name of the column at <paramref name="ordinal"/>.</summary>
    public override string GetName(int ordinal, IDataReader source) => _outputNames![ordinal];

    /// <summary>
    /// Field type of an output column: <see cref="DateTime"/> for the combined
    /// column, otherwise whatever the source reports.
    /// </summary>
    public override Type GetFieldType(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];
        return sourceOrdinal == _dateOrdinal ? typeof(DateTime) : source.GetFieldType(sourceOrdinal);
    }

    /// <summary>
    /// Value of an output column for the current row. The combined column
    /// yields a <see cref="DateTime"/> or <see cref="DBNull.Value"/>.
    /// </summary>
    public override object GetValue(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];

        if (sourceOrdinal == _dateOrdinal)
            return ParseJdeDateTime(source);

        return source.GetValue(sourceOrdinal);
    }

    /// <summary>Resolves an output column name to its output ordinal.</summary>
    /// <exception cref="IndexOutOfRangeException">No output column has that name.</exception>
    public override int GetOrdinal(string name, IDataReader source)
    {
        if (_nameToOrdinal!.TryGetValue(name, out var ordinal))
            return ordinal;

        throw new IndexOutOfRangeException($"Column '{name}' not found.");
    }

    /// <summary>
    /// Null check for an output column. For the combined column this agrees
    /// with <see cref="GetValue"/>: null when the date carries no value.
    /// </summary>
    public override bool IsDBNull(int ordinal, IDataReader source)
    {
        var sourceOrdinal = _ordinalMap![ordinal];

        if (sourceOrdinal == _dateOrdinal)
            return !TryReadJulianDate(source, out _);

        return source.IsDBNull(sourceOrdinal);
    }

    /// <summary>
    /// Reads the Julian date of the current row. Returns false when the
    /// column is database-null or not a positive number.
    /// </summary>
    private bool TryReadJulianDate(IDataReader source, out decimal julianDate)
    {
        julianDate = 0m;

        if (source.IsDBNull(_dateOrdinal))
            return false;

        julianDate = ToDecimal(source.GetValue(_dateOrdinal));

        // NOTE(review): treats 0 as "no date" per the usual JDE convention - confirm
        // against the source tables. Parsing 0 would otherwise yield 1899-12-31.
        return julianDate > 0m;
    }

    /// <summary>
    /// Combines the current row's date and time columns. A null time is
    /// treated as midnight.
    /// </summary>
    private object ParseJdeDateTime(IDataReader source)
    {
        if (!TryReadJulianDate(source, out var julianDate))
            return DBNull.Value;

        var timeValue = source.IsDBNull(_timeOrdinal) ? 0m : ToDecimal(source.GetValue(_timeOrdinal));

        return ParseJdeDateTime(julianDate, timeValue);
    }

    /// <summary>
    /// Converts a raw column value to decimal using the invariant culture so
    /// that string-typed values parse the same on every machine.
    /// </summary>
    private static decimal ToDecimal(object value)
        => Convert.ToDecimal(value, System.Globalization.CultureInfo.InvariantCulture);

    /// <summary>
    /// Parses JDE Julian date and time into DateTime.
    /// </summary>
    /// <param name="julianDate">Date as CYYDDD; any fractional part is truncated.</param>
    /// <param name="time">Time as HHMMSS; any fractional part is truncated.</param>
    /// <returns>
    /// 1 January of the encoded year, plus (DDD - 1) days, plus the encoded
    /// hours, minutes and seconds. Components are added arithmetically and
    /// are not range-checked, so out-of-range parts roll over.
    /// </returns>
    public static DateTime ParseJdeDateTime(decimal julianDate, decimal time)
    {
        // CYYDDD format
        var dateInt = (int)julianDate;
        var century = dateInt / 100000;
        var year = (dateInt / 1000) % 100;
        var dayOfYear = dateInt % 1000;

        // C counts centuries after 1900, so C=2 is the 2100s rather than the 2000s.
        var fullYear = 1900 + (century * 100) + year;
        var date = new DateTime(fullYear, 1, 1).AddDays(dayOfYear - 1);

        // HHMMSS format
        var timeInt = (int)time;
        var hours = timeInt / 10000;
        var minutes = (timeInt / 100) % 100;
        var seconds = timeInt % 100;

        return date.AddHours(hours).AddMinutes(minutes).AddSeconds(seconds);
    }
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~JdeDateTransformerTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Transformers/JdeDateTransformer.cs tests/JdeScoping.DataSync.Tests/Etl/Transformers/JdeDateTransformerTests.cs
|
|
git commit -m "feat(etl): implement JdeDateTransformer for Julian date parsing"
|
|
```
|
|
|
|
---
|
|
|
|
## Phase 4: Source Implementations
|
|
|
|
### Task 9: Implement DbQuerySource
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Sources/DbQuerySource.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Sources/DbQuerySourceTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Sources/DbQuerySourceTests.cs
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Sources;
|
|
using Microsoft.Data.SqlClient;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Sources;
|
|
|
|
/// <summary>
/// Constructor-level tests for <see cref="DbQuerySource"/>: source naming and
/// argument validation. No database connection is opened.
/// </summary>
public class DbQuerySourceTests
{
    [Fact]
    public void Constructor_SetsSourceName()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var source = new DbQuerySource(factory, "SELECT 1", "TestSource");

        Assert.Equal("DbQuery:TestSource", source.SourceName);
    }

    [Fact]
    public void Constructor_NullName_UsesDefault()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var source = new DbQuerySource(factory, "SELECT 1");

        Assert.Equal("DbQuery:Query", source.SourceName);
    }

    [Fact]
    public void Constructor_NullFactory_ThrowsArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(() =>
            new DbQuerySource(null!, "SELECT 1"));
    }

    [Fact]
    public void Constructor_NullSql_ThrowsArgumentNullException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        // ArgumentException.ThrowIfNullOrWhiteSpace throws ArgumentNullException for
        // null, and Assert.Throws<T> matches the exact type (not base types), so the
        // expectation has to name ArgumentNullException.
        Assert.Throws<ArgumentNullException>(() =>
            new DbQuerySource(factory, null!));
    }

    [Fact]
    public void Constructor_EmptySql_ThrowsArgumentException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        Assert.Throws<ArgumentException>(() =>
            new DbQuerySource(factory, ""));
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~DbQuerySourceTests" --verbosity normal`
|
|
Expected: FAIL - DbQuerySource does not exist
|
|
|
|
**Step 3: Implement DbQuerySource**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Sources/DbQuerySource.cs
|
|
using System.Data;
|
|
using System.Data.Common;
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using Microsoft.Data.SqlClient;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Sources;
|
|
|
|
/// <summary>
/// Import source that executes a SQL query and returns results as IDataReader.
/// The source owns the connection and command it opens; they stay alive until
/// <see cref="DisposeAsync"/> is called or <see cref="ReadDataAsync"/> is
/// called again. The returned reader itself is not disposed by this class.
/// </summary>
public class DbQuerySource : IImportSource
{
    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _sql;
    private readonly object? _parameters;
    private readonly int _commandTimeout;

    private SqlConnection? _connection;
    private SqlCommand? _command;

    public string SourceName { get; }

    /// <summary>
    /// Creates the source. Nothing is opened until <see cref="ReadDataAsync"/>.
    /// </summary>
    /// <param name="connectionFactory">Factory used to open the connection.</param>
    /// <param name="sql">Query text to execute.</param>
    /// <param name="name">Label used in <see cref="SourceName"/>; defaults to "Query".</param>
    /// <param name="parameters">
    /// Optional object whose public properties become <c>@PropertyName</c>
    /// command parameters; null property values are sent as database null.
    /// </param>
    /// <param name="commandTimeout">Command timeout passed to the SQL command.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> or <paramref name="sql"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="sql"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="commandTimeout"/> is negative.</exception>
    public DbQuerySource(
        IDbConnectionFactory connectionFactory,
        string sql,
        string? name = null,
        object? parameters = null,
        int commandTimeout = 3600)
    {
        ArgumentNullException.ThrowIfNull(connectionFactory);
        ArgumentException.ThrowIfNullOrWhiteSpace(sql);

        // Reject here rather than when the command is built, so a bad
        // configuration fails before any connection is opened.
        ArgumentOutOfRangeException.ThrowIfNegative(commandTimeout);

        _connectionFactory = connectionFactory;
        _sql = sql;
        _parameters = parameters;
        _commandTimeout = commandTimeout;
        SourceName = $"DbQuery:{name ?? "Query"}";
    }

    /// <summary>
    /// Opens a connection, executes the query and returns the live reader.
    /// Calling this again releases the connection and command from the
    /// previous call first, which also invalidates the reader returned then.
    /// </summary>
    /// <param name="cancellationToken">Cancels connection opening and query execution.</param>
    public async Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
    {
        // Without this a second call would overwrite the fields and orphan the
        // earlier connection, which nothing could dispose afterwards.
        await DisposeAsync();

        try
        {
            _connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);

            _command = _connection.CreateCommand();
            _command.CommandText = _sql;
            _command.CommandTimeout = _commandTimeout;

            AddParameters(_command, _parameters);

            return await _command.ExecuteReaderAsync(cancellationToken);
        }
        catch
        {
            // Do not keep a half-initialized connection around when the query fails.
            await DisposeAsync();
            throw;
        }
    }

    /// <summary>
    /// Adds one <c>@Name</c> parameter per public property of
    /// <paramref name="parameters"/>; does nothing when it is null.
    /// </summary>
    private static void AddParameters(SqlCommand command, object? parameters)
    {
        if (parameters == null)
            return;

        foreach (var prop in parameters.GetType().GetProperties())
        {
            var value = prop.GetValue(parameters) ?? DBNull.Value;
            command.Parameters.AddWithValue($"@{prop.Name}", value);
        }
    }

    /// <summary>
    /// Disposes the command and connection if they are open. Safe to call
    /// more than once.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_command != null)
        {
            await _command.DisposeAsync();
            _command = null;
        }

        if (_connection != null)
        {
            await _connection.DisposeAsync();
            _connection = null;
        }
    }
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~DbQuerySourceTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Sources/DbQuerySource.cs tests/JdeScoping.DataSync.Tests/Etl/Sources/
|
|
git commit -m "feat(etl): implement DbQuerySource for database queries"
|
|
```
|
|
|
|
---
|
|
|
|
## Phase 5: Destination Implementations
|
|
|
|
### Task 10: Implement DbBulkImportDestination
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Destinations/DbBulkImportDestination.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkImportDestinationTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkImportDestinationTests.cs
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Destinations;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Destinations;
|
|
|
|
/// <summary>
/// Constructor-level tests for <see cref="DbBulkImportDestination"/>:
/// destination naming and argument validation. No database is touched.
/// </summary>
public class DbBulkImportDestinationTests
{
    [Fact]
    public void Constructor_SetsDestinationName()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var dest = new DbBulkImportDestination(factory, "WorkOrder");

        Assert.Equal("BulkImport:WorkOrder", dest.DestinationName);
    }

    [Fact]
    public void Constructor_NullFactory_ThrowsArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(() =>
            new DbBulkImportDestination(null!, "WorkOrder"));
    }

    [Fact]
    public void Constructor_NullTableName_ThrowsArgumentNullException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        // ArgumentException.ThrowIfNullOrWhiteSpace throws ArgumentNullException for
        // null, and Assert.Throws<T> matches the exact type (not base types).
        Assert.Throws<ArgumentNullException>(() =>
            new DbBulkImportDestination(factory, null!));
    }

    [Fact]
    public void Constructor_EmptyTableName_ThrowsArgumentException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        Assert.Throws<ArgumentException>(() =>
            new DbBulkImportDestination(factory, ""));
    }

    // The effective batch size is private, so this only pins that every
    // supported input (including 0 and negatives, which select the default)
    // is accepted by the constructor.
    [Theory]
    [InlineData(-1)]
    [InlineData(0)]
    [InlineData(5000)]
    [InlineData(50000)]
    public void Constructor_AnyBatchSize_DoesNotThrow(int batchSize)
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var dest = new DbBulkImportDestination(factory, "WorkOrder", batchSize: batchSize);

        Assert.NotNull(dest);
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~DbBulkImportDestinationTests" --verbosity normal`
|
|
Expected: FAIL - DbBulkImportDestination does not exist
|
|
|
|
**Step 3: Implement DbBulkImportDestination**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Destinations/DbBulkImportDestination.cs
|
|
using System.Data;
|
|
using System.Diagnostics;
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
using Microsoft.Data.SqlClient;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Destinations;
|
|
|
|
/// <summary>
/// Destination that truncates the target table and bulk loads all data.
/// Columns are mapped from source to destination by name.
/// </summary>
/// <remarks>
/// The truncate and the bulk load are separate statements that are not
/// wrapped in a transaction here, so a failure during the load leaves the
/// table empty or partially filled.
/// </remarks>
public class DbBulkImportDestination : IImportDestination
{
    private const int DefaultBatchSize = 10000;

    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _tableName;
    private readonly string _quotedTableName;
    private readonly int _batchSize;

    public string DestinationName => $"BulkImport:{_tableName}";

    /// <summary>
    /// Creates the destination.
    /// </summary>
    /// <param name="connectionFactory">Factory used to open the destination connection.</param>
    /// <param name="tableName">
    /// Target table. May be schema-qualified (<c>dbo.WorkOrder</c>) and may
    /// already carry brackets; dots are treated as part separators.
    /// </param>
    /// <param name="batchSize">Rows per bulk-copy batch; zero or negative selects the default of 10000.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> or <paramref name="tableName"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="tableName"/> is empty or whitespace.</exception>
    public DbBulkImportDestination(
        IDbConnectionFactory connectionFactory,
        string tableName,
        int batchSize = 0)
    {
        ArgumentNullException.ThrowIfNull(connectionFactory);
        ArgumentException.ThrowIfNullOrWhiteSpace(tableName);

        _connectionFactory = connectionFactory;
        _tableName = tableName;
        _quotedTableName = QuoteMultipartName(tableName);
        _batchSize = batchSize > 0 ? batchSize : DefaultBatchSize;
    }

    /// <summary>
    /// Truncates the target table, then streams every row of
    /// <paramref name="source"/> into it with <see cref="SqlBulkCopy"/>.
    /// </summary>
    /// <param name="source">Reader positioned before the first row.</param>
    /// <param name="cancellationToken">Cancels the truncate and the bulk copy.</param>
    /// <returns>Rows copied, number of batches observed and elapsed time.</returns>
    public async Task<DestinationResult> WriteAsync(
        IDataReader source,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(source);

        var stopwatch = Stopwatch.StartNew();
        long totalRows = 0;
        int batchCount = 0;

        await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);

        // Truncate destination table
        await using (var truncateCmd = connection.CreateCommand())
        {
            truncateCmd.CommandText = $"TRUNCATE TABLE {_quotedTableName}";
            await truncateCmd.ExecuteNonQueryAsync(cancellationToken);
        }

        // Bulk copy data
        using var bulkCopy = new SqlBulkCopy(connection)
        {
            DestinationTableName = _quotedTableName,
            BatchSize = _batchSize,
            BulkCopyTimeout = 3600,
            EnableStreaming = true
        };

        // Map columns by name so source column order does not have to match the table.
        for (int i = 0; i < source.FieldCount; i++)
        {
            var columnName = source.GetName(i);
            bulkCopy.ColumnMappings.Add(columnName, columnName);
        }

        // Track rows via event: it fires once per full batch.
        bulkCopy.NotifyAfter = _batchSize;
        bulkCopy.SqlRowsCopied += (_, e) =>
        {
            totalRows = e.RowsCopied;
            batchCount++;
        };

        await bulkCopy.WriteToServerAsync(source, cancellationToken);

        // Final count (in case NotifyAfter didn't fire for last partial batch)
        if (bulkCopy.RowsCopied > totalRows)
        {
            totalRows = bulkCopy.RowsCopied;
            batchCount++;
        }

        stopwatch.Stop();
        return new DestinationResult(totalRows, batchCount, stopwatch.Elapsed);
    }

    /// <summary>
    /// Turns a possibly schema-qualified table name into a bracket-quoted
    /// identifier: each dot-separated part is stripped of surrounding
    /// brackets, has <c>]</c> escaped as <c>]]</c>, and is wrapped in brackets.
    /// Empty parts (as in <c>db..table</c>) are left empty.
    /// </summary>
    private static string QuoteMultipartName(string name)
    {
        var quotedParts = Array.ConvertAll(name.Split('.'), QuotePart);
        return string.Join(".", quotedParts);
    }

    /// <summary>Bracket-quotes a single identifier part.</summary>
    private static string QuotePart(string part)
    {
        var bare = part.Trim();
        if (bare.Length == 0)
            return string.Empty;

        if (bare.Length >= 2 && bare[0] == '[' && bare[^1] == ']')
            bare = bare[1..^1];

        return "[" + bare.Replace("]", "]]") + "]";
    }
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~DbBulkImportDestinationTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Destinations/DbBulkImportDestination.cs tests/JdeScoping.DataSync.Tests/Etl/Destinations/
|
|
git commit -m "feat(etl): implement DbBulkImportDestination for full table refresh"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 11: Implement DbBulkMergeDestination
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Destinations/DbBulkMergeDestination.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkMergeDestinationTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkMergeDestinationTests.cs
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Destinations;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Destinations;
|
|
|
|
/// <summary>
/// Constructor-level tests for <see cref="DbBulkMergeDestination"/>:
/// destination naming and argument validation. No database is touched.
/// </summary>
public class DbBulkMergeDestinationTests
{
    [Fact]
    public void Constructor_SetsDestinationName()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        var dest = new DbBulkMergeDestination(factory, "WorkOrder", new[] { "OrderNumber" });

        Assert.Equal("BulkMerge:WorkOrder", dest.DestinationName);
    }

    [Fact]
    public void Constructor_NullFactory_ThrowsArgumentNullException()
    {
        Assert.Throws<ArgumentNullException>(() =>
            new DbBulkMergeDestination(null!, "WorkOrder", new[] { "Id" }));
    }

    [Fact]
    public void Constructor_NullTableName_ThrowsArgumentNullException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        // ArgumentException.ThrowIfNullOrWhiteSpace throws ArgumentNullException for
        // null, and Assert.Throws<T> matches the exact type (not base types).
        Assert.Throws<ArgumentNullException>(() =>
            new DbBulkMergeDestination(factory, null!, new[] { "Id" }));
    }

    [Fact]
    public void Constructor_EmptyMatchColumns_ThrowsArgumentException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        Assert.Throws<ArgumentException>(() =>
            new DbBulkMergeDestination(factory, "WorkOrder", Array.Empty<string>()));
    }

    [Fact]
    public void Constructor_NullMatchColumns_ThrowsArgumentNullException()
    {
        var factory = Substitute.For<IDbConnectionFactory>();

        Assert.Throws<ArgumentNullException>(() =>
            new DbBulkMergeDestination(factory, "WorkOrder", null!));
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~DbBulkMergeDestinationTests" --verbosity normal`
|
|
Expected: FAIL - DbBulkMergeDestination does not exist
|
|
|
|
**Step 3: Implement DbBulkMergeDestination**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Destinations/DbBulkMergeDestination.cs
|
|
using System.Data;
|
|
using System.Diagnostics;
|
|
using System.Text;
|
|
using JdeScoping.DataAccess.Interfaces;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
using Microsoft.Data.SqlClient;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Destinations;
|
|
|
|
/// <summary>
|
|
/// Destination that uses temp table + MERGE for incremental updates.
|
|
/// </summary>
|
|
public class DbBulkMergeDestination : IImportDestination
|
|
{
|
|
private const int DefaultBatchSize = 10000;
|
|
|
|
private readonly IDbConnectionFactory _connectionFactory;
|
|
private readonly string _tableName;
|
|
private readonly string[] _matchColumns;
|
|
private readonly string[]? _updateColumns;
|
|
private readonly int _batchSize;
|
|
|
|
public string DestinationName => $"BulkMerge:{_tableName}";
|
|
|
|
public DbBulkMergeDestination(
|
|
IDbConnectionFactory connectionFactory,
|
|
string tableName,
|
|
string[] matchColumns,
|
|
string[]? updateColumns = null,
|
|
int batchSize = 0)
|
|
{
|
|
ArgumentNullException.ThrowIfNull(connectionFactory);
|
|
ArgumentException.ThrowIfNullOrWhiteSpace(tableName);
|
|
ArgumentNullException.ThrowIfNull(matchColumns);
|
|
if (matchColumns.Length == 0)
|
|
throw new ArgumentException("At least one match column is required.", nameof(matchColumns));
|
|
|
|
_connectionFactory = connectionFactory;
|
|
_tableName = tableName;
|
|
_matchColumns = matchColumns;
|
|
_updateColumns = updateColumns;
|
|
_batchSize = batchSize > 0 ? batchSize : DefaultBatchSize;
|
|
}
|
|
|
|
public async Task<DestinationResult> WriteAsync(
|
|
IDataReader source,
|
|
CancellationToken cancellationToken = default)
|
|
{
|
|
ArgumentNullException.ThrowIfNull(source);
|
|
|
|
var stopwatch = Stopwatch.StartNew();
|
|
long totalRows = 0;
|
|
int batchCount = 0;
|
|
|
|
var tempTableName = $"#ETL_{_tableName.Replace(".", "_").Replace("[", "").Replace("]", "")}";
|
|
|
|
await using var connection = await _connectionFactory.CreateLotFinderConnectionAsync(cancellationToken);
|
|
|
|
try
|
|
{
|
|
// Create temp table from destination schema
|
|
await CreateTempTableAsync(connection, tempTableName, cancellationToken);
|
|
|
|
// Get all column names from source
|
|
var allColumns = new List<string>();
|
|
for (int i = 0; i < source.FieldCount; i++)
|
|
allColumns.Add(source.GetName(i));
|
|
|
|
// Determine update columns (all non-match columns if not specified)
|
|
var matchSet = new HashSet<string>(_matchColumns, StringComparer.OrdinalIgnoreCase);
|
|
var updateCols = _updateColumns ?? allColumns.Where(c => !matchSet.Contains(c)).ToArray();
|
|
|
|
// Build MERGE SQL
|
|
var mergeSql = BuildMergeSql(allColumns, updateCols);
|
|
|
|
// Process in batches using DataTable buffer
|
|
var batch = new DataTable();
|
|
SetupDataTable(batch, source);
|
|
|
|
while (source.Read())
|
|
{
|
|
var row = batch.NewRow();
|
|
for (int i = 0; i < source.FieldCount; i++)
|
|
row[i] = source.GetValue(i);
|
|
batch.Rows.Add(row);
|
|
|
|
if (batch.Rows.Count >= _batchSize)
|
|
{
|
|
batchCount++;
|
|
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, cancellationToken);
|
|
totalRows += batch.Rows.Count;
|
|
batch.Clear();
|
|
}
|
|
}
|
|
|
|
// Process remaining rows
|
|
if (batch.Rows.Count > 0)
|
|
{
|
|
batchCount++;
|
|
await ProcessBatchAsync(connection, batch, tempTableName, mergeSql, cancellationToken);
|
|
totalRows += batch.Rows.Count;
|
|
}
|
|
|
|
stopwatch.Stop();
|
|
return new DestinationResult(totalRows, batchCount, stopwatch.Elapsed);
|
|
}
|
|
finally
|
|
{
|
|
await DropTempTableAsync(connection, tempTableName);
|
|
}
|
|
}
|
|
|
|
private async Task CreateTempTableAsync(SqlConnection connection, string tempTableName, CancellationToken ct)
|
|
{
|
|
var sql = $"SELECT TOP 0 * INTO [{tempTableName}] FROM [{_tableName}]";
|
|
await using var cmd = connection.CreateCommand();
|
|
cmd.CommandText = sql;
|
|
await cmd.ExecuteNonQueryAsync(ct);
|
|
}
|
|
|
|
private async Task DropTempTableAsync(SqlConnection connection, string tempTableName)
|
|
{
|
|
try
|
|
{
|
|
var sql = $"IF OBJECT_ID('tempdb..{tempTableName}') IS NOT NULL DROP TABLE [{tempTableName}]";
|
|
await using var cmd = connection.CreateCommand();
|
|
cmd.CommandText = sql;
|
|
await cmd.ExecuteNonQueryAsync();
|
|
}
|
|
catch { /* Ignore cleanup errors */ }
|
|
}
|
|
|
|
private async Task ProcessBatchAsync(
|
|
SqlConnection connection,
|
|
DataTable batch,
|
|
string tempTableName,
|
|
string mergeSql,
|
|
CancellationToken ct)
|
|
{
|
|
// Bulk copy to temp table
|
|
using var bulkCopy = new SqlBulkCopy(connection)
|
|
{
|
|
DestinationTableName = tempTableName,
|
|
BatchSize = batch.Rows.Count
|
|
};
|
|
await bulkCopy.WriteToServerAsync(batch, ct);
|
|
|
|
// Execute MERGE
|
|
await using var cmd = connection.CreateCommand();
|
|
cmd.CommandText = mergeSql;
|
|
await cmd.ExecuteNonQueryAsync(ct);
|
|
|
|
// Truncate temp table
|
|
cmd.CommandText = $"TRUNCATE TABLE [{tempTableName}]";
|
|
await cmd.ExecuteNonQueryAsync(ct);
|
|
}
|
|
|
|
/// <summary>
/// Builds the T-SQL MERGE statement that upserts rows from the temp table into
/// the target table, joining on the configured match columns.
/// </summary>
/// <param name="allColumns">Columns written by the INSERT branch.</param>
/// <param name="updateColumns">
/// Columns assigned by the UPDATE branch. When empty, the
/// <c>WHEN MATCHED</c> clause is omitted entirely.
/// </param>
/// <returns>The complete MERGE statement, terminated with a semicolon.</returns>
private string BuildMergeSql(IReadOnlyList<string> allColumns, IReadOnlyList<string> updateColumns)
{
    // Temp table name is derived from the target name with '.', '[' and ']' removed.
    // NOTE(review): this must equal the name the caller passes to the create/drop
    // helpers - presumably derived the same way; verify.
    var tempTableName = $"#ETL_{_tableName.Replace(".", "_").Replace("[", "").Replace("]", "")}";

    // Quote each part of a (possibly schema-qualified, possibly already bracketed)
    // name separately: "dbo.Orders" and "[dbo].[Orders]" both become "[dbo].[Orders]".
    // A single pair of brackets around the whole string would produce one
    // invalid identifier for such names.
    var quotedTarget = string.Join(
        ".",
        _tableName.Split('.').Select(part => $"[{part.Trim('[', ']')}]"));

    var sb = new StringBuilder();

    sb.AppendLine($"MERGE INTO {quotedTarget} AS target");
    sb.AppendLine($"USING [{tempTableName}] AS source");
    sb.Append("ON ");
    sb.AppendLine(string.Join(" AND ", _matchColumns.Select(c => $"target.[{c}] = source.[{c}]")));

    if (updateColumns.Count > 0)
    {
        sb.AppendLine("WHEN MATCHED THEN UPDATE SET");
        sb.AppendLine(string.Join(", ", updateColumns.Select(c => $"target.[{c}] = source.[{c}]")));
    }

    sb.AppendLine("WHEN NOT MATCHED THEN INSERT");
    sb.AppendLine($"({string.Join(", ", allColumns.Select(c => $"[{c}]"))})");
    sb.AppendLine($"VALUES ({string.Join(", ", allColumns.Select(c => $"source.[{c}]"))});");

    return sb.ToString();
}
|
|
|
|
/// <summary>
/// Adds one column to <paramref name="table"/> for every field exposed by
/// <paramref name="source"/>, using the field's name and CLR type.
/// </summary>
/// <param name="table">Table that receives the columns.</param>
/// <param name="source">Reader whose field names and types define the columns.</param>
private static void SetupDataTable(DataTable table, IDataReader source)
{
    for (int i = 0; i < source.FieldCount; i++)
    {
        // The field type is used as-is. The previous code wrapped value types in
        // Nullable<> and immediately unwrapped them again, which was a no-op.
        // DataTable columns accept DBNull by default, so no nullable wrapper is needed.
        table.Columns.Add(source.GetName(i), source.GetFieldType(i));
    }
}
|
|
}
|
|
```
|
|
|
|
**Step 4: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~DbBulkMergeDestinationTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 5: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Destinations/DbBulkMergeDestination.cs tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkMergeDestinationTests.cs
|
|
git commit -m "feat(etl): implement DbBulkMergeDestination for incremental updates"
|
|
```
|
|
|
|
---
|
|
|
|
## Phase 6: Pipeline Orchestration
|
|
|
|
### Task 12: Implement EtlPipeline
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/Pipeline/EtlPipeline.cs`
|
|
- Test: `tests/JdeScoping.DataSync.Tests/Etl/Pipeline/EtlPipelineTests.cs`
|
|
|
|
**Step 1: Write failing tests**
|
|
|
|
```csharp
|
|
// tests/JdeScoping.DataSync.Tests/Etl/Pipeline/EtlPipelineTests.cs
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using JdeScoping.DataSync.Etl.Pipeline;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
using NSubstitute;
|
|
|
|
namespace JdeScoping.DataSync.Tests.Etl.Pipeline;
|
|
|
|
/// <summary>
/// Unit tests for <see cref="EtlPipeline"/> built through <see cref="EtlPipelineBuilder"/>,
/// using NSubstitute stand-ins for the source, destination and script runners.
/// </summary>
public class EtlPipelineTests
{
    [Fact]
    public async Task ExecuteAsync_SuccessfulPipeline_ReturnsSuccessResult()
    {
        // Arrange
        var source = CreateMockSource();
        var destination = CreateMockDestination(100);

        var pipeline = new EtlPipelineBuilder()
            .WithName("TestPipeline")
            .WithSource(source)
            .WithDestination(destination)
            .WithLogger(NullLogger<EtlPipeline>.Instance)
            .Build();

        // Act
        var result = await pipeline.ExecuteAsync();

        // Assert
        Assert.True(result.Success);
        Assert.Equal(100, result.TotalRows);
        Assert.Null(result.Error);
    }

    [Fact]
    public async Task ExecuteAsync_WithPreScript_RunsScriptBeforeDestination()
    {
        // Arrange: record the order in which the script and destination are invoked.
        var callOrder = new List<string>();

        var source = CreateMockSource();
        var destination = CreateMockDestination(100);
        destination.When(d => d.WriteAsync(Arg.Any<IDataReader>(), Arg.Any<CancellationToken>()))
            .Do(_ => callOrder.Add("destination"));

        var preScript = Substitute.For<IScriptRunner>();
        preScript.ScriptName.Returns("PreScript");
        preScript.When(s => s.ExecuteAsync(Arg.Any<CancellationToken>()))
            .Do(_ => callOrder.Add("prescript"));

        var pipeline = new EtlPipelineBuilder()
            .WithName("TestPipeline")
            .WithSource(source)
            .WithDestination(destination)
            .WithPreScript(preScript)
            .WithLogger(NullLogger<EtlPipeline>.Instance)
            .Build();

        // Act
        await pipeline.ExecuteAsync();

        // Assert
        Assert.Equal(new[] { "prescript", "destination" }, callOrder);
    }

    [Fact]
    public async Task ExecuteAsync_DestinationFails_ReturnsFailedResult()
    {
        // Arrange
        var source = CreateMockSource();
        var destination = Substitute.For<IImportDestination>();
        destination.DestinationName.Returns("FailingDest");

        // Return a faulted task instead of using ThrowsAsync: ThrowsAsync lives in
        // NSubstitute.ExceptionExtensions, which this file does not import.
        destination.WriteAsync(Arg.Any<IDataReader>(), Arg.Any<CancellationToken>())
            .Returns(Task.FromException<DestinationResult>(
                new InvalidOperationException("Destination failed")));

        var pipeline = new EtlPipelineBuilder()
            .WithName("TestPipeline")
            .WithSource(source)
            .WithDestination(destination)
            .WithLogger(NullLogger<EtlPipeline>.Instance)
            .Build();

        // Act
        var result = await pipeline.ExecuteAsync();

        // Assert
        Assert.False(result.Success);
        Assert.NotNull(result.Error);
        Assert.IsType<InvalidOperationException>(result.Error);
    }

    [Fact]
    public async Task ExecuteAsync_TracksStepResults()
    {
        // Arrange
        var source = CreateMockSource();
        var destination = CreateMockDestination(100);

        var pipeline = new EtlPipelineBuilder()
            .WithName("TestPipeline")
            .WithSource(source)
            .WithDestination(destination)
            .WithLogger(NullLogger<EtlPipeline>.Instance)
            .Build();

        // Act
        var result = await pipeline.ExecuteAsync();

        // Assert: with no scripts or transformers, only the source and
        // destination steps are recorded, in that order.
        Assert.Equal(2, result.Steps.Count);
        Assert.Equal("Source", result.Steps[0].StepType);
        Assert.Equal("Destination", result.Steps[1].StepType);
    }

    /// <summary>
    /// Creates a source substitute named "MockSource" whose reader has no
    /// fields and yields no rows.
    /// </summary>
    private static IImportSource CreateMockSource()
    {
        var reader = Substitute.For<IDataReader>();
        reader.Read().Returns(false);
        reader.FieldCount.Returns(0);

        var source = Substitute.For<IImportSource>();
        source.SourceName.Returns("MockSource");
        source.ReadDataAsync(Arg.Any<CancellationToken>())
            .Returns(Task.FromResult(reader));
        return source;
    }

    /// <summary>
    /// Creates a destination substitute named "MockDestination" that reports
    /// <paramref name="rows"/> processed rows in a single batch.
    /// </summary>
    private static IImportDestination CreateMockDestination(long rows)
    {
        var destination = Substitute.For<IImportDestination>();
        destination.DestinationName.Returns("MockDestination");
        destination.WriteAsync(Arg.Any<IDataReader>(), Arg.Any<CancellationToken>())
            .Returns(Task.FromResult(new DestinationResult(rows, 1, TimeSpan.FromSeconds(1))));
        return destination;
    }
}
|
|
```
|
|
|
|
**Step 2: Run test to verify it fails**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~EtlPipelineTests" --verbosity normal`
|
|
Expected: FAIL - EtlPipeline and EtlPipelineBuilder do not exist
|
|
|
|
**Step 3: Implement EtlPipeline**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Pipeline/EtlPipeline.cs
|
|
using System.Data;
|
|
using System.Diagnostics;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Pipeline;
|
|
|
|
/// <summary>
/// Orchestrates ETL pipeline execution: pre-scripts → source → transformers →
/// destination → post-scripts. Any exception other than
/// <see cref="OperationCanceledException"/> stops the run and is returned in a
/// failed <see cref="PipelineResult"/>; cancellation propagates to the caller.
/// </summary>
public class EtlPipeline
{
    private readonly IImportSource _source;
    private readonly IReadOnlyList<IDataTransformer> _transformers;
    private readonly IImportDestination _destination;
    private readonly IReadOnlyList<IScriptRunner> _preScripts;
    private readonly IReadOnlyList<IScriptRunner> _postScripts;
    private readonly ILogger<EtlPipeline> _logger;

    /// <summary>Name used to identify this pipeline in log messages.</summary>
    public string PipelineName { get; }

    internal EtlPipeline(
        string name,
        IImportSource source,
        IReadOnlyList<IDataTransformer> transformers,
        IImportDestination destination,
        IReadOnlyList<IScriptRunner> preScripts,
        IReadOnlyList<IScriptRunner> postScripts,
        ILogger<EtlPipeline> logger)
    {
        PipelineName = name;
        _source = source;
        _transformers = transformers;
        _destination = destination;
        _preScripts = preScripts;
        _postScripts = postScripts;
        _logger = logger;
    }

    /// <summary>
    /// Runs the pipeline once. The source is disposed as part of the run.
    /// </summary>
    /// <param name="cancellationToken">Token passed to every script, the source and the destination.</param>
    /// <returns>
    /// A succeeded result containing every step, or a failed result containing
    /// the steps that completed and the exception that stopped the run.
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation is not converted into a failed result.</exception>
    public async Task<PipelineResult> ExecuteAsync(CancellationToken cancellationToken = default)
    {
        var steps = new List<StepResult>();
        var totalStopwatch = Stopwatch.StartNew();

        // Name of the step that is currently running. A step is only added to
        // 'steps' after it completes, so the failing step has to be tracked
        // separately for the error log to name it correctly.
        var currentStep = "Unknown";

        _logger.LogInformation("Starting pipeline {PipelineName}", PipelineName);

        try
        {
            // 1. Run pre-scripts
            foreach (var script in _preScripts)
            {
                currentStep = script.ScriptName;
                steps.Add(await RunScriptAsync(script, cancellationToken));
            }

            // 2. Open source
            var sourceStopwatch = Stopwatch.StartNew();
            await using (_source)
            {
                currentStep = _source.SourceName;
                var reader = await _source.ReadDataAsync(cancellationToken);
                try
                {
                    sourceStopwatch.Stop();
                    steps.Add(new StepResult(_source.SourceName, "Source", 0, sourceStopwatch.Elapsed));

                    // 3. Apply transformers (chain of decorators)
                    foreach (var transformer in _transformers)
                    {
                        currentStep = transformer.TransformerName;
                        var transformStopwatch = Stopwatch.StartNew();
                        reader = transformer.Transform(reader);
                        transformStopwatch.Stop();
                        steps.Add(new StepResult(transformer.TransformerName, "Transform", 0, transformStopwatch.Elapsed));
                    }

                    // 4. Write to destination
                    currentStep = _destination.DestinationName;
                    var destResult = await _destination.WriteAsync(reader, cancellationToken);
                    steps.Add(new StepResult(
                        _destination.DestinationName,
                        "Destination",
                        destResult.RowsProcessed,
                        destResult.Elapsed));
                }
                finally
                {
                    // Release the outermost reader obtained so far.
                    // NOTE(review): assumes each transformer's reader disposes the
                    // reader it wraps - confirm against the decorator implementation.
                    reader?.Dispose();
                }
            }

            // 5. Run post-scripts
            foreach (var script in _postScripts)
            {
                currentStep = script.ScriptName;
                steps.Add(await RunScriptAsync(script, cancellationToken));
            }

            totalStopwatch.Stop();
            var totalRows = steps.Sum(s => s.RowsAffected);

            _logger.LogInformation(
                "Pipeline {PipelineName} completed. Rows={Rows}, Elapsed={Elapsed}ms",
                PipelineName, totalRows, totalStopwatch.ElapsedMilliseconds);

            return PipelineResult.Succeeded(totalRows, totalStopwatch.Elapsed, steps);
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            totalStopwatch.Stop();
            var totalRows = steps.Sum(s => s.RowsAffected);

            _logger.LogError(ex,
                "Pipeline {PipelineName} failed at step {Step}",
                PipelineName, currentStep);

            return PipelineResult.Failed(totalRows, totalStopwatch.Elapsed, steps, ex);
        }
    }

    /// <summary>
    /// Executes a single script and returns a timed "Script" step with zero rows.
    /// </summary>
    private async Task<StepResult> RunScriptAsync(IScriptRunner script, CancellationToken cancellationToken)
    {
        var stopwatch = Stopwatch.StartNew();
        _logger.LogDebug("Running script {ScriptName}", script.ScriptName);
        await script.ExecuteAsync(cancellationToken);
        stopwatch.Stop();
        return new StepResult(script.ScriptName, "Script", 0, stopwatch.Elapsed);
    }
}
|
|
```
|
|
|
|
**Step 4: Implement EtlPipelineBuilder**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/Pipeline/EtlPipelineBuilder.cs
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Logging.Abstractions;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Pipeline;
|
|
|
|
/// <summary>
/// Fluent builder for constructing ETL pipelines. A source and a destination
/// are required; name, transformers, scripts and logger are optional.
/// </summary>
public class EtlPipelineBuilder
{
    private string _name = "Unnamed";
    private IImportSource? _source;
    private readonly List<IDataTransformer> _transformers = new();
    private IImportDestination? _destination;
    private readonly List<IScriptRunner> _preScripts = new();
    private readonly List<IScriptRunner> _postScripts = new();
    private ILogger<EtlPipeline>? _logger;

    /// <summary>Sets the pipeline name (defaults to "Unnamed").</summary>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    public EtlPipelineBuilder WithName(string name)
    {
        _name = name ?? throw new ArgumentNullException(nameof(name));
        return this;
    }

    /// <summary>Sets the source; a later call replaces an earlier one.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public EtlPipelineBuilder WithSource(IImportSource source)
    {
        _source = source ?? throw new ArgumentNullException(nameof(source));
        return this;
    }

    /// <summary>Appends a transformer; transformers are kept in the order added.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="transformer"/> is null.</exception>
    public EtlPipelineBuilder WithTransformer(IDataTransformer transformer)
    {
        ArgumentNullException.ThrowIfNull(transformer);
        _transformers.Add(transformer);
        return this;
    }

    /// <summary>Sets the destination; a later call replaces an earlier one.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="destination"/> is null.</exception>
    public EtlPipelineBuilder WithDestination(IImportDestination destination)
    {
        _destination = destination ?? throw new ArgumentNullException(nameof(destination));
        return this;
    }

    /// <summary>Appends a pre-script; scripts are kept in the order added.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="script"/> is null.</exception>
    public EtlPipelineBuilder WithPreScript(IScriptRunner script)
    {
        ArgumentNullException.ThrowIfNull(script);
        _preScripts.Add(script);
        return this;
    }

    /// <summary>Appends a post-script; scripts are kept in the order added.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="script"/> is null.</exception>
    public EtlPipelineBuilder WithPostScript(IScriptRunner script)
    {
        ArgumentNullException.ThrowIfNull(script);
        _postScripts.Add(script);
        return this;
    }

    /// <summary>
    /// Sets the logger. When no logger has been set by the time
    /// <see cref="Build"/> is called, a no-op logger is used.
    /// </summary>
    public EtlPipelineBuilder WithLogger(ILogger<EtlPipeline> logger)
    {
        _logger = logger;
        return this;
    }

    /// <summary>Creates the pipeline from the current builder state.</summary>
    /// <exception cref="InvalidOperationException">No source or no destination has been set.</exception>
    public EtlPipeline Build()
    {
        if (_source == null)
            throw new InvalidOperationException("Source is required. Call WithSource() before Build().");
        if (_destination == null)
            throw new InvalidOperationException("Destination is required. Call WithDestination() before Build().");

        // Hand the pipeline snapshots rather than the builder's live lists, so
        // further With*() calls on this builder cannot alter a pipeline that
        // has already been built.
        return new EtlPipeline(
            _name,
            _source,
            _transformers.ToArray(),
            _destination,
            _preScripts.ToArray(),
            _postScripts.ToArray(),
            _logger ?? NullLogger<EtlPipeline>.Instance);
    }
}
|
|
```
|
|
|
|
**Step 5: Run tests to verify they pass**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~EtlPipelineTests" --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 6: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/Pipeline/ tests/JdeScoping.DataSync.Tests/Etl/Pipeline/
|
|
git commit -m "feat(etl): implement EtlPipeline and EtlPipelineBuilder"
|
|
```
|
|
|
|
---
|
|
|
|
## Phase 7: Configuration and DI
|
|
|
|
### Task 13: Add DI Registration
|
|
|
|
**Files:**
|
|
- Create: `src/JdeScoping.DataSync/Etl/EtlServiceCollectionExtensions.cs`
|
|
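- Modify: `src/JdeScoping.DataSync/DependencyInjection.cs` (call `services.AddEtlPipeline()` from the existing registration method; note that the steps below do not yet include this change)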
|
|
|
|
**Step 1: Create ETL DI extensions**
|
|
|
|
```csharp
|
|
// src/JdeScoping.DataSync/Etl/EtlServiceCollectionExtensions.cs
|
|
using JdeScoping.DataSync.Etl.Pipeline;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
|
|
namespace JdeScoping.DataSync.Etl;
|
|
|
|
/// <summary>
/// Dependency-injection registration helpers for the ETL pipeline.
/// </summary>
public static class EtlServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="EtlPipelineBuilder"/> as a transient service, so each
    /// resolution yields a fresh builder for assembling a pipeline.
    /// </summary>
    /// <param name="services">The service collection to add the registration to.</param>
    /// <returns>The same service collection, to allow call chaining.</returns>
    public static IServiceCollection AddEtlPipeline(this IServiceCollection services)
        => services.AddTransient<EtlPipelineBuilder>();
}
|
|
```
|
|
|
|
**Step 2: Commit**
|
|
|
|
```bash
|
|
git add src/JdeScoping.DataSync/Etl/EtlServiceCollectionExtensions.cs
|
|
git commit -m "feat(etl): add DI registration for ETL pipeline"
|
|
```
|
|
|
|
---
|
|
|
|
### Task 14: Run Full Test Suite
|
|
|
|
**Step 1: Build the solution**
|
|
|
|
Run: `dotnet build NEW/`
|
|
Expected: Build succeeds
|
|
|
|
**Step 2: Run all DataSync tests**
|
|
|
|
Run: `dotnet test tests/JdeScoping.DataSync.Tests --verbosity normal`
|
|
Expected: All tests pass
|
|
|
|
**Step 3: Commit any fixes if needed**
|
|
|
|
---
|
|
|
|
## Summary
|
|
|
|
This implementation plan creates a complete ETL pipeline system with:
|
|
|
|
1. **Core Interfaces**: `IImportSource`, `IDataTransformer`, `IImportDestination`, `IScriptRunner`
|
|
2. **Result Models**: `PipelineResult`, `StepResult`, `DestinationResult`
|
|
3. **Transformers**: `TransformingDataReader`, `DataTransformerBase`, `ColumnDropTransformer`, `ColumnRenameTransformer`, `JdeDateTransformer`
|
|
4. **Sources**: `DbQuerySource`
|
|
5. **Destinations**: `DbBulkImportDestination`, `DbBulkMergeDestination`
|
|
6. **Scripts**: `SqlScriptRunner`, `CommonScripts`
|
|
7. **Pipeline**: `EtlPipeline`, `EtlPipelineBuilder`
|
|
|
|
Each task follows TDD with failing tests first, then implementation, then verification.
|