The WHERE clause was comparing Code to itself instead of the aliased table reference, which would always be true.
22 KiB
Old ETL Removal Implementation Plan
For Claude: REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
Goal: Remove legacy ETL implementation and wire orchestration to use new EtlPipeline with JSON config.
Architecture: Three-phase migration - build new infrastructure first, wire up, then clean up old code.
Tech Stack: .NET 10, System.Text.Json, EtlPipeline
Working Directory: All paths are relative to NEW/ folder. Run cd /Users/dohertj2/Desktop/JdeScopingTool/NEW before starting.
Phase 1: Build New Infrastructure
Task 1: Create Pipeline Configuration Models
Files:
- Create: `src/JdeScoping.DataSync/Configuration/PipelinesRoot.cs`
- Create: `src/JdeScoping.DataSync/Configuration/PipelineConfig.cs`
- Create: `src/JdeScoping.DataSync/Options/PipelineOptions.cs`
Step 1: Create PipelinesRoot.cs
namespace JdeScoping.DataSync.Configuration;
/// <summary>
/// Root object of the pipeline configuration: optional global settings plus a
/// string-keyed dictionary of pipeline definitions.
/// </summary>
/// <param name="Settings">
/// Global settings. Optional: when <c>null</c>, <see cref="EffectiveSettings"/> supplies defaults.
/// </param>
/// <param name="Pipelines">
/// Pipeline definitions by key (presumably the table name; confirm against pipelines.json).
/// </param>
public record PipelinesRoot(
    PipelineSettings? Settings,
    Dictionary<string, PipelineConfig> Pipelines)
{
    /// <summary>
    /// The configured <see cref="Settings"/>, or a default-constructed
    /// <see cref="PipelineSettings"/> when none were supplied.
    /// </summary>
    public PipelineSettings EffectiveSettings
    {
        get { return Settings ?? new PipelineSettings(); }
    }
}
/// <summary>Global settings shared by every pipeline in the configuration.</summary>
/// <param name="Timezone">Time zone name; defaults to <c>"UTC"</c>.</param>
public record PipelineSettings(string Timezone = "UTC");
Step 2: Create PipelineConfig.cs
namespace JdeScoping.DataSync.Configuration;
/// <summary>Definition of a single pipeline: source, sync modes, transformers, destination and scripts.</summary>
/// <param name="Source">Where the data is read from.</param>
/// <param name="SyncModes">Per-mode settings by key (presumably the sync mode name; confirm against pipelines.json).</param>
/// <param name="Transformers">Optional transformer definitions.</param>
/// <param name="Destination">Where the data is written to.</param>
/// <param name="PreScripts">Optional scripts (presumably SQL run before the pipeline; confirm with the factory).</param>
/// <param name="PostScripts">Optional scripts (presumably SQL run after the pipeline; confirm with the factory).</param>
public record PipelineConfig(
    SourceConfig Source,
    Dictionary<string, SyncModeConfig> SyncModes,
    List<TransformerConfig>? Transformers,
    DestinationConfig Destination,
    List<string>? PreScripts,
    List<string>? PostScripts);
/// <summary>Source side of a pipeline: which connection to use and what query to run.</summary>
/// <param name="Connection">Connection identifier (presumably "jde", "cms" or "lotfinder"; confirm with the factory).</param>
/// <param name="Query">Query text to execute against the connection.</param>
/// <param name="Parameters">Optional query parameter definitions by key.</param>
public record SourceConfig(
    string Connection,
    string Query,
    Dictionary<string, ParameterConfig>? Parameters);
/// <summary>Definition of one query parameter.</summary>
/// <param name="Name">Parameter name.</param>
/// <param name="Format">
/// Optional format name (presumably one understood by ParameterFormatConverter, e.g. "jdeJulian"; confirm).
/// </param>
/// <param name="Source">Where the value comes from; defaults to <c>"offset"</c>.</param>
/// <param name="Value">
/// Optional literal value. Defaults to <c>null</c>: an optional parameter may not be followed by a
/// required one (CS1737), so this parameter needs a default for the record to compile.
/// </param>
public record ParameterConfig(
    string Name,
    string? Format,
    string Source = "offset",
    string? Value = null);
/// <summary>Settings that apply to one sync mode of a pipeline.</summary>
/// <param name="MinDtOffset">Optional minimum-date offset (format not visible here; confirm with the factory).</param>
/// <param name="PrePurge">Pre-purge flag; defaults to <c>false</c>.</param>
/// <param name="ReIndex">Re-index flag; defaults to <c>false</c>.</param>
/// <param name="UpdateWhen">Optional update condition; defaults to <c>null</c>.</param>
/// <param name="Destination">Optional destination overrides for this mode; defaults to <c>null</c>.</param>
public record SyncModeConfig(
    string? MinDtOffset,
    bool PrePurge = false,
    bool ReIndex = false,
    string? UpdateWhen = null,
    DestinationOverride? Destination = null);
/// <summary>Per-sync-mode overrides for a pipeline's destination; every member is optional.</summary>
/// <param name="Type">Optional destination type name.</param>
/// <param name="MatchColumns">Optional replacement list of match columns.</param>
/// <param name="ExcludeFromUpdate">Optional replacement list of columns excluded from updates.</param>
public record DestinationOverride(
    string? Type,
    List<string>? MatchColumns,
    List<string>? ExcludeFromUpdate);
/// <summary>Definition of one transformer step in a pipeline.</summary>
/// <param name="Type">Transformer type name.</param>
/// <param name="Columns">Optional list of columns the transformer refers to.</param>
/// <param name="Mappings">Optional string-to-string mappings for the transformer.</param>
public record TransformerConfig(
    string Type,
    List<string>? Columns,
    Dictionary<string, string>? Mappings);
/// <summary>Destination side of a pipeline.</summary>
/// <param name="Table">Destination table name.</param>
/// <param name="MatchColumns">Optional list of columns used for matching rows.</param>
/// <param name="ExcludeFromUpdate">Optional list of columns excluded from updates.</param>
public record DestinationConfig(
    string Table,
    List<string>? MatchColumns,
    List<string>? ExcludeFromUpdate);
Step 3: Create PipelineOptions.cs
namespace JdeScoping.DataSync.Options;
/// <summary>Options that locate the pipeline configuration file.</summary>
public class PipelineOptions
{
    /// <summary>Name of the configuration section these options are bound from.</summary>
    public const string SectionName = "Pipelines";

    /// <summary>Path of the pipelines JSON file; defaults to <c>Pipelines/pipelines.json</c>.</summary>
    public string ConfigPath { get; set; } = "Pipelines/pipelines.json";
}
Step 4: Build to verify
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Step 5: Commit
git add -A && git commit -m "feat(datasync): add pipeline configuration models"
Task 2: Create ParameterFormatConverter
Files:
- Create: `src/JdeScoping.DataSync/Services/ParameterFormatConverter.cs`
- Create: `tests/JdeScoping.DataSync.Tests/Services/ParameterFormatConverterTests.cs`
Step 1: Create ParameterFormatConverter.cs
namespace JdeScoping.DataSync.Services;
/// <summary>
/// Converts <see cref="DateTime"/> values into the formats used for query parameters,
/// after shifting them into the configured time zone.
/// </summary>
public class ParameterFormatConverter
{
    private readonly TimeZoneInfo _timezone;

    /// <summary>Creates a converter for the given time zone.</summary>
    /// <param name="timezone">
    /// <c>"UTC"</c> or <c>"LOCAL"</c> (case-insensitive), otherwise a system time zone id
    /// resolved with <see cref="TimeZoneInfo.FindSystemTimeZoneById(string)"/>.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="timezone"/> is <c>null</c>.</exception>
    /// <exception cref="TimeZoneNotFoundException">The id is not known to the system.</exception>
    public ParameterFormatConverter(string timezone)
    {
        // Fail with a clear argument error rather than a NullReferenceException.
        ArgumentNullException.ThrowIfNull(timezone);

        _timezone = timezone.ToUpperInvariant() switch
        {
            "UTC" => TimeZoneInfo.Utc,
            "LOCAL" => TimeZoneInfo.Local,
            _ => TimeZoneInfo.FindSystemTimeZoneById(timezone)
        };
    }

    /// <summary>
    /// Shifts <paramref name="value"/> into the configured time zone and formats it.
    /// </summary>
    /// <param name="value">
    /// The instant to convert. <see cref="TimeZoneInfo.ConvertTime(DateTime, TimeZoneInfo)"/> treats a
    /// value whose kind is <see cref="DateTimeKind.Unspecified"/> as local time.
    /// </param>
    /// <param name="format">
    /// <c>"jdeJulian"</c> or <c>"jdeTime"</c> (case-insensitive), or <c>null</c> to return the
    /// adjusted <see cref="DateTime"/> unchanged.
    /// </param>
    /// <returns>A boxed <see cref="int"/> for the JDE formats, or a boxed <see cref="DateTime"/> for <c>null</c>.</returns>
    /// <exception cref="ArgumentException"><paramref name="format"/> is not a known format.</exception>
    public object Convert(DateTime value, string? format)
    {
        var adjusted = TimeZoneInfo.ConvertTime(value, _timezone);
        return format?.ToLowerInvariant() switch
        {
            "jdejulian" => ToJdeJulianDate(adjusted),
            "jdetime" => ToJdeTime(adjusted),
            null => adjusted,
            _ => throw new ArgumentException($"Unknown format: {format}")
        };
    }

    /// <summary>
    /// Encodes a date as a JDE Julian number CYYDDD, where C is the number of centuries
    /// since 1900, YY the two-digit year and DDD the day of the year.
    /// </summary>
    /// <param name="date">The date to encode; its time-of-day part is ignored.</param>
    /// <returns>For example 124100 for 2024-04-09 and 99365 for 1999-12-31.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The year is before 1900, which the CYYDDD scheme cannot represent.
    /// </exception>
    public static int ToJdeJulianDate(DateTime date)
    {
        if (date.Year < 1900)
        {
            throw new ArgumentOutOfRangeException(
                nameof(date), date, "JDE Julian dates cannot represent years before 1900.");
        }

        // (Year - 1900) already equals C * 100 + YY, so this stays correct from 2100 onwards.
        return (date.Year - 1900) * 1000 + date.DayOfYear;
    }

    /// <summary>Encodes the time-of-day part of a value as the integer HHMMSS.</summary>
    /// <param name="time">The value whose hour, minute and second are encoded.</param>
    /// <returns>For example 143025 for 14:30:25.</returns>
    public static int ToJdeTime(DateTime time)
    {
        return time.Hour * 10000 + time.Minute * 100 + time.Second;
    }
}
Step 2: Create tests
namespace JdeScoping.DataSync.Tests.Services;
/// <summary>Unit tests for <see cref="ParameterFormatConverter"/>.</summary>
public class ParameterFormatConverterTests
{
    [Fact]
    public void ToJdeJulianDate_Year2024Day100_Returns124100()
    {
        // 9 April is day 100 of the leap year 2024.
        var dayOneHundred = new DateTime(2024, 4, 9);

        var julian = ParameterFormatConverter.ToJdeJulianDate(dayOneHundred);

        julian.ShouldBe(124100);
    }

    [Fact]
    public void ToJdeJulianDate_Year1999Day365_Returns99365()
    {
        var lastDayOf1999 = new DateTime(1999, 12, 31);

        var julian = ParameterFormatConverter.ToJdeJulianDate(lastDayOf1999);

        julian.ShouldBe(99365);
    }

    [Fact]
    public void ToJdeTime_143025_Returns143025()
    {
        var afternoon = new DateTime(2024, 1, 1, 14, 30, 25);

        var encoded = ParameterFormatConverter.ToJdeTime(afternoon);

        encoded.ShouldBe(143025);
    }

    [Fact]
    public void Convert_WithUtcTimezone_UsesUtc()
    {
        var sut = new ParameterFormatConverter("UTC");
        var utcNoon = DateTime.SpecifyKind(new DateTime(2024, 4, 9, 12, 0, 0), DateTimeKind.Utc);

        var converted = sut.Convert(utcNoon, "jdeJulian");

        converted.ShouldBe(124100);
    }
}
Step 3: Run tests
dotnet test tests/JdeScoping.DataSync.Tests --filter "ParameterFormatConverterTests"
Step 4: Commit
git add -A && git commit -m "feat(datasync): add ParameterFormatConverter with JDE date/time support"
Task 3: Extend DbQuerySource for Multiple Connections
Files:
- Modify: `src/JdeScoping.DataSync/Etl/Sources/DbQuerySource.cs`
- Modify: `tests/JdeScoping.DataSync.Tests/Etl/Sources/DbQuerySourceTests.cs`
Note: DbQuerySource already exists but only supports LotFinder. Extend it to support JDE and CMS connections.
Step 1: Update DbQuerySource.cs
using System.Data;
using System.Data.Common;
using JdeScoping.DataAccess.Interfaces;
using JdeScoping.DataSync.Etl.Contracts;
namespace JdeScoping.DataSync.Etl.Sources;
/// <summary>
/// Import source that runs a parameterized query against the JDE, CMS or LotFinder
/// connection and exposes the result as an <see cref="IDataReader"/>.
/// </summary>
/// <remarks>
/// The source owns the connection and command it creates and releases both in
/// <see cref="DisposeAsync"/>. It tracks one read at a time: starting a new read
/// releases whatever an earlier read left behind.
/// </remarks>
public class DbQuerySource : IImportSource
{
    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _connectionType;
    private readonly string _query;
    private readonly Dictionary<string, object> _parameters;
    private DbConnection? _connection;
    private DbCommand? _command;

    /// <summary>Display name in the form <c>DbQuery:{connectionType}</c>, with the type lower-cased.</summary>
    public string SourceName => $"DbQuery:{_connectionType}";

    /// <summary>Creates a source for the given connection type and query.</summary>
    /// <param name="connectionFactory">Factory used to open the connection.</param>
    /// <param name="connectionType"><c>"jde"</c>, <c>"cms"</c> or <c>"lotfinder"</c> (case-insensitive).</param>
    /// <param name="query">Command text to execute.</param>
    /// <param name="parameters">Optional parameter values by parameter name; <c>null</c> means no parameters.</param>
    /// <exception cref="ArgumentNullException">A required argument is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="connectionType"/> is not a known type.</exception>
    public DbQuerySource(
        IDbConnectionFactory connectionFactory,
        string connectionType,
        string query,
        Dictionary<string, object>? parameters = null)
    {
        _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
        _connectionType = connectionType?.ToLowerInvariant()
            ?? throw new ArgumentNullException(nameof(connectionType));
        _query = query ?? throw new ArgumentNullException(nameof(query));
        _parameters = parameters ?? new Dictionary<string, object>();

        if (_connectionType is not ("jde" or "cms" or "lotfinder"))
        {
            throw new ArgumentException($"Unknown connection type: {connectionType}");
        }
    }

    /// <summary>Opens the configured connection and executes the query.</summary>
    /// <param name="cancellationToken">Checked before connecting and passed to the command execution.</param>
    /// <returns>
    /// A reader over the query result, opened with <see cref="CommandBehavior.CloseConnection"/>
    /// so that closing the reader also closes the connection.
    /// </returns>
    public async Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
    {
        // The factory methods take no token, so honor cancellation before opening anything.
        cancellationToken.ThrowIfCancellationRequested();

        // A repeated read would otherwise overwrite, and so leak, the earlier connection.
        await ReleaseResourcesAsync();

        _connection = _connectionType switch
        {
            "jde" => await _connectionFactory.CreateJdeConnectionAsync(),
            "cms" => await _connectionFactory.CreateCmsConnectionAsync(),
            "lotfinder" => await _connectionFactory.CreateLotFinderConnectionAsync(),
            _ => throw new InvalidOperationException($"Unknown connection type: {_connectionType}")
        };

        try
        {
            // Kept in a field, not a using block: the command must outlive this method
            // because the returned reader is still attached to it.
            _command = _connection.CreateCommand();
            _command.CommandText = _query;

            foreach (var (name, value) in _parameters)
            {
                var param = _command.CreateParameter();
                param.ParameterName = name;
                param.Value = value ?? DBNull.Value;
                _command.Parameters.Add(param);
            }

            return await _command.ExecuteReaderAsync(CommandBehavior.CloseConnection, cancellationToken);
        }
        catch
        {
            // No reader was handed out, so nothing else will ever close this connection.
            await ReleaseResourcesAsync();
            throw;
        }
    }

    /// <summary>Disposes the command and connection created by the last read, if any.</summary>
    public async ValueTask DisposeAsync()
    {
        await ReleaseResourcesAsync();
    }

    // Disposes the command first, then the connection, and clears both fields so the call is repeatable.
    private async ValueTask ReleaseResourcesAsync()
    {
        if (_command is not null)
        {
            await _command.DisposeAsync();
            _command = null;
        }

        if (_connection is not null)
        {
            await _connection.DisposeAsync();
            _connection = null;
        }
    }
}
Step 2: Create basic tests
namespace JdeScoping.DataSync.Tests.Etl.Sources;
/// <summary>Constructor-level tests for <see cref="DbQuerySource"/>.</summary>
public class DbQuerySourceTests
{
    [Theory]
    [InlineData("jde")]
    [InlineData("cms")]
    [InlineData("lotfinder")]
    public void Constructor_ValidConnectionType_Succeeds(string connectionType)
    {
        var connectionFactory = Substitute.For<IDbConnectionFactory>();

        var sut = new DbQuerySource(connectionFactory, connectionType, "SELECT 1");

        sut.SourceName.ShouldBe($"DbQuery:{connectionType}");
    }

    [Fact]
    public void Constructor_InvalidConnectionType_Throws()
    {
        var connectionFactory = Substitute.For<IDbConnectionFactory>();

        Action construct = () => new DbQuerySource(connectionFactory, "invalid", "SELECT 1");

        Should.Throw<ArgumentException>(construct);
    }

    [Fact]
    public void Constructor_NullQuery_Throws()
    {
        var connectionFactory = Substitute.For<IDbConnectionFactory>();

        Action construct = () => new DbQuerySource(connectionFactory, "jde", null!);

        Should.Throw<ArgumentNullException>(construct);
    }
}
Step 3: Run tests
dotnet test tests/JdeScoping.DataSync.Tests --filter "DbQuerySourceTests"
Step 4: Commit
git add -A && git commit -m "feat(datasync): add generic DbQuerySource for JDE/CMS/LotFinder"
Task 4: Extend DbBulkMergeDestination
Files:
- Modify: `src/JdeScoping.DataSync/Etl/Destinations/DbBulkMergeDestination.cs`
- Create/Modify: `tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkMergeDestinationTests.cs`
Step 1: Add excludeFromUpdate and updateCondition parameters
Add to constructor:
public DbBulkMergeDestination(
IDbConnectionFactory connectionFactory,
string tableName,
string[] matchColumns,
string[]? excludeFromUpdate = null,
string? updateCondition = null)
Step 2: Modify MERGE SQL generation to use new parameters
Update the WHEN MATCHED clause to include condition and exclude columns.
Step 3: Add tests for new functionality
Step 4: Run tests
dotnet test tests/JdeScoping.DataSync.Tests --filter "DbBulkMergeDestinationTests"
Step 5: Commit
git add -A && git commit -m "feat(datasync): extend DbBulkMergeDestination with excludeFromUpdate and updateCondition"
Task 5: Create IEtlPipelineFactory and Contracts
Files:
- Create: `src/JdeScoping.DataSync/Contracts/IEtlPipelineFactory.cs`
- Create: `src/JdeScoping.DataSync/Contracts/SyncMode.cs`
Step 1: Create IEtlPipelineFactory.cs
namespace JdeScoping.DataSync.Contracts;
/// <summary>Entry point for building ETL pipelines.</summary>
public interface IEtlPipelineFactory
{
    /// <summary>Starts building a pipeline for the given table.</summary>
    /// <param name="tableName">Name of the table to build a pipeline for.</param>
    /// <returns>A builder used to configure and create the pipeline.</returns>
    IEtlPipelineBuilder ForTable(string tableName);
}
/// <summary>Builder that configures and creates an <see cref="EtlPipeline"/>.</summary>
public interface IEtlPipelineBuilder
{
    /// <summary>Sets the sync mode for the pipeline.</summary>
    /// <param name="mode">The sync mode to use.</param>
    /// <returns>A builder, so that calls can be chained.</returns>
    IEtlPipelineBuilder WithMode(SyncMode mode);

    /// <summary>Sets the minimum date for the pipeline.</summary>
    /// <param name="minDt">The minimum date, or <c>null</c> for none.</param>
    /// <returns>A builder, so that calls can be chained.</returns>
    IEtlPipelineBuilder WithMinimumDate(DateTime? minDt);

    /// <summary>Creates the pipeline from the accumulated settings.</summary>
    /// <returns>The pipeline.</returns>
    EtlPipeline Build();
}
Step 2: Create SyncMode.cs
namespace JdeScoping.DataSync.Contracts;
/// <summary>The mode a pipeline runs in.</summary>
public enum SyncMode
{
    /// <summary>Mass mode (presumably a full reload; confirm against the pipeline configuration).</summary>
    Mass,

    /// <summary>Incremental mode (presumably only changes since a minimum date; confirm).</summary>
    Incremental
}
Step 3: Build to verify
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Step 4: Commit
git add -A && git commit -m "feat(datasync): add IEtlPipelineFactory and SyncMode contracts"
Task 6: Create EtlPipelineFactory
Files:
- Create: `src/JdeScoping.DataSync/Services/EtlPipelineFactory.cs`
- Create: `tests/JdeScoping.DataSync.Tests/Services/EtlPipelineFactoryTests.cs`
Step 1: Create EtlPipelineFactory.cs
Implement the factory with:
- Config loading with validation
- PipelineBuilder inner class
- Source/destination/transformer creation methods
Step 2: Add tests for config loading and validation
Step 3: Run tests
dotnet test tests/JdeScoping.DataSync.Tests --filter "EtlPipelineFactoryTests"
Step 4: Commit
git add -A && git commit -m "feat(datasync): add EtlPipelineFactory with JSON config support"
Task 7: Create pipelines.json Config File
Files:
- Create: `src/JdeScoping.DataSync/Pipelines/pipelines.json`
- Modify: `src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
Step 1: Extract data from existing merge configurations
Read existing merge configs to extract for each table:
- `MatchOn` → `matchColumns`
- `UpdateColumns` / `InsertColumns` → derive `excludeFromUpdate`
- `UpdateWhen` → `updateCondition`
Files to reference:
- `src/JdeScoping.DataSync/Configuration/MergeConfigurations/*.cs`
- `src/JdeScoping.DataSync/Fetchers/Jde/*.cs` (for queries)
- `src/JdeScoping.DataSync/Fetchers/Cms/*.cs` (for CMS query)
Step 2: Create Pipelines directory and pipelines.json
Create config for all 9 tables:
- WorkOrder_Curr
- Lot
- LotUsage
- Item
- WorkCenter
- ProfitCenter
- JdeUser
- Branch
- MisData
Important: For MisData, add the post-processing SQL as a postScript:
"postScripts": [
"UPDATE MisData SET ProcessedFlag = 1 WHERE ProcessedFlag IS NULL"
]
This replaces the MisDataPostProcessor class.
Step 3: Add Content item to csproj
<ItemGroup>
<Content Include="Pipelines\pipelines.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
Step 4: Build to verify config copies
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
ls src/JdeScoping.DataSync/bin/Debug/net10.0/Pipelines/
Step 5: Commit
git add -A && git commit -m "feat(datasync): add pipelines.json config for all sync tables"
Phase 2: Wire Up
Task 8: Update DependencyInjection.cs
Files:
- Modify: `src/JdeScoping.DataSync/DependencyInjection.cs`
Step 1: Add new registrations (alongside old for now)
// Add pipeline factory
// Bind PipelineOptions from the configuration section named by PipelineOptions.SectionName.
services.AddOptions<PipelineOptions>()
.Bind(configuration.GetSection(PipelineOptions.SectionName));
// Register EtlPipelineFactory as the singleton implementation of IEtlPipelineFactory.
services.AddSingleton<IEtlPipelineFactory, EtlPipelineFactory>();
Step 2: Build to verify
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Step 3: Commit
git add -A && git commit -m "feat(datasync): register EtlPipelineFactory in DI"
Task 9: Update TableSyncOperation
Files:
- Modify: `src/JdeScoping.DataSync/Services/TableSyncOperation.cs`
Step 1: Inject IEtlPipelineFactory
Step 2: Replace old sync logic with pipeline execution
// Build a pipeline for this table: the sync mode follows the task's IsMassUpdate flag
// and the minimum date comes from the task's MinimumDt.
var pipeline = _pipelineFactory
.ForTable(config.TableName)
.WithMode(updateTask.IsMassUpdate ? SyncMode.Mass : SyncMode.Incremental)
.WithMinimumDate(updateTask.MinimumDt)
.Build();
// Run the pipeline and turn an unsuccessful result into an exception carrying its error message.
var result = await pipeline.ExecuteAsync(cancellationToken);
if (!result.Success)
throw new InvalidOperationException($"Pipeline failed for {config.TableName}: {result.ErrorMessage}");
// Important: Pass row count to DataUpdateRepository for metrics
var recordCount = result.TotalRows; // Use this for DataUpdate record
Step 3: Build to verify
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Step 4: Commit
git add -A && git commit -m "feat(datasync): wire TableSyncOperation to use EtlPipelineFactory"
Phase 3: Clean Up
Important: Tasks in Phase 3 must be executed in order. DataSourceConfig changes come AFTER test and appsettings updates to avoid broken builds.
Task 10: Remove Old DI Registrations
Files:
- Modify: `src/JdeScoping.DataSync/DependencyInjection.cs`
Step 1: Remove old registrations
- Remove `using JdeScoping.DataSync.Generated;`
- Remove all `IDataFetcher<T>` registrations
- Remove all `IMergeConfiguration<T>` registrations
- Remove `IBulkMergeHelper`, `IDataReaderFactory`, `ISchemaValidator`
- Remove `IMergeConfigurationRegistry`
- Remove `IPostProcessor`, `MisDataPostProcessor`
- Remove named fetcher registrations
Step 2: Build to verify
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Step 3: Commit
git add -A && git commit -m "refactor(datasync): remove old ETL DI registrations"
Task 11: Delete Old Source Files
Files to delete:
- `BulkCopyTypeRegistry.cs`
- `Contracts/IBulkMergeHelper.cs`
- `Contracts/IDataFetcher.cs`
- `Contracts/IDataReaderFactory.cs`
- `Contracts/IMergeConfiguration.cs`
- `Contracts/IMergeConfigurationRegistry.cs`
- `Contracts/IPostProcessor.cs`
- `Contracts/ISchemaValidator.cs`
- `Configuration/MergeConfigurations/` (all 9 files)
- `Exceptions/BulkMergeException.cs`
- `Fetchers/` (all files)
- `Models/ColumnSchema.cs`
- `Models/MergeResult.cs`
- `Services/BulkMergeHelper.cs`
- `Services/ExpressionParser.cs`
- `Services/MergeConfigurationRegistry.cs`
- `Services/MergeSqlBuilder.cs`
- `Services/MisDataPostProcessor.cs`
- `Services/SchemaValidator.cs`
Step 1: Delete files
rm src/JdeScoping.DataSync/BulkCopyTypeRegistry.cs
rm -rf src/JdeScoping.DataSync/Contracts/IBulkMergeHelper.cs
# ... etc
Step 2: Build to verify
dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Step 3: Commit
git add -A && git commit -m "refactor(datasync): delete old ETL source files"
Task 12: Delete Integration Tests Project
Files:
- Delete: `tests/JdeScoping.DataSync.IntegrationTests/` (entire project)
- Modify: `JdeScoping.slnx`
Note: Must delete integration tests BEFORE removing SourceGenerator, as integration tests reference generated code.
Step 1: Remove project from solution
dotnet sln JdeScoping.slnx remove tests/JdeScoping.DataSync.IntegrationTests/JdeScoping.DataSync.IntegrationTests.csproj
Step 2: Delete project folder
rm -rf tests/JdeScoping.DataSync.IntegrationTests
Step 3: Build to verify
dotnet build JdeScoping.slnx
Step 4: Commit
git add -A && git commit -m "refactor(datasync): remove obsolete integration tests project"
Task 13: Delete SourceGenerator Project
Files:
- Delete: `src/JdeScoping.DataSync.SourceGenerators/` (entire project)
- Modify: `JdeScoping.slnx`
- Modify: `src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
Step 1: Remove project reference from DataSync.csproj
Step 2: Remove project from solution
dotnet sln JdeScoping.slnx remove src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj
Step 3: Delete project folder
rm -rf src/JdeScoping.DataSync.SourceGenerators
Step 4: Build to verify
dotnet build JdeScoping.slnx
Step 5: Commit
git add -A && git commit -m "refactor(datasync): remove SourceGenerator project"
Task 14: Delete Old Unit Test Files
Files to delete:
- `tests/JdeScoping.DataSync.Tests/Services/BulkMergeHelperTests.cs`
- `tests/JdeScoping.DataSync.Tests/Services/ExpressionParserTests.cs`
- `tests/JdeScoping.DataSync.Tests/Services/MergeConfigurationRegistryTests.cs`
- `tests/JdeScoping.DataSync.Tests/Services/MergeSqlBuilderTests.cs`
- `tests/JdeScoping.DataSync.Tests/Services/SchemaValidatorTests.cs`
- `tests/JdeScoping.DataSync.Tests/TableSyncOperationTests.cs`
Step 1: Delete test files
Step 2: Run remaining tests
dotnet test tests/JdeScoping.DataSync.Tests
Step 3: Commit
git add -A && git commit -m "refactor(datasync): delete obsolete test files"
Task 15: Update Remaining Tests
Files:
- Modify: `tests/JdeScoping.DataSync.Tests/ScheduleCheckerTests.cs`
- Modify: `tests/JdeScoping.DataSync.Tests/SyncOrchestratorTests.cs`
Step 1: Remove FetcherTypeName references from test fixtures
Step 2: Run tests
dotnet test tests/JdeScoping.DataSync.Tests
Step 3: Commit
git add -A && git commit -m "refactor(datasync): update tests to remove FetcherTypeName"
Task 16: Update appsettings Files
Files:
- Modify: `src/JdeScoping.Host/appsettings.json`
- Modify: `src/JdeScoping.Host/appsettings.Development.json`
Step 1: Remove obsolete properties from DataSources config
- FetcherTypeName
- PostProcessorTypeName
- PrepurgeData
- ReIndexData
Step 2: Build and run to verify
dotnet build src/JdeScoping.Host/JdeScoping.Host.csproj
Step 3: Commit
git add -A && git commit -m "refactor(datasync): remove obsolete appsettings properties"
Task 17: Update DataSourceConfig
Files:
- Modify: `src/JdeScoping.DataSync/Options/DataSourceConfig.cs`
Note: This task comes AFTER test and appsettings updates to avoid broken builds.
Step 1: Remove obsolete properties
- FetcherTypeName
- PostProcessorTypeName
- PrepurgeData
- ReIndexData
Step 2: Build to verify
dotnet build JdeScoping.slnx
Step 3: Commit
git add -A && git commit -m "refactor(datasync): remove obsolete DataSourceConfig properties"
Task 18: Final Verification
Step 1: Full build
dotnet build JdeScoping.slnx
Step 2: Run all tests
dotnet test JdeScoping.slnx
Step 3: Commit any fixes
Step 4: Create summary commit
git add -A && git commit -m "feat(datasync): complete migration to JSON-configured ETL pipelines
- Remove legacy Fetchers, MergeConfigurations, BulkMerge services
- Remove SourceGenerator project
- Add EtlPipelineFactory with JSON config
- Add DbQuerySource for JDE/CMS/LotFinder connections
- Extend DbBulkMergeDestination with excludeFromUpdate and updateCondition
- Wire TableSyncOperation to use new pipeline factory
- Update all tests and configuration"