Files
jdescopingtool/PLANS/2026-01-06-old-etl-removal-implementation.md
T
Joseph Doherty d4135e8ad3 fix(data-access): correct self-referential SQL in WorkCenter filter
The WHERE clause was comparing Code to itself instead of the aliased
table reference, which would always be true.
2026-01-06 14:12:07 -05:00

22 KiB

Old ETL Removal Implementation Plan

For Claude: REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.

Goal: Remove legacy ETL implementation and wire orchestration to use new EtlPipeline with JSON config.

Architecture: Three-phase migration - build new infrastructure first, wire up, then clean up old code.

Tech Stack: .NET 10, System.Text.Json, EtlPipeline

Working Directory: All paths are relative to NEW/ folder. Run cd /Users/dohertj2/Desktop/JdeScopingTool/NEW before starting.


Phase 1: Build New Infrastructure

Task 1: Create Pipeline Configuration Models

Files:

  • Create: src/JdeScoping.DataSync/Configuration/PipelinesRoot.cs
  • Create: src/JdeScoping.DataSync/Configuration/PipelineConfig.cs
  • Create: src/JdeScoping.DataSync/Options/PipelineOptions.cs

Step 1: Create PipelinesRoot.cs

namespace JdeScoping.DataSync.Configuration;

/// <summary>
/// Root object of the pipelines configuration: optional global settings plus
/// the pipeline definitions, looked up by string key.
/// </summary>
/// <param name="Settings">Optional global settings; when missing, <see cref="EffectiveSettings"/> supplies defaults.</param>
/// <param name="Pipelines">Pipeline definitions by key.</param>
public record PipelinesRoot(
    PipelineSettings? Settings,
    Dictionary<string, PipelineConfig> Pipelines)
{
    /// <summary>
    /// The configured <see cref="Settings"/>, or a default-constructed
    /// <see cref="PipelineSettings"/> when none were supplied.
    /// </summary>
    public PipelineSettings EffectiveSettings
    {
        get
        {
            if (Settings is { } configured)
            {
                return configured;
            }

            return new PipelineSettings();
        }
    }
}

/// <summary>Global settings shared by all pipelines.</summary>
/// <param name="Timezone">Time zone name; defaults to <c>"UTC"</c>.</param>
public record PipelineSettings(
    string Timezone = "UTC");

Step 2: Create PipelineConfig.cs

namespace JdeScoping.DataSync.Configuration;

/// <summary>
/// Configuration for a single pipeline: its source, per-mode settings,
/// optional transformers, destination, and optional pre/post scripts.
/// </summary>
/// <param name="Source">Where rows are read from.</param>
/// <param name="SyncModes">Per-mode settings by string key (presumably the sync mode name; confirm against the JSON).</param>
/// <param name="Transformers">Optional transformer definitions.</param>
/// <param name="Destination">Where rows are written.</param>
/// <param name="PreScripts">Optional scripts listed to run before the pipeline.</param>
/// <param name="PostScripts">Optional scripts listed to run after the pipeline.</param>
public record PipelineConfig(
    SourceConfig Source,
    Dictionary<string, SyncModeConfig> SyncModes,
    List<TransformerConfig>? Transformers,
    DestinationConfig Destination,
    List<string>? PreScripts,
    List<string>? PostScripts);

/// <summary>Describes the query a pipeline reads from.</summary>
/// <param name="Connection">Name of the connection to query.</param>
/// <param name="Query">Query text.</param>
/// <param name="Parameters">Optional query parameter definitions by string key.</param>
public record SourceConfig(
    string Connection,
    string Query,
    Dictionary<string, ParameterConfig>? Parameters);

/// <summary>Describes one query parameter.</summary>
/// <param name="Name">Parameter name.</param>
/// <param name="Format">Optional format name applied to the value.</param>
/// <param name="Source">Where the value comes from; defaults to <c>"offset"</c>.</param>
/// <param name="Value">
/// Optional explicit value. Defaults to <c>null</c>: a required parameter may not
/// follow the optional <paramref name="Source"/> (compiler error CS1737), and the
/// default keeps the positional order unchanged for existing callers.
/// </param>
public record ParameterConfig(
    string Name,
    string? Format,
    string Source = "offset",
    string? Value = null);

/// <summary>Settings that apply to one sync mode of a pipeline.</summary>
/// <param name="MinDtOffset">Optional minimum-date offset (format not shown here; confirm against the factory).</param>
/// <param name="PrePurge">Pre-purge flag; defaults to <c>false</c>.</param>
/// <param name="ReIndex">Re-index flag; defaults to <c>false</c>.</param>
/// <param name="UpdateWhen">Optional update condition.</param>
/// <param name="Destination">Optional destination override for this mode.</param>
public record SyncModeConfig(
    string? MinDtOffset,
    bool PrePurge = false,
    bool ReIndex = false,
    string? UpdateWhen = null,
    DestinationOverride? Destination = null);

/// <summary>Per-mode destination override; every member is optional.</summary>
/// <param name="Type">Optional destination type name.</param>
/// <param name="MatchColumns">Optional match column names.</param>
/// <param name="ExcludeFromUpdate">Optional column names excluded from updates.</param>
public record DestinationOverride(
    string? Type,
    List<string>? MatchColumns,
    List<string>? ExcludeFromUpdate);

/// <summary>Describes one transformer in a pipeline.</summary>
/// <param name="Type">Transformer type name.</param>
/// <param name="Columns">Optional column names the transformer applies to.</param>
/// <param name="Mappings">Optional string-to-string mappings.</param>
public record TransformerConfig(
    string Type,
    List<string>? Columns,
    Dictionary<string, string>? Mappings);

/// <summary>Describes the destination table of a pipeline.</summary>
/// <param name="Table">Destination table name.</param>
/// <param name="MatchColumns">Optional match column names.</param>
/// <param name="ExcludeFromUpdate">Optional column names excluded from updates.</param>
public record DestinationConfig(
    string Table,
    List<string>? MatchColumns,
    List<string>? ExcludeFromUpdate);

Step 3: Create PipelineOptions.cs

namespace JdeScoping.DataSync.Options;

/// <summary>
/// Options for locating the pipelines configuration file.
/// </summary>
public class PipelineOptions
{
    /// <summary>Name of the configuration section these options are bound from.</summary>
    public const string SectionName = "Pipelines";

    /// <summary>
    /// Path of the pipelines JSON file; defaults to <c>Pipelines/pipelines.json</c>.
    /// NOTE(review): presumably resolved relative to the application output directory - confirm.
    /// </summary>
    public string ConfigPath { get; set; } = "Pipelines/pipelines.json";
}

Step 4: Build to verify

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 5: Commit

git add -A && git commit -m "feat(datasync): add pipeline configuration models"

Task 2: Create ParameterFormatConverter

Files:

  • Create: src/JdeScoping.DataSync/Services/ParameterFormatConverter.cs
  • Create: tests/JdeScoping.DataSync.Tests/Services/ParameterFormatConverterTests.cs

Step 1: Create ParameterFormatConverter.cs

namespace JdeScoping.DataSync.Services;

/// <summary>
/// Converts <see cref="DateTime"/> values into the representation a query
/// parameter expects, after shifting them into a configured time zone.
/// </summary>
public class ParameterFormatConverter
{
    private readonly TimeZoneInfo _timezone;

    /// <summary>
    /// Creates a converter for the given time zone.
    /// </summary>
    /// <param name="timezone">
    /// <c>"UTC"</c> or <c>"LOCAL"</c> (case-insensitive), or any identifier accepted by
    /// <see cref="TimeZoneInfo.FindSystemTimeZoneById(string)"/>.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="timezone"/> is null.</exception>
    /// <exception cref="TimeZoneNotFoundException">The identifier is not known to the system.</exception>
    public ParameterFormatConverter(string timezone)
    {
        // Fail with a descriptive exception instead of a NullReferenceException.
        ArgumentNullException.ThrowIfNull(timezone);

        _timezone = timezone.ToUpperInvariant() switch
        {
            "UTC" => TimeZoneInfo.Utc,
            "LOCAL" => TimeZoneInfo.Local,
            _ => TimeZoneInfo.FindSystemTimeZoneById(timezone)
        };
    }

    /// <summary>
    /// Shifts <paramref name="value"/> into the configured time zone and formats it.
    /// </summary>
    /// <param name="value">
    /// The instant to convert. NOTE(review): <see cref="TimeZoneInfo.ConvertTime(DateTime, TimeZoneInfo)"/>
    /// treats a value whose Kind is Unspecified as local time - callers should pass UTC or Local values.
    /// </param>
    /// <param name="format">
    /// <c>"jdeJulian"</c> or <c>"jdeTime"</c> (case-insensitive), or null to return the adjusted
    /// <see cref="DateTime"/> itself.
    /// </param>
    /// <returns>A boxed <see cref="int"/> for the JDE formats, or a boxed <see cref="DateTime"/> for null.</returns>
    /// <exception cref="ArgumentException">The format is not recognised.</exception>
    public object Convert(DateTime value, string? format)
    {
        var adjusted = TimeZoneInfo.ConvertTime(value, _timezone);

        return format?.ToLowerInvariant() switch
        {
            "jdejulian" => ToJdeJulianDate(adjusted),
            "jdetime" => ToJdeTime(adjusted),
            null => adjusted,
            _ => throw new ArgumentException($"Unknown format: {format}")
        };
    }

    /// <summary>
    /// Encodes a date as CYYDDD: century offset from 1900, two-digit year, day of year.
    /// </summary>
    /// <param name="date">The date to encode; only the date part is used.</param>
    /// <returns>For example 124100 for day 100 of 2024, or 99365 for 31 December 1999.</returns>
    public static int ToJdeJulianDate(DateTime date)
    {
        // Derive the century digit from the year so that dates from 2100 onward
        // (e.g. far-future sentinel dates) do not collapse into the 2000s.
        // Clamped at zero so years before 2000 encode exactly as before.
        int century = Math.Max(0, (date.Year - 1900) / 100);
        int year = date.Year % 100;
        int dayOfYear = date.DayOfYear;
        return century * 100000 + year * 1000 + dayOfYear;
    }

    /// <summary>
    /// Encodes a time of day as HHMMSS (24-hour clock).
    /// </summary>
    /// <param name="time">The value whose time part is encoded.</param>
    /// <returns>For example 143025 for 14:30:25.</returns>
    public static int ToJdeTime(DateTime time)
    {
        return time.Hour * 10000 + time.Minute * 100 + time.Second;
    }
}

Step 2: Create tests

namespace JdeScoping.DataSync.Tests.Services;

/// <summary>
/// Unit tests for the JDE date and time encodings of <see cref="ParameterFormatConverter"/>.
/// </summary>
public class ParameterFormatConverterTests
{
    [Fact]
    public void ToJdeJulianDate_Year2024Day100_Returns124100()
    {
        // 9 April is day 100 of the leap year 2024.
        var ninthOfApril = new DateTime(2024, 4, 9);

        ParameterFormatConverter.ToJdeJulianDate(ninthOfApril).ShouldBe(124100);
    }

    [Fact]
    public void ToJdeJulianDate_Year1999Day365_Returns99365()
    {
        var lastDayOf1999 = new DateTime(1999, 12, 31);

        ParameterFormatConverter.ToJdeJulianDate(lastDayOf1999).ShouldBe(99365);
    }

    [Fact]
    public void ToJdeTime_143025_Returns143025()
    {
        var halfPastTwo = new DateTime(2024, 1, 1, 14, 30, 25);

        ParameterFormatConverter.ToJdeTime(halfPastTwo).ShouldBe(143025);
    }

    [Fact]
    public void Convert_WithUtcTimezone_UsesUtc()
    {
        var noonUtc = new DateTime(2024, 4, 9, 12, 0, 0, DateTimeKind.Utc);
        var sut = new ParameterFormatConverter("UTC");

        object julian = sut.Convert(noonUtc, "jdeJulian");

        julian.ShouldBe(124100);
    }
}

Step 3: Run tests

dotnet test tests/JdeScoping.DataSync.Tests --filter "ParameterFormatConverterTests"

Step 4: Commit

git add -A && git commit -m "feat(datasync): add ParameterFormatConverter with JDE date/time support"

Task 3: Extend DbQuerySource for Multiple Connections

Files:

  • Modify: src/JdeScoping.DataSync/Etl/Sources/DbQuerySource.cs
  • Modify: tests/JdeScoping.DataSync.Tests/Etl/Sources/DbQuerySourceTests.cs

Note: DbQuerySource already exists but only supports LotFinder. Extend it to support JDE and CMS connections.

Step 1: Update DbQuerySource.cs

using System.Data;
using System.Data.Common;
using JdeScoping.DataAccess.Interfaces;
using JdeScoping.DataSync.Etl.Contracts;

namespace JdeScoping.DataSync.Etl.Sources;

/// <summary>
/// Import source that runs a query against the JDE, CMS, or LotFinder connection
/// and exposes the result as an <see cref="IDataReader"/>.
/// </summary>
public class DbQuerySource : IImportSource
{
    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _connectionType;
    private readonly string _query;
    private readonly Dictionary<string, object> _parameters;
    private DbConnection? _connection;

    /// <summary>Source name in the form <c>DbQuery:{connectionType}</c>, using the lower-cased connection type.</summary>
    public string SourceName => $"DbQuery:{_connectionType}";

    /// <summary>
    /// Creates a query source.
    /// </summary>
    /// <param name="connectionFactory">Factory used to create the connection when reading starts.</param>
    /// <param name="connectionType"><c>jde</c>, <c>cms</c>, or <c>lotfinder</c> (case-insensitive).</param>
    /// <param name="query">Command text to execute.</param>
    /// <param name="parameters">Optional parameter values by parameter name.</param>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="connectionType"/> is not a known connection.</exception>
    public DbQuerySource(
        IDbConnectionFactory connectionFactory,
        string connectionType,
        string query,
        Dictionary<string, object>? parameters = null)
    {
        _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));
        _connectionType = connectionType?.ToLowerInvariant()
            ?? throw new ArgumentNullException(nameof(connectionType));
        _query = query ?? throw new ArgumentNullException(nameof(query));
        _parameters = parameters ?? new Dictionary<string, object>();

        if (_connectionType is not ("jde" or "cms" or "lotfinder"))
            throw new ArgumentException($"Unknown connection type: {connectionType}");
    }

    /// <summary>
    /// Creates a connection of the configured type, executes the query with the
    /// configured parameters, and returns the reader.
    /// </summary>
    /// <param name="cancellationToken">Token passed to the command execution.</param>
    /// <returns>A reader opened with <see cref="CommandBehavior.CloseConnection"/>.</returns>
    public async Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
    {
        // A repeated call would otherwise overwrite _connection and leak the earlier one.
        await DisposeAsync();

        _connection = _connectionType switch
        {
            "jde" => await _connectionFactory.CreateJdeConnectionAsync(),
            "cms" => await _connectionFactory.CreateCmsConnectionAsync(),
            "lotfinder" => await _connectionFactory.CreateLotFinderConnectionAsync(),
            _ => throw new InvalidOperationException($"Unknown connection type: {_connectionType}")
        };

        try
        {
            var command = _connection.CreateCommand();
            command.CommandText = _query;

            foreach (var (name, value) in _parameters)
            {
                var param = command.CreateParameter();
                param.ParameterName = name;
                // Null entries can still arrive at runtime; send them as database NULL.
                param.Value = value ?? DBNull.Value;
                command.Parameters.Add(param);
            }

            return await command.ExecuteReaderAsync(CommandBehavior.CloseConnection, cancellationToken);
        }
        catch
        {
            // No reader was handed out, so nothing else will close the connection.
            await DisposeAsync();
            throw;
        }
    }

    /// <summary>
    /// Disposes the connection created by <see cref="ReadDataAsync"/>, if any. Safe to call repeatedly.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_connection != null)
        {
            await _connection.DisposeAsync();
            _connection = null;
        }
    }
}

Step 2: Create basic tests

namespace JdeScoping.DataSync.Tests.Etl.Sources;

/// <summary>
/// Constructor validation tests for <see cref="DbQuerySource"/>.
/// </summary>
public class DbQuerySourceTests
{
    [Theory]
    [InlineData("jde")]
    [InlineData("cms")]
    [InlineData("lotfinder")]
    public void Constructor_ValidConnectionType_Succeeds(string connectionType)
    {
        var expectedName = $"DbQuery:{connectionType}";

        var sut = new DbQuerySource(Substitute.For<IDbConnectionFactory>(), connectionType, "SELECT 1");

        sut.SourceName.ShouldBe(expectedName);
    }

    [Fact]
    public void Constructor_InvalidConnectionType_Throws()
    {
        var connectionFactory = Substitute.For<IDbConnectionFactory>();
        Action construct = () => _ = new DbQuerySource(connectionFactory, "invalid", "SELECT 1");

        Should.Throw<ArgumentException>(construct);
    }

    [Fact]
    public void Constructor_NullQuery_Throws()
    {
        var connectionFactory = Substitute.For<IDbConnectionFactory>();
        Action construct = () => _ = new DbQuerySource(connectionFactory, "jde", null!);

        Should.Throw<ArgumentNullException>(construct);
    }
}

Step 3: Run tests

dotnet test tests/JdeScoping.DataSync.Tests --filter "DbQuerySourceTests"

Step 4: Commit

git add -A && git commit -m "feat(datasync): add generic DbQuerySource for JDE/CMS/LotFinder"

Task 4: Extend DbBulkMergeDestination

Files:

  • Modify: src/JdeScoping.DataSync/Etl/Destinations/DbBulkMergeDestination.cs
  • Create/Modify: tests/JdeScoping.DataSync.Tests/Etl/Destinations/DbBulkMergeDestinationTests.cs

Step 1: Add excludeFromUpdate and updateCondition parameters

Add to constructor:

public DbBulkMergeDestination(
    IDbConnectionFactory connectionFactory,
    string tableName,
    string[] matchColumns,
    string[]? excludeFromUpdate = null,
    string? updateCondition = null)

Step 2: Modify MERGE SQL generation to use new parameters

Update the WHEN MATCHED clause to include condition and exclude columns.

Step 3: Add tests for new functionality

Step 4: Run tests

dotnet test tests/JdeScoping.DataSync.Tests --filter "DbBulkMergeDestinationTests"

Step 5: Commit

git add -A && git commit -m "feat(datasync): extend DbBulkMergeDestination with excludeFromUpdate and updateCondition"

Task 5: Create IEtlPipelineFactory and Contracts

Files:

  • Create: src/JdeScoping.DataSync/Contracts/IEtlPipelineFactory.cs
  • Create: src/JdeScoping.DataSync/Contracts/SyncMode.cs

Step 1: Create IEtlPipelineFactory.cs

namespace JdeScoping.DataSync.Contracts;

/// <summary>
/// Entry point for building an ETL pipeline for a named table.
/// </summary>
public interface IEtlPipelineFactory
{
    /// <summary>Starts a builder for the table identified by <paramref name="tableName"/>.</summary>
    /// <param name="tableName">Name of the table to build a pipeline for.</param>
    /// <returns>A builder on which mode and minimum date can be set before building.</returns>
    IEtlPipelineBuilder ForTable(string tableName);
}

/// <summary>
/// Fluent builder for a pipeline: the <c>With*</c> methods return a builder so
/// calls can be chained, and <see cref="Build"/> produces the pipeline.
/// </summary>
public interface IEtlPipelineBuilder
{
    /// <summary>Selects the sync mode the pipeline is built for.</summary>
    IEtlPipelineBuilder WithMode(SyncMode mode);

    /// <summary>
    /// Sets the minimum date for the pipeline.
    /// NOTE(review): the meaning of a null <paramref name="minDt"/> is not defined here - confirm in the implementation.
    /// </summary>
    IEtlPipelineBuilder WithMinimumDate(DateTime? minDt);

    /// <summary>Creates the configured pipeline.</summary>
    EtlPipeline Build();
}

Step 2: Create SyncMode.cs

namespace JdeScoping.DataSync.Contracts;

/// <summary>
/// Sync mode a pipeline is built for.
/// NOTE(review): presumably Mass is a full reload and Incremental loads changes
/// since a minimum date - confirm against the pipeline factory.
/// </summary>
public enum SyncMode
{
    /// <summary>Mass update mode.</summary>
    Mass,
    /// <summary>Incremental update mode.</summary>
    Incremental
}

Step 3: Build to verify

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 4: Commit

git add -A && git commit -m "feat(datasync): add IEtlPipelineFactory and SyncMode contracts"

Task 6: Create EtlPipelineFactory

Files:

  • Create: src/JdeScoping.DataSync/Services/EtlPipelineFactory.cs
  • Create: tests/JdeScoping.DataSync.Tests/Services/EtlPipelineFactoryTests.cs

Step 1: Create EtlPipelineFactory.cs

Implement the factory with:

  • Config loading with validation
  • PipelineBuilder inner class
  • Source/destination/transformer creation methods

Step 2: Add tests for config loading and validation

Step 3: Run tests

dotnet test tests/JdeScoping.DataSync.Tests --filter "EtlPipelineFactoryTests"

Step 4: Commit

git add -A && git commit -m "feat(datasync): add EtlPipelineFactory with JSON config support"

Task 7: Create pipelines.json Config File

Files:

  • Create: src/JdeScoping.DataSync/Pipelines/pipelines.json
  • Modify: src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 1: Extract data from existing merge configurations

Read existing merge configs to extract for each table:

  • MatchOn → matchColumns
  • UpdateColumns / InsertColumns → derive excludeFromUpdate
  • UpdateWhen → updateCondition

Files to reference:

  • src/JdeScoping.DataSync/Configuration/MergeConfigurations/*.cs
  • src/JdeScoping.DataSync/Fetchers/Jde/*.cs (for queries)
  • src/JdeScoping.DataSync/Fetchers/Cms/*.cs (for CMS query)

Step 2: Create Pipelines directory and pipelines.json

Create config for all 9 tables:

  • WorkOrder_Curr
  • Lot
  • LotUsage
  • Item
  • WorkCenter
  • ProfitCenter
  • JdeUser
  • Branch
  • MisData

Important: For MisData, add the post-processing SQL as a postScript:

"postScripts": [
  "UPDATE MisData SET ProcessedFlag = 1 WHERE ProcessedFlag IS NULL"
]

This replaces the MisDataPostProcessor class.

Step 3: Add Content item to csproj

<ItemGroup>
  <Content Include="Pipelines\pipelines.json">
    <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
  </Content>
</ItemGroup>

Step 4: Build to verify config copies

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
ls src/JdeScoping.DataSync/bin/Debug/net10.0/Pipelines/

Step 5: Commit

git add -A && git commit -m "feat(datasync): add pipelines.json config for all sync tables"

Phase 2: Wire Up

Task 8: Update DependencyInjection.cs

Files:

  • Modify: src/JdeScoping.DataSync/DependencyInjection.cs

Step 1: Add new registrations (alongside old for now)

// Add pipeline factory:
// bind the configuration section named by PipelineOptions.SectionName to
// PipelineOptions, then register EtlPipelineFactory as the singleton
// implementation of IEtlPipelineFactory.
services.AddOptions<PipelineOptions>()
    .Bind(configuration.GetSection(PipelineOptions.SectionName));
services.AddSingleton<IEtlPipelineFactory, EtlPipelineFactory>();

Step 2: Build to verify

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 3: Commit

git add -A && git commit -m "feat(datasync): register EtlPipelineFactory in DI"

Task 9: Update TableSyncOperation

Files:

  • Modify: src/JdeScoping.DataSync/Services/TableSyncOperation.cs

Step 1: Inject IEtlPipelineFactory

Step 2: Replace old sync logic with pipeline execution

// Build the pipeline for this table: the mode follows the update task's
// IsMassUpdate flag and the minimum date is taken from the task.
var pipeline = _pipelineFactory
    .ForTable(config.TableName)
    .WithMode(updateTask.IsMassUpdate ? SyncMode.Mass : SyncMode.Incremental)
    .WithMinimumDate(updateTask.MinimumDt)
    .Build();

var result = await pipeline.ExecuteAsync(cancellationToken);

// Surface an unsuccessful pipeline result as an exception, including the pipeline's error message.
if (!result.Success)
    throw new InvalidOperationException($"Pipeline failed for {config.TableName}: {result.ErrorMessage}");

// Important: Pass row count to DataUpdateRepository for metrics
var recordCount = result.TotalRows;  // Use this for DataUpdate record

Step 3: Build to verify

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 4: Commit

git add -A && git commit -m "feat(datasync): wire TableSyncOperation to use EtlPipelineFactory"


Phase 3: Clean Up

Important: Tasks in Phase 3 must be executed in order. DataSourceConfig changes come AFTER test and appsettings updates to avoid broken builds.

Task 10: Remove Old DI Registrations

Files:

  • Modify: src/JdeScoping.DataSync/DependencyInjection.cs

Step 1: Remove old registrations

  • Remove using JdeScoping.DataSync.Generated;
  • Remove all IDataFetcher<T> registrations
  • Remove all IMergeConfiguration<T> registrations
  • Remove IBulkMergeHelper, IDataReaderFactory, ISchemaValidator
  • Remove IMergeConfigurationRegistry
  • Remove IPostProcessor, MisDataPostProcessor
  • Remove named fetcher registrations

Step 2: Build to verify

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 3: Commit

git add -A && git commit -m "refactor(datasync): remove old ETL DI registrations"

Task 11: Delete Old Source Files

Files to delete:

  • BulkCopyTypeRegistry.cs
  • Contracts/IBulkMergeHelper.cs
  • Contracts/IDataFetcher.cs
  • Contracts/IDataReaderFactory.cs
  • Contracts/IMergeConfiguration.cs
  • Contracts/IMergeConfigurationRegistry.cs
  • Contracts/IPostProcessor.cs
  • Contracts/ISchemaValidator.cs
  • Configuration/MergeConfigurations/ (all 9 files)
  • Exceptions/BulkMergeException.cs
  • Fetchers/ (all files)
  • Models/ColumnSchema.cs
  • Models/MergeResult.cs
  • Services/BulkMergeHelper.cs
  • Services/ExpressionParser.cs
  • Services/MergeConfigurationRegistry.cs
  • Services/MergeSqlBuilder.cs
  • Services/MisDataPostProcessor.cs
  • Services/SchemaValidator.cs

Step 1: Delete files

rm src/JdeScoping.DataSync/BulkCopyTypeRegistry.cs
rm -rf src/JdeScoping.DataSync/Contracts/IBulkMergeHelper.cs
# ... etc

Step 2: Build to verify

dotnet build src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 3: Commit

git add -A && git commit -m "refactor(datasync): delete old ETL source files"

Task 12: Delete Integration Tests Project

Files:

  • Delete: tests/JdeScoping.DataSync.IntegrationTests/ (entire project)
  • Modify: JdeScoping.slnx

Note: Must delete integration tests BEFORE removing SourceGenerator, as integration tests reference generated code.

Step 1: Remove project from solution

dotnet sln JdeScoping.slnx remove tests/JdeScoping.DataSync.IntegrationTests/JdeScoping.DataSync.IntegrationTests.csproj

Step 2: Delete project folder

rm -rf tests/JdeScoping.DataSync.IntegrationTests

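Step 3: Build to verify (note: this solution-wide build also compiles tests/JdeScoping.DataSync.Tests, which still contains tests for types deleted in Task 11 until Task 14 removes them; compile errors from that project alone are expected at this point)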

dotnet build JdeScoping.slnx

Step 4: Commit

git add -A && git commit -m "refactor(datasync): remove obsolete integration tests project"

Task 13: Delete SourceGenerator Project

Files:

  • Delete: src/JdeScoping.DataSync.SourceGenerators/ (entire project)
  • Modify: JdeScoping.slnx
  • Modify: src/JdeScoping.DataSync/JdeScoping.DataSync.csproj

Step 1: Remove project reference from DataSync.csproj

Step 2: Remove project from solution

dotnet sln JdeScoping.slnx remove src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj

Step 3: Delete project folder

rm -rf src/JdeScoping.DataSync.SourceGenerators

Step 4: Build to verify

dotnet build JdeScoping.slnx

Step 5: Commit

git add -A && git commit -m "refactor(datasync): remove SourceGenerator project"

Task 14: Delete Old Unit Test Files

Files to delete:

  • tests/JdeScoping.DataSync.Tests/Services/BulkMergeHelperTests.cs
  • tests/JdeScoping.DataSync.Tests/Services/ExpressionParserTests.cs
  • tests/JdeScoping.DataSync.Tests/Services/MergeConfigurationRegistryTests.cs
  • tests/JdeScoping.DataSync.Tests/Services/MergeSqlBuilderTests.cs
  • tests/JdeScoping.DataSync.Tests/Services/SchemaValidatorTests.cs
  • tests/JdeScoping.DataSync.Tests/TableSyncOperationTests.cs

Step 1: Delete test files

Step 2: Run remaining tests

dotnet test tests/JdeScoping.DataSync.Tests

Step 3: Commit

git add -A && git commit -m "refactor(datasync): delete obsolete test files"

Task 15: Update Remaining Tests

Files:

  • Modify: tests/JdeScoping.DataSync.Tests/ScheduleCheckerTests.cs
  • Modify: tests/JdeScoping.DataSync.Tests/SyncOrchestratorTests.cs

Step 1: Remove FetcherTypeName references from test fixtures

Step 2: Run tests

dotnet test tests/JdeScoping.DataSync.Tests

Step 3: Commit

git add -A && git commit -m "refactor(datasync): update tests to remove FetcherTypeName"

Task 16: Update appsettings Files

Files:

  • Modify: src/JdeScoping.Host/appsettings.json
  • Modify: src/JdeScoping.Host/appsettings.Development.json

Step 1: Remove obsolete properties from DataSources config

  • FetcherTypeName
  • PostProcessorTypeName
  • PrepurgeData
  • ReIndexData

Step 2: Build and run to verify

dotnet build src/JdeScoping.Host/JdeScoping.Host.csproj

Step 3: Commit

git add -A && git commit -m "refactor(datasync): remove obsolete appsettings properties"

Task 17: Update DataSourceConfig

Files:

  • Modify: src/JdeScoping.DataSync/Options/DataSourceConfig.cs

Note: This task comes AFTER test and appsettings updates to avoid broken builds.

Step 1: Remove obsolete properties

  • FetcherTypeName
  • PostProcessorTypeName
  • PrepurgeData
  • ReIndexData

Step 2: Build to verify

dotnet build JdeScoping.slnx

Step 3: Commit

git add -A && git commit -m "refactor(datasync): remove obsolete DataSourceConfig properties"

Task 18: Final Verification

Step 1: Full build

dotnet build JdeScoping.slnx

Step 2: Run all tests

dotnet test JdeScoping.slnx

Step 3: Commit any fixes

Step 4: Create summary commit

git add -A && git commit -m "feat(datasync): complete migration to JSON-configured ETL pipelines

- Remove legacy Fetchers, MergeConfigurations, BulkMerge services
- Remove SourceGenerator project
- Add EtlPipelineFactory with JSON config
- Add DbQuerySource for JDE/CMS/LotFinder connections
- Extend DbBulkMergeDestination with excludeFromUpdate and updateCondition
- Wire TableSyncOperation to use new pipeline factory
- Update all tests and configuration"