The WHERE clause was comparing Code to itself instead of the aliased table reference, which would always be true.
29 KiB
Development ETL Pipeline Implementation Plan
For Claude: REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
Goal: Create development ETL pipelines that load cached .json.zstd files into SQL Server for local development.
Architecture: Streaming JSON reader (JsonZstdFileSource) feeds into existing ETL pipeline infrastructure.
Tech Stack: .NET 10, ZstdSharp, System.Text.Json, existing ETL framework
Phase 1: Core Infrastructure + Branch Table
Task 1: Add ZstdSharp NuGet Package
Files:
- Modify: `NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
Step 1: Add package reference
<PackageReference Include="ZstdSharp.Port" Version="0.8.1" />
Step 2: Verify package restores
Run: dotnet restore NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Expected: Restore succeeds
Task 2: Create JsonColumnSchema
Files:
- Create: `NEW/src/JdeScoping.DataSync/Etl/Models/JsonColumnSchema.cs`
Step 1: Create the file
namespace JdeScoping.DataSync.Etl.Models;
/// <summary>
/// Defines a column schema for JSON-to-DataReader mapping.
/// </summary>
/// <param name="Name">Column name.</param>
/// <param name="ClrType">CLR type the column's values are exposed as.</param>
/// <param name="IsNullable">Whether the column allows nulls; defaults to <c>true</c>.</param>
public record JsonColumnSchema(
    string Name,
    Type ClrType,
    bool IsNullable = true)
{
    /// <summary>
    /// Gets the SQL type name for this column (used in error messages).
    /// Returns "UNKNOWN" for CLR types that have no mapping.
    /// </summary>
    public string SqlTypeName => ClrType switch
    {
        Type t when t == typeof(string) => "VARCHAR",
        Type t when t == typeof(int) => "INT",
        Type t when t == typeof(long) => "BIGINT",
        Type t when t == typeof(decimal) => "DECIMAL",
        // double is a supported value type elsewhere in this plan, so it needs a name too.
        Type t when t == typeof(double) => "FLOAT",
        Type t when t == typeof(DateTime) => "DATETIME2",
        Type t when t == typeof(bool) => "BIT",
        Type t when t == typeof(byte[]) => "VARBINARY",
        _ => "UNKNOWN"
    };
}
Step 2: Verify it compiles
Run: dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Expected: Build succeeds
Task 3: Create JsonStreamingDataReader
Files:
- Create: `NEW/src/JdeScoping.DataSync/Etl/Sources/JsonStreamingDataReader.cs`
Step 1: Create the file
using System.Data;
using System.Text.Json;
using JdeScoping.DataSync.Etl.Models;
namespace JdeScoping.DataSync.Etl.Sources;
/// <summary>
/// Streams a JSON array of objects as an <see cref="IDataReader"/>, parsing one object at a time.
/// </summary>
/// <remarks>
/// Property names are matched to schema columns case-insensitively. Properties with no matching
/// column are ignored, and columns missing from an object read as <see cref="DBNull"/>.
/// Disposing the reader disposes the <see cref="StreamReader"/> wrapped around the supplied stream.
/// </remarks>
internal sealed class JsonStreamingDataReader : IDataReader
{
    private readonly StreamReader _streamReader;
    private readonly JsonColumnSchema[] _schema;
    private readonly Dictionary<string, int> _nameToOrdinal;
    private readonly object?[] _currentRow;
    private bool _disposed;
    private bool _started;
    private bool _finished;

    /// <summary>
    /// Creates a reader over <paramref name="stream"/> exposing the columns in <paramref name="schema"/>.
    /// </summary>
    /// <param name="stream">Text stream containing a JSON array of objects.</param>
    /// <param name="schema">Column definitions; array position is the column ordinal.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public JsonStreamingDataReader(Stream stream, JsonColumnSchema[] schema)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ArgumentNullException.ThrowIfNull(schema);

        _schema = schema;
        _streamReader = new StreamReader(stream);
        _currentRow = new object?[schema.Length];
        _nameToOrdinal = new Dictionary<string, int>(schema.Length, StringComparer.OrdinalIgnoreCase);
        for (int i = 0; i < schema.Length; i++)
        {
            _nameToOrdinal[schema[i].Name] = i;
        }
    }

    public int FieldCount => _schema.Length;
    public int Depth => 0;
    public bool IsClosed => _disposed;
    public int RecordsAffected => -1;
    public object this[int ordinal] => GetValue(ordinal);
    public object this[string name] => GetValue(GetOrdinal(name));

    public string GetName(int ordinal) => _schema[ordinal].Name;

    public int GetOrdinal(string name) => _nameToOrdinal.TryGetValue(name, out var ordinal)
        ? ordinal
        : throw new IndexOutOfRangeException($"Column '{name}' not found.");

    public Type GetFieldType(int ordinal) => _schema[ordinal].ClrType;
    public string GetDataTypeName(int ordinal) => _schema[ordinal].SqlTypeName;
    public object GetValue(int ordinal) => _currentRow[ordinal] ?? DBNull.Value;
    public bool IsDBNull(int ordinal) => _currentRow[ordinal] is null;

    /// <summary>
    /// Advances to the next object in the array and loads its values into the current row.
    /// </summary>
    /// <returns><c>true</c> if a row was loaded; <c>false</c> at the end of the array, at end of
    /// stream, or once the reader has been disposed.</returns>
    /// <exception cref="InvalidDataException">The input is not a well-formed array of JSON objects,
    /// or a value cannot be converted to its column type.</exception>
    public bool Read()
    {
        if (_disposed || _finished)
        {
            return false;
        }

        try
        {
            // Skip to start of array on first read
            if (!_started)
            {
                SkipWhitespaceAndExpect('[');
                _started = true;
            }

            // Check for end of array or next object. Peek() is kept as an int so that
            // end-of-stream (-1) is never mistaken for a character.
            SkipWhitespace();
            int next = _streamReader.Peek();
            if (next == -1 || next == ']')
            {
                _finished = true;
                return false;
            }

            if (next == ',')
            {
                _streamReader.Read(); // consume comma
                SkipWhitespace();
            }

            // ParseValue converts every element into a standalone CLR value, so the document
            // can be disposed as soon as the row has been populated.
            using var jsonObject = ReadJsonObject();
            if (jsonObject is null)
            {
                _finished = true;
                return false;
            }

            // Map JSON properties to row
            Array.Clear(_currentRow);
            foreach (var property in jsonObject.RootElement.EnumerateObject())
            {
                if (_nameToOrdinal.TryGetValue(property.Name, out var ordinal))
                {
                    _currentRow[ordinal] = ConvertColumn(property.Value, ordinal);
                }
            }

            return true;
        }
        catch (JsonException ex)
        {
            throw new InvalidDataException($"Failed to parse JSON: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Converts one property value to its column type, adding the column name to conversion failures.
    /// </summary>
    private object? ConvertColumn(JsonElement value, int ordinal)
    {
        var column = _schema[ordinal];
        try
        {
            return ParseValue(value, column.ClrType);
        }
        catch (Exception ex) when (ex is FormatException or InvalidOperationException or OverflowException)
        {
            throw new InvalidDataException(
                $"Column '{column.Name}' could not be converted to {column.ClrType.Name}: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Reads the text of the next JSON object from the stream and parses it.
    /// </summary>
    /// <returns>The parsed object (caller disposes), or <c>null</c> at end of array or stream.</returns>
    private JsonDocument? ReadJsonObject()
    {
        SkipWhitespace();
        int first = _streamReader.Peek();
        if (first == -1 || first == ']')
        {
            return null;
        }

        // Fail fast on non-object elements; otherwise the brace scan below would
        // buffer the rest of the stream before the parser rejected it.
        if (first != '{')
        {
            throw new InvalidDataException($"Expected '{{' but found '{(char)first}'.");
        }

        // Read characters until we have a complete JSON object
        var buffer = new System.Text.StringBuilder();
        int depth = 0;
        bool inString = false;
        bool escaped = false;
        int c;
        while ((c = _streamReader.Read()) != -1)
        {
            char ch = (char)c;
            buffer.Append(ch);

            if (escaped)
            {
                escaped = false;
                continue;
            }

            if (ch == '\\' && inString)
            {
                escaped = true;
                continue;
            }

            if (ch == '"')
            {
                inString = !inString;
                continue;
            }

            if (inString)
            {
                continue;
            }

            if (ch == '{')
            {
                depth++;
            }
            else if (ch == '}' && --depth == 0)
            {
                break;
            }
        }

        // A truncated object is rejected here with a JsonException, which Read() wraps.
        return JsonDocument.Parse(buffer.ToString());
    }

    /// <summary>
    /// Converts a JSON element to <paramref name="targetType"/>; JSON null becomes <c>null</c>.
    /// </summary>
    /// <exception cref="NotSupportedException"><paramref name="targetType"/> has no conversion.</exception>
    private static object? ParseValue(JsonElement element, Type targetType)
    {
        if (element.ValueKind == JsonValueKind.Null)
        {
            return null;
        }

        if (targetType == typeof(string))
        {
            return element.GetString();
        }

        if (targetType == typeof(int))
        {
            return element.TryGetInt32(out var i) ? i : (int)element.GetDouble();
        }

        if (targetType == typeof(long))
        {
            return element.TryGetInt64(out var l) ? l : (long)element.GetDouble();
        }

        if (targetType == typeof(decimal))
        {
            return element.TryGetDecimal(out var d) ? d : (decimal)element.GetDouble();
        }

        if (targetType == typeof(DateTime))
        {
            if (element.ValueKind == JsonValueKind.String)
            {
                // Invariant culture: the cache file is machine-written, so parsing must not
                // depend on the culture of the machine running the import.
                return DateTime.Parse(
                    element.GetString()!,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.RoundtripKind);
            }

            return element.GetDateTime();
        }

        if (targetType == typeof(bool))
        {
            return element.GetBoolean();
        }

        if (targetType == typeof(byte[]))
        {
            return element.GetBytesFromBase64();
        }

        if (targetType == typeof(double))
        {
            return element.GetDouble();
        }

        throw new NotSupportedException($"Type {targetType.Name} is not supported.");
    }

    private void SkipWhitespace()
    {
        int c;
        while ((c = _streamReader.Peek()) != -1 && char.IsWhiteSpace((char)c))
        {
            _streamReader.Read();
        }
    }

    private void SkipWhitespaceAndExpect(char expected)
    {
        SkipWhitespace();
        int actual = _streamReader.Read();
        if (actual == -1)
        {
            throw new InvalidDataException($"Expected '{expected}' but reached end of stream.");
        }

        if (actual != expected)
        {
            throw new InvalidDataException($"Expected '{expected}' but found '{(char)actual}'.");
        }
    }

    // IDataReader methods - typed getters
    public bool GetBoolean(int ordinal) => (bool)GetValue(ordinal);
    public byte GetByte(int ordinal) => (byte)GetValue(ordinal);

    /// <summary>
    /// Copies up to <paramref name="length"/> bytes starting at <paramref name="fieldOffset"/>.
    /// Returns the total field length when <paramref name="buffer"/> is null, and 0 when the
    /// offset is at or past the end of the data.
    /// </summary>
    public long GetBytes(int ordinal, long fieldOffset, byte[]? buffer, int bufferOffset, int length)
    {
        var data = (byte[])GetValue(ordinal);
        if (buffer is null)
        {
            return data.Length;
        }

        long remaining = data.Length - fieldOffset;
        if (remaining <= 0)
        {
            return 0;
        }

        int toCopy = (int)Math.Min(length, remaining);
        Array.Copy(data, fieldOffset, buffer, bufferOffset, toCopy);
        return toCopy;
    }

    public char GetChar(int ordinal) => ((string)GetValue(ordinal))[0];

    /// <summary>
    /// Copies up to <paramref name="length"/> characters starting at <paramref name="fieldOffset"/>.
    /// Returns the total field length when <paramref name="buffer"/> is null, and 0 when the
    /// offset is at or past the end of the data.
    /// </summary>
    public long GetChars(int ordinal, long fieldOffset, char[]? buffer, int bufferOffset, int length)
    {
        var data = (string)GetValue(ordinal);
        if (buffer is null)
        {
            return data.Length;
        }

        long remaining = data.Length - fieldOffset;
        if (remaining <= 0)
        {
            return 0;
        }

        int toCopy = (int)Math.Min(length, remaining);
        data.CopyTo((int)fieldOffset, buffer, bufferOffset, toCopy);
        return toCopy;
    }

    public IDataReader GetData(int ordinal) => throw new NotSupportedException();
    public DateTime GetDateTime(int ordinal) => (DateTime)GetValue(ordinal);
    public decimal GetDecimal(int ordinal) => (decimal)GetValue(ordinal);
    public double GetDouble(int ordinal) => (double)GetValue(ordinal);
    public float GetFloat(int ordinal) => (float)GetValue(ordinal);
    public Guid GetGuid(int ordinal) => (Guid)GetValue(ordinal);
    public short GetInt16(int ordinal) => (short)GetValue(ordinal);
    public int GetInt32(int ordinal) => (int)GetValue(ordinal);
    public long GetInt64(int ordinal) => (long)GetValue(ordinal);
    public string GetString(int ordinal) => (string)GetValue(ordinal);

    public int GetValues(object[] values)
    {
        ArgumentNullException.ThrowIfNull(values);

        var count = Math.Min(values.Length, _currentRow.Length);
        for (int i = 0; i < count; i++)
        {
            values[i] = GetValue(i);
        }

        return count;
    }

    /// <summary>
    /// Builds a schema table with one row per column
    /// (ColumnName, ColumnOrdinal, DataType, AllowDBNull).
    /// </summary>
    public DataTable GetSchemaTable()
    {
        var table = new DataTable("SchemaTable");
        table.Columns.Add("ColumnName", typeof(string));
        table.Columns.Add("ColumnOrdinal", typeof(int));
        table.Columns.Add("DataType", typeof(Type));
        table.Columns.Add("AllowDBNull", typeof(bool));
        for (int i = 0; i < _schema.Length; i++)
        {
            table.Rows.Add(_schema[i].Name, i, _schema[i].ClrType, _schema[i].IsNullable);
        }

        return table;
    }

    public bool NextResult() => false;
    public void Close() => Dispose();

    public void Dispose()
    {
        if (!_disposed)
        {
            _streamReader.Dispose();
            _disposed = true;
        }
    }
}
Step 2: Verify it compiles
Run: dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Expected: Build succeeds
Task 4: Create JsonZstdFileSource
Files:
- Create: `NEW/src/JdeScoping.DataSync/Etl/Sources/JsonZstdFileSource.cs`
Step 1: Create the file
using System.Data;
using JdeScoping.DataSync.Etl.Contracts;
using JdeScoping.DataSync.Etl.Models;
using ZstdSharp;
namespace JdeScoping.DataSync.Etl.Sources;
/// <summary>
/// Import source that reads from a zstd-compressed JSON array file.
/// </summary>
/// <remarks>
/// The source can have only one open reader at a time: <see cref="ReadDataAsync"/> throws if it
/// is called again before <see cref="DisposeAsync"/> has released the previous streams.
/// </remarks>
public sealed class JsonZstdFileSource : IImportSource
{
    private readonly string _filePath;
    private readonly JsonColumnSchema[] _schema;
    private FileStream? _fileStream;
    private DecompressionStream? _decompressionStream;
    private JsonStreamingDataReader? _reader;

    public string SourceName => $"JsonZstd:{Path.GetFileName(_filePath)}";

    /// <summary>
    /// Creates a source for the given cache file.
    /// </summary>
    /// <param name="filePath">Path to an existing <c>.json.zstd</c> file.</param>
    /// <param name="schema">Column definitions used to map JSON properties to columns.</param>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or whitespace.</exception>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="schema"/> is null.</exception>
    public JsonZstdFileSource(string filePath, JsonColumnSchema[] schema)
    {
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new ArgumentException("File path cannot be null or empty.", nameof(filePath));
        }

        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"Cache file not found: {filePath}", filePath);
        }

        _filePath = filePath;
        _schema = schema ?? throw new ArgumentNullException(nameof(schema));
    }

    /// <summary>
    /// Opens the file and returns a streaming reader over its decompressed JSON content.
    /// </summary>
    /// <exception cref="InvalidOperationException">A reader from a previous call is still open.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> is already cancelled.</exception>
    public Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // A second call would overwrite the stream fields and leak the open handles.
        if (_fileStream is not null)
        {
            throw new InvalidOperationException(
                $"{SourceName} has already been opened; dispose it before reading again.");
        }

        try
        {
            _fileStream = new FileStream(_filePath, FileMode.Open, FileAccess.Read, FileShare.Read,
                bufferSize: 65536, useAsync: true);
            _decompressionStream = new DecompressionStream(_fileStream);
            _reader = new JsonStreamingDataReader(_decompressionStream, _schema);
            return Task.FromResult<IDataReader>(_reader);
        }
        catch
        {
            // Release whatever was opened before the failure, then reset so a retry is possible.
            _reader?.Dispose();
            _decompressionStream?.Dispose();
            _fileStream?.Dispose();
            _reader = null;
            _decompressionStream = null;
            _fileStream = null;
            throw;
        }
    }

    /// <summary>
    /// Disposes the reader and both underlying streams; safe to call more than once.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_reader is not null)
        {
            _reader.Dispose();
            _reader = null;
        }

        if (_decompressionStream is not null)
        {
            await _decompressionStream.DisposeAsync();
            _decompressionStream = null;
        }

        if (_fileStream is not null)
        {
            await _fileStream.DisposeAsync();
            _fileStream = null;
        }
    }
}
Step 2: Verify it compiles
Run: dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Expected: Build succeeds
Task 5: Create BranchDevEtl
Files:
- Create: `NEW/src/JdeScoping.DataSync/DevEtl/BranchDevEtl.cs`
Reference - Branch table schema from 003_CreateBranchTable.sql:
| Column | Type | Nullability |
|---|---|---|
| Code | VARCHAR(12) | NOT NULL |
| Description | VARCHAR(40) | NULL |
| LastUpdateDT | DATETIME2(7) | NOT NULL |
Step 1: Create the file
using JdeScoping.DataAccess;
using JdeScoping.DataSync.Etl.Destinations;
using JdeScoping.DataSync.Etl.Models;
using JdeScoping.DataSync.Etl.Pipeline;
using JdeScoping.DataSync.Etl.Sources;
namespace JdeScoping.DataSync.DevEtl;
/// <summary>
/// Factory for the development ETL pipeline that bulk-loads the Branch table
/// from its cached, zstd-compressed JSON file.
/// </summary>
public static class BranchDevEtl
{
    public static readonly string TableName = "Branch";
    public static readonly string CacheFileName = "branch.json.zstd";

    // Column layout of the Branch table: name, CLR type, nullability.
    private static readonly JsonColumnSchema[] Schema =
    [
        new JsonColumnSchema("Code", typeof(string), false),
        new JsonColumnSchema("Description", typeof(string), true),
        new JsonColumnSchema("LastUpdateDT", typeof(DateTime), false),
    ];

    /// <summary>
    /// Builds the Branch pipeline reading from <paramref name="cacheFilePath"/>.
    /// </summary>
    /// <param name="connectionFactory">Factory handed to the bulk-import destination.</param>
    /// <param name="cacheFilePath">Path of the cached Branch file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="cacheFilePath"/> is null, empty or whitespace.</exception>
    public static EtlPipeline Create(IDbConnectionFactory connectionFactory, string cacheFilePath)
    {
        if (connectionFactory is null)
        {
            throw new ArgumentNullException(nameof(connectionFactory));
        }

        if (string.IsNullOrWhiteSpace(cacheFilePath))
        {
            throw new ArgumentException("Cache file path is required.", nameof(cacheFilePath));
        }

        // Steps run in the same order as a single fluent chain: name, source, destination, build.
        var pipelineName = $"{TableName}_Dev";
        var named = new EtlPipelineBuilder().WithName(pipelineName);
        var sourced = named.WithSource(new JsonZstdFileSource(cacheFilePath, Schema));
        var targeted = sourced.WithDestination(new DbBulkImportDestination(connectionFactory, TableName));
        return targeted.Build();
    }
}
Step 2: Verify it compiles
Run: dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Expected: Build succeeds
Task 6: Create DevEtlRegistry
Files:
- Create: `NEW/src/JdeScoping.DataSync/DevEtl/DevEtlRegistry.cs`
Step 1: Create the file
using JdeScoping.DataAccess;
using JdeScoping.DataSync.Etl.Pipeline;
using JdeScoping.DataSync.Etl.Results;
using Microsoft.Extensions.Logging;
namespace JdeScoping.DataSync.DevEtl;
/// <summary>
/// Registry for development ETL pipelines that load from cached JSON files.
/// </summary>
public class DevEtlRegistry
{
    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _cacheDirectory;
    private readonly ILogger<DevEtlRegistry>? _logger;

    // Table name (case-insensitive) -> factory taking the connection factory and cache directory.
    private readonly Dictionary<string, Func<IDbConnectionFactory, string, EtlPipeline>> _pipelineFactories = new(StringComparer.OrdinalIgnoreCase)
    {
        [BranchDevEtl.TableName] = (factory, cacheDir) =>
            BranchDevEtl.Create(factory, Path.Combine(cacheDir, BranchDevEtl.CacheFileName)),
    };

    /// <summary>
    /// Creates a registry that resolves cache files from <paramref name="cacheDirectory"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="cacheDirectory"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException"><paramref name="cacheDirectory"/> does not exist.</exception>
    public DevEtlRegistry(
        IDbConnectionFactory connectionFactory,
        string cacheDirectory,
        ILogger<DevEtlRegistry>? logger = null)
    {
        _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));

        if (string.IsNullOrWhiteSpace(cacheDirectory))
        {
            throw new ArgumentException("Cache directory is required.", nameof(cacheDirectory));
        }

        if (!Directory.Exists(cacheDirectory))
        {
            throw new DirectoryNotFoundException($"Cache directory not found: {cacheDirectory}");
        }

        _cacheDirectory = cacheDirectory;
        _logger = logger;
    }

    /// <summary>Names of all tables that have a registered pipeline.</summary>
    public IEnumerable<string> GetAvailableTables() => _pipelineFactories.Keys;

    /// <summary>
    /// Creates the pipeline registered for <paramref name="tableName"/> (case-insensitive).
    /// </summary>
    /// <exception cref="ArgumentException">No pipeline is registered for the table.</exception>
    public EtlPipeline GetPipeline(string tableName)
    {
        if (!_pipelineFactories.TryGetValue(tableName, out var factory))
        {
            throw new ArgumentException($"No pipeline registered for table '{tableName}'.", nameof(tableName));
        }

        return factory(_connectionFactory, _cacheDirectory);
    }

    /// <summary>
    /// Runs the pipeline for one table and logs the outcome.
    /// </summary>
    /// <returns>The pipeline result, whether it succeeded or failed.</returns>
    public async Task<PipelineResult> RunAsync(string tableName, CancellationToken cancellationToken = default)
    {
        _logger?.LogInformation("Running dev ETL for {TableName}", tableName);
        var pipeline = GetPipeline(tableName);
        var result = await pipeline.ExecuteAsync(cancellationToken);

        if (result.Success)
        {
            _logger?.LogInformation("Completed {TableName}: {Rows} rows in {Elapsed:g}",
                tableName, result.TotalRows, result.Elapsed);
        }
        else
        {
            _logger?.LogError(result.Error, "Failed {TableName}: {Error}",
                tableName, result.Error?.Message);
        }

        return result;
    }

    /// <summary>
    /// Runs every registered pipeline sequentially.
    /// </summary>
    /// <returns>One result per registered table.</returns>
    /// <exception cref="OperationCanceledException">Cancellation was requested before the next
    /// pipeline started.</exception>
    public async Task<IReadOnlyList<PipelineResult>> RunAllAsync(CancellationToken cancellationToken = default)
    {
        var results = new List<PipelineResult>();
        foreach (var tableName in GetAvailableTables())
        {
            // Throw rather than break: a silently truncated list would look like a complete run.
            cancellationToken.ThrowIfCancellationRequested();
            results.Add(await RunAsync(tableName, cancellationToken));
        }

        return results;
    }
}
Step 2: Verify it compiles
Run: dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj
Expected: Build succeeds
Task 7: Create Integration Test for Branch
Files:
- Create: `NEW/tests/JdeScoping.DataSync.Tests/DevEtl/BranchDevEtlTests.cs`
Step 1: Create the test file
using FluentAssertions;
using JdeScoping.DataAccess;
using JdeScoping.DataSync.DevEtl;
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Configuration;
using Xunit;
namespace JdeScoping.DataSync.Tests.DevEtl;
/// <summary>
/// Integration tests for Branch development ETL.
/// Requires: Local SQL Server, CACHED_DB_FILES directory with branch.json.zstd
/// </summary>
/// <remarks>
/// When the cache file or directory is missing, a test returns early and reports as passed.
/// </remarks>
public class BranchDevEtlTests : IAsyncLifetime
{
    // Must match the connection string name DbConnectionFactory reads from configuration.
    private const string ConnectionStringName = "LotFinderDB";

    private readonly string _connectionString;
    private readonly string _cacheDirectory;
    private readonly IDbConnectionFactory _connectionFactory;

    public BranchDevEtlTests()
    {
        // Load configuration
        var config = new ConfigurationBuilder()
            .AddJsonFile("appsettings.json", optional: true)
            .AddEnvironmentVariables()
            .Build();

        _connectionString = config.GetConnectionString(ConnectionStringName)
            ?? throw new InvalidOperationException($"{ConnectionStringName} connection string not configured.");
        _cacheDirectory = config["DevEtl:CacheDirectory"]
            ?? Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "..", "..", "..", "CACHED_DB_FILES");

        // DbConnectionFactory resolves its connection string from IConfiguration.
        _connectionFactory = new DbConnectionFactory(config);
    }

    public async Task InitializeAsync()
    {
        // Ensure Branch table is empty before test
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync();
        await using var command = new SqlCommand("TRUNCATE TABLE dbo.Branch", connection);
        await command.ExecuteNonQueryAsync();
    }

    public Task DisposeAsync() => Task.CompletedTask;

    [Fact]
    public void Create_ReturnsValidPipeline()
    {
        // Arrange
        var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
        if (!File.Exists(cacheFilePath))
        {
            return; // cache file not available on this machine
        }

        // Act
        var pipeline = BranchDevEtl.Create(_connectionFactory, cacheFilePath);

        // Assert
        pipeline.Should().NotBeNull();
        pipeline.PipelineName.Should().Be("Branch_Dev");
    }

    [Fact]
    public async Task Execute_LoadsBranchData()
    {
        // Arrange
        var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
        if (!File.Exists(cacheFilePath))
        {
            return; // cache file not available on this machine
        }

        var pipeline = BranchDevEtl.Create(_connectionFactory, cacheFilePath);

        // Act
        var result = await pipeline.ExecuteAsync();

        // Assert
        result.Success.Should().BeTrue(because: result.Error?.Message ?? "Pipeline should succeed");
        result.TotalRows.Should().BeGreaterThan(0, "Should load at least one row");

        // Verify data in database
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync();
        await using var command = new SqlCommand("SELECT COUNT(*) FROM dbo.Branch", connection);
        var count = (int)(await command.ExecuteScalarAsync())!;
        count.Should().Be((int)result.TotalRows, "Database row count should match pipeline result");
    }

    [Fact]
    public async Task Registry_RunAsync_LoadsBranch()
    {
        // Arrange
        if (!Directory.Exists(_cacheDirectory))
        {
            return; // cache directory not available on this machine
        }

        var registry = new DevEtlRegistry(_connectionFactory, _cacheDirectory);

        // Act
        var result = await registry.RunAsync("Branch");

        // Assert
        result.Success.Should().BeTrue(because: result.Error?.Message ?? "Pipeline should succeed");
        result.TotalRows.Should().BeGreaterThan(0);
    }
}
Step 2: Add test project dependencies if needed
Verify JdeScoping.DataSync.Tests.csproj has:
- Reference to JdeScoping.DataSync
- FluentAssertions
- xunit
- xunit.runner.visualstudio
Step 3: Run the tests
Run: dotnet test NEW/tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~BranchDevEtlTests"
Expected: Tests pass (or skip if cache file not found)
Task 8: Run End-to-End Test and Debug
Step 1: Ensure database is running
Run: docker ps | grep scopingtool-sqlserver
Expected: Container is running
Step 2: Run the integration test
Run: dotnet test NEW/tests/JdeScoping.DataSync.Tests --filter "BranchDevEtlTests.Execute_LoadsBranchData" -v normal
Step 3: If test fails, debug the issue
Common issues to check:
- Connection string correct in appsettings.json
- Cache file exists and is readable
- Branch table exists in database
- JSON parsing errors (check column name case sensitivity)
Step 4: Verify data in database
Run SQL: SELECT TOP 5 * FROM dbo.Branch ORDER BY Code
Expected: See branch records from cache file
Phase 2: Lessons Learned
Issues Encountered and Fixes
- JsonDocument Memory Leak
  - Issue: `ReadJsonObject()` returned a `JsonDocument` that wasn't being disposed, causing memory accumulation
  - Fix: Changed to `using var jsonObject = ReadJsonObject();` in the `Read()` method
  - Lesson: Always dispose `JsonDocument` instances - they rent pooled buffers that are only returned on disposal
- Multiple ReadDataAsync Calls
  - Issue: `JsonZstdFileSource.ReadDataAsync()` could be called multiple times, causing stream leaks
  - Fix: Added guard: `if (_fileStream != null) throw new InvalidOperationException(...)`
  - Lesson: Sources should only be readable once; enforce this with guards
- Exception Safety in Stream Initialization
  - Issue: If stream creation failed partway through (e.g., DecompressionStream fails), earlier streams leaked
  - Fix: Wrapped initialization in try-catch with cleanup in catch block:

    ```csharp
    try
    {
        _fileStream = new FileStream(...);
        _decompressionStream = new DecompressionStream(_fileStream);
        _reader = new JsonStreamingDataReader(...);
        return Task.FromResult<IDataReader>(_reader);
    }
    catch
    {
        _reader?.Dispose();
        _decompressionStream?.Dispose();
        _fileStream?.Dispose();
        throw;
    }
    ```

  - Lesson: Multi-resource initialization needs exception safety
- Cancellation Token Handling
  - Issue: `RunAllAsync` used `IsCancellationRequested` + `break`, which silently stops without an exception
  - Fix: Changed to `cancellationToken.ThrowIfCancellationRequested();`
  - Lesson: Prefer `ThrowIfCancellationRequested()` for proper cancellation semantics
- Connection String Naming Convention
  - Issue: Test used `"LotFinder"` but `DbConnectionFactory` expects `"LotFinderDB"`
  - Fix: Updated appsettings.json key to `"LotFinderDB"`
  - Lesson: Match connection string names to what `DbConnectionFactory` expects
- Hardcoded Absolute Paths
  - Issue: Fallback cache directory path was user-specific: `/Users/dohertj2/Desktop/...`
  - Fix: Changed to relative path using `Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "...")`
  - Lesson: Use relative paths for portability; config should specify absolute paths
Patterns That Worked Well
- IAsyncLifetime for Test Isolation
  - Using `IAsyncLifetime.InitializeAsync()` to truncate tables before each test ensures clean state
  - Pattern: `TRUNCATE TABLE dbo.{Table}` in `InitializeAsync()`
- Shouldly Assertions
  - Project uses Shouldly instead of FluentAssertions
  - Pattern: `result.Success.ShouldBeTrue(result.Error?.Message ?? "reason")`
- Nullable File Checks in Tests
  - Early return when cache files don't exist (graceful skip)
  - Pattern: `if (!File.Exists(cacheFilePath)) return;`
- Static Factory Pattern for DevEtl Classes
  - Clean separation: static `Create()` method with explicit validation
  - Pattern: `ArgumentNullException.ThrowIfNull(connectionFactory);`
- Property Naming
  - Pipeline property is `PipelineName` (not `Name`)
  - Pattern: `pipeline.PipelineName.ShouldBe("Branch_Dev")`
Performance Observations
- Branch table (930 bytes compressed, ~10 rows) loads in ~75ms including decompression
- Streaming approach successfully processes one JSON object at a time
- No memory issues observed - suitable for larger files
Code Corrections from Original Plan
| Original Plan | Actual Implementation |
|---|---|
| `pipeline.Name` | `pipeline.PipelineName` |
| FluentAssertions | Shouldly |
| `Skip.IfNot()` | Early return with `if (!exists) return;` |
| `IDbConnectionFactory` constructor with string | Constructor takes `IConfiguration` |
| Dapper for test queries | Direct SqlConnection + ExecuteScalarAsync |
Phase 3: Remaining Tables
After Phase 2, add remaining tables following the established pattern. Priority order by file size:
- Small (< 1 MB): OrgHierarchy, WorkCenter, ProfitCenter
- Medium (1-20 MB): JdeUser, FunctionCode, Item, RouteMaster
- Large (20-200 MB): Lot, MisData, WorkOrder_Curr/Hist, LotUsage_Hist
- Very Large (200+ MB): LotUsage_Curr, WorkOrderRouting, WorkOrderStep, WorkOrderTime, WorkOrderComponent
For each table:
- Read the CREATE TABLE script from Database/Scripts/
- Create `{Table}DevEtl.cs` with explicit schema
- Register in `DevEtlRegistry._pipelineFactories`
- Add integration test
- Verify with sample data