d4135e8ad3
The WHERE clause was comparing Code to itself instead of the aliased table reference, which would always be true.
869 lines
29 KiB
Markdown
869 lines
29 KiB
Markdown
# Development ETL Pipeline Implementation Plan
|
|
|
|
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
|
|
|
**Goal:** Create development ETL pipelines that load cached `.json.zstd` files into SQL Server for local development.
|
|
|
|
**Architecture:** Streaming JSON reader (`JsonZstdFileSource`) feeds into existing ETL pipeline infrastructure.
|
|
|
|
**Tech Stack:** .NET 10, ZstdSharp, System.Text.Json, existing ETL framework
|
|
|
|
---
|
|
|
|
## Phase 1: Core Infrastructure + Branch Table
|
|
|
|
### Task 1: Add ZstdSharp NuGet Package
|
|
|
|
**Files:**
|
|
- Modify: `NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
|
|
**Step 1: Add package reference**
|
|
|
|
```xml
|
|
<PackageReference Include="ZstdSharp.Port" Version="0.8.1" />
|
|
```
|
|
|
|
**Step 2: Verify package restores**
|
|
|
|
Run: `dotnet restore NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
Expected: Restore succeeds
|
|
|
|
---
|
|
|
|
### Task 2: Create JsonColumnSchema
|
|
|
|
**Files:**
|
|
- Create: `NEW/src/JdeScoping.DataSync/Etl/Models/JsonColumnSchema.cs`
|
|
|
|
**Step 1: Create the file**
|
|
|
|
```csharp
|
|
namespace JdeScoping.DataSync.Etl.Models;
|
|
|
|
/// <summary>
/// Describes one column of the tabular shape that JSON objects are mapped onto.
/// </summary>
/// <param name="Name">Column name.</param>
/// <param name="ClrType">CLR type the column's values are materialized as.</param>
/// <param name="IsNullable">Whether the column accepts nulls. Defaults to <c>true</c>.</param>
public record JsonColumnSchema(
    string Name,
    Type ClrType,
    bool IsNullable = true)
{
    /// <summary>
    /// Gets the SQL type family name for <see cref="ClrType"/> (used in error messages).
    /// The name carries no length, precision or scale. CLR types without a mapping
    /// yield <c>"UNKNOWN"</c>.
    /// </summary>
    public string SqlTypeName => ClrType switch
    {
        _ when ClrType == typeof(string) => "VARCHAR",
        _ when ClrType == typeof(int) => "INT",
        _ when ClrType == typeof(long) => "BIGINT",
        _ when ClrType == typeof(decimal) => "DECIMAL",
        // double is a CLR type the JSON reader can materialize, so give it a real name
        // instead of letting it fall through to "UNKNOWN".
        _ when ClrType == typeof(double) => "FLOAT",
        _ when ClrType == typeof(DateTime) => "DATETIME2",
        _ when ClrType == typeof(bool) => "BIT",
        _ when ClrType == typeof(byte[]) => "VARBINARY",
        _ => "UNKNOWN",
    };
}
|
|
```
|
|
|
|
**Step 2: Verify it compiles**
|
|
|
|
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
Expected: Build succeeds
|
|
|
|
---
|
|
|
|
### Task 3: Create JsonStreamingDataReader
|
|
|
|
**Files:**
|
|
- Create: `NEW/src/JdeScoping.DataSync/Etl/Sources/JsonStreamingDataReader.cs`
|
|
|
|
**Step 1: Create the file**
|
|
|
|
```csharp
|
|
using System.Data;
|
|
using System.Text.Json;
|
|
using JdeScoping.DataSync.Etl.Models;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Sources;
|
|
|
|
/// <summary>
/// Streams a top-level JSON array of objects as an <see cref="IDataReader"/>,
/// parsing one object per <see cref="Read"/> call.
/// </summary>
/// <remarks>
/// <para>
/// JSON property names are matched to schema columns case-insensitively. Properties
/// with no matching column are ignored, and columns with no matching property read
/// as <see cref="DBNull.Value"/>.
/// </para>
/// <para>
/// Disposing the reader disposes the <see cref="StreamReader"/> wrapped around the
/// supplied stream, which in turn closes that stream.
/// </para>
/// </remarks>
internal sealed class JsonStreamingDataReader : IDataReader
{
    private readonly StreamReader _streamReader;
    private readonly JsonColumnSchema[] _schema;
    private readonly Dictionary<string, int> _nameToOrdinal;
    private readonly object?[] _currentRow;
    private bool _disposed;
    private bool _started;
    private bool _finished;

    /// <summary>
    /// Creates a reader over <paramref name="stream"/> using <paramref name="schema"/>
    /// to decide column order and value types.
    /// </summary>
    /// <param name="stream">Text stream containing a JSON array of objects.</param>
    /// <param name="schema">Columns to expose, in ordinal order.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public JsonStreamingDataReader(Stream stream, JsonColumnSchema[] schema)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ArgumentNullException.ThrowIfNull(schema);

        _schema = schema;
        _streamReader = new StreamReader(stream);
        _currentRow = new object?[schema.Length];

        _nameToOrdinal = new Dictionary<string, int>(schema.Length, StringComparer.OrdinalIgnoreCase);
        for (int i = 0; i < schema.Length; i++)
        {
            // Indexer assignment: a duplicated column name resolves to its last ordinal.
            _nameToOrdinal[schema[i].Name] = i;
        }
    }

    public int FieldCount => _schema.Length;

    public int Depth => 0;

    public bool IsClosed => _disposed;

    public int RecordsAffected => -1;

    public object this[int ordinal] => GetValue(ordinal);

    public object this[string name] => GetValue(GetOrdinal(name));

    public string GetName(int ordinal) => _schema[ordinal].Name;

    /// <summary>Resolves a column name (case-insensitive) to its ordinal.</summary>
    /// <exception cref="IndexOutOfRangeException">No column has that name.</exception>
    public int GetOrdinal(string name) => _nameToOrdinal.TryGetValue(name, out var ordinal)
        ? ordinal
        : throw new IndexOutOfRangeException($"Column '{name}' not found.");

    public Type GetFieldType(int ordinal) => _schema[ordinal].ClrType;

    public string GetDataTypeName(int ordinal) => _schema[ordinal].SqlTypeName;

    /// <summary>Returns the current row's value, or <see cref="DBNull.Value"/> for null.</summary>
    public object GetValue(int ordinal) => _currentRow[ordinal] ?? DBNull.Value;

    public bool IsDBNull(int ordinal) => _currentRow[ordinal] is null;

    /// <summary>
    /// Advances to the next object in the array and loads it as the current row.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a row was loaded; <c>false</c> at the closing <c>]</c>, at end of
    /// stream, or once the reader has been disposed.
    /// </returns>
    /// <exception cref="InvalidDataException">The input is not a well-formed array of objects.</exception>
    public bool Read()
    {
        if (_disposed || _finished)
        {
            return false;
        }

        try
        {
            // The opening bracket is consumed once, on the first call.
            if (!_started)
            {
                SkipWhitespaceAndExpect('[');
                _started = true;
            }

            SkipWhitespace();

            // Keep the peeked value as an int so end-of-stream (-1) is never
            // mistaken for a character.
            int next = _streamReader.Peek();

            if (next == ']')
            {
                _finished = true;
                return false;
            }

            if (next == ',')
            {
                _streamReader.Read(); // consume the element separator
                SkipWhitespace();
            }

            // JsonDocument rents pooled buffers; dispose it as soon as the row has
            // been copied out, otherwise every row leaks its buffer.
            using var jsonObject = ReadJsonObject();
            if (jsonObject is null)
            {
                _finished = true;
                return false;
            }

            Array.Clear(_currentRow);
            foreach (var property in jsonObject.RootElement.EnumerateObject())
            {
                if (_nameToOrdinal.TryGetValue(property.Name, out var ordinal))
                {
                    _currentRow[ordinal] = ParseValue(property.Value, _schema[ordinal].ClrType);
                }
            }

            return true;
        }
        catch (JsonException ex)
        {
            throw new InvalidDataException($"Failed to parse JSON: {ex.Message}", ex);
        }
    }

    /// <summary>
    /// Reads the text of the next JSON object from the stream and parses it.
    /// </summary>
    /// <returns>
    /// The parsed object (the caller owns and must dispose it), or <c>null</c> when the
    /// next token is <c>]</c> or the stream has ended.
    /// </returns>
    /// <exception cref="InvalidDataException">
    /// The next element is not an object, or the stream ends inside an object.
    /// </exception>
    private JsonDocument? ReadJsonObject()
    {
        SkipWhitespace();

        int first = _streamReader.Peek();
        if (first == -1 || first == ']')
        {
            return null;
        }

        // Fail fast on non-object elements. Without this check the brace counter
        // below never returns to zero and the rest of the file would be buffered
        // into memory before parsing finally fails.
        if (first != '{')
        {
            throw new InvalidDataException(
                $"Expected '{{' at start of array element but found '{(char)first}'.");
        }

        var buffer = new System.Text.StringBuilder();
        int depth = 0;
        bool inString = false;
        bool escaped = false;

        while (true)
        {
            int c = _streamReader.Read();
            if (c == -1)
            {
                throw new InvalidDataException("Unexpected end of stream inside a JSON object.");
            }

            char ch = (char)c;
            buffer.Append(ch);

            if (escaped)
            {
                // The character after a backslash is literal, whatever it is.
                escaped = false;
                continue;
            }

            if (ch == '\\' && inString)
            {
                escaped = true;
                continue;
            }

            if (ch == '"')
            {
                inString = !inString;
                continue;
            }

            if (inString)
            {
                // Braces inside string values must not affect nesting depth.
                continue;
            }

            if (ch == '{')
            {
                depth++;
            }
            else if (ch == '}')
            {
                depth--;
                if (depth == 0)
                {
                    break;
                }
            }
        }

        return JsonDocument.Parse(buffer.ToString());
    }

    /// <summary>
    /// Converts a JSON value to <paramref name="targetType"/>.
    /// </summary>
    /// <returns>The converted value, or <c>null</c> for a JSON null.</returns>
    /// <exception cref="NotSupportedException"><paramref name="targetType"/> has no conversion.</exception>
    private static object? ParseValue(JsonElement element, Type targetType)
    {
        if (element.ValueKind == JsonValueKind.Null)
        {
            return null;
        }

        if (targetType == typeof(string))
        {
            return element.GetString();
        }

        // Integral targets fall back to truncating a double so that values
        // serialized as e.g. 12.0 still load.
        if (targetType == typeof(int))
        {
            return element.TryGetInt32(out var i) ? i : (int)element.GetDouble();
        }

        if (targetType == typeof(long))
        {
            return element.TryGetInt64(out var l) ? l : (long)element.GetDouble();
        }

        if (targetType == typeof(decimal))
        {
            return element.TryGetDecimal(out var d) ? d : (decimal)element.GetDouble();
        }

        if (targetType == typeof(DateTime))
        {
            if (element.ValueKind == JsonValueKind.String)
            {
                // Cache files are machine-written, so parse culture-invariantly
                // rather than with the current thread culture.
                return DateTime.Parse(
                    element.GetString()!,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.RoundtripKind);
            }

            return element.GetDateTime();
        }

        if (targetType == typeof(bool))
        {
            return element.GetBoolean();
        }

        if (targetType == typeof(byte[]))
        {
            return element.GetBytesFromBase64();
        }

        if (targetType == typeof(double))
        {
            return element.GetDouble();
        }

        throw new NotSupportedException($"Type {targetType.Name} is not supported.");
    }

    /// <summary>Consumes whitespace up to the next significant character or end of stream.</summary>
    private void SkipWhitespace()
    {
        while (true)
        {
            int next = _streamReader.Peek();
            if (next == -1 || !char.IsWhiteSpace((char)next))
            {
                return;
            }

            _streamReader.Read();
        }
    }

    /// <summary>Skips whitespace, then consumes one character that must equal <paramref name="expected"/>.</summary>
    /// <exception cref="InvalidDataException">A different character, or end of stream, was found.</exception>
    private void SkipWhitespaceAndExpect(char expected)
    {
        SkipWhitespace();

        int actual = _streamReader.Read();
        if (actual == -1)
        {
            throw new InvalidDataException($"Expected '{expected}' but reached end of stream.");
        }

        if (actual != expected)
        {
            throw new InvalidDataException($"Expected '{expected}' but found '{(char)actual}'.");
        }
    }

    // IDataReader typed getters. Each is a direct cast of the boxed value, so a null
    // (DBNull) or differently-typed column raises InvalidCastException.
    public bool GetBoolean(int ordinal) => (bool)GetValue(ordinal);

    public byte GetByte(int ordinal) => (byte)GetValue(ordinal);

    /// <summary>
    /// Copies up to <paramref name="length"/> bytes of the column starting at
    /// <paramref name="fieldOffset"/>; with a null <paramref name="buffer"/> returns
    /// the total length instead.
    /// </summary>
    /// <returns>The number of bytes copied (0 when the offset is at or past the end).</returns>
    public long GetBytes(int ordinal, long fieldOffset, byte[]? buffer, int bufferOffset, int length)
    {
        var data = (byte[])GetValue(ordinal);
        if (buffer is null)
        {
            return data.Length;
        }

        ArgumentOutOfRangeException.ThrowIfNegative(fieldOffset);
        if (fieldOffset >= data.Length)
        {
            return 0; // nothing left; avoids a negative copy length
        }

        var toCopy = (int)Math.Min(length, data.Length - fieldOffset);
        Array.Copy(data, fieldOffset, buffer, bufferOffset, toCopy);
        return toCopy;
    }

    public char GetChar(int ordinal) => ((string)GetValue(ordinal))[0];

    /// <summary>
    /// Copies up to <paramref name="length"/> characters of the column starting at
    /// <paramref name="fieldOffset"/>; with a null <paramref name="buffer"/> returns
    /// the total length instead.
    /// </summary>
    /// <returns>The number of characters copied (0 when the offset is at or past the end).</returns>
    public long GetChars(int ordinal, long fieldOffset, char[]? buffer, int bufferOffset, int length)
    {
        var data = (string)GetValue(ordinal);
        if (buffer is null)
        {
            return data.Length;
        }

        ArgumentOutOfRangeException.ThrowIfNegative(fieldOffset);
        if (fieldOffset >= data.Length)
        {
            return 0; // nothing left; avoids a negative copy length
        }

        var toCopy = (int)Math.Min(length, data.Length - fieldOffset);
        data.CopyTo((int)fieldOffset, buffer, bufferOffset, toCopy);
        return toCopy;
    }

    public IDataReader GetData(int ordinal) => throw new NotSupportedException();

    public DateTime GetDateTime(int ordinal) => (DateTime)GetValue(ordinal);

    public decimal GetDecimal(int ordinal) => (decimal)GetValue(ordinal);

    public double GetDouble(int ordinal) => (double)GetValue(ordinal);

    public float GetFloat(int ordinal) => (float)GetValue(ordinal);

    public Guid GetGuid(int ordinal) => (Guid)GetValue(ordinal);

    public short GetInt16(int ordinal) => (short)GetValue(ordinal);

    public int GetInt32(int ordinal) => (int)GetValue(ordinal);

    public long GetInt64(int ordinal) => (long)GetValue(ordinal);

    public string GetString(int ordinal) => (string)GetValue(ordinal);

    /// <summary>Copies the current row into <paramref name="values"/>, as many columns as fit.</summary>
    /// <returns>The number of values copied.</returns>
    public int GetValues(object[] values)
    {
        var count = Math.Min(values.Length, _currentRow.Length);
        for (int i = 0; i < count; i++)
        {
            values[i] = GetValue(i);
        }

        return count;
    }

    /// <summary>
    /// Builds a schema table with ColumnName, ColumnOrdinal, DataType and AllowDBNull
    /// columns, one row per schema column.
    /// </summary>
    public DataTable GetSchemaTable()
    {
        var table = new DataTable("SchemaTable");
        table.Columns.Add("ColumnName", typeof(string));
        table.Columns.Add("ColumnOrdinal", typeof(int));
        table.Columns.Add("DataType", typeof(Type));
        table.Columns.Add("AllowDBNull", typeof(bool));

        for (int i = 0; i < _schema.Length; i++)
        {
            var column = _schema[i];
            table.Rows.Add(column.Name, i, column.ClrType, column.IsNullable);
        }

        return table;
    }

    /// <summary>Always <c>false</c>: the reader exposes a single result set.</summary>
    public bool NextResult() => false;

    public void Close() => Dispose();

    /// <summary>Disposes the underlying <see cref="StreamReader"/>. Safe to call repeatedly.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _streamReader.Dispose();
        _disposed = true;
    }
}
|
|
```
|
|
|
|
**Step 2: Verify it compiles**
|
|
|
|
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
Expected: Build succeeds
|
|
|
|
---
|
|
|
|
### Task 4: Create JsonZstdFileSource
|
|
|
|
**Files:**
|
|
- Create: `NEW/src/JdeScoping.DataSync/Etl/Sources/JsonZstdFileSource.cs`
|
|
|
|
**Step 1: Create the file**
|
|
|
|
```csharp
|
|
using System.Data;
|
|
using JdeScoping.DataSync.Etl.Contracts;
|
|
using JdeScoping.DataSync.Etl.Models;
|
|
using ZstdSharp;
|
|
|
|
namespace JdeScoping.DataSync.Etl.Sources;
|
|
|
|
/// <summary>
/// Import source that reads from a zstd-compressed JSON array file.
/// </summary>
/// <remarks>
/// A source instance can have only one open reader at a time: calling
/// <see cref="ReadDataAsync"/> again before <see cref="DisposeAsync"/> throws.
/// </remarks>
public sealed class JsonZstdFileSource : IImportSource
{
    private readonly string _filePath;
    private readonly JsonColumnSchema[] _schema;
    private FileStream? _fileStream;
    private DecompressionStream? _decompressionStream;
    private JsonStreamingDataReader? _reader;

    /// <summary>Gets a display name of the form <c>JsonZstd:{file name}</c>.</summary>
    public string SourceName => $"JsonZstd:{Path.GetFileName(_filePath)}";

    /// <summary>
    /// Creates a source for <paramref name="filePath"/>. The file must already exist.
    /// </summary>
    /// <param name="filePath">Path of the <c>.json.zstd</c> cache file.</param>
    /// <param name="schema">Columns to expose from each JSON object.</param>
    /// <exception cref="ArgumentException"><paramref name="filePath"/> is null, empty or whitespace.</exception>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="schema"/> is null.</exception>
    public JsonZstdFileSource(string filePath, JsonColumnSchema[] schema)
    {
        if (string.IsNullOrWhiteSpace(filePath))
        {
            throw new ArgumentException("File path cannot be null or empty.", nameof(filePath));
        }

        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException($"Cache file not found: {filePath}", filePath);
        }

        _filePath = filePath;
        _schema = schema ?? throw new ArgumentNullException(nameof(schema));
    }

    /// <summary>
    /// Opens the file and returns a reader that decompresses and parses it lazily.
    /// </summary>
    /// <param name="cancellationToken">Checked once before the file is opened.</param>
    /// <returns>A completed task holding the reader; this source keeps ownership of it.</returns>
    /// <exception cref="InvalidOperationException">The source is already open.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested before opening.</exception>
    public Task<IDataReader> ReadDataAsync(CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // A second open would overwrite the fields and orphan the first set of streams.
        if (_fileStream is not null)
        {
            throw new InvalidOperationException(
                $"{SourceName} is already open; dispose it or create a new source to read the file again.");
        }

        try
        {
            // The data reader consumes the stream with synchronous Read calls, so the
            // file is opened for synchronous, sequential access rather than useAsync.
            _fileStream = new FileStream(
                _filePath,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                bufferSize: 65536,
                FileOptions.SequentialScan);
            _decompressionStream = new DecompressionStream(_fileStream);
            _reader = new JsonStreamingDataReader(_decompressionStream, _schema);

            return Task.FromResult<IDataReader>(_reader);
        }
        catch
        {
            // Release whatever was created before the failure, outermost first, and
            // reset the fields so the source can be retried.
            _reader?.Dispose();
            _decompressionStream?.Dispose();
            _fileStream?.Dispose();

            _reader = null;
            _decompressionStream = null;
            _fileStream = null;
            throw;
        }
    }

    /// <summary>
    /// Disposes the reader, the decompression stream and the file stream, in that order.
    /// Safe to call when nothing was opened.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_reader is not null)
        {
            _reader.Dispose();
            _reader = null;
        }

        if (_decompressionStream is not null)
        {
            await _decompressionStream.DisposeAsync();
            _decompressionStream = null;
        }

        if (_fileStream is not null)
        {
            await _fileStream.DisposeAsync();
            _fileStream = null;
        }
    }
}
|
|
```
|
|
|
|
**Step 2: Verify it compiles**
|
|
|
|
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
Expected: Build succeeds
|
|
|
|
---
|
|
|
|
### Task 5: Create BranchDevEtl
|
|
|
|
**Files:**
|
|
- Create: `NEW/src/JdeScoping.DataSync/DevEtl/BranchDevEtl.cs`
|
|
|
|
**Reference - Branch table schema from `003_CreateBranchTable.sql`:**
|
|
- `Code` VARCHAR(12) NOT NULL
|
|
- `Description` VARCHAR(40) NULL
|
|
- `LastUpdateDT` DATETIME2(7) NOT NULL
|
|
|
|
**Step 1: Create the file**
|
|
|
|
```csharp
|
|
using JdeScoping.DataAccess;
|
|
using JdeScoping.DataSync.Etl.Destinations;
|
|
using JdeScoping.DataSync.Etl.Models;
|
|
using JdeScoping.DataSync.Etl.Pipeline;
|
|
using JdeScoping.DataSync.Etl.Sources;
|
|
|
|
namespace JdeScoping.DataSync.DevEtl;
|
|
|
|
/// <summary>
/// Builds the development ETL pipeline that loads the Branch table from its cache file.
/// </summary>
public static class BranchDevEtl
{
    /// <summary>Destination table name; also the prefix of the pipeline name.</summary>
    public static readonly string TableName = "Branch";

    /// <summary>File name of the Branch cache file.</summary>
    public static readonly string CacheFileName = "branch.json.zstd";

    // Column order and nullability follow the Branch table definition:
    // Code (NOT NULL), Description (NULL), LastUpdateDT (NOT NULL).
    private static readonly JsonColumnSchema[] Schema = new JsonColumnSchema[]
    {
        new JsonColumnSchema("Code", typeof(string), false),
        new JsonColumnSchema("Description", typeof(string), true),
        new JsonColumnSchema("LastUpdateDT", typeof(DateTime), false),
    };

    /// <summary>
    /// Creates the <c>Branch_Dev</c> pipeline reading from <paramref name="cacheFilePath"/>.
    /// </summary>
    /// <param name="connectionFactory">Factory for the destination database connection.</param>
    /// <param name="cacheFilePath">Full path of the Branch cache file.</param>
    /// <returns>A pipeline from the cache file to the Branch table.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="cacheFilePath"/> is null, empty or whitespace.</exception>
    public static EtlPipeline Create(IDbConnectionFactory connectionFactory, string cacheFilePath)
    {
        ArgumentNullException.ThrowIfNull(connectionFactory);

        bool pathMissing = string.IsNullOrWhiteSpace(cacheFilePath);
        if (pathMissing)
        {
            throw new ArgumentException("Cache file path is required.", nameof(cacheFilePath));
        }

        // Each stage is evaluated in the same order as a single fluent chain would be.
        var named = new EtlPipelineBuilder().WithName(TableName + "_Dev");
        var sourced = named.WithSource(new JsonZstdFileSource(cacheFilePath, Schema));
        var targeted = sourced.WithDestination(new DbBulkImportDestination(connectionFactory, TableName));

        return targeted.Build();
    }
}
|
|
```
|
|
|
|
**Step 2: Verify it compiles**
|
|
|
|
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
Expected: Build succeeds
|
|
|
|
---
|
|
|
|
### Task 6: Create DevEtlRegistry
|
|
|
|
**Files:**
|
|
- Create: `NEW/src/JdeScoping.DataSync/DevEtl/DevEtlRegistry.cs`
|
|
|
|
**Step 1: Create the file**
|
|
|
|
```csharp
|
|
using JdeScoping.DataAccess;
|
|
using JdeScoping.DataSync.Etl.Pipeline;
|
|
using JdeScoping.DataSync.Etl.Results;
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace JdeScoping.DataSync.DevEtl;
|
|
|
|
/// <summary>
/// Registry for development ETL pipelines that load from cached JSON files.
/// </summary>
public class DevEtlRegistry
{
    private readonly IDbConnectionFactory _connectionFactory;
    private readonly string _cacheDirectory;
    private readonly ILogger<DevEtlRegistry>? _logger;

    // Table name (case-insensitive) -> factory taking (connection factory, cache directory).
    private readonly Dictionary<string, Func<IDbConnectionFactory, string, EtlPipeline>> _pipelineFactories = new(StringComparer.OrdinalIgnoreCase)
    {
        [BranchDevEtl.TableName] = (factory, cacheDir) =>
            BranchDevEtl.Create(factory, Path.Combine(cacheDir, BranchDevEtl.CacheFileName)),
    };

    /// <summary>
    /// Creates a registry bound to a database and a cache directory.
    /// </summary>
    /// <param name="connectionFactory">Factory for the destination database connection.</param>
    /// <param name="cacheDirectory">Existing directory containing the cache files.</param>
    /// <param name="logger">Optional logger; when null nothing is logged.</param>
    /// <exception cref="ArgumentNullException"><paramref name="connectionFactory"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="cacheDirectory"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException"><paramref name="cacheDirectory"/> does not exist.</exception>
    public DevEtlRegistry(
        IDbConnectionFactory connectionFactory,
        string cacheDirectory,
        ILogger<DevEtlRegistry>? logger = null)
    {
        _connectionFactory = connectionFactory ?? throw new ArgumentNullException(nameof(connectionFactory));

        if (string.IsNullOrWhiteSpace(cacheDirectory))
        {
            throw new ArgumentException("Cache directory is required.", nameof(cacheDirectory));
        }

        if (!Directory.Exists(cacheDirectory))
        {
            throw new DirectoryNotFoundException($"Cache directory not found: {cacheDirectory}");
        }

        _cacheDirectory = cacheDirectory;
        _logger = logger;
    }

    /// <summary>Gets the names of all tables that have a registered pipeline.</summary>
    public IEnumerable<string> GetAvailableTables() => _pipelineFactories.Keys;

    /// <summary>
    /// Creates a new pipeline for <paramref name="tableName"/> (matched case-insensitively).
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="tableName"/> is null.</exception>
    /// <exception cref="ArgumentException">No pipeline is registered for the table.</exception>
    public EtlPipeline GetPipeline(string tableName)
    {
        // Checked here so the exception names this method's parameter rather than
        // the dictionary's internal "key".
        ArgumentNullException.ThrowIfNull(tableName);

        if (!_pipelineFactories.TryGetValue(tableName, out var factory))
        {
            throw new ArgumentException($"No pipeline registered for table '{tableName}'.", nameof(tableName));
        }

        return factory(_connectionFactory, _cacheDirectory);
    }

    /// <summary>
    /// Runs the pipeline for one table and logs the outcome.
    /// </summary>
    /// <returns>The pipeline's result, whether it succeeded or not.</returns>
    public async Task<PipelineResult> RunAsync(string tableName, CancellationToken cancellationToken = default)
    {
        _logger?.LogInformation("Running dev ETL for {TableName}", tableName);

        var pipeline = GetPipeline(tableName);
        var result = await pipeline.ExecuteAsync(cancellationToken);

        if (result.Success)
        {
            _logger?.LogInformation("Completed {TableName}: {Rows} rows in {Elapsed:g}",
                tableName, result.TotalRows, result.Elapsed);
        }
        else
        {
            _logger?.LogError(result.Error, "Failed {TableName}: {Error}",
                tableName, result.Error?.Message);
        }

        return result;
    }

    /// <summary>
    /// Runs every registered pipeline in sequence. A failed pipeline does not stop the run.
    /// </summary>
    /// <returns>One result per table, in registration enumeration order.</returns>
    /// <exception cref="OperationCanceledException">
    /// Cancellation was requested between pipelines.
    /// </exception>
    public async Task<IReadOnlyList<PipelineResult>> RunAllAsync(CancellationToken cancellationToken = default)
    {
        var results = new List<PipelineResult>();

        foreach (var tableName in GetAvailableTables())
        {
            // Throw rather than break: returning a short list would make a cancelled
            // run indistinguishable from a complete one.
            cancellationToken.ThrowIfCancellationRequested();

            results.Add(await RunAsync(tableName, cancellationToken));
        }

        return results;
    }
}
|
|
```
|
|
|
|
**Step 2: Verify it compiles**
|
|
|
|
Run: `dotnet build NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj`
|
|
Expected: Build succeeds
|
|
|
|
---
|
|
|
|
### Task 7: Create Integration Test for Branch
|
|
|
|
**Files:**
|
|
- Create: `NEW/tests/JdeScoping.DataSync.Tests/DevEtl/BranchDevEtlTests.cs`
|
|
|
|
**Step 1: Create the test file**
|
|
|
|
```csharp
|
|
using FluentAssertions;
|
|
using JdeScoping.DataAccess;
|
|
using JdeScoping.DataSync.DevEtl;
|
|
using Microsoft.Data.SqlClient;
|
|
using Microsoft.Extensions.Configuration;
|
|
using Xunit;
|
|
|
|
namespace JdeScoping.DataSync.Tests.DevEtl;
|
|
|
|
/// <summary>
/// Integration tests for Branch development ETL.
/// Requires: Local SQL Server, CACHED_DB_FILES directory with branch.json.zstd
/// </summary>
/// <remarks>
/// When the cache file or directory is missing a test returns early and is reported as
/// passed; plain <c>[Fact]</c> has no way to report it as skipped.
/// </remarks>
public class BranchDevEtlTests : IAsyncLifetime
{
    // NOTE(review): key name follows the Phase 2 notes, which say DbConnectionFactory
    // expects "LotFinderDB" - confirm against the factory implementation.
    private const string ConnectionStringName = "LotFinderDB";

    private readonly string _connectionString;
    private readonly string _cacheDirectory;
    private readonly IDbConnectionFactory _connectionFactory;

    public BranchDevEtlTests()
    {
        var config = new ConfigurationBuilder()
            .AddJsonFile("appsettings.json", optional: true)
            .AddEnvironmentVariables()
            .Build();

        _connectionString = config.GetConnectionString(ConnectionStringName)
            ?? throw new InvalidOperationException($"{ConnectionStringName} connection string not configured.");

        // Configuration wins; otherwise walk up from the test working directory to
        // the repository-level CACHED_DB_FILES folder.
        _cacheDirectory = config["DevEtl:CacheDirectory"]
            ?? Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "..", "..", "..", "CACHED_DB_FILES");

        // NOTE(review): per the Phase 2 notes the factory is constructed from
        // IConfiguration, not from a raw connection string - confirm.
        _connectionFactory = new DbConnectionFactory(config);
    }

    /// <summary>Empties dbo.Branch so every test starts from a clean table.</summary>
    public async Task InitializeAsync()
    {
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync();
        await using var command = new SqlCommand("TRUNCATE TABLE dbo.Branch", connection);
        await command.ExecuteNonQueryAsync();
    }

    public Task DisposeAsync() => Task.CompletedTask;

    [Fact]
    public void Create_ReturnsValidPipeline()
    {
        // Arrange
        var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
        if (!File.Exists(cacheFilePath))
        {
            return; // cache file not available on this machine
        }

        // Act
        var pipeline = BranchDevEtl.Create(_connectionFactory, cacheFilePath);

        // Assert
        pipeline.Should().NotBeNull();
        pipeline.PipelineName.Should().Be("Branch_Dev");
    }

    [Fact]
    public async Task Execute_LoadsBranchData()
    {
        // Arrange
        var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
        if (!File.Exists(cacheFilePath))
        {
            return; // cache file not available on this machine
        }

        var pipeline = BranchDevEtl.Create(_connectionFactory, cacheFilePath);

        // Act
        var result = await pipeline.ExecuteAsync();

        // Assert
        result.Success.Should().BeTrue(because: result.Error?.Message ?? "Pipeline should succeed");
        result.TotalRows.Should().BeGreaterThan(0, "Should load at least one row");

        // Verify data in database
        await using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync();
        await using var command = new SqlCommand("SELECT COUNT(*) FROM dbo.Branch", connection);
        var count = (int)(await command.ExecuteScalarAsync())!;

        count.Should().Be((int)result.TotalRows, "Database row count should match pipeline result");
    }

    [Fact]
    public async Task Registry_RunAsync_LoadsBranch()
    {
        // Arrange
        // The registry needs the directory, and the Branch pipeline needs the file in it.
        var cacheFilePath = Path.Combine(_cacheDirectory, BranchDevEtl.CacheFileName);
        if (!Directory.Exists(_cacheDirectory) || !File.Exists(cacheFilePath))
        {
            return; // cache not available on this machine
        }

        var registry = new DevEtlRegistry(_connectionFactory, _cacheDirectory);

        // Act
        var result = await registry.RunAsync("Branch");

        // Assert
        result.Success.Should().BeTrue(because: result.Error?.Message ?? "Pipeline should succeed");
        result.TotalRows.Should().BeGreaterThan(0);
    }
}
|
|
```
|
|
|
|
**Step 2: Add test project dependencies if needed**
|
|
|
|
Verify `JdeScoping.DataSync.Tests.csproj` has:
|
|
- Reference to `JdeScoping.DataSync`
|
|
- FluentAssertions
|
|
- xunit
|
|
- xunit.runner.visualstudio
|
|
|
|
**Step 3: Run the tests**
|
|
|
|
Run: `dotnet test NEW/tests/JdeScoping.DataSync.Tests --filter "FullyQualifiedName~BranchDevEtlTests"`
|
|
Expected: Tests pass (or skip if cache file not found)
|
|
|
|
---
|
|
|
|
### Task 8: Run End-to-End Test and Debug
|
|
|
|
**Step 1: Ensure database is running**
|
|
|
|
Run: `docker ps | grep scopingtool-sqlserver`
|
|
Expected: Container is running
|
|
|
|
**Step 2: Run the integration test**
|
|
|
|
Run: `dotnet test NEW/tests/JdeScoping.DataSync.Tests --filter "BranchDevEtlTests.Execute_LoadsBranchData" -v normal`
|
|
|
|
**Step 3: If test fails, debug the issue**
|
|
|
|
Common issues to check:
|
|
- Connection string correct in appsettings.json
|
|
- Cache file exists and is readable
|
|
- Branch table exists in database
|
|
- JSON parsing errors (check column name case sensitivity)
|
|
|
|
**Step 4: Verify data in database**
|
|
|
|
Run SQL: `SELECT TOP 5 * FROM dbo.Branch ORDER BY Code`
|
|
Expected: See branch records from cache file
|
|
|
|
---
|
|
|
|
## Phase 2: Lessons Learned
|
|
|
|
### Issues Encountered and Fixes
|
|
|
|
1. **JsonDocument Memory Leak**
|
|
- **Issue:** `ReadJsonObject()` returned `JsonDocument` that wasn't being disposed, causing memory accumulation
|
|
- **Fix:** Changed to `using var jsonObject = ReadJsonObject();` in the `Read()` method
|
|
- **Lesson:** Always dispose `JsonDocument` instances - they own native memory
|
|
|
|
2. **Multiple ReadDataAsync Calls**
|
|
- **Issue:** `JsonZstdFileSource.ReadDataAsync()` could be called multiple times, causing stream leaks
|
|
- **Fix:** Added guard: `if (_fileStream != null) throw new InvalidOperationException(...)`
|
|
- **Lesson:** Sources should only be readable once; enforce this with guards
|
|
|
|
3. **Exception Safety in Stream Initialization**
|
|
- **Issue:** If stream creation failed partway through (e.g., DecompressionStream fails), earlier streams leaked
|
|
- **Fix:** Wrapped initialization in try-catch with cleanup in catch block:
|
|
```csharp
|
|
try {
|
|
_fileStream = new FileStream(...);
|
|
_decompressionStream = new DecompressionStream(_fileStream);
|
|
_reader = new JsonStreamingDataReader(...);
|
|
return Task.FromResult<IDataReader>(_reader);
|
|
} catch {
|
|
_reader?.Dispose();
|
|
_decompressionStream?.Dispose();
|
|
_fileStream?.Dispose();
|
|
throw;
|
|
}
|
|
```
|
|
- **Lesson:** Multi-resource initialization needs exception safety
|
|
|
|
4. **Cancellation Token Handling**
|
|
- **Issue:** `RunAllAsync` used `IsCancellationRequested + break` which silently stops without exception
|
|
- **Fix:** Changed to `cancellationToken.ThrowIfCancellationRequested();`
|
|
- **Lesson:** Prefer `ThrowIfCancellationRequested()` for proper cancellation semantics
|
|
|
|
5. **Connection String Naming Convention**
|
|
- **Issue:** Test used `"LotFinder"` but `DbConnectionFactory` expects `"LotFinderDB"`
|
|
- **Fix:** Updated appsettings.json key to `"LotFinderDB"`
|
|
- **Lesson:** Match connection string names to what `DbConnectionFactory` expects
|
|
|
|
6. **Hardcoded Absolute Paths**
|
|
- **Issue:** Fallback cache directory path was user-specific `/Users/dohertj2/Desktop/...`
|
|
- **Fix:** Changed to a relative path: `Path.Combine(Directory.GetCurrentDirectory(), "..", "..", "..", "..", "..", "CACHED_DB_FILES")`
|
|
- **Lesson:** Use relative paths for portability; config should specify absolute paths
|
|
|
|
### Patterns That Worked Well
|
|
|
|
1. **IAsyncLifetime for Test Isolation**
|
|
- Using `IAsyncLifetime.InitializeAsync()` to truncate tables before each test ensures clean state
|
|
- Pattern: `TRUNCATE TABLE dbo.{Table}` in `InitializeAsync()`
|
|
|
|
2. **Shouldly Assertions**
|
|
- Project uses Shouldly instead of FluentAssertions
|
|
- Pattern: `result.Success.ShouldBeTrue(result.Error?.Message ?? "reason")`
|
|
|
|
3. **Missing Cache File Checks in Tests**
|
|
- Early return when cache files don't exist (graceful skip)
|
|
- Pattern: `if (!File.Exists(cacheFilePath)) return;`
|
|
|
|
4. **Static Factory Pattern for DevEtl Classes**
|
|
- Clean separation: static `Create()` method with explicit validation
|
|
- Pattern: `ArgumentNullException.ThrowIfNull(connectionFactory);`
|
|
|
|
5. **Property Naming**
|
|
- Pipeline property is `PipelineName` (not `Name`)
|
|
- Pattern: `pipeline.PipelineName.ShouldBe("Branch_Dev")`
|
|
|
|
### Performance Observations
|
|
|
|
- Branch table (930 bytes compressed, ~10 rows) loads in ~75ms including decompression
|
|
- Streaming approach successfully processes one JSON object at a time
|
|
- No memory issues observed - suitable for larger files
|
|
|
|
### Code Corrections from Original Plan
|
|
|
|
| Original Plan | Actual Implementation |
|
|
|---------------|----------------------|
|
|
| `pipeline.Name` | `pipeline.PipelineName` |
|
|
| FluentAssertions | Shouldly |
|
|
| `Skip.IfNot()` | Early return with `if (!exists) return;` |
|
|
| `IDbConnectionFactory` constructor with string | Constructor takes `IConfiguration` |
|
|
| Dapper for test queries | Direct `SqlConnection` + `ExecuteScalarAsync` |
|
|
|
|
---
|
|
|
|
## Phase 3: Remaining Tables
|
|
|
|
After Phase 2, add remaining tables following the established pattern. Priority order by file size:
|
|
|
|
1. **Small (< 1 MB):** OrgHierarchy, WorkCenter, ProfitCenter
|
|
2. **Medium (1-20 MB):** JdeUser, FunctionCode, Item, RouteMaster
|
|
3. **Large (20-200 MB):** Lot, MisData, WorkOrder_Curr/Hist, LotUsage_Hist
|
|
4. **Very Large (200+ MB):** LotUsage_Curr, WorkOrderRouting, WorkOrderStep, WorkOrderTime, WorkOrderComponent
|
|
|
|
For each table:
|
|
1. Read the CREATE TABLE script from Database/Scripts/
|
|
2. Create `{Table}DevEtl.cs` with explicit schema
|
|
3. Register in `DevEtlRegistry._pipelineFactories`
|
|
4. Add integration test
|
|
5. Verify with sample data
|