From 8ce9a7dae11d5934d92b633fee3bcc192a189adb Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Tue, 6 Jan 2026 14:15:19 -0500 Subject: [PATCH] docs: switch cache conversion design from MessagePack to protobuf-net-data protobuf-net-data is purpose-built for IDataReader serialization and returns IDataReader directly from Deserialize(), eliminating the need for custom streaming reader implementations. --- NEW/JdeScoping.slnx | 1 - .../QueryBuilders/MisQueryBuilder.cs | 9 +- .../DataReaderGenerator.cs | 548 ------------------ .../IsExternalInit.cs | 6 - ...deScoping.DataSync.SourceGenerators.csproj | 15 - .../JdeScoping.DataSync.csproj | 4 - ...-06-messagepack-cache-conversion-design.md | 142 ----- ...-01-06-protobuf-cache-conversion-design.md | 164 ++++++ 8 files changed, 169 insertions(+), 720 deletions(-) delete mode 100644 NEW/src/JdeScoping.DataSync.SourceGenerators/DataReaderGenerator.cs delete mode 100644 NEW/src/JdeScoping.DataSync.SourceGenerators/IsExternalInit.cs delete mode 100644 NEW/src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj delete mode 100644 PLANS/2026-01-06-messagepack-cache-conversion-design.md create mode 100644 PLANS/2026-01-06-protobuf-cache-conversion-design.md diff --git a/NEW/JdeScoping.slnx b/NEW/JdeScoping.slnx index b87ba61..de0edba 100644 --- a/NEW/JdeScoping.slnx +++ b/NEW/JdeScoping.slnx @@ -5,7 +5,6 @@ - diff --git a/NEW/src/JdeScoping.DataAccess/QueryBuilders/MisQueryBuilder.cs b/NEW/src/JdeScoping.DataAccess/QueryBuilders/MisQueryBuilder.cs index e2f6151..2823425 100644 --- a/NEW/src/JdeScoping.DataAccess/QueryBuilders/MisQueryBuilder.cs +++ b/NEW/src/JdeScoping.DataAccess/QueryBuilders/MisQueryBuilder.cs @@ -1,10 +1,10 @@ -using JdeScoping.DataAccess.Models; using SqlKata.Compilers; namespace JdeScoping.DataAccess.QueryBuilders; /// /// Builds MIS extraction queries for work order step matching. +/// Uses SQL extraction functions to retrieve criteria from Search.Criteria JSON. /// public sealed class MisQueryBuilder { @@ -21,10 +21,11 @@ public sealed class MisQueryBuilder /// /// Builds the complete MIS extraction SQL including temp table setup and data population. + /// Uses extraction functions to get filter criteria from the database. /// - /// The search model containing filter criteria. + /// The search ID to extract criteria from. /// The SQL statements for MIS extraction. - public IReadOnlyList BuildMisExtractionSql(SearchModel model) + public IReadOnlyList BuildMisExtractionSql(int searchId) { var statements = new List(); @@ -32,7 +33,7 @@ public sealed class MisQueryBuilder statements.Add(BuildTempMisDataTableSql()); // Build and execute MIS CTE query to populate temp table - statements.Add(BuildMisCteSql(model)); + statements.Add(BuildMisCteSql()); return statements; } diff --git a/NEW/src/JdeScoping.DataSync.SourceGenerators/DataReaderGenerator.cs b/NEW/src/JdeScoping.DataSync.SourceGenerators/DataReaderGenerator.cs deleted file mode 100644 index b7baede..0000000 --- a/NEW/src/JdeScoping.DataSync.SourceGenerators/DataReaderGenerator.cs +++ /dev/null @@ -1,548 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Collections.Immutable; -using System.Linq; -using System.Text; -using Microsoft.CodeAnalysis; -using Microsoft.CodeAnalysis.CSharp; -using Microsoft.CodeAnalysis.CSharp.Syntax; -namespace JdeScoping.DataSync.SourceGenerators; - -/// -/// Source generator that creates IDataReader implementations for types listed in BulkCopyTypeRegistry. -/// -[Generator] -public class DataReaderGenerator : IIncrementalGenerator -{ - // Diagnostic for when no types are found (error condition) - private static readonly DiagnosticDescriptor DiagNoTypesFound = new( - "DRGEN001", - "DataReaderGenerator", - "No types found in BulkCopyTypeRegistry. Ensure the Types field contains typeof() expressions.", - "SourceGenerator", - DiagnosticSeverity.Warning, - isEnabledByDefault: true); - - public void Initialize(IncrementalGeneratorInitializationContext context) - { - // Find the BulkCopyTypeRegistry class and extract type symbols directly - var registryProvider = context.SyntaxProvider - .CreateSyntaxProvider( - predicate: static (node, _) => IsBulkCopyTypeRegistry(node), - transform: static (ctx, _) => GetRegisteredTypeSymbols(ctx)) - .Where(static types => types.Length > 0) - .Collect(); - - // Generate source directly from type symbols (no need for compilation lookup) - context.RegisterSourceOutput(registryProvider, static (spc, typeSymbolArrays) => ExecuteFromSymbols(typeSymbolArrays, spc)); - } - - private static bool IsBulkCopyTypeRegistry(SyntaxNode node) - { - return node is ClassDeclarationSyntax cds && - cds.Identifier.Text == "BulkCopyTypeRegistry"; - } - - /// - /// Extract type symbols directly using semantic model - resolves types from referenced assemblies. - /// - private static ImmutableArray GetRegisteredTypeSymbols(GeneratorSyntaxContext context) - { - var classDecl = (ClassDeclarationSyntax)context.Node; - - // Find the Types field - var typesField = classDecl.Members - .OfType() - .FirstOrDefault(f => f.Declaration.Variables.Any(v => v.Identifier.Text == "Types")); - - if (typesField == null) - return ImmutableArray.Empty; - - // Get the initializer - var variable = typesField.Declaration.Variables.First(); - - // Try collection expression syntax (C# 12: [ typeof(X), typeof(Y) ]) - if (variable.Initializer?.Value is CollectionExpressionSyntax collection) - { - return ExtractTypeSymbolsFromCollection(collection, context.SemanticModel); - } - - // Try array initializer: new Type[] { typeof(X), typeof(Y) } - if (variable.Initializer?.Value is ArrayCreationExpressionSyntax arrayCreation && - arrayCreation.Initializer != null) - { - return ExtractTypeSymbolsFromExpressions(arrayCreation.Initializer.Expressions, context.SemanticModel); - } - - // Try implicit array: new[] { typeof(X), typeof(Y) } - if (variable.Initializer?.Value is ImplicitArrayCreationExpressionSyntax implicitArray) - { - return ExtractTypeSymbolsFromExpressions(implicitArray.Initializer.Expressions, context.SemanticModel); - } - - return ImmutableArray.Empty; - } - - private static ImmutableArray ExtractTypeSymbolsFromCollection( - CollectionExpressionSyntax collection, - SemanticModel semanticModel) - { - var types = new List(); - foreach (var element in collection.Elements) - { - if (element is ExpressionElementSyntax exprElement && - exprElement.Expression is TypeOfExpressionSyntax typeOf) - { - var typeInfo = semanticModel.GetTypeInfo(typeOf.Type); - if (typeInfo.Type is INamedTypeSymbol namedType) - { - types.Add(namedType); - } - } - } - return types.ToImmutableArray(); - } - - private static ImmutableArray ExtractTypeSymbolsFromExpressions( - SeparatedSyntaxList expressions, - SemanticModel semanticModel) - { - var types = new List(); - foreach (var expr in expressions) - { - if (expr is TypeOfExpressionSyntax typeOf) - { - var typeInfo = semanticModel.GetTypeInfo(typeOf.Type); - if (typeInfo.Type is INamedTypeSymbol namedType) - { - types.Add(namedType); - } - } - } - return types.ToImmutableArray(); - } - - /// - /// Execute generation directly from resolved type symbols. - /// - private static void ExecuteFromSymbols( - ImmutableArray> typeSymbolArrays, - SourceProductionContext context) - { - if (typeSymbolArrays.IsDefaultOrEmpty) - return; - - var typeSymbols = typeSymbolArrays - .SelectMany(x => x) - .Distinct(SymbolEqualityComparer.Default) - .Cast() - .ToList(); - - if (typeSymbols.Count == 0) - { - context.ReportDiagnostic(Diagnostic.Create(DiagNoTypesFound, Location.None)); - return; - } - - // Build type infos - var typeInfos = new List(); - foreach (var symbol in typeSymbols) - { - var properties = GetBulkCopyProperties(symbol); - typeInfos.Add(new TypeInfo(symbol, properties)); - } - - if (typeInfos.Count == 0) - return; - - // Generate DataReader classes - foreach (var typeInfo in typeInfos) - { - var source = GenerateDataReader(typeInfo); - context.AddSource($"{typeInfo.Symbol.Name}DataReader.g.cs", source); - } - - // Generate factory - var factorySource = GenerateFactory(typeInfos); - context.AddSource("DataReaderFactory.g.cs", factorySource); - - // Generate DI extension - var extensionSource = GenerateDIExtension(); - context.AddSource("BulkCopyServiceCollectionExtensions.g.cs", extensionSource); - - // Generate base class - var baseClassSource = GenerateBaseClass(); - context.AddSource("AsyncEnumerableDataReader.g.cs", baseClassSource); - } - - private static string GenerateBaseClass() - { - return """ - // - #nullable enable - - using System; - using System.Collections.Generic; - using System.Data; - using System.Threading.Tasks; - - namespace JdeScoping.DataSync.Generated; - - /// - /// Base class for IDataReader implementations that wrap IAsyncEnumerable sources. - /// - public abstract class AsyncEnumerableDataReader : IDataReader where T : class - { - private readonly IAsyncEnumerator _enumerator; - private bool _disposed; - protected T? Current { get; private set; } - - protected AsyncEnumerableDataReader(IAsyncEnumerable source) - { - _enumerator = source.GetAsyncEnumerator(); - } - - protected abstract string[] ColumnNames { get; } - protected abstract object GetColumnValue(int ordinal); - protected abstract Type GetColumnType(int ordinal); - - public int FieldCount => ColumnNames.Length; - public int Depth => 0; - public bool IsClosed => _disposed; - public int RecordsAffected => -1; - - public object this[int i] => GetValue(i); - public object this[string name] => GetValue(GetOrdinal(name)); - - public bool Read() - { - var task = _enumerator.MoveNextAsync(); - if (task.IsCompleted) - { - if (task.Result) - { - Current = _enumerator.Current; - return true; - } - return false; - } - - var result = task.AsTask().GetAwaiter().GetResult(); - if (result) - { - Current = _enumerator.Current; - return true; - } - return false; - } - - public object GetValue(int i) - { - if (Current == null) - throw new InvalidOperationException("No current row."); - return GetColumnValue(i); - } - - public string GetName(int i) - { - if (i < 0 || i >= ColumnNames.Length) - throw new IndexOutOfRangeException($"Column index {i} is out of range."); - return ColumnNames[i]; - } - - public int GetOrdinal(string name) - { - for (int i = 0; i < ColumnNames.Length; i++) - { - if (string.Equals(ColumnNames[i], name, StringComparison.OrdinalIgnoreCase)) - return i; - } - throw new IndexOutOfRangeException($"Column '{name}' not found."); - } - - public Type GetFieldType(int i) => GetColumnType(i); - public bool IsDBNull(int i) => GetValue(i) == DBNull.Value; - public string GetDataTypeName(int i) => GetFieldType(i).Name; - - public int GetValues(object[] values) - { - var count = Math.Min(values.Length, FieldCount); - for (int i = 0; i < count; i++) - values[i] = GetValue(i); - return count; - } - - public bool GetBoolean(int i) => (bool)GetValue(i); - public byte GetByte(int i) => (byte)GetValue(i); - public long GetBytes(int i, long fieldOffset, byte[]? buffer, int bufferoffset, int length) => throw new NotSupportedException(); - public char GetChar(int i) => (char)GetValue(i); - public long GetChars(int i, long fieldoffset, char[]? buffer, int bufferoffset, int length) => throw new NotSupportedException(); - public IDataReader GetData(int i) => throw new NotSupportedException(); - public DateTime GetDateTime(int i) => (DateTime)GetValue(i); - public decimal GetDecimal(int i) => (decimal)GetValue(i); - public double GetDouble(int i) => (double)GetValue(i); - public float GetFloat(int i) => (float)GetValue(i); - public Guid GetGuid(int i) => (Guid)GetValue(i); - public short GetInt16(int i) => (short)GetValue(i); - public int GetInt32(int i) => (int)GetValue(i); - public long GetInt64(int i) => (long)GetValue(i); - public string GetString(int i) => (string)GetValue(i); - - public DataTable? GetSchemaTable() => null; - public bool NextResult() => false; - - public void Close() => Dispose(); - - public void Dispose() - { - if (!_disposed) - { - _disposed = true; - _enumerator.DisposeAsync().AsTask().GetAwaiter().GetResult(); - } - } - } - """; - } - - private static List GetBulkCopyProperties(INamedTypeSymbol typeSymbol) - { - var properties = new List(); - - foreach (var member in typeSymbol.GetMembers()) - { - if (member is IPropertySymbol prop && - prop.DeclaredAccessibility == Accessibility.Public && - prop.SetMethod != null && - prop.SetMethod.DeclaredAccessibility == Accessibility.Public && - !prop.IsReadOnly && - !prop.IsIndexer) - { - properties.Add(new PropertyInfo( - prop.Name, - prop.Type.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat), - prop.Type.NullableAnnotation == NullableAnnotation.Annotated || - prop.Type.OriginalDefinition.SpecialType == SpecialType.System_Nullable_T)); - } - } - - // Sort alphabetically for consistency - properties.Sort((a, b) => string.Compare(a.Name, b.Name, StringComparison.Ordinal)); - - return properties; - } - - private static string GenerateDataReader(TypeInfo typeInfo) - { - var typeName = typeInfo.Symbol.Name; - var fullTypeName = typeInfo.Symbol.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat); - var properties = typeInfo.Properties; - - var sb = new StringBuilder(); - sb.AppendLine("// "); - sb.AppendLine("#nullable enable"); - sb.AppendLine(); - sb.AppendLine("using System;"); - sb.AppendLine("using System.Collections.Generic;"); - sb.AppendLine(); - sb.AppendLine("namespace JdeScoping.DataSync.Generated;"); - sb.AppendLine(); - sb.AppendLine($"/// "); - sb.AppendLine($"/// IDataReader implementation for {typeName} for use with SqlBulkCopy."); - sb.AppendLine($"/// "); - sb.AppendLine($"public sealed class {typeName}DataReader : AsyncEnumerableDataReader<{fullTypeName}>"); - sb.AppendLine("{"); - - // Column names and types arrays - sb.AppendLine(" private static readonly string[] _columnNames ="); - sb.AppendLine(" ["); - for (int i = 0; i < properties.Count; i++) - { - var comma = i < properties.Count - 1 ? "," : ""; - sb.AppendLine($" \"{properties[i].Name}\"{comma}"); - } - sb.AppendLine(" ];"); - sb.AppendLine(); - - sb.AppendLine(" private static readonly Type[] _columnTypes ="); - sb.AppendLine(" ["); - for (int i = 0; i < properties.Count; i++) - { - var prop = properties[i]; - var clrType = GetClrTypeName(prop.TypeName); - var comma = i < properties.Count - 1 ? "," : ""; - sb.AppendLine($" typeof({clrType}){comma}"); - } - sb.AppendLine(" ];"); - sb.AppendLine(); - - // Constructor - sb.AppendLine($" public {typeName}DataReader(IAsyncEnumerable<{fullTypeName}> source) : base(source) {{ }}"); - sb.AppendLine(); - - // Override abstract members - sb.AppendLine(" protected override string[] ColumnNames => _columnNames;"); - sb.AppendLine(); - sb.AppendLine(" public static IReadOnlyList GetColumnNames() => _columnNames;"); - sb.AppendLine(); - - // GetColumnValue override - sb.AppendLine(" protected override object GetColumnValue(int ordinal)"); - sb.AppendLine(" {"); - sb.AppendLine(" var entity = Current!;"); - sb.AppendLine(" return ordinal switch"); - sb.AppendLine(" {"); - for (int i = 0; i < properties.Count; i++) - { - var prop = properties[i]; - if (prop.IsNullable) - { - sb.AppendLine($" {i} => entity.{prop.Name} ?? (object)DBNull.Value,"); - } - else - { - sb.AppendLine($" {i} => entity.{prop.Name},"); - } - } - sb.AppendLine(" _ => throw new IndexOutOfRangeException()"); - sb.AppendLine(" };"); - sb.AppendLine(" }"); - sb.AppendLine(); - - // GetColumnType override - sb.AppendLine(" protected override Type GetColumnType(int ordinal) => _columnTypes[ordinal];"); - sb.AppendLine("}"); - - return sb.ToString(); - } - - private static string GetClrTypeName(string fullTypeName) - { - // Handle nullable types - if (fullTypeName.EndsWith("?")) - { - var underlyingType = fullTypeName.TrimEnd('?'); - return GetClrTypeName(underlyingType); - } - - // Handle Nullable - if (fullTypeName.StartsWith("global::System.Nullable<")) - { - var inner = fullTypeName.Substring("global::System.Nullable<".Length); - inner = inner.TrimEnd('>'); - return GetClrTypeName(inner); - } - - // Map common types - return fullTypeName switch - { - "global::System.String" => "string", - "global::System.Int32" => "int", - "global::System.Int64" => "long", - "global::System.Int16" => "short", - "global::System.Byte" => "byte", - "global::System.Boolean" => "bool", - "global::System.Decimal" => "decimal", - "global::System.Double" => "double", - "global::System.Single" => "float", - "global::System.DateTime" => "DateTime", - "global::System.Guid" => "Guid", - "global::System.Char" => "char", - "string" => "string", - "int" => "int", - "long" => "long", - "short" => "short", - "byte" => "byte", - "bool" => "bool", - "decimal" => "decimal", - "double" => "double", - "float" => "float", - "char" => "char", - _ => fullTypeName.Replace("global::", "") - }; - } - - private static string GenerateFactory(List typeInfos) - { - var sb = new StringBuilder(); - sb.AppendLine("// "); - sb.AppendLine("#nullable enable"); - sb.AppendLine(); - sb.AppendLine("using System;"); - sb.AppendLine("using System.Collections.Generic;"); - sb.AppendLine("using System.Data;"); - sb.AppendLine("using JdeScoping.DataSync.Contracts;"); - sb.AppendLine(); - sb.AppendLine("namespace JdeScoping.DataSync.Generated;"); - sb.AppendLine(); - sb.AppendLine("/// "); - sb.AppendLine("/// Factory for creating IDataReader instances from IAsyncEnumerable sources."); - sb.AppendLine("/// "); - sb.AppendLine("public sealed class DataReaderFactory : IDataReaderFactory"); - sb.AppendLine("{"); - - // CreateReader method - sb.AppendLine(" public IDataReader CreateReader(IAsyncEnumerable source) where T : class"); - sb.AppendLine(" {"); - sb.AppendLine(" return source switch"); - sb.AppendLine(" {"); - foreach (var typeInfo in typeInfos) - { - var fullTypeName = typeInfo.Symbol.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat); - sb.AppendLine($" IAsyncEnumerable<{fullTypeName}> typed => new {typeInfo.Symbol.Name}DataReader(typed),"); - } - sb.AppendLine(" _ => throw new NotSupportedException($\"No DataReader converter exists for type {typeof(T).Name}. Add it to BulkCopyTypeRegistry.Types.\")"); - sb.AppendLine(" };"); - sb.AppendLine(" }"); - sb.AppendLine(); - - // GetColumnNames method - sb.AppendLine(" public IReadOnlyList GetColumnNames() where T : class"); - sb.AppendLine(" {"); - sb.AppendLine(" var type = typeof(T);"); - foreach (var typeInfo in typeInfos) - { - var fullTypeName = typeInfo.Symbol.ToDisplayString(SymbolDisplayFormat.FullyQualifiedFormat); - sb.AppendLine($" if (type == typeof({fullTypeName}))"); - sb.AppendLine($" return {typeInfo.Symbol.Name}DataReader.GetColumnNames();"); - } - sb.AppendLine(" throw new NotSupportedException($\"No DataReader converter exists for type {type.Name}. Add it to BulkCopyTypeRegistry.Types.\");"); - sb.AppendLine(" }"); - sb.AppendLine("}"); - - return sb.ToString(); - } - - private static string GenerateDIExtension() - { - var sb = new StringBuilder(); - sb.AppendLine("// "); - sb.AppendLine("#nullable enable"); - sb.AppendLine(); - sb.AppendLine("using JdeScoping.DataSync.Contracts;"); - sb.AppendLine("using JdeScoping.DataSync.Generated;"); - sb.AppendLine("using Microsoft.Extensions.DependencyInjection;"); - sb.AppendLine(); - sb.AppendLine("namespace JdeScoping.DataSync;"); - sb.AppendLine(); - sb.AppendLine("/// "); - sb.AppendLine("/// Extension methods for registering bulk copy converters."); - sb.AppendLine("/// "); - sb.AppendLine("public static class BulkCopyServiceCollectionExtensions"); - sb.AppendLine("{"); - sb.AppendLine(" /// "); - sb.AppendLine(" /// Adds the generated IDataReaderFactory to the service collection."); - sb.AppendLine(" /// "); - sb.AppendLine(" public static IServiceCollection AddBulkCopyConverters(this IServiceCollection services)"); - sb.AppendLine(" {"); - sb.AppendLine(" services.AddSingleton();"); - sb.AppendLine(" return services;"); - sb.AppendLine(" }"); - sb.AppendLine("}"); - - return sb.ToString(); - } - - private record TypeInfo(INamedTypeSymbol Symbol, List Properties); - private record PropertyInfo(string Name, string TypeName, bool IsNullable); -} diff --git a/NEW/src/JdeScoping.DataSync.SourceGenerators/IsExternalInit.cs b/NEW/src/JdeScoping.DataSync.SourceGenerators/IsExternalInit.cs deleted file mode 100644 index 7a02043..0000000 --- a/NEW/src/JdeScoping.DataSync.SourceGenerators/IsExternalInit.cs +++ /dev/null @@ -1,6 +0,0 @@ -// Polyfill for init-only properties in netstandard2.0 - -namespace System.Runtime.CompilerServices -{ - internal static class IsExternalInit { } -} diff --git a/NEW/src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj b/NEW/src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj deleted file mode 100644 index 9193f23..0000000 --- a/NEW/src/JdeScoping.DataSync.SourceGenerators/JdeScoping.DataSync.SourceGenerators.csproj +++ /dev/null @@ -1,15 +0,0 @@ - - - netstandard2.0 - latest - enable - true - true - JdeScoping.DataSync.SourceGenerators - - - - - - - diff --git a/NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj b/NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj index e62322e..ad69c5a 100644 --- a/NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj +++ b/NEW/src/JdeScoping.DataSync/JdeScoping.DataSync.csproj @@ -8,16 +8,12 @@ - - diff --git a/PLANS/2026-01-06-messagepack-cache-conversion-design.md b/PLANS/2026-01-06-messagepack-cache-conversion-design.md deleted file mode 100644 index e51f04d..0000000 --- a/PLANS/2026-01-06-messagepack-cache-conversion-design.md +++ /dev/null @@ -1,142 +0,0 @@ -# MessagePack Cache Conversion Design - -## Purpose - -Convert the development cache files in `CACHED_DB_FILES/` from zstd-compressed JSON (`.json.zstd`) to zstd-compressed MessagePack (`.msgpack.zstd`) for faster deserialization and smaller file sizes. - -## Goals - -1. **Faster deserialization** - MessagePack is faster to parse than JSON -2. **Smaller file sizes** - MessagePack is more compact than JSON - -## Current State - -- 22 cache files in `CACHED_DB_FILES/` totaling ~3.6 GB (zstd-compressed JSON) -- `JsonZstdFileSource` reads files using ZstdSharp + Utf8JsonReader -- Each `*DevEtl.cs` defines a schema and creates a pipeline from JSON files -- Tests verify ETL loads data from cache files into SQL Server - -## Design Decisions - -| Decision | Choice | Rationale | -|----------|--------|-----------| -| Conversion approach | One-time manual | Cache files are static snapshots, not actively regenerated | -| Data structure | Map format (field names as keys) | Self-describing, maintainable, keys compress well with zstd | -| Compression | Keep zstd | Largest file is 878 MB; raw MessagePack would be 2-4x larger | -| Converter location | Standalone console app in `Tools/CacheConverter/` | Isolated utility, not part of main solution | - -## File Format - -**New extension:** `.msgpack.zstd` - -**File naming:** -- `branch.json.zstd` → `branch.msgpack.zstd` -- `workordertime_curr.json.zstd` → `workordertime_curr.msgpack.zstd` - -**Data structure:** Array of maps (same logical structure as JSON) -``` -[ - { "Code": "ABC", "Description": "Branch ABC", "LastUpdateDT": }, - { "Code": "DEF", "Description": "Branch DEF", "LastUpdateDT": }, - ... -] -``` - -**Library:** MessagePack-CSharp (`MessagePack` NuGet package) - -## Components - -### 1. Converter Tool - -**Location:** `/JdeScopingTool/Tools/CacheConverter/` - -``` -Tools/ -└── CacheConverter/ - ├── CacheConverter.csproj - └── Program.cs -``` - -**Dependencies:** -- `ZstdSharp.Port` - read zstd JSON, write zstd MessagePack -- `MessagePack` - MessagePack serialization - -**Behavior:** -1. Read each `.json.zstd` file from `CACHED_DB_FILES/` -2. Stream JSON → deserialize to `Dictionary[]` -3. Serialize to MessagePack (map format) → compress with zstd -4. Write to `.msgpack.zstd` alongside originals -5. Print before/after sizes for comparison - -**Usage:** -```bash -cd Tools/CacheConverter -dotnet run -- ../../CACHED_DB_FILES -``` - -### 2. MessagePackZstdFileSource - -**New file:** `NEW/src/JdeScoping.DataSync.Dev/Sources/MessagePackZstdFileSource.cs` - -- Implements `IImportSource` (same interface as `JsonZstdFileSource`) -- Reads `.msgpack.zstd` files using streaming decompression -- Uses `MessagePackStreamReader` for efficient streaming deserialization -- Returns an `IDataReader` that yields rows one at a time -- Schema still needed for `IDataReader` field metadata (column names, types, ordinals) - -**Package addition:** Add `MessagePack` to `JdeScoping.DataSync.Dev.csproj` - -### 3. DevEtl Class Updates - -**Changes to each `*DevEtl.cs` file (22 files):** - -1. Update `CacheFileName` constant: - ```csharp - // Before - public static readonly string CacheFileName = "branch.json.zstd"; - // After - public static readonly string CacheFileName = "branch.msgpack.zstd"; - ``` - -2. Update `Create()` method: - ```csharp - // Before - .WithSource(new JsonZstdFileSource(cacheFilePath, Schema)) - // After - .WithSource(new MessagePackZstdFileSource(cacheFilePath, Schema)) - ``` - -**No changes to:** -- Schema definitions (same column names and types) -- Pipeline structure -- `DevEtlRegistry.cs` - -### 4. Cleanup (After Verification) - -Remove obsolete JSON readers: -- `JsonZstdFileSource.cs` -- `JsonStreamingDataReader.cs` -- `Utf8JsonStreamingDataReader.cs` - -Remove old cache files: -- All `*.json.zstd` files in `CACHED_DB_FILES/` - -## Test Strategy - -1. Run converter tool, verify all 22 files convert without errors -2. Compare file sizes (expect 10-30% reduction) -3. Run existing `JdeScoping.DataSync.Dev.Tests` - all tests should pass unchanged -4. Verify data loaded matches previous JSON-based loads - -## Files Changed - -| File | Change | -|------|--------| -| `Tools/CacheConverter/` (new) | Standalone converter tool | -| `Sources/MessagePackZstdFileSource.cs` (new) | New MessagePack reader | -| `JdeScoping.DataSync.Dev.csproj` | Add MessagePack package | -| `*DevEtl.cs` (22 files) | Update file extension and source class | -| `Sources/JsonZstdFileSource.cs` | Delete after migration | -| `Sources/JsonStreamingDataReader.cs` | Delete after migration | -| `Sources/Utf8JsonStreamingDataReader.cs` | Delete after migration | -| `CACHED_DB_FILES/*.json.zstd` | Delete after verification | diff --git a/PLANS/2026-01-06-protobuf-cache-conversion-design.md b/PLANS/2026-01-06-protobuf-cache-conversion-design.md new file mode 100644 index 0000000..3b79264 --- /dev/null +++ b/PLANS/2026-01-06-protobuf-cache-conversion-design.md @@ -0,0 +1,164 @@ +# Protobuf Cache Conversion Design + +## Purpose + +Convert the development cache files in `CACHED_DB_FILES/` from zstd-compressed JSON (`.json.zstd`) to zstd-compressed Protocol Buffers (`.pb.zstd`) using protobuf-net-data for faster deserialization and smaller file sizes. + +## Goals + +1. **Faster deserialization** - Protobuf is faster to parse than JSON +2. **Smaller file sizes** - Protobuf is more compact than JSON +3. **Simpler code** - protobuf-net-data returns `IDataReader` directly, no custom reader needed + +## Current State + +- 22 cache files in `CACHED_DB_FILES/` totaling ~3.6 GB (zstd-compressed JSON) +- `JsonZstdFileSource` reads files using ZstdSharp + Utf8JsonReader +- Custom `Utf8JsonStreamingDataReader` implements `IDataReader` for streaming +- Each `*DevEtl.cs` defines a schema and creates a pipeline from JSON files + +## Design Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| Serialization library | protobuf-net-data | Purpose-built for IDataReader, returns IDataReader directly | +| Conversion approach | One-time manual | Cache files are static snapshots, not actively regenerated | +| Compression | zstd on whole file | Consistent with current approach, excellent compression | +| Converter location | Standalone console app in `Tools/CacheConverter/` | Isolated utility, not part of main solution | + +## File Format + +**New extension:** `.pb.zstd` + +**File naming:** +- `branch.json.zstd` → `branch.pb.zstd` +- `workordertime_curr.json.zstd` → `workordertime_curr.pb.zstd` + +**Data structure:** protobuf-net-data binary format +- Schema embedded in stream (column names, types, nullability) +- Rows serialized sequentially +- Native ADO.NET type support (DateTime, Guid, decimal, etc.) + +**Libraries:** +- `protobuf-net-data` - IDataReader serialization/deserialization +- `ZstdSharp.Port` - compression + +## Components + +### 1. Converter Tool + +**Location:** `/JdeScopingTool/Tools/CacheConverter/` + +``` +Tools/ +└── CacheConverter/ + ├── CacheConverter.csproj + └── Program.cs +``` + +**Dependencies:** +- `ZstdSharp.Port` - read zstd JSON, write zstd protobuf +- `protobuf-net-data` - protobuf serialization + +**Behavior:** +1. Read each `.json.zstd` file from `CACHED_DB_FILES/` +2. Decompress and parse JSON into an `IDataReader` +3. Use `DataSerializer.Serialize(stream, reader)` to write protobuf +4. Compress with zstd and write to `.pb.zstd` +5. Print before/after sizes for comparison + +**Usage:** +```bash +cd Tools/CacheConverter +dotnet run -- ../../CACHED_DB_FILES +``` + +### 2. ProtobufZstdFileSource + +**New file:** `NEW/src/JdeScoping.DataSync.Dev/Sources/ProtobufZstdFileSource.cs` + +**Key simplification:** No custom `IDataReader` implementation needed! + +```csharp +public sealed class ProtobufZstdFileSource : IImportSource +{ + public async Task ReadDataAsync(CancellationToken ct = default) + { + _fileStream = new FileStream(_filePath, FileMode.Open, ...); + _decompressionStream = new DecompressionStream(_fileStream); + + // protobuf-net-data returns IDataReader directly! + return DataSerializer.Deserialize(_decompressionStream); + } +} +``` + +**Package additions to `JdeScoping.DataSync.Dev.csproj`:** +- `protobuf-net-data` + +### 3. DevEtl Class Updates + +**Changes to each `*DevEtl.cs` file (22 files):** + +1. Update `CacheFileName` constant: + ```csharp + // Before + public static readonly string CacheFileName = "branch.json.zstd"; + // After + public static readonly string CacheFileName = "branch.pb.zstd"; + ``` + +2. Update `Create()` method: + ```csharp + // Before + .WithSource(new JsonZstdFileSource(cacheFilePath, Schema)) + // After + .WithSource(new ProtobufZstdFileSource(cacheFilePath)) + ``` + +3. **Remove schema definitions** - protobuf-net-data embeds schema in the file, so `JsonColumnSchema[]` arrays are no longer needed in DevEtl classes. + +**No changes to:** +- Pipeline structure +- `DevEtlRegistry.cs` + +### 4. Cleanup (After Verification) + +**Remove obsolete files:** +- `Sources/JsonZstdFileSource.cs` +- `Sources/JsonStreamingDataReader.cs` +- `Sources/Utf8JsonStreamingDataReader.cs` +- `Models/JsonColumnSchema.cs` + +**Remove old cache files:** +- All `*.json.zstd` files in `CACHED_DB_FILES/` + +## Code Simplification Summary + +| Before (JSON) | After (Protobuf) | +|---------------|------------------| +| `JsonZstdFileSource` | `ProtobufZstdFileSource` | +| `Utf8JsonStreamingDataReader` (custom) | `DataSerializer.Deserialize()` (library) | +| `JsonStreamingDataReader` (legacy) | Removed | +| `JsonColumnSchema[]` per table | Not needed (embedded in file) | + +## Test Strategy + +1. Run converter tool, verify all 22 files convert without errors +2. Compare file sizes (expect 10-30% reduction) +3. Run existing `JdeScoping.DataSync.Dev.Tests` - all tests should pass unchanged +4. Verify data loaded matches previous JSON-based loads + +## Files Changed + +| File | Change | +|------|--------| +| `Tools/CacheConverter/` (new) | Standalone converter tool | +| `Sources/ProtobufZstdFileSource.cs` (new) | New protobuf reader (much simpler) | +| `JdeScoping.DataSync.Dev.csproj` | Add protobuf-net-data package | +| `*DevEtl.cs` (22 files) | Update file extension, source class, remove schema | +| `Sources/JsonZstdFileSource.cs` | Delete after migration | +| `Sources/JsonStreamingDataReader.cs` | Delete after migration | +| `Sources/Utf8JsonStreamingDataReader.cs` | Delete after migration | +| `Models/JsonColumnSchema.cs` | Delete after migration | +| `CACHED_DB_FILES/*.json.zstd` | Delete after verification |