diff --git a/Tools/CacheConverter/Program.cs b/Tools/CacheConverter/Program.cs index 46a6633..bf97217 100644 --- a/Tools/CacheConverter/Program.cs +++ b/Tools/CacheConverter/Program.cs @@ -1,27 +1,64 @@ using System.Data; +using System.Text; using System.Text.Json; +using System.Text.RegularExpressions; using ProtoBuf.Data; using ZstdSharp; -if (args.Length == 0) +if (args.Length < 2) { - Console.WriteLine("Usage: CacheConverter "); - Console.WriteLine("Example: dotnet run -- ../../CACHED_DB_FILES"); + Console.WriteLine("Usage: CacheConverter "); + Console.WriteLine("Example: dotnet run -- ../../CACHED_DB_FILES ../../NEW/src/JdeScoping.Database/Scripts"); return 1; } var cacheDir = args[0]; +var scriptsDir = args[1]; + if (!Directory.Exists(cacheDir)) { - Console.WriteLine($"Error: Directory not found: {cacheDir}"); + Console.WriteLine($"Error: Cache directory not found: {cacheDir}"); return 1; } +if (!Directory.Exists(scriptsDir)) +{ + Console.WriteLine($"Error: Scripts directory not found: {scriptsDir}"); + return 1; +} + +// Map cache file base names to SQL script filenames and table names +var fileMapping = new Dictionary(StringComparer.OrdinalIgnoreCase) +{ + ["branch"] = ("003_CreateBranchTable.sql", "Branch"), + ["functioncode"] = ("005_CreateFunctionCodeTable.sql", "FunctionCode"), + ["item"] = ("008_CreateItemTable.sql", "Item"), + ["jdeuser"] = ("009_CreateJdeUserTable.sql", "JdeUser"), + ["lot"] = ("013_CreateLotTable.sql", "Lot"), + ["lotusage_curr"] = ("024_CreateLotUsageCurrTable.sql", "LotUsage_Curr"), + ["lotusage_hist"] = ("025_CreateLotUsageHistTable.sql", "LotUsage_Hist"), + ["misdata"] = ("012_CreateMisDataTable.sql", "MisData"), + ["orghierarchy"] = ("010_CreateOrgHierarchyTable.sql", "OrgHierarchy"), + ["profitcenter"] = ("006_CreateProfitCenterTable.sql", "ProfitCenter"), + ["routemaster"] = ("011_CreateRouteMasterTable.sql", "RouteMaster"), + ["workcenter"] = ("007_CreateWorkCenterTable.sql", "WorkCenter"), + ["workorder_curr"] = 
("015_CreateWorkOrderCurrTable.sql", "WorkOrder_Curr"), + ["workorder_hist"] = ("016_CreateWorkOrderHistTable.sql", "WorkOrder_Hist"), + ["workordercomponent_curr"] = ("021_CreateWorkOrderComponentCurrTable.sql", "WorkOrderComponent_Curr"), + ["workordercomponent_hist"] = ("022_CreateWorkOrderComponentHistTable.sql", "WorkOrderComponent_Hist"), + ["workorderrouting"] = ("023_CreateWorkOrderRoutingTable.sql", "WorkOrderRouting"), + ["workorderstep_curr"] = ("017_CreateWorkOrderStepCurrTable.sql", "WorkOrderStep_Curr"), + ["workorderstep_hist"] = ("018_CreateWorkOrderStepHistTable.sql", "WorkOrderStep_Hist"), + ["workordertime_curr"] = ("019_CreateWorkOrderTimeCurrTable.sql", "WorkOrderTime_Curr"), + ["workordertime_hist"] = ("020_CreateWorkOrderTimeHistTable.sql", "WorkOrderTime_Hist"), +}; + var jsonFiles = Directory.GetFiles(cacheDir, "*.json.zstd"); Console.WriteLine($"Found {jsonFiles.Length} JSON files to convert"); long totalOriginalSize = 0; long totalNewSize = 0; +const int BatchSize = 10000; foreach (var jsonFile in jsonFiles) { @@ -32,40 +69,106 @@ foreach (var jsonFile in jsonFiles) try { - var originalSize = new FileInfo(jsonFile).Length; - totalOriginalSize += originalSize; - - // Read and decompress JSON - using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan); - using var decompressStream = new DecompressionStream(inputFs); - using var bufferedInput = new BufferedStream(decompressStream, 256 * 1024); - - // Parse JSON array into list of dictionaries - var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true }; - var records = JsonSerializer.Deserialize>>(bufferedInput, jsonOptions) - ?? 
throw new InvalidDataException("Failed to parse JSON array"); - - if (records.Count == 0) + // Look up the SQL script for this file + if (!fileMapping.TryGetValue(baseName, out var mapping)) { - Console.WriteLine("SKIP (empty)"); + Console.WriteLine($"SKIP (no SQL mapping for '{baseName}')"); continue; } - // Create DataTable from records - var dataTable = CreateDataTable(records); + var scriptPath = Path.Combine(scriptsDir, mapping.ScriptFile); + if (!File.Exists(scriptPath)) + { + Console.WriteLine($"SKIP (script not found: {mapping.ScriptFile})"); + continue; + } - // Write protobuf with zstd compression + // Parse schema from SQL script + var schema = ParseSqlSchema(scriptPath, mapping.TableName); + if (schema.Count == 0) + { + Console.WriteLine("SKIP (could not parse schema)"); + continue; + } + + var originalSize = new FileInfo(jsonFile).Length; + totalOriginalSize += originalSize; + + // Create DataTable with schema from SQL + var dataTable = new DataTable(mapping.TableName); + foreach (var (colName, colType) in schema) + { + dataTable.Columns.Add(colName, colType); + } + + // Stream JSON and write to protobuf in batches + using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan); + using var decompressStream = new DecompressionStream(inputFs); using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024); using var compressStream = new CompressionStream(outputFs, level: 3); - using var reader = dataTable.CreateDataReader(); - DataSerializer.Serialize(compressStream, reader); + + int rowCount = 0; + int batchCount = 0; + + // Stream JSON records one at a time + var buffer = new byte[4096]; + using var memoryStream = new MemoryStream(); + + int bytesRead; + while ((bytesRead = decompressStream.Read(buffer, 0, buffer.Length)) > 0) + { + memoryStream.Write(buffer, 0, bytesRead); + } + + memoryStream.Position = 0; + var jsonReader = new 
Utf8JsonReader(memoryStream.ToArray(), new JsonReaderOptions { AllowTrailingCommas = true }); + + // Skip to start of array + while (jsonReader.Read()) + { + if (jsonReader.TokenType == JsonTokenType.StartArray) + break; + } + + // Read each object in the array + while (jsonReader.Read()) + { + if (jsonReader.TokenType == JsonTokenType.EndArray) + break; + + if (jsonReader.TokenType == JsonTokenType.StartObject) + { + var row = dataTable.NewRow(); + ReadJsonObject(ref jsonReader, row, dataTable); + dataTable.Rows.Add(row); + rowCount++; + + // Write batch when we hit the batch size + if (dataTable.Rows.Count >= BatchSize) + { + using var reader = dataTable.CreateDataReader(); + DataSerializer.Serialize(compressStream, reader); + dataTable.Clear(); + batchCount++; + } + } + } + + // Write remaining rows + if (dataTable.Rows.Count > 0) + { + using var reader = dataTable.CreateDataReader(); + DataSerializer.Serialize(compressStream, reader); + batchCount++; + } + compressStream.Flush(); var newSize = new FileInfo(outputFile).Length; totalNewSize += newSize; var ratio = (double)newSize / originalSize * 100; - Console.WriteLine($"OK ({originalSize:N0} -> {newSize:N0} bytes, {ratio:F1}%)"); + Console.WriteLine($"OK ({rowCount:N0} rows, {batchCount} batches, {originalSize:N0} -> {newSize:N0} bytes, {ratio:F1}%)"); } catch (Exception ex) { @@ -84,74 +187,197 @@ else } return 0; -static DataTable CreateDataTable(List> records) +/// +/// Parse CREATE TABLE statement from SQL script to extract column names and types. 
/// <summary>
/// Parse the CREATE TABLE statement for a table out of a SQL script and return
/// its columns, in the order they appear, with the .NET type each one maps to.
/// </summary>
/// <param name="scriptPath">Path of the SQL script to read.</param>
/// <param name="tableName">Table name as written inside <c>[dbo].[...]</c> in the script.</param>
/// <returns>
/// One (name, type) pair per recognized column definition. The list is empty
/// when the script contains no matching CREATE TABLE statement.
/// </returns>
static List<(string Name, Type Type)> ParseSqlSchema(string scriptPath, string tableName)
{
    var columns = new List<(string Name, Type Type)>();
    var sql = File.ReadAllText(scriptPath);

    // Expected shape: CREATE TABLE [dbo].[Name] ( ...column list... );
    // Singleline lets '.' cross line breaks; the lazy quantifier stops at the first ");".
    var tablePattern = $@"CREATE\s+TABLE\s+\[dbo\]\.\[{Regex.Escape(tableName)}\]\s*\((.*?)\);";
    var tableMatch = Regex.Match(sql, tablePattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);
    if (!tableMatch.Success)
    {
        return columns;
    }

    // A column definition is a bracketed identifier followed by a known type name,
    // e.g. "[Qty] DECIMAL(18,4)" or "[Id] [bigint]". Constraint and index lines never
    // match because their bracketed identifier is not followed by a type name, so no
    // separate CONSTRAINT filter is needed. NVARCHAR/NCHAR and bracketed type names
    // are accepted so those columns are not silently left out of the schema.
    var columnPattern = @"\[(\w+)\]\s+\[?(NVARCHAR|VARCHAR|NCHAR|CHAR|BIGINT|DECIMAL|DATETIME2|BIT|INT)\]?";
    var columnsBlock = tableMatch.Groups[1].Value;

    foreach (Match definition in Regex.Matches(columnsBlock, columnPattern, RegexOptions.IgnoreCase))
    {
        var columnName = definition.Groups[1].Value;

        // MapSqlTypeToNet normalizes case itself, so the raw capture is passed through.
        var netType = MapSqlTypeToNet(definition.Groups[2].Value);
        columns.Add((columnName, netType));
    }

    return columns;
}
/// <summary>
/// Map a SQL Server type name to the .NET type used for the DataTable column.
/// </summary>
/// <param name="sqlType">SQL type name without size or precision; compared case-insensitively.</param>
/// <returns>The mapped .NET type, or <see cref="string"/> for any name not listed.</returns>
static Type MapSqlTypeToNet(string sqlType) => sqlType.ToUpperInvariant() switch
{
    "VARCHAR" or "CHAR" => typeof(string),
    "BIGINT" => typeof(long),
    "INT" => typeof(int),
    "DECIMAL" => typeof(decimal),
    "DATETIME2" => typeof(DateTime),
    "BIT" => typeof(bool),
    _ => typeof(string)
};

/// <summary>
/// Read the properties of the current JSON object into a DataRow.
/// </summary>
/// <remarks>
/// The reader must be positioned on the object's StartObject token; on return it is
/// positioned on the matching EndObject token. Properties are matched to columns by
/// name, ignoring case. Properties with no matching column are skipped. A nested
/// object or array is skipped as a whole and, when it belongs to a known column,
/// stored as DBNull. <c>Utf8JsonReader.Skip</c> requires a reader created over the
/// complete JSON payload.
/// </remarks>
/// <param name="reader">JSON reader positioned on StartObject.</param>
/// <param name="row">Row that receives the values.</param>
/// <param name="table">Table whose columns define the accepted property names and types.</param>
static void ReadJsonObject(ref Utf8JsonReader reader, DataRow row, DataTable table)
{
    while (reader.Read() && reader.TokenType != JsonTokenType.EndObject)
    {
        if (reader.TokenType != JsonTokenType.PropertyName)
        {
            continue;
        }

        var propertyName = reader.GetString()!;
        reader.Read(); // advance to the property's value

        // Find the matching column (ordinal, case-insensitive).
        DataColumn? column = null;
        foreach (DataColumn candidate in table.Columns)
        {
            if (candidate.ColumnName.Equals(propertyName, StringComparison.OrdinalIgnoreCase))
            {
                column = candidate;
                break;
            }
        }

        if (reader.TokenType is JsonTokenType.StartObject or JsonTokenType.StartArray)
        {
            // Containers cannot be stored in a scalar column. They must be consumed in
            // full; otherwise their inner property names would be read as row properties
            // and their closing brace would end this row early.
            reader.Skip();
            if (column is not null)
            {
                row[column] = DBNull.Value;
            }

            continue;
        }

        if (column is not null)
        {
            row[column] = ReadJsonValue(ref reader, column.DataType);
        }
    }
}

/// <summary>
/// Convert the scalar JSON token the reader is positioned on to the target .NET type.
/// </summary>
/// <remarks>
/// All text/number conversions use the invariant culture so results do not depend on
/// the machine's regional settings. Number tokens that do not fit the target type make
/// the underlying <c>Get*</c> call throw, as before.
/// </remarks>
/// <param name="reader">JSON reader positioned on a value token.</param>
/// <param name="targetType">Column type: string, DateTime, long, int, decimal or bool.</param>
/// <returns>
/// The converted value, or <see cref="DBNull.Value"/> for JSON null, for text that
/// cannot be parsed as the target type, and for unsupported token/type combinations.
/// </returns>
static object ReadJsonValue(ref Utf8JsonReader reader, Type targetType)
{
    // Fully qualified so the block does not depend on an extra using directive.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var token = reader.TokenType;

    if (token == JsonTokenType.Null)
    {
        return DBNull.Value;
    }

    if (targetType == typeof(string))
    {
        return token switch
        {
            JsonTokenType.String => reader.GetString() ?? (object)DBNull.Value,
            JsonTokenType.Number => reader.GetDecimal().ToString(invariant),
            JsonTokenType.True => "true",
            JsonTokenType.False => "false",
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(DateTime))
    {
        if (token == JsonTokenType.String
            && DateTime.TryParse(reader.GetString(), invariant, System.Globalization.DateTimeStyles.None, out var timestamp))
        {
            return timestamp;
        }

        return DBNull.Value;
    }

    if (targetType == typeof(long))
    {
        return token switch
        {
            JsonTokenType.Number => reader.GetInt64(),
            JsonTokenType.String when long.TryParse(reader.GetString(), System.Globalization.NumberStyles.Integer, invariant, out var parsed) => parsed,
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(int))
    {
        return token switch
        {
            JsonTokenType.Number => reader.GetInt32(),
            JsonTokenType.String when int.TryParse(reader.GetString(), System.Globalization.NumberStyles.Integer, invariant, out var parsed) => parsed,
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(decimal))
    {
        return token switch
        {
            JsonTokenType.Number => reader.GetDecimal(),
            JsonTokenType.String when decimal.TryParse(reader.GetString(), System.Globalization.NumberStyles.Number, invariant, out var parsed) => parsed,
            _ => DBNull.Value
        };
    }

    if (targetType == typeof(bool))
    {
        return token switch
        {
            JsonTokenType.True => true,
            JsonTokenType.False => false,
            JsonTokenType.Number => reader.GetInt32() != 0,
            JsonTokenType.String => ParseBooleanText(reader.GetString()),
            _ => DBNull.Value
        };
    }

    return DBNull.Value;

    // Accepts "true"/"false" (any case) and "1"/"0". Anything else is unknown and is
    // stored as DBNull rather than being reported as false.
    static object ParseBooleanText(string? text)
    {
        if (text is null)
        {
            return DBNull.Value;
        }

        if (text.Equals("true", StringComparison.OrdinalIgnoreCase) || text == "1")
        {
            return true;
        }

        if (text.Equals("false", StringComparison.OrdinalIgnoreCase) || text == "0")
        {
            return false;
        }

        return DBNull.Value;
    }
}
/// <summary>
/// Advance the reader past the value it is currently positioned on.
/// </summary>
/// <remarks>
/// When the current token opens an object or array, the reader is moved to the
/// matching closing token. For any other token nothing is read, because a scalar
/// value is a single token the reader is already on.
/// </remarks>
/// <param name="reader">JSON reader positioned on the value to skip.</param>
static void SkipJsonValue(ref Utf8JsonReader reader)
{
    var opener = reader.TokenType;

    // Choose the closing token that balances the opening one; scalars need no work.
    JsonTokenType closer;
    switch (opener)
    {
        case JsonTokenType.StartObject:
            closer = JsonTokenType.EndObject;
            break;
        case JsonTokenType.StartArray:
            closer = JsonTokenType.EndArray;
            break;
        default:
            return;
    }

    // Count only delimiters of the same kind as the opener until they balance out.
    for (var pending = 1; pending > 0 && reader.Read();)
    {
        var current = reader.TokenType;
        if (current == opener)
        {
            pending++;
        }
        else if (current == closer)
        {
            pending--;
        }
    }
}