diff --git a/Tools/CacheConverter/Program.cs b/Tools/CacheConverter/Program.cs index 8667ee7..59719d9 100644 --- a/Tools/CacheConverter/Program.cs +++ b/Tools/CacheConverter/Program.cs @@ -102,47 +102,49 @@ await Parallel.ForEachAsync(jsonFiles, options, async (jsonFile, cancellationTok dataTable.Columns.Add(colName, colType); } - // Stream JSON and write to protobuf in batches - await using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan | FileOptions.Asynchronous); - await using var decompressStream = new DecompressionStream(inputFs); - await using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024, FileOptions.Asynchronous); - await using var compressStream = new CompressionStream(outputFs, level: 10); - int rowCount = 0; int batchCount = 0; - // True streaming: DeserializeAsyncEnumerable streams each array element without loading entire JSON - var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true }; - await foreach (var element in JsonSerializer.DeserializeAsyncEnumerable( - decompressStream, - jsonOptions, - cancellationToken)) + // Stream JSON and write to protobuf in batches + // Use explicit dispose to ensure file is closed before reading size { - var row = dataTable.NewRow(); - ReadJsonElement(element, row, dataTable); - dataTable.Rows.Add(row); - rowCount++; + await using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan | FileOptions.Asynchronous); + await using var decompressStream = new DecompressionStream(inputFs); + await using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024, FileOptions.Asynchronous); + await using var compressStream = new CompressionStream(outputFs, level: 10); - // Write batch when we hit the batch size - if (dataTable.Rows.Count >= BatchSize) + // True streaming: DeserializeAsyncEnumerable streams each array element without loading entire JSON + var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true }; + await foreach (var element in JsonSerializer.DeserializeAsyncEnumerable( + decompressStream, + jsonOptions, + cancellationToken)) + { + var row = dataTable.NewRow(); + ReadJsonElement(element, row, dataTable); + dataTable.Rows.Add(row); + rowCount++; + + // Write batch when we hit the batch size + if (dataTable.Rows.Count >= BatchSize) + { + using var reader = dataTable.CreateDataReader(); + DataSerializer.Serialize(compressStream, reader); + dataTable.Clear(); + batchCount++; + } + } + + // Write remaining rows + if (dataTable.Rows.Count > 0) { using var reader = dataTable.CreateDataReader(); DataSerializer.Serialize(compressStream, reader); - dataTable.Clear(); batchCount++; } - } - - // Write remaining rows - if (dataTable.Rows.Count > 0) - { - using var reader = dataTable.CreateDataReader(); - DataSerializer.Serialize(compressStream, reader); - batchCount++; - } - - await compressStream.FlushAsync(cancellationToken); + } // Streams closed here + // Read file size after streams are fully closed var newSize = new FileInfo(outputFile).Length; Interlocked.Add(ref totalNewSize, newSize);