fix: read file size after streams are closed in converter

This commit is contained in:
Joseph Doherty
2026-01-06 15:39:47 -05:00
parent 6ebd78d487
commit cd68b2c655
+7 -5
View File
@@ -102,15 +102,17 @@ await Parallel.ForEachAsync(jsonFiles, options, async (jsonFile, cancellationTok
dataTable.Columns.Add(colName, colType); dataTable.Columns.Add(colName, colType);
} }
int rowCount = 0;
int batchCount = 0;
// Stream JSON and write to protobuf in batches // Stream JSON and write to protobuf in batches
// Scope the `await using` streams in a block so they are disposed and the output file is closed before its size is read
{
await using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan | FileOptions.Asynchronous); await using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan | FileOptions.Asynchronous);
await using var decompressStream = new DecompressionStream(inputFs); await using var decompressStream = new DecompressionStream(inputFs);
await using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024, FileOptions.Asynchronous); await using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024, FileOptions.Asynchronous);
await using var compressStream = new CompressionStream(outputFs, level: 10); await using var compressStream = new CompressionStream(outputFs, level: 10);
int rowCount = 0;
int batchCount = 0;
// True streaming: DeserializeAsyncEnumerable streams each array element without loading entire JSON // True streaming: DeserializeAsyncEnumerable streams each array element without loading entire JSON
var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true }; var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
await foreach (var element in JsonSerializer.DeserializeAsyncEnumerable<JsonElement>( await foreach (var element in JsonSerializer.DeserializeAsyncEnumerable<JsonElement>(
@@ -140,9 +142,9 @@ await Parallel.ForEachAsync(jsonFiles, options, async (jsonFile, cancellationTok
DataSerializer.Serialize(compressStream, reader); DataSerializer.Serialize(compressStream, reader);
batchCount++; batchCount++;
} }
} // Streams closed here
await compressStream.FlushAsync(cancellationToken); // Read file size after streams are fully closed
var newSize = new FileInfo(outputFile).Length; var newSize = new FileInfo(outputFile).Length;
Interlocked.Add(ref totalNewSize, newSize); Interlocked.Add(ref totalNewSize, newSize);