fix: read file size after streams are closed in converter
This commit is contained in:
@@ -102,15 +102,17 @@ await Parallel.ForEachAsync(jsonFiles, options, async (jsonFile, cancellationTok
|
||||
dataTable.Columns.Add(colName, colType);
|
||||
}
|
||||
|
||||
int rowCount = 0;
|
||||
int batchCount = 0;
|
||||
|
||||
// Stream JSON and write to protobuf in batches
|
||||
// Scope the streams in a block so they are disposed (and the output file closed) before its size is read
|
||||
{
|
||||
await using var inputFs = new FileStream(jsonFile, FileMode.Open, FileAccess.Read, FileShare.Read, 256 * 1024, FileOptions.SequentialScan | FileOptions.Asynchronous);
|
||||
await using var decompressStream = new DecompressionStream(inputFs);
|
||||
await using var outputFs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None, 256 * 1024, FileOptions.Asynchronous);
|
||||
await using var compressStream = new CompressionStream(outputFs, level: 10);
|
||||
|
||||
int rowCount = 0;
|
||||
int batchCount = 0;
|
||||
|
||||
// True streaming: DeserializeAsyncEnumerable streams each array element without loading entire JSON
|
||||
var jsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
|
||||
await foreach (var element in JsonSerializer.DeserializeAsyncEnumerable<JsonElement>(
|
||||
@@ -140,9 +142,9 @@ await Parallel.ForEachAsync(jsonFiles, options, async (jsonFile, cancellationTok
|
||||
DataSerializer.Serialize(compressStream, reader);
|
||||
batchCount++;
|
||||
}
|
||||
} // Streams closed here
|
||||
|
||||
await compressStream.FlushAsync(cancellationToken);
|
||||
|
||||
// Read file size after streams are fully closed
|
||||
var newSize = new FileInfo(outputFile).Length;
|
||||
Interlocked.Add(ref totalNewSize, newSize);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user