Skip to content

Commit d1b3816

Browse files
Minor File.ReadAllBytes* improvements (#61519)
* switch from FileStream to RandomAccess * use Array.MaxLength as a limit for File.ReadAllBytes and fix an edge case bug for files: Array.MaxLength < Length < int.MaxValue * there is no gain of using FileOptions.SequentialScan on Unix, as it requires an additional sys call Co-authored-by: Dan Moseley <[email protected]>
1 parent 10e107d commit d1b3816

File tree

3 files changed

+68
-41
lines changed

3 files changed

+68
-41
lines changed

src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,21 @@ public void ReadFileOver2GB()
7272
Assert.Throws<IOException>(() => File.ReadAllBytes(path));
7373
}
7474

75+
[Fact]
76+
[OuterLoop]
77+
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
78+
public void ReadFileOverMaxArrayLength()
79+
{
80+
string path = GetTestFilePath();
81+
using (FileStream fs = File.Create(path))
82+
{
83+
fs.SetLength(Array.MaxLength + 1L);
84+
}
85+
86+
// File is too large for ReadAllBytes at once
87+
Assert.Throws<IOException>(() => File.ReadAllBytes(path));
88+
}
89+
7590
[Fact]
7691
public void Overwrite()
7792
{

src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytesAsync.cs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,16 +73,31 @@ public Task AlreadyCanceledAsync()
7373
[Fact]
7474
[OuterLoop]
7575
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
76-
public Task ReadFileOver2GBAsync()
76+
public async Task ReadFileOver2GBAsync()
7777
{
7878
string path = GetTestFilePath();
7979
using (FileStream fs = File.Create(path))
8080
{
8181
fs.SetLength(int.MaxValue + 1L);
8282
}
8383

84-
// File is too large for ReadAllBytes at once
85-
return Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
84+
// File is too large for ReadAllBytesAsync at once
85+
await Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
86+
}
87+
88+
[Fact]
89+
[OuterLoop]
90+
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
91+
public async Task ReadFileOverMaxArrayLengthAsync()
92+
{
93+
string path = GetTestFilePath();
94+
using (FileStream fs = File.Create(path))
95+
{
96+
fs.SetLength(Array.MaxLength + 1L);
97+
}
98+
99+
// File is too large for ReadAllBytesAsync at once
100+
await Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
86101
}
87102

88103
[Fact]

src/libraries/System.Private.CoreLib/src/System/IO/File.cs

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -251,27 +251,33 @@ public static void WriteAllText(string path, string? contents, Encoding encoding
251251

252252
public static byte[] ReadAllBytes(string path)
253253
{
254-
// bufferSize == 1 used to avoid unnecessary buffer in FileStream
255-
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, FileOptions.SequentialScan))
254+
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
255+
FileOptions options = OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None;
256+
using (SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options))
256257
{
257258
long fileLength = 0;
258-
if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue)
259+
if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength)
259260
{
260261
throw new IOException(SR.IO_FileTooLong2GB);
261262
}
263+
264+
#if DEBUG
265+
fileLength = 0; // improve the test coverage for ReadAllBytesUnknownLength
266+
#endif
267+
262268
if (fileLength == 0)
263269
{
264-
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there is non-seekable file stream.
270+
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there are non-seekable files.
265271
// Thus we need to assume 0 doesn't mean empty.
266-
return ReadAllBytesUnknownLength(fs);
272+
return ReadAllBytesUnknownLength(sfh);
267273
}
268274

269275
int index = 0;
270276
int count = (int)fileLength;
271277
byte[] bytes = new byte[count];
272278
while (count > 0)
273279
{
274-
int n = fs.Read(bytes, index, count);
280+
int n = RandomAccess.ReadAtOffset(sfh, bytes.AsSpan(index, count), index);
275281
if (n == 0)
276282
{
277283
ThrowHelper.ThrowEndOfFileException();
@@ -519,44 +525,35 @@ private static async Task<string> InternalReadAllTextAsync(string path, Encoding
519525
return Task.FromCanceled<byte[]>(cancellationToken);
520526
}
521527

522-
var fs = new FileStream(
523-
path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream
524-
FileOptions.Asynchronous | FileOptions.SequentialScan);
528+
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
529+
FileOptions options = FileOptions.Asynchronous | (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None);
530+
SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options);
525531

526-
bool returningInternalTask = false;
527-
try
532+
long fileLength = 0L;
533+
if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength)
528534
{
529-
long fileLength = 0L;
530-
if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue)
531-
{
532-
var e = new IOException(SR.IO_FileTooLong2GB);
533-
ExceptionDispatchInfo.SetCurrentStackTrace(e);
534-
return Task.FromException<byte[]>(e);
535-
}
536-
537-
returningInternalTask = true;
538-
return fileLength > 0 ?
539-
InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) :
540-
InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken);
541-
}
542-
finally
543-
{
544-
if (!returningInternalTask)
545-
{
546-
fs.Dispose();
547-
}
535+
sfh.Dispose();
536+
return Task.FromException<byte[]>(ExceptionDispatchInfo.SetCurrentStackTrace(new IOException(SR.IO_FileTooLong2GB)));
548537
}
538+
539+
#if DEBUG
540+
fileLength = 0; // improve the test coverage for InternalReadAllBytesUnknownLengthAsync
541+
#endif
542+
543+
return fileLength > 0 ?
544+
InternalReadAllBytesAsync(sfh, (int)fileLength, cancellationToken) :
545+
InternalReadAllBytesUnknownLengthAsync(sfh, cancellationToken);
549546
}
550547

551-
private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int count, CancellationToken cancellationToken)
548+
private static async Task<byte[]> InternalReadAllBytesAsync(SafeFileHandle sfh, int count, CancellationToken cancellationToken)
552549
{
553-
using (fs)
550+
using (sfh)
554551
{
555552
int index = 0;
556553
byte[] bytes = new byte[count];
557554
do
558555
{
559-
int n = await fs.ReadAsync(new Memory<byte>(bytes, index, count - index), cancellationToken).ConfigureAwait(false);
556+
int n = await RandomAccess.ReadAtOffsetAsync(sfh, bytes.AsMemory(index), index, cancellationToken).ConfigureAwait(false);
560557
if (n == 0)
561558
{
562559
ThrowHelper.ThrowEndOfFileException();
@@ -569,7 +566,7 @@ private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int c
569566
}
570567
}
571568

572-
private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken)
569+
private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(SafeFileHandle sfh, CancellationToken cancellationToken)
573570
{
574571
byte[] rentedArray = ArrayPool<byte>.Shared.Rent(512);
575572
try
@@ -595,7 +592,7 @@ private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStr
595592
}
596593

597594
Debug.Assert(bytesRead < rentedArray.Length);
598-
int n = await fs.ReadAsync(rentedArray.AsMemory(bytesRead), cancellationToken).ConfigureAwait(false);
595+
int n = await RandomAccess.ReadAtOffsetAsync(sfh, rentedArray.AsMemory(bytesRead), bytesRead, cancellationToken).ConfigureAwait(false);
599596
if (n == 0)
600597
{
601598
return rentedArray.AsSpan(0, bytesRead).ToArray();
@@ -605,7 +602,7 @@ private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStr
605602
}
606603
finally
607604
{
608-
fs.Dispose();
605+
sfh.Dispose();
609606
ArrayPool<byte>.Shared.Return(rentedArray);
610607
}
611608
}
@@ -775,7 +772,7 @@ private static void Validate(string path, Encoding encoding)
775772
throw new ArgumentException(SR.Argument_EmptyPath, nameof(path));
776773
}
777774

778-
private static byte[] ReadAllBytesUnknownLength(FileStream fs)
775+
private static byte[] ReadAllBytesUnknownLength(SafeFileHandle sfh)
779776
{
780777
byte[]? rentedArray = null;
781778
Span<byte> buffer = stackalloc byte[512];
@@ -803,7 +800,7 @@ private static byte[] ReadAllBytesUnknownLength(FileStream fs)
803800
}
804801

805802
Debug.Assert(bytesRead < buffer.Length);
806-
int n = fs.Read(buffer.Slice(bytesRead));
803+
int n = RandomAccess.ReadAtOffset(sfh, buffer.Slice(bytesRead), bytesRead);
807804
if (n == 0)
808805
{
809806
return buffer.Slice(0, bytesRead).ToArray();

0 commit comments

Comments
 (0)