Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,8 @@
<Compile Include="$(MSBuildThisFileDirectory)System\StringSplitOptions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SystemException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIEncoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIEncodingSealed.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIUtility.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilderCache.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\CodePageDataItem.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
Expand All @@ -775,6 +777,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderReplacementFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.New.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingData.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
Expand Down
4 changes: 2 additions & 2 deletions src/System.Private.CoreLib/shared/System/String.cs
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ internal static unsafe string CreateStringFromEncoding(
Debug.Assert(byteLength >= 0);

// Get our string length
int stringLength = encoding.GetCharCount(bytes, byteLength, null);
int stringLength = encoding.GetCharCount(bytes, byteLength);
Debug.Assert(stringLength >= 0, "stringLength >= 0");

// They gave us an empty string if they needed one
Expand All @@ -491,7 +491,7 @@ internal static unsafe string CreateStringFromEncoding(
string s = FastAllocateString(stringLength);
fixed (char* pTempChars = &s._firstChar)
{
int doubleCheck = encoding.GetChars(bytes, byteLength, pTempChars, stringLength, null);
int doubleCheck = encoding.GetChars(bytes, byteLength, pTempChars, stringLength);
Debug.Assert(stringLength == doubleCheck,
"Expected encoding.GetChars to return same length as encoding.GetCharCount");
}
Expand Down
1,061 changes: 444 additions & 617 deletions src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs

Large diffs are not rendered by default.

185 changes: 185 additions & 0 deletions src/System.Private.CoreLib/shared/System/Text/ASCIIEncodingSealed.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Text
{
public partial class ASCIIEncoding
{
    /// <summary>
    /// Sealed specialization of <see cref="ASCIIEncoding"/>.
    /// </summary>
    /// <remarks>
    /// Sealing the type buys two things:
    /// (1) calls can be devirtualized (see https://github.com/dotnet/coreclr/pull/9230), and
    /// (2) the fallback behavior is known up front (invalid data is always replaced with "?"),
    ///     which lets the counting routines below skip scanning the input entirely.
    /// </remarks>
    internal sealed class ASCIIEncodingSealed : ASCIIEncoding
    {
        /// <summary>
        /// Produces a mutable copy of this encoding as a plain <see cref="ASCIIEncoding"/>.
        /// </summary>
        /// <remarks>
        /// Encoding.Clone's base implementation uses object.MemberwiseClone and then marks the
        /// result mutable. A mutable instance of the sealed type would break the invariants this
        /// type relies on, so the copy is deliberately created as the unsealed base type instead.
        /// </remarks>
        public override object Clone()
        {
            ASCIIEncoding mutableCopy = new ASCIIEncoding();
            mutableCopy.IsReadOnly = false;
            return mutableCopy;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode <paramref name="count"/> chars.
        /// A null pointer or a negative count is reported through ThrowHelper.
        /// </summary>
        public override unsafe int GetByteCount(char* chars, int count)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            if (count < 0)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Every ASCII char becomes exactly one ASCII byte. Anything that is not ASCII
            // (lone surrogate halves included) is turned into a single "?" byte by the
            // fallback this sealed instance uses. The output length therefore always equals
            // the input length and no inspection of the data is required.
            return count;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode the whole array, which is its length.
        /// A null array is reported through ThrowHelper.
        /// </summary>
        public override int GetByteCount(char[] chars)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            // One byte per char; see GetByteCount(char*, int) for the reasoning.
            return chars.Length;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode <paramref name="count"/> chars starting
        /// at <paramref name="index"/>. A null array or an invalid range is reported through
        /// ThrowHelper.
        /// </summary>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            EnsureRangeWithinBuffer(chars.Length, index, count, ExceptionArgument.chars);

            // One byte per char; see GetByteCount(char*, int) for the reasoning.
            return count;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode the span, which is its length
        /// (one byte per char; see GetByteCount(char*, int) for the reasoning).
        /// </summary>
        public override int GetByteCount(ReadOnlySpan<char> chars) => chars.Length;

        /// <summary>
        /// Returns the number of bytes needed to encode the string, which is its length.
        /// A null string is reported through ThrowHelper.
        /// </summary>
        public override int GetByteCount(string chars)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            // One byte per char; see GetByteCount(char*, int) for the reasoning.
            return chars.Length;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding <paramref name="count"/> bytes.
        /// A null pointer or a negative count is reported through ThrowHelper.
        /// </summary>
        public override unsafe int GetCharCount(byte* bytes, int count)
        {
            if (bytes == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes);
            }

            if (count < 0)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            // The chars -> bytes mapping described in GetByteCount(char*, int) holds in reverse
            // as well: each ASCII byte widens to one ASCII char, and each non-ASCII byte is
            // replaced by a single "?" char by the fallback this sealed instance uses. The
            // decoded length therefore always equals the incoming byte count.
            return count;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding the whole array, which is its length.
        /// A null array is reported through ThrowHelper.
        /// </summary>
        public override int GetCharCount(byte[] bytes)
        {
            if (bytes == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes);
            }

            // One char per byte; see GetCharCount(byte*, int) for the reasoning.
            return bytes.Length;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding <paramref name="count"/> bytes
        /// starting at <paramref name="index"/>. A null array or an invalid range is reported
        /// through ThrowHelper.
        /// </summary>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            if (bytes == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes);
            }

            EnsureRangeWithinBuffer(bytes.Length, index, count, ExceptionArgument.bytes);

            // One char per byte; see GetCharCount(byte*, int) for the reasoning.
            return count;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding the span, which is its length
        /// (one char per byte; see GetCharCount(byte*, int) for the reasoning).
        /// </summary>
        public override int GetCharCount(ReadOnlySpan<byte> bytes) => bytes.Length;

        /// <summary>
        /// Shared (index, count) validation for the array-based overloads.
        /// </summary>
        /// <param name="bufferLength">Length of the array being sliced.</param>
        /// <param name="index">Requested start offset.</param>
        /// <param name="count">Requested element count.</param>
        /// <param name="bufferArgument">Argument identifier reported when the range overruns the array.</param>
        private static void EnsureRangeWithinBuffer(int bufferLength, int index, int count, ExceptionArgument bufferArgument)
        {
            // OR-ing the two values propagates either sign bit, so one comparison detects
            // a negative index or a negative count.
            if ((index | count) < 0)
            {
                ExceptionArgument negativeArgument = (index < 0) ? ExceptionArgument.index : ExceptionArgument.count;
                ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Written as a subtraction so that index + count is never computed (and so cannot overflow).
            if (count > bufferLength - index)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(bufferArgument, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
            }
        }
    }
}
}
70 changes: 70 additions & 0 deletions src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace System.Text
{
/// <summary>
/// Straightforward scalar (non-SIMD) implementations of the ASCII scanning and
/// transcoding primitives. Vectorized methods with the same signatures can be
/// substituted here as drop-in replacements.
/// </summary>
internal unsafe static class ASCIIUtility
{
    /// <summary>
    /// Scans <paramref name="byteCount"/> bytes starting at <paramref name="pBytes"/> and
    /// returns the offset of the first byte with its high bit set, or
    /// <paramref name="byteCount"/> if there is none.
    /// </summary>
    public static uint GetIndexOfFirstNonAsciiByte(byte* pBytes, uint byteCount)
    {
        for (uint offset = 0; offset < byteCount; offset++)
        {
            if (pBytes[offset] >= 0x80)
            {
                return offset;
            }
        }

        return byteCount;
    }

    /// <summary>
    /// Scans <paramref name="charCount"/> chars starting at <paramref name="pChars"/> and
    /// returns the offset of the first char above U+007F, or
    /// <paramref name="charCount"/> if there is none.
    /// </summary>
    public static uint GetIndexOfFirstNonAsciiChar(char* pChars, uint charCount)
    {
        for (uint offset = 0; offset < charCount; offset++)
        {
            if (pChars[offset] >= 0x80)
            {
                return offset;
            }
        }

        return charCount;
    }

    /// <summary>
    /// Copies chars from <paramref name="pChars"/> to <paramref name="pBytes"/> as single
    /// bytes, stopping at the first char above U+007F (that char is not written).
    /// Returns the number of elements copied.
    /// </summary>
    public static uint NarrowUtf16ToAscii(char* pChars, byte* pBytes, uint elementCount)
    {
        for (uint offset = 0; offset < elementCount; offset++)
        {
            char current = pChars[offset];
            if (current >= 0x80)
            {
                return offset;
            }

            pBytes[offset] = (byte)current;
        }

        return elementCount;
    }

    /// <summary>
    /// Copies bytes from <paramref name="pBytes"/> to <paramref name="pChars"/> as chars,
    /// stopping at the first byte above 0x7F (that byte is not written).
    /// Returns the number of elements copied.
    /// </summary>
    public static uint WidenAsciiToUtf16(byte* pBytes, char* pChars, uint elementCount)
    {
        for (uint offset = 0; offset < elementCount; offset++)
        {
            byte current = pBytes[offset];
            if (current >= 0x80)
            {
                return offset;
            }

            pChars[offset] = (char)current;
        }

        return elementCount;
    }
}
}
Loading