This repository was archived by the owner on Jan 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Refactor Encoding to split fast-path and fallback logic #23098
Merged
GrabYourPitchforks
merged 15 commits into
dotnet:master
from
GrabYourPitchforks:ascii_7
Mar 11, 2019
Merged
Changes from 3 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
93d1a7c
Refactor Encoding to split fast-path and fallback logic
GrabYourPitchforks 432173f
Remove direct dependency on JitHelpers
GrabYourPitchforks a088a2f
PR feedback - also slightly optimize EncoderNLS draining
GrabYourPitchforks 2e9ac17
Suppress failures in System.Text.Encoding.Tests
GrabYourPitchforks 6402df3
Remove unused fields from EncoderNLS
GrabYourPitchforks 66cb897
Move some GetBytes implementations around
GrabYourPitchforks 44a0ffd
NoInline ASCIIUtility, as inlining was throwing off benchmarks
GrabYourPitchforks 3588a2a
Remove move of ASCIIEncodingSealed and move it back into ASCIIEncoding
GrabYourPitchforks c98d25f
Rename GetBytes -> EncodeRune, add devdoc, #ifdef away non-shipping code
GrabYourPitchforks 2545ee2
Perform same refactoring for GetByteCount we had for GetBytes
GrabYourPitchforks d834abc
Refactor GetCharCount as with other methods
GrabYourPitchforks 93a80d5
Refactor GetChars same as previous functions
GrabYourPitchforks d02e3e4
Misc PR feedback: cleanup comments, delete dead methods
GrabYourPitchforks 0333dff
Minor perf optimizations to fallback logic
GrabYourPitchforks 96b3b94
Rename Encoding.New.cs -> Encoding.Internal.cs
GrabYourPitchforks File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1,061 changes: 444 additions & 617 deletions
1,061
src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs
Large diffs are not rendered by default.
Oops, something went wrong.
185 changes: 185 additions & 0 deletions
185
src/System.Private.CoreLib/shared/System/Text/ASCIIEncodingSealed.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,185 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| using System; | ||
| using System.Buffers; | ||
| using System.Diagnostics; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
|
|
||
| namespace System.Text | ||
| { | ||
public partial class ASCIIEncoding
{
    // Why a dedicated sealed subtype exists:
    //   * calls made through a sealed type can be devirtualized
    //     (see https://github.com/dotnet/coreclr/pull/9230);
    //   * the fallback behavior of this type is fixed (invalid data is always replaced
    //     with "?"), which allows the highly simplified implementations below.
    internal sealed class ASCIIEncodingSealed : ASCIIEncoding
    {
        /// <summary>
        /// Produces a mutable instance of the base <see cref="ASCIIEncoding"/> type.
        /// </summary>
        /// <remarks>
        /// The base Encoding.Clone implementation calls object.MemberwiseClone and marks the
        /// copy mutable. Doing that for this sealed type would violate the invariants it relies
        /// on, so a new base-type instance is created and marked mutable instead.
        /// </remarks>
        public override object Clone()
        {
            ASCIIEncoding mutableCopy = new ASCIIEncoding();
            mutableCopy.IsReadOnly = false;
            return mutableCopy;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode <paramref name="count"/> chars.
        /// </summary>
        public override unsafe int GetByteCount(char* chars, int count)
        {
            // Argument validation: the pointer is checked before the count.
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            if (count < 0)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            // ASCII chars map 1:1 onto ASCII bytes. With the fallback used by this sealed
            // instance, every non-ASCII char (a lone surrogate half included) is replaced by
            // exactly one "?" byte, so the output length always equals the input length.
            return count;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode the whole array.
        /// </summary>
        public override int GetByteCount(char[] chars)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            // One byte per char; the rationale is spelled out in GetByteCount(char*, int).
            return chars.Length;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode <paramref name="count"/> chars
        /// starting at <paramref name="index"/>.
        /// </summary>
        public override int GetByteCount(char[] chars, int index, int count)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            if (index < 0 || count < 0)
            {
                // When both are negative, 'index' is the argument that gets reported.
                ExceptionArgument offendingArgument = (index < 0) ? ExceptionArgument.index : ExceptionArgument.count;
                ThrowHelper.ThrowArgumentOutOfRangeException(offendingArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count > chars.Length - index)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
            }

            // One byte per char; the rationale is spelled out in GetByteCount(char*, int).
            return count;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode the span.
        /// One byte per char; the rationale is spelled out in GetByteCount(char*, int).
        /// </summary>
        public override int GetByteCount(ReadOnlySpan<char> chars) => chars.Length;

        /// <summary>
        /// Returns the number of bytes needed to encode the string.
        /// </summary>
        public override int GetByteCount(string chars)
        {
            if (chars == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
            }

            // One byte per char; the rationale is spelled out in GetByteCount(char*, int).
            return chars.Length;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding <paramref name="count"/> bytes.
        /// </summary>
        public override unsafe int GetCharCount(byte* bytes, int count)
        {
            // Argument validation: the pointer is checked before the count.
            if (bytes == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes);
            }

            if (count < 0)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            // The 1:1 relationship described in GetByteCount(char*, int) holds in this direction
            // too: each ASCII byte widens to one ASCII char, and with the fallback used by this
            // sealed instance each non-ASCII byte becomes a single "?" char in the output.
            // The char count is therefore simply the incoming byte count.
            return count;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding the whole array.
        /// </summary>
        public override int GetCharCount(byte[] bytes)
        {
            if (bytes == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes);
            }

            // One char per byte; the rationale is spelled out in GetCharCount(byte*, int).
            return bytes.Length;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding <paramref name="count"/> bytes
        /// starting at <paramref name="index"/>.
        /// </summary>
        public override int GetCharCount(byte[] bytes, int index, int count)
        {
            if (bytes == null)
            {
                ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes);
            }

            if (index < 0 || count < 0)
            {
                // When both are negative, 'index' is the argument that gets reported.
                ExceptionArgument offendingArgument = (index < 0) ? ExceptionArgument.index : ExceptionArgument.count;
                ThrowHelper.ThrowArgumentOutOfRangeException(offendingArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (count > bytes.Length - index)
            {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
            }

            // One char per byte; the rationale is spelled out in GetCharCount(byte*, int).
            return count;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding the span.
        /// One char per byte; the rationale is spelled out in GetCharCount(byte*, int).
        /// </summary>
        public override int GetCharCount(ReadOnlySpan<byte> bytes) => bytes.Length;
    }
}
| } | ||
70 changes: 70 additions & 0 deletions
70
src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
| // See the LICENSE file in the project root for more information. | ||
|
|
||
| namespace System.Text | ||
| { | ||
| /* | ||
| * Contains naive unoptimized (non-SIMD) implementations of ASCII transcoding | ||
| * operations. Vectorized methods can be substituted here as a drop-in replacement. | ||
| */ | ||
|
|
||
internal unsafe static class ASCIIUtility
{
    /// <summary>
    /// Scans <paramref name="byteCount"/> bytes starting at <paramref name="pBytes"/>.
    /// </summary>
    /// <returns>
    /// The index of the first byte with its high bit set, or <paramref name="byteCount"/>
    /// when every byte is in the range 0x00..0x7F.
    /// </returns>
    public static uint GetIndexOfFirstNonAsciiByte(byte* pBytes, uint byteCount)
    {
        uint offset = 0;
        while (offset < byteCount && (pBytes[offset] & 0x80) == 0)
        {
            offset++;
        }
        return offset;
    }

    /// <summary>
    /// Scans <paramref name="charCount"/> chars starting at <paramref name="pChars"/>.
    /// </summary>
    /// <returns>
    /// The index of the first char above U+007F, or <paramref name="charCount"/>
    /// when every char is in the range U+0000..U+007F.
    /// </returns>
    public static uint GetIndexOfFirstNonAsciiChar(char* pChars, uint charCount)
    {
        uint offset = 0;
        while (offset < charCount && pChars[offset] < 0x80u)
        {
            offset++;
        }
        return offset;
    }

    /// <summary>
    /// Copies chars from <paramref name="pChars"/> to <paramref name="pBytes"/>, narrowing
    /// each one to a byte, and stops at the first char above U+007F.
    /// </summary>
    /// <returns>
    /// The number of elements written, which is also the index of the first non-ASCII
    /// char (or <paramref name="elementCount"/> if there was none).
    /// </returns>
    public static uint NarrowUtf16ToAscii(char* pChars, byte* pBytes, uint elementCount)
    {
        for (uint i = 0; i < elementCount; i++)
        {
            uint value = pChars[i];
            if (value >= 0x80u)
            {
                // Nothing is written for the offending element.
                return i;
            }
            pBytes[i] = (byte)value;
        }
        return elementCount;
    }

    /// <summary>
    /// Copies bytes from <paramref name="pBytes"/> to <paramref name="pChars"/>, widening
    /// each one to a char, and stops at the first byte above 0x7F.
    /// </summary>
    /// <returns>
    /// The number of elements written, which is also the index of the first non-ASCII
    /// byte (or <paramref name="elementCount"/> if there was none).
    /// </returns>
    public static uint WidenAsciiToUtf16(byte* pBytes, char* pChars, uint elementCount)
    {
        for (uint i = 0; i < elementCount; i++)
        {
            byte value = pBytes[i];
            if (value >= 0x80)
            {
                // Nothing is written for the offending element.
                return i;
            }
            pChars[i] = (char)value;
        }
        return elementCount;
    }
}
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.