diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs
index c2fb9889093938..8cee324b5162dc 100644
--- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs
+++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.Diagnostics;
 using System.Globalization;
 using System.Runtime.CompilerServices;
@@ -12,216 +13,194 @@ namespace System
     // The idea is to stay with static helper methods and strings
     internal static class DomainNameHelper
     {
+        // Regular ascii dot '.'
+        // IDEOGRAPHIC FULL STOP '\u3002'
+        // FULLWIDTH FULL STOP '\uFF0E'
+        // HALFWIDTH IDEOGRAPHIC FULL STOP '\uFF61'
+        // Using IndexOfAnyValues isn't beneficial here as it would defer to IndexOfAny(char, char, char, char) anyway
+        private const string IriDotCharacters = ".\u3002\uFF0E\uFF61";
+
+        // The Unicode specification allows certain code points to be normalized not to
+        // punycode, but to ASCII representations that retain the same meaning. For example,
+        // the codepoint U+00BC "Vulgar Fraction One Quarter" is normalized to '1/4' rather
+        // than being punycoded.
+        //
+        // This means that a host containing Unicode characters can be normalized to contain
+        // URI reserved characters, changing the meaning of a URI only when certain properties
+        // such as IdnHost are accessed. To be safe, disallow such delimiter characters (\ / ? @ # : [ ]) in normalized hosts.
+        private static readonly IndexOfAnyValues<char> s_unsafeForNormalizedHostChars =
+            IndexOfAnyValues.Create(@"\/?@#:[]");
+
+        // Takes into account the additional legal domain name characters '-' and '_'
+        // Note that '_' char is formally invalid but is historically in use, especially on corpnets
+        private static readonly IndexOfAnyValues<char> s_validChars =
+            IndexOfAnyValues.Create("-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz.");
+
+        // For IRI, we're accepting anything non-ascii, so invert the condition to just check for invalid ascii characters
+        private static readonly IndexOfAnyValues<char> s_iriInvalidAsciiChars = IndexOfAnyValues.Create(
+            "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
+            "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
+            " !\"#$%&'()*+,/:;<=>?@[\\]^`{|}~\u007F");
+
+        private static readonly IndexOfAnyValues<char> s_asciiLetterUpperOrColonChars =
+            IndexOfAnyValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZ:");
+
         private static readonly IdnMapping s_idnMapping = new IdnMapping();
 
-        internal const string Localhost = "localhost";
-        internal const string Loopback = "loopback";
+        private const string Localhost = "localhost";
+        private const string Loopback = "loopback";
+
+        // TODO https://github.com/dotnet/runtime/issues/28230: Replace once Ascii is available
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool IsAscii(ReadOnlySpan<char> chars) =>
+            chars.IndexOfAnyExceptInRange((char)0, (char)127) < 0;
 
+        // Returns str[start..end) lower-cased, with everything from a ':' onwards dropped.
+        // For "localhost" or "loopback" (in any casing), sets loopback to true and returns "localhost".
         internal static string ParseCanonicalName(string str, int start, int end, ref bool loopback)
         {
-            string? res = null;
-
-            for (int i = end - 1; i >= start; --i)
+            // Do a quick search for the colon or uppercase letters
+            int index = str.AsSpan(start, end - start).LastIndexOfAny(s_asciiLetterUpperOrColonChars);
+            if (index >= 0)
             {
-                if (char.IsAsciiLetterUpper(str[i]))
+                Debug.Assert(!str.AsSpan(start, index).Contains(':'),
+                    "A colon should appear at most once, and must never be followed by letters.");
+
+                if (str[start + index] == ':')
                 {
-                    res = str.Substring(start, end - start).ToLowerInvariant();
-                    break;
+                    // Shrink the slice to only include chars before the colon
+                    end = start + index;
+
+                    // Look for uppercase letters again.
+                    // The index value doesn't matter anymore (nor does the search direction), just whether we've found anything
+                    index = str.AsSpan(start, index).IndexOfAnyInRange('A', 'Z');
                 }
-                if (str[i] == ':')
-                    end = i;
             }
 
-            res ??= str.Substring(start, end - start);
+            Debug.Assert(index == -1 || char.IsAsciiLetterUpper(str[start + index]));
+
+            // Lower-case straight into the new string when uppercase letters were seen (one allocation, not two).
+            // Do not return early: "LOCALHOST" must still reach the loopback check below, as it did previously.
+            string res = index < 0
+                ? str.Substring(start, end - start)
+                : string.Create(end - start, (str, start), static (buffer, state) =>
+                {
+                    int newLength = state.str.AsSpan(state.start, buffer.Length).ToLowerInvariant(buffer);
+                    Debug.Assert(newLength == buffer.Length);
+                });
 
-            if (res == Localhost || res == Loopback)
+            if (res is Localhost or Loopback)
             {
                 loopback = true;
                 return Localhost;
             }
+
             return res;
         }
 
-        //
-        // IsValid
-        //
-        // Determines whether a string is a valid domain name
-        //
-        // subdomain ->