Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 0d0f32d

Browse files
committed
Rewrite Uri.EscapeString
Several public methods (Uri.EscapeDataString, Uri.EscapeUriString) and a bunch of internal call sites rely on the internal EscapeString helper. This helper has several issues with it: - It uses unsafe code. - It unnecessarily requires and copies through a char[] to get to a string when a string is the required result. - It has a lot of complexity around the handling of unicode. This PR rewrites it to utilize Span, Rune, and other newer features in a way that enables it to be both safe and efficient. Most inputs ends up being faster, and for very long inputs, it's much, much faster. The use of ValueStringBuilder also results in less memory allocation, in some cases significantly. The use of Rune also fixes two arguable bugs in the existing implementation around invalid Unicode sequences, which is why a couple tests were tweaked: - Some but not all invalid unicode patterns result in replacement characters being used: a few invalid sequences (e.g. just a high surrogate) result in an exception. We should be standardized on using replacement characters for all such invalid sequences. - Some patterns with invalid unicode patterns actually result in unnecessary encoding, e.g. `Uri.EscapeDataString("\uD800\uD800a")` results in `a` being encoded.
1 parent faefe04 commit 0d0f32d

File tree

7 files changed

+255
-260
lines changed

7 files changed

+255
-260
lines changed

src/System.Private.Uri/src/System.Private.Uri.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
<Compile Include="System\UriPartial.cs" />
3838
<Compile Include="System\UriScheme.cs" />
3939
<Compile Include="System\UriSyntax.cs" />
40+
<Compile Include="$(CommonPath)\CoreLib\System\Text\ValueStringBuilder.cs">
41+
<Link>Common\CoreLib\System\Text\ValueStringBuilder.cs</Link>
42+
</Compile>
4043
</ItemGroup>
4144
<ItemGroup Condition="'$(TargetsWindows)' == 'true'">
4245
<Compile Include="System\Uri.Windows.cs" />

src/System.Private.Uri/src/System/Uri.cs

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,8 +1373,8 @@ public static string HexEscape(char character)
13731373
return string.Create(3, character, (Span<char> chars, char c) =>
13741374
{
13751375
chars[0] = '%';
1376-
chars[1] = UriHelper.s_hexUpperChars[(c & 0xf0) >> 4];
1377-
chars[2] = UriHelper.s_hexUpperChars[c & 0xf];
1376+
chars[1] = (char)UriHelper.HexUpperChars[(c & 0xf0) >> 4];
1377+
chars[2] = (char)UriHelper.HexUpperChars[c & 0xf];
13781378
});
13791379
}
13801380

@@ -1897,18 +1897,9 @@ private static bool CheckForColonInFirstPathSegment(string uriString)
18971897
return (index >= 0 && uriString[index] == ':');
18981898
}
18991899

1900-
internal static unsafe string InternalEscapeString(string rawString)
1901-
{
1902-
if ((object)rawString == null)
1903-
return string.Empty;
1904-
1905-
int position = 0;
1906-
char[]? dest = UriHelper.EscapeString(rawString, 0, rawString.Length, null, ref position, true, '?', '#', '%');
1907-
if ((object?)dest == null)
1908-
return rawString;
1909-
1910-
return new string(dest, 0, position);
1911-
}
1900+
internal static unsafe string InternalEscapeString(string rawString) =>
1901+
rawString is null ? string.Empty :
1902+
UriHelper.EscapeString(rawString, checkExistingEscaped: true, UriHelper.UnreservedReservedTable, '?', '#');
19121903

19131904
//
19141905
// This method is called first to figure out the scheme or a simple file path
@@ -2469,11 +2460,7 @@ private unsafe void CreateHostString()
24692460
flags |= Flags.E_HostNotCanonical;
24702461
if (NotAny(Flags.UserEscaped))
24712462
{
2472-
int position = 0;
2473-
char[]? dest = UriHelper.EscapeString(host, 0, host.Length, null, ref position, true, '?',
2474-
'#', IsImplicitFile ? c_DummyChar : '%');
2475-
if ((object?)dest != null)
2476-
host = new string(dest, 0, position);
2463+
host = UriHelper.EscapeString(host, checkExistingEscaped: !IsImplicitFile, UriHelper.UnreservedReservedTable, '?', '#');
24772464
}
24782465
else
24792466
{
@@ -2771,8 +2758,10 @@ private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat
27712758
case UriFormat.UriEscaped:
27722759
if (NotAny(Flags.UserEscaped))
27732760
{
2774-
chars = UriHelper.EscapeString(_string, _info.Offset.User, _info.Offset.Host, chars,
2775-
ref count, true, '?', '#', '%')!; // TODO-NULLABLE: Remove ! when [NotNullIfNotNull] respected
2761+
chars = UriHelper.EscapeString(
2762+
_string.AsSpan(_info.Offset.User, _info.Offset.Host - _info.Offset.User),
2763+
chars, ref count,
2764+
checkExistingEscaped: true, '?', '#');
27762765
}
27772766
else
27782767
{
@@ -2938,8 +2927,12 @@ private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat
29382927
case UriFormat.UriEscaped:
29392928
//Can Assert IsImplicitfile == false
29402929
if (NotAny(Flags.UserEscaped))
2941-
chars = UriHelper.EscapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars,
2942-
ref count, true, '#', c_DummyChar, '%')!; // TODO-NULLABLE: Remove ! when [NotNullIfNotNull] respected
2930+
{
2931+
chars = UriHelper.EscapeString(
2932+
_string.AsSpan(delimiterAwareIndex, _info.Offset.Fragment - delimiterAwareIndex),
2933+
chars, ref count,
2934+
checkExistingEscaped: true, '#');
2935+
}
29432936
else
29442937
{
29452938
UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars,
@@ -2991,8 +2984,12 @@ private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat
29912984
{
29922985
case UriFormat.UriEscaped:
29932986
if (NotAny(Flags.UserEscaped))
2994-
chars = UriHelper.EscapeString(_string, delimiterAwareIndex, _info.Offset.End, chars,
2995-
ref count, true, c_DummyChar, c_DummyChar, '%')!; // TODO-NULLABLE: Remove ! when [NotNullIfNotNull] respected
2987+
{
2988+
chars = UriHelper.EscapeString(
2989+
_string.AsSpan(delimiterAwareIndex, _info.Offset.End - delimiterAwareIndex),
2990+
chars, ref count,
2991+
checkExistingEscaped: true);
2992+
}
29962993
else
29972994
{
29982995
UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.End, chars,
@@ -4623,8 +4620,11 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma
46234620
str = str.Remove(dosPathIdx + _info.Offset.Path - 1, 1);
46244621
str = str.Insert(dosPathIdx + _info.Offset.Path - 1, ":");
46254622
}
4626-
dest = UriHelper.EscapeString(str, _info.Offset.Path, _info.Offset.Query, dest, ref end, true,
4627-
'?', '#', IsImplicitFile ? c_DummyChar : '%')!; // TODO-NULLABLE: Remove ! when [NotNullIfNotNull] respected
4623+
4624+
dest = UriHelper.EscapeString(
4625+
str.AsSpan(_info.Offset.Path, _info.Offset.Query - _info.Offset.Path),
4626+
dest, ref end,
4627+
checkExistingEscaped: !IsImplicitFile, '?', '#');
46284628
}
46294629
else
46304630
{
@@ -4636,8 +4636,7 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma
46364636
// On Unix, escape '\\' in path of file uris to '%5C' canonical form.
46374637
if (!IsWindowsSystem && InFact(Flags.BackslashInPath) && _syntax.NotAny(UriSyntaxFlags.ConvertPathSlashes) && _syntax.InFact(UriSyntaxFlags.FileLikeUri) && !IsImplicitFile)
46384638
{
4639-
string str = new string(dest, pos, end - pos);
4640-
dest = UriHelper.EscapeString(str, 0, str.Length, dest, ref pos, true, '\\', c_DummyChar, '%')!; // TODO-NULLABLE: Remove ! when [NotNullIfNotNull] respected
4639+
dest = UriHelper.EscapeString(new string(dest, pos, end - pos), dest, ref pos, checkExistingEscaped: true, '\\');
46414640
end = pos;
46424641
}
46434642
}
@@ -4685,9 +4684,7 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma
46854684
if (formatAs == UriFormat.UriEscaped && NotAny(Flags.UserEscaped) && InFact(Flags.E_PathNotCanonical))
46864685
{
46874686
//Note: Flags.UserEscaped check is solely based on trusting the user
4688-
string srcString = new string(dest, pos, end - pos);
4689-
dest = UriHelper.EscapeString(srcString, 0, end - pos, dest, ref pos, true, '?', '#',
4690-
IsImplicitFile ? c_DummyChar : '%')!; // TODO-NULLABLE: Remove ! when [NotNullIfNotNull] respected
4687+
dest = UriHelper.EscapeString(new string(dest, pos, end - pos), dest, ref pos, checkExistingEscaped: !IsImplicitFile, '?', '#');
46914688
end = pos;
46924689
}
46934690
}
@@ -5337,22 +5334,9 @@ protected virtual string Unescape(string path)
53375334
}
53385335

53395336
[Obsolete("The method has been deprecated. Please use GetComponents() or static EscapeUriString() to escape a Uri component or a string. https://go.microsoft.com/fwlink/?linkid=14202")]
5340-
protected static string EscapeString(string? str)
5341-
{
5342-
// This method just does not make sense as protected
5343-
// It should go public static asap
5344-
5345-
if (str == null)
5346-
{
5347-
return string.Empty;
5348-
}
5349-
5350-
int destStart = 0;
5351-
char[]? dest = UriHelper.EscapeString(str, 0, str.Length, null, ref destStart, true, '?', '#', '%');
5352-
if (dest == null)
5353-
return str;
5354-
return new string(dest, 0, destStart);
5355-
}
5337+
protected static string EscapeString(string? str) =>
5338+
str is null ? string.Empty :
5339+
UriHelper.EscapeString(str, checkExistingEscaped: true, UriHelper.UnreservedReservedTable, '?', '#');
53565340

53575341
//
53585342
// CheckSecurity

src/System.Private.Uri/src/System/UriBuilder.cs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -242,11 +242,9 @@ public string Path
242242
}
243243
set
244244
{
245-
if ((value == null) || (value.Length == 0))
246-
{
247-
value = "/";
248-
}
249-
_path = Uri.InternalEscapeString(value.Replace('\\', '/'));
245+
_path = string.IsNullOrEmpty(value) ?
246+
"/" :
247+
Uri.InternalEscapeString(value.Replace('\\', '/'));
250248
_changed = true;
251249
}
252250
}

src/System.Private.Uri/src/System/UriExt.cs

Lines changed: 10 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System.Globalization;
6-
using System.Text;
7-
using System.Runtime.InteropServices;
86
using System.Diagnostics;
97
using System.Diagnostics.CodeAnalysis;
108

@@ -250,7 +248,7 @@ private unsafe bool CheckForEscapedUnreserved(string data)
250248
&& tempPtr[i + 1] >= '0' && tempPtr[i + 1] <= '7') // max 0x7F
251249
{
252250
char ch = UriHelper.EscapedAscii(tempPtr[i + 1], tempPtr[i + 2]);
253-
if (ch != c_DummyChar && UriHelper.Is3986Unreserved(ch))
251+
if (ch != c_DummyChar && UriHelper.IsUnreserved(ch))
254252
{
255253
return true;
256254
}
@@ -569,46 +567,15 @@ public static string UnescapeDataString(string stringToUnescape)
569567
}
570568
}
571569

572-
//
573570
// Where stringToEscape is intended to be a completely unescaped URI string.
574571
// This method will escape any character that is not a reserved or unreserved character, including percent signs.
575-
// Note that EscapeUriString will also do not escape a '#' sign.
576-
//
577-
public static string EscapeUriString(string stringToEscape)
578-
{
579-
if ((object)stringToEscape == null)
580-
throw new ArgumentNullException(nameof(stringToEscape));
581-
582-
if (stringToEscape.Length == 0)
583-
return string.Empty;
584-
585-
int position = 0;
586-
char[]? dest = UriHelper.EscapeString(stringToEscape, 0, stringToEscape.Length, null, ref position, true,
587-
c_DummyChar, c_DummyChar, c_DummyChar);
588-
if ((object?)dest == null)
589-
return stringToEscape;
590-
return new string(dest, 0, position);
591-
}
572+
public static string EscapeUriString(string stringToEscape) =>
573+
UriHelper.EscapeString(stringToEscape, checkExistingEscaped: false, UriHelper.UnreservedReservedTable);
592574

593-
//
594575
// Where stringToEscape is intended to be URI data, but not an entire URI.
595576
// This method will escape any character that is not an unreserved character, including percent signs.
596-
//
597-
public static string EscapeDataString(string stringToEscape)
598-
{
599-
if ((object)stringToEscape == null)
600-
throw new ArgumentNullException(nameof(stringToEscape));
601-
602-
if (stringToEscape.Length == 0)
603-
return string.Empty;
604-
605-
int position = 0;
606-
char[]? dest = UriHelper.EscapeString(stringToEscape, 0, stringToEscape.Length, null, ref position, false,
607-
c_DummyChar, c_DummyChar, c_DummyChar);
608-
if (dest == null)
609-
return stringToEscape;
610-
return new string(dest, 0, position);
611-
}
577+
public static string EscapeDataString(string stringToEscape) =>
578+
UriHelper.EscapeString(stringToEscape, checkExistingEscaped: false, UriHelper.UnreservedTable);
612579

613580
//
614581
// Cleans up the specified component according to Iri rules
@@ -781,19 +748,12 @@ private unsafe string GetRelativeSerializationString(UriFormat format)
781748
{
782749
if (format == UriFormat.UriEscaped)
783750
{
784-
if (_string.Length == 0)
785-
return string.Empty;
786-
int position = 0;
787-
char[]? dest = UriHelper.EscapeString(_string, 0, _string.Length, null, ref position, true,
788-
c_DummyChar, c_DummyChar, '%');
789-
if ((object?)dest == null)
790-
return _string;
791-
return new string(dest, 0, position);
751+
return UriHelper.EscapeString(_string, checkExistingEscaped: true, UriHelper.UnreservedReservedTable);
792752
}
793-
794753
else if (format == UriFormat.Unescaped)
754+
{
795755
return UnescapeDataString(_string);
796-
756+
}
797757
else if (format == UriFormat.SafeUnescaped)
798758
{
799759
if (_string.Length == 0)
@@ -806,7 +766,9 @@ private unsafe string GetRelativeSerializationString(UriFormat format)
806766
return new string(dest, 0, position);
807767
}
808768
else
769+
{
809770
throw new ArgumentOutOfRangeException(nameof(format));
771+
}
810772
}
811773

812774
//

0 commit comments

Comments
 (0)