diff --git a/src/Compilers/CSharp/Portable/Parser/AbstractLexer.cs b/src/Compilers/CSharp/Portable/Parser/AbstractLexer.cs
index 6eb8346d6fdd3..43006797668e2 100644
--- a/src/Compilers/CSharp/Portable/Parser/AbstractLexer.cs
+++ b/src/Compilers/CSharp/Portable/Parser/AbstractLexer.cs
@@ -11,24 +11,27 @@ namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
// separate out text windowing implementation (keeps scanning & lexing functions from abusing details)
internal class AbstractLexer : IDisposable
{
- internal readonly SlidingTextWindow TextWindow;
+ ///
+ /// Not readonly. This is a mutable struct that will be modified as we lex tokens.
+ ///
+ internal SlidingTextWindow TextWindow;
+
private List? _errors;
+ protected int LexemeStartPosition;
protected AbstractLexer(SourceText text)
{
this.TextWindow = new SlidingTextWindow(text);
}
- protected int LexemeStartPosition => this.TextWindow.LexemeStartPosition;
-
public virtual void Dispose()
{
- this.TextWindow.Dispose();
+ this.TextWindow.Free();
}
protected void Start()
{
- TextWindow.Start();
+ LexemeStartPosition = this.TextWindow.Position;
_errors = null;
}
@@ -135,10 +138,10 @@ private int GetLexemeOffsetFromPosition(int position)
}
protected string GetNonInternedLexemeText()
- => TextWindow.GetText(intern: false);
+ => TextWindow.GetText(LexemeStartPosition, intern: false);
protected string GetInternedLexemeText()
- => TextWindow.GetText(intern: true);
+ => TextWindow.GetText(LexemeStartPosition, intern: true);
protected int CurrentLexemeWidth
=> this.TextWindow.Position - LexemeStartPosition;
diff --git a/src/Compilers/CSharp/Portable/Parser/Lexer.cs b/src/Compilers/CSharp/Portable/Parser/Lexer.cs
index 0304312bc20a5..84601ad926b9f 100644
--- a/src/Compilers/CSharp/Portable/Parser/Lexer.cs
+++ b/src/Compilers/CSharp/Portable/Parser/Lexer.cs
@@ -1341,21 +1341,17 @@ private bool ScanIdentifier_FastPath(ref TokenInfo info)
return false;
}
- var currentOffset = TextWindow.Offset;
- var characterWindow = TextWindow.CharacterWindow;
- var characterWindowCount = TextWindow.CharacterWindowCount;
-
- var startOffset = currentOffset;
+ var textWindowCharSpan = this.TextWindow.CurrentWindowSpan;
+ var currentIndex = 0;
while (true)
{
- if (currentOffset == characterWindowCount)
- {
- // no more contiguous characters. Fall back to slow path
+ // If we do not have any more contiguous characters within the char span that we can look at,
+ // then fall back to slow path
+ if (currentIndex == textWindowCharSpan.Length)
return false;
- }
- switch (characterWindow[currentOffset])
+ switch (textWindowCharSpan[currentIndex])
{
case '&':
// CONSIDER: This method is performance critical, so
@@ -1405,13 +1401,13 @@ private bool ScanIdentifier_FastPath(ref TokenInfo info)
// All of the following characters are not valid in an
// identifier. If we see any of them, then we know we're
// done.
- var length = currentOffset - startOffset;
+ var length = currentIndex;
TextWindow.AdvanceChar(length);
- info.Text = info.StringValue = TextWindow.Intern(characterWindow, startOffset, length);
+ info.Text = info.StringValue = TextWindow.Intern(textWindowCharSpan[..length]);
info.IsVerbatim = false;
return true;
case >= '0' and <= '9':
- if (currentOffset == startOffset)
+ if (currentIndex == 0)
{
return false;
}
@@ -1423,7 +1419,7 @@ private bool ScanIdentifier_FastPath(ref TokenInfo info)
case '_':
// All of these characters are valid inside an identifier.
// consume it and keep processing.
- currentOffset++;
+ currentIndex++;
continue;
// case '@': verbatim identifiers are handled in the slow path
@@ -2293,6 +2289,8 @@ private void ScanToEndOfLine()
/// A trivia node with the whitespace text
private SyntaxTrivia ScanWhitespace()
{
+ Debug.Assert(SyntaxFacts.IsWhitespace(TextWindow.PeekChar()));
+
int hashCode = Hash.FnvOffsetBias; // FNV base
bool onlySpaces = true;
@@ -2326,6 +2324,8 @@ private SyntaxTrivia ScanWhitespace()
break;
}
+ Debug.Assert(this.CurrentLexemeWidth > 0);
+
if (this.CurrentLexemeWidth == 1 && onlySpaces)
{
return SyntaxFactory.Space;
@@ -2336,24 +2336,18 @@ private SyntaxTrivia ScanWhitespace()
if (width < MaxCachedTokenSize)
{
- return _cache.LookupTrivia(
- TextWindow.CharacterWindow.AsSpan(TextWindow.LexemeRelativeStart, width),
- hashCode,
- CreateWhitespaceTrivia,
- TextWindow);
+ return _cache.LookupWhitespaceTrivia(
+ TextWindow,
+ this.LexemeStartPosition,
+ hashCode);
}
else
{
- return CreateWhitespaceTrivia(TextWindow);
+ return SyntaxFactory.Whitespace(this.GetInternedLexemeText());
}
}
}
- private static SyntaxTrivia CreateWhitespaceTrivia(SlidingTextWindow textWindow)
- {
- return SyntaxFactory.Whitespace(textWindow.GetText(intern: true));
- }
-
private void LexDirectiveAndExcludedTrivia(
bool isFollowingToken,
ref SyntaxListBuilder triviaList)
diff --git a/src/Compilers/CSharp/Portable/Parser/LexerCache.cs b/src/Compilers/CSharp/Portable/Parser/LexerCache.cs
index af20e88e0f25a..c44fc2450eec7 100644
--- a/src/Compilers/CSharp/Portable/Parser/LexerCache.cs
+++ b/src/Compilers/CSharp/Portable/Parser/LexerCache.cs
@@ -5,8 +5,10 @@
// #define COLLECT_STATS
using System;
+using System.Diagnostics;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Syntax.InternalSyntax;
+using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
@@ -181,21 +183,28 @@ internal bool TryGetKeywordKind(string key, out SyntaxKind kind)
return kind != SyntaxKind.None;
}
- internal SyntaxTrivia LookupTrivia(
- ReadOnlySpan textBuffer,
- int hashCode,
- Func createTriviaFunction,
- TArg data)
+ internal SyntaxTrivia LookupWhitespaceTrivia(
+ in SlidingTextWindow textWindow,
+ int lexemeStartPosition,
+ int hashCode)
{
- var value = TriviaMap.FindItem(textBuffer, hashCode);
+ var span = TextSpan.FromBounds(lexemeStartPosition, textWindow.Position);
+ Debug.Assert(span.Length > 0);
- if (value == null)
+ if (textWindow.TryGetTextIfWithinWindow(span, out var lexemeTextSpan))
{
- value = createTriviaFunction(data);
- TriviaMap.AddItem(textBuffer, hashCode, value);
+ var value = TriviaMap.FindItem(lexemeTextSpan, hashCode);
+ if (value == null)
+ {
+ value = SyntaxFactory.Whitespace(textWindow.GetText(lexemeStartPosition, intern: true));
+ TriviaMap.AddItem(lexemeTextSpan, hashCode, value);
+ }
+
+ return value;
}
- return value;
+ // Otherwise, if it's outside of the window, just grab from the underlying text.
+ return SyntaxFactory.Whitespace(textWindow.GetText(lexemeStartPosition, intern: true));
}
// TODO: remove this when done tweaking this cache.
diff --git a/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs b/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs
index 09dcf9e03b64d..0da0c67d0f5a8 100644
--- a/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs
+++ b/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs
@@ -411,7 +411,7 @@ private bool ScanOpenQuote(
out int startingQuoteCount)
{
// Handles reading the start of the interpolated string literal (up to where the content begins)
- var window = _lexer.TextWindow;
+ ref var window = ref _lexer.TextWindow;
var start = window.Position;
if ((window.PeekChar(0), window.PeekChar(1), window.PeekChar(2)) is ('$', '@', '"') or ('@', '$', '"'))
diff --git a/src/Compilers/CSharp/Portable/Parser/QuickScanner.cs b/src/Compilers/CSharp/Portable/Parser/QuickScanner.cs
index 8c2719757aec3..d722814c193c0 100644
--- a/src/Compilers/CSharp/Portable/Parser/QuickScanner.cs
+++ b/src/Compilers/CSharp/Portable/Parser/QuickScanner.cs
@@ -193,19 +193,22 @@ private enum CharFlags : byte
{
this.Start();
var state = QuickScanState.Initial;
- int i = TextWindow.Offset;
- int n = TextWindow.CharacterWindowCount;
- n = Math.Min(n, i + MaxCachedTokenSize);
+
+ var textWindowCharSpan = TextWindow.CurrentWindowSpan;
+
+ // Cap how much of the char span we're willing to look at.
+ textWindowCharSpan = textWindowCharSpan[..Math.Min(MaxCachedTokenSize, textWindowCharSpan.Length)];
int hashCode = Hash.FnvOffsetBias;
//localize frequently accessed fields
- var charWindow = TextWindow.CharacterWindow;
var charPropLength = CharProperties.Length;
- for (; i < n; i++)
+ // Where we are currently pointing in textWindowCharSpan as we read in a character at a time.
+ var currentIndex = 0;
+ for (; currentIndex < textWindowCharSpan.Length; currentIndex++)
{
- char c = charWindow[i];
+ char c = textWindowCharSpan[currentIndex];
int uc = unchecked((int)c);
var flags = uc < charPropLength ? (CharFlags)CharProperties[uc] : CharFlags.Complex;
@@ -228,14 +231,22 @@ private enum CharFlags : byte
state = QuickScanState.Bad; // ran out of characters in window
exitWhile:
- TextWindow.AdvanceChar(i - TextWindow.Offset);
Debug.Assert(state == QuickScanState.Bad || state == QuickScanState.Done, "can only exit with Bad or Done");
if (state == QuickScanState.Done)
{
// this is a good token!
+ var tokenLength = currentIndex;
+
+ Debug.Assert(tokenLength > 0);
+
+ // It is fine to advance text window here. AdvanceChar is documented to not change the character window in any way.
+ // Note: we need to advance here, instead of after LookupToken as LookupToken can call into CreateQuickToken
+ // as a callback, which expects the text window to be in the position after lexing has occurred.
+ TextWindow.AdvanceChar(tokenLength);
+
var token = _cache.LookupToken(
- TextWindow.CharacterWindow.AsSpan(TextWindow.LexemeRelativeStart, i - TextWindow.LexemeRelativeStart),
+ textWindowCharSpan[..tokenLength],
hashCode,
CreateQuickToken,
this);
@@ -243,7 +254,6 @@ private enum CharFlags : byte
}
else
{
- TextWindow.Reset(TextWindow.LexemeStartPosition);
return null;
}
}
@@ -251,9 +261,9 @@ private enum CharFlags : byte
private static SyntaxToken CreateQuickToken(Lexer lexer)
{
#if DEBUG
- var quickWidth = lexer.TextWindow.Width;
+ var quickWidth = lexer.CurrentLexemeWidth;
#endif
- lexer.TextWindow.Reset(lexer.TextWindow.LexemeStartPosition);
+ lexer.TextWindow.Reset(lexer.LexemeStartPosition);
var token = lexer.LexSyntaxToken();
#if DEBUG
Debug.Assert(quickWidth == token.FullWidth);
diff --git a/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs b/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs
index fa5fd29df9885..bc1d2702e51e9 100644
--- a/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs
+++ b/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs
@@ -2,9 +2,14 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+#if DEBUG
+#define TRACING
+#endif
+
using System;
using System.Diagnostics;
using System.Text;
+using System.Threading;
using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
@@ -12,14 +17,19 @@
namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
{
///
- /// Keeps a sliding buffer over the SourceText of a file for the lexer. Also
- /// provides the lexer with the ability to keep track of a current "lexeme"
- /// by leaving a marker and advancing ahead the offset. The lexer can then
- /// decide to "keep" the lexeme by erasing the marker, or abandon the current
- /// lexeme by moving the offset back to the marker.
+ /// Keeps a sliding buffer over the SourceText of a file for the lexer. Generally, the sliding buffer will
+ /// almost always move forward a 'chunk' (<see cref="DefaultWindowLength"/>) at a time. However, the buffer
+ /// also supports moving backward to a previous location if needed.
///
- internal sealed class SlidingTextWindow : IDisposable
+ [NonCopyable]
+ internal struct SlidingTextWindow
{
+#if TRACING
+ public static int GetTextInsideWindowCount = 0;
+ public static int GetTextOutsideWindowCount = 0;
+ public static long TotalTextSize = 0;
+#endif
+
///
/// In many cases, e.g. PeekChar, we need the ability to indicate that there are
/// no characters left and we have reached the end of the stream, or some other
@@ -34,207 +44,190 @@ internal sealed class SlidingTextWindow : IDisposable
///
public const char InvalidCharacter = char.MaxValue;
- private const int DefaultWindowLength = 2048;
+ ///
+ /// This number was picked based on the roslyn codebase. If we look at parsing all files,
+ /// this window ensures that 97.4% of all characters grabbed are within the window. Note
+ /// that this is lower than expected, but heavily influenced by how heavily roslyn uses
+ /// large strings in its codebase to represent test content. With all these files, lexemes
+ /// average around 60 characters. If tests are excluded, then average lexeme length drops
+ /// to 25 characters, and the hit rate goes up to 98.4%. Because this hitrate is so high,
+ /// we don't bother to dynamically resize the window. Instead, when we hit the end, we just
+ /// grab in the next chunk of characters from the source text, as only one in every 65 lexemes
+ /// will need to go back to the source-text to read the full lexeme.
+ ///
+ public const int DefaultWindowLength = 4096;
+
+ private static readonly ObjectPool s_windowPool = new ObjectPool(() => new char[DefaultWindowLength]);
- private readonly SourceText _text; // Source of text to parse.
- private int _basis; // Offset of the window relative to the SourceText start.
- private int _offset; // Offset from the start of the window.
- private readonly int _textEnd; // Absolute end position
- private char[] _characterWindow; // Moveable window of chars from source text
- private int _characterWindowCount; // # of valid characters in chars buffer
+ ///
+ /// Underlying source text we are lexing. This is the final truth of all characters. Chunks of
+ /// this text will be read into a character window as needed to allow fast contiguous access to
+ /// a portion of the file at a time (as many <see cref="SourceText"/> implementations have logarithmic
+ /// access time to characters).
+ ///
+ public SourceText Text { get; }
- private int _lexemeStart; // Start of current lexeme relative to the window start.
+ ///
+ /// Absolute end position of <see cref="Text"/>. Attempts to read at or past this position will
+ /// produce <see cref="InvalidCharacter"/>.
+ ///
+ private readonly int _textEnd;
- // Example for the above variables:
- // The text starts at 0.
- // The window onto the text starts at basis.
- // The current character is at (basis + offset), AKA the current "Position".
- // The current lexeme started at (basis + lexemeStart), which is <= (basis + offset)
- // The current lexeme is the characters between the lexemeStart and the offset.
+ ///
+ /// The current position in <see cref="Text"/> that we are reading characters from. This is the absolute
+ /// position in the source text. This is not allowed to be negative. It is allowed to be greater than or
+ /// equal to <see cref="_textEnd"/>.
+ ///
+ private int _positionInText;
- private readonly StringTable _strings;
+ ///
+ /// Current segment of readable characters. The backing store for this is an array given out by <see cref="s_windowPool"/>.
+ /// The length of this will normally be <see cref="DefaultWindowLength"/>, but may be smaller if we are at the end of the file
+ /// and there are not enough characters left to fill the window.
+ ///
+ private ArraySegment _characterWindow;
- private static readonly ObjectPool s_windowPool = new ObjectPool(() => new char[DefaultWindowLength]);
+ ///
+ /// Where the current character window starts in the source text. This is the absolute position in the source text.
+ /// In other words, if this is 2048, then that means it represents the chunk of characters starting at position 2048
+ /// in the source text. _characterWindow.Count represents how large the chunk is. Characters
+ /// [0, _characterWindow.Count) are valid characters within the window, and represent
+ /// the chunk [_characterWindowStartPositionInText, CharacterWindowEndPositionInText) in <see cref="Text"/>.
+ ///
+ private int _characterWindowStartPositionInText;
+
+#if DEBUG
+ private bool _freed;
+#endif
+
+ private readonly StringTable _strings;
public SlidingTextWindow(SourceText text)
{
- _text = text;
- _basis = 0;
- _offset = 0;
+ this.Text = text;
_textEnd = text.Length;
_strings = StringTable.GetInstance();
- _characterWindow = s_windowPool.Allocate();
- _lexemeStart = 0;
+ _characterWindow = new ArraySegment(s_windowPool.Allocate());
+
+ // Read the first chunk of the file into the character window.
+ this.ReadChunkAt(0);
}
- public void Dispose()
+ public void Free()
{
- if (_characterWindow != null)
- {
- s_windowPool.Free(_characterWindow);
- _characterWindow = null!;
- _strings.Free();
- }
- }
+#if DEBUG
+ Debug.Assert(!_freed);
+ _freed = true;
+#endif
- public SourceText Text => _text;
+ s_windowPool.Free(_characterWindow.Array!);
+ _strings.Free();
+ }
///
- /// The current absolute position in the text file.
+ /// Reads a chunk of characters from the underlying <see cref="Text"/> at the given position and places them
+ /// at the start of the character window. The character window's length will be set to the number of characters
+ /// read.
+ ///
+ /// Note: this does NOT set <see cref="_positionInText"/>. We are just reading the characters into the window. The actual
+ /// position in the text is unchanged by this, and callers should set that if necessary.
///
- public int Position
+ private void ReadChunkAt(int position)
{
- get
- {
- return _basis + _offset;
- }
+ position = Math.Min(position, _textEnd);
+
+ var amountToRead = Math.Min(_textEnd - position, DefaultWindowLength);
+ this.Text.CopyTo(position, _characterWindow.Array!, 0, amountToRead);
+ _characterWindowStartPositionInText = position;
+ _characterWindow = new(_characterWindow.Array!, 0, amountToRead);
}
///
- /// The current offset inside the window (relative to the window start).
+ /// The current absolute position in the text file.
///
- public int Offset
- {
- get
- {
- return _offset;
- }
- }
+ public readonly int Position => _positionInText;
///
- /// The buffer backing the current window.
+ /// Gets a view over the underlying characters that this window is currently pointing at.
+ /// The view starts at <see cref="Position"/> and contains as many legal characters from <see cref="Text"/>
+ /// that are available after that. This span may be empty.
///
- public char[] CharacterWindow
+ public readonly ReadOnlySpan CurrentWindowSpan
{
get
{
- return _characterWindow;
+ var start = _positionInText - _characterWindowStartPositionInText;
+ // If we are outside the bounds of the current character window, then we cannot return a span around any of it.
+ if (start < 0 || start >= _characterWindow.Count)
+ return default;
+
+ return _characterWindow.AsSpan(start);
}
}
-
///
- /// Returns the start of the current lexeme relative to the window start.
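+ /// Similar to <see cref="_characterWindowStartPositionInText"/>, except this represents the index (exclusive) of the last character
+ /// that <see cref="_characterWindow"/> encompasses in <see cref="Text"/>. This is equal to <see cref="_characterWindowStartPositionInText"/>
+ /// + <see cref="_characterWindow"/>'s <see cref="ArraySegment{T}.Count"/>.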
///
- public int LexemeRelativeStart
- {
- get
- {
- return _lexemeStart;
- }
- }
+ private readonly int CharacterWindowEndPositionInText => _characterWindowStartPositionInText + _characterWindow.Count;
///
- /// Number of characters in the character window.
+ /// Returns true if <paramref name="position"/> is within the current character window, and thus the character at that position
+ /// can be read from the character window without going back to the underlying <see cref="Text"/>.
///
- public int CharacterWindowCount
+ private readonly bool PositionIsWithinWindow(int position)
{
- get
- {
- return _characterWindowCount;
- }
+ return position >= _characterWindowStartPositionInText &&
+ position < this.CharacterWindowEndPositionInText;
}
///
- /// The absolute position of the start of the current lexeme in the given
- /// SourceText.
+ /// Returns true if <paramref name="span"/> is within the current character window, and thus the sub-string corresponding to
+ /// that span can be read from the character window without going back to the underlying <see cref="Text"/>.
///
- public int LexemeStartPosition
+ public readonly bool SpanIsWithinWindow(TextSpan span)
{
- get
- {
- return _basis + _lexemeStart;
- }
+ return span.Start >= _characterWindowStartPositionInText &&
+ span.End <= this.CharacterWindowEndPositionInText;
}
///
- /// The number of characters in the current lexeme.
+ /// Returns the span of characters corresponding to <paramref name="span"/> from <see cref="Text"/> if
+ /// <paramref name="span"/> is entirely within bounds of the current character window (see <see cref="SpanIsWithinWindow"/>). Otherwise, returns <see langword="false"/>. Used to allow
+ /// fast path access to a sequence of characters in the common case where they are in the window, or
+ /// having the caller fall back to a slower approach. Note: this does not mutate the window in any way,
+ /// it just reads from a segment of the character window if that is available, returning <see langword="default"/> for
+ /// <paramref name="textSpan"/> if not.
///
- public int Width
+ public readonly bool TryGetTextIfWithinWindow(TextSpan span, out ReadOnlySpan textSpan)
{
- get
+ if (SpanIsWithinWindow(span))
{
- return _offset - _lexemeStart;
+ textSpan = _characterWindow.AsSpan(span.Start - _characterWindowStartPositionInText, span.Length);
+ return true;
}
+
+ textSpan = default;
+ return false;
}
///
- /// Start parsing a new lexeme.
+ /// Moves this window to point at the given position in the text. This will ensure that reading the character
+ /// at <paramref name="position"/> (and normally the characters after it) will be fast.
///
- public void Start()
- {
- _lexemeStart = _offset;
- }
-
public void Reset(int position)
{
- // if position is within already read character range then just use what we have
- int relative = position - _basis;
- if (relative >= 0 && relative <= _characterWindowCount)
- {
- _offset = relative;
- }
- else
- {
- // we need to reread text buffer
- int amountToRead = Math.Min(_text.Length, position + _characterWindow.Length) - position;
- amountToRead = Math.Max(amountToRead, 0);
- if (amountToRead > 0)
- {
- _text.CopyTo(position, _characterWindow, 0, amountToRead);
- }
-
- _lexemeStart = 0;
- _offset = 0;
- _basis = position;
- _characterWindowCount = amountToRead;
- }
- }
+ // Move us to that position.
+ _positionInText = Math.Min(position, _textEnd);
- private bool MoreChars()
- {
- if (_offset >= _characterWindowCount)
- {
- if (this.Position >= _textEnd)
- {
- return false;
- }
-
- // if lexeme scanning is sufficiently into the char buffer,
- // then refocus the window onto the lexeme
- if (_lexemeStart > (_characterWindowCount / 4))
- {
- Array.Copy(_characterWindow,
- _lexemeStart,
- _characterWindow,
- 0,
- _characterWindowCount - _lexemeStart);
- _characterWindowCount -= _lexemeStart;
- _offset -= _lexemeStart;
- _basis += _lexemeStart;
- _lexemeStart = 0;
- }
-
- if (_characterWindowCount >= _characterWindow.Length)
- {
- // grow char array, since we need more contiguous space
- char[] oldWindow = _characterWindow;
- char[] newWindow = new char[_characterWindow.Length * 2];
- Array.Copy(oldWindow, 0, newWindow, 0, _characterWindowCount);
- s_windowPool.ForgetTrackedObject(oldWindow, newWindow);
- _characterWindow = newWindow;
- }
-
- int amountToRead = Math.Min(_textEnd - (_basis + _characterWindowCount),
- _characterWindow.Length - _characterWindowCount);
- _text.CopyTo(_basis + _characterWindowCount,
- _characterWindow,
- _characterWindowCount,
- amountToRead);
- _characterWindowCount += amountToRead;
- return amountToRead > 0;
- }
+ // if position is within already read character range then just use what we have
+ if (PositionIsWithinWindow(_positionInText))
+ return;
- return true;
+ // Otherwise, ensure that the character window contains this position so we can read characters directly
+ // from there.
+ this.ReadChunkAt(_positionInText);
}
///
@@ -243,24 +236,21 @@ private bool MoreChars()
///
/// Comments and string literals are allowed to contain any Unicode character.
///
- ///
- internal bool IsReallyAtEnd()
- {
- return _offset >= _characterWindowCount && Position >= _textEnd;
- }
+ public readonly bool IsReallyAtEnd()
+ => Position >= _textEnd;
///
- /// Advance the current position by one. No guarantee that this
- /// position is valid.
+ /// Advance the current position by one. No guarantee that this position is valid. This will not change the character window
+ /// in any way. Specifically, it will not create a new character window, nor will it change the contents of the current window.
///
public void AdvanceChar()
- {
- _offset++;
- }
+ => AdvanceChar(1);
///
/// Advances the text window if it currently pointing at the <paramref name="c"/> character. Returns <see
- /// langword="true"/> if it did advance, <see langword="false"/> otherwise.
+ /// langword="true"/> if it did advance, <see langword="false"/> otherwise. This can change the
+ /// character window if the <see cref="Position"/> is at the end of the current character window, and
+ /// peeking then needs to read in a new chunk of characters to compare to <paramref name="c"/>.
///
public bool TryAdvance(char c)
{
@@ -272,12 +262,13 @@ public bool TryAdvance(char c)
}
///
- /// Advance the current position by n. No guarantee that this position
- /// is valid.
+ /// Advance the current position by n. No guarantee that this position is valid. This will not change the character window
+ /// in any way. Specifically, it will not create a new character window, nor will it change the contents of the current window.
///
public void AdvanceChar(int n)
{
- _offset += n;
+ _positionInText += n;
+ Debug.Assert(_positionInText >= 0, "Position in text cannot be negative.");
}
///
@@ -332,14 +323,17 @@ public char NextChar()
///
public char PeekChar()
{
- if (_offset >= _characterWindowCount
- && !MoreChars())
- {
+ if (IsReallyAtEnd())
return InvalidCharacter;
- }
- // N.B. MoreChars may update the offset.
- return _characterWindow[_offset];
+ var position = _positionInText;
+
+ // If the position is outside of the bounds of the current character window, then update its contents to
+ // contain that position.
+ if (!PositionIsWithinWindow(position))
+ this.ReadChunkAt(position);
+
+ return _characterWindow.Array![position - _characterWindowStartPositionInText];
}
///
@@ -353,41 +347,13 @@ public char PeekChar(int delta)
{
int position = this.Position;
this.AdvanceChar(delta);
-
- char ch;
- if (_offset >= _characterWindowCount
- && !MoreChars())
- {
- ch = InvalidCharacter;
- }
- else
- {
- // N.B. MoreChars may update the offset.
- ch = _characterWindow[_offset];
- }
-
+ var ch = PeekChar();
this.Reset(position);
return ch;
}
public char PreviousChar()
- {
- Debug.Assert(this.Position > 0);
- if (_offset > 0)
- {
- // The allowed region of the window that can be read is from 0 to _characterWindowCount (which _offset
- // is in between). So as long as _offset is greater than 0, we can read the previous character directly
- // from the current chunk of characters in the window.
- return this.CharacterWindow[_offset - 1];
- }
-
- // The prior character isn't in the window (trying to read the current character caused us to
- // read in the next chunk of text into the window, throwing out the preceding characters).
- // Just go back to the source text to find this character. While more expensive, this should
- // be rare given that most of the time we won't be calling this right after loading a new text
- // chunk.
- return this.Text[this.Position - 1];
- }
+ => PeekChar(-1);
///
/// If the next characters in the window match the given string,
@@ -409,87 +375,99 @@ internal bool AdvanceIfMatches(string desired)
return true;
}
- public string Intern(StringBuilder text)
+ public readonly string Intern(StringBuilder text)
{
return _strings.Add(text);
}
- public string Intern(char[] array, int start, int length)
- {
- return _strings.Add(array.AsSpan(start, length));
- }
+ public readonly string Intern(char[] array, int start, int length)
+ => Intern(array.AsSpan(start, length));
- public string GetInternedText()
- {
- return this.Intern(_characterWindow, _lexemeStart, this.Width);
- }
+ public readonly string Intern(ReadOnlySpan chars)
+ => _strings.Add(chars);
- public string GetText(bool intern)
- {
- return this.GetText(this.LexemeStartPosition, this.Width, intern);
- }
+ ///
+ /// Gets the text, in the range [startPosition, this.Position) from <see cref="Text"/>.
+ /// This will grab the text from the character window if it is within the bounds of the current
+ /// chunk, or from the underlying <see cref="Text"/> if it is not.
+ ///
+ public readonly string GetText(int startPosition, bool intern)
+ => this.GetText(startPosition, this.Position - startPosition, intern);
- public string GetText(int position, int length, bool intern)
+ ///
+ /// Gets the text, in the range [position, position + length) from <see cref="Text"/>.
+ /// This will grab the text from the character window if it is within the bounds of the current
+ /// chunk, or from the underlying <see cref="Text"/> if it is not.
+ ///
+ public readonly string GetText(int position, int length, bool intern)
{
- int offset = position - _basis;
+ var span = new TextSpan(position, length);
+
+#if TRACING
+ Interlocked.Add(ref TotalTextSize, length);
+#endif
+
+ // If the chunk being grabbed is not within the bounds of what is in the character window,
+ // then grab it from the source text. See docs at top of file for details on how common
+ // this is.
+ if (!SpanIsWithinWindow(span))
+ {
+#if TRACING
+ Interlocked.Increment(ref GetTextOutsideWindowCount);
+#endif
+ return this.Text.ToString(span);
+ }
+
+#if TRACING
+ Interlocked.Increment(ref GetTextInsideWindowCount);
+#endif
+
+ var offset = position - _characterWindowStartPositionInText;
+ var array = _characterWindow.Array!;
// PERF: Whether interning or not, there are some frequently occurring
// easy cases we can pick off easily.
switch (length)
{
- case 0:
- return string.Empty;
+ case 0: return string.Empty;
case 1:
- if (_characterWindow[offset] == ' ')
- {
- return " ";
- }
- if (_characterWindow[offset] == '\n')
+ switch (array[offset])
{
- return "\n";
+ case ' ': return " ";
+ case '\n': return "\n";
}
+
break;
case 2:
- char firstChar = _characterWindow[offset];
- if (firstChar == '\r' && _characterWindow[offset + 1] == '\n')
- {
- return "\r\n";
- }
- if (firstChar == '/' && _characterWindow[offset + 1] == '/')
+ switch (array[offset], array[offset + 1])
{
- return "//";
+ case ('\r', '\n'): return "\r\n";
+ case ('/', '/'): return "//";
}
+
break;
case 3:
- if (_characterWindow[offset] == '/' && _characterWindow[offset + 1] == '/' && _characterWindow[offset + 2] == ' ')
+ if (array[offset] == '/' && array[offset + 1] == '/' && array[offset + 2] == ' ')
{
return "// ";
}
+
break;
}
- if (intern)
- {
- return this.Intern(_characterWindow, offset, length);
- }
- else
- {
- return new string(_characterWindow, offset, length);
- }
+ return intern
+ ? this.Intern(array, offset, length)
+ : new string(array, offset, length);
}
- internal TestAccessor GetTestAccessor()
- => new TestAccessor(this);
-
- internal readonly struct TestAccessor(SlidingTextWindow window)
+ public static class TestAccessor
{
- private readonly SlidingTextWindow _window = window;
-
- internal void SetDefaultCharacterWindow()
- => _window._characterWindow = new char[DefaultWindowLength];
+ public static int GetOffset(in SlidingTextWindow window) => window._positionInText - window._characterWindowStartPositionInText;
+ public static int GetCharacterWindowStartPositionInText(in SlidingTextWindow window) => window._characterWindowStartPositionInText;
+ public static ArraySegment GetCharacterWindow(in SlidingTextWindow window) => window._characterWindow;
}
}
}
diff --git a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalTests.cs b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalTests.cs
index a2360d1bf4c0d..87ee09a07c453 100644
--- a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalTests.cs
+++ b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalTests.cs
@@ -4577,15 +4577,20 @@ disabled text 2
[Fact, WorkItem("https://github.com/dotnet/roslyn/issues/78593")]
public void TestDotPrefixedNumberStartingAtStartOfSlidingTextWindow()
{
- // This test depends on the line endings for the file being \r\n to ensure the right contents lines up at
- // the right locations.
+ // Make a file that looks effectively like:
//
- // It specifically validates what happens when we see `.0` at the start of the
- // sliding text window, where the lexer tries to peek back one char to see if this
- // is actually `..0` (a range expr) or `.0` (a floating point number).
- var code = Resources.DotPrefixedNumberStartingAtStartOfSlidingTextWindow;
- if (!code.Contains("\r\n"))
- code = code.Replace("\n", "\r\n");
+ // //////
+ // ;
+ // ..0;
+ //
+ // We want both dots at the end of the window. When we lex the first dot, we'll peek ahead to see if we're
+ // on a number, and we don't want that to move the window forward. Then when we lex the second dot, we will
+ // peek forward, making the text window point at "0;".
+ //
+ // Because we will have seen `.0` we will attempt to peek backwards to see if the prior token was a dot.
+ // This will involve reading back a chunk that includes that dot.
+ var windowEnd = "\r\n;\r\n..";
+ var code = new string('/', SlidingTextWindow.DefaultWindowLength - windowEnd.Length) + windowEnd + "0;";
var sourceText = SourceText.From(code);
@@ -4594,10 +4599,6 @@ public void TestDotPrefixedNumberStartingAtStartOfSlidingTextWindow()
using var lexer = new Lexer(sourceText, CSharpParseOptions.Default);
- // Ensure we have a normal window size, not some larger array that another test created and cached in
- // the window pool
- lexer.TextWindow.GetTestAccessor().SetDefaultCharacterWindow();
-
using var parser = new LanguageParser(lexer, oldTree: null, changes: null);
Microsoft.CodeAnalysis.SyntaxTreeExtensions.VerifySource(
@@ -4610,52 +4611,27 @@ public void TestDotPrefixedNumberStartingAtStartOfSlidingTextWindow()
// (a dot token starting a number, right at the start of the character window).
var lexer = new Lexer(sourceText, CSharpParseOptions.Default);
- // Ensure we have a normal window size, not some larger array that another test created and cached in
- // the window pool
- lexer.TextWindow.GetTestAccessor().SetDefaultCharacterWindow();
-
var mode = LexerMode.Syntax;
- for (var i = 0; i < 1326; i++)
- lexer.Lex(ref mode);
-
- // Lexer will read from index 0 in the arrray.
- Assert.Equal(0, lexer.TextWindow.Offset);
-
- // We have 205 real chars in the window
- Assert.Equal(205, lexer.TextWindow.CharacterWindowCount);
-
- // The lexer is at position 10199 in the file.
- Assert.Equal(10199, lexer.TextWindow.Position);
-
- /// The 205 characters represent the final part of the doc
- Assert.Equal(lexer.TextWindow.Text.Length, lexer.TextWindow.Position + lexer.TextWindow.CharacterWindowCount);
-
- // We're at the start of a token.
- Assert.Equal(lexer.TextWindow.LexemeStartPosition, lexer.TextWindow.Position);
-
- // Ensure that the lexer's window is starting with the next FP number (".03") right at
- // the start of the window.
- Assert.True(lexer.TextWindow.CharacterWindow is ['.', '0', '3', ',', ..], $"Start of window was '{new string(lexer.TextWindow.CharacterWindow, 0, 4)}'");
-
- var token = lexer.Lex(ref mode);
- Assert.Equal(SyntaxKind.NumericLiteralToken, token.Kind);
- Assert.Equal(3, token.FullWidth);
- Assert.Equal(".03", token.ToString());
-
- // But we moved 3 characters forward.
- Assert.Equal(3, lexer.TextWindow.Offset);
-
- // We still have 205 real chars in the window
- Assert.Equal(205, lexer.TextWindow.CharacterWindowCount);
-
- // The lexer position has moved 3 characters forward as well.
- Assert.Equal(10202, lexer.TextWindow.Position);
-
- // We're at the start of a token.
- Assert.Equal(lexer.TextWindow.LexemeStartPosition, lexer.TextWindow.Position);
-
- // Character window didn't changee.
- Assert.True(lexer.TextWindow.CharacterWindow is ['.', '0', '3', ',', ..], $"Start of window was '{new string(lexer.TextWindow.CharacterWindow, 0, 4)}'");
+ var token1 = lexer.Lex(ref mode);
+ Assert.Equal(SyntaxKind.SemicolonToken, token1.Kind);
+ Assert.Equal(SlidingTextWindow.DefaultWindowLength - 2, token1.FullWidth);
+
+ Assert.Equal(SlidingTextWindow.DefaultWindowLength - 2, SlidingTextWindow.TestAccessor.GetOffset(lexer.TextWindow));
+ var token2 = lexer.Lex(ref mode);
+ Assert.Equal(SyntaxKind.DotToken, token2.Kind);
+
+ Assert.Equal(SlidingTextWindow.DefaultWindowLength - 1, SlidingTextWindow.TestAccessor.GetOffset(lexer.TextWindow));
+ var token3 = lexer.Lex(ref mode);
+ Assert.Equal(SyntaxKind.DotToken, token3.Kind);
+
+ // We will have jumped the window backwards to be able to read the prior dot.
+ Assert.Equal(code.IndexOf('.'), SlidingTextWindow.TestAccessor.GetCharacterWindowStartPositionInText(lexer.TextWindow));
+ Assert.Equal(2, SlidingTextWindow.TestAccessor.GetOffset(lexer.TextWindow));
+ Assert.StartsWith("..0;", SlidingTextWindow.TestAccessor.GetCharacterWindow(lexer.TextWindow).AsSpan().ToString());
+
+ var token4 = lexer.Lex(ref mode);
+ Assert.Equal(SyntaxKind.NumericLiteralToken, token4.Kind);
+ Assert.Equal("0", token4.ValueText);
}
}
}
diff --git a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/SyntaxTokenParserTests.cs b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/SyntaxTokenParserTests.cs
index 7a3ae5b79fff7..081c938863a37 100644
--- a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/SyntaxTokenParserTests.cs
+++ b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/SyntaxTokenParserTests.cs
@@ -1,4 +1,4 @@
-// Licensed to the .NET Foundation under one or more agreements.
+// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
@@ -193,7 +193,7 @@ public void SkipForwardTo_PastDocumentEnd()
var sourceText = SourceText.From("class C { }");
var parser = SyntaxFactory.CreateTokenParser(sourceText, TestOptions.Regular);
parser.SkipForwardTo(100);
- AssertToken(expectedKind: SyntaxKind.EndOfFileToken, expectedContextualKind: SyntaxKind.None, new TextSpan(100, 0), "", parser.ParseNextToken());
+ AssertToken(expectedKind: SyntaxKind.EndOfFileToken, expectedContextualKind: SyntaxKind.None, new TextSpan(11, 0), "", parser.ParseNextToken());
}
[Fact]
diff --git a/src/Compilers/CSharp/Test/Syntax/Resources.resx b/src/Compilers/CSharp/Test/Syntax/Resources.resx
index 37ec16cc49bde..d5101d3f5d885 100644
--- a/src/Compilers/CSharp/Test/Syntax/Resources.resx
+++ b/src/Compilers/CSharp/Test/Syntax/Resources.resx
@@ -1,6 +1,5 @@
-