dotnet · CyrusNajmabadi · Jul 23, 2025 · Jun 30, 2025 · Jun 30, 2025 · Jun 30, 2025
@@ -11,24 +11,27 @@ namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
     // separate out text windowing implementation (keeps scanning & lexing functions from abusing details)
     internal class AbstractLexer : IDisposable
     {
-        internal readonly SlidingTextWindow TextWindow;
+        /// <summary>
+        /// Not readonly.  This is a mutable struct that will be modified as we lex tokens.
+        /// </summary>
+        internal SlidingTextWindow TextWindow;
+
         private List<SyntaxDiagnosticInfo>? _errors;
+        protected int LexemeStartPosition;
 
         protected AbstractLexer(SourceText text)
         {
             this.TextWindow = new SlidingTextWindow(text);
         }
 
-        protected int LexemeStartPosition => this.TextWindow.LexemeStartPosition;
-
         public virtual void Dispose()
         {
-            this.TextWindow.Dispose();
+            this.TextWindow.Free();
         }
 
         protected void Start()
         {
-            TextWindow.Start();
+            LexemeStartPosition = this.TextWindow.Position;
             _errors = null;
         }
 
@@ -135,10 +138,10 @@ private int GetLexemeOffsetFromPosition(int position)
         }
 
         protected string GetNonInternedLexemeText()
-            => TextWindow.GetText(intern: false);
+            => TextWindow.GetText(LexemeStartPosition, intern: false);
 
         protected string GetInternedLexemeText()
-            => TextWindow.GetText(intern: true);
+            => TextWindow.GetText(LexemeStartPosition, intern: true);
 
         protected int CurrentLexemeWidth
             => this.TextWindow.Position - LexemeStartPosition;

@@ -1341,21 +1341,17 @@ private bool ScanIdentifier_FastPath(ref TokenInfo info)
                 return false;
             }
 
-            var currentOffset = TextWindow.Offset;
-            var characterWindow = TextWindow.CharacterWindow;
-            var characterWindowCount = TextWindow.CharacterWindowCount;
-
-            var startOffset = currentOffset;
+            var textWindowCharSpan = this.TextWindow.CurrentWindowSpan;
+            var currentIndex = 0;
 
             while (true)
             {
-                if (currentOffset == characterWindowCount)
-                {
-                    // no more contiguous characters.  Fall back to slow path
+                // If we do not have any more contiguous characters within the char span that we can look at,
+                // then fall back to the slow path.
+                if (currentIndex == textWindowCharSpan.Length)
                     return false;
-                }
 
-                switch (characterWindow[currentOffset])
+                switch (textWindowCharSpan[currentIndex])
                 {
                     case '&':
                         // CONSIDER: This method is performance critical, so
@@ -1405,13 +1401,13 @@ private bool ScanIdentifier_FastPath(ref TokenInfo info)
                         // All of the following characters are not valid in an 
                         // identifier.  If we see any of them, then we know we're
                         // done.
-                        var length = currentOffset - startOffset;
+                        var length = currentIndex;
                         TextWindow.AdvanceChar(length);
-                        info.Text = info.StringValue = TextWindow.Intern(characterWindow, startOffset, length);
+                        info.Text = info.StringValue = TextWindow.Intern(textWindowCharSpan[..length]);
                         info.IsVerbatim = false;
                         return true;
                     case >= '0' and <= '9':
-                        if (currentOffset == startOffset)
+                        if (currentIndex == 0)
                         {
                             return false;
                         }
@@ -1423,7 +1419,7 @@ private bool ScanIdentifier_FastPath(ref TokenInfo info)
                     case '_':
                         // All of these characters are valid inside an identifier.
                         // consume it and keep processing.
-                        currentOffset++;
+                        currentIndex++;
                         continue;
 
                     // case '@':  verbatim identifiers are handled in the slow path
@@ -2293,6 +2289,8 @@ private void ScanToEndOfLine()
         /// <returns>A trivia node with the whitespace text</returns>
         private SyntaxTrivia ScanWhitespace()
         {
+            Debug.Assert(SyntaxFacts.IsWhitespace(TextWindow.PeekChar()));
+
             int hashCode = Hash.FnvOffsetBias;  // FNV base
             bool onlySpaces = true;
 
@@ -2326,6 +2324,8 @@ private SyntaxTrivia ScanWhitespace()
                     break;
             }
 
+            Debug.Assert(this.CurrentLexemeWidth > 0);
+
             if (this.CurrentLexemeWidth == 1 && onlySpaces)
             {
                 return SyntaxFactory.Space;
@@ -2336,24 +2336,18 @@ private SyntaxTrivia ScanWhitespace()
 
                 if (width < MaxCachedTokenSize)
                 {
-                    return _cache.LookupTrivia(
-                        TextWindow.CharacterWindow.AsSpan(TextWindow.LexemeRelativeStart, width),
-                        hashCode,
-                        CreateWhitespaceTrivia,
-                        TextWindow);
+                    return _cache.LookupWhitespaceTrivia(
+                        TextWindow,
+                        this.LexemeStartPosition,
+                        hashCode);
                 }
                 else
                 {
-                    return CreateWhitespaceTrivia(TextWindow);
+                    return SyntaxFactory.Whitespace(this.GetInternedLexemeText());
                 }
             }
         }
 
-        private static SyntaxTrivia CreateWhitespaceTrivia(SlidingTextWindow textWindow)
-        {
-            return SyntaxFactory.Whitespace(textWindow.GetText(intern: true));
-        }
-
         private void LexDirectiveAndExcludedTrivia(
             bool isFollowingToken,
             ref SyntaxListBuilder triviaList)

@@ -5,8 +5,10 @@
 // #define COLLECT_STATS
 
 using System;
+using System.Diagnostics;
 using Microsoft.CodeAnalysis.PooledObjects;
 using Microsoft.CodeAnalysis.Syntax.InternalSyntax;
+using Microsoft.CodeAnalysis.Text;
 using Roslyn.Utilities;
 
 namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
@@ -181,21 +183,28 @@ internal bool TryGetKeywordKind(string key, out SyntaxKind kind)
             return kind != SyntaxKind.None;
         }
 
-        internal SyntaxTrivia LookupTrivia<TArg>(
-            ReadOnlySpan<char> textBuffer,
-            int hashCode,
-            Func<TArg, SyntaxTrivia> createTriviaFunction,
-            TArg data)
+        internal SyntaxTrivia LookupWhitespaceTrivia(
+            in SlidingTextWindow textWindow,
+            int lexemeStartPosition,
+            int hashCode)
         {
-            var value = TriviaMap.FindItem(textBuffer, hashCode);
+            var span = TextSpan.FromBounds(lexemeStartPosition, textWindow.Position);
+            Debug.Assert(span.Length > 0);
 
-            if (value == null)
+            if (textWindow.TryGetTextIfWithinWindow(span, out var lexemeTextSpan))
             {
-                value = createTriviaFunction(data);
-                TriviaMap.AddItem(textBuffer, hashCode, value);
+                var value = TriviaMap.FindItem(lexemeTextSpan, hashCode);
+                if (value == null)
+                {
+                    value = SyntaxFactory.Whitespace(textWindow.GetText(lexemeStartPosition, intern: true));
+                    TriviaMap.AddItem(lexemeTextSpan, hashCode, value);
+                }
+
+                return value;
             }
 
-            return value;
+            // Otherwise, if it's outside of the window, just grab from the underlying text.
+            return SyntaxFactory.Whitespace(textWindow.GetText(lexemeStartPosition, intern: true));
         }
 
         // TODO: remove this when done tweaking this cache.

@@ -411,7 +411,7 @@ private bool ScanOpenQuote(
                 out int startingQuoteCount)
             {
                 // Handles reading the start of the interpolated string literal (up to where the content begins)
-                var window = _lexer.TextWindow;
+                ref var window = ref _lexer.TextWindow;
                 var start = window.Position;
 
                 if ((window.PeekChar(0), window.PeekChar(1), window.PeekChar(2)) is ('$', '@', '"') or ('@', '$', '"'))

@@ -193,19 +193,22 @@ private enum CharFlags : byte
         {
             this.Start();
             var state = QuickScanState.Initial;
-            int i = TextWindow.Offset;
-            int n = TextWindow.CharacterWindowCount;
-            n = Math.Min(n, i + MaxCachedTokenSize);
+
+            var textWindowCharSpan = TextWindow.CurrentWindowSpan;
+
+            // Cap how much of the char span we're willing to look at.
+            textWindowCharSpan = textWindowCharSpan[..Math.Min(MaxCachedTokenSize, textWindowCharSpan.Length)];
 
             int hashCode = Hash.FnvOffsetBias;
 
             //localize frequently accessed fields
-            var charWindow = TextWindow.CharacterWindow;
             var charPropLength = CharProperties.Length;
 
-            for (; i < n; i++)
+            // Where we are currently pointing in textWindowCharSpan as we read in one character at a time.
+            var currentIndex = 0;
+            for (; currentIndex < textWindowCharSpan.Length; currentIndex++)
             {
-                char c = charWindow[i];
+                char c = textWindowCharSpan[currentIndex];
                 int uc = unchecked((int)c);
 
                 var flags = uc < charPropLength ? (CharFlags)CharProperties[uc] : CharFlags.Complex;
@@ -228,32 +231,39 @@ private enum CharFlags : byte
             state = QuickScanState.Bad; // ran out of characters in window
 exitWhile:
 
-            TextWindow.AdvanceChar(i - TextWindow.Offset);
             Debug.Assert(state == QuickScanState.Bad || state == QuickScanState.Done, "can only exit with Bad or Done");
 
             if (state == QuickScanState.Done)
             {
                 // this is a good token!
+                var tokenLength = currentIndex;
+
+                Debug.Assert(tokenLength > 0);
+
+                // It is fine to advance the text window here.  AdvanceChar is doc'ed to not change the character window in any way.
+                // Note: we need to advance here, instead of after LookupToken as LookupToken can call into CreateQuickToken
+                // as a callback, which expects the text window to be in the position after lexing has occurred.
+                TextWindow.AdvanceChar(tokenLength);
+
                 var token = _cache.LookupToken(
-                    TextWindow.CharacterWindow.AsSpan(TextWindow.LexemeRelativeStart, i - TextWindow.LexemeRelativeStart),
+                    textWindowCharSpan[..tokenLength],
                     hashCode,
                     CreateQuickToken,
                     this);
                 return token;
             }
             else
             {
-                TextWindow.Reset(TextWindow.LexemeStartPosition);
                 return null;
             }
         }
 
         private static SyntaxToken CreateQuickToken(Lexer lexer)
         {
 #if DEBUG
-            var quickWidth = lexer.TextWindow.Width;
+            var quickWidth = lexer.CurrentLexemeWidth;
 #endif
-            lexer.TextWindow.Reset(lexer.TextWindow.LexemeStartPosition);
+            lexer.TextWindow.Reset(lexer.LexemeStartPosition);
             var token = lexer.LexSyntaxToken();
 #if DEBUG
             Debug.Assert(quickWidth == token.FullWidth);