-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Cleanup split between lexer and sliding-text-window #79205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
40ad7f0
0ea5fbc
a472f31
3d1fc7f
fd45aec
16573f2
e1eb1b4
362c1cb
d8a8748
712f2f5
a852cfd
8f278a3
15fbaec
daf9f80
ebf24ab
54e6e27
284cf83
e9a740c
f6817ee
1f0ec4a
e2885e4
7fc3b85
cedf899
a127c8f
df0944b
1441828
799edf7
337ae17
31d6b9c
596a146
40a3098
ad0183f
b60a263
6a66e2a
522713c
68b59a4
7de725f
d88102e
d964f29
3857e2d
dcd95b1
28ee138
8686e17
cb19c83
7eb1016
64984e5
68d625b
f577a9f
46a9aaa
781eb9a
583b578
8be8696
45769cb
41dd1f4
4828ded
aaacaf2
8f2c2ae
2c8c6c7
7dbd1de
a1dba56
7a2ab71
c66a5c3
9b4941a
b05a58c
1f907ab
3c87760
362a47c
b69e574
7219354
91959bd
aa6e641
b4bc7a4
0491e62
e417a13
ae6d7a3
9be1995
0620cbe
51cef33
38772d9
39f10e2
a42ff59
2e41f81
64a3105
37ce151
c47db70
61d4fd5
4256939
17e1edb
de4388f
a21a493
061f4e2
b301919
636a56b
d8367f7
82ee10a
fa199ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,8 +4,6 @@ | |
|
|
||
| using System; | ||
| using System.Collections.Generic; | ||
| using Microsoft.CodeAnalysis.CSharp.Symbols; | ||
| using Microsoft.CodeAnalysis.CSharp.Syntax; | ||
| using Microsoft.CodeAnalysis.Text; | ||
|
|
||
| namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax | ||
|
|
@@ -15,6 +13,7 @@ internal class AbstractLexer : IDisposable | |
| { | ||
| internal readonly SlidingTextWindow TextWindow; | ||
| private List<SyntaxDiagnosticInfo>? _errors; | ||
| protected int LexemeStartPosition; | ||
|
|
||
| protected AbstractLexer(SourceText text) | ||
| { | ||
|
|
@@ -28,7 +27,7 @@ public virtual void Dispose() | |
|
|
||
| protected void Start() | ||
| { | ||
| TextWindow.Start(); | ||
| LexemeStartPosition = this.TextWindow.Position; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the text window is now blissfully unaware of what the lexer is doing with lexemes. |
||
| _errors = null; | ||
| } | ||
|
|
||
|
|
@@ -131,9 +130,18 @@ protected XmlSyntaxDiagnosticInfo MakeError(int position, int width, XmlParseErr | |
|
|
||
| private int GetLexemeOffsetFromPosition(int position) | ||
| { | ||
| return position >= TextWindow.LexemeStartPosition ? position - TextWindow.LexemeStartPosition : position; | ||
| return position >= LexemeStartPosition ? position - LexemeStartPosition : position; | ||
| } | ||
|
|
||
| protected string GetNonInternedLexemeText() | ||
| => TextWindow.GetText(LexemeStartPosition, intern: false); | ||
|
|
||
| protected string GetInternedLexemeText() | ||
| => TextWindow.GetText(LexemeStartPosition, intern: true); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these .GetText helpers on the TextWindow were used all over the place. MOved to two simple helpers on Lexer for getting either an interned or non-interned chunk of text for the current lexeme. |
||
|
|
||
| protected int CurrentLexemeWidth | ||
| => this.TextWindow.Position - LexemeStartPosition; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was previously exposed as the (imo) confusingly named SlidingTextWindow.Width. I tried to ensure that helpers related to lexemes used that term in their name to keep it clear what 'width/count/etc.' things are referring to. |
||
|
|
||
| protected static SyntaxDiagnosticInfo MakeError(ErrorCode code) | ||
| { | ||
| return new SyntaxDiagnosticInfo(code); | ||
|
|
||
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,9 +5,11 @@ | |
| // #define COLLECT_STATS | ||
|
|
||
| using System; | ||
| using System.Linq; | ||
| using Microsoft.CodeAnalysis.PooledObjects; | ||
| using Microsoft.CodeAnalysis.Syntax.InternalSyntax; | ||
| using Roslyn.Utilities; | ||
| using static Microsoft.CodeAnalysis.CSharp.NullableWalker; | ||
|
|
||
| namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax | ||
| { | ||
|
|
@@ -200,6 +202,35 @@ internal SyntaxTrivia LookupTrivia<TArg>( | |
| return value; | ||
| } | ||
|
|
||
|
|
||
| internal SyntaxTrivia LookupWhitespaceTrivia( | ||
| SlidingTextWindow textWindow, | ||
| int lexemeStartPosition, | ||
| int hashCode) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. easier to just read as the new function. There was only ever one caller ofr LookupTrivia, and it always used the same generic arg. |
||
| { | ||
| var lexemeWidth = textWindow.Position - lexemeStartPosition; | ||
| var textBuffer = textWindow.CharacterWindow; | ||
|
|
||
| // If the whitespace is entirely within the character window, grab from that and cache. | ||
| var lexemeEndPosition = lexemeStartPosition + lexemeWidth; | ||
| if (lexemeStartPosition >= textWindow.CharacterWindowStartPositionInText && lexemeEndPosition <= textWindow.CharacterWindowEndPositionInText) | ||
| { | ||
| var keyStart = lexemeStartPosition - textWindow.CharacterWindowStartPositionInText; | ||
| var value = TriviaMap.FindItem(textBuffer, keyStart, lexemeWidth, hashCode); | ||
|
|
||
| if (value == null) | ||
| { | ||
| value = SyntaxFactory.Whitespace(textWindow.GetText(lexemeStartPosition, intern: true)); | ||
| TriviaMap.AddItem(textBuffer, keyStart, lexemeWidth, hashCode, value); | ||
| } | ||
|
|
||
| return value; | ||
| } | ||
|
|
||
| // Otherwise, if it's outside of the window, just grab from the underlying text. | ||
| return SyntaxFactory.Whitespace(textWindow.GetText(lexemeStartPosition, intern: true)); | ||
| } | ||
|
|
||
| // TODO: remove this when done tweaking this cache. | ||
| #if COLLECT_STATS | ||
| private static int hits = 0; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,7 +87,7 @@ private void ScanRawStringLiteral(ref TokenInfo info, bool inDirective) | |
| // trusting the contents. | ||
| if (this.HasErrors) | ||
| { | ||
| var afterStartDelimiter = TextWindow.LexemeStartPosition + startingQuoteCount; | ||
| var afterStartDelimiter = this.LexemeStartPosition + startingQuoteCount; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reference to |
||
| var valueLength = TextWindow.Position - afterStartDelimiter; | ||
|
|
||
| info.StringValue = TextWindow.GetText( | ||
|
|
@@ -120,7 +120,7 @@ private void ScanRawStringLiteral(ref TokenInfo info, bool inDirective) | |
| }; | ||
| } | ||
|
|
||
| info.Text = TextWindow.GetText(intern: true); | ||
| info.Text = this.GetInternedLexemeText(); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. References to |
||
| } | ||
|
|
||
| private void ScanSingleLineRawStringLiteral(ref TokenInfo info, int startingQuoteCount) | ||
|
|
@@ -169,7 +169,7 @@ private void ScanSingleLineRawStringLiteral(ref TokenInfo info, int startingQuot | |
| } | ||
|
|
||
| // We have enough quotes to finish this string at this point. | ||
| var afterStartDelimiter = TextWindow.LexemeStartPosition + startingQuoteCount; | ||
| var afterStartDelimiter = this.LexemeStartPosition + startingQuoteCount; | ||
| var valueLength = beforeEndDelimiter - afterStartDelimiter; | ||
|
|
||
| info.StringValue = TextWindow.GetText( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
|
|
||
| using System; | ||
| using System.Diagnostics; | ||
| using System.Linq; | ||
| using Roslyn.Utilities; | ||
|
|
||
| namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax | ||
|
|
@@ -193,19 +194,22 @@ private enum CharFlags : byte | |
| { | ||
| this.Start(); | ||
| var state = QuickScanState.Initial; | ||
| int i = TextWindow.Offset; | ||
| int n = TextWindow.CharacterWindowCount; | ||
| n = Math.Min(n, i + MaxCachedTokenSize); | ||
|
|
||
| var charWindow = TextWindow.CharacterWindow; | ||
| var startIndex = TextWindow.Position - TextWindow.CharacterWindowStartPositionInText; | ||
| var currentIndex = startIndex; | ||
| var n = charWindow.Length; | ||
|
|
||
| n = Math.Min(n, currentIndex + MaxCachedTokenSize); | ||
|
|
||
| int hashCode = Hash.FnvOffsetBias; | ||
|
|
||
| //localize frequently accessed fields | ||
| var charWindow = TextWindow.CharacterWindow; | ||
| var charPropLength = CharProperties.Length; | ||
|
|
||
| for (; i < n; i++) | ||
| for (; currentIndex < n; currentIndex++) | ||
| { | ||
| char c = charWindow[i]; | ||
| char c = charWindow[currentIndex]; | ||
| int uc = unchecked((int)c); | ||
|
|
||
| var flags = uc < charPropLength ? (CharFlags)CharProperties[uc] : CharFlags.Complex; | ||
|
|
@@ -228,34 +232,36 @@ private enum CharFlags : byte | |
| state = QuickScanState.Bad; // ran out of characters in window | ||
| exitWhile: | ||
|
|
||
| TextWindow.AdvanceChar(i - TextWindow.Offset); | ||
| // Note: it is fine to change this position here and then read from charWindow below. AdvanceChar guarantees | ||
| // that it will not actually mutate that state. | ||
| TextWindow.AdvanceChar(currentIndex - TextWindow.Offset); | ||
| Debug.Assert(state == QuickScanState.Bad || state == QuickScanState.Done, "can only exit with Bad or Done"); | ||
|
|
||
| if (state == QuickScanState.Done) | ||
| { | ||
| // this is a good token! | ||
| var token = _cache.LookupToken( | ||
| TextWindow.CharacterWindow, | ||
| TextWindow.LexemeRelativeStart, | ||
| i - TextWindow.LexemeRelativeStart, | ||
| charWindow, | ||
| keyStart: startIndex, | ||
| keyLength: currentIndex - startIndex, | ||
| hashCode, | ||
| CreateQuickToken, | ||
| this); | ||
| return token; | ||
| } | ||
| else | ||
| { | ||
| TextWindow.Reset(TextWindow.LexemeStartPosition); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need to reset anything. the text window didn't move. this function only ever reads directly from the backing char array. |
||
| TextWindow.Reset(this.LexemeStartPosition); | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| private static SyntaxToken CreateQuickToken(Lexer lexer) | ||
| { | ||
| #if DEBUG | ||
| var quickWidth = lexer.TextWindow.Width; | ||
| var quickWidth = lexer.CurrentLexemeWidth; | ||
| #endif | ||
| lexer.TextWindow.Reset(lexer.TextWindow.LexemeStartPosition); | ||
| lexer.TextWindow.Reset(lexer.LexemeStartPosition); | ||
| var token = lexer.LexSyntaxToken(); | ||
| #if DEBUG | ||
| Debug.Assert(quickWidth == token.FullWidth); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,8 +43,6 @@ internal sealed class SlidingTextWindow : IDisposable | |
| private char[] _characterWindow; // Moveable window of chars from source text | ||
| private int _characterWindowCount; // # of valid characters in chars buffer | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. primary goal was to simplify the heck out of this goop. Instead of having lots of things to keep in sync, we simplify our lives by using well known types. Specifically, ArraySegment encapsulates the window and its count. And there's no need for a separate offset/basis. |
||
|
|
||
| private int _lexemeStart; // Start of current lexeme relative to the window start. | ||
|
|
||
| // Example for the above variables: | ||
| // The text starts at 0. | ||
| // The window onto the text starts at basis. | ||
|
|
@@ -64,7 +62,6 @@ public SlidingTextWindow(SourceText text) | |
| _textEnd = text.Length; | ||
| _strings = StringTable.GetInstance(); | ||
| _characterWindow = s_windowPool.Allocate(); | ||
| _lexemeStart = 0; | ||
| } | ||
|
|
||
| public void Dispose() | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. naming this like all our other freeable-types. and explicitly made non-IDisposable. This shouldn't be boxed into that type. |
||
|
|
@@ -112,17 +109,6 @@ public char[] CharacterWindow | |
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns the start of the current lexeme relative to the window start. | ||
| /// </summary> | ||
| public int LexemeRelativeStart | ||
| { | ||
| get | ||
| { | ||
| return _lexemeStart; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Number of characters in the character window. | ||
| /// </summary> | ||
|
|
@@ -135,35 +121,19 @@ public int CharacterWindowCount | |
| } | ||
|
|
||
| /// <summary> | ||
| /// The absolute position of the start of the current lexeme in the given | ||
| /// SourceText. | ||
| /// </summary> | ||
| public int LexemeStartPosition | ||
| { | ||
| get | ||
| { | ||
| return _basis + _lexemeStart; | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// The number of characters in the current lexeme. | ||
| /// Offset of the <see cref="_characterWindow"/> within <see cref="_text"/>. In other words, if this is 2048, then that means | ||
| /// it represents the chunk of characters starting at position 2048 in the source text. <see cref="CharacterWindowCount"/> represents | ||
| /// how large the chunk is. Characters <c>[0, CharacterWindowCount)</c> are valid characters within the window, and represents | ||
| /// the chunk <c>[CharacterWindowStartPositionInText, CharacterWindowEndPositionInText)</c> in <see cref="_text"/>. | ||
| /// </summary> | ||
| public int Width | ||
| { | ||
| get | ||
| { | ||
| return _offset - _lexemeStart; | ||
| } | ||
| } | ||
| public int CharacterWindowStartPositionInText => _basis; | ||
|
|
||
| /// <summary> | ||
| /// Start parsing a new lexeme. | ||
| /// Similar to <see cref="CharacterWindowStartPositionInText"/>, except this represents the index (exclusive) of the first character | ||
| /// that <see cref="_characterWindow"/> encompases in <see cref="_text"/>. This is equal to <see cref="CharacterWindowStartPositionInText"/> | ||
| /// + <see cref="CharacterWindowCount"/>. | ||
| /// </summary> | ||
| public void Start() | ||
| { | ||
| _lexemeStart = _offset; | ||
| } | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need "start" teh text window. it does not care about lexemes. it just cares about caching the most likely segment of chars needed by the lexer to read from. |
||
| public int CharacterWindowEndPositionInText => this.CharacterWindowStartPositionInText + this.CharacterWindowCount; | ||
|
|
||
| public void Reset(int position) | ||
| { | ||
|
|
@@ -183,7 +153,6 @@ public void Reset(int position) | |
| _text.CopyTo(position, _characterWindow, 0, amountToRead); | ||
| } | ||
|
|
||
| _lexemeStart = 0; | ||
| _offset = 0; | ||
| _basis = position; | ||
| _characterWindowCount = amountToRead; | ||
|
|
@@ -199,21 +168,6 @@ private bool MoreChars() | |
| return false; | ||
| } | ||
|
|
||
| // if lexeme scanning is sufficiently into the char buffer, | ||
| // then refocus the window onto the lexeme | ||
| if (_lexemeStart > (_characterWindowCount / 4)) | ||
| { | ||
| Array.Copy(_characterWindow, | ||
| _lexemeStart, | ||
| _characterWindow, | ||
| 0, | ||
| _characterWindowCount - _lexemeStart); | ||
| _characterWindowCount -= _lexemeStart; | ||
| _offset -= _lexemeStart; | ||
| _basis += _lexemeStart; | ||
| _lexemeStart = 0; | ||
| } | ||
|
|
||
| if (_characterWindowCount >= _characterWindow.Length) | ||
| { | ||
| // grow char array, since we need more contiguous space | ||
|
|
@@ -272,8 +226,8 @@ public bool TryAdvance(char c) | |
| } | ||
|
|
||
| /// <summary> | ||
| /// Advance the current position by n. No guarantee that this position | ||
| /// is valid. | ||
| /// Advance the current position by n. No guarantee that this position is valid. This will <em>not</em> change the character window | ||
| /// in any way. Specifically, it will not create a new character window, nor will it change the contents of the current window. | ||
| /// </summary> | ||
| public void AdvanceChar(int n) | ||
CyrusNajmabadi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
|
|
@@ -419,14 +373,9 @@ public string Intern(char[] array, int start, int length) | |
| return _strings.Add(array, start, length); | ||
| } | ||
|
|
||
| public string GetInternedText() | ||
| { | ||
| return this.Intern(_characterWindow, _lexemeStart, this.Width); | ||
| } | ||
|
|
||
| public string GetText(bool intern) | ||
| public string GetText(int startPosition, bool intern) | ||
| { | ||
| return this.GetText(this.LexemeStartPosition, this.Width, intern); | ||
| return this.GetText(startPosition, this.Position - startPosition, intern); | ||
| } | ||
|
|
||
| public string GetText(int position, int length, bool intern) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved to lexer-layer from SlidingTextWindow