@@ -13,6 +13,12 @@ namespace System.Text.RegularExpressions.Symbolic
1313 internal sealed class SymbolicRegexNode < S > where S : notnull
1414 {
1515 internal const string EmptyCharClass = "[]" ;
16+ /// <summary>Some byte other than 0 to represent true</summary>
17+ internal const byte TrueByte = 1 ;
18+ /// <summary>Some byte other than 0 to represent false</summary>
19+ internal const byte FalseByte = 2 ;
20+ /// <summary>The undefined value is the default value 0</summary>
21+ internal const byte UndefinedByte = 0 ;
1622
1723 internal readonly SymbolicRegexBuilder < S > _builder ;
1824 internal readonly SymbolicRegexKind _kind ;
@@ -23,7 +29,11 @@ internal sealed class SymbolicRegexNode<S> where S : notnull
2329 internal readonly SymbolicRegexNode < S > ? _right ;
2430 internal readonly SymbolicRegexSet < S > ? _alts ;
2531
26- private Dictionary < uint , bool > ? _nullabilityCache ;
32+ /// <summary>
33+ /// Caches nullability of this node for any given context (0 <= context < ContextLimit)
34+ /// when _info.StartsWithSomeAnchor and _info.CanBeNullable are true. Otherwise the cache is null.
35+ /// </summary>
36+ private byte [ ] ? _nullabilityCache ;
2737
2838 private S _startSet ;
2939
@@ -50,6 +60,7 @@ private SymbolicRegexNode(SymbolicRegexBuilder<S> builder, SymbolicRegexKind kin
5060 _info = info ;
5161 _hashcode = ComputeHashCode ( ) ;
5262 _startSet = ComputeStartSet ( ) ;
63+ _nullabilityCache = info . StartsWithSomeAnchor && info . CanBeNullable ? new byte [ CharKind . ContextLimit ] : null ;
5364 }
5465
5566 private bool _isInternalizedUnion ;
@@ -162,92 +173,100 @@ static void AppendToList(SymbolicRegexNode<S> concat, List<SymbolicRegexNode<S>>
162173 /// <param name="context">kind info for previous and next characters</param>
163174 internal bool IsNullableFor ( uint context )
164175 {
165- if ( ! _info . StartsWithSomeAnchor )
166- return IsNullable ;
167-
168- if ( ! _info . CanBeNullable )
169- return false ;
176+ if ( _nullabilityCache is null )
177+ {
178+ // if _nullabilityCache is null then IsNullable==CanBeNullable
179+ // Observe that if IsNullable==true then CanBeNullable==true.
180+ // but when the node does not start with an anchor
181+ // and IsNullable==false then CanBeNullable==false.
182+ return _info . IsNullable ;
183+ }
170184
171185 if ( ! StackHelper . TryEnsureSufficientExecutionStack ( ) )
172186 {
173187 return StackHelper . CallOnEmptyStack ( IsNullableFor , context ) ;
174188 }
175189
176- // Initialize the nullability cache for this node.
177- _nullabilityCache ??= new Dictionary < uint , bool > ( ) ;
190+ Debug . Assert ( context < CharKind . ContextLimit ) ;
178191
179- if ( ! _nullabilityCache . TryGetValue ( context , out bool is_nullable ) )
192+ // If nullablity has been computed for the given context then return it
193+ byte b = Volatile . Read ( ref _nullabilityCache [ context ] ) ;
194+ if ( b != UndefinedByte )
180195 {
181- switch ( _kind )
182- {
183- case SymbolicRegexKind . Loop :
184- Debug . Assert ( _left is not null ) ;
185- is_nullable = _lower == 0 || _left . IsNullableFor ( context ) ;
186- break ;
196+ return b == TrueByte ;
197+ }
187198
188- case SymbolicRegexKind . Concat :
189- Debug . Assert ( _left is not null && _right is not null ) ;
190- is_nullable = _left . IsNullableFor ( context ) && _right . IsNullableFor ( context ) ;
191- break ;
199+ // Otherwise compute the nullability recursively for the given context
200+ bool is_nullable ;
201+ switch ( _kind )
202+ {
203+ case SymbolicRegexKind . Loop :
204+ Debug . Assert ( _left is not null ) ;
205+ is_nullable = _lower == 0 || _left . IsNullableFor ( context ) ;
206+ break ;
192207
193- case SymbolicRegexKind . Or :
194- case SymbolicRegexKind . And :
195- Debug . Assert ( _alts is not null ) ;
196- is_nullable = _alts . IsNullableFor ( context ) ;
197- break ;
208+ case SymbolicRegexKind . Concat :
209+ Debug . Assert ( _left is not null && _right is not null ) ;
210+ is_nullable = _left . IsNullableFor ( context ) && _right . IsNullableFor ( context ) ;
211+ break ;
198212
199- case SymbolicRegexKind . Not :
200- Debug . Assert ( _left is not null ) ;
201- is_nullable = ! _left . IsNullableFor ( context ) ;
202- break ;
213+ case SymbolicRegexKind . Or :
214+ case SymbolicRegexKind . And :
215+ Debug . Assert ( _alts is not null ) ;
216+ is_nullable = _alts . IsNullableFor ( context ) ;
217+ break ;
203218
204- case SymbolicRegexKind . StartAnchor :
205- is_nullable = CharKind . Prev ( context ) == CharKind . StartStop ;
206- break ;
219+ case SymbolicRegexKind . Not :
220+ Debug . Assert ( _left is not null ) ;
221+ is_nullable = ! _left . IsNullableFor ( context ) ;
222+ break ;
207223
208- case SymbolicRegexKind . EndAnchor :
209- is_nullable = CharKind . Next ( context ) == CharKind . StartStop ;
210- break ;
224+ case SymbolicRegexKind . StartAnchor :
225+ is_nullable = CharKind . Prev ( context ) == CharKind . StartStop ;
226+ break ;
211227
212- case SymbolicRegexKind . BOLAnchor :
213- // Beg-Of-Line anchor is nullable when the previous character is Newline or Start
214- // note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
215- is_nullable = ( CharKind . Prev ( context ) & CharKind . NewLineS ) != 0 ;
216- break ;
228+ case SymbolicRegexKind . EndAnchor :
229+ is_nullable = CharKind . Next ( context ) == CharKind . StartStop ;
230+ break ;
217231
218- case SymbolicRegexKind . EOLAnchor :
219- // End -Of-Line anchor is nullable when the next character is Newline or Stop
220- // note: at least one of the bits must be 1, but both could also be 1 in case of \Z
221- is_nullable = ( CharKind . Next ( context ) & CharKind . NewLineS ) != 0 ;
222- break ;
232+ case SymbolicRegexKind . BOLAnchor :
233+ // Beg -Of-Line anchor is nullable when the previous character is Newline or Start
234+ // note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
235+ is_nullable = ( CharKind . Prev ( context ) & CharKind . NewLineS ) != 0 ;
236+ break ;
223237
224- case SymbolicRegexKind . WBAnchor :
225- // test that prev char is word letter iff next is not not word letter
226- is_nullable = ( ( CharKind . Prev ( context ) & CharKind . WordLetter ) ^ ( CharKind . Next ( context ) & CharKind . WordLetter ) ) != 0 ;
227- break ;
238+ case SymbolicRegexKind . EOLAnchor :
239+ // End-Of-Line anchor is nullable when the next character is Newline or Stop
240+ // note: at least one of the bits must be 1, but both could also be 1 in case of \Z
241+ is_nullable = ( CharKind . Next ( context ) & CharKind . NewLineS ) != 0 ;
242+ break ;
228243
229- case SymbolicRegexKind . NWBAnchor :
230- // test that prev char is word letter iff next is word letter
231- is_nullable = ( ( CharKind . Prev ( context ) & CharKind . WordLetter ) ^ ( CharKind . Next ( context ) & CharKind . WordLetter ) ) = = 0 ;
232- break ;
244+ case SymbolicRegexKind . WBAnchor :
245+ // test that prev char is word letter iff next is not not word letter
246+ is_nullable = ( ( CharKind . Prev ( context ) & CharKind . WordLetter ) ^ ( CharKind . Next ( context ) & CharKind . WordLetter ) ) ! = 0 ;
247+ break ;
233248
234- case SymbolicRegexKind . EndAnchorZ :
235- // \Z anchor is nullable when the next character is either the last Newline or Stop
236- // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
237- is_nullable = ( CharKind . Next ( context ) & CharKind . StartStop ) != 0 ;
238- break ;
249+ case SymbolicRegexKind . NWBAnchor :
250+ // test that prev char is word letter iff next is word letter
251+ is_nullable = ( ( CharKind . Prev ( context ) & CharKind . WordLetter ) ^ ( CharKind . Next ( context ) & CharKind . WordLetter ) ) == 0 ;
252+ break ;
239253
240- default : //SymbolicRegexKind.EndAnchorZRev:
241- // EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
242- // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
243- Debug . Assert ( _kind == SymbolicRegexKind . EndAnchorZRev ) ;
244- is_nullable = ( CharKind . Prev ( context ) & CharKind . StartStop ) != 0 ;
245- break ;
246- }
254+ case SymbolicRegexKind . EndAnchorZ :
255+ // \Z anchor is nullable when the next character is either the last Newline or Stop
256+ // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
257+ is_nullable = ( CharKind . Next ( context ) & CharKind . StartStop ) != 0 ;
258+ break ;
247259
248- _nullabilityCache [ context ] = is_nullable ;
260+ default : // SymbolicRegexKind.EndAnchorZRev:
261+ // EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
262+ // note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
263+ Debug . Assert ( _kind == SymbolicRegexKind . EndAnchorZRev ) ;
264+ is_nullable = ( CharKind . Prev ( context ) & CharKind . StartStop ) != 0 ;
265+ break ;
249266 }
250267
268+ Volatile . Write ( ref _nullabilityCache [ context ] , is_nullable ? TrueByte : FalseByte ) ;
269+
251270 return is_nullable ;
252271 }
253272
0 commit comments