Skip to content

Commit 1e48eca

Browse files
veanesstephentoub
andauthored
lock protect nullability cache of symbolic regex node (#60942)
* fixed nullability checking to be threadsafe * use volatile write for nullability cache Co-authored-by: Stephen Toub <[email protected]> * made read of nullability cache volatile and fixed other PR comments * made non-lock-protected reads from SymbolicRegexBuilder._delta volatile * Apply suggestions from code review Co-authored-by: Stephen Toub <[email protected]>
1 parent f7be57f commit 1e48eca

File tree

5 files changed

+121
-72
lines changed

5 files changed

+121
-72
lines changed

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/CharKind.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,12 @@ internal static class CharKind
2929
/// <summary>Gets the next character kind from a context</summary>
3030
internal static uint Next(uint context) => context >> 3;
3131

32-
/// <summary>Creates the context of the previous and the next character kinds.</summary>
32+
/// <summary>Encodes the pair (prevKind, nextKind) using 6 bits</summary>
3333
internal static uint Context(uint prevKind, uint nextKind) => (nextKind << 3) | prevKind;
3434

35+
/// <summary>Exclusive maximum context (limit) is 64 because a context uses bit-shifting where each kind needs 3 bits.</summary>
36+
internal const int ContextLimit = 64;
37+
3538
internal static string DescribePrev(uint i) => i switch
3639
{
3740
StartStop => @"\A",

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,7 @@ public DfaMatchingState<TSetType> TakeTransition(
359359
Debug.Assert(builder._delta is not null);
360360

361361
int offset = (currentState.Id << builder._mintermsCount) | mintermId;
362-
return
363-
builder._delta[offset] ??
364-
matcher.CreateNewTransition(currentState, minterm, offset);
362+
return Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(currentState, minterm, offset);
365363
}
366364
}
367365

@@ -391,7 +389,7 @@ public DfaMatchingState<TSetType> TakeTransition(
391389
DfaMatchingState<TSetType> nextStates = builder.MkState(oneState, currentStates.PrevCharKind);
392390

393391
int offset = (nextStates.Id << builder._mintermsCount) | mintermId;
394-
DfaMatchingState<TSetType> p = builder._delta[offset] ?? matcher.CreateNewTransition(nextStates, minterm, offset);
392+
DfaMatchingState<TSetType> p = Volatile.Read(ref builder._delta[offset]) ?? matcher.CreateNewTransition(nextStates, minterm, offset);
395393

396394
// Observe that if p.Node is an Or it will be flattened.
397395
union = builder.MkOr2(union, p.Node);

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs

Lines changed: 84 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ namespace System.Text.RegularExpressions.Symbolic
1313
internal sealed class SymbolicRegexNode<S> where S : notnull
1414
{
1515
internal const string EmptyCharClass = "[]";
16+
/// <summary>Some byte other than 0 to represent true</summary>
17+
internal const byte TrueByte = 1;
18+
/// <summary>Some byte other than 0 to represent false</summary>
19+
internal const byte FalseByte = 2;
20+
/// <summary>The undefined value is the default value 0</summary>
21+
internal const byte UndefinedByte = 0;
1622

1723
internal readonly SymbolicRegexBuilder<S> _builder;
1824
internal readonly SymbolicRegexKind _kind;
@@ -23,7 +29,11 @@ internal sealed class SymbolicRegexNode<S> where S : notnull
2329
internal readonly SymbolicRegexNode<S>? _right;
2430
internal readonly SymbolicRegexSet<S>? _alts;
2531

26-
private Dictionary<uint, bool>? _nullabilityCache;
32+
/// <summary>
33+
/// Caches nullability of this node for any given context (0 &lt;= context &lt; ContextLimit)
34+
/// when _info.StartsWithSomeAnchor and _info.CanBeNullable are true. Otherwise the cache is null.
35+
/// </summary>
36+
private byte[]? _nullabilityCache;
2737

2838
private S _startSet;
2939

@@ -50,6 +60,7 @@ private SymbolicRegexNode(SymbolicRegexBuilder<S> builder, SymbolicRegexKind kin
5060
_info = info;
5161
_hashcode = ComputeHashCode();
5262
_startSet = ComputeStartSet();
63+
_nullabilityCache = info.StartsWithSomeAnchor && info.CanBeNullable ? new byte[CharKind.ContextLimit] : null;
5364
}
5465

5566
private bool _isInternalizedUnion;
@@ -162,92 +173,100 @@ static void AppendToList(SymbolicRegexNode<S> concat, List<SymbolicRegexNode<S>>
162173
/// <param name="context">kind info for previous and next characters</param>
163174
internal bool IsNullableFor(uint context)
164175
{
165-
if (!_info.StartsWithSomeAnchor)
166-
return IsNullable;
167-
168-
if (!_info.CanBeNullable)
169-
return false;
176+
if (_nullabilityCache is null)
177+
{
178+
// if _nullabilityCache is null then IsNullable==CanBeNullable
179+
// Observe that if IsNullable==true then CanBeNullable==true.
180+
// but when the node does not start with an anchor
181+
// and IsNullable==false then CanBeNullable==false.
182+
return _info.IsNullable;
183+
}
170184

171185
if (!StackHelper.TryEnsureSufficientExecutionStack())
172186
{
173187
return StackHelper.CallOnEmptyStack(IsNullableFor, context);
174188
}
175189

176-
// Initialize the nullability cache for this node.
177-
_nullabilityCache ??= new Dictionary<uint, bool>();
190+
Debug.Assert(context < CharKind.ContextLimit);
178191

179-
if (!_nullabilityCache.TryGetValue(context, out bool is_nullable))
192+
// If nullablity has been computed for the given context then return it
193+
byte b = Volatile.Read(ref _nullabilityCache[context]);
194+
if (b != UndefinedByte)
180195
{
181-
switch (_kind)
182-
{
183-
case SymbolicRegexKind.Loop:
184-
Debug.Assert(_left is not null);
185-
is_nullable = _lower == 0 || _left.IsNullableFor(context);
186-
break;
196+
return b == TrueByte;
197+
}
187198

188-
case SymbolicRegexKind.Concat:
189-
Debug.Assert(_left is not null && _right is not null);
190-
is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
191-
break;
199+
// Otherwise compute the nullability recursively for the given context
200+
bool is_nullable;
201+
switch (_kind)
202+
{
203+
case SymbolicRegexKind.Loop:
204+
Debug.Assert(_left is not null);
205+
is_nullable = _lower == 0 || _left.IsNullableFor(context);
206+
break;
192207

193-
case SymbolicRegexKind.Or:
194-
case SymbolicRegexKind.And:
195-
Debug.Assert(_alts is not null);
196-
is_nullable = _alts.IsNullableFor(context);
197-
break;
208+
case SymbolicRegexKind.Concat:
209+
Debug.Assert(_left is not null && _right is not null);
210+
is_nullable = _left.IsNullableFor(context) && _right.IsNullableFor(context);
211+
break;
198212

199-
case SymbolicRegexKind.Not:
200-
Debug.Assert(_left is not null);
201-
is_nullable = !_left.IsNullableFor(context);
202-
break;
213+
case SymbolicRegexKind.Or:
214+
case SymbolicRegexKind.And:
215+
Debug.Assert(_alts is not null);
216+
is_nullable = _alts.IsNullableFor(context);
217+
break;
203218

204-
case SymbolicRegexKind.StartAnchor:
205-
is_nullable = CharKind.Prev(context) == CharKind.StartStop;
206-
break;
219+
case SymbolicRegexKind.Not:
220+
Debug.Assert(_left is not null);
221+
is_nullable = !_left.IsNullableFor(context);
222+
break;
207223

208-
case SymbolicRegexKind.EndAnchor:
209-
is_nullable = CharKind.Next(context) == CharKind.StartStop;
210-
break;
224+
case SymbolicRegexKind.StartAnchor:
225+
is_nullable = CharKind.Prev(context) == CharKind.StartStop;
226+
break;
211227

212-
case SymbolicRegexKind.BOLAnchor:
213-
// Beg-Of-Line anchor is nullable when the previous character is Newline or Start
214-
// note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
215-
is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
216-
break;
228+
case SymbolicRegexKind.EndAnchor:
229+
is_nullable = CharKind.Next(context) == CharKind.StartStop;
230+
break;
217231

218-
case SymbolicRegexKind.EOLAnchor:
219-
// End-Of-Line anchor is nullable when the next character is Newline or Stop
220-
// note: at least one of the bits must be 1, but both could also be 1 in case of \Z
221-
is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
222-
break;
232+
case SymbolicRegexKind.BOLAnchor:
233+
// Beg-Of-Line anchor is nullable when the previous character is Newline or Start
234+
// note: at least one of the bits must be 1, but both could also be 1 in case of very first newline
235+
is_nullable = (CharKind.Prev(context) & CharKind.NewLineS) != 0;
236+
break;
223237

224-
case SymbolicRegexKind.WBAnchor:
225-
// test that prev char is word letter iff next is not not word letter
226-
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
227-
break;
238+
case SymbolicRegexKind.EOLAnchor:
239+
// End-Of-Line anchor is nullable when the next character is Newline or Stop
240+
// note: at least one of the bits must be 1, but both could also be 1 in case of \Z
241+
is_nullable = (CharKind.Next(context) & CharKind.NewLineS) != 0;
242+
break;
228243

229-
case SymbolicRegexKind.NWBAnchor:
230-
// test that prev char is word letter iff next is word letter
231-
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
232-
break;
244+
case SymbolicRegexKind.WBAnchor:
245+
// test that prev char is word letter iff next is not not word letter
246+
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) != 0;
247+
break;
233248

234-
case SymbolicRegexKind.EndAnchorZ:
235-
// \Z anchor is nullable when the next character is either the last Newline or Stop
236-
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
237-
is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
238-
break;
249+
case SymbolicRegexKind.NWBAnchor:
250+
// test that prev char is word letter iff next is word letter
251+
is_nullable = ((CharKind.Prev(context) & CharKind.WordLetter) ^ (CharKind.Next(context) & CharKind.WordLetter)) == 0;
252+
break;
239253

240-
default: //SymbolicRegexKind.EndAnchorZRev:
241-
// EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
242-
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
243-
Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
244-
is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
245-
break;
246-
}
254+
case SymbolicRegexKind.EndAnchorZ:
255+
// \Z anchor is nullable when the next character is either the last Newline or Stop
256+
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
257+
is_nullable = (CharKind.Next(context) & CharKind.StartStop) != 0;
258+
break;
247259

248-
_nullabilityCache[context] = is_nullable;
260+
default: // SymbolicRegexKind.EndAnchorZRev:
261+
// EndAnchorZRev (rev(\Z)) anchor is nullable when the prev character is either the first Newline or Start
262+
// note: CharKind.NewLineS == CharKind.Newline|CharKind.StartStop
263+
Debug.Assert(_kind == SymbolicRegexKind.EndAnchorZRev);
264+
is_nullable = (CharKind.Prev(context) & CharKind.StartStop) != 0;
265+
break;
249266
}
250267

268+
Volatile.Write(ref _nullabilityCache[context], is_nullable ? TrueByte : FalseByte);
269+
251270
return is_nullable;
252271
}
253272

src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,5 +1827,33 @@ public async Task UseRegexConcurrently_ThreadSafe_Success(RegexEngine engine, Ti
18271827
}, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default)).ToArray());
18281828
}
18291829
}
1830+
1831+
[Theory]
1832+
[MemberData(nameof(MatchWordsInAnchoredRegexes_TestData))]
1833+
public async Task MatchWordsInAnchoredRegexes(RegexEngine engine, RegexOptions options, string pattern, string input, (int, int)[] matches)
1834+
{
1835+
// The aim of these test is to test corner cases of matches involving anchors
1836+
// For NonBacktracking these tests are meant to
1837+
// cover most contexts in _nullabilityForContext in SymbolicRegexNode
1838+
Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
1839+
MatchCollection ms = r.Matches(input);
1840+
Assert.Equal(matches.Length, ms.Count);
1841+
for (int i = 0; i < matches.Length; i++)
1842+
{
1843+
Assert.Equal(ms[i].Index, matches[i].Item1);
1844+
Assert.Equal(ms[i].Length, matches[i].Item2);
1845+
}
1846+
}
1847+
1848+
public static IEnumerable<object[]> MatchWordsInAnchoredRegexes_TestData()
1849+
{
1850+
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
1851+
{
1852+
yield return new object[] { engine, RegexOptions.None, @"\b\w{10,}\b", "this is a complicated word in a\nnontrivial sentence", new (int, int)[] { (10, 11), (32, 10) } };
1853+
yield return new object[] { engine, RegexOptions.Multiline, @"^\w{10,}\b", "this is a\ncomplicated word in a\nnontrivial sentence", new (int, int)[] { (10, 11), (32, 10) } };
1854+
yield return new object[] { engine, RegexOptions.None, @"\b\d{1,2}\/\d{1,2}\/\d{2,4}\b", "date 10/12/1966 and 10/12/66 are the same", new (int, int)[] { (5, 10), (20, 8) } };
1855+
yield return new object[] { engine, RegexOptions.Multiline, @"\b\d{1,2}\/\d{1,2}\/\d{2,4}$", "date 10/12/1966\nand 10/12/66\nare the same", new (int, int)[] { (5, 10), (20, 8) } };
1856+
}
1857+
}
18301858
}
18311859
}

src/libraries/System.Text.RegularExpressions/tests/RegexExperiment.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -520,8 +520,9 @@ public void TestConjunctionOverCounting(string conjunct1, string conjunct2, stri
520520
Assert.Contains("conditional", e.Message);
521521
}
522522
}
523+
#endregion
523524

524-
525+
#region Random input generation tests
525526
public static IEnumerable<object[]> GenerateRandomMembers_TestData()
526527
{
527528
string[] patterns = new string[] { @"pa[5\$s]{2}w[o0]rd$", @"\w\d+", @"\d{10}" };
@@ -536,7 +537,7 @@ public static IEnumerable<object[]> GenerateRandomMembers_TestData()
536537
{
537538
foreach (string input in inputs)
538539
{
539-
yield return new object[] {engine, pattern, input, !negative };
540+
yield return new object[] { engine, pattern, input, !negative };
540541
}
541542
}
542543
}

0 commit comments

Comments
 (0)