Skip to content

Commit c28bec4

Browse files
authored
Add an additional length check to default FrozenDictionary and FrozenSet (#92546)
* Add an additional length check to FrozenDictionary and FrozenSet. On construction of the collection, we compute an unsigned long which is effectively 64 boolean flags, each representing the presence of a key string of a particular length (mod 64). When reading from the collection, we can exit early if the key being tested does not map to a bit which has been switched on by the original computation. I believe this has similarities to how Bloom filters work. This adds a relatively small cost on creation of the collection, as well as a small cost to each read operation. However, it can speed up reads with certain data patterns, especially when the difference between the maximum and minimum key length is large but there aren't many different lengths.
1 parent 6de7549 commit c28bec4

10 files changed

+72
-30
lines changed

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,12 @@ private static FrozenDictionary<TKey, TValue> CreateFromDictionary<TKey, TValue>
161161

162162
// Calculate the minimum and maximum lengths of the strings in the dictionary. Several of the analyses need this.
163163
int minLength = int.MaxValue, maxLength = 0;
164+
ulong lengthFilter = 0;
164165
foreach (string key in keys)
165166
{
166167
if (key.Length < minLength) minLength = key.Length;
167168
if (key.Length > maxLength) maxLength = key.Length;
169+
lengthFilter |= (1UL << (key.Length % 64));
168170
}
169171
Debug.Assert(minLength >= 0 && maxLength >= minLength);
170172

@@ -215,12 +217,12 @@ private static FrozenDictionary<TKey, TValue> CreateFromDictionary<TKey, TValue>
215217
if (analysis.IgnoreCase)
216218
{
217219
frozenDictionary = analysis.AllAsciiIfIgnoreCase
218-
? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff)
219-
: new OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
220+
? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter)
221+
: new OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
220222
}
221223
else
222224
{
223-
frozenDictionary = new OrdinalStringFrozenDictionary_Full<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
225+
frozenDictionary = new OrdinalStringFrozenDictionary_Full<TValue>(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
224226
}
225227
}
226228

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenSet.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,12 @@ private static FrozenSet<T> CreateFromSet<T>(HashSet<T> source)
109109

110110
// Calculate the minimum and maximum lengths of the strings in the set. Several of the analyses need this.
111111
int minLength = int.MaxValue, maxLength = 0;
112+
ulong lengthFilter = 0;
112113
foreach (string s in entries)
113114
{
114115
if (s.Length < minLength) minLength = s.Length;
115116
if (s.Length > maxLength) maxLength = s.Length;
117+
lengthFilter |= (1UL << (s.Length % 64));
116118
}
117119
Debug.Assert(minLength >= 0 && maxLength >= minLength);
118120

@@ -163,12 +165,12 @@ private static FrozenSet<T> CreateFromSet<T>(HashSet<T> source)
163165
if (analysis.IgnoreCase)
164166
{
165167
frozenSet = analysis.AllAsciiIfIgnoreCase
166-
? new OrdinalStringFrozenSet_FullCaseInsensitiveAscii(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff)
167-
: new OrdinalStringFrozenSet_FullCaseInsensitive(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
168+
? new OrdinalStringFrozenSet_FullCaseInsensitiveAscii(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter)
169+
: new OrdinalStringFrozenSet_FullCaseInsensitive(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
168170
}
169171
else
170172
{
171-
frozenSet = new OrdinalStringFrozenSet_Full(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff);
173+
frozenSet = new OrdinalStringFrozenSet_Full(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter);
172174
}
173175
}
174176

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary.cs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ internal OrdinalStringFrozenDictionary(
6464
private protected int HashCount { get; }
6565
private protected abstract bool Equals(string? x, string? y);
6666
private protected abstract int GetHashCode(string s);
67+
private protected virtual bool CheckLengthQuick(string key) => true;
6768
private protected override string[] KeysCore => _keys;
6869
private protected override TValue[] ValuesCore => _values;
6970
private protected override Enumerator GetEnumeratorCore() => new Enumerator(_keys, _values);
@@ -74,20 +75,23 @@ private protected override ref readonly TValue GetValueRefOrNullRefCore(string k
7475
{
7576
if ((uint)(key.Length - _minimumLength) <= (uint)_maximumLengthDiff)
7677
{
77-
int hashCode = GetHashCode(key);
78-
_hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex);
79-
80-
while (index <= endIndex)
78+
if (CheckLengthQuick(key))
8179
{
82-
if (hashCode == _hashTable.HashCodes[index])
80+
int hashCode = GetHashCode(key);
81+
_hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex);
82+
83+
while (index <= endIndex)
8384
{
84-
if (Equals(key, _keys[index]))
85+
if (hashCode == _hashTable.HashCodes[index])
8586
{
86-
return ref _values[index];
87+
if (Equals(key, _keys[index]))
88+
{
89+
return ref _values[index];
90+
}
8791
}
88-
}
8992

90-
index++;
93+
index++;
94+
}
9195
}
9296
}
9397

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_Full.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@ namespace System.Collections.Frozen
77
{
88
internal sealed class OrdinalStringFrozenDictionary_Full<TValue> : OrdinalStringFrozenDictionary<TValue>
99
{
10+
private readonly ulong _lengthFilter;
11+
1012
internal OrdinalStringFrozenDictionary_Full(
1113
string[] keys,
1214
TValue[] values,
1315
IEqualityComparer<string> comparer,
1416
int minimumLength,
15-
int maximumLengthDiff)
17+
int maximumLengthDiff,
18+
ulong lengthFilter)
1619
: base(keys, values, comparer, minimumLength, maximumLengthDiff)
1720
{
21+
_lengthFilter = lengthFilter;
1822
}
1923

2024
// This override is necessary to force the jit to emit the code in such a way that it
@@ -24,5 +28,6 @@ internal OrdinalStringFrozenDictionary_Full(
2428

2529
private protected override bool Equals(string? x, string? y) => string.Equals(x, y);
2630
private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(s.AsSpan());
31+
private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0;
2732
}
2833
}

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitive.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@ namespace System.Collections.Frozen
77
{
88
internal sealed class OrdinalStringFrozenDictionary_FullCaseInsensitive<TValue> : OrdinalStringFrozenDictionary<TValue>
99
{
10+
private readonly ulong _lengthFilter;
11+
1012
internal OrdinalStringFrozenDictionary_FullCaseInsensitive(
1113
string[] keys,
1214
TValue[] values,
1315
IEqualityComparer<string> comparer,
1416
int minimumLength,
15-
int maximumLengthDiff)
17+
int maximumLengthDiff,
18+
ulong lengthFilter)
1619
: base(keys, values, comparer, minimumLength, maximumLengthDiff)
1720
{
21+
_lengthFilter = lengthFilter;
1822
}
1923

2024
// This override is necessary to force the jit to emit the code in such a way that it
@@ -24,5 +28,6 @@ internal OrdinalStringFrozenDictionary_FullCaseInsensitive(
2428

2529
private protected override bool Equals(string? x, string? y) => StringComparer.OrdinalIgnoreCase.Equals(x, y);
2630
private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(s.AsSpan());
31+
private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0;
2732
}
2833
}

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@ namespace System.Collections.Frozen
77
{
88
internal sealed class OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii<TValue> : OrdinalStringFrozenDictionary<TValue>
99
{
10+
private readonly ulong _lengthFilter;
11+
1012
internal OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii(
1113
string[] keys,
1214
TValue[] values,
1315
IEqualityComparer<string> comparer,
1416
int minimumLength,
15-
int maximumLengthDiff)
17+
int maximumLengthDiff,
18+
ulong lengthFilter)
1619
: base(keys, values, comparer, minimumLength, maximumLengthDiff)
1720
{
21+
_lengthFilter = lengthFilter;
1822
}
1923

2024
// This override is necessary to force the jit to emit the code in such a way that it
@@ -24,5 +28,6 @@ internal OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii(
2428

2529
private protected override bool Equals(string? x, string? y) => StringComparer.OrdinalIgnoreCase.Equals(x, y);
2630
private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCaseAscii(s.AsSpan());
31+
private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0;
2732
}
2833
}

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet.cs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ internal OrdinalStringFrozenSet(
5454
private protected int HashCount { get; }
5555
private protected abstract bool Equals(string? x, string? y);
5656
private protected abstract int GetHashCode(string s);
57+
private protected virtual bool CheckLengthQuick(string key) => true;
5758
private protected override string[] ItemsCore => _items;
5859
private protected override Enumerator GetEnumeratorCore() => new Enumerator(_items);
5960
private protected override int CountCore => _hashTable.Count;
@@ -64,20 +65,23 @@ private protected override int FindItemIndex(string item)
6465
if (item is not null && // this implementation won't be used for null values
6566
(uint)(item.Length - _minimumLength) <= (uint)_maximumLengthDiff)
6667
{
67-
int hashCode = GetHashCode(item);
68-
_hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex);
69-
70-
while (index <= endIndex)
68+
if (CheckLengthQuick(item))
7169
{
72-
if (hashCode == _hashTable.HashCodes[index])
70+
int hashCode = GetHashCode(item);
71+
_hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex);
72+
73+
while (index <= endIndex)
7374
{
74-
if (Equals(item, _items[index]))
75+
if (hashCode == _hashTable.HashCodes[index])
7576
{
76-
return index;
77+
if (Equals(item, _items[index]))
78+
{
79+
return index;
80+
}
7781
}
78-
}
7982

80-
index++;
83+
index++;
84+
}
8185
}
8286
}
8387

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_Full.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@ namespace System.Collections.Frozen
77
{
88
internal sealed class OrdinalStringFrozenSet_Full : OrdinalStringFrozenSet
99
{
10+
private readonly ulong _lengthFilter;
11+
1012
internal OrdinalStringFrozenSet_Full(
1113
string[] entries,
1214
IEqualityComparer<string> comparer,
1315
int minimumLength,
14-
int maximumLengthDiff)
16+
int maximumLengthDiff,
17+
ulong lengthFilter)
1518
: base(entries, comparer, minimumLength, maximumLengthDiff)
1619
{
20+
_lengthFilter = lengthFilter;
1721
}
1822

1923
// This override is necessary to force the jit to emit the code in such a way that it
@@ -23,5 +27,6 @@ internal OrdinalStringFrozenSet_Full(
2327

2428
private protected override bool Equals(string? x, string? y) => string.Equals(x, y);
2529
private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(s.AsSpan());
30+
private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0;
2631
}
2732
}

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitive.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@ namespace System.Collections.Frozen
77
{
88
internal sealed class OrdinalStringFrozenSet_FullCaseInsensitive : OrdinalStringFrozenSet
99
{
10+
private readonly ulong _lengthFilter;
11+
1012
internal OrdinalStringFrozenSet_FullCaseInsensitive(
1113
string[] entries,
1214
IEqualityComparer<string> comparer,
1315
int minimumLength,
14-
int maximumLengthDiff)
16+
int maximumLengthDiff,
17+
ulong lengthFilter)
1518
: base(entries, comparer, minimumLength, maximumLengthDiff)
1619
{
20+
_lengthFilter = lengthFilter;
1721
}
1822

1923
// This override is necessary to force the jit to emit the code in such a way that it
@@ -23,5 +27,6 @@ internal OrdinalStringFrozenSet_FullCaseInsensitive(
2327

2428
private protected override bool Equals(string? x, string? y) => StringComparer.OrdinalIgnoreCase.Equals(x, y);
2529
private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(s.AsSpan());
30+
private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0;
2631
}
2732
}

src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitiveAscii.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@ namespace System.Collections.Frozen
77
{
88
internal sealed class OrdinalStringFrozenSet_FullCaseInsensitiveAscii : OrdinalStringFrozenSet
99
{
10+
private readonly ulong _lengthFilter;
11+
1012
internal OrdinalStringFrozenSet_FullCaseInsensitiveAscii(
1113
string[] entries,
1214
IEqualityComparer<string> comparer,
1315
int minimumLength,
14-
int maximumLengthDiff)
16+
int maximumLengthDiff,
17+
ulong lengthFilter)
1518
: base(entries, comparer, minimumLength, maximumLengthDiff)
1619
{
20+
_lengthFilter = lengthFilter;
1721
}
1822

1923
// This override is necessary to force the jit to emit the code in such a way that it
@@ -23,5 +27,6 @@ internal OrdinalStringFrozenSet_FullCaseInsensitiveAscii(
2327

2428
private protected override bool Equals(string? x, string? y) => StringComparer.OrdinalIgnoreCase.Equals(x, y);
2529
private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCaseAscii(s.AsSpan());
30+
private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0;
2631
}
2732
}

0 commit comments

Comments
 (0)