Skip to content

Commit 648c3cf

Browse files
russcam authored and Mpdreamz committed
Include similarity in index settings (#2924)
Similarity settings are nested under settings > index. Rename DefaultSimilarity to ClassicSimilarity. Add DFISimilarity. Add similarity settings to create index integration tests. Add XML comments for each similarity. Refactor SimilarityJsonConverter to not look up the NEST ISimilarity type with Type.GetType(), but to infer it directly from the type property value. Add Similarity to the updatable index settings. Closes #2890.
1 parent e92b893 commit 648c3cf

File tree

24 files changed

+500
-140
lines changed

24 files changed

+500
-140
lines changed

src/Nest/Aggregations/AggregationContainer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public static implicit operator AggregationDictionary(AggregationBase aggregator
4545

4646
public void Add(string key, AggregationContainer value) => this.BackingDictionary.Add(ValidateKey(key), value);
4747

48-
public override string ValidateKey(string key)
48+
protected override string ValidateKey(string key)
4949
{
5050
if (AggregateJsonConverter.AllReservedAggregationNames.Contains(key))
5151
throw new ArgumentException(

src/Nest/CommonAbstractions/DictionaryLike/IsADictionaryBase.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ public abstract class IsADictionaryBase<TKey, TValue> : IIsADictionary<TKey, TVa
1010
protected Dictionary<TKey, TValue> BackingDictionary { get; }
1111
private ICollection<KeyValuePair<TKey, TValue>> Self => BackingDictionary;
1212

13-
protected IsADictionaryBase() { this.BackingDictionary = new Dictionary<TKey, TValue>(); }
13+
protected IsADictionaryBase() => this.BackingDictionary = new Dictionary<TKey, TValue>();
1414

1515
protected IsADictionaryBase(IDictionary<TKey, TValue> backingDictionary)
1616
{
@@ -48,18 +48,18 @@ void ICollection<KeyValuePair<TKey, TValue>>.Add(KeyValuePair<TKey, TValue> item
4848
bool IDictionary<TKey, TValue>.Remove(TKey key) => this.BackingDictionary.Remove(key);
4949
bool IDictionary<TKey, TValue>.TryGetValue(TKey key, out TValue value) => this.BackingDictionary.TryGetValue(key, out value);
5050

51-
public virtual TKey ValidateKey(TKey key) => key;
51+
protected virtual TKey ValidateKey(TKey key) => key;
5252

5353
TValue IDictionary<TKey, TValue>.this[TKey key]
5454
{
55-
get { return this.BackingDictionary[key]; }
56-
set { this.BackingDictionary[ValidateKey(key)] = value; }
55+
get => this.BackingDictionary[key];
56+
set => this.BackingDictionary[ValidateKey(key)] = value;
5757
}
5858

5959
public TValue this[TKey key]
6060
{
61-
get { return this.BackingDictionary[key]; }
62-
set { this.BackingDictionary[ValidateKey(key)] = value; }
61+
get => this.BackingDictionary[key];
62+
set => this.BackingDictionary[ValidateKey(key)] = value;
6363
}
6464
}
6565
}

src/Nest/IndexModules/IndexSettings/IndexState.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@ public interface IIndexState
1313

1414
[JsonProperty("mappings")]
1515
IMappings Mappings { get; set; }
16-
17-
[JsonProperty("similarity")]
18-
ISimilarities Similarity { get; set; }
1916
}
2017

2118
public class IndexState : IIndexState
@@ -25,7 +22,5 @@ public class IndexState : IIndexState
2522
public IMappings Mappings { get; set; }
2623

2724
public IAliases Aliases { get; set; }
28-
29-
public ISimilarities Similarity { get; set; }
3025
}
3126
}

src/Nest/IndexModules/IndexSettings/Settings/DynamicIndexSettings.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ public interface IDynamicIndexSettings : IIsADictionary<string, object>
9191
/// Configure analysis
9292
/// </summary>
9393
IAnalysis Analysis { get; set; }
94+
95+
/// <summary>
96+
/// Configure similarity
97+
/// </summary>
98+
ISimilarities Similarity { get; set; }
9499
}
95100

96101
public class DynamicIndexSettings : IsADictionaryBase<string, object>, IDynamicIndexSettings
@@ -147,6 +152,9 @@ public DynamicIndexSettings(IDictionary<string, object> container) : base(contai
147152
/// <inheritdoc/>
148153
public IAnalysis Analysis { get; set; }
149154

155+
/// <inheritdoc/>
156+
public ISimilarities Similarity { get; set; }
157+
150158
/// <summary>
151159
/// Add any setting to the index
152160
/// </summary>
@@ -227,7 +235,12 @@ public TDescriptor Translog(Func<TranslogSettingsDescriptor, ITranslogSettings>
227235
public TDescriptor UnassignedNodeLeftDelayedTimeout(Time time) =>
228236
Assign(a => a.UnassignedNodeLeftDelayedTimeout = time);
229237

238+
/// <inheritdoc/>
230239
public TDescriptor Analysis(Func<AnalysisDescriptor, IAnalysis> selector) =>
231240
Assign(a => a.Analysis = selector?.Invoke(new AnalysisDescriptor()));
241+
242+
/// <inheritdoc/>
243+
public TDescriptor Similarity(Func<SimilaritiesDescriptor, IPromise<ISimilarities>> selector) =>
244+
Assign(a => a.Similarity = selector?.Invoke(new SimilaritiesDescriptor())?.Value);
232245
}
233246
}

src/Nest/IndexModules/IndexSettings/Settings/IndexSettingsConverter.cs

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public override void WriteJson(JsonWriter writer, object value, JsonSerializer s
7575
d[UpdatableIndexSettings.SlowlogIndexingSource] = indexing?.Source;
7676

7777
d[UpdatableIndexSettings.Analysis] = ds.Analysis;
78+
d[UpdatableIndexSettings.Similarity] = ds.Similarity;
7879

7980
var indexSettings = value as IIndexSettings;
8081

@@ -94,7 +95,17 @@ public override void WriteJson(JsonWriter writer, object value, JsonSerializer s
9495
base.WriteJson(writer, d, serializer);
9596
}
9697

97-
private object AsArrayOrSingleItem<T>(IEnumerable<T> items)
98+
public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
99+
{
100+
var s = new IndexSettings();
101+
SetKnownIndexSettings(reader, serializer, s);
102+
if (!typeof (IUpdateIndexSettingsRequest).IsAssignableFrom(objectType)) return s;
103+
104+
var request = new UpdateIndexSettingsRequest() { IndexSettings = s};
105+
return request;
106+
}
107+
108+
private static object AsArrayOrSingleItem<T>(IEnumerable<T> items)
98109
{
99110
if (items == null || !items.Any())
100111
return null;
@@ -105,29 +116,21 @@ private object AsArrayOrSingleItem<T>(IEnumerable<T> items)
105116
return items;
106117
}
107118

108-
public JObject Flatten(JObject original, string prefix = "", JObject newObject = null)
119+
private static JObject Flatten(JObject original, string prefix = "", JObject newObject = null)
109120
{
110121
newObject = newObject ?? new JObject();
111122
foreach (var property in original.Properties())
112123
{
113-
if (property.Value is JObject && property.Name != UpdatableIndexSettings.Analysis)
124+
if (property.Value is JObject &&
125+
property.Name != UpdatableIndexSettings.Analysis &&
126+
property.Name != UpdatableIndexSettings.Similarity)
114127
Flatten(property.Value.Value<JObject>(), prefix + property.Name + ".", newObject);
115128
else newObject.Add(prefix + property.Name, property.Value);
116129
}
117130
return newObject;
118131
}
119132

120-
public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
121-
{
122-
var s = new IndexSettings();
123-
SetKnownIndexSettings(reader, serializer, s);
124-
if (!typeof (IUpdateIndexSettingsRequest).IsAssignableFrom(objectType)) return s;
125-
126-
var request = new UpdateIndexSettingsRequest() { IndexSettings = s};
127-
return request;
128-
}
129-
130-
private void SetKnownIndexSettings(JsonReader reader, JsonSerializer serializer, IIndexSettings s)
133+
private static void SetKnownIndexSettings(JsonReader reader, JsonSerializer serializer, IIndexSettings s)
131134
{
132135
var settings = Flatten(JObject.Load(reader)).Properties().ToDictionary(kv => kv.Name);
133136

@@ -219,6 +222,8 @@ private void SetKnownIndexSettings(JsonReader reader, JsonSerializer serializer,
219222
var setting = kv.Value;
220223
if (kv.Key == UpdatableIndexSettings.Analysis || kv.Key == "index.analysis")
221224
s.Analysis = setting.Value.Value<JObject>().ToObject<Analysis>(serializer);
225+
if (kv.Key == UpdatableIndexSettings.Similarity || kv.Key == "index.similarity")
226+
s.Similarity = setting.Value.Value<JObject>().ToObject<Similarities>(serializer);
222227
else
223228
{
224229
dict?.Add(kv.Key, serializer.Deserialize(kv.Value.Value.CreateReader()));

src/Nest/IndexModules/IndexSettings/Settings/UpdatableIndexSettings.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ public static class UpdatableIndexSettings
5050
public const string MergeSchedulerMaxThreadCount = "index.merge.scheduler.max_thread_count";
5151
public const string MergeSchedulerAutoThrottle = "index.merge.scheduler.auto_throttle";
5252

53+
public const string Similarity = "similarity";
54+
5355
public const string SlowlogSearchThresholdQueryWarn = "index.search.slowlog.threshold.query.warn";
5456
public const string SlowlogSearchThresholdQueryInfo = "index.search.slowlog.threshold.query.info";
5557
public const string SlowlogSearchThresholdQueryDebug = "index.search.slowlog.threshold.query.debug";

src/Nest/IndexModules/Similarity/BM25Similarity.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
namespace Nest
44
{
55
/// <summary>
6-
/// BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker, Susan Jones, Micheline Hancock-Beaulieu,
7-
/// and Mike Gatford. Okapi at TREC-3. In Proceedings of the Third Text REtrieval Conference (TREC 1994). Gaithersburg, USA, November 1994.
6+
/// BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker, Susan Jones, Micheline Hancock-Beaulieu,
7+
/// and Mike Gatford. Okapi at TREC-3. In Proceedings of the Third Text Retrieval Conference (TREC 1994). Gaithersburg, USA, November 1994.
88
/// </summary>
99
public interface IBM25Similarity : ISimilarity
1010
{
@@ -41,7 +41,7 @@ public class BM25Similarity : IBM25Similarity
4141
public bool? DiscountOverlaps { get; set; }
4242
}
4343
/// <inheritdoc/>
44-
public class BM25SimilarityDescriptor
44+
public class BM25SimilarityDescriptor
4545
: DescriptorBase<BM25SimilarityDescriptor, IBM25Similarity>, IBM25Similarity
4646
{
4747
string ISimilarity.Type => "BM25";
@@ -50,11 +50,11 @@ public class BM25SimilarityDescriptor
5050
double? IBM25Similarity.B { get; set; }
5151

5252
/// <inheritdoc/>
53-
public BM25SimilarityDescriptor DiscountOverlaps(bool? discount = true) => Assign(a => a.DiscountOverlaps = discount);
53+
public BM25SimilarityDescriptor DiscountOverlaps(bool discount = true) => Assign(a => a.DiscountOverlaps = discount);
5454
/// <inheritdoc/>
55-
public BM25SimilarityDescriptor K1(double? k1) => Assign(a => a.K1 = k1);
55+
public BM25SimilarityDescriptor K1(double k1) => Assign(a => a.K1 = k1);
5656
/// <inheritdoc/>
57-
public BM25SimilarityDescriptor B(double? b) => Assign(a => a.B = b);
57+
public BM25SimilarityDescriptor B(double b) => Assign(a => a.B = b);
5858
}
5959

6060
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
using Newtonsoft.Json;
2+
3+
namespace Nest
4+
{
5+
/// <summary>
6+
/// The classic similarity that is based on the TF/IDF model.
7+
/// </summary>
8+
public interface IClassicSimilarity : ISimilarity
9+
{
10+
/// <summary>
11+
/// Determines whether overlap tokens (tokens with 0 position increment) are ignored when computing norm.
12+
/// By default this is <c>true</c>, meaning overlap tokens do not count when computing norms.
13+
/// </summary>
14+
[JsonProperty("discount_overlaps")]
15+
bool? DiscountOverlaps { get; set; }
16+
}
17+
18+
/// <inheritdoc />
19+
public class ClassicSimilarity : IClassicSimilarity
20+
{
21+
public string Type => "classic";
22+
23+
/// <inheritdoc />
24+
public bool? DiscountOverlaps { get; set; }
25+
}
26+
27+
/// <inheritdoc />
28+
public class ClassicSimilarityDescriptor
29+
: DescriptorBase<ClassicSimilarityDescriptor, IClassicSimilarity>, IClassicSimilarity
30+
{
31+
string ISimilarity.Type => "classic";
32+
bool? IClassicSimilarity.DiscountOverlaps { get; set; }
33+
34+
/// <inheritdoc />
35+
public ClassicSimilarityDescriptor DiscountOverlaps(bool discount = true) => Assign(a => a.DiscountOverlaps = discount);
36+
}
37+
}

src/Nest/IndexModules/Similarity/CustomSimilarity.cs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,41 @@
44

55
namespace Nest
66
{
7+
/// <summary>
8+
/// A custom similarity
9+
/// </summary>
710
public interface ICustomSimilarity : ISimilarity, IIsADictionary<string, object> { }
811

912
/// <inheritdoc/>
1013
public class CustomSimilarity : IsADictionaryBase<string, object>, ICustomSimilarity
1114
{
12-
public string Type { get { return this["type"] as string; } set { this.Add("type", value); } }
15+
public string Type
16+
{
17+
get => this["type"] as string;
18+
set => this.Add("type", value);
19+
}
1320

1421
public CustomSimilarity(string type)
1522
{
1623
if (!string.IsNullOrEmpty(type)) this.Type = type;
1724
}
1825

1926
internal CustomSimilarity(IDictionary<string, object> container) : base(container) { }
27+
2028
internal CustomSimilarity(Dictionary<string, object> container)
21-
: base(container.Select(kv => kv).ToDictionary(kv => kv.Key, kv => kv.Value))
22-
{}
29+
: base(container.Select(kv => kv).ToDictionary(kv => kv.Key, kv => kv.Value)) { }
2330

2431
public void Add(string key, object value) => BackingDictionary.Add(key, value);
2532
}
33+
2634
/// <inheritdoc/>
2735
public class CustomSimilarityDescriptor
2836
: IsADictionaryDescriptorBase<CustomSimilarityDescriptor, ICustomSimilarity, string, object>
2937
{
30-
3138
public CustomSimilarityDescriptor() : base(new CustomSimilarity(string.Empty)) { }
3239

3340
internal CustomSimilarityDescriptor Type(string type) => Assign("type", type);
41+
3442
public CustomSimilarityDescriptor Add(string key, object value) => Assign(key, value);
3543
}
3644

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
using System.Runtime.Serialization;
2+
using Newtonsoft.Json;
3+
using Newtonsoft.Json.Converters;
4+
5+
namespace Nest
6+
{
7+
/// <summary>
8+
/// <see cref="IDFISimilarity"/> independence measure
9+
/// </summary>
10+
[JsonConverter(typeof(StringEnumConverter))]
11+
public enum DFIIndependenceMeasure
12+
{
13+
[EnumMember(Value = "standardized")]
14+
Standardized,
15+
16+
[EnumMember(Value = "saturated")]
17+
Saturated,
18+
19+
[EnumMember(Value = "chisquared")]
20+
ChiSquared
21+
}
22+
}

0 commit comments

Comments
 (0)