Skip to content

Commit 2eeb718

Browse files
authored
MultiRange query for SortedNumeric DocValues (#14404)
* Numeric Multi-Range DocValues query
1 parent 7cbe0fa commit 2eeb718

File tree

4 files changed

+475
-5
lines changed

4 files changed

+475
-5
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ API Changes
7070

7171
New Features
7272
---------------------
73-
(No changes)
73+
* GITHUB#14404: Introducing DocValuesMultiRangeQuery.SortedNumericStabbingBuilder into sandbox.
74+
(Mikhail Khludnev)
7475

7576
Improvements
7677
---------------------

lucene/sandbox/src/java/org/apache/lucene/sandbox/search/DocValuesMultiRangeQuery.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.ArrayList;
2020
import java.util.List;
2121
import java.util.Objects;
22+
import org.apache.lucene.document.SortedNumericDocValuesField;
2223
import org.apache.lucene.document.SortedSetDocValuesField;
2324
import org.apache.lucene.search.*;
2425
import org.apache.lucene.util.BytesRef;
@@ -72,6 +73,42 @@ public String toString() {
7273
}
7374
}
7475

76+
/** Representation of a single clause in a MultiRangeQuery */
77+
public static class LongRange {
78+
protected long lower;
79+
protected long upper;
80+
81+
/** copies ByteRefs passed */
82+
public LongRange(long lowerValue, long upperValue) {
83+
this.lower = lowerValue;
84+
this.upper = upperValue;
85+
}
86+
87+
@Override
88+
public boolean equals(Object o) {
89+
if (this == o) {
90+
return true;
91+
}
92+
if (o == null || getClass() != o.getClass()) {
93+
return false;
94+
}
95+
LongRange that = (LongRange) o;
96+
return lower == that.lower && upper == that.upper;
97+
}
98+
99+
@Override
100+
public int hashCode() {
101+
int result = (int) lower;
102+
result = (int) (31L * result + upper);
103+
return result;
104+
}
105+
106+
@Override
107+
public String toString() {
108+
return lower + ".." + upper;
109+
}
110+
}
111+
75112
/**
76113
* Builder for creating a multi-range query for stabbing by SortedSet or Sorted field values. For
77114
* example, it matches IPs in docvalues field by multiple IP ranges. For the single range it
@@ -114,4 +151,39 @@ protected Query createSortedSetDocValuesMultiRangeQuery() {
114151
return new SortedSetDocValuesMultiRangeQuery(fieldName, clauses);
115152
}
116153
}
154+
155+
/**
156+
* Builder for creating a multi-range query for stabbing by SortedNumerics or Numerics field
157+
* values. For the single range it behaves like {@link
158+
* SortedNumericDocValuesField#newSlowRangeQuery(String, long, long)}
159+
*/
160+
public static class SortedNumericStabbingBuilder {
161+
protected final String fieldName;
162+
protected final List<LongRange> clauses = new ArrayList<>();
163+
164+
public SortedNumericStabbingBuilder(String fieldName) {
165+
this.fieldName = Objects.requireNonNull(fieldName);
166+
}
167+
168+
public SortedNumericStabbingBuilder add(long lowerValue, long upperValue) {
169+
clauses.add(new LongRange(lowerValue, upperValue));
170+
return this;
171+
}
172+
173+
public Query build() {
174+
if (clauses.isEmpty()) {
175+
return new MatchNoDocsQuery();
176+
}
177+
if (clauses.size() == 1) {
178+
LongRange theOnlyOne = clauses.getFirst();
179+
return SortedNumericDocValuesField.newSlowRangeQuery(
180+
fieldName, theOnlyOne.lower, theOnlyOne.upper);
181+
}
182+
return createSortedNumericDocValuesMultiRangeQuery();
183+
}
184+
185+
protected Query createSortedNumericDocValuesMultiRangeQuery() {
186+
return new SortedNumericDocValuesMultiRangeQuery(fieldName, clauses);
187+
}
188+
}
117189
}
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.lucene.sandbox.search;
18+
19+
import java.io.IOException;
20+
import java.util.Collection;
21+
import java.util.Comparator;
22+
import java.util.Iterator;
23+
import java.util.List;
24+
import java.util.NavigableSet;
25+
import java.util.Objects;
26+
import java.util.TreeSet;
27+
import org.apache.lucene.index.DocValues;
28+
import org.apache.lucene.index.DocValuesSkipper;
29+
import org.apache.lucene.index.LeafReaderContext;
30+
import org.apache.lucene.index.SortedNumericDocValues;
31+
import org.apache.lucene.search.ConstantScoreScorerSupplier;
32+
import org.apache.lucene.search.ConstantScoreWeight;
33+
import org.apache.lucene.search.DocValuesRangeIterator;
34+
import org.apache.lucene.search.IndexSearcher;
35+
import org.apache.lucene.search.Query;
36+
import org.apache.lucene.search.QueryVisitor;
37+
import org.apache.lucene.search.ScoreMode;
38+
import org.apache.lucene.search.ScorerSupplier;
39+
import org.apache.lucene.search.TwoPhaseIterator;
40+
import org.apache.lucene.search.Weight;
41+
import org.apache.lucene.util.PriorityQueue;
42+
43+
/**
44+
* A union multiple ranges over SortedNumericDocValuesField
45+
*
46+
* @lucene.experimental
47+
*/
48+
public class SortedNumericDocValuesMultiRangeQuery extends Query {
49+
50+
protected final String fieldName;
51+
protected final NavigableSet<DocValuesMultiRangeQuery.LongRange> sortedClauses;
52+
53+
protected SortedNumericDocValuesMultiRangeQuery(
54+
String fieldName, List<DocValuesMultiRangeQuery.LongRange> clauses) {
55+
this.fieldName = fieldName;
56+
sortedClauses = resolveOverlaps(clauses);
57+
}
58+
59+
private static final class Edge {
60+
private final DocValuesMultiRangeQuery.LongRange range;
61+
private final boolean point;
62+
private final boolean upper;
63+
64+
private static Edge createPoint(DocValuesMultiRangeQuery.LongRange r) {
65+
return new Edge(r);
66+
}
67+
68+
long getValue() {
69+
return upper ? range.upper : range.lower;
70+
}
71+
72+
private Edge(DocValuesMultiRangeQuery.LongRange range, boolean upper) {
73+
this.range = range;
74+
this.upper = upper;
75+
this.point = false;
76+
}
77+
78+
/** expecting Arrays.equals(lower.bytes,upper.bytes) i.e. point */
79+
private Edge(DocValuesMultiRangeQuery.LongRange range) {
80+
this.range = range;
81+
this.upper = false;
82+
this.point = true;
83+
}
84+
}
85+
86+
/** Merges overlapping ranges. map.floor() doesn't work with overlaps */
87+
private static NavigableSet<DocValuesMultiRangeQuery.LongRange> resolveOverlaps(
88+
Collection<DocValuesMultiRangeQuery.LongRange> clauses) {
89+
NavigableSet<DocValuesMultiRangeQuery.LongRange> sortedClauses =
90+
new TreeSet<>(
91+
Comparator.comparing(r -> r.lower)
92+
// .thenComparing(r -> r.upper)// have to ignore upper boundary for .floor() lookups
93+
);
94+
PriorityQueue<Edge> heap =
95+
new PriorityQueue<>(clauses.size() * 2) {
96+
@Override
97+
protected boolean lessThan(Edge a, Edge b) {
98+
return a.getValue() - b.getValue() < 0;
99+
}
100+
};
101+
for (DocValuesMultiRangeQuery.LongRange r : clauses) {
102+
long cmp = r.lower - r.upper;
103+
if (cmp == 0) {
104+
heap.add(Edge.createPoint(r));
105+
} else {
106+
if (cmp < 0) {
107+
heap.add(new Edge(r, false));
108+
heap.add(new Edge(r, true));
109+
} // else drop reverse ranges
110+
}
111+
}
112+
int totalEdges = heap.size();
113+
int depth = 0;
114+
Edge started = null;
115+
for (int i = 0; i < totalEdges; i++) {
116+
Edge smallest = heap.pop();
117+
if (depth == 0 && smallest.point) {
118+
if (i < totalEdges - 1 && heap.top().point) { // repeating same points
119+
if (smallest.getValue() == heap.top().getValue()) {
120+
continue;
121+
}
122+
}
123+
sortedClauses.add(smallest.range);
124+
}
125+
if (!smallest.point) {
126+
if (!smallest.upper) {
127+
depth++;
128+
if (depth == 1) { // just started
129+
started = smallest;
130+
}
131+
} else {
132+
depth--;
133+
if (depth == 0) {
134+
sortedClauses.add(
135+
started.range == smallest.range // no overlap case, the most often
136+
? smallest.range
137+
: new DocValuesMultiRangeQuery.LongRange(
138+
started.getValue(), smallest.getValue()));
139+
started = null;
140+
}
141+
}
142+
}
143+
}
144+
return sortedClauses;
145+
}
146+
147+
@Override
148+
public String toString(String fld) {
149+
return (Objects.equals(fieldName, fld) ? "" : fieldName + ":") + sortedClauses;
150+
}
151+
152+
@Override
153+
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
154+
throws IOException {
155+
return new MultiRangeWeight(boost, scoreMode);
156+
}
157+
158+
@Override
159+
public void visit(QueryVisitor visitor) {
160+
if (visitor.acceptField(fieldName)) {
161+
visitor.visitLeaf(this);
162+
}
163+
}
164+
165+
@Override
166+
public boolean equals(Object o) {
167+
if (this == o) return true;
168+
if (o == null || getClass() != o.getClass()) return false;
169+
SortedNumericDocValuesMultiRangeQuery that = (SortedNumericDocValuesMultiRangeQuery) o;
170+
return Objects.equals(fieldName, that.fieldName)
171+
// && Objects.equals(sortedClauses, that.sortedClauses)
172+
&& upperBoundWiseEquals(sortedClauses, that.sortedClauses);
173+
}
174+
175+
/**
176+
* TreeSet.equals is ruled by {@linkplain Comparator} logic. This comparator have to be upper
177+
* bound agnostic to support floor() lookups. However, equals() should be upper bound sensitive
178+
* and here we ensure that.
179+
*/
180+
private boolean upperBoundWiseEquals(
181+
NavigableSet<DocValuesMultiRangeQuery.LongRange> left,
182+
NavigableSet<DocValuesMultiRangeQuery.LongRange> right) {
183+
for (Iterator<DocValuesMultiRangeQuery.LongRange> li = left.iterator(), ri = right.iterator();
184+
li.hasNext() && ri.hasNext(); ) {
185+
if (!li.next().equals(ri.next()) || li.hasNext() != ri.hasNext()) {
186+
return false;
187+
}
188+
}
189+
return true;
190+
}
191+
192+
@Override
193+
public int hashCode() {
194+
return Objects.hash(fieldName, sortedClauses);
195+
}
196+
197+
/** Weight for {@linkplain SortedNumericDocValuesMultiRangeQuery} */
198+
protected class MultiRangeWeight extends ConstantScoreWeight {
199+
final ScoreMode scoreMode;
200+
201+
public MultiRangeWeight(float boost, ScoreMode scoreMode) {
202+
super(SortedNumericDocValuesMultiRangeQuery.this, boost);
203+
this.scoreMode = scoreMode;
204+
}
205+
206+
@Override
207+
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
208+
if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
209+
return null;
210+
}
211+
long lowerValue = sortedClauses.getFirst().lower;
212+
long upperValue = sortedClauses.getLast().upper;
213+
int maxDoc = context.reader().maxDoc();
214+
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(fieldName);
215+
if (skipper != null) {
216+
if (skipper.minValue() > upperValue || skipper.maxValue() < lowerValue) {
217+
return null;
218+
}
219+
}
220+
221+
SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), fieldName);
222+
TwoPhaseIterator iterator;
223+
iterator =
224+
new TwoPhaseIterator(values) {
225+
final DocValuesMultiRangeQuery.LongRange lookupVal =
226+
new DocValuesMultiRangeQuery.LongRange(-Long.MAX_VALUE, -Long.MAX_VALUE);
227+
228+
@Override
229+
public boolean matches() throws IOException {
230+
NavigableSet<DocValuesMultiRangeQuery.LongRange> rangeTree = sortedClauses;
231+
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
232+
final long value = values.nextValue();
233+
if (value >= lowerValue && value <= upperValue) {
234+
lookupVal.lower = value;
235+
lookupVal.upper = value;
236+
DocValuesMultiRangeQuery.LongRange lessOrEq = rangeTree.floor(lookupVal);
237+
if (lessOrEq != null) {
238+
if (lessOrEq.upper >= value) {
239+
assert lessOrEq.lower <= value;
240+
return true;
241+
}
242+
assert lessOrEq.upper < value
243+
: "always true. prev range is over before the value";
244+
// cut range tree for greater values, if we'll look up then
245+
if (i < count - 1) {
246+
rangeTree = rangeTree.tailSet(lessOrEq, false);
247+
}
248+
} // else
249+
// lessOrEq == null - value before the first range
250+
}
251+
}
252+
return false; // all values were < lowerValue
253+
}
254+
255+
@Override
256+
public float matchCost() {
257+
return sortedClauses.size();
258+
}
259+
};
260+
if (skipper != null) {
261+
iterator =
262+
new DocValuesRangeIterator(
263+
iterator, skipper, lowerValue, upperValue, sortedClauses.size() > 1);
264+
}
265+
return ConstantScoreScorerSupplier.fromIterator(
266+
TwoPhaseIterator.asDocIdSetIterator(iterator), score(), scoreMode, maxDoc);
267+
}
268+
269+
@Override
270+
public boolean isCacheable(LeafReaderContext ctx) {
271+
return DocValues.isCacheable(ctx, fieldName);
272+
}
273+
}
274+
}

0 commit comments

Comments
 (0)