Skip to content

Commit

Permalink
Add range and regexp Intervals (#111465)
Browse files Browse the repository at this point in the history
Lucene/pull/13562 introduced IntervalsSource for range and
regexp queries. This exposes these features in ES.

This is done to achieve parity with Span queries that support
regexp and range.

Relates to #110491
  • Loading branch information
mayya-sharipova authored and javanna committed Aug 29, 2024
1 parent 40ddde2 commit 233c17a
Show file tree
Hide file tree
Showing 12 changed files with 774 additions and 3 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/111465.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 111465
summary: Add range and regexp Intervals
area: Search
type: enhancement
issues: []
66 changes: 66 additions & 0 deletions docs/reference/query-dsl/intervals-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ Valid rules include:
* <<intervals-match,`match`>>
* <<intervals-prefix,`prefix`>>
* <<intervals-wildcard,`wildcard`>>
* <<intervals-regexp,`regexp`>>
* <<intervals-fuzzy,`fuzzy`>>
* <<intervals-range,`range`>>
* <<intervals-all_of,`all_of`>>
* <<intervals-any_of,`any_of`>>
--
Expand Down Expand Up @@ -178,6 +180,36 @@ The `pattern` is normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
--

[[intervals-regexp]]
==== `regexp` rule parameters

The `regexp` rule matches terms using a regular expression pattern.
This pattern can expand to match at most 128 terms.
If the pattern matches more than 128 terms, {es} returns an error.

`pattern`::
+
--
(Required, string) Regexp pattern used to find matching terms.
For a list of operators supported by the
`regexp` pattern, see <<regexp-syntax, Regular expression syntax>>.

WARNING: Avoid using wildcard patterns, such as `.*` or `.*?+`. This can
increase the iterations needed to find matching terms and slow search
performance.
--
`analyzer`::
(Optional, string) <<analysis, analyzer>> used to normalize the `pattern`.
Defaults to the top-level `<field>`'s analyzer.

`use_field`::
+
--
(Optional, string) If specified, match intervals from this field rather than the
top-level `<field>`.

The `pattern` is normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
--

[[intervals-fuzzy]]
==== `fuzzy` rule parameters

Expand Down Expand Up @@ -214,6 +246,40 @@ The `term` is normalized using the search analyzer from this field, unless
`analyzer` is specified separately.
--

[[intervals-range]]
==== `range` rule parameters

The `range` rule matches terms contained within a provided range.
This range can expand to match at most 128 terms.
If the range matches more than 128 terms, {es} returns an error.

`gt`::
(Optional, string) Greater than: match terms greater than the provided term.

`gte`::
(Optional, string) Greater than or equal to: match terms greater than or
equal to the provided term.

`lt`::
(Optional, string) Less than: match terms less than the provided term.

`lte`::
(Optional, string) Less than or equal to: match terms less than or
equal to the provided term.

NOTE: You must provide one of the `gt` or `gte` parameters
and one of the `lt` or `lte` parameters.


`analyzer`::
(Optional, string) <<analysis, analyzer>> used to normalize the range terms.
Defaults to the top-level `<field>`'s analyzer.

`use_field`::
(Optional, string) If specified, match intervals from this field rather than the
top-level `<field>`.


[[intervals-all_of]]
==== `all_of` rule parameters

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,30 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
);
}

/**
 * Builds a regexp {@link IntervalsSource} for the given pattern and passes it to
 * {@code toIntervalsSource} together with a {@link MatchAllDocsQuery} approximation.
 *
 * @param pattern the regular expression pattern
 * @param context the search execution context, forwarded to {@code toIntervalsSource}
 * @return the source returned by {@code toIntervalsSource}
 */
@Override
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
    final IntervalsSource regexpSource = Intervals.regexp(pattern);
    // regexp queries can be expensive, what should the approximation be?
    final MatchAllDocsQuery approximation = new MatchAllDocsQuery();
    return toIntervalsSource(regexpSource, approximation, context);
}

/**
 * Builds a range {@link IntervalsSource} for the given bounds and passes it to
 * {@code toIntervalsSource} together with a {@link MatchAllDocsQuery} approximation.
 *
 * @param lowerTerm    the lower bound term
 * @param upperTerm    the upper bound term
 * @param includeLower passed to {@code Intervals.range} as its include-lower flag
 * @param includeUpper passed to {@code Intervals.range} as its include-upper flag
 * @param context      the search execution context, forwarded to {@code toIntervalsSource}
 * @return the source returned by {@code toIntervalsSource}
 */
@Override
public IntervalsSource rangeIntervals(
    BytesRef lowerTerm,
    BytesRef upperTerm,
    boolean includeLower,
    boolean includeUpper,
    SearchExecutionContext context
) {
    final IntervalsSource rangeSource = Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
    // range queries can be expensive, what should the approximation be?
    final MatchAllDocsQuery approximation = new MatchAllDocsQuery();
    return toIntervalsSource(rangeSource, approximation, context);
}

@Override
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -476,3 +476,45 @@ setup:
- match: { hits.hits.0._id: "6" }
- match: { hits.hits.1._id: "5" }

---
# Runs an intervals query on the `text` field of index `test` that combines, via `all_of`,
# a `match` rule for "cold" with a `regexp` rule for the pattern `ou.*ide`,
# and expects a total of 3 hits. Requires the cluster feature gte_v8.16.0.
"Test regexp":
- requires:
cluster_features: "gte_v8.16.0"
reason: "Implemented in 8.16"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cold
- regexp:
pattern: ou.*ide
- match: { hits.total.value: 3 }


---
# Runs an intervals query on the `text` field of index `test` that combines, via `all_of`,
# a `match` rule for "cold" with a `range` rule bounded by gte "out" and lte "ouu",
# and expects a total of 3 hits. Requires the cluster feature gte_v8.16.0.
"Test range":
- requires:
cluster_features: "gte_v8.16.0"
reason: "Implemented in 8.16"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cold
- range:
gte: out
lte: ouu
- match: { hits.total.value: 3 }

Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,30 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
);
}

/**
 * Create a regexp {@link IntervalsSource} for the given pattern.
 * <p>
 * This implementation always throws.
 *
 * @param pattern the regular expression pattern
 * @param context the search execution context
 * @throws IllegalArgumentException always, naming this field and its type
 */
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
    final String message = "Can only use interval queries on text fields - not on ["
        + name
        + "] which is of type ["
        + typeName()
        + "]";
    throw new IllegalArgumentException(message);
}

/**
 * Create a range {@link IntervalsSource} for the given bounds.
 * <p>
 * This implementation always throws.
 *
 * @param lowerTerm    the lower bound term
 * @param upperTerm    the upper bound term
 * @param includeLower flag for the lower bound
 * @param includeUpper flag for the upper bound
 * @param context      the search execution context
 * @throws IllegalArgumentException always, naming this field and its type
 */
public IntervalsSource rangeIntervals(
    BytesRef lowerTerm,
    BytesRef upperTerm,
    boolean includeLower,
    boolean includeUpper,
    SearchExecutionContext context
) {
    final String message = "Can only use interval queries on text fields - not on ["
        + name
        + "] which is of type ["
        + typeName()
        + "]";
    throw new IllegalArgumentException(message);
}

/**
* An enum used to describe the relation between the range of terms in a
* shard when compared with a query range
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,22 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
throw new QueryShardException(context, fail("wildcard intervals query"));
}

/**
 * Rejects regexp intervals on this field type.
 *
 * @throws QueryShardException always, with the message produced by {@code fail("regexp intervals query")}
 */
@Override
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
    final String reason = fail("regexp intervals query");
    throw new QueryShardException(context, reason);
}

/**
 * Rejects range intervals on this field type.
 *
 * @throws QueryShardException always, with the message produced by {@code fail("range intervals query")}
 */
@Override
public IntervalsSource rangeIntervals(
    BytesRef lowerTerm,
    BytesRef upperTerm,
    boolean includeLower,
    boolean includeUpper,
    SearchExecutionContext context
) {
    final String reason = fail("range intervals query");
    throw new QueryShardException(context, reason);
}

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
throw new IllegalArgumentException(fail("aggregation or sorts"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,28 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
return Intervals.wildcard(pattern);
}

/**
 * Creates a regexp {@link IntervalsSource} for the given pattern.
 *
 * @param pattern the regular expression pattern
 * @param context the search execution context (not used by this implementation)
 * @return the source produced by {@code Intervals.regexp(pattern)}
 * @throws IllegalArgumentException if this field has no positions indexed
 */
@Override
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
    if (getTextSearchInfo().hasPositions()) {
        return Intervals.regexp(pattern);
    }
    // Without indexed positions no intervals can be built over this field.
    throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}

/**
 * Creates a range {@link IntervalsSource} for the given bounds.
 *
 * @param lowerTerm    the lower bound term
 * @param upperTerm    the upper bound term
 * @param includeLower passed to {@code Intervals.range} as its include-lower flag
 * @param includeUpper passed to {@code Intervals.range} as its include-upper flag
 * @param context      the search execution context (not used by this implementation)
 * @return the source produced by {@code Intervals.range}
 * @throws IllegalArgumentException if this field has no positions indexed
 */
@Override
public IntervalsSource rangeIntervals(
    BytesRef lowerTerm,
    BytesRef upperTerm,
    boolean includeLower,
    boolean includeUpper,
    SearchExecutionContext context
) {
    if (getTextSearchInfo().hasPositions()) {
        return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
    }
    // Without indexed positions no intervals can be built over this field.
    throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}

private void checkForPositions() {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery");
Expand Down
Loading

0 comments on commit 233c17a

Please sign in to comment.