elastic · mayya-sharipova · Aug 26, 2024 · Jul 30, 2024 · Aug 16, 2024 · Aug 16, 2024
diff --git a/docs/changelog/111465.yaml b/docs/changelog/111465.yaml
@@ -0,0 +1,5 @@
+pr: 111465
+summary: Add range and regexp Intervals
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc
@@ -73,7 +73,9 @@ Valid rules include:
 * <<intervals-match,`match`>>
 * <<intervals-prefix,`prefix`>>
 * <<intervals-wildcard,`wildcard`>>
+* <<intervals-regexp,`regexp`>>
 * <<intervals-fuzzy,`fuzzy`>>
+* <<intervals-range,`range`>>
 * <<intervals-all_of,`all_of`>>
 * <<intervals-any_of,`any_of`>>
 --
@@ -178,6 +180,36 @@ The `pattern` is normalized using the search analyzer from this field, unless
 `analyzer` is specified separately.
 --
 
+[[intervals-regexp]]
+==== `regexp` rule parameters
+
+The `regexp` rule matches terms using a regular expression pattern.
+This pattern can expand to match at most 128 terms.
+If the pattern matches more than 128 terms, {es} returns an error.
+
+`pattern`::
+(Required, string) Regexp pattern used to find matching terms.
+For a list of operators supported by the
+`regexp` pattern, see <<regexp-syntax, Regular expression syntax>>.
+
+WARNING: Avoid using wildcard patterns, such as `.*` or `.*?+`. This can
+increase the iterations needed to find matching terms and slow search
+performance.
+
+`analyzer`::
+(Optional, string) <<analysis, analyzer>> used to normalize the `pattern`.
+Defaults to the top-level `<field>`'s analyzer.
+
+`use_field`::
++
+--
+(Optional, string) If specified, match intervals from this field rather than the
+top-level `<field>`.
+
+The `pattern` is normalized using the search analyzer from this field, unless
+`analyzer` is specified separately.
+--
+
 [[intervals-fuzzy]]
 ==== `fuzzy` rule parameters
 
@@ -214,6 +246,40 @@ The `term` is normalized using the search analyzer from this field, unless
 `analyzer` is specified separately.
 --
 
+[[intervals-range]]
+==== `range` rule parameters
+
+The `range` rule matches terms contained within a provided range.
+This range can expand to match at most 128 terms.
+If the range matches more than 128 terms, {es} returns an error.
+
+`gt`::
+(Optional, string) Greater than: match terms greater than the provided term.
+
+`gte`::
+(Optional, string) Greater than or equal to: match terms greater than or
+equal to the provided term.
+
+`lt`::
+(Optional, string) Less than: match terms less than the provided term.
+
+`lte`::
+(Optional, string) Less than or equal to: match terms less than or
+equal to the provided term.
+
+NOTE: You must provide one of the `gt` or `gte` parameters, and one of the
+`lt` or `lte` parameters.
+
+
+`analyzer`::
+(Optional, string) <<analysis, analyzer>> used to normalize the range terms (`gt`, `gte`, `lt`, `lte`).
+Defaults to the top-level `<field>`'s analyzer.
+
+`use_field`::
+(Optional, string) If specified, match intervals from this field rather than the
+top-level `<field>`.
+
+
 [[intervals-all_of]]
 ==== `all_of` rule parameters
 

diff --git a/...-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/...-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java
@@ -304,6 +304,30 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
  );
  }
 
+ /**
+  * Builds a regexp {@link IntervalsSource} for {@code pattern} and passes it through
+  * {@code toIntervalsSource}, paired with a match-all approximation query.
+  */
+ @Override
+ public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
+     final IntervalsSource regexpSource = Intervals.regexp(pattern);
+     // regexp queries can be expensive, what should the approximation be?
+     final MatchAllDocsQuery approximation = new MatchAllDocsQuery();
+     return toIntervalsSource(regexpSource, approximation, context);
+ }
+
+ /**
+  * Builds a range {@link IntervalsSource} over the terms between {@code lowerTerm} and
+  * {@code upperTerm} and passes it through {@code toIntervalsSource}, paired with a
+  * match-all approximation query.
+  */
+ @Override
+ public IntervalsSource rangeIntervals(
+     BytesRef lowerTerm,
+     BytesRef upperTerm,
+     boolean includeLower,
+     boolean includeUpper,
+     SearchExecutionContext context
+ ) {
+     final IntervalsSource rangeSource = Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
+     // range queries can be expensive, what should the approximation be?
+     final MatchAllDocsQuery approximation = new MatchAllDocsQuery();
+     return toIntervalsSource(rangeSource, approximation, context);
+ }
+
  @Override
  public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
  throws IOException {

diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml
@@ -476,3 +476,45 @@ setup:
  - match: { hits.hits.0._id: "6" }
  - match: { hits.hits.1._id: "5" }
 
+---
+"Test regexp": # all_of(match "cold", regexp "ou.*ide") on field `text`
+ - requires:
+ cluster_features: "gte_v8.16.0"
+ reason: "Implemented in 8.16"
+ - do:
+ search:
+ index: test
+ body:
+ query:
+ intervals:
+ text:
+ all_of:
+ intervals:
+ - match:
+ query: cold
+ - regexp:
+ pattern: ou.*ide
+ - match: { hits.total.value: 3 } # expected total hit count
+
+
+---
+"Test range": # all_of(match "cold", range gte "out" / lte "ouu") on field `text`
+ - requires:
+ cluster_features: "gte_v8.16.0"
+ reason: "Implemented in 8.16"
+ - do:
+ search:
+ index: test
+ body:
+ query:
+ intervals:
+ text:
+ all_of:
+ intervals:
+ - match:
+ query: cold
+ - range:
+ gte: out
+ lte: ouu
+ - match: { hits.total.value: 3 } # expected total hit count
+
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -443,6 +443,30 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
  );
  }
 
+ /**
+  * Create a regexp {@link IntervalsSource} for the given pattern.
+  * <p>
+  * This implementation always throws an {@link IllegalArgumentException} whose
+  * message names the field and its type.
+  *
+  * @param pattern the regexp pattern
+  * @param context the search execution context
+  * @throws IllegalArgumentException always
+  */
+ public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
+     final String message = "Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]";
+     throw new IllegalArgumentException(message);
+ }
+
+ /**
+  * Create a range {@link IntervalsSource} for the given bounds.
+  * <p>
+  * This implementation always throws an {@link IllegalArgumentException} whose
+  * message names the field and its type.
+  *
+  * @param lowerTerm    the lower bound term
+  * @param upperTerm    the upper bound term
+  * @param includeLower whether the lower bound is inclusive
+  * @param includeUpper whether the upper bound is inclusive
+  * @param context      the search execution context
+  * @throws IllegalArgumentException always
+  */
+ public IntervalsSource rangeIntervals(
+     BytesRef lowerTerm,
+     BytesRef upperTerm,
+     boolean includeLower,
+     boolean includeUpper,
+     SearchExecutionContext context
+ ) {
+     final String message = "Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]";
+     throw new IllegalArgumentException(message);
+ }
+
  /**
  * An enum used to describe the relation between the range of terms in a
  * shard when compared with a query range

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/PlaceHolderFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/PlaceHolderFieldMapper.java
@@ -247,6 +247,22 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
  throw new QueryShardException(context, fail("wildcard intervals query"));
  }
 
+ /**
+  * Regexp intervals are rejected here: always throws a {@link QueryShardException}
+  * carrying the message built by {@code fail}.
+  */
+ @Override
+ public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
+     final String reason = fail("regexp intervals query");
+     throw new QueryShardException(context, reason);
+ }
+
+ /**
+  * Range intervals are rejected here: always throws a {@link QueryShardException}
+  * carrying the message built by {@code fail}.
+  */
+ @Override
+ public IntervalsSource rangeIntervals(
+     BytesRef lowerTerm,
+     BytesRef upperTerm,
+     boolean includeLower,
+     boolean includeUpper,
+     SearchExecutionContext context
+ ) {
+     final String reason = fail("range intervals query");
+     throw new QueryShardException(context, reason);
+ }
+
  @Override
  public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
  throw new IllegalArgumentException(fail("aggregation or sorts"));

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -848,6 +848,28 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
  return Intervals.wildcard(pattern);
  }
 
+ /**
+  * Returns a regexp {@link IntervalsSource} for {@code pattern}.
+  *
+  * @throws IllegalArgumentException if the field's text search info reports no positions
+  */
+ @Override
+ public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
+     final boolean positionsIndexed = getTextSearchInfo().hasPositions();
+     if (positionsIndexed) {
+         return Intervals.regexp(pattern);
+     }
+     throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
+ }
+
+ /**
+  * Returns a range {@link IntervalsSource} over the terms between {@code lowerTerm}
+  * and {@code upperTerm}, with each bound inclusive or exclusive as requested.
+  *
+  * @throws IllegalArgumentException if the field's text search info reports no positions
+  */
+ @Override
+ public IntervalsSource rangeIntervals(
+     BytesRef lowerTerm,
+     BytesRef upperTerm,
+     boolean includeLower,
+     boolean includeUpper,
+     SearchExecutionContext context
+ ) {
+     final boolean positionsIndexed = getTextSearchInfo().hasPositions();
+     if (positionsIndexed) {
+         return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
+     }
+     throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
+ }
+
  private void checkForPositions() {
  if (getTextSearchInfo().hasPositions() == false) {
  throw new IllegalStateException("field:[" + name() + "] was indexed without position data; cannot run PhraseQuery");