Skip to content

Commit

Permalink
Enable Search for each subfields
Browse files Browse the repository at this point in the history
Signed-off-by: Mingshi Liu <[email protected]>
  • Loading branch information
mingshl committed Feb 14, 2023
1 parent bf27aae commit 7763f4f
Show file tree
Hide file tree
Showing 3 changed files with 321 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.Nullable;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.xcontent.DeprecationHandler;
import org.opensearch.common.xcontent.NamedXContentRegistry;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.flatobject.xcontent.KeyValueJsonXContentParser;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.fielddata.IndexFieldData;
Expand Down Expand Up @@ -320,18 +323,19 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL
return new SourceValueFetcher(name(), context, nullValue) {
@Override
protected String parseSourceValue(Object value) {
String flatObjectkeywordValue = value.toString();
if (flatObjectkeywordValue.length() > ignoreAbove) {
String flatObjectKeywordValue = value.toString();

if (flatObjectKeywordValue.length() > ignoreAbove) {
return null;
}

NamedAnalyzer normalizer = normalizer();
if (normalizer == null) {
return flatObjectkeywordValue;
return flatObjectKeywordValue;
}

try {
return normalizeValue(normalizer, name(), flatObjectkeywordValue);
return normalizeValue(normalizer, name(), flatObjectKeywordValue);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
Expand Down Expand Up @@ -437,22 +441,41 @@ public FlatObjectFieldType fieldType() {

/**
 * Indexes the value(s) of this field for the document being parsed.
 *
 * <p>When an external value is set on the context it is indexed directly. Otherwise the JSON
 * object at the current parser position is flattened by {@link KeyValueJsonXContentParser} and
 * every string value of the flattened document is indexed through
 * {@code ParseValueAddFields}.
 *
 * @param context the parse context of the document being indexed
 * @throws IOException if reading or flattening the field content fails
 */
@Override
protected void parseCreateField(ParseContext context) throws IOException {
    if (context.externalValueSet()) {
        ParseValueAddFields(context, context.externalValue().toString());
        return;
    }

    // The wrapper itself is deliberately not closed here: its close() delegates to
    // context.parser(), which is still needed for the rest of the document.
    KeyValueJsonXContentParser keyValueJsonParser = new KeyValueJsonXContentParser(
        NamedXContentRegistry.EMPTY,
        DeprecationHandler.IGNORE_DEPRECATIONS,
        context
    );

    // parseObject() returns a new, independent parser over the flattened JSON; release it when done.
    try (XContentParser parser = keyValueJsonParser.parseObject()) {
        XContentParser.Token currentToken;
        // Stop on END_OBJECT, and also on null (end of content) so the loop cannot trip on a null token.
        while ((currentToken = parser.nextToken()) != null && currentToken != XContentParser.Token.END_OBJECT) {
            // Only string values are indexed; field names and structural tokens are skipped.
            if (currentToken == XContentParser.Token.VALUE_STRING) {
                ParseValueAddFields(context, parser.textOrNull());
            }
        }
    }
}

private void ParseValueAddFields(ParseContext context, String value) throws IOException {
if (value == null || value.length() > ignoreAbove) {
return;
}
Expand All @@ -479,7 +502,6 @@ protected void parseCreateField(ParseContext context) throws IOException {
}

private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException {
logger.info("\n check the field before normalizer:" + field + "\n");

try (TokenStream ts = normalizer.tokenStream(field, value)) {
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
Expand All @@ -506,7 +528,6 @@ private static String normalizeValue(NamedAnalyzer normalizer, String field, Str
);
}
ts.end();
logger.info("\n check the new Value after normalizer" + newValue);
return newValue;
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.flatobject.xcontent;

import org.opensearch.common.bytes.BytesReference;
import org.opensearch.common.xcontent.DeprecationHandler;
import org.opensearch.common.xcontent.NamedXContentRegistry;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.XContentLocation;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.common.xcontent.support.AbstractXContentParser;
import org.opensearch.index.mapper.ParseContext;

import java.io.IOException;
import java.nio.CharBuffer;
import java.util.logging.Logger;

public class KeyValueJsonXContentParser extends AbstractXContentParser {
private XContentParser parser;
private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);
private ParseContext parseContext;

private NamedXContentRegistry xContentRegistry;

private DeprecationHandler deprecationHandler;
/**
* logging function
*/

private static final Logger logger = Logger.getLogger((KeyValueJsonXContentParser.class.getName()));

public KeyValueJsonXContentParser(
NamedXContentRegistry xContentRegistry,
DeprecationHandler deprecationHandler,
ParseContext parseContext
) throws IOException {
super(xContentRegistry, deprecationHandler);
this.parseContext = parseContext;
this.deprecationHandler = deprecationHandler;
this.xContentRegistry = xContentRegistry;
this.parser = parseContext.parser();
}

public XContentParser parseObject() throws IOException {
String currentFieldName = null;
builder.startObject();
while (this.parser.nextToken() != Token.END_OBJECT) {
currentFieldName = this.parser.currentName();
logger.info("currentFieldName: " + currentFieldName + "\n");
this.parser.nextToken();
StringBuilder parsedFields = new StringBuilder();
parseValue(currentFieldName, parsedFields);
builder.field(currentFieldName + "_path", currentFieldName);
builder.field(currentFieldName + "_value", parsedFields.toString());
}

builder.endObject();
String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON);
logger.info("Before createParser, jString: " + jString + "\n");

return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString));
}

private void parseValue(String currentFieldName, StringBuilder parsedFields) throws IOException {
switch (this.parser.currentToken()) {
case START_OBJECT:
parseObject();
break;
case VALUE_STRING:
/**
* this is "value" only format for each subfield
* parsedFields will contain {"key_path": "key", "key_value": "value"}
*/
parsedFields.append(this.parser.textOrNull());
logger.info("currentFieldName and parsedFields :" + currentFieldName + " " + parsedFields.toString() + "\n");
break;
// Handle other token types as needed
default:
throw new IOException("Unsupported token type [" + parser.currentToken() + "]");
}
}

@Override
public XContentType contentType() {
return XContentType.JSON;
}

@Override
public Token nextToken() throws IOException {
return this.parser.nextToken();
}

@Override
public void skipChildren() throws IOException {
this.parser.skipChildren();
}

@Override
public Token currentToken() {
return this.parser.currentToken();
}

@Override
public String currentName() throws IOException {
return this.parser.currentName();
}

@Override
public String text() throws IOException {
return this.parser.text();
}

@Override
public CharBuffer charBuffer() throws IOException {
return this.parser.charBuffer();
}

@Override
public Object objectText() throws IOException {
return this.parser.objectText();
}

@Override
public Object objectBytes() throws IOException {
return this.parser.objectBytes();
}

@Override
public boolean hasTextCharacters() {
return this.parser.hasTextCharacters();
}

@Override
public char[] textCharacters() throws IOException {
return this.parser.textCharacters();
}

@Override
public int textLength() throws IOException {
return this.parser.textLength();
}

@Override
public int textOffset() throws IOException {
return this.parser.textOffset();
}

@Override
public Number numberValue() throws IOException {
return this.parser.numberValue();
}

@Override
public NumberType numberType() throws IOException {
return this.parser.numberType();
}

@Override
public byte[] binaryValue() throws IOException {
return this.parser.binaryValue();
}

@Override
public XContentLocation getTokenLocation() {
return this.parser.getTokenLocation();
}

@Override
protected boolean doBooleanValue() throws IOException {
return this.parser.booleanValue();
}

@Override
protected short doShortValue() throws IOException {
return this.parser.shortValue();
}

@Override
protected int doIntValue() throws IOException {
return this.parser.intValue();
}

@Override
protected long doLongValue() throws IOException {
return this.parser.longValue();
}

@Override
protected float doFloatValue() throws IOException {
return this.parser.floatValue();
}

@Override
protected double doDoubleValue() throws IOException {
return this.parser.doubleValue();
}

@Override
public boolean isClosed() {
return this.parser.isClosed();
}

@Override
public void close() throws IOException {
this.parser.close();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.flatobject.xcontent;

import org.junit.Test;
import org.opensearch.common.bytes.BytesReference;
import org.opensearch.common.xcontent.DeprecationHandler;
import org.opensearch.common.xcontent.NamedXContentRegistry;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.flatobject.mapper.FlatObjectFieldMapper;

import java.io.IOException;
import java.util.logging.Logger;

import static org.junit.Assert.assertEquals;

public class KeyValueJsonXContentParserTests {
    private static final Logger logger = Logger.getLogger((FlatObjectFieldMapper.class.getName()));

    /**
     * Builds a one-field JSON document, parses it back and checks that exactly one field name
     * ({@code catalog}) and exactly one string value are read.
     *
     * <p>NOTE(review): this test only round-trips through {@link JsonXContent}; it does not yet
     * construct a {@code KeyValueJsonXContentParser}. TODO: add coverage that drives the parser
     * itself once a {@code ParseContext} fixture is available.
     */
    @Test
    public void testKeyValueJsonXContentParserTests() throws IOException {

        NamedXContentRegistry xContentRegistry = NamedXContentRegistry.EMPTY;
        DeprecationHandler deprecationHandler = DeprecationHandler.IGNORE_DEPRECATIONS;

        try (XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent)) {
            builder.startObject();
            builder.field("catalog", "{title: Lucene in Action}");
            builder.endObject();
            String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON);

            // Counters make sure the assertions inside the loop were actually reached.
            int fieldNameCount = 0;
            int stringValueCount = 0;

            try (XContentParser parser = JsonXContent.jsonXContent.createParser(xContentRegistry, deprecationHandler, jString)) {

                XContentParser.Token currentToken;
                while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                    switch (currentToken) {
                        case FIELD_NAME:
                            fieldNameCount++;
                            // JUnit convention: expected value first, actual value second.
                            assertEquals("catalog", parser.currentName());
                            break;
                        case VALUE_STRING:
                            stringValueCount++;
                            assertEquals("{title: Lucene in Action}", parser.textOrNull());
                            break;
                        default:
                            // START_OBJECT and any other structural token need no check.
                            break;
                    }
                }
            }

            assertEquals(1, fieldNameCount);
            assertEquals(1, stringValueCount);
        }
    }
}

0 comments on commit 7763f4f

Please sign in to comment.