Skip to content

Commit

Permalink
flat multiple nested fields
Browse files Browse the repository at this point in the history
Signed-off-by: Mingshi Liu <[email protected]>
  • Loading branch information
mingshl committed Feb 14, 2023
1 parent 7763f4f commit 48a1226
Showing 1 changed file with 47 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.flatobject.xcontent;

import com.fasterxml.jackson.core.JsonParser;
import org.opensearch.common.bytes.BytesReference;
import org.opensearch.common.xcontent.DeprecationHandler;
import org.opensearch.common.xcontent.NamedXContentRegistry;
Expand Down Expand Up @@ -51,30 +52,52 @@ public KeyValueJsonXContentParser(
}

/**
 * Builds the flattened JSON document and returns a parser over it.
 *
 * <p>The method opens one object on {@code builder}, lets {@link #parseToken()} write the
 * {@code "<field>_path"} / {@code "<field>_value"} entries for every field read from
 * {@code this.parser}, closes the object, converts the builder content to a JSON string and
 * creates a new JSON parser over that string.
 *
 * @return a parser created over the flattened JSON string
 * @throws IOException if reading from the source parser or writing to the builder fails
 */
public XContentParser parseObject() throws IOException {
    builder.startObject();
    // parseToken() owns the complete field traversal. Walking the fields here as well would
    // consume the tokens up to END_OBJECT first and leave parseToken() reading past the end
    // of the object.
    parseToken();
    builder.endObject();

    String jString = XContentHelper.convertToJson(BytesReference.bytes(builder), false, XContentType.JSON);
    logger.info("Before createParser, jString: " + jString + "\n");

    return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString));
}

/**
 * Reads tokens from {@code this.parser} until {@code Token.END_OBJECT} and, for each field
 * name encountered, writes two entries to {@code builder}: {@code "<field>_path"} holding the
 * field name and {@code "<field>_value"} holding the collected value text.
 *
 * <p>When the token following a field name is {@code START_OBJECT}, the entries are written
 * first and the method then recurses to process the fields of the nested object. Intended
 * result for a nested document such as
 * <pre>
 * {"grandpa": {"dad": {"son": "me"}}}
 * </pre>
 * is {@code "grandpa_path": "grandpa"} together with a {@code "grandpa_value"} containing the
 * nested JSON as a string.
 *
 * <p>TODO: the nested value is currently taken from {@code this.parser.toString()}, which is a
 * placeholder (presumably not the nested JSON text; confirm). Capturing the whole nested object
 * as a string without moving the tokenizer position is still open.
 *
 * @throws IOException if reading from the parser or writing to the builder fails
 */
private void parseToken() throws IOException {
    for (Token token = this.parser.nextToken(); token != Token.END_OBJECT; token = this.parser.nextToken()) {
        final String fieldName = this.parser.currentName();
        logger.info("currentFieldName: " + fieldName + "\n");

        final StringBuilder flatValue = new StringBuilder();
        // Advance to the token after the field name and remember whether it opens an object.
        final boolean nestedObject = this.parser.nextToken() == Token.START_OBJECT;

        if (nestedObject) {
            // Placeholder value, see the TODO in the method documentation.
            flatValue.append(this.parser.toString());
        } else {
            parseValue(fieldName, flatValue);
        }

        builder.field(fieldName + "_path", fieldName);
        builder.field(fieldName + "_value", flatValue.toString());

        if (nestedObject) {
            // Descend only after this field's own entries have been written.
            parseToken();
        }
    }
}

private void parseValue(String currentFieldName, StringBuilder parsedFields) throws IOException {
logger.info("this.parser.currentToken(): " + this.parser.currentToken() + "\n");
switch (this.parser.currentToken()) {
case START_OBJECT:
parseObject();
break;
case VALUE_STRING:
/**
* this is "value" only format for each subfield
Expand All @@ -84,6 +107,16 @@ private void parseValue(String currentFieldName, StringBuilder parsedFields) thr
logger.info("currentFieldName and parsedFields :" + currentFieldName + " " + parsedFields.toString() + "\n");
break;
// Handle other token types as needed
// TODO: decide what to do if any of these token types are encountered.
// START_OBJECT should never be reached here.
case START_OBJECT:
throw new IOException("Unsupported token type");
case FIELD_NAME:
logger.info("token is FIELD_NAME: " + this.parser.currentName() + "\n");
break;
case VALUE_EMBEDDED_OBJECT:
logger.info("token is VALUE_EMBEDDED_OBJECT: " + this.parser.objectText()+ "\n");
break;
default:
throw new IOException("Unsupported token type [" + parser.currentToken() + "]");
}
Expand Down

0 comments on commit 48a1226

Please sign in to comment.