Skip to content

Commit 620d328

Browse files
use standard config in ingest processor instead of always return list (opensearch-project#2985) (opensearch-project#3031)
Signed-off-by: Mingshi Liu <mingshl@amazon.com> (cherry picked from commit f4b4724) Co-authored-by: Mingshi Liu <mingshl@amazon.com>
1 parent 14b915f commit 620d328

File tree

4 files changed

+219
-27
lines changed

4 files changed

+219
-27
lines changed

common/build.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies {
3535
exclude group: 'com.google.j2objc', module: 'j2objc-annotations'
3636
exclude group: 'com.google.guava', module: 'listenablefuture'
3737
}
38+
compileOnly 'com.jayway.jsonpath:json-path:2.9.0'
3839
}
3940

4041
lombok {

common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java

+26
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import com.google.gson.JsonElement;
2929
import com.google.gson.JsonParser;
3030
import com.google.gson.JsonSyntaxException;
31+
import com.jayway.jsonpath.JsonPath;
3132

3233
import lombok.extern.log4j.Log4j2;
3334

@@ -293,4 +294,29 @@ public static String getJsonPath(String jsonPathWithSource) {
293294
// Extract the substring from the startIndex to the end of the input string
294295
return (startIndex != -1) ? jsonPathWithSource.substring(startIndex) : jsonPathWithSource;
295296
}
297+
298+
/**
299+
* Checks if the given input string matches the JSONPath format.
300+
*
301+
* <p>The JSONPath format is a way to navigate and extract data from JSON documents.
302+
* It uses a syntax similar to XPath for XML documents. This method attempts to compile
303+
* the input string as a JSONPath expression using the {@link com.jayway.jsonpath.JsonPath}
304+
* library. If the compilation succeeds, it means the input string is a valid JSONPath
305+
* expression. Any exception raised while compiling is caught and reported as {@code false}.
306+
*
307+
* @param input the input string to be checked for JSONPath format validity; {@code null} or blank input is rejected without being compiled
308+
* @return true if the input string is a valid JSONPath expression, false otherwise (including for {@code null} or blank input)
309+
*/
310+
public static boolean isValidJSONPath(String input) {
311+
if (input == null || input.isBlank()) {
312+
return false;
313+
}
314+
try {
315+
JsonPath.compile(input); // Throws if the path is invalid; the compiled result itself is not used
316+
return true;
317+
} catch (Exception e) {
318+
return false;
319+
}
320+
}
321+
296322
}

plugin/src/main/java/org/opensearch/ml/processor/MLInferenceIngestProcessor.java

+19-22
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import static org.opensearch.ml.processor.InferenceProcessorAttributes.*;
88

99
import java.io.IOException;
10-
import java.util.ArrayList;
1110
import java.util.Collection;
1211
import java.util.HashMap;
1312
import java.util.HashSet;
@@ -37,9 +36,7 @@
3736
import org.opensearch.script.ScriptService;
3837
import org.opensearch.script.TemplateScript;
3938

40-
import com.jayway.jsonpath.Configuration;
4139
import com.jayway.jsonpath.JsonPath;
42-
import com.jayway.jsonpath.Option;
4340

4441
/**
4542
* MLInferenceIngestProcessor requires a modelId string to call model inferences
@@ -75,11 +72,6 @@ public class MLInferenceIngestProcessor extends AbstractProcessor implements Mod
7572
public static final String DEFAULT_MODEl_INPUT = "{ \"parameters\": ${ml_inference.parameters} }";
7673
private final NamedXContentRegistry xContentRegistry;
7774

78-
private Configuration suppressExceptionConfiguration = Configuration
79-
.builder()
80-
.options(Option.SUPPRESS_EXCEPTIONS, Option.DEFAULT_PATH_LEAF_TO_NULL, Option.ALWAYS_RETURN_LIST)
81-
.build();
82-
8375
protected MLInferenceIngestProcessor(
8476
String modelId,
8577
List<Map<String, String>> inputMaps,
@@ -320,24 +312,29 @@ private void getMappedModelInputFromDocuments(
320312
Object documentFieldValue = ingestDocument.getFieldValue(originalFieldPath, Object.class);
321313
String documentFieldValueAsString = toString(documentFieldValue);
322314
updateModelParameters(modelInputFieldName, documentFieldValueAsString, modelParameters);
315+
return;
323316
}
324-
// else when cannot find field path in document, try check for nested array using json path
325-
else {
326-
if (documentFieldName.contains(DOT_SYMBOL)) {
327-
328-
Map<String, Object> sourceObject = ingestDocument.getSourceAndMetadata();
329-
ArrayList<Object> fieldValueList = JsonPath
330-
.using(suppressExceptionConfiguration)
331-
.parse(sourceObject)
332-
.read(documentFieldName);
333-
if (!fieldValueList.isEmpty()) {
334-
updateModelParameters(modelInputFieldName, toString(fieldValueList), modelParameters);
335-
} else if (!ignoreMissing) {
336-
throw new IllegalArgumentException("cannot find field name defined from input map: " + documentFieldName);
317+
// If the standard dot path fails, try to check for a nested array using JSON path
318+
if (StringUtils.isValidJSONPath(documentFieldName)) {
319+
Map<String, Object> sourceObject = ingestDocument.getSourceAndMetadata();
320+
Object fieldValue = JsonPath.using(suppressExceptionConfiguration).parse(sourceObject).read(documentFieldName);
321+
322+
if (fieldValue != null) {
323+
if (fieldValue instanceof List) {
324+
List<?> fieldValueList = (List<?>) fieldValue;
325+
if (!fieldValueList.isEmpty()) {
326+
updateModelParameters(modelInputFieldName, toString(fieldValueList), modelParameters);
327+
} else if (!ignoreMissing) {
328+
throw new IllegalArgumentException("Cannot find field name defined from input map: " + documentFieldName);
329+
}
330+
} else {
331+
updateModelParameters(modelInputFieldName, toString(fieldValue), modelParameters);
337332
}
338333
} else if (!ignoreMissing) {
339-
throw new IllegalArgumentException("cannot find field name defined from input map: " + documentFieldName);
334+
throw new IllegalArgumentException("Cannot find field name defined from input map: " + documentFieldName);
340335
}
336+
} else {
337+
throw new IllegalArgumentException("Cannot find field name defined from input map: " + documentFieldName);
341338
}
342339
}
343340

0 commit comments

Comments
 (0)