Skip to content

Commit 2e81222

Browse files
committed
Fix potential NPE issue in chunking processor; add changee log
Signed-off-by: zane-neo <zaniu@amazon.com>
1 parent 5e68be4 commit 2e81222

File tree

5 files changed

+40
-14
lines changed

5 files changed

+40
-14
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2121
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
2222
### Bug Fixes
2323
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
24+
- Fix may type validation issue in multiple pipeline processors ([#661](https://github.com/opensearch-project/neural-search/pull/661))
2425
### Infrastructure
2526
### Documentation
2627
### Maintenance

src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,12 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
109109
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
110110
try {
111111
validateEmbeddingFieldsValue(ingestDocument);
112-
Map<String, Object> ProcessMap = buildMapWithTargetKeyAndOriginalValue(ingestDocument);
113-
List<String> inferenceList = createInferenceList(ProcessMap);
112+
Map<String, Object> processMap = buildMapWithTargetKeyAndOriginalValue(ingestDocument);
113+
List<String> inferenceList = createInferenceList(processMap);
114114
if (inferenceList.size() == 0) {
115115
handler.accept(ingestDocument, null);
116116
} else {
117-
doExecute(ingestDocument, ProcessMap, inferenceList, handler);
117+
doExecute(ingestDocument, processMap, inferenceList, handler);
118118
}
119119
} catch (Exception e) {
120120
handler.accept(null, e);
@@ -213,7 +213,8 @@ private void validateEmbeddingFieldsValue(IngestDocument ingestDocument) {
213213
sourceAndMetadataMap,
214214
fieldMap,
215215
1,
216-
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment)
216+
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment),
217+
true
217218
);
218219
}
219220

src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import java.util.List;
1313
import java.util.Objects;
1414

15+
import org.apache.commons.lang.StringUtils;
1516
import org.opensearch.cluster.metadata.IndexMetadata;
1617
import org.opensearch.env.Environment;
1718
import org.opensearch.index.IndexService;
@@ -171,7 +172,8 @@ public IngestDocument execute(final IngestDocument ingestDocument) {
171172
sourceAndMetadataMap,
172173
fieldMap,
173174
1,
174-
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment)
175+
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment),
176+
false
175177
);
176178
// fixed token length algorithm needs runtime parameter max_token_count for tokenization
177179
Map<String, Object> runtimeParameters = new HashMap<>();
@@ -253,7 +255,13 @@ private List<String> chunkLeafType(final Object value, final Map<String, Object>
253255
// leaf type means null, String or List<String>
254256
// the result should be an empty list when the input is null
255257
List<String> result = new ArrayList<>();
258+
if (value == null) {
259+
return result;
260+
}
256261
if (value instanceof String) {
262+
if (StringUtils.isBlank(String.valueOf(value))) {
263+
return result;
264+
}
257265
result = chunkString(value.toString(), runTimeParameters);
258266
} else if (isListOfString(value)) {
259267
result = chunkList((List<String>) value, runTimeParameters);

src/main/java/org/opensearch/neuralsearch/processor/TextImageEmbeddingProcessor.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,8 @@ private void validateEmbeddingFieldsValue(final IngestDocument ingestDocument) {
176176
sourceAndMetadataMap,
177177
fieldMap,
178178
1,
179-
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment)
179+
ProcessorDocumentUtils.getMaxDepth(sourceAndMetadataMap, clusterService, environment),
180+
true
180181
);
181182
}
182183

src/main/java/org/opensearch/neuralsearch/util/ProcessorDocumentUtils.java

+23-8
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ public static void validateMapTypeValue(
3535
final Map<String, Object> sourceValue,
3636
final Object fieldMap,
3737
final int depth,
38-
final long maxDepth
38+
final long maxDepth,
39+
final boolean allowEmpty
3940
) {
4041
if (sourceValue == null) return; // allow map type value to be null.
4142
validateDepth(sourceKey, depth, maxDepth);
@@ -55,37 +56,51 @@ public static void validateMapTypeValue(
5556
Object nextSourceValue = sourceValue.get(key);
5657
if (nextSourceValue != null) {
5758
if (nextSourceValue instanceof List) {
58-
validateListTypeValue(key, (List) nextSourceValue, fieldMap, depth + 1, maxDepth);
59+
validateListTypeValue(key, (List) nextSourceValue, fieldMap, depth + 1, maxDepth, allowEmpty);
5960
} else if (nextSourceValue instanceof Map) {
60-
validateMapTypeValue(key, (Map<String, Object>) nextSourceValue, nextFieldMap, depth + 1, maxDepth);
61+
validateMapTypeValue(key, (Map<String, Object>) nextSourceValue, nextFieldMap, depth + 1, maxDepth, allowEmpty);
6162
} else if (!(nextSourceValue instanceof String)) {
6263
throw new IllegalArgumentException("map type field [" + key + "] is neither string nor nested type, cannot process it");
63-
} else if (StringUtils.isBlank((String) nextSourceValue)) {
64+
} else if (!allowEmpty && StringUtils.isBlank((String) nextSourceValue)) {
6465
throw new IllegalArgumentException("map type field [" + key + "] has empty string value, cannot process it");
6566
}
6667
}
6768
});
6869
}
6970

7071
@SuppressWarnings({ "rawtypes", "unchecked" })
71-
private static void validateListTypeValue(String sourceKey, List sourceValue, Object fieldMap, int depth, long maxDepth) {
72+
private static void validateListTypeValue(
73+
String sourceKey,
74+
List sourceValue,
75+
Object fieldMap,
76+
int depth,
77+
long maxDepth,
78+
boolean allowEmpty
79+
) {
7280
validateDepth(sourceKey, depth, maxDepth);
7381
if (sourceValue == null || sourceValue.isEmpty()) return;
7482
Object firstNonNullElement = sourceValue.stream().filter(Objects::nonNull).findFirst().orElse(null);
7583
if (firstNonNullElement == null) return;
7684
for (Object element : sourceValue) {
7785
if (firstNonNullElement instanceof List) { // nested list case.
78-
validateListTypeValue(sourceKey, (List) element, fieldMap, depth + 1, maxDepth);
86+
validateListTypeValue(sourceKey, (List) element, fieldMap, depth + 1, maxDepth, allowEmpty);
7987
} else if (firstNonNullElement instanceof Map) {
80-
validateMapTypeValue(sourceKey, (Map<String, Object>) element, ((Map) fieldMap).get(sourceKey), depth + 1, maxDepth);
88+
validateMapTypeValue(
89+
sourceKey,
90+
(Map<String, Object>) element,
91+
((Map) fieldMap).get(sourceKey),
92+
depth + 1,
93+
maxDepth,
94+
allowEmpty
95+
);
8196
} else if (!(firstNonNullElement instanceof String)) {
8297
throw new IllegalArgumentException("list type field [" + sourceKey + "] has non string value, cannot process it");
8398
} else {
8499
if (element == null) {
85100
throw new IllegalArgumentException("list type field [" + sourceKey + "] has null, cannot process it");
86101
} else if (!(element instanceof String)) {
87102
throw new IllegalArgumentException("list type field [" + sourceKey + "] has non string value, cannot process it");
88-
} else if (StringUtils.isBlank(element.toString())) {
103+
} else if (!allowEmpty && StringUtils.isBlank(element.toString())) {
89104
throw new IllegalArgumentException("list type field [" + sourceKey + "] has empty string, cannot process it");
90105
}
91106
}

0 commit comments

Comments
 (0)