5
5
6
6
package org .opensearch .ml .engine .ingest ;
7
7
8
+ import static org .opensearch .ml .common .transport .batch .MLBatchIngestionInput .INGEST_FIELDS ;
8
9
import static org .opensearch .ml .common .utils .StringUtils .getJsonPath ;
9
10
import static org .opensearch .ml .common .utils .StringUtils .obtainFieldNameFromJsonPath ;
10
11
11
- import java .util .Collection ;
12
12
import java .util .HashMap ;
13
13
import java .util .List ;
14
14
import java .util .Map ;
15
- import java .util .Optional ;
16
15
import java .util .concurrent .CompletableFuture ;
17
16
import java .util .concurrent .atomic .AtomicInteger ;
18
17
import java .util .stream .Collectors ;
34
33
35
34
@ Log4j2
36
35
public class AbstractIngestion implements Ingestable {
37
- public static final String OUTPUT = "output" ;
38
- public static final String INPUT = "input" ;
39
- public static final String OUTPUT_FIELD_NAMES = "output_names" ;
40
- public static final String INPUT_FIELD_NAMES = "input_names" ;
41
- public static final String INGEST_FIELDS = "ingest_fields" ;
42
- public static final String ID_FIELD = "id_field" ;
43
36
44
37
private final Client client ;
45
38
@@ -85,12 +78,11 @@ protected double calculateSuccessRate(List<Double> successRates) {
85
78
* Filters fields in the map where the value contains the specified source index as a prefix.
86
79
*
87
80
* @param mlBatchIngestionInput The MLBatchIngestionInput.
88
- * @param index The source index to filter by.
89
- * @return A new map with only the entries that match the specified source index.
81
+ * @param indexInFieldMap The source index to filter by.
82
+ * @return A new map containing only the entries that match the specified source index, with their values converted to JsonPath expressions.
90
83
*/
91
- protected Map <String , Object > filterFieldMapping (MLBatchIngestionInput mlBatchIngestionInput , int index ) {
84
+ protected Map <String , Object > filterFieldMapping (MLBatchIngestionInput mlBatchIngestionInput , int indexInFieldMap ) {
92
85
Map <String , Object > fieldMap = mlBatchIngestionInput .getFieldMapping ();
93
- int indexInFieldMap = index + 1 ;
94
86
String prefix = "source[" + indexInFieldMap + "]" ;
95
87
96
88
Map <String , Object > filteredFieldMap = fieldMap .entrySet ().stream ().filter (entry -> {
@@ -104,19 +96,28 @@ protected Map<String, Object> filterFieldMapping(MLBatchIngestionInput mlBatchIn
104
96
}).collect (Collectors .toMap (Map .Entry ::getKey , entry -> {
105
97
Object value = entry .getValue ();
106
98
if (value instanceof String ) {
107
- return value ;
99
+ return getJsonPath (( String ) value ) ;
108
100
} else if (value instanceof List ) {
109
- return ((List <String >) value ).stream ().filter (val -> val .contains (prefix )).collect (Collectors .toList ());
101
+ return ((List <String >) value )
102
+ .stream ()
103
+ .filter (val -> val .contains (prefix ))
104
+ .map (StringUtils ::getJsonPath )
105
+ .collect (Collectors .toList ());
110
106
}
111
107
return null ;
112
108
}));
113
109
114
- if (filteredFieldMap .containsKey (OUTPUT )) {
115
- filteredFieldMap .put (OUTPUT_FIELD_NAMES , fieldMap .get (OUTPUT_FIELD_NAMES ));
116
- }
117
- if (filteredFieldMap .containsKey (INPUT )) {
118
- filteredFieldMap .put (INPUT_FIELD_NAMES , fieldMap .get (INPUT_FIELD_NAMES ));
110
+ Map <String , Object > ingestFields = mlBatchIngestionInput .getIngestFields ();
111
+ if (ingestFields != null && ingestFields .get (INGEST_FIELDS ) instanceof List ) {
112
+ ((List <String >) ingestFields .get (INGEST_FIELDS ))
113
+ .stream ()
114
+ .filter (val -> val .contains (prefix ))
115
+ .map (StringUtils ::getJsonPath )
116
+ .forEach (jsonPath -> {
117
+ filteredFieldMap .put (obtainFieldNameFromJsonPath (jsonPath ), jsonPath );
118
+ });
119
119
}
120
+
120
121
return filteredFieldMap ;
121
122
}
122
123
@@ -128,42 +129,21 @@ protected Map<String, Object> filterFieldMapping(MLBatchIngestionInput mlBatchIn
128
129
* @return A new map that contains all the fields and data for ingestion.
129
130
*/
130
131
protected Map <String , Object > processFieldMapping (String jsonStr , Map <String , Object > fieldMapping ) {
131
- String inputJsonPath = fieldMapping .containsKey (INPUT ) ? getJsonPath ((String ) fieldMapping .get (INPUT )) : null ;
132
- List <String > remoteModelInput = inputJsonPath != null ? (List <String >) JsonPath .read (jsonStr , inputJsonPath ) : null ;
133
- List <String > inputFieldNames = inputJsonPath != null ? (List <String >) fieldMapping .get (INPUT_FIELD_NAMES ) : null ;
134
-
135
- String outputJsonPath = fieldMapping .containsKey (OUTPUT ) ? getJsonPath ((String ) fieldMapping .get (OUTPUT )) : null ;
136
- List <List > remoteModelOutput = outputJsonPath != null ? (List <List >) JsonPath .read (jsonStr , outputJsonPath ) : null ;
137
- List <String > outputFieldNames = outputJsonPath != null ? (List <String >) fieldMapping .get (OUTPUT_FIELD_NAMES ) : null ;
138
-
139
- List <String > ingestFieldsJsonPath = Optional
140
- .ofNullable ((List <String >) fieldMapping .get (INGEST_FIELDS ))
141
- .stream ()
142
- .flatMap (Collection ::stream )
143
- .map (StringUtils ::getJsonPath )
144
- .collect (Collectors .toList ());
145
-
146
132
Map <String , Object > jsonMap = new HashMap <>();
147
-
148
- populateJsonMap (jsonMap , inputFieldNames , remoteModelInput );
149
- populateJsonMap (jsonMap , outputFieldNames , remoteModelOutput );
150
-
151
- for (String fieldPath : ingestFieldsJsonPath ) {
152
- jsonMap .put (obtainFieldNameFromJsonPath (fieldPath ), JsonPath .read (jsonStr , fieldPath ));
133
+ if (fieldMapping == null || fieldMapping .isEmpty ()) {
134
+ return jsonMap ;
153
135
}
154
136
155
- if (fieldMapping .containsKey (ID_FIELD )) {
156
- List <String > docIdJsonPath = Optional
157
- .ofNullable ((List <String >) fieldMapping .get (ID_FIELD ))
158
- .stream ()
159
- .flatMap (Collection ::stream )
160
- .map (StringUtils ::getJsonPath )
161
- .collect (Collectors .toList ());
162
- if (docIdJsonPath .size () != 1 ) {
163
- throw new IllegalArgumentException ("The Id field must contains only 1 jsonPath for each source" );
137
+ fieldMapping .entrySet ().stream ().forEach (entry -> {
138
+ Object value = entry .getValue ();
139
+ if (value instanceof String ) {
140
+ String jsonPath = (String ) value ;
141
+ jsonMap .put (entry .getKey (), JsonPath .read (jsonStr , jsonPath ));
142
+ } else if (value instanceof List ) {
143
+ ((List <String >) value ).stream ().forEach (jsonPath -> { jsonMap .put (entry .getKey (), JsonPath .read (jsonStr , jsonPath )); });
164
144
}
165
- jsonMap . put ( "_id" , JsonPath . read ( jsonStr , docIdJsonPath . get ( 0 )) );
166
- }
145
+ } );
146
+
167
147
return jsonMap ;
168
148
}
169
149
@@ -180,12 +160,11 @@ protected void batchIngest(
180
160
? mlBatchIngestionInput .getFieldMapping ()
181
161
: filterFieldMapping (mlBatchIngestionInput , sourceIndex );
182
162
Map <String , Object > jsonMap = processFieldMapping (jsonStr , filteredMapping );
183
- if (isSoleSource || sourceIndex == 0 ) {
163
+ if (jsonMap .isEmpty ()) {
164
+ return ;
165
+ }
166
+ if (isSoleSource && !jsonMap .containsKey ("_id" )) {
184
167
IndexRequest indexRequest = new IndexRequest (mlBatchIngestionInput .getIndexName ());
185
- if (jsonMap .containsKey ("_id" )) {
186
- String id = (String ) jsonMap .remove ("_id" );
187
- indexRequest .id (id );
188
- }
189
168
indexRequest .source (jsonMap );
190
169
bulkRequest .add (indexRequest );
191
170
} else {
0 commit comments