8
8
import static org .opensearch .ml .common .utils .StringUtils .getJsonPath ;
9
9
import static org .opensearch .ml .common .utils .StringUtils .obtainFieldNameFromJsonPath ;
10
10
11
- import java .util .Arrays ;
12
11
import java .util .HashMap ;
13
12
import java .util .List ;
14
13
import java .util .Map ;
14
+ import java .util .Optional ;
15
15
import java .util .concurrent .CompletableFuture ;
16
16
import java .util .concurrent .atomic .AtomicInteger ;
17
17
import java .util .stream .Collectors ;
@@ -96,16 +96,16 @@ protected Map<String, Object> filterFieldMapping(MLBatchIngestionInput mlBatchIn
96
96
Object value = entry .getValue ();
97
97
if (value instanceof String ) {
98
98
return ((String ) value ).contains (prefix );
99
- } else if (value instanceof String [] ) {
100
- return Arrays . stream (( String [] ) value ).anyMatch (val -> val .contains (prefix ));
99
+ } else if (value instanceof List ) {
100
+ return (( List < String > ) value ). stream ( ).anyMatch (val -> val .contains (prefix ));
101
101
}
102
102
return false ;
103
103
}).collect (Collectors .toMap (Map .Entry ::getKey , entry -> {
104
104
Object value = entry .getValue ();
105
105
if (value instanceof String ) {
106
106
return value ;
107
- } else if (value instanceof String [] ) {
108
- return Arrays . stream (( String [] ) value ).filter (val -> val .contains (prefix )).toArray ( String []:: new );
107
+ } else if (value instanceof List ) {
108
+ return (( List < String > ) value ).stream (). filter (val -> val .contains (prefix )).collect ( Collectors . toList () );
109
109
}
110
110
return null ;
111
111
}));
@@ -136,32 +136,28 @@ protected Map<String, Object> processFieldMapping(String jsonStr, Map<String, Ob
136
136
List <String > outputFieldNames = outputJsonPath != null ? (List <String >) fieldMapping .get (OUTPUT_FIELD_NAMES ) : null ;
137
137
138
138
List <String > ingestFieldsJsonPath = Optional
139
- .ofNullable ((List <String >) fieldMapping .get (INGEST_FIELDS ))
140
- .stream ()
141
- .map (StringUtils ::getJsonPath )
142
- .collect (Collectors .toList ());
139
+ .ofNullable ((List <String >) fieldMapping .get (INGEST_FIELDS ))
140
+ .stream ()
141
+ .flatMap (java .util .Collection ::stream )
142
+ .map (StringUtils ::getJsonPath )
143
+ .collect (Collectors .toList ());
143
144
144
- if (remoteModelInput .size () != inputFieldNames .size () || remoteModelOutput .size () != outputFieldNames .size ()) {
145
- throw new IllegalArgumentException ("the fieldMapping and source data do not match" );
146
- }
147
145
Map <String , Object > jsonMap = new HashMap <>();
148
146
149
- for (int index = 0 ; index < remoteModelInput .size (); index ++) {
150
- jsonMap .put (inputFieldNames .get (index ), remoteModelInput .get (index ));
151
- jsonMap .put (outputFieldNames .get (index ), remoteModelOutput .get (index ));
152
- }
147
+ populateJsonMap (jsonMap , inputFieldNames , remoteModelInput );
148
+ populateJsonMap (jsonMap , outputFieldNames , remoteModelOutput );
153
149
154
150
for (String fieldPath : ingestFieldsJsonPath ) {
155
151
jsonMap .put (obtainFieldNameFromJsonPath (fieldPath ), JsonPath .read (jsonStr , fieldPath ));
156
152
}
157
153
158
154
if (fieldMapping .containsKey (ID_FIELD )) {
159
155
List <String > docIdJsonPath = Optional
160
- .ofNullable ((List <String >) fieldMapping .get (ID_FIELD ))
161
- .stream ()
162
- .flatMap (Collection ::stream )
163
- .map (StringUtils ::getJsonPath )
164
- .collect (Collectors .toList ());
156
+ .ofNullable ((List <String >) fieldMapping .get (ID_FIELD ))
157
+ .stream ()
158
+ .flatMap (java . util . Collection ::stream )
159
+ .map (StringUtils ::getJsonPath )
160
+ .collect (Collectors .toList ());
165
161
if (docIdJsonPath .size () != 1 ) {
166
162
throw new IllegalArgumentException ("The Id field must contains only 1 jsonPath for each source" );
167
163
}
@@ -180,25 +176,39 @@ protected void batchIngest(
180
176
BulkRequest bulkRequest = new BulkRequest ();
181
177
sourceLines .stream ().forEach (jsonStr -> {
182
178
Map <String , Object > filteredMapping = isSoleSource
183
- ? mlBatchIngestionInput .getFieldMapping ()
184
- : filterFieldMapping (mlBatchIngestionInput , sourceIndex );
179
+ ? mlBatchIngestionInput .getFieldMapping ()
180
+ : filterFieldMapping (mlBatchIngestionInput , sourceIndex );
185
181
Map <String , Object > jsonMap = processFieldMapping (jsonStr , filteredMapping );
186
182
if (isSoleSource || sourceIndex == 0 ) {
187
- IndexRequest indexRequest = new IndexRequest (mlBatchIngestionInput .getIndexName ()). source ( jsonMap ) ;
183
+ IndexRequest indexRequest = new IndexRequest (mlBatchIngestionInput .getIndexName ());
188
184
if (jsonMap .containsKey ("_id" )) {
189
- indexRequest .id ((String ) jsonMap .get ("_id" ));
185
+ String id = (String ) jsonMap .remove ("_id" );
186
+ indexRequest .id (id );
190
187
}
188
+ indexRequest .source (jsonMap );
191
189
bulkRequest .add (indexRequest );
192
190
} else {
193
191
// bulk update docs as they were partially ingested
194
192
if (!jsonMap .containsKey ("_id" )) {
195
193
throw new IllegalArgumentException ("The id filed must be provided to match documents for multiple sources" );
196
194
}
197
- String id = (String ) jsonMap .get ("_id" );
195
+ String id = (String ) jsonMap .remove ("_id" );
198
196
UpdateRequest updateRequest = new UpdateRequest (mlBatchIngestionInput .getIndexName (), id ).doc (jsonMap ).upsert (jsonMap );
199
197
bulkRequest .add (updateRequest );
200
198
}
201
199
});
202
200
client .bulk (bulkRequest , bulkResponseListener );
203
201
}
202
+
203
+ private void populateJsonMap (Map <String , Object > jsonMap , List <String > fieldNames , List <?> modelData ) {
204
+ if (modelData != null ) {
205
+ if (modelData .size () != fieldNames .size ()) {
206
+ throw new IllegalArgumentException ("The fieldMapping and source data do not match" );
207
+ }
208
+
209
+ for (int index = 0 ; index < modelData .size (); index ++) {
210
+ jsonMap .put (fieldNames .get (index ), modelData .get (index ));
211
+ }
212
+ }
213
+ }
204
214
}
0 commit comments