Skip to content

Commit 2fcb819

Browse files
authored
Complete advanced output transform (ingest) (#236)
Signed-off-by: Tyler Ohlsen <ohltyler@amazon.com>
1 parent 1b8486e commit 2fcb819

10 files changed

+464
-218
lines changed

public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx

+10-5
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,9 @@
44
*/
55

66
import React, { useEffect, useState } from 'react';
7-
import { useFormikContext, getIn } from 'formik';
7+
import { useFormikContext } from 'formik';
88
import {
99
EuiButton,
10-
EuiCodeBlock,
1110
EuiFilePicker,
1211
EuiFlexGroup,
1312
EuiFlexItem,
@@ -122,9 +121,15 @@ export function SourceData(props: SourceDataProps) {
122121
</EuiButton>
123122
</EuiFlexItem>
124123
<EuiFlexItem grow={false}>
125-
<EuiCodeBlock language="json" fontSize="m" isCopyable={false}>
126-
{getIn(values, 'ingest.docs')}
127-
</EuiCodeBlock>
124+
<JsonField
125+
fieldPath={'ingest.docs'}
126+
helpText="Documents should be formatted as a valid JSON array."
127+
// when ingest doc values change, don't update the form
128+
// since we initially only support running ingest once per configuration
129+
onFormChange={() => {}}
130+
editorHeight="25vh"
131+
readOnly={true}
132+
/>
128133
</EuiFlexItem>
129134
</EuiFlexGroup>
130135
</>

public/pages/workflow_detail/workflow_inputs/processor_inputs/input_transform_modal.tsx

+56-119
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,10 @@
55

66
import React, { useState } from 'react';
77
import { useFormikContext, getIn } from 'formik';
8-
import { isEmpty, get } from 'lodash';
9-
import jsonpath from 'jsonpath';
8+
import { isEmpty } from 'lodash';
109
import {
1110
EuiButton,
12-
EuiCodeBlock,
11+
EuiCodeEditor,
1312
EuiFlexGroup,
1413
EuiFlexItem,
1514
EuiModal,
@@ -27,12 +26,16 @@ import {
2726
JSONPATH_ROOT_SELECTOR,
2827
ML_INFERENCE_DOCS_LINK,
2928
PROCESSOR_CONTEXT,
30-
SimulateIngestPipelineDoc,
3129
SimulateIngestPipelineResponse,
3230
WorkflowConfig,
3331
WorkflowFormValues,
3432
} from '../../../../../common';
35-
import { formikToIngestPipeline, generateId } from '../../../../utils';
33+
import {
34+
formikToIngestPipeline,
35+
generateTransform,
36+
prepareDocsForSimulate,
37+
unwrapTransformedDocs,
38+
} from '../../../../utils';
3639
import { simulatePipeline, useAppDispatch } from '../../../../store';
3740
import { getCore } from '../../../../services';
3841
import { MapField } from '../input_fields';
@@ -58,8 +61,8 @@ export function InputTransformModal(props: InputTransformModalProps) {
5861
const [sourceInput, setSourceInput] = useState<string>('[]');
5962
const [transformedOutput, setTransformedOutput] = useState<string>('[]');
6063

61-
// parse out the values and determine if there are none/some/all valid jsonpaths
62-
const mapValues = getIn(values, `ingest.enrich.${props.config.id}.inputMap`);
64+
// get the current input map
65+
const map = getIn(values, `ingest.enrich.${props.config.id}.inputMap`);
6366

6467
return (
6568
<EuiModal onClose={props.onClose} style={{ width: '70vw' }}>
@@ -68,7 +71,7 @@ export function InputTransformModal(props: InputTransformModalProps) {
6871
<p>{`Configure input`}</p>
6972
</EuiModalHeaderTitle>
7073
</EuiModalHeader>
71-
<EuiModalBody>
74+
<EuiModalBody style={{ height: '60vh' }}>
7275
<EuiFlexGroup direction="column">
7376
<EuiFlexItem>
7477
<>
@@ -78,10 +81,12 @@ export function InputTransformModal(props: InputTransformModalProps) {
7881
onClick={async () => {
7982
switch (props.context) {
8083
case PROCESSOR_CONTEXT.INGEST: {
84+
// get the current ingest pipeline up to, but not including, this processor
8185
const curIngestPipeline = formikToIngestPipeline(
8286
values,
8387
props.uiConfig,
84-
props.config.id
88+
props.config.id,
89+
false
8590
);
8691
// if there are preceding processors, we need to generate the ingest pipeline
8792
// up to this point and simulate, in order to get the latest transformed
@@ -103,7 +108,7 @@ export function InputTransformModal(props: InputTransformModalProps) {
103108
})
104109
.catch((error: any) => {
105110
getCore().notifications.toasts.addDanger(
106-
`Failed to fetch input schema`
111+
`Failed to fetch input data`
107112
);
108113
});
109114
} else {
@@ -118,9 +123,22 @@ export function InputTransformModal(props: InputTransformModalProps) {
118123
Fetch
119124
</EuiButton>
120125
<EuiSpacer size="s" />
121-
<EuiCodeBlock fontSize="m" isCopyable={false}>
122-
{sourceInput}
123-
</EuiCodeBlock>
126+
<EuiCodeEditor
127+
mode="json"
128+
theme="textmate"
129+
width="100%"
130+
height="15vh"
131+
value={sourceInput}
132+
readOnly={true}
133+
setOptions={{
134+
fontSize: '12px',
135+
autoScrollEditorIntoView: true,
136+
showLineNumbers: false,
137+
showGutter: false,
138+
showPrintMargin: false,
139+
}}
140+
tabSize={2}
141+
/>
124142
</>
125143
</EuiFlexItem>
126144
<EuiFlexItem>
@@ -148,72 +166,23 @@ export function InputTransformModal(props: InputTransformModalProps) {
148166
<EuiText>Expected output</EuiText>
149167
<EuiButton
150168
style={{ width: '100px' }}
151-
disabled={
152-
isEmpty(mapValues) || isEmpty(JSON.parse(sourceInput))
153-
}
169+
disabled={isEmpty(map) || isEmpty(JSON.parse(sourceInput))}
154170
onClick={async () => {
155171
switch (props.context) {
156172
case PROCESSOR_CONTEXT.INGEST: {
157-
if (
158-
!isEmpty(mapValues) &&
159-
!isEmpty(JSON.parse(sourceInput))
160-
) {
161-
let output = {};
173+
if (!isEmpty(map) && !isEmpty(JSON.parse(sourceInput))) {
162174
let sampleSourceInput = {};
163175
try {
164176
sampleSourceInput = JSON.parse(sourceInput)[0];
177+
const output = generateTransform(
178+
sampleSourceInput,
179+
map
180+
);
181+
setTransformedOutput(
182+
JSON.stringify(output, undefined, 2)
183+
);
165184
} catch {}
166-
167-
mapValues.forEach(
168-
(mapValue: { key: string; value: string }) => {
169-
const path = mapValue.value;
170-
try {
171-
let transformedResult = undefined;
172-
// ML inference processors will use standard dot notation or JSONPath depending on the input.
173-
// We follow the same logic here to generate consistent results.
174-
if (
175-
mapValue.value.startsWith(
176-
JSONPATH_ROOT_SELECTOR
177-
)
178-
) {
179-
// JSONPath transform
180-
transformedResult = jsonpath.query(
181-
sampleSourceInput,
182-
path
183-
);
184-
// Bracket notation not supported - throw an error
185-
} else if (
186-
mapValue.value.includes(']') ||
187-
mapValue.value.includes(']')
188-
) {
189-
throw new Error();
190-
// Standard dot notation
191-
} else {
192-
transformedResult = get(
193-
sampleSourceInput,
194-
path
195-
);
196-
}
197-
198-
output = {
199-
...output,
200-
[mapValue.key]: transformedResult || '',
201-
};
202-
203-
setTransformedOutput(
204-
JSON.stringify(output, undefined, 2)
205-
);
206-
} catch (e: any) {
207-
console.error(e);
208-
getCore().notifications.toasts.addDanger(
209-
'Error generating expected output. Ensure your inputs are valid JSONPath or dot notation syntax.',
210-
e
211-
);
212-
}
213-
}
214-
);
215185
}
216-
217186
break;
218187
}
219188
// TODO: complete for search request / search response contexts
@@ -223,9 +192,22 @@ export function InputTransformModal(props: InputTransformModalProps) {
223192
Generate
224193
</EuiButton>
225194
<EuiSpacer size="s" />
226-
<EuiCodeBlock fontSize="m" isCopyable={false}>
227-
{transformedOutput}
228-
</EuiCodeBlock>
195+
<EuiCodeEditor
196+
mode="json"
197+
theme="textmate"
198+
width="100%"
199+
height="15vh"
200+
value={transformedOutput}
201+
readOnly={true}
202+
setOptions={{
203+
fontSize: '12px',
204+
autoScrollEditorIntoView: true,
205+
showLineNumbers: false,
206+
showGutter: false,
207+
showPrintMargin: false,
208+
}}
209+
tabSize={2}
210+
/>
229211
</>
230212
</EuiFlexItem>
231213
</EuiFlexGroup>
@@ -238,48 +220,3 @@ export function InputTransformModal(props: InputTransformModalProps) {
238220
</EuiModal>
239221
);
240222
}
241-
242-
// docs are expected to be in a certain format to be passed to the simulate ingest pipeline API.
243-
// for details, see https://opensearch.org/docs/latest/ingest-pipelines/simulate-ingest
244-
function prepareDocsForSimulate(
245-
docs: string,
246-
indexName: string
247-
): SimulateIngestPipelineDoc[] {
248-
const preparedDocs = [] as SimulateIngestPipelineDoc[];
249-
const docObjs = JSON.parse(docs) as {}[];
250-
docObjs.forEach((doc) => {
251-
preparedDocs.push({
252-
_index: indexName,
253-
_id: generateId(),
254-
_source: doc,
255-
});
256-
});
257-
return preparedDocs;
258-
}
259-
260-
// docs are returned in a certain format from the simulate ingest pipeline API. We want
261-
// to format them into a more readable string to display
262-
function unwrapTransformedDocs(
263-
simulatePipelineResponse: SimulateIngestPipelineResponse
264-
) {
265-
let errorDuringSimulate = undefined as string | undefined;
266-
const transformedDocsSources = simulatePipelineResponse.docs.map(
267-
(transformedDoc) => {
268-
if (transformedDoc.error !== undefined) {
269-
errorDuringSimulate = transformedDoc.error.reason || '';
270-
} else {
271-
return transformedDoc.doc._source;
272-
}
273-
}
274-
);
275-
276-
// there is an edge case where simulate may fail if there is some server-side or OpenSearch issue when
277-
// running ingest (e.g., hitting rate limits on remote model)
278-
// We pull out any returned error from a document and propagate it to the user.
279-
if (errorDuringSimulate !== undefined) {
280-
getCore().notifications.toasts.addDanger(
281-
`Failed to simulate ingest on all documents: ${errorDuringSimulate}`
282-
);
283-
}
284-
return JSON.stringify(transformedDocsSources, undefined, 2);
285-
}

0 commit comments

Comments
 (0)