Skip to content

Commit 08b97d3

Browse files
authored
Add auto-generation of index mappings & settings based on processors (opensearch-project#552)
Signed-off-by: Tyler Ohlsen <ohltyler@amazon.com>
1 parent 9025d1b commit 08b97d3

File tree

4 files changed

+336
-62
lines changed

4 files changed

+336
-62
lines changed

common/constants.ts

+71-21
Original file line numberDiff line numberDiff line change
@@ -77,31 +77,81 @@ export const SEARCH_CONNECTORS_NODE_API_PATH = `${BASE_CONNECTOR_NODE_API_PATH}/
7777
* based on the specified remote model from a remote service, if found
7878
*/
7979

80-
// Cohere
81-
export const COHERE_DIMENSIONS = {
82-
[`embed-english-v3.0`]: 1024,
83-
[`embed-english-light-v3.0`]: 384,
84-
[`embed-multilingual-v3.0`]: 1024,
85-
[`embed-multilingual-light-v3.0`]: 384,
86-
[`embed-english-v2.0`]: 4096,
87-
[`embed-english-light-v2.0`]: 1024,
88-
[`embed-multilingual-v2.0`]: 768,
80+
interface RemoteEmbeddingModelConfig {
81+
dimension: number;
82+
fieldName: string;
83+
}
84+
85+
// Amazon BedRock
86+
export const BEDROCK_CONFIGS = {
87+
[`amazon.titan-embed-text-v1`]: {
88+
dimension: 1536,
89+
fieldName: 'embedding',
90+
} as RemoteEmbeddingModelConfig,
91+
[`amazon.titan-embed-text-v2`]: {
92+
dimension: 1024,
93+
fieldName: 'embedding',
94+
} as RemoteEmbeddingModelConfig,
95+
[`amazon.titan-embed-image-v1`]: {
96+
dimension: 1024,
97+
fieldName: 'embedding',
98+
} as RemoteEmbeddingModelConfig,
99+
[`cohere.embed-english-v3`]: {
100+
dimension: 1024,
101+
fieldName: 'embeddings',
102+
} as RemoteEmbeddingModelConfig,
103+
[`cohere.embed-multilingual-v3`]: {
104+
dimension: 1024,
105+
fieldName: 'embeddings',
106+
} as RemoteEmbeddingModelConfig,
89107
};
90108

91-
// OpenAI
92-
export const OPENAI_DIMENSIONS = {
93-
[`text-embedding-3-small`]: 1536,
94-
[`text-embedding-3-large`]: 3072,
95-
[`text-embedding-ada-002`]: 1536,
109+
// Cohere
110+
export const COHERE_CONFIGS = {
111+
[`embed-english-v3.0`]: {
112+
dimension: 1024,
113+
fieldName: 'embeddings',
114+
} as RemoteEmbeddingModelConfig,
115+
[`embed-english-light-v3.0`]: {
116+
dimension: 384,
117+
fieldName: 'embeddings',
118+
} as RemoteEmbeddingModelConfig,
119+
[`embed-multilingual-v3.0`]: {
120+
dimension: 1024,
121+
fieldName: 'embeddings',
122+
} as RemoteEmbeddingModelConfig,
123+
[`embed-multilingual-light-v3.0`]: {
124+
dimension: 384,
125+
fieldName: 'embeddings',
126+
} as RemoteEmbeddingModelConfig,
127+
[`embed-english-v2.0`]: {
128+
dimension: 4096,
129+
fieldName: 'embeddings',
130+
} as RemoteEmbeddingModelConfig,
131+
[`embed-english-light-v2.0`]: {
132+
dimension: 1024,
133+
fieldName: 'embeddings',
134+
} as RemoteEmbeddingModelConfig,
135+
[`embed-multilingual-v2.0`]: {
136+
dimension: 768,
137+
fieldName: 'embeddings',
138+
} as RemoteEmbeddingModelConfig,
96139
};
97140

98-
// Amazon BedRock
99-
export const BEDROCK_DIMENSIONS = {
100-
[`amazon.titan-embed-text-v1`]: 1536,
101-
[`amazon.titan-embed-text-v2`]: 1024,
102-
[`amazon.titan-embed-image-v1`]: 1024,
103-
[`cohere.embed-english-v3`]: 1024, // same as Cohere directly
104-
[`cohere.embed-multilingual-v3`]: 1024, // same as Cohere directly
141+
// OpenAI
142+
export const OPENAI_CONFIGS = {
143+
[`text-embedding-3-small`]: {
144+
dimension: 1536,
145+
fieldName: 'embedding',
146+
} as RemoteEmbeddingModelConfig,
147+
[`text-embedding-3-large`]: {
148+
dimension: 3072,
149+
fieldName: 'embedding',
150+
} as RemoteEmbeddingModelConfig,
151+
[`text-embedding-ada-002`]: {
152+
dimension: 1536,
153+
fieldName: 'embedding',
154+
} as RemoteEmbeddingModelConfig,
105155
};
106156

107157
/**

public/pages/workflow_detail/workflow_inputs/ingest_inputs/advanced_settings.tsx

+100-9
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,129 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6-
import React from 'react';
6+
import React, { useEffect } from 'react';
7+
import { useSelector } from 'react-redux';
8+
import { isEmpty } from 'lodash';
79
import {
810
EuiAccordion,
911
EuiFlexGroup,
1012
EuiFlexItem,
1113
EuiSpacer,
1214
} from '@elastic/eui';
1315
import { JsonField } from '../input_fields';
16+
import { getIn, useFormikContext } from 'formik';
17+
import { WorkflowFormValues } from '../../../../../common';
18+
import { AppState } from '../../../../store';
19+
import {
20+
getEmbeddingField,
21+
getEmbeddingModelDimensions,
22+
getUpdatedIndexMappings,
23+
getUpdatedIndexSettings,
24+
isKnnIndex,
25+
removeVectorFieldFromIndexMappings,
26+
} from '../../../../utils';
1427

1528
interface AdvancedSettingsProps {}
1629

1730
/**
1831
* Input component for configuring ingest-side advanced settings
1932
*/
2033
export function AdvancedSettings(props: AdvancedSettingsProps) {
34+
const { values, setFieldValue } = useFormikContext<WorkflowFormValues>();
35+
const { models, connectors } = useSelector((state: AppState) => state.ml);
36+
const ingestMLProcessors = (Object.values(
37+
values?.ingest?.enrich
38+
) as any[]).filter((ingestProcessor) => ingestProcessor?.model !== undefined);
39+
const ingestProcessorModelIds = ingestMLProcessors
40+
.map((ingestProcessor) => ingestProcessor?.model?.id as string | undefined)
41+
.filter((modelId) => !isEmpty(modelId));
42+
const indexMappingsPath = 'ingest.index.mappings';
43+
const indexSettingsPath = 'ingest.index.settings';
44+
const curMappings = getIn(values, indexMappingsPath);
45+
const curSettings = getIn(values, indexSettingsPath);
46+
47+
// Listen for processors with models being added / removed, and dynamically update the index
48+
// settings to be knn-enabled or knn-disabled.
49+
useEffect(() => {
50+
if (ingestProcessorModelIds.length > 0) {
51+
ingestProcessorModelIds.forEach((ingestProcessorModelId) => {
52+
const processorModel = Object.values(models).find(
53+
(model) => model.id === ingestProcessorModelId
54+
);
55+
if (processorModel?.connectorId !== undefined) {
56+
const processorConnector = connectors[processorModel?.connectorId];
57+
const dimension = getEmbeddingModelDimensions(processorConnector);
58+
59+
// If a dimension is found, it is a known embedding model.
60+
// Ensure the index is configured to be knn-enabled.
61+
if (dimension !== undefined) {
62+
if (!isKnnIndex(curSettings)) {
63+
setFieldValue(
64+
indexSettingsPath,
65+
getUpdatedIndexSettings(curSettings, true)
66+
);
67+
}
68+
}
69+
}
70+
});
71+
} else {
72+
if (isKnnIndex(curSettings)) {
73+
setFieldValue(
74+
indexSettingsPath,
75+
getUpdatedIndexSettings(curSettings, false)
76+
);
77+
}
78+
}
79+
}, [ingestProcessorModelIds.length]);
80+
81+
// Listen for updates to any ingest processors. Try to update
82+
// any index mappings accordingly, such as setting the knn_vector mappings
83+
// for models that output vector embeddings, or removing any mappings, if no ML
84+
// processor is defined.
85+
useEffect(() => {
86+
if (ingestMLProcessors.length > 0) {
87+
ingestMLProcessors.forEach((ingestMLProcessor) => {
88+
const processorModel = Object.values(models).find(
89+
(model) => model.id === ingestMLProcessor?.model?.id
90+
);
91+
if (processorModel?.connectorId !== undefined) {
92+
const processorConnector = connectors[processorModel?.connectorId];
93+
const dimension = getEmbeddingModelDimensions(processorConnector);
94+
const embeddingFieldName = getEmbeddingField(
95+
processorConnector,
96+
ingestMLProcessor
97+
);
98+
if (embeddingFieldName !== undefined && dimension !== undefined) {
99+
setFieldValue(
100+
indexMappingsPath,
101+
getUpdatedIndexMappings(
102+
curMappings,
103+
embeddingFieldName,
104+
dimension
105+
)
106+
);
107+
}
108+
}
109+
});
110+
} else {
111+
setFieldValue(
112+
indexMappingsPath,
113+
removeVectorFieldFromIndexMappings(curMappings)
114+
);
115+
}
116+
}, [getIn(values, 'ingest.enrich')]);
117+
21118
return (
22119
<EuiFlexGroup direction="column">
23120
<EuiFlexItem grow={false}>
24121
<EuiAccordion id="advancedSettings" buttonContent="Advanced settings">
25122
<EuiSpacer size="s" />
26123
<EuiFlexGroup direction="column">
27124
<EuiFlexItem>
28-
<JsonField
29-
label="Index mappings"
30-
fieldPath={'ingest.index.mappings'}
31-
/>
125+
<JsonField label="Index mappings" fieldPath={indexMappingsPath} />
32126
</EuiFlexItem>
33127
<EuiFlexItem>
34-
<JsonField
35-
label="Index settings"
36-
fieldPath={'ingest.index.settings'}
37-
/>
128+
<JsonField label="Index settings" fieldPath={indexSettingsPath} />
38129
</EuiFlexItem>
39130
</EuiFlexGroup>
40131
</EuiAccordion>

public/pages/workflows/new_workflow/quick_configure_inputs.tsx

+5-31
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,18 @@ import {
1616
EuiCompressedFieldNumber,
1717
} from '@elastic/eui';
1818
import {
19-
BEDROCK_DIMENSIONS,
20-
COHERE_DIMENSIONS,
2119
DEFAULT_IMAGE_FIELD,
2220
DEFAULT_LLM_RESPONSE_FIELD,
2321
DEFAULT_TEXT_FIELD,
2422
DEFAULT_VECTOR_FIELD,
2523
MODEL_STATE,
2624
Model,
2725
ModelInterface,
28-
OPENAI_DIMENSIONS,
2926
QuickConfigureFields,
3027
WORKFLOW_TYPE,
3128
} from '../../../../common';
3229
import { AppState } from '../../../store';
33-
import { parseModelInputs } from '../../../utils';
30+
import { getEmbeddingModelDimensions, parseModelInputs } from '../../../utils';
3431
import { get } from 'lodash';
3532

3633
interface QuickConfigureInputsProps {
@@ -121,33 +118,10 @@ export function QuickConfigureInputs(props: QuickConfigureInputsProps) {
121118
if (selectedModel?.connectorId !== undefined) {
122119
const connector = connectors[selectedModel.connectorId];
123120
if (connector !== undefined) {
124-
// some APIs allow specifically setting the dimensions at runtime,
125-
// so we check for that first.
126-
if (connector.parameters?.dimensions !== undefined) {
127-
setFieldValues({
128-
...fieldValues,
129-
embeddingLength: connector.parameters?.dimensions,
130-
});
131-
} else if (connector.parameters?.model !== undefined) {
132-
const dimensions =
133-
// @ts-ignore
134-
COHERE_DIMENSIONS[connector.parameters?.model] ||
135-
// @ts-ignore
136-
OPENAI_DIMENSIONS[connector.parameters?.model] ||
137-
// @ts-ignore
138-
BEDROCK_DIMENSIONS[connector.parameters?.model];
139-
if (dimensions !== undefined) {
140-
setFieldValues({
141-
...fieldValues,
142-
embeddingLength: dimensions,
143-
});
144-
}
145-
} else {
146-
setFieldValues({
147-
...fieldValues,
148-
embeddingLength: undefined,
149-
});
150-
}
121+
setFieldValues({
122+
...fieldValues,
123+
embeddingLength: getEmbeddingModelDimensions(connector),
124+
});
151125
}
152126
}
153127
}, [fieldValues.modelId, deployedModels, connectors]);

0 commit comments

Comments
 (0)