Skip to content

Commit 4edadd7

Browse files
Add new RAG + hybrid search preset (#665) (#669)
(cherry picked from commit 3ff7b00)
Signed-off-by: Tyler Ohlsen <ohltyler@amazon.com>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 7049c26 commit 4edadd7

File tree

10 files changed

+126
-121
lines changed

10 files changed

+126
-121
lines changed

common/constants.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ export enum WORKFLOW_TYPE {
166166
SEMANTIC_SEARCH = 'Semantic Search',
167167
MULTIMODAL_SEARCH = 'Multimodal Search',
168168
HYBRID_SEARCH = 'Hybrid Search',
169-
RAG = 'RAG with Lexical Retrieval',
170169
VECTOR_SEARCH_WITH_RAG = 'RAG with Vector Retrieval',
170+
HYBRID_SEARCH_WITH_RAG = 'RAG with Hybrid Search',
171171
CUSTOM = 'Custom Search',
172172
UNKNOWN = 'Unknown',
173173
}

common/utils.ts

+17-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*/
55

66
import moment from 'moment';
7-
import { DATE_FORMAT_PATTERN, WORKFLOW_TYPE, Workflow } from './';
7+
import { DATE_FORMAT_PATTERN, WORKFLOW_TYPE } from './';
88
import { isEmpty } from 'lodash';
99

1010
export function toFormattedDate(timestampMillis: number): String {
@@ -44,14 +44,25 @@ export function customStringifySingleLine(jsonObj: {}): string {
4444
return JSON.stringify(jsonObj, undefined, 0);
4545
}
4646

47-
export function isVectorSearchUseCase(workflow: Workflow | undefined): boolean {
47+
export function isVectorSearchUseCase(workflowType?: WORKFLOW_TYPE): boolean {
4848
return (
49-
workflow?.ui_metadata?.type !== undefined &&
49+
workflowType !== undefined &&
5050
[
51-
WORKFLOW_TYPE.HYBRID_SEARCH,
52-
WORKFLOW_TYPE.MULTIMODAL_SEARCH,
5351
WORKFLOW_TYPE.SEMANTIC_SEARCH,
52+
WORKFLOW_TYPE.MULTIMODAL_SEARCH,
53+
WORKFLOW_TYPE.HYBRID_SEARCH,
54+
WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG,
55+
WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG,
56+
].includes(workflowType)
57+
);
58+
}
59+
60+
export function isRAGUseCase(workflowType?: WORKFLOW_TYPE): boolean {
61+
return (
62+
workflowType !== undefined &&
63+
[
5464
WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG,
55-
].includes(workflow?.ui_metadata?.type)
65+
WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG,
66+
].includes(workflowType)
5667
);
5768
}

public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ export function SourceData(props: SourceDataProps) {
6868

6969
// try to clear out any default values for the ML ingest processor, if applicable
7070
if (
71-
isVectorSearchUseCase(props.workflow) &&
71+
isVectorSearchUseCase(props.workflow?.ui_metadata?.type) &&
7272
isEditModalOpen &&
7373
selectedOption !== SOURCE_OPTIONS.EXISTING_INDEX
7474
) {

public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ export function SourceDataModal(props: SourceDataProps) {
112112

113113
// 2. Update several form values if an index is selected (and if vector search)
114114
if (selectedIndex !== undefined) {
115-
if (isVectorSearchUseCase(props.workflow)) {
115+
if (isVectorSearchUseCase(props.workflow?.ui_metadata?.type)) {
116116
dispatch(getMappings({ index: selectedIndex, dataSourceId }))
117117
.unwrap()
118118
.then((resp: IndexMappings) => {

public/pages/workflows/new_workflow/quick_configure_modal.tsx

+42-64
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ import {
5959
MODEL_STATE,
6060
ML_REMOTE_MODEL_LINK,
6161
MODEL_CATEGORY,
62+
isRAGUseCase,
6263
} from '../../../../common';
6364
import { APP_PATH, getInitialValue } from '../../../utils';
6465
import { AppState, createWorkflow, useAppDispatch } from '../../../store';
@@ -140,10 +141,7 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) {
140141
// If not custom/blank, we will have more req'd form fields for the users to supply
141142
if (workflowType !== WORKFLOW_TYPE.CUSTOM) {
142143
// if a RAG workflow, require an LLM
143-
if (
144-
workflowType === WORKFLOW_TYPE.RAG ||
145-
workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG
146-
) {
144+
if (isRAGUseCase(workflowType)) {
147145
tempFormValues = {
148146
...tempFormValues,
149147
llm: getInitialValue('model'),
@@ -160,24 +158,21 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) {
160158
}),
161159
};
162160
}
163-
// all workflows besides custom and vanilla RAG require an embedding model
164-
if (workflowType !== WORKFLOW_TYPE.RAG) {
165-
tempFormValues = {
166-
...tempFormValues,
167-
embeddingModel: getInitialValue('model'),
168-
};
169-
tempFormSchemaObj = {
170-
...tempFormSchemaObj,
171-
embeddingModel: yup.object({
172-
id: yup
173-
.string()
174-
.trim()
175-
.min(1, 'Too short')
176-
.max(MAX_STRING_LENGTH, 'Too long')
177-
.required('Required'),
178-
}),
179-
};
180-
}
161+
tempFormValues = {
162+
...tempFormValues,
163+
embeddingModel: getInitialValue('model'),
164+
};
165+
tempFormSchemaObj = {
166+
...tempFormSchemaObj,
167+
embeddingModel: yup.object({
168+
id: yup
169+
.string()
170+
.trim()
171+
.min(1, 'Too short')
172+
.max(MAX_STRING_LENGTH, 'Too long')
173+
.required('Required'),
174+
}),
175+
};
181176
}
182177
setFormValues(tempFormValues);
183178
setFormSchemaObj(tempFormSchemaObj);
@@ -297,30 +292,26 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) {
297292
/>
298293
</EuiFlexItem>
299294
)}
300-
{(props.workflow?.ui_metadata?.type === WORKFLOW_TYPE.RAG ||
301-
props.workflow?.ui_metadata?.type ===
302-
WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) &&
303-
!isEmpty(deployedModels) && (
304-
<EuiFlexItem>
305-
<ModelField
306-
modelCategory={MODEL_CATEGORY.LLM}
307-
fieldPath="llm"
308-
showMissingInterfaceCallout={false}
309-
label="Large language model"
310-
helpText="The large language model to generate user-friendly responses."
311-
fullWidth={true}
312-
showError={true}
313-
onModelChange={(modelId) =>
314-
setQuickConfigureFields({
315-
...quickConfigureFields,
316-
llmId: modelId,
317-
})
318-
}
319-
/>
320-
</EuiFlexItem>
321-
)}
295+
{isRAGUseCase(props.workflow?.ui_metadata?.type) && (
296+
<EuiFlexItem>
297+
<ModelField
298+
modelCategory={MODEL_CATEGORY.LLM}
299+
fieldPath="llm"
300+
showMissingInterfaceCallout={false}
301+
label="Large language model"
302+
helpText="The large language model to generate user-friendly responses."
303+
fullWidth={true}
304+
showError={true}
305+
onModelChange={(modelId) =>
306+
setQuickConfigureFields({
307+
...quickConfigureFields,
308+
llmId: modelId,
309+
})
310+
}
311+
/>
312+
</EuiFlexItem>
313+
)}
322314
{props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.CUSTOM &&
323-
props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.RAG &&
324315
!isEmpty(deployedModels) && (
325316
<EuiFlexItem>
326317
<>
@@ -449,7 +440,7 @@ function injectQuickConfigureFields(
449440
workflow.ui_metadata.config,
450441
quickConfigureFields,
451442
embeddingModelInterface,
452-
isVectorSearchUseCase(workflow)
443+
isVectorSearchUseCase(workflow?.ui_metadata?.type)
453444
);
454445
workflow.ui_metadata.config = updateIndexConfig(
455446
workflow.ui_metadata.config,
@@ -463,32 +454,19 @@ function injectQuickConfigureFields(
463454
workflow.ui_metadata.config,
464455
quickConfigureFields,
465456
embeddingModelInterface,
466-
isVectorSearchUseCase(workflow)
467-
);
468-
}
469-
break;
470-
}
471-
case WORKFLOW_TYPE.RAG: {
472-
if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) {
473-
workflow.ui_metadata.config = updateIndexConfig(
474-
workflow.ui_metadata.config,
475-
quickConfigureFields
476-
);
477-
workflow.ui_metadata.config = updateRAGSearchResponseProcessors(
478-
workflow.ui_metadata.config,
479-
quickConfigureFields,
480-
llmInterface
457+
isVectorSearchUseCase(workflow?.ui_metadata?.type)
481458
);
482459
}
483460
break;
484461
}
485-
case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: {
462+
case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG:
463+
case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: {
486464
if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) {
487465
workflow.ui_metadata.config = updateIngestProcessors(
488466
workflow.ui_metadata.config,
489467
quickConfigureFields,
490468
embeddingModelInterface,
491-
isVectorSearchUseCase(workflow)
469+
isVectorSearchUseCase(workflow?.ui_metadata?.type)
492470
);
493471
workflow.ui_metadata.config = updateIndexConfig(
494472
workflow.ui_metadata.config,
@@ -502,7 +480,7 @@ function injectQuickConfigureFields(
502480
workflow.ui_metadata.config,
503481
quickConfigureFields,
504482
embeddingModelInterface,
505-
isVectorSearchUseCase(workflow)
483+
isVectorSearchUseCase(workflow?.ui_metadata?.type)
506484
);
507485
workflow.ui_metadata.config = updateRAGSearchResponseProcessors(
508486
workflow.ui_metadata.config,

public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx

+7-20
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import {
2626
ModelInterface,
2727
QuickConfigureFields,
2828
WORKFLOW_TYPE,
29+
isRAGUseCase,
30+
isVectorSearchUseCase,
2931
} from '../../../../common';
3032
import { AppState } from '../../../store';
3133
import { getEmbeddingModelDimensions, parseModelInputs } from '../../../utils';
@@ -80,15 +82,8 @@ export function QuickConfigureOptionalFields(
8082
};
8183
break;
8284
}
83-
case WORKFLOW_TYPE.RAG: {
84-
defaultFieldValues = {
85-
textField: DEFAULT_TEXT_FIELD,
86-
promptField: '',
87-
llmResponseField: DEFAULT_LLM_RESPONSE_FIELD,
88-
};
89-
break;
90-
}
91-
case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: {
85+
case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG:
86+
case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: {
9287
defaultFieldValues = {
9388
textField: DEFAULT_TEXT_FIELD,
9489
vectorField: DEFAULT_VECTOR_FIELD,
@@ -157,11 +152,7 @@ export function QuickConfigureOptionalFields(
157152
fullWidth={true}
158153
label={'Text field'}
159154
isInvalid={false}
160-
helpText={`The name of the text document field to be ${
161-
props.workflowType === WORKFLOW_TYPE.RAG
162-
? 'used as context to the large language model (LLM).'
163-
: 'embedded.'
164-
}`}
155+
helpText={`The name of the text document field to be embedded`}
165156
>
166157
<EuiCompressedFieldText
167158
data-testid="textFieldQuickConfigure"
@@ -198,10 +189,7 @@ export function QuickConfigureOptionalFields(
198189
<EuiSpacer size="s" />
199190
</>
200191
)}
201-
{(props.workflowType === WORKFLOW_TYPE.SEMANTIC_SEARCH ||
202-
props.workflowType === WORKFLOW_TYPE.MULTIMODAL_SEARCH ||
203-
props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH ||
204-
props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) && (
192+
{isVectorSearchUseCase(props.workflowType) && (
205193
<>
206194
<EuiCompressedFormRow
207195
fullWidth={true}
@@ -244,8 +232,7 @@ export function QuickConfigureOptionalFields(
244232
)}
245233
</>
246234
)}
247-
{(props.workflowType === WORKFLOW_TYPE.RAG ||
248-
props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) && (
235+
{isRAGUseCase(props.workflowType) && (
249236
<>
250237
<EuiCompressedFormRow
251238
fullWidth={true}

public/pages/workflows/new_workflow/utils.ts

+27-13
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,14 @@ export function enrichPresetWorkflowWithUiMetadata(
5656
uiMetadata = fetchHybridSearchMetadata(workflowVersion);
5757
break;
5858
}
59-
case WORKFLOW_TYPE.RAG: {
60-
uiMetadata = fetchRAGMetadata(workflowVersion);
61-
break;
62-
}
6359
case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: {
6460
uiMetadata = fetchVectorSearchWithRAGMetadata(workflowVersion);
6561
break;
6662
}
63+
case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: {
64+
uiMetadata = fetchHybridSearchWithRAGMetadata(workflowVersion);
65+
break;
66+
}
6767
default: {
6868
uiMetadata = fetchEmptyMetadata();
6969
break;
@@ -243,36 +243,50 @@ export function fetchHybridSearchMetadata(version: string): UIState {
243243
return baseState;
244244
}
245245

246-
export function fetchRAGMetadata(version: string): UIState {
246+
export function fetchVectorSearchWithRAGMetadata(version: string): UIState {
247247
let baseState = fetchEmptyMetadata();
248-
baseState.type = WORKFLOW_TYPE.RAG;
249-
baseState.config.ingest.index.name.value = generateId('my_index', 6);
250-
baseState.config.search.request.value = customStringify(FETCH_ALL_QUERY);
248+
baseState.type = WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG;
249+
// Ingest config: knn index w/ an ML inference processor
250+
baseState.config.ingest.enrich.processors = [new MLIngestProcessor().toObj()];
251+
baseState.config.ingest.index.name.value = generateId('knn_index', 6);
252+
baseState.config.ingest.index.settings.value = customStringify({
253+
[`index.knn`]: true,
254+
});
255+
// Search config: match query => ML inference processor for generating embeddings =>
256+
// ML inference processor for returning LLM-generated response of results
257+
baseState.config.search.request.value = customStringify(MATCH_QUERY_TEXT);
258+
baseState.config.search.enrichRequest.processors = [
259+
injectQueryTemplateInProcessor(
260+
new MLSearchRequestProcessor().toObj(),
261+
KNN_QUERY
262+
),
263+
];
251264
baseState.config.search.enrichResponse.processors = [
252265
new MLSearchResponseProcessor().toObj(),
253266
];
254267
return baseState;
255268
}
256269

257-
export function fetchVectorSearchWithRAGMetadata(version: string): UIState {
270+
export function fetchHybridSearchWithRAGMetadata(version: string): UIState {
258271
let baseState = fetchEmptyMetadata();
259-
baseState.type = WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG;
272+
baseState.type = WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG;
260273
// Ingest config: knn index w/ an ML inference processor
261274
baseState.config.ingest.enrich.processors = [new MLIngestProcessor().toObj()];
262275
baseState.config.ingest.index.name.value = generateId('knn_index', 6);
263276
baseState.config.ingest.index.settings.value = customStringify({
264277
[`index.knn`]: true,
265278
});
266-
// Search config: match query => ML inference processor for generating embeddings =>
267-
// ML inference processor for returning LLM-generated response of results
279+
// Search config: match query => ML inference processor for generating embeddings
280+
// with hybrid search => ML inference processor for returning LLM-generated response of results
268281
baseState.config.search.request.value = customStringify(MATCH_QUERY_TEXT);
269282
baseState.config.search.enrichRequest.processors = [
270283
injectQueryTemplateInProcessor(
271284
new MLSearchRequestProcessor().toObj(),
272-
KNN_QUERY
285+
HYBRID_SEARCH_QUERY_MATCH_KNN
273286
),
274287
];
275288
baseState.config.search.enrichResponse.processors = [
289+
new NormalizationProcessor().toObj(),
276290
new MLSearchResponseProcessor().toObj(),
277291
];
278292
return baseState;

0 commit comments

Comments (0)