From 1636461563047229cc58bf58cda44a3d7422d5af Mon Sep 17 00:00:00 2001 From: Tyler Ohlsen Date: Thu, 6 Mar 2025 11:23:30 -0800 Subject: [PATCH 1/5] Add new RAG + hybrid search preset Signed-off-by: Tyler Ohlsen --- common/constants.ts | 1 + common/utils.ts | 1 + .../new_workflow/quick_configure_modal.tsx | 10 +++++-- .../quick_configure_optional_fields.tsx | 9 ++++-- public/pages/workflows/new_workflow/utils.ts | 28 +++++++++++++++++++ .../templates/hybrid_search_with_rag.json | 14 ++++++++++ 6 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 server/resources/templates/hybrid_search_with_rag.json diff --git a/common/constants.ts b/common/constants.ts index 73b77f7f..6d9522ee 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -168,6 +168,7 @@ export enum WORKFLOW_TYPE { HYBRID_SEARCH = 'Hybrid Search', RAG = 'RAG with Lexical Retrieval', VECTOR_SEARCH_WITH_RAG = 'RAG with Vector Retrieval', + HYBRID_SEARCH_WITH_RAG = 'RAG with Hybrid Search', CUSTOM = 'Custom Search', UNKNOWN = 'Unknown', } diff --git a/common/utils.ts b/common/utils.ts index 8205e795..1a78cdcf 100644 --- a/common/utils.ts +++ b/common/utils.ts @@ -52,6 +52,7 @@ export function isVectorSearchUseCase(workflow: Workflow | undefined): boolean { WORKFLOW_TYPE.MULTIMODAL_SEARCH, WORKFLOW_TYPE.SEMANTIC_SEARCH, WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG, + WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG, ].includes(workflow?.ui_metadata?.type) ); } diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx index 06d5228a..0ee8b0c6 100644 --- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx @@ -142,7 +142,8 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { // if a RAG workflow, require an LLM if ( workflowType === WORKFLOW_TYPE.RAG || - workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG + workflowType === 
WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || + workflowType === WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG ) { tempFormValues = { ...tempFormValues, @@ -299,7 +300,9 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { )} {(props.workflow?.ui_metadata?.type === WORKFLOW_TYPE.RAG || props.workflow?.ui_metadata?.type === - WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) && + WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || + props.workflow?.ui_metadata?.type === + WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG) && !isEmpty(deployedModels) && ( )} {(props.workflowType === WORKFLOW_TYPE.RAG || - props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) && ( + props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || + props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG) && ( <> ML inference processor for generating embeddings + // with hybrid search => ML inference processor for returning LLM-generated response of results + baseState.config.search.request.value = customStringify(MATCH_QUERY_TEXT); + baseState.config.search.enrichRequest.processors = [ + injectQueryTemplateInProcessor( + new MLSearchRequestProcessor().toObj(), + HYBRID_SEARCH_QUERY_MATCH_KNN + ), + ]; + baseState.config.search.enrichResponse.processors = [ + new MLSearchResponseProcessor().toObj(), + ]; + return baseState; +} + // populate the `query_template` config value with a given query template // by default, we replace any vector pattern ("{{vector}}") with the unquoted // vector template placeholder (${vector}) so it becomes a proper template diff --git a/server/resources/templates/hybrid_search_with_rag.json b/server/resources/templates/hybrid_search_with_rag.json new file mode 100644 index 00000000..b44d7502 --- /dev/null +++ b/server/resources/templates/hybrid_search_with_rag.json @@ -0,0 +1,14 @@ +{ + "name": "RAG with Hybrid Search", + "description": "Build a search application that uses retrieval-augmented generation (RAG) to retrieve relevant documents using hybrid search, pass them to large 
language models, and synthesize answers.", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.19.0", + "3.0.0" + ] + }, + "ui_metadata": { + "type": "RAG with Hybrid Search" + } +} \ No newline at end of file From 545ae3d027c82d103b3efece1966edc312f9d6f1 Mon Sep 17 00:00:00 2001 From: Tyler Ohlsen Date: Thu, 6 Mar 2025 11:30:03 -0800 Subject: [PATCH 2/5] Add normalization processor Signed-off-by: Tyler Ohlsen --- public/pages/workflows/new_workflow/utils.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/public/pages/workflows/new_workflow/utils.ts b/public/pages/workflows/new_workflow/utils.ts index 84c4aa6a..6c5ae04c 100644 --- a/public/pages/workflows/new_workflow/utils.ts +++ b/public/pages/workflows/new_workflow/utils.ts @@ -301,6 +301,7 @@ export function fetchHybridSearchWithRAGMetadata(version: string): UIState { ), ]; baseState.config.search.enrichResponse.processors = [ + new NormalizationProcessor().toObj(), new MLSearchResponseProcessor().toObj(), ]; return baseState; From b7cf1f30508a0c92428dd1d686629e2f73e491ea Mon Sep 17 00:00:00 2001 From: Tyler Ohlsen Date: Thu, 6 Mar 2025 12:09:34 -0800 Subject: [PATCH 3/5] Fix bug of partial pipeline not bucketing normalization processor correctly Signed-off-by: Tyler Ohlsen --- public/utils/form_to_pipeline_utils.ts | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/public/utils/form_to_pipeline_utils.ts b/public/utils/form_to_pipeline_utils.ts index cd61c323..3aae8866 100644 --- a/public/utils/form_to_pipeline_utils.ts +++ b/public/utils/form_to_pipeline_utils.ts @@ -8,6 +8,7 @@ import { IProcessorConfig, IngestPipelineConfig, PROCESSOR_CONTEXT, + PROCESSOR_TYPE, SearchPipelineConfig, WorkflowConfig, WorkflowFormValues, @@ -83,8 +84,17 @@ export function formikToPartialPipeline( requestProcessors, context ), + // for search response, we need to explicitly separate out any phase results processors + phase_results_processors: processorConfigsToTemplateProcessors( + 
precedingProcessors.filter((processor) => + isPhaseResultsProcessor(processor) + ), + context + ), response_processors: processorConfigsToTemplateProcessors( - precedingProcessors, + precedingProcessors.filter( + (processor) => !isPhaseResultsProcessor(processor) + ), context ), } as SearchPipelineConfig) @@ -113,3 +123,8 @@ function getPrecedingProcessors( }); return precedingProcessors; } + +// currently the only phase results processor supported is the normalization processor +function isPhaseResultsProcessor(processor: IProcessorConfig): boolean { + return processor.type === PROCESSOR_TYPE.NORMALIZATION; +} From d3c40e5632c452e219b935978e487c5995bcbe27 Mon Sep 17 00:00:00 2001 From: Tyler Ohlsen Date: Thu, 6 Mar 2025 15:56:16 -0800 Subject: [PATCH 4/5] Update preset filter fns Signed-off-by: Tyler Ohlsen --- common/utils.ts | 21 +++++-- .../ingest_inputs/source_data.tsx | 2 +- .../ingest_inputs/source_data_modal.tsx | 2 +- .../new_workflow/quick_configure_modal.tsx | 58 ++++++++----------- .../quick_configure_optional_fields.tsx | 12 ++-- 5 files changed, 47 insertions(+), 48 deletions(-) diff --git a/common/utils.ts b/common/utils.ts index 1a78cdcf..a200f33c 100644 --- a/common/utils.ts +++ b/common/utils.ts @@ -44,15 +44,26 @@ export function customStringifySingleLine(jsonObj: {}): string { return JSON.stringify(jsonObj, undefined, 0); } -export function isVectorSearchUseCase(workflow: Workflow | undefined): boolean { +export function isVectorSearchUseCase(workflowType?: WORKFLOW_TYPE): boolean { return ( - workflow?.ui_metadata?.type !== undefined && + workflowType !== undefined && [ - WORKFLOW_TYPE.HYBRID_SEARCH, - WORKFLOW_TYPE.MULTIMODAL_SEARCH, WORKFLOW_TYPE.SEMANTIC_SEARCH, + WORKFLOW_TYPE.MULTIMODAL_SEARCH, + WORKFLOW_TYPE.HYBRID_SEARCH, + WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG, + WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG, + ].includes(workflowType) + ); +} + +export function isRAGUseCase(workflowType?: WORKFLOW_TYPE): boolean { + return ( + workflowType !== 
undefined && + [ + WORKFLOW_TYPE.RAG, WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG, WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG, - ].includes(workflow?.ui_metadata?.type) + ].includes(workflowType) ); } diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx index 0269d74b..5da7fc64 100644 --- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx +++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx @@ -68,7 +68,7 @@ export function SourceData(props: SourceDataProps) { // try to clear out any default values for the ML ingest processor, if applicable if ( - isVectorSearchUseCase(props.workflow) && + isVectorSearchUseCase(props.workflow?.ui_metadata?.type) && isEditModalOpen && selectedOption !== SOURCE_OPTIONS.EXISTING_INDEX ) { diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx index 7254ad8b..2d448547 100644 --- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx +++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx @@ -112,7 +112,7 @@ export function SourceDataModal(props: SourceDataProps) { // 2. 
Update several form values if an index is selected (and if vector search) if (selectedIndex !== undefined) { - if (isVectorSearchUseCase(props.workflow)) { + if (isVectorSearchUseCase(props.workflow?.ui_metadata?.type)) { dispatch(getMappings({ index: selectedIndex, dataSourceId })) .unwrap() .then((resp: IndexMappings) => { diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx index 0ee8b0c6..cea77f61 100644 --- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx @@ -59,6 +59,7 @@ import { MODEL_STATE, ML_REMOTE_MODEL_LINK, MODEL_CATEGORY, + isRAGUseCase, } from '../../../../common'; import { APP_PATH, getInitialValue } from '../../../utils'; import { AppState, createWorkflow, useAppDispatch } from '../../../store'; @@ -140,11 +141,7 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { // If not custom/blank, we will have more req'd form fields for the users to supply if (workflowType !== WORKFLOW_TYPE.CUSTOM) { // if a RAG workflow, require an LLM - if ( - workflowType === WORKFLOW_TYPE.RAG || - workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || - workflowType === WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG - ) { + if (isRAGUseCase(workflowType)) { tempFormValues = { ...tempFormValues, llm: getInitialValue('model'), @@ -298,30 +295,25 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { /> )} - {(props.workflow?.ui_metadata?.type === WORKFLOW_TYPE.RAG || - props.workflow?.ui_metadata?.type === - WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || - props.workflow?.ui_metadata?.type === - WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG) && - !isEmpty(deployedModels) && ( - - - setQuickConfigureFields({ - ...quickConfigureFields, - llmId: modelId, - }) - } - /> - - )} + {isRAGUseCase(props.workflow?.ui_metadata?.type) && ( + + + setQuickConfigureFields({ + ...quickConfigureFields, + 
llmId: modelId, + }) + } + /> + + )} {props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.CUSTOM && props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.RAG && !isEmpty(deployedModels) && ( @@ -452,7 +444,7 @@ function injectQuickConfigureFields( workflow.ui_metadata.config, quickConfigureFields, embeddingModelInterface, - isVectorSearchUseCase(workflow) + isVectorSearchUseCase(workflow?.ui_metadata?.type) ); workflow.ui_metadata.config = updateIndexConfig( workflow.ui_metadata.config, @@ -466,7 +458,7 @@ function injectQuickConfigureFields( workflow.ui_metadata.config, quickConfigureFields, embeddingModelInterface, - isVectorSearchUseCase(workflow) + isVectorSearchUseCase(workflow?.ui_metadata?.type) ); } break; @@ -492,7 +484,7 @@ function injectQuickConfigureFields( workflow.ui_metadata.config, quickConfigureFields, embeddingModelInterface, - isVectorSearchUseCase(workflow) + isVectorSearchUseCase(workflow?.ui_metadata?.type) ); workflow.ui_metadata.config = updateIndexConfig( workflow.ui_metadata.config, @@ -506,7 +498,7 @@ function injectQuickConfigureFields( workflow.ui_metadata.config, quickConfigureFields, embeddingModelInterface, - isVectorSearchUseCase(workflow) + isVectorSearchUseCase(workflow?.ui_metadata?.type) ); workflow.ui_metadata.config = updateRAGSearchResponseProcessors( workflow.ui_metadata.config, diff --git a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx index 516a35e0..da219c73 100644 --- a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx @@ -26,6 +26,8 @@ import { ModelInterface, QuickConfigureFields, WORKFLOW_TYPE, + isRAGUseCase, + isVectorSearchUseCase, } from '../../../../common'; import { AppState } from '../../../store'; import { getEmbeddingModelDimensions, parseModelInputs } from '../../../utils'; @@ -199,11 +201,7 @@ export function 
QuickConfigureOptionalFields( )} - {(props.workflowType === WORKFLOW_TYPE.SEMANTIC_SEARCH || - props.workflowType === WORKFLOW_TYPE.MULTIMODAL_SEARCH || - props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH || - props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || - props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG) && ( + {isVectorSearchUseCase(props.workflowType) && ( <> )} - {(props.workflowType === WORKFLOW_TYPE.RAG || - props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG || - props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG) && ( + {isRAGUseCase(props.workflowType) && ( <> Date: Mon, 10 Mar 2025 08:27:13 -0700 Subject: [PATCH 5/5] Remove RAG w lexical search Signed-off-by: Tyler Ohlsen --- common/constants.ts | 1 - common/utils.ts | 3 +- .../new_workflow/quick_configure_modal.tsx | 48 ++++++------------- .../quick_configure_optional_fields.tsx | 14 +----- public/pages/workflows/new_workflow/utils.ts | 15 ------ .../templates/lexical_search_with_rag.json | 14 ------ 6 files changed, 17 insertions(+), 78 deletions(-) delete mode 100644 server/resources/templates/lexical_search_with_rag.json diff --git a/common/constants.ts b/common/constants.ts index 6d9522ee..de57fdf7 100644 --- a/common/constants.ts +++ b/common/constants.ts @@ -166,7 +166,6 @@ export enum WORKFLOW_TYPE { SEMANTIC_SEARCH = 'Semantic Search', MULTIMODAL_SEARCH = 'Multimodal Search', HYBRID_SEARCH = 'Hybrid Search', - RAG = 'RAG with Lexical Retrieval', VECTOR_SEARCH_WITH_RAG = 'RAG with Vector Retrieval', HYBRID_SEARCH_WITH_RAG = 'RAG with Hybrid Search', CUSTOM = 'Custom Search', diff --git a/common/utils.ts b/common/utils.ts index a200f33c..8e71edf8 100644 --- a/common/utils.ts +++ b/common/utils.ts @@ -4,7 +4,7 @@ */ import moment from 'moment'; -import { DATE_FORMAT_PATTERN, WORKFLOW_TYPE, Workflow } from './'; +import { DATE_FORMAT_PATTERN, WORKFLOW_TYPE } from './'; import { isEmpty } from 'lodash'; export function toFormattedDate(timestampMillis: 
number): String { @@ -61,7 +61,6 @@ export function isRAGUseCase(workflowType?: WORKFLOW_TYPE): boolean { return ( workflowType !== undefined && [ - WORKFLOW_TYPE.RAG, WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG, WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG, ].includes(workflowType) diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx index cea77f61..ea1dcb34 100644 --- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx @@ -158,24 +158,21 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { }), }; } - // all workflows besides custom and vanilla RAG require an embedding model - if (workflowType !== WORKFLOW_TYPE.RAG) { - tempFormValues = { - ...tempFormValues, - embeddingModel: getInitialValue('model'), - }; - tempFormSchemaObj = { - ...tempFormSchemaObj, - embeddingModel: yup.object({ - id: yup - .string() - .trim() - .min(1, 'Too short') - .max(MAX_STRING_LENGTH, 'Too long') - .required('Required'), - }), - }; - } + tempFormValues = { + ...tempFormValues, + embeddingModel: getInitialValue('model'), + }; + tempFormSchemaObj = { + ...tempFormSchemaObj, + embeddingModel: yup.object({ + id: yup + .string() + .trim() + .min(1, 'Too short') + .max(MAX_STRING_LENGTH, 'Too long') + .required('Required'), + }), + }; } setFormValues(tempFormValues); setFormSchemaObj(tempFormSchemaObj); @@ -315,7 +312,6 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) { )} {props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.CUSTOM && - props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.RAG && !isEmpty(deployedModels) && ( <> @@ -463,20 +459,6 @@ function injectQuickConfigureFields( } break; } - case WORKFLOW_TYPE.RAG: { - if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) { - workflow.ui_metadata.config = updateIndexConfig( - workflow.ui_metadata.config, - quickConfigureFields - 
); - workflow.ui_metadata.config = updateRAGSearchResponseProcessors( - workflow.ui_metadata.config, - quickConfigureFields, - llmInterface - ); - } - break; - } case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: { if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) { diff --git a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx index da219c73..e9d3ec63 100644 --- a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx +++ b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx @@ -82,14 +82,6 @@ export function QuickConfigureOptionalFields( }; break; } - case WORKFLOW_TYPE.RAG: { - defaultFieldValues = { - textField: DEFAULT_TEXT_FIELD, - promptField: '', - llmResponseField: DEFAULT_LLM_RESPONSE_FIELD, - }; - break; - } case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: { defaultFieldValues = { @@ -160,11 +152,7 @@ export function QuickConfigureOptionalFields( fullWidth={true} label={'Text field'} isInvalid={false} - helpText={`The name of the text document field to be ${ - props.workflowType === WORKFLOW_TYPE.RAG - ? 'used as context to the large language model (LLM).' - : 'embedded.' - }`} + helpText={`The name of the text document field to be embedded`} >