diff --git a/common/constants.ts b/common/constants.ts
index 73b77f7f..de57fdf7 100644
--- a/common/constants.ts
+++ b/common/constants.ts
@@ -166,8 +166,8 @@ export enum WORKFLOW_TYPE {
SEMANTIC_SEARCH = 'Semantic Search',
MULTIMODAL_SEARCH = 'Multimodal Search',
HYBRID_SEARCH = 'Hybrid Search',
- RAG = 'RAG with Lexical Retrieval',
VECTOR_SEARCH_WITH_RAG = 'RAG with Vector Retrieval',
+ HYBRID_SEARCH_WITH_RAG = 'RAG with Hybrid Search',
CUSTOM = 'Custom Search',
UNKNOWN = 'Unknown',
}
diff --git a/common/utils.ts b/common/utils.ts
index 8205e795..8e71edf8 100644
--- a/common/utils.ts
+++ b/common/utils.ts
@@ -4,7 +4,7 @@
*/
import moment from 'moment';
-import { DATE_FORMAT_PATTERN, WORKFLOW_TYPE, Workflow } from './';
+import { DATE_FORMAT_PATTERN, WORKFLOW_TYPE } from './';
import { isEmpty } from 'lodash';
export function toFormattedDate(timestampMillis: number): String {
@@ -44,14 +44,25 @@ export function customStringifySingleLine(jsonObj: {}): string {
return JSON.stringify(jsonObj, undefined, 0);
}
-export function isVectorSearchUseCase(workflow: Workflow | undefined): boolean {
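+// returns true if the workflow type relies on vector embeddings (including RAG with vector or hybrid retrieval)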
+export function isVectorSearchUseCase(workflowType?: WORKFLOW_TYPE): boolean {
return (
- workflow?.ui_metadata?.type !== undefined &&
+ workflowType !== undefined &&
[
- WORKFLOW_TYPE.HYBRID_SEARCH,
- WORKFLOW_TYPE.MULTIMODAL_SEARCH,
WORKFLOW_TYPE.SEMANTIC_SEARCH,
+ WORKFLOW_TYPE.MULTIMODAL_SEARCH,
+ WORKFLOW_TYPE.HYBRID_SEARCH,
+ WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG,
+ WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG,
+ ].includes(workflowType)
+ );
+}
+
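+// returns true if the workflow type includes an LLM-generated (RAG) response step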
+export function isRAGUseCase(workflowType?: WORKFLOW_TYPE): boolean {
+ return (
+ workflowType !== undefined &&
+ [
WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG,
- ].includes(workflow?.ui_metadata?.type)
+ WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG,
+ ].includes(workflowType)
);
}
diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx
index 0269d74b..5da7fc64 100644
--- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx
+++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data.tsx
@@ -68,7 +68,7 @@ export function SourceData(props: SourceDataProps) {
// try to clear out any default values for the ML ingest processor, if applicable
if (
- isVectorSearchUseCase(props.workflow) &&
+ isVectorSearchUseCase(props.workflow?.ui_metadata?.type) &&
isEditModalOpen &&
selectedOption !== SOURCE_OPTIONS.EXISTING_INDEX
) {
diff --git a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx
index 7254ad8b..2d448547 100644
--- a/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx
+++ b/public/pages/workflow_detail/workflow_inputs/ingest_inputs/source_data_modal.tsx
@@ -112,7 +112,7 @@ export function SourceDataModal(props: SourceDataProps) {
// 2. Update several form values if an index is selected (and if vector search)
if (selectedIndex !== undefined) {
- if (isVectorSearchUseCase(props.workflow)) {
+ if (isVectorSearchUseCase(props.workflow?.ui_metadata?.type)) {
dispatch(getMappings({ index: selectedIndex, dataSourceId }))
.unwrap()
.then((resp: IndexMappings) => {
diff --git a/public/pages/workflows/new_workflow/quick_configure_modal.tsx b/public/pages/workflows/new_workflow/quick_configure_modal.tsx
index 06d5228a..ea1dcb34 100644
--- a/public/pages/workflows/new_workflow/quick_configure_modal.tsx
+++ b/public/pages/workflows/new_workflow/quick_configure_modal.tsx
@@ -59,6 +59,7 @@ import {
MODEL_STATE,
ML_REMOTE_MODEL_LINK,
MODEL_CATEGORY,
+ isRAGUseCase,
} from '../../../../common';
import { APP_PATH, getInitialValue } from '../../../utils';
import { AppState, createWorkflow, useAppDispatch } from '../../../store';
@@ -140,10 +141,7 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) {
// If not custom/blank, we will have more req'd form fields for the users to supply
if (workflowType !== WORKFLOW_TYPE.CUSTOM) {
// if a RAG workflow, require an LLM
- if (
- workflowType === WORKFLOW_TYPE.RAG ||
- workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG
- ) {
+ if (isRAGUseCase(workflowType)) {
tempFormValues = {
...tempFormValues,
llm: getInitialValue('model'),
@@ -160,24 +158,21 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) {
}),
};
}
- // all workflows besides custom and vanilla RAG require an embedding model
- if (workflowType !== WORKFLOW_TYPE.RAG) {
- tempFormValues = {
- ...tempFormValues,
- embeddingModel: getInitialValue('model'),
- };
- tempFormSchemaObj = {
- ...tempFormSchemaObj,
- embeddingModel: yup.object({
- id: yup
- .string()
- .trim()
- .min(1, 'Too short')
- .max(MAX_STRING_LENGTH, 'Too long')
- .required('Required'),
- }),
- };
- }
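+      // all non-custom workflow types require an embedding model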
+ tempFormValues = {
+ ...tempFormValues,
+ embeddingModel: getInitialValue('model'),
+ };
+ tempFormSchemaObj = {
+ ...tempFormSchemaObj,
+ embeddingModel: yup.object({
+ id: yup
+ .string()
+ .trim()
+ .min(1, 'Too short')
+ .max(MAX_STRING_LENGTH, 'Too long')
+ .required('Required'),
+ }),
+ };
}
setFormValues(tempFormValues);
setFormSchemaObj(tempFormSchemaObj);
@@ -297,30 +292,26 @@ export function QuickConfigureModal(props: QuickConfigureModalProps) {
/>
)}
-          {(props.workflow?.ui_metadata?.type === WORKFLOW_TYPE.RAG ||
-            props.workflow?.ui_metadata?.type ===
-              WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) &&
-            !isEmpty(deployedModels) && (
-              <EuiFlexItem>
-                <ModelField
-                  onModelChange={(modelId) =>
-                    setQuickConfigureFields({
-                      ...quickConfigureFields,
-                      llmId: modelId,
-                    })
-                  }
-                />
-              </EuiFlexItem>
-            )}
+          {isRAGUseCase(props.workflow?.ui_metadata?.type) && (
+            <EuiFlexItem>
+              <ModelField
+                onModelChange={(modelId) =>
+                  setQuickConfigureFields({
+                    ...quickConfigureFields,
+                    llmId: modelId,
+                  })
+                }
+              />
+            </EuiFlexItem>
+          )}
{props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.CUSTOM &&
- props.workflow?.ui_metadata?.type !== WORKFLOW_TYPE.RAG &&
!isEmpty(deployedModels) && (
<>
@@ -449,7 +440,7 @@ function injectQuickConfigureFields(
workflow.ui_metadata.config,
quickConfigureFields,
embeddingModelInterface,
- isVectorSearchUseCase(workflow)
+ isVectorSearchUseCase(workflow?.ui_metadata?.type)
);
workflow.ui_metadata.config = updateIndexConfig(
workflow.ui_metadata.config,
@@ -463,32 +454,19 @@ function injectQuickConfigureFields(
workflow.ui_metadata.config,
quickConfigureFields,
embeddingModelInterface,
- isVectorSearchUseCase(workflow)
- );
- }
- break;
- }
- case WORKFLOW_TYPE.RAG: {
- if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) {
- workflow.ui_metadata.config = updateIndexConfig(
- workflow.ui_metadata.config,
- quickConfigureFields
- );
- workflow.ui_metadata.config = updateRAGSearchResponseProcessors(
- workflow.ui_metadata.config,
- quickConfigureFields,
- llmInterface
+ isVectorSearchUseCase(workflow?.ui_metadata?.type)
);
}
break;
}
- case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: {
+ case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG:
+ case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: {
if (!isEmpty(quickConfigureFields) && workflow.ui_metadata?.config) {
workflow.ui_metadata.config = updateIngestProcessors(
workflow.ui_metadata.config,
quickConfigureFields,
embeddingModelInterface,
- isVectorSearchUseCase(workflow)
+ isVectorSearchUseCase(workflow?.ui_metadata?.type)
);
workflow.ui_metadata.config = updateIndexConfig(
workflow.ui_metadata.config,
@@ -502,7 +480,7 @@ function injectQuickConfigureFields(
workflow.ui_metadata.config,
quickConfigureFields,
embeddingModelInterface,
- isVectorSearchUseCase(workflow)
+ isVectorSearchUseCase(workflow?.ui_metadata?.type)
);
workflow.ui_metadata.config = updateRAGSearchResponseProcessors(
workflow.ui_metadata.config,
diff --git a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx
index 8f8c28e3..e9d3ec63 100644
--- a/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx
+++ b/public/pages/workflows/new_workflow/quick_configure_optional_fields.tsx
@@ -26,6 +26,8 @@ import {
ModelInterface,
QuickConfigureFields,
WORKFLOW_TYPE,
+ isRAGUseCase,
+ isVectorSearchUseCase,
} from '../../../../common';
import { AppState } from '../../../store';
import { getEmbeddingModelDimensions, parseModelInputs } from '../../../utils';
@@ -80,15 +82,8 @@ export function QuickConfigureOptionalFields(
};
break;
}
- case WORKFLOW_TYPE.RAG: {
- defaultFieldValues = {
- textField: DEFAULT_TEXT_FIELD,
- promptField: '',
- llmResponseField: DEFAULT_LLM_RESPONSE_FIELD,
- };
- break;
- }
- case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG: {
+ case WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG:
+ case WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG: {
defaultFieldValues = {
textField: DEFAULT_TEXT_FIELD,
vectorField: DEFAULT_VECTOR_FIELD,
@@ -157,11 +152,7 @@ export function QuickConfigureOptionalFields(
fullWidth={true}
label={'Text field'}
isInvalid={false}
- helpText={`The name of the text document field to be ${
- props.workflowType === WORKFLOW_TYPE.RAG
- ? 'used as context to the large language model (LLM).'
- : 'embedded.'
- }`}
+          helpText={`The name of the text document field to be embedded.`}
>
>
)}
- {(props.workflowType === WORKFLOW_TYPE.SEMANTIC_SEARCH ||
- props.workflowType === WORKFLOW_TYPE.MULTIMODAL_SEARCH ||
- props.workflowType === WORKFLOW_TYPE.HYBRID_SEARCH ||
- props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) && (
+ {isVectorSearchUseCase(props.workflowType) && (
<>
)}
- {(props.workflowType === WORKFLOW_TYPE.RAG ||
- props.workflowType === WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG) && (
+ {isRAGUseCase(props.workflowType) && (
              <>
diff --git a/public/pages/workflows/new_workflow/utils.ts b/public/pages/workflows/new_workflow/utils.ts
--- a/public/pages/workflows/new_workflow/utils.ts
+++ b/public/pages/workflows/new_workflow/utils.ts
+  // Search config: match query => ML inference processor for generating embeddings =>
+ // ML inference processor for returning LLM-generated response of results
+ baseState.config.search.request.value = customStringify(MATCH_QUERY_TEXT);
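+  // the injected query template rewrites the match query into a knn query using the generated embedding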
+ baseState.config.search.enrichRequest.processors = [
+ injectQueryTemplateInProcessor(
+ new MLSearchRequestProcessor().toObj(),
+ KNN_QUERY
+ ),
+ ];
baseState.config.search.enrichResponse.processors = [
new MLSearchResponseProcessor().toObj(),
];
return baseState;
}
-export function fetchVectorSearchWithRAGMetadata(version: string): UIState {
+export function fetchHybridSearchWithRAGMetadata(version: string): UIState {
let baseState = fetchEmptyMetadata();
- baseState.type = WORKFLOW_TYPE.VECTOR_SEARCH_WITH_RAG;
+ baseState.type = WORKFLOW_TYPE.HYBRID_SEARCH_WITH_RAG;
// Ingest config: knn index w/ an ML inference processor
baseState.config.ingest.enrich.processors = [new MLIngestProcessor().toObj()];
baseState.config.ingest.index.name.value = generateId('knn_index', 6);
baseState.config.ingest.index.settings.value = customStringify({
[`index.knn`]: true,
});
- // Search config: match query => ML inference processor for generating embeddings =>
- // ML inference processor for returning LLM-generated response of results
+ // Search config: match query => ML inference processor for generating embeddings
+ // with hybrid search => ML inference processor for returning LLM-generated response of results
baseState.config.search.request.value = customStringify(MATCH_QUERY_TEXT);
baseState.config.search.enrichRequest.processors = [
injectQueryTemplateInProcessor(
new MLSearchRequestProcessor().toObj(),
- KNN_QUERY
+ HYBRID_SEARCH_QUERY_MATCH_KNN
),
];
baseState.config.search.enrichResponse.processors = [
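+    // normalization processor normalizes & combines scores from the hybrid query's subqueries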
+ new NormalizationProcessor().toObj(),
new MLSearchResponseProcessor().toObj(),
];
return baseState;
diff --git a/public/utils/form_to_pipeline_utils.ts b/public/utils/form_to_pipeline_utils.ts
index cd61c323..3aae8866 100644
--- a/public/utils/form_to_pipeline_utils.ts
+++ b/public/utils/form_to_pipeline_utils.ts
@@ -8,6 +8,7 @@ import {
IProcessorConfig,
IngestPipelineConfig,
PROCESSOR_CONTEXT,
+ PROCESSOR_TYPE,
SearchPipelineConfig,
WorkflowConfig,
WorkflowFormValues,
@@ -83,8 +84,17 @@ export function formikToPartialPipeline(
requestProcessors,
context
),
+ // for search response, we need to explicitly separate out any phase results processors
+ phase_results_processors: processorConfigsToTemplateProcessors(
+ precedingProcessors.filter((processor) =>
+ isPhaseResultsProcessor(processor)
+ ),
+ context
+ ),
response_processors: processorConfigsToTemplateProcessors(
- precedingProcessors,
+ precedingProcessors.filter(
+ (processor) => !isPhaseResultsProcessor(processor)
+ ),
context
),
} as SearchPipelineConfig)
@@ -113,3 +123,8 @@ function getPrecedingProcessors(
});
return precedingProcessors;
}
+
+// currently the only phase results processor supported is the normalization processor
+function isPhaseResultsProcessor(processor: IProcessorConfig): boolean {
+ return processor.type === PROCESSOR_TYPE.NORMALIZATION;
+}
diff --git a/server/resources/templates/hybrid_search_with_rag.json b/server/resources/templates/hybrid_search_with_rag.json
new file mode 100644
index 00000000..b44d7502
--- /dev/null
+++ b/server/resources/templates/hybrid_search_with_rag.json
@@ -0,0 +1,14 @@
+{
+ "name": "RAG with Hybrid Search",
+ "description": "Build a search application that uses retrieval-augmented generation (RAG) to retrieve relevant documents using hybrid search, pass them to large language models, and synthesize answers.",
+ "version": {
+ "template": "1.0.0",
+ "compatibility": [
+ "2.19.0",
+ "3.0.0"
+ ]
+ },
+ "ui_metadata": {
+ "type": "RAG with Hybrid Search"
+ }
+}
\ No newline at end of file
diff --git a/server/resources/templates/lexical_search_with_rag.json b/server/resources/templates/lexical_search_with_rag.json
deleted file mode 100644
index 6c19863c..00000000
--- a/server/resources/templates/lexical_search_with_rag.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- "name": "RAG with Lexical Retrieval",
- "description": "Build a search application that uses retrieval-augmented generation (RAG) to retrieve keyword-matched documents using lexical search, pass them to large language models, and synthesize answers.",
- "version": {
- "template": "1.0.0",
- "compatibility": [
- "2.19.0",
- "3.0.0"
- ]
- },
- "ui_metadata": {
- "type": "RAG with Lexical Retrieval"
- }
-}
\ No newline at end of file