Skip to content

Commit e2aa3b9

Browse files
opensearch-trigger-bot[bot]github-actions[bot]amitgalitz
authored
[Backport 2.x] Adding new use cases (#589)
* Adding new use cases (#588) adding new use cases Signed-off-by: Amit Galitzky <amgalitz@amazon.com> (cherry picked from commit eab5ab6) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> * removing _doc Signed-off-by: Amit Galitzky <amgalitz@amazon.com> --------- Signed-off-by: Amit Galitzky <amgalitz@amazon.com> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Amit Galitzky <amgalitz@amazon.com>
1 parent a5fcbde commit e2aa3b9

38 files changed

+1051
-185
lines changed

build.gradle

-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ dependencies {
179179

180180
// ZipArchive dependencies used for integration tests
181181
zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"
182-
183182
secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"
184183

185184
configurations.all {

src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java

+65-7
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,79 @@ public enum DefaultUseCases {
2121
/** defaults file and substitution ready template for OpenAI embedding model */
2222
OPEN_AI_EMBEDDING_MODEL_DEPLOY(
2323
"open_ai_embedding_model_deploy",
24-
"defaults/open-ai-embedding-defaults.json",
24+
"defaults/openai-embedding-defaults.json",
2525
"substitutionTemplates/deploy-remote-model-template.json"
2626
),
27-
/** defaults file and substitution ready template for cohere embedding model */
27+
/** defaults file and substitution ready template for Cohere embedding model */
2828
COHERE_EMBEDDING_MODEL_DEPLOY(
2929
"cohere-embedding_model_deploy",
3030
"defaults/cohere-embedding-defaults.json",
3131
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
3232
),
33+
/** defaults file and substitution ready template for Bedrock Titan embedding model */
34+
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
35+
"bedrock-titan-embedding_model_deploy",
36+
"defaults/bedrock-titan-embedding-defaults.json",
37+
"substitutionTemplates/deploy-remote-bedrock-model-template.json"
38+
),
39+
/** defaults file and substitution ready template for Bedrock Titan multimodal embedding model */
40+
BEDROCK_TITAN_MULTIMODAL_MODEL_DEPLOY(
41+
"bedrock-titan-multimodal_model_deploy",
42+
"defaults/bedrock-titan-multimodal-defaults.json",
43+
"substitutionTemplates/deploy-remote-bedrock-model-template.json"
44+
),
45+
/** defaults file and substitution ready template for Cohere chat model */
46+
COHERE_CHAT_MODEL_DEPLOY(
47+
"cohere-chat_model_deploy",
48+
"defaults/cohere-chat-defaults.json",
49+
"substitutionTemplates/deploy-remote-model-chat-template.json"
50+
),
51+
/** defaults file and substitution ready template for OpenAI chat model */
52+
OPENAI_CHAT_MODEL_DEPLOY(
53+
"openai-chat_model_deploy",
54+
"defaults/openai-chat-defaults.json",
55+
"substitutionTemplates/deploy-remote-model-chat-template.json"
56+
),
3357
/** defaults file and substitution ready template for local neural sparse model and ingest pipeline*/
34-
LOCAL_NEURAL_SPARSE_SEARCH(
35-
"local_neural_sparse_search",
36-
"defaults/local-sparse-search-defaults.json",
37-
"substitutionTemplates/neural-sparse-local-template.json"
38-
);
58+
LOCAL_NEURAL_SPARSE_SEARCH_BI_ENCODER(
59+
"local_neural_sparse_search_bi_encoder",
60+
"defaults/local-sparse-search-biencoder-defaults.json",
61+
"substitutionTemplates/neural-sparse-local-biencoder-template.json"
62+
),
63+
/** defaults file and substitution ready template for semantic search, no model creation*/
64+
SEMANTIC_SEARCH("semantic_search", "defaults/semantic-search-defaults.json", "substitutionTemplates/semantic-search-template.json"),
65+
/** defaults file and substitution ready template for multimodal search, no model creation*/
66+
MULTI_MODAL_SEARCH(
67+
"multi_modal_search",
68+
"defaults/multi-modal-search-defaults.json",
69+
"substitutionTemplates/multi-modal-search-template.json"
70+
),
71+
/** defaults file and substitution ready template for multimodal search with Bedrock Titan multimodal embedding model */
72+
MULTI_MODAL_SEARCH_WITH_BEDROCK_TITAN(
73+
"multi_modal_search_with_bedrock_titan_multi_modal",
74+
"defaults/multimodal-search-bedrock-titan-defaults.json",
75+
"substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json"
76+
),
77+
/** defaults file and substitution ready template for semantic search with query enricher processor attached, no model creation*/
78+
SEMANTIC_SEARCH_WITH_QUERY_ENRICHER(
79+
"semantic_search_with_query_enricher",
80+
"defaults/semantic-search-defaults.json",
81+
"substitutionTemplates/semantic-search-with-query-enricher-template.json"
82+
),
83+
/** defaults file and substitution ready template for semantic search with cohere embedding model*/
84+
SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING(
85+
"semantic_search_with_cohere_embedding",
86+
"defaults/cohere-embedding-semantic-search-defaults.json",
87+
"substitutionTemplates/semantic-search-with-model-template.json"
88+
),
89+
/** defaults file and substitution ready template for semantic search with query enricher processor attached and cohere embedding model*/
90+
SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING_AND_QUERY_ENRICHER(
91+
"semantic_search_with_cohere_embedding_query_enricher",
92+
"defaults/cohere-embedding-semantic-search-defaults.json",
93+
"substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json"
94+
),
95+
/** defaults file and substitution ready template for hybrid search, no model creation*/
96+
HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json");
3997

4098
private final String useCaseName;
4199
private final String defaultsFile;

src/main/java/org/opensearch/flowframework/util/ParseUtils.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,11 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD
378378
String regex = "\\$\\{\\{\\s*" + Pattern.quote(e.getKey()) + "\\s*\\}\\}";
379379
String replacement = e.getValue();
380380

381-
// Special handling for JSON strings that contain placeholders (connectors action)
382-
replacement = Matcher.quoteReplacement(replacement.replace("\"", "\\\""));
381+
// Correctly escape backslashes, newlines, and quotes for JSON compatibility
382+
replacement = replacement.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n");
383+
384+
// Use Matcher.quoteReplacement so that any $ and \ characters in the replacement are treated literally by replaceAll
385+
replacement = Matcher.quoteReplacement(replacement);
383386
value = ((String) value).replaceAll(regex, replacement);
384387
}
385388
}

src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,12 @@ public PlainActionFuture<WorkflowData> execute(
8484
String pipelineId = (String) inputs.get(PIPELINE_ID);
8585
String configurations = (String) inputs.get(CONFIGURATIONS);
8686

87-
byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8);
87+
// Special case for processors that have arrays that need to have the quotes removed
88+
// (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3])
89+
// Define a regular expression pattern to match stringified arrays
90+
String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]");
91+
92+
byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8);
8893
BytesReference configurationsBytes = new BytesArray(byteArr);
8994

9095
String pipelineToBeCreated = this.getName();

src/main/java/org/opensearch/flowframework/workflow/CreateConnectorStep.java

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ public void onFailure(Exception e) {
160160
parameters = getParameterMap(inputs.get(PARAMETERS_FIELD));
161161
credentials = getStringToStringMap(inputs.get(CREDENTIAL_FIELD), CREDENTIAL_FIELD);
162162
actions = getConnectorActionList(inputs.get(ACTIONS_FIELD));
163+
// TODO: check for leftover, un-needed substitution placeholders (e.g. ${{create_connector.*}}) and remove those fields, so we don't need near-duplicate templates
163164
} catch (IllegalArgumentException iae) {
164165
logger.error("IllegalArgumentException in connector configuration", iae);
165166
throw new FlowFrameworkException("IllegalArgumentException in connector configuration", RestStatus.BAD_REQUEST);

src/main/java/org/opensearch/flowframework/workflow/RegisterLocalSparseEncodingModelStep.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,12 @@ public RegisterLocalSparseEncodingModelStep(
5252

5353
@Override
5454
protected Set<String> getRequiredKeys() {
55-
return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL);
55+
return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT);
5656
}
5757

5858
@Override
5959
protected Set<String> getOptionalKeys() {
60-
return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD);
60+
return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD, MODEL_CONTENT_HASH_VALUE, URL, FUNCTION_NAME);
6161
}
6262

6363
@Override

src/main/java/org/opensearch/flowframework/workflow/WorkflowStepFactory.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,8 @@ public enum WorkflowSteps {
156156
/** Register Local Sparse Encoding Model Step */
157157
REGISTER_LOCAL_SPARSE_ENCODING_MODEL(
158158
RegisterLocalSparseEncodingModelStep.NAME,
159-
List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
160-
List.of(MODEL_ID, REGISTER_MODEL_STATUS),
159+
List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT),
160+
List.of(MODEL_ID, REGISTER_MODEL_STATUS, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
161161
List.of(OPENSEARCH_ML),
162162
TimeValue.timeValueSeconds(60)
163163
),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"template.name": "deploy-bedrock-titan-embedding-model",
3+
"template.description": "Deploying Amazon Bedrock Titan embedding model ",
4+
"create_connector.name": "Amazon Bedrock Connector: embedding",
5+
"create_connector.description": "The connector to bedrock Titan embedding model",
6+
"create_connector.region": "us-east-1",
7+
"create_connector.endpoint": "api.openai.com",
8+
"create_connector.credential.access_key": "123",
9+
"create_connector.credential.secret_key": "123",
10+
"create_connector.credential.session_token": "123",
11+
"create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke",
12+
"create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
13+
"create_connector.actions.pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
14+
"create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ",
15+
"register_remote_model.name": "Bedrock embedding model",
16+
"register_remote_model.description": "bedrock-embedding-model"
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"template.name": "deploy-bedrock-titan-multimodal-embedding-model",
3+
"template.description": "deploying Amazon Bedrock Titan multimodal embedding model ",
4+
"create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
5+
"create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
6+
"create_connector.region": "us-east-1",
7+
"create_connector.input_docs_processed_step_size": 2,
8+
"create_connector.endpoint": "api.openai.com",
9+
"create_connector.credential.access_key": "123",
10+
"create_connector.credential.secret_key": "123",
11+
"create_connector.credential.session_token": "123",
12+
"create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-image-v1/invoke",
13+
"create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText:-null}\", \"inputImage\": \"${parameters.inputImage:-null}\" }",
14+
"create_connector.actions.pre_process_function": "\n StringBuilder parametersBuilder = new StringBuilder(\"{\");\n if (params.text_docs.length > 0 && params.text_docs[0] != null) {\n parametersBuilder.append(\"\\\"inputText\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[0]);\n parametersBuilder.append(\"\\\"\");\n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\",\");\n }\n }\n \n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\"\\\"inputImage\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[1]);\n parametersBuilder.append(\"\\\"\");\n }\n parametersBuilder.append(\"}\");\n \n return \"{\" +\"\\\"parameters\\\":\" + parametersBuilder + \"}\";",
15+
"create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return null;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ",
16+
"register_remote_model.name": "Bedrock multi-modal embedding model",
17+
"register_remote_model.description": "bedrock-multi-modal-embedding-model"
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"template.name": "deploy-cohere-chat-model",
3+
"template.description": "deploying cohere chat model",
4+
"create_connector.name": "Cohere Chat Model",
5+
"create_connector.description": "The connector to Cohere's public chat API",
6+
"create_connector.protocol": "http",
7+
"create_connector.model": "command",
8+
"create_connector.endpoint": "api.cohere.ai",
9+
"create_connector.credential.key": "123",
10+
"create_connector.actions.url": "https://api.cohere.ai/v1/chat",
11+
"create_connector.actions.request_body": "{ \"message\": \"${parameters.message}\", \"model\": \"${parameters.model}\" }",
12+
"register_remote_model.name": "Cohere chat model",
13+
"register_remote_model.description": "cohere-chat-model"
14+
}

src/main/resources/defaults/cohere-embedding-defaults.json

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
"create_connector.model": "embed-english-v3.0",
88
"create_connector.input_type": "search_document",
99
"create_connector.truncate": "end",
10-
"create_connector.endpoint": "api.openai.com",
1110
"create_connector.credential.key": "123",
1211
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
1312
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"template.name": "semantic search with cohere embedding",
3+
"template.description": "Setting up semantic search, with cohere embedding model",
4+
"create_connector.name": "cohere-embedding-connector",
5+
"create_connector.description": "The connector to Cohere's public embed API",
6+
"create_connector.protocol": "http",
7+
"create_connector.model": "embed-english-v3.0",
8+
"create_connector.input_type": "search_document",
9+
"create_connector.truncate": "end",
10+
"create_connector.credential.key": "123",
11+
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
12+
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
13+
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
14+
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
15+
"register_remote_model.name": "Cohere english embed model",
16+
"register_remote_model.description": "cohere-embedding-model",
17+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
18+
"create_ingest_pipeline.description": "A text embedding pipeline",
19+
"text_embedding.field_map.input": "passage_text",
20+
"text_embedding.field_map.output": "passage_embedding",
21+
"create_index.name": "my-nlp-index",
22+
"create_index.settings.number_of_shards": "2",
23+
"create_index.mappings.method.engine": "lucene",
24+
"create_index.mappings.method.space_type": "l2",
25+
"create_index.mappings.method.name": "hnsw",
26+
"text_embedding.field_map.output.dimension": "1024",
27+
"create_search_pipeline.pipeline_id": "default_model_pipeline"
28+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"template.name": "hybrid-search",
3+
"template.description": "Setting up hybrid search, ingest pipeline and index",
4+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
5+
"create_ingest_pipeline.description": "A text embedding pipeline",
6+
"create_ingest_pipeline.model_id": "123",
7+
"text_embedding.field_map.input": "passage_text",
8+
"text_embedding.field_map.output": "passage_embedding",
9+
"create_index.name": "my-nlp-index",
10+
"create_index.settings.number_of_shards": "2",
11+
"create_index.mappings.method.engine": "lucene",
12+
"create_index.mappings.method.space_type": "l2",
13+
"create_index.mappings.method.name": "hnsw",
14+
"text_embedding.field_map.output.dimension": "1024",
15+
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
16+
"normalization-processor.normalization.technique": "min_max",
17+
"normalization-processor.combination.technique": "arithmetic_mean",
18+
"normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"template.name": "local-model-neural-sparse-search",
3+
"template.description": "setting up neural sparse search with local model",
4+
"register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
5+
"register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
6+
"register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",
7+
"register_local_sparse_encoding_model.deploy": "true",
8+
"register_local_sparse_encoding_model.version": "1.0.1",
9+
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline-sparse",
10+
"create_ingest_pipeline.description": "A sparse encoding ingest pipeline",
11+
"create_ingest_pipeline.text_embedding.field_map.input": "passage_text",
12+
"create_ingest_pipeline.text_embedding.field_map.output": "passage_embedding",
13+
"create_index.name": "my-nlp-index"
14+
}

src/main/resources/defaults/local-sparse-search-defaults.json

-17
This file was deleted.

0 commit comments

Comments
 (0)