Skip to content

Commit 6f8122f

Browse files
committed
addressed comments and cleaned up defaults/templates
Signed-off-by: Amit Galitzky <amgalitz@amazon.com>
1 parent a9913fc commit 6f8122f

21 files changed

+81
-49
lines changed

src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public enum DefaultUseCases {
2828
COHERE_EMBEDDING_MODEL_DEPLOY(
2929
"cohere-embedding_model_deploy",
3030
"defaults/cohere-embedding-defaults.json",
31-
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
31+
"substitutionTemplates/deploy-remote-model-extra-params-template.json"
3232
),
3333
/** defaults file and substitution ready template for Bedrock Titan embedding model */
3434
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(

src/main/java/org/opensearch/flowframework/util/ParseUtils.java

+38-22
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public class ParseUtils {
5959
// Matches ${{ foo.bar }} (whitespace optional) with capturing groups 1=foo, 2=bar
6060
// private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*(.+)\\.(.+?)\\s*\\}\\}");
6161
private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}");
62+
private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN = Pattern.compile("\"\\[(.*?)]\"");
6263

6364
private ParseUtils() {}
6465

@@ -70,7 +71,7 @@ private ParseUtils() {}
7071
* @param json the json string
7172
* @return The XContent parser for the json string
7273
* @throws IOException on failure to create the parser
73-
*/
74+
*/
7475
public static XContentParser jsonToParser(String json) throws IOException {
7576
XContentParser parser = JsonXContent.jsonXContent.createParser(
7677
NamedXContentRegistry.EMPTY,
@@ -104,7 +105,7 @@ public static String resourceToString(String path) throws IOException {
104105
* Builds an XContent object representing a map of String keys to String values.
105106
*
106107
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
107-
* @param map A map as key-value String pairs.
108+
* @param map A map as key-value String pairs.
108109
* @throws IOException on a build failure
109110
*/
110111
public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
@@ -119,7 +120,7 @@ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?
119120
* Builds an XContent object representing a map of String keys to Object values.
120121
*
121122
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
122-
* @param map A map as key-value String to Object.
123+
* @param map A map as key-value String to Object.
123124
* @throws IOException on a build failure
124125
*/
125126
public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
@@ -138,7 +139,7 @@ public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?
138139
* Builds an XContent object representing a LLMSpec.
139140
*
140141
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
141-
* @param llm LLMSpec
142+
* @param llm LLMSpec
142143
* @throws IOException on a build failure
143144
*/
144145
public static void buildLLMMap(XContentBuilder xContentBuilder, LLMSpec llm) throws IOException {
@@ -171,6 +172,7 @@ public static Map<String, String> parseStringToStringMap(XContentParser parser)
171172
* Parses an XContent object representing a map of String keys to Object values.
172173
* The Object value here can either be a string or a map
173174
* If an array is found in the given parser we conver the array to a string representation of the array
175+
*
174176
* @param parser An XContent parser whose position is at the start of the map object to parse
175177
* @return A map as identified by the key-value pairs in the XContent
176178
* @throws IOException on a parse failure
@@ -189,10 +191,13 @@ public static Map<String, Object> parseStringToObjectMap(XContentParser parser)
189191
// Handle array: convert it to a string representation
190192
List<String> elements = new ArrayList<>();
191193
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
192-
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
194+
if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) {
195+
elements.add(String.valueOf(parser.numberValue())); // If number value don't add escaping quotes
196+
} else {
197+
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
198+
}
193199
}
194-
String arrayString = "[" + String.join(", ", elements) + "]";
195-
map.put(fieldName, arrayString);
200+
map.put(fieldName, elements.toString());
196201
} else {
197202
// Otherwise, parse it as a string
198203
map.put(fieldName, parser.text());
@@ -220,6 +225,7 @@ public static Instant parseInstant(XContentParser parser) throws IOException {
220225
* (e.g., john||own_index,testrole|__user__, no backend role so you see two verticle line after john.).
221226
* This is the user string format used internally in the OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT and may be
222227
* parsed using User.parse(string).
228+
*
223229
* @param client Client containing user info. A public API request will fill in the user info in the thread context.
224230
* @return parsed user object
225231
*/
@@ -233,7 +239,7 @@ public static User getUserContext(Client client) {
233239
* Creates a XContentParser from a given Registry
234240
*
235241
* @param xContentRegistry main registry for serializable content
236-
* @param bytesReference given bytes to be parsed
242+
* @param bytesReference given bytes to be parsed
237243
* @return bytesReference of {@link java.time.Instant}
238244
* @throws IOException IOException if content can't be parsed correctly
239245
*/
@@ -244,7 +250,8 @@ public static XContentParser createXContentParserFromRegistry(NamedXContentRegis
244250

245251
/**
246252
* Generates a string to string Map
247-
* @param map content map
253+
*
254+
* @param map content map
248255
* @param fieldName fieldName
249256
* @return instance of the map
250257
*/
@@ -260,15 +267,15 @@ public static Map<String, String> getStringToStringMap(Object map, String fieldN
260267
* Creates a map containing the specified input keys, with values derived from template data or previous node
261268
* output.
262269
*
263-
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
264-
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
265-
* @param currentNodeInputs Input params and content for this node, from workflow parsing
266-
* @param outputs WorkflowData content of previous steps
270+
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
271+
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
272+
* @param currentNodeInputs Input params and content for this node, from workflow parsing
273+
* @param outputs WorkflowData content of previous steps
267274
* @param previousNodeInputs Input params for this node that come from previous steps
268-
* @param params Params that came from REST path
275+
* @param params Params that came from REST path
269276
* @return A map containing the requiredInputKeys with their corresponding values,
270-
* and optionalInputKeys with their corresponding values if present.
271-
* Throws a {@link FlowFrameworkException} if a required key is not present.
277+
* and optionalInputKeys with their corresponding values if present.
278+
* Throws a {@link FlowFrameworkException} if a required key is not present.
272279
*/
273280
public static Map<String, Object> getInputsFromPreviousSteps(
274281
Set<String> requiredInputKeys,
@@ -357,9 +364,10 @@ public static Map<String, Object> getInputsFromPreviousSteps(
357364

358365
/**
359366
* Executes substitution on the given value by looking at any matching values in either the ouputs or params map
360-
* @param value the Object that will have the substitution done on
367+
*
368+
* @param value the Object that will have the substitution done on
361369
* @param outputs potential location of values to be substituted in
362-
* @param params potential location of values to be subsituted in
370+
* @param params potential location of values to be subsituted in
363371
* @return the substituted object back
364372
*/
365373
public static Object conditionallySubstitute(Object value, Map<String, WorkflowData> outputs, Map<String, String> params) {
@@ -403,6 +411,7 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD
403411

404412
/**
405413
* Generates a string based on an arbitrary String to object map using Jackson
414+
*
406415
* @param map content map
407416
* @return instance of the string
408417
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
@@ -415,6 +424,7 @@ public static String parseArbitraryStringToObjectMapToString(Map<String, Object>
415424

416425
/**
417426
* Generates a String to String map based on a Json File
427+
*
418428
* @param path file path
419429
* @return instance of the string
420430
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
@@ -430,15 +440,21 @@ public static Map<String, String> parseJsonFileToStringToStringMap(String path)
430440
* (e.g. "[\"text\", \"hello\"]" to "["text", "hello"]"), this is needed for processors that take in string arrays,
431441
* This also removes the quotations around the array making the array valid to consume
432442
* (e.g. "weights": "[0.7, 0.3]" to "weights": [0.7, 0.3])
443+
*
433444
* @param input The inputString given to be transformed
434445
* @return the transformed string
435446
*/
436447
public static String removingBackslashesAndQuotesInArrayInJsonString(String input) {
437-
return Pattern.compile("\"\\[(.*?)]\"").matcher(input).replaceAll(matchResult -> {
448+
Matcher matcher = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher(input);
449+
StringBuffer result = new StringBuffer();
450+
while (matcher.find()) {
438451
// Extract matched content and remove backslashes before quotes
439-
String withoutEscapes = matchResult.group(1).replaceAll("\\\\\"", "\"");
452+
String withoutEscapes = matcher.group(1).replaceAll("\\\\\"", "\"");
440453
// Return the transformed string with the brackets but without the outer quotes
441-
return "[" + withoutEscapes + "]";
442-
});
454+
matcher.appendReplacement(result, "[" + withoutEscapes + "]");
455+
}
456+
// Append remaining input after the last match
457+
matcher.appendTail(result);
458+
return result.toString();
443459
}
444460
}

src/main/java/org/opensearch/flowframework/workflow/CreateIndexStep.java

+2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ public PlainActionFuture<WorkflowData> execute(
9090

9191
String configurations = (String) inputs.get(CONFIGURATIONS);
9292

93+
logger.info("configurations: " + configurations);
94+
9395
byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8);
9496
BytesReference configurationsBytes = new BytesArray(byteArr);
9597
CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName);

src/main/resources/defaults/bedrock-titan-embedding-defaults.json

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"create_connector.name": "Amazon Bedrock Connector: embedding",
55
"create_connector.description": "The connector to bedrock Titan embedding model",
66
"create_connector.region": "us-east-1",
7-
"create_connector.endpoint": "api.openai.com",
87
"create_connector.credential.access_key": "123",
98
"create_connector.credential.secret_key": "123",
109
"create_connector.credential.session_token": "123",

src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json src/main/resources/defaults/bedrock-titan-multimodal-defaults.json

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
{
22
"template.name": "deploy-bedrock-titan-multimodal-embedding-model",
3-
"template.description": "deploying Amazon Bedrock Titan multimodal embedding model ",
3+
"template.description": "Deploying Amazon Bedrock Titan multimodal embedding model ",
44
"create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
55
"create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
66
"create_connector.region": "us-east-1",
7-
"create_connector.input_docs_processed_step_size": 2,
8-
"create_connector.endpoint": "api.openai.com",
7+
"create_connector.input_docs_processed_step_size": "2",
98
"create_connector.credential.access_key": "123",
109
"create_connector.credential.secret_key": "123",
1110
"create_connector.credential.session_token": "123",

src/main/resources/defaults/cohere-chat-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "deploy-cohere-chat-model",
3-
"template.description": "deploying cohere chat model",
3+
"template.description": "Deploying a Cohere chat model",
44
"create_connector.name": "Cohere Chat Model",
55
"create_connector.description": "The connector to Cohere's public chat API",
66
"create_connector.protocol": "http",

src/main/resources/defaults/cohere-embedding-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "deploy-cohere-model",
3-
"template.description": "deploying cohere embedding model",
3+
"template.description": "Deploying a Cohere embedding model",
44
"create_connector.name": "cohere-embedding-connector",
55
"create_connector.description": "The connector to Cohere's public embed API",
66
"create_connector.protocol": "http",

src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "semantic search with cohere embedding",
3-
"template.description": "Setting up semantic search, with cohere embedding model",
3+
"template.description": "Setting up semantic search, with a Cohere embedding model",
44
"create_connector.name": "cohere-embedding-connector",
55
"create_connector.description": "The connector to Cohere's public embed API",
66
"create_connector.protocol": "http",

src/main/resources/defaults/conversational-search-defaults.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "deploy-cohere-chat-model",
3-
"template.description": "deploying cohere chat model",
3+
"template.description": "A template to deploy a Cohere chat model",
44
"create_connector.name": "Cohere Chat Model",
55
"create_connector.description": "The connector to Cohere's public chat API",
66
"create_connector.protocol": "http",
@@ -13,7 +13,7 @@
1313
"register_remote_model.description": "cohere-chat-model",
1414
"create_search_pipeline.pipeline_id": "rag-pipeline",
1515
"create_search_pipeline.retrieval_augmented_generation.tag": "openai_pipeline_demo",
16-
"create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline Using cohere Connector",
16+
"create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline using a Cohere chat model",
1717
"create_search_pipeline.retrieval_augmented_generation.context_field_list": "[\"text\"]",
1818
"create_search_pipeline.retrieval_augmented_generation.system_prompt": "You are a helpful assistant",
1919
"create_search_pipeline.retrieval_augmented_generation.user_instructions": "Generate a concise and informative answer in less than 100 words for the given question"

src/main/resources/defaults/local-sparse-search-biencoder-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "local-model-neural-sparse-search",
3-
"template.description": "setting up neural sparse search with local model",
3+
"template.description": "Setting up neural sparse search with pretrained local model",
44
"register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
55
"register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
66
"register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",

src/main/resources/defaults/multi-modal-search-defaults.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
"create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline",
55
"create_ingest_pipeline.description": "A text/image embedding pipeline",
66
"create_ingest_pipeline.model_id": "123",
7-
"create_ingest_pipeline.embedding": "vector_embedding",
7+
"text_image_embedding.embedding": "vector_embedding",
88
"text_image_embedding.field_map.text": "image_description",
99
"text_image_embedding.field_map.image": "image_binary",
1010
"create_index.name": "my-multimodal-nlp-index",
11-
"create_index.settings.number_of_shards": 2,
12-
"text_image_embedding.field_map.output.dimension": 1024,
11+
"create_index.settings.number_of_shards": "2",
12+
"text_image_embedding.field_map.output.dimension": "1024",
1313
"create_index.mappings.method.engine": "lucene",
1414
"create_index.mappings.method.name": "hnsw"
1515
}

src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"register_remote_model.description": "bedrock-multi-modal-embedding-model",
1818
"create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline",
1919
"create_ingest_pipeline.description": "A text/image embedding pipeline",
20-
"text_image_embedding.create_ingest_pipeline.embedding": "vector_embedding",
20+
"text_image_embedding.embedding": "vector_embedding",
2121
"text_image_embedding.field_map.text": "image_description",
2222
"text_image_embedding.field_map.image": "image_binary",
2323
"create_index.name": "my-multimodal-nlp-index",

src/main/resources/defaults/openai-chat-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "deploy-openai-chat-model",
3-
"template.description": "deploying openAI chat model",
3+
"template.description": "Deploying an OpenAI chat model",
44
"create_connector.name": "OpenAI Chat Connector",
55
"create_connector.description": "Connector to public OpenAI model",
66
"create_connector.protocol": "http",

src/main/resources/defaults/openai-embedding-defaults.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"template.name": "deploy-openai-model",
3-
"template.description": "deploying openAI embedding model",
3+
"template.description": "Deploying an OpenAI embedding model",
44
"create_connector.name": "OpenAI-embedding-connector",
55
"create_connector.description": "Connector to public OpenAI model",
66
"create_connector.protocol": "http",

src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}"
2727
},
2828
"credential": {
29-
"access_ key": "${{create_connector.credential.access_key}}",
29+
"access_key": "${{create_connector.credential.access_key}}",
3030
"secret_key": "${{create_connector.credential.secret_key}}",
3131
"session_token": "${{create_connector.credential.session_token}}"
3232
},

src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"id": "register_model",
4545
"type": "register_remote_model",
4646
"previous_node_inputs": {
47-
"create_connector_step_1": "parameters"
47+
"create_connector": "parameters"
4848
},
4949
"user_inputs": {
5050
"name": "${{register_remote_model.name}}",
@@ -56,7 +56,7 @@
5656
"id": "deploy_model",
5757
"type": "deploy_model",
5858
"previous_node_inputs": {
59-
"register_model_1": "model_id"
59+
"register_model": "model_id"
6060
}
6161
}
6262
],

src/main/resources/substitutionTemplates/deploy-remote-model-template.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
"id": "register_model",
4747
"type": "register_remote_model",
4848
"previous_node_inputs": {
49-
"create_connector_step_1": "parameters"
49+
"create_connector": "parameters"
5050
},
5151
"user_inputs": {
5252
"name": "${{register_remote_model.name}}",
@@ -58,7 +58,7 @@
5858
"id": "deploy_model",
5959
"type": "deploy_model",
6060
"previous_node_inputs": {
61-
"register_model_1": "model_id"
61+
"register_model": "model_id"
6262
}
6363
}
6464
],

src/main/resources/substitutionTemplates/multi-modal-search-template.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
{
2424
"text_image_embedding": {
2525
"model_id": "${{create_ingest_pipeline.model_id}}",
26-
"embedding": "${{create_ingest_pipeline.embedding}}",
26+
"embedding": "${{text_image_embedding.embedding}}",
2727
"field_map": {
2828
"text": "${{text_image_embedding.field_map.text}}",
2929
"image": "${{text_image_embedding.field_map.image}}"
@@ -53,7 +53,7 @@
5353
"id": {
5454
"type": "text"
5555
},
56-
"${{text_embedding.field_map.output}}": {
56+
"${{text_image_embedding.embedding}}": {
5757
"type": "knn_vector",
5858
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
5959
"method": {

0 commit comments

Comments
 (0)