Skip to content

Commit 9b072c4

Browse files
authored
add titan embedding v2 to blueprint (opensearch-project#2480)
Signed-off-by: Yaliang Wu <ylwu@amazon.com>
1 parent 2c11e7f commit 9b072c4

File tree

2 files changed

+38
-10
lines changed

2 files changed

+38
-10
lines changed

docs/remote_inference_blueprints/bedrock_connector_titan_embedding_blueprint.md

+37-8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ PUT /_cluster/settings
1919

2020
If you are using self-managed OpenSearch, you should supply AWS credentials:
2121

22+
If you are using Titan Text Embedding V2, change the "model" parameter to `amazon.titan-embed-text-v2:0`.
2223
```json
2324
POST /_plugins/_ml/connectors/_create
2425
{
@@ -28,7 +29,8 @@ POST /_plugins/_ml/connectors/_create
2829
"protocol": "aws_sigv4",
2930
"parameters": {
3031
"region": "<PLEASE ADD YOUR AWS REGION HERE>",
31-
"service_name": "bedrock"
32+
"service_name": "bedrock",
33+
"model": "amazon.titan-embed-text-v1"
3234
},
3335
"credential": {
3436
"access_key": "<PLEASE ADD YOUR AWS ACCESS KEY HERE>",
@@ -39,14 +41,14 @@ POST /_plugins/_ml/connectors/_create
3941
{
4042
"action_type": "predict",
4143
"method": "POST",
42-
"url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke",
44+
"url": "https://bedrock-runtime.${parameters.region}.amazonaws.com/model/${parameters.model}/invoke",
4345
"headers": {
4446
"content-type": "application/json",
4547
"x-amz-content-sha256": "required"
4648
},
4749
"request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
48-
"pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
49-
"post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n "
50+
"pre_process_function": "connector.pre_process.bedrock.embedding",
51+
"post_process_function": "connector.post_process.bedrock.embedding"
5052
}
5153
]
5254
}
@@ -64,7 +66,8 @@ POST /_plugins/_ml/connectors/_create
6466
"protocol": "aws_sigv4",
6567
"parameters": {
6668
"region": "<PLEASE ADD YOUR AWS REGION HERE>",
67-
"service_name": "bedrock"
69+
"service_name": "bedrock",
70+
"model": "amazon.titan-embed-text-v1"
6871
},
6972
"credential": {
7073
"roleArn": "<PLEASE ADD YOUR AWS ROLE ARN HERE>"
@@ -79,8 +82,8 @@ POST /_plugins/_ml/connectors/_create
7982
"x-amz-content-sha256": "required"
8083
},
8184
"request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
82-
"pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
83-
"post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n "
85+
"pre_process_function": "connector.pre_process.bedrock.embedding",
86+
"post_process_function": "connector.post_process.bedrock.embedding"
8487
}
8588
]
8689
}
@@ -151,7 +154,7 @@ POST /_plugins/_ml/models/sKR9PIsBQRofe4CSlUov/_predict
151154
}
152155
```
153156

154-
Sample response:
157+
Sample response of Titan Text Embedding V1:
155158
```json
156159
{
157160
"inference_results": [
@@ -177,3 +180,29 @@ Sample response:
177180
}
178181
```
179182

183+
Sample response of Titan Text Embedding V2:
184+
```json
185+
{
186+
"inference_results": [
187+
{
188+
"output": [
189+
{
190+
"name": "sentence_embedding",
191+
"data_type": "FLOAT32",
192+
"shape": [
193+
1024
194+
],
195+
"data": [
196+
-0.041385926,
197+
0.08503958,
198+
0.0026220535,
199+
...
200+
]
201+
}
202+
],
203+
"status_code": 200
204+
}
205+
]
206+
}
207+
```
208+

docs/tutorials/semantic_search/generate_embeddings_for_arrays_of_object.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ PUT my_books
8181
Create sub-pipeline to generate embedding for one item in the array.
8282

8383
This pipeline contains 3 processors:
84-
- set processor: The `text_embedding` processor is unable to identify "_ingest._value.title". You need to copy "_ingest._value.title" to a temporary field for text_embedding to process it.
84+
- set processor: The `text_embedding` processor is unable to identify "_ingest._value.title". You need to copy "_ingest._value.title" to a temporary field that does not already exist so that `text_embedding` can process it.
8585
- text_embedding processor: convert value of the temporary field to embedding
8686
- remove processor: remove temporary field
8787
```
@@ -228,7 +228,6 @@ Response
228228
"description": "This is first book"
229229
},
230230
{
231-
"title": "second book",
232231
"description": "This is second book"
233232
}
234233
]

0 commit comments

Comments
 (0)