-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathos_neural_search_setup.rb
113 lines (94 loc) · 2.89 KB
/
os_neural_search_setup.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
require 'opensearch'

# Set up an OpenSearch cluster for neural (semantic) search, following:
# https://opensearch.org/docs/latest/search-plugins/neural-search-tutorial/

# Initialize the client used for every request below.
# BUG FIX: the original created `client` but then called the undefined
# `os.api_client` everywhere; all requests now go through `client`.
client = OpenSearch::Client.new(url: 'http://localhost:9200')

# ML Commons cluster settings, in "flat settings" format as specified in
# https://docs.aws.amazon.com/opensearch-service/latest/developerguide/supported-operations.html#version_api_notes
settings = {
  persistent: {
    'plugins.ml_commons.only_run_on_ml_node': false,
    'plugins.ml_commons.native_memory_threshold': '99',
    'plugins.ml_commons.model_access_control_enabled': true
  }
}
# Apply the cluster settings, then read them back to confirm.
response = client.cluster.put_settings(body: settings)
response = client.cluster.get_settings

# Register a model group to hold the NLP model.
body = {
  name: "NLP_model_group",
  description: "A model group for NLP models",
}
response = client.perform_request('POST', '/_plugins/_ml/model_groups/_register', {}, body.to_json)
model_group_id = response.body['model_group_id']

# Register a pretrained sentence-transformer model into that group.
body = {
  "name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
  "version": "1.0.1",
  "model_group_id": model_group_id,
  "model_format": "TORCH_SCRIPT"
}
response = client.perform_request('POST', '/_plugins/_ml/models/_register', {}, body.to_json)
# Registration is asynchronous; this task id is polled further down.
task_id = response.body['task_id']
# Poll an ML Commons task every 10 seconds until it reaches COMPLETED, then
# return the final task response.
#
# BUG FIXES vs. the original:
# - `return if ...` returned nil on completion, so callers doing
#   `wait_for_task_id(id).body['model_id']` crashed with NoMethodError;
#   we now return the response itself.
# - The body referenced the undefined `os.api_client` (top-level locals are
#   not visible inside a `def`); the client is now a keyword parameter whose
#   default keeps the existing `wait_for_task_id(task_id)` call sites working.
# - A task that ends in FAILED no longer loops forever; it raises instead.
#
# task_id - String ML Commons task id to poll.
# client: - OpenSearch client used for the GET requests.
#
# Returns the last task response (state COMPLETED).
# Raises RuntimeError if the task reports FAILED.
def wait_for_task_id(task_id, client: OpenSearch::Client.new(url: 'http://localhost:9200'))
  loop do
    response = client.perform_request('GET', "/_plugins/_ml/tasks/#{task_id}")
    state = response.body['state']
    return response if state == 'COMPLETED'
    raise "ML Commons task #{task_id} failed" if state == 'FAILED'
    puts "Current state: #{state}"
    sleep 10
  end
end
# Wait for model registration to finish, then grab the model id.
# BUG FIX: all `os.api_client` calls below referenced an undefined `os`;
# they now use the `client` created at the top of the script.
response = wait_for_task_id(task_id)
model_id = response.body['model_id']

# Deploy the registered model (also asynchronous — wait for its task too).
response = client.perform_request('POST', "/_plugins/_ml/models/#{model_id}/_deploy")
task_id = response.body['task_id']
response = wait_for_task_id(task_id)

# Create an ingest pipeline that embeds the `text` field of each document
# into the `passage_embedding` vector field at index time.
pipeline_id = "nlp-ingest-pipeline"
pipeline_body = {
  description: "An NLP ingest pipeline",
  processors: [
    {
      text_embedding: {
        model_id: model_id,
        field_map: {
          "text": "passage_embedding"
        }
      }
    }
  ]
}
response = client.ingest.put_pipeline(id: pipeline_id, body: pipeline_body)

# Create a k-NN index wired to that pipeline, so ingested docs are embedded
# automatically. Reuse `pipeline_id` instead of repeating the string literal.
index_name = "my-nlp-index"
index_configuration = {
  settings: {
    "index.knn": true,
    "default_pipeline": pipeline_id
  },
  mappings: {
    properties: {
      id: {
        type: "text"
      },
      passage_embedding: {
        type: "knn_vector",
        # 768 = output dimension of msmarco-distilbert-base-tas-b
        dimension: 768,
        method: {
          engine: "lucene",
          space_type: "l2",
          name: "hnsw",
          parameters: {}
        }
      },
      text: {
        type: "text"
      }
    }
  }
}
# Create the index with the specified settings and mappings, and print the result.
response = client.indices.create(index: index_name, body: index_configuration)
puts response