Skip to content

Commit 30bb554

Browse files
committed
Sync benchmark folder from main
Signed-off-by: Heemin Kim <heemin@amazon.com>
1 parent 6be0f0c commit 30bb554

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

41 files changed

+860
-328
lines changed

benchmarks/osb/README.md

+247-238
Large diffs are not rendered by default.

benchmarks/osb/params/no-train-params.json

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
"target_index_bulk_index_data_set_format": "hdf5",
1111
"target_index_bulk_index_data_set_path": "<path to data>",
1212
"target_index_bulk_index_clients": 10,
13+
"target_index_max_num_segments": 10,
14+
"target_index_force_merge_timeout": 45.0,
1315
"hnsw_ef_search": 512,
1416
"hnsw_ef_construction": 512,
1517
"hnsw_m": 16,

benchmarks/osb/params/train-params.json

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
"target_index_bulk_index_data_set_format": "hdf5",
1111
"target_index_bulk_index_data_set_path": "<path to data>",
1212
"target_index_bulk_index_clients": 10,
13+
"target_index_max_num_segments": 10,
14+
"target_index_force_merge_timeout": 45.0,
1315
"ivf_nlists": 10,
1416
"ivf_nprobes": 1,
1517
"pq_code_size": 8,

benchmarks/osb/procedures/no-train-test.json

+10
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,16 @@
4646
"retries": 100
4747
}
4848
},
49+
{
50+
"operation": {
51+
"name": "force-merge",
52+
"operation-type": "force-merge",
53+
"request-timeout": {{ target_index_force_merge_timeout }},
54+
"index": "{{ target_index_name }}",
55+
"mode": "polling",
56+
"max-num-segments": {{ target_index_max_num_segments }}
57+
}
58+
},
4959
{
5060
"operation": {
5161
"name": "knn-query-from-data-set",

benchmarks/osb/procedures/train-test.json

+10
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,16 @@
100100
"retries": 100
101101
}
102102
},
103+
{
104+
"operation": {
105+
"name": "force-merge",
106+
"operation-type": "force-merge",
107+
"request-timeout": {{ target_index_force_merge_timeout }},
108+
"index": "{{ target_index_name }}",
109+
"mode": "polling",
110+
"max-num-segments": {{ target_index_max_num_segments }}
111+
}
112+
},
103113
{
104114
"operation": {
105115
"name": "knn-query-from-data-set",

benchmarks/perf-tool/add-parent-doc-id-to-dataset.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ def run(self, source_path, target_path) -> None:
116116
possible_colors = ['red', 'green', 'yellow', 'blue', None]
117117
possible_tastes = ['sweet', 'salty', 'sour', 'bitter', None]
118118
max_age = 100
119-
min_field_size = 1000
120-
max_field_size = 10001
119+
min_field_size = 10
120+
max_field_size = 10
121121

122122
# Copy train and test data
123123
for key in in_file.keys():

benchmarks/perf-tool/okpt/io/config/parsers/test.py

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class TestConfig:
2424
test_id: str
2525
endpoint: str
2626
port: int
27+
timeout: int
2728
num_runs: int
2829
show_runs: bool
2930
setup: List[Step]
@@ -67,6 +68,7 @@ def parse(self, file_obj: TextIOWrapper) -> TestConfig:
6768
test_config = TestConfig(
6869
endpoint=config_obj['endpoint'],
6970
port=config_obj['port'],
71+
timeout=config_obj['timeout'],
7072
test_name=config_obj['test_name'],
7173
test_id=config_obj['test_id'],
7274
num_runs=config_obj['num_runs'],

benchmarks/perf-tool/okpt/io/config/schemas/test.yml

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ endpoint:
1212
port:
1313
type: integer
1414
default: 9200
15+
timeout:
16+
type: integer
17+
default: 60
1518
test_name:
1619
type: string
1720
test_id:

benchmarks/perf-tool/okpt/test/steps/factory.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from okpt.test.steps.steps import CreateIndexStep, DisableRefreshStep, RefreshIndexStep, DeleteIndexStep, \
1212
TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, \
13-
IngestNestedFieldStep, QueryStep, QueryWithFilterStep, QueryNestedFieldStep, GetStatsStep
13+
IngestNestedFieldStep, QueryStep, QueryWithFilterStep, QueryNestedFieldStep, GetStatsStep, WarmupStep
1414

1515

1616
def create_step(step_config: StepConfig) -> Step:
@@ -44,5 +44,7 @@ def create_step(step_config: StepConfig) -> Step:
4444
return ClearCacheStep(step_config)
4545
elif step_config.step_name == GetStatsStep.label:
4646
return GetStatsStep(step_config)
47+
elif step_config.step_name == WarmupStep.label:
48+
return WarmupStep(step_config)
4749

4850
raise ConfigurationError(f'Invalid step {step_config.step_name}')

benchmarks/perf-tool/okpt/test/steps/steps.py

+39-5
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ def __init__(self, step_config: StepConfig):
3838
default_port = 9200 if self.endpoint == 'localhost' else 80
3939
self.port = parse_int_param('port', step_config.config,
4040
step_config.implicit_config, default_port)
41+
self.timeout = parse_int_param('timeout', step_config.config, {}, 60)
4142
self.opensearch = get_opensearch_client(str(self.endpoint),
42-
int(self.port))
43+
int(self.port), int(self.timeout))
4344

4445

4546
class CreateIndexStep(OpenSearchStep):
@@ -163,6 +164,25 @@ def _get_measures(self) -> List[str]:
163164
return ['took']
164165

165166

167+
class WarmupStep(OpenSearchStep):
168+
"""See base class."""
169+
170+
label = 'warmup_operation'
171+
172+
def __init__(self, step_config: StepConfig):
173+
super().__init__(step_config)
174+
self.index_name = parse_string_param('index_name', step_config.config, {},
175+
None)
176+
177+
def _action(self):
178+
"""Performs warmup operation on an index."""
179+
warmup_operation(self.endpoint, self.port, self.index_name)
180+
return {}
181+
182+
def _get_measures(self) -> List[str]:
183+
return ['took']
184+
185+
166186
class TrainModelStep(OpenSearchStep):
167187
"""See base class."""
168188

@@ -739,9 +759,6 @@ def get_body(self, vec):
739759
}
740760
}
741761

742-
def get_exclude_fields(self):
743-
return ['nested_field.' + self.field_name]
744-
745762
class GetStatsStep(OpenSearchStep):
746763
"""See base class."""
747764

@@ -841,6 +858,23 @@ def delete_model(endpoint, port, model_id):
841858
return response.json()
842859

843860

861+
def warmup_operation(endpoint, port, index):
862+
"""
863+
Performs warmup operation on index to load native library files
864+
of that index to reduce query latencies.
865+
Args:
866+
endpoint: Endpoint OpenSearch is running on
867+
port: Port OpenSearch is running on
868+
index: index name
869+
Returns:
870+
number of shards the plugin succeeded and failed to warm up.
871+
"""
872+
response = requests.get('http://' + endpoint + ':' + str(port) +
873+
'/_plugins/_knn/warmup/' + index,
874+
headers={'content-type': 'application/json'})
875+
return response.json()
876+
877+
844878
def get_opensearch_client(endpoint: str, port: int, timeout=60):
845879
"""
846880
Get an opensearch client from an endpoint and port
@@ -947,7 +981,7 @@ def query_index(opensearch: OpenSearch, index_name: str, body: dict,
947981

948982

949983
def bulk_index(opensearch: OpenSearch, index_name: str, body: List):
950-
return opensearch.bulk(index=index_name, body=body, timeout='5m')
984+
return opensearch.bulk(index=index_name, body=body)
951985

952986
def get_segment_stats(opensearch: OpenSearch, index_name: str):
953987
return opensearch.indices.segments(index=index_name)

benchmarks/perf-tool/release-configs/faiss-hnsw/filtering/relaxed-filter/index.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"index": {
44
"knn": true,
55
"number_of_shards": 24,
6-
"number_of_replicas": 1
6+
"number_of_replicas": 1,
7+
"knn.algo_param.ef_search": 100
78
}
89
},
910
"mappings": {
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,40 @@
11
endpoint: [ENDPOINT]
2+
port: [PORT]
23
test_name: "Faiss HNSW Relaxed Filter Test"
34
test_id: "Faiss HNSW Relaxed Filter Test"
4-
num_runs: 10
5+
num_runs: 3
56
show_runs: false
67
steps:
78
- name: delete_index
89
index_name: target_index
910
- name: create_index
1011
index_name: target_index
11-
index_spec: [INDEX_SPEC_PATH]/relaxed-filter/index.json
12+
index_spec: release-configs/faiss-hnsw/filtering/relaxed-filter/index.json
1213
- name: ingest_multi_field
1314
index_name: target_index
1415
field_name: target_field
1516
bulk_size: 500
1617
dataset_format: hdf5
17-
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
18+
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
1819
attributes_dataset_name: attributes
1920
attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ]
2021
- name: refresh_index
2122
index_name: target_index
23+
- name: force_merge
24+
index_name: target_index
25+
max_num_segments: 1
26+
- name: warmup_operation
27+
index_name: target_index
2228
- name: query_with_filter
2329
k: 100
2430
r: 1
2531
calculate_recall: true
2632
index_name: target_index
2733
field_name: target_field
2834
dataset_format: hdf5
29-
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
35+
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
3036
neighbors_format: hdf5
31-
neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters-updated.hdf5
37+
neighbors_path: dataset/sift-128-euclidean-with-relaxed-filters.hdf5
3238
neighbors_dataset: neighbors_filter_5
33-
filter_spec: [INDEX_SPEC_PATH]/relaxed-filter-spec.json
39+
filter_spec: release-configs/faiss-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json
3440
filter_type: FILTER

benchmarks/perf-tool/release-configs/faiss-hnsw/filtering/restrictive-filter/index.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"index": {
44
"knn": true,
55
"number_of_shards": 24,
6-
"number_of_replicas": 1
6+
"number_of_replicas": 1,
7+
"knn.algo_param.ef_search": 100
78
}
89
},
910
"mappings": {
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,40 @@
11
endpoint: [ENDPOINT]
2+
port: [PORT]
23
test_name: "Faiss HNSW Restrictive Filter Test"
34
test_id: "Faiss HNSW Restrictive Filter Test"
4-
num_runs: 10
5+
num_runs: 3
56
show_runs: false
67
steps:
78
- name: delete_index
89
index_name: target_index
910
- name: create_index
1011
index_name: target_index
11-
index_spec: [INDEX_SPEC_PATH]/index.json
12+
index_spec: release-configs/faiss-hnsw/filtering/restrictive-filter/index.json
1213
- name: ingest_multi_field
1314
index_name: target_index
1415
field_name: target_field
1516
bulk_size: 500
1617
dataset_format: hdf5
17-
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
18+
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
1819
attributes_dataset_name: attributes
1920
attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ]
2021
- name: refresh_index
2122
index_name: target_index
2223
- name: force_merge
2324
index_name: target_index
2425
max_num_segments: 1
26+
- name: warmup_operation
27+
index_name: target_index
2528
- name: query_with_filter
2629
k: 100
2730
r: 1
2831
calculate_recall: true
2932
index_name: target_index
3033
field_name: target_field
3134
dataset_format: hdf5
32-
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
35+
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
3336
neighbors_format: hdf5
34-
neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5
37+
neighbors_path: dataset/sift-128-euclidean-with-restrictive-filters.hdf5
3538
neighbors_dataset: neighbors_filter_4
36-
filter_spec: [INDEX_SPEC_PATH]/restrictive-filter-spec.json
39+
filter_spec: release-configs/faiss-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json
3740
filter_type: FILTER

benchmarks/perf-tool/release-configs/faiss-hnsw/index.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"index": {
44
"knn": true,
55
"number_of_shards": 24,
6-
"number_of_replicas": 1
6+
"number_of_replicas": 1,
7+
"knn.algo_param.ef_search": 100
78
}
89
},
910
"mappings": {

benchmarks/perf-tool/release-configs/faiss-hnsw/nested/simple/index.json

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
}
99
},
1010
"mappings": {
11+
"_source": {
12+
"excludes": ["nested_field"]
13+
},
1114
"properties": {
1215
"nested_field": {
1316
"type": "nested",
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,35 @@
1-
endpoint: localhost
1+
endpoint: [ENDPOINT]
2+
port: [PORT]
23
test_name: "Faiss HNSW Test"
34
test_id: "Faiss HNSW Test"
4-
num_runs: 10
5+
num_runs: 3
56
show_runs: false
67
steps:
78
- name: delete_index
89
index_name: target_index
910
- name: create_index
1011
index_name: target_index
11-
index_spec: /home/ec2-user/[PATH]/index.json
12+
index_spec: release-configs/faiss-hnsw/index.json
1213
- name: ingest
1314
index_name: target_index
1415
field_name: target_field
1516
bulk_size: 500
1617
dataset_format: hdf5
17-
dataset_path: [DATASET_PATH]/sift-128-euclidean.hdf5
18+
dataset_path: dataset/sift-128-euclidean.hdf5
1819
- name: refresh_index
1920
index_name: target_index
2021
- name: force_merge
2122
index_name: target_index
2223
max_num_segments: 1
24+
- name: warmup_operation
25+
index_name: target_index
2326
- name: query
2427
k: 100
2528
r: 1
2629
calculate_recall: true
2730
index_name: target_index
2831
field_name: target_field
2932
dataset_format: hdf5
30-
dataset_path: [DATASET_PATH]/sift-128-euclidean.hdf5
33+
dataset_path: dataset/sift-128-euclidean.hdf5
3134
neighbors_format: hdf5
32-
neighbors_path: [DATASET_PATH]/sift-128-euclidean.hdf5
35+
neighbors_path: dataset/sift-128-euclidean.hdf5

0 commit comments

Comments
 (0)