Sync benchmark folder from main #1497

Merged 1 commit on Feb 23, 2024
485 changes: 247 additions & 238 deletions benchmarks/osb/README.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions benchmarks/osb/params/no-train-params.json
@@ -10,6 +10,8 @@
"target_index_bulk_index_data_set_format": "hdf5",
"target_index_bulk_index_data_set_path": "<path to data>",
"target_index_bulk_index_clients": 10,
"target_index_max_num_segments": 10,
"target_index_force_merge_timeout": 45.0,
"hnsw_ef_search": 512,
"hnsw_ef_construction": 512,
"hnsw_m": 16,
2 changes: 2 additions & 0 deletions benchmarks/osb/params/train-params.json
@@ -10,6 +10,8 @@
"target_index_bulk_index_data_set_format": "hdf5",
"target_index_bulk_index_data_set_path": "<path to data>",
"target_index_bulk_index_clients": 10,
"target_index_max_num_segments": 10,
"target_index_force_merge_timeout": 45.0,
"ivf_nlists": 10,
"ivf_nprobes": 1,
"pq_code_size": 8,
10 changes: 10 additions & 0 deletions benchmarks/osb/procedures/no-train-test.json
@@ -46,6 +46,16 @@
"retries": 100
}
},
{
"operation": {
"name": "force-merge",
"operation-type": "force-merge",
"request-timeout": {{ target_index_force_merge_timeout }},
"index": "{{ target_index_name }}",
"mode": "polling",
"max-num-segments": {{ target_index_max_num_segments }}
}
},
{
"operation": {
"name": "knn-query-from-data-set",
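The new force-merge task above merges the target index down to a configurable segment count before the search phase; its "polling" mode has OSB poll for completion rather than block on the request, which the sketch below does not reproduce. For context, a minimal sketch of the equivalent direct call with opensearch-py (host, index name, and parameter values are placeholders mirroring the defaults in the params files, not part of this PR):

```python
from opensearchpy import OpenSearch

# Sketch only: the force merge that the new OSB task requests, issued directly.
client = OpenSearch(hosts=[{'host': 'localhost', 'port': 9200}])
client.indices.forcemerge(index='target_index',
                          max_num_segments=10,   # target_index_max_num_segments
                          request_timeout=45)    # target_index_force_merge_timeout
```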
10 changes: 10 additions & 0 deletions benchmarks/osb/procedures/train-test.json
@@ -100,6 +100,16 @@
"retries": 100
}
},
{
"operation": {
"name": "force-merge",
"operation-type": "force-merge",
"request-timeout": {{ target_index_force_merge_timeout }},
"index": "{{ target_index_name }}",
"mode": "polling",
"max-num-segments": {{ target_index_max_num_segments }}
}
},
{
"operation": {
"name": "knn-query-from-data-set",
4 changes: 2 additions & 2 deletions benchmarks/perf-tool/add-parent-doc-id-to-dataset.py
@@ -116,8 +116,8 @@ def run(self, source_path, target_path) -> None:
possible_colors = ['red', 'green', 'yellow', 'blue', None]
possible_tastes = ['sweet', 'salty', 'sour', 'bitter', None]
max_age = 100
min_field_size = 1000
max_field_size = 10001
min_field_size = 10
max_field_size = 10

# Copy train and test data
for key in in_file.keys():
2 changes: 2 additions & 0 deletions benchmarks/perf-tool/okpt/io/config/parsers/test.py
@@ -24,6 +24,7 @@ class TestConfig:
test_id: str
endpoint: str
port: int
timeout: int
num_runs: int
show_runs: bool
setup: List[Step]
@@ -67,6 +68,7 @@ def parse(self, file_obj: TextIOWrapper) -> TestConfig:
test_config = TestConfig(
endpoint=config_obj['endpoint'],
port=config_obj['port'],
timeout=config_obj['timeout'],
test_name=config_obj['test_name'],
test_id=config_obj['test_id'],
num_runs=config_obj['num_runs'],
3 changes: 3 additions & 0 deletions benchmarks/perf-tool/okpt/io/config/schemas/test.yml
@@ -12,6 +12,9 @@ endpoint:
port:
type: integer
default: 9200
timeout:
type: integer
default: 60
test_name:
type: string
test_id:
4 changes: 3 additions & 1 deletion benchmarks/perf-tool/okpt/test/steps/factory.py
@@ -10,7 +10,7 @@

from okpt.test.steps.steps import CreateIndexStep, DisableRefreshStep, RefreshIndexStep, DeleteIndexStep, \
TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, \
IngestNestedFieldStep, QueryStep, QueryWithFilterStep, QueryNestedFieldStep, GetStatsStep
IngestNestedFieldStep, QueryStep, QueryWithFilterStep, QueryNestedFieldStep, GetStatsStep, WarmupStep


def create_step(step_config: StepConfig) -> Step:
@@ -44,5 +44,7 @@ def create_step(step_config: StepConfig) -> Step:
return ClearCacheStep(step_config)
elif step_config.step_name == GetStatsStep.label:
return GetStatsStep(step_config)
elif step_config.step_name == WarmupStep.label:
return WarmupStep(step_config)

raise ConfigurationError(f'Invalid step {step_config.step_name}')
44 changes: 39 additions & 5 deletions benchmarks/perf-tool/okpt/test/steps/steps.py
@@ -38,8 +38,9 @@ def __init__(self, step_config: StepConfig):
default_port = 9200 if self.endpoint == 'localhost' else 80
self.port = parse_int_param('port', step_config.config,
step_config.implicit_config, default_port)
self.timeout = parse_int_param('timeout', step_config.config, {}, 60)
self.opensearch = get_opensearch_client(str(self.endpoint),
int(self.port))
int(self.port), int(self.timeout))


class CreateIndexStep(OpenSearchStep):
Expand Down Expand Up @@ -163,6 +164,25 @@ def _get_measures(self) -> List[str]:
return ['took']


class WarmupStep(OpenSearchStep):
"""See base class."""

label = 'warmup_operation'

def __init__(self, step_config: StepConfig):
super().__init__(step_config)
self.index_name = parse_string_param('index_name', step_config.config, {},
None)

def _action(self):
"""Performs warmup operation on an index."""
warmup_operation(self.endpoint, self.port, self.index_name)
return {}

def _get_measures(self) -> List[str]:
return ['took']


class TrainModelStep(OpenSearchStep):
"""See base class."""

@@ -739,9 +759,6 @@ def get_body(self, vec):
}
}

def get_exclude_fields(self):
return ['nested_field.' + self.field_name]

class GetStatsStep(OpenSearchStep):
"""See base class."""

@@ -841,6 +858,23 @@ def delete_model(endpoint, port, model_id):
return response.json()


def warmup_operation(endpoint, port, index):
"""
Performs warmup operation on index to load native library files
of that index to reduce query latencies.
Args:
endpoint: Endpoint OpenSearch is running on
port: Port OpenSearch is running on
index: index name
Returns:
number of shards the plugin succeeded and failed to warm up.
"""
response = requests.get('http://' + endpoint + ':' + str(port) +
'/_plugins/_knn/warmup/' + index,
headers={'content-type': 'application/json'})
return response.json()


def get_opensearch_client(endpoint: str, port: int, timeout=60):
"""
Get an opensearch client from an endpoint and port
@@ -947,7 +981,7 @@ def query_index(opensearch: OpenSearch, index_name: str, body: dict,


def bulk_index(opensearch: OpenSearch, index_name: str, body: List):
return opensearch.bulk(index=index_name, body=body, timeout='5m')
return opensearch.bulk(index=index_name, body=body)

def get_segment_stats(opensearch: OpenSearch, index_name: str):
return opensearch.indices.segments(index=index_name)
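The steps.py changes above thread a per-test timeout from the test config into the OpenSearch client and register the new warmup step. A minimal sketch of how get_opensearch_client might apply that timeout, assuming the standard opensearch-py constructor options (the SSL settings shown are assumptions for illustration, not necessarily the repository's exact implementation):

```python
from opensearchpy import OpenSearch

def get_opensearch_client(endpoint: str, port: int, timeout=60):
    """Sketch: build a client whose default request timeout comes from the test config."""
    return OpenSearch(
        hosts=[{'host': endpoint, 'port': port}],
        timeout=timeout,     # default timeout (seconds) applied to every request
        use_ssl=False,       # assumption for illustration
        verify_certs=False,  # assumption for illustration
    )
```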
@@ -3,7 +3,8 @@
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
"number_of_replicas": 1,
"knn.algo_param.ef_search": 100
}
},
"mappings": {
Expand Down
@@ -1,34 +1,40 @@
endpoint: [ENDPOINT]
port: [PORT]
test_name: "Faiss HNSW Relaxed Filter Test"
test_id: "Faiss HNSW Relaxed Filter Test"
num_runs: 10
num_runs: 3
show_runs: false
steps:
- name: delete_index
index_name: target_index
- name: create_index
index_name: target_index
index_spec: [INDEX_SPEC_PATH]/relaxed-filter/index.json
index_spec: release-configs/faiss-hnsw/filtering/relaxed-filter/index.json
- name: ingest_multi_field
index_name: target_index
field_name: target_field
bulk_size: 500
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
attributes_dataset_name: attributes
attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ]
- name: refresh_index
index_name: target_index
- name: force_merge
index_name: target_index
max_num_segments: 1
- name: warmup_operation
index_name: target_index
- name: query_with_filter
k: 100
r: 1
calculate_recall: true
index_name: target_index
field_name: target_field
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
neighbors_format: hdf5
neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters-updated.hdf5
neighbors_path: dataset/sift-128-euclidean-with-relaxed-filters.hdf5
neighbors_dataset: neighbors_filter_5
filter_spec: [INDEX_SPEC_PATH]/relaxed-filter-spec.json
filter_spec: release-configs/faiss-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json
filter_type: FILTER
@@ -3,7 +3,8 @@
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
"number_of_replicas": 1,
"knn.algo_param.ef_search": 100
}
},
"mappings": {
@@ -1,37 +1,40 @@
endpoint: [ENDPOINT]
port: [PORT]
test_name: "Faiss HNSW Restrictive Filter Test"
test_id: "Faiss HNSW Restrictive Filter Test"
num_runs: 10
num_runs: 3
show_runs: false
steps:
- name: delete_index
index_name: target_index
- name: create_index
index_name: target_index
index_spec: [INDEX_SPEC_PATH]/index.json
index_spec: release-configs/faiss-hnsw/filtering/restrictive-filter/index.json
- name: ingest_multi_field
index_name: target_index
field_name: target_field
bulk_size: 500
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
attributes_dataset_name: attributes
attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ]
- name: refresh_index
index_name: target_index
- name: force_merge
index_name: target_index
max_num_segments: 1
- name: warmup_operation
index_name: target_index
- name: query_with_filter
k: 100
r: 1
calculate_recall: true
index_name: target_index
field_name: target_field
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5
dataset_path: dataset/sift-128-euclidean-with-attr.hdf5
neighbors_format: hdf5
neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5
neighbors_path: dataset/sift-128-euclidean-with-restrictive-filters.hdf5
neighbors_dataset: neighbors_filter_4
filter_spec: [INDEX_SPEC_PATH]/restrictive-filter-spec.json
filter_spec: release-configs/faiss-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json
filter_type: FILTER
@@ -3,7 +3,8 @@
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
"number_of_replicas": 1,
"knn.algo_param.ef_search": 100
}
},
"mappings": {
@@ -8,6 +8,9 @@
}
},
"mappings": {
"_source": {
"excludes": ["nested_field"]
},
"properties": {
"nested_field": {
"type": "nested",
15 changes: 9 additions & 6 deletions benchmarks/perf-tool/release-configs/faiss-hnsw/test.yml
@@ -1,32 +1,35 @@
endpoint: localhost
endpoint: [ENDPOINT]
port: [PORT]
test_name: "Faiss HNSW Test"
test_id: "Faiss HNSW Test"
num_runs: 10
num_runs: 3
show_runs: false
steps:
- name: delete_index
index_name: target_index
- name: create_index
index_name: target_index
index_spec: /home/ec2-user/[PATH]/index.json
index_spec: release-configs/faiss-hnsw/index.json
- name: ingest
index_name: target_index
field_name: target_field
bulk_size: 500
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean.hdf5
dataset_path: dataset/sift-128-euclidean.hdf5
- name: refresh_index
index_name: target_index
- name: force_merge
index_name: target_index
max_num_segments: 1
- name: warmup_operation
index_name: target_index
- name: query
k: 100
r: 1
calculate_recall: true
index_name: target_index
field_name: target_field
dataset_format: hdf5
dataset_path: [DATASET_PATH]/sift-128-euclidean.hdf5
dataset_path: dataset/sift-128-euclidean.hdf5
neighbors_format: hdf5
neighbors_path: [DATASET_PATH]/sift-128-euclidean.hdf5
neighbors_path: dataset/sift-128-euclidean.hdf5
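Each release config now runs a warmup_operation step after force merge so the index's native library files are loaded before the query phase. A hedged sketch of the underlying k-NN warmup call that the step issues (endpoint, port, and index name are placeholders; mirrors the warmup_operation helper in steps.py):

```python
import requests

# Placeholder host and index name, for illustration only.
response = requests.get('http://localhost:9200/_plugins/_knn/warmup/target_index',
                        headers={'content-type': 'application/json'})
# The warmup API reports how many shards were warmed up, e.g.:
# {'_shards': {'total': 24, 'successful': 24, 'failed': 0}}
print(response.json())
```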