Skip to content

Commit 1c6da01

Browse files
authored
Workflow to run performance tests using opensearch-benchmark (#3415)
Signed-off-by: Rishabh Singh <sngri@amazon.com>
1 parent f13a33b commit 1c6da01

22 files changed

+950
-0
lines changed

src/run_benchmark_test.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright OpenSearch Contributors
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# The OpenSearch Contributors require contributions made to
5+
# this file be licensed under the Apache-2.0 license or a
6+
# compatible open source license.
7+
8+
import sys
9+
10+
from manifests.bundle_manifest import BundleManifest
11+
from system import console
12+
from test_workflow.benchmark_test.benchmark_args import BenchmarkArgs
13+
from test_workflow.benchmark_test.benchmark_test_runners import BenchmarkTestRunners
14+
15+
16+
def main() -> int:
17+
"""
18+
Entry point for Benchmark Test with bundle manifest, config file containing the required arguments for running
19+
benchmarking test. Will call out in test.sh with benchmark as argument
20+
"""
21+
benchmark_args = BenchmarkArgs()
22+
console.configure(level=benchmark_args.logging_level)
23+
manifest = BundleManifest.from_file(benchmark_args.bundle_manifest)
24+
BenchmarkTestRunners.from_args(benchmark_args, manifest).run()
25+
return 0
26+
27+
28+
if __name__ == "__main__":
29+
sys.exit(main())

src/test_workflow/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
- [Identifying Regressions in Performance Tests](#identifying-regressions-in-performance-tests)
77
- [Identifying Regressions in Nightly Performance Tests](#identifying-regressions-in-nightly-performance-tests)
88
- [Identifying Issues in Longevity Tests](#identifying-issues-in-longevity-tests)
9+
- [Benchmark Tests](#benchmarking-tests)
910
- [Testing in CI/CD](#testing-in-cicd)
1011
- [Test Workflow (in development)](#test-workflow-in-development)
1112
- [Component-Level Details](#component-level-details)
@@ -174,6 +175,13 @@ Internal tools provide dashboards for monitoring cluster behavior during these t
174175
|Indexing Latency|Consistent during each test iteration|upward trends|
175176
|Query Latency|Varies based on the query being issued|upward trends|
176177

178+
### Benchmarking Tests
179+
180+
Runs benchmarking tests on a remote opensource OpenSearch cluster, uses [OpenSearch Benchmark](https://github.com/opensearch-project/OpenSearch-Benchmark).
181+
At a high-level the benchmarking test workflow uses [opensearch-cluster-cdk](https://github.com/opensearch-project/opensearch-cluster-cdk.git) to first set-up an OpenSearch cluster (single/multi-node) and then executes `opensearch-benchmark` to run benchmark test against that cluster. The performance metric that opensearch-benchmark generates during the run are ingested into another OS cluster for further analysis and dashboarding purpose.
182+
183+
The benchmarking tests will be run nightly and if you have a feature in any released/un-released OpenSearch version that you want to benchmark periodically please create an issue and the team will reach out to you. In case you want to run the benchmarking test locally you can use `opensearch-cluster-cdk` repo to spin up an OS cluster in your personal AWS account and then use `opensearch-benchmark` to run performance test against it. The detailed instructions are available on respective GitHub repositories.
184+
177185
## Testing in CI/CD
178186

179187
The CI/CD infrastructure is divided into two main workflows - `build` and `test`. The `build` workflow automates the process to generate all OpenSearch and OpenSearch Dashboards artifacts, and provide them as distributions to the `test` workflow, which runs exhaustive testing on the artifacts based on the artifact type. The next section talks in detail about the test workflow.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Copyright OpenSearch Contributors
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# The OpenSearch Contributors require contributions made to
5+
# this file be licensed under the Apache-2.0 license or a
6+
# compatible open source license.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# The OpenSearch Contributors require contributions made to
4+
# this file be licensed under the Apache-2.0 license or a
5+
# compatible open source license.
6+
#
7+
# Modifications Copyright OpenSearch Contributors. See
8+
# GitHub history for details.
9+
10+
import argparse
11+
import json
12+
import logging
13+
from typing import IO
14+
15+
from test_workflow.json_args import JsonArgs
16+
17+
18+
# Contains the arguments required to run a perf test.
19+
class BenchmarkArgs:
20+
bundle_manifest: IO
21+
stack_suffix: str
22+
config: IO
23+
keep: bool
24+
insecure: bool
25+
single_node: bool
26+
min_distribution: bool
27+
manager_node_count: int
28+
data_node_count: int
29+
client_node_count: int
30+
ingest_node_count: int
31+
ml_node_count: int
32+
data_node_storage: int
33+
ml_node_storage: int
34+
jvm_sys_props: str
35+
additional_config: str
36+
workload: str
37+
benchmark_config: IO
38+
user_tag: str
39+
target_hosts: str
40+
logging_level: int
41+
42+
def __init__(self) -> None:
43+
parser = argparse.ArgumentParser(description="Test an OpenSearch Bundle")
44+
parser.add_argument("--bundle-manifest", type=argparse.FileType("r"), help="Bundle Manifest file.",
45+
required=True)
46+
parser.add_argument("--suffix", dest="suffix", help="Suffix to be added to stack name for performance test")
47+
parser.add_argument("--component", dest="component", default="OpenSearch",
48+
help="Component name that needs to be performance tested")
49+
parser.add_argument("--config", type=argparse.FileType("r"), help="Config file.", required=True)
50+
parser.add_argument(
51+
"--without-security", dest="insecure", action="store_true",
52+
help="Force the security of the cluster to be disabled.", default=False)
53+
parser.add_argument("--keep", dest="keep", action="store_true",
54+
help="Do not delete the working temporary directory.")
55+
parser.add_argument("--single-node", dest="single_node", action="store_true",
56+
help="Is this a single node cluster")
57+
parser.add_argument("--min-distribution", dest="min_distribution", action="store_true",
58+
help="Is it the minimal OpenSearch distribution with no security and plugins")
59+
parser.add_argument("--manager-node-count", dest="manager_node_count",
60+
help="Number of cluster manager nodes, default is 3")
61+
parser.add_argument("--data-node-count", dest="data_node_count", help="Number of data nodes, default is 2")
62+
parser.add_argument("--client-node-count", dest="client_node_count",
63+
help="Number of dedicated client nodes, default is 0")
64+
parser.add_argument("--ingest-node-count", dest="ingest_node_count",
65+
help="Number of dedicated ingest nodes, default is 0")
66+
parser.add_argument("--ml-node-count", dest="ml_node_count",
67+
help="Number of dedicated machine learning nodes, default is 0")
68+
parser.add_argument("--jvm-sys-props", dest="jvm_sys_props",
69+
help="A comma-separated list of key=value pairs that will be added to jvm.options as JVM system properties.")
70+
parser.add_argument("--additional-config", nargs='*', action=JsonArgs, dest="additional_config",
71+
help="Additional opensearch.yml config parameters passed as JSON")
72+
parser.add_argument("--ml-node-storage", dest="ml_node_storage",
73+
help="User provided ml-node ebs block storage size defaults to 100Gb")
74+
parser.add_argument("--data-node-storage", dest="data_node_storage",
75+
help="User provided data-node ebs block storage size, defaults to 100Gb")
76+
parser.add_argument("--workload", dest="workload", help="workload type for the OpenSearch benchmarking",
77+
required=True)
78+
parser.add_argument("--benchmark-config", dest="benchmark_config",
79+
help="absolute filepath to custom opensearch-benchmark.ini config")
80+
parser.add_argument("--user-tag", dest="user_tag",
81+
help="Attach arbitrary text to the meta-data of each metric record")
82+
parser.add_argument(
83+
"-v", "--verbose", help="Show more verbose output.", action="store_const", default=logging.INFO,
84+
const=logging.DEBUG, dest="logging_level"
85+
)
86+
87+
args = parser.parse_args()
88+
self.bundle_manifest = args.bundle_manifest
89+
self.stack_suffix = args.suffix if args.suffix else None
90+
self.config = args.config
91+
self.keep = args.keep
92+
self.single_node = args.single_node
93+
self.min_distribution = args.min_distribution
94+
self.component = args.component
95+
self.insecure = args.insecure
96+
self.manager_node_count = args.manager_node_count if args.manager_node_count else None
97+
self.data_node_count = args.data_node_count if args.data_node_count else None
98+
self.client_node_count = args.client_node_count if args.client_node_count else None
99+
self.ingest_node_count = args.ingest_node_count if args.ingest_node_count else None
100+
self.ml_node_count = args.ml_node_count if args.ml_node_count else None
101+
self.jvm_sys_props = args.jvm_sys_props if args.jvm_sys_props else None
102+
self.data_node_storage = args.data_node_storage if args.data_node_storage else None
103+
self.ml_node_storage = args.ml_node_storage if args.ml_node_storage else None
104+
self.workload = args.workload
105+
self.benchmark_config = args.benchmark_config if args.benchmark_config else None
106+
self.user_tag = args.user_tag if args.user_tag else None
107+
self.additional_config = json.dumps(args.additional_config) if args.additional_config is not None else None
108+
self.logging_level = args.logging_level
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# Copyright OpenSearch Contributors
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# The OpenSearch Contributors require contributions made to
5+
# this file be licensed under the Apache-2.0 license or a
6+
# compatible open source license.
7+
8+
9+
import json
10+
import logging
11+
import os
12+
import subprocess
13+
from contextlib import contextmanager
14+
from typing import Any, Generator
15+
16+
import requests
17+
from requests.auth import HTTPBasicAuth
18+
from retry.api import retry_call # type: ignore
19+
20+
from manifests.bundle_manifest import BundleManifest
21+
from test_workflow.benchmark_test.benchmark_args import BenchmarkArgs
22+
23+
24+
class BenchmarkTestCluster:
25+
manifest: BundleManifest
26+
work_dir: str
27+
current_workspace: str
28+
args: BenchmarkArgs
29+
output_file: str
30+
params: str
31+
is_endpoint_public: bool
32+
cluster_endpoint: str
33+
cluster_endpoint_with_port: str
34+
35+
"""
36+
Represents a performance test cluster. This class deploys the opensearch bundle with CDK. Supports both single
37+
and multi-node clusters
38+
"""
39+
40+
def __init__(
41+
self,
42+
bundle_manifest: BundleManifest,
43+
config: dict,
44+
args: BenchmarkArgs,
45+
current_workspace: str
46+
) -> None:
47+
self.manifest = bundle_manifest
48+
self.current_workspace = current_workspace
49+
self.args = args
50+
self.output_file = "output.json"
51+
role = config["Constants"]["Role"]
52+
params_dict = self.setup_cdk_params(config)
53+
params_list = []
54+
for key, value in params_dict.items():
55+
if value:
56+
'''
57+
TODO: To send json input to typescript code from command line it needs to be enclosed in
58+
single-quotes, this is a temp fix to achieve that since the quoted string passed from command line in
59+
tesh.sh wrapper script gets un-quoted and we need to handle it here.
60+
'''
61+
if key == 'additionalConfig':
62+
params_list.append(f" -c {key}=\'{value}\'")
63+
else:
64+
params_list.append(f" -c {key}={value}")
65+
role_params = (
66+
f" --require-approval=never --plugin cdk-assume-role-credential-plugin"
67+
f" -c assume-role-credentials:writeIamRoleName={role} -c assume-role-credentials:readIamRoleName={role} "
68+
)
69+
self.params = "".join(params_list) + role_params
70+
self.is_endpoint_public = False
71+
self.cluster_endpoint = None
72+
self.cluster_endpoint_with_port = None
73+
self.stack_name = f"opensearch-infra-stack-{self.args.stack_suffix}-{self.manifest.build.id}-{self.manifest.build.architecture}"
74+
75+
def start(self) -> None:
76+
command = f"npm install && cdk deploy \"*\" {self.params} --outputs-file {self.output_file}"
77+
78+
logging.info(f'Executing "{command}" in {os.getcwd()}')
79+
subprocess.check_call(command, cwd=os.getcwd(), shell=True)
80+
with open(self.output_file, "r") as read_file:
81+
load_output = json.load(read_file)
82+
self.create_endpoint(load_output)
83+
self.wait_for_processing()
84+
85+
def create_endpoint(self, cdk_output: dict) -> None:
86+
loadbalancer_url = cdk_output[self.stack_name].get('loadbalancerurl', None)
87+
if loadbalancer_url is None:
88+
raise RuntimeError("Unable to fetch the cluster endpoint from cdk output")
89+
self.cluster_endpoint = loadbalancer_url
90+
self.cluster_endpoint_with_port = "".join([loadbalancer_url, ":", str(self.port)])
91+
92+
@property
93+
def endpoint(self) -> str:
94+
return self.cluster_endpoint
95+
96+
@property
97+
def endpoint_with_port(self) -> str:
98+
return self.cluster_endpoint_with_port
99+
100+
@property
101+
def port(self) -> int:
102+
return 80 if self.args.insecure else 443
103+
104+
def terminate(self) -> None:
105+
command = f"cdk destroy {self.stack_name} {self.params} --force"
106+
logging.info(f'Executing "{command}" in {os.getcwd()}')
107+
108+
subprocess.check_call(command, cwd=os.getcwd(), shell=True)
109+
110+
def wait_for_processing(self, tries: int = 3, delay: int = 15, backoff: int = 2) -> None:
111+
logging.info(f"Waiting for domain at {self.endpoint} to be up")
112+
protocol = "http://" if self.args.insecure else "https://"
113+
url = "".join([protocol, self.endpoint, "/_cluster/health"])
114+
request_args = {"url": url} if self.args.insecure else {"url": url, "auth": HTTPBasicAuth("admin", "admin"), "verify": False} # type: ignore
115+
retry_call(requests.get, fkwargs=request_args,
116+
tries=tries, delay=delay, backoff=backoff)
117+
118+
def setup_cdk_params(self, config: dict) -> dict:
119+
if self.args.stack_suffix:
120+
suffix = self.args.stack_suffix + '-' + self.manifest.build.id + '-' + self.manifest.build.architecture
121+
else:
122+
suffix = self.manifest.build.id + '-' + self.manifest.build.architecture
123+
return {
124+
"distributionUrl": self.manifest.build.location,
125+
"vpcId": config["Constants"]["VpcId"],
126+
"account": config["Constants"]["AccountId"],
127+
"region": config["Constants"]["Region"],
128+
"suffix": suffix,
129+
"securityDisabled": str(self.args.insecure).lower(),
130+
"cpuArch": self.manifest.build.architecture,
131+
"singleNodeCluster": str(self.args.single_node).lower(),
132+
"distVersion": self.manifest.build.version,
133+
"minDistribution": str(self.args.min_distribution).lower(),
134+
"serverAccessType": config["Constants"]["serverAccessType"],
135+
"restrictServerAccessTo": config["Constants"]["restrictServerAccessTo"],
136+
"additionalConfig": self.args.additional_config,
137+
"managerNodeCount": self.args.manager_node_count,
138+
"dataNodeCount": self.args.data_node_count,
139+
"clientNodeCount": self.args.client_node_count,
140+
"ingestNodeCount": self.args.ingest_node_count,
141+
"mlNodeCount": self.args.ml_node_count,
142+
"dataNodeStorage": self.args.data_node_storage,
143+
"mlNodeStorage": self.args.ml_node_storage,
144+
"jvmSysProps": self.args.jvm_sys_props
145+
}
146+
147+
@classmethod
148+
@contextmanager
149+
def create(cls, *args: Any) -> Generator[Any, None, None]:
150+
"""
151+
Set up the cluster. When this method returns, the cluster must be available to take requests.
152+
Throws ClusterCreationException if the cluster could not start for some reason. If this exception is thrown, the caller does not need to call "destroy".
153+
"""
154+
cluster = cls(*args)
155+
156+
try:
157+
cluster.start()
158+
yield cluster
159+
finally:
160+
cluster.terminate()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright OpenSearch Contributors
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# The OpenSearch Contributors require contributions made to
5+
# this file be licensed under the Apache-2.0 license or a
6+
# compatible open source license.
7+
8+
import abc
9+
import os
10+
11+
from manifests.bundle_manifest import BundleManifest
12+
from test_workflow.benchmark_test.benchmark_args import BenchmarkArgs
13+
14+
15+
class BenchmarkTestRunner(abc.ABC):
16+
args: BenchmarkArgs
17+
test_manifest: BundleManifest
18+
security: bool
19+
tests_dir: str
20+
21+
def __init__(self, args: BenchmarkArgs, test_manifest: BundleManifest) -> None:
22+
self.args = args
23+
self.test_manifest = test_manifest
24+
25+
self.security = "security" in self.test_manifest.components and not self.args.insecure
26+
self.tests_dir = os.path.join(os.getcwd(), "test-results", "benchmark-test", f"{'with' if self.security else 'without'}-security")
27+
os.makedirs(self.tests_dir, exist_ok=True)
28+
29+
@abc.abstractmethod
30+
def run_tests(self) -> None:
31+
pass
32+
33+
def run(self) -> None:
34+
self.run_tests()

0 commit comments

Comments
 (0)