Commit 31dfe45

Address feedback + fix merge conflicts

Signed-off-by: Finn Roblin <finnrobl@amazon.com>

2 parents acb9e94 + fa1adf2

31 files changed: +1941 -804 lines

.github/workflows/docker-test.yml (new file, +41)

@@ -0,0 +1,41 @@
+name: Docker Build and Test
+on:
+  pull_request:
+  workflow_dispatch:
+    inputs:
+      logLevel:
+        description: Log level
+        required: true
+        default: warning
+        type: choice
+        options:
+          - info
+          - warning
+          - debug
+
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        platform: ['linux/amd64', 'linux/arm64']
+    steps:
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          version: 'v0.9.1'
+      - uses: actions/checkout@v4
+        with:
+          path: 'opensearch-benchmark-git'
+      - name: Docker Build ${{ matrix.platform }}
+        run: |
+          docker buildx version
+          cp -a opensearch-benchmark-git/* ./
+          echo "Disable VERSION arg to enter docker build test mode"
+          PLATFORM=${{ matrix.platform }}
+          PLATFORM=`echo $PLATFORM | tr '/' '-'`
+          docker buildx build --platform ${{ matrix.platform }} --build-arg BUILD_ENV=testing --build-arg BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` -f "docker/Dockerfile" -t "osb/osb-$PLATFORM" -o type=docker .
+          docker images | grep "osb/osb-$PLATFORM"
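A note on the `tr '/' '-'` step above: buildx platform identifiers contain a slash, which the workflow strips before embedding the platform in the image name `osb/osb-$PLATFORM`. A minimal Python sketch of the same transformation (the function name is illustrative, not repository code):

```python
# Illustrative stand-in for the shell step PLATFORM=`echo $PLATFORM | tr '/' '-'`.
def tag_suffix(platform: str) -> str:
    """Flatten a buildx platform string so it can be embedded in an image name."""
    return platform.replace("/", "-")

assert tag_suffix("linux/amd64") == "linux-amd64"  # image becomes osb/osb-linux-amd64
assert tag_suffix("linux/arm64") == "linux-arm64"  # image becomes osb/osb-linux-arm64
```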

.github/workflows/docker.yml (-42)

This file was deleted.

.github/workflows/manual-integ.yml (+13)

@@ -14,6 +14,19 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
+      - uses: KengoTODA/actions-setup-docker-compose@v1
+        with:
+          version: '1.29.2'
+      # - name: Enforce docker-compose v1
+      #   run: |
+      #     echo "GitHub starts to switch runners to include docker-compose v2"
+      #     echo "which uses 'docker compose' command to replace 'docker-compose'"
+      #     echo "this would cause issues in our test validation so we enforce v1 here"
+      #     echo "https://github.com/actions/runner-images/commit/2a4bc14da46f1f8e358aa902a69edb9bef135472"
+      #     sudo apt-get remove -y docker-compose-plugin
+      #     sudo pip install docker-compose==1.29.2
+      #     docker --version
+      #     docker-compose --version
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Clone pyenv

DEVELOPER_GUIDE.md (+2)

@@ -173,6 +173,8 @@ Integration tests are expected to run for approximately **20-30 mins** and can b
 * Amazon Linux 2
 * MacOS
 
+Integration tests run against the standard [OpenSearch Benchmark workloads](https://github.com/opensearch-project/opensearch-benchmark-workloads). Sometimes, it may be necessary to run integration tests against a modified forked copy of these workloads. In that case, please follow the instructions [here](https://github.com/opensearch-project/opensearch-benchmark-workloads/blob/main/README.md#testing-the-workload).
+
 Invoke integration tests by running the following command within the root directory of the repository:
 
 ```

README.md (+6)

@@ -1,3 +1,9 @@
+[![CI](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/main.yml/badge.svg)](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/main.yml)
+[![Integration](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/manual-integ.yml/badge.svg)](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/manual-integ.yml)
+[![Release](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/release-drafter.yml/badge.svg)](https://github.com/opensearch-project/opensearch-benchmark/actions/workflows/release-drafter.yml)
+[![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://forum.opensearch.org/categories)
+![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success)
+
 <img src="https://github.com/opensearch-project/opensearch-benchmark/blob/main/opensearch_benchmark.png?raw=true" height="64px" alt="OpenSearch Benchmark">
 
 OpenSearch Benchmark is the macrobenchmarking framework for OpenSearch.

docker/Dockerfile (+16, -8)

@@ -1,11 +1,19 @@
-###############################################################################
-# Install OpenSearch Benchmark from PyPI to build a Docker image
-###############################################################################
+########################################################
+# Install OpenSearch Benchmark to build a Docker image #
+########################################################
 
-FROM python:3.11.2-slim
 ARG VERSION
+ARG BUILD_ENV=production
 
-ENV BENCHMARK_RUNNING_IN_DOCKER True
+FROM python:3.11.2-slim as build_env_testing
+ONBUILD COPY opensearch-benchmark-git/ ./
+
+FROM python:3.11.2-slim as build_env_production
+ONBUILD RUN echo Production Environment
+
+FROM build_env_${BUILD_ENV}
+WORKDIR /opensearch-benchmark
+ENV BENCHMARK_RUNNING_IN_DOCKER=True
 
 RUN apt-get -y update && \
     apt-get install -y curl git gcc pbzip2 pigz && \
@@ -15,9 +23,9 @@ RUN apt-get -y update && \
 RUN groupadd --gid 1000 opensearch-benchmark && \
     useradd -d /opensearch-benchmark -m -k /dev/null -g 1000 -N -u 1000 -l -s /bin/bash benchmark
 
-RUN if [ -z "$VERSION" ] ; then python3 -m pip install opensearch-benchmark ; else python3 -m pip install opensearch-benchmark==$VERSION ; fi
-
-WORKDIR /opensearch-benchmark
+ENV PIP_ONLY_BINARY=h5py
+RUN if [ "$BUILD_ENV" = "testing" ] ; then echo Testing; ls -l; python3 -m pip install -e . ; \
+    else echo Production; if [ -z "$VERSION" ] ; then python3 -m pip install opensearch-benchmark ; else python3 -m pip install opensearch-benchmark==$VERSION ; fi; fi
 
 RUN mkdir -p /opensearch-benchmark/.benchmark && \
     chown -R 1000:0 /opensearch-benchmark/.benchmark
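For context on the structure above: `FROM build_env_${BUILD_ENV}` selects one of the two named stages at build time, and the `ONBUILD` instruction in the selected stage fires when that stage is used as a base, so a testing build copies in the local checkout while a production build does not (the new CI workflow passes `--build-arg BUILD_ENV=testing` for exactly this purpose). The final `RUN if ...` line then branches the install accordingly; restated as a hedged Python sketch for readability (names are illustrative, the authoritative logic is the Dockerfile itself):

```python
from typing import Optional

# Illustrative restatement of the Dockerfile's install branch; not repository code.
def pip_install_command(build_env: str, version: Optional[str]) -> str:
    if build_env == "testing":
        # Testing image: editable install of the checkout copied in by ONBUILD.
        return "python3 -m pip install -e ."
    if version:  # production image pinned to a released version
        return f"python3 -m pip install opensearch-benchmark=={version}"
    return "python3 -m pip install opensearch-benchmark"  # latest release
```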

docker/Dockerfile-development (-75)

This file was deleted.

docker/docker-compose-tests.yml (-37)

This file was deleted.

osbenchmark/builder/utils/template_renderer.py (+3)

@@ -3,6 +3,7 @@
 
 from osbenchmark.exceptions import InvalidSyntax, SystemSetupError
 from osbenchmark.utils import io
+from osbenchmark.workload import loader
 
 
 class TemplateRenderer:
@@ -11,6 +12,7 @@ def render_template_file(self, root_path, variables, file_name):
 
     def _render_template_file(self, root_path, variables, file_name):
         env = jinja2.Environment(loader=jinja2.FileSystemLoader(root_path), autoescape=select_autoescape(['html', 'xml']))
+        env.filters["version_between"] = loader.version_between
         template = env.get_template(io.basename(file_name))
         # force a new line at the end. Jinja seems to remove it.
         return template.render(variables) + "\n"
@@ -20,6 +22,7 @@ def render_template_string(self, template_string, variables):
 
     def _render_template_string(self, template_string, variables):
         env = jinja2.Environment(loader=jinja2.BaseLoader, autoescape=select_autoescape(['html', 'xml']))
+        env.filters["version_between"] = loader.version_between
         template = env.from_string(template_string)
 
         return template.render(variables)
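With `version_between` registered as a filter, templates rendered by the builder can gate settings on the target version. A minimal sketch of the usage pattern (the filter implementation below is a stand-in that assumes an inclusive range check with a signature like `version_between(version, min_version, max_version)`; the real implementation is `osbenchmark.workload.loader.version_between`, and `distribution_version` is a hypothetical template variable):

```python
import jinja2

# Stand-in for osbenchmark.workload.loader.version_between; assumed semantics:
# True when `version` lies within the inclusive [min_version, max_version] range.
def version_between(version, min_version, max_version):
    def as_tuple(v):
        return tuple(int(part) for part in v.split("."))
    return as_tuple(min_version) <= as_tuple(version) <= as_tuple(max_version)

env = jinja2.Environment()
env.filters["version_between"] = version_between

template = env.from_string(
    "{% if distribution_version | version_between('2.0.0', '2.11.0') %}"
    "compat-settings{% else %}default-settings{% endif %}"
)
print(template.render(distribution_version="2.7.0"))  # -> compat-settings
```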

osbenchmark/metrics.py (+41, -2)

@@ -1440,7 +1440,6 @@ def as_dict(self):
         if self.plugin_params:
             d["plugin-params"] = self.plugin_params
         return d
-
     def to_result_dicts(self):
         """
         :return: a list of dicts, suitable for persisting the results of this test execution in a format that is Kibana-friendly.
@@ -1784,6 +1783,7 @@ def __call__(self):
             op_type = task.operation.type
             error_rate = self.error_rate(t, op_type)
             duration = self.duration(t)
+
             if task.operation.include_in_results_publishing or error_rate > 0:
                 self.logger.debug("Gathering request metrics for [%s].", t)
                 result.add_op_metrics(
@@ -1800,8 +1800,19 @@ def __call__(self):
                         self.workload.meta_data,
                         self.test_procedure.meta_data,
                         task.operation.meta_data,
-                        task.meta_data)
+                        task.meta_data,
+                    ),
+                )
+
+                result.add_correctness_metrics(
+                    t,
+                    task.operation.name,
+                    self.single_latency(t, op_type, metric_name="recall@k"),
+                    self.single_latency(t, op_type, metric_name="recall@1"),
+                    error_rate,
+                    duration,
                 )
+
         self.logger.debug("Gathering indexing metrics.")
         result.total_time = self.sum("indexing_total_time")
         result.total_time_per_shard = self.shard_stats("indexing_total_time")
@@ -1996,6 +2007,7 @@ def single_latency(self, task, operation_type, metric_name="latency"):
 class GlobalStats:
     def __init__(self, d=None):
         self.op_metrics = self.v(d, "op_metrics", default=[])
+        self.correctness_metrics = self.v(d, "correctness_metrics", default=[])
         self.total_time = self.v(d, "total_time")
         self.total_time_per_shard = self.v(d, "total_time_per_shard", default={})
         self.indexing_throttle_time = self.v(d, "indexing_throttle_time")
@@ -2081,6 +2093,22 @@ def op_metrics(op_item, key, single_value=False):
                         "max": item["max"]
                     }
                 })
+            elif metric == "correctness_metrics":
+                for item in value:
+                    if "recall@k" in item:
+                        all_results.append({
+                            "task": item["task"],
+                            "operation": item["operation"],
+                            "name": "recall@k",
+                            "value": item["recall@k"]
+                        })
+                    if "recall@1" in item:
+                        all_results.append({
+                            "task": item["task"],
+                            "operation": item["operation"],
+                            "name": "recall@1",
+                            "value": item["recall@1"]
+                        })
             elif metric.startswith("total_transform_") and value is not None:
                 for item in value:
                     all_results.append({
@@ -2124,6 +2152,17 @@ def add_op_metrics(self, task, operation, throughput, latency, service_time, cli
             doc["meta"] = meta
         self.op_metrics.append(doc)
 
+    def add_correctness_metrics(self, task, operation, recall_at_k_stats, recall_at_1_stats, error_rate, duration):
+        self.correctness_metrics.append({
+            "task": task,
+            "operation": operation,
+            "recall@k": recall_at_k_stats,
+            "recall@1":recall_at_1_stats,
+            "error_rate": error_rate,
+            "duration": duration
+        }
+        )
+
     def tasks(self):
         # ensure we can read test_execution.json files before Benchmark 0.8.0
         return [v.get("task", v["operation"]) for v in self.op_metrics]
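To make the new result shape concrete: each entry appended by `add_correctness_metrics` carries the per-task recall statistics, and the `elif metric == "correctness_metrics"` branch of `to_result_dicts` fans every entry out into one row per metric name. A self-contained sketch of that fan-out with illustrative values (the stats dicts stand in for whatever `single_latency` returns):

```python
# Illustrative data; real entries are built by GlobalStats.add_correctness_metrics.
correctness_metrics = [
    {
        "task": "prod-queries",
        "operation": "knn-query",
        "recall@k": {"mean": 0.97, "min": 0.91, "max": 1.0},
        "recall@1": {"mean": 0.99, "min": 0.95, "max": 1.0},
        "error_rate": 0.0,
        "duration": 412,
    }
]

# Mirrors the correctness_metrics branch in to_result_dicts.
all_results = []
for item in correctness_metrics:
    for name in ("recall@k", "recall@1"):
        if name in item:
            all_results.append({
                "task": item["task"],
                "operation": item["operation"],
                "name": name,
                "value": item[name],
            })

print(all_results)  # two Kibana-friendly rows, one per recall metric
```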
