Commit 76148c5

Merge pull request #1 from mzegla/request_rate
Introduce GenerationHandle and enhance benchmark app
2 parents b63bda2 + 6400edc commit 76148c5
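
The GenerationHandle named in the title does not itself appear in the hunks excerpted below. As a rough usage sketch only (add_request, step, get_status, read and GenerationStatus::RUNNING are assumed names; this diff only confirms the FINISHED, IGNORED and DROPPED_BY_PIPELINE status values), a per-request handle typically lets a caller drive the batched pipeline and poll its own request:

    #include "continuous_batching_pipeline.hpp"

    int main() {
        // Hypothetical sketch; method names are assumptions, not taken from this commit.
        SchedulerConfig scheduler_config { .max_num_batched_tokens = 32 };
        ContinuousBatchingPipeline pipeline("/path/to/model", scheduler_config);

        GenerationHandle handle =
            pipeline.add_request(/*request_id=*/0, "What is OpenVINO?", GenerationConfig::greedy());

        while (handle->get_status() == GenerationStatus::RUNNING)
            pipeline.step();  // advance batched generation by one scheduler iteration

        if (handle->get_status() == GenerationStatus::FINISHED) {
            auto outputs = handle->read();  // drain the generated sequences
            (void)outputs;
        }
        return 0;
    }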

18 files changed: +688 −157

.gitmodules (+1)
@@ -1,3 +1,4 @@
 [submodule "thirdparty/openvino_tokenizers"]
 	path = thirdparty/openvino_tokenizers
 	url = https://github.com/openvinotoolkit/openvino_tokenizers.git
+	branch = master
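
With branch = master recorded here, git submodule update --remote syncs openvino_tokenizers to the tip of master instead of the commit pinned in the superproject; the new Dockerfile below relies on exactly that behavior in its git submodule update --remote --init step.
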
(new file, +34)
@@ -0,0 +1,34 @@
+FROM ubuntu:22.04
+
+ARG JOBS
+WORKDIR /workspace
+RUN apt-get update -y && apt-get install -y python3-pip python3-venv git
+
+# Install OpenVINO
+RUN git clone https://github.com/openvinotoolkit/openvino.git && \
+    cd /workspace/openvino && \
+    git submodule update --init -- /workspace/openvino/thirdparty/xbyak /workspace/openvino/thirdparty/pugixml /workspace/openvino/thirdparty/open_model_zoo \
+    /workspace/openvino/thirdparty/protobuf /workspace/openvino/thirdparty/snappy /workspace/openvino/thirdparty/telemetry /workspace/openvino/src/plugins/intel_cpu/thirdparty/mlas \
+    /workspace/openvino/src/plugins/intel_cpu/thirdparty/onednn /workspace/openvino/src/bindings/python/thirdparty/pybind11 && cd -
+
+RUN /workspace/openvino/install_build_dependencies.sh
+RUN python3 -m pip install -r /workspace/openvino/src/bindings/python/wheel/requirements-dev.txt
+RUN cmake -DENABLE_PYTHON=ON -DENABLE_PYTHON_PACKAGING=ON -DENABLE_WHEEL=ON -DENABLE_CPPLINT=OFF -DENABLE_SAMPLES=OFF -DENABLE_INTEL_GPU=OFF \
+    -DENABLE_INTEL_NPU=OFF -DENABLE_TEMPLATE=OFF -DENABLE_AUTO=OFF -DENABLE_HETERO=OFF -DENABLE_AUTO_BATCH=OFF -DENABLE_OV_TF_FRONTEND=ON -DENABLE_OV_ONNX_FRONTEND=OFF \
+    -DENABLE_OV_TF_LITE_FRONTEND=OFF -DENABLE_OV_PADDLE_FRONTEND=OFF -S /workspace/openvino -B /workspace/openvino_build
+RUN cmake --build /workspace/openvino_build --parallel $JOBS
+RUN cmake -P /workspace/openvino_build/cmake_install.cmake
+RUN python3 -m pip install /workspace/openvino_build/wheels/openvino-2024*
+ENV OpenVINO_DIR=/workspace/openvino_build
+
+# Download dataset
+RUN wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+# Build continuous batching library
+RUN git clone --branch request_rate https://github.com/mzegla/openvino.genai.git && cd /workspace/openvino.genai/text_generation/causal_lm/cpp/continuous_batching && \
+    git submodule update --remote --init && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ -j $JOBS
+
+# Install test dependencies
+RUN python3 -m pip install --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly/ /workspace/openvino.genai/thirdparty/openvino_tokenizers
+RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/openvino.genai/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
+ENV PYTHONPATH=/workspace/openvino.genai/text_generation/causal_lm/cpp/continuous_batching/build/python
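
Assuming the build context is the directory containing this Dockerfile, a command like docker build --build-arg JOBS=8 -t cb-bench . (the tag is illustrative) produces an image with OpenVINO built and installed, the ShareGPT dataset downloaded, and the continuous batching Python bindings already on PYTHONPATH for the test suite.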

text_generation/causal_lm/cpp/continuous_batching/apps/accuracy_sample.cpp (+9 −10)
@@ -5,6 +5,7 @@
 #include <cxxopts.hpp>
 
 #include "continuous_batching_pipeline.hpp"
+#include "tokenizer.hpp"
 
 void print_generation_result(const GenerationResult& generation_result) {
     for (size_t output_id = 0; output_id < generation_result.m_generation_ids.size(); ++output_id) {
@@ -46,15 +47,15 @@ int main(int argc, char* argv[]) try {
     std::vector<std::string> prompt_examples = {
         "What is OpenVINO?",
         "How are you?",
-        "What is OpenVINO?",
-        "What is the current time",
+        "What is your name?",
+        "Tell me something about Canada",
         "What is OpenVINO?",
     };
 
     std::vector<GenerationConfig> sampling_params_examples {
         GenerationConfig::beam_search(),
-        // GenerationConfig::greedy(),
-        // GenerationConfig::multinomial(),
+        GenerationConfig::greedy(),
+        GenerationConfig::multinomial(),
     };
 
     std::vector<std::string> prompts(num_prompts);
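
With the greedy and multinomial presets uncommented, the accuracy sample now exercises all three decoding modes rather than beam search alone, presumably cycling them across the prompt list.
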
@@ -66,7 +67,7 @@ int main(int argc, char* argv[]) try {
     }
 
     // Perform the inference
-
+
    SchedulerConfig scheduler_config {
        // batch size
        .max_num_batched_tokens = 32,
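
Note: .max_num_batched_tokens caps the total tokens, summed across all in-flight requests, that the scheduler packs into a single pipeline step; the "// batch size" comment refers to this token budget rather than a request count.
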
@@ -84,21 +85,20 @@ int main(int argc, char* argv[]) try {
 
    for (size_t request_id = 0; request_id < generation_results.size(); ++request_id) {
        const GenerationResult & generation_result = generation_results[request_id];
-
        std::cout << "Question: " << prompts[request_id] << std::endl;
        switch (generation_result.m_status)
        {
-        case GenerationResultStatus::FINISHED:
+        case GenerationStatus::FINISHED:
            print_generation_result(generation_result);
            break;
-        case GenerationResultStatus::IGNORED:
+        case GenerationStatus::IGNORED:
            std::cout << "Request was ignored due to lack of memory." <<std::endl;
            if (generation_result.m_generation_ids.size() > 0) {
                std::cout << "Partial result:" << std::endl;
                print_generation_result(generation_result);
            }
            break;
-        case GenerationResultStatus::ABORTED:
+        case GenerationStatus::DROPPED_BY_PIPELINE:
            std::cout << "Request was aborted." <<std::endl;
            if (generation_result.m_generation_ids.size() > 0) {
                std::cout << "Partial result:" << std::endl;
@@ -110,7 +110,6 @@ int main(int argc, char* argv[]) try {
        }
        std::cout << std::endl;
    }
-
} catch (const std::exception& error) {
    std::cerr << error.what() << '\n';
    return EXIT_FAILURE;
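
The renames in this file (GenerationResultStatus to GenerationStatus, ABORTED to DROPPED_BY_PIPELINE) imply a status enum roughly like the sketch below. Only FINISHED, IGNORED and DROPPED_BY_PIPELINE are visible in this diff; RUNNING is an assumption about the in-flight state a GenerationHandle would poll:

    // Reconstructed sketch, not copied from the commit.
    enum class GenerationStatus {
        RUNNING,               // assumed: request still being scheduled/generated
        FINISHED,              // all sequences completed normally
        IGNORED,               // request skipped, e.g. for lack of KV-cache memory
        DROPPED_BY_PIPELINE    // request aborted by the pipeline (previously ABORTED)
    };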
