Skip to content

Commit 5777912

Browse files
committed
causal_lm->greedy_causal_lm
1 parent c1aafc1 commit 5777912

File tree

6 files changed

+33
-28
lines changed

6 files changed

+33
-28
lines changed

.github/workflows/causal_lm_cpp.yml

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@ name: causal_lm_cpp
22
on:
33
pull_request:
44
paths:
5-
- text_generation/causal_lm/cpp/**
6-
- '!text_generation/causal_lm/cpp/README.md'
5+
- text_generation/causal_lm/cpp/*
6+
- llm_bench/python/**
77
- thirdparty/openvino_contrib
88
- .github/workflows/causal_lm_cpp.yml
9+
- '!**.md'
910
concurrency:
1011
group: ${{ github.workflow }}-${{ github.ref }}
1112
cancel-in-progress: true
1213
jobs:
13-
causal_lm_cpp:
14+
cpp-greedy_causal_lm-ubuntu:
1415
runs-on: ubuntu-20.04-8-cores
1516
steps:
1617
- uses: actions/checkout@v4

llm_bench/python/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ torch
77
transformers>=4.33.0
88
diffusers>=0.22.0
99
optimum>=1.14.0,<1.15.0
10+
# TODO: replace with slyalin after https://github.com/huggingface/optimum-intel/pull/486/#discussion_r1428853330 is resolved
1011
git+https://github.com/Wovchena/optimum-intel.git@stateful
1112
git+https://github.com/openvinotoolkit/nncf.git
1213
packaging

text_generation/causal_lm/cpp/CMakeLists.txt

+7-7
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,19 @@ project(causal_lm)
88
list(APPEND CUSTOM_OPERATIONS tokenizer)
99
add_subdirectory(../../../thirdparty/openvino_contrib/modules/custom_operations/ "${CMAKE_CURRENT_BINARY_DIR}/custom_operations/")
1010

11-
add_executable(causal_lm causal_lm.cpp)
12-
target_compile_definitions(causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
11+
add_executable(greedy_causal_lm greedy_causal_lm.cpp)
12+
target_compile_definitions(greedy_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
1313
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
14-
target_link_libraries(causal_lm PRIVATE openvino::runtime user_ov_extensions)
15-
set_target_properties(causal_lm PROPERTIES CXX_STANDARD 17)
16-
set_target_properties(causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
14+
target_link_libraries(greedy_causal_lm PRIVATE openvino::runtime user_ov_extensions)
15+
set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD 17)
16+
set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
1717
if(MSVC)
1818
target_compile_options(
19-
causal_lm PRIVATE
19+
greedy_causal_lm PRIVATE
2020
/Wall # Display all warnings
2121
/wd4710 /wd4711 # Disable the inline warnings
2222
/EHsc # Enable standard C++ stack unwinding, assume functions with extern "C" never throw
2323
)
2424
else()
25-
target_compile_options(causal_lm PRIVATE -Wall) # Display all warnings
25+
target_compile_options(greedy_causal_lm PRIVATE -Wall) # Display all warnings
2626
endif()
+18-15
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
# Casual LM
1+
# Causal LM
22

3-
This application showcases inference of a casual language model (LM). It doesn't have many configuration options to encourage the reader to explore and modify the source code. There's a Jupyter notebook which corresponds to this pipeline and discusses how to create an LLM-powered Chatbot: https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot.
3+
This application showcases inference of a causal language model (LM). It doesn't have many configuration options to encourage the reader to explore and modify the source code. There's a Jupyter notebook which corresponds to this pipeline and discusses how to create an LLM-powered Chatbot: https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot.
44

55
> [!NOTE]
66
> This project is not for production use.
77
88
## How it works
99

10-
The program loads a tokenizer, detokenizer, and a model (`.xml` and `.bin`) to OpenVINO. The model is reshaped to batch 1 and variable prompt length. A prompt is tokenized and passed to the model. The model greedily generates token by token until the special end of sequence (EOS) token is obtained. The predicted tokens are converted to chars and printed in a streaming fashion.
10+
The program loads a model, a tokenizer and a detokenizer (`.xml` and `.bin`) to OpenVINO. A prompt is tokenized and passed to the model. The model greedily generates token by token until the special end of sequence (EOS) token is obtained. The predicted tokens are converted to chars and printed in a streaming fashion.
1111

1212
## Install OpenVINO Runtime
1313

1414
Install OpenVINO Runtime from an archive: [Linux](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_installing_openvino_from_archive_linux.html). `<INSTALL_DIR>` below refers to the extraction location.
1515

16-
## Build `Casual LM` and `user_ov_extensions`
16+
## Build `greedy_causal_lm` and `user_ov_extensions`
1717

1818
```sh
1919
git submodule update --init
@@ -24,19 +24,22 @@ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ --c
2424
## Supported models
2525

2626
1. LLaMA 2
27-
1. https://huggingface.co/meta-llama/Llama-2-7b-hf
28-
2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
29-
3. https://huggingface.co/meta-llama/Llama-2-13b-hf
3027
4. https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
31-
5. https://huggingface.co/meta-llama/Llama-2-70b-hf
28+
3. https://huggingface.co/meta-llama/Llama-2-13b-hf
29+
2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
30+
1. https://huggingface.co/meta-llama/Llama-2-7b-hf
3231
6. https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
33-
2. OpenLLaMA
34-
1. https://huggingface.co/openlm-research/open_llama_3b
35-
2. https://huggingface.co/openlm-research/open_llama_7b
32+
5. https://huggingface.co/meta-llama/Llama-2-70b-hf
33+
2. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
34+
3. OpenLLaMA
3635
3. https://huggingface.co/openlm-research/open_llama_13b
36+
1. https://huggingface.co/openlm-research/open_llama_3b
3737
4. https://huggingface.co/openlm-research/open_llama_3b_v2
38+
2. https://huggingface.co/openlm-research/open_llama_7b
3839
5. https://huggingface.co/openlm-research/open_llama_7b_v2
39-
3. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
40+
4. TinyLlama
41+
1. https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6
42+
2. https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T
4043

4144
This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
4245

@@ -47,14 +50,14 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg
4750
```sh
4851
source <INSTALL_DIR>/setupvars.sh
4952
python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
50-
python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --save_orig --stateful
53+
python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --precision FP16 --stateful
5154
python ./convert_tokenizers.py ./Llama-2-7b-hf/
5255
```
5356

5457
## Run
5558

56-
Usage: `causal_lm <openvino_model.xml> <tokenizer.xml> <detokenizer.xml> "<prompt>"`
59+
Usage: `greedy_causal_lm <openvino_model.xml> <tokenizer.xml> <detokenizer.xml> "<prompt>"`
5760

58-
Example: `./build/causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP32/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"`
61+
Example: `./build/greedy_causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP32/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"`
5962

6063
To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.

text_generation/causal_lm/cpp/set_up_and_run.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/
1717
sudo ./ov/install_dependencies/install_openvino_dependencies.sh
1818

1919
source ./ov/setupvars.sh
20-
python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ../../../llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --stateful &
20+
python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ../../../llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --precision FP16 --stateful &
2121
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
2222
cmake --build ./build/ --config Release -j
2323
wait
2424

25-
python ./convert_tokenizers.py ./open_llama_3b_v2/pytorch/dldt/FP32/
26-
./build/causal_lm ./open_llama_3b_v2/pytorch/dldt/FP32/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"
25+
python ./convert_tokenizers.py ./open_llama_3b_v2/pytorch/dldt/FP16/
26+
./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"

0 commit comments

Comments
 (0)