Commit 0f73fe6

causal_lm -> greedy_causal_lm

1 parent c1aafc1 · commit 0f73fe6
File tree: 5 files changed (+28 -27 lines)


.github/workflows/causal_lm_cpp.yml (+5 -7)

@@ -2,16 +2,14 @@ name: causal_lm_cpp
 on:
   pull_request:
     paths:
-    - text_generation/causal_lm/cpp/**
-    - '!text_generation/causal_lm/cpp/README.md'
+    - text_generation/causal_lm/cpp/*
+    - '!**.md'
+    - llm_bench/python/**
     - thirdparty/openvino_contrib
     - .github/workflows/causal_lm_cpp.yml
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
 jobs:
-  causal_lm_cpp:
-    runs-on: ubuntu-20.04-8-cores
+  cpp-greedy_causal_lm:
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@v4
       with:

text_generation/causal_lm/cpp/CMakeLists.txt (+7 -7)

@@ -8,19 +8,19 @@ project(causal_lm)
 list(APPEND CUSTOM_OPERATIONS tokenizer)
 add_subdirectory(../../../thirdparty/openvino_contrib/modules/custom_operations/ "${CMAKE_CURRENT_BINARY_DIR}/custom_operations/")
 
-add_executable(causal_lm causal_lm.cpp)
-target_compile_definitions(causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
+add_executable(greedy_causal_lm greedy_causal_lm.cpp)
+target_compile_definitions(greedy_causal_lm PRIVATE USER_OV_EXTENSIONS_PATH=\"$<TARGET_FILE:user_ov_extensions>\")
 find_package(OpenVINO REQUIRED COMPONENTS Runtime)
-target_link_libraries(causal_lm PRIVATE openvino::runtime user_ov_extensions)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD 17)
-set_target_properties(causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
+target_link_libraries(greedy_causal_lm PRIVATE openvino::runtime user_ov_extensions)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD 17)
+set_target_properties(greedy_causal_lm PROPERTIES CXX_STANDARD_REQUIRED ON)
 if(MSVC)
   target_compile_options(
-    causal_lm PRIVATE
+    greedy_causal_lm PRIVATE
     /Wall # Display all warnings
     /wd4710 /wd4711 # Disable the inline warnings
     /EHsc # Enable standard C++ stack unwinding, assume functions with extern "C" never throw
   )
 else()
-  target_compile_options(causal_lm PRIVATE -Wall) # Display all warnings
+  target_compile_options(greedy_causal_lm PRIVATE -Wall) # Display all warnings
 endif()
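A quick way to check the rename locally is to build only the new target. A minimal sketch using standard CMake options, run from text_generation/causal_lm/cpp/ (the target name is the one introduced above):

    # Configure the sample, then build just the renamed greedy_causal_lm target.
    cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
    cmake --build ./build/ --config Release --target greedy_causal_lm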

text_generation/causal_lm/cpp/README.md (+13 -10)

@@ -7,7 +7,7 @@ This application showcases inference of a casual language model (LM). It doesn't
 
 ## How it works
 
-The program loads a tokenizer, detokenizer, and a model (`.xml` and `.bin`) to OpenVINO. The model is reshaped to batch 1 and variable prompt length. A prompt is tokenized and passed to the model. The model greedily generates token by token until the special end of sequence (EOS) token is obtained. The predicted tokens are converted to chars and printed in a streaming fashion.
+The program loads a model, a tokenizer and a detokenizer (`.xml` and `.bin`) to OpenVINO. A prompt is tokenized and passed to the model. The model greedily generates token by token until the special end of sequence (EOS) token is obtained. The predicted tokens are converted to chars and printed in a streaming fashion.
 
 ## Install OpenVINO Runtime
 
@@ -24,19 +24,22 @@ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ --c
 ## Supported models
 
 1. LLaMA 2
-   1. https://huggingface.co/meta-llama/Llama-2-7b-hf
-   2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
-   3. https://huggingface.co/meta-llama/Llama-2-13b-hf
    4. https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
-   5. https://huggingface.co/meta-llama/Llama-2-70b-hf
+   3. https://huggingface.co/meta-llama/Llama-2-13b-hf
+   2. https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
+   1. https://huggingface.co/meta-llama/Llama-2-7b-hf
    6. https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
-2. OpenLLaMA
-   1. https://huggingface.co/openlm-research/open_llama_3b
-   2. https://huggingface.co/openlm-research/open_llama_7b
+   5. https://huggingface.co/meta-llama/Llama-2-70b-hf
+2. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+3. OpenLLaMA
    3. https://huggingface.co/openlm-research/open_llama_13b
+   1. https://huggingface.co/openlm-research/open_llama_3b
    4. https://huggingface.co/openlm-research/open_llama_3b_v2
+   2. https://huggingface.co/openlm-research/open_llama_7b
    5. https://huggingface.co/openlm-research/open_llama_7b_v2
-3. [Llama2-7b-WhoIsHarryPotter](https://huggingface.co/microsoft/Llama2-7b-WhoIsHarryPotter)
+4. TinyLlama
+   1. https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6
+   2. https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
 
@@ -47,7 +50,7 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg
 ```sh
 source <INSTALL_DIR>/setupvars.sh
 python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
-python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --save_orig --stateful
+python ../../../llm_bench/python/convert.py --model_id meta-llama/Llama-2-7b-hf --output_dir ./Llama-2-7b-hf/ --precision FP16 --stateful
 python ./convert_tokenizers.py ./Llama-2-7b-hf/
 ```
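After the README's conversion commands above, the renamed binary is invoked with the model XML, tokenizer XML, detokenizer XML, and a prompt. A hedged sketch for the Llama-2-7b-hf conversion shown in the diff, assuming convert.py writes the IR to the <output_dir>/pytorch/dldt/FP16/ layout used in set_up_and_run.sh below:

    source <INSTALL_DIR>/setupvars.sh  # make the OpenVINO runtime libraries discoverable
    ./build/greedy_causal_lm ./Llama-2-7b-hf/pytorch/dldt/FP16/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"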

text_generation/causal_lm/cpp/set_up_and_run.sh (+3 -3)

@@ -17,10 +17,10 @@ curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/
 sudo ./ov/install_dependencies/install_openvino_dependencies.sh
 
 source ./ov/setupvars.sh
-python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ../../../llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --stateful &
+python -m pip install --upgrade-strategy eager "optimum[openvino]>=1.14" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ../../../llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T --output_dir ./TinyLlama-1.1B-intermediate-step-1195k-token-2.5T/ --precision FP16 --stateful &
 cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
 cmake --build ./build/ --config Release -j
 wait
 
-python ./convert_tokenizers.py ./open_llama_3b_v2/pytorch/dldt/FP32/
-./build/causal_lm ./open_llama_3b_v2/pytorch/dldt/FP32/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0"
+python ./convert_tokenizers.py ./TinyLlama-1.1B-intermediate-step-1195k-token-2.5T/pytorch/dldt/FP16/
+./build/greedy_causal_lm ./TinyLlama-1.1B-intermediate-step-1195k-token-2.5T/pytorch/dldt/FP16/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "Why is the Sun yellow?"
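Note the idiom in the backgrounded line above: the pip install and model conversion run concurrently with the CMake configure and build, and `wait` joins the background job before the steps that need its output. A stripped-down sketch of the same pattern (both command names are hypothetical placeholders):

    convert_model &   # hypothetical long-running conversion, backgrounded
    build_sample      # hypothetical build step, runs in the foreground meanwhile
    wait              # join: block until the backgrounded conversion exits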
