@@ -32,12 +32,12 @@ jobs:
32
32
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
33
33
sudo apt-get install libtbb-dev
34
34
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2
35
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
35
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
36
36
cmake --build ./build/ --config Release -j
37
37
- name : greedy_causal_lm
38
38
run : |
39
39
source ./ov/setupvars.sh
40
- ./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
40
+ ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
41
41
42
42
cpp-beam_search_causal_lm-ubuntu :
43
43
runs-on : ubuntu-20.04
@@ -60,13 +60,13 @@ jobs:
60
60
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
61
61
sudo apt-get install libtbb-dev
62
62
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
63
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
63
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
64
64
cmake --build ./build/ --config Release -j
65
65
- name : Compare
66
66
run : |
67
67
source ./ov/setupvars.sh
68
68
69
- timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt
69
+ timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt
70
70
python -c "
71
71
import transformers
72
72
with open('pred.txt', 'r') as file:
82
82
"
83
83
echo "Why is the Sun yellow?" passed
84
84
85
- timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
85
+ timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
86
86
python -c "
87
87
import transformers
88
88
with open('pred.txt', 'r') as file:
98
98
"
99
99
echo "69" passed
100
100
101
- timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt
101
+ timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt
102
102
python -c "
103
103
import transformers
104
104
with open('pred.txt', 'r') as file:
@@ -114,7 +114,7 @@ jobs:
114
114
"
115
115
echo "Hi" passed
116
116
117
- timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt
117
+ timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt
118
118
python -c "
119
119
import transformers
120
120
with open('pred.txt', 'r') as file:
@@ -130,7 +130,7 @@ jobs:
130
130
"
131
131
echo "return 0" passed
132
132
133
- ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt
133
+ ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt
134
134
python -c "
135
135
import transformers
136
136
with open('pred.txt', 'r') as file:
@@ -146,7 +146,7 @@ jobs:
146
146
"
147
147
echo "你好! 你好嗎?" passed
148
148
149
- timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt
149
+ timeout 1m ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt
150
150
python -c "
151
151
import transformers
152
152
with open('pred.txt', 'r') as file:
@@ -188,7 +188,7 @@ jobs:
188
188
python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
189
189
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
190
190
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
191
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
191
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
192
192
cmake --build ./build/ --config Release -j
193
193
- name : Compare
194
194
shell : cmd
@@ -229,12 +229,12 @@ jobs:
229
229
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
230
230
sudo apt-get install libtbb-dev
231
231
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat
232
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
232
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
233
233
cmake --build ./build/ --config Release -j
234
234
- name : Compare
235
235
run : |
236
236
source ./ov/setupvars.sh
237
- timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt
237
+ timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt
238
238
239
239
cpp-beam_search_causal_lm-Qwen1_5-7B-Chat :
240
240
runs-on : ubuntu-20.04-16-cores
@@ -257,12 +257,12 @@ jobs:
257
257
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
258
258
sudo apt-get install libtbb-dev
259
259
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat
260
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
260
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
261
261
cmake --build ./build/ --config Release -j
262
262
- name : Run
263
263
run : |
264
264
source ./ov/setupvars.sh
265
- timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" > ./pred_qwen15.txt
265
+ timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" > ./pred_qwen15.txt
266
266
267
267
cpp-beam_search_causal_lm-Phi-2 :
268
268
runs-on : ubuntu-20.04-16-cores
@@ -285,12 +285,12 @@ jobs:
285
285
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
286
286
sudo apt-get install libtbb-dev
287
287
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2
288
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
288
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
289
289
cmake --build ./build/ --config Release -j 15
290
290
- name : Compare
291
291
run : |
292
292
source ./ov/setupvars.sh
293
- timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt
293
+ timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt
294
294
295
295
cpp-beam_search_causal_lm-notus-7b-v1 :
296
296
runs-on : ubuntu-20.04-16-cores
@@ -313,12 +313,12 @@ jobs:
313
313
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
314
314
sudo apt-get install libtbb-dev
315
315
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1
316
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
316
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
317
317
cmake --build ./build/ --config Release -j
318
318
- name : Compare
319
319
run : |
320
320
source ./ov/setupvars.sh
321
- timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt
321
+ timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt
322
322
323
323
cpp-speculative_decoding_lm-ubuntu :
324
324
runs-on : ubuntu-20.04-16-cores
@@ -342,13 +342,13 @@ jobs:
342
342
sudo apt-get install libtbb-dev
343
343
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b
344
344
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b
345
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
345
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
346
346
cmake --build ./build/ --config Release -j
347
347
- name : run and compare
348
348
run : |
349
349
source ./ov/setupvars.sh
350
350
./build/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt
351
- ./build/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt
351
+ ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt
352
352
python -c "
353
353
with open('predictions_greedy.txt', 'r') as f:
354
354
predicted_greedy = f.readline()
@@ -380,7 +380,7 @@ jobs:
380
380
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
381
381
sudo apt-get install libtbb-dev
382
382
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
383
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
383
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
384
384
cmake --build ./build/ --config Release -j
385
385
- name : run and compare
386
386
run : |
@@ -394,7 +394,7 @@ jobs:
394
394
A:' > ./prompt.txt
395
395
396
396
./build/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
397
- ./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
397
+ ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
398
398
python -c "
399
399
with open('predictions_greedy.txt', 'r') as f:
400
400
predicted_greedy = f.readline()
@@ -425,13 +425,13 @@ jobs:
425
425
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
426
426
sudo apt-get install libtbb-dev
427
427
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5
428
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
428
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
429
429
cmake --build ./build/ --config Release -j 15
430
430
- name : Run Generation
431
431
run : |
432
432
source ./ov/setupvars.sh
433
- timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt
434
- timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt
433
+ timeout 50s ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt
434
+ timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt
435
435
- name : Compare
436
436
run : |
437
437
python -c "
@@ -470,13 +470,13 @@ jobs:
470
470
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
471
471
sudo apt-get install libtbb-dev
472
472
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat
473
- cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
473
+ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
474
474
cmake --build ./build/ --config Release -j
475
475
- run : source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/ --output ./redpajama-3b-chat/ --with-detokenizer --trust-remote-code
476
476
- name : Run Generation
477
477
run : |
478
478
source ./ov/setupvars.sh
479
- timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt
479
+ timeout 50s ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt
480
480
- name : Compare
481
481
run : |
482
482
python -c "
0 commit comments