
Commit a0933a1

Merge branch 'master' into medium_runner

2 parents: 1112f09 + 7fce092

84 files changed: +1648 -1043 lines

(Large commits hide some content by default; only a subset of the 84 changed files is shown below.)


.github/workflows/causal_lm_cpp.yml (+1 -290)
@@ -22,293 +22,6 @@ env:
   w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_windows_2025.1.0.dev20250304_x86_64.zip
 
 jobs:
-  cpp-greedy_causal_lm-windows:
-    runs-on: windows-latest
-    env:
-      PYTHONIOENCODING: "utf8"
-    defaults:
-      run:
-        shell: cmd
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.9
-      - run: curl --output ov.zip ${{ env.w_ov_link }}
-      - run: unzip -d ov ov.zip
-      - run: dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}"
-        shell: bash
-      - name: Build app
-        run: |
-          call .\ov\setupvars.bat
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release -j
-        env:
-          CMAKE_TLS_VERIFY: 0
-      - name: Download and convert model
-        run: |
-          call .\ov\setupvars.bat
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          python -m pip install -r ./samples/requirements.txt
-          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-          optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
-          curl -o adapter_model.safetensors -s -L https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true
-      - run: >
-          set PATH=.\build\openvino_genai\;%PATH%
-          && call .\ov\setupvars.bat
-          && .\build\samples\cpp\text_generation\Release\greedy_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\cpp.txt
-      - run: |
-          echo import transformers > ref.py
-          echo predictions = open('cpp.txt', 'r').read() >> ref.py
-          echo tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0', trust_remote_code=True) >> ref.py
-          echo prompt = '69' >> ref.py
-          echo if tokenizer.chat_template: >> ref.py
-          echo     prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) >> ref.py
-          echo tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) >> ref.py
-          echo for beam in transformers.AutoModelForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0', trust_remote_code=True).generate(**tokenized, max_new_tokens=100, do_sample=False): >> ref.py
-          echo     ref = tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) >> ref.py
-          echo     idx = predictions.find(ref) >> ref.py
-          echo     if -1 == idx: >> ref.py
-          echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
-          echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
-      - run: python ref.py
-      - run: >
-          set PATH=.\build\openvino_genai\;%PATH%
-          && set "PYTHONPATH=./build/"
-          && call .\ov\setupvars.bat
-          && python samples\python\text_generation\greedy_causal_lm.py .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\py.txt
-      - run: fc .\cpp.txt .\py.txt
-      - run: >
-          set PATH=.\build\openvino_genai\;%PATH%
-          && set "PYTHONPATH=./build/"
-          && call .\ov\setupvars.bat
-          && python samples\python\text_generation\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?"
-
-  cpp-chat_sample-ubuntu:
-    runs-on: ubuntu-24.04
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - name: Install OpenVINO
-        run: |
-          mkdir ./ov/
-          curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
-          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
-      - name: Build app
-        run: |
-          source ./ov/setupvars.sh
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release -j
-      - name: Download and convert model
-        run: |
-          source ./ov/setupvars.sh
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          python -m pip install -r ./samples/requirements.txt
-          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-      - name: Compare
-        env:
-          PYTHONPATH: "./build"
-        run: |
-          source ./ov/setupvars.sh
-          printf 'What is 2 + 2?\nWhat is the previous answer?\nAdd 1 to it.\nSubtract 5 from it.\nWhy is the sun yellow?\nWhat was my first question?\n' > ./input.txt
-          timeout 30s ./build/samples/cpp/text_generation/chat_sample ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred.txt
-          python -c "
-          from transformers import AutoTokenizer, AutoModelForCausalLM
-          model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
-          tokenizer = AutoTokenizer.from_pretrained(model_id)
-          model = AutoModelForCausalLM.from_pretrained(model_id)
-          prompts = ['What is 2 + 2?', 'What is the previous answer?', 'Add 1 to it.', 'Subtract 5 from it.', 'Why is the sun yellow?', 'What was my first question?']
-          def gen_prompt(prompt):
-              return {'role': 'user', 'content': prompt}
-          def gen_answer(answer):
-              return {'role': 'assistant', 'content': answer}
-          chat_history = []
-          chat_prompt = ''
-          output = open('ref.txt', 'w')
-          for prompt in prompts:
-              output.write('question:\n')
-              chat_history.append(gen_prompt(prompt))
-              chat_prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
-              tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=False)
-              answer = model.generate(**tokenized, max_length=1000, do_sample=False)
-              answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True)
-              chat_history.append(gen_answer(answer_str))
-              output.write(answer_str)
-              output.write('\n----------\n')
-          output.write('question:\n')
-          output.close()
-          "
-          diff pred.txt ref.txt
-          echo "Chat sample cpp" passed
-          timeout 30s ./samples/python/text_generation/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt
-          diff pred2.txt ref.txt
-          echo "Chat sample python" passed
-
-  visual_language_chat_sample-ubuntu-minicpm_v2_6:
-    runs-on: ubuntu-22.04-16-cores
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - uses: ./.github/actions/install_openvino
-        with:
-          ov_link: ${{ env.l_u22_ov_link }}
-      - uses: ./.github/actions/build_app
-        with:
-          build_target: 'visual_language_chat benchmark_vlm py_openvino_genai'
-      - uses: ./.github/actions/install_python_deps
-      - name: Download and convert tiny-random-minicpmv-2_6 model and an image
-        run: |
-          source ./ov/setupvars.sh
-          optimum-cli export openvino -m katuni4ka/tiny-random-minicpmv-2_6 tiny-random-minicpmv-2_6 --trust-remote-code --task image-text-to-text
-          mkdir images
-      - name: Generate images - tiny-random-minicpmv-2_6
-        shell: python
-        run: |
-          from PIL import Image
-          import numpy as np
-          import requests
-          res = 28, 28
-          lines = np.arange(res[0] * res[1] * 3, dtype=np.uint8) % 255
-          lines = lines.reshape([*res, 3])
-          lines_image = Image.fromarray(lines)
-          lines_image.save("images/lines.png")
-          cat = Image.open(requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", stream=True).raw).convert('RGB')
-          cat.save("images/cat.png")
-      - name: Run visual_language_chat C++ sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./tiny-random-minicpmv-2_6/ ./images/
-          <<< $'Describe the images?' | tee cpp.txt
-        timeout-minutes: 2
-      - name: Run benchmark_vlm C++ sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/benchmark_vlm -m ./tiny-random-minicpmv-2_6/ -i ./images/cat.png -n 3
-        timeout-minutes: 2
-      - name: Run visual_language_chat Python sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./samples/python/visual_language_chat/visual_language_chat.py ./tiny-random-minicpmv-2_6/ ./images/
-          <<< $'Describe the images?' | tee py.txt
-        env:
-          PYTHONPATH: "./build/"
-      - name: Run benchmark_vlm Python sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./samples/python/visual_language_chat/benchmark_vlm.py -m ./tiny-random-minicpmv-2_6/ -i ./images/cat.png -n 3
-        env:
-          PYTHONPATH: "./build/"
-      - name: Encode cpp.txt with Python encoding instead of terminal one
-        shell: python
-        run: |
-          with open("cpp.txt", "rb") as f:
-              content = f.read().decode("utf-8", "replace")
-          with open("cpp.txt", "wb") as f:
-              f.write(content.encode("utf-8"))
-      - run: diff cpp.txt py.txt
-      - name: Run visual_language_chat C++ sample with 2 prompts - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./tiny-random-minicpmv-2_6/ ./images/cat.png
-          <<< $'What is unusual on this image?\nGo on.' | tee cpp2.txt
-        timeout-minutes: 2
-      - name: Run visual_language_chat Python sample with 2 prompts - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./samples/python/visual_language_chat/visual_language_chat.py ./tiny-random-minicpmv-2_6/ ./images/cat.png
-          <<< $'What is unusual on this image?\nGo on.' | tee py2.txt
-        env:
-          PYTHONPATH: "./build/"
-      - name: Encode cpp2.txt with Python encoding instead of terminal one
-        shell: python
-        run: |
-          with open("cpp2.txt", "rb") as f:
-              content = f.read().decode("utf-8", "replace")
-          with open("cpp2.txt", "wb") as f:
-              f.write(content.encode("utf-8"))
-      - run: diff cpp2.txt py2.txt
-
-  visual_language_chat_sample-ubuntu-internvl2:
-    runs-on: ubuntu-22.04-16-cores
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - uses: ./.github/actions/install_openvino
-        with:
-          ov_link: ${{ env.l_u22_ov_link }}
-      - uses: ./.github/actions/build_app
-        with:
-          build_target: 'visual_language_chat py_openvino_genai'
-      - uses: ./.github/actions/install_python_deps
-      - name: Download and convert InternVL2 model
-        run: |
-          # Lowering transformers version, workaround for https://huggingface.co/OpenGVLab/InternVL2-1B/discussions/7
-          python -m pip install -U "transformers<4.45.0"
-          source ./ov/setupvars.sh
-          optimum-cli export openvino --model OpenGVLab/InternVL2-4B ./internvl2_4b_ov/ --trust-remote-code
-      - name: Download images
-        run: |
-          wget https://llava-vl.github.io/static/images/monalisa.jpg
-      - name: Run visual_language_chat C++ sample - InternVL2
-        run: >
-          source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./internvl2_4b_ov/ monalisa.jpg
-          <<< $'Who drew this painting?\nWhen did the painter live?'
-        timeout-minutes: 4
-
-  visual_language_chat_sample-ubuntu-qwen2vl:
-    runs-on: ubuntu-22.04-16-cores
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - uses: ./.github/actions/install_openvino
-        with:
-          ov_link: ${{ env.l_u22_ov_link }}
-      - uses: ./.github/actions/build_app
-        with:
-          build_target: 'visual_language_chat py_openvino_genai'
-      - uses: ./.github/actions/install_python_deps
-      - name: Download and convert Qwen2VL model
-        run: |
-          source ./ov/setupvars.sh
-          optimum-cli export openvino --model Qwen/Qwen2-VL-2B-Instruct ./qwen2_vl_2b_ov/ --trust-remote-code
-      - name: Download images
-        run: |
-          wget https://llava-vl.github.io/static/images/monalisa.jpg
-      - name: Run visual_language_chat C++ sample - Qwen2VL
-        run: >
-          source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./qwen2_vl_2b_ov/ monalisa.jpg
-          <<< $'Who drew this painting?\nWhen did the painter live?'
-        timeout-minutes: 4
-
   cpp-continuous-batching-ubuntu:
     runs-on: ubuntu-22.04-8-cores
     defaults:
@@ -446,9 +159,7 @@ jobs:
 
   Overall_Status:
     name: ci/gha_overall_status_causal_lm
-    needs: [cpp-greedy_causal_lm-windows, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
-            visual_language_chat_sample-ubuntu-minicpm_v2_6, visual_language_chat_sample-ubuntu-internvl2,
-            cpp-continuous-batching-windows, cpp-continuous-batching-macos]
+    needs: [cpp-continuous-batching-ubuntu, cpp-continuous-batching-windows, cpp-continuous-batching-macos]
     if: ${{ always() }}
     runs-on: ubuntu-latest
     steps:
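
The trimmed needs list keeps the usual GitHub Actions status-gate shape for Overall_Status: the job runs unconditionally via if: ${{ always() }} and is expected to fail when a dependency did not succeed. The gate step itself is outside this hunk, so the following is only a minimal sketch of that pattern, with illustrative job names:

  Overall_Status:
    needs: [job_a, job_b]   # illustrative names
    if: ${{ always() }}     # run even if a needed job failed or was cancelled
    runs-on: ubuntu-latest
    steps:
      - name: Fail the gate unless every needed job succeeded
        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
        run: exit 1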

.github/workflows/genai-tools.yml (+3)
@@ -96,6 +96,9 @@ jobs:
         run: |
           optimum-cli export openvino --model ./tiny-random-qwen2 --task text-generation-with-past --weight-format fp16 ./ov_models/tiny-random-qwen2
           python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-qwen2/ -d cpu -n 1 --optimum -ic 10
+      - name: Test Prompt Lookup Decoding via GenAI
+        run: |
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-qwen2 -p "Why is the Sun yellow?" -d cpu -n 1 --max_ngram_size 3 --num_assistant_tokens 5 -ic 20
       - name: Test Speculative Decoding via GenAI
         run: |
           optimum-cli export openvino --model ./tiny-random-qwen2 --task text-generation-with-past --weight-format int8 ./ov_models/tiny-random-qwen2-int8
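
The new llm_bench step exercises prompt lookup decoding, a draft-model-free variant of speculative decoding that proposes candidate tokens from n-grams already present in the prompt. A rough standalone equivalent of those flags with the GenAI Python API, modeled on the prompt_lookup_decoding_lm sample; treat the exact property and field names as assumptions:

  import openvino_genai

  # Enable prompt lookup decoding on a CPU pipeline (assumed property name).
  pipe = openvino_genai.LLMPipeline('./ov_models/tiny-random-qwen2', 'CPU', prompt_lookup=True)

  config = openvino_genai.GenerationConfig()
  config.max_new_tokens = 20        # mirrors -ic 20 above
  config.num_assistant_tokens = 5   # candidate tokens proposed per step
  config.max_ngram_size = 3         # longest prompt n-gram used for matching

  print(pipe.generate('Why is the Sun yellow?', config))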

.github/workflows/linux.yml (+4 -1)
@@ -323,17 +323,20 @@ jobs:
   genai_tests_wheel:
     name: Python (${{ matrix.test.name}}) Tests (wheel)
     needs: [ openvino_download, genai_build_wheel ]
-    timeout-minutes: 60
+    timeout-minutes: ${{ matrix.test.timeout }}
     strategy:
       fail-fast: false
       matrix:
         test:
           - name: 'Whisper'
             cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 45
           - name: 'Cacheopt E2E'
             cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 60
           - name: 'LLM & VLM'
             cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore tests/python_tests/test_kv_cache_eviction.py --ignore tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 90
     defaults:
       run:
         shell: bash
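
The change works because timeout-minutes accepts an expression, so each matrix entry can carry its own budget instead of all suites sharing one 60-minute cap. Reduced to its essentials, with illustrative entries:

  jobs:
    tests:
      runs-on: ubuntu-latest
      timeout-minutes: ${{ matrix.test.timeout }}  # evaluated once per matrix entry
      strategy:
        matrix:
          test:
            - name: 'fast suite'   # illustrative entries
              timeout: 45
            - name: 'slow suite'
              timeout: 90
      steps:
        - run: echo "running ${{ matrix.test.name }} with a ${{ matrix.test.timeout }}-minute cap"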

.github/workflows/mac.yml (+5 -3)
@@ -336,17 +336,20 @@ jobs:
   genai_tests_wheel:
     name: Python (${{ matrix.test.name}}) Tests (wheel)
     needs: [ openvino_download, genai_build_wheel ]
-    timeout-minutes: 60
+    timeout-minutes: ${{ matrix.test.timeout }}
     strategy:
       fail-fast: false
       matrix:
         test:
           - name: 'Whisper'
             cmd: 'tests/python_tests/test_whisper_pipeline.py'
+            timeout: 45
           - name: 'Cacheopt E2E'
             cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 60
           - name: 'LLM & VLM'
             cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore tests/python_tests/test_kv_cache_eviction.py --ignore tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 90
     defaults:
       run:
         shell: bash
@@ -355,8 +358,7 @@ jobs:
       INSTALL_DIR: ${{ github.workspace }}/install
       SRC_DIR: ${{ github.workspace }}/src
       BUILD_DIR: ${{ github.workspace }}/build
-      TRANSFORMERS_CACHE: ${{ github.workspace }}/models # Hugging Face transformers cache
-      HF_HOME: ${{ github.workspace }}/datasets # Hugging Face datasets cache
+      HF_HOME: ${{ github.workspace }}/hf_cache
 
     steps:
       - name: Clone openvino.genai
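
Beyond the timeout matrix, this file also folds the two Hugging Face cache variables into one: TRANSFORMERS_CACHE is deprecated in recent transformers releases, and HF_HOME serves as the single root for both model and dataset caches. The shell equivalent is simply:

  # One cache root for Hugging Face models and datasets
  # (replaces the deprecated TRANSFORMERS_CACHE variable)
  export HF_HOME="${GITHUB_WORKSPACE}/hf_cache"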

.github/workflows/windows.yml (+4 -1)
@@ -369,17 +369,20 @@ jobs:
   genai_tests_wheel:
     name: Python (${{ matrix.test.name}}) Tests (wheel)
     needs: [ openvino_download, genai_build_wheel ]
-    timeout-minutes: 60
+    timeout-minutes: ${{ matrix.test.timeout }}
     strategy:
       fail-fast: false
       matrix:
         test:
           - name: 'Whisper'
             cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 45
           - name: 'Cacheopt E2E'
             cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 60
           - name: 'LLM & VLM'
             cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore tests/python_tests/test_whisper_pipeline_static.py --ignore tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 90
     defaults:
       run:
         shell: pwsh

CMakeLists.txt (+1 -1)
@@ -107,7 +107,7 @@ if(NOT DEFINED CPACK_ARCHIVE_COMPONENT_INSTALL)
 endif()
 set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
 # Workaround https://gitlab.kitware.com/cmake/cmake/-/issues/2614
-set(CPACK_COMPONENTS_ALL core_genai core_genai_dev cpp_samples_genai licensing_genai openvino_tokenizers openvino_tokenizers_docs)
+set(CPACK_COMPONENTS_ALL core_genai core_genai_dev core_c_genai core_c_genai_dev cpp_samples_genai licensing_genai openvino_tokenizers openvino_tokenizers_docs)
 if(ENABLE_PYTHON)
   list(APPEND CPACK_COMPONENTS_ALL pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR})
 endif()
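
The two added entries register the new C API components with CPack so they are packaged alongside the C++ ones (with CPACK_ARCHIVE_COMPONENT_INSTALL enabled, each listed component becomes its own archive). Listed components can also be installed individually from a build tree, for example (component names taken from the line above; paths illustrative):

  # Install only the C API runtime and its headers
  cmake --install ./build --config Release --component core_c_genai --prefix ./install
  cmake --install ./build --config Release --component core_c_genai_dev --prefix ./install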
