causal_lm_cpp #9554
Workflow file for this run
name: causal_lm_cpp
on:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - master
      - 'releases/**'
permissions: read-all # Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
concurrency:
  # github.ref is not unique in post-commit
  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-causal-lm-cpp
  cancel-in-progress: true
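# Nightly OpenVINO archives pinned to a single build (2025.1.0-18343), one per target OS:
# l_ = Ubuntu 20, l_u22_ = Ubuntu 22, m_ = macOS 12.6, w_ = Windows.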
env:
  l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_ubuntu20_2025.1.0.dev20250304_x86_64.tgz
  l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_ubuntu22_2025.1.0.dev20250304_x86_64.tgz
  m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_macos_12_6_2025.1.0.dev20250304_x86_64.tgz
  w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_windows_2025.1.0.dev20250304_x86_64.zip
jobs:
  cpp-continuous-batching-ubuntu:
    runs-on: ubuntu-22.04-8-cores
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.12
      - name: Install OpenVINO
        run: |
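          # Unpack the nightly archive into ./ov/, dropping its versioned top-level directory.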
          mkdir ./ov/
          curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
      - name: Build app
        run: |
          source ./ov/setupvars.sh
          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
          cmake --build ./build/ --config Release -j
      - name: Download and convert the model
        run: |
          source ./ov/setupvars.sh
          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
          python -m pip install -r ./samples/requirements.txt
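          # Export the Hugging Face checkpoint to an OpenVINO IR with fp16 weights into ./TinyLlama-1.1B-Chat-v1.0/.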
          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
      - name: Run gtests
        run: |
          source ./ov/setupvars.sh
          ./build/tests/cpp/tests_continuous_batching
      - name: Run accuracy_sample
        run: |
          source ./ov/setupvars.sh
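          # GNU timeout aborts the step if the sample hangs; 50s bounds the 5-prompt run.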
          timeout --verbose 50s ./build/tools/continuous_batching/accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5
      - name: Run throughput_benchmark
        run: |
          wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
          source ./ov/setupvars.sh
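          # Two scheduler configurations: first the default, then dynamic split-fuse with (presumably tuned) batch-size and input-length caps.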
          timeout --verbose 200s ./build/tools/continuous_batching/benchmark/continuous_batching_benchmark -n 10 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
          timeout --verbose 200s ./build/tools/continuous_batching/benchmark/continuous_batching_benchmark -n 10 --dynamic_split_fuse --max_batch_size 256 --max_input_len 256 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
  cpp-continuous-batching-windows:
    runs-on: windows-latest
    env:
      PYTHONIOENCODING: "utf8"
    defaults:
      run:
        shell: cmd
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.9
      - name: Install OpenVINO
        run: |
          curl --output ov.zip ${{ env.w_ov_link }}
          unzip -d ov ov.zip
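          # The zip unpacks into a single versioned directory; flatten it so setupvars.bat lands directly under ./ov/.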
          dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}"
        shell: bash
      - name: Build app
        run: |
          call .\ov\setupvars.bat
          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
          cmake --build ./build/ --config Release -j
        env:
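          # Assumed to be a workaround for certificate problems on the runner: disables TLS verification for CMake downloads.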
          CMAKE_TLS_VERIFY: 0
      - name: Download and convert the model
        run: |
          call .\ov\setupvars.bat
          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
          python -m pip install -r ./samples/requirements.txt
          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
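      # The PATH prepends in the steps below let the executables resolve the freshly built openvino_genai DLLs at load time.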
      - name: Run gtests
        run: |
          set PATH=.\build\openvino_genai\;%PATH%
          call .\ov\setupvars.bat
          .\build\tests\cpp\Release\tests_continuous_batching.exe
      - name: Run accuracy_sample
        run: |
          set PATH=.\build\openvino_genai\;%PATH%
          call .\ov\setupvars.bat
          .\build\tools\continuous_batching\accuracy\Release\continuous_batching_accuracy.exe -m .\TinyLlama-1.1B-Chat-v1.0\ -n 5
      - name: Run throughput_benchmark
        run: |
          curl -o .\ShareGPT_V3_unfiltered_cleaned_split.json -s -L "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"
          set PATH=.\build\openvino_genai\;%PATH%
          call .\ov\setupvars.bat
          .\build\tools\continuous_batching\benchmark\Release\continuous_batching_benchmark.exe -n 2 -m .\TinyLlama-1.1B-Chat-v1.0\ --dataset .\ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
  cpp-continuous-batching-macos:
    runs-on: macos-13
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.9
      - name: Install OpenVINO
        run: |
          mkdir ./ov/
          curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
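          # coreutils provides GNU timeout, used by the accuracy step below (macOS has no built-in timeout).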
          brew install coreutils scons
      - name: Build app
        run: |
          source ./ov/setupvars.sh
          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
          cmake --build ./build/ --config Release -j
      - name: Download and convert the model
        run: |
          source ./ov/setupvars.sh
          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
          python -m pip install -r ./samples/requirements.txt
          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
      - name: Run gtests
        run: |
          source ./ov/setupvars.sh
          ./build/tests/cpp/tests_continuous_batching
      - name: Run accuracy_sample
        run: |
          source ./ov/setupvars.sh
          timeout --verbose 120s ./build/tools/continuous_batching/accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5
      - name: Run throughput_benchmark
        run: |
          wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
          source ./ov/setupvars.sh
          ./build/tools/continuous_batching/benchmark/continuous_batching_benchmark -n 5 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
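  # Aggregate status gate: meant to serve as a single required check, failing when any job above failed or was cancelled.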
  Overall_Status:
    name: ci/gha_overall_status_causal_lm
    needs: [cpp-continuous-batching-ubuntu, cpp-continuous-batching-windows, cpp-continuous-batching-macos]
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
      - name: Check status of all jobs
        if: >-
          ${{
            contains(needs.*.result, 'failure') ||
            contains(needs.*.result, 'cancelled')
          }}
        run: exit 1