Temp #15

Workflow file for this run

.github/workflows/causal_lm_cpp.yml at 60bc1c1

	# Workflow header: name, pull-request trigger and concurrency policy.
	name: causal_lm_cpp
	on:
	  pull_request:
	    # Run only when the workflow itself, the conversion scripts, the C++
	    # samples or the tokenizers submodule change; Markdown-only edits are
	    # excluded by the trailing negated pattern.
	    paths:
	      - .github/workflows/causal_lm_cpp.yml
	      - llm_bench/python/**
	      - text_generation/causal_lm/cpp/*
	      - thirdparty/openvino_tokenizers
	      - '!**.md'
	# One active run per workflow and ref: a newer run cancels the one in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true
	jobs:
	  # Converts open_llama_3b_v2 to FP16, builds the C++ samples and runs
	  # greedy_causal_lm on a fixed prompt. The output is not compared against a
	  # reference; the job only checks that the sample exits successfully.
	  cpp-greedy_causal_lm-ubuntu:
	    runs-on: ubuntu-20.04-8-cores
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          submodules: recursive
	      - uses: actions/setup-python@v4
	        with:
	          # Quoted so the version is passed as a string, not parsed as a float.
	          python-version: '3.8'
	      - name: Install OpenVINO
	        run: |
	          mkdir ./ov/
	          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/linux/l_openvino_toolkit_ubuntu20_2023.3.0.13775.ceeafaf64f3_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	      # The pip install + model conversion runs in the background while CMake
	      # configures and builds. Waiting on the recorded PID (instead of a bare
	      # `wait`, which always returns 0) makes this step fail when the
	      # background install/conversion fails.
	      - name: Download, convert and build
	        run: |
	          source ./ov/setupvars.sh
	          python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --precision FP16 &
	          convert_pid=$!
	          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
	          cmake --build ./build/ --config Release -j
	          wait "$convert_pid"
	      - name: convert_tokenizer and run
	        run: |
	          source ./ov/setupvars.sh
	          convert_tokenizer ./open_llama_3b_v2/pytorch/dldt/FP16/ --output ./open_llama_3b_v2/pytorch/dldt/FP16/ --with-detokenizer
	          ./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"

	  # Runs beam_search_causal_lm on TinyLlama for the prompts "69" and "Hi" and
	  # checks that every beam produced by Hugging Face transformers group beam
	  # search appears in the sample's output.
	  cpp-beam_search_causal_lm-ubuntu:
	    runs-on: ubuntu-20.04
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          submodules: recursive
	      - uses: actions/setup-python@v4
	        with:
	          python-version: '3.8'
	      - name: Install OpenVINO
	        run: |
	          mkdir ./ov/
	          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/linux/l_openvino_toolkit_ubuntu20_2023.3.0.13775.ceeafaf64f3_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	      # Background conversion overlaps with the build; see the PID-based wait.
	      - name: Download, convert and build
	        run: |
	          source ./ov/setupvars.sh
	          python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 &
	          convert_pid=$!
	          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
	          cmake --build ./build/ --config Release -j
	          wait "$convert_pid"
	      # Each reference beam is searched for in pred.txt and removed once
	      # found, so a beam cannot be matched twice. The tokenizer output is
	      # unpacked with ** so generate() receives input_ids/attention_mask as
	      # keyword arguments.
	      - name: Compare
	        run: |
	          source ./ov/setupvars.sh
	          convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer

	          timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ 69 > ./pred.txt
	          python -c "
	          import transformers
	          with open('pred.txt', 'r') as file:
	              predictions = file.read()
	          tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	          tokenized = tokenizer('69', return_tensors='pt')
	          for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
	              idx = predictions.find(ref)
	              if -1 == idx:
	                  raise RuntimeError(f'Missing "{ref=}" from predictions')
	              predictions = predictions[:idx] + predictions[idx + len(ref):]
	          "
	          echo 69 passed

	          timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ Hi > ./pred.txt
	          python -c "
	          import transformers
	          with open('pred.txt', 'r') as file:
	              predictions = file.read()
	          tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	          tokenized = tokenizer('Hi', return_tensors='pt')
	          for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
	              idx = predictions.find(ref)
	              if -1 == idx:
	                  raise RuntimeError(f'Missing "{ref=}" from predictions')
	              predictions = predictions[:idx] + predictions[idx + len(ref):]
	          "
	          echo Hi passed

	  # Windows variant of the TinyLlama beam-search comparison for the prompt
	  # "69". The reference script is assembled line by line with cmd `echo`
	  # into ref.py and then executed.
	  cpp-beam_search_causal_lm-windows:
	    runs-on: windows-latest
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          submodules: recursive
	      - uses: actions/setup-python@v4
	        with:
	          python-version: '3.8'
	      - name: Install OpenVINO
	        shell: bash
	        run: |
	          curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/windows/w_openvino_toolkit_windows_2023.3.0.13775.ceeafaf64f3_x86_64.zip
	          unzip ov.zip
	      - name: Download, convert and build
	        shell: cmd
	        run: |
	          call w_openvino_toolkit_windows_2023.3.0.13775.ceeafaf64f3_x86_64\setupvars.bat
	          python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
	          python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16
	          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
	          cmake --build ./build/ --config Release -j
	      # The spaces after `echo` are written to ref.py verbatim and form the
	      # Python indentation of the loop and `if` bodies.
	      - name: Compare
	        shell: cmd
	        run: |
	          call w_openvino_toolkit_windows_2023.3.0.13775.ceeafaf64f3_x86_64\setupvars.bat
	          convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --with-detokenizer

	          .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ "69" > .\pred.txt
	          echo import transformers > ref.py
	          echo predictions = open('pred.txt', 'r').read() >> ref.py
	          echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py
	          echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
	          echo for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False): >> ref.py
	          echo     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' >> ref.py
	          echo     idx = predictions.find(ref) >> ref.py
	          echo     if -1 == idx: >> ref.py
	          echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
	          echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
	          python ref.py

	  # Converts Qwen-7B-Chat and runs beam_search_causal_lm on the prompt "69"
	  # under a 50 s timeout. The output is written to pred.txt; no reference
	  # comparison is performed in this job.
	  cpp-beam_search_causal_lm-Qwen-7B-Chat:
	    runs-on: ubuntu-20.04-16-cores
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          submodules: recursive
	      - uses: actions/setup-python@v4
	        with:
	          python-version: '3.8'
	      - name: Install OpenVINO
	        run: |
	          mkdir ./ov/
	          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/linux/l_openvino_toolkit_ubuntu20_2023.3.0.13775.ceeafaf64f3_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	      # Background conversion overlaps with the build; see the PID-based wait.
	      - name: Download, convert and build
	        run: |
	          source ./ov/setupvars.sh
	          python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id Qwen/Qwen-7B-Chat --output_dir ./Qwen-7B-Chat/ --precision FP16 &
	          convert_pid=$!
	          cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
	          cmake --build ./build/ --config Release -j
	          wait "$convert_pid"
	      - name: Compare
	        run: |
	          source ./ov/setupvars.sh
	          convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
	          timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Temp #15

Workflow file

Temp #15

Jobs

Run details

Workflow file for this run