
Commit a0933a1

Merge branch 'master' into medium_runner

2 parents: 1112f09 + 7fce092

84 files changed: +1648 -1043 lines

(Large commits hide some content by default; only a subset of the 84 changed files is shown below.)


.github/workflows/causal_lm_cpp.yml (+1 -290)
@@ -22,293 +22,6 @@ env:
   w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2025.1.0-18343-5e16b688156/openvino_toolkit_windows_2025.1.0.dev20250304_x86_64.zip
 
 jobs:
-  cpp-greedy_causal_lm-windows:
-    runs-on: windows-latest
-    env:
-      PYTHONIOENCODING: "utf8"
-    defaults:
-      run:
-        shell: cmd
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.9
-      - run: curl --output ov.zip ${{ env.w_ov_link }}
-      - run: unzip -d ov ov.zip
-      - run: dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}"
-        shell: bash
-      - name: Build app
-        run: |
-          call .\ov\setupvars.bat
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release -j
-        env:
-          CMAKE_TLS_VERIFY: 0
-      - name: Download and convert model
-        run: |
-          call .\ov\setupvars.bat
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          python -m pip install -r ./samples/requirements.txt
-          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-          optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
-          curl -o adapter_model.safetensors -s -L https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true
-      - run: >
-          set PATH=.\build\openvino_genai\;%PATH%
-          && call .\ov\setupvars.bat
-          && .\build\samples\cpp\text_generation\Release\greedy_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\cpp.txt
-      - run: |
-          echo import transformers > ref.py
-          echo predictions = open('cpp.txt', 'r').read() >> ref.py
-          echo tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0', trust_remote_code=True) >> ref.py
-          echo prompt = '69' >> ref.py
-          echo if tokenizer.chat_template: >> ref.py
-          echo     prompt = tokenizer.apply_chat_template([{'role': 'user', 'content': prompt}], tokenize=False, add_generation_prompt=True) >> ref.py
-          echo tokenized = tokenizer(prompt, return_tensors='pt', add_special_tokens=False) >> ref.py
-          echo for beam in transformers.AutoModelForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0', trust_remote_code=True).generate(**tokenized, max_new_tokens=100, do_sample=False): >> ref.py
-          echo     ref = tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) >> ref.py
-          echo     idx = predictions.find(ref) >> ref.py
-          echo     if -1 == idx: >> ref.py
-          echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
-          echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
-      - run: python ref.py
-      - run: >
-          set PATH=.\build\openvino_genai\;%PATH%
-          && set "PYTHONPATH=./build/"
-          && call .\ov\setupvars.bat
-          && python samples\python\text_generation\greedy_causal_lm.py .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\py.txt
-      - run: fc .\cpp.txt .\py.txt
-      - run: >
-          set PATH=.\build\openvino_genai\;%PATH%
-          && set "PYTHONPATH=./build/"
-          && call .\ov\setupvars.bat
-          && python samples\python\text_generation\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?"
-
-  cpp-chat_sample-ubuntu:
-    runs-on: ubuntu-24.04
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - name: Install OpenVINO
-        run: |
-          mkdir ./ov/
-          curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
-          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
-      - name: Build app
-        run: |
-          source ./ov/setupvars.sh
-          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
-          cmake --build ./build/ --config Release -j
-      - name: Download and convert model
-        run: |
-          source ./ov/setupvars.sh
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
-          python -m pip install -r ./samples/requirements.txt
-          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-      - name: Compare
-        env:
-          PYTHONPATH: "./build"
-        run: |
-          source ./ov/setupvars.sh
-          printf 'What is 2 + 2?\nWhat is the previous answer?\nAdd 1 to it.\nSubtract 5 from it.\nWhy is the sun yellow?\nWhat was my first question?\n' > ./input.txt
-          timeout 30s ./build/samples/cpp/text_generation/chat_sample ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred.txt
-          python -c "
-          from transformers import AutoTokenizer, AutoModelForCausalLM
-          model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
-          tokenizer = AutoTokenizer.from_pretrained(model_id)
-          model = AutoModelForCausalLM.from_pretrained(model_id)
-          prompts = ['What is 2 + 2?', 'What is the previous answer?', 'Add 1 to it.', 'Subtract 5 from it.', 'Why is the sun yellow?', 'What was my first question?']
-          def gen_prompt(prompt):
-              return {'role': 'user', 'content': prompt}
-          def gen_answer(answer):
-              return {'role': 'assistant', 'content': answer}
-          chat_history = []
-          chat_prompt = ''
-          output = open('ref.txt', 'w')
-          for prompt in prompts:
-              output.write('question:\n')
-              chat_history.append(gen_prompt(prompt))
-              chat_prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
-              tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=False)
-              answer = model.generate(**tokenized, max_length=1000, do_sample=False)
-              answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True)
-              chat_history.append(gen_answer(answer_str))
-              output.write(answer_str)
-              output.write('\n----------\n')
-          output.write('question:\n')
-          output.close()
-          "
-          diff pred.txt ref.txt
-          echo "Chat sample cpp" passed
-          timeout 30s ./samples/python/text_generation/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt
-          diff pred2.txt ref.txt
-          echo "Chat sample python" passed
-
-  visual_language_chat_sample-ubuntu-minicpm_v2_6:
-    runs-on: ubuntu-22.04-16-cores
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - uses: ./.github/actions/install_openvino
-        with:
-          ov_link: ${{ env.l_u22_ov_link }}
-      - uses: ./.github/actions/build_app
-        with:
-          build_target: 'visual_language_chat benchmark_vlm py_openvino_genai'
-      - uses: ./.github/actions/install_python_deps
-      - name: Download and convert tiny-random-minicpmv-2_6 model and an image
-        run: |
-          source ./ov/setupvars.sh
-          optimum-cli export openvino -m katuni4ka/tiny-random-minicpmv-2_6 tiny-random-minicpmv-2_6 --trust-remote-code --task image-text-to-text
-          mkdir images
-      - name: Generate images - tiny-random-minicpmv-2_6
-        shell: python
-        run: |
-          from PIL import Image
-          import numpy as np
-          import requests
-          res = 28, 28
-          lines = np.arange(res[0] * res[1] * 3, dtype=np.uint8) % 255
-          lines = lines.reshape([*res, 3])
-          lines_image = Image.fromarray(lines)
-          lines_image.save("images/lines.png")
-          cat = Image.open(requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", stream=True).raw).convert('RGB')
-          cat.save("images/cat.png")
-      - name: Run visual_language_chat C++ sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./tiny-random-minicpmv-2_6/ ./images/
-          <<< $'Describe the images?' | tee cpp.txt
-        timeout-minutes: 2
-      - name: Run benchmark_vlm C++ sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/benchmark_vlm -m ./tiny-random-minicpmv-2_6/ -i ./images/cat.png -n 3
-        timeout-minutes: 2
-      - name: Run visual_language_chat Python sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./samples/python/visual_language_chat/visual_language_chat.py ./tiny-random-minicpmv-2_6/ ./images/
-          <<< $'Describe the images?' | tee py.txt
-        env:
-          PYTHONPATH: "./build/"
-      - name: Run benchmark_vlm Python sample - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./samples/python/visual_language_chat/benchmark_vlm.py -m ./tiny-random-minicpmv-2_6/ -i ./images/cat.png -n 3
-        env:
-          PYTHONPATH: "./build/"
-      - name: Encode cpp.txt with Python encoding instead of terminal one
-        shell: python
-        run: |
-          with open("cpp.txt", "rb") as f:
-              content = f.read().decode("utf-8", "replace")
-          with open("cpp.txt", "wb") as f:
-              f.write(content.encode("utf-8"))
-      - run: diff cpp.txt py.txt
-      - name: Run visual_language_chat C++ sample with 2 prompts - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./tiny-random-minicpmv-2_6/ ./images/cat.png
-          <<< $'What is unusual on this image?\nGo on.' | tee cpp2.txt
-        timeout-minutes: 2
-      - name: Run visual_language_chat Python sample with 2 prompts - tiny-random-minicpmv-2_6
-        run: >
-          set -o pipefail
-          && source ./ov/setupvars.sh
-          && ./samples/python/visual_language_chat/visual_language_chat.py ./tiny-random-minicpmv-2_6/ ./images/cat.png
-          <<< $'What is unusual on this image?\nGo on.' | tee py2.txt
-        env:
-          PYTHONPATH: "./build/"
-      - name: Encode cpp2.txt with Python encoding instead of terminal one
-        shell: python
-        run: |
-          with open("cpp2.txt", "rb") as f:
-              content = f.read().decode("utf-8", "replace")
-          with open("cpp2.txt", "wb") as f:
-              f.write(content.encode("utf-8"))
-      - run: diff cpp2.txt py2.txt
-
-  visual_language_chat_sample-ubuntu-internvl2:
-    runs-on: ubuntu-22.04-16-cores
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - uses: ./.github/actions/install_openvino
-        with:
-          ov_link: ${{ env.l_u22_ov_link }}
-      - uses: ./.github/actions/build_app
-        with:
-          build_target: 'visual_language_chat py_openvino_genai'
-      - uses: ./.github/actions/install_python_deps
-      - name: Download and convert InternVL2 model
-        run: |
-          # Lowering transformers version, workaround for https://huggingface.co/OpenGVLab/InternVL2-1B/discussions/7
-          python -m pip install -U "transformers<4.45.0"
-          source ./ov/setupvars.sh
-          optimum-cli export openvino --model OpenGVLab/InternVL2-4B ./internvl2_4b_ov/ --trust-remote-code
-      - name: Download images
-        run: |
-          wget https://llava-vl.github.io/static/images/monalisa.jpg
-      - name: Run visual_language_chat C++ sample - InternVL2
-        run: >
-          source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./internvl2_4b_ov/ monalisa.jpg
-          <<< $'Who drew this painting?\nWhen did the painter live?'
-        timeout-minutes: 4
-
-  visual_language_chat_sample-ubuntu-qwen2vl:
-    runs-on: ubuntu-22.04-16-cores
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - uses: actions/setup-python@v4
-        with:
-          python-version: 3.11
-      - uses: ./.github/actions/install_openvino
-        with:
-          ov_link: ${{ env.l_u22_ov_link }}
-      - uses: ./.github/actions/build_app
-        with:
-          build_target: 'visual_language_chat py_openvino_genai'
-      - uses: ./.github/actions/install_python_deps
-      - name: Download and convert Qwen2VL model
-        run: |
-          source ./ov/setupvars.sh
-          optimum-cli export openvino --model Qwen/Qwen2-VL-2B-Instruct ./qwen2_vl_2b_ov/ --trust-remote-code
-      - name: Download images
-        run: |
-          wget https://llava-vl.github.io/static/images/monalisa.jpg
-      - name: Run visual_language_chat C++ sample - Qwen2VL
-        run: >
-          source ./ov/setupvars.sh
-          && ./build/samples/cpp/visual_language_chat/visual_language_chat ./qwen2_vl_2b_ov/ monalisa.jpg
-          <<< $'Who drew this painting?\nWhen did the painter live?'
-        timeout-minutes: 4
-
   cpp-continuous-batching-ubuntu:
     runs-on: ubuntu-22.04-8-cores
     defaults:
@@ -446,9 +159,7 @@ jobs:
 
   Overall_Status:
     name: ci/gha_overall_status_causal_lm
-    needs: [cpp-greedy_causal_lm-windows, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
-            visual_language_chat_sample-ubuntu-minicpm_v2_6, visual_language_chat_sample-ubuntu-internvl2,
-            cpp-continuous-batching-windows, cpp-continuous-batching-macos]
+    needs: [cpp-continuous-batching-ubuntu, cpp-continuous-batching-windows, cpp-continuous-batching-macos]
     if: ${{ always() }}
     runs-on: ubuntu-latest
     steps:
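
The trimmed needs list keeps the usual GitHub Actions status-gate shape for Overall_Status: the job runs unconditionally via if: ${{ always() }} and is expected to fail when a dependency did not succeed. The gate step itself is outside this hunk, so the following is only a minimal sketch of that pattern, with illustrative job names:

  Overall_Status:
    needs: [job_a, job_b]   # illustrative names
    if: ${{ always() }}     # run even if a needed job failed or was cancelled
    runs-on: ubuntu-latest
    steps:
      - name: Fail the gate unless every needed job succeeded
        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
        run: exit 1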

.github/workflows/genai-tools.yml (+3)
@@ -96,6 +96,9 @@ jobs:
         run: |
           optimum-cli export openvino --model ./tiny-random-qwen2 --task text-generation-with-past --weight-format fp16 ./ov_models/tiny-random-qwen2
           python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-qwen2/ -d cpu -n 1 --optimum -ic 10
+      - name: Test Prompt Lookup Decoding via GenAI
+        run: |
+          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-qwen2 -p "Why is the Sun yellow?" -d cpu -n 1 --max_ngram_size 3 --num_assistant_tokens 5 -ic 20
       - name: Test Speculative Decoding via GenAI
         run: |
           optimum-cli export openvino --model ./tiny-random-qwen2 --task text-generation-with-past --weight-format int8 ./ov_models/tiny-random-qwen2-int8
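
The new llm_bench step exercises prompt lookup decoding, a draft-model-free variant of speculative decoding that proposes candidate tokens from n-grams already present in the prompt. A rough standalone equivalent of those flags with the GenAI Python API, modeled on the prompt_lookup_decoding_lm sample; treat the exact property and field names as assumptions:

  import openvino_genai

  # Enable prompt lookup decoding on a CPU pipeline (assumed property name).
  pipe = openvino_genai.LLMPipeline('./ov_models/tiny-random-qwen2', 'CPU', prompt_lookup=True)

  config = openvino_genai.GenerationConfig()
  config.max_new_tokens = 20        # mirrors -ic 20 above
  config.num_assistant_tokens = 5   # candidate tokens proposed per step
  config.max_ngram_size = 3         # longest prompt n-gram used for matching

  print(pipe.generate('Why is the Sun yellow?', config))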

.github/workflows/linux.yml (+4 -1)
@@ -323,17 +323,20 @@ jobs:
   genai_tests_wheel:
     name: Python (${{ matrix.test.name}}) Tests (wheel)
     needs: [ openvino_download, genai_build_wheel ]
-    timeout-minutes: 60
+    timeout-minutes: ${{ matrix.test.timeout }}
     strategy:
       fail-fast: false
       matrix:
         test:
           - name: 'Whisper'
             cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 45
           - name: 'Cacheopt E2E'
             cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 60
           - name: 'LLM & VLM'
             cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore tests/python_tests/test_kv_cache_eviction.py --ignore tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 90
     defaults:
       run:
         shell: bash
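
The change works because timeout-minutes accepts an expression, so each matrix entry can carry its own budget instead of all suites sharing one 60-minute cap. Reduced to its essentials, with illustrative entries:

  jobs:
    tests:
      runs-on: ubuntu-latest
      timeout-minutes: ${{ matrix.test.timeout }}  # evaluated once per matrix entry
      strategy:
        matrix:
          test:
            - name: 'fast suite'   # illustrative entries
              timeout: 45
            - name: 'slow suite'
              timeout: 90
      steps:
        - run: echo "running ${{ matrix.test.name }} with a ${{ matrix.test.timeout }}-minute cap"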

.github/workflows/mac.yml (+5 -3)
@@ -336,17 +336,20 @@ jobs:
   genai_tests_wheel:
     name: Python (${{ matrix.test.name}}) Tests (wheel)
     needs: [ openvino_download, genai_build_wheel ]
-    timeout-minutes: 60
+    timeout-minutes: ${{ matrix.test.timeout }}
     strategy:
       fail-fast: false
       matrix:
         test:
           - name: 'Whisper'
             cmd: 'tests/python_tests/test_whisper_pipeline.py'
+            timeout: 45
           - name: 'Cacheopt E2E'
             cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 60
           - name: 'LLM & VLM'
             cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore tests/python_tests/test_kv_cache_eviction.py --ignore tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 90
     defaults:
       run:
         shell: bash
@@ -355,8 +358,7 @@ jobs:
       INSTALL_DIR: ${{ github.workspace }}/install
       SRC_DIR: ${{ github.workspace }}/src
       BUILD_DIR: ${{ github.workspace }}/build
-      TRANSFORMERS_CACHE: ${{ github.workspace }}/models # Hugging Face transformers cache
-      HF_HOME: ${{ github.workspace }}/datasets # Hugging Face datasets cache
+      HF_HOME: ${{ github.workspace }}/hf_cache
 
     steps:
       - name: Clone openvino.genai
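
Beyond the timeout matrix, this file also folds the two Hugging Face cache variables into one: TRANSFORMERS_CACHE is deprecated in recent transformers releases, and HF_HOME serves as the single root for both model and dataset caches. The shell equivalent is simply:

  # One cache root for Hugging Face models and datasets
  # (replaces the deprecated TRANSFORMERS_CACHE variable)
  export HF_HOME="${GITHUB_WORKSPACE}/hf_cache"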

.github/workflows/windows.yml (+4 -1)
@@ -369,17 +369,20 @@ jobs:
   genai_tests_wheel:
     name: Python (${{ matrix.test.name}}) Tests (wheel)
     needs: [ openvino_download, genai_build_wheel ]
-    timeout-minutes: 60
+    timeout-minutes: ${{ matrix.test.timeout }}
     strategy:
       fail-fast: false
       matrix:
         test:
           - name: 'Whisper'
             cmd: 'tests/python_tests/test_whisper_pipeline.py tests/python_tests/test_whisper_pipeline_static.py'
+            timeout: 45
           - name: 'Cacheopt E2E'
             cmd: 'tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 60
           - name: 'LLM & VLM'
             cmd: 'tests/python_tests --ignore tests/python_tests/test_whisper_pipeline.py --ignore tests/python_tests/test_whisper_pipeline_static.py --ignore tests/python_tests/test_kv_cache_eviction.py'
+            timeout: 90
     defaults:
       run:
         shell: pwsh

CMakeLists.txt (+1 -1)
@@ -107,7 +107,7 @@ if(NOT DEFINED CPACK_ARCHIVE_COMPONENT_INSTALL)
 endif()
 set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
 # Workaround https://gitlab.kitware.com/cmake/cmake/-/issues/2614
-set(CPACK_COMPONENTS_ALL core_genai core_genai_dev cpp_samples_genai licensing_genai openvino_tokenizers openvino_tokenizers_docs)
+set(CPACK_COMPONENTS_ALL core_genai core_genai_dev core_c_genai core_c_genai_dev cpp_samples_genai licensing_genai openvino_tokenizers openvino_tokenizers_docs)
 if(ENABLE_PYTHON)
   list(APPEND CPACK_COMPONENTS_ALL pygenai_${Python3_VERSION_MAJOR}_${Python3_VERSION_MINOR})
 endif()
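
The two added entries register the new C API components with CPack so they are packaged alongside the C++ ones (with CPACK_ARCHIVE_COMPONENT_INSTALL enabled, each listed component becomes its own archive). Listed components can also be installed individually from a build tree, for example (component names taken from the line above; paths illustrative):

  # Install only the C API runtime and its headers
  cmake --install ./build --config Release --component core_c_genai --prefix ./install
  cmake --install ./build --config Release --component core_c_genai_dev --prefix ./install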
