Skip to content

Commit 9bcadf7

Browse files
[Prompt lookup] (openvinotoolkit#1245)
*Description:* * Implementation of Prompt lookup decoding based on continuous batching pipeline (cb_promp_lookup_impl + prompt_lookup_impl) * Update `prompt_lookup_sample` to use new API * Update statistic to make of printing more usable *Ticket:* * https://jira.devtools.intel.com/browse/CVS-137987 *Example of usage:* * **Input:** `return 0;` * **Result Prompt lookup:** ``` =============================== Total duration, ms: 3.02267 Draft model duration, ms: 0.000724718 Main model duration, ms: 3.02195 Draft model duration, %: 0.0239761 Main model duration, %: 99.976 AVG acceptance rate, %: 10.8333 =============================== Request_id: 0 ||| 0 0 0 0 0 0 0 0 20 20 0 0 0 0 20 100 80 0 0 0 0 0 0 60 0 0 20 0 0 0 0 0 20 0 0 50 ``` * **Result Greedy:** ``` =============================== Total duration, ms: 3.18111 Draft model duration, ms: 1.538e-06 Main model duration, ms: 3.18111 Draft model duration, %: 4.83479e-05 Main model duration, %: 100 AVG acceptance rate, %: -nan =============================== ``` * **Speedup**: 100 Generated tokens: 5.24% && 300 Generated tokens: 81% (9.42 vs 5.19) --------- Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
1 parent 7d2a303 commit 9bcadf7

27 files changed

+606
-406
lines changed

.github/workflows/causal_lm_cpp.yml

+8-23
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,6 @@ jobs:
491491
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
492492
python -m pip install -r ./samples/requirements.txt
493493
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
494-
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat --task text-generation-with-past
495494
- name: run and compare
496495
run: |
497496
source ./ov/setupvars.sh
@@ -505,36 +504,22 @@ jobs:
505504
506505
./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
507506
./build/samples/cpp/text_generation/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
507+
python ./samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_py.txt
508508
python -c "
509509
with open('predictions_greedy.txt', 'r') as f:
510510
predicted_greedy = f.readline()
511511
with open('predictions_prompt_lookup.txt', 'r') as f:
512512
predicted_prompt_lookup = f.readline()
513+
with open('predictions_py.txt', 'r') as f:
514+
predicted_prompt_lookup_py = f.readline()
513515
assert predicted_greedy == predicted_prompt_lookup
516+
assert predicted_greedy == predicted_prompt_lookup_py
517+
assert predicted_prompt_lookup == predicted_prompt_lookup_py
514518
"
515519
echo "Prompt lookup" passed
516-
- name: run and compare (model with seq_length_axis = 1)
517-
run: |
518-
source ./ov/setupvars.sh
519-
520-
echo 'Code:```python
521-
def add(a, b):
522-
return a + b
523-
```
524-
Question: Can you please add 2 and 3
525-
A:' > ./prompt.txt
526-
527-
./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
528-
./build/samples/cpp/text_generation/greedy_causal_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_greedy.txt
529-
python -c "
530-
with open('predictions_greedy.txt', 'r') as f:
531-
predicted_greedy = f.readline()
532-
with open('predictions_prompt_lookup.txt', 'r') as f:
533-
predicted_prompt_lookup = f.readline()
534-
assert predicted_greedy == predicted_prompt_lookup
535-
"
536-
echo "Prompt lookup" passed
537-
520+
env:
521+
PYTHONPATH: "./build/:$PYTHONPATH"
522+
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH"
538523
cpp-Phi-1_5:
539524
runs-on: ubuntu-20.04-16-cores
540525
defaults:
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,23 @@
11
# Copyright (C) 2023-2024 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4-
find_package(OpenVINO REQUIRED COMPONENTS Runtime Threading)
5-
64
find_package(OpenVINOGenAI REQUIRED
75
PATHS
86
"${CMAKE_BINARY_DIR}" # Reuse the package from the build.
97
${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO.
108
NO_CMAKE_FIND_ROOT_PATH
119
)
1210

13-
add_executable(prompt_lookup_decoding_lm prompt_lookup_decoding_lm.cpp)
14-
target_link_libraries(prompt_lookup_decoding_lm PRIVATE openvino::runtime openvino::threading)
15-
set_target_properties(prompt_lookup_decoding_lm PROPERTIES
16-
COMPILE_PDB_NAME prompt_lookup_decoding_lm
11+
set(TARGET_NAME prompt_lookup_decoding_lm)
12+
add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp)
13+
target_link_libraries(${TARGET_NAME} PRIVATE openvino::genai)
14+
15+
set_target_properties(${TARGET_NAME} PROPERTIES
16+
COMPILE_PDB_NAME ${TARGET_NAME}
1717
# Ensure out of box LC_RPATH on macOS with SIP
1818
INSTALL_RPATH_USE_LINK_PATH ON)
19-
target_compile_features(prompt_lookup_decoding_lm PRIVATE cxx_std_17)
20-
21-
get_target_property(genai_imported openvino::genai IMPORTED_LOCATION)
22-
set(OPENVINO_TOKENIZERS_PATH $<IF:$<BOOL:${genai_imported}>,${genai_imported},$<TARGET_FILE_DIR:openvino::genai>>)
23-
set(OPENVINO_TOKENIZERS_FILENAME "${CMAKE_SHARED_LIBRARY_PREFIX}openvino_tokenizers${CMAKE_SHARED_LIBRARY_SUFFIX}")
24-
target_compile_definitions(prompt_lookup_decoding_lm PRIVATE
25-
OPENVINO_TOKENIZERS_PATH="${OPENVINO_TOKENIZERS_PATH}/${OPENVINO_TOKENIZERS_FILENAME}")
2619

27-
install(TARGETS prompt_lookup_decoding_lm
20+
install(TARGETS ${TARGET_NAME}
2821
RUNTIME DESTINATION samples_bin/
2922
COMPONENT samples_bin
3023
EXCLUDE_FROM_ALL)

0 commit comments

Comments
 (0)