
Commit 6877837

Applied comments.
1 parent 82131f3 commit 6877837

File tree

4 files changed: +8 -9 lines changed


.github/workflows/causal_lm_cpp.yml

+1 -1

@@ -717,7 +717,7 @@ jobs:
           source ./ov/setupvars.sh
           export PYTHONPATH=./build/:$PYTHONPATH
           printf 'What is on the image?\nWhat is special on the image?\n' > ./input.txt
-          timeout 300s python ./samples/python/vlm_chat_sample/vlm_chat_sample.py ./miniCPM-V-2_6/ d5fbbd1a-d484-415c-88cb-9986625b7b11 < input.txt > ./pred.txt || ( [[ $? -eq 124 ]] && \
+          timeout 120s python ./samples/python/vlm_chat_sample/vlm_chat_sample.py ./miniCPM-V-2_6/ d5fbbd1a-d484-415c-88cb-9986625b7b11 < input.txt > ./pred.txt || ( [[ $? -eq 124 ]] && \
           echo "Timeout reached, but it's excpected." )
 
   cpp-continuous-batching-ubuntu:
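The guard in this step works because GNU `timeout` exits with status 124 when it kills a command for overrunning, so the workflow can accept a timeout while still failing on real errors. A rough Python equivalent of the same guard, shown only as an illustrative sketch (the paths and the 120-second budget mirror the step above):

import subprocess

try:
    # Feed the prompts from input.txt and capture the model output, mirroring
    # `timeout 120s python vlm_chat_sample.py ... < input.txt > pred.txt`.
    with open("input.txt") as prompts, open("pred.txt", "w") as pred:
        subprocess.run(
            ["python", "./samples/python/vlm_chat_sample/vlm_chat_sample.py",
             "./miniCPM-V-2_6/", "d5fbbd1a-d484-415c-88cb-9986625b7b11"],
            stdin=prompts, stdout=pred, timeout=120, check=True,
        )
except subprocess.TimeoutExpired:
    # GNU timeout reports this case as exit code 124; the workflow treats it as expected.
    print("Timeout reached, but it's expected.")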

samples/python/vlm_chat_sample/README.md

+2 -2

@@ -1,6 +1,6 @@
 # Python vlm_chat_sample that supports VLM models
 
-This example showcases inference of text-generation Vision Language Models (VLMs): `miniCPM-V-2_6` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.VLMPipeline` and configures it for the chat scenario. #TODO: add a link to notebook when available
+This example showcases inference of text-generation Vision Language Models (VLMs): `miniCPM-V-2_6` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.VLMPipeline` and configures it for the chat scenario. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/minicpm-v-multimodal-chatbot) which provides an example of a visual-language assistant.
 
 ## Download and convert the model and tokenizers
 
@@ -14,7 +14,7 @@ pip install --upgrade-strategy eager -r ../../requirements.txt
 # TODO: add optimum cli command for miniCPM-V-2_6 when available
 
 ## Run:
-https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 can be used as a sample image.
+[This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image.
 
 `vlm_chat_sample.py ./miniCPM-V-2_6/ 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg`
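To make the command above concrete, here is a minimal sketch of such a chat session, assuming the `openvino_genai.VLMPipeline` API described by the bindings below; the `load_image` helper is illustrative rather than part of the sample, and it uses `pillow` (added to the requirements in this commit) to decode the image:

import numpy as np
import openvino
import openvino_genai
from PIL import Image

def load_image(path):
    # Illustrative helper: decode the image with pillow and wrap the
    # RGB pixel array in an openvino.Tensor for the pipeline.
    pixels = np.array(Image.open(path).convert("RGB"), dtype=np.uint8)
    return openvino.Tensor(pixels)

pipe = openvino_genai.VLMPipeline("./miniCPM-V-2_6/", "CPU")
image = load_image("319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg")

pipe.start_chat()
print(pipe.generate("What is on the image?", images=[image], max_new_tokens=100))
pipe.finish_chat()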

samples/requirements.txt

+2 -1

@@ -3,4 +3,5 @@ optimum[openvino]==1.22.0
 einops==0.8.0 # For Qwen
 transformers_stream_generator==0.0.5 # For Qwen
 diffusers==0.30.3
-torchvision
+pillow
+torchvision # needed for the mini-CPM export script; remove when we switch to exporting with optimum-intel

src/python/py_vlm_pipeline.cpp

+3 -5

@@ -109,14 +109,13 @@ void init_vlm_pipeline(py::module_& m) {
             ScopedVar env_manager(utils::ov_tokenizers_module_path());
             return std::make_unique<ov::genai::VLMPipeline>(model_path, device, utils::properties_to_any_map(config));
         }),
-        py::arg("model_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
+        py::arg("model_path"), "folder with exported model files",
         py::arg("device") = "CPU", "device on which inference will be done",
         py::arg("config") = ov::AnyMap({}), "openvino.properties map"
         R"(
             VLMPipeline class constructor.
-            model_path (str): Path to the model file.
+            model_path (str): Path to the folder with exported model files.
             device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
-            Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
         )")
 
     .def(py::init([](
@@ -134,10 +133,9 @@ void init_vlm_pipeline(py::module_& m) {
         py::arg("config") = ov::AnyMap({}), "openvino.properties map"
         R"(
             VLMPipeline class constructor for manualy created openvino_genai.Tokenizer.
-            model_path (str): Path to the model file.
+            model_path (str): Path to the folder with exported model files.
             tokenizer (openvino_genai.Tokenizer): tokenizer object.
             device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
-            Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
         )")
     .def("start_chat", &ov::genai::VLMPipeline::start_chat)
     .def("finish_chat", &ov::genai::VLMPipeline::finish_chat)
