diff --git a/site/docs/guides/chat-scenario.mdx b/site/docs/guides/chat-scenario.mdx new file mode 100644 index 0000000000..e2da1ac13c --- /dev/null +++ b/site/docs/guides/chat-scenario.mdx @@ -0,0 +1,82 @@ +--- +sidebar_position: 2 +title: Chat Scenario +--- + +# Using OpenVINO GenAI in Chat Scenario + +For chat applications, OpenVINO GenAI provides special optimizations to maintain conversation context and improve performance using KV-cache. + +Refer to the [Stateful Models vs Stateless Models](/docs/concepts/stateful-vs-stateless-models) for more information about KV-cache. + +:::tip +Use `start_chat()` and `finish_chat()` to properly manage the chat session's KV-cache. This improves performance by reusing context between messages. +::: + +:::info +Chat mode is supported for both `LLMPipeline` and `VLMPipeline`. +::: + +A simple chat example (with grouped beam search decoding): + + + + ```python showLineNumbers + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, 'CPU') + + config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5} + pipe.set_generation_config(config) + + # highlight-next-line + pipe.start_chat() + while True: + try: + prompt = input('question:\n') + except EOFError: + break + answer = pipe.generate(prompt) + print('answer:\n') + print(answer) + print('\n----------\n') + # highlight-next-line + pipe.finish_chat() + ``` + + + ```cpp showLineNumbers + #include "openvino/genai/llm_pipeline.hpp" + #include + + int main(int argc, char* argv[]) { + std::string prompt; + + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + config.num_beam_groups = 3; + config.num_beams = 15; + config.diversity_penalty = 1.0f; + + // highlight-next-line + pipe.start_chat(); + std::cout << "question:\n"; + while (std::getline(std::cin, prompt)) { + std::cout << "answer:\n"; + auto answer = pipe.generate(prompt, config); + std::cout << answer << std::endl; + std::cout << "\n----------\n" + "question:\n"; + } + // highlight-next-line + pipe.finish_chat(); + } + ``` + + + +:::info +For more information, refer to the [Python](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/chat_sample.py) and [C++](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/chat_sample.cpp) chat samples. +::: diff --git a/site/docs/guides/model-preparation/convert-to-openvino.mdx b/site/docs/guides/model-preparation/convert-to-openvino.mdx index ad66f54722..14b9bee195 100644 --- a/site/docs/guides/model-preparation/convert-to-openvino.mdx +++ b/site/docs/guides/model-preparation/convert-to-openvino.mdx @@ -8,7 +8,8 @@ import UseCasesNote from './_use_cases_note.mdx'; # Convert Models to OpenVINO Format -This page explains how to convert various generative AI models from Hugging Face and ModelScope to OpenVINO IR format. Refer to the [Supported Models](../../supported-models/index.mdx) for a list of available models. +This page explains how to convert various generative AI models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/) to OpenVINO IR format. +Refer to the [Supported Models](../../supported-models/index.mdx) for a list of available models. For downloading pre-converted models, see [Download Pre-Converted OpenVINO Models](./download-openvino-models.mdx). 
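As a rough illustration of the conversion flow this page describes, the sketch below uses optimum-intel's Python API instead of the `optimum-cli export openvino` command documented elsewhere in these docs. The model ID and output folder are only examples; the CLI remains the recommended route because it also exports the tokenizer/detokenizer models that OpenVINO GenAI pipelines expect.

```python
# Sketch: export a Hugging Face model to OpenVINO IR via optimum-intel (assumes optimum[openvino] is installed).
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"   # illustrative model ID
export_dir = "TinyLlama-1.1B-Chat-v1.0-ov"        # illustrative output folder

model = OVModelForCausalLM.from_pretrained(model_id, export=True)  # converts to OpenVINO IR on the fly
model.save_pretrained(export_dir)                                  # writes openvino_model.xml/.bin
AutoTokenizer.from_pretrained(model_id).save_pretrained(export_dir)
```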
diff --git a/site/docs/guides/model-preparation/download-openvino-models.mdx b/site/docs/guides/model-preparation/download-openvino-models.mdx index fd514a9bff..d3cf2c9983 100644 --- a/site/docs/guides/model-preparation/download-openvino-models.mdx +++ b/site/docs/guides/model-preparation/download-openvino-models.mdx @@ -8,7 +8,7 @@ import UseCasesNote from './_use_cases_note.mdx'; # Download Pre-Converted OpenVINO Models OpenVINO GenAI allows to run different generative AI models (see [Supported Models](../../supported-models/index.mdx)). -While you can convert models from other frameworks (see [Convert Models to OpenVINO Format](./convert-to-openvino.mdx)), using pre-converted models can save time and effort. +While you can convert models from other frameworks (see [Convert Models to OpenVINO Format](./convert-to-openvino.mdx)), using pre-converted models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/) can save time and effort. ## Download from Hugging Face diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_chat_scenario.mdx b/site/docs/guides/streaming.mdx similarity index 65% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_chat_scenario.mdx rename to site/docs/guides/streaming.mdx index 9a8f197117..550c36e256 100644 --- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_chat_scenario.mdx +++ b/site/docs/guides/streaming.mdx @@ -1,76 +1,16 @@ -### Using GenAI in Chat Scenario +--- +sidebar_position: 3 +--- -For chat applications, OpenVINO GenAI provides special optimizations to maintain conversation context and improve performance using KV-cache. - -:::tip -Use `start_chat()` and `finish_chat()` to properly manage the chat session's KV-cache. This improves performance by reusing context between messages. -::: - -A simple chat example (with grouped beam search decoding): - - - - ```python showLineNumbers - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path, 'CPU') - - config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5} - pipe.set_generation_config(config) - - # highlight-next-line - pipe.start_chat() - while True: - try: - prompt = input('question:\n') - except EOFError: - break - answer = pipe.generate(prompt) - print('answer:\n') - print(answer) - print('\n----------\n') - # highlight-next-line - pipe.finish_chat() - ``` - - - ```cpp showLineNumbers - #include "openvino/genai/llm_pipeline.hpp" - #include - - int main(int argc, char* argv[]) { - std::string prompt; - - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); - - ov::genai::GenerationConfig config; - config.max_new_tokens = 100; - config.num_beam_groups = 3; - config.num_beams = 15; - config.diversity_penalty = 1.0f; - - // highlight-next-line - pipe.start_chat(); - std::cout << "question:\n"; - while (std::getline(std::cin, prompt)) { - std::cout << "answer:\n"; - auto answer = pipe.generate(prompt, config); - std::cout << answer << std::endl; - std::cout << "\n----------\n" - "question:\n"; - } - // highlight-next-line - pipe.finish_chat(); - } - ``` - - - -#### Streaming the Output +# Streaming the Output For more interactive UIs during generation, you can stream output tokens. -##### Streaming Function +:::info +Streaming is supported for both `LLMPipeline` and `VLMPipeline`. 
+::: + +## Streaming Function In this example, a function outputs words to the console immediately upon generation: @@ -138,11 +78,7 @@ In this example, a function outputs words to the console immediately upon genera -:::info -For more information, refer to the [chat sample](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/chat_sample/). -::: - -##### Custom Streamer Class +## Custom Streamer Class You can also create your custom streamer for more sophisticated processing: @@ -210,7 +146,7 @@ You can also create your custom streamer for more sophisticated processing: int main(int argc, char* argv[]) { std::string prompt; // highlight-next-line - CustomStreamer custom_streamer; + std::shared_ptr custom_streamer; std::string model_path = argv[1]; ov::genai::LLMPipeline pipe(model_path, "CPU"); @@ -232,5 +168,5 @@ You can also create your custom streamer for more sophisticated processing: :::info -For fully implemented iterable CustomStreamer refer to [multinomial_causal_lm](https://github.com/openvinotoolkit/openvino.genai/blob/releases/2025/0/samples/python/text_generation/multinomial_causal_lm.py) sample. +For fully implemented iterable `CustomStreamer` refer to [multinomial_causal_lm](https://github.com/openvinotoolkit/openvino.genai/blob/releases/2025/0/samples/python/text_generation/multinomial_causal_lm.py) sample. ::: diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_generation_parameters.mdx b/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_generation_parameters.mdx deleted file mode 100644 index edf4313be6..0000000000 --- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_generation_parameters.mdx +++ /dev/null @@ -1,127 +0,0 @@ -### Use Different Generation Parameters - -Fine-tune your LLM's output by adjusting various generation parameters. -OpenVINO GenAI supports multiple sampling strategies and generation configurations to help you achieve the desired balance between deterministic and creative outputs. - -#### Basic Generation Configuration - -1. Get the model default config with `get_generation_config()` -2. Modify parameters -3. Apply the updated config: - - Use `set_generation_config(config)` - - Pass config directly to `generate()` (e.g. `generate(prompt, config)`) - - Specify options as inputs in the `generate()` method (e.g. `generate(prompt, max_new_tokens=100)`) - - - - ```python - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path, "CPU") - - # Get default configuration - config = pipe.get_generation_config() - - # Modify parameters - config.max_new_tokens = 100 - config.temperature = 0.7 - config.top_k = 50 - config.top_p = 0.9 - config.repetition_penalty = 1.2 - - # Generate text with custom configuration - output = pipe.generate("The Sun is yellow because", config) - ``` - - - ```cpp - int main() { - ov::genai::LLMPipeline pipe(model_path, "CPU"); - - // Get default configuration - auto config = pipe.get_generation_config(); - - // Modify parameters - config.max_new_tokens = 100; - config.temperature = 0.7f; - config.top_k = 50; - config.top_p = 0.9f; - config.repetition_penalty = 1.2f; - - // Generate text with custom configuration - auto output = pipe.generate("The Sun is yellow because", config); - } - ``` - - - -:::info Understanding Basic Generation Parameters - -- `max_new_tokens`: The maximum numbers of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`. 
-- `temperature`: Controls the level of creativity in AI-generated text: - - Low temperature (e.g. 0.2) leads to more focused and deterministic output, choosing tokens with the highest probability. - - Medium temperature (e.g. 1.0) maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias. - - High temperature (e.g. 2.0) makes output more creative and adventurous, increasing the chances of selecting less likely tokens. -- `top_k`: Limits token selection to the k most likely next tokens. Higher values allow more diverse outputs. -- `top_p`: Selects from the smallest set of tokens whose cumulative probability exceeds p. Helps balance diversity and quality. -- `repetition_penalty`: Reduces the likelihood of repeating tokens. Values above 1.0 discourage repetition. - -For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig). - -::: - - -#### Optimizing Generation with Grouped Beam Search - -Beam search helps explore multiple possible text completions simultaneously, often leading to higher quality outputs. - - - - ```python - import openvino_genai as ov_genai - pipe = ov_genai.LLMPipeline(model_path, "CPU") - - # Get default generation config - config = pipe.get_generation_config() - - # Modify parameters - config.max_new_tokens = 256 - config.num_beams = 15 - config.num_beam_groups = 3 - config.diversity_penalty = 1.0 - - # Generate text with custom configuration - print(pipe.generate("The Sun is yellow because", config)) - ``` - - - ```cpp - int main(int argc, char* argv[]) { - std::string model_path = argv[1]; - ov::genai::LLMPipeline pipe(model_path, "CPU"); - - // Get default generation config - ov::genai::GenerationConfig config = pipe.get_generation_config(); - - // Modify parameters - config.max_new_tokens = 256; - config.num_beams = 15; - config.num_beam_groups = 3; - config.diversity_penalty = 1.0f; - - // Generate text with custom configuration - cout << pipe.generate("The Sun is yellow because", config); - } - ``` - - - -:::info Understanding Beam Search Generation Parameters - -- `max_new_tokens`: The maximum numbers of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`. -- `num_beams`: The number of beams for beam search. 1 disables beam search. -- `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. -- `diversity_penalty`: value is subtracted from a beam's score if it generates the same token as any beam from other group at a particular time. - -For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig). 
- -::: diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/index.mdx b/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/index.mdx deleted file mode 100644 index f47f23e1b2..0000000000 --- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/index.mdx +++ /dev/null @@ -1,18 +0,0 @@ -import ChatScenario from './_chat_scenario.mdx'; -import GenerationParameters from './_generation_parameters.mdx'; -import LoraAdapters from './_lora_adapters.mdx'; -import SpeculativeDecoding from './_speculative_decoding.mdx'; - -## Additional Usage Options - -:::tip -Check out our [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp) samples. -::: - - - - - - - - diff --git a/site/docs/use-cases/1-LLM-pipeline/index.mdx b/site/docs/use-cases/1-LLM-pipeline/index.mdx deleted file mode 100644 index 7b394ef719..0000000000 --- a/site/docs/use-cases/1-LLM-pipeline/index.mdx +++ /dev/null @@ -1,14 +0,0 @@ ---- -sidebar_position: 1 ---- -import ConvertModelSection from './_sections/_convert_model.mdx'; -import RunModelSection from './_sections/_run_model/index.mdx'; -import UsageOptionsSection from './_sections/_usage_options/index.mdx'; - -# Text Generation Using LLMs - - - - - - diff --git a/site/docs/use-cases/2-Image-Generation/index.mdx b/site/docs/use-cases/2-Image-Generation/index.mdx deleted file mode 100644 index a08ae53bb7..0000000000 --- a/site/docs/use-cases/2-Image-Generation/index.mdx +++ /dev/null @@ -1,14 +0,0 @@ ---- -sidebar_position: 1 ---- -import ConvertModelSection from '../1-LLM-pipeline/_sections/_convert_model.mdx'; -import RunModelSection from './_sections/_run_model/index.mdx'; -import UsageOptionsSection from './_sections/_usage_options/index.mdx'; - -# Image Generation Using Diffusers - - - - - - diff --git a/site/docs/use-cases/3-Processing-speech-whisper.md b/site/docs/use-cases/3-Processing-speech-whisper.md deleted file mode 100644 index 8d151624f6..0000000000 --- a/site/docs/use-cases/3-Processing-speech-whisper.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Processing Speech Using Whisper diff --git a/site/docs/use-cases/4-Processing-images-using-VLMs.md b/site/docs/use-cases/4-Processing-images-using-VLMs.md deleted file mode 100644 index 0cb1438342..0000000000 --- a/site/docs/use-cases/4-Processing-images-using-VLMs.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -sidebar_position: 1 ---- - -# Processing Images Using VLMs diff --git a/site/docs/use-cases/_shared/_basic_generation_configuration.mdx b/site/docs/use-cases/_shared/_basic_generation_configuration.mdx new file mode 100644 index 0000000000..021808897e --- /dev/null +++ b/site/docs/use-cases/_shared/_basic_generation_configuration.mdx @@ -0,0 +1,26 @@ +#### Basic Generation Configuration + +1. Get the model default config with `get_generation_config()` +2. Modify parameters +3. Apply the updated config using one of the following methods: + - Use `set_generation_config(config)` + - Pass config directly to `generate()` (e.g. `generate(prompt, config)`) + - Specify options as inputs in the `generate()` method (e.g. `generate(prompt, max_new_tokens=100)`) + +{/* Python and C++ code examples */} +{props.children} + +:::info Understanding Basic Generation Parameters + +- `max_new_tokens`: The maximum numbers of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`. 
+- `temperature`: Controls the level of creativity in AI-generated text: + - Low temperature (e.g. 0.2) leads to more focused and deterministic output, choosing tokens with the highest probability. + - Medium temperature (e.g. 1.0) maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias. + - High temperature (e.g. 2.0) makes output more creative and adventurous, increasing the chances of selecting less likely tokens. +- `top_k`: Limits token selection to the k most likely next tokens. Higher values allow more diverse outputs. +- `top_p`: Selects from the smallest set of tokens whose cumulative probability exceeds p. Helps balance diversity and quality. +- `repetition_penalty`: Reduces the likelihood of repeating tokens. Values above 1.0 discourage repetition. + +For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig). + +::: diff --git a/site/docs/use-cases/_shared/_chat_scenario.mdx b/site/docs/use-cases/_shared/_chat_scenario.mdx new file mode 100644 index 0000000000..fbf5b36e45 --- /dev/null +++ b/site/docs/use-cases/_shared/_chat_scenario.mdx @@ -0,0 +1,3 @@ +### Using OpenVINO GenAI in Chat Scenario + +Refer to the [Chat Scenario](/docs/guides/chat-scenario) guide for more information on using OpenVINO GenAI in chat applications. diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_convert_model.mdx b/site/docs/use-cases/_shared/_convert_model.mdx similarity index 73% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_convert_model.mdx rename to site/docs/use-cases/_shared/_convert_model.mdx index 9867d52fda..8d6e1153c3 100644 --- a/site/docs/use-cases/1-LLM-pipeline/_sections/_convert_model.mdx +++ b/site/docs/use-cases/_shared/_convert_model.mdx @@ -1,3 +1,8 @@ ## Convert and Optimize Model +{/* optimum-cli export code examples */} +{props.children} + +:::info Refer to the [Model Preparation](/docs/category/model-preparation) guide for detailed instructions on how to download, convert and optimize models for OpenVINO GenAI. +::: diff --git a/site/docs/use-cases/_shared/_streaming.mdx b/site/docs/use-cases/_shared/_streaming.mdx new file mode 100644 index 0000000000..f09a14c266 --- /dev/null +++ b/site/docs/use-cases/_shared/_streaming.mdx @@ -0,0 +1,3 @@ +### Streaming the Output + +Refer to the [Streaming](/docs/guides/streaming) guide for more information on streaming the output with OpenVINO GenAI. 
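For orientation, a minimal streaming sketch with `LLMPipeline` is shown below — the pattern the Streaming guide expands on. The model folder name and prompt are placeholders, and the callback return convention may differ between releases (older releases use a `bool`, newer ones a `StreamingStatus` enum).

```python
import openvino_genai as ov_genai

# Folder name is a placeholder for a model exported with optimum-cli
pipe = ov_genai.LLMPipeline("TinyLlama-1.1B-Chat-v1.0-ov", "CPU")

def streamer(subword: str) -> bool:
    # Print each decoded chunk as soon as it is generated
    print(subword, end="", flush=True)
    return False  # False means "keep generating"; return True to stop early

pipe.generate("What is OpenVINO?", max_new_tokens=100, streamer=streamer)
```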
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_cpp.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx similarity index 100% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_cpp.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_python.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx similarity index 100% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_python.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_cpp.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx similarity index 100% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_cpp.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_python.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx similarity index 100% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_python.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_cpp.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx similarity index 100% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_cpp.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_python.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx similarity index 100% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_python.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/index.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx similarity index 85% rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/index.mdx rename to site/docs/use-cases/image-generation/_sections/_run_model/index.mdx index f342482981..aab59d23da 100644 --- a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/index.mdx +++ b/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx @@ -10,11 +10,9 @@ import InpaintingPython from './_inpainting_python.mdx'; ## Run Model Using OpenVINO GenAI OpenVINO GenAI supports the following diffusion model pipelines: -- `Text2ImagePipeline` for creating images from text prompts. -- `Image2ImagePipeline` for modifying existing images based on prompts. -- `InpaintingPipeline` for selectively replacing portions of images using masks. - -See all supported [image generation models](/docs/supported-models/#image-generation-models). +- [`Text2ImagePipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.Text2ImagePipeline.html) for creating images from text prompts. 
+- [`Image2ImagePipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.Image2ImagePipeline.html) for modifying existing images based on prompts. +- [`InpaintingPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.InpaintingPipeline.html) for selectively replacing portions of images using masks. ### `Text2ImagePipeline` diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx similarity index 90% rename from site/docs/use-cases/2-Image-Generation/_sections/_usage_options/index.mdx rename to site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx index 251bde4bf9..5439e56101 100644 --- a/site/docs/use-cases/2-Image-Generation/_sections/_usage_options/index.mdx +++ b/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx @@ -1,7 +1,7 @@ ## Additional Usage Options :::tip -Check out our [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp) samples. +Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/image_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/image_generation) image generation samples. ::: ### Use Different Generation Parameters @@ -65,7 +65,7 @@ You can adjust several parameters to control the image generation process, inclu - `guidance_scale`: Balances prompt adherence vs. creativity. Higher values follow prompt more strictly, lower values allow more creative freedom. - `rng_seed`: Controls randomness for reproducible results. Same seed produces identical images across runs. -For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.Text2ImagePipeline.html#openvino_genai.Text2ImagePipeline.generate). +For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.ImageGenerationConfig.html). ::: diff --git a/site/docs/use-cases/image-generation/index.mdx b/site/docs/use-cases/image-generation/index.mdx new file mode 100644 index 0000000000..6dbf946c73 --- /dev/null +++ b/site/docs/use-cases/image-generation/index.mdx @@ -0,0 +1,21 @@ +--- +sidebar_position: 2 +--- +import OptimumCLI from '@site/src/components/OptimumCLI'; +import ConvertModelSection from '../_shared/_convert_model.mdx'; +import RunModelSection from './_sections/_run_model/index.mdx'; +import UsageOptionsSection from './_sections/_usage_options/index.mdx'; + +# Image Generation Using Diffusers + + + Download and convert model (e.g. [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)) to OpenVINO format from Hugging Face: + + + + See all supported [Image Generation Models](/docs/supported-models/#image-generation-models). 
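To make the `Text2ImagePipeline` flow above concrete, here is a minimal sketch; the exported model folder name and prompt are assumptions, and the Run Model section holds the full examples.

```python
import openvino_genai as ov_genai
from PIL import Image

# Folder produced by the optimum-cli export step; "CPU" can be swapped for "GPU" without other changes
pipe = ov_genai.Text2ImagePipeline("stable-diffusion-xl-base-1.0-ov", "CPU")

image_tensor = pipe.generate(
    "a cozy cabin in a snowy forest, digital art",  # prompt is illustrative
    width=512,
    height=512,
    num_inference_steps=20,
)

# The result is an ov.Tensor holding one uint8 image; convert and save it with Pillow
Image.fromarray(image_tensor.data[0]).save("result.png")
```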
+ + + + + diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx new file mode 100644 index 0000000000..c10c8be4ca --- /dev/null +++ b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx @@ -0,0 +1,21 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`#include "openvino/genai/visual_language/pipeline.hpp" +#include "load_image.hpp" +#include + +int main(int argc, char* argv[]) { + std::string models_path = argv[1], images_path = argv[2];; + std::vector images = utils::load_images(images_path); + + ov::genai::VLMPipeline pipe(models_path, "${props.device || 'CPU'}"); + ov::genai::VLMDecodedResults result = pipe.generate( + prompt, + ov::genai::images(images), + ov::genai::max_new_tokens(100) + ); + std::cout << result.texts[0] << std::endl; +} +`} + diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx new file mode 100644 index 0000000000..3e308b555a --- /dev/null +++ b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx @@ -0,0 +1,27 @@ +import CodeBlock from '@theme/CodeBlock'; + + +{`import openvino_genai as ov_genai +import openvino as ov +from PIL import Image +import numpy as np +from pathlib import Path + +def read_image(path: str) -> ov.Tensor: + pic = Image.open(path).convert("RGB") + image_data = np.array(pic)[None] + return ov.Tensor(image_data) + +def read_images(path: str) -> list[ov.Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + +images = read_images("./images") + +pipe = ov_genai.VLMPipeline(model_path, "${props.device || 'CPU'}") +result = pipe.generate(prompt, images=images, max_new_tokens=100) +print(result.texts[0]) +`} + diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx new file mode 100644 index 0000000000..b5082eb1ef --- /dev/null +++ b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx @@ -0,0 +1,36 @@ +import CodeExampleCPP from './_code_example_cpp.mdx'; +import CodeExamplePython from './_code_example_python.mdx'; + +## Run Model Using OpenVINO GenAI + +OpenVINO GenAI introduces the [`VLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.VLMPipeline.html) pipeline for inference of multimodal text-generation Vision Language Models (VLMs). +It can generate text from a text prompt and images as inputs. + + + + + + + + + + + + + + + + + + + + + + + + +:::tip + +Use CPU or GPU as devices without any other code change. 
+ +::: diff --git a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx new file mode 100644 index 0000000000..e8ab518763 --- /dev/null +++ b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx @@ -0,0 +1,61 @@ +import BasicGenerationConfiguration from '@site/docs/use-cases/_shared/_basic_generation_configuration.mdx'; +import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx'; +import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx'; + +## Additional Usage Options + +:::tip +Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) visual language chat samples. +::: + +### Use Different Generation Parameters + +Similar to [text generation](/docs/use-cases/text-generation/#use-different-generation-parameters), VLM pipelines support various generation parameters to control the text output. + + + + + ```python + import openvino_genai as ov_genai + pipe = ov_genai.VLMPipeline(model_path, "CPU") + + # Get default configuration + config = pipe.get_generation_config() + + # Modify parameters + config.max_new_tokens = 100 + config.temperature = 0.7 + config.top_k = 50 + config.top_p = 0.9 + config.repetition_penalty = 1.2 + + # Generate text with custom configuration + output = pipe.generate(prompt, images, config) + ``` + + + ```cpp + int main() { + ov::genai::VLMPipeline pipe(model_path, "CPU"); + + // Get default configuration + auto config = pipe.get_generation_config(); + + // Modify parameters + config.max_new_tokens = 100; + config.temperature = 0.7f; + config.top_k = 50; + config.top_p = 0.9f; + config.repetition_penalty = 1.2f; + + // Generate text with custom configuration + auto output = pipe.generate(prompt, images, config); + } + ``` + + + + + + + diff --git a/site/docs/use-cases/image-processing/index.mdx b/site/docs/use-cases/image-processing/index.mdx new file mode 100644 index 0000000000..b6605e9242 --- /dev/null +++ b/site/docs/use-cases/image-processing/index.mdx @@ -0,0 +1,21 @@ +--- +sidebar_position: 4 +--- +import OptimumCLI from '@site/src/components/OptimumCLI'; +import ConvertModelSection from '../_shared/_convert_model.mdx'; +import RunModelSection from './_sections/_run_model/index.mdx'; +import UsageOptionsSection from './_sections/_usage_options/index.mdx'; + +# Image Processing Using VLMs + + + Download and convert model (e.g. [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6)) to OpenVINO format from Hugging Face: + + + + See all supported [Visual Language Models](/docs/supported-models/#visual-language-models-vlms). 
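Since the chat-scenario guide above only shows `LLMPipeline`, here is a hedged sketch of the same pattern with `VLMPipeline`: the image is supplied on the first turn and later turns reuse the accumulated KV-cache. The model folder, image path, and prompts are placeholders.

```python
import numpy as np
import openvino as ov
import openvino_genai as ov_genai
from PIL import Image

def read_image(path: str) -> ov.Tensor:
    # Convert an image file to the NHWC uint8 tensor the pipeline expects
    pic = Image.open(path).convert("RGB")
    return ov.Tensor(np.array(pic)[None])

pipe = ov_genai.VLMPipeline("MiniCPM-V-2_6-ov", "CPU")  # folder name is a placeholder

pipe.start_chat()
print(pipe.generate("Describe this image.", images=[read_image("dog.jpg")], max_new_tokens=100))
# Follow-up turns keep the conversation context; the image does not need to be passed again
print(pipe.generate("What breed might the dog be?", max_new_tokens=100))
pipe.finish_chat()
```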
+ + + + + diff --git a/site/docs/use-cases/speech-processing.md b/site/docs/use-cases/speech-processing.md new file mode 100644 index 0000000000..9f1fceb05f --- /dev/null +++ b/site/docs/use-cases/speech-processing.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 3 +--- + +# Speech Processing Using Whisper diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_cpp.mdx b/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx similarity index 100% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_cpp.mdx rename to site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_python.mdx b/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx similarity index 100% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_python.mdx rename to site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/index.mdx b/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx similarity index 81% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/index.mdx rename to site/docs/use-cases/text-generation/_sections/_run_model/index.mdx index a208e25ebd..72b526b76a 100644 --- a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/index.mdx +++ b/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx @@ -3,7 +3,7 @@ import CodeExamplePython from './_code_example_python.mdx'; ## Run Model Using OpenVINO GenAI -`LLMPipeline` is the main object used for decoding. You can construct it straight away from the folder with the converted model. +[`LLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.LLMPipeline.html) is the main object used for decoding. You can construct it straight away from the folder with the converted model. It will automatically load the main model, tokenizer, detokenizer and default generation configuration. diff --git a/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx new file mode 100644 index 0000000000..112a0e6935 --- /dev/null +++ b/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx @@ -0,0 +1,106 @@ +import BasicGenerationConfiguration from '@site/docs/use-cases/_shared/_basic_generation_configuration.mdx'; + +### Use Different Generation Parameters + +Fine-tune your LLM's output by adjusting various generation parameters. +OpenVINO GenAI supports multiple sampling strategies and generation configurations to help you achieve the desired balance between deterministic and creative outputs. 
+ + + + + ```python + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, "CPU") + + # Get default configuration + config = pipe.get_generation_config() + + # Modify parameters + config.max_new_tokens = 100 + config.temperature = 0.7 + config.top_k = 50 + config.top_p = 0.9 + config.repetition_penalty = 1.2 + + # Generate text with custom configuration + output = pipe.generate("The Sun is yellow because", config) + ``` + + + ```cpp + int main() { + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + // Get default configuration + auto config = pipe.get_generation_config(); + + // Modify parameters + config.max_new_tokens = 100; + config.temperature = 0.7f; + config.top_k = 50; + config.top_p = 0.9f; + config.repetition_penalty = 1.2f; + + // Generate text with custom configuration + auto output = pipe.generate("The Sun is yellow because", config); + } + ``` + + + + +#### Optimizing Generation with Grouped Beam Search + +Beam search helps explore multiple possible text completions simultaneously, often leading to higher quality outputs. + + + + ```python + import openvino_genai as ov_genai + pipe = ov_genai.LLMPipeline(model_path, "CPU") + + # Get default generation config + config = pipe.get_generation_config() + + # Modify parameters + config.max_new_tokens = 256 + config.num_beams = 15 + config.num_beam_groups = 3 + config.diversity_penalty = 1.0 + + # Generate text with custom configuration + print(pipe.generate("The Sun is yellow because", config)) + ``` + + + ```cpp + int main(int argc, char* argv[]) { + std::string model_path = argv[1]; + ov::genai::LLMPipeline pipe(model_path, "CPU"); + + // Get default generation config + ov::genai::GenerationConfig config = pipe.get_generation_config(); + + // Modify parameters + config.max_new_tokens = 256; + config.num_beams = 15; + config.num_beam_groups = 3; + config.diversity_penalty = 1.0f; + + // Generate text with custom configuration + cout << pipe.generate("The Sun is yellow because", config); + } + ``` + + + +:::info Understanding Beam Search Generation Parameters + +- `max_new_tokens`: The maximum numbers of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`. +- `num_beams`: The number of beams for beam search. 1 disables beam search. +- `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. +- `diversity_penalty`: value is subtracted from a beam's score if it generates the same token as any beam from other group at a particular time. + +For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig). 
+ +::: diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_lora_adapters.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx similarity index 100% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_lora_adapters.mdx rename to site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_speculative_decoding.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx similarity index 93% rename from site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_speculative_decoding.mdx rename to site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx index 6fdb6227f8..0709cafecc 100644 --- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_speculative_decoding.mdx +++ b/site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx @@ -101,5 +101,5 @@ This reduces the number of infer requests to the main model, increasing performa :::info -For more information, refer to the [Speculative Decoding sample](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/speculative_decoding_lm/). +For more information, refer to [Python](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/speculative_decoding_lm.py) and [C++](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/speculative_decoding_lm.cpp) speculative decoding samples. ::: diff --git a/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx new file mode 100644 index 0000000000..35dd1ca5bd --- /dev/null +++ b/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx @@ -0,0 +1,21 @@ +import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx'; +import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx'; +import GenerationParameters from './_generation_parameters.mdx'; +import LoraAdapters from './_lora_adapters.mdx'; +import SpeculativeDecoding from './_speculative_decoding.mdx'; + +## Additional Usage Options + +:::tip +Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/text_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/text_generation) text generation samples. +::: + + + + + + + + + + diff --git a/site/docs/use-cases/text-generation/index.mdx b/site/docs/use-cases/text-generation/index.mdx new file mode 100644 index 0000000000..6ebbff82f1 --- /dev/null +++ b/site/docs/use-cases/text-generation/index.mdx @@ -0,0 +1,21 @@ +--- +sidebar_position: 1 +--- +import OptimumCLI from '@site/src/components/OptimumCLI'; +import ConvertModelSection from '../_shared/_convert_model.mdx'; +import RunModelSection from './_sections/_run_model/index.mdx'; +import UsageOptionsSection from './_sections/_usage_options/index.mdx'; + +# Text Generation Using LLMs + + + Download and convert model (e.g. [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0)) to OpenVINO format from Hugging Face: + + + + See all supported [Large Language Models](/docs/supported-models/#large-language-models-llms). 
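For a quick sanity check of the converted model before working through the sections referenced above, a minimal `LLMPipeline` call looks roughly like this; the folder name is assumed to match the export step and the prompt is arbitrary.

```python
import openvino_genai as ov_genai

# Folder produced by the optimum-cli export step; use "GPU" to run on an Intel GPU instead
pipe = ov_genai.LLMPipeline("TinyLlama-1.1B-Chat-v1.0-ov", "CPU")
print(pipe.generate("The Sun is yellow because", max_new_tokens=100))
```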
+ + + + + diff --git a/site/src/components/image-generation.tsx b/site/src/components/image-generation.tsx index 3e7fce1681..10d785d78a 100644 --- a/site/src/components/image-generation.tsx +++ b/site/src/components/image-generation.tsx @@ -6,8 +6,8 @@ import { SectionImage } from './Section/section-image'; import ImagePlaceholder from '@site/static/img/image-generation-placeholder.webp'; -import CodeExampleCpp from '@site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_cpp.mdx'; -import CodeExamplePython from '@site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_python.mdx'; +import CodeExampleCpp from '@site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx'; +import CodeExamplePython from '@site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx'; const FEATURES = [ 'Alter parameters (width, height, iterations) and compile model for static size', @@ -39,8 +39,8 @@ export const ImageGeneration = () => {
- - + + ); diff --git a/site/src/components/image-processing.tsx b/site/src/components/image-processing.tsx index f69cd5c2d0..359f210b67 100644 --- a/site/src/components/image-processing.tsx +++ b/site/src/components/image-processing.tsx @@ -2,54 +2,18 @@ import { ExploreCodeSamples } from '@site/src/components/GoToLink/explore-code-s import { GoToDocumentation } from '@site/src/components/GoToLink/go-to-documentation'; import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/LanguageTabs'; import { Section } from '@site/src/components/Section'; -import CodeBlock from '@theme/CodeBlock'; import ImagePlaceholder from '@site/static/img/image-generation-placeholder.webp'; +import CodeExampleCpp from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx'; +import CodeExamplePython from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx'; + const FEATURES = [ 'Use different generation parameters (sampling types, etc.)', 'Optimize for chat scenarios by using chat mode', 'Pass multiple images to a model', ]; -const pythonCodeBlock = ( - - {`import numpy as np -import openvino as ov -import openvino_genai as ov_genai -from PIL import Image - -# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU -pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU") - -image = Image.open("dog.jpg") -image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8) -image_data = ov.Tensor(image_data) - -prompt = "Can you describe the image?" -print(pipe.generate(prompt, image=image_data, max_new_tokens=100))`} - -); - -const cppCodeBlock = ( - - {`#include "load_image.hpp" -#include -#include - -int main(int argc, char* argv[]) { - std::string models_path = argv[1]; - ov::genai::VLMPipeline pipe(models_path, "CPU"); - ov::Tensor rgb = utils::load_image(argv[2]); - std::cout << pipe.generate( - prompt, - ov::genai::image(rgb), - ov::genai::max_new_tokens(100) - ) << '\\n'; -}`} - -); - export const ImageProcessing = () => { return ( @@ -68,12 +32,16 @@ export const ImageProcessing = () => {
- {pythonCodeBlock} - {cppCodeBlock} + + + + + +
- - + +
); diff --git a/site/src/components/speech-to-text.tsx b/site/src/components/speech-to-text.tsx index 09767b6645..a092e3d6a5 100644 --- a/site/src/components/speech-to-text.tsx +++ b/site/src/components/speech-to-text.tsx @@ -68,8 +68,8 @@ export const SpeechToText = () => { {cppCodeBlock}
- - + + ); diff --git a/site/src/components/text-generation.tsx b/site/src/components/text-generation.tsx index 0b79b92d3b..235cda6b62 100644 --- a/site/src/components/text-generation.tsx +++ b/site/src/components/text-generation.tsx @@ -6,8 +6,8 @@ import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/La import ImagePlaceholder from '@site/static/img/image-generation-placeholder.webp'; -import CodeExampleCpp from '@site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_cpp.mdx'; -import CodeExamplePython from '@site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_python.mdx'; +import CodeExampleCpp from '@site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx'; +import CodeExamplePython from '@site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx'; // TODO Consider moving to mdx const FEATURES = [ @@ -40,8 +40,8 @@ export const TextGeneration = () => {
- - + + );