diff --git a/site/docs/guides/chat-scenario.mdx b/site/docs/guides/chat-scenario.mdx
new file mode 100644
index 0000000000..e2da1ac13c
--- /dev/null
+++ b/site/docs/guides/chat-scenario.mdx
@@ -0,0 +1,82 @@
+---
+sidebar_position: 2
+title: Chat Scenario
+---
+
+# Using OpenVINO GenAI in Chat Scenario
+
+For chat applications, OpenVINO GenAI provides special optimizations to maintain conversation context and improve performance using KV-cache.
+
+Refer to the [Stateful Models vs Stateless Models](/docs/concepts/stateful-vs-stateless-models) page for more information about KV-cache.
+
+:::tip
+Use `start_chat()` and `finish_chat()` to properly manage the chat session's KV-cache. This improves performance by reusing context between messages.
+:::
+
+:::info
+Chat mode is supported for both `LLMPipeline` and `VLMPipeline`.
+:::
+
+A simple chat example (with grouped beam search decoding):
+
+
+
+ ```python showLineNumbers
+ import openvino_genai as ov_genai
+ pipe = ov_genai.LLMPipeline(model_path, 'CPU')
+
+ config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5}
+ pipe.set_generation_config(config)
+
+ # highlight-next-line
+ pipe.start_chat()
+ while True:
+ try:
+ prompt = input('question:\n')
+ except EOFError:
+ break
+ answer = pipe.generate(prompt)
+ print('answer:\n')
+ print(answer)
+ print('\n----------\n')
+ # highlight-next-line
+ pipe.finish_chat()
+ ```
+
+
+ ```cpp showLineNumbers
+ #include "openvino/genai/llm_pipeline.hpp"
+    #include <iostream>
+
+ int main(int argc, char* argv[]) {
+ std::string prompt;
+
+ std::string model_path = argv[1];
+ ov::genai::LLMPipeline pipe(model_path, "CPU");
+
+ ov::genai::GenerationConfig config;
+ config.max_new_tokens = 100;
+ config.num_beam_groups = 3;
+ config.num_beams = 15;
+ config.diversity_penalty = 1.0f;
+
+ // highlight-next-line
+ pipe.start_chat();
+ std::cout << "question:\n";
+ while (std::getline(std::cin, prompt)) {
+ std::cout << "answer:\n";
+ auto answer = pipe.generate(prompt, config);
+ std::cout << answer << std::endl;
+ std::cout << "\n----------\n"
+ "question:\n";
+ }
+ // highlight-next-line
+ pipe.finish_chat();
+ }
+ ```
+
+
+
+:::info
+For more information, refer to the [Python](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/chat_sample.py) and [C++](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/chat_sample.cpp) chat samples.
+:::
diff --git a/site/docs/guides/model-preparation/convert-to-openvino.mdx b/site/docs/guides/model-preparation/convert-to-openvino.mdx
index ad66f54722..14b9bee195 100644
--- a/site/docs/guides/model-preparation/convert-to-openvino.mdx
+++ b/site/docs/guides/model-preparation/convert-to-openvino.mdx
@@ -8,7 +8,8 @@ import UseCasesNote from './_use_cases_note.mdx';
# Convert Models to OpenVINO Format
-This page explains how to convert various generative AI models from Hugging Face and ModelScope to OpenVINO IR format. Refer to the [Supported Models](../../supported-models/index.mdx) for a list of available models.
+This page explains how to convert various generative AI models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/) to OpenVINO IR format.
+Refer to the [Supported Models](../../supported-models/index.mdx) page for a list of available models.
For downloading pre-converted models, see [Download Pre-Converted OpenVINO Models](./download-openvino-models.mdx).
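+
+For illustration, an LLM can also be exported to OpenVINO IR directly from Python with Optimum Intel (a minimal sketch; the model ID and output folder are examples):
+
+```python
+from optimum.intel import OVModelForCausalLM
+
+# Download the original model and export it to OpenVINO IR in one step
+model = OVModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", export=True)
+model.save_pretrained("TinyLlama-1.1B-Chat-v1.0-ov")
+```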
diff --git a/site/docs/guides/model-preparation/download-openvino-models.mdx b/site/docs/guides/model-preparation/download-openvino-models.mdx
index fd514a9bff..d3cf2c9983 100644
--- a/site/docs/guides/model-preparation/download-openvino-models.mdx
+++ b/site/docs/guides/model-preparation/download-openvino-models.mdx
@@ -8,7 +8,7 @@ import UseCasesNote from './_use_cases_note.mdx';
# Download Pre-Converted OpenVINO Models
OpenVINO GenAI allows to run different generative AI models (see [Supported Models](../../supported-models/index.mdx)).
-While you can convert models from other frameworks (see [Convert Models to OpenVINO Format](./convert-to-openvino.mdx)), using pre-converted models can save time and effort.
+While you can convert models from other frameworks (see [Convert Models to OpenVINO Format](./convert-to-openvino.mdx)), using pre-converted models from [Hugging Face](https://huggingface.co/) and [ModelScope](https://modelscope.cn/) can save time and effort.
## Download from Hugging Face
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_chat_scenario.mdx b/site/docs/guides/streaming.mdx
similarity index 65%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_chat_scenario.mdx
rename to site/docs/guides/streaming.mdx
index 9a8f197117..550c36e256 100644
--- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_chat_scenario.mdx
+++ b/site/docs/guides/streaming.mdx
@@ -1,76 +1,16 @@
-### Using GenAI in Chat Scenario
+---
+sidebar_position: 3
+---
-For chat applications, OpenVINO GenAI provides special optimizations to maintain conversation context and improve performance using KV-cache.
-
-:::tip
-Use `start_chat()` and `finish_chat()` to properly manage the chat session's KV-cache. This improves performance by reusing context between messages.
-:::
-
-A simple chat example (with grouped beam search decoding):
-
-
-
- ```python showLineNumbers
- import openvino_genai as ov_genai
- pipe = ov_genai.LLMPipeline(model_path, 'CPU')
-
- config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5}
- pipe.set_generation_config(config)
-
- # highlight-next-line
- pipe.start_chat()
- while True:
- try:
- prompt = input('question:\n')
- except EOFError:
- break
- answer = pipe.generate(prompt)
- print('answer:\n')
- print(answer)
- print('\n----------\n')
- # highlight-next-line
- pipe.finish_chat()
- ```
-
-
- ```cpp showLineNumbers
- #include "openvino/genai/llm_pipeline.hpp"
-    #include <iostream>
-
- int main(int argc, char* argv[]) {
- std::string prompt;
-
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
-
- ov::genai::GenerationConfig config;
- config.max_new_tokens = 100;
- config.num_beam_groups = 3;
- config.num_beams = 15;
- config.diversity_penalty = 1.0f;
-
- // highlight-next-line
- pipe.start_chat();
- std::cout << "question:\n";
- while (std::getline(std::cin, prompt)) {
- std::cout << "answer:\n";
- auto answer = pipe.generate(prompt, config);
- std::cout << answer << std::endl;
- std::cout << "\n----------\n"
- "question:\n";
- }
- // highlight-next-line
- pipe.finish_chat();
- }
- ```
-
-
-
-#### Streaming the Output
+# Streaming the Output
For more interactive UIs during generation, you can stream output tokens.
-##### Streaming Function
+:::info
+Streaming is supported for both `LLMPipeline` and `VLMPipeline`.
+:::
+
+## Streaming Function
In this example, a function outputs words to the console immediately upon generation:
@@ -138,11 +78,7 @@ In this example, a function outputs words to the console immediately upon genera
-:::info
-For more information, refer to the [chat sample](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/chat_sample/).
-:::
-
-##### Custom Streamer Class
+## Custom Streamer Class
You can also create your custom streamer for more sophisticated processing:
@@ -210,7 +146,7 @@ You can also create your custom streamer for more sophisticated processing:
int main(int argc, char* argv[]) {
std::string prompt;
// highlight-next-line
- CustomStreamer custom_streamer;
+    std::shared_ptr<CustomStreamer> custom_streamer;
std::string model_path = argv[1];
ov::genai::LLMPipeline pipe(model_path, "CPU");
@@ -232,5 +168,5 @@ You can also create your custom streamer for more sophisticated processing:
:::info
-For fully implemented iterable CustomStreamer refer to [multinomial_causal_lm](https://github.com/openvinotoolkit/openvino.genai/blob/releases/2025/0/samples/python/text_generation/multinomial_causal_lm.py) sample.
+For a fully implemented iterable `CustomStreamer`, refer to the [multinomial_causal_lm](https://github.com/openvinotoolkit/openvino.genai/blob/releases/2025/0/samples/python/text_generation/multinomial_causal_lm.py) sample.
:::
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_generation_parameters.mdx b/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_generation_parameters.mdx
deleted file mode 100644
index edf4313be6..0000000000
--- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_generation_parameters.mdx
+++ /dev/null
@@ -1,127 +0,0 @@
-### Use Different Generation Parameters
-
-Fine-tune your LLM's output by adjusting various generation parameters.
-OpenVINO GenAI supports multiple sampling strategies and generation configurations to help you achieve the desired balance between deterministic and creative outputs.
-
-#### Basic Generation Configuration
-
-1. Get the model default config with `get_generation_config()`
-2. Modify parameters
-3. Apply the updated config:
- - Use `set_generation_config(config)`
- - Pass config directly to `generate()` (e.g. `generate(prompt, config)`)
- - Specify options as inputs in the `generate()` method (e.g. `generate(prompt, max_new_tokens=100)`)
-
-
-
- ```python
- import openvino_genai as ov_genai
- pipe = ov_genai.LLMPipeline(model_path, "CPU")
-
- # Get default configuration
- config = pipe.get_generation_config()
-
- # Modify parameters
- config.max_new_tokens = 100
- config.temperature = 0.7
- config.top_k = 50
- config.top_p = 0.9
- config.repetition_penalty = 1.2
-
- # Generate text with custom configuration
- output = pipe.generate("The Sun is yellow because", config)
- ```
-
-
- ```cpp
- int main() {
- ov::genai::LLMPipeline pipe(model_path, "CPU");
-
- // Get default configuration
- auto config = pipe.get_generation_config();
-
- // Modify parameters
- config.max_new_tokens = 100;
- config.temperature = 0.7f;
- config.top_k = 50;
- config.top_p = 0.9f;
- config.repetition_penalty = 1.2f;
-
- // Generate text with custom configuration
- auto output = pipe.generate("The Sun is yellow because", config);
- }
- ```
-
-
-
-:::info Understanding Basic Generation Parameters
-
-- `max_new_tokens`: The maximum numbers of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`.
-- `temperature`: Controls the level of creativity in AI-generated text:
- - Low temperature (e.g. 0.2) leads to more focused and deterministic output, choosing tokens with the highest probability.
- - Medium temperature (e.g. 1.0) maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias.
- - High temperature (e.g. 2.0) makes output more creative and adventurous, increasing the chances of selecting less likely tokens.
-- `top_k`: Limits token selection to the k most likely next tokens. Higher values allow more diverse outputs.
-- `top_p`: Selects from the smallest set of tokens whose cumulative probability exceeds p. Helps balance diversity and quality.
-- `repetition_penalty`: Reduces the likelihood of repeating tokens. Values above 1.0 discourage repetition.
-
-For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
-
-:::
-
-
-#### Optimizing Generation with Grouped Beam Search
-
-Beam search helps explore multiple possible text completions simultaneously, often leading to higher quality outputs.
-
-
-
- ```python
- import openvino_genai as ov_genai
- pipe = ov_genai.LLMPipeline(model_path, "CPU")
-
- # Get default generation config
- config = pipe.get_generation_config()
-
- # Modify parameters
- config.max_new_tokens = 256
- config.num_beams = 15
- config.num_beam_groups = 3
- config.diversity_penalty = 1.0
-
- # Generate text with custom configuration
- print(pipe.generate("The Sun is yellow because", config))
- ```
-
-
- ```cpp
- int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
-
- // Get default generation config
- ov::genai::GenerationConfig config = pipe.get_generation_config();
-
- // Modify parameters
- config.max_new_tokens = 256;
- config.num_beams = 15;
- config.num_beam_groups = 3;
- config.diversity_penalty = 1.0f;
-
- // Generate text with custom configuration
- cout << pipe.generate("The Sun is yellow because", config);
- }
- ```
-
-
-
-:::info Understanding Beam Search Generation Parameters
-
-- `max_new_tokens`: The maximum numbers of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`.
-- `num_beams`: The number of beams for beam search. 1 disables beam search.
-- `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
-- `diversity_penalty`: value is subtracted from a beam's score if it generates the same token as any beam from other group at a particular time.
-
-For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
-
-:::
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/index.mdx b/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/index.mdx
deleted file mode 100644
index f47f23e1b2..0000000000
--- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/index.mdx
+++ /dev/null
@@ -1,18 +0,0 @@
-import ChatScenario from './_chat_scenario.mdx';
-import GenerationParameters from './_generation_parameters.mdx';
-import LoraAdapters from './_lora_adapters.mdx';
-import SpeculativeDecoding from './_speculative_decoding.mdx';
-
-## Additional Usage Options
-
-:::tip
-Check out our [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp) samples.
-:::
-
-
-
-
-
-
-
-
diff --git a/site/docs/use-cases/1-LLM-pipeline/index.mdx b/site/docs/use-cases/1-LLM-pipeline/index.mdx
deleted file mode 100644
index 7b394ef719..0000000000
--- a/site/docs/use-cases/1-LLM-pipeline/index.mdx
+++ /dev/null
@@ -1,14 +0,0 @@
----
-sidebar_position: 1
----
-import ConvertModelSection from './_sections/_convert_model.mdx';
-import RunModelSection from './_sections/_run_model/index.mdx';
-import UsageOptionsSection from './_sections/_usage_options/index.mdx';
-
-# Text Generation Using LLMs
-
-
-
-
-
-
diff --git a/site/docs/use-cases/2-Image-Generation/index.mdx b/site/docs/use-cases/2-Image-Generation/index.mdx
deleted file mode 100644
index a08ae53bb7..0000000000
--- a/site/docs/use-cases/2-Image-Generation/index.mdx
+++ /dev/null
@@ -1,14 +0,0 @@
----
-sidebar_position: 1
----
-import ConvertModelSection from '../1-LLM-pipeline/_sections/_convert_model.mdx';
-import RunModelSection from './_sections/_run_model/index.mdx';
-import UsageOptionsSection from './_sections/_usage_options/index.mdx';
-
-# Image Generation Using Diffusers
-
-
-
-
-
-
diff --git a/site/docs/use-cases/3-Processing-speech-whisper.md b/site/docs/use-cases/3-Processing-speech-whisper.md
deleted file mode 100644
index 8d151624f6..0000000000
--- a/site/docs/use-cases/3-Processing-speech-whisper.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-sidebar_position: 1
----
-
-# Processing Speech Using Whisper
diff --git a/site/docs/use-cases/4-Processing-images-using-VLMs.md b/site/docs/use-cases/4-Processing-images-using-VLMs.md
deleted file mode 100644
index 0cb1438342..0000000000
--- a/site/docs/use-cases/4-Processing-images-using-VLMs.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-sidebar_position: 1
----
-
-# Processing Images Using VLMs
diff --git a/site/docs/use-cases/_shared/_basic_generation_configuration.mdx b/site/docs/use-cases/_shared/_basic_generation_configuration.mdx
new file mode 100644
index 0000000000..021808897e
--- /dev/null
+++ b/site/docs/use-cases/_shared/_basic_generation_configuration.mdx
@@ -0,0 +1,26 @@
+#### Basic Generation Configuration
+
+1. Get the model's default config with `get_generation_config()`
+2. Modify parameters
+3. Apply the updated config using one of the following methods:
+ - Use `set_generation_config(config)`
+ - Pass config directly to `generate()` (e.g. `generate(prompt, config)`)
+ - Specify options as inputs in the `generate()` method (e.g. `generate(prompt, max_new_tokens=100)`)
+
+{/* Python and C++ code examples */}
+{props.children}
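+
+As a quick illustration of the last option, generation options can also be passed directly to `generate()` as keyword arguments (a minimal Python sketch with `LLMPipeline`; `model_path` is a placeholder for the converted model folder):
+
+```python
+import openvino_genai as ov_genai
+
+pipe = ov_genai.LLMPipeline(model_path, "CPU")
+
+# Options passed to generate() override the pipeline's default generation config
+print(pipe.generate("The Sun is yellow because", max_new_tokens=100))
+```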
+
+:::info Understanding Basic Generation Parameters
+
+- `max_new_tokens`: The maximum number of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`.
+- `temperature`: Controls the level of creativity in AI-generated text:
+ - Low temperature (e.g. 0.2) leads to more focused and deterministic output, choosing tokens with the highest probability.
+ - Medium temperature (e.g. 1.0) maintains a balance between creativity and focus, selecting tokens based on their probabilities without significant bias.
+ - High temperature (e.g. 2.0) makes output more creative and adventurous, increasing the chances of selecting less likely tokens.
+- `top_k`: Limits token selection to the k most likely next tokens. Higher values allow more diverse outputs.
+- `top_p`: Selects from the smallest set of tokens whose cumulative probability exceeds p. Helps balance diversity and quality.
+- `repetition_penalty`: Reduces the likelihood of repeating tokens. Values above 1.0 discourage repetition.
+
+For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
+
+:::
diff --git a/site/docs/use-cases/_shared/_chat_scenario.mdx b/site/docs/use-cases/_shared/_chat_scenario.mdx
new file mode 100644
index 0000000000..fbf5b36e45
--- /dev/null
+++ b/site/docs/use-cases/_shared/_chat_scenario.mdx
@@ -0,0 +1,3 @@
+### Using OpenVINO GenAI in Chat Scenario
+
+Refer to the [Chat Scenario](/docs/guides/chat-scenario) guide for more information on using OpenVINO GenAI in chat applications.
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_convert_model.mdx b/site/docs/use-cases/_shared/_convert_model.mdx
similarity index 73%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_convert_model.mdx
rename to site/docs/use-cases/_shared/_convert_model.mdx
index 9867d52fda..8d6e1153c3 100644
--- a/site/docs/use-cases/1-LLM-pipeline/_sections/_convert_model.mdx
+++ b/site/docs/use-cases/_shared/_convert_model.mdx
@@ -1,3 +1,8 @@
## Convert and Optimize Model
+{/* optimum-cli export code examples */}
+{props.children}
+
+:::info
Refer to the [Model Preparation](/docs/category/model-preparation) guide for detailed instructions on how to download, convert and optimize models for OpenVINO GenAI.
+:::
diff --git a/site/docs/use-cases/_shared/_streaming.mdx b/site/docs/use-cases/_shared/_streaming.mdx
new file mode 100644
index 0000000000..f09a14c266
--- /dev/null
+++ b/site/docs/use-cases/_shared/_streaming.mdx
@@ -0,0 +1,3 @@
+### Streaming the Output
+
+Refer to the [Streaming](/docs/guides/streaming) guide for more information on streaming the output with OpenVINO GenAI.
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_cpp.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx
similarity index 100%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_cpp.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/_image2image_cpp.mdx
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_python.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx
similarity index 100%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_image2image_python.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/_image2image_python.mdx
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_cpp.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx
similarity index 100%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_cpp.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_cpp.mdx
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_python.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx
similarity index 100%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_inpainting_python.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/_inpainting_python.mdx
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_cpp.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx
similarity index 100%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_cpp.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_python.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx
similarity index 100%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_python.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/index.mdx b/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx
similarity index 85%
rename from site/docs/use-cases/2-Image-Generation/_sections/_run_model/index.mdx
rename to site/docs/use-cases/image-generation/_sections/_run_model/index.mdx
index f342482981..aab59d23da 100644
--- a/site/docs/use-cases/2-Image-Generation/_sections/_run_model/index.mdx
+++ b/site/docs/use-cases/image-generation/_sections/_run_model/index.mdx
@@ -10,11 +10,9 @@ import InpaintingPython from './_inpainting_python.mdx';
## Run Model Using OpenVINO GenAI
OpenVINO GenAI supports the following diffusion model pipelines:
-- `Text2ImagePipeline` for creating images from text prompts.
-- `Image2ImagePipeline` for modifying existing images based on prompts.
-- `InpaintingPipeline` for selectively replacing portions of images using masks.
-
-See all supported [image generation models](/docs/supported-models/#image-generation-models).
+- [`Text2ImagePipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.Text2ImagePipeline.html) for creating images from text prompts.
+- [`Image2ImagePipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.Image2ImagePipeline.html) for modifying existing images based on prompts.
+- [`InpaintingPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.InpaintingPipeline.html) for selectively replacing portions of images using masks.
### `Text2ImagePipeline`
diff --git a/site/docs/use-cases/2-Image-Generation/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx
similarity index 90%
rename from site/docs/use-cases/2-Image-Generation/_sections/_usage_options/index.mdx
rename to site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx
index 251bde4bf9..5439e56101 100644
--- a/site/docs/use-cases/2-Image-Generation/_sections/_usage_options/index.mdx
+++ b/site/docs/use-cases/image-generation/_sections/_usage_options/index.mdx
@@ -1,7 +1,7 @@
## Additional Usage Options
:::tip
-Check out our [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp) samples.
+Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/image_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/image_generation) image generation samples.
:::
### Use Different Generation Parameters
@@ -65,7 +65,7 @@ You can adjust several parameters to control the image generation process, inclu
- `guidance_scale`: Balances prompt adherence vs. creativity. Higher values follow prompt more strictly, lower values allow more creative freedom.
- `rng_seed`: Controls randomness for reproducible results. Same seed produces identical images across runs.
-For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.Text2ImagePipeline.html#openvino_genai.Text2ImagePipeline.generate).
+For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.ImageGenerationConfig.html).
:::
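+
+For example, these parameters can be passed directly to `generate()` (a minimal Python sketch; `model_path` is a placeholder for the converted model folder):
+
+```python
+import openvino_genai as ov_genai
+from PIL import Image
+
+pipe = ov_genai.Text2ImagePipeline(model_path, "CPU")
+
+# A fixed rng_seed makes the result reproducible across runs
+image_tensor = pipe.generate(
+    "a house made of sweets",
+    width=512,
+    height=512,
+    num_inference_steps=20,
+    guidance_scale=7.5,
+    rng_seed=42,
+)
+Image.fromarray(image_tensor.data[0]).save("image.png")
+```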
diff --git a/site/docs/use-cases/image-generation/index.mdx b/site/docs/use-cases/image-generation/index.mdx
new file mode 100644
index 0000000000..6dbf946c73
--- /dev/null
+++ b/site/docs/use-cases/image-generation/index.mdx
@@ -0,0 +1,21 @@
+---
+sidebar_position: 2
+---
+import OptimumCLI from '@site/src/components/OptimumCLI';
+import ConvertModelSection from '../_shared/_convert_model.mdx';
+import RunModelSection from './_sections/_run_model/index.mdx';
+import UsageOptionsSection from './_sections/_usage_options/index.mdx';
+
+# Image Generation Using Diffusers
+
+
+ Download and convert model (e.g. [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)) to OpenVINO format from Hugging Face:
+
+
+
+ See all supported [Image Generation Models](/docs/supported-models/#image-generation-models).
+
+
+
+
+
diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx
new file mode 100644
index 0000000000..c10c8be4ca
--- /dev/null
+++ b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx
@@ -0,0 +1,21 @@
+import CodeBlock from '@theme/CodeBlock';
+
+
+{`#include "openvino/genai/visual_language/pipeline.hpp"
+#include "load_image.hpp"
+#include <iostream>
+
+int main(int argc, char* argv[]) {
+    std::string models_path = argv[1], images_path = argv[2];
+    std::string prompt = "Describe the images";  // example prompt
+    std::vector<ov::Tensor> images = utils::load_images(images_path);
+
+ ov::genai::VLMPipeline pipe(models_path, "${props.device || 'CPU'}");
+ ov::genai::VLMDecodedResults result = pipe.generate(
+ prompt,
+ ov::genai::images(images),
+ ov::genai::max_new_tokens(100)
+ );
+ std::cout << result.texts[0] << std::endl;
+}
+`}
+
diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx
new file mode 100644
index 0000000000..3e308b555a
--- /dev/null
+++ b/site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx
@@ -0,0 +1,27 @@
+import CodeBlock from '@theme/CodeBlock';
+
+
+{`import openvino_genai as ov_genai
+import openvino as ov
+from PIL import Image
+import numpy as np
+from pathlib import Path
+
+def read_image(path: str) -> ov.Tensor:
+ pic = Image.open(path).convert("RGB")
+ image_data = np.array(pic)[None]
+ return ov.Tensor(image_data)
+
+def read_images(path: str) -> list[ov.Tensor]:
+ entry = Path(path)
+ if entry.is_dir():
+ return [read_image(str(file)) for file in sorted(entry.iterdir())]
+ return [read_image(path)]
+
+images = read_images("./images")
+prompt = "Describe the images"  # example prompt
+
+pipe = ov_genai.VLMPipeline(model_path, "${props.device || 'CPU'}")
+result = pipe.generate(prompt, images=images, max_new_tokens=100)
+print(result.texts[0])
+`}
+
diff --git a/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx
new file mode 100644
index 0000000000..b5082eb1ef
--- /dev/null
+++ b/site/docs/use-cases/image-processing/_sections/_run_model/index.mdx
@@ -0,0 +1,36 @@
+import CodeExampleCPP from './_code_example_cpp.mdx';
+import CodeExamplePython from './_code_example_python.mdx';
+
+## Run Model Using OpenVINO GenAI
+
+OpenVINO GenAI introduces the [`VLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.VLMPipeline.html) pipeline for inference of multimodal text-generation Vision Language Models (VLMs).
+It can generate text from a text prompt and one or more images as inputs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+:::tip
+
+Use CPU or GPU as the device without any other code changes.
+
+:::
diff --git a/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx
new file mode 100644
index 0000000000..e8ab518763
--- /dev/null
+++ b/site/docs/use-cases/image-processing/_sections/_usage_options/index.mdx
@@ -0,0 +1,61 @@
+import BasicGenerationConfiguration from '@site/docs/use-cases/_shared/_basic_generation_configuration.mdx';
+import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx';
+import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx';
+
+## Additional Usage Options
+
+:::tip
+Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/visual_language_chat) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/visual_language_chat) visual language chat samples.
+:::
+
+### Use Different Generation Parameters
+
+Similar to [text generation](/docs/use-cases/text-generation/#use-different-generation-parameters), VLM pipelines support various generation parameters to control the text output.
+
+
+
+
+ ```python
+ import openvino_genai as ov_genai
+ pipe = ov_genai.VLMPipeline(model_path, "CPU")
+
+ # Get default configuration
+ config = pipe.get_generation_config()
+
+ # Modify parameters
+ config.max_new_tokens = 100
+ config.temperature = 0.7
+ config.top_k = 50
+ config.top_p = 0.9
+ config.repetition_penalty = 1.2
+
+ # Generate text with custom configuration
+ output = pipe.generate(prompt, images, config)
+ ```
+
+
+ ```cpp
+ int main() {
+ ov::genai::VLMPipeline pipe(model_path, "CPU");
+
+ // Get default configuration
+ auto config = pipe.get_generation_config();
+
+ // Modify parameters
+ config.max_new_tokens = 100;
+ config.temperature = 0.7f;
+ config.top_k = 50;
+ config.top_p = 0.9f;
+ config.repetition_penalty = 1.2f;
+
+ // Generate text with custom configuration
+ auto output = pipe.generate(prompt, images, config);
+ }
+ ```
+
+
+
+
+
+
+
diff --git a/site/docs/use-cases/image-processing/index.mdx b/site/docs/use-cases/image-processing/index.mdx
new file mode 100644
index 0000000000..b6605e9242
--- /dev/null
+++ b/site/docs/use-cases/image-processing/index.mdx
@@ -0,0 +1,21 @@
+---
+sidebar_position: 4
+---
+import OptimumCLI from '@site/src/components/OptimumCLI';
+import ConvertModelSection from '../_shared/_convert_model.mdx';
+import RunModelSection from './_sections/_run_model/index.mdx';
+import UsageOptionsSection from './_sections/_usage_options/index.mdx';
+
+# Image Processing Using VLMs
+
+
+ Download and convert model (e.g. [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6)) to OpenVINO format from Hugging Face:
+
+
+
+ See all supported [Visual Language Models](/docs/supported-models/#visual-language-models-vlms).
+
+
+
+
+
diff --git a/site/docs/use-cases/speech-processing.md b/site/docs/use-cases/speech-processing.md
new file mode 100644
index 0000000000..9f1fceb05f
--- /dev/null
+++ b/site/docs/use-cases/speech-processing.md
@@ -0,0 +1,5 @@
+---
+sidebar_position: 3
+---
+
+# Speech Processing Using Whisper
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_cpp.mdx b/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx
similarity index 100%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_cpp.mdx
rename to site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_python.mdx b/site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx
similarity index 100%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_python.mdx
rename to site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/index.mdx b/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx
similarity index 81%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/index.mdx
rename to site/docs/use-cases/text-generation/_sections/_run_model/index.mdx
index a208e25ebd..72b526b76a 100644
--- a/site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/index.mdx
+++ b/site/docs/use-cases/text-generation/_sections/_run_model/index.mdx
@@ -3,7 +3,7 @@ import CodeExamplePython from './_code_example_python.mdx';
## Run Model Using OpenVINO GenAI
-`LLMPipeline` is the main object used for decoding. You can construct it straight away from the folder with the converted model.
+[`LLMPipeline`](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.LLMPipeline.html) is the main object used for decoding. You can construct it straight away from the folder with the converted model.
It will automatically load the main model, tokenizer, detokenizer and default generation configuration.
diff --git a/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx
new file mode 100644
index 0000000000..112a0e6935
--- /dev/null
+++ b/site/docs/use-cases/text-generation/_sections/_usage_options/_generation_parameters.mdx
@@ -0,0 +1,106 @@
+import BasicGenerationConfiguration from '@site/docs/use-cases/_shared/_basic_generation_configuration.mdx';
+
+### Use Different Generation Parameters
+
+Fine-tune your LLM's output by adjusting various generation parameters.
+OpenVINO GenAI supports multiple sampling strategies and generation configurations to help you achieve the desired balance between deterministic and creative outputs.
+
+
+
+
+ ```python
+ import openvino_genai as ov_genai
+ pipe = ov_genai.LLMPipeline(model_path, "CPU")
+
+ # Get default configuration
+ config = pipe.get_generation_config()
+
+ # Modify parameters
+ config.max_new_tokens = 100
+ config.temperature = 0.7
+ config.top_k = 50
+ config.top_p = 0.9
+ config.repetition_penalty = 1.2
+
+ # Generate text with custom configuration
+ output = pipe.generate("The Sun is yellow because", config)
+ ```
+
+
+ ```cpp
+ int main() {
+ ov::genai::LLMPipeline pipe(model_path, "CPU");
+
+ // Get default configuration
+ auto config = pipe.get_generation_config();
+
+ // Modify parameters
+ config.max_new_tokens = 100;
+ config.temperature = 0.7f;
+ config.top_k = 50;
+ config.top_p = 0.9f;
+ config.repetition_penalty = 1.2f;
+
+ // Generate text with custom configuration
+ auto output = pipe.generate("The Sun is yellow because", config);
+ }
+ ```
+
+
+
+
+#### Optimizing Generation with Grouped Beam Search
+
+Beam search helps explore multiple possible text completions simultaneously, often leading to higher quality outputs.
+
+
+
+ ```python
+ import openvino_genai as ov_genai
+ pipe = ov_genai.LLMPipeline(model_path, "CPU")
+
+ # Get default generation config
+ config = pipe.get_generation_config()
+
+ # Modify parameters
+ config.max_new_tokens = 256
+ config.num_beams = 15
+ config.num_beam_groups = 3
+ config.diversity_penalty = 1.0
+
+ # Generate text with custom configuration
+ print(pipe.generate("The Sun is yellow because", config))
+ ```
+
+
+ ```cpp
+ int main(int argc, char* argv[]) {
+ std::string model_path = argv[1];
+ ov::genai::LLMPipeline pipe(model_path, "CPU");
+
+ // Get default generation config
+ ov::genai::GenerationConfig config = pipe.get_generation_config();
+
+ // Modify parameters
+ config.max_new_tokens = 256;
+ config.num_beams = 15;
+ config.num_beam_groups = 3;
+ config.diversity_penalty = 1.0f;
+
+ // Generate text with custom configuration
+    std::cout << pipe.generate("The Sun is yellow because", config);
+ }
+ ```
+
+
+
+:::info Understanding Beam Search Generation Parameters
+
+- `max_new_tokens`: The maximum number of tokens to generate, excluding the number of tokens in the prompt. `max_new_tokens` has priority over `max_length`.
+- `num_beams`: The number of beams for beam search. 1 disables beam search.
+- `num_beam_groups`: The number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
+- `diversity_penalty`: This value is subtracted from a beam's score if it generates the same token as any beam from another group at a particular time.
+
+For the full list of generation parameters, refer to the [API reference](https://docs.openvino.ai/2025/api/genai_api/_autosummary/openvino_genai.GenerationConfig.html#openvino-genai-generationconfig).
+
+:::
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_lora_adapters.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx
similarity index 100%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_lora_adapters.mdx
rename to site/docs/use-cases/text-generation/_sections/_usage_options/_lora_adapters.mdx
diff --git a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_speculative_decoding.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx
similarity index 93%
rename from site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_speculative_decoding.mdx
rename to site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx
index 6fdb6227f8..0709cafecc 100644
--- a/site/docs/use-cases/1-LLM-pipeline/_sections/_usage_options/_speculative_decoding.mdx
+++ b/site/docs/use-cases/text-generation/_sections/_usage_options/_speculative_decoding.mdx
@@ -101,5 +101,5 @@ This reduces the number of infer requests to the main model, increasing performa
:::info
-For more information, refer to the [Speculative Decoding sample](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/speculative_decoding_lm/).
+For more information, refer to [Python](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/text_generation/speculative_decoding_lm.py) and [C++](https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/cpp/text_generation/speculative_decoding_lm.cpp) speculative decoding samples.
:::
diff --git a/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx b/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx
new file mode 100644
index 0000000000..35dd1ca5bd
--- /dev/null
+++ b/site/docs/use-cases/text-generation/_sections/_usage_options/index.mdx
@@ -0,0 +1,21 @@
+import ChatScenario from '@site/docs/use-cases/_shared/_chat_scenario.mdx';
+import Streaming from '@site/docs/use-cases/_shared/_streaming.mdx';
+import GenerationParameters from './_generation_parameters.mdx';
+import LoraAdapters from './_lora_adapters.mdx';
+import SpeculativeDecoding from './_speculative_decoding.mdx';
+
+## Additional Usage Options
+
+:::tip
+Check out [Python](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/python/text_generation) and [C++](https://github.com/openvinotoolkit/openvino.genai/tree/master/samples/cpp/text_generation) text generation samples.
+:::
+
+
+
+
+
+
+
+
+
+
diff --git a/site/docs/use-cases/text-generation/index.mdx b/site/docs/use-cases/text-generation/index.mdx
new file mode 100644
index 0000000000..6ebbff82f1
--- /dev/null
+++ b/site/docs/use-cases/text-generation/index.mdx
@@ -0,0 +1,21 @@
+---
+sidebar_position: 1
+---
+import OptimumCLI from '@site/src/components/OptimumCLI';
+import ConvertModelSection from '../_shared/_convert_model.mdx';
+import RunModelSection from './_sections/_run_model/index.mdx';
+import UsageOptionsSection from './_sections/_usage_options/index.mdx';
+
+# Text Generation Using LLMs
+
+
+ Download and convert model (e.g. [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0)) to OpenVINO format from Hugging Face:
+
+
+
+ See all supported [Large Language Models](/docs/supported-models/#large-language-models-llms).
+
+
+
+
+
diff --git a/site/src/components/image-generation.tsx b/site/src/components/image-generation.tsx
index 3e7fce1681..10d785d78a 100644
--- a/site/src/components/image-generation.tsx
+++ b/site/src/components/image-generation.tsx
@@ -6,8 +6,8 @@ import { SectionImage } from './Section/section-image';
import ImagePlaceholder from '@site/static/img/image-generation-placeholder.webp';
-import CodeExampleCpp from '@site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_cpp.mdx';
-import CodeExamplePython from '@site/docs/use-cases/2-Image-Generation/_sections/_run_model/_text2image_python.mdx';
+import CodeExampleCpp from '@site/docs/use-cases/image-generation/_sections/_run_model/_text2image_cpp.mdx';
+import CodeExamplePython from '@site/docs/use-cases/image-generation/_sections/_run_model/_text2image_python.mdx';
const FEATURES = [
'Alter parameters (width, height, iterations) and compile model for static size',
@@ -39,8 +39,8 @@ export const ImageGeneration = () => {
-
-
+
+
);
diff --git a/site/src/components/image-processing.tsx b/site/src/components/image-processing.tsx
index f69cd5c2d0..359f210b67 100644
--- a/site/src/components/image-processing.tsx
+++ b/site/src/components/image-processing.tsx
@@ -2,54 +2,18 @@ import { ExploreCodeSamples } from '@site/src/components/GoToLink/explore-code-s
import { GoToDocumentation } from '@site/src/components/GoToLink/go-to-documentation';
import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/LanguageTabs';
import { Section } from '@site/src/components/Section';
-import CodeBlock from '@theme/CodeBlock';
import ImagePlaceholder from '@site/static/img/image-generation-placeholder.webp';
+import CodeExampleCpp from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_cpp.mdx';
+import CodeExamplePython from '@site/docs/use-cases/image-processing/_sections/_run_model/_code_example_python.mdx';
+
const FEATURES = [
'Use different generation parameters (sampling types, etc.)',
'Optimize for chat scenarios by using chat mode',
'Pass multiple images to a model',
];
-const pythonCodeBlock = (
-
- {`import numpy as np
-import openvino as ov
-import openvino_genai as ov_genai
-from PIL import Image
-
-# Choose GPU instead of CPU in the line below to run the model on Intel integrated or discrete GPU
-pipe = ov_genai.VLMPipeline("./MiniCPM-V-2_6/", "CPU")
-
-image = Image.open("dog.jpg")
-image_data = np.array(image.getdata()).reshape(1, image.size[1], image.size[0], 3).astype(np.uint8)
-image_data = ov.Tensor(image_data)
-
-prompt = "Can you describe the image?"
-print(pipe.generate(prompt, image=image_data, max_new_tokens=100))`}
-
-);
-
-const cppCodeBlock = (
-
- {`#include "load_image.hpp"
-#include
-#include
-
-int main(int argc, char* argv[]) {
- std::string models_path = argv[1];
- ov::genai::VLMPipeline pipe(models_path, "CPU");
- ov::Tensor rgb = utils::load_image(argv[2]);
- std::cout << pipe.generate(
- prompt,
- ov::genai::image(rgb),
- ov::genai::max_new_tokens(100)
- ) << '\\n';
-}`}
-
-);
-
export const ImageProcessing = () => {
return (
@@ -68,12 +32,16 @@ export const ImageProcessing = () => {
- {pythonCodeBlock}
- {cppCodeBlock}
+
+
+
+
+
+
-
-
+
+
);
diff --git a/site/src/components/speech-to-text.tsx b/site/src/components/speech-to-text.tsx
index 09767b6645..a092e3d6a5 100644
--- a/site/src/components/speech-to-text.tsx
+++ b/site/src/components/speech-to-text.tsx
@@ -68,8 +68,8 @@ export const SpeechToText = () => {
{cppCodeBlock}
-
-
+
+
);
diff --git a/site/src/components/text-generation.tsx b/site/src/components/text-generation.tsx
index 0b79b92d3b..235cda6b62 100644
--- a/site/src/components/text-generation.tsx
+++ b/site/src/components/text-generation.tsx
@@ -6,8 +6,8 @@ import { LanguageTabs, TabItemCpp, TabItemPython } from '@site/src/components/La
import ImagePlaceholder from '@site/static/img/image-generation-placeholder.webp';
-import CodeExampleCpp from '@site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_cpp.mdx';
-import CodeExamplePython from '@site/docs/use-cases/1-LLM-pipeline/_sections/_run_model/_code_example_python.mdx';
+import CodeExampleCpp from '@site/docs/use-cases/text-generation/_sections/_run_model/_code_example_cpp.mdx';
+import CodeExamplePython from '@site/docs/use-cases/text-generation/_sections/_run_model/_code_example_python.mdx';
// TODO Consider moving to mdx
const FEATURES = [
@@ -40,8 +40,8 @@ export const TextGeneration = () => {
-
-
+
+
);