Skip to content

Commit 74fc107

Browse files
committed
Merge branch 'generate_pipeline' into package
2 parents 6227b65 + a111a3f commit 74fc107

17 files changed

+335
-563
lines changed

src/cpp/include/openvino/genai/generation_config.hpp

-6
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ enum class StopCriteria { early, heuristic, never };
3131
* @param num_beam_groups number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
3232
* @param diversity_penalty this value is subtracted from a beam's score if it generates a token same as any beam from other group at a
3333
* particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled.
34-
* [more details in this paper](https://arxiv.org/pdf/1610.02424.pdf).
3534
* @param length_penalty exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
3635
* the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
3736
* likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while
@@ -48,7 +47,6 @@ enum class StopCriteria { early, heuristic, never };
4847
* @param do_sample whether or not to use multinomial random sampling
4948
* that add up to `top_p` or higher are kept.
5049
* @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty.
51-
* [more details in this paper](https://arxiv.org/pdf/1909.05858.pdf).
5250
* @param pad_token_id id of padding token
5351
* @param bos_token_id id of <bos> token
5452
* @param eos_token_id id of <eos> token
@@ -90,10 +88,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
9088
// used for chat scenario
9189
std::string bos_token = "<s>";
9290
std::string eos_token = "</s>";
93-
94-
// speculative sampling
95-
std::variant<std::string, ov::CompiledModel, ov::InferRequest> draft_model; // todo: remove or try to add ov::Model const ov::Model&,
9691
};
9792

98-
9993
} // namespace ov

src/cpp/include/openvino/genai/llm_pipeline.hpp

+57-16
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@
33

44
#pragma once
55

6-
#include <filesystem>
76
#include <optional>
7+
#include <variant>
88

9-
#include <openvino/runtime/infer_request.hpp>
109
#include <openvino/core/any.hpp>
11-
1210
#include "openvino/genai/generation_config.hpp"
1311
#include "openvino/genai/tokenizer.hpp"
1412
#include "openvino/genai/streamer_base.hpp"
@@ -84,18 +82,24 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
8482
* @param streamer optional streamer
8583
* @return std::string decoded resulting text
8684
*/
87-
std::string generate(std::string text, OptionalGenerationConfig generation_config, OptionalStreamerVariant streamer);
85+
std::string generate(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
8886

89-
9087
template <typename... Properties>
9188
util::EnableIfAllStringAny<std::string, Properties...> generate(
9289
std::string text,
9390
Properties&&... properties) {
9491
return generate(text, AnyMap{std::forward<Properties>(properties)...});
9592
}
96-
9793
std::string generate(std::string text, const ov::AnyMap& config);
9894

95+
template <typename... Properties>
96+
util::EnableIfAllStringAny<EncodedResults, Properties...> generate(
97+
ov::Tensor input_ids,
98+
Properties&&... properties) {
99+
return generate(input_ids, AnyMap{std::forward<Properties>(properties)...});
100+
}
101+
EncodedResults generate(ov::Tensor input_ids, const ov::AnyMap& config);
102+
99103
/**
100104
* @brief High level generate for batched prompts which encodes inputs and returns decoded outputs.
101105
* Streamer cannot be used for multibatch inputs.
@@ -119,16 +123,22 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
119123
*/
120124
EncodedResults generate(ov::Tensor input_ids,
121125
std::optional<ov::Tensor> attention_mask,
122-
OptionalGenerationConfig generation_config,
123-
OptionalStreamerVariant streamer);
124-
125-
std::string operator()(std::string text, OptionalGenerationConfig generation_config);
126-
DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config);
127-
DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config);
126+
OptionalGenerationConfig generation_config=nullopt,
127+
OptionalStreamerVariant streamer=nullopt);
128+
129+
template <typename InputsType, typename... Properties>
130+
util::EnableIfAllStringAny<std::string, Properties...> operator()(
131+
InputsType text,
132+
Properties&&... properties) {
133+
return generate(text, AnyMap{std::forward<Properties>(properties)...});
134+
}
135+
136+
DecodedResults operator()(std::vector<std::string> text, OptionalGenerationConfig generation_config=nullopt);
137+
DecodedResults operator()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=nullopt);
128138

129139
// generate with streamers
130-
std::string operator()(std::string text, OptionalGenerationConfig generation_config, StreamerVariant streamer);
131-
std::string operator()(std::string text, StreamerVariant streamer);
140+
std::string operator()(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt);
141+
std::string operator()(std::string text, OptionalStreamerVariant streamer);
132142

133143
ov::Tokenizer get_tokenizer();
134144
GenerationConfig get_generation_config() const;
@@ -143,9 +153,40 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
143153
std::unique_ptr<LLMPipelineImpl> m_pimpl;
144154
};
145155

156+
/*
157+
* utils that allow to use generate and operator() in the following way:
158+
* pipe.generate(input_ids, ov::max_new_tokens(200), ov::temperature(1.0f),...)
159+
* pipe(text, ov::max_new_tokens(200), ov::temperature(1.0f),...)
160+
* All names match the names in the config except streamer.
161+
*/
146162
static constexpr ov::Property<size_t> max_new_tokens{"max_new_tokens"};
163+
static constexpr ov::Property<size_t> max_length{"max_length"};
164+
static constexpr ov::Property<bool> ignore_eos{"ignore_eos"};
165+
166+
static constexpr ov::Property<size_t> num_beam_groups{"num_beam_groups"};
167+
static constexpr ov::Property<size_t> num_beams{"num_beams"};
168+
static constexpr ov::Property<float> diversity_penalty{"diversity_penalty"};
169+
static constexpr ov::Property<float> length_penalty{"length_penalty"};
170+
static constexpr ov::Property<size_t> num_return_sequences{"num_return_sequences"};
171+
static constexpr ov::Property<size_t> no_repeat_ngram_size{"no_repeat_ngram_size"};
172+
static constexpr ov::Property<StopCriteria> stop_criteria{"stop_criteria"};
173+
147174
static constexpr ov::Property<float> temperature{"temperature"};
148-
static constexpr ov::Property<std::function<void (std::string)>> streamer_lambda{"streamer_lambda"};
149-
static constexpr ov::Property<std::shared_ptr<StreamerBase>> streamer{"streamer"};
175+
static constexpr ov::Property<float> top_p{"top_p"};
176+
static constexpr ov::Property<int> top_k{"top_k"};
177+
static constexpr ov::Property<bool> do_sample{"do_sample"};
178+
static constexpr ov::Property<float> repetition_penalty{"repetition_penalty"};
179+
180+
181+
static constexpr ov::Property<int64_t> pad_token_id{"pad_token_id"};
182+
static constexpr ov::Property<int64_t> bos_token_id{"bos_token_id"};
183+
static constexpr ov::Property<int64_t> eos_token_id{"eos_token_id"};
184+
185+
static constexpr ov::Property<std::string> bos_token{"bos_token"};
186+
static constexpr ov::Property<std::string> eos_token{"eos_token"};
187+
188+
// only a lambda streamer can be set via ov::streamer(),... syntactic sugar,
189+
// because std::variant<StreamerBase, std::function<>> cannot be stored in AnyMap
190+
static constexpr ov::Property<std::function<void (std::string)>> streamer_lambda{"streamer"};
150191

151192
} // namespace ov

src/cpp/include/openvino/genai/streamer_base.hpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,21 @@
77

88
namespace ov {
99

10+
/**
11+
* @brief base class for streamers. To use it, inherit from this class and implement the put and end methods
12+
*
13+
* @param m_tokenizer tokenizer
14+
*/
1015
class StreamerBase {
1116
public:
17+
Tokenizer m_tokenizer;
18+
StreamerBase(Tokenizer tokenizer): m_tokenizer(tokenizer) {};
19+
StreamerBase() = default;
20+
21+
/// @brief put is called every time new token is decoded
1222
virtual void put(int64_t token) = 0;
13-
23+
24+
/// @brief end is called at the end of generation. It can be used to flush cache if your own streamer has one
1425
virtual void end() = 0;
1526
};
1627

src/cpp/include/openvino/genai/tokenizer.hpp

+17-11
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,16 @@
33

44
#pragma once
55

6-
#include <filesystem>
76
#include <string>
87
#include <vector>
9-
#include <memory>
108
#include <initializer_list>
11-
129
#include <openvino/runtime/tensor.hpp>
13-
1410
#include "openvino/genai/visibility.hpp"
1511

1612
namespace ov {
1713

1814
/**
19-
* @brief class used to encode prompts and decode resulting tokens
15+
* @brief class is used to encode prompts and decode resulting tokens
2016
*/
2117
class OPENVINO_GENAI_EXPORTS Tokenizer {
2218
public:
@@ -25,21 +21,22 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
2521
* @param tokenizer_path openvino_tokenizer.xml and openvino_detokenizer.xml should be located in the tokenizer_path
2622
* @param device device. Currently only 'CPU' is supported
2723
*/
28-
Tokenizer(const std::string tokenizers_path, const std::string device="CPU");
24+
Tokenizer(const std::string& tokenizers_path, const std::string& device="CPU");
2925

3026
/**
3127
* @brief encode a single prompt
3228
* @return pair of [input_ids, attention_mask]
3329
*/
34-
std::pair<ov::Tensor, ov::Tensor> encode(std::string prompt);
30+
std::pair<ov::Tensor, ov::Tensor> encode(const std::string prompt);
3531

3632
/**
3733
* @brief encode batch of prompts. Left padding will be applied by default
3834
* @param prompts vector storing batch of prompts
3935
* @return pair of [input_ids, attention_mask]
4036
*/
41-
std::pair<ov::Tensor, ov::Tensor> encode(std::vector<std::string> prompts);
42-
std::pair<ov::Tensor, ov::Tensor> encode(std::initializer_list<std::string> prompts);
37+
std::pair<ov::Tensor, ov::Tensor> encode(std::vector<std::string>& prompts);
38+
std::pair<ov::Tensor, ov::Tensor> encode(std::vector<std::string>&& prompts);
39+
std::pair<ov::Tensor, ov::Tensor> encode(std::initializer_list<std::string>& prompts);
4340

4441
/**
4542
* @brief decode sequence of tokens
@@ -62,8 +59,17 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
6259
*/
6360
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens);
6461

65-
int64_t m_bos_token_id = 1; // todo: read from rt_info
66-
int64_t m_eos_token_id = 2; // todo: read from rt_info
62+
// information about <bos>, <eos> tokens should be public,
63+
// they are used at least in StreamerBase descendants
64+
int64_t get_bos_token_id() const;
65+
int64_t get_eos_token_id() const;
66+
int64_t get_pad_token_id() const;
67+
68+
// Also need write access to set these tokens when they are not successfully read from xml rt_info.
69+
// In the latter case values can be read from config.json in LLMPipeline
70+
void set_bos_token_id(int64_t);
71+
void set_eos_token_id(int64_t);
72+
void set_pad_token_id(int64_t);
6773

6874
Tokenizer() = default;
6975
~Tokenizer();

0 commit comments

Comments
 (0)