From a9adb6692c340d4a5485188ce3e1f9b6851865bb Mon Sep 17 00:00:00 2001 From: Adrian Boguszewski Date: Mon, 20 Jan 2025 15:49:37 +0100 Subject: [PATCH 01/25] Moved files, updated readme --- ai_ref_kits/agentic_llm_rag/README.md | 16 ++++++++-------- ai_ref_kits/agentic_llm_rag/app.py | 2 +- .../paint_concierge_personality.yaml | 0 .../store_employee_personality.yaml | 18 +++++++++--------- .../{ => data}/Sample_Prompts.txt | 0 .../{ => data}/test_painting_llm_rag.pdf | Bin .../{create_tools.py => tools.py} | 0 7 files changed, 18 insertions(+), 18 deletions(-) rename ai_ref_kits/agentic_llm_rag/{ => config}/paint_concierge_personality.yaml (100%) rename ai_ref_kits/agentic_llm_rag/{ => config}/store_employee_personality.yaml (99%) rename ai_ref_kits/agentic_llm_rag/{ => data}/Sample_Prompts.txt (100%) rename ai_ref_kits/agentic_llm_rag/{ => data}/test_painting_llm_rag.pdf (100%) rename ai_ref_kits/agentic_llm_rag/{create_tools.py => tools.py} (100%) diff --git a/ai_ref_kits/agentic_llm_rag/README.md b/ai_ref_kits/agentic_llm_rag/README.md index 9edb9528..c15b100b 100644 --- a/ai_ref_kits/agentic_llm_rag/README.md +++ b/ai_ref_kits/agentic_llm_rag/README.md @@ -152,12 +152,12 @@ _NOTE: This application requires more than 16GB of memory because the models are For the python script, you must include the following model directory arguments. -- `--personality path/to/personality.txt`: The path to your custom personality txt file (for example, `personality.txt`). +- `--personality path/to/personality.yaml`: The path to your custom personality yaml file (for example, `config/personality.yaml`). This file defines the assistant's personality, including instructions, system configuration, and greeting prompts. You can create and specify your own custom personality file. - `--chat_model path/to/chat_model`: The path to your chat model directory (for example, `model/llama3.1-8B-INT4`) that drives conversation flow and response generation. -- `--rag_pdf`: The path to the document (for example, test_painting_llm_rag.pdf) that contains additional knowledge for Retrieval-Augmented Generation (RAG). +- `--rag_pdf`: The path to the document (for example, `data/test_painting_llm_rag.pdf`) that contains additional knowledge for Retrieval-Augmented Generation (RAG). - `--embedding_model path/to/embedding_model`: The path to your embedding model directory (for example, `model/bge-small-FP32`) for understanding and matching text inputs. @@ -166,16 +166,16 @@ This file defines the assistant's personality, including instructions, system co To run the application, execute the `app.py` script with the following command. Make sure to include all necessary model directory arguments. ```shell python app.py \ - --personality personality.txt \ + --personality path/to/personality.yaml \ --chat_model path/to/chat_model \ --embedding_model path/to/embedding_model \ --rag_pdf path/to/rag_document \ --public ``` -### Create a Custom TXT Personality File +### Create a Custom YAML Personality File -You can create a personality file for your virtual AI assistant as a TXT file. Each personality can be customized based on the specific role of the assistant. +You can create a personality file for your virtual AI assistant as a YAML file. Each personality can be customized based on the specific role of the assistant. #### Components of a Personality File @@ -185,11 +185,11 @@ A typical personality file has the following sections: 2. **System Configuration**: Instructions that define the assistant's behavior and limitations. 3. 
**Greet the User Prompt**: The first interaction when the assistant introduces itself. -#### Tips for Creating the TXT File +#### Tips for Creating the YAML File -The TXT file _instructions_ section should provide an introduction to the assistant, the title of the assistant, and important notes for the user. It should be clear and concise, and give users context for how to interact with the assistant. +The YAML file _instructions_ section should provide an introduction to the assistant, the title of the assistant, and important notes for the user. It should be clear and concise, and give users context for how to interact with the assistant. -```txt +```yaml instructions: | # [Assistant Name]: [Brief Role Description] diff --git a/ai_ref_kits/agentic_llm_rag/app.py b/ai_ref_kits/agentic_llm_rag/app.py index 2faaf804..ab201015 100644 --- a/ai_ref_kits/agentic_llm_rag/app.py +++ b/ai_ref_kits/agentic_llm_rag/app.py @@ -24,7 +24,7 @@ from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding from llama_index.llms.openvino import OpenVINOLLM -from create_tools import Math, PaintCostCalculator +from tools import Math, PaintCostCalculator # Initialize logging logging.basicConfig(level=logging.INFO) diff --git a/ai_ref_kits/agentic_llm_rag/paint_concierge_personality.yaml b/ai_ref_kits/agentic_llm_rag/config/paint_concierge_personality.yaml similarity index 100% rename from ai_ref_kits/agentic_llm_rag/paint_concierge_personality.yaml rename to ai_ref_kits/agentic_llm_rag/config/paint_concierge_personality.yaml diff --git a/ai_ref_kits/agentic_llm_rag/store_employee_personality.yaml b/ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml similarity index 99% rename from ai_ref_kits/agentic_llm_rag/store_employee_personality.yaml rename to ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml index f7d7f6f3..bacaf3ac 100644 --- a/ai_ref_kits/agentic_llm_rag/store_employee_personality.yaml +++ b/ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml @@ -1,9 +1,9 @@ -system_configuration: > - You are a helpful, respectful, and knowledgeable Paint Employee Concierge working at a retail store, designed to help new employees with their onboarding experience and complex questions from customers. - Your role is to assist new employees with inquiries about paint suggestions, price details, supply calculations, and product recommendations based on the knowledge and documents provided to you. - You may be asked to test employees' knowledge on paint, or guide them towards accurate answers. - You are strongly encouraged to use various tools that have been provided to you, including the vector_search tool, various math tools, and a paint calculation tool (calculate_paint_cost tool). - Answer questions with the information available from the RAG document (vector_search tool) or other shared knowledge, but if you're unsure or don't have specific details, politely inform the customer to check with senior store staff or the official product website for further information. - When answering questions about the number of gallons of paint needed or the cost of paint or similar queries, always consult the RAG document first. - Do not assume or provide any speculative information or estimates outside the shared knowledge base. Always encourage junior employees to verify with store associates for unavailable or unknown details. - Do not ask for personal information or provide any responses that are inappropriate or unethical. 
Always remain professional, empathetic, and polite. +system_configuration: > + You are a helpful, respectful, and knowledgeable Paint Employee Concierge working at a retail store, designed to help new employees with their onboarding experience and complex questions from customers. + Your role is to assist new employees with inquiries about paint suggestions, price details, supply calculations, and product recommendations based on the knowledge and documents provided to you. + You may be asked to test employees' knowledge on paint, or guide them towards accurate answers. + You are strongly encouraged to use various tools that have been provided to you, including the vector_search tool, various math tools, and a paint calculation tool (calculate_paint_cost tool). + Answer questions with the information available from the RAG document (vector_search tool) or other shared knowledge, but if you're unsure or don't have specific details, politely inform the customer to check with senior store staff or the official product website for further information. + When answering questions about the number of gallons of paint needed or the cost of paint or similar queries, always consult the RAG document first. + Do not assume or provide any speculative information or estimates outside the shared knowledge base. Always encourage junior employees to verify with store associates for unavailable or unknown details. + Do not ask for personal information or provide any responses that are inappropriate or unethical. Always remain professional, empathetic, and polite. diff --git a/ai_ref_kits/agentic_llm_rag/Sample_Prompts.txt b/ai_ref_kits/agentic_llm_rag/data/Sample_Prompts.txt similarity index 100% rename from ai_ref_kits/agentic_llm_rag/Sample_Prompts.txt rename to ai_ref_kits/agentic_llm_rag/data/Sample_Prompts.txt diff --git a/ai_ref_kits/agentic_llm_rag/test_painting_llm_rag.pdf b/ai_ref_kits/agentic_llm_rag/data/test_painting_llm_rag.pdf similarity index 100% rename from ai_ref_kits/agentic_llm_rag/test_painting_llm_rag.pdf rename to ai_ref_kits/agentic_llm_rag/data/test_painting_llm_rag.pdf diff --git a/ai_ref_kits/agentic_llm_rag/create_tools.py b/ai_ref_kits/agentic_llm_rag/tools.py similarity index 100% rename from ai_ref_kits/agentic_llm_rag/create_tools.py rename to ai_ref_kits/agentic_llm_rag/tools.py From 9b340b3aa48cfc90fe2ab905e340e6e56a06d378 Mon Sep 17 00:00:00 2001 From: Adrian Boguszewski Date: Mon, 20 Jan 2025 15:53:45 +0100 Subject: [PATCH 02/25] Moved code to main function --- ai_ref_kits/agentic_llm_rag/app.py | 43 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/app.py b/ai_ref_kits/agentic_llm_rag/app.py index ab201015..ffef12f8 100644 --- a/ai_ref_kits/agentic_llm_rag/app.py +++ b/ai_ref_kits/agentic_llm_rag/app.py @@ -279,18 +279,9 @@ def run(): run() -if __name__ == "__main__": - # Define the argument parser at the end - parser = argparse.ArgumentParser() - parser.add_argument("--chat_model", type=str, default="model/llama3.1-8B-INT4", help="Path to the chat model directory") - parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") - parser.add_argument("--rag_pdf", type=str, default="test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") - parser.add_argument("--personality", type=str, default="paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") 
- - args = parser.parse_args() - +def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str): # Load models and embedding based on parsed arguments - llm, embedding = setup_models(args.chat_model, args.embedding_model) + llm, embedding = setup_models(chat_model, embedding_model) Settings.embed_model = embedding Settings.llm = llm @@ -299,7 +290,7 @@ def run(): multiply_tool, divide_tool, add_tool, subtract_tool, paint_cost_calculator = setup_tools() # Step 4: Load documents and create the VectorStoreIndex - text_example_en_path = Path(args.rag_pdf) + text_example_en_path = Path(rag_pdf) index = load_documents(text_example_en_path) log.info(f"loading in {index}") vector_tool = QueryEngineTool( @@ -314,20 +305,34 @@ def run(): nest_asyncio.apply() # Load agent config - personality_file_path = Path(args.personality) + personality_file_path = Path(personality) with open(personality_file_path, "rb") as f: chatbot_config = yaml.safe_load(f) react_system_prompt = PromptTemplate(chatbot_config['system_configuration']) log.info(f"react_system_prompt {react_system_prompt}") - #Define agent and available tools - agent = ReActAgent.from_tools([multiply_tool, divide_tool, add_tool, subtract_tool, paint_cost_calculator, vector_tool], - llm=llm, - max_iterations=10, # Set a max_iterations value - handle_reasoning_failure_fn=custom_handle_reasoning_failure, - verbose=True) + # Define agent and available tools + agent = ReActAgent.from_tools( + [multiply_tool, divide_tool, add_tool, subtract_tool, paint_cost_calculator, vector_tool], + llm=llm, + max_iterations=10, # Set a max_iterations value + handle_reasoning_failure_fn=custom_handle_reasoning_failure, + verbose=True) agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) # Step 6: Run the app run_app(agent) + + +if __name__ == "__main__": + # Define the argument parser at the end + parser = argparse.ArgumentParser() + parser.add_argument("--chat_model", type=str, default="model/llama3.2-3B-INT4", help="Path to the chat model directory") + parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") + parser.add_argument("--rag_pdf", type=str, default="test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") + parser.add_argument("--personality", type=str, default="paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") + + args = parser.parse_args() + + main(args.chat_model, args.embedding_model, args.rag_pdf, args.personality) From 09cc8210b49605d060571c52e64e4873f94e39f1 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 27 Feb 2025 14:47:56 -0700 Subject: [PATCH 03/25] Agentic LLM RAG: Fix issues, improve performance, and add shopping cart feature Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/README.md | 103 ++--- ai_ref_kits/agentic_llm_rag/app.py | 390 +++++++++++------- .../convert_and_optimize_llm.py | 26 +- ai_ref_kits/agentic_llm_rag/css/gradio.css | 153 +++++++ .../agentic_llm_rag/data/Sample_Prompts.txt | 43 +- ai_ref_kits/agentic_llm_rag/system_prompt.py | 58 +++ ai_ref_kits/agentic_llm_rag/tools.py | 120 +++++- 7 files changed, 624 insertions(+), 269 deletions(-) create mode 100644 ai_ref_kits/agentic_llm_rag/css/gradio.css create mode 100644 ai_ref_kits/agentic_llm_rag/system_prompt.py diff --git a/ai_ref_kits/agentic_llm_rag/README.md b/ai_ref_kits/agentic_llm_rag/README.md index c15b100b..a35e36fe 100644 --- 
a/ai_ref_kits/agentic_llm_rag/README.md
+++ b/ai_ref_kits/agentic_llm_rag/README.md
@@ -1,35 +1,34 @@
 [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](https://github.com/openvinotoolkit/openvino_build_deploy/blob/master/LICENSE.txt)
 
-The AI Insight Agent with RAG uses Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) to interpret user prompts, engage in meaningful dialogue, perform calculations, and deliver spoken responses through advanced speech recognition. This solution uses the OpenVINO™ toolkit to power a streamlined, voice-activated interface. Designed for both consumers and employees, it functions as a smart, personalized retail assistant, offering an interactive and user-friendly experience similar to an advanced digital kiosk.
+The AI Insight Agent with RAG uses Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) to interpret user prompts, engage in meaningful dialogue, perform calculations, ground its answers in retrieved documents, and help the user add items to a virtual shopping cart. This solution uses the OpenVINO™ toolkit to power the AI models at the edge. Designed for both consumers and employees, it functions as a smart, personalized retail assistant, offering an interactive and user-friendly experience similar to an advanced digital kiosk.
 
 This kit uses the following technology stack:
 - [OpenVINO Toolkit](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) ([docs](https://docs.openvino.ai/))
-- [Llama](https://llama.meta.com/llama3/)
-- [Whisper](https://openai.com/index/whisper/)
+- [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)
+- [bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5)
 - [Gradio interface](https://www.gradio.app/docs/gradio/chatinterface)
 
 Check out our [AI Reference Kits repository](/) for other kits.
 
-![ai-insight-agent-with-rag](https://github.com/user-attachments/assets/1a7ca6bc-3bde-4e97-be61-83a3709e9b73)
-
-### What's New
-
-New updates will be added to this contents list.
+![ai-insight-agent-with-rag](https://github.com/user-attachments/assets/da97bea7-29e8-497f-b7ba-4e00c79773f1)
Table of Contents
 
 - [Getting Started](#get-started)
   - [Installing Prerequisites](#install-prerequisites)
-  - [Setting Up Your Environment](#set-up-your-environment)
-  - [Accessing Llama](#get-access-to-llama)
+  - [Setting Up Your Environment](#set-up-your-environment)
   - [Converting and Optimizing the Model](#convert-and-optimize-the-model)
   - [Running the Application](#run-the-application)
 - [Additional Resources](#additional-resources)
@@ -38,7 +37,7 @@ New updates will be added to this contents list.
 
 # Getting Started
 
-To get started with the AI Insight Agent with RAG, you install Python, set up your environment, and then you can run the application. We recommend using Ubuntu to set up and run this project.
+To get started with the AI Insight Agent with RAG, you install Python, set up your environment, and then you can run the application. We recommend using Ubuntu 24.04 to set up and run this project.
 
 ## Installing Prerequisites
 
@@ -107,39 +106,30 @@ To install the required packages, run the following commands:
 python -m pip install --upgrade pip
 pip install -r requirements.txt
 ```
-## Accessing Llama
-
-_NOTE: If you already have access to the Llama model weights, you can proceed to the authentication step, which is mandatory to convert the Llama model._
-
 ## Converting and Optimizing the Model
 
-The application uses three separate models. Each model requires conversion and optimization for use with OpenVINO™. The following process includes a step to convert and optimize each model.
+The application uses two separate models. Each model requires conversion and optimization for use with OpenVINO™. The following process includes a step to convert and optimize each model.
 
 _NOTE: This reference kit requires more than 8GB of bandwidth and disk space for downloading models. Because of the large model size, when you run the kit for the first time, the conversion can take more than two hours and require more than 32GB of memory. After the first run, the subsequent runs should finish much faster._
 
 ## Chat Model and Embedding Model Conversion
 
-The _chat model_ is the core of the chatbot's ability to generate meaningful and context-aware responses. It processes the text input from the ASR model and produces a human-like response.
+The _chat model_ is the core of the chatbot's ability to generate meaningful and context-aware responses.
 
 The _embedding model_ represents text data (both user queries and potential responses or knowledge base entries) as numerical vectors. These vectors are essential for tasks such as semantic search and similarity matching.
 
 This conversion script handles the conversion and optimization of:
 
-- The chat model (`llama3.1-8B`) with `int4` precision.
+- The chat model (`qwen2-7B`) with `int4` precision.
 - The embedding model (`bge-large`) with `FP32` precision.
 
-Before you can run the script to convert the models, you must have a Hugging Face token (`--hf_token`) for authentication, which allows you to get access to gated models, such as Llama. After the models are converted, they’re saved to the model directory you specify when you run the script.
+After the models are converted, they’re saved to the model directory you specify when you run the script.
 
-To get access to the original Llama model weights:
-1. Go to the Llama model page on Hugging Face [meta-llama/Meta-Llama 3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B).
-2. Read and accept the license agreement.
-_Requests can take up to one hour to process._
-
-After you get access to the Llama model weights, you can convert the chat and embedding models.
-
 To convert the chat and embedding models, run:
 ```shell
-python convert_and_optimize_llm.py --chat_model_type llama3.1-8B --embedding_model_type bge-large --precision int4 --hf_token --model_dir model
+python convert_and_optimize_llm.py --chat_model_type qwen2-7B --embedding_model_type bge-large --precision int4 --model_dir model
 ```
 
 After you run the conversion scripts, you can run `app.py` to launch the application.
 
@@ -150,56 +140,43 @@ To run the AI Insight Agent with RAG application, you execute the following pyth
 
 _NOTE: This application requires more than 16GB of memory because the models are very large (especially the chatbot model). If you have a less powerful device, the application might also run slowly._
 
-For the python script, you must include the following model directory arguments.
+After that, you should be able to run the application with default values:
+
+```shell
+python app.py
+```
 
-- `--personality path/to/personality.yaml`: The path to your custom personality yaml file (for example, `config/personality.yaml`).
-This file defines the assistant's personality, including instructions, system configuration, and greeting prompts. You can create and specify your own custom personality file.
+For more settings, you can change the argument values:
 
-- `--chat_model path/to/chat_model`: The path to your chat model directory (for example, `model/llama3.1-8B-INT4`) that drives conversation flow and response generation.
+- `--chat_model`: The path to your chat model directory (for example, `model/qwen2-7B-INT4`) that drives conversation flow and response generation.
 
 - `--rag_pdf`: The path to the document (for example, `data/test_painting_llm_rag.pdf`) that contains additional knowledge for Retrieval-Augmented Generation (RAG).
 
-- `--embedding_model path/to/embedding_model`: The path to your embedding model directory (for example, `model/bge-small-FP32`) for understanding and matching text inputs.
+- `--embedding_model`: The path to your embedding model directory (for example, `model/bge-large-FP32`) for understanding and matching text inputs.
+
+- `--device`: Include this flag to select the inference device for both models (for example, `CPU`). If you have access to a dedicated GPU (ARC, Flex), you can change the value to `GPU.1`. Possible values: `CPU`, `GPU`, `GPU.1`, `NPU`.
 
 - `--public`: Include this flag to make the Gradio interface publicly accessible over the network. Without this flag, the interface will only be available on your local machine.
 
 To run the application, execute the `app.py` script with the following command. Make sure to include all necessary model directory arguments.
 ```shell
-python app.py \
-    --personality path/to/personality.yaml \
-    --chat_model path/to/chat_model \
-    --embedding_model path/to/embedding_model \
-    --rag_pdf path/to/rag_document \
+python app.py \
+    --chat_model model/qwen2-7B-INT4 \
+    --embedding_model model/bge-large-FP32 \
+    --rag_pdf data/test_painting_llm_rag.pdf \
+    --device GPU.1 \
     --public
 ```
 
-### Create a Custom YAML Personality File
-
-You can create a personality file for your virtual AI assistant as a YAML file. Each personality can be customized based on the specific role of the assistant.
-
-#### Components of a Personality File
-
-A typical personality file has the following sections:
-
-1. **Instructions**: A brief, descriptive title for the assistant.
-2. **System Configuration**: Instructions that define the assistant's behavior and limitations.
-3. **Greet the User Prompt**: The first interaction when the assistant introduces itself.
+### System Prompt Usage in LlamaIndex ReActAgent
 
-#### Tips for Creating the YAML File
+The LlamaIndex ReActAgent library relies on a default system prompt that provides essential instructions to the LLM for correctly interacting with available tools. This prompt is fundamental for enabling both tool usage and RAG (Retrieval-Augmented Generation) queries.
 
-The YAML file _instructions_ section should provide an introduction to the assistant, the title of the assistant, and important notes for the user. It should be clear and concise, and give users context for how to interact with the assistant.
+#### Important:
+Do not override or modify the default system prompt. Altering it may prevent the LLM from using the tools or executing RAG queries properly.
 
-```yaml
-instructions: |
-  # [Assistant Name]: [Brief Role Description]
-
-  Instructions for use:
-  1. Provide a brief step-by-step guide for how the assistant works.
-  2. Include key points the user should know before they interact with the assistant.
-  3. Mention any important disclaimers, if applicable.
-
-  **Note: [Add a disclaimer or key note about what the assistant can and cannot do].**
-```
+#### Customizing the Prompt:
+If you need to add extra rules or custom behavior, modify the _Additional Rules_ section located in the `system_prompt.py` file (a sketch of such a customization appears at the end of this section).
 
 ### Use the Web Interface
 After the script runs, Gradio provides a local URL (typically `http://127.0.0.1:XXXX`) that you can open in your web browser to interact with the assistant. If you configured the application to be accessible publicly, Gradio also provides a public URL.
@@ -212,10 +189,6 @@ When you test the AI Insight Agent with RAG application, you can test both the i
 2. Test text interaction with the application.
    - Type your question in the text box and press **Enter**. _The assistant responds to your question in text form._
 
-3. To add products to the cart:
-   - Select the products displayed in the interface and specify the quantity.
-   - Click the Add to Cart button to update your cart.
-
 For further testing of the AI Insight Agent with RAG application, you can engage with the chatbot assistant by asking it questions, or giving it commands that align with the assistant's capabilities. This hands-on experience can help you to understand the assistant's interactive quality and performance.
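
For reference, a minimal sketch of a customized `system_prompt.py` is shown below. The second rule is illustrative only and is not part of the kit; everything above the `## Additional Rules` marker must stay identical to the ReActAgent default:

```python
# system_prompt.py (excerpt) -- illustrative sketch, not the shipped file.
# The default ReAct instructions are kept verbatim; only the section below
# the "## Additional Rules" marker is meant to be edited.

react_system_header_str = """\
... default ReActAgent tool and output-format instructions, unchanged ...

## Additional Rules
- End every sentence with a polite question to engage with the customer, include emojis about painting.
- If a requested product is not in the provided documents, say so rather than guessing.

## Current Conversation
Below is the current conversation consisting of interleaving human and assistant messages.

"""
```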
diff --git a/ai_ref_kits/agentic_llm_rag/app.py b/ai_ref_kits/agentic_llm_rag/app.py index ffef12f8..51c72406 100644 --- a/ai_ref_kits/agentic_llm_rag/app.py +++ b/ai_ref_kits/agentic_llm_rag/app.py @@ -23,8 +23,11 @@ from llama_index.core.tools import QueryEngineTool, ToolMetadata from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding from llama_index.llms.openvino import OpenVINOLLM - -from tools import Math, PaintCostCalculator +from llama_index.core.agent import ReActChatFormatter +from llama_index.core.llms import MessageRole +# Agent tools +from tools import PaintCalculator, ShoppingCart +from system_prompt import react_system_header_str # Initialize logging logging.basicConfig(level=logging.INFO) @@ -33,54 +36,98 @@ #Filter unnecessary warnings for demonstration warnings.filterwarnings("ignore") -llm_device = "GPU" -embedding_device = "GPU" ov_config = { hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): "" } - -def phi_completion_to_prompt(completion): - return f"<|system|><|end|><|user|>{completion}<|end|><|assistant|>\n" - - -def llama3_completion_to_prompt(completion): - return f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{completion}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" - - -def setup_models(llm_model_path, embedding_model_path): - # Load the Llama model locally +def setup_models(llm_model_path, embedding_model_path, device): + # Load LLM model locally llm = OpenVINOLLM( model_id_or_path=str(llm_model_path), - context_window=3900, - max_new_tokens=1000, + context_window=8192, + max_new_tokens=500, model_kwargs={"ov_config": ov_config}, - generate_kwargs={"do_sample": False, "temperature": None, "top_p": None}, - completion_to_prompt=phi_completion_to_prompt if llm_model_path == "Phi-3-mini-4k-instruct-int4-ov" else llama3_completion_to_prompt, - device_map=llm_device, + generate_kwargs={"do_sample": False, "temperature": 0.1, "top_p": 0.8}, + device_map=device, ) # Load the embedding model locally - embedding = OpenVINOEmbedding(model_id_or_path=embedding_model_path, device=embedding_device) + embedding = OpenVINOEmbedding(model_id_or_path=embedding_model_path, device=device) return llm, embedding def setup_tools(): - multiply_tool = FunctionTool.from_defaults(fn=Math.multiply) - divide_tool = FunctionTool.from_defaults(fn=Math.divide) - add_tool = FunctionTool.from_defaults(fn=Math.add) - subtract_tool = FunctionTool.from_defaults(fn=Math.add) - paint_cost_calculator = FunctionTool.from_defaults(fn=PaintCostCalculator.calculate_paint_cost) - return multiply_tool, divide_tool, add_tool, subtract_tool, paint_cost_calculator + + paint_cost_calculator = FunctionTool.from_defaults( + fn=PaintCalculator.calculate_paint_cost, + name="calculate_paint_cost", + description="ALWAYS use this tool when calculating paint cost for a specific area in square feet. Required inputs: area (float, square feet), price_per_gallon (float), add_paint_supply_costs (bool)" + ) + + paint_gallons_calculator = FunctionTool.from_defaults( + fn=PaintCalculator.calculate_paint_gallons_needed, + name="calculate_paint_gallons", + description="Calculate how many gallons of paint are needed to cover a specific area. Required input: area (float, square feet). Returns the number of gallons needed, rounded up to ensure full coverage." 
+    )
+
+    add_to_cart_tool = FunctionTool.from_defaults(
+        fn=ShoppingCart.add_to_cart,
+        name="add_to_cart",
+        description="""
+        Use this tool WHENEVER a user wants to add any item to their cart or shopping cart.
+
+        PARAMETERS:
+        - product_name (string): The exact name of the product (e.g., "Premium Latex Paint")
+        - quantity (int): The number of units to add, must be a positive integer (e.g., 2)
+        - price_per_unit (float): The price per unit in dollars (e.g., 24.99)
+
+        RETURNS:
+        - A confirmation message and updated cart contents
+
+        EXAMPLES:
+        To add 3 gallons of paint at $29.99 each: add_to_cart(product_name="Interior Eggshell Paint", quantity=3, price_per_unit=29.99)
+        """
+    )
+
+    get_cart_items_tool = FunctionTool.from_defaults(
+        fn=ShoppingCart.get_cart_items,
+        name="view_cart",
+        description="""
+        Use this tool when a user wants to see what's in their shopping cart.
+        No parameters are required.
+
+        RETURNS:
+        - A list of all items currently in the cart with their details
+
+        EXAMPLES:
+        To view the current cart contents: view_cart()
+        """
+    )
+
+    clear_cart_tool = FunctionTool.from_defaults(
+        fn=ShoppingCart.clear_cart,
+        name="clear_cart",
+        description="""
+        Use this tool when a user asks to empty or clear their shopping cart.
+        No parameters are required.
+
+        RETURNS:
+        - A confirmation message that the cart has been cleared
+
+        EXAMPLES:
+        To empty the shopping cart: clear_cart()
+        """
+    )
+    return paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator
 
 
 def load_documents(text_example_en_path):
-    # Check and download document if not present
+
     if not text_example_en_path.exists():
-        text_example_en = "test_painting_llm_rag.pdf"  # TBD - Replace with valid URL
+        text_example_en = "test_painting_llm_rag.pdf"  # TODO: replace with a valid download URL
         r = requests.get(text_example_en)
         content = io.BytesIO(r.content)
         with open(text_example_en_path, "wb") as f:
@@ -92,44 +139,6 @@ def load_documents(text_example_en_path):
 
     return index
 
-
-# Function to simulate adding items to cart, and to check size of cart
-def purchase_click(cart, *components):
-    """Update the cart with unique selected items and their quantities."""
-    selected_items = []
-    quantities = []
-
-    for i in range(0, len(components), 2):
-        item_checkbox = components[i]
-        quantity_box = components[i + 1]
-        if item_checkbox is True:  # Fix to check if checkbox is selected
-            selected_items.append(item_components[i // 2][0].label)
-            quantities.append(quantity_box)
-
-    updated_cart = cart.copy()
-    for item, quantity in zip(selected_items, quantities):
-        if item and quantity > 0:
-            item_entry = (item, quantity)
-            # Update or add the item with quantity in the cart
-            for idx, cart_item in enumerate(updated_cart):
-                if cart_item[0] == item:
-                    updated_cart[idx] = item_entry
-                    break
-            else:
-                updated_cart.append(item_entry)
-
-    # Calculate the total quantity of items in the cart
-    cart_size = sum(quantity for _, quantity in updated_cart)
-    # Update purchase action to list items and their quantities
-    if selected_items:
-        item_details = ", ".join([f"{item} (Quantity: {quantity})" for item, quantity in zip(selected_items, quantities)])
-        purchase_action = f"Added the following items to cart: {item_details}."
-    else:
-        purchase_action = "No items selected."
-
-    return updated_cart, purchase_action, cart_size
-
-
 # Custom function to handle reasoning failures
 def custom_handle_reasoning_failure(callback_manager, exception):
     return "Hmm...I didn't quite get that. Could you please rephrase your question to be simpler?"
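
As a quick, agent-free sanity check (illustrative only, assuming it is run from the kit directory so `tools.py` is importable), the tool functions registered above can be called directly:

```python
# Smoke test of the tool functions without the agent (illustrative).
from tools import PaintCalculator, ShoppingCart

print(PaintCalculator.calculate_paint_gallons_needed(600))     # -> 3 (2 gallons per 400 sq ft, rounded up)
print(PaintCalculator.calculate_paint_cost(600, 29.99, True))  # -> 139.97 (3 * 29.99 + 50 for supplies)
print(ShoppingCart.add_to_cart("Premium Latex Paint", 2, 24.99)["message"])
print(ShoppingCart.get_cart_items())                           # one entry with total_price 49.98
print(ShoppingCart.clear_cart()["message"])
```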
@@ -144,14 +153,46 @@ def __enter__(self):
     def __exit__(self, *args):
         self.extend(self._stringio.getvalue().splitlines())
         del self._stringio
-        sys.stdout = self._stdout
+        sys.stdout = self._stdout
 
     def _handle_user_message(user_message, history):
         return "", [*history, (user_message, "")]
 
+    def update_cart_display():
+        cart_items = ShoppingCart.get_cart_items()
+        if not cart_items:
+            return "### 🛒 Your Shopping Cart is Empty"
+
+        table = "### 🛒 Your Shopping Cart\n\n"
+        table += "<table>\n"
+        table += "  <thead>\n"
+        table += "    <tr>\n"
+        table += "      <th>Product</th>\n"
+        table += "      <th>Qty</th>\n"
+        table += "      <th>Price</th>\n"
+        table += "      <th>Total</th>\n"
+        table += "    </tr>\n"
+        table += "  </thead>\n"
+        table += "  <tbody>\n"
+
+        for item in cart_items:
+            table += "    <tr>\n"
+            table += f"      <td>{item['product_name']}</td>\n"
+            table += f"      <td>{item['quantity']}</td>\n"
+            table += f"      <td>${item['price_per_unit']:.2f}</td>\n"
+            table += f"      <td>${item['total_price']:.2f}</td>\n"
+            table += "    </tr>\n"
+
+        table += "  </tbody>\n"
+        table += "</table>\n"
+
+        total = sum(item["total_price"] for item in cart_items)
+        table += f"\n**Total: ${total:.2f}**"
+        return table
 
     def _generate_response(chat_history, log_history):
-        log.info(f"log_history {log_history}")
+        log.info(f"log_history {log_history}")
+
         if not isinstance(log_history, list):
             log_history = []
 
@@ -164,15 +205,28 @@ def _generate_response(chat_history, log_history):
             response = agent.stream_chat(chat_history[-1][0])
         except ValueError:
             response = agent.stream_chat(chat_history[-1][0])
+        formatted_output = []
+        for line in output:
+            if "Thought:" in line:
+                formatted_output.append("\n🤔 **Thought:**\n" + line.split("Thought:", 1)[1])
+            elif "Action:" in line:
+                formatted_output.append("\n🔧 **Action:**\n" + line.split("Action:", 1)[1])
+            elif "Action Input:" in line:
+                formatted_output.append("\n📥 **Input:**\n" + line.split("Action Input:", 1)[1])
+            elif "Observation:" in line:
+                formatted_output.append("\n📋 **Result:**\n" + line.split("Observation:", 1)[1])
+            else:
+                formatted_output.append(line)
         end_thought_time = time.time()
         thought_process_time = end_thought_time - start_thought_time
 
         # After response is complete, show the captured logs in the log area
-        log_entries = "\n".join(output)
+        log_entries = "\n".join(formatted_output)
+        log_history.append("### 🤔 Agent's Thought Process")
         thought_process_log = f"Thought Process Time: {thought_process_time:.2f} seconds"
         log_history.append(f"{log_entries}\n{thought_process_log}")
-
-        yield chat_history, "\n".join(log_history)  # Yield after the thought process time is captured
+        cart_content = update_cart_display()  # update shopping cart
+        yield chat_history, "\n".join(log_history), cart_content  # Yield after the thought process time is captured
 
         # Now capture response generation time
         start_response_time = time.time()
@@ -188,7 +242,7 @@ def _generate_response(chat_history, log_history):
                 chat_history[-1][1] += token.split()[1] + " "
             else:
                 chat_history[-1][1] += token
-            yield chat_history, "\n".join(log_history)  # Ensure log_history is a string
+            yield chat_history, "\n".join(log_history), cart_content  # Ensure log_history is a string
             if i <= 2:
                 i += 1
         end_response_time = time.time()
@@ -196,143 +250,181 @@ def _generate_response(chat_history, log_history):
 
         # Log tokens per second along with the device information
         tokens = len(chat_history[-1][1].split(" ")) * 4 / 3  # Convert words to approx token count
-        response_log = f"Response Time: {response_time:.2f} seconds ({tokens / response_time:.2f} tokens/s on {llm_device})"
+        response_log = f"Response Time: {response_time:.2f} seconds ({tokens / response_time:.2f} tokens/s)"
         log.info(response_log)
 
         # Append the response time to log history
         log_history.append(response_log)
-        yield chat_history, "\n".join(log_history)  # Join logs into a string for display
+        yield chat_history, "\n".join(log_history), cart_content  # Join logs into a string for display
 
     def _reset_chat():
         agent.reset()
-        return "", [], []  # Reset both chat and logs (initialize log as empty list)
+        ShoppingCart._cart_items = []
+        return "", [], "🤔 Agent's Thought Process", update_cart_display()
 
     def run():
-        with gr.Blocks() as demo:
-            gr.Markdown("# Smart Retail Assistant 🤖: Agentic LLMs with RAG 💭")
-            gr.Markdown("Ask me about paint! 🎨")
🎨") + custom_css = "" + try: + with open("css/gradio.css", "r") as css_file: + custom_css = css_file.read() + except Exception as e: + log.warning(f"Could not load CSS file: {e}") + + theme = gr.themes.Default( + primary_hue="blue", + font=[gr.themes.GoogleFont("Montserrat"), "ui-sans-serif", "sans-serif"], + ) + + with gr.Blocks(theme=theme, css=custom_css) as demo: + + header = gr.HTML( + "
" + "
" + " " + "
Smart Retail Assistant 🤖: Agentic LLMs with RAG 💭
" + "
" + "
" + ) with gr.Row(): chat_window = gr.Chatbot( label="Paint Purchase Helper", avatar_images=(None, "https://docs.openvino.ai/2024/_static/favicon.ico"), - height=400, # Adjust height as per your preference + height=400, # Adjust height as per your preference scale=2 # Set a higher scale value for Chatbot to make it wider #autoscroll=True, # Enable auto-scrolling for better UX + ) + log_window = gr.Markdown( + show_label=True, + value="### 🤔 Agent's Thought Process", + height=400, + elem_id="agent-steps" ) - log_window = gr.Code( - label="Agent's Steps", - language="python", - interactive=False, - scale=1 # Set lower scale to make it narrower than the Chatbot + cart_display = gr.Markdown( + value=update_cart_display(), + elem_id="shopping-cart", + height=400 ) - with gr.Row(): - message = gr.Textbox(label="Ask the Paint Expert", scale=4, placeholder="Type your prompt/Question and press Enter") - clear = gr.ClearButton() + with gr.Row(): + message = gr.Textbox(label="Ask the Paint Expert 🎨", scale=4, placeholder="Type your prompt/Question and press Enter") + + with gr.Column(scale=1): + submit_btn = gr.Button("Submit", variant="primary") + clear = gr.ClearButton() + + sample_questions = [ + "what paint is the best for kitchens?", + "what is the price of it?", + "how many gallons of paint do I need to cover 600 sq ft ?", + "add them to my cart", + "what else do I need to complete my project?", + "add 2 brushes to my cart", + "create a table with paint products sorted by price", + "view my cart", + "clear shopping cart", + "I have a room 1000 sqft, I'm looking for supplies to paint the room" + ] + gr.Examples( + examples=sample_questions, + inputs=message, + label="Examples" + ) + # Ensure that individual components are passed message.submit( + _handle_user_message, + inputs=[message, chat_window], + outputs=[message, chat_window], + queue=False + ).then( + _generate_response, + inputs=[chat_window, log_window], + outputs=[chat_window, log_window, cart_display], + ) + + submit_btn.click( _handle_user_message, inputs=[message, chat_window], outputs=[message, chat_window], queue=False, ).then( _generate_response, - inputs=[chat_window, log_window], # Pass individual components, including log_window - outputs=[chat_window, log_window], # Update chatbot and log window + inputs=[chat_window, log_window], + outputs=[chat_window, log_window, cart_display], ) - clear.click(_reset_chat, None, [message, chat_window, log_window]) - - gr.Markdown("------------------------------") - gr.Markdown("### Purchase items") - - cart = gr.State([]) - - # Define items with checkbox and numeric quantity - items = ["Behr Premium Plus", "AwesomeSplash", "TheBrush", "PaintFinish"] - - global item_components - item_components = [] - for item in items: - with gr.Row(equal_height=True): - item_checkbox = gr.Checkbox(label=f"{item}", value=False) - quantity_box = gr.Number( - label=f"{item} Quantity", value=1, precision=0, interactive=False, minimum=1 - ) - item_checkbox.change( - fn=lambda selected, box=quantity_box: gr.update(interactive=selected, value=1 if selected else 0), - inputs=item_checkbox, - outputs=quantity_box, - ) - item_components.append((item_checkbox, quantity_box)) - - purchase = gr.Button(value="Add to Cart") - cart_size = gr.Number(label="Cart Size", interactive=False) - purchased_textbox = gr.Textbox(label="Purchase Action", interactive=False) - # Gather inputs from all item checkbox and number box pairs - component_inputs = [cart] + [comp for pair in item_components for comp in pair] - 
purchase.click(fn=purchase_click, inputs=component_inputs, outputs=[cart, purchased_textbox, cart_size]) + clear.click(_reset_chat, None, [message, chat_window, log_window, cart_display]) + + gr.Markdown("------------------------------") demo.launch() run() -def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str): +def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str, device: str): # Load models and embedding based on parsed arguments - llm, embedding = setup_models(chat_model, embedding_model) + llm, embedding = setup_models(chat_model, embedding_model, device) Settings.embed_model = embedding Settings.llm = llm # Set up tools - multiply_tool, divide_tool, add_tool, subtract_tool, paint_cost_calculator = setup_tools() - - # Step 4: Load documents and create the VectorStoreIndex + paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator = setup_tools() + text_example_en_path = Path(rag_pdf) index = load_documents(text_example_en_path) log.info(f"loading in {index}") + vector_tool = QueryEngineTool( index.as_query_engine(streaming=True), metadata=ToolMetadata( name="vector_search", - description="Useful for searching for facts and product recommendations about paint", + description=""" + Use this tool for ANY question about paint products, recommendations, prices, or technical specifications. + + WHEN TO USE: + - User asks about paint types, brands, or products + - User needs price information before adding to cart + - User needs recommendations based on their project + - User has technical questions about painting + + EXAMPLES: + - "What paint is best for kitchen cabinets?" + - "How much does AwesomePainter Interior Acrylic Latex cost?" + - "What supplies do I need for painting my living room?" 
+ """, ), ) - - # Step 5: Initialize the agent with the loaded tools + nest_asyncio.apply() - - # Load agent config - personality_file_path = Path(personality) - - with open(personality_file_path, "rb") as f: - chatbot_config = yaml.safe_load(f) - - react_system_prompt = PromptTemplate(chatbot_config['system_configuration']) - log.info(f"react_system_prompt {react_system_prompt}") + # Define agent and available tools agent = ReActAgent.from_tools( - [multiply_tool, divide_tool, add_tool, subtract_tool, paint_cost_calculator, vector_tool], + [paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, vector_tool, paint_gallons_calculator], llm=llm, - max_iterations=10, # Set a max_iterations value + max_iterations=5, # Set a max_iterations value handle_reasoning_failure_fn=custom_handle_reasoning_failure, - verbose=True) - agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) - - # Step 6: Run the app + verbose=True, + react_chat_formatter=ReActChatFormatter.from_defaults( + observation_role=MessageRole.TOOL + ), + ) + react_system_prompt = PromptTemplate(react_system_header_str) + agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) + agent.reset() run_app(agent) - if __name__ == "__main__": # Define the argument parser at the end parser = argparse.ArgumentParser() - parser.add_argument("--chat_model", type=str, default="model/llama3.2-3B-INT4", help="Path to the chat model directory") + parser.add_argument("--chat_model", type=str, default="model/qwen2-7B-INT4", help="Path to the chat model directory") parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") - parser.add_argument("--rag_pdf", type=str, default="test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") - parser.add_argument("--personality", type=str, default="paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") + parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") + parser.add_argument("--personality", type=str, default="config/paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") + parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") args = parser.parse_args() - main(args.chat_model, args.embedding_model, args.rag_pdf, args.personality) + main(args.chat_model, args.embedding_model, args.rag_pdf, args.personality, args.device) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index ba34fab4..8dfcf98c 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -9,16 +9,8 @@ from transformers import AutoTokenizer MODEL_MAPPING = { - "llama3-8B": "meta-llama/Meta-Llama-3-8B-Instruct", - "llama3.1-8B": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "llama3.2-3B": "meta-llama/Llama-3.2-3B-Instruct", - "llama3.2-11B": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "llama2-7B": "meta-llama/Llama-2-7b-chat-hf", - "llama2-13B": "meta-llama/Llama-2-13b-chat-hf", "qwen2-7B": "Qwen/Qwen2-7B-Instruct", - "bge-small": "BAAI/bge-small-en-v1.5", "bge-large": "BAAI/bge-large-en-v1.5", - "bge-m3": "BAAI/bge-m3", } def optimize_model_for_npu(model: OVModelForFeatureExtraction): 
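
If you need additional chat models locally, one option (an assumption for illustration, not part of this change) is to extend `MODEL_MAPPING` and the matching `--chat_model_type` choices; for example, re-adding the previously supported Llama model, which is gated and again requires a Hugging Face token:

```python
# Hypothetical local tweak (not part of this PR): re-enable a second chat model.
MODEL_MAPPING = {
    "qwen2-7B": "Qwen/Qwen2-7B-Instruct",
    "llama3.1-8B": "meta-llama/Meta-Llama-3.1-8B-Instruct",  # gated; needs HF access
    "bge-large": "BAAI/bge-large-en-v1.5",
}
```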
@@ -59,23 +51,25 @@ def callback(matcher: passes.Matcher) -> bool:
     model.reshape(1, 512)
 
 
-def convert_chat_model(model_type: str, precision: str, model_dir: Path, access_token: str) -> Path:
+def convert_chat_model(model_type: str, precision: str, model_dir: Path) -> Path:
     """
     Convert chat model
 
     Params:
         model_type: selected mode type and size
         precision: model precision
         model_dir: dir to export model
-        access_token: access token from Hugging Face to download gated models
     Returns:
        Path to exported model
     """
     output_dir = model_dir / model_type
     model_name = MODEL_MAPPING[model_type]
 
     # load model and convert it to OpenVINO
-    model = OVModelForCausalLM.from_pretrained(model_name, export=True, compile=False, load_in_8bit=False, token=access_token)
+    model = OVModelForCausalLM.from_pretrained(model_name, export=True, compile=False, load_in_8bit=False)
     # change precision to FP16
     model.half()
@@ -131,14 +125,14 @@ def convert_embedding_model(model_type: str, model_dir: Path) -> Path:
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--chat_model_type", type=str, choices=["llama2-7B", "llama2-13B", "llama3-8B", "llama3.1-8B", "llama3.2-3B", "llama3.2-11B", "qwen2-7B"],
-                        default="llama3.1-8B", help="Chat model to be converted")
+    parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B"],
+                        default="qwen2-7B", help="Chat model to be converted")
-    parser.add_argument("--embedding_model_type", type=str, choices=["bge-small", "bge-large", "bge-m3"],
+    parser.add_argument("--embedding_model_type", type=str, choices=["bge-large"],
                         default="bge-large", help="Embedding model to be converted")
     parser.add_argument("--precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Model precision")
-    parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3")
     parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in")
     args = parser.parse_args()
 
     convert_embedding_model(args.embedding_model_type, Path(args.model_dir))
-    convert_chat_model(args.chat_model_type, args.precision, Path(args.model_dir), args.hf_token)
+    convert_chat_model(args.chat_model_type, args.precision, Path(args.model_dir))
diff --git a/ai_ref_kits/agentic_llm_rag/css/gradio.css b/ai_ref_kits/agentic_llm_rag/css/gradio.css
new file mode 100644
index 00000000..41e5c23e
--- /dev/null
+++ b/ai_ref_kits/agentic_llm_rag/css/gradio.css
@@ -0,0 +1,153 @@
+#agent-steps {
+  border: 2px solid #ddd;
+  border-radius: 8px;
+  padding: 12px;
+  background-color: #f9f9f9;
+  margin-top: 0; /* Remove top margin to align with other components */
+  height: 100%; /* Ensure the same height as other components */
+  box-sizing: border-box; /* Include padding in height calculation */
+}
+
+#shopping-cart {
+  border: 2px solid #4CAF50;
+  border-radius: 8px;
+  padding: 12px;
+  background-color: #f0f8f0;
+  margin-top: 0; /* Remove top margin to align with other components */
+  height: 100%; /* Ensure the same height as other components */
+  box-sizing: border-box; /* Include padding in height calculation */
+}
+
+/* Fix row alignment issues */
+.gradio-row {
+  align-items: flex-start !important; /* Align all items to the top of the row */
+}
+
+/* Make all components in the main row the same height */
+.gradio-row > .gradio-column {
+  height: 100%;
+  display: flex;
+  flex-direction: column;
+}
+
+/* Ensure the chatbot and other components align properly */ +.gradio-chatbot { + margin-top: 0 !important; +} + +/* Improve shopping cart table styling */ +#shopping-cart table { + width: 100%; + border-collapse: collapse; + table-layout: auto; /* Let the browser calculate column widths based on content */ +} + +#shopping-cart th, +#shopping-cart td { + padding: 8px; + text-align: left; + min-width: 50px; /* Ensure minimum width for all columns */ +} + +#shopping-cart th:nth-child(2), /* Qty column */ +#shopping-cart td:nth-child(2) { + text-align: center; + width: 50px; +} + +#shopping-cart th:nth-child(3), /* Price column */ +#shopping-cart td:nth-child(3), +#shopping-cart th:nth-child(4), /* Total column */ +#shopping-cart td:nth-child(4) { + text-align: right; + min-width: 80px; +} + +#shopping-cart th:first-child, /* Product column */ +#shopping-cart td:first-child { + width: auto; /* Let product name take remaining space */ +} + +.sample-prompt-btn { + min-height: 35px !important; + font-size: 0.85em !important; + margin: 2px !important; + padding: 4px 8px !important; +} + +.intel-header { + margin: 0px; + padding: 0px; + background: #0054ae; + height: 60px; + width: 100%; + display: flex; + align-items: center; + position: relative; + box-sizing: border-box; + margin-bottom: 15px; +} + +.intel-logo { + margin-left: 20px; + margin-right: 20px; + width: 60px; + height: 60px; +} + +.intel-title { + height: 60px; + line-height: 60px; + color: white; + font-size: 24px; +} + +.gradio-container { + max-width: 100% !important; + padding: 0 !important; +} + +.intel-header-wrapper { + width: 100vw; + margin-left: calc(-50vw + 50%); + position: relative; +} + +.gradio-container > .main { + padding: 0 !important; +} + +/* Fix label alignment issues */ +.gradio-column > .label-wrap { + margin-top: 0; +} + +/* Ensure consistent spacing for all components */ +.gradio-box, .gradio-chatbot, .gradio-markdown { + margin-top: 0 !important; +} + +/* Responsive adjustments */ +@media (max-width: 768px) { + #agent-steps, #shopping-cart { + padding: 8px; + } + + .intel-logo { + margin-left: 10px; + margin-right: 10px; + width: 50px; + height: 50px; + } + + .intel-title { + font-size: 20px; + } + + /* Adjust table for mobile */ + #shopping-cart th, + #shopping-cart td { + padding: 4px; + font-size: 0.9em; + } +} \ No newline at end of file diff --git a/ai_ref_kits/agentic_llm_rag/data/Sample_Prompts.txt b/ai_ref_kits/agentic_llm_rag/data/Sample_Prompts.txt index 8d17e96a..60fbf67d 100644 --- a/ai_ref_kits/agentic_llm_rag/data/Sample_Prompts.txt +++ b/ai_ref_kits/agentic_llm_rag/data/Sample_Prompts.txt @@ -1,18 +1,25 @@ -• Supply Calculations -◦ How many gallons of paint do I need to paint a room that's 10 ft by 12 ft with 8 ft high ceilings? -◦ I want to paint my living room of 10 feet by 12 feet. How much paint do I need for one coat? -◦ I want to buy paint to paint my living room of 4 feet by 7 feet size. What is the quantity of paint required? -• Decisions on the correct product -◦ What type of paint do you recommend for covering stains on old drywall? For painting over brick? For use in an attic in a cold climate? -◦ What are the correct types of paintbrush for this project? -• Estimating costs -◦ How much will the paint cost? -◦ How much should I budget in total for this project, including paint and supplies? -◦ What are some ways I can cut some costs on this project? -• Comparing Products -◦ What is the best rated paint for this project? 
-◦ Which paint is the best value, in terms of customer satisfaction AND affordability?
-◦ What is the difference between latex paint and acrylic paint?
-• DIY Guides
-◦ What do I need to know before I start my paint project?
-◦ What are safety and best practices for making my project a success?
\ No newline at end of file
+*Sample prompts*
+
+These are sample prompts that may work, depending on the context.
+
+RAG:
+- what paint is the best for kitchens?
+- what is the price of it?
+- what else do I need to complete my project?
+- I want to paint my room. The size is 500 sqft. which products do you recommend?
+- create a table with paint products sorted by price
+- I have a room 1000 sqft, I'm looking for supplies to paint the room
+
+Paint Calculation tools:
+- how many gallons of paint do I need to cover 600 sq ft?
+- Calculate the paint cost for a 600 sqft room using Sherwin-Williams Emerald
+
+Shopping Cart tools:
+- add them to my cart
+- add brushes to my cart
+- add rollers to my shopping cart
+- add 3 gallons of Benjamin Moore Aura Revere Pewter to my cart
+- add 3 gallons of that paint to my cart
+- add gloves to my cart
+- clear shopping cart
+- I want to see my current cart
\ No newline at end of file
diff --git a/ai_ref_kits/agentic_llm_rag/system_prompt.py b/ai_ref_kits/agentic_llm_rag/system_prompt.py
new file mode 100644
index 00000000..5adc91ff
--- /dev/null
+++ b/ai_ref_kits/agentic_llm_rag/system_prompt.py
@@ -0,0 +1,58 @@
+## DO NOT modify this prompt. This prompt is the ReactAgent default.
+## You can modify the ## Additional Rules section if you want to add more rules.
+## Note: adding extra context can push the model into "roleplay" mode instead of using tools.
+
+react_system_header_str = """\
+
+You are designed to help with a variety of tasks, from answering questions \
+    to providing summaries to other types of analyses.
+
+## Tools
+You have access to a wide variety of tools. You are responsible for using
+the tools in any sequence you deem appropriate to complete the task at hand.
+This may require breaking the task into subtasks and using different tools
+to complete each subtask.
+
+You have access to the following tools:
+{tool_desc}
+
+## Output Format
+To answer the question, please use the following format.
+
+```
+Thought: I need to use a tool to help me answer the question.
+Action: tool name (one of {tool_names}) if using a tool.
+Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
+```
+
+Please ALWAYS start with a Thought.
+
+Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.
+
+If this format is used, the user will respond in the following format:
+
+```
+Observation: tool response
+```
+
+You should keep repeating the above format until you have enough information
+to answer the question without using any more tools. At that point, you MUST respond
+in the one of the following two formats:
+
+```
+Thought: I can answer without using any more tools.
+Answer: [your answer here]
+```
+
+```
+Thought: I cannot answer the question with the provided tools.
+Answer: Sorry, I cannot answer your query.
+```
+
+## Additional Rules
+- End every sentence with a polite question to engage with the customer, include emojis about painting.
+
+## Current Conversation
+Below is the current conversation consisting of interleaving human and assistant messages.
+
+"""
\ No newline at end of file
diff --git a/ai_ref_kits/agentic_llm_rag/tools.py b/ai_ref_kits/agentic_llm_rag/tools.py
index 61463ce0..0714a678 100644
--- a/ai_ref_kits/agentic_llm_rag/tools.py
+++ b/ai_ref_kits/agentic_llm_rag/tools.py
@@ -1,25 +1,103 @@
-class Math(object):
+import math
 
-    def add(self, a: float, b: float) -> float:
-        """Add two numbers and returns the sum"""
-        return a + b
-    def subtract(self, a: float, b: float) -> float:
-        """Add two numbers and returns the sum"""
-        return a - b
-    def multiply(self, a: float, b: float) -> float:
-        """Multiply two numbers and returns the product"""
-        return a * b
-    def divide(self, a: float, b: float) -> float:
-        """Divide two numbers and returns the quotient"""
-        return a / b
+class PaintCalculator:
 
-
-class PaintCostCalculator(object):
-
-    def calculate_paint_cost(self, area: int, price_per_gallon: int, add_paint_supply_costs: bool) -> float:
-        """Assuming 2 gallons are needed for 400 square feet"""
-        gallons_needed = (area / 400) * 2
-        total_cost = gallons_needed * price_per_gallon
-        if add_paint_supply_costs is True:
+    @staticmethod
+    def calculate_paint_cost(area: float, price_per_gallon: float, add_paint_supply_costs: bool = False) -> float:
+        """
+        Calculate the total cost of paint needed for a given area.
+
+        Args:
+            area: Area to be painted in square feet
+            price_per_gallon: Price per gallon of paint
+            add_paint_supply_costs: Whether to add $50 for painting supplies
+
+        Returns:
+            Total cost of paint and supplies if requested
+        """
+        gallons_needed = math.ceil((area / 400) * 2)  # Assuming 2 gallons are needed for 400 square feet
+        total_cost = round(gallons_needed * price_per_gallon, 2)
+        if add_paint_supply_costs:
             total_cost += 50
         return total_cost
+
+    @staticmethod
+    def calculate_paint_gallons_needed(area: float) -> int:
+        """
+        Calculate the number of gallons of paint needed for a given area.
+
+        Args:
+            area: Area to be painted in square feet
+
+        Returns:
+            Number of gallons needed (rounded up to ensure coverage)
+        """
+        # Same rule as calculate_paint_cost: 2 gallons cover 400 square feet
+        gallons_needed = math.ceil((area / 400) * 2)
+        return gallons_needed
+
+class ShoppingCart:
+    # In-memory shopping cart
+    _cart_items = []
+
+    @staticmethod
+    def add_to_cart(product_name: str, quantity: int, price_per_unit: float) -> dict:
+        """
+        Add a product to the user's shopping cart, updating its quantity if it is already present.
+ + Args: + product_name: Name of the paint product + quantity: Number of units/gallons + price_per_unit: Price per unit/gallon + + Returns: + Dict with confirmation message and current cart items + """ + item = { + "product_name": product_name, + "quantity": quantity, + "price_per_unit": price_per_unit, + "total_price": round(quantity * price_per_unit, 2) + } + + # Check if item already exists + for existing_item in ShoppingCart._cart_items: + if existing_item["product_name"] == product_name: + # Update quantity + existing_item["quantity"] += quantity + existing_item["total_price"] = round(existing_item["quantity"] * existing_item["price_per_unit"], 2) + return { + "message": f"Updated {product_name} quantity to {existing_item['quantity']} in your cart", + "cart": ShoppingCart._cart_items + } + + # Add new item + ShoppingCart._cart_items.append(item) + + return { + "message": f"Added {quantity} {product_name} to your cart", + "cart": ShoppingCart._cart_items + } + + @staticmethod + def get_cart_items() -> list: + """ + Get all items currently in the shopping cart. + + Returns: + List of items in the cart with their details + """ + return ShoppingCart._cart_items + + @staticmethod + def clear_cart() -> dict: + """ + Clear all items from the shopping cart. + + Returns: + Confirmation message + """ + ShoppingCart._cart_items = [] + return {"message": "Shopping cart has been cleared"} \ No newline at end of file From 73c3b9ea0353ed93aa0fde4c23111f1dc2b2ef54 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 27 Feb 2025 14:56:08 -0700 Subject: [PATCH 04/25] Removing config files Signed-off-by: Antonio Martinez --- .../config/paint_concierge_personality.yaml | 8 -------- .../config/store_employee_personality.yaml | 9 --------- 2 files changed, 17 deletions(-) delete mode 100644 ai_ref_kits/agentic_llm_rag/config/paint_concierge_personality.yaml delete mode 100644 ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml diff --git a/ai_ref_kits/agentic_llm_rag/config/paint_concierge_personality.yaml b/ai_ref_kits/agentic_llm_rag/config/paint_concierge_personality.yaml deleted file mode 100644 index 33143f19..00000000 --- a/ai_ref_kits/agentic_llm_rag/config/paint_concierge_personality.yaml +++ /dev/null @@ -1,8 +0,0 @@ -system_configuration: > - You are a helpful, respectful, and knowledgeable Paint Concierge working at a retail store, where customer experience is absolutely crucial. - Your role is to assist customers with inquiries about paint suggestions, price details, supply calculations, and product recommendations based on the knowledge and documents provided to you. - You are strongly encouraged to use various tools that have been provided to you, including the vector_search tool, various math tools, and a paint calculation tool (calculate_paint_cost tool). - Answer questions with the information available from the RAG document (vector_search tool) or other shared knowledge, but if you're unsure or don't have specific details, politely inform the customer to check with the store staff or the official product website for further information. - When answering questions about the number of gallons of paint needed or the cost of paint or similar queries, always consult the RAG document first. - Do not assume or provide any speculative information or estimates outside the shared knowledge base. Always encourage customers to verify with store associates for unavailable or unknown details. 
- Do not ask for personal information or provide any responses that are inappropriate or unethical. Always remain professional, empathetic, and polite. diff --git a/ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml b/ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml deleted file mode 100644 index bacaf3ac..00000000 --- a/ai_ref_kits/agentic_llm_rag/config/store_employee_personality.yaml +++ /dev/null @@ -1,9 +0,0 @@ -system_configuration: > - You are a helpful, respectful, and knowledgeable Paint Employee Concierge working at a retail store, designed to help new employees with their onboarding experience and complex questions from customers. - Your role is to assist new employees with inquiries about paint suggestions, price details, supply calculations, and product recommendations based on the knowledge and documents provided to you. - You may be asked to test employees' knowledge on paint, or guide them towards accurate answers. - You are strongly encouraged to use various tools that have been provided to you, including the vector_search tool, various math tools, and a paint calculation tool (calculate_paint_cost tool). - Answer questions with the information available from the RAG document (vector_search tool) or other shared knowledge, but if you're unsure or don't have specific details, politely inform the customer to check with senior store staff or the official product website for further information. - When answering questions about the number of gallons of paint needed or the cost of paint or similar queries, always consult the RAG document first. - Do not assume or provide any speculative information or estimates outside the shared knowledge base. Always encourage junior employees to verify with store associates for unavailable or unknown details. - Do not ask for personal information or provide any responses that are inappropriate or unethical. Always remain professional, empathetic, and polite. 
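
Although this commit removes the personality files, the pattern they used is worth noting: each file is plain YAML whose `system_configuration` key holds the entire system prompt. A minimal sketch of how such a file could be loaded with PyYAML (already in the kit's requirements); the helper name and usage path are illustrative, not part of the kit:

```python
from pathlib import Path

import yaml


def load_personality(path: str) -> str:
    """Return the system prompt stored under the `system_configuration` key."""
    with Path(path).open("r", encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return config["system_configuration"]


# Hypothetical usage, mirroring the files deleted above:
# prompt = load_personality("config/paint_concierge_personality.yaml")
```
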
From a680b2dee35206d5fd7816bd9977911badaa4a8b Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 27 Feb 2025 15:16:24 -0700 Subject: [PATCH 05/25] Adding missing requirements.txt file Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/requirements.txt | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 ai_ref_kits/agentic_llm_rag/requirements.txt diff --git a/ai_ref_kits/agentic_llm_rag/requirements.txt b/ai_ref_kits/agentic_llm_rag/requirements.txt new file mode 100644 index 00000000..a7e2d7a4 --- /dev/null +++ b/ai_ref_kits/agentic_llm_rag/requirements.txt @@ -0,0 +1,26 @@ +--extra-index-url https://download.pytorch.org/whl/cpu + +openvino==2024.6.0 +optimum-intel==1.21.0 +optimum==1.23.3 +nncf==2.14.1 + +llama-index==0.12.9 +llama-index-llms-openvino==0.4.0 +llama-index-embeddings-openvino==0.5.1 +llama-index-postprocessor-openvino-rerank==0.4.1 +llama-index-vector-stores-faiss==0.3.0 +faiss-cpu==1.9.0 + +# onnx>1.16.1 doesn't work on windows +onnx==1.16.1; platform_system == "Windows" +onnx==1.17.0; platform_system != "Windows" +onnxruntime==1.17.3 +torch==2.5.1 + +transformers==4.46.3 +librosa==0.10.2 +pyyaml==6.0.1 +PyMuPDF==1.24.10 + +gradio==5.12.0 From 5bcf062fc6d9b63512306a650fc6a338b1220763 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 27 Feb 2025 16:04:55 -0700 Subject: [PATCH 06/25] Changing app.py to main.py for comply with github actions Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/README.md | 8 +- ai_ref_kits/agentic_llm_rag/app.py | 430 -------------------------- 2 files changed, 4 insertions(+), 434 deletions(-) delete mode 100644 ai_ref_kits/agentic_llm_rag/app.py diff --git a/ai_ref_kits/agentic_llm_rag/README.md b/ai_ref_kits/agentic_llm_rag/README.md index a35e36fe..a87de3f2 100644 --- a/ai_ref_kits/agentic_llm_rag/README.md +++ b/ai_ref_kits/agentic_llm_rag/README.md @@ -132,7 +132,7 @@ To convert the chat and embedding models, run: python convert_and_optimize_llm.py --chat_model_type qwen2-7B --embedding_model_type bge-large --precision int4 --model_dir model ``` -After you run the conversion scripts, you can run `app.py` to launch the application. +After you run the conversion scripts, you can run `main.py` to launch the application. ## Running the Application (Gradio Interface) @@ -143,7 +143,7 @@ _NOTE: This application requires more than 16GB of memory because the models are After that, you should be able to run the application with default values: ```shell -python app.py +python main.py ``` For more settings, you can change the argument values: @@ -158,9 +158,9 @@ For more settings, you can change the argument values: - `--public`: Include this flag to make the Gradio interface publicly accessible over the network. Without this flag, the interface will only be available on your local machine. -To run the application, execute the `app.py` script with the following command. Make sure to include all necessary model directory arguments. +To run the application, execute the `main.py` script with the following command. Make sure to include all necessary model directory arguments. 
```shell -python app.py \ +python main.py \ --chat_model model/qwen2-7B-INT4 \ --embedding_model data/test_painting_llm_rag.pdf \ --rag_pdf model/bge-small-FP32 \ diff --git a/ai_ref_kits/agentic_llm_rag/app.py b/ai_ref_kits/agentic_llm_rag/app.py deleted file mode 100644 index 51c72406..00000000 --- a/ai_ref_kits/agentic_llm_rag/app.py +++ /dev/null @@ -1,430 +0,0 @@ - -import argparse -import io -import logging -import sys -import time -import warnings -from io import StringIO -from pathlib import Path - -import gradio as gr -import nest_asyncio -import openvino.properties as props -import openvino.properties.hint as hints -import openvino.properties.streams as streams -import requests -import yaml -from llama_index.core import PromptTemplate -from llama_index.core import SimpleDirectoryReader -from llama_index.core import VectorStoreIndex, Settings -from llama_index.core.agent import ReActAgent -from llama_index.core.tools import FunctionTool -from llama_index.core.tools import QueryEngineTool, ToolMetadata -from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding -from llama_index.llms.openvino import OpenVINOLLM -from llama_index.core.agent import ReActChatFormatter -from llama_index.core.llms import MessageRole -# Agent tools -from tools import PaintCalculator, ShoppingCart -from system_prompt import react_system_header_str - -# Initialize logging -logging.basicConfig(level=logging.INFO) -log = logging.getLogger(__name__) - -#Filter unnecessary warnings for demonstration -warnings.filterwarnings("ignore") - -ov_config = { - hints.performance_mode(): hints.PerformanceMode.LATENCY, - streams.num(): "1", - props.cache_dir(): "" -} - -def setup_models(llm_model_path, embedding_model_path, device): - # Load LLM model locally - llm = OpenVINOLLM( - model_id_or_path=str(llm_model_path), - context_window=8192, - max_new_tokens=500, - model_kwargs={"ov_config": ov_config}, - generate_kwargs={"do_sample": False, "temperature": 0.1, "top_p": 0.8}, - device_map=device, - ) - - # Load the embedding model locally - embedding = OpenVINOEmbedding(model_id_or_path=embedding_model_path, device=device) - - return llm, embedding - - -def setup_tools(): - - paint_cost_calculator = FunctionTool.from_defaults( - fn=PaintCalculator.calculate_paint_cost, - name="calculate_paint_cost", - description="ALWAYS use this tool when calculating paint cost for a specific area in square feet. Required inputs: area (float, square feet), price_per_gallon (float), add_paint_supply_costs (bool)" - ) - - paint_gallons_calculator = FunctionTool.from_defaults( - fn=PaintCalculator.calculate_paint_gallons_needed, - name="calculate_paint_gallons", - description="Calculate how many gallons of paint are needed to cover a specific area. Required input: area (float, square feet). Returns the number of gallons needed, rounded up to ensure full coverage." -) - - add_to_cart_tool = FunctionTool.from_defaults( - fn=ShoppingCart.add_to_cart, - name="add_to_cart", - description=""" - Use this tool WHENEVER a user wants to add any item to their cart or shopping cart. 
- - PARAMETERS: - - product_name (string): The exact name of the product (e.g., "Premium Latex Paint") - - quantity (int): The number of units to add, must be a positive integer (e.g., 2) - - price_per_unit (float): The price per unit in dollars (e.g., 24.99) - - RETURNS: - - A confirmation message and updated cart contents - - EXAMPLES: - To add 3 gallons of paint at $29.99 each: add_to_cart(product_name="Interior Eggshell Paint", quantity=3, price_per_unit=29.99) - """ - ) - - get_cart_items_tool = FunctionTool.from_defaults( - fn=ShoppingCart.get_cart_items, - name="view_cart", - description=""" - Use this tool when a user wants to see what's in their shopping cart. - No parameters are required. - - RETURNS: - - A list of all items currently in the cart with their details - - EXAMPLES: - To view the current cart contents: view_cart() - """ - ) - - clear_cart_tool = FunctionTool.from_defaults( - fn=ShoppingCart.clear_cart, - name="clear_cart", - description=""" - Use this tool when a user asks to empty or clear their shopping cart. - No parameters are required. - - RETURNS: - - A confirmation message that the cart has been cleared - - EXAMPLES: - To empty the shopping cart: clear_cart() - """ - ) - return paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator - - -def load_documents(text_example_en_path): - - if not text_example_en_path.exists(): - text_example_en = "test_painting_llm_rag.pdf" - r = requests.get(text_example_en) - content = io.BytesIO(r.content) - with open(text_example_en_path, "wb") as f: - f.write(content.read()) - - reader = SimpleDirectoryReader(input_files=[text_example_en_path]) - documents = reader.load_data() - index = VectorStoreIndex.from_documents(documents) - - return index - -# Custom function to handle reasoning failures -def custom_handle_reasoning_failure(callback_manager, exception): - return "Hmm...I didn't quite that. Could you please rephrase your question to be simpler?" - - -def run_app(agent): - class Capturing(list): - def __enter__(self): - self._stdout = sys.stdout - sys.stdout = self._stringio = StringIO() - return self - def __exit__(self, *args): - self.extend(self._stringio.getvalue().splitlines()) - del self._stringio - sys.stdout = self._stdout - - def _handle_user_message(user_message, history): - return "", [*history, (user_message, "")] - - def update_cart_display(): - cart_items = ShoppingCart.get_cart_items() - if not cart_items: - return "### 🛒 Your Shopping Cart is Empty" - - table = "### 🛒 Your Shopping Cart\n\n" - table += "\n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - - for item in cart_items: - table += " \n" - table += f" \n" - table += f" \n" - table += f" \n" - table += f" \n" - table += " \n" - - table += " \n" - table += "
<tr><th>Product</th><th>Qty</th><th>Price</th><th>Total</th></tr>\n"
-        table += f"<tr><td>{item['product_name']}</td><td>{item['quantity']}</td><td>${item['price_per_unit']:.2f}</td><td>${item['total_price']:.2f}</td></tr>
\n" - - total = sum(item["total_price"] for item in cart_items) - table += f"\n**Total: ${total:.2f}**" - return table - - def _generate_response(chat_history, log_history): - log.info(f"log_history {log_history}") - - if not isinstance(log_history, list): - log_history = [] - - # Capture time for thought process - start_thought_time = time.time() - - # Capture the thought process output - with Capturing() as output: - try: - response = agent.stream_chat(chat_history[-1][0]) - except ValueError: - response = agent.stream_chat(chat_history[-1][0]) - formatted_output = [] - for line in output: - if "Thought:" in line: - formatted_output.append("\n🤔 **Thought:**\n" + line.split("Thought:", 1)[1]) - elif "Action:" in line: - formatted_output.append("\n🔧 **Action:**\n" + line.split("Action:", 1)[1]) - elif "Action Input:" in line: - formatted_output.append("\n📥 **Input:**\n" + line.split("Action Input:", 1)[1]) - elif "Observation:" in line: - formatted_output.append("\n📋 **Result:**\n" + line.split("Observation:", 1)[1]) - else: - formatted_output.append(line) - end_thought_time = time.time() - thought_process_time = end_thought_time - start_thought_time - - # After response is complete, show the captured logs in the log area - log_entries = "\n".join(formatted_output) - log_history.append("### 🤔 Agent's Thought Process") - thought_process_log = f"Thought Process Time: {thought_process_time:.2f} seconds" - log_history.append(f"{log_entries}\n{thought_process_log}") - cart_content = update_cart_display() # update shopping cart - yield chat_history, "\n".join(log_history), cart_content # Yield after the thought process time is captured - - # Now capture response generation time - start_response_time = time.time() - - # Gradually yield the response from the agent to the chat - # Quick fix for agent occasionally repeating the first word of its repsponse - last_token = "Dummy Token" - i = 0 - for token in response.response_gen: - if i == 0: - last_token = token - if i == 1 and token.split()[0] == last_token.split()[0]: - chat_history[-1][1] += token.split()[1] + " " - else: - chat_history[-1][1] += token - yield chat_history, "\n".join(log_history), cart_content # Ensure log_history is a string - if i <= 2: i += 1 - - end_response_time = time.time() - response_time = end_response_time - start_response_time - - # Log tokens per second along with the device information - tokens = len(chat_history[-1][1].split(" ")) * 4 / 3 # Convert words to approx token count - response_log = f"Response Time: {response_time:.2f} seconds ({tokens / response_time:.2f} tokens/s)" - - log.info(response_log) - - # Append the response time to log history - log_history.append(response_log) - yield chat_history, "\n".join(log_history), cart_content # Join logs into a string for display - - def _reset_chat(): - agent.reset() - ShoppingCart._cart_items = [] - return "", [], "🤔 Agent's Thought Process", update_cart_display() - - def run(): - custom_css = "" - try: - with open("css/gradio.css", "r") as css_file: - custom_css = css_file.read() - except Exception as e: - log.warning(f"Could not load CSS file: {e}") - - theme = gr.themes.Default( - primary_hue="blue", - font=[gr.themes.GoogleFont("Montserrat"), "ui-sans-serif", "sans-serif"], - ) - - with gr.Blocks(theme=theme, css=custom_css) as demo: - - header = gr.HTML( - "
" - "
" - " " - "
Smart Retail Assistant 🤖: Agentic LLMs with RAG 💭
" - "
" - "
" - ) - - with gr.Row(): - chat_window = gr.Chatbot( - label="Paint Purchase Helper", - avatar_images=(None, "https://docs.openvino.ai/2024/_static/favicon.ico"), - height=400, # Adjust height as per your preference - scale=2 # Set a higher scale value for Chatbot to make it wider - #autoscroll=True, # Enable auto-scrolling for better UX - ) - log_window = gr.Markdown( - show_label=True, - value="### 🤔 Agent's Thought Process", - height=400, - elem_id="agent-steps" - ) - cart_display = gr.Markdown( - value=update_cart_display(), - elem_id="shopping-cart", - height=400 - ) - - with gr.Row(): - message = gr.Textbox(label="Ask the Paint Expert 🎨", scale=4, placeholder="Type your prompt/Question and press Enter") - - with gr.Column(scale=1): - submit_btn = gr.Button("Submit", variant="primary") - clear = gr.ClearButton() - - sample_questions = [ - "what paint is the best for kitchens?", - "what is the price of it?", - "how many gallons of paint do I need to cover 600 sq ft ?", - "add them to my cart", - "what else do I need to complete my project?", - "add 2 brushes to my cart", - "create a table with paint products sorted by price", - "view my cart", - "clear shopping cart", - "I have a room 1000 sqft, I'm looking for supplies to paint the room" - ] - gr.Examples( - examples=sample_questions, - inputs=message, - label="Examples" - ) - - # Ensure that individual components are passed - message.submit( - _handle_user_message, - inputs=[message, chat_window], - outputs=[message, chat_window], - queue=False - ).then( - _generate_response, - inputs=[chat_window, log_window], - outputs=[chat_window, log_window, cart_display], - ) - - submit_btn.click( - _handle_user_message, - inputs=[message, chat_window], - outputs=[message, chat_window], - queue=False, - ).then( - _generate_response, - inputs=[chat_window, log_window], - outputs=[chat_window, log_window, cart_display], - ) - clear.click(_reset_chat, None, [message, chat_window, log_window, cart_display]) - - gr.Markdown("------------------------------") - - demo.launch() - - run() - - -def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str, device: str): - # Load models and embedding based on parsed arguments - llm, embedding = setup_models(chat_model, embedding_model, device) - - Settings.embed_model = embedding - Settings.llm = llm - - # Set up tools - paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator = setup_tools() - - text_example_en_path = Path(rag_pdf) - index = load_documents(text_example_en_path) - log.info(f"loading in {index}") - - vector_tool = QueryEngineTool( - index.as_query_engine(streaming=True), - metadata=ToolMetadata( - name="vector_search", - description=""" - Use this tool for ANY question about paint products, recommendations, prices, or technical specifications. - - WHEN TO USE: - - User asks about paint types, brands, or products - - User needs price information before adding to cart - - User needs recommendations based on their project - - User has technical questions about painting - - EXAMPLES: - - "What paint is best for kitchen cabinets?" - - "How much does AwesomePainter Interior Acrylic Latex cost?" - - "What supplies do I need for painting my living room?" 
- """, - ), - ) - - nest_asyncio.apply() - - # Define agent and available tools - agent = ReActAgent.from_tools( - [paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, vector_tool, paint_gallons_calculator], - llm=llm, - max_iterations=5, # Set a max_iterations value - handle_reasoning_failure_fn=custom_handle_reasoning_failure, - verbose=True, - react_chat_formatter=ReActChatFormatter.from_defaults( - observation_role=MessageRole.TOOL - ), - ) - react_system_prompt = PromptTemplate(react_system_header_str) - agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) - agent.reset() - run_app(agent) - -if __name__ == "__main__": - # Define the argument parser at the end - parser = argparse.ArgumentParser() - parser.add_argument("--chat_model", type=str, default="model/qwen2-7B-INT4", help="Path to the chat model directory") - parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") - parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") - parser.add_argument("--personality", type=str, default="config/paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") - parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") - - args = parser.parse_args() - - main(args.chat_model, args.embedding_model, args.rag_pdf, args.personality, args.device) From 599645a8eb6d273f6f51946a827357237731e1b8 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 27 Feb 2025 16:23:19 -0700 Subject: [PATCH 07/25] Changing app.py to main.py for comply with github actions Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/main.py | 430 ++++++++++++++++++++++++++++ 1 file changed, 430 insertions(+) create mode 100644 ai_ref_kits/agentic_llm_rag/main.py diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py new file mode 100644 index 00000000..ec4358e5 --- /dev/null +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -0,0 +1,430 @@ + +import argparse +import io +import logging +import sys +import time +import warnings +from io import StringIO +from pathlib import Path + +import gradio as gr +import nest_asyncio +import openvino.properties as props +import openvino.properties.hint as hints +import openvino.properties.streams as streams +import requests +import yaml +from llama_index.core import PromptTemplate +from llama_index.core import SimpleDirectoryReader +from llama_index.core import VectorStoreIndex, Settings +from llama_index.core.agent import ReActAgent +from llama_index.core.tools import FunctionTool +from llama_index.core.tools import QueryEngineTool, ToolMetadata +from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding +from llama_index.llms.openvino import OpenVINOLLM +from llama_index.core.agent import ReActChatFormatter +from llama_index.core.llms import MessageRole +# Agent tools +from tools import PaintCalculator, ShoppingCart +from system_prompt import react_system_header_str + +# Initialize logging +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +#Filter unnecessary warnings for demonstration +warnings.filterwarnings("ignore") + +ov_config = { + hints.performance_mode(): hints.PerformanceMode.LATENCY, + streams.num(): "1", + props.cache_dir(): "" +} + +def setup_models(llm_model_path, embedding_model_path, 
device): + # Load LLM model locally + llm = OpenVINOLLM( + model_id_or_path=str(llm_model_path), + context_window=8192, + max_new_tokens=500, + model_kwargs={"ov_config": ov_config}, + generate_kwargs={"do_sample": False, "temperature": 0.1, "top_p": 0.8}, + device_map=device, + ) + + # Load the embedding model locally + embedding = OpenVINOEmbedding(model_id_or_path=embedding_model_path, device=device) + + return llm, embedding + + +def setup_tools(): + + paint_cost_calculator = FunctionTool.from_defaults( + fn=PaintCalculator.calculate_paint_cost, + name="calculate_paint_cost", + description="ALWAYS use this tool when calculating paint cost for a specific area in square feet. Required inputs: area (float, square feet), price_per_gallon (float), add_paint_supply_costs (bool)" + ) + + paint_gallons_calculator = FunctionTool.from_defaults( + fn=PaintCalculator.calculate_paint_gallons_needed, + name="calculate_paint_gallons", + description="Calculate how many gallons of paint are needed to cover a specific area. Required input: area (float, square feet). Returns the number of gallons needed, rounded up to ensure full coverage." +) + + add_to_cart_tool = FunctionTool.from_defaults( + fn=ShoppingCart.add_to_cart, + name="add_to_cart", + description=""" + Use this tool WHENEVER a user wants to add any item to their cart or shopping cart. + + PARAMETERS: + - product_name (string): The exact name of the product (e.g., "Premium Latex Paint") + - quantity (int): The number of units to add, must be a positive integer (e.g., 2) + - price_per_unit (float): The price per unit in dollars (e.g., 24.99) + + RETURNS: + - A confirmation message and updated cart contents + + EXAMPLES: + To add 3 gallons of paint at $29.99 each: add_to_cart(product_name="Interior Eggshell Paint", quantity=3, price_per_unit=29.99) + """ + ) + + get_cart_items_tool = FunctionTool.from_defaults( + fn=ShoppingCart.get_cart_items, + name="view_cart", + description=""" + Use this tool when a user wants to see what's in their shopping cart. + No parameters are required. + + RETURNS: + - A list of all items currently in the cart with their details + + EXAMPLES: + To view the current cart contents: view_cart() + """ + ) + + clear_cart_tool = FunctionTool.from_defaults( + fn=ShoppingCart.clear_cart, + name="clear_cart", + description=""" + Use this tool when a user asks to empty or clear their shopping cart. + No parameters are required. + + RETURNS: + - A confirmation message that the cart has been cleared + + EXAMPLES: + To empty the shopping cart: clear_cart() + """ + ) + return paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator + + +def load_documents(text_example_en_path): + + if not text_example_en_path.exists(): + text_example_en = "test_painting_llm_rag.pdf" + r = requests.get(text_example_en) + content = io.BytesIO(r.content) + with open(text_example_en_path, "wb") as f: + f.write(content.read()) + + reader = SimpleDirectoryReader(input_files=[text_example_en_path]) + documents = reader.load_data() + index = VectorStoreIndex.from_documents(documents) + + return index + +# Custom function to handle reasoning failures +def custom_handle_reasoning_failure(callback_manager, exception): + return "Hmm...I didn't quite that. Could you please rephrase your question to be simpler?" 
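
Because the calculators and the cart are plain static methods, they can be smoke-tested in isolation before `setup_tools` wraps them in `FunctionTool` objects. A minimal sketch (product name and prices are illustrative; the arithmetic follows the 2-gallons-per-400-square-feet rule hard-coded in `tools.py`):

```python
from tools import PaintCalculator, ShoppingCart

# 600 sq ft -> ceil((600 / 400) * 2) = 3 gallons
print(PaintCalculator.calculate_paint_gallons_needed(600))    # 3

# 3 gallons at $39.99 per gallon, plus the flat $50 supply charge
print(PaintCalculator.calculate_paint_cost(600, 39.99, True)) # 169.97

# The cart helpers return plain dicts/lists, which the UI later renders
result = ShoppingCart.add_to_cart("Interior Eggshell Paint", 3, 29.99)
print(result["message"])              # Added 3 Interior Eggshell Paint to your cart
print(ShoppingCart.get_cart_items())  # one line item with total_price == 89.97
```
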
+ + +def run_app(agent): + class Capturing(list): + def __enter__(self): + self._stdout = sys.stdout + sys.stdout = self._stringio = StringIO() + return self + def __exit__(self, *args): + self.extend(self._stringio.getvalue().splitlines()) + del self._stringio + sys.stdout = self._stdout + + def _handle_user_message(user_message, history): + return "", [*history, (user_message, "")] + + def update_cart_display(): + cart_items = ShoppingCart.get_cart_items() + if not cart_items: + return "### 🛒 Your Shopping Cart is Empty" + + table = "### 🛒 Your Shopping Cart\n\n" + table += "\n" + table += " \n" + table += " \n" + table += " \n" + table += " \n" + table += " \n" + table += " \n" + table += " \n" + table += " \n" + table += " \n" + + for item in cart_items: + table += " \n" + table += f" \n" + table += f" \n" + table += f" \n" + table += f" \n" + table += " \n" + + table += " \n" + table += "
<tr><th>Product</th><th>Qty</th><th>Price</th><th>Total</th></tr>\n"
+        table += f"<tr><td>{item['product_name']}</td><td>{item['quantity']}</td><td>${item['price_per_unit']:.2f}</td><td>${item['total_price']:.2f}</td></tr>
\n" + + total = sum(item["total_price"] for item in cart_items) + table += f"\n**Total: ${total:.2f}**" + return table + + def _generate_response(chat_history, log_history): + log.info(f"log_history {log_history}") + + if not isinstance(log_history, list): + log_history = [] + + # Capture time for thought process + start_thought_time = time.time() + + # Capture the thought process output + with Capturing() as output: + try: + response = agent.stream_chat(chat_history[-1][0]) + except ValueError: + response = agent.stream_chat(chat_history[-1][0]) + formatted_output = [] + for line in output: + if "Thought:" in line: + formatted_output.append("\n🤔 **Thought:**\n" + line.split("Thought:", 1)[1]) + elif "Action:" in line: + formatted_output.append("\n🔧 **Action:**\n" + line.split("Action:", 1)[1]) + elif "Action Input:" in line: + formatted_output.append("\n📥 **Input:**\n" + line.split("Action Input:", 1)[1]) + elif "Observation:" in line: + formatted_output.append("\n📋 **Result:**\n" + line.split("Observation:", 1)[1]) + else: + formatted_output.append(line) + end_thought_time = time.time() + thought_process_time = end_thought_time - start_thought_time + + # After response is complete, show the captured logs in the log area + log_entries = "\n".join(formatted_output) + log_history.append("### 🤔 Agent's Thought Process") + thought_process_log = f"Thought Process Time: {thought_process_time:.2f} seconds" + log_history.append(f"{log_entries}\n{thought_process_log}") + cart_content = update_cart_display() # update shopping cart + yield chat_history, "\n".join(log_history), cart_content # Yield after the thought process time is captured + + # Now capture response generation time + start_response_time = time.time() + + # Gradually yield the response from the agent to the chat + # Quick fix for agent occasionally repeating the first word of its repsponse + last_token = "Dummy Token" + i = 0 + for token in response.response_gen: + if i == 0: + last_token = token + if i == 1 and token.split()[0] == last_token.split()[0]: + chat_history[-1][1] += token.split()[1] + " " + else: + chat_history[-1][1] += token + yield chat_history, "\n".join(log_history), cart_content # Ensure log_history is a string + if i <= 2: i += 1 + + end_response_time = time.time() + response_time = end_response_time - start_response_time + + # Log tokens per second along with the device information + tokens = len(chat_history[-1][1].split(" ")) * 4 / 3 # Convert words to approx token count + response_log = f"Response Time: {response_time:.2f} seconds ({tokens / response_time:.2f} tokens/s)" + + log.info(response_log) + + # Append the response time to log history + log_history.append(response_log) + yield chat_history, "\n".join(log_history), cart_content # Join logs into a string for display + + def _reset_chat(): + agent.reset() + ShoppingCart._cart_items = [] + return "", [], "🤔 Agent's Thought Process", update_cart_display() + + def run(): + custom_css = "" + try: + with open("css/gradio.css", "r") as css_file: + custom_css = css_file.read() + except Exception as e: + log.warning(f"Could not load CSS file: {e}") + + theme = gr.themes.Default( + primary_hue="blue", + font=[gr.themes.GoogleFont("Montserrat"), "ui-sans-serif", "sans-serif"], + ) + + with gr.Blocks(theme=theme, css=custom_css) as demo: + + header = gr.HTML( + "
" + "
" + " " + "
Smart Retail Assistant 🤖: Agentic LLMs with RAG 💭
" + "
" + "
" + ) + + with gr.Row(): + chat_window = gr.Chatbot( + label="Paint Purchase Helper", + avatar_images=(None, "https://docs.openvino.ai/2024/_static/favicon.ico"), + height=400, # Adjust height as per your preference + scale=2 # Set a higher scale value for Chatbot to make it wider + #autoscroll=True, # Enable auto-scrolling for better UX + ) + log_window = gr.Markdown( + show_label=True, + value="### 🤔 Agent's Thought Process", + height=400, + elem_id="agent-steps" + ) + cart_display = gr.Markdown( + value=update_cart_display(), + elem_id="shopping-cart", + height=400 + ) + + with gr.Row(): + message = gr.Textbox(label="Ask the Paint Expert 🎨", scale=4, placeholder="Type your prompt/Question and press Enter") + + with gr.Column(scale=1): + submit_btn = gr.Button("Submit", variant="primary") + clear = gr.ClearButton() + + sample_questions = [ + "what paint is the best for kitchens?", + "what is the price of it?", + "how many gallons of paint do I need to cover 600 sq ft ?", + "add them to my cart", + "what else do I need to complete my project?", + "add 2 brushes to my cart", + "create a table with paint products sorted by price", + "Show me what's in my cart", + "clear shopping cart", + "I have a room 1000 sqft, I'm looking for supplies to paint the room" + ] + gr.Examples( + examples=sample_questions, + inputs=message, + label="Examples" + ) + + # Ensure that individual components are passed + message.submit( + _handle_user_message, + inputs=[message, chat_window], + outputs=[message, chat_window], + queue=False + ).then( + _generate_response, + inputs=[chat_window, log_window], + outputs=[chat_window, log_window, cart_display], + ) + + submit_btn.click( + _handle_user_message, + inputs=[message, chat_window], + outputs=[message, chat_window], + queue=False, + ).then( + _generate_response, + inputs=[chat_window, log_window], + outputs=[chat_window, log_window, cart_display], + ) + clear.click(_reset_chat, None, [message, chat_window, log_window, cart_display]) + + gr.Markdown("------------------------------") + + demo.launch() + + run() + + +def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str, device: str): + # Load models and embedding based on parsed arguments + llm, embedding = setup_models(chat_model, embedding_model, device) + + Settings.embed_model = embedding + Settings.llm = llm + + # Set up tools + paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator = setup_tools() + + text_example_en_path = Path(rag_pdf) + index = load_documents(text_example_en_path) + log.info(f"loading in {index}") + + vector_tool = QueryEngineTool( + index.as_query_engine(streaming=True), + metadata=ToolMetadata( + name="vector_search", + description=""" + Use this tool for ANY question about paint products, recommendations, prices, or technical specifications. + + WHEN TO USE: + - User asks about paint types, brands, or products + - User needs price information before adding to cart + - User needs recommendations based on their project + - User has technical questions about painting + + EXAMPLES: + - "What paint is best for kitchen cabinets?" + - "How much does AwesomePainter Interior Acrylic Latex cost?" + - "What supplies do I need for painting my living room?" 
+ """, + ), + ) + + nest_asyncio.apply() + + # Define agent and available tools + agent = ReActAgent.from_tools( + [paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, vector_tool, paint_gallons_calculator], + llm=llm, + max_iterations=5, # Set a max_iterations value + handle_reasoning_failure_fn=custom_handle_reasoning_failure, + verbose=True, + react_chat_formatter=ReActChatFormatter.from_defaults( + observation_role=MessageRole.TOOL + ), + ) + react_system_prompt = PromptTemplate(react_system_header_str) + agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) + agent.reset() + run_app(agent) + +if __name__ == "__main__": + # Define the argument parser at the end + parser = argparse.ArgumentParser() + parser.add_argument("--chat_model", type=str, default="model/qwen2-7B-INT4", help="Path to the chat model directory") + parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") + parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") + parser.add_argument("--personality", type=str, default="config/paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") + parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") + + args = parser.parse_args() + + main(args.chat_model, args.embedding_model, args.rag_pdf, args.personality, args.device) From 4a21c523e9d39c612acc735242f996f9f73312b5 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Fri, 28 Feb 2025 00:26:22 -0700 Subject: [PATCH 08/25] remove personality arg Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index ec4358e5..b04aed61 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -363,7 +363,7 @@ def run(): run() -def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str, device: str): +def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str): # Load models and embedding based on parsed arguments llm, embedding = setup_models(chat_model, embedding_model, device) @@ -421,10 +421,9 @@ def main(chat_model: str, embedding_model: str, rag_pdf: str, personality: str, parser = argparse.ArgumentParser() parser.add_argument("--chat_model", type=str, default="model/qwen2-7B-INT4", help="Path to the chat model directory") parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") - parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") - parser.add_argument("--personality", type=str, default="config/paint_concierge_personality.yaml", help="Path to the yaml file with chatbot personality") + parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") args = parser.parse_args() - main(args.chat_model, args.embedding_model, args.rag_pdf, args.personality, args.device) + main(args.chat_model, 
args.embedding_model, args.rag_pdf, args.device) From 1cd7643b3fa95e338ed96264ea94aea7e6d09c34 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Fri, 28 Feb 2025 00:48:44 -0700 Subject: [PATCH 09/25] change default model for convert and optimize Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index 8dfcf98c..6b8d15ba 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -126,7 +126,7 @@ def convert_embedding_model(model_type: str, model_dir: Path) -> Path: if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B"], - default="llama3.1-8B", help="Chat model to be converted") + default="qwen2-7B", help="Chat model to be converted") parser.add_argument("--embedding_model_type", type=str, choices=["bge-large"], default="bge-large", help="Embedding model to be converted") parser.add_argument("--precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Model precision") From cd72f888836cc795a611ce789723b5ee6ec8f538 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Wed, 5 Mar 2025 16:59:18 -0700 Subject: [PATCH 10/25] Improving documentation and public arg in main.py Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/README.md | 8 ++++---- ai_ref_kits/agentic_llm_rag/main.py | 11 ++++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/README.md b/ai_ref_kits/agentic_llm_rag/README.md index a87de3f2..2c860f24 100644 --- a/ai_ref_kits/agentic_llm_rag/README.md +++ b/ai_ref_kits/agentic_llm_rag/README.md @@ -9,14 +9,14 @@ [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](https://github.com/openvinotoolkit/openvino_build_deploy/blob/master/LICENSE.txt)

- +

The AI Insight Agent with RAG uses Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) to interpret user prompts, engage in meaningful dialogue, perform calculations, use RAG techniques to improve its knowledge and interact with the user to add items to a virtual shopping cart. This solution uses the OpenVINO™ toolkit to power the AI models at the edge. Designed for both consumers and employees, it functions as a smart, personalized retail assistant, offering an interactive and user-friendly experience similar to an advanced digital kiosk. This kit uses the following technology stack: - [OpenVINO Toolkit](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) ([docs](https://docs.openvino.ai/)) -- [Qwen2-7B-Instruct](https://huggingface.co/Qwen) +- [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct) - [bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) - [Gradio interface](https://www.gradio.app/docs/gradio/chatinterface) @@ -162,8 +162,8 @@ To run the application, execute the `main.py` script with the following command. ```shell python main.py \ --chat_model model/qwen2-7B-INT4 \ - --embedding_model data/test_painting_llm_rag.pdf \ - --rag_pdf model/bge-small-FP32 \ + --embedding_model model/bge-small-FP32 \ + --rag_pdf data/test_painting_llm_rag.pdf \ --device GPU.1 \ --public ``` diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index b04aed61..d3a7b9f2 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -144,7 +144,7 @@ def custom_handle_reasoning_failure(callback_manager, exception): return "Hmm...I didn't quite that. Could you please rephrase your question to be simpler?" -def run_app(agent): +def run_app(agent, public_interface): class Capturing(list): def __enter__(self): self._stdout = sys.stdout @@ -358,12 +358,12 @@ def run(): gr.Markdown("------------------------------") - demo.launch() + demo.launch(share=public_interface) run() -def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str): +def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str, public_interface: bool = False): # Load models and embedding based on parsed arguments llm, embedding = setup_models(chat_model, embedding_model, device) @@ -414,7 +414,7 @@ def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str): react_system_prompt = PromptTemplate(react_system_header_str) agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) agent.reset() - run_app(agent) + run_app(agent, public_interface) if __name__ == "__main__": # Define the argument parser at the end @@ -423,7 +423,8 @@ def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str): parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") + parser.add_argument("--public", default=False, action="store_true", help="Whether interface should be available publicly") args = parser.parse_args() - main(args.chat_model, args.embedding_model, args.rag_pdf, args.device) + main(args.chat_model, args.embedding_model, args.rag_pdf, args.device, args.public) From 
47ca6380ed3cea15e86fb6933f1af78681fa5932 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Wed, 5 Mar 2025 19:42:18 -0700 Subject: [PATCH 11/25] Address PR feedback Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/README.md | 2 + .../convert_and_optimize_llm.py | 12 +- ai_ref_kits/agentic_llm_rag/css/gradio.css | 29 +++- ai_ref_kits/agentic_llm_rag/main.py | 151 ++++++++++++++++-- ai_ref_kits/agentic_llm_rag/requirements.txt | 7 +- 5 files changed, 176 insertions(+), 25 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/README.md b/ai_ref_kits/agentic_llm_rag/README.md index 2c860f24..25ef8d1c 100644 --- a/ai_ref_kits/agentic_llm_rag/README.md +++ b/ai_ref_kits/agentic_llm_rag/README.md @@ -132,6 +132,8 @@ To convert the chat and embedding models, run: python convert_and_optimize_llm.py --chat_model_type qwen2-7B --embedding_model_type bge-large --precision int4 --model_dir model ``` +If using gated models from HuggingFace pass the `--hf_token` argument with your HuggingFace token. Remember to request access to gated models if needed. + After you run the conversion scripts, you can run `main.py` to launch the application. ## Running the Application (Gradio Interface) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index 6b8d15ba..f12bbb2a 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -1,6 +1,6 @@ import argparse from pathlib import Path - +import os import numpy as np import openvino as ov from openvino.runtime import opset10 as ops @@ -51,7 +51,7 @@ def callback(matcher: passes.Matcher) -> bool: model.reshape(1, 512) -def convert_chat_model(model_type: str, precision: str, model_dir: Path) -> Path: +def convert_chat_model(model_type: str, precision: str, model_dir: Path, access_token: str) -> Path: """ Convert chat model @@ -65,11 +65,11 @@ def convert_chat_model(model_type: str, precision: str, model_dir: Path) -> Path output_dir = model_dir / model_type model_name = MODEL_MAPPING[model_type] - # if access_token is not None: - # os.environ["HUGGING_FACE_HUB_TOKEN"] = access_token + if access_token is not None: + os.environ["HUGGING_FACE_HUB_TOKEN"] = access_token # load model and convert it to OpenVINO - model = OVModelForCausalLM.from_pretrained(model_name, export=True, compile=False, load_in_8bit=False) + model = OVModelForCausalLM.from_pretrained(model_name, export=True, compile=False, load_in_8bit=False, token=access_token) # change precision to FP16 model.half() @@ -130,7 +130,7 @@ def convert_embedding_model(model_type: str, model_dir: Path) -> Path: parser.add_argument("--embedding_model_type", type=str, choices=["bge-large"], default="bge-large", help="Embedding model to be converted") parser.add_argument("--precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Model precision") - # parser.add_argument("--hf_token", type=str, help="HuggingFace access token") + parser.add_argument("--hf_token", type=str, help="HuggingFace access token") parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in") args = parser.parse_args() diff --git a/ai_ref_kits/agentic_llm_rag/css/gradio.css b/ai_ref_kits/agentic_llm_rag/css/gradio.css index 41e5c23e..de0cca44 100644 --- a/ai_ref_kits/agentic_llm_rag/css/gradio.css +++ b/ai_ref_kits/agentic_llm_rag/css/gradio.css @@ -1,3 +1,9 @@ +body { + padding: 15px; + box-sizing: border-box; + overflow-x: 
hidden; +} + #agent-steps { border: 2px solid #ddd; border-radius: 8px; @@ -77,7 +83,7 @@ .intel-header { margin: 0px; - padding: 0px; + padding: 0 15px; background: #0054ae; height: 60px; width: 100%; @@ -105,16 +111,31 @@ .gradio-container { max-width: 100% !important; padding: 0 !important; + box-sizing: border-box; + overflow-x: hidden; +} + +/* Override Gradio's generated padding classes */ +.padding.svelte-phx28p, +[class*="padding svelte-"], +.gradio-container [class*="padding"] { + padding: 0 !important; } .intel-header-wrapper { - width: 100vw; - margin-left: calc(-50vw + 50%); + width: 100%; + max-width: 100%; + margin-left: 0; position: relative; + padding: 0; + box-sizing: border-box; } .gradio-container > .main { - padding: 0 !important; + padding: 20px !important; + max-width: 1800px; + margin: 0 auto; + box-sizing: border-box; } /* Fix label alignment issues */ diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index d3a7b9f2..31cb6965 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -7,6 +7,7 @@ import warnings from io import StringIO from pathlib import Path +from typing import Tuple, Callable import gradio as gr import nest_asyncio @@ -25,6 +26,7 @@ from llama_index.llms.openvino import OpenVINOLLM from llama_index.core.agent import ReActChatFormatter from llama_index.core.llms import MessageRole +from llama_index.core.callbacks import CallbackManager # Agent tools from tools import PaintCalculator, ShoppingCart from system_prompt import react_system_header_str @@ -42,7 +44,22 @@ props.cache_dir(): "" } -def setup_models(llm_model_path, embedding_model_path, device): +def setup_models( + llm_model_path: str, + embedding_model_path: str, + device: str) -> Tuple[OpenVINOLLM, OpenVINOEmbedding]: + """ + Sets up LLM and embedding models using OpenVINO. + + Args: + llm_model_path: Path to the LLM model + embedding_model_path: Path to the embedding model + device: Target device for inference ("CPU", "GPU", etc.) + + Returns: + Tuple of (llm, embedding) models + """ + # Load LLM model locally llm = OpenVINOLLM( model_id_or_path=str(llm_model_path), @@ -59,7 +76,15 @@ def setup_models(llm_model_path, embedding_model_path, device): return llm, embedding -def setup_tools(): +def setup_tools()-> Tuple[FunctionTool, FunctionTool, FunctionTool, FunctionTool, FunctionTool]: + + """ + Sets up and returns a collection of tools for paint calculations and shopping cart management. 
+ + Returns: + Tuple containing tools for paint cost calculation, paint gallons calculation, + adding items to cart, viewing cart, and clearing cart + """ paint_cost_calculator = FunctionTool.from_defaults( fn=PaintCalculator.calculate_paint_cost, @@ -124,7 +149,16 @@ def setup_tools(): return paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator -def load_documents(text_example_en_path): +def load_documents(text_example_en_path: str) -> VectorStoreIndex: + """ + Loads documents from the given path + + Args: + text_example_en_path: Path to the document to load + + Returns: + VectorStoreIndex for the loaded documents + """ if not text_example_en_path.exists(): text_example_en = "test_painting_llm_rag.pdf" @@ -139,18 +173,40 @@ def load_documents(text_example_en_path): return index -# Custom function to handle reasoning failures -def custom_handle_reasoning_failure(callback_manager, exception): +def custom_handle_reasoning_failure(callback_manager: CallbackManager, exception: Exception): + """ + Provides custom error handling for agent reasoning failures. + + Args: + callback_manager: The callback manager instance for event handling + exception: The exception that was raised during reasoning + """ return "Hmm...I didn't quite that. Could you please rephrase your question to be simpler?" -def run_app(agent, public_interface): +def run_app(agent: ReActAgent, public_interface: bool = False) -> None: + """ + Launches the application with the specified agent and interface settings. + + Args: + agent: The ReActAgent instance configured with tools + public_interface: Whether to launch with a public-facing Gradio interface + """ class Capturing(list): + """A context manager that captures stdout output into a list.""" def __enter__(self): + """ + Redirects stdout to a StringIO buffer and returns self. + Called when entering the 'with' block. + """ self._stdout = sys.stdout sys.stdout = self._stringio = StringIO() return self def __exit__(self, *args): + """ + Stores captured output in this list and restores stdout. + Called when exiting the 'with' block. + """ self.extend(self._stringio.getvalue().splitlines()) del self._stringio sys.stdout = self._stdout @@ -158,7 +214,18 @@ def __exit__(self, *args): def _handle_user_message(user_message, history): return "", [*history, (user_message, "")] - def update_cart_display(): + def update_cart_display()-> str: + """ + Generates an HTML representation of the shopping cart contents. + + Retrieves current cart items and creates a formatted HTML table + showing product details, quantities, prices, and totals. + If the cart is empty, returns a message indicating this. + + Returns: + str: Markdown-formatted HTML table of cart contents + or message indicating empty cart + """ cart_items = ShoppingCart.get_cart_items() if not cart_items: return "### 🛒 Your Shopping Cart is Empty" @@ -190,7 +257,27 @@ def update_cart_display(): table += f"\n**Total: ${total:.2f}**" return table - def _generate_response(chat_history, log_history): + def _generate_response(chat_history: list, log_history: list | None = None)->Tuple[str,str,str]: + """ + Generate a streaming response from the agent with formatted thought process logs. + + This function: + 1. Captures the agent's thought process + 2. Formats the thought process into readable logs + 3. Streams the agent's response token by token + 4. Tracks performance metrics for thought process and response generation + 5. 
Updates the shopping cart display + + Args: + chat_history: List of conversation messages + log_history: List to store logs, will be initialized if None + + Yields: + tuple: (chat_history, formatted_log_history, cart_content) + - chat_history: Updated with agent's response + - formatted_log_history: String of joined logs + - cart_content: HTML representation of the shopping cart + """ log.info(f"log_history {log_history}") if not isinstance(log_history, list): @@ -258,12 +345,46 @@ def _generate_response(chat_history, log_history): log_history.append(response_log) yield chat_history, "\n".join(log_history), cart_content # Join logs into a string for display - def _reset_chat(): + def _reset_chat()-> tuple[str, list, str, str]: + """ + Resets the chat interface and agent state to initial conditions. + + This function: + 1. Resets the agent's internal state + 2. Clears all items from the shopping cart + 3. Returns values needed to reset the UI components + + Returns: + tuple: Values to reset UI components + - Empty string: Clears the message input + - Empty list: Resets chat history + - Default log heading: Sets initial log area text + - Empty cart display: Shows empty shopping cart + """ agent.reset() ShoppingCart._cart_items = [] return "", [], "🤔 Agent's Thought Process", update_cart_display() - def run(): + def run()-> None: + """ + Sets up and launches the Gradio web interface for the Smart Retail Assistant. + + This function: + 1. Loads custom CSS styling if available + 2. Configures the Gradio theme and UI components + 3. Sets up the chat interface with agent interaction + 4. Configures event handlers for user inputs + 5. Adds example prompts for users + 6. Launches the web interface + + The interface includes: + - Chat window for user-agent conversation + - Log window to display agent's thought process + - Shopping cart display + - Text input for user messages + - Submit and Clear buttons + - Sample questions for easy access + """ custom_css = "" try: with open("css/gradio.css", "r") as css_file: @@ -364,6 +485,16 @@ def run(): def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str, public_interface: bool = False): + """ + Initializes and runs the agentic rag solution + + Args: + chat_model: Path to the LLM chat model + embedding_model: Path to the embedding model + rag_pdf: Path to the PDF file for RAG functionality + device: Target device for model inference ("CPU", "GPU", "GPU.1") + public_interface: Whether to expose a public-facing interface + """ # Load models and embedding based on parsed arguments llm, embedding = setup_models(chat_model, embedding_model, device) diff --git a/ai_ref_kits/agentic_llm_rag/requirements.txt b/ai_ref_kits/agentic_llm_rag/requirements.txt index a7e2d7a4..89d8dbd1 100644 --- a/ai_ref_kits/agentic_llm_rag/requirements.txt +++ b/ai_ref_kits/agentic_llm_rag/requirements.txt @@ -1,6 +1,6 @@ --extra-index-url https://download.pytorch.org/whl/cpu -openvino==2024.6.0 +openvino==2025.0.0 optimum-intel==1.21.0 optimum==1.23.3 nncf==2.14.1 @@ -11,10 +11,7 @@ llama-index-embeddings-openvino==0.5.1 llama-index-postprocessor-openvino-rerank==0.4.1 llama-index-vector-stores-faiss==0.3.0 faiss-cpu==1.9.0 - -# onnx>1.16.1 doesn't work on windows -onnx==1.16.1; platform_system == "Windows" -onnx==1.17.0; platform_system != "Windows" +onnx==1.17.0; onnxruntime==1.17.3 torch==2.5.1 From 326b05ee0afcb0d41140ac2692d1b38d86d4e3d8 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 6 Mar 2025 13:53:41 -0700 Subject: [PATCH 12/25] fix hf_token arg 
Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index f12bbb2a..66d45384 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -135,4 +135,4 @@ def convert_embedding_model(model_type: str, model_dir: Path) -> Path: args = parser.parse_args() convert_embedding_model(args.embedding_model_type, Path(args.model_dir)) - convert_chat_model(args.chat_model_type, args.precision, Path(args.model_dir)) + convert_chat_model(args.chat_model_type, args.precision, Path(args.model_dir), args.hf_token) From 92fcc62974bc6911924d8327c44aeb9e2c7cf62d Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Mon, 10 Mar 2025 12:39:59 -0700 Subject: [PATCH 13/25] Adding main.py wrapper for CI/CDs Signed-off-by: Antonio Martinez --- .../convert_and_optimize_llm.py | 2 +- ai_ref_kits/agentic_llm_rag/main.py | 590 +----------------- 2 files changed, 30 insertions(+), 562 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index 66d45384..33e9b101 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -127,7 +127,7 @@ def convert_embedding_model(model_type: str, model_dir: Path) -> Path: parser = argparse.ArgumentParser() parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B"], default="qwen2-7B", help="Chat model to be converted") - parser.add_argument("--embedding_model_type", type=str, choices=["bge-large"], + parser.add_argument("--embedding_model_type", type=str, choices=["bge-small", "bge-large", "bge-m3"], default="bge-large", help="Embedding model to be converted") parser.add_argument("--precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Model precision") parser.add_argument("--hf_token", type=str, help="HuggingFace access token") diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index 31cb6965..a9eb4b20 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -1,561 +1,29 @@ - -import argparse -import io -import logging -import sys -import time -import warnings -from io import StringIO -from pathlib import Path -from typing import Tuple, Callable - -import gradio as gr -import nest_asyncio -import openvino.properties as props -import openvino.properties.hint as hints -import openvino.properties.streams as streams -import requests -import yaml -from llama_index.core import PromptTemplate -from llama_index.core import SimpleDirectoryReader -from llama_index.core import VectorStoreIndex, Settings -from llama_index.core.agent import ReActAgent -from llama_index.core.tools import FunctionTool -from llama_index.core.tools import QueryEngineTool, ToolMetadata -from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding -from llama_index.llms.openvino import OpenVINOLLM -from llama_index.core.agent import ReActChatFormatter -from llama_index.core.llms import MessageRole -from llama_index.core.callbacks import CallbackManager -# Agent tools -from tools import PaintCalculator, ShoppingCart -from system_prompt import react_system_header_str - -# Initialize logging -logging.basicConfig(level=logging.INFO) -log = 
logging.getLogger(__name__) - -#Filter unnecessary warnings for demonstration -warnings.filterwarnings("ignore") - -ov_config = { - hints.performance_mode(): hints.PerformanceMode.LATENCY, - streams.num(): "1", - props.cache_dir(): "" -} - -def setup_models( - llm_model_path: str, - embedding_model_path: str, - device: str) -> Tuple[OpenVINOLLM, OpenVINOEmbedding]: - """ - Sets up LLM and embedding models using OpenVINO. - - Args: - llm_model_path: Path to the LLM model - embedding_model_path: Path to the embedding model - device: Target device for inference ("CPU", "GPU", etc.) - - Returns: - Tuple of (llm, embedding) models - """ - - # Load LLM model locally - llm = OpenVINOLLM( - model_id_or_path=str(llm_model_path), - context_window=8192, - max_new_tokens=500, - model_kwargs={"ov_config": ov_config}, - generate_kwargs={"do_sample": False, "temperature": 0.1, "top_p": 0.8}, - device_map=device, - ) - - # Load the embedding model locally - embedding = OpenVINOEmbedding(model_id_or_path=embedding_model_path, device=device) - - return llm, embedding - - -def setup_tools()-> Tuple[FunctionTool, FunctionTool, FunctionTool, FunctionTool, FunctionTool]: - - """ - Sets up and returns a collection of tools for paint calculations and shopping cart management. - - Returns: - Tuple containing tools for paint cost calculation, paint gallons calculation, - adding items to cart, viewing cart, and clearing cart - """ - - paint_cost_calculator = FunctionTool.from_defaults( - fn=PaintCalculator.calculate_paint_cost, - name="calculate_paint_cost", - description="ALWAYS use this tool when calculating paint cost for a specific area in square feet. Required inputs: area (float, square feet), price_per_gallon (float), add_paint_supply_costs (bool)" - ) - - paint_gallons_calculator = FunctionTool.from_defaults( - fn=PaintCalculator.calculate_paint_gallons_needed, - name="calculate_paint_gallons", - description="Calculate how many gallons of paint are needed to cover a specific area. Required input: area (float, square feet). Returns the number of gallons needed, rounded up to ensure full coverage." -) - - add_to_cart_tool = FunctionTool.from_defaults( - fn=ShoppingCart.add_to_cart, - name="add_to_cart", - description=""" - Use this tool WHENEVER a user wants to add any item to their cart or shopping cart. - - PARAMETERS: - - product_name (string): The exact name of the product (e.g., "Premium Latex Paint") - - quantity (int): The number of units to add, must be a positive integer (e.g., 2) - - price_per_unit (float): The price per unit in dollars (e.g., 24.99) - - RETURNS: - - A confirmation message and updated cart contents - - EXAMPLES: - To add 3 gallons of paint at $29.99 each: add_to_cart(product_name="Interior Eggshell Paint", quantity=3, price_per_unit=29.99) - """ - ) - - get_cart_items_tool = FunctionTool.from_defaults( - fn=ShoppingCart.get_cart_items, - name="view_cart", - description=""" - Use this tool when a user wants to see what's in their shopping cart. - No parameters are required. - - RETURNS: - - A list of all items currently in the cart with their details - - EXAMPLES: - To view the current cart contents: view_cart() - """ - ) - - clear_cart_tool = FunctionTool.from_defaults( - fn=ShoppingCart.clear_cart, - name="clear_cart", - description=""" - Use this tool when a user asks to empty or clear their shopping cart. - No parameters are required. 
- - RETURNS: - - A confirmation message that the cart has been cleared - - EXAMPLES: - To empty the shopping cart: clear_cart() - """ - ) - return paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator - - -def load_documents(text_example_en_path: str) -> VectorStoreIndex: - """ - Loads documents from the given path - - Args: - text_example_en_path: Path to the document to load - - Returns: - VectorStoreIndex for the loaded documents - """ - - if not text_example_en_path.exists(): - text_example_en = "test_painting_llm_rag.pdf" - r = requests.get(text_example_en) - content = io.BytesIO(r.content) - with open(text_example_en_path, "wb") as f: - f.write(content.read()) - - reader = SimpleDirectoryReader(input_files=[text_example_en_path]) - documents = reader.load_data() - index = VectorStoreIndex.from_documents(documents) - - return index - -def custom_handle_reasoning_failure(callback_manager: CallbackManager, exception: Exception): - """ - Provides custom error handling for agent reasoning failures. - - Args: - callback_manager: The callback manager instance for event handling - exception: The exception that was raised during reasoning - """ - return "Hmm...I didn't quite that. Could you please rephrase your question to be simpler?" - - -def run_app(agent: ReActAgent, public_interface: bool = False) -> None: - """ - Launches the application with the specified agent and interface settings. - - Args: - agent: The ReActAgent instance configured with tools - public_interface: Whether to launch with a public-facing Gradio interface - """ - class Capturing(list): - """A context manager that captures stdout output into a list.""" - def __enter__(self): - """ - Redirects stdout to a StringIO buffer and returns self. - Called when entering the 'with' block. - """ - self._stdout = sys.stdout - sys.stdout = self._stringio = StringIO() - return self - def __exit__(self, *args): - """ - Stores captured output in this list and restores stdout. - Called when exiting the 'with' block. - """ - self.extend(self._stringio.getvalue().splitlines()) - del self._stringio - sys.stdout = self._stdout - - def _handle_user_message(user_message, history): - return "", [*history, (user_message, "")] - - def update_cart_display()-> str: - """ - Generates an HTML representation of the shopping cart contents. - - Retrieves current cart items and creates a formatted HTML table - showing product details, quantities, prices, and totals. - If the cart is empty, returns a message indicating this. - - Returns: - str: Markdown-formatted HTML table of cart contents - or message indicating empty cart - """ - cart_items = ShoppingCart.get_cart_items() - if not cart_items: - return "### 🛒 Your Shopping Cart is Empty" - - table = "### 🛒 Your Shopping Cart\n\n" - table += "\n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - table += " \n" - - for item in cart_items: - table += " \n" - table += f" \n" - table += f" \n" - table += f" \n" - table += f" \n" - table += " \n" - - table += " \n" - table += "
ProductQtyPriceTotal
{item['product_name']}{item['quantity']}${item['price_per_unit']:.2f}${item['total_price']:.2f}
\n" - - total = sum(item["total_price"] for item in cart_items) - table += f"\n**Total: ${total:.2f}**" - return table - - def _generate_response(chat_history: list, log_history: list | None = None)->Tuple[str,str,str]: - """ - Generate a streaming response from the agent with formatted thought process logs. - - This function: - 1. Captures the agent's thought process - 2. Formats the thought process into readable logs - 3. Streams the agent's response token by token - 4. Tracks performance metrics for thought process and response generation - 5. Updates the shopping cart display - - Args: - chat_history: List of conversation messages - log_history: List to store logs, will be initialized if None - - Yields: - tuple: (chat_history, formatted_log_history, cart_content) - - chat_history: Updated with agent's response - - formatted_log_history: String of joined logs - - cart_content: HTML representation of the shopping cart - """ - log.info(f"log_history {log_history}") - - if not isinstance(log_history, list): - log_history = [] - - # Capture time for thought process - start_thought_time = time.time() - - # Capture the thought process output - with Capturing() as output: - try: - response = agent.stream_chat(chat_history[-1][0]) - except ValueError: - response = agent.stream_chat(chat_history[-1][0]) - formatted_output = [] - for line in output: - if "Thought:" in line: - formatted_output.append("\n🤔 **Thought:**\n" + line.split("Thought:", 1)[1]) - elif "Action:" in line: - formatted_output.append("\n🔧 **Action:**\n" + line.split("Action:", 1)[1]) - elif "Action Input:" in line: - formatted_output.append("\n📥 **Input:**\n" + line.split("Action Input:", 1)[1]) - elif "Observation:" in line: - formatted_output.append("\n📋 **Result:**\n" + line.split("Observation:", 1)[1]) - else: - formatted_output.append(line) - end_thought_time = time.time() - thought_process_time = end_thought_time - start_thought_time - - # After response is complete, show the captured logs in the log area - log_entries = "\n".join(formatted_output) - log_history.append("### 🤔 Agent's Thought Process") - thought_process_log = f"Thought Process Time: {thought_process_time:.2f} seconds" - log_history.append(f"{log_entries}\n{thought_process_log}") - cart_content = update_cart_display() # update shopping cart - yield chat_history, "\n".join(log_history), cart_content # Yield after the thought process time is captured - - # Now capture response generation time - start_response_time = time.time() - - # Gradually yield the response from the agent to the chat - # Quick fix for agent occasionally repeating the first word of its repsponse - last_token = "Dummy Token" - i = 0 - for token in response.response_gen: - if i == 0: - last_token = token - if i == 1 and token.split()[0] == last_token.split()[0]: - chat_history[-1][1] += token.split()[1] + " " - else: - chat_history[-1][1] += token - yield chat_history, "\n".join(log_history), cart_content # Ensure log_history is a string - if i <= 2: i += 1 - - end_response_time = time.time() - response_time = end_response_time - start_response_time - - # Log tokens per second along with the device information - tokens = len(chat_history[-1][1].split(" ")) * 4 / 3 # Convert words to approx token count - response_log = f"Response Time: {response_time:.2f} seconds ({tokens / response_time:.2f} tokens/s)" - - log.info(response_log) - - # Append the response time to log history - log_history.append(response_log) - yield chat_history, "\n".join(log_history), cart_content # Join logs into a 
string for display - - def _reset_chat()-> tuple[str, list, str, str]: - """ - Resets the chat interface and agent state to initial conditions. - - This function: - 1. Resets the agent's internal state - 2. Clears all items from the shopping cart - 3. Returns values needed to reset the UI components - - Returns: - tuple: Values to reset UI components - - Empty string: Clears the message input - - Empty list: Resets chat history - - Default log heading: Sets initial log area text - - Empty cart display: Shows empty shopping cart - """ - agent.reset() - ShoppingCart._cart_items = [] - return "", [], "🤔 Agent's Thought Process", update_cart_display() - - def run()-> None: - """ - Sets up and launches the Gradio web interface for the Smart Retail Assistant. - - This function: - 1. Loads custom CSS styling if available - 2. Configures the Gradio theme and UI components - 3. Sets up the chat interface with agent interaction - 4. Configures event handlers for user inputs - 5. Adds example prompts for users - 6. Launches the web interface - - The interface includes: - - Chat window for user-agent conversation - - Log window to display agent's thought process - - Shopping cart display - - Text input for user messages - - Submit and Clear buttons - - Sample questions for easy access - """ - custom_css = "" - try: - with open("css/gradio.css", "r") as css_file: - custom_css = css_file.read() - except Exception as e: - log.warning(f"Could not load CSS file: {e}") - - theme = gr.themes.Default( - primary_hue="blue", - font=[gr.themes.GoogleFont("Montserrat"), "ui-sans-serif", "sans-serif"], - ) - - with gr.Blocks(theme=theme, css=custom_css) as demo: - - header = gr.HTML( - "
" - "
" - " " - "
Smart Retail Assistant 🤖: Agentic LLMs with RAG 💭
" - "
" - "
" - ) - - with gr.Row(): - chat_window = gr.Chatbot( - label="Paint Purchase Helper", - avatar_images=(None, "https://docs.openvino.ai/2024/_static/favicon.ico"), - height=400, # Adjust height as per your preference - scale=2 # Set a higher scale value for Chatbot to make it wider - #autoscroll=True, # Enable auto-scrolling for better UX - ) - log_window = gr.Markdown( - show_label=True, - value="### 🤔 Agent's Thought Process", - height=400, - elem_id="agent-steps" - ) - cart_display = gr.Markdown( - value=update_cart_display(), - elem_id="shopping-cart", - height=400 - ) - - with gr.Row(): - message = gr.Textbox(label="Ask the Paint Expert 🎨", scale=4, placeholder="Type your prompt/Question and press Enter") - - with gr.Column(scale=1): - submit_btn = gr.Button("Submit", variant="primary") - clear = gr.ClearButton() - - sample_questions = [ - "what paint is the best for kitchens?", - "what is the price of it?", - "how many gallons of paint do I need to cover 600 sq ft ?", - "add them to my cart", - "what else do I need to complete my project?", - "add 2 brushes to my cart", - "create a table with paint products sorted by price", - "Show me what's in my cart", - "clear shopping cart", - "I have a room 1000 sqft, I'm looking for supplies to paint the room" - ] - gr.Examples( - examples=sample_questions, - inputs=message, - label="Examples" - ) - - # Ensure that individual components are passed - message.submit( - _handle_user_message, - inputs=[message, chat_window], - outputs=[message, chat_window], - queue=False - ).then( - _generate_response, - inputs=[chat_window, log_window], - outputs=[chat_window, log_window, cart_display], - ) - - submit_btn.click( - _handle_user_message, - inputs=[message, chat_window], - outputs=[message, chat_window], - queue=False, - ).then( - _generate_response, - inputs=[chat_window, log_window], - outputs=[chat_window, log_window, cart_display], - ) - clear.click(_reset_chat, None, [message, chat_window, log_window, cart_display]) - - gr.Markdown("------------------------------") - - demo.launch(share=public_interface) - - run() - - -def main(chat_model: str, embedding_model: str, rag_pdf: str, device: str, public_interface: bool = False): - """ - Initializes and runs the agentic rag solution - - Args: - chat_model: Path to the LLM chat model - embedding_model: Path to the embedding model - rag_pdf: Path to the PDF file for RAG functionality - device: Target device for model inference ("CPU", "GPU", "GPU.1") - public_interface: Whether to expose a public-facing interface - """ - # Load models and embedding based on parsed arguments - llm, embedding = setup_models(chat_model, embedding_model, device) - - Settings.embed_model = embedding - Settings.llm = llm - - # Set up tools - paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator = setup_tools() - - text_example_en_path = Path(rag_pdf) - index = load_documents(text_example_en_path) - log.info(f"loading in {index}") - - vector_tool = QueryEngineTool( - index.as_query_engine(streaming=True), - metadata=ToolMetadata( - name="vector_search", - description=""" - Use this tool for ANY question about paint products, recommendations, prices, or technical specifications. - - WHEN TO USE: - - User asks about paint types, brands, or products - - User needs price information before adding to cart - - User needs recommendations based on their project - - User has technical questions about painting - - EXAMPLES: - - "What paint is best for kitchen cabinets?" 
- - "How much does AwesomePainter Interior Acrylic Latex cost?" - - "What supplies do I need for painting my living room?" - """, - ), - ) - - nest_asyncio.apply() - - # Define agent and available tools - agent = ReActAgent.from_tools( - [paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, vector_tool, paint_gallons_calculator], - llm=llm, - max_iterations=5, # Set a max_iterations value - handle_reasoning_failure_fn=custom_handle_reasoning_failure, - verbose=True, - react_chat_formatter=ReActChatFormatter.from_defaults( - observation_role=MessageRole.TOOL - ), - ) - react_system_prompt = PromptTemplate(react_system_header_str) - agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) - agent.reset() - run_app(agent, public_interface) - -if __name__ == "__main__": - # Define the argument parser at the end - parser = argparse.ArgumentParser() - parser.add_argument("--chat_model", type=str, default="model/qwen2-7B-INT4", help="Path to the chat model directory") - parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") - parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") - parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") - parser.add_argument("--public", default=False, action="store_true", help="Whether interface should be available publicly") - - args = parser.parse_args() - - main(args.chat_model, args.embedding_model, args.rag_pdf, args.device, args.public) +import argparse +from pathlib import Path + +import app +import convert_and_optimize_llm as chat + + +def main(args): + embedding_model_dir = chat.convert_embedding_model(args.embedding_model_type, Path(args.model_dir)) + chat_model_dir = chat.convert_chat_model(args.chat_model_type, args.chat_precision, Path(args.model_dir), args.hf_token) + + app.run(chat_model_dir, embedding_model_dir, Path(args.rag_pdf), args.device, args.public) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B"], + default="qwen2-7B", help="Chat model to be converted") + parser.add_argument("--embedding_model_type", type=str, choices=["bge-small", "bge-large", "bge-m3"], + default="bge-small", help="Embedding model to be converted") + parser.add_argument("--chat_precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Chat model precision") + parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3") + parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in") + parser.add_argument("--example_pdf", type=str, default="data/large.pdf", + help="Path to the PDF file which is an additional context") + parser.add_argument("--public", default=False, action="store_true", help="Whether interface should be available publicly") + + main(parser.parse_args()) \ No newline at end of file From 414dc0cc4389c6432f9eadc05a6096ee1ed03d3d Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Mon, 10 Mar 2025 12:42:57 -0700 Subject: [PATCH 14/25] Rename app.py Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/app.py | 561 +++++++++++++++++++++++++++++ 1 file changed, 561 insertions(+) create mode 100644 ai_ref_kits/agentic_llm_rag/app.py diff --git a/ai_ref_kits/agentic_llm_rag/app.py 
b/ai_ref_kits/agentic_llm_rag/app.py new file mode 100644 index 00000000..1d2af5af --- /dev/null +++ b/ai_ref_kits/agentic_llm_rag/app.py @@ -0,0 +1,561 @@ + +import argparse +import io +import logging +import sys +import time +import warnings +from io import StringIO +from pathlib import Path +from typing import Tuple, Callable + +import gradio as gr +import nest_asyncio +import openvino.properties as props +import openvino.properties.hint as hints +import openvino.properties.streams as streams +import requests +import yaml +from llama_index.core import PromptTemplate +from llama_index.core import SimpleDirectoryReader +from llama_index.core import VectorStoreIndex, Settings +from llama_index.core.agent import ReActAgent +from llama_index.core.tools import FunctionTool +from llama_index.core.tools import QueryEngineTool, ToolMetadata +from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding +from llama_index.llms.openvino import OpenVINOLLM +from llama_index.core.agent import ReActChatFormatter +from llama_index.core.llms import MessageRole +from llama_index.core.callbacks import CallbackManager +# Agent tools +from tools import PaintCalculator, ShoppingCart +from system_prompt import react_system_header_str + +# Initialize logging +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +#Filter unnecessary warnings for demonstration +warnings.filterwarnings("ignore") + +ov_config = { + hints.performance_mode(): hints.PerformanceMode.LATENCY, + streams.num(): "1", + props.cache_dir(): "" +} + +def setup_models( + llm_model_path: str, + embedding_model_path: str, + device: str) -> Tuple[OpenVINOLLM, OpenVINOEmbedding]: + """ + Sets up LLM and embedding models using OpenVINO. + + Args: + llm_model_path: Path to the LLM model + embedding_model_path: Path to the embedding model + device: Target device for inference ("CPU", "GPU", etc.) + + Returns: + Tuple of (llm, embedding) models + """ + + # Load LLM model locally + llm = OpenVINOLLM( + model_id_or_path=str(llm_model_path), + context_window=8192, + max_new_tokens=500, + model_kwargs={"ov_config": ov_config}, + generate_kwargs={"do_sample": False, "temperature": 0.1, "top_p": 0.8}, + device_map=device, + ) + + # Load the embedding model locally + embedding = OpenVINOEmbedding(model_id_or_path=embedding_model_path, device=device) + + return llm, embedding + + +def setup_tools()-> Tuple[FunctionTool, FunctionTool, FunctionTool, FunctionTool, FunctionTool]: + + """ + Sets up and returns a collection of tools for paint calculations and shopping cart management. + + Returns: + Tuple containing tools for paint cost calculation, paint gallons calculation, + adding items to cart, viewing cart, and clearing cart + """ + + paint_cost_calculator = FunctionTool.from_defaults( + fn=PaintCalculator.calculate_paint_cost, + name="calculate_paint_cost", + description="ALWAYS use this tool when calculating paint cost for a specific area in square feet. Required inputs: area (float, square feet), price_per_gallon (float), add_paint_supply_costs (bool)" + ) + + paint_gallons_calculator = FunctionTool.from_defaults( + fn=PaintCalculator.calculate_paint_gallons_needed, + name="calculate_paint_gallons", + description="Calculate how many gallons of paint are needed to cover a specific area. Required input: area (float, square feet). Returns the number of gallons needed, rounded up to ensure full coverage." 
+)
+
+    add_to_cart_tool = FunctionTool.from_defaults(
+        fn=ShoppingCart.add_to_cart,
+        name="add_to_cart",
+        description="""
+        Use this tool WHENEVER a user wants to add any item to their cart or shopping cart.
+
+        PARAMETERS:
+        - product_name (string): The exact name of the product (e.g., "Premium Latex Paint")
+        - quantity (int): The number of units to add, must be a positive integer (e.g., 2)
+        - price_per_unit (float): The price per unit in dollars (e.g., 24.99)
+
+        RETURNS:
+        - A confirmation message and updated cart contents
+
+        EXAMPLES:
+        To add 3 gallons of paint at $29.99 each: add_to_cart(product_name="Interior Eggshell Paint", quantity=3, price_per_unit=29.99)
+        """
+    )
+
+    get_cart_items_tool = FunctionTool.from_defaults(
+        fn=ShoppingCart.get_cart_items,
+        name="view_cart",
+        description="""
+        Use this tool when a user wants to see what's in their shopping cart.
+        No parameters are required.
+
+        RETURNS:
+        - A list of all items currently in the cart with their details
+
+        EXAMPLES:
+        To view the current cart contents: view_cart()
+        """
+    )
+
+    clear_cart_tool = FunctionTool.from_defaults(
+        fn=ShoppingCart.clear_cart,
+        name="clear_cart",
+        description="""
+        Use this tool when a user asks to empty or clear their shopping cart.
+        No parameters are required.
+
+        RETURNS:
+        - A confirmation message that the cart has been cleared
+
+        EXAMPLES:
+        To empty the shopping cart: clear_cart()
+        """
+    )
+    return paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator
+
+
+def load_documents(text_example_en_path: Path) -> VectorStoreIndex:
+    """
+    Loads documents from the given path
+
+    Args:
+        text_example_en_path: Path to the document to load
+
+    Returns:
+        VectorStoreIndex for the loaded documents
+    """
+
+    if not text_example_en_path.exists():
+        text_example_en = "test_painting_llm_rag.pdf"
+        r = requests.get(text_example_en)
+        content = io.BytesIO(r.content)
+        with open(text_example_en_path, "wb") as f:
+            f.write(content.read())
+
+    reader = SimpleDirectoryReader(input_files=[text_example_en_path])
+    documents = reader.load_data()
+    index = VectorStoreIndex.from_documents(documents)
+
+    return index
+
+def custom_handle_reasoning_failure(callback_manager: CallbackManager, exception: Exception):
+    """
+    Provides custom error handling for agent reasoning failures.
+
+    Args:
+        callback_manager: The callback manager instance for event handling
+        exception: The exception that was raised during reasoning
+    """
+    return "Hmm...I didn't quite get that. Could you please rephrase your question to be simpler?"
+
+
+def run_app(agent: ReActAgent, public_interface: bool = False) -> None:
+    """
+    Launches the application with the specified agent and interface settings.
+
+    Args:
+        agent: The ReActAgent instance configured with tools
+        public_interface: Whether to launch with a public-facing Gradio interface
+    """
+    class Capturing(list):
+        """A context manager that captures stdout output into a list."""
+        def __enter__(self):
+            """
+            Redirects stdout to a StringIO buffer and returns self.
+            Called when entering the 'with' block.
+            """
+            self._stdout = sys.stdout
+            sys.stdout = self._stringio = StringIO()
+            return self
+        def __exit__(self, *args):
+            """
+            Stores captured output in this list and restores stdout.
+            Called when exiting the 'with' block.
+            """
+            self.extend(self._stringio.getvalue().splitlines())
+            del self._stringio
+            sys.stdout = self._stdout
+
+    def _handle_user_message(user_message, history):
+        return "", [*history, (user_message, "")]
+
+    def update_cart_display() -> str:
+        """
+        Generates an HTML representation of the shopping cart contents.
+
+        Retrieves current cart items and creates a formatted HTML table
+        showing product details, quantities, prices, and totals.
+        If the cart is empty, returns a message indicating this.
+
+        Returns:
+            str: Markdown-formatted HTML table of cart contents
+                 or message indicating empty cart
+        """
+        cart_items = ShoppingCart.get_cart_items()
+        if not cart_items:
+            return "### 🛒 Your Shopping Cart is Empty"
+
+        table = "### 🛒 Your Shopping Cart\n\n"
+        table += "<table>\n"
+        table += "  <thead>\n"
+        table += "    <tr>\n"
+        table += "      <th>Product</th>\n"
+        table += "      <th>Qty</th>\n"
+        table += "      <th>Price</th>\n"
+        table += "      <th>Total</th>\n"
+        table += "    </tr>\n"
+        table += "  </thead>\n"
+        table += "  <tbody>\n"
+
+        for item in cart_items:
+            table += "    <tr>\n"
+            table += f"      <td>{item['product_name']}</td>\n"
+            table += f"      <td>{item['quantity']}</td>\n"
+            table += f"      <td>${item['price_per_unit']:.2f}</td>\n"
+            table += f"      <td>${item['total_price']:.2f}</td>\n"
+            table += "    </tr>\n"
+
+        table += "  </tbody>\n"
+        table += "</table>\n"
+
+        total = sum(item["total_price"] for item in cart_items)
+        table += f"\n**Total: ${total:.2f}**"
+        return table
+
+    def _generate_response(chat_history: list, log_history: list | None = None) -> Tuple[str, str, str]:
+        """
+        Generate a streaming response from the agent with formatted thought process logs.
+
+        This function:
+        1. Captures the agent's thought process
+        2. Formats the thought process into readable logs
+        3. Streams the agent's response token by token
+        4. Tracks performance metrics for thought process and response generation
+        5. Updates the shopping cart display
+
+        Args:
+            chat_history: List of conversation messages
+            log_history: List to store logs, will be initialized if None
+
+        Yields:
+            tuple: (chat_history, formatted_log_history, cart_content)
+                - chat_history: Updated with agent's response
+                - formatted_log_history: String of joined logs
+                - cart_content: HTML representation of the shopping cart
+        """
+        log.info(f"log_history {log_history}")
+
+        if not isinstance(log_history, list):
+            log_history = []
+
+        # Capture time for thought process
+        start_thought_time = time.time()
+
+        # Capture the thought process output
+        with Capturing() as output:
+            try:
+                response = agent.stream_chat(chat_history[-1][0])
+            except ValueError:
+                response = agent.stream_chat(chat_history[-1][0])
+        formatted_output = []
+        for line in output:
+            if "Thought:" in line:
+                formatted_output.append("\n🤔 **Thought:**\n" + line.split("Thought:", 1)[1])
+            elif "Action:" in line:
+                formatted_output.append("\n🔧 **Action:**\n" + line.split("Action:", 1)[1])
+            elif "Action Input:" in line:
+                formatted_output.append("\n📥 **Input:**\n" + line.split("Action Input:", 1)[1])
+            elif "Observation:" in line:
+                formatted_output.append("\n📋 **Result:**\n" + line.split("Observation:", 1)[1])
+            else:
+                formatted_output.append(line)
+        end_thought_time = time.time()
+        thought_process_time = end_thought_time - start_thought_time
+
+        # After response is complete, show the captured logs in the log area
+        log_entries = "\n".join(formatted_output)
+        log_history.append("### 🤔 Agent's Thought Process")
+        thought_process_log = f"Thought Process Time: {thought_process_time:.2f} seconds"
+        log_history.append(f"{log_entries}\n{thought_process_log}")
+        cart_content = update_cart_display()  # update shopping cart
+        yield chat_history, "\n".join(log_history), cart_content  # Yield after the thought process time is captured
+
+        # Now capture response generation time
+        start_response_time = time.time()
+
+        # Gradually yield the response from the agent to the chat
+        # Quick fix for agent occasionally repeating the first word of its response
+        last_token = "Dummy Token"
+        i = 0
+        for token in response.response_gen:
+            if i == 0:
+                last_token = token
+            if i == 1 and token.split()[0] == last_token.split()[0]:
+                chat_history[-1][1] += token.split()[1] + " "
+            else:
+                chat_history[-1][1] += token
+            yield chat_history, "\n".join(log_history), cart_content  # Ensure log_history is a string
+            if i <= 2: i += 1
+
+        end_response_time = time.time()
+        response_time = end_response_time - start_response_time
+
+        # Log tokens per second along with the device information
+        tokens = len(chat_history[-1][1].split(" ")) * 4 / 3  # Convert words to approx token count
+        response_log = f"Response Time: {response_time:.2f} seconds ({tokens / response_time:.2f} tokens/s)"
+
+        log.info(response_log)
+
+        # Append the response time to log history
+        log_history.append(response_log)
+        yield chat_history, "\n".join(log_history), cart_content  # Join logs into a
string for display + + def _reset_chat()-> tuple[str, list, str, str]: + """ + Resets the chat interface and agent state to initial conditions. + + This function: + 1. Resets the agent's internal state + 2. Clears all items from the shopping cart + 3. Returns values needed to reset the UI components + + Returns: + tuple: Values to reset UI components + - Empty string: Clears the message input + - Empty list: Resets chat history + - Default log heading: Sets initial log area text + - Empty cart display: Shows empty shopping cart + """ + agent.reset() + ShoppingCart._cart_items = [] + return "", [], "🤔 Agent's Thought Process", update_cart_display() + + def run()-> None: + """ + Sets up and launches the Gradio web interface for the Smart Retail Assistant. + + This function: + 1. Loads custom CSS styling if available + 2. Configures the Gradio theme and UI components + 3. Sets up the chat interface with agent interaction + 4. Configures event handlers for user inputs + 5. Adds example prompts for users + 6. Launches the web interface + + The interface includes: + - Chat window for user-agent conversation + - Log window to display agent's thought process + - Shopping cart display + - Text input for user messages + - Submit and Clear buttons + - Sample questions for easy access + """ + custom_css = "" + try: + with open("css/gradio.css", "r") as css_file: + custom_css = css_file.read() + except Exception as e: + log.warning(f"Could not load CSS file: {e}") + + theme = gr.themes.Default( + primary_hue="blue", + font=[gr.themes.GoogleFont("Montserrat"), "ui-sans-serif", "sans-serif"], + ) + + with gr.Blocks(theme=theme, css=custom_css) as demo: + + header = gr.HTML( + "
" + "
" + " " + "
Smart Retail Assistant 🤖: Agentic LLMs with RAG 💭
" + "
" + "
" + ) + + with gr.Row(): + chat_window = gr.Chatbot( + label="Paint Purchase Helper", + avatar_images=(None, "https://docs.openvino.ai/2024/_static/favicon.ico"), + height=400, # Adjust height as per your preference + scale=2 # Set a higher scale value for Chatbot to make it wider + #autoscroll=True, # Enable auto-scrolling for better UX + ) + log_window = gr.Markdown( + show_label=True, + value="### 🤔 Agent's Thought Process", + height=400, + elem_id="agent-steps" + ) + cart_display = gr.Markdown( + value=update_cart_display(), + elem_id="shopping-cart", + height=400 + ) + + with gr.Row(): + message = gr.Textbox(label="Ask the Paint Expert 🎨", scale=4, placeholder="Type your prompt/Question and press Enter") + + with gr.Column(scale=1): + submit_btn = gr.Button("Submit", variant="primary") + clear = gr.ClearButton() + + sample_questions = [ + "what paint is the best for kitchens?", + "what is the price of it?", + "how many gallons of paint do I need to cover 600 sq ft ?", + "add them to my cart", + "what else do I need to complete my project?", + "add 2 brushes to my cart", + "create a table with paint products sorted by price", + "Show me what's in my cart", + "clear shopping cart", + "I have a room 1000 sqft, I'm looking for supplies to paint the room" + ] + gr.Examples( + examples=sample_questions, + inputs=message, + label="Examples" + ) + + # Ensure that individual components are passed + message.submit( + _handle_user_message, + inputs=[message, chat_window], + outputs=[message, chat_window], + queue=False + ).then( + _generate_response, + inputs=[chat_window, log_window], + outputs=[chat_window, log_window, cart_display], + ) + + submit_btn.click( + _handle_user_message, + inputs=[message, chat_window], + outputs=[message, chat_window], + queue=False, + ).then( + _generate_response, + inputs=[chat_window, log_window], + outputs=[chat_window, log_window, cart_display], + ) + clear.click(_reset_chat, None, [message, chat_window, log_window, cart_display]) + + gr.Markdown("------------------------------") + + demo.launch(share=public_interface) + + run() + + +def run(chat_model: str, embedding_model: str, rag_pdf: str, device: str, public_interface: bool = False): + """ + Initializes and runs the agentic rag solution + + Args: + chat_model: Path to the LLM chat model + embedding_model: Path to the embedding model + rag_pdf: Path to the PDF file for RAG functionality + device: Target device for model inference ("CPU", "GPU", "GPU.1") + public_interface: Whether to expose a public-facing interface + """ + # Load models and embedding based on parsed arguments + llm, embedding = setup_models(chat_model, embedding_model, device) + + Settings.embed_model = embedding + Settings.llm = llm + + # Set up tools + paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, paint_gallons_calculator = setup_tools() + + text_example_en_path = Path(rag_pdf) + index = load_documents(text_example_en_path) + log.info(f"loading in {index}") + + vector_tool = QueryEngineTool( + index.as_query_engine(streaming=True), + metadata=ToolMetadata( + name="vector_search", + description=""" + Use this tool for ANY question about paint products, recommendations, prices, or technical specifications. + + WHEN TO USE: + - User asks about paint types, brands, or products + - User needs price information before adding to cart + - User needs recommendations based on their project + - User has technical questions about painting + + EXAMPLES: + - "What paint is best for kitchen cabinets?" 
+ - "How much does AwesomePainter Interior Acrylic Latex cost?" + - "What supplies do I need for painting my living room?" + """, + ), + ) + + nest_asyncio.apply() + + # Define agent and available tools + agent = ReActAgent.from_tools( + [paint_cost_calculator, add_to_cart_tool, get_cart_items_tool, clear_cart_tool, vector_tool, paint_gallons_calculator], + llm=llm, + max_iterations=5, # Set a max_iterations value + handle_reasoning_failure_fn=custom_handle_reasoning_failure, + verbose=True, + react_chat_formatter=ReActChatFormatter.from_defaults( + observation_role=MessageRole.TOOL + ), + ) + react_system_prompt = PromptTemplate(react_system_header_str) + agent.update_prompts({"agent_worker:system_prompt": react_system_prompt}) + agent.reset() + run_app(agent, public_interface) + +if __name__ == "__main__": + # Define the argument parser at the end + parser = argparse.ArgumentParser() + parser.add_argument("--chat_model", type=str, default="model/qwen2-7B-INT4", help="Path to the chat model directory") + parser.add_argument("--embedding_model", type=str, default="model/bge-large-FP32", help="Path to the embedding model directory") + parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to a RAG PDF file with additional knowledge the chatbot can rely on.") + parser.add_argument("--device", type=str, default="GPU", help="Device for inferencing (CPU,GPU,GPU.1,NPU)") + parser.add_argument("--public", default=False, action="store_true", help="Whether interface should be available publicly") + + args = parser.parse_args() + + run(args.chat_model, args.embedding_model, args.rag_pdf, args.device, args.public) From 466ea9d200b9ce91c71b3f51b9c4fb4d6398d65d Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Mon, 10 Mar 2025 12:46:15 -0700 Subject: [PATCH 15/25] Adding device arg Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index a9eb4b20..f64953d3 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -9,7 +9,7 @@ def main(args): embedding_model_dir = chat.convert_embedding_model(args.embedding_model_type, Path(args.model_dir)) chat_model_dir = chat.convert_chat_model(args.chat_model_type, args.chat_precision, Path(args.model_dir), args.hf_token) - app.run(chat_model_dir, embedding_model_dir, Path(args.rag_pdf), args.device, args.public) + app.run(chat_model_dir, embedding_model_dir, Path(args.rag_pdf), "GPU", args.public) if __name__ == '__main__': From 1e1861c8d26999027a7ae3e069f798f7dc75f16c Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Mon, 10 Mar 2025 12:51:21 -0700 Subject: [PATCH 16/25] Add model mapping for bge models Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index 33e9b101..9163ba48 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -11,6 +11,8 @@ MODEL_MAPPING = { "qwen2-7B": "Qwen/Qwen2-7B-Instruct", "bge-large": "BAAI/bge-large-en-v1.5", + "bge-small": "BAAI/bge-small-en-v1.5", + "bge-m3": "BAAI/bge-m3", } def optimize_model_for_npu(model: OVModelForFeatureExtraction): From a0a725884b327899a6c8b7eb6e13f8ae940eecc3 Mon Sep 17 00:00:00 2001 
From: Antonio Martinez Date: Mon, 10 Mar 2025 13:29:55 -0700 Subject: [PATCH 17/25] Update args in main.py Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index f64953d3..a9be4005 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -22,7 +22,7 @@ def main(args): parser.add_argument("--chat_precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Chat model precision") parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3") parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in") - parser.add_argument("--example_pdf", type=str, default="data/large.pdf", + parser.add_argument("--rag_pdf", type=str, default="data/large.pdf", help="Path to the PDF file which is an additional context") parser.add_argument("--public", default=False, action="store_true", help="Whether interface should be available publicly") From cbf9da45bccc5cb199e68c01c9a867f6d7488523 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Mon, 10 Mar 2025 14:46:01 -0700 Subject: [PATCH 18/25] fixing main.py Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index a9be4005..16b20e6e 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -9,7 +9,7 @@ def main(args): embedding_model_dir = chat.convert_embedding_model(args.embedding_model_type, Path(args.model_dir)) chat_model_dir = chat.convert_chat_model(args.chat_model_type, args.chat_precision, Path(args.model_dir), args.hf_token) - app.run(chat_model_dir, embedding_model_dir, Path(args.rag_pdf), "GPU", args.public) + app.run(str(chat_model_dir.parent), str(embedding_model_dir.parent), Path(args.rag_pdf), "GPU", args.public) if __name__ == '__main__': @@ -22,7 +22,7 @@ def main(args): parser.add_argument("--chat_precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Chat model precision") parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3") parser.add_argument("--model_dir", type=str, default="model", help="Directory to place the model in") - parser.add_argument("--rag_pdf", type=str, default="data/large.pdf", + parser.add_argument("--rag_pdf", type=str, default="data/test_painting_llm_rag.pdf", help="Path to the PDF file which is an additional context") parser.add_argument("--public", default=False, action="store_true", help="Whether interface should be available publicly") From 92667acfac57af0f21844d318b96ed0147a8b4d2 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Mon, 10 Mar 2025 14:56:23 -0700 Subject: [PATCH 19/25] Set AUTO:GPU,CPU to main.py Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index 16b20e6e..3ba05bf9 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -9,7 +9,7 @@ def main(args): embedding_model_dir = chat.convert_embedding_model(args.embedding_model_type, Path(args.model_dir)) chat_model_dir = chat.convert_chat_model(args.chat_model_type, args.chat_precision, Path(args.model_dir), 
args.hf_token)
 
-    app.run(str(chat_model_dir.parent), str(embedding_model_dir.parent), Path(args.rag_pdf), "GPU", args.public)
+    app.run(str(chat_model_dir.parent), str(embedding_model_dir.parent), Path(args.rag_pdf), "AUTO:GPU,CPU", args.public)
 
 
 if __name__ == '__main__':

From 4bb4705b409d3929d398b5f9b915a0014ff7e858 Mon Sep 17 00:00:00 2001
From: Antonio Martinez
Date: Wed, 12 Mar 2025 13:25:52 -0700
Subject: [PATCH 20/25] Update README

Signed-off-by: Antonio Martinez
---
 ai_ref_kits/README.md              | 15 ++++++++++++++-
 ai_ref_kits/agentic_llm_rag/app.py |  1 +
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/ai_ref_kits/README.md b/ai_ref_kits/README.md
index bf45ea2a..dc27567e 100644
--- a/ai_ref_kits/README.md
+++ b/ai_ref_kits/README.md
@@ -15,6 +15,7 @@
   - [🔦 Explainable AI](#-explainable-ai)
   - [🖼️ Multimodal AI Visual Generator](#%EF%B8%8F-multimodal-ai-visual-generator)
   - [💬 Conversational AI Chatbot](#-conversational-ai-chatbot)
+  - [🛒 AI Insight Agent with RAG](#-agentic_llm_rag)
   - [Troubleshooting and Resources](#troubleshooting-and-resources)
 
 
@@ -115,7 +116,19 @@ An in-depth demo of how the Multimodal AI Visual Generator Kit creates a real-ti
 | Example industries | Tourism |
 | Demo  |  |
 
-The Conversational AI Chatbot is an open-source, voice-driven chat agent that answers spoken questions with meaningful, spoken responses. It can be configured to respond in any type of scenario or context. This kit demonstrates the AI Chatbot’s capabilities by simulating the experience of talking to a hotel concierge.
+The Conversational AI Chatbot is an open-source, voice-driven chat agent that answers spoken questions with meaningful, spoken responses. It can be configured to respond in any type of scenario or context.
+This kit demonstrates the AI Chatbot’s capabilities by simulating the experience of talking to a hotel concierge.
+
+### 🛒 AI Insight Agent with RAG
+[![agentic_llm_rag](https://github.com/user-attachments/assets/ad51288e-142c-4cb1-9ef6-21839bd02d5e)](agentic_llm_rag)
+
+| [AI Insight Agent with RAG](agentic_llm_rag) |  |
+|--------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
+| Related AI concepts | Natural Language Understanding, Large Language Models (LLMs), Retrieval Augmented Generation (RAG), Agentic AI, Generative AI |
+| Example industries | Retail |
+| Demo  |  |
+
+The AI Insight Agent with RAG uses Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) to interpret user prompts, engage in meaningful dialogue, perform calculations, ground its answers in retrieved documents, and add items to a virtual shopping cart on the user's behalf.
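In miniature, the pattern described above (plain Python functions wrapped as tools and handed to a ReAct agent) looks like the sketch below. It reuses the llama-index APIs already imported in app.py; `calculate_paint_gallons` and the 400 sq ft per gallon coverage figure are illustrative stand-ins, not the kit's actual tool implementations.

```python
import math

from llama_index.core.agent import ReActAgent
from llama_index.core.tools import FunctionTool


def calculate_paint_gallons(area: float) -> int:
    """Toy stand-in for PaintCalculator: assumes one gallon covers ~400 sq ft."""
    return math.ceil(area / 400)


def build_agent(llm) -> ReActAgent:
    """Wrap the function as a tool and hand it to a ReAct agent; llm is the OpenVINOLLM from setup_models()."""
    gallons_tool = FunctionTool.from_defaults(
        fn=calculate_paint_gallons,
        name="calculate_paint_gallons",
        description="Calculate how many gallons of paint are needed to cover an area in square feet.",
    )
    return ReActAgent.from_tools([gallons_tool], llm=llm, max_iterations=5, verbose=True)
```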
## Troubleshooting and Resources - Open a [discussion topic](https://github.com/openvinotoolkit/openvino_build_deploy/discussions) diff --git a/ai_ref_kits/agentic_llm_rag/app.py b/ai_ref_kits/agentic_llm_rag/app.py index 1d2af5af..4c043a31 100644 --- a/ai_ref_kits/agentic_llm_rag/app.py +++ b/ai_ref_kits/agentic_llm_rag/app.py @@ -479,6 +479,7 @@ def run()-> None: gr.Markdown("------------------------------") + log.info("Demo is ready!") demo.launch(share=public_interface) run() From 11b3f8969adb4554c44d7723dac85c94e3a9f416 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Wed, 12 Mar 2025 13:41:28 -0700 Subject: [PATCH 21/25] Update gif in README and add queue() to Gradio demo Signed-off-by: Antonio Martinez --- ai_ref_kits/README.md | 2 +- ai_ref_kits/agentic_llm_rag/app.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ref_kits/README.md b/ai_ref_kits/README.md index dc27567e..14434b3e 100644 --- a/ai_ref_kits/README.md +++ b/ai_ref_kits/README.md @@ -120,7 +120,7 @@ The Conversational AI Chatbot is an open-source, voice-driven chat agent that an This kit demonstrates the AI Chatbot’s capabilities by simulating the experience of talking to a hotel concierge. ### 🛒 AI Insight Agent with RAG -[![agentic_llm_rag](https://github.com/user-attachments/assets/ad51288e-142c-4cb1-9ef6-21839bd02d5e)](agentic_llm_rag) +[![agentic_llm_rag](https://github.com/user-attachments/assets/0471ab91-ded5-4a5f-8d8e-5432f1b4b45c)](agentic_llm_rag) | [AI Insight Agent with RAG](agentic_llm_rag) | | |--------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| diff --git a/ai_ref_kits/agentic_llm_rag/app.py b/ai_ref_kits/agentic_llm_rag/app.py index 4c043a31..06bb0ddd 100644 --- a/ai_ref_kits/agentic_llm_rag/app.py +++ b/ai_ref_kits/agentic_llm_rag/app.py @@ -480,7 +480,7 @@ def run()-> None: gr.Markdown("------------------------------") log.info("Demo is ready!") - demo.launch(share=public_interface) + demo.queue().launch(share=public_interface) run() From 746d9d71feb3c4d43cae0ba15ad6768b7f3e86b3 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Wed, 12 Mar 2025 14:03:11 -0700 Subject: [PATCH 22/25] Update README Signed-off-by: Antonio Martinez --- ai_ref_kits/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ref_kits/README.md b/ai_ref_kits/README.md index 14434b3e..87f86ffb 100644 --- a/ai_ref_kits/README.md +++ b/ai_ref_kits/README.md @@ -15,7 +15,7 @@ - [🔦 Explainable AI](#-explainable-ai) - [🖼️ Multimodal AI Visual Generator](#%EF%B8%8F-multimodal-ai-visual-generator) - [💬 Conversational AI Chatbot](#-conversational-ai-chatbot) - - [🛒 AI Insight Agent with RAG](#-agentic_llm_rag) + - [🛒 AI Insight Agent with RAG](#-agentic-llm-rag) - [Troubleshooting and Resources](#troubleshooting-and-resources) @@ -120,7 +120,7 @@ The Conversational AI Chatbot is an open-source, voice-driven chat agent that an This kit demonstrates the AI Chatbot’s capabilities by simulating the experience of talking to a hotel concierge. 
### 🛒 AI Insight Agent with RAG -[![agentic_llm_rag](https://github.com/user-attachments/assets/0471ab91-ded5-4a5f-8d8e-5432f1b4b45c)](agentic_llm_rag) +[![agentic_llm_rag](https://github.com/user-attachments/assets/0471ab91-ded5-4a5f-8d8e-5432f1b4b45c)](agentic-llm-rag) | [AI Insight Agent with RAG](agentic_llm_rag) | | |--------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| From ce0a65f1f780cabf00af250682905a052dbb2b61 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Wed, 12 Mar 2025 14:05:37 -0700 Subject: [PATCH 23/25] Update README Signed-off-by: Antonio Martinez --- ai_ref_kits/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/README.md b/ai_ref_kits/README.md index 87f86ffb..7970d84e 100644 --- a/ai_ref_kits/README.md +++ b/ai_ref_kits/README.md @@ -15,7 +15,7 @@ - [🔦 Explainable AI](#-explainable-ai) - [🖼️ Multimodal AI Visual Generator](#%EF%B8%8F-multimodal-ai-visual-generator) - [💬 Conversational AI Chatbot](#-conversational-ai-chatbot) - - [🛒 AI Insight Agent with RAG](#-agentic-llm-rag) + - [🛒 AI Insight Agent with RAG](#-AI-Insight-Agent-with-RAG) - [Troubleshooting and Resources](#troubleshooting-and-resources) From 72d3136acdd05d69a198cbc086ab46f70729f627 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 13 Mar 2025 15:10:14 -0700 Subject: [PATCH 24/25] test llama 3B for CICDs Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py | 3 ++- ai_ref_kits/agentic_llm_rag/main.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py index 9163ba48..2e46e431 100644 --- a/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py +++ b/ai_ref_kits/agentic_llm_rag/convert_and_optimize_llm.py @@ -9,6 +9,7 @@ from transformers import AutoTokenizer MODEL_MAPPING = { + "llama3.2-3B": "meta-llama/Llama-3.2-3B-Instruct", "qwen2-7B": "Qwen/Qwen2-7B-Instruct", "bge-large": "BAAI/bge-large-en-v1.5", "bge-small": "BAAI/bge-small-en-v1.5", @@ -127,7 +128,7 @@ def convert_embedding_model(model_type: str, model_dir: Path) -> Path: if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B"], + parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B", "llama3.2-3B"], default="qwen2-7B", help="Chat model to be converted") parser.add_argument("--embedding_model_type", type=str, choices=["bge-small", "bge-large", "bge-m3"], default="bge-large", help="Embedding model to be converted") diff --git a/ai_ref_kits/agentic_llm_rag/main.py b/ai_ref_kits/agentic_llm_rag/main.py index 3ba05bf9..6f73f75f 100644 --- a/ai_ref_kits/agentic_llm_rag/main.py +++ b/ai_ref_kits/agentic_llm_rag/main.py @@ -15,8 +15,8 @@ def main(args): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B"], - default="qwen2-7B", help="Chat model to be converted") + parser.add_argument("--chat_model_type", type=str, choices=["qwen2-7B", "llama3.2-3B"], + default="llama3.2-3B", help="Chat model to be converted") parser.add_argument("--embedding_model_type", type=str, choices=["bge-small", "bge-large", "bge-m3"], default="bge-small", help="Embedding model to be converted") 
parser.add_argument("--chat_precision", type=str, default="int4", choices=["fp16", "int8", "int4"], help="Chat model precision") From 99c782a3e88b26be6bbd71994164bd630416ada1 Mon Sep 17 00:00:00 2001 From: Antonio Martinez Date: Thu, 13 Mar 2025 22:44:18 -0700 Subject: [PATCH 25/25] update image Signed-off-by: Antonio Martinez --- ai_ref_kits/agentic_llm_rag/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ref_kits/agentic_llm_rag/README.md b/ai_ref_kits/agentic_llm_rag/README.md index 25ef8d1c..064ea7e2 100644 --- a/ai_ref_kits/agentic_llm_rag/README.md +++ b/ai_ref_kits/agentic_llm_rag/README.md @@ -22,7 +22,7 @@ This kit uses the following technology stack: Check out our [AI Reference Kits repository](/) for other kits. -![ai-insight-agent-with-rag](https://github.com/user-attachments/assets/da97bea7-29e8-497f-b7ba-4e00c79773f1) +![agentic_llm_rag](https://github.com/user-attachments/assets/0471ab91-ded5-4a5f-8d8e-5432f1b4b45c)
 <details open><summary>Table of Contents</summary>
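Taken together, the series leaves the kit with two entry points: main.py, a CI/CD wrapper that converts the models before launching, and app.py, which serves already-converted models. A usage sketch assembled from the argparse defaults above (`<your_token>` is a placeholder; adjust paths and the target device to your setup):

```shell
# Convert the models (llama3.2-3B chat, bge-small embeddings), then launch on AUTO:GPU,CPU
python main.py --chat_model_type llama3.2-3B --embedding_model_type bge-small \
    --chat_precision int4 --model_dir model --rag_pdf data/test_painting_llm_rag.pdf \
    --hf_token <your_token>

# Launch directly against already-converted models
python app.py --chat_model model/qwen2-7B-INT4 --embedding_model model/bge-large-FP32 \
    --rag_pdf data/test_painting_llm_rag.pdf --device GPU
```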