diff --git a/app/api/tools_config.json b/app/api/tools_config.json
index 92abc4f3..85c42dee 100644
--- a/app/api/tools_config.json
+++ b/app/api/tools_config.json
@@ -34,5 +34,9 @@
     "15": {
         "path": "features.rubric_generator.core",
         "metadata_file": "metadata.json"
+    },
+    "14": {
+        "path": "features.writing_feedback_generator.core",
+        "metadata_file": "metadata.json"
     }
 }
diff --git a/app/features/writing_feedback_generator/__init__.py b/app/features/writing_feedback_generator/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/app/features/writing_feedback_generator/core.py b/app/features/writing_feedback_generator/core.py
new file mode 100644
index 00000000..1499c65a
--- /dev/null
+++ b/app/features/writing_feedback_generator/core.py
@@ -0,0 +1,68 @@
+from app.features.writing_feedback_generator.tools import WritingFeedbackGeneratorPipeline
+from app.services.schemas import WritingFeedbackGeneratorArgs
+from app.utils.document_loaders import get_docs
+from app.services.logger import setup_logger
+from app.api.error_utilities import LoaderError, ToolExecutorError
+
+logger = setup_logger()
+
+def executor(grade_level: str,
+             assignment_description: str,
+             criteria: str,
+             writing_to_review: str,
+             criteria_file_url: str,
+             criteria_file_type: str,
+             wtr_file_url: str,
+             wtr_file_type: str,
+             lang: str,
+             verbose=False):
+
+    try:
+        if criteria_file_type:
+            logger.info(f"Generating docs from {criteria_file_type}")
+
+        if wtr_file_type:
+            logger.info(f"Generating docs from {wtr_file_type}")
+
+        # Load a source only when both its URL and file type are provided.
+        def fetch_docs(file_url, file_type):
+            return get_docs(file_url, file_type, True) if file_url and file_type else None
+
+        criteria_docs = fetch_docs(criteria_file_url, criteria_file_type)
+        wtr_docs = fetch_docs(wtr_file_url, wtr_file_type)
+
+        # Concatenate when both sources loaded; otherwise use whichever one
+        # loaded, or None when neither did.
+        docs = (
+            criteria_docs + wtr_docs
+            if criteria_docs and wtr_docs
+            else criteria_docs or wtr_docs
+        )
+
+        writing_feedback_args = WritingFeedbackGeneratorArgs(
+            grade_level=grade_level,
+            assignment_description=assignment_description,
+            criteria=criteria,
+            writing_to_review=writing_to_review,
+            criteria_file_url=criteria_file_url,
+            criteria_file_type=criteria_file_type,
+            wtr_file_url=wtr_file_url,
+            wtr_file_type=wtr_file_type,
+            lang=lang
+        )
+
+        output = WritingFeedbackGeneratorPipeline(args=writing_feedback_args, verbose=verbose).generate_feedback(docs)
+
+        logger.info("Writing feedback generated successfully")
+
+    except LoaderError as e:
+        error_message = str(e)
+        logger.error(f"Error in Writing Feedback Generator Pipeline -> {error_message}")
+        raise ToolExecutorError(error_message)
+
+    except Exception as e:
+        error_message = f"Error in executor: {e}"
+        logger.error(error_message)
+        raise ValueError(error_message) from e
+
+    return output
\ No newline at end of file
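Note on the `docs` merge in `core.py`: the conditional expression covers three cases: both sources loaded, exactly one loaded, and neither loaded. A minimal, self-contained sketch of that behavior (plain lists stand in for the loaded `Document` objects):

```python
def merge_docs(criteria_docs, wtr_docs):
    # Mirrors the expression in core.py: concatenate when both sources are
    # present; otherwise fall back to whichever loaded, or None for neither.
    return (
        criteria_docs + wtr_docs
        if criteria_docs and wtr_docs
        else criteria_docs or wtr_docs
    )

assert merge_docs(["c1"], ["w1"]) == ["c1", "w1"]  # both provided
assert merge_docs(["c1"], None) == ["c1"]          # criteria only
assert merge_docs(None, None) is None              # neither provided
```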
Type", + "name": "criteria_file_type", + "type": "text" + }, + { + "label": "Writing to Review File URL", + "name": "wtr_file_url", + "type": "text" + }, + { + "label": "Writing to Review File Type", + "name": "wtr_file_type", + "type": "text" + }, + { + "label": "Language", + "name": "lang", + "type": "text" + } + ] +} \ No newline at end of file diff --git a/app/features/writing_feedback_generator/tests/__init__.py b/app/features/writing_feedback_generator/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/features/writing_feedback_generator/tests/test_core.py b/app/features/writing_feedback_generator/tests/test_core.py new file mode 100644 index 00000000..5e07c0b7 --- /dev/null +++ b/app/features/writing_feedback_generator/tests/test_core.py @@ -0,0 +1,186 @@ +import pytest +from app.features.writing_feedback_generator.core import executor +from app.features.writing_feedback_generator.tools import WritingFeedback + +# Base attributes reused across all tests +base_attributes = { + "grade_level": "university", + "assignment_description": "Review and provide feedback on the assigned text.", + "criteria": "", + "writing_to_review": "", + "criteria_file_url": "https://docs.google.com/document/d/1IsTPJSgWMdD20tXMm1sXJSCc0xz9Kxmn/edit?usp=sharing&ouid=107052763106493355624&rtpof=true&sd=true", + "criteria_file_type": "gdoc", + "lang": "en" +} + +# PDF Tests +def test_executor_pdf_wtr_url_valid(): + writing_feedback = executor( + **base_attributes, + wtr_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", + wtr_file_type="pdf" + ) + assert isinstance(writing_feedback, WritingFeedback) + +def test_executor_pdf_wtr_url_invalid(): + with pytest.raises(ValueError) as exc_info: + executor( + **base_attributes, + wtr_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf", + wtr_file_type=1 + ) + assert isinstance(exc_info.value, ValueError) + +# CSV Tests +def test_executor_csv_wtr_url_valid(): + writing_feedback = executor( + **base_attributes, + wtr_file_url="https://filesamples.com/samples/document/csv/sample1.csv", + wtr_file_type="csv" + ) + assert isinstance(writing_feedback, WritingFeedback) + +def test_executor_csv_wtr_url_invalid(): + with pytest.raises(ValueError) as exc_info: + executor( + **base_attributes, + wtr_file_url="https://filesamples.com/samples/document/csv/sample1.csv", + wtr_file_type=1 + ) + assert isinstance(exc_info.value, ValueError) + +# TXT Tests +def test_executor_txt_wtr_url_valid(): + writing_feedback = executor( + **base_attributes, + wtr_file_url="https://filesamples.com/samples/document/txt/sample1.txt", + wtr_file_type="txt" + ) + assert isinstance(writing_feedback, WritingFeedback) + +def test_executor_txt_wtr_url_invalid(): + with pytest.raises(ValueError) as exc_info: + executor( + **base_attributes, + wtr_file_url="https://filesamples.com/samples/document/txt/sample1.txt", + wtr_file_type=1 + ) + assert isinstance(exc_info.value, ValueError) + +# MD Tests +def test_executor_md_wtr_url_valid(): + writing_feedback = executor( + **base_attributes, + wtr_file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md", + wtr_file_type="md" + ) + assert isinstance(writing_feedback, WritingFeedback) + +def test_executor_md_wtr_url_invalid(): + with pytest.raises(ValueError) as exc_info: + executor( + **base_attributes, + wtr_file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md", + wtr_file_type=1 + ) + assert isinstance(exc_info.value, ValueError) + +# PPTX Tests +def 
diff --git a/app/features/writing_feedback_generator/tests/test_core.py b/app/features/writing_feedback_generator/tests/test_core.py
new file mode 100644
index 00000000..5e07c0b7
--- /dev/null
+++ b/app/features/writing_feedback_generator/tests/test_core.py
@@ -0,0 +1,186 @@
+import pytest
+from app.features.writing_feedback_generator.core import executor
+from app.features.writing_feedback_generator.tools import WritingFeedback
+
+# Base attributes reused across all tests
+base_attributes = {
+    "grade_level": "university",
+    "assignment_description": "Review and provide feedback on the assigned text.",
+    "criteria": "",
+    "writing_to_review": "",
+    "criteria_file_url": "https://docs.google.com/document/d/1IsTPJSgWMdD20tXMm1sXJSCc0xz9Kxmn/edit?usp=sharing&ouid=107052763106493355624&rtpof=true&sd=true",
+    "criteria_file_type": "gdoc",
+    "lang": "en"
+}
+
+# PDF Tests
+def test_executor_pdf_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf",
+        wtr_file_type="pdf"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_pdf_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://filesamples.com/samples/document/pdf/sample1.pdf",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# CSV Tests
+def test_executor_csv_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://filesamples.com/samples/document/csv/sample1.csv",
+        wtr_file_type="csv"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_csv_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://filesamples.com/samples/document/csv/sample1.csv",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# TXT Tests
+def test_executor_txt_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://filesamples.com/samples/document/txt/sample1.txt",
+        wtr_file_type="txt"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_txt_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://filesamples.com/samples/document/txt/sample1.txt",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# MD Tests
+def test_executor_md_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md",
+        wtr_file_type="md"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_md_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://github.com/radicalxdev/kai-ai-backend/blob/main/README.md",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# PPTX Tests
+def test_executor_pptx_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx",
+        wtr_file_type="pptx"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_pptx_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://scholar.harvard.edu/files/torman_personal/files/samplepptx.pptx",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# DOCX Tests
+def test_executor_docx_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://filesamples.com/samples/document/docx/sample1.docx",
+        wtr_file_type="docx"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_docx_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://filesamples.com/samples/document/docx/sample1.docx",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# XLS Tests
+def test_executor_xls_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://filesamples.com/samples/document/xls/sample1.xls",
+        wtr_file_type="xls"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_xls_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://filesamples.com/samples/document/xls/sample1.xls",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# XLSX Tests
+def test_executor_xlsx_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx",
+        wtr_file_type="xlsx"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_xlsx_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://filesamples.com/samples/document/xlsx/sample1.xlsx",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# GPDF Tests
+def test_executor_gpdf_wtr_url_valid():
+    writing_feedback = executor(
+        **base_attributes,
+        wtr_file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view",
+        wtr_file_type="gpdf"
+    )
+    assert isinstance(writing_feedback, WritingFeedback)
+
+def test_executor_gpdf_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://drive.google.com/file/d/1fUj1uWIMh6QZsPkt0Vs7mEd2VEqz3O8l/view",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
+
+# MP3 Tests
+def test_executor_mp3_wtr_url_invalid():
+    with pytest.raises(ValueError) as exc_info:
+        executor(
+            **base_attributes,
+            wtr_file_url="https://raw.githubusercontent.com/asleem/uploaded_files/main/dummy.mp3",
+            wtr_file_type=1
+        )
+    assert isinstance(exc_info.value, ValueError)
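The valid/invalid pairs above repeat one pattern per file type. If the suite grows, `pytest.mark.parametrize` could collapse the duplication; a sketch under the assumption that `base_attributes` stays as defined above:

```python
import pytest
from app.features.writing_feedback_generator.core import executor
from app.features.writing_feedback_generator.tools import WritingFeedback

WTR_FIXTURES = [
    ("https://filesamples.com/samples/document/pdf/sample1.pdf", "pdf"),
    ("https://filesamples.com/samples/document/csv/sample1.csv", "csv"),
    ("https://filesamples.com/samples/document/txt/sample1.txt", "txt"),
]

@pytest.mark.parametrize("url,file_type", WTR_FIXTURES)
def test_executor_wtr_valid(url, file_type):
    feedback = executor(**base_attributes, wtr_file_url=url, wtr_file_type=file_type)
    assert isinstance(feedback, WritingFeedback)

@pytest.mark.parametrize("url,_", WTR_FIXTURES)
def test_executor_wtr_invalid_type(url, _):
    # A non-string file type should surface as ValueError from the executor.
    with pytest.raises(ValueError):
        executor(**base_attributes, wtr_file_url=url, wtr_file_type=1)
```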
diff --git a/app/features/writing_feedback_generator/tools.py b/app/features/writing_feedback_generator/tools.py
new file mode 100644
index 00000000..44838c34
--- /dev/null
+++ b/app/features/writing_feedback_generator/tools.py
@@ -0,0 +1,124 @@
+from pydantic import BaseModel
+from typing import List, Optional
+from langchain_core.documents import Document
+from langchain_chroma import Chroma
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnableParallel
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_google_genai import GoogleGenerativeAI
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from app.services.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+class FeedbackSection(BaseModel):
+    title: str
+    points: List[str]
+
+class WritingFeedback(BaseModel):
+    title: str
+    areas_of_strength: FeedbackSection
+    areas_for_growth: FeedbackSection
+    general_feedback: FeedbackSection
+
+class WritingFeedbackGeneratorPipeline:
+    def __init__(self, args=None, verbose=False):
+        self.verbose = verbose
+        self.args = args
+        self.model = GoogleGenerativeAI(model="gemini-1.5-pro")
+        self.vectorstore_class = Chroma
+        self.parsers = {
+            "areas_of_strength": JsonOutputParser(pydantic_object=FeedbackSection),
+            "areas_for_growth": JsonOutputParser(pydantic_object=FeedbackSection),
+            "general_feedback": JsonOutputParser(pydantic_object=FeedbackSection),
+        }
+        self.vectorstore = None
+        self.retriever = None
+
+    def compile_vectorstore(self, documents: List[Document]):
+        if self.verbose:
+            logger.info("Creating vectorstore from documents...")
+        self.vectorstore = self.vectorstore_class.from_documents(documents, GoogleGenerativeAIEmbeddings(model="models/embedding-001"))
+        self.retriever = self.vectorstore.as_retriever()
+        if self.verbose:
+            logger.info("Vectorstore and retriever created successfully.")
+
+    def compile_pipeline(self):
+        prompts = {
+            "areas_of_strength": PromptTemplate(
+                template=(
+                    "Analyze the provided writing and generate feedback under 'Areas of Strength'. "
+                    "Focus on what the writer has done well, including structure, clarity, and topic presentation. "
+                    "Assignment Description: {assignment_description}. Grade Level: {grade_level}. "
+                    "Use the provided Writing to Review: {writing_to_review} and Criteria: {criteria}. If either is empty, use the context {context}. "
+                    "Respond in this JSON format: \n{format_instructions}"
+                ),
+                input_variables=["writing_to_review", "assignment_description", "grade_level", "criteria", "context"],
+                partial_variables={"format_instructions": self.parsers["areas_of_strength"].get_format_instructions()},
+            ),
+            "areas_for_growth": PromptTemplate(
+                template=(
+                    "Analyze the provided writing and generate feedback under 'Areas for Growth'. "
+                    "Identify areas where the writer could improve, focusing on content depth, clarity, and logical argumentation. "
+                    "Assignment Description: {assignment_description}. Grade Level: {grade_level}. "
+                    "Use the provided Writing to Review: {writing_to_review} and Criteria: {criteria}. If either is empty, use the context {context}. "
+                    "Respond in this JSON format: \n{format_instructions}"
+                ),
+                input_variables=["writing_to_review", "assignment_description", "grade_level", "criteria", "context"],
+                partial_variables={"format_instructions": self.parsers["areas_for_growth"].get_format_instructions()},
+            ),
+            "general_feedback": PromptTemplate(
+                template=(
+                    "Analyze the provided writing and generate general feedback on 'Writing Mechanics'. "
+                    "This includes grammar, sentence structure, and overall readability. "
+                    "Assignment Description: {assignment_description}. Grade Level: {grade_level}. "
+                    "Use the provided Writing to Review: {writing_to_review} and Criteria: {criteria}. If either is empty, use the context {context}. "
+                    "Respond in this JSON format: \n{format_instructions}"
+                ),
+                input_variables=["writing_to_review", "assignment_description", "grade_level", "criteria", "context"],
+                partial_variables={"format_instructions": self.parsers["general_feedback"].get_format_instructions()},
+            ),
+        }
+
+        chains = {
+            key: prompt | self.model | self.parsers[key]
+            for key, prompt in prompts.items()
+        }
+        return RunnableParallel(branches=chains)
+
+    def generate_context(self, query: str) -> str:
+        # The retriever returns a list of Documents; join their page contents
+        # so the prompt receives a plain string rather than a list repr.
+        docs = self.retriever.invoke(query)
+        return "\n\n".join(doc.page_content for doc in docs)
+
+    def generate_feedback(self, documents: Optional[List[Document]] = None):
+        if documents:
+            self.compile_vectorstore(documents)
+            context = self.generate_context("Provide context for evaluating this writing assignment.")
+        else:
+            context = ""
+
+        pipeline = self.compile_pipeline()
+        inputs = {
+            "writing_to_review": self.args.writing_to_review,
+            "assignment_description": self.args.assignment_description,
+            "grade_level": self.args.grade_level,
+            "criteria": self.args.criteria,
+            "context": context,
+        }
+
+        try:
+            results = pipeline.invoke(inputs)
+            feedback = WritingFeedback(
+                title=f"Feedback on Your Writing: {self.args.assignment_description}",
+                areas_of_strength=results["branches"]["areas_of_strength"],
+                areas_for_growth=results["branches"]["areas_for_growth"],
+                general_feedback=results["branches"]["general_feedback"],
+            )
+            if self.verbose:
+                logger.info("Feedback successfully generated.")
+            return feedback
+        except Exception as e:
+            logger.error(f"Error generating feedback: {e}")
+            raise ValueError("Failed to generate feedback.") from e
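In `tools.py`, `compile_pipeline` wraps the three prompt chains in a single `RunnableParallel`, so `pipeline.invoke(inputs)` feeds the same input dict to every branch and returns a result shaped like `{"branches": {"areas_of_strength": ..., ...}}`. A minimal sketch of that fan-out, with stub runnables standing in for the real `prompt | model | parser` chains:

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel

# Stub chains standing in for prompt | model | parser.
chains = {
    "areas_of_strength": RunnableLambda(lambda x: {"title": "Strengths", "points": []}),
    "areas_for_growth": RunnableLambda(lambda x: {"title": "Growth", "points": []}),
}

# A plain dict of runnables is coerced into a nested RunnableParallel,
# so every branch receives the same input dict.
pipeline = RunnableParallel(branches=chains)
result = pipeline.invoke({"writing_to_review": "Sample essay text"})
print(result["branches"]["areas_of_strength"]["title"])  # -> Strengths
```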
" + "Respond in this JSON format: \n{format_instructions}" + ), + input_variables=["writing_to_review", "assignment_description", "grade_level", "criteria", "context"], + partial_variables={"format_instructions": self.parsers["general_feedback"].get_format_instructions()}, + ), + } + + chains = { + key: prompt | self.model | self.parsers[key] + for key, prompt in prompts.items() + } + return RunnableParallel(branches=chains) + + def generate_context(self, query: str) -> str: + return self.retriever.invoke(query) + + def generate_feedback(self, documents: Optional[List[Document]] = None): + if documents: + self.compile_vectorstore(documents) + context = self.generate_context("Provide context for evaluating this writing assignment.") + else: + context = "" + + pipeline = self.compile_pipeline() + inputs = { + "writing_to_review": self.args.writing_to_review, + "assignment_description": self.args.assignment_description, + "grade_level": self.args.grade_level, + "criteria": self.args.criteria, + "context": context, + } + + try: + results = pipeline.invoke(inputs) + feedback = WritingFeedback( + title=f"Feedback on Your Writing: {self.args.assignment_description}", + areas_of_strength=results["branches"]["areas_of_strength"], + areas_for_growth=results["branches"]["areas_for_growth"], + general_feedback=results["branches"]["general_feedback"], + ) + if self.verbose: + logger.info("Feedback successfully generated.") + return feedback + except Exception as e: + logger.error(f"Error generating feedback: {e}") + raise ValueError("Failed to generate feedback.") diff --git a/app/services/schemas.py b/app/services/schemas.py index 637ad680..d4f94fb1 100644 --- a/app/services/schemas.py +++ b/app/services/schemas.py @@ -138,4 +138,15 @@ class LessonPlanGeneratorArgs(BaseModel): objectives_file_type: str ac_file_url: str ac_file_type: str + lang: Optional[str] = "en" + +class WritingFeedbackGeneratorArgs(BaseModel): + grade_level: str + assignment_description: str + criteria: str + writing_to_review: str + criteria_file_url: str + criteria_file_type: str + wtr_file_url: str + wtr_file_type: str lang: Optional[str] = "en" \ No newline at end of file