From 836f20de653e63750d549d3dbcbd4c28700eebb9 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Mon, 3 Jun 2024 19:00:06 +0200 Subject: [PATCH] added option to make deferred evaluation quieter --- trulens_eval/trulens_eval/feedback/provider/base.py | 6 ++---- trulens_eval/trulens_eval/feedback/provider/hugs.py | 2 +- trulens_eval/trulens_eval/tru.py | 9 ++++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py index 2ca01fe73..0fa63a25a 100644 --- a/trulens_eval/trulens_eval/feedback/provider/base.py +++ b/trulens_eval/trulens_eval/feedback/provider/base.py @@ -5,7 +5,6 @@ import nltk from nltk.tokenize import sent_tokenize import numpy as np -from tqdm.auto import tqdm from trulens_eval.feedback import prompts from trulens_eval.feedback.provider.endpoint import base as mod_endpoint @@ -1198,14 +1197,13 @@ def groundedness_measure_with_cot_reasons( Returns: Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation. 
""" - nltk.download('punkt') + nltk.download('punkt', quiet=True) groundedness_scores = {} reasons_str = "" hypotheses = sent_tokenize(statement) system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM - for i, hypothesis in enumerate(tqdm( - hypotheses, desc="Groundedness per statement in source")): + for i, hypothesis in enumerate(hypotheses): user_prompt = prompts.LLM_GROUNDEDNESS_USER.format( premise=f"{source}", hypothesis=f"{hypothesis}" ) diff --git a/trulens_eval/trulens_eval/feedback/provider/hugs.py b/trulens_eval/trulens_eval/feedback/provider/hugs.py index 6cd4835c7..a3c7594d2 100644 --- a/trulens_eval/trulens_eval/feedback/provider/hugs.py +++ b/trulens_eval/trulens_eval/feedback/provider/hugs.py @@ -218,7 +218,7 @@ def groundedness_measure_with_nli(self, source: str, Returns: Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation. """ - nltk.download('punkt') + nltk.download('punkt', quiet=True) groundedness_scores = {} reasons_str = "" diff --git a/trulens_eval/trulens_eval/tru.py b/trulens_eval/trulens_eval/tru.py index 85edac997..de6da1e07 100644 --- a/trulens_eval/trulens_eval/tru.py +++ b/trulens_eval/trulens_eval/tru.py @@ -749,6 +749,8 @@ def start_evaluator(self, fork: If set, will start the evaluator in a new process instead of a thread. NOT CURRENTLY SUPPORTED. + disable_tqdm: If set, will disable progress bar logging from the evaluator. + Returns: The started process or thread that is executing the deferred feedback evaluator. @@ -816,14 +818,15 @@ def runloop(): total=queue_total, postfix={ status.name: count for status, count in queue_stats.items() - } + }, + disable=disable_tqdm ) # Show the status of the results so far. - tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs") + tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs", disable=disable_tqdm) # Show what is being waited for right now. 
- tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs") + tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs", disable=disable_tqdm) runs_stats = defaultdict(int)