From 836f20de653e63750d549d3dbcbd4c28700eebb9 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Mon, 3 Jun 2024 19:00:06 +0200 Subject: [PATCH] added option to make deferred evaluation quieter --- trulens_eval/trulens_eval/feedback/provider/base.py | 6 ++---- trulens_eval/trulens_eval/feedback/provider/hugs.py | 2 +- trulens_eval/trulens_eval/tru.py | 9 ++++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py index 2ca01fe73..0fa63a25a 100644 --- a/trulens_eval/trulens_eval/feedback/provider/base.py +++ b/trulens_eval/trulens_eval/feedback/provider/base.py @@ -5,7 +5,6 @@ import nltk from nltk.tokenize import sent_tokenize import numpy as np -from tqdm.auto import tqdm from trulens_eval.feedback import prompts from trulens_eval.feedback.provider.endpoint import base as mod_endpoint @@ -1198,14 +1197,13 @@ def groundedness_measure_with_cot_reasons( Returns: Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation. 
""" - nltk.download('punkt') + nltk.download('punkt', quiet=True) groundedness_scores = {} reasons_str = "" hypotheses = sent_tokenize(statement) system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM - for i, hypothesis in enumerate(tqdm( - hypotheses, desc="Groundedness per statement in source")): + for i, hypothesis in enumerate(hypotheses): user_prompt = prompts.LLM_GROUNDEDNESS_USER.format( premise=f"{source}", hypothesis=f"{hypothesis}" ) diff --git a/trulens_eval/trulens_eval/feedback/provider/hugs.py b/trulens_eval/trulens_eval/feedback/provider/hugs.py index 6cd4835c7..a3c7594d2 100644 --- a/trulens_eval/trulens_eval/feedback/provider/hugs.py +++ b/trulens_eval/trulens_eval/feedback/provider/hugs.py @@ -218,7 +218,7 @@ def groundedness_measure_with_nli(self, source: str, Returns: Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation. """ - nltk.download('punkt') + nltk.download('punkt', quiet=True) groundedness_scores = {} reasons_str = "" diff --git a/trulens_eval/trulens_eval/tru.py b/trulens_eval/trulens_eval/tru.py index 85edac997..de6da1e07 100644 --- a/trulens_eval/trulens_eval/tru.py +++ b/trulens_eval/trulens_eval/tru.py @@ -749,6 +749,8 @@ def start_evaluator(self, fork: If set, will start the evaluator in a new process instead of a thread. NOT CURRENTLY SUPPORTED. + disable_tqdm: If set, will disable progress bar logging from the evaluator. + Returns: The started process or thread that is executing the deferred feedback evaluator. @@ -816,14 +818,15 @@ def runloop(): total=queue_total, postfix={ status.name: count for status, count in queue_stats.items() - } + }, + disable=disable_tqdm ) # Show the status of the results so far. - tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs") + tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs", disable=disable_tqdm) # Show what is being waited for right now. 
- tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs") + tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs", disable=disable_tqdm) runs_stats = defaultdict(int)