@@ -134,7 +134,6 @@ def run_test_deepeval(chat_model_name: str, personality_file_path: Path, auth_to
134
134
outputs .append (output )
135
135
136
136
final_score = compute_deepeval_hallucination (inputs [:selection_num ], outputs [:selection_num ], contexts_res [:selection_num ])
137
- print (f"final_score is { final_score } " )
138
137
return final_score
139
138
140
139
@@ -203,7 +202,6 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
203
202
for response_list_per_prompt in tqdm (response_list , desc = "predict hallucination ratio" ):
204
203
score_list .append (check_eng .predict (response_list_per_prompt ))
205
204
final_score = float (np .mean (score_list ))
206
- print (f"final_score is { final_score } " )
207
205
return final_score
208
206
209
207
@@ -213,13 +211,14 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
213
211
214
212
parser = argparse .ArgumentParser ()
215
213
parser .add_argument ("--chat_model" , type = str , default = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" , help = "Path/name of the chat model" )
216
- parser .add_argument ("--personality" , type = str , default = "healthcare_personality.yaml" , help = "Path to the YAML file with chatbot personality" )
214
+ parser .add_argument ("--personality" , type = str , default = "../ healthcare_personality.yaml" , help = "Path to the YAML file with chatbot personality" )
217
215
parser .add_argument ("--hf_token" , type = str , help = "HuggingFace access token to get Llama3" )
218
216
parser .add_argument ("--check_type" , type = str , choices = ["deepeval" , "selfcheckgpt" ], default = "deepeval" , help = "Hallucination check type" )
219
217
parser .add_argument ("--selection_num" , type = int , default = 5 , help = "Maximum number of prompt are selected to compute hallucination score" )
220
218
221
219
args = parser .parse_args ()
222
220
if args .check_type == "deepeval" :
223
- run_test_deepeval (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
221
+ hallucination_score = run_test_deepeval (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
224
222
else :
225
- run_test_selfcheckgpt (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
223
+ hallucination_score = run_test_selfcheckgpt (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
224
+ print (f"hallucination_score for personality { args .personality } : { hallucination_score } " )
0 commit comments