Skip to content

Commit f01ab94

Browse files
committed
Add workflow
Signed-off-by: yzheng124 <yi.zheng@intel.com>
1 parent 4304c7b commit f01ab94

File tree

4 files changed

+42
-5
lines changed

4 files changed

+42
-5
lines changed

.github/reusable-steps/categorize-projects/action.yml

+7
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ outputs:
1616
value: ${{ steps.group-subprojects.outputs.qt }}
1717
js:
1818
value: ${{ steps.group-subprojects.outputs.js }}
19+
unittest:
20+
value: ${{ steps.group-subprojects.outputs.unittest }}
1921

2022
runs:
2123
using: 'composite'
@@ -42,6 +44,8 @@ runs:
4244
qt+=("$dir")
4345
elif [ -f "$dir/main.py" ] && grep -q -- "--stream" "$dir/main.py"; then
4446
webcam+=("$dir")
47+
elif [ -d "$dir/test" ]; then
48+
unittest+=("$dir/test")
4549
else
4650
python+=("$dir")
4751
fi
@@ -53,13 +57,15 @@ runs:
5357
webcam_json=$(printf '%s\n' "${webcam[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
5458
qt_json=$(printf '%s\n' "${qt[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
5559
js_json=$(printf '%s\n' "${js[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
60+
# Serialize the collected unit-test directories (the `unittest` array) into a compact JSON list,
# dropping empty entries so an empty array yields `[]`.
unittest_json=$(printf '%s\n' "${unittest[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
5661
5762
echo "notebook=$notebook_json" >> $GITHUB_OUTPUT
5863
echo "python=$python_json" >> $GITHUB_OUTPUT
5964
echo "gradio=$gradio_json" >> $GITHUB_OUTPUT
6065
echo "webcam=$webcam_json" >> $GITHUB_OUTPUT
6166
echo "qt=$qt_json" >> $GITHUB_OUTPUT
6267
echo "js=$js_json" >> $GITHUB_OUTPUT
68+
# The action's `unittest` output reads steps.group-subprojects.outputs.unittest,
# so the key written here must be `unittest` (same pattern as the other categories).
echo "unittest=$unittest_json" >> $GITHUB_OUTPUT
6369
- name: Print subprojects to test
6470
shell: bash
6571
run: |
@@ -69,3 +75,4 @@ runs:
6975
echo "Webcam subprojects: ${{ steps.group-subprojects.outputs.webcam }}"
7076
echo "Qt subprojects: ${{ steps.group-subprojects.outputs.qt }}"
7177
echo "JS subprojects: ${{ steps.group-subprojects.outputs.js }}"
78+
echo "Unit test subprojects: ${{ steps.group-subprojects.outputs.unittest }}"

.github/workflows/sanity-check-demos.yml

+31
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,34 @@ jobs:
119119
command: npm start
120120
project: ${{ matrix.subproject }}
121121
timeout: 1m
122+
123+
unittest:
124+
needs: find-subprojects
125+
if: ${{ needs.find-subprojects.outputs.unittest != '[]' }}
126+
runs-on: ${{ matrix.os }}
127+
strategy:
128+
fail-fast: false
129+
matrix:
130+
os: [ubuntu-latest, windows-latest, macos-latest]
131+
python: ["3.10", "3.12"]
132+
subproject: ${{ fromJson(needs.find-subprojects.outputs.unittest) }}
133+
steps:
134+
- uses: actions/checkout@v4
135+
- uses: ./.github/reusable-steps/setup-os
136+
- name: Set up Python ${{ matrix.python }}
137+
uses: actions/setup-python@v5
138+
with:
139+
python-version: ${{ matrix.python }}
140+
- uses: ./.github/reusable-steps/setup-python
141+
with:
142+
python: ${{ matrix.python }}
143+
project: ${{ matrix.subproject }}
144+
- name: Login to HF
145+
shell: bash
146+
run: |
147+
huggingface-cli login --token ${{ secrets.HF_TOKEN }}
148+
- uses: ./.github/reusable-steps/timeouted-action
149+
with:
150+
script: python test.py
151+
project: ${{ matrix.subproject }}
152+
timeout: 5h

demos/virtual_ai_assistant_demo/test/test_vaa_hallucination.py demos/virtual_ai_assistant_demo/test/test.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ def run_test_deepeval(chat_model_name: str, personality_file_path: Path, auth_to
134134
outputs.append(output)
135135

136136
final_score = compute_deepeval_hallucination(inputs[:selection_num], outputs[:selection_num], contexts_res[:selection_num])
137-
print(f"final_score is {final_score}")
138137
return final_score
139138

140139

@@ -203,7 +202,6 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
203202
for response_list_per_prompt in tqdm(response_list, desc="predict hallucination ratio"):
204203
score_list.append(check_eng.predict(response_list_per_prompt))
205204
final_score = float(np.mean(score_list))
206-
print(f"final_score is {final_score}")
207205
return final_score
208206

209207

@@ -213,13 +211,14 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
213211

214212
parser = argparse.ArgumentParser()
215213
parser.add_argument("--chat_model", type=str, default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", help="Path/name of the chat model")
216-
parser.add_argument("--personality", type=str, default="healthcare_personality.yaml", help="Path to the YAML file with chatbot personality")
214+
parser.add_argument("--personality", type=str, default="../healthcare_personality.yaml", help="Path to the YAML file with chatbot personality")
217215
parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3")
218216
parser.add_argument("--check_type", type=str, choices=["deepeval", "selfcheckgpt"], default="deepeval", help="Hallucination check type")
219217
parser.add_argument("--selection_num", type=int, default=5, help="Maximum number of prompt are selected to compute hallucination score")
220218

221219
args = parser.parse_args()
222220
if args.check_type == "deepeval":
223-
run_test_deepeval(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
221+
hallucination_score = run_test_deepeval(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
224222
else:
225-
run_test_selfcheckgpt(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
223+
hallucination_score = run_test_selfcheckgpt(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
224+
print(f"hallucination_score for personality {args.personality}: {hallucination_score}")

0 commit comments

Comments
 (0)