Skip to content

Commit 77c6fdb

Browse files
committed
Added the MIMIC-IV-BHC benchmark to MedHelm scenarios.
1 parent d14768d commit 77c6fdb

File tree

4 files changed

+158
-0
lines changed

4 files changed

+158
-0
lines changed

src/helm/benchmark/presentation/run_entries_medhelm.conf

+8
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,14 @@ entries: [
121121
{description: "mimic_rrs:model=qwen/qwen2.5-7b-instruct,model_deployment=huggingface/qwen2.5-7b-instruct-4bit", priority: 1},
122122
{description: "mimic_rrs:model=microsoft/phi-3.5-mini-instruct,model_deployment=huggingface/phi-3.5-mini-instruct-4bit", priority: 1},
123123

124+
### Summarizing Discharge Notes ###
125+
{description: "mimic_bhc:model=google/gemini-1.5-pro-001,model_deployment=stanfordhealthcare/gemini-1.5-pro-001", priority: 1},
126+
{description: "mimic_bhc:model=openai/gpt-4o-2024-05-13,model_deployment=stanfordhealthcare/gpt-4o-2024-05-13", priority: 1},
127+
{description: "mimic_bhc:model=openai/gpt-4o-mini-2024-07-18,model_deployment=stanfordhealthcare/gpt-4o-mini-2024-07-18", priority: 1},
128+
{description: "mimic_bhc:model=meta/llama-3.3-70b-instruct,model_deployment=stanfordhealthcare/llama-3.3-70b-instruct", priority: 1},
129+
{description: "mimic_bhc:model=microsoft/phi-3.5-mini-instruct,model_deployment=huggingface/phi-3.5-mini-instruct-4bit", priority: 1},
130+
{description: "mimic_bhc:model=qwen/qwen2.5-7b-instruct,model_deployment=huggingface/qwen2.5-7b-instruct-4bit", priority: 1},
131+
124132
### Documenting Care Plans ###
125133
{description: "chw_care_plan:model=google/gemini-1.5-pro-001,model_deployment=stanfordhealthcare/gemini-1.5-pro-001", priority: 1},
126134
{description: "chw_care_plan:model=openai/gpt-4o-2024-05-13,model_deployment=stanfordhealthcare/gpt-4o-2024-05-13", priority: 1},

src/helm/benchmark/run_specs/medhelm_run_specs.py

+31
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,37 @@ def get_mimic_rrs_spec() -> RunSpec:
518518
)
519519

520520

521+
@run_spec_function("mimic_bhc")
def get_mimic_bhc_spec() -> RunSpec:
    """Build the run spec for the ``mimic_bhc`` summarization scenario.

    The adapter is configured for zero in-context examples
    (``max_train_instances=0``), up to 1024 generated tokens and no stop
    sequences. Metrics come from ``get_summarization_metric_specs``, with
    BERTScore using ``distilbert-base-uncased`` and baseline rescaling off.
    """
    bhc_scenario = ScenarioSpec(
        class_name="helm.benchmark.scenarios.mimic_bhc_scenario.MIMICBHCScenario",
        args={},
    )

    # Prompt layout: instructions, then "Clinical Note:" and
    # "Brief Hospital Course:", each followed by a newline.
    bhc_adapter = get_generation_adapter_spec(
        instructions="Summarize the clinical note into a brief hospital course.",
        input_noun="Clinical Note",
        output_noun="Brief Hospital Course",
        newline_after_input_noun=True,
        newline_after_output_noun=True,
        max_tokens=1024,
        max_train_instances=0,
        stop_sequences=[],
    )

    summarization_args = dict(
        task="mimic_bhc",
        device=get_torch_device_name(),
        bertscore_model="distilbert-base-uncased",
        rescale_with_baseline=False,
    )
    bhc_metrics = get_summarization_metric_specs(summarization_args)

    return RunSpec(
        name="mimic_bhc",
        scenario_spec=bhc_scenario,
        adapter_spec=bhc_adapter,
        metric_specs=bhc_metrics,
        groups=["mimic_bhc"],
    )
550+
551+
521552
@run_spec_function("chw_care_plan")
522553
def get_chw_care_plan_run_spec() -> RunSpec:
523554
"""
src/helm/benchmark/scenarios/mimic_bhc_scenario.py

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import os
2+
import json
3+
import gdown
4+
from typing import Dict, List
5+
6+
from helm.benchmark.scenarios.scenario import (
7+
Input,
8+
Scenario,
9+
Instance,
10+
TEST_SPLIT,
11+
CORRECT_TAG,
12+
Reference,
13+
Output,
14+
)
15+
16+
17+
class MIMICBHCScenario(Scenario):
    r"""
    MIMIC-IV-BHC presents a curated collection of preprocessed clinical discharge notes with labeled
    brief hospital course (BHC) summaries.
    This dataset is derived from MIMIC-IV (https://doi.org/10.1093/jamia/ocae312).

    In total, the dataset contains 270,033 clinical notes.
    The splits are provided by the dataset itself.

    Sample Synthetic Prompt:
        Summarize the clinical note into a brief hospital course.

        Clinical Note:
        <SEX> M <SERVICE> SURGERY <ALLERGIES> No Known Allergies \/ Adverse Drug Reactions
        ...
        continue to follow-up with your health care providers as an outpatient.

        Brief Hospital Course:
        Mr. ___ was pre-admitted on ___ for liver transplantation
        ...
        discharged home to continue home medications and follow-up as an outpatient.

    @article{aali2024dataset,
        title={A dataset and benchmark for hospital course summarization with adapted large language models},
        author={Aali, Asad and Van Veen, Dave and Arefeen, YI and Hom, Jason and Bluethgen, Christian
        and Reis, Eduardo Pontes and Gatidis, Sergios and Clifford, Namuun and Daws, Joseph
        and Tehrani, Arash and Kim, Jangwon and Chaudhari, Akshay},
        journal={Journal of the American Medical Informatics Association},
        volume={32},
        number={3},
        pages={470--479},
        year={2024},
        publisher={Oxford University Press}
    }

    @article{aali2024mimic,
        title={MIMIC-IV-Ext-BHC: Labeled Clinical Notes Dataset for Hospital Course Summarization},
        author={Aali, Asad and Van Veen, Dave and Arefeen, YI and Hom, Jason and Bluethgen, Christian
        and Reis, Eduardo Pontes and Gatidis, Sergios and Clifford, Namuun and Daws, Joseph
        and Tehrani, Arash and Kim, Jangwon and Chaudhari, Akshay},
        journal={PhysioNet},
        year={2024}
    }
    """

    name = "mimic_bhc"
    description = (
        "A curated collection of preprocessed clinical discharge notes from MIMIC-IV paired with"
        " their corresponding brief hospital course (BHC) summaries."
    )
    # NOTE(review): this is a summarization task, so "question_answering" looks
    # like a leftover; left unchanged in case something filters on it -- confirm.
    tags = ["question_answering", "biomedical"]

    def get_instances(self, output_path: str) -> List[Instance]:
        """Download the BHC JSONL file and turn each record into a test instance.

        The file is fetched into ``output_path`` (created if missing), parsed
        one JSON object per line, and removed again afterwards -- also when
        parsing fails. Blank lines are skipped, as are records whose ``input``
        or ``target`` is empty.

        Args:
            output_path: Directory used as scratch space for the download.

        Returns:
            One ``Instance`` per usable record, all assigned to ``TEST_SPLIT``,
            each with the summary as its single ``CORRECT_TAG`` reference.

        Raises:
            FileNotFoundError: If the download did not produce the file.
            KeyError: If a record lacks the ``input`` or ``target`` field.
        """
        url = "https://drive.google.com/uc?id=1SegKUbUZ72D5GMhgmIFHomF0DHV5Wkrn"
        os.makedirs(output_path, exist_ok=True)
        # Keep the scratch file under output_path instead of the process CWD.
        file = os.path.join(output_path, "mimic_iv_bhc.jsonl")

        instances: List[Instance] = []
        try:
            gdown.download(url, file, quiet=False)
            if not os.path.exists(file):
                raise FileNotFoundError(f"Failed to download MIMIC-IV-BHC data from {url} to {file}")

            # Limit to zero shot setting for now: every record is emitted as a
            # test instance; no train/validation instances are created.
            with open(file, "r", encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    record: Dict[str, str] = json.loads(line)
                    clinical_note = record["input"]
                    bhc_summary = record["target"]
                    if not clinical_note or not bhc_summary:
                        continue
                    instances.append(
                        Instance(
                            input=Input(text=clinical_note),
                            references=[Reference(Output(text=bhc_summary), tags=[CORRECT_TAG])],
                            split=TEST_SPLIT,
                        )
                    )
        finally:
            # The original removed the file after reading; do so on every exit
            # path so a failed parse or partial download leaves nothing behind.
            if os.path.exists(file):
                os.remove(file)

        return instances

src/helm/benchmark/static/schema_medhelm.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ run_groups:
432432
- aci_bench
433433
- mtsamples_procedures
434434
- mimic_rrs
435+
- mimic_bhc
435436
- chw_care_plan
436437

437438
- name: patient_communication
@@ -676,6 +677,24 @@ run_groups:
676677
who: Radiologist
677678
when: Post-imaging
678679
language: English
680+
681+
- name: mimic_bhc
682+
display_name: MIMIC-IV-BHC
683+
short_display_name: MIMIC-BHC
684+
description: A curated collection of preprocessed clinical discharge notes from MIMIC-IV paired with their corresponding brief hospital course (BHC) summaries [(Aali et al., 2024)](https://doi.org/10.1093/jamia/ocae312).
685+
metric_groups:
686+
- accuracy
687+
- efficiency
688+
- general_information
689+
environment:
690+
main_name: BERTScore-F
691+
main_split: test
692+
taxonomy:
693+
task: Text generation
694+
what: Summarize the clinical note into a brief hospital course
695+
who: Clinician
696+
when: Upon hospital discharge
697+
language: English
679698

680699
- name: mimiciv_billing_code
681700
display_name: MIMIC-IV Billing Code

0 commit comments

Comments
 (0)