Skip to content

Commit f2e8ee1

Browse files
authored
Fix Qwen2-Audio-Instruct (#3298)
1 parent 2b1f0d7 commit f2e8ee1

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

setup.cfg

+1
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,7 @@ audiolm =
285 285   # For HuggingFace audio datasets
286 286   soundfile~=0.12
287 287   librosa~=0.10
    288 + einops~=0.7.0
288 289
289 290   # For LLaMA-Omni
290 291   openai-whisper==20240930

src/helm/clients/audio_language/qwen2_audiolm_client.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def make_request(self, request: Request) -> RequestResult:
 98  98   for media_num, media_object in enumerate(request.multimodal_prompt.media_objects):
 99  99   if media_object.is_type("audio") and media_object.location:
100 100   assert media_object.is_local_file, "Only local audio files are supported"
101     - query.append({"type": "audio", "audio_loc": media_object.location})
    101 + query.append({"type": "audio", "audio_url": media_object.location})
102 102
103 103   prompt_text += f"<|im_start|>user\nAudio {media_num+1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n"
104 104   elif media_object.is_type(TEXT_TYPE):
@@ -131,7 +131,7 @@ def do_it() -> Dict[str, Any]:
131 131   if element["type"] == "audio":
132 132   audios.append(
133 133   librosa.load(
134     - element["audio_loc"],
    134 + element["audio_url"],
135 135   sr=tokenizer.feature_extractor.sampling_rate,
136 136   )[0]
137 137   )

0 commit comments

Comments
 (0)