-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathtests.py
63 lines (53 loc) · 2.34 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import torch
def command_based_fdm_test(model, test_data, command, device):
    """Print how often the model emits its interrupt token for one command.

    Every sample in ``test_data`` is paired with the same ``command`` and fed
    through the model; an output equal to ``model.irq_token`` is counted as an
    interruption. The result is printed as ``Interruption rate: <ratio>``.

    Args:
        model: Object providing ``eval()``, ``irq_token`` and a call signature
            ``model(sample, command, is_training=False)``.
        test_data: Sized iterable of ``(sample, label)`` pairs; the label is
            ignored here.
        command: Tensor that receives a new leading dimension via
            ``unsqueeze(0)`` (presumably the batch axis -- confirm against
            the model's expected input).
        device: Device the sample and command tensors are moved to.
    """
    model.eval()
    total = len(test_data)

    # Batch the command exactly once. Rebinding ``command`` inside the loop
    # would prepend another dimension on every iteration, so only the first
    # sample would be evaluated with the intended command shape.
    batched_command = command.unsqueeze(0).to(device)

    interruptions = 0
    with torch.no_grad():
        for sample, _ in test_data:
            batched_sample = sample.unsqueeze(0).to(device)
            output = model(batched_sample, batched_command, is_training=False)
            if output == model.irq_token:
                interruptions += 1

    # An empty test set has no interruptions; report 0 rather than divide by 0.
    rate = interruptions / total if total else 0.0
    print(f"Interruption rate: {rate:.2f}")
def voice_based_fdm_test(model, test_data, device):
    """Print how often the model emits its interrupt token for voice commands.

    Each item of ``test_data`` supplies both the sample and the command that
    accompanies it. An output equal to ``model.irq_token`` is counted as an
    interruption. The result is printed as ``Interruption rate: <ratio>``.

    Args:
        model: Object providing ``eval()``, ``irq_token`` and a call signature
            ``model(sample, command, is_training=False)``.
        test_data: Sized iterable of ``(sample, voice_command)`` tensor pairs;
            each tensor receives a new leading dimension via ``unsqueeze(0)``.
        device: Device the tensors are moved to.
    """
    model.eval()
    total = len(test_data)

    interruptions = 0
    with torch.no_grad():
        for sample, voice_command in test_data:
            batched_sample = sample.unsqueeze(0).to(device)
            batched_command = voice_command.unsqueeze(0).to(device)
            output = model(batched_sample, batched_command, is_training=False)
            if output == model.irq_token:
                interruptions += 1

    # An empty test set has no interruptions; report 0 rather than divide by 0.
    rate = interruptions / total if total else 0.0
    print(f"Interruption rate: {rate:.2f}")
def analyze_turn_taking(model, test_data, device):
    """Print precision, recall and F1 of the model's interrupt predictions.

    Each sample is fed to the model together with an all-zero command tensor.
    An output equal to ``model.irq_token`` is treated as a positive
    prediction and compared against the sample's label, where ``1`` marks a
    positive and ``0`` a negative.

    Args:
        model: Object providing ``eval()``, ``irq_token``,
            ``listening_encoder.d_model`` and a call signature
            ``model(sample, command, is_training=False)``.
        test_data: Iterable of ``(sample, label)`` pairs.
        device: Device the tensors are placed on.
    """
    model.eval()

    # The placeholder command is identical for every sample, so build it once
    # and allocate it directly on the target device.
    blank_command = torch.zeros(
        1, 1, model.listening_encoder.d_model, device=device
    )

    true_positives = 0
    false_positives = 0
    false_negatives = 0
    with torch.no_grad():
        for sample, label in test_data:
            batched_sample = sample.unsqueeze(0).to(device)
            output = model(batched_sample, blank_command, is_training=False)
            if output == model.irq_token and label == 1:
                true_positives += 1
            elif output == model.irq_token and label == 0:
                false_positives += 1
            elif output != model.irq_token and label == 1:
                false_negatives += 1

    # Each metric is undefined when its denominator is zero (no positive
    # predictions, no positive labels, or no true positives). Report 0.0 in
    # those cases instead of raising ZeroDivisionError.
    predicted_positives = true_positives + false_positives
    actual_positives = true_positives + false_negatives
    precision = true_positives / predicted_positives if predicted_positives else 0.0
    recall = true_positives / actual_positives if actual_positives else 0.0
    metric_sum = precision + recall
    f1_score = 2 * (precision * recall) / metric_sum if metric_sum else 0.0

    print("Turn-taking Performance:")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1_score:.4f}")
def generate_speech(model, text, device):
    """Run the model's generation on ``text`` and return the result as NumPy.

    The text is passed through ``model.speaking_encoder.tokenizer`` with
    ``return_tensors="pt"``; the ``input_values`` of that result are moved to
    ``device`` and handed to ``model.generate``.

    Args:
        model: Object providing ``eval()``, ``generate()`` and
            ``speaking_encoder.tokenizer``.
        text: Input forwarded unchanged to the tokenizer.
        device: Device the tokenizer output is moved to before generation.

    Returns:
        The output of ``model.generate`` moved to the CPU and converted with
        ``.numpy()``.
    """
    model.eval()
    with torch.no_grad():
        encoded = model.speaking_encoder.tokenizer(text, return_tensors="pt")
        model_inputs = encoded.input_values.to(device)
        waveform = model.generate(model_inputs)
        return waveform.cpu().numpy()