-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun.py
121 lines (86 loc) · 4.01 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os, csv
import numpy as np
from utils.data_utils import get_trimmed_features, load_vocab, \
get_processing_word, get_processing_speech, AnnotationDataset, \
pad_sequences
from config import Config
import tensorflow as tf
from SpeechTextModel import WImpModel
# Folder passed to main() as `eval_folder` when this file is run as a script.
EVAL_FOLDER = "examples"
def read_csv(path, map_func):
    """Read a CSV file, skip its header row and transform every remaining row.

    Args:
        path: Path of the CSV file to read.
        map_func: Callable applied to each data row (a list of strings as
            produced by ``csv.reader``); its return value is what gets stored.

    Returns:
        A list with one ``map_func(row)`` result per data row, in file order.
        An empty or header-only file yields an empty list.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, line breaks embedded in quoted
    # fields are rewritten by universal-newline translation.
    with open(path, newline="") as f:
        csv_reader = csv.reader(f)
        # Discard the header row; the default keeps an empty file from raising.
        next(csv_reader, None)
        return [map_func(row) for row in csv_reader]
def read_examples(path, f):
    """Collect per-word examples from a ``<split>/<sentence>/words`` folder tree.

    Every sub-folder of ``path`` is treated as a split and every sub-folder of
    a split as a sentence. Inside ``<sentence>/words`` the files ``0.norm``,
    ``1.norm``, ... are read in index order; the number of words is the number
    of ``*.norm`` files found there. Plain files at the split or sentence
    level are ignored.

    Args:
        path: Root folder containing the split folders.
        f: Callable applied to the word string of each word file; its result
            is stored alongside the raw word.

    Returns:
        A dict mapping the sentence folder name to a list of
        ``(word, f(word), norm_feats)`` tuples, one per word, where ``word``
        is the first column of the first data row of the word file and
        ``norm_feats`` is a numpy array built from the remaining columns of
        every data row, converted to float.

    Raises:
        IndexError: If a word file has no data rows.
    """
    examples = {}
    # os.listdir() returns entries in arbitrary order; sort so the result
    # (and anything printed from it) is reproducible across runs.
    for split_folder in sorted(os.listdir(path)):
        split_path = os.path.join(path, split_folder)
        if os.path.isfile(split_path):
            continue
        for sent_folder in sorted(os.listdir(split_path)):
            sent_path = os.path.join(split_path, sent_folder)
            if os.path.isfile(sent_path):
                continue
            words_dir = os.path.join(sent_path, 'words')
            # The loop variable is deliberately not called `f`, so it cannot
            # be confused with (or shadow) the processing callable.
            n_words = sum(1 for name in os.listdir(words_dir)
                          if name.endswith('.norm'))
            sentence = []
            for word_id in range(n_words):
                word_file = os.path.join(words_dir, str(word_id) + ".norm")
                # Parse the file once; the word and the features are both
                # derived from the same rows.
                rows = read_csv(word_file, lambda row: row)
                word = str(rows[0][0])
                norm_feats = np.asarray(
                    [list(map(float, row[1:])) for row in rows])
                # The companion "<id>_.csv" file is intentionally not read:
                # only the normalised features are used in the result.
                sentence.append((word, f(word), norm_feats))
            # NOTE(review): keyed by the sentence folder name only, so two
            # splits containing a sentence folder with the same name overwrite
            # each other. Kept as-is to preserve the returned keys.
            examples[sent_folder] = sentence
    return examples
def main(config, eval_folder):
    """Score every example sentence in ``eval_folder`` and print the results.

    Loads the vocabularies and trimmed word features named in ``config``,
    reads the examples with ``read_examples``, builds a ``WImpModel``, runs
    ``model.test`` on a single batch holding all sentences, and prints each
    sentence as ``word (score)`` pairs.

    Args:
        config: Configuration object; this function reads ``text_words_path``,
            ``text_chars_path``, ``word_embeddings_trimmed_path``,
            ``speech_features_dim`` and ``speech_lexical_features_dim``.
        eval_folder: Root folder handed to ``read_examples``.
    """
    # load the vocab files
    text_words_vocab = load_vocab(config.text_words_path)
    text_chars_vocab = load_vocab(config.text_chars_path)
    inv_text_vocab = {v: k for k, v in text_words_vocab.items()}

    # get the processing function
    processing_word = get_processing_word(text_words_vocab, text_chars_vocab,
                                          lowercase=True, chars=True)

    # load features
    word_features = get_trimmed_features(config.word_embeddings_trimmed_path)
    examples = read_examples(eval_folder, processing_word)

    # build WImpModel
    model = WImpModel(config, word_features, None, text_words_vocab["$UNK$"],
                      inv_text_vocab, None)
    model.build_graph()

    words, word_feats, speech_interval_feats = [], [], []
    for sentence in examples.values():
        # A sentence without any word cannot be unpacked into three columns
        # below, so leave it out of the batch instead of crashing.
        if not sentence:
            continue
        words_, word_feats_, speech_feats_ = zip(*sentence)
        # Transpose the per-word processed features into per-field sequences.
        word_feats.append(list(zip(*word_feats_)))
        speech_interval_feats.append(speech_feats_)
        words.append(words_)

    speech_interval_feats_pad_, speech_lengths = pad_sequences(
        speech_interval_feats,
        pad_tok=[0] * config.speech_features_dim,
        nlevels=2)

    # Split the last axis: the leading `speech_lexical_features_dim` columns
    # (taken from index 0 of the third axis) are the lexical features, the
    # remaining columns are the speech features.
    lexical_dim = config.speech_lexical_features_dim
    speech_feats = speech_interval_feats_pad_[:, :, :, lexical_dim:]
    speech_lexical_feats = speech_interval_feats_pad_[:, :, 0, :lexical_dim]

    feed, sequence_lengths = model.get_feed_dict(words=word_feats, dropout=1.0)
    feed[model.speech_features] = speech_feats
    feed[model.speech_lexical_features] = speech_lexical_feats
    feed[model.speech_lengths] = speech_lengths

    predictions = model.test(feed)

    print("\n")
    print("WORD IMPORTANCE PREDICTION OUTPUT")
    print("=================================")
    for sent_id, tokens in enumerate(words):
        # Use this sentence's own row of scores (previously row 0 was printed
        # for every sentence), trimmed to its unpadded length.
        # NOTE(review): assumes model.test returns one row of per-token scores
        # per sentence, in feed order - confirm against WImpModel.test.
        scores = predictions[sent_id][:sequence_lengths[sent_id]]
        result = ["%s (%f)" % (w, s) for w, s in zip(tokens, scores)]
        print("--> " + " ".join(result) + "\n")
if __name__ == "__main__":
    # Script entry point: evaluate the bundled example folder using a
    # freshly constructed configuration.
    main(Config(), EVAL_FOLDER)