# models.py
from typing import List
from sentiment_data import *
from evaluator import *
from collections import Counter
import os
import numpy as np
import torch
from torch import nn, optim
######################################
# IMPLEMENT THE SENTIMENT CLASSIFIER #
######################################
class FeedForwardNeuralNetClassifier(nn.Module):
"""
The Feed-Forward Neural Net sentiment classifier.
"""
def __init__(self, n_classes, vocab_size, emb_dim, n_hidden_units):
"""
In the __init__ function, you will define modules in FFNN.
:param n_classes: number of classes in this classification problem
:param vocab_size: size of vocabulary
:param emb_dim: dimension of the embedding vectors
:param n_hidden_units: dimension of the hidden units
"""
super(FeedForwardNeuralNetClassifier, self).__init__()
self.n_classes = n_classes
self.vocab_size = vocab_size
self.emb_dim = emb_dim
self.n_hidden_units = n_hidden_units
        # Randomly initialized embedding matrix; index 0 is reserved for PAD.
        # PAD's embedding is initialized to zeros and, because padding_idx=0 is passed to
        # nn.Embedding below, it receives no gradient and stays fixed during training.
        self.word_embeddings = torch.empty(self.vocab_size, self.emb_dim)
        nn.init.xavier_normal_(self.word_embeddings)
        self.word_embeddings[0, :] = torch.zeros(self.emb_dim)
        self.padding_idx = 0
        # FFNN architecture: embedding lookup -> average over the sentence -> hidden layer
        # with ReLU -> output layer. The embedding size is self.emb_dim, the hidden size is
        # self.n_hidden_units, and the output size is self.n_classes. The model returns raw
        # logits; softmax is applied only at prediction time, because nn.CrossEntropyLoss
        # expects unnormalized logits.
        self.emb = nn.Embedding.from_pretrained(embeddings=self.word_embeddings,
                                                freeze=False,
                                                padding_idx=self.padding_idx)
        self.h = nn.Sequential(
            nn.Linear(in_features=self.emb_dim, out_features=self.n_hidden_units, bias=True),
            nn.ReLU())
        self.hout = nn.Linear(in_features=self.n_hidden_units,
                              out_features=self.n_classes,
                              bias=True)
def forward(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> torch.Tensor:
"""
The forward function, which defines how FFNN should work when given a batch of inputs and their actual sent lengths (i.e., before PAD)
:param batch_inputs: a torch.Tensor object of size (n_examples, max_sent_length_in_this_batch), which is the *indexed* inputs
:param batch_lengths: a torch.Tensor object of size (n_examples), which describes the actual sentence length of each example (i.e., before PAD)
:return the logits outputs of FFNN (i.e., the unnormalized hidden units before softmax)
"""
# TODO: implement the forward function, which returns the logits
# Lookup to the word embeddings
x = self.emb(batch_inputs)
# Compute av using sentence lengths
x = torch.div(x.sum(dim=1), batch_lengths.reshape(-1,1))
x = self.h(x)
logits = self.hout(x)
return logits
# raise Exception("Not Implemented!")
def batch_predict(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> List[int]:
"""
Make predictions for a batch of inputs. This function may directly invoke `forward` (which passes the input through FFNN and returns the output logits)
:param batch_inputs: a torch.Tensor object of size (n_examples, max_sent_length_in_this_batch), which is the *indexed* inputs
:param batch_lengths: a torch.Tensor object of size (n_examples), which describes the actual sentence length of each example (i.e., before PAD)
:return: a list of predicted classes for this batch of data, either 0 for negative class or 1 for positive class
"""
# TODO: implement the prediction function, which could reuse the forward function
# but should return a list of predicted labels
logits = self.forward(batch_inputs, batch_lengths)
pred_prob = nn.functional.softmax(logits, dim=1)
pred_class = torch.argmax(pred_prob, dim=1)
return pred_class
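
# A minimal usage sketch (illustrative only; the tensors and sizes below are made up,
# assuming a vocabulary of size 10 with PAD at index 0 and UNK at index 1):
#
#   model = FeedForwardNeuralNetClassifier(n_classes=2, vocab_size=10, emb_dim=4, n_hidden_units=8)
#   batch_inputs = torch.tensor([[2, 3, 4, 0], [5, 6, 0, 0]])   # two sentences, padded to length 4
#   batch_lengths = torch.tensor([3, 2])                        # true lengths before PAD
#   logits = model(batch_inputs, batch_lengths)                 # shape: (2, 2)
#   preds = model.batch_predict(batch_inputs, batch_lengths)    # e.g. [0, 1]
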
##################################
# IMPLEMENT THE TRAINING METHODS #
##################################
def train_feedforward_neural_net(
args,
train_exs: List[SentimentExample],
dev_exs: List[SentimentExample]) -> FeedForwardNeuralNetClassifier:
"""
Main entry point for your modifications. Trains and returns a FFNN model (whose architecture is configured based on args)
:param args: args bundle from sentiment_classifier.py
:param train_exs: training set, List of SentimentExample objects
:param dev_exs: dev set, List of SentimentExample objects. You can use this for validation throughout the training
process, but you should *not* directly train on this data.
:return: trained SentimentClassifier model, of whichever type is specified
"""
    # Read in all training examples and build a vocabulary (a List-type object called `vocab`)
    # Compute the word frequencies first
    vocab_freq = {}
    for ex in train_exs:
        words, words_counts = np.unique(ex.words, return_counts=True)
        for w, c in zip(words, words_counts):
            if len(w) > 1:  # skip single-character tokens
                vocab_freq[w] = vocab_freq.get(w, 0) + c
    # Filter out words with very low frequency
    min_freq = 1  # frequency thresholds of 1, 5, and 10 were tested; see the report for details
    vocab = [w for w, c in vocab_freq.items() if c >= min_freq]
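    # Worked example (sketch): if vocab_freq were {"good": 7, "movie": 3, "meh": 1},
    # min_freq = 1 keeps all three words, while min_freq = 5 would keep only "good".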
    if args.glove_path is not None:
        # Load pre-trained GloVe vectors, keeping only the words already in `vocab`.
        # Each GloVe line is a word followed by its emb_dim embedding values, separated by spaces.
        embedding_matrix = []
        # Row 0: PAD (all zeros)
        embedding_matrix.append(np.zeros(args.emb_dim))
        # Row 1: UNK (random initialization)
        embedding_matrix.append(np.random.rand(args.emb_dim))
        glove_vocab = []
        vocab_set = set(vocab)  # set membership is much faster than scanning the list
        with open(args.glove_path) as glove_file:
            for line in glove_file:
                if len(line.strip()) > 0:
                    fields = line.split(' ')
                    if fields[0] in vocab_set:
                        glove_vocab.append(fields[0])
                        embedding_matrix.append([float(x) for x in fields[1:]])
        embedding_matrix = np.array(embedding_matrix)
        # Restrict the vocabulary to the words that actually have GloVe vectors
        vocab = glove_vocab
# add PAD and UNK as the first two tokens
# DO NOT CHANGE, PAD must go first and UNK next (as their indices have been hard-coded in several places)
vocab = ["PAD", "UNK"] + vocab
print("Vocab size:", len(vocab))
# write vocab to an external file, so the vocab can be reloaded to index the test set
with open("data/vocab.txt", "w") as f:
for word in vocab:
f.write(word + "\n")
# indexing the training/dev examples
indexing_sentiment_examples(train_exs, vocabulary=vocab, UNK_idx=1)
indexing_sentiment_examples(dev_exs, vocabulary=vocab, UNK_idx=1)
    # Create the FFNN classifier
    model = FeedForwardNeuralNetClassifier(n_classes=2, vocab_size=len(vocab), emb_dim=args.emb_dim,
                                           n_hidden_units=args.n_hidden_units)
    if args.glove_path is not None:
        # Overwrite the randomly initialized embeddings with the GloVe matrix
        # (rows: PAD, UNK, then the GloVe words, in the same order as `vocab`).
        model.emb.weight = nn.Parameter(torch.from_numpy(embedding_matrix).float(), requires_grad=True)
    # Adam optimizer with the default configuration
    optimizer = torch.optim.Adam(model.parameters())
    # Alternatives that were tried (see the report):
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)     # tested with 1e-3 and 1e-2
    # optimizer = torch.optim.SGD(model.parameters(), lr=.2)        # tested with 0.1 and 0.2
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-2)  # tested with 1e-3 and 1e-2
# create a batch iterator for the training data
batch_iterator = SentimentExampleBatchIterator(train_exs, batch_size=args.batch_size, PAD_idx=0, shuffle=True)
# training
best_epoch = -1
best_acc = -1
    # Cross-entropy loss (it expects raw logits and applies log-softmax internally)
    cross_entropy = nn.CrossEntropyLoss()
for epoch in range(args.n_epochs):
print("Epoch %i" % epoch)
batch_iterator.refresh() # initiate a new iterator for this epoch
model.train() # turn on the "training mode"
batch_loss = 0.0
batch_example_count = 0
batch_data = batch_iterator.get_next_batch()
while batch_data is not None:
batch_inputs, batch_lengths, batch_labels = batch_data
            # Clear the gradients accumulated from the previous batch
            optimizer.zero_grad()
            # Run the model to get the logits
            logits = model(batch_inputs, batch_lengths)
            # Calculate the loss. nn.CrossEntropyLoss must be given the raw logits:
            # it applies log-softmax internally, so applying softmax beforehand is a bug.
            loss = cross_entropy(logits, batch_labels)
# record the loss and number of examples, so we could report some stats
batch_example_count += len(batch_labels)
batch_loss += loss.item() * len(batch_labels)
            # Backpropagation: compute gradients and take an optimizer step
loss.backward()
optimizer.step()
# get another batch
batch_data = batch_iterator.get_next_batch()
print("Avg loss: %.5f" % (batch_loss / batch_example_count))
# evaluate on dev set
model.eval() # turn on the "evaluation mode"
acc, _, _, _ = evaluate(model, dev_exs, return_metrics=True)
if acc > best_acc:
best_acc = acc
best_epoch = epoch
print("Secure a new best accuracy %.3f in epoch %d!" % (best_acc, best_epoch))
# save the current best model parameters
print("Save the best model checkpoint as `best_model.ckpt`!")
torch.save(model.state_dict(), "best_model.ckpt")
print("-" * 10)
    # Load back the checkpoint that performed best on the dev set
model.load_state_dict(torch.load("best_model.ckpt"))
model.eval() # switch to the evaluation mode
return model
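
# A hedged usage sketch (illustrative only): the real entry point is sentiment_classifier.py,
# which builds `args` and loads the data. The loader name and file paths below are
# hypothetical placeholders, not a verified part of this repository's API.
#
#   from argparse import Namespace
#   args = Namespace(emb_dim=300, n_hidden_units=300, batch_size=32,
#                    n_epochs=10, glove_path=None)
#   train_exs = load_sentiment_examples("data/train.txt")  # hypothetical loader
#   dev_exs = load_sentiment_examples("data/dev.txt")      # hypothetical loader
#   model = train_feedforward_neural_net(args, train_exs, dev_exs)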