MainAE.py
# AutoEncoders
# Importing the libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
from architecture import SAE
from utils import convert
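# Note: SAE and convert come from the local modules architecture.py and utils.py
# (not shown here). Based on how they are used below, SAE(nb_movies, nb_neurons)
# is assumed to be a stacked autoencoder whose input and output layers both have
# nb_movies units, and convert(data, nb_users, nb_movies) is assumed to return
# one rating list per user, with 0 for movies the user has not rated.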
# Importing the dataset
movies = pd.read_csv('ml-1m/movies.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings = pd.read_csv('ml-1m/ratings.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')
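# The ml-1m tables above are loaded for reference only; the training and test
# sets used below come from the ml-100k u1 split.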
# Preparing the training set and the test set
training_set = pd.read_csv('ml-100k/u1.base', delimiter = '\t')
training_set = np.array(training_set, dtype = 'int')
test_set = pd.read_csv('ml-100k/u1.test', delimiter = '\t')
test_set = np.array(test_set, dtype = 'int')
# Getting the number of users and movies
nb_users = int(max(max(training_set[:,0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:,1]), max(test_set[:,1])))
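# User and movie IDs in the MovieLens files are 1-based, so the maximum ID
# doubles as the number of users / movies.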
# Converting the data into an array with users in lines and movies in columns
training_set = convert(training_set, nb_users, nb_movies)
test_set = convert(test_set, nb_users, nb_movies)
# Converting the data into Torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)
# Stacked AE
nb_neurons = 20
sae = SAE(nb_movies, nb_neurons)
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.01, weight_decay = 0.5)
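# weight_decay adds an L2 penalty to the weights on every RMSprop update;
# lr is the learning rate.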
# Training the SAE
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0)
        target = input.clone()
        # Only train on users who have rated at least one movie
        if torch.sum(target.data > 0) > 0:
            output = sae(input)
            target.requires_grad = False
            # We don't want values where target == 0 (unrated movies)
            # to be considered in the computation of the error
            output[target == 0] = 0
            # Compute the loss
            loss = criterion(output, target)
            # The MSE above is averaged over all nb_movies outputs, so rescale
            # it to an average over the movies with a non-zero rating only
            mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
            # Clear the gradients accumulated from the previous user
            optimizer.zero_grad()
            # Back-propagation: decides the direction of the update
            loss.backward()
            # Accumulate the rescaled error for reporting
            train_loss += np.sqrt(loss.item() * mean_corrector)
            s += 1.
            # One optimization step: the optimizer decides the intensity of the update
            optimizer.step()
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss / s))
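# The printed loss is an RMSE over the rated movies only, i.e. roughly the
# average prediction error in rating points (MovieLens ratings range from 1 to 5).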
# Testing the SAE
test_loss = 0
s = 0.
for id_user in range(nb_users):
    # Predict from the user's known (training) ratings and evaluate
    # against the held-out ratings in the test set
    input = Variable(training_set[id_user]).unsqueeze(0)
    target = Variable(test_set[id_user]).unsqueeze(0)
    if torch.sum(target.data > 0) > 0:
        output = sae(input)
        target.requires_grad = False
        # Ignore movies that are unrated in the test set
        output[target == 0] = 0
        loss = criterion(output, target)
        # Rescale the error to an average over the rated movies only
        mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.item() * mean_corrector)
        s += 1.
print('test loss: ' + str(test_loss / s))
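# Hedged sketch: once trained, predicted ratings for a single user could be
# obtained by feeding that user's known ratings through the network, e.g.
#   user_id = 0  # hypothetical index, for illustration only
#   predicted = sae(Variable(training_set[user_id]).unsqueeze(0))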