-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRBMModel.py
73 lines (49 loc) · 2.76 KB
/
RBMModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from surprise import PredictionImpossible
from surprise import AlgoBase
import numpy as np
import RBM
class RBMAlgorithm(AlgoBase):
    """Surprise algorithm that predicts ratings with a restricted Boltzmann machine.

    Every (user, item) rating is one-hot encoded into ``NUM_RATING_LEVELS``
    buckets, the flattened per-user vectors are used to train an ``RBM.RBM``
    model, and the model's reconstruction for each user is converted back into
    an expected rating that ``estimate`` later looks up.
    """

    # Number of discrete rating buckets per item. With the encoding used in
    # ``fit`` (bucket = rating * 2 - 1) ten buckets cover 0.5 .. 5.0 in
    # half-star steps.
    NUM_RATING_LEVELS = 10

    def __init__(self, epochs=20, hiddenDim=100, learningRate=0.001, batchSize=100, sim_options=None):
        """Store the hyper-parameters that ``fit`` forwards to ``RBM.RBM``.

        Args:
            epochs: Passed to ``RBM.RBM`` as ``epochs``.
            hiddenDim: Passed to ``RBM.RBM`` as ``hiddenDimensions``.
            learningRate: Passed to ``RBM.RBM`` as ``learningRate``.
            batchSize: Passed to ``RBM.RBM`` as ``batchSize``.
            sim_options: Accepted for signature compatibility only; this class
                never reads it. Defaults to ``None`` rather than a shared
                mutable ``{}``.
        """
        AlgoBase.__init__(self)
        self.epochs = epochs
        self.hiddenDim = hiddenDim
        self.learningRate = learningRate
        self.batchSize = batchSize

    def softmax(self, x):
        """Return the softmax of ``x`` taken along axis 0.

        The per-column maximum is subtracted before exponentiating. This does
        not change the mathematical result but keeps ``np.exp`` from
        overflowing to ``inf`` (and the quotient from becoming NaN) when the
        scores are large.
        """
        x = np.asarray(x)
        if x.size == 0:
            # Nothing to normalise; np.max would raise on an empty array.
            return np.exp(x)
        exps = np.exp(x - np.max(x, axis=0))
        return exps / np.sum(exps, axis=0)

    def fit(self, trainset):
        """Train the RBM on ``trainset`` and cache a prediction for every user/item pair.

        Args:
            trainset: The Surprise trainset; ``n_users``, ``n_items`` and
                ``all_ratings()`` are read from it.

        Returns:
            ``self``.

        Raises:
            ValueError: If a rating does not map to one of the
                ``NUM_RATING_LEVELS`` buckets.
        """
        AlgoBase.fit(self, trainset)

        levels = self.NUM_RATING_LEVELS
        numUsers = trainset.n_users
        numItems = trainset.n_items

        # One-hot encode each known rating: trainingMatrix[user, item, bucket] = 1.
        trainingMatrix = np.zeros([numUsers, numItems, levels], dtype=np.float32)
        for (uid, iid, rating) in trainset.all_ratings():
            adjustedRating = int(float(rating) * 2.0) - 1
            # A negative index would silently wrap around to the highest
            # bucket, so reject anything outside the encodable range.
            if not 0 <= adjustedRating < levels:
                raise ValueError(
                    "Rating %r (user %r, item %r) cannot be encoded; expected a "
                    "value between 0.5 and %.1f." % (rating, uid, iid, levels * 0.5)
                )
            trainingMatrix[int(uid), int(iid), adjustedRating] = 1

        # Flatten to a 2D array, with nodes for each possible rating type on
        # each possible item, for every user.
        trainingMatrix = np.reshape(trainingMatrix, [trainingMatrix.shape[0], -1])

        # Create an RBM with (num items * rating values) visible nodes.
        rbm = RBM.RBM(
            trainingMatrix.shape[1],
            hiddenDimensions=self.hiddenDim,
            learningRate=self.learningRate,
            batchSize=self.batchSize,
            epochs=self.epochs,
        )
        rbm.Train(trainingMatrix)

        self.predictedRatings = np.zeros([numUsers, numItems], dtype=np.float32)
        bucketIndex = np.arange(levels)
        for uiid in range(numUsers):
            if uiid % 50 == 0:
                print("Processing user ", uiid)
            recs = rbm.GetRecommendations([trainingMatrix[uiid]])
            scores = np.reshape(recs, [numItems, levels])
            # The obvious thing would be to take the bucket with the highest
            # score, but that leads to a huge multi-way tie for 5-star
            # predictions. The paper suggests normalising over the K values to
            # get probabilities and taking the expectation instead.
            # Transposing puts the rating buckets on axis 0, which is the axis
            # softmax normalises, so all items are handled in one call.
            probabilities = self.softmax(scores.T)
            expectedBucket = np.dot(bucketIndex, probabilities)
            # Invert the bucket encoding used above.
            self.predictedRatings[uiid] = (expectedBucket + 1) * 0.5

        return self

    def estimate(self, u, i):
        """Return the cached prediction for inner user id ``u`` and inner item id ``i``.

        Raises:
            PredictionImpossible: If the trainset does not know the user or the
                item, or if the cached value is below 0.001 (treated as "no
                valid prediction").
        """
        if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
            raise PredictionImpossible('User and/or item is unknown.')

        rating = self.predictedRatings[u, i]
        if rating < 0.001:
            raise PredictionImpossible('No valid prediction exists.')

        return rating