This notebook contains the following sections:
Importing the necessary libraries & dataset
Building a Dataset module
Building a Performance module
Building an Evaluator module
- Evaluated Algorithm submodule
- Evaluated Data submodule
Building a Hybrid module
# Colab-only helper for Google Drive access.
# NOTE(review): no drive.mount(...) call appears in this export, yet the paths
# used below point into Drive -- confirm the mount step runs before this cell.
from google.colab import drive
# IPython shell escape (not plain Python): installs the Surprise library.
!pip install surprise
# Drive directory holding the datasets; presumably also the prefix for saved
# models (the recorded "Model saved at ..." output uses it) -- TODO confirm.
folderpath='drive/My Drive/datasets/'
import os
import csv
import sys
import re
import numpy as np
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise import dump
from collections import defaultdict
This module takes the raw dataset and provides the processed dataset along with other details.
It has the following functions:
- loadDataset
- getUserRatings
- getPopularityRanks
- getArtistName
- getArtistID
#user_id artist_mbid artist_name plays norm_plays rating
class DataLoader:
    """Load the user/artist ratings CSV and answer simple lookups on it.

    The CSV at ``path`` is read positionally: column 0 is the user id,
    column 1 the artist id, column 2 the artist name and column 5 the
    rating.  The first row is a header and is expected to name the
    ``user_id``, ``artist_mbid`` and ``rating`` columns.
    """

    path = 'drive/My Drive/datasets/user-songs-rating-3000.csv'
    _encoding = 'ISO-8859-1'

    def __init__(self):
        # Created up front so the name/id lookups work (returning their
        # "not found" values) even before loadDataset() has been called.
        self.artistID_to_name = {}
        self.name_to_artistID = {}

    def _rows(self):
        """Yield the data rows of the CSV, skipping the header line."""
        with open(self.path, newline='', encoding=self._encoding) as csvfile:
            rows = csv.reader(csvfile)
            # A default keeps an empty file from raising inside the generator.
            next(rows, None)
            yield from rows

    def loadDataset(self):
        """Build the Surprise dataset and the artist id <-> name tables.

        Returns:
            The Surprise ``Dataset`` built from the ``user_id``,
            ``artist_mbid`` and ``rating`` columns on a 0-5 rating scale.
        """
        self.artistID_to_name = {}
        self.name_to_artistID = {}

        # Read the ratings from the same CSV the rest of the class uses, so
        # the method does not depend on a DataFrame defined elsewhere.
        ratings_frame = pd.read_csv(self.path, encoding=self._encoding)
        reader = Reader(rating_scale=(0, 5))
        ratingsDataset = Dataset.load_from_df(
            ratings_frame[['user_id', 'artist_mbid', 'rating']], reader)

        for row in self._rows():
            artistID = row[1]
            artistName = row[2]
            self.artistID_to_name[artistID] = artistName
            self.name_to_artistID[artistName] = artistID

        return ratingsDataset

    def getUserRatings(self, user):
        """Return the ``(artistID, rating)`` pairs recorded for *user*.

        Args:
            user: User id, compared as a string against column 0.

        Returns:
            A list of ``(artistID, float rating)`` tuples; empty when the
            user does not occur in the file.
        """
        userRatings = []
        hitUser = False
        for row in self._rows():
            if row[0] == user:
                userRatings.append((row[1], float(row[5])))
                hitUser = True
            elif hitUser:
                # Early exit once the user's run of rows ends; this assumes
                # each user's rows are contiguous in the file.
                break
        # Returned unconditionally so a user at the end of the file (or an
        # unknown user) yields a list rather than None.
        return userRatings

    def getPopularityRanks(self):
        """Rank artists by how many rating rows they have.

        Returns:
            A ``defaultdict(int)`` mapping artistID to its rank, where 1 is
            the artist with the most rows; unknown artists read as 0.
        """
        ratings = defaultdict(int)
        for row in self._rows():
            ratings[row[1]] += 1

        rankings = defaultdict(int)
        ordered = sorted(ratings.items(), key=lambda x: x[1], reverse=True)
        for rank, (artistID, _count) in enumerate(ordered, start=1):
            rankings[artistID] = rank
        return rankings

    def getArtistName(self, artistID):
        """Return the name for *artistID*, or ``""`` when it is unknown."""
        return self.artistID_to_name.get(artistID, "")

    def getArtistID(self, artistName):
        """Return the id for *artistName*, or ``0`` when it is unknown."""
        return self.name_to_artistID.get(artistName, 0)
This module generates the metrics from the predictions of the models. It outputs two metrics:
- Mean Absolute Error (MAE)
- Root Mean Square Error (RMSE)
from surprise import accuracy
class PerformanceMetrics:
    """Thin wrappers around the accuracy metrics of the Surprise library."""

    # Declared static because the methods take no ``self``; this keeps the
    # existing ``PerformanceMetrics.MAE(...)`` calls working and also makes
    # calls on an instance behave the same way.
    @staticmethod
    def MAE(predictions):
        """Return the mean absolute error computed by ``accuracy.mae``."""
        return accuracy.mae(predictions)

    @staticmethod
    def RMSE(predictions):
        """Return the root mean square error computed by ``accuracy.rmse``."""
        return accuracy.rmse(predictions)
This module wraps an algorithm/model so that it can be trained and evaluated on the dataset. It has the following methods:
- GetName - returns the name of the model
- GetModel - returns the model
- SaveModel - saves the model
- Evaluate - trains the model and returns the metrics
class ModelBuilder:
    """Pair a model with a display name and evaluate it on a data split.

    NOTE(review): several statements of this class were truncated in the
    exported source; the bodies below were restored to match the recorded
    cell output ("Model saved at <folderpath><name>") -- confirm against
    the original notebook.
    """

    def __init__(self, model, name):
        """Store the model and the name it is reported under."""
        self.model = model
        self.name = name

    def GetName(self):
        """Return the name given to the model."""
        return self.name

    def GetModel(self):
        """Return the wrapped model object."""
        return self.model

    def SaveModel(self, predictions):
        """Dump the model and its predictions to ``folderpath + name``.

        Args:
            predictions: Predictions to store alongside the model.
        """
        save_path = folderpath + self.name
        dump.dump(save_path, predictions=predictions, algo=self.model)
        print('Model saved at ' + save_path)

    def Evaluate(self, evaluationData, save=False):
        """Fit the model on the train set and score it on the test set.

        Args:
            evaluationData: Object exposing ``GetTrainSet()`` and
                ``GetTestSet()``.
            save: When true, the fitted model is saved after scoring.

        Returns:
            A dict with the keys ``"RMSE"`` and ``"MAE"``.
        """
        metrics = {}
        # Compute accuracy
        print("Evaluating accuracy...")
        self.model.fit(evaluationData.GetTrainSet())
        predictions = self.model.test(evaluationData.GetTestSet())
        metrics["RMSE"] = PerformanceMetrics.RMSE(predictions)
        metrics["MAE"] = PerformanceMetrics.MAE(predictions)
        print("Analysis complete.")
        if save:
            print('saving the model.....')
            self.SaveModel(predictions)
        return metrics
This module is used to load a set of models and return the metrics/performance of each algorithm.
It has the following functions:
- AddModel
- Evaluate
- flushModels
class ModelFactory:
    """Hold one train/test split and evaluate registered models on it.

    NOTE(review): parts of this class were truncated in the exported source
    (storing the model, reporting the results, clearing the list); they were
    restored from the surrounding code and recorded output -- confirm.
    """

    def __init__(self, dataset):
        """Split *dataset* once and start with an empty model list."""
        self.dataset = DataGenerator(dataset)
        # Per-instance list: a class-level list would be shared, so models
        # added to one factory would show up in every other factory.
        self.models = []

    def AddModel(self, model, name):
        """Register *model* under *name* for later evaluation."""
        self.models.append(ModelBuilder(model, name))

    def Evaluate(self, save=False):
        """Evaluate every registered model, in registration order.

        Args:
            save: Passed through to each model's ``Evaluate``.

        Returns:
            A dict mapping each model name to its metrics; it is also
            printed.
        """
        results = {}
        for model in self.models:
            print("Evaluating ", model.GetName(), "...")
            results[model.GetName()] = model.Evaluate(self.dataset, save)
        # Print results
        print(results)
        return results

    def flushModels(self):
        """Forget all registered models."""
        self.models = []
This module takes the dataset, splits it into a training dataset and a testing dataset, and returns them.
from surprise.model_selection import train_test_split
from surprise.model_selection import LeaveOneOut
class DataGenerator:
    """Hold a fixed train/test partition of the dataset it is given."""

    def __init__(self, data):
        # 75% of the data is used for training and 25% is held out for
        # measuring accuracy; random_state pins the seed of the split.
        partition = train_test_split(data, test_size=.25, random_state=1)
        self.trainSet, self.testSet = partition

    def GetTrainSet(self):
        """Return the training portion of the split."""
        return self.trainSet

    def GetTestSet(self):
        """Return the held-out test portion of the split."""
        return self.testSet
This module takes multiple models along with their preference weights and returns the weighted combination of their predictions.
from surprise import AlgoBase
class HybridModel(AlgoBase):
    """Blend the estimates of several models with per-model weights.

    NOTE(review): the base-class calls and the sub-model fit loop were
    truncated in the exported source and have been restored -- confirm
    against the original notebook.
    """

    # ``sim_options`` is accepted for signature compatibility only; it is
    # never read or mutated, so the shared default dict is harmless here.
    def __init__(self, models, weights, sim_options={}):
        """Store the models and the weight applied to each of them.

        Args:
            models: Models to combine; each must provide ``fit`` and
                ``estimate``.
            weights: One weight per model, in the same order.
            sim_options: Unused.
        """
        AlgoBase.__init__(self)
        self.models = models
        self.weights = weights

    def fit(self, trainset):
        """Fit every sub-model on *trainset* and return ``self``."""
        AlgoBase.fit(self, trainset)
        for model in self.models:
            model.fit(trainset)
        return self

    def estimate(self, user_id, item_id):
        """Return the weighted average of the sub-models' estimates.

        Raises:
            ZeroDivisionError: If there are no models or the weights sum
                to zero.
        """
        scores_sum = 0
        weights_sum = 0
        for i, model in enumerate(self.models):
            score = model.estimate(user_id, item_id)
            # Surprise algorithms may return ``(estimate, details)``.
            if isinstance(score, tuple):
                score = score[0]
            # e.g. estimates 3 and 4 with weights 1/4 and 3/4 give
            # 3 * 1/4 + 4 * 3/4
            scores_sum += score * self.weights[i]
            weights_sum += self.weights[i]
        # Dividing by the total keeps the result an average even when the
        # weights do not sum to one.
        return scores_sum / weights_sum
def LoadData():
    """Create a DataLoader and load the ratings dataset through it.

    Returns:
        A ``(loader, dataset)`` tuple: the DataLoader instance and the
        value returned by its ``loadDataset()``.
    """
    loader = DataLoader()
    print("Loading songs ratings...")
    return (loader, loader.loadDataset())
# Load the common data set shared by the recommender models below:
# ml is the DataLoader, evaluationData the dataset returned by loadDataset().
(ml, evaluationData) = LoadData()
Loading songs ratings...
from surprise import BaselineOnly
# Build the factory that holds the models to evaluate on evaluationData
modelfactory = ModelFactory(evaluationData)
# BaselineOnly model
baseline= BaselineOnly()
modelfactory.AddModel(baseline, "baseline")
# NOTE(review): the recorded output that follows shows an evaluation run with
# saving enabled, but no modelfactory.Evaluate(...) call is present in this
# export -- confirm against the original notebook.
Evaluating baseline ...
Evaluating accuracy...
Estimating biases using als...
RMSE: 0.9934
MAE: 0.6817
Analysis complete.
saving the model.....
Model saved at drive/My Drive/datasets/baseline
{'baseline': {'RMSE': 0.9934226190571713, 'MAE': 0.6816700266309835}}
from surprise import SVD
# SVD model, registered in the same factory as the baseline
svd= SVD()
modelfactory.AddModel(svd, "svd")
# NOTE(review): the recorded output that follows implies an Evaluate() call
# that is not present in this export -- confirm.
Evaluating baseline ...
Evaluating accuracy...
Estimating biases using als...
RMSE: 0.9934
MAE: 0.6817
Analysis complete.
Evaluating svd ...
Evaluating accuracy...
RMSE: 1.0063
MAE: 0.6831
Analysis complete.
{'baseline': {'RMSE': 0.9934226190571713, 'MAE': 0.6816700266309835}, 'svd': {'RMSE': 1.0063057804737225, 'MAE': 0.6830706904210475}}
# Combine the already-registered svd and baseline objects with equal weights
Hybrid = HybridModel([svd, baseline], [0.5, 0.5])
# Register the hybrid alongside the individual models for comparison
modelfactory.AddModel(Hybrid, "Hybrid")
# NOTE(review): the recorded output that follows implies an Evaluate() call
# that is not present in this export -- confirm.
Evaluating baseline ...
Evaluating accuracy...
Estimating biases using als...
RMSE: 0.9934
MAE: 0.6817
Analysis complete.
saving the model.....
Model saved at drive/My Drive/datasets/baseline
Evaluating svd ...
Evaluating accuracy...
RMSE: 1.0058
MAE: 0.6821
Analysis complete.
saving the model.....
Model saved at drive/My Drive/datasets/svd
Evaluating Hybrid ...
Evaluating accuracy...
Estimating biases using als...
RMSE: 0.9971
MAE: 0.6795
Analysis complete.
saving the model.....
Model saved at drive/My Drive/datasets/Hybrid
{'baseline': {'RMSE': 0.9934226190571713, 'MAE': 0.6816700266309835}, 'svd': {'RMSE': 1.005848123790534, 'MAE': 0.6821244100215875}, 'Hybrid': {'RMSE': 0.9970686978736479, 'MAE': 0.6794833671716486}}
RBMs (Restricted Boltzmann Machines) have two layers: the input layer, also known as the visible layer, and the hidden layer. The neurons in each layer communicate with neurons in the other layer but not with neurons in the same layer; that is, there is no intralayer communication among the neurons.
import sys
# Make the RBM and RBMModel modules stored in the datasets folder importable
sys.path.append('/content/drive/My Drive/datasets/')
import RBM
import RBMModel
import importlib
# Separate factory for the RBM experiments, built on the same evaluationData
deep_factory= ModelFactory(evaluationData)
#Simple RBM
SimpleRBM = RBMModel.RBMAlgorithm(epochs=10)
# NOTE(review): the recorded output lists a model named 'rbm', but no
# AddModel call for SimpleRBM (nor any Evaluate call) is present in this
# export -- confirm against the original notebook.
svd= SVD()
deep_factory.AddModel(svd, "svd")
# Combine svd and the RBM with equal weights
Hybrid = HybridModel([svd, SimpleRBM], [0.5, 0.5])
# Register the hybrid alongside the individual models for comparison
deep_factory.AddModel(Hybrid, "Hybrid")
Evaluating rbm ...
Evaluating accuracy...
Trained epoch 0
Trained epoch 1
Trained epoch 2
Trained epoch 3
Trained epoch 4
Trained epoch 5
Trained epoch 6
Trained epoch 7
Trained epoch 8
Trained epoch 9
RMSE: 1.5733
MAE: 1.4437
Analysis complete.
Evaluating svd ...
Evaluating accuracy...
RMSE: 1.0070
MAE: 0.6827
Analysis complete.
Evaluating Hybrid ...
Evaluating accuracy...
Trained epoch 0
Trained epoch 1
Trained epoch 2
Trained epoch 3
Trained epoch 4
Trained epoch 5
Trained epoch 6
Trained epoch 7
Trained epoch 8
Trained epoch 9
RMSE: 1.1650
MAE: 1.0040
Analysis complete.
{'rbm': {'RMSE': 1.5732653327536739, 'MAE': 1.4437298577219584}, 'svd': {'RMSE': 1.0069576973032366, 'MAE': 0.6826501315236461}, 'Hybrid': {'RMSE': 1.1649843535460234, 'MAE': 1.0040280997374962}}