import numpy as np
import sys
class GradientDescent(object):
#Abstract Class for implementation of Gradient Descent
#Extends Class object
def __init__(self):
pass
def computeCost(self,X,t,w,gamma):
#Abstract Method
#Method's purpose is to compute the cost of the system with the given inputs.
#Inputs:
# X -> Input Features. Can be a single array of input features or a matrix.
# t -> Target values
# w -> Weights of the system.
# gamma -> Regularization constant
#Outputs:
# J -> Cost of the system with the given inputs.
#Abstract Method defined by trying to get the value of an undeclared variable and then handling the exception.
try:
Abstract #Undeclared Variable. Raises a NameError Exception
except NameError: #Handling the NameError Exception
print("Abstract Method not Implemented: %s"%('computeCost()'))
sys.exit(0)
def gradientDescent(self,X,t,w,gamma,beta):
#Abstract Method
#Method's purpose is to compute the magnitude and direction with which the weights
#are to be changed to optimize the system for the least cost.
#Inputs:
# X -> Input Features. Can be a single array of input features or a matrix.
# t -> Target values
# w -> Weights of the system.
# gamma -> Regularization constant
        # beta -> The precision value. If two successively computed costs are equivalent up to this precision value, the
# gradient descent algorithm stops.
#Outputs:
# [J,w] -> A list containing a list of costs and the final weights.
#Method abstraction achieved by raising a NameError Exception by fetching the value of an undeclared variable
#and then handling the exception. Exception is overcome when method overridden in the subclasses.
try:
Abstract #Undeclared Variable. Raises a NameError Exception
except NameError: #Handling the NameError Exception
print("Abstract Method not Implemented: %s"%('gradientDescent()'))
sys.exit(0)
def error(self,X,t,w,gamma):
#Abstract Method
#Method's purpose is to compute the error of the system over the validation and test set.
#Inputs:
# X -> Input Features. Can be a single array of input features or a matrix.
# t -> Target values
# w -> Weights of the system.
# gamma -> Regularization constant
#Outputs:
# J -> Cost of the system with the given inputs.
#Abstract Method defined by trying to get the value of an undeclared variable and then handling the exception.
try:
Abstract #Undeclared Variable. Raises a NameError Exception
except NameError: #Handling the NameError Exception
print("Abstract Method not Implemented: %s"%('error()'))
sys.exit(0)
class BatchGradientDescent(GradientDescent):
#Class to Implement Batch Gradient Descent.
#Extends the Abstract Class Gradient Descent.
#Considers the complete set of inputs as a single batch and performs computation on the batch.
def __init__(self):
super(BatchGradientDescent,self).__init__()
self.alpha = 1 #Initializing the learning rate of the system to 1.
def computeCost(self,X,t,w,gamma):
#Computes the cost which is the sum of squared difference between the actual and the predicted value
#Inputs:
        # X -> Matrix of the Input Records/Feature Records with a 1 appended to the beginning of the Matrix to account for the bias
# t -> The actual target values
# w -> The weights for the linear regression system
# gamma -> Regularization constant
#Outputs:
# J -> The cost of the system with weights w
m = len(t) # 'm' has the number of records in the dataset
        J = np.sum(np.array(np.dot(X,w) - t)**2)/(2*m) #Unregularized cost (a scalar)
temp_w = np.copy(w) #Copying the weights to perform regularization computations
temp_w[0][0] = 0 #Setting the first weight to 0 since w0 is the bias weight.
#Done to regularize only the input features.
        J = J + gamma*np.sum(temp_w**2)/(2*m) #Regularized cost (the zeroed bias weight contributes nothing)
return J
#J(w) or J is the cost of the system when it works with the weights w
#A larger J indicates a larger error in the predicted values. Our goal is to reduce the cost of the system
#by optimizing the weights or the parameters of the system.
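    #In equation form, the regularized cost computed above is
    #    J(w) = (1/(2m)) * sum_i (x_i.w - t_i)^2  +  (gamma/(2m)) * sum_{j>=1} w_j^2
    #where the bias weight w_0 is excluded from the regularization term.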
def gradientDescent(self,X,t,w,gamma,beta):
#Computes the magnitude and direction with which the weights are to be changed to optimize the system for the
#least cost.
#Inputs:
        # X -> Matrix of the Input Records/Feature Records with a 1 appended to the beginning of the Matrix to account for the bias
# t -> The actual target values
# w -> The weights for the linear regression system
# gamma -> Regularization constant
        # beta -> The precision value. If two successively computed costs are equivalent up to this precision value, the
# gradient descent algorithm stops.
#Outputs:
# [J,w] -> A list containing a list of costs and the final weights.
m = len(t) #'m' indicates the number of records in the dataset
J = [self.computeCost(X,t,w,gamma)] #list of costs initialized
print('Initial Cost: %s'%J[-1])
flag = False #Flag value initialized to False. Indicates that two successively computed costs aren't equivalent
                     #up to the indicated precision value. True indicates that the computation loop must end since
                     #two successive computations have the same value up to the indicated precision.
inc_rate = 1.00005 #Increment Rate of 0.005%. Used to reward the system in case of successful computation
prev_alpha = None #prev_alpha initialized to None indicating no previous alpha value exists
while not flag:
            gradient = np.dot(X.transpose(),(np.dot(X,w) - t))/m #Unregularized gradient
            temp_w = np.copy(w)
            temp_w[0][0] = 0
            gradient = self.alpha*(gradient + (gamma*temp_w)/m) #Regularized gradient scaled by the learning rate
w = w - gradient #Weight Optimization through gradient descent
            interim_J = float(self.computeCost(X,t,w,gamma)) #Computing the cost with the new weights
if interim_J > J[-1]: #If the computed cost is higher than the previous, then the
# gradient descent algorithm is diverging
w = w + gradient #Resetting the weights
if self.alpha == 0.0: #If learning rate is 0, finish computation
break
if prev_alpha is None: #If the previous learning rate was None, penalize heavily
self.alpha = self.alpha/3.0 #Penalizing the learning rate. Decreasing it by a factor of 3
inc_rate = 1.00005 #Increment Rate set to 0.005% in hopes of improvement after penalization
else: #If previous learning rate was stored, penalize the system
self.alpha = prev_alpha #by returning the learning rate to this stored value
prev_alpha = None #Stored Learning Rate Reset to None to heavily penalize the system
#a second time if necessary
                    inc_rate = 1 #Increment Rate set to 1, i.e. no further increments. The system cannot handle increments
continue #Continuing to the next iteration without updating the cost
elif interim_J < J[-1]: #If the descent was successful
prev_alpha = self.alpha #Store the learning rate for later marginal penalization
self.alpha = inc_rate*self.alpha #Rewarding the system for the successful computation by
#increasing learning rate
J.append(interim_J) #appending the cost computed for later visualization.
if int(J[-1]/beta) == int(J[-2]/beta): #comparing the last two values to check for the end of loop
flag = True
        return [np.array(J),w] #Returning a plain list of [costs, weights] to avoid building a ragged array
#Returned weights are the optimal for the system.
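    #The weight update applied in each iteration above is the standard regularized gradient step
    #    w := w - alpha*((1/m)*X^T(Xw - t) + (gamma/m)*w'),  where w' is w with the bias weight zeroed.
    #The learning rate alpha is adapted in a bold-driver style: it is nudged up by inc_rate after every
    #successful step and cut back (or restored to the previously stored value) whenever the cost rises.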
def error(self,X,t,w,gamma):
#Computes the error of the validation and test set.
#Inputs:
        # X -> Matrix of the Input Records/Feature Records with a 1 appended to the beginning of the Matrix to account for the bias
# t -> The actual target values
# w -> The weights for the linear regression system
# gamma -> Regularization constant
#Outputs:
# E -> The cost of the system with weights w
E = self.computeCost(X,t,w,0)
return E
class StochasticGradientDescent(GradientDescent):
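    #Class to Implement Stochastic Gradient Descent.
    #Extends the Abstract Class Gradient Descent.
    #Updates the weights one input record at a time instead of over the whole batch,
    #so the learning rate must be supplied explicitly through learningRate().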
def __init__(self):
super(StochasticGradientDescent,self).__init__()
self.alpha = None #Initializing the learning rate of the system to None.
def learningRate(self,alpha):
self.alpha = alpha
def computeCost(self,X,t,w,gamma):
#Computes the cost which is the sum of squared difference between the actual and the predicted value
#Inputs:
# X -> A single record of input features.
# t -> The actual target value
# w -> The weights for the linear regression system
# gamma -> Regularization constant
#Outputs:
# J -> The cost of the system with weights w and input features X
        J = (np.dot(X,w) - t).item()**2/2 #Unregularized cost
temp_w = np.copy(w) #Copying the weights to perform regularization computations
temp_w[0][0] = 0 #Setting the first weight to 0 since w0 is the bias weight.
#Done to regularize only the input features.
        J = J + gamma*np.sum(temp_w**2)/2 #Regularized cost (the zeroed bias weight contributes nothing)
return J
#J(w) or J is the cost of the system when it works with the weights w
#A larger J indicates a larger error in the predicted values. Our goal is to reduce the cost of the system
#by optimizing the weights or the parameters of the system.
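    #In equation form, the per-record regularized cost computed above is
    #    J(w) = (1/2)*(x.w - t)^2 + (gamma/2)*sum_{j>=1} w_j^2
    #with the bias weight w_0 again excluded from the regularization term.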
def gradientDescent(self,X,t,w,gamma,beta):
#Computes the magnitude and direction with which the weights are to be changed to optimize the system for the
#least cost.
#Inputs:
        # X -> Matrix of the Input Records/Feature Records with a 1 appended to the beginning of the Matrix to account for the bias
# t -> The actual target values
# w -> The weights for the linear regression system
# gamma -> Regularization constant
        # beta -> The precision value. If two successively computed costs are equivalent up to this precision value, the
# gradient descent algorithm stops.
#Outputs:
# [J,w] -> A list containing a list of costs and the final weights.
if self.alpha is None: #Check whether learning rate has been set
print "Error: Learning Rate not set.Ending the training session" #Display error message
sys.exit(0) #Exit system
m = len(t) #'m' indicates the number of records in the dataset
J = 0 #Initializing cost/error to 0
for i in range(m): #Iterating over the entire dataset.
            gradient = (np.dot(X[i],w) - t[i]).item()*X[i].reshape((len(X[i]),1)) #Computing the unregularized gradient for this input record
            temp_w = np.copy(w) #Copying the weights to exclude the bias weight from regularization
            temp_w[0][0] = 0
            gradient = gradient + gamma*temp_w #Updating the gradient to regularize it (bias weight excluded)
w = w - self.alpha*gradient #Updating weights
J += self.computeCost(X[i],t[i],w,gamma) #Computing cost of the system
        J = J/m #Averaging the cost over all input records
        return [np.array([J]),w] #Wrapping the averaged cost in an array so that J[-1] works downstream
#Returned weights are the optimal for the system.
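    #Note: this implementation makes a single pass (one epoch) over the dataset. The precision value
    #beta is accepted only to keep the interface identical to BatchGradientDescent and is not used
    #as a stopping criterion here.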
def error(self,X,t,w,gamma):
#Computes the error of the validation and test set.
#Inputs:
        # X -> Matrix of the Input Records/Feature Records with a 1 appended to the beginning of the Matrix to account for the bias
# t -> The actual target values
# w -> The weights for the linear regression system
# gamma -> Regularization constant
#Outputs:
# E -> The cost of the system with weights w
m = len(t) #Length of dataset over which error is to be computed
J = 0 #Initializing cost/error to 0
for i in range(m): #Iterating over all m input sets
J += self.computeCost(X[i],t[i],w,gamma) #Compute Cost for each input record
return J/m #Return the averaged cost.
class LinearRegression():
#Class to implement LinearRegression.
    #Performs LinearRegression using BatchGradientDescent or StochasticGradientDescent based on the user's choice.
def __init__(self):
self.weights = None
self.gamma = 0 #Initializing the regularization constant to 0.
self.gradient_descent = GradientDescent() #Initializing Gradient Descent Algorithm type to abstract
self.precision = 0.0001 #Required Precision Set.
    #Selects Stochastic Gradient Descent as the training algorithm and sets its learning rate.
def trainSGD(self,learningRate):
self.gradient_descent = StochasticGradientDescent()
self.gradient_descent.learningRate(learningRate) #Set the learning rate of the regression
def trainBGD(self):
self.gradient_descent = BatchGradientDescent()
def setPrecision(self,precision):
self.precision = precision
def trainSystem(self,X,t,gamma=0):
#Trains the system for the given input features and the target values.
#Inputs:
# X -> Input Feature Matrix without bias
        # t -> The target values, as an (m,1) column array
# gamma -> Regularization constant
#Outputs:
# No outputs.
m = len(t) #'m' indicates the number of records in the dataset
self.gamma = gamma # Value of gamma initialized to indicate Regularization
X = np.concatenate((np.ones((m,1)),np.array(X)),axis=1) #Appending 1 to each input feature record as a bias value
self.weights = np.zeros((X.shape[1],1)) #Initializing the weights to 0
#Performing gradient descent to find the optimal weights
[J,self.weights] = self.gradient_descent.gradientDescent(X,t,self.weights,
self.gamma,self.precision) #Return Value is a list of costs and weights
print "Weights: ",self.weights
print "Final Cost: ",J[-1]
def validateSystem(self,X,t):
#Validates the System by computing the RMS Error value.
#Inputs:
# X -> Input Feature Matrix of validation set without bias
# t -> Target Values
m = len(t) #'m' indicates the number of records in the dataset
X = np.concatenate((np.ones((m,1)),np.array(X)),axis=1) #Appending 1 to each input feature record as a bias value
Error = self.gradient_descent.error(X,t,self.weights,self.gamma) #Computing the Error/Cost for the validation set.
#Returned Error is the squared mean value
E_rms = pow(2*Error,0.5) #Computing the Root Mean Squared Error Value.
print("Validation,Root Mean Squared Error: %f"%(E_rms))
return E_rms
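    #For BatchGradientDescent, error() returns the mean halved squared error (1/(2m))*sum_i (x_i.w - t_i)^2,
    #so doubling it and taking the square root above yields the usual root mean squared error
    #sqrt((1/m)*sum_i (x_i.w - t_i)^2). For StochasticGradientDescent the regularization term is also
    #included in the averaged cost.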
def testSystem(self,X,t):
#Tests the System by computing the RMS Error value.
#Inputs:
        # X -> Input Feature Matrix of the test set without bias
# t -> Target Values
m = len(t) #'m' indicates the number of records in the dataset
X = np.concatenate((np.ones((m,1)),np.array(X)),axis=1) #Appending 1 to each input feature record as a bias value
        Error = self.gradient_descent.error(X,t,self.weights,self.gamma) #Computing the Error/Cost for the test set.
#Returned Error is the squared mean value
        E_rms = pow(2*Error,0.5) #Computing the Root Mean Squared Error Value.
        print("Test Root Mean Squared Error: %f"%(E_rms))
        return E_rms