Add support for Apple Silicon and switch to Aesara #37

Open
wants to merge 1 commit into base: master
87 changes: 45 additions & 42 deletions network3.py
@@ -1,7 +1,7 @@
"""network3.py
~~~~~~~~~~~~~~

A Theano-based program for training and running simple neural
An Aesara-based program for training and running simple neural
networks.

Supports several layer types (fully connected, convolutional, max
@@ -12,14 +12,14 @@
network2.py. However, unlike network.py and network2.py it can also
be run on a GPU, which makes it faster still.

Because the code is based on Theano, the code is different in many
Because the code is based on Aesara, the code is different in many
ways from network.py and network2.py. However, where possible I have
tried to maintain consistency with the earlier programs. In
particular, the API is similar to network2.py. Note that I have
focused on making the code simple, easily readable, and easily
modifiable. It is not optimized, and omits many desirable features.

This program incorporates ideas from the Theano documentation on
This program incorporates ideas from the Aesara documentation on
convolutional neural nets (notably,
http://deeplearning.net/tutorial/lenet.html ), from Misha Denil's
implementation of dropout (https://github.com/mdenil/dropout ), and
@@ -34,46 +34,49 @@

# Third-party libraries
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal.pool import pool_2d
import aesara
import aesara.tensor as T
from aesara.tensor.nnet import conv2d
from aesara.tensor.nnet import softmax
from aesara.tensor.random.utils import RandomStream
from aesara.tensor.signal.pool import pool_2d

# Activation functions for neurons
def linear(z): return z
def ReLU(z): return T.maximum(0.0, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh
from aesara.tensor.nnet import relu as ReLU
from aesara.tensor import sigmoid
from aesara.tensor import tanh


#### Constants
GPU = True
if GPU:
print("Trying to run under a GPU. If this is not desired, then modify "+\
"network3.py\nto set the GPU flag to False.")
try: theano.config.device = 'gpu'
try: aesara.config.device = 'gpu'
except: pass # it's already set
theano.config.floatX = 'float32'
aesara.config.floatX = 'float32'
else:
print("Running with a CPU. If this is not desired, then modify "+\
"network3.py to set\nthe GPU flag to True.")

aesara.config.gcc__cxxflags = "-Wno-c++11-narrowing"  # Required for Apple Silicon


#### Load the MNIST data
def load_data_shared(filename="mnist.pkl.gz"):
f = gzip.open(filename, 'rb')
training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
f.close()
def shared(data):
"""Place the data into shared variables. This allows Theano to copy
"""Place the data into shared variables. This allows Aesara to copy
the data to the GPU, if one is available.

"""
shared_x = theano.shared(
np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
shared_y = theano.shared(
np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
shared_x = aesara.shared(
np.asarray(data[0], dtype=aesara.config.floatX), borrow=True)
shared_y = aesara.shared(
np.asarray(data[1], dtype=aesara.config.floatX), borrow=True)
return shared_x, T.cast(shared_y, "int32")
return [shared(training_data), shared(validation_data), shared(test_data)]
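
As a quick usage sketch of the data loader above (not part of the pull request), the shared variables it returns can be inspected like this; the shape printed assumes the standard `mnist.pkl.gz` from the book's repository:

```python
# Illustrative only: load MNIST into Aesara shared variables and inspect them.
import network3

training_data, validation_data, test_data = network3.load_data_shared("mnist.pkl.gz")
train_x, train_y = training_data

# train_x is a floatX shared variable; train_y was cast to int32 symbolically.
print(train_x.get_value(borrow=True).shape)  # (50000, 784) for the book's MNIST pickle
print(train_y.eval()[:10])                   # first ten training labels
```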

@@ -123,31 +126,31 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
# define functions to train a mini-batch, and to compute the
# accuracy in validation and test mini-batches.
i = T.lscalar() # mini-batch index
train_mb = theano.function(
train_mb = aesara.function(
[i], cost, updates=updates,
givens={
self.x:
training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
self.y:
training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
})
validate_mb_accuracy = theano.function(
validate_mb_accuracy = aesara.function(
[i], self.layers[-1].accuracy(self.y),
givens={
self.x:
validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
self.y:
validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
})
test_mb_accuracy = theano.function(
test_mb_accuracy = aesara.function(
[i], self.layers[-1].accuracy(self.y),
givens={
self.x:
test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
self.y:
test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
})
self.test_mb_predictions = theano.function(
self.test_mb_predictions = aesara.function(
[i], self.layers[-1].y_out,
givens={
self.x:
@@ -210,23 +213,23 @@ def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
self.activation_fn=activation_fn
# initialize weights and biases
n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
self.w = theano.shared(
self.w = aesara.shared(
np.asarray(
np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
dtype=theano.config.floatX),
dtype=aesara.config.floatX),
borrow=True)
self.b = theano.shared(
self.b = aesara.shared(
np.asarray(
np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
dtype=theano.config.floatX),
dtype=aesara.config.floatX),
borrow=True)
self.params = [self.w, self.b]

def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
self.inpt = inpt.reshape(self.image_shape)
conv_out = conv.conv2d(
conv_out = conv2d(
input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
image_shape=self.image_shape)
input_shape=self.image_shape)
pooled_out = pool_2d(
input=conv_out, ws=self.poolsize, ignore_border=True)
self.output = self.activation_fn(
@@ -241,15 +244,15 @@ def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
self.activation_fn = activation_fn
self.p_dropout = p_dropout
# Initialize weights and biases
self.w = theano.shared(
self.w = aesara.shared(
np.asarray(
np.random.normal(
loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
dtype=theano.config.floatX),
dtype=aesara.config.floatX),
name='w', borrow=True)
self.b = theano.shared(
self.b = aesara.shared(
np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
dtype=theano.config.floatX),
dtype=aesara.config.floatX),
name='b', borrow=True)
self.params = [self.w, self.b]

@@ -274,21 +277,21 @@ def __init__(self, n_in, n_out, p_dropout=0.0):
self.n_out = n_out
self.p_dropout = p_dropout
# Initialize weights and biases
self.w = theano.shared(
np.zeros((n_in, n_out), dtype=theano.config.floatX),
self.w = aesara.shared(
np.zeros((n_in, n_out), dtype=aesara.config.floatX),
name='w', borrow=True)
self.b = theano.shared(
np.zeros((n_out,), dtype=theano.config.floatX),
self.b = aesara.shared(
np.zeros((n_out,), dtype=aesara.config.floatX),
name='b', borrow=True)
self.params = [self.w, self.b]

def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
self.inpt = inpt.reshape((mini_batch_size, self.n_in))
self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b, axis=-1)
self.y_out = T.argmax(self.output, axis=1)
self.inpt_dropout = dropout_layer(
inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b, axis=-1)
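
A sketch of what the `axis=-1` argument added above does (names here are illustrative, not from the pull request): Aesara's `softmax` normalizes along the given axis, so `axis=-1` makes each row of the `(mini_batch_size, n_out)` activation matrix sum to one.

```python
# Illustrative check that softmax(..., axis=-1) normalizes each row.
import numpy as np
import aesara
import aesara.tensor as T
from aesara.tensor.nnet import softmax

z = T.matrix("z")                               # (mini_batch_size, n_out) activations
f = aesara.function([z], softmax(z, axis=-1))

probs = f(np.random.randn(4, 10).astype(aesara.config.floatX))
print(probs.sum(axis=1))                        # each entry should be ~1.0
```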

def cost(self, net):
"Return the log-likelihood cost."
@@ -305,7 +308,7 @@ def size(data):
return data[0].get_value(borrow=True).shape[0]

def dropout_layer(layer, p_dropout):
srng = shared_randomstreams.RandomStreams(
srng = RandomStream(
np.random.RandomState(0).randint(999999))
mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
return layer*T.cast(mask, theano.config.floatX)
mask = srng.binomial(1, 1-p_dropout, size=layer.shape)
return layer*T.cast(mask, aesara.config.floatX)
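
To close, a hedged sketch of the new `RandomStream`-based dropout mask, mirroring the call used in the diff; the dropout rate and input shape below are made-up example values:

```python
# Illustrative only: draw a binomial keep-mask the way dropout_layer now does.
import numpy as np
import aesara
import aesara.tensor as T
from aesara.tensor.random.utils import RandomStream

p_dropout = 0.5
layer = T.matrix("layer")  # e.g. a (mini_batch_size, n_out) activation

srng = RandomStream(np.random.RandomState(0).randint(999999))
mask = srng.binomial(1, 1 - p_dropout, size=layer.shape)
dropped = layer * T.cast(mask, aesara.config.floatX)

f = aesara.function([layer], dropped)
print(f(np.ones((2, 5), dtype=aesara.config.floatX)))  # roughly half the entries zeroed
```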