model.py (forked from igormq/ctc_tensorflow_example)
#!/usr/bin/env python
# encoding=utf-8
# Created by andy on 2016-07-31 16:57.
__author__ = "andy"

import tensorflow as tf

import common
# Utility functions
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.5)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W, stride=(1, 1), padding='SAME'):
    return tf.nn.conv2d(x, W, strides=[1, stride[0], stride[1], 1],
                        padding=padding)


def max_pool(x, ksize=(2, 2), stride=(2, 2)):
    return tf.nn.max_pool(x, ksize=[1, ksize[0], ksize[1], 1],
                          strides=[1, stride[0], stride[1], 1], padding='SAME')


def avg_pool(x, ksize=(2, 2), stride=(2, 2)):
    return tf.nn.avg_pool(x, ksize=[1, ksize[0], ksize[1], 1],
                          strides=[1, stride[0], stride[1], 1], padding='SAME')
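

# Shape sanity-check sketch (illustration only; the 64x128 input size below is
# a hypothetical example, the real input height comes from common.OUTPUT_SHAPE):
# a 5x5 'SAME' convolution with stride 1 preserves the spatial size, while
# max_pool with ksize/stride (2, 2) halves both height and width, e.g.
#
#     x = tf.placeholder(tf.float32, [None, 64, 128, 1])
#     h = max_pool(conv2d(x, weight_variable([5, 5, 1, 48])))
#     # h has static shape [None, 32, 64, 48]
#
# max_pool with ksize/stride (2, 1), as used in the second layer below,
# halves only the height and keeps the width.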
def convolutional_layers():
    """
    Get the convolutional layers of the model.
    """
    inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])

    # First layer
    W_conv1 = weight_variable([5, 5, 1, 48])
    b_conv1 = bias_variable([48])
    x_expanded = tf.expand_dims(inputs, 3)
    h_conv1 = tf.nn.relu(conv2d(x_expanded, W_conv1) + b_conv1)
    h_pool1 = max_pool(h_conv1, ksize=(2, 2), stride=(2, 2))

    # Second layer
    W_conv2 = weight_variable([5, 5, 48, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool(h_conv2, ksize=(2, 1), stride=(2, 1))

    # Third layer
    W_conv3 = weight_variable([5, 5, 64, 128])
    b_conv3 = bias_variable([128])
    h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
    h_pool3 = max_pool(h_conv3, ksize=(2, 2), stride=(2, 2))

    # Densely connected layer
    W_fc1 = weight_variable([32 * 8 * common.OUTPUT_SHAPE[1], common.OUTPUT_SHAPE[1]])
    b_fc1 = bias_variable([common.OUTPUT_SHAPE[1]])

    conv_layer_flat = tf.reshape(h_pool3, [-1, 32 * 8 * common.OUTPUT_SHAPE[1]])
    features = tf.nn.relu(tf.matmul(conv_layer_flat, W_fc1) + b_fc1)

    shape = tf.shape(features)
    features = tf.reshape(features, [shape[0], common.OUTPUT_SHAPE[1], 1])  # batch_size * OUTPUT_SHAPE[1] * 1

    return inputs, features


def lstm_cell():
    return tf.contrib.rnn.LSTMCell(common.num_hidden)


def get_train_model():
    # Has size [batch_size, max_stepsize, num_features]; the
    # batch_size and max_stepsize can vary from batch to batch.
    inputs, features = convolutional_layers()
    # print features.get_shape()
    # inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])

    # Here we use sparse_placeholder, which generates the
    # SparseTensor required by the ctc_loss op.
    targets = tf.sparse_placeholder(tf.int32)

    # 1-D array of size [batch_size]
    seq_len = tf.placeholder(tf.int32, [None])

    # Defining the cell
    # Can also be:
    #   tf.nn.rnn_cell.RNNCell
    #   tf.nn.rnn_cell.GRUCell
    # cell = tf.contrib.rnn.LSTMCell(common.num_hidden, state_is_tuple=True)

    # Stacking RNN cells
    stack = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(common.num_layers)],
                                        state_is_tuple=True)

    # The second output is the last state, which we will not use
    outputs, _ = tf.nn.dynamic_rnn(stack, features, seq_len, dtype=tf.float32)

    shape = tf.shape(features)
    batch_s, max_timesteps = shape[0], shape[1]

    # Reshaping to apply the same weights over all timesteps
    outputs = tf.reshape(outputs, [-1, common.num_hidden])

    # Truncated normal with mean 0 and stddev=0.1
    # Tip: Try another initialization
    # see https://www.tensorflow.org/versions/r0.9/api_docs/python/contrib.layers.html#initializers
    W = tf.Variable(tf.truncated_normal([common.num_hidden,
                                         common.num_classes],
                                        stddev=0.1), name="W")
    # Zero initialization
    # Tip: Is tf.zeros_initializer the same?
    b = tf.Variable(tf.constant(0., shape=[common.num_classes]), name="b")

    # Doing the affine projection
    logits = tf.matmul(outputs, W) + b

    # Reshaping back to the original shape
    logits = tf.reshape(logits, [batch_s, -1, common.num_classes])

    # Time-major
    logits = tf.transpose(logits, (1, 0, 2))

    return logits, inputs, targets, seq_len, W, b
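

# Minimal usage sketch (not part of the original training script): it shows how
# the tensors returned by get_train_model() would typically be wired into the
# CTC loss, an optimizer and a beam-search decoder.  The learning rate and
# momentum values are arbitrary placeholders, not values taken from this repo.
if __name__ == "__main__":
    logits, inputs, targets, seq_len, W, b = get_train_model()

    # ctc_loss expects time-major logits and a SparseTensor of target labels.
    loss = tf.nn.ctc_loss(targets, logits, seq_len)
    cost = tf.reduce_mean(loss)

    # Any optimizer works here; momentum SGD is just one plausible choice.
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.005,
                                           momentum=0.9).minimize(cost)

    # Beam-search decoding and label error rate for evaluation.
    decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len)
    label_error_rate = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))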