-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsudoku_rnn.py
128 lines (113 loc) · 3.78 KB
/
sudoku_rnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#sudoku_rnn
#copyright: fiorezhang@sina.com
import numpy as np
from keras.models import Sequential
from keras import layers
from sudoku_dataset import load_data as load
# Global settings for the generated sudoku training/test data sets.
# Side length of the square grid: passed as the first argument to load() and
# used to size the one-hot arrays (MATRIX_SIZE*MATRIX_SIZE rows, MATRIX_SIZE+1 columns).
MATRIX_SIZE = 4
# Second argument to load(); printed below as the "visible" percentage.
# NOTE(review): presumably the fraction of cells left filled in each puzzle -
# confirm against sudoku_dataset.load_data.
TRAIN_VISIBLE = 0.9
TEST_VISIBLE = 0.9
# Number of samples requested from load() for the training and test splits.
TRAIN_SIZE = 1000
TEST_SIZE = 100
# Convert a square digit matrix into a flat one-hot sequence.
def oh_encode(m):
    """One-hot encode a square matrix of small integers.

    Cells are visited in row-major order; the cell at flat position ``i``
    holding value ``v`` produces row ``i`` of the result with a single 1 in
    column ``v`` and 0 everywhere else.

    Args:
        m: Array-like of shape ``(s, s)`` holding integer values that are
            valid column indices into a row of width ``s + 1``.

    Returns:
        An ``int8`` array of shape ``(s*s, s+1)``.

    Raises:
        ValueError: If ``m`` is not a square 2-D array.
        IndexError: If a cell value is out of range for ``s + 1`` columns or
            ``m`` does not have an integer dtype.
    """
    grid = np.asarray(m)
    if grid.ndim != 2 or grid.shape[0] != grid.shape[1]:
        raise ValueError(
            'oh_encode expects a square 2-D matrix, got shape %s' % (grid.shape,))
    s = grid.shape[0]
    c = np.zeros((s * s, s + 1), dtype='int8')
    if s == 0:
        # Nothing to encode; avoid indexing with an empty, possibly float, array.
        return c
    # One vectorised assignment: row i gets its 1 in the column named by the
    # i-th cell of the matrix read in row-major order.
    c[np.arange(s * s), grid.reshape(-1)] = 1
    return c
# Convert a flat one-hot sequence back into a square digit matrix.
def oh_decode(c):
    """Decode a one-hot sequence produced by ``oh_encode`` into a matrix.

    The side length ``s`` is the integer square root of the number of rows.
    Only the first ``s*s`` rows and the first ``s+1`` columns are inspected.
    For each row the decoded value is the index of the last column whose
    entry is greater than 0; a row with no positive entry decodes to 0.

    Args:
        c: 2-D array-like with ``s*s`` rows (one per cell, row-major) and
            one column per possible cell value.

    Returns:
        An ``int8`` array of shape ``(s, s)``.
    """
    c = np.asarray(c)
    s = int(np.sqrt(c.shape[0]))
    if s == 0:
        return np.zeros((0, 0), dtype='int8')
    hot = c[:s * s, :s + 1] > 0
    # argmax on the column-reversed mask finds the first hit from the right,
    # i.e. the last positive column in the original orientation.
    last_hot = hot.shape[1] - 1 - np.argmax(hot[:, ::-1], axis=1)
    # argmax reports 0 for an all-False row, so mask those rows back to 0.
    values = np.where(hot.any(axis=1), last_hot, 0)
    return values.reshape(s, s).astype('int8')
# Ask the sudoku generator for puzzles/solutions and one-hot encode them.
print('-'*50)
print('Generating Data... ')
print('Matrix size: ', MATRIX_SIZE, ', visible(train): ', TRAIN_VISIBLE*100, '%, visible(test): ', TEST_VISIBLE*100, '%')
print('Train samples: ', TRAIN_SIZE, ', test samples: ', TEST_SIZE)

# Every encoded sample has one row per cell and one column per cell value.
n_cells = MATRIX_SIZE*MATRIX_SIZE
n_classes = MATRIX_SIZE+1

# Training split: x holds the encoded puzzles, y the encoded solutions.
x_train_m, y_train_m = load(MATRIX_SIZE, TRAIN_VISIBLE, TRAIN_SIZE)
x_train = np.zeros((TRAIN_SIZE, n_cells, n_classes), dtype='int8')
y_train = np.zeros((TRAIN_SIZE, n_cells, n_classes), dtype='int8')
for i in range(TRAIN_SIZE):
    x_train[i] = oh_encode(x_train_m[i])
    y_train[i] = oh_encode(y_train_m[i])

# Test split, generated after the training split and encoded the same way.
x_test_m, y_test_m = load(MATRIX_SIZE, TEST_VISIBLE, TEST_SIZE)
x_test = np.zeros((TEST_SIZE, n_cells, n_classes), dtype='int8')
y_test = np.zeros((TEST_SIZE, n_cells, n_classes), dtype='int8')
for i in range(TEST_SIZE):
    x_test[i] = oh_encode(x_test_m[i])
    y_test[i] = oh_encode(y_test_m[i])

# Debug aid: print x_test_m.shape / x_test.shape, or x_test_m[0], x_test[0],
# y_test_m[0] and y_test[0], to inspect one raw sample next to its encoding.
print('-'*50)
# Hyper-parameters for the Keras model.
RNN = layers.LSTM      # recurrent layer class used for both encoder and decoder
HIDDEN_SIZE = 128      # units in every recurrent layer
BATCH_SIZE = 128       # mini-batch size passed to model.fit
LAYERS = 1             # number of stacked decoder recurrent layers

# Build the RNN model.
print('-'*50)
print('Building Model... ')
model = Sequential()
# Encoder: read the one-hot puzzle (one timestep per cell) and summarise it
# into a single HIDDEN_SIZE vector.
model.add(RNN(HIDDEN_SIZE, input_shape=(MATRIX_SIZE*MATRIX_SIZE, MATRIX_SIZE+1)))
# The answer also has one timestep per cell, so feed the summary vector to the
# decoder once for each of the s*s output positions.
model.add(layers.RepeatVector(MATRIX_SIZE*MATRIX_SIZE))
# Decoder: return_sequences=True keeps one output per timestep (3D output).
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))
# Regularise the recurrent features rather than the class scores: dropout
# placed directly before the softmax would zero and rescale logits during
# training and distort the predicted distribution.
model.add(layers.Dropout(0.2))
# Apply the same Dense projection at every timestep, giving one score per
# possible cell value (0..MATRIX_SIZE) for each cell.
model.add(layers.TimeDistributed(layers.Dense(MATRIX_SIZE+1)))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
print('-'*50)
# Train the model and print a sample prediction after every round of epochs.
print('-'*50)
print('Training...')
for iteration in range(1, 500):
    print()
    print('-'*50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=5,
              validation_data=(x_test, y_test))
    # Show the current result on one randomly chosen test puzzle.
    ind = np.random.randint(0, len(x_test))
    # Slice (rather than index) so the batch dimension of size 1 is kept.
    rowx, rowy = x_test[ind:ind + 1], y_test[ind:ind + 1]
    # Sequential.predict_classes no longer exists in current Keras; taking the
    # arg-max over the class axis of predict() yields the same class indices.
    preds = np.argmax(model.predict(rowx, verbose=0), axis=-1)
    question = oh_decode(rowx[0])
    correct = oh_decode(rowy[0])
    guess = preds[0].reshape(question.shape[0], question.shape[1])
    print('Q','- '*25)
    print(question)
    print('A','- '*25)
    print(correct)
    print('G','- '*25)
    print(guess)
    print('- '*25)
#### Disabled scratch test for the one-hot helpers; it lives inside a string
#### literal, so none of it is executed.
# NOTE(review): the load() call below passes four arguments and unpacks two
# pairs, unlike the three-argument, two-value load() calls used earlier in this
# file - reconcile it with sudoku_dataset.load_data before re-enabling.
'''
print('Generate Data +')
(x, y), (z, u) = load(9, 0.7, 10000, 1000)
print('Generate Data -')
o_y = oh_encode(y[0])
print(y[0])
print(o_y)
t = oh_decode(o_y)
print(t)
'''