Shengbin's Studio.

Deep Neural Network Sorting

2018/02/01

In this example, I will train a deep neural network to sort an array of 5 numbers.

Data Generation

1
import numpy as np

data generation function

Generate the data set. Each training input is an array of integers; its length is set to 5 for this particular example. The label for each input consists of 5 independent one-hot vectors that encode the indexes which sort the input array (i.e. its argsort).
For example, for an array like [10, 5, 3, 6, 7], the output label is [2, 1, 3, 4, 0]. Each entry of this label is then transformed into its own one-hot vector, giving 5 independent vectors.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def generateData(train, n = 100, d = 5):
    """Generate random integer arrays and the one-hot labels that sort them.

    Values are drawn with replacement from the integers -255..255, so an
    array may contain duplicates.

    Args:
        train: If true, seed NumPy's global random generator with 2018 so
            the generated set is reproducible; otherwise the current random
            state is used as-is.
        n: Number of arrays to generate.
        d: Length of each array, and therefore the number of label arrays.

    Returns:
        A tuple ``(x, x_normalized, labels)``. ``x`` is the ``(n, d)``
        integer array, ``x_normalized`` is ``x / 511.0``, and ``labels`` is
        a list of ``d`` arrays of shape ``(n, d)``: row ``i`` of
        ``labels[j]`` is the one-hot encoding of the position in ``x[i]``
        that holds its ``j``-th smallest value.
    """
    dataSet = np.arange(-255, 256, 1)
    if train:
        np.random.seed(2018)
    sampleIndex = np.random.randint(len(dataSet), size=(n, d))

    train_x = dataSet[sampleIndex]
    # dataSet is strictly increasing, so the order that sorts the sampled
    # indexes also sorts the values. A stable sort makes the label for
    # duplicated values deterministic (earliest position first).
    train_y = np.argsort(sampleIndex, kind="stable")

    # Indexing the identity matrix turns every position into a one-hot row,
    # giving an array of shape (n, d, d).
    train_label = np.eye(d)[train_y]

    train_x_normalized = train_x / 511.0

    # One label array per output slot, for any d rather than exactly five.
    labels = [train_label[:, j, :] for j in range(d)]

    return train_x, train_x_normalized, labels

training data

1
# 500,000 training arrays of length 5; train=True seeds the random generator
# so the training set is reproducible.
x_train_o, x_train, y_train = generateData(True, 500000, 5)

validation data

1
# 50,000 validation arrays of length 5; train=False means the random
# generator is not reseeded before sampling.
x_valid_o, x_valid, y_valid = generateData(False, 50000, 5)

Build the DNN model by Keras

1
2
3
from keras.utils import layer_utils
from keras.initializers import glorot_uniform
from keras import optimizers, losses, activations, models
Using TensorFlow backend.
1
2
3
4
5
6
from keras.models import Sequential
from keras.layers.core import Activation, RepeatVector, Dense, Dropout
from keras.layers.wrappers import TimeDistributed
from keras.layers import Input, LSTM
from keras.models import load_model
from keras.models import Model
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def model(input_shape, output_shape):
    """Build the multi-output dense network used to predict a sort order.

    A shared stack of fully connected layers feeds five parallel branches,
    and each branch ends in its own softmax layer.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        output_shape: Number of units in each softmax output layer.

    Returns:
        A Keras ``Model`` named ``'dnnModel'`` with one input and five
        outputs.
    """
    inputs = Input(input_shape)

    # Shared trunk: three widening dense layers, then dropout, then one more
    # 128-unit dense layer followed by dropout.
    trunk = inputs
    for units in (8, 32, 128):
        trunk = Dense(units, activation='relu')(trunk)
    trunk = Dropout(rate=0.5)(trunk)
    trunk = Dense(128, activation='relu')(trunk)
    trunk = Dropout(rate=0.5)(trunk)

    # One hidden branch per output. Every branch is created before any
    # output layer so the layers are instantiated in the same order as when
    # they are written out one by one.
    branches = []
    for _ in range(5):
        hidden = Dense(32, activation='relu')(trunk)
        branches.append(Dropout(rate=0.5)(hidden))

    heads = [Dense(output_shape, activation='softmax')(branch) for branch in branches]

    return Model(inputs = inputs, outputs = heads, name='dnnModel')
1
# The input shape is the per-sample shape of x_train; 5 is the number of
# units in each softmax output layer.
dnnModel = model(x_train.shape[1:], 5)
1
# A single loss and a single metric are given; the training log reports them
# separately for each of the five output layers.
dnnModel.compile(optimizer = "adam", loss = "categorical_crossentropy", metrics = ["accuracy"])
1
# Train on the normalized inputs for 20 epochs with a batch size of 4096,
# reporting metrics on the validation set as well.
dnnModel.fit(x_train, y_train, batch_size = 4096, validation_data=(x_valid, y_valid), epochs=20, shuffle=True, verbose=2)
Train on 500000 samples, validate on 50000 samples
Epoch 19/20
 - 10s - loss: 1.7910 - dense_10_loss: 0.1319 - dense_11_loss: 0.4869 - dense_12_loss: 0.5322 - dense_13_loss: 0.5044 - dense_14_loss: 0.1357 - dense_10_acc: 0.9594 - dense_11_acc: 0.8106 - dense_12_acc: 0.7886 - dense_13_acc: 0.7934 - dense_14_acc: 0.9565 - val_loss: 0.4033 - val_dense_10_loss: 0.0304 - val_dense_11_loss: 0.1082 - val_dense_12_loss: 0.1206 - val_dense_13_loss: 0.1106 - val_dense_14_loss: 0.0334 - val_dense_10_acc: 0.9893 - val_dense_11_acc: 0.9739 - val_dense_12_acc: 0.9713 - val_dense_13_acc: 0.9730 - val_dense_14_acc: 0.9865
Epoch 20/20
 - 10s - loss: 1.7874 - dense_10_loss: 0.1309 - dense_11_loss: 0.4863 - dense_12_loss: 0.5303 - dense_13_loss: 0.5029 - dense_14_loss: 0.1369 - dense_10_acc: 0.9595 - dense_11_acc: 0.8112 - dense_12_acc: 0.7899 - dense_13_acc: 0.7953 - dense_14_acc: 0.9565 - val_loss: 0.3875 - val_dense_10_loss: 0.0310 - val_dense_11_loss: 0.1055 - val_dense_12_loss: 0.1149 - val_dense_13_loss: 0.1076 - val_dense_14_loss: 0.0286 - val_dense_10_acc: 0.9864 - val_dense_11_acc: 0.9743 - val_dense_12_acc: 0.9731 - val_dense_13_acc: 0.9741 - val_dense_14_acc: 0.9888

<keras.callbacks.History at 0x125df0550>

Make prediction

1
2
# Draw 10 more arrays of length 5 (no reseeding) to use as test data.
x_test_o, x_test, y_test = generateData(False, 10, 5)
# Predict from the normalized inputs; the original values in x_test_o are
# kept for decoding the result.
y_predict = dnnModel.predict(x_test)

Decode function for prediction

1
2
3
4
5
6
7
8
9
10
11
def decode(x_test_o, y_predict):
    """Rearrange each input array according to the predicted positions.

    Args:
        x_test_o: 2-D array of original (un-normalized) values, one array
            per row.
        y_predict: Sequence indexed as ``y_predict[j][i]``, giving the score
            vector for output slot ``j`` of row ``i``.

    Returns:
        A list with one list per row; its ``j``-th entry is the element of
        that row located at the arg-max of ``y_predict[j][i]``.
    """
    n_rows, n_cols = x_test_o.shape
    return [
        [x_test_o[r][np.argmax(y_predict[c][r])] for c in range(n_cols)]
        for r in range(n_rows)
    ]
1
# Reorder each original (un-normalized) test array using the predicted
# positions.
corrected = decode(x_test_o, y_predict)
1
# Bare expression: shows the decoded arrays as the notebook cell output.
corrected

result

[[-215, -120, 75, 129, 167],
 [-232, -187, -161, -82, 58],
 [-152, 12, 32, 135, 255],
 [-224, -208, -20, -3, 22],
 [-209, 5, 63, 112, 150],
 [-203, -70, -28, 219, 247],
 [-170, -16, -1, 15, 72],
 [-126, -118, -91, 166, 230],
 [-199, -123, -50, 54, 132],
 [-246, -79, -75, 139, 196]]

It seems that this model does a good job of sorting the test data.

CATALOG
  1. 1. Data Generation
    1. 1.1. data generation function
    2. 1.2. training data
    3. 1.3. validation data
  2. 2. Build the DNN model by Keras
    1. 2.1. Decode function for prediction
    2. 2.2. result