In this example, I will train a deep neural network to sort an array of 5 integers.
Data Generation
# Data generation function.
#
# Generate a data set. The input of the training data is an array of integers;
# its length is set to 5 for this particular example. The label of a training
# sample is a set of independent vectors which indicate the sorted indexes of
# the input array. For example, for an array like [10, 5, 3, 6, 7] the output
# label is [2, 1, 3, 4, 0]. This label is then transformed into independent
# one-hot vectors, one per output position.
def generateData(train, n=100, d=5):
    """Generate random integer arrays together with their sort-order labels.

    Args:
        train: When truthy, the global NumPy random generator is re-seeded
            with 2018 before sampling, so training data is reproducible.
            When falsy, the current global random state is used as-is.
        n: Number of samples (rows) to generate.
        d: Length of each array to be sorted, which is also the number of
            label arrays returned.

    Returns:
        A tuple ``(x, x_normalized, labels)`` where

        * ``x`` is an ``(n, d)`` array of integers drawn from -255..255,
        * ``x_normalized`` is ``x`` divided by 511.0 (the number of distinct
          values), and
        * ``labels`` is a list of ``d`` arrays of shape ``(n, d)``; row ``i``
          of ``labels[k]`` is the one-hot encoding of the position, within
          ``x[i]``, of the element that belongs at sorted position ``k``.
    """
    dataSet = np.arange(-255, 256, 1)
    length = len(dataSet)

    if train:
        # Seeds the *global* generator on purpose: later unseeded calls
        # continue from this state.
        np.random.seed(2018)

    sampleIndex = np.random.randint(length, size=(n, d))
    train_x = dataSet[sampleIndex]

    # dataSet is increasing, so sorting the sampled indexes is the same as
    # sorting the sampled values.
    train_y = np.argsort(sampleIndex)

    # Indexing the identity matrix turns every index into a one-hot row,
    # giving an array of shape (n, d, d).
    train_label = np.eye(d)[train_y]
    train_x_normalized = train_x / 511.0

    # One label array per sorted position. Built from ``d`` instead of five
    # hard-coded slices so that any array length works.
    labels = [train_label[:, k, :] for k in range(d)]
    return train_x, train_x_normalized, labels
# Training data: 500000 arrays of length 5 (raw values, normalized values,
# and the list of per-position labels).
x_train_o, x_train, y_train = generateData(train=True, n=500000, d=5)

# Validation data: 50000 arrays of length 5.
x_valid_o, x_valid, y_valid = generateData(train=False, n=50000, d=5)
Build the DNN model by Keras 1 2 3 from keras.utils import layer_utilsfrom keras.initializers import glorot_uniformfrom keras import optimizers, losses, activations, models
Using TensorFlow backend.
1 2 3 4 5 6 from keras.models import Sequentialfrom keras.layers.core import Activation, RepeatVector, Dense, Dropoutfrom keras.layers.wrappers import TimeDistributedfrom keras.layers import Input, LSTMfrom keras.models import load_modelfrom keras.models import Model
def model(input_shape, output_shape):
    """Build the multi-output dense network used for sorting.

    A shared stack of dense layers feeds five parallel branches; each branch
    ends in its own softmax layer of width ``output_shape``.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        output_shape: Number of units in each of the five softmax outputs.

    Returns:
        A Keras ``Model`` named ``'dnnModel'`` with one input and five outputs.
    """
    X_input = Input(input_shape)

    # Shared trunk: 8 -> 32 -> (128 + dropout) x 2.
    trunk = Dense(8, activation='relu')(X_input)
    trunk = Dense(32, activation='relu')(trunk)
    for _ in range(2):
        trunk = Dense(128, activation='relu')(trunk)
        trunk = Dropout(rate=0.5)(trunk)

    # Five independent branches hanging off the trunk. All hidden branch
    # layers are created before any output layer, matching the layer
    # creation order of the straight-line version.
    branches = []
    for _ in range(5):
        hidden = Dense(32, activation='relu')(trunk)
        hidden = Dropout(rate=0.5)(hidden)
        branches.append(hidden)

    # One softmax head per branch.
    heads = [
        Dense(output_shape, activation='softmax')(hidden)
        for hidden in branches
    ]

    return Model(inputs=X_input, outputs=heads, name='dnnModel')
# Build the network; the input shape is x_train's shape without its first
# (sample) axis, and each softmax output has 5 units.
dnnModel = model(input_shape=x_train.shape[1:], output_shape=5)

dnnModel.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train for 20 epochs, reporting on the validation set after each epoch.
dnnModel.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=4096,
    shuffle=True,
    validation_data=(x_valid, y_valid),
    verbose=2,
)
Train on 500000 samples, validate on 50000 samples
Epoch 19/20
- 10s - loss: 1.7910 - dense_10_loss: 0.1319 - dense_11_loss: 0.4869 - dense_12_loss: 0.5322 - dense_13_loss: 0.5044 - dense_14_loss: 0.1357 - dense_10_acc: 0.9594 - dense_11_acc: 0.8106 - dense_12_acc: 0.7886 - dense_13_acc: 0.7934 - dense_14_acc: 0.9565 - val_loss: 0.4033 - val_dense_10_loss: 0.0304 - val_dense_11_loss: 0.1082 - val_dense_12_loss: 0.1206 - val_dense_13_loss: 0.1106 - val_dense_14_loss: 0.0334 - val_dense_10_acc: 0.9893 - val_dense_11_acc: 0.9739 - val_dense_12_acc: 0.9713 - val_dense_13_acc: 0.9730 - val_dense_14_acc: 0.9865
Epoch 20/20
- 10s - loss: 1.7874 - dense_10_loss: 0.1309 - dense_11_loss: 0.4863 - dense_12_loss: 0.5303 - dense_13_loss: 0.5029 - dense_14_loss: 0.1369 - dense_10_acc: 0.9595 - dense_11_acc: 0.8112 - dense_12_acc: 0.7899 - dense_13_acc: 0.7953 - dense_14_acc: 0.9565 - val_loss: 0.3875 - val_dense_10_loss: 0.0310 - val_dense_11_loss: 0.1055 - val_dense_12_loss: 0.1149 - val_dense_13_loss: 0.1076 - val_dense_14_loss: 0.0286 - val_dense_10_acc: 0.9864 - val_dense_11_acc: 0.9743 - val_dense_12_acc: 0.9731 - val_dense_13_acc: 0.9741 - val_dense_14_acc: 0.9888
<keras.callbacks.History at 0x125df0550>
# Make prediction: draw 10 fresh arrays of length 5 and run them through
# the trained model.
x_test_o, x_test, y_test = generateData(train=False, n=10, d=5)
y_predict = dnnModel.predict(x_test)
# Decode function for prediction
def decode(x_test_o, y_predict):
    """Turn per-position predictions back into reordered rows of values.

    Args:
        x_test_o: 2-D array of the original (un-normalized) inputs, one row
            per sample.
        y_predict: Sequence indexed as ``y_predict[position][sample]``, each
            entry being a score vector over the columns of ``x_test_o``.

    Returns:
        A list with one list per sample; entry ``position`` is the element of
        that sample's row selected by the arg-max of the matching score vector.
    """
    n_rows, n_slots = x_test_o.shape
    return [
        [
            x_test_o[row][np.argmax(y_predict[slot][row])]
            for slot in range(n_slots)
        ]
        for row in range(n_rows)
    ]
# Reorder each test array according to the model's predictions.
corrected = decode(x_test_o=x_test_o, y_predict=y_predict)
result [[-215, -120, 75, 129, 167],
[-232, -187, -161, -82, 58],
[-152, 12, 32, 135, 255],
[-224, -208, -20, -3, 22],
[-209, 5, 63, 112, 150],
[-203, -70, -28, 219, 247],
[-170, -16, -1, 15, 72],
[-126, -118, -91, 166, 230],
[-199, -123, -50, 54, 132],
[-246, -79, -75, 139, 196]]
It seems that this model does a good job of sorting the test data.