Loading the dataset and data preprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

df = pd.read_csv("train.csv")
X_train = np.array(df.iloc[:, 1:])
y_train = np.array(df.iloc[:, 0])
X_train = np.reshape(X_train, (-1, 28, 28, 1))

def create_dev_set(X_train, Y_train):
    # split the 42000 samples into roughly 35000 train and 7000 dev (16.6%)
    return train_test_split(X_train, Y_train, test_size=0.166, random_state=0)

X_train, X_dev, y_train, y_dev = create_dev_set(X_train, y_train)
print('Training data shape : ', X_train.shape, y_train.shape)
print('Dev data shape : ', X_dev.shape, y_dev.shape)
classes = np.unique(y_train)
nClasses = len(classes)
print('Total number of outputs : ', nClasses)
print('Output classes : ', classes)
from keras.utils import to_categorical

X_train = X_train.astype('float32')
X_dev = X_dev.astype('float32')
X_train = X_train / 255.
X_dev = X_dev / 255.

y_train_one_hot = np.array(to_categorical(y_train))
y_dev_one_hot = np.array(to_categorical(y_dev))
Building the model
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, LeakyReLU, Dropout, Flatten, Dense

batch_size = 64
epochs = 30
num_classes = 10

dr = Sequential()
dr.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1), padding='same'))
dr.add(BatchNormalization(axis=-1))
dr.add(LeakyReLU(alpha=0.1))
dr.add(MaxPooling2D((2, 2), padding='same'))
dr.add(Dropout(0.3))
dr.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
dr.add(BatchNormalization(axis=-1))
dr.add(LeakyReLU(alpha=0.1))
dr.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
dr.add(Dropout(0.3))
dr.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
dr.add(BatchNormalization(axis=-1))
dr.add(LeakyReLU(alpha=0.1))
dr.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
dr.add(Dropout(0.4))
dr.add(Flatten())
dr.add(Dense(120, activation='relu'))
dr.add(BatchNormalization(axis=-1))
dr.add(LeakyReLU(alpha=0.1))
dr.add(Dropout(0.3))
dr.add(Dense(40, activation='relu'))
dr.add(BatchNormalization(axis=-1))
dr.add(LeakyReLU(alpha=0.1))
dr.add(Dropout(0.2))
dr.add(Dense(num_classes, activation='softmax'))

dr.compile(loss=keras.losses.categorical_crossentropy,
           optimizer=keras.optimizers.Adam(),
           metrics=['accuracy'])
dr.summary()
Here we use the ReLU activation function for all hidden layers and the softmax activation function for the output layer. ReLU should only be applied to hidden layers. Moreover, if your model suffers from dying neurons during training, you should switch to leaky ReLU or a Maxout function.
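As a quick illustration of why leaky ReLU helps against dying neurons, here is a small standalone NumPy sketch (not part of the training pipeline above) comparing the two functions and their gradients. For negative inputs, plain ReLU has exactly zero gradient, so the neuron receives no update; leaky ReLU keeps a small non-zero slope.

import numpy as np

def relu(x):
    return np.maximum(0.0, x)

def leaky_relu(x, alpha=0.1):
    return np.where(x > 0, x, alpha * x)

def relu_grad(x):
    return (x > 0).astype(float)          # gradient is 0 for x <= 0: the neuron stops learning

def leaky_relu_grad(x, alpha=0.1):
    return np.where(x > 0, 1.0, alpha)    # small but non-zero gradient keeps the neuron alive

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(relu(x), relu_grad(x))              # values: [0, 0, 0, 0.5, 2] and gradients [0, 0, 0, 1, 1]
print(leaky_relu(x), leaky_relu_grad(x))  # values: [-0.2, -0.05, 0, 0.5, 2] and gradients [0.1, 0.1, 0.1, 1, 1]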
Training and visualization
training = dr.fit(X_train, y_train_one_hot,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_dev, y_dev_one_hot))
dr.save("Conv2D_DR_dropout.h5py")

test_eval = dr.evaluate(X_dev, y_dev_one_hot, verbose=0)
print(test_eval)

# history keys are 'acc'/'val_acc' in older Keras; newer versions use 'accuracy'/'val_accuracy'
accuracy = training.history['acc']
val_accuracy = training.history['val_acc']
loss = training.history['loss']
val_loss = training.history['val_loss']
epochs = range(len(accuracy))

plt.plot(epochs, accuracy, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Evaluating the results
print("Training dataset evaluation") test_eval = dr.evaluate(X_train, y_train_one_hot, verbose=0) print(test_eval) print("Dev dataset evaluation") test_eval = dr.evaluate(X_dev, y_dev_one_hot, verbose=0) print(test_eval)
Output
Training dataset evaluation
[0.003591470370079107, 0.9918690614700317]
Dev dataset evaluation
[0.022693904750876813, 0.993689041881813]
Conclusion
This article has given a broad summary of the activation functions used in deep learning and, most importantly, has highlighted how these functions behave when used in practice.
The question remains: which one is better to use?
As we have observed in deep learning applications, the newer activation functions appear to outperform older ones such as ReLU, yet even the latest deep learning architectures still rely on ReLU. This is evident in SENet, where the hidden layers use ReLU activations and the output uses a sigmoid.
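To make the SENet remark concrete, here is a minimal sketch of a squeeze-and-excitation block using the Keras functional API (this is an illustrative sketch, not part of the model trained above): the bottleneck Dense layer uses ReLU, while the Dense layer that produces the channel gates uses a sigmoid.

from keras.layers import GlobalAveragePooling2D, Dense, Reshape, Multiply

def se_block(x, channels, ratio=16):
    # Squeeze: global average pooling summarizes each channel into a single value
    s = GlobalAveragePooling2D()(x)
    # Excitation: ReLU in the hidden (bottleneck) layer, sigmoid for the 0..1 channel gates
    s = Dense(channels // ratio, activation='relu')(s)
    s = Dense(channels, activation='sigmoid')(s)
    s = Reshape((1, 1, channels))(s)
    # Rescale the input feature maps channel-wise by the learned gates
    return Multiply()([x, s])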
Because of the vanishing gradient problem, sigmoid and tanh should not be used today; vanishing gradients cause many issues that degrade the accuracy and performance of deep neural network models.
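As a rough illustration of why the gradient vanishes (a standalone NumPy sketch, not part of the code above): the derivative of the sigmoid is at most 0.25, so backpropagation through many sigmoid layers multiplies the gradient by at most 0.25 per layer, driving it toward zero.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(x):
    s = sigmoid(x)
    return s * (1.0 - s)   # peaks at 0.25 when x = 0

# Upper bound on the gradient signal after backpropagating through n sigmoid layers,
# optimistically assuming the maximum derivative of 0.25 at every layer.
for n in (1, 5, 10, 20):
    print(n, 0.25 ** n)
# 1 0.25
# 5 0.0009765625
# 10 9.5367431640625e-07
# 20 9.094947017729282e-13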