【Python实战】——神经网络识别手写数字（一）

1 探索数据集

1.1 读取并显示数据示例

运行程序：

import numpy as np
import matplotlib.pyplot as plt
# Each image is a square, image_size pixels on a side.
image_size = 28
# One class per digit: 0, 1, 2, ..., 9.
num_of_different_labels = 10
image_pixels = image_size ** 2

# Read both comma-separated files into NumPy arrays.
test_data = np.loadtxt("D:\\mnist_test.csv", delimiter=",")
train_data = np.loadtxt("D:\\mnist_train.csv", delimiter=",")
test_data[:10]  # first ten rows of the test set (echoed when run interactively)

运行结果：

array([[7., 0., 0., ..., 0., 0., 0.],
[2., 0., 0., ..., 0., 0., 0.],
[1., 0., 0., ..., 0., 0., 0.],
...,
[9., 0., 0., ..., 0., 0., 0.],
[5., 0., 0., ..., 0., 0., 0.],
[9., 0., 0., ..., 0., 0., 0.]])

1.2 数据集大小

运行程序：

# Print (rows, columns) of the test set first, then of the training set.
print(test_data.shape, train_data.shape, sep="\n")

运行结果：

(10000, 785)
(60000, 785)

该mnist数据集训练集共60000个数据，有785维（第1列为标签，其余784列为像素值），测试集有10000个数据，785维。（注意上面的程序先输出测试集的形状，再输出训练集的形状。）

1.3 自变量因变量构建

运行程序：

# Column 0 of every row is the class label; the remaining columns are pixels.
# np.asfarray was removed in NumPy 2.0, so the float conversion is done with
# np.asarray(..., dtype=float), which works on old and new NumPy alike.
# Dividing by 255 rescales the pixels (assuming raw values lie in 0..255).
train_imgs = np.asarray(train_data[:, 1:], dtype=float) / 255
test_imgs = np.asarray(test_data[:, 1:], dtype=float) / 255
# Slicing with :1 instead of indexing with 0 keeps the labels two-dimensional
# (one column), which the one-hot comparison further down relies on.
train_labels = np.asarray(train_data[:, :1], dtype=float)
test_labels = np.asarray(test_data[:, :1], dtype=float)

1.4 One-hot编码

运行程序

import numpy as np
# Demonstration: print the one-hot vector for every digit.
# NOTE: the name `lable_range` is spelled this way on purpose here, because
# later code in this file reads it under exactly that name.
lable_range = np.arange(10)
for label in range(10):
    one_hot = np.equal(lable_range, label).astype(int)
    print("label: ", label, " in one-hot representation: ", one_hot)

# Convert the dataset labels to one-hot rows: comparing the class indices with
# the label column broadcasts to one row per sample, one column per class.
label_range = np.arange(num_of_different_labels)
train_labels_one_hot = np.equal(label_range, train_labels).astype(float)
test_labels_one_hot = np.equal(label_range, test_labels).astype(float)

1.5 图像数据示例

运行程序：

# Example: display the first ten training images, one figure per image.
for i in range(10):
    # Each sample is a flat pixel vector; fold it back into a 28 x 28 grid.
    img = np.reshape(train_imgs[i], (28, 28))
    plt.imshow(img, cmap="Greys")
    plt.show()

运行结果：

1.6 pickle包保存python对象

运行程序:

import pickle
# Bundle the four arrays in a fixed order so they can be recovered by position.
data = (train_imgs, test_imgs, train_labels, test_labels)
# Serialise the whole tuple into one binary file.
with open("D:\\pickled_mnist.pkl", "bw") as fh:
    pickle.dump(data, fh)

2 构建神经网络并训练

2.1 读取pickle文件

运行程序：

import pickle
import numpy as np

image_size = 28  # width and height of an image, in pixels
num_of_different_labels = 10  # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size

# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open("D:\\19实验\\实验课大作业\\pickled_mnist.pkl", "br") as fh:
    # Read the pickled tuple back; everything below indexes into it.
    data = pickle.load(fh)

# Same positional order as used when the tuple was pickled.
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]

# Build the class-index vector here so this section does not depend on
# variables left over from an earlier session.
label_range = np.arange(num_of_different_labels)
train_labels_one_hot = (label_range == train_labels).astype(float)
test_labels_one_hot = (label_range == test_labels).astype(float)

2.2 神经网络核心关键函数定义

运行程序：

import numpy as np
def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x) of *x*.

    Works on scalars and, element-wise, on NumPy arrays.
    """
    denominator = 1 + np.e ** -x
    return 1 / denominator


## Activation function used by the network code
activation_function = sigmoid
from scipy.stats import truncnorm
## Truncated normal distribution (used to draw random values within bounds)
def truncated_normal(mean=0, sd=1, low=0, upp=10):
    """Return a frozen ``truncnorm`` distribution limited to [low, upp].

    ``mean`` and ``sd`` are passed as the distribution's ``loc`` and
    ``scale``. ``truncnorm`` takes its clip points relative to a standard
    normal, so both limits are shifted by ``mean`` and divided by ``sd``
    before being handed over.
    """
    lower_clip = (low - mean) / sd
    upper_clip = (upp - mean) / sd
    return truncnorm(lower_clip, upper_clip, loc=mean, scale=sd)
## Neural network model
class NeuralNetwork:
    """Feed-forward network with one hidden layer, trained one sample at a time.

    Both layers apply the module-level ``activation_function`` and have no
    bias terms. Initial weights come from ``truncated_normal``.
    """

    def __init__(self,
                 num_of_in_nodes,      # number of input nodes
                 num_of_out_nodes,     # number of output nodes
                 num_of_hidden_nodes,  # number of hidden nodes
                 learning_rate):       # step size applied to each weight update
        self.num_of_in_nodes = num_of_in_nodes
        self.num_of_out_nodes = num_of_out_nodes
        self.num_of_hidden_nodes = num_of_hidden_nodes
        self.learning_rate = learning_rate
        self.create_weight_matrices()

    def create_weight_matrices(self):
        """Initialise both weight matrices with truncated-normal random values.

        ``weight_1`` has shape (hidden, in) and ``weight_2`` has shape
        (out, hidden). The values of each matrix are limited to
        +-1/sqrt(n), where n is the number of nodes feeding that layer.
        """
        rad = 1 / np.sqrt(self.num_of_in_nodes)
        X = truncated_normal(mean=0, sd=1, low=-rad, upp=rad)
        # rvs draws random samples from the frozen distribution.
        self.weight_1 = X.rvs((self.num_of_hidden_nodes, self.num_of_in_nodes))

        rad = 1 / np.sqrt(self.num_of_hidden_nodes)
        # The frozen distribution keeps the bounds it was created with, so it
        # must be rebuilt for the new radius to apply to weight_2.
        X = truncated_normal(mean=0, sd=1, low=-rad, upp=rad)
        self.weight_2 = X.rvs((self.num_of_out_nodes, self.num_of_hidden_nodes))

    def train(self, input_vector, target_vector):
        """Do one forward and backward pass and update both weight matrices.

        input_vector and target_vector can be tuple, list or ndarray; each is
        turned into a column vector. Nothing is returned; ``weight_1`` and
        ``weight_2`` are modified in place.
        """
        input_vector = np.array(input_vector, ndmin=2).T
        target_vector = np.array(target_vector, ndmin=2).T

        # Forward pass: input -> hidden -> output.
        output_vector1 = np.dot(self.weight_1, input_vector)
        output_hidden = activation_function(output_vector1)

        output_vector2 = np.dot(self.weight_2, output_hidden)
        output_network = activation_function(output_vector2)

        # Calculate output errors.
        output_errors = target_vector - output_network

        # Calculate hidden errors with the weights that produced this output,
        # i.e. before weight_2 is changed below.
        hidden_errors = np.dot(self.weight_2.T, output_errors)

        # Update the hidden -> output weights. out * (1 - out) is the
        # derivative of the sigmoid that activation_function refers to.
        tmp = output_errors * output_network * (1.0 - output_network)
        self.weight_2 += self.learning_rate * np.dot(tmp, output_hidden.T)

        # Update the input -> hidden weights.
        tmp = hidden_errors * output_hidden * (1.0 - output_hidden)
        self.weight_1 += self.learning_rate * np.dot(tmp, input_vector.T)

    def run(self, input_vector):
        """Return the network output for one sample as a column vector.

        input_vector can be tuple, list or ndarray.
        """
        input_vector = np.array(input_vector, ndmin=2).T

        output_vector = np.dot(self.weight_1, input_vector)
        output_vector = activation_function(output_vector)

        output_vector = np.dot(self.weight_2, output_vector)
        output_vector = activation_function(output_vector)

        return output_vector

    def confusion_matrix(self, data_array, labels):
        """Count predictions per (predicted class, actual label) pair.

        Returns a 10 x 10 integer array whose row index is the predicted
        class (arg-max of the network output) and whose column index is the
        label taken from ``labels[i][0]``.
        """
        cm = np.zeros((10, 10), int)
        for i, sample in enumerate(data_array):
            res_max = self.run(sample).argmax()
            target = labels[i][0]
            cm[res_max, int(target)] += 1
        return cm

    def precision(self, label, confusion_matrix):
        """Return the diagonal entry for ``label`` divided by its column sum.

        NOTE(review): with the layout produced by ``confusion_matrix``
        (rows = predicted, columns = actual) this ratio is the share of true
        ``label`` samples that were predicted correctly, which is usually
        called recall. The behaviour is kept as is; confirm the intended
        metric before relying on the name.
        """
        col = confusion_matrix[:, label]
        return confusion_matrix[label, label] / col.sum()

    def evaluate(self, data, labels):
        """Return ``(corrects, wrongs)``: how many samples were classified right or wrong.

        A sample counts as correct when the arg-max of the network output
        equals ``labels[i]``.
        """
        corrects, wrongs = 0, 0
        for i, sample in enumerate(data):
            res_max = self.run(sample).argmax()
            if res_max == labels[i]:
                corrects += 1
            else:
                wrongs += 1
        return corrects, wrongs

