开发环境:Python3.9+Tensorflow2.9.1
Tensorflow1.0到Tensorflow2.0还是有很多不同的地方,踩了不少坑🤣
以一个经典的异或问题为例,构建了两层神经网络,输入为[0, 0], [0, 1], [1, 0], [1, 1],输出为[0, 1, 1, 0]。
无论神经网络简单还是复杂,机器学习都包括训练train和测试predict两个过程。
首先采用不调用Tensorflow库的方式实现,可以深入了解计算过程,代码如下:
# -*- coding: utf-8 -*-
# Part 1: from-scratch NumPy implementation of a two-layer network for XOR.
import numpy as np


def tanh(x):
    """Hyperbolic tangent activation."""
    return np.tanh(x)


def tanh_deriv(x):
    """Derivative of tanh: 1 - tanh(x)^2."""
    return 1.0 - np.tanh(x) ** 2


def logistic(x):
    """Sigmoid activation."""
    return 1 / (1 + np.exp(-x))


def logistic_derivative(x):
    """Derivative of the sigmoid: s(x) * (1 - s(x))."""
    return logistic(x) * (1 - logistic(x))


class NeuralNetwork:
    """A fully-connected feed-forward network trained with single-sample
    (stochastic) backpropagation.

    ``layers`` is a list such as ``[10, 10, 3]``: 10 units in the first
    layer, 10 in the second, 3 in the third.
    """

    def __init__(self, layers, activation='tanh'):
        """
        :param layers: A list containing the number of units in each layer.
            Should be at least two values.
        :param activation: The activation function to be used. Can be
            "logistic" or "tanh".
        :raises ValueError: if ``activation`` is not a known name (the
            original code silently left the attributes unset, which caused a
            confusing AttributeError much later).
        """
        if activation == 'logistic':
            self.activation = logistic
            self.activation_deriv = logistic_derivative
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_deriv = tanh_deriv
        else:
            raise ValueError("activation must be 'logistic' or 'tanh'")
        self.weights = []
        # Start the loop at 1: weights are initialised relative to each
        # hidden layer.  The "+1" terms add a bias unit to the input side
        # and to every hidden layer.  Weights are uniform in [-0.25, 0.25).
        for i in range(1, len(layers) - 1):
            # Weights feeding INTO hidden layer i (includes bias columns).
            self.weights.append(
                (2 * np.random.random((layers[i - 1] + 1, layers[i] + 1)) - 1) * 0.25)
            # Weights feeding OUT of hidden layer i to layer i+1.
            self.weights.append(
                (2 * np.random.random((layers[i] + 1, layers[i + 1])) - 1) * 0.25)

    def fit(self, X, y, learning_rate=0.1, epochs=10000):
        """Train the network by sampling one example per epoch.

        :param X: 2-D array-like, one training example per row.
        :param y: target value for each row of ``X``.
        :param learning_rate: gradient step size.
        :param epochs: number of random single-sample weight updates.
        """
        X = np.atleast_2d(X)                       # make sure X is 2-D
        temp = np.ones([X.shape[0], X.shape[1] + 1])
        temp[:, 0:-1] = X                          # adding the bias unit to the input layer
        X = temp
        y = np.array(y)                            # accept plain lists for y
        for k in range(epochs):
            # Pick one random row and run a full forward pass.
            i = np.random.randint(X.shape[0])
            a = [X[i]]
            for l in range(len(self.weights)):
                a.append(self.activation(np.dot(a[l], self.weights[l])))
            error = y[i] - a[-1]
            deltas = [error * self.activation_deriv(a[-1])]
            if k % 1000 == 0:
                print(k, '...', error * error * 100)
            # Backward pass: we need to begin at the second-to-last layer.
            for l in range(len(a) - 2, 0, -1):
                deltas.append(
                    deltas[-1].dot(self.weights[l].T) * self.activation_deriv(a[l]))
            deltas.reverse()
            # NOTE: loop variable renamed from "i" to "j" — the original
            # shadowed the sampled-example index above.
            for j in range(len(self.weights)):
                layer = np.atleast_2d(a[j])
                delta = np.atleast_2d(deltas[j])
                self.weights[j] += learning_rate * layer.T.dot(delta)

    def predict(self, x):
        """Run a forward pass for a single example ``x`` (1-D array-like)."""
        x = np.array(x)
        temp = np.ones(x.shape[0] + 1)
        temp[0:-1] = x                             # append the bias unit
        a = temp
        for l in range(0, len(self.weights)):
            a = self.activation(np.dot(a, self.weights[l]))
        return a


# Part 2: the same XOR problem solved on top of the TensorFlow library
# (two-layer perceptron + BP, TF1-style graph mode via tf.compat.v1).
def run_tf_xor(learning_rate=0.1):
    """Train a 2-layer sigmoid MLP on XOR using TF1 compatibility mode.

    :param learning_rate: gradient-descent step size (original hard-coded 0.1).
    """
    # Local import so this file stays importable when TensorFlow is absent;
    # the original snippet imported it unconditionally at module top level.
    import tensorflow as tf
    # tf.random.set_seed(777)  # for reproducibility
    x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

    # Placeholders require graph mode under TF2.
    tf.compat.v1.disable_eager_execution()
    X = tf.compat.v1.placeholder(tf.float32, [None, 2])  # 2-dim input
    Y = tf.compat.v1.placeholder(tf.float32, [None, 1])  # 1-dim target

    # Layer 1 weights and bias.
    W1 = tf.Variable(tf.random.normal([2, 2]), name='weight1')
    b1 = tf.Variable(tf.random.normal([2]), name='bias1')
    layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

    # Layer 2 weights and bias.
    W2 = tf.Variable(tf.random.normal([2, 1]), name='weight2')
    b2 = tf.Variable(tf.random.normal([1]), name='bias2')
    hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

    # cost/loss function: binary cross-entropy.
    cost = -tf.reduce_mean(Y * tf.compat.v1.log(hypothesis)
                           + (1 - Y) * tf.compat.v1.log(1 - hypothesis))
    train = tf.compat.v1.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Accuracy computation: True if hypothesis > 0.5 else False.
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

    # Launch graph.
    with tf.compat.v1.Session() as sess:
        # Initialize TensorFlow variables.
        sess.run(tf.compat.v1.global_variables_initializer())
        for step in range(10001):
            sess.run(train, feed_dict={X: x_data, Y: y_data})
            if step % 100 == 0:
                print(step,
                      sess.run(cost, feed_dict={X: x_data, Y: y_data}),
                      sess.run([W1, W2]))
        # Accuracy report.
        h, c, a = sess.run([hypothesis, predicted, accuracy],
                           feed_dict={X: x_data, Y: y_data})
        print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)


if __name__ == '__main__':
    # NumPy version.
    nn = NeuralNetwork([2, 2, 1], 'tanh')
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])
    nn.fit(X, y)
    for i in [[0, 0], [0, 1], [1, 0], [1, 1]]:
        print(i, nn.predict(i))
    # TensorFlow version (requires tensorflow to be installed).
    run_tf_xor()