Generally you will rarely land exactly on the kink; if it ever does happen, just handle it as a special case.
If the input falls into the z < 0 region, the neuron is likely to die, which is the "dying ReLU" problem
(by the chain rule, once the activation's derivative is 0, the gradients of that neuron's parameters are pinned at 0 and its weights can never be updated).
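To see the mechanism concretely, here is a small numpy sketch (my own illustration, not part of the tutorial's code) of ReLU and its derivative: wherever z <= 0 the derivative is 0, so a neuron whose weighted input stays negative receives zero gradient and stops learning.

import numpy as np

def relu(z):
    return np.maximum(0, z)

def relu_grad(z):
    # 1 where z > 0, otherwise 0 (the kink at z = 0 is simply treated as 0)
    return (z > 0).astype(float)

z = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(relu(z))       # [0.  0.  0.  0.5 2. ]
print(relu_grad(z))  # [0. 0. 0. 1. 1.]  -> zero gradient over the whole z <= 0 region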
So people proposed an improved version of the ReLU function:
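The original figure is not reproduced here; assuming the improved variant meant is Leaky ReLU, a minimal sketch looks like this. It keeps a small slope alpha on the negative side, so the gradient never becomes exactly zero and a "dead" neuron can still recover.

import numpy as np

# assuming the "improved ReLU" referred to here is Leaky ReLU
def leaky_relu(z, alpha=0.01):
    # small slope alpha for z < 0 instead of a hard zero
    return np.where(z > 0, z, alpha * z)

def leaky_relu_grad(z, alpha=0.01):
    return np.where(z > 0, 1.0, alpha)

z = np.array([-2.0, -0.5, 0.5, 2.0])
print(leaky_relu(z))       # [-0.02  -0.005  0.5  2. ]
print(leaky_relu_grad(z))  # [0.01  0.01  1.  1.]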
That said, vanishing gradients do not depend on the activation function alone; other factors are involved as well, and there is still a lot of active research on this. Empirically, though, using the ReLU activation usually gives good results.
Let's change the activation function to ReLU and try again:
Hm, that seems to help a little. Now let's increase each layer to 8 neurons:
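For reference, the only change needed in the Keras model definition (the full beans_predict.py script is listed below) is the activation argument and the number of units in each hidden layer:

from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
# hidden layers now use ReLU and are widened to 8 units each
model.add(Dense(units=8, activation='relu', input_dim=2))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=8, activation='relu'))
# sigmoid output keeps the prediction in (0, 1) for the toxicity label
model.add(Dense(units=1, activation='sigmoid'))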
Nice, the result is much better!
2 Alchemy (Training)
Here is the code implementation of the process described above:
Bean dataset simulation: dataset.py
import numpy as np
import random

def get_beans(counts):
    # generate two interleaved spirals: label 1 (toxic) and label 0 (non-toxic)
    posX, posY = genSpiral(int(counts / 2), 0, 1)
    negX, negY = genSpiral(int(counts / 2), np.pi, 0)
    X = np.vstack((posX, negX))
    Y = np.hstack((posY, negY))
    return X, Y

def genSpiral(counts, deltaT, label):
    # points along a spiral arm with a small amount of noise
    X = np.zeros((counts, 2))
    Y = np.zeros(counts)
    for i in range(counts):
        r = i / counts * 5
        t = 1.75 * i / counts * 2 * np.pi + deltaT
        x1 = r * np.sin(t) + random.uniform(-0.1, 0.1)
        x2 = r * np.cos(t) + random.uniform(-0.1, 0.1)
        X[i] = np.array([x1, x2])
        Y[i] = label
    return X, Y

def dist(a, b):
    dx = a['x'] - b['x']
    dy = a['y'] - b['y']
    return np.sqrt(dx * dx + dy * dy)

def getCircleLabel(p, center):
    radius = 1
    if dist(p, center) < (radius * 0.5):
        return 1
    else:
        return 0

def randUniform(a=-1, b=1):
    return np.random.rand() * (b - a) + a

def classifyCircleData(numSamples=100, noise=0):
    # alternative dataset: an inner circle (label 1) surrounded by an outer ring (label 0)
    X = []
    Y = []
    radius = 1
    num = int(numSamples / 2)
    for i in range(num):
        r = randUniform(0, radius * 0.5)
        angle = randUniform(0, 2 * np.pi)
        x = r * np.sin(angle)
        y = r * np.cos(angle)
        noiseX = randUniform(-radius, radius) * noise
        noiseY = randUniform(-radius, radius) * noise
        label = getCircleLabel({'x': x + noiseX, 'y': y + noiseY}, {'x': 0, 'y': 0})
        X.append([x + 1, y + 1])
        Y.append(label)
    for i in range(num):
        r = randUniform(radius * 0.7, radius)
        angle = randUniform(0, 2 * np.pi)
        x = r * np.sin(angle)
        y = r * np.cos(angle)
        noiseX = randUniform(-radius, radius) * noise
        noiseY = randUniform(-radius, radius) * noise
        label = getCircleLabel({'x': x + noiseX, 'y': y + noiseY}, {'x': 0, 'y': 0})
        X.append([x + 1, y + 1])
        Y.append(label)
    X = np.array(X)
    Y = np.array(Y)
    return X, Y
🚩 The bean toxicity distribution looks like this:
Plotting utilities: plot_utils.py
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from keras.models import Sequential  # used to detect whether a Keras model was passed in

def show_scatter_curve(X, Y, pres):
    plt.scatter(X, Y)
    plt.plot(X, pres)
    plt.show()

def show_scatter(X, Y):
    if X.ndim > 1:
        show_3d_scatter(X, Y)
    else:
        plt.scatter(X, Y)
        plt.show()

def show_3d_scatter(X, Y):
    x = X[:, 0]
    z = X[:, 1]
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(x, z, Y)
    plt.show()

def show_surface(x, z, forward_propgation):
    x = np.arange(np.min(x), np.max(x), 0.1)
    z = np.arange(np.min(z), np.max(z), 0.1)
    x, z = np.meshgrid(x, z)
    y = forward_propgation(x, z)  # assumes forward_propgation accepts the two meshgrid arrays
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.plot_surface(x, z, y, cmap='rainbow')
    plt.show()

def show_scatter_surface(X, Y, forward_propgation):
    if type(forward_propgation) == Sequential:
        show_scatter_surface_with_model(X, Y, forward_propgation)
        return
    x = X[:, 0]
    z = X[:, 1]
    y = Y
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(x, z, y)
    # build a grid over the input range and evaluate the forward propagation on it
    x = np.arange(np.min(x), np.max(x), 0.1)
    z = np.arange(np.min(z), np.max(z), 0.1)
    x, z = np.meshgrid(x, z)
    X = np.column_stack((x[0], z[0]))
    for j in range(z.shape[0]):
        if j == 0:
            continue
        X = np.vstack((X, np.column_stack((x[0], z[j]))))
    print(X.shape)
    r = forward_propgation(X)
    y = r[0]
    if type(r) == np.ndarray:
        y = r
    y = np.array([y])
    y = y.reshape(x.shape[0], z.shape[1])
    ax.plot_surface(x, z, y, cmap='rainbow')
    plt.show()

def show_scatter_surface_with_model(X, Y, model):
    x = X[:, 0]
    z = X[:, 1]
    y = Y
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(x, z, y)
    # build a grid over the input range and plot the model's predictions as a surface
    x = np.arange(np.min(x), np.max(x), 0.1)
    z = np.arange(np.min(z), np.max(z), 0.1)
    x, z = np.meshgrid(x, z)
    X = np.column_stack((x[0], z[0]))
    for j in range(z.shape[0]):
        if j == 0:
            continue
        X = np.vstack((X, np.column_stack((x[0], z[j]))))
    print(X.shape)
    y = model.predict(X)
    y = np.array([y])
    y = y.reshape(x.shape[0], z.shape[1])
    ax.plot_surface(x, z, y, cmap='rainbow')
    plt.show()

def pre(X, Y, model):
    model.predict(X)
Gradient descent training: beans_predict.py
import dataset
import plot_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD

m = 100
X, Y = dataset.get_beans(m)      # generate 100 spiral beans
plot_utils.show_scatter(X, Y)    # plot the raw data

# three ReLU hidden layers with 8 neurons each, sigmoid output for the 0/1 toxicity label
model = Sequential()
model.add(Dense(units=8, activation='relu', input_dim=2))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=8, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(loss='mean_squared_error', optimizer=SGD(lr=0.05), metrics=['accuracy'])
model.fit(X, Y, epochs=5000, batch_size=10)

pres = model.predict(X)
plot_utils.show_scatter_surface(X, Y, model)  # plot the predictions as a surface over the data
After training: