# One hand-written training step for a tiny 2-3-1 linear network (no
# activation functions): forward pass, loss, backward pass, weight update,
# then a second forward pass to show the effect of the update.
#
# Naming: w_1_ij multiplies input x_i into hidden value z_j;
#         w_2_j1 multiplies hidden value z_j into the output y_pred.

# Training sample.
x_1 = 40.0
x_2 = 80.0
expected_output = 60.0

# Initialize the weights.
w_1_11 = 0.5
w_1_12 = 0.5
w_1_13 = 0.5
w_1_21 = 0.5
w_1_22 = 0.5
w_1_23 = 0.5

w_2_11 = 1.0
w_2_21 = 1.0
w_2_31 = 1.0

# Forward pass (the original referenced the undefined names x2, w1_21 and
# X_2, and stored the third hidden value in Z_3 while reading z_3).
z_1 = x_1 * w_1_11 + x_2 * w_1_21
z_2 = x_1 * w_1_12 + x_2 * w_1_22
z_3 = x_1 * w_1_13 + x_2 * w_1_23
y_pred = z_1 * w_2_11 + z_2 * w_2_21 + z_3 * w_2_31
print("前向传播预测值为:", y_pred)

# Half squared error, so that d(loss)/d(y_pred) = -(expected - y_pred).
loss = 0.5 * (expected_output - y_pred) ** 2
print("当前的loss值为: ", loss)

# Backward pass: gradient of the loss with respect to the prediction.
d_loss_predicted_output = -(expected_output - y_pred)

# Output-layer gradients: d(loss)/d(w_2_j1) = d(loss)/d(y_pred) * z_j.
d_loss_w_2_11 = d_loss_predicted_output * z_1
d_loss_w_2_21 = d_loss_predicted_output * z_2
d_loss_w_2_31 = d_loss_predicted_output * z_3
print(d_loss_w_2_11, d_loss_w_2_21, d_loss_w_2_31)

# Hidden-layer gradients via the chain rule:
# d(loss)/d(w_1_ij) = d(loss)/d(y_pred) * w_2_j1 * x_i.
# These use the second-layer weights from before the update below.
d_loss_w_1_11 = d_loss_predicted_output * w_2_11 * x_1
d_loss_w_1_21 = d_loss_predicted_output * w_2_11 * x_2
d_loss_w_1_12 = d_loss_predicted_output * w_2_21 * x_1
d_loss_w_1_22 = d_loss_predicted_output * w_2_21 * x_2
d_loss_w_1_13 = d_loss_predicted_output * w_2_31 * x_1
# This gradient was missing in the original although it is used below.
d_loss_w_1_23 = d_loss_predicted_output * w_2_31 * x_2

# Gradient-descent update of every weight.
learning_rate = 1e-5

w_2_11 -= learning_rate * d_loss_w_2_11
w_2_21 -= learning_rate * d_loss_w_2_21
w_2_31 -= learning_rate * d_loss_w_2_31

w_1_11 -= learning_rate * d_loss_w_1_11
w_1_12 -= learning_rate * d_loss_w_1_12
w_1_13 -= learning_rate * d_loss_w_1_13
w_1_21 -= learning_rate * d_loss_w_1_21
w_1_22 -= learning_rate * d_loss_w_1_22
w_1_23 -= learning_rate * d_loss_w_1_23

# Second forward pass with the updated weights.
z_1 = x_1 * w_1_11 + x_2 * w_1_21
z_2 = x_1 * w_1_12 + x_2 * w_1_22
z_3 = x_1 * w_1_13 + x_2 * w_1_23
y_pred = z_1 * w_2_11 + z_2 * w_2_21 + z_3 * w_2_31
print("Final: ", y_pred)

loss = 0.5 * (expected_output - y_pred) ** 2
print("当前的loss值为: ", loss)

# Output recorded alongside the original snippet:
#   initial prediction: 180.0
#   initial loss:       7200.0
#   final prediction:   140.3136
#   final loss:         3225.1371724800006
为了展示梯度下降算法寻找函数最小值的过程,我们可以使用 Python 的 matplotlib 库来可视化这个过程。以下是一个简单的案例,使用梯度下降算法来寻找一个二次函数(例如 $f(x) = x^2$)的最小值:先静态地显示梯度下降的过程,然后将其制作成动画。
import numpy as np
import matplotlib.pyplot as plt


def f(x):
    """Objective function: the parabola x squared."""
    return x ** 2


def df(x):
    """Derivative of the objective function at x."""
    return 2 * x


def gradient_descent(x_start, learning_rate, epochs):
    """Run a fixed number of gradient-descent steps on f.

    Args:
        x_start: Position from which the descent begins.
        learning_rate: Factor applied to the gradient at every step.
        epochs: Number of update steps to perform.

    Returns:
        A list holding the position reached after each step; the starting
        position itself is not recorded.
    """
    position = x_start
    visited = []
    for _ in range(epochs):
        position -= learning_rate * df(position)
        visited.append(position)
    return visited


# Descent settings.
x_start = 5.0
learning_rate = 0.1
epochs = 50
history = gradient_descent(x_start, learning_rate, epochs)

# Sample the curve that serves as the plot background.
x_values = np.linspace(-10, 10, 400)
y_values = list(map(f, x_values))

# Static figure: the parabola plus every position visited by the descent.
plt.figure(figsize=(10, 6))
plt.plot(x_values, y_values, label="f(x) = x^2")
plt.scatter(
    history,
    list(map(f, history)),
    color='red',
    label="Gradient Descent Path",
)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.title("Gradient Descent to Find Minimum of f(x) = x^2")
plt.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Animated view of 1-D gradient descent on f(x) = x^2.
# NOTE: this snippet relies on f, df, x_start, learning_rate and epochs
# being defined earlier in the file (the static 1-D example).

fig, ax = plt.subplots(figsize=(10, 6))

# Background curve.
x_values = np.linspace(-10, 10, 400)
y_values = [f(x) for x in x_values]
ax.plot(x_values, y_values, label="f(x) = x^2")

# Marker that is moved along the curve, one frame per descent step.
line, = ax.plot([], [], 'ro', animated=True)

ax.set_xlim(-10, 10)
ax.set_ylim(0, 100)
ax.set_xlabel("x")
ax.set_ylabel("f(x)")
ax.set_title("Gradient Descent Animation for f(x) = x^2")

# Precompute the real descent trajectory. Each position depends on the
# previous one, so it cannot be derived from the frame index alone (the
# original per-frame formula did not follow the update rule
# x <- x - learning_rate * df(x)). Frame 0 shows the starting point.
descent_path = [x_start]
for _ in range(epochs - 1):
    previous = descent_path[-1]
    descent_path.append(previous - learning_rate * df(previous))


def init():
    """Clear the marker before the animation starts."""
    line.set_data([], [])
    return line,


def animate(i):
    """Place the marker at the position reached after i descent steps.

    Args:
        i: Frame index supplied by FuncAnimation.

    Returns:
        A tuple with the updated artist, as required when blit=True.
    """
    x = descent_path[i]
    # set_data is given sequences; recent Matplotlib releases reject
    # bare scalars here.
    line.set_data([x], [f(x)])
    return line,


# Keep a reference to the animation object so it is not garbage-collected
# while the window is open.
ani = animation.FuncAnimation(
    fig, animate, init_func=init, frames=epochs, interval=100, blit=True
)
plt.show()
- 注意:在实际运行动画代码之前,请确保已经安装了必要的库,并且你的环境支持动画的显示。
在三维空间中实现梯度下降算法的可视化会稍微复杂一些,因为我们需要处理三个维度。为了简化问题,我们可以选择一个简单的二元函数,比如 $f(x, y) = x^2 + y^2$,它的图像是一个碗状的三维曲面,其最小值在原点 $(0, 0)$。
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation


def f(x, y):
    """Objective function: the paraboloid x^2 + y^2."""
    return x**2 + y**2


def grad_f(x, y):
    """Return the gradient of f at (x, y) as a two-element array."""
    return np.array([2 * x, 2 * y])


def gradient_descent(x_start, y_start, learning_rate, epochs):
    """Run a fixed number of gradient-descent steps on f.

    Args:
        x_start: Initial x coordinate.
        y_start: Initial y coordinate.
        learning_rate: Factor applied to the gradient at every step.
        epochs: Number of update steps to perform.

    Returns:
        An array with one row (x, y, f(x, y)) per step; the starting
        point itself is not recorded.
    """
    trajectory = []
    x = x_start
    y = y_start
    for _ in range(epochs):
        step = learning_rate * grad_f(x, y)
        x, y = x - step[0], y - step[1]
        trajectory.append((x, y, f(x, y)))
    return np.array(trajectory)


# Descent settings.
x_start, y_start = 1.5, 1.5
learning_rate = 0.1
epochs = 50
path = gradient_descent(x_start, y_start, learning_rate, epochs)

# 3-D axes for the surface and the descent path.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')

# Evaluate the surface on a regular 100 x 100 grid over [-2, 2] x [-2, 2].
X, Y = np.meshgrid(np.linspace(-2, 2, 100), np.linspace(-2, 2, 100))
Z = f(X, Y)
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='viridis', alpha=0.8)

# Columns of the path array are the x, y and f(x, y) values of each step.
x_vals, y_vals, z_vals = path.T
ax.plot(x_vals, y_vals, z_vals, 'r-', label='Gradient Descent Path')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('Gradient Descent in 3D')
ax.legend()
plt.show()
- 首先定义了一个目标函数 $f(x, y)$ 和它的梯度函数
grad_f(x, y)