目录
基于sklearn自带california_housing加利福尼亚房价数据集利用GD神经网络梯度下降算法进行回归预测(数据较多时采用mini-batch方式训练会更快)
基于sklearn自带california_housing加利福尼亚房价数据集利用GD神经网络梯度下降算法进行回归预测(数据较多时采用mini-batch方式训练会更快)
该数据集共有20640个观测值、9个变量:以房屋价值中位数作为目标变量,其余8个为输入变量(特征),分别是收入中位数、房屋年龄中位数、平均房间数、平均卧室数、人口、平均入住人数、纬度和经度。
输出结果
epoch: 20 batch_id: 83 Batch loss 0.5640518069267273 …… epoch: 90 batch_id: 203 Batch loss 0.6403363943099976 epoch: 90 batch_id: 204 Batch loss 0.45315566658973694 epoch: 90 batch_id: 205 Batch loss 0.5528439879417419 epoch: 90 batch_id: 206 Batch loss 0.386596143245697
实现代码
"""Mini-batch gradient-descent linear regression on California housing.

Loads the scikit-learn California housing dataset, standardizes the
features, prepends a bias column, and fits a linear model by minimizing the
mean squared error with TensorFlow's gradient-descent optimizer, feeding
the data in mini-batches.
"""
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

# NOTE: this script is written against the TensorFlow 1.x graph API
# (tf.placeholder, tf.Session, tf.train.GradientDescentOptimizer).

# Load the housing data: m samples, n features.
housing = fetch_california_housing()
m, n = housing.data.shape
print(housing.keys())  # keys available in the dataset bunch
print(housing.feature_names)  # names of the input features
print(housing.target)
print(housing.DESCR)

# Standardize the features, then prepend a column of ones so the first
# entry of theta acts as the bias term.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(housing.data)
data = np.c_[np.ones((m, 1)), scaled_data]
# Column-vector view of the targets, computed once instead of per batch.
targets = housing.target.reshape(-1, 1)

# T1: plain placeholder demo (kept for reference, not executed).
# A = tf.placeholder(tf.float32, shape=(None, 3))
# B = A + 5
# with tf.Session() as sess:
#     test_b_1 = B.eval(feed_dict={A: [[1, 2, 3]]})
#     test_b_2 = B.eval(feed_dict={A: [[4, 5, 6], [7, 8, 9]]})
# print(test_b_1)
# print(test_b_2)

# T2: mini-batch training.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

# Hyper-parameters for the optimizer and the batching scheme.
n_epochs = 100
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

# Linear model y_pred = X @ theta, trained on the mean squared error.
theta = tf.Variable(
    tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta"
)
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()


def fetch_batch(epoch, batch_index, batch_size):
    """Return a reproducible random mini-batch ``(X_batch, y_batch)``.

    The seed is derived from ``epoch`` and ``batch_index``, so the same pair
    always selects the same rows. Row indices are drawn independently with
    ``randint``, so a batch may contain repeated rows.

    Args:
        epoch: Index of the current epoch.
        batch_index: Index of the batch within the epoch.
        batch_size: Number of rows to draw.

    Returns:
        A tuple of the selected rows of ``data`` and the matching rows of
        the target column vector.
    """
    # A local RandomState yields the same index stream as seeding the global
    # generator, without clobbering global NumPy random state on every call.
    rng = np.random.RandomState(epoch * n_batches + batch_index)
    indices = rng.randint(m, size=batch_size)
    return data[indices], targets[indices]


# Mini-batch training loop.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # Losses are reported for every batch of every tenth epoch.
        report = epoch % 10 == 0
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=feed)

            if report:
                # Loss on the batch just used, evaluated after the update.
                batch_loss = mse.eval(feed_dict=feed)
                # float() keeps the printed precision independent of NumPy's
                # scalar promotion rules.
                print(
                    "epoch:", epoch,
                    "batch_id:", batch_index,
                    "Batch loss", float(batch_loss),
                )