输出结果
该数据集包含 20640 个观测值、9 个变量:目标变量为房屋价值中位数,其余 8 个为输入变量(特征),分别是收入中位数、房龄中位数、平均房间数、平均卧室数、人口、平均入住人数、纬度和经度。
代码设计
#DL:基于sklearn的加利福尼亚房价数据集实现GD算法
import tensorflow as tf
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
# Fetch the California housing dataset.
housing = fetch_california_housing()
m, n = housing.data.shape  # m = number of samples, n = number of features

# Dump the dataset's keys, feature names, target values and description.
for item in (housing.keys(), housing.feature_names, housing.target, housing.DESCR):
    print(item)

# Column of ones that serves as the bias (intercept) input.
bias_column = np.ones((m, 1))
housing_data_plus_bias = np.c_[bias_column, housing.data]

# Standardize the features, then prepend the bias column; `data` is what the
# model is trained on.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(housing.data)
data = np.c_[bias_column, scaled_data]
# Hyperparameters for gradient descent.
n_epoch = 1000
learning_rate = 0.01

# Feed the whole dataset into the graph as constants (not placeholders):
# every update step uses the full batch.
X = tf.constant(data, dtype=tf.float32, name="X")
# The target must be a column vector of shape (m, 1) so that `y_pred - y`
# is an element-wise residual with the same shape as the predictions.
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name='y')

# theta holds the weights (bias + one weight per feature). It is initialized
# with uniform random values in [-1, 1), similar to numpy's random functions.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1, 1), name='theta')

# Linear model prediction and its residual.
y_pred = tf.matmul(X, theta, name='prediction')
error = y_pred - y

# Cost function: Mean Squared Error.
mse = tf.reduce_mean(tf.square(error), name='mse')
# Build the op that performs one gradient-descent update of theta (the key step).
# Option 1: manual differentiation of the MSE, kept for reference:
# gradient = 2/m * tf.matmul(tf.transpose(X), error)
# training_op = tf.assign(theta, theta - learning_rate * gradient)  # weight-update rule
# Option 2: automatic differentiation via the built-in optimizer.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

# Initialize the variables, open a session and run the graph step by step.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epoch):
        # Report the current cost every 100 epochs.
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        # One update of theta; without this step theta keeps its random
        # initial value and the MSE never decreases.
        sess.run(training_op)
    # Must be evaluated while the session is still open.
    print('best theta:', theta.eval())