1.8 How to choose hyperparameters? For example, how many training iterations are enough?
#1 Load and display the data with pandas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

path = 'ex2data1.txt'
data = pd.read_csv(path, header=None, names=['Exam1', 'Exam2', 'Admitted'])
data.head()
|   | Exam1     | Exam2     | Admitted |
|---|-----------|-----------|----------|
| 0 | 34.623660 | 78.024693 | 0        |
| 1 | 30.286711 | 43.894998 | 0        |
| 2 | 35.847409 | 72.902198 | 0        |
| 3 | 60.182599 | 86.308552 | 1        |
| 4 | 79.032736 | 75.344376 | 1        |
# Separate positive/negative examples, split features X from labels y,
# and prepend a bias column of ones
positive = data[data["Admitted"].isin([1])]
negative = data[data["Admitted"].isin([0])]
col_num = data.shape[1]
X = data.iloc[:, :col_num-1]
y = data.iloc[:, col_num-1]
X.insert(0, "ones", 1)
X = X.values
y = y.values
# 1 Split the data: 80/20 train/test, then carve a validation set out of the training portion
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1)
X_train.shape,X_test.shape,X_val.shape
((64, 3), (20, 3), (16, 3))
y_train.shape,y_test.shape,y_val.shape
((64,), (20,), (16,))
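The cells in this section call the helpers sigmoid, h, and cost defined in the earlier sections. For completeness, a minimal sketch of those helpers, assuming the standard logistic-regression definitions used so far:

def sigmoid(z):
    # logistic function: maps any real value into (0, 1)
    return 1 / (1 + np.exp(-z))

def h(X, w):
    # hypothesis: predicted probability for each sample; X is (m, n+1), w is (n+1, 1)
    return sigmoid(X @ w)

def cost(X, w, y):
    # mean cross-entropy loss; y is a flat (m,) vector of 0/1 labels
    y_hat = h(X, w).ravel()
    right = np.multiply(y, np.log(y_hat)) + np.multiply(1 - y, np.log(1 - y_hat))
    return -np.sum(right) / X.shape[0]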
# 2 Modify gradient descent: to keep the original signature for the training
#   data, pass the training set as X, y and track the validation cost as well
def gradient_descent(X, y, X_val, y_val, iter_num, alpha):
    y = y.reshape((X.shape[0], 1))
    w = np.zeros((X.shape[1], 1))
    cost_lst = []   # training cost at each iteration
    cost_val = []   # validation cost at each iteration
    lst_w = []      # weight snapshot at each iteration
    for i in range(iter_num):
        y_pred = h(X, w) - y
        temp = np.zeros((X.shape[1], 1))
        for j in range(X.shape[1]):
            right = np.multiply(y_pred.ravel(), X[:, j])
            gradient = 1 / X.shape[0] * np.sum(right)
            temp[j, 0] = w[j, 0] - alpha * gradient
        w = temp
        cost_lst.append(cost(X, w, y.ravel()))
        cost_val.append(cost(X_val, w, y_val.ravel()))
        lst_w.append(w)
    return lst_w, cost_lst, cost_val
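The per-feature inner loop above is faithful to the earlier sections but slow at millions of iterations. For reference only, an equivalent vectorized update (a sketch, not the code used for the runs below) computes all partial derivatives at once:

def gradient_descent_vec(X, y, X_val, y_val, iter_num, alpha):
    # same algorithm as gradient_descent above, with the j-loop replaced by X.T @ (h - y) / m
    y = y.reshape((-1, 1))
    w = np.zeros((X.shape[1], 1))
    lst_w, cost_lst, cost_val = [], [], []
    for i in range(iter_num):
        grad = X.T @ (h(X, w) - y) / X.shape[0]
        w = w - alpha * grad
        cost_lst.append(cost(X, w, y.ravel()))
        cost_val.append(cost(X_val, w, y_val.ravel()))
        lst_w.append(w)
    return lst_w, cost_lst, cost_val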
# Run gradient descent
iter_num, alpha = 6000000, 0.001
lst_w, cost_lst, cost_val = gradient_descent(X_train, y_train, X_val, y_val, iter_num, alpha)
plt.plot(range(iter_num), cost_lst, "b-+")
plt.plot(range(iter_num), cost_val, "r-^")
plt.legend(["train", "validate"])
plt.show()
# Check the costs at 500,000 iterations
print(cost_lst[500000], cost_val[500000])
0.24994786329203897 0.18926411883434127
# Test error at 50,000 iterations
k = 50000
w = lst_w[k]
print(cost_lst[k], cost_val[k])
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
0.45636730725628694 0.4573279187241135
0.7
# Test error at 80,000 iterations
k = 80000
w = lst_w[k]
print(cost_lst[k], cost_val[k])
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
0.40603054170171965 0.39424783821776516
0.75
# Test error at 100,000 iterations
k = 100000
print(cost_lst[k], cost_val[k])
w = lst_w[k]
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
0.381898564816469 0.36355983465263897
0.8
# Check the result at 3,000,000 iterations
k = 3000000
print(cost_lst[k], cost_val[k])
w = lst_w[k]
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
0.19780791870188535 0.11432680130573875
0.85
# Check the result at 5,000,000 iterations
k = 5000000
print(cost_lst[k], cost_val[k])
w = lst_w[k]
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
0.19393055410160026 0.10754181199189947
0.85
# Check the result at the last iteration (k = 5,999,999, just under 6 million)
k = 5999999
print(cost_lst[k], cost_val[k])
w = lst_w[k]
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
0.19319692059853838 0.10602762617262468
0.85
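Rather than probing checkpoints by hand as above, the iteration count can be read off directly as the index with the lowest validation cost; a minimal sketch, reusing the lst_w and cost_val lists returned by the run above:

# early stopping: pick the iteration that minimizes validation cost
k_best = int(np.argmin(cost_val))
w_best = lst_w[k_best]
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w_best).ravel()])
print(k_best, cost_val[k_best], np.sum(y_p_true == y_test) / X_test.shape[0])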
1.9 How to choose hyperparameters? For example, what learning rate works well?
#1 Set a list of candidate learning rates, compute the validation error for
#  each, and pick the rate with the smallest validation error
alpha_lst = [0.1, 0.08, 0.03, 0.01, 0.008, 0.003, 0.001, 0.0008, 0.0003, 0.00001]
def gradient_descent(X, y, iter_num, alpha):
    # same update as before, but without validation tracking or weight snapshots
    y = y.reshape((X.shape[0], 1))
    w = np.zeros((X.shape[1], 1))
    cost_lst = []
    for i in range(iter_num):
        y_pred = h(X, w) - y
        temp = np.zeros((X.shape[1], 1))
        for j in range(X.shape[1]):
            right = np.multiply(y_pred.ravel(), X[:, j])
            gradient = 1 / X.shape[0] * np.sum(right)
            temp[j, 0] = w[j, 0] - alpha * gradient
        w = temp
        cost_lst.append(cost(X, w, y.ravel()))
    return w, cost_lst
lst_val = []
iter_num = 100000
lst_w = []
for alpha in alpha_lst:
    w, cost_lst = gradient_descent(X_train, y_train, iter_num, alpha)
    lst_w.append(w)
    lst_val.append(cost(X_val, w, y_val.ravel()))
lst_val
C:\Users\sanly\AppData\Local\Temp\ipykernel_8444\2221512341.py:5: RuntimeWarning: divide by zero encountered in log
  right=np.multiply(y.ravel(),np.log(y_hat).ravel())+np.multiply((1-y).ravel(),np.log(1-y_hat).ravel())
C:\Users\sanly\AppData\Local\Temp\ipykernel_8444\2221512341.py:5: RuntimeWarning: invalid value encountered in multiply
  right=np.multiply(y.ravel(),np.log(y_hat).ravel())+np.multiply((1-y).ravel(),np.log(1-y_hat).ravel())
[nan,
 nan,
 nan,
 1.302365681883988,
 0.9807991089640924,
 0.6863333276415668,
 0.3635612014705094,
 0.3942497801600069,
 0.5169328809489743,
 0.6448319202310255]
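The nan entries for the three largest learning rates are a sign of divergence: the weights grow until the sigmoid saturates to exactly 0 or 1 in floating point, and log(0) is -inf. A common guard is to clip the predictions away from 0 and 1; a sketch below (this cost_safe variant is hypothetical and not used in the runs here):

def cost_safe(X, w, y, eps=1e-12):
    # clip predictions into [eps, 1 - eps] so both logs stay finite
    y_hat = np.clip(h(X, w).ravel(), eps, 1 - eps)
    right = np.multiply(y, np.log(y_hat)) + np.multiply(1 - y, np.log(1 - y_hat))
    return -np.sum(right) / X.shape[0]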
np.array(lst_val)
array([ nan, nan, nan, 1.30236568, 0.98079911, 0.68633333, 0.3635612 , 0.39424978, 0.51693288, 0.64483192])
lst_val[3:]
[1.302365681883988, 0.9807991089640924, 0.6863333276415668, 0.3635612014705094, 0.3942497801600069, 0.5169328809489743, 0.6448319202310255]
np.argmin(np.array(lst_val[3:]))
3
# The best learning rate is
alpha_best = alpha_lst[3 + np.argmin(np.array(lst_val[3:]))]
alpha_best
0.001
# Visualize the validation error for each (finite) learning rate
plt.scatter(alpha_lst[3:], lst_val[3:])
<matplotlib.collections.PathCollection at 0x1d1d48738b0>
# Check the test set result
# Take the w that corresponds to the best learning rate
w_best = lst_w[3 + np.argmin(np.array(lst_val[3:]))]
print(w_best)
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w_best).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
[[-4.72412058]
 [ 0.0504264 ]
 [ 0.0332232 ]]
0.8
# Test accuracy for each of the other learning rates
for w in lst_w[3:]:
    y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w).ravel()])
    print(np.sum(y_p_true == y_test) / X_test.shape[0])
0.75
0.75
0.6
0.8
0.75
0.6
0.55
1.10 How to choose hyperparameters? Try tuning the L2 regularization factor
Exercise: tune the regularization factor. The range for the regularization factor lambda is given below; following the learning-rate tuning procedure above, complete the code below.
# 1 The regularization factor's range can be set somewhat larger than the learning rate's
lambda_lst = [0.001, 0.003, 0.008, 0.01, 0.03, 0.08, 0.1, 0.3, 0.8, 1, 3, 10]
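For reference, the regularized cost and per-weight gradient that the two functions below implement; by convention the bias weight $w_0$ is excluded from the penalty:

$$J(w) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h(x^{(i)}) + (1-y^{(i)})\log\left(1-h(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n} w_j^2$$

$$\frac{\partial J}{\partial w_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h(x^{(i)})-y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}w_j \qquad (j \geq 1)$$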
# 2 Regularized cost function
def cost_reg(X, w, y, lambd):
    # X is (m, n+1), y is (m,), w is (n+1, 1); the bias w[0] is not penalized
    y_hat = sigmoid(X @ w)
    right1 = np.multiply(y.ravel(), np.log(y_hat).ravel()) + np.multiply((1-y).ravel(), np.log(1-y_hat).ravel())
    right2 = (lambd / (2 * X.shape[0])) * np.sum(np.power(w[1:, 0], 2))
    cost = -np.sum(right1) / X.shape[0] + right2
    return cost
def gradient_descent_reg(X, y, iter_num, alpha, lambd):
    # gradient descent with an L2 penalty; the bias term (j == 0) is not regularized
    y = y.reshape((X.shape[0], 1))
    w = np.zeros((X.shape[1], 1))
    cost_lst = []
    for i in range(iter_num):
        y_pred = h(X, w) - y
        temp = np.zeros((X.shape[1], 1))
        for j in range(X.shape[1]):
            if j == 0:
                right_0 = np.multiply(y_pred.ravel(), X[:, j])
                gradient_0 = 1 / X.shape[0] * np.sum(right_0)
                temp[j, 0] = w[j, 0] - alpha * gradient_0
            else:
                right = np.multiply(y_pred.ravel(), X[:, j])
                reg = (lambd / X.shape[0]) * w[j, 0]
                gradient = 1 / X.shape[0] * np.sum(right)
                temp[j, 0] = w[j, 0] - alpha * (gradient + reg)
        w = temp
        cost_lst.append(cost_reg(X, w, y, lambd))
    return w, cost_lst
# 3 Run gradient descent with L2 regularization for each lambda
iter_num, alpha = 100000, 0.001
cost_val = []
cost_w = []
for lambd in lambda_lst:
    w, cost_lst = gradient_descent_reg(X_train, y_train, iter_num, alpha, lambd)
    cost_w.append(w)
    cost_val.append(cost_reg(X_val, w, y_val, lambd))
cost_val
[0.36356132605416125,
 0.36356157522133403,
 0.3635621981384864,
 0.36356244730503007,
 0.36356493896065706,
 0.3635711680214138,
 0.36357365961439897,
 0.3635985745598491,
 0.3636608540941533,
 0.36368576277656284,
 0.36393475122711266,
 0.36480480418120226]
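One subtlety: cost_val above is computed with cost_reg, so each lambda is scored on an objective that includes its own penalty term. A comparison on the plain cross-entropy alone is arguably fairer; a sketch, reusing the unregularized cost helper from section 1.8:

# validation cross-entropy without the penalty term, one entry per lambda
cost_val_plain = [cost(X_val, w, y_val.ravel()) for w in cost_w]
print(lambda_lst[int(np.argmin(np.array(cost_val_plain)))])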
# 4 Find the index with the smallest validation error to get the best lambda
idx = np.argmin(np.array(cost_val))
print("The index with the smallest validation error is {}".format(idx))
lambda_best = lambda_lst[idx]
lambda_best
The index with the smallest validation error is 0
0.001
# 5 Test result for the best lambda
w_best = cost_w[idx]
print(w_best)
y_p_true = np.array([1 if item > 0.5 else 0 for item in h(X_test, w_best).ravel()])
np.sum(y_p_true == y_test) / X_test.shape[0]
[[-4.7241201 ]
 [ 0.05042639]
 [ 0.0332232 ]]
0.8