设计思路
更新……
输出结果
['"alcohol"', '"volatile acidity"', '"sulphates"', '"total sulfur dioxide"', '"chlorides"', '"fixed acidity"', '"pH"', '"free sulfur dioxide"', '"citric acid"', '"residual sugar"', '"density"']
1、LARS
2、10-fold cross validation
Minimum Mean Square Error 0.5873018933136459
Index of Minimum Mean Square Error 311
实现代码
#initialize a vector of coefficients beta
beta = [0.0] * ncols
#initialize matrix of betas at each step
betaMat = []
betaMat.append(list(beta))
#number of steps to take
nSteps = 350
stepSize = 0.004
nzList = []
for i in range(nSteps):
#calculate residuals
residuals = [0.0] * nrows
for j in range(nrows):
labelsHat = sum([xNormalized[j][k] * beta[k] for k in range(ncols)])
residuals[j] = labelNormalized[j] - labelsHat
#calculate correlation between attribute columns from normalized wine and residual
corr = [0.0] * ncols
for j in range(ncols):
corr[j] = sum([xNormalized[k][j] * residuals[k] for k in range(nrows)]) / nrows
iStar = 0
corrStar = corr[0]
for j in range(1, (ncols)):
if abs(corrStar) < abs(corr[j]):
iStar = j; corrStar = corr[j]
beta[iStar] += stepSize * corrStar / abs(corrStar)
betaMat.append(list(beta))
nzBeta = [index for index in range(ncols) if beta[index] != 0.0]
for q in nzBeta:
if (q in nzList) == False:
nzList.append(q)
nameList = [names[nzList[i]] for i in range(len(nzList))]
print(nameList)
for i in range(ncols):
#plot range of beta values for each attribute
coefCurve = [betaMat[k][i] for k in range(nSteps)]
xaxis = range(nSteps)
plot.plot(xaxis, coefCurve)