Regression算法

用线性回归找到最佳拟合直线

from google.colab import drive
drive.mount("/content/drive")
Mounted at /content/drive
from numpy import *
def loadDataSet(fileName):
    """Load a tab-separated numeric data file into feature rows and labels.

    Each non-blank line is split on tabs. The last field of a line is its
    target value and the fields before it are the features. The number of
    feature columns is taken from the first non-blank line.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list with one
        list of float features per row; ``labelMat`` is the list of float
        target values in the same order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a row has fewer fields than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single context-managed handle replaces the two unclosed open() calls.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            curLine = stripped.split('\t')
            if numFeat is None:
                # Count columns on the stripped row so the feature count
                # matches how every row is parsed below.
                numFeat = len(curLine) - 1
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

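1. `numFeat = len(open(fileName).readline().split('\t')) - 1`：读取文件第一行并按制表符切分，列数减 1 得到特征个数（最后一列是目标值）。

2. `dataMat = []` 和 `labelMat = []`：分别用来存放每一行的特征和对应的目标值。

3. `fr = open(fileName)`：重新打开文件，准备从头逐行读取。

4. `for line in fr.readlines():`：遍历文件中的每一行。

5. `lineArr = []` 和 `curLine = line.strip().split('\t')`：为当前行新建特征列表，并在去掉首尾空白后按制表符切分成字段。

6. `for i in range(numFeat):`：遍历当前行的前 numFeat 个字段。

7. `lineArr.append(float(curLine[i]))`：把字段转换为浮点数后加入当前行的特征列表。

8. `dataMat.append(lineArr)` 和 `labelMat.append(float(curLine[-1]))`：保存当前行的特征，并把最后一个字段作为目标值保存。

9. `return dataMat, labelMat`：返回特征列表和目标值列表。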

标准回归函数

def standRegres(xArr, yArr):
    """Fit ordinary least-squares weights with the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y`` where ``X`` is built from ``xArr``
    and ``y`` from ``yArr``.

    Args:
        xArr: 2-D sequence of samples, one row of feature values per sample.
        yArr: 1-D sequence of target values, one per row of ``xArr``.

    Returns:
        A column matrix of regression weights (one row per feature), or
        ``None`` after printing a message when ``X^T X`` is not invertible.
    """
    # asmatrix is the supported spelling; the `mat` alias was removed in
    # NumPy 2.0.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    xTx = xMat.T * xMat
    # Round-off can leave the determinant of a singular matrix slightly
    # non-zero, so an exact `det == 0` comparison lets such systems through.
    # Compare the numerical rank with the matrix order instead.
    if linalg.matrix_rank(xTx) < xTx.shape[0]:
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * yMat)
    return ws

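1. `def standRegres(xArr, yArr):`：定义标准回归函数，输入为特征数据 xArr 和目标值 yArr。

2. `xMat = mat(xArr)`：把特征数据转换为 NumPy 矩阵 $X$。

3. `yMat = mat(yArr).T`：把目标值转换为矩阵并转置成列向量 $y$。

4. `xTx = xMat.T * xMat`：计算 $X^TX$。

5. `if linalg.det(xTx) == 0:`：行列式为 0 说明 $X^TX$ 不可逆，此时打印提示信息并直接返回。

6. `ws = xTx.I * (xMat.T * yMat)`：按正规方程 $\hat{w} = (X^TX)^{-1}X^Ty$ 求出回归系数。

7. `return ws`：返回回归系数。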

# Load the ex0.txt samples from the mounted Drive folder and show the first
# two feature rows.
xArr, yArr = loadDataSet('/content/drive/MyDrive/Colab Notebooks/MachineLearning/《机器学习实战》/06丨预测数值型数据：回归/用线性回归找到最佳拟合直线/ex0.txt')
xArr[:2]
[[1.0, 0.067732], [1.0, 0.42781]]
# Fit the ordinary least-squares weights on the loaded samples and display them.
ws = standRegres(xArr, yArr)
ws
matrix([[3.00774324],
[1.69532264]])
# Plot the raw samples together with the fitted regression line.
xMat = asmatrix(xArr)
yMat = asmatrix(yArr)
yHat = xMat * ws
import matplotlib.pyplot as plt
fig = plt.figure()
# An Axes has to be created on the figure before anything can be drawn on it;
# without this line the `ax` name below is undefined.
ax = fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0], yMat.T[:,0].flatten().A[0])
# Sort a copy of the samples along axis 0 so the fitted line is drawn from
# left to right instead of zig-zagging in file order.
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy*ws
ax.plot(xCopy[:,1], yHat)
plt.show()

# Recompute the predictions with the rows in their original order and inspect
# the shape of the result.
yHat = xMat*ws
yHat.shape
(200, 1)
# yMat was built from yArr without a transpose; inspect its shape before
# comparing it with yHat.
yMat.shape
(1, 200)
# Correlation between predictions and targets; yHat is transposed so that both
# arguments have the same orientation.
corrcoef(yHat.T, yMat)
array([[1.        , 0.98647356],
[0.98647356, 1.        ]])

局部加权线性回归函数

def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict one point with locally weighted linear regression.

    Every training row ``x_j`` receives the Gaussian kernel weight
    ``exp(-||testPoint - x_j||^2 / (2 * k**2))``. The weighted normal
    equations ``ws = (X^T W X)^-1 X^T W y`` are then solved and applied to
    ``testPoint``.

    Args:
        testPoint: Feature values of the point to predict, laid out like one
            row of ``xArr``.
        xArr: 2-D sequence of training samples, one row per sample.
        yArr: 1-D sequence of target values, one per row of ``xArr``.
        k: Kernel width; smaller values give nearby samples more influence.

    Returns:
        ``testPoint * ws`` (a 1x1 matrix when ``testPoint`` is a flat
        sequence or a single matrix row), or ``None`` after printing a
        message when ``X^T W X`` has a zero determinant.
    """
    # asmatrix is the supported spelling; the `mat` alias was removed in
    # NumPy 2.0.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    m = shape(xMat)[0]
    weights = asmatrix(eye(m))
    for j in range(m):
        diffMat = testPoint - xMat[j, :]
        # diffMat * diffMat.T is a 1x1 matrix. Pull out its single element
        # so a real scalar is stored; assigning the 1x1 matrix directly is
        # deprecated since NumPy 1.25 and slated to become an error.
        sqDist = (diffMat * diffMat.T)[0, 0]
        weights[j, j] = exp(sqDist / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    # The exact determinant test is kept on purpose: small k makes the
    # weighted system ill-conditioned by design, and those fits should still
    # be attempted.
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoint * ws

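`def lwlr(testPoint, xArr, yArr, k=1.0):`：对单个待预测点 testPoint 做局部加权线性回归，参数 k 控制高斯核的宽度。

`xMat = mat(xArr)`：把训练样本的特征转换为矩阵 $X$。

`yMat = mat(yArr).T`：把目标值转换为列向量 $y$。

`m = shape(xMat)[0]`：取得训练样本的个数 m。

`weights = mat(eye(m))`：创建 m×m 的单位矩阵作为对角权重矩阵 $W$ 的初值。

`for j in range(m):`：遍历每一个训练样本。

`diffMat = testPoint - xMat[j, :]`：计算待预测点与第 j 个样本的差向量。

`weights[j, j] = exp(diffMat*diffMat.T/(-2.0*k**2))`：用高斯核计算第 j 个样本的权重 $w_{jj} = \exp\left(-\frac{\lVert x - x_j\rVert^2}{2k^2}\right)$，样本离待预测点越近权重越大。

`xTx = xMat.T * (weights * xMat)`：计算 $X^TWX$。

`if linalg.det(xTx) == 0.0:`：行列式为 0 说明 $X^TWX$ 不可逆，此时打印提示信息并直接返回。

`ws = xTx.I * (xMat.T * (weights * yMat))`：求出局部回归系数 $\hat{w} = (X^TWX)^{-1}X^TWy$。

`return testPoint * ws`：用回归系数计算待预测点的预测值并返回。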

def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run locally weighted linear regression on every row of ``testArr``.

    Args:
        testArr: 2-D sequence of points to predict, one row per point.
        xArr: 2-D sequence of training samples, one row per sample.
        yArr: 1-D sequence of target values, one per row of ``xArr``.
        k: Kernel width forwarded to ``lwlr``.

    Returns:
        A 1-D NumPy array holding one prediction per row of ``testArr``.

    Raises:
        TypeError: If ``lwlr`` returns ``None`` for a point because its
            weighted system is singular.
    """
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        # lwlr returns a 1x1 matrix; store its single element explicitly.
        # Assigning the matrix itself into a scalar slot is deprecated since
        # NumPy 1.25 and slated to become an error.
        yHat[i] = lwlr(testArr[i], xArr, yArr, k)[0, 0]
    return yHat
# Reload the ex0.txt samples and show the first target value for reference.
xArr, yArr = loadDataSet('/content/drive/MyDrive/Colab Notebooks/MachineLearning/《机器学习实战》/06丨预测数值型数据：回归/用线性回归找到最佳拟合直线/ex0.txt')
yArr[0]
3.176513
# Predict the first sample with a wide kernel (k=1.0).
lwlr(xArr[0], xArr, yArr, 1.0)
<ipython-input-27-f0eaaa458f3a>:8: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
weights[j, j] = exp(diffMat*diffMat.T/(-2.0*k**2))

matrix([[3.12204471]])
# Predict the same sample with a very narrow kernel (k=0.001).
lwlr(xArr[0], xArr, yArr, 0.001)
<ipython-input-27-f0eaaa458f3a>:8: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
weights[j, j] = exp(diffMat*diffMat.T/(-2.0*k**2))

matrix([[3.20175729]])
# Predict every training sample with k=0.01; the result is plotted below.
yHat = lwlrTest(xArr, xArr, yArr, 0.01)
<ipython-input-27-f0eaaa458f3a>:8: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
weights[j, j] = exp(diffMat*diffMat.T/(-2.0*k**2))
<ipython-input-28-3481d8d2a021>:5: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
yHat[i] = lwlr(testArr[i], xArr, yArr, k)
# Plot the locally weighted fit over the raw samples.
xMat = asmatrix(xArr)
# Row order that sorts the samples by their second column.
srtInd = xMat[:,1].argsort(0)
# Indexing with the column-shaped index adds an axis; [:,0,:] drops it again.
xSort = xMat[srtInd][:,0,:]
import matplotlib.pyplot as plt
fig = plt.figure()
# An Axes has to be created on the figure before anything can be drawn on it;
# without this line the `ax` name below is undefined.
ax = fig.add_subplot(111)
ax.plot(xSort[:,1], yHat[srtInd])
ax.scatter(xMat[:,1].flatten().A[0], asmatrix(yArr).T.flatten().A[0], s=2, c='red')
plt.show()

