5、分析计算过程(以啤酒消费为例子)
def plot_vectors(vectors=((0, 0, 1, 2), (0, 0, 3, 1)),
                 texts=(r'$\vec{a}$', r'$\vec{b}$', 'power vec', 'power vec2'),
                 texts_locs=((0.5, 0.7), (0.2, 0.8), (0.7, 0.5), (0.5, 0.5)),
                 text_size=18,
                 colors=("#2EBCE7", "#00E64E", "purple", 'orange'),
                 xlim=(-1, 3), ylim=(-1, 3),
                 x_label='x', y_label='y',
                 scale=1, scale_units='xy'):
    """Draw 2-D vectors as arrows on a fresh 6x6 figure and return its axes.

    Relies on the module-level names ``plt`` and ``ticker`` (presumably
    ``matplotlib.pyplot`` and ``matplotlib.ticker``, imported elsewhere).

    Args:
        vectors: Sequence of ``(x, y, u, v)`` tuples passed to ``ax.quiver``.
        texts: Labels, matched to ``vectors`` by position.
        texts_locs: ``(x, y)`` position of each label, matched by position.
        text_size: Font size of the labels.
        colors: Arrow/label colours; reused cyclically when there are more
            vectors than colours.
        xlim, ylim: Axis limits as ``(low, high)``.
        x_label, y_label: Axis labels.
        scale, scale_units: Forwarded to ``ax.quiver``.

    Returns:
        The axes the vectors were drawn on.
    """
    _, ax = plt.subplots(figsize=(6, 6))
    # One tick per unit on both axes.
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    for i, vec in enumerate(vectors):
        # Cycle through the palette instead of failing on extra vectors.
        color = colors[i % len(colors)]
        ax.quiver(*vec, color=color, angles='xy',
                  scale_units=scale_units, scale=scale)
        # A vector without a matching label/location is drawn unlabeled.
        if i < len(texts) and i < len(texts_locs):
            ax.text(*texts_locs[i], texts[i], color=color, size=text_size)

    # Light grey lines through the origin act as the coordinate axes.
    ax.axhline(0, c='#d6d6d6', zorder=0)
    ax.axvline(0, c='#d6d6d6', zorder=0)

    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    plt.show()
    return ax
啤酒消费的数据:
【1】‘Temperatura Media (C)’:平均温度
【2】‘Temperatura Minima (C)’:最低温度
【3】‘Temperatura Maxima (C)’:最高温度
【4】‘Precipitacao (mm)’:降雨量
【5】‘Final de Semana’:是否周末
【6】‘Consumo de cerveja (litros)’:啤酒消费量(升),相当于标签
def plot_beer(x, y, x_label=r"Maximal temperature ($\degree$C)",
              y_label="Consuption (liters)", alpha=0.3, draw_axes=False):
    """Scatter-plot ``y`` against ``x`` with ticks every 5 units and show it.

    Relies on the module-level names ``plt`` and ``ticker`` (presumably
    ``matplotlib.pyplot`` and ``matplotlib.ticker``, imported elsewhere).

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        x_label: Label of the horizontal axis. The default is a raw string so
            that ``\\degree`` is not parsed as an (invalid) escape sequence.
        y_label: Label of the vertical axis.
        alpha: Opacity of the scatter points.
        draw_axes: When true, draw light grey lines through the origin.
    """
    # NOTE(review): "Consuption" in the default y_label looks like a typo for
    # "Consumption"; left unchanged so existing plots keep their label.
    plt.scatter(x, y, alpha=alpha)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    # Assure that ticks are displayed with a specific step.
    ax = plt.gca()
    ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(5))

    if draw_axes:
        # Draw the coordinate axes through the origin.
        plt.axhline(0, c='#d6d6d6', zorder=0)
        plt.axvline(0, c='#d6d6d6', zorder=0)

    # Assure that the x and y axes have the same scale.
    plt.axis('equal')
    plt.show()
# 1. Build the data set: two columns, maximal temperature and beer
#    consumption, taken from the data frame `df`.
X = np.array([
    df[column]
    for column in ('Temperatura Maxima (C)', 'Consumo de cerveja (litros)')
]).T

# 2. Covariance matrix; rowvar=False tells NumPy that each column of X is a
#    variable. The bare `C` displays the matrix when run as a notebook cell.
C = np.cov(X, rowvar=False)
C

# 3. Centre the data: subtract the per-column mean from a copy of X.
X_norm = X.copy()
X_norm -= X.mean(axis=0)
# 4. Eigenvalue estimate and power-iteration routine.
def eigenvalue(A, v):
    """Estimate the eigenvalue of ``A`` associated with the vector ``v``.

    Uses the Rayleigh quotient ``v.T A v / v.T v``. Unlike the ratio of the
    first components of ``A @ v`` and ``v``, it stays defined when ``v[0]``
    is zero and uses every component of ``v``.

    Args:
        A: Square 2-D array.
        v: Non-zero 1-D array whose length matches ``A``.

    Returns:
        The scalar Rayleigh quotient of ``A`` at ``v``.
    """
    return (v @ A @ v) / (v @ v)


def svd_power_iteration(A, tol=0.01, max_iter=1000):
    """Approximate the dominant eigenpair of a square matrix by power iteration.

    Starts from the normalised all-ones vector and repeatedly applies ``A``.
    Iteration stops once two successive eigenvalue estimates differ by less
    than ``tol``, when ``A @ v`` vanishes, or after ``max_iter`` steps.

    Note: a start vector orthogonal to the dominant eigenvector cannot
    converge to it; the all-ones start is kept for reproducibility.

    Args:
        A: Square 2-D array (e.g. a covariance matrix).
        tol: Absolute change in the eigenvalue estimate that counts as
            converged.
        max_iter: Upper bound on iterations; the latest estimate is returned
            if it is reached.

    Returns:
        Tuple ``(eigenvalue_estimate, unit_eigenvector_estimate)``.
    """
    A = np.asarray(A, dtype=float)
    d = A.shape[1]
    v = np.ones(d) / np.sqrt(d)
    ev = eigenvalue(A, v)

    for _ in range(max_iter):
        Av = A @ v
        norm = np.linalg.norm(Av)
        if norm == 0:
            # A maps v to zero: normalising would give NaNs that never
            # satisfy the convergence test, so stop with the current pair.
            break
        v_new = Av / norm
        ev_new = eigenvalue(A, v_new)
        converged = np.abs(ev - ev_new) < tol
        v, ev = v_new, ev_new
        if converged:
            break

    return ev, v
# Run the power iteration on the covariance matrix C; the result is an
# (eigenvalue, eigenvector) estimate for C.
eigen_value, eigen_vec = svd_power_iteration(C)
得到的特征值:
与numpy的计算功能对比:
# Cross-check against NumPy's own SVD of the covariance matrix.
# np.linalg.svd returns (u, s, vh): the third value holds the right singular
# vectors as rows, i.e. already transposed.
u, s, v = np.linalg.svd(C, full_matrices=True)
下面是对 numpy 中 SVD 返回的三个结果(u、s、v)的解释:
# Find and draw the principal eigenvector.
def plot_eigenvectors(eigen_vecs, eigen_values, colors=("#FF8177", "orange")):
    """Draw each eigenvector as an arrow from the origin on the current axes.

    Every arrow has length ``2 * sqrt(eigenvalue)`` along its eigenvector.
    Relies on the module-level name ``plt`` (presumably
    ``matplotlib.pyplot``, imported elsewhere).

    Args:
        eigen_vecs: Sequence of 2-component eigenvectors.
        eigen_values: Eigenvalues, matched to ``eigen_vecs`` by position.
        colors: Arrow colours; reused cyclically when there are more vectors
            than colours.
    """
    for i, eigen_vec in enumerate(eigen_vecs):
        length = 2 * np.sqrt(eigen_values[i])
        plt.quiver(0, 0,
                   length * eigen_vec[0], length * eigen_vec[1],
                   color=colors[i % len(colors)],
                   angles="xy", scale_units="xy", scale=1,
                   zorder=2, width=0.011)


# Overlay the eigenvector found by power iteration on the centred data.
plot_eigenvectors([eigen_vec], [eigen_value], colors=["#FF8177", "orange"])
plot_beer(X_norm[:, 0], X_norm[:, 1], draw_axes=True)
6、SVM,SVR,SVC的区别
- SVM=Support Vector Machine,即支持向量机
- SVC=Support Vector Classification,即支持向量机用于分类
- SVR=Support Vector Regression,即支持向量机用于回归分析
7、特征值与奇异值分解
#特征值分解 from scipy import linalg import numpy as np A=np.array([[1,2],[3,4]]) l,v=linalg.eig(A) print(l) print(v) ''' [-0.37228132+0.j 5.37228132+0.j] [[-0.82456484 -0.41597356] [ 0.56576746 -0.90937671]] '''
# Singular value decomposition of a 2x3 matrix.
# Uses an explicit `np` import and a plain ndarray: `from numpy import *`
# shadows builtins such as sum/any/all, and `mat` is no longer exported by
# the NumPy 2.0 main namespace.
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6]])
U, sigma, VT = np.linalg.svd(data)
print('U:', U)
print('SIGMA:', sigma)
print('VT:', VT)
# Recorded output:
# U: [[-0.3863177   0.92236578]
#  [-0.92236578 -0.3863177 ]]
# SIGMA: [9.508032   0.77286964]
# VT: [[-0.42866713 -0.56630692 -0.7039467 ]
#  [-0.80596391 -0.11238241  0.58119908]
#  [ 0.40824829 -0.81649658  0.40824829]]
# Singular value decomposition, example 2: a singular 3x3 matrix, so the
# third singular value is zero up to rounding.
# Uses an explicit `np` import and a plain ndarray: `from numpy import *`
# shadows builtins such as sum/any/all, and `mat` is no longer exported by
# the NumPy 2.0 main namespace.
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
U, sigma, VT = np.linalg.svd(data)
print('U:', U)
print('SIGMA:', sigma)
print('VT:', VT)
# Recorded output:
# U: [[-0.21483724  0.88723069  0.40824829]
#  [-0.52058739  0.24964395 -0.81649658]
#  [-0.82633754 -0.38794278  0.40824829]]
# SIGMA: [1.68481034e+01 1.06836951e+00 4.41842475e-16]
# VT: [[-0.47967118 -0.57236779 -0.66506441]
#  [-0.77669099 -0.07568647  0.62531805]
#  [-0.40824829  0.81649658 -0.40824829]]
# Singular value decomposition, example 3: a 4x3 matrix, so U is 4x4 while
# only three singular values are returned.
# Uses an explicit `np` import and a plain ndarray: `from numpy import *`
# shadows builtins such as sum/any/all, and `mat` is no longer exported by
# the NumPy 2.0 main namespace.
import numpy as np

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [11, 22, 33]])
U, sigma, VT = np.linalg.svd(data)
print('U:', U)
print('SIGMA:', sigma)
print('VT:', VT)
# Recorded output:
# U: [[-0.0844147  -0.03251869  0.53753816 -0.83837308]
#  [-0.19452796  0.40431201  0.75055526  0.48513655]
#  [-0.30464122  0.84114271 -0.37527763 -0.24256827]
#  [-0.92856166 -0.35770557 -0.08298325  0.05416407]]
# SIGMA: [4.42965582e+01 4.10060089e+00 1.49502706e-15]
# VT: [[-0.29819909 -0.54195984 -0.78572058]
#  [ 0.86279235  0.19902982 -0.46473271]
#  [ 0.40824829 -0.81649658  0.40824829]]
8、案例:猩猩图片处理
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Read the picture into an array; the image must not be too large.
# The context manager releases the file handle as soon as the pixels have
# been copied, whatever the image format.
with Image.open('hxx00.jpg') as img:
    im = np.array(img)

# NOTE(review): matplotlib ignores `cmap` for RGB(A) input, so the grey
# colormap presumably only takes effect for a single-channel image.
plt.imshow(im, cmap='Greys_r')
plt.title('SRC')
plt.axis('off')
得到图片的长宽像素:为后期压缩做调整
# Notebook-style bare expression: displays the shape of the loaded image array.
im.shape
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


def _rank_k_approximation(U, Sigma, VT, k):
    """Return the matrix rebuilt from the ``k`` largest singular triplets."""
    # ndarray.dot on 2-D arrays is a matrix product; np.diag turns the first
    # k singular values into a k x k diagonal matrix.
    return U[:, :k].dot(np.diag(Sigma[:k])).dot(VT[:k, :])


# SVD of the image loaded earlier (`im`). The array is flattened to 2-D
# (rows x everything else) first: on a 3-D array np.linalg.svd would work in
# stacked mode and decompose every image row separately.
U, sigma, VT = np.linalg.svd(im.reshape(im.shape[0], -1))
# np.linalg.svd returns the singular values sorted in descending order.
print("前30个特征值是:\n", sigma[:30])

# Rebuild the image from its 10 and 50 largest singular values.
# Read the picture.
img_eg = mpimg.imread(r"hxx00.jpg")
print(img_eg.shape)

# Flatten to (rows, columns * channels) using the real image shape instead of
# hard-coded dimensions, so the reshape back below is always its exact inverse.
img_temp = img_eg.reshape(img_eg.shape[0], -1)
U, Sigma, VT = np.linalg.svd(img_temp)
print(Sigma)

# Keep the 10 largest singular values.
sval_nums = 10
img_restruct1 = _rank_k_approximation(U, Sigma, VT, sval_nums)
img_restruct1 = img_restruct1.reshape(img_eg.shape)

# Keep the 50 largest singular values.
sval_nums = 50
img_restruct2 = _rank_k_approximation(U, Sigma, VT, sval_nums)
img_restruct2 = img_restruct2.reshape(img_eg.shape)

fig, ax = plt.subplots(1, 3, figsize=(12, 16))
ax[0].imshow(img_eg)
ax[0].set(title="src")
# Low-rank reconstructions can leave [0, 255]; clip before casting so
# out-of-range values saturate instead of wrapping around.
# Assumes 8-bit pixel data, as imread yields for a JPEG - TODO confirm.
ax[1].imshow(np.clip(img_restruct1, 0, 255).astype(np.uint8))
ax[1].set(title="nums of sigma = 10")
ax[2].imshow(np.clip(img_restruct2, 0, 255).astype(np.uint8))
ax[2].set(title="nums of sigma = 50")
plt.show()