用PCA将小麦的数据降维到二维
import matplotlib.pyplot as plt
import numpy
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
# Load the seed measurements: one sample per line, numeric fields separated
# by tabs.  The context manager closes the file even if reading fails.
with open('seeds_dataset.txt', 'r') as fp:
    # split() with no argument discards the trailing newline and treats any
    # run of whitespace (including repeated tabs) as a single separator;
    # whitespace-only lines are skipped so they cannot become empty rows.
    ls = [line.split() for line in fp if line.strip()]

# Convert the parsed string fields to a float ndarray.
ls = numpy.array(ls, dtype=float)
print(ls)
# Wrap the numeric array in a DataFrame; columns get default integer labels.
ls_pd = pd.DataFrame(data=ls)

# Notebook-style preview of columns 1 through 6 (label slices with .loc are
# inclusive on both ends); the value is discarded when run as a script.
# NOTE(review): column 0 is not part of this slice -- confirm it is meant to
# be left out of the features.
ls_pd.loc[:, 1:6]

# Column 7 is kept separately and later used as the per-sample group label.
y = ls_pd[7]
# Standardize the selected feature columns before PCA.
# NOTE(review): the slice 1:6 excludes column 0 -- confirm this is intended.
sc = StandardScaler()
data_std = sc.fit_transform(ls_pd.loc[:, 1:6])

# Project the standardized data onto two principal components.
pca = PCA(n_components=2)
reduced_x = pca.fit_transform(data_std)

# Notebook-style display of the projected coordinates; no effect in a script.
reduced_x
# Split the 2-D projected points into three groups by label:
# label 1 -> red, label 2 -> blue, any other label -> green.
red_x, red_y = [], []
blue_x, blue_y = [], []
green_x, green_y = [], []

# Dispatch table from label value to the coordinate lists that collect it.
buckets = {1: (red_x, red_y), 2: (blue_x, blue_y)}
fallback = (green_x, green_y)

# zip pairs each label with its point by position, so the grouping does not
# depend on the labels of the Series index (y[i] is a label-based lookup).
for label, point in zip(y, reduced_x):
    xs, ys = buckets.get(label, fallback)
    xs.append(point[0])
    ys.append(point[1])
# Visualization: one scatter series per group, each with its own colour and
# marker, drawn in the order red, blue, green.
series = (
    (red_x, red_y, 'r', 'x'),
    (blue_x, blue_y, 'b', 'D'),
    (green_x, green_y, 'g', '.'),
)
for px, py, colour, marker in series:
    plt.scatter(px, py, c=colour, marker=marker)
plt.show()
结果如下: