实验目的
使用Python实现K近邻
实验原理
(1)计算测试数据与各个训练数据之间的距离;
(2)按照距离的递增关系进行排序;
(3)选取距离最小的K个点;
(4)确定前K个点所在类别出现的频率;
(5)返回前K个点中出现频率最高的类别作为测试数据的预测分类。
(1) 鸢尾花识别。
import numpy as np
import pandas as pd


def getdata(path):
    """Load the training set from a GBK-encoded CSV file.

    pandas treats the first line of the file as the header row, so that
    line is not part of the returned array.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A NumPy array with one row per sample.  The rest of this script
        treats the last column as the class label and the preceding
        columns as features.
    """
    data = pd.read_csv(path, encoding='gbk')
    return np.array(data)


def getlength(data, target, k):
    """Return the row indices of the k samples closest to ``target``.

    Closeness is the squared Euclidean distance over every column except
    the last one (the label).  Squaring is monotonic, so the ranking is
    the same as for the true Euclidean distance.

    Args:
        data: 2-D array-like; each row is ``[feature..., label]``.
        target: 1-D array-like holding the query point's features.
        k: Number of neighbours requested.

    Returns:
        A list of at most ``k`` distinct row indices, nearest first.  Ties
        are broken by ascending row index.  The list is empty when ``k``
        is not positive or ``data`` has no rows, and is shorter than ``k``
        when fewer than ``k`` samples exist.
    """
    samples = np.asarray(data)
    if k <= 0 or samples.size == 0:
        return []

    # Convert only the feature columns so a non-numeric label column
    # cannot break the arithmetic.
    features = samples[:, :-1].astype(float)
    query = np.asarray(target, dtype=float)
    distances = np.sum((features - query) ** 2, axis=1)

    # A stable sort never yields the same index twice (the previous
    # "overwrite the minimum with the maximum" masking could) and keeps
    # equal distances in ascending index order.
    nearest = np.argsort(distances, kind='stable')[:k]
    return nearest.tolist()


def KNNs(data, indexList):
    """Majority vote over the labels of the selected neighbours.

    Args:
        data: Indexable collection of samples; the last element of each
            sample is its class label.
        indexList: Row indices of the neighbours that take part in the vote.

    Returns:
        The label with the most votes.  On a tie the label that comes
        first in the order 0, 1, 2, then any other label in order of first
        appearance, wins.  Returns 0 when ``indexList`` is empty.
    """
    # Pre-seeding 0/1/2 fixes the tie-breaking order and the key type;
    # labels outside that set are still counted instead of raising KeyError.
    votes = {0: 0, 1: 0, 2: 0}
    for index in indexList:
        label = data[index][-1]
        votes[label] = votes.get(label, 0) + 1
    # max() returns the first key that reaches the highest count, which
    # matches a strict ">" scan over the dict in insertion order.
    return max(votes, key=votes.get)


def main():
    """Interactive loop: read k, classify a fixed sample, print the species."""
    species = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
    path = 'iris_training.csv'
    data = getdata(path)
    target = np.array([5, 3, 1, 0.5])

    while True:
        try:
            answer = input("请输入k(退出请输入quit):")
        except EOFError:
            # Input stream closed: leave the loop as if the user had quit.
            break
        if answer == 'quit':
            print("退出成功")
            break

        # Validate inside the loop so a bad value really does re-prompt
        # instead of terminating the program.
        try:
            k = int(answer)
        except ValueError:
            print("输入出错,请重新输入")
            continue
        if k <= 0:
            print("输入出错,请重新输入")
            continue

        indexList = getlength(data, target, k)
        n = KNNs(data, indexList)
        # Fall back to the raw label if it has no species name.
        print(species.get(n, n))


if __name__ == '__main__':
    main()