# 模式识别与机器学习(作业5)基于PCA–LDA的人脸识别

## 1. 导入数据

#%%导入数据
import numpy as np
import cv2
import os.path as osp
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Load the training and test images from the "train_1" and "test" folders
# located under the current working directory.
def _load_image_set(folder):
    """Read every image in *folder* into a flattened sample matrix.

    File names are expected to contain exactly one underscore; the part
    before it is used as the class label (the two-way unpack below raises
    ValueError otherwise).

    Returns a ``(samples, labels)`` pair: a float32 array with one flattened
    image per row, and an array holding the label of each row.
    """
    samples, labels = [], []
    # Sorted so that the sample order is reproducible across platforms.
    for file_name in sorted(os.listdir(folder)):
        path = osp.join(folder, file_name)
        # NOTE(review): the original loop used `img` without ever assigning
        # it. Grayscale reading is assumed here -- confirm it matches the
        # data set.
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError("cannot read image file: {}".format(path))
        samples.append(np.reshape(img, (1, -1)))
        label, _ = file_name.split("_")
        labels.append(label)
    return np.array(samples, dtype=np.float32).squeeze(1), np.array(labels)


train_x, train_y = _load_image_set(osp.join(osp.realpath("."), "train_1"))
test_x, test_y = _load_image_set(osp.join(osp.realpath("."), "test"))


## 2. PCA降维

class PCA:
    """Principal component analysis via SVD of the centred training data.

    Typical use: ``fit`` stores the training data, ``model`` computes the
    principal directions, and ``transform`` projects samples onto the
    leading ``K`` of them.
    """

    def __init__(self):
        # Training samples and labels stored by fit(); the labels are kept
        # but are not used by any method of this class.
        self.x_train_fit = None
        self.y_train_fit = None
        # SVD factors, filled in by model().
        self.u = None
        self.s = None
        self.vh = None

    def fit(self, x_train, y_train):
        """Store the training samples and labels; return ``self``."""
        self.x_train_fit = x_train
        self.y_train_fit = y_train
        return self

    def Centralization(self, X):
        """Return ``X`` with the per-feature mean of the training data removed."""
        Centra = X - np.mean(self.x_train_fit, axis=0)
        return Centra

    def model(self):
        """Compute the SVD of the centred, 1/sqrt(n)-scaled training matrix.

        The factors are stored in ``self.u``, ``self.s`` and ``self.vh``;
        the rows of ``self.vh`` are the principal directions. The reduced
        SVD is used, so ``self.vh`` has min(n_samples, n_features) rows
        instead of a full n_features x n_features matrix.
        """
        # Centre once and reuse the result for both the scale and the data.
        centred = self.Centralization(self.x_train_fit)
        X_ = (1 / np.sqrt(len(centred))) * centred
        self.u, self.s, self.vh = np.linalg.svd(X_, full_matrices=False)

    def transform(self, X, K):
        """Project ``X`` onto the first ``K`` principal directions.

        ``X`` is centred with the training mean first. The result has one
        row per sample and at most ``K`` columns (fewer when ``K`` exceeds
        the number of rows of ``self.vh``). ``model`` must be called first.
        """
        X = self.Centralization(X)
        X_dunction = X.dot(self.vh[:K].T)
        return X_dunction
#%% Project the training and test images onto the leading principal components
k = 80  # number of principal directions kept by transform()
pca = PCA()
# fit() returns the instance itself, so model() can be chained onto it.
pca.fit(train_x, train_y).model()
X_train_dunction_1 = pca.transform(train_x, k)
X_test_dunction_1 = pca.transform(test_x, k)


## 3. LDA降维

LDA 也可以推广到多分类任务中。假定存在 $N$ 个类，且第 $i$ 类示例数为 $m_i$。

$$\mathbf{S}_{w}=\sum_{i=1}^{N} \mathbf{S}_{w_{i}}$$

$$\mathbf{S}_{w_{i}}=\sum_{\boldsymbol{x} \in X_{i}}\left(\boldsymbol{x}-\boldsymbol{\mu}_{i}\right)\left(\boldsymbol{x}-\boldsymbol{\mu}_{i}\right)^{\mathrm{T}}$$

$$\mathbf{S}_{b}=\mathbf{S}_{t}-\mathbf{S}_{w}=\sum_{i=1}^{N} m_{i}\left(\boldsymbol{\mu}_{i}-\boldsymbol{\mu}\right)\left(\boldsymbol{\mu}_{i}-\boldsymbol{\mu}\right)^{\mathrm{T}}$$

$$\mathbf{S}_{b} \mathbf{W}=\lambda \mathbf{S}_{w} \mathbf{W}$$

#%% LDA: between-class and within-class scatter, then the discriminant directions
import collections

# Mean vector of all training samples in the PCA-reduced space, shape (k,).
mean_u = np.mean(X_train_dunction_1, axis=0)
# Between-class (S_b) and within-class (S_w) scatter matrices.
n_features = X_train_dunction_1.shape[1]
S_b = np.zeros((n_features, n_features))
S_w = np.zeros((n_features, n_features))
get_class = collections.Counter(train_y).keys()
for classes in get_class:
    # Rows of the training matrix that belong to the current class.
    x_i = X_train_dunction_1[train_y == classes]
    mean_u_i = np.mean(x_i, axis=0)
    # Between-class scatter: each class term is weighted by its own sample
    # count m_i (the original scaled the unweighted sum by the number of
    # classes instead).
    mean_diff = (mean_u_i - mean_u).reshape(-1, 1)
    S_b = S_b + len(x_i) * mean_diff.dot(mean_diff.T)
    # Within-class scatter: sum of (x - mu_i)(x - mu_i)^T over the class.
    centred_i = x_i - mean_u_i
    S_w = S_w + centred_i.T.dot(centred_i)
# Solve S_b W = lambda S_w W as the eigenproblem of S_w^{-1} S_b.
eig_vals, eig_vecs = np.linalg.eig(np.linalg.solve(S_w, S_b))
# np.linalg.eig may return complex values for this non-symmetric product;
# only the real parts are kept so the eigenvalues can be sorted.
eig_vals, eig_vecs = eig_vals.real, eig_vecs.real
# Sort by descending eigenvalue. Eigenvectors stay as COLUMNS so that
# eig_vecs[:, :k] selects the k leading directions (the original stored them
# as rows, so the later column slice picked truncated vectors).
order = np.argsort(-eig_vals)
eig_vals = eig_vals[order]
eig_vecs = eig_vecs[:, order]
#%% Nearest-neighbour accuracy for each number of retained LDA dimensions
n_test = len(test_y)
# NOTE(review): the upper bound 40 is hard-coded; presumably it is the number
# of classes, giving at most 39 useful LDA dimensions -- confirm against the data.
for k in range(1, 40):
    # Project both sets onto the first k columns of eig_vecs.
    X_train_dunction_2 = X_train_dunction_1.dot(eig_vecs[:, :k])
    X_test_dunction_2 = X_test_dunction_1.dot(eig_vecs[:, :k])
    # Euclidean distance between every test sample (rows) and every training
    # sample (columns), computed in one broadcast operation.
    dists = np.linalg.norm(
        X_test_dunction_2[:, np.newaxis, :] - X_train_dunction_2[np.newaxis, :, :],
        axis=2,
    )
    # Index of the closest training sample for each test sample
    # (the first minimum wins on ties).
    nearest = np.argmin(dists, axis=1)
    accu = int(np.sum(train_y[nearest] == test_y))
    # Divide by the actual test-set size rather than a hard-coded 80.
    print("保留前{}维的准确度为{}".format(k, accu / n_test))


