机器学习第五次作业
第一题
高斯贝叶斯分类代码如下:
import numpy as np
def gaussian_bayes(x_0, x_1, samples=None):
    """Evaluate a full-covariance Gaussian density per class at (x_0, x_1).

    One bivariate Gaussian is fitted to the rows whose label equals 0 and
    another to all remaining rows.  Both class-conditional densities are
    printed and returned.  Class priors are not applied.

    Args:
        x_0: First feature of the query point.
        x_1: Second feature of the query point.
        samples: Rows of ``[feature_0, feature_1, label]``.  When omitted,
            the module-level ``data`` list is used.

    Returns:
        ``(p_0, p_1)``: the density under the label-0 model and under the
        model of the remaining rows, as floats.

    Raises:
        ValueError: If one of the two classes has no rows.
        numpy.linalg.LinAlgError: If a class covariance matrix is singular.
    """
    if samples is None:
        samples = data
    rows = np.asarray(samples, dtype=float)
    is_class_0 = rows[:, 2] == 0
    # Row vector, so the quadratic form below yields a 1x1 array.
    query = np.array([[x_0, x_1]], dtype=float)

    densities = []
    for mask in (is_class_0, ~is_class_0):
        points = rows[mask, :2]
        if len(points) == 0:
            raise ValueError("each class needs at least one sample")
        mu = points.mean(axis=0, keepdims=True)
        centered = points - mu
        # Maximum-likelihood (divide by N) covariance estimate.
        cov = centered.T @ centered / len(points)
        densities.append(_gaussian_density(query, mu, cov))

    p_0, p_1 = densities
    print("好瓜:", p_1)
    print("坏瓜:", p_0)
    return float(p_0[0, 0]), float(p_1[0, 0])


def _gaussian_density(x, mu, cov):
    """Return the multivariate normal density at row vector ``x`` (1x1 array)."""
    dim = cov.shape[0]
    diff = x - mu
    # The normaliser is (2*pi)^(d/2) * sqrt(det(cov)); it must come from the
    # covariance determinant, not from the raw sums of squared deviations.
    norm = (2 * np.pi) ** (dim / 2) * np.sqrt(np.linalg.det(cov))
    return np.exp(-0.5 * diff @ np.linalg.inv(cov) @ diff.T) / norm
高斯贝叶斯分类结果如下:
好瓜: [[0.75333906]]
坏瓜: [[0.30333779]]
高斯朴素贝叶斯分类代码如下:
import numpy as np
# Training rows: [feature 1, feature 2, label].  Label-1 rows feed the score
# printed as "好瓜" and label-0 rows the score printed as "坏瓜".
data = [
    [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
    [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
    [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
    [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0], [0.593, 0.042, 0],
    [0.719, 0.103, 0],
]


def _feature_stats(label, column):
    """Return (mean, population std) of one feature column within one class."""
    values = [row[column] for row in data if row[2] == label]
    return np.mean(values), np.std(values)


def _normal_pdf(value, mean, std):
    """Return the univariate normal density at ``value``."""
    scale = np.sqrt(2 * np.pi) * std
    return 1 / scale * np.exp((-(value - mean) ** 2) / (2 * (std ** 2)))


# The point being classified.
query_x1, query_x2 = 0.5, 0.3

# Label 0: product of the per-feature likelihoods times the prior
# (9 of the 17 rows carry label 0).
p1 = _normal_pdf(query_x1, *_feature_stats(0, 0))
p2 = _normal_pdf(query_x2, *_feature_stats(0, 1))
print("坏瓜:", p1*p2*(9/17))

# Label 1: same computation with the prior 8/17.
p1x = _normal_pdf(query_x1, *_feature_stats(1, 0))
p2x = _normal_pdf(query_x2, *_feature_stats(1, 1))
print("好瓜:", p1x*p2x*(8/17))
高斯朴素贝叶斯分类结果如下:
坏瓜: 1.6139228137336026
好瓜: 5.312634923114169
第二题
代码如下:
import numpy as np
data = [1.0, 1.3, 2.2, 2.6, 2.8, 5.0, 7.3, 7.4, 7.5, 7.7, 7.9]
# Each component is [mean, variance, mixing weight P(c)].
theta1 = [6, 1, 0.5]
theta2 = [7.5, 1, 0.5]


def _normal_pdf(x, mean, var):
    """Return the univariate normal density P(x | c) for the given variance."""
    return 1 / np.sqrt(2 * np.pi * var) * np.exp(-(x - mean) ** 2 / (2 * var))


def _m_step(samples, resp):
    """Re-estimate [mean, variance, weight] from responsibilities P(c | x)."""
    mass = sum(resp)
    mean = sum(r * x for r, x in zip(resp, samples)) / mass
    # The variance is taken about the mean computed just above; using the
    # previous iteration's mean here would not be the maximum-likelihood update.
    var = sum(r * (x - mean) ** 2 for r, x in zip(resp, samples)) / mass
    return [float(mean), float(var), float(mass / len(samples))]


def em_two_gaussians(samples, init1, init2, tol=1e-7, max_iter=1000):
    """Fit a two-component 1-D Gaussian mixture with EM.

    Args:
        samples: Sequence of observations.
        init1: Initial ``[mean, variance, weight]`` of the first component.
        init2: Initial ``[mean, variance, weight]`` of the second component.
        tol: Stop once no parameter changes by more than this between two
            consecutive iterations.
        max_iter: Upper bound on the number of iterations; the latest
            estimate is returned if it is reached.

    Returns:
        ``(theta1, theta2)``: the fitted ``[mean, variance, weight]`` lists.
        The input lists are not modified.
    """
    comp1 = list(init1)
    comp2 = list(init2)
    for _ in range(max_iter):
        # E-step: responsibilities P(c | x) for every sample.
        resp1 = []
        resp2 = []
        for x in samples:
            joint1 = comp1[2] * _normal_pdf(x, comp1[0], comp1[1])
            joint2 = comp2[2] * _normal_pdf(x, comp2[0], comp2[1])
            total = joint1 + joint2
            resp1.append(joint1 / total)
            resp2.append(joint2 / total)

        # M-step: weighted maximum-likelihood estimates.
        new1 = _m_step(samples, resp1)
        new2 = _m_step(samples, resp2)

        # Converged only when every parameter of both components is stable.
        shift = max(abs(new - old)
                    for new, old in zip(new1 + new2, comp1 + comp2))
        comp1, comp2 = new1, new2
        if shift < tol:
            break
    return comp1, comp2


theta1, theta2 = em_two_gaussians(data, theta1, theta2)
print(theta1)
print(theta2)
结果如下:
[2.484129369953009, 1.6917479527139188, 0.5455419134418865]
[7.560020390897808, 0.046398844567893, 0.4544580865581134]
第三题
[外链图片转存中...(img-pG1AwdUZ-1616158608323)]
[外链图片转存中...(img-XRJ4d0UK-1616158608324)]