7.2 Plotting contour lines to visualize the predictions
def plot_predictions(clf, axes):
    x0s = np.linspace(axes[0], axes[1], 100)
    x1s = np.linspace(axes[2], axes[3], 100)
    x0, x1 = np.meshgrid(x0s, x1s)
    X = np.c_[x0.ravel(), x1.ravel()]
    y_pred = clf.predict(X).reshape(x0.shape)
    y_decision = clf.decision_function(X).reshape(x0.shape)
    plt.contourf(x0, x1, y_pred, cmap=plt.cm.brg, alpha=0.2)
    plt.contourf(x0, x1, y_decision, cmap=plt.cm.brg, alpha=0.1)
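This helper evaluates the classifier on a 100x100 grid covering the window given by axes, then overlays two filled contour plots: the hard class predictions (alpha=0.2) and the raw decision-function values (alpha=0.1), so both the boundary and the margin structure stay visible.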
7.3 Plotting the raw data
def plot_dataset(X, y, axes):
    plt.plot(X[:, 0][y==0], X[:, 1][y==0], "bs")
    plt.plot(X[:, 0][y==1], X[:, 1][y==1], "g^")
    plt.axis(axes)
    plt.grid(True, which='both')
    plt.xlabel(r"$x_1$", fontsize=20)
    plt.ylabel(r"$x_2$", fontsize=20, rotation=0)
7.4 Plotting decision boundaries for different values of gamma and C
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=100, noise=0.15, random_state=42)

gamma1, gamma2 = 0.1, 5
C1, C2 = 0.001, 1000
hyperparams = (gamma1, C1), (gamma1, C2), (gamma2, C1), (gamma2, C2)

svm_clfs = []
for gamma, C in hyperparams:
    rbf_kernel_svm_clf = Pipeline([
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(kernel="rbf", gamma=gamma, C=C))
    ])
    rbf_kernel_svm_clf.fit(X, y)
    svm_clfs.append(rbf_kernel_svm_clf)

plt.figure(figsize=(6, 4))
for i, svm_clf in enumerate(svm_clfs):
    plt.subplot(221 + i)
    plot_predictions(svm_clf, [-1.5, 2.5, -1, 1.5])
    plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])
    gamma, C = hyperparams[i]
    plt.title(r"$\gamma = {}, C = {}$".format(gamma, C), fontsize=12)
plt.show()
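In the resulting 2x2 grid, gamma controls the reach of each training sample's influence: a large gamma makes the RBF boundary tight and irregular (risking overfitting), while a small gamma smooths it out. C trades margin width against violations, with a large C tolerating few misclassified points. gamma therefore acts like a regularization knob: reduce it if the model overfits, increase it if it underfits.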
Example 8: An SVM implemented from scratch
8.1 Creating the data
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline
# data
def create_data():
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
    data = np.array(df.iloc[:100, [0, 1, -1]])
    # remap label 0 to -1, since the SVM formulation assumes y in {-1, +1}
    for i in range(len(data)):
        if data[i, -1] == 0:
            data[i, -1] = -1
    return data[:, :2], data[:, -1]
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
plt.scatter(X[:50, 0], X[:50, 1], label='0')
plt.scatter(X[50:, 0], X[50:, 1], label='1')
plt.legend()
<matplotlib.legend.Legend at 0x1c516838670>
8.2 Defining the support vector machine
class SVM:
    def __init__(self, max_iter=100, kernel='linear'):
        self.max_iter = max_iter
        self._kernel = kernel

    def init_args(self, features, labels):
        self.m, self.n = features.shape
        self.X = features
        self.Y = labels
        self.b = 0.0
        # store the errors E_i in a list
        self.alpha = np.ones(self.m)
        self.E = [self._E(i) for i in range(self.m)]
        # penalty for the slack variables
        self.C = 1.0

    def _KKT(self, i):
        y_g = self._g(i) * self.Y[i]
        if self.alpha[i] == 0:
            return y_g >= 1
        elif 0 < self.alpha[i] < self.C:
            return y_g == 1
        else:
            return y_g <= 1

    # g(x): the prediction for input x_i (X[i])
    def _g(self, i):
        r = self.b
        for j in range(self.m):
            r += self.alpha[j] * self.Y[j] * self.kernel(self.X[i], self.X[j])
        return r

    # kernel function
    def kernel(self, x1, x2):
        if self._kernel == 'linear':
            return sum([x1[k] * x2[k] for k in range(self.n)])
        elif self._kernel == 'poly':
            return (sum([x1[k] * x2[k] for k in range(self.n)]) + 1)**2
        return 0

    # E(x): difference between the prediction g(x) and the label y
    def _E(self, i):
        return self._g(i) - self.Y[i]

    def _init_alpha(self):
        # outer loop: first scan the samples with 0 < alpha < C and check the KKT conditions
        index_list = [i for i in range(self.m) if 0 < self.alpha[i] < self.C]
        # otherwise traverse the whole training set
        non_satisfy_list = [i for i in range(self.m) if i not in index_list]
        index_list.extend(non_satisfy_list)

        for i in index_list:
            if self._KKT(i):
                continue
            E1 = self.E[i]
            # if E1 is positive, choose the smallest E; if E1 is negative, choose the largest
            if E1 >= 0:
                j = min(range(self.m), key=lambda x: self.E[x])
            else:
                j = max(range(self.m), key=lambda x: self.E[x])
            return i, j

    def _compare(self, _alpha, L, H):
        if _alpha > H:
            return H
        elif _alpha < L:
            return L
        else:
            return _alpha

    def fit(self, features, labels):
        self.init_args(features, labels)

        for t in range(self.max_iter):
            # train
            i1, i2 = self._init_alpha()

            # bounds (L, H) for alpha2
            if self.Y[i1] == self.Y[i2]:
                L = max(0, self.alpha[i1] + self.alpha[i2] - self.C)
                H = min(self.C, self.alpha[i1] + self.alpha[i2])
            else:
                L = max(0, self.alpha[i2] - self.alpha[i1])
                H = min(self.C, self.C + self.alpha[i2] - self.alpha[i1])

            E1 = self.E[i1]
            E2 = self.E[i2]
            # eta = K11 + K22 - 2*K12
            eta = self.kernel(self.X[i1], self.X[i1]) + self.kernel(
                self.X[i2], self.X[i2]) - 2 * self.kernel(self.X[i1], self.X[i2])
            if eta <= 0:
                # print('eta <= 0')
                continue

            alpha2_new_unc = self.alpha[i2] + self.Y[i2] * (
                E1 - E2) / eta  # corrected: per the book (pp. 130-131) this should be E1 - E2
            alpha2_new = self._compare(alpha2_new_unc, L, H)
            alpha1_new = self.alpha[i1] + self.Y[i1] * self.Y[i2] * (
                self.alpha[i2] - alpha2_new)

            b1_new = -E1 - self.Y[i1] * self.kernel(self.X[i1], self.X[i1]) * (
                alpha1_new - self.alpha[i1]) - self.Y[i2] * self.kernel(
                    self.X[i2], self.X[i1]) * (alpha2_new - self.alpha[i2]) + self.b
            b2_new = -E2 - self.Y[i1] * self.kernel(self.X[i1], self.X[i2]) * (
                alpha1_new - self.alpha[i1]) - self.Y[i2] * self.kernel(
                    self.X[i2], self.X[i2]) * (alpha2_new - self.alpha[i2]) + self.b

            if 0 < alpha1_new < self.C:
                b_new = b1_new
            elif 0 < alpha2_new < self.C:
                b_new = b2_new
            else:
                # take the midpoint
                b_new = (b1_new + b2_new) / 2

            # update the parameters
            self.alpha[i1] = alpha1_new
            self.alpha[i2] = alpha2_new
            self.b = b_new
            self.E[i1] = self._E(i1)
            self.E[i2] = self._E(i2)
        return 'train done!'

    def predict(self, data):
        r = self.b
        for i in range(self.m):
            r += self.alpha[i] * self.Y[i] * self.kernel(data, self.X[i])
        return 1 if r > 0 else -1

    def score(self, X_test, y_test):
        right_count = 0
        for i in range(len(X_test)):
            result = self.predict(X_test[i])
            if result == y_test[i]:
                right_count += 1
        return right_count / len(X_test)

    def _weight(self):
        # linear model
        yx = self.Y.reshape(-1, 1) * self.X
        self.w = np.dot(yx.T, self.alpha)
        return self.w
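For reference, the update rules that fit implements are the standard SMO equations (the pp. 130-131 citation in the comments refers to the derivation this code follows), with $K_{ij} = K(x_i, x_j)$ and $E_i = g(x_i) - y_i$:

$$\eta = K_{11} + K_{22} - 2K_{12}, \qquad \alpha_2^{new,unc} = \alpha_2^{old} + \frac{y_2 (E_1 - E_2)}{\eta}$$

$$\alpha_2^{new} = \operatorname{clip}\big(\alpha_2^{new,unc},\, L,\, H\big), \qquad \alpha_1^{new} = \alpha_1^{old} + y_1 y_2 \big(\alpha_2^{old} - \alpha_2^{new}\big)$$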
8.3 Initializing and fitting the SVM
svm = SVM(max_iter=100)
svm.fit(X_train, y_train)
'train done!'
8.4 Scoring the support vector machine
svm.score(X_test, y_test)
0.72
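Since the kernel is linear, the class's _weight() helper recovers the weight vector, which lets us sketch the learned boundary against the data. A minimal sketch, assuming the X, y, and svm objects from 8.1-8.3 are still in scope:

# plot the learned linear boundary w.x + b = 0 over the training data
w = svm._weight()                 # weight vector [w1, w2]
x_points = np.linspace(4, 8, 50)  # sepal-length range
y_points = -(w[0] * x_points + svm.b) / w[1]
plt.scatter(X[:50, 0], X[:50, 1], label='-1')
plt.scatter(X[50:, 0], X[50:, 1], label='+1')
plt.plot(x_points, y_points, 'r-')
plt.legend()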
Experiment: using the data below as the dataset, classify it with both a linear and a kernel support vector machine, and plot the decision boundary for the linear kernel.
1. Loading the data
from sklearn.svm import SVC
from sklearn import datasets
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]  # petal length, petal width
y = iris["target"]
X, y
(array([[1.4, 0.2], [1.4, 0.2], [1.3, 0.2], [1.5, 0.2], [1.4, 0.2], [1.7, 0.4], [1.4, 0.3], [1.5, 0.2], [1.4, 0.2], [1.5, 0.1], [1.5, 0.2], [1.6, 0.2], [1.4, 0.1], [1.1, 0.1], [1.2, 0.2], [1.5, 0.4], [1.3, 0.4], [1.4, 0.3], [1.7, 0.3], [1.5, 0.3], [1.7, 0.2], [1.5, 0.4], [1. , 0.2], [1.7, 0.5], [1.9, 0.2], [1.6, 0.2], [1.6, 0.4], [1.5, 0.2], [1.4, 0.2], [1.6, 0.2], [1.6, 0.2], [1.5, 0.4], [1.5, 0.1], [1.4, 0.2], [1.5, 0.2], [1.2, 0.2], [1.3, 0.2], [1.4, 0.1], [1.3, 0.2], [1.5, 0.2], [1.3, 0.3], [1.3, 0.3], [1.3, 0.2], [1.6, 0.6], [1.9, 0.4], [1.4, 0.3], [1.6, 0.2], [1.4, 0.2], [1.5, 0.2], [1.4, 0.2], [4.7, 1.4], [4.5, 1.5], [4.9, 1.5], [4. , 1.3], [4.6, 1.5], [4.5, 1.3], [4.7, 1.6], [3.3, 1. ], [4.6, 1.3], [3.9, 1.4], [3.5, 1. ], [4.2, 1.5], [4. , 1. ], [4.7, 1.4], [3.6, 1.3], [4.4, 1.4], [4.5, 1.5], [4.1, 1. ], [4.5, 1.5], [3.9, 1.1], [4.8, 1.8], [4. , 1.3], [4.9, 1.5], [4.7, 1.2], [4.3, 1.3], [4.4, 1.4], [4.8, 1.4], [5. , 1.7], [4.5, 1.5], [3.5, 1. ], [3.8, 1.1], [3.7, 1. ], [3.9, 1.2], [5.1, 1.6], [4.5, 1.5], [4.5, 1.6], [4.7, 1.5], [4.4, 1.3], [4.1, 1.3], [4. , 1.3], [4.4, 1.2], [4.6, 1.4], [4. , 1.2], [3.3, 1. ], [4.2, 1.3], [4.2, 1.2], [4.2, 1.3], [4.3, 1.3], [3. , 1.1], [4.1, 1.3], [6. , 2.5], [5.1, 1.9], [5.9, 2.1], [5.6, 1.8], [5.8, 2.2], [6.6, 2.1], [4.5, 1.7], [6.3, 1.8], [5.8, 1.8], [6.1, 2.5], [5.1, 2. ], [5.3, 1.9], [5.5, 2.1], [5. , 2. ], [5.1, 2.4], [5.3, 2.3], [5.5, 1.8], [6.7, 2.2], [6.9, 2.3], [5. , 1.5], [5.7, 2.3], [4.9, 2. ], [6.7, 2. ], [4.9, 1.8], [5.7, 2.1], [6. , 1.8], [4.8, 1.8], [4.9, 1.8], [5.6, 2.1], [5.8, 1.6], [6.1, 1.9], [6.4, 2. ], [5.6, 2.2], [5.1, 1.5], [5.6, 1.4], [6.1, 2.3], [5.6, 2.4], [5.5, 1.8], [4.8, 1.8], [5.4, 2.1], [5.6, 2.4], [5.1, 2.3], [5.1, 1.9], [5.9, 2.3], [5.7, 2.5], [5.2, 2.3], [5. , 1.9], [5.2, 2. ], [5.4, 2.3], [5.1, 1.8]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))
X_train = X[(y==1) | (y==2)]
y_train = y[(y==1) | (y==2)]
y_train
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
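Only classes 1 and 2 (versicolor and virginica) are kept, so this is a binary problem; unlike class 0, these two species overlap slightly in the petal length/width plane, which makes the task harder for a linear boundary.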
2. Visualizing the data
plt.scatter(X_train[:50, 0], X_train[:50, 1], marker='x', label='Positive')
plt.scatter(X_train[50:, 0], X_train[50:, 1], marker='o', label='Negative')
plt.legend()
<matplotlib.legend.Legend at 0x1c515115610>
3. Fitting a linear support vector machine
from sklearn.svm import SVC

svm_clf = SVC(kernel="linear", C=10, max_iter=1000)
svm_clf.fit(X_train, y_train)
SVC(C=10, kernel='linear', max_iter=1000)
svm_clf.score(X_train,y_train)
0.95
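A training accuracy of 0.95 is roughly the best a straight line can achieve here, because a few versicolor and virginica samples overlap in this feature space.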
4. Trying a kernel support vector machine
import sklearn.svm as svm

nl_svc = svm.SVC(C=1, gamma=1, probability=True)  # kernel defaults to 'rbf'
nl_svc.fit(X_train, y_train)
nl_svc.score(X_train, y_train)
0.95
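Because the model was fitted with probability=True, it also exposes calibrated class probabilities. As a quick check (output not shown):

nl_svc.predict_proba(X_train[:3])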
5. Plotting the linear SVM's decision boundary
def plot_svc_decision_boundary(svm_clf, xmin, xmax):
    w = svm_clf.coef_[0]
    b = svm_clf.intercept_[0]

    # At the decision boundary, w0*x0 + w1*x1 + b = 0
    # => x1 = -w0/w1 * x0 - b/w1
    x0 = np.linspace(xmin, xmax, 200)
    decision_boundary = -w[0]/w[1] * x0 - b/w[1]

    # margin = 1/np.sqrt(w[1]**2 + w[0]**2)
    margin = 1/w[1]
    gutter_up = decision_boundary + margin
    gutter_down = decision_boundary - margin

    svs = svm_clf.support_vectors_
    plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#FFAAAA')
    plt.plot(x0, decision_boundary, "k-", linewidth=2)
    plt.plot(x0, gutter_up, "k--", linewidth=2)
    plt.plot(x0, gutter_down, "k--", linewidth=2)
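The gutters are the lines where the decision function equals $\pm 1$: solving $w_0 x_0 + w_1 x_1 + b = \pm 1$ for $x_1$ shifts the boundary line vertically by $\pm 1/w_1$, which is where margin = 1/w[1] comes from. (The commented-out $1/\lVert w \rVert$ would instead be the perpendicular distance from the boundary to the margin.)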
np.min(X_train[:,0]),np.max(X_train[:,0])
(3.0, 6.9)
plt.figure(figsize=(6, 4))
plot_svc_decision_boundary(svm_clf, 3, 7)
plt.plot(X[:, 0][y == 1], X[:, 1][y == 1], "bs")
plt.plot(X[:, 0][y == 2], X[:, 1][y == 2], "yo")
plt.xlabel("Petal length", fontsize=14)
plt.axis([3, 7, 0, 2])
plt.show()
6. Plotting the nonlinear decision boundary
def plot_predictions(clf, axes):
    x0s = np.linspace(axes[0], axes[1], 100)
    x1s = np.linspace(axes[2], axes[3], 100)
    x0, x1 = np.meshgrid(x0s, x1s)
    X = np.c_[x0.ravel(), x1.ravel()]
    y_pred = clf.predict(X).reshape(x0.shape)
    y_decision = clf.decision_function(X).reshape(x0.shape)
    plt.contourf(x0, x1, y_pred, cmap=plt.cm.brg, alpha=0.2)
    plt.contourf(x0, x1, y_decision, cmap=plt.cm.brg, alpha=0.1)
def plot_dataset(X, y, axes):
    plt.plot(X[:, 0][y==1], X[:, 1][y==1], "bs")
    plt.plot(X[:, 0][y==2], X[:, 1][y==2], "g^")
    plt.axis(axes)
    plt.grid(True, which='both')
    plt.xlabel(r"$x_1$", fontsize=20)
    plt.ylabel(r"$x_2$", fontsize=20, rotation=0)
np.min(X_train[:,0]), np.max(X_train[:,0])
(3.0, 6.9)
np.min(X_train[:,1]),np.max(X_train[:,1])
(1.0, 2.5)
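These feature ranges (petal length 3.0-6.9, petal width 1.0-2.5) motivate the plotting window [2.5, 7, 1, 3] used below.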
plot_predictions(nl_svc, [2.5, 7, 1, 3])
plot_dataset(X, y, [2.5, 7, 1, 3])
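The filled contours show the RBF model's predicted class regions, with the decision-function values overlaid at lower alpha, so the curvature of the nonlinear boundary is visible against the data.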