# ML之Kmeans：利用自定义Kmeans函数实现对多个坐标点(自定义四个点)进行自动(最多迭代10次)分类

## 核心代码

#!/usr/bin/python

# -*- coding:utf-8 -*-

import numpy as np

#ML之Kmeans：利用自定义Kmeans函数实现对多个坐标点(自定义四个点)进行自动(最多迭代10次)分类

def kmeans(X, k, maxIt):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's k-means.

    Args:
        X: 2-D array with one data point per row.
        k: Number of clusters; labels run from 1 to ``k``.
        maxIt: Iteration limit handed to ``shouldStop``.

    Returns:
        A float array with the same rows as ``X`` plus one trailing column
        that holds the cluster label assigned to each point.
    """
    numPoints, numDim = X.shape

    # Working copy of the data with one extra column reserved for the label.
    dataSet = np.zeros((numPoints, numDim + 1))
    dataSet[:, :-1] = X

    # Seed the centroids from distinct data points whenever possible: drawing
    # with replacement can start two centroids on the same point, which
    # leaves one cluster empty after the first assignment step.
    seedRows = np.random.choice(numPoints, size=k, replace=(k > numPoints))
    # Fancy indexing returns a copy, so labelling the centroids below does
    # not write into dataSet.
    centroids = dataSet[seedRows, :]

    # Give the initial centroids the labels 1..k.
    centroids[:, -1] = np.arange(1, k + 1)

    iterations = 0
    oldCentroids = None

    # Run the main k-means algorithm.
    while not shouldStop(oldCentroids, centroids, iterations, maxIt):
        print ("iteration: \n", iterations)
        print ("dataSet: \n", dataSet)
        print ("centroids: \n", centroids)

        # Save old centroids for the convergence test.
        oldCentroids = np.copy(centroids)
        iterations += 1

        # Assign a label to each data point based on the current centroids.
        updateLabels(dataSet, centroids)

        # Recompute the centroids from the freshly labelled data points.
        centroids = getCentroids(dataSet, k)

    return dataSet

# Function: Should Stop

# -------------

# Returns True if k-means is done and False otherwise. K-means terminates either

# because it has run a maximum number of iterations OR the centroids

# stop changing.

def shouldStop(oldCentroids, centroids, iterations, maxIt):
    """Decide whether the k-means loop should terminate.

    Args:
        oldCentroids: Centroids from the previous pass, or ``None`` before
            the first pass.
        centroids: Centroids produced by the latest pass.
        iterations: Number of passes completed so far.
        maxIt: Maximum number of passes allowed.

    Returns:
        True when ``maxIt`` passes have been completed or the centroids did
        not change between the last two passes, False otherwise.
    """
    # ">=" rather than ">": iterations counts completed passes, so stopping
    # only once it exceeds maxIt would allow maxIt + 1 passes.
    if iterations >= maxIt:
        return True
    # Nothing to compare against before the first pass.
    if oldCentroids is None:
        return False
    return np.array_equal(oldCentroids, centroids)

# Function: Update Labels

# -------------

# Update a label for each piece of data in the dataset.

def updateLabels(dataSet, centroids):
    """Relabel every row of ``dataSet`` in place.

    For each row, the feature columns (all but the last) are passed to
    ``getLabelFromClosestCentroid`` and the label it returns is written into
    the row's last column.

    Args:
        dataSet: 2-D array whose last column stores the cluster label.
        centroids: 2-D array of centroids in the same column layout.
    """
    rowCount, _ = dataSet.shape
    for rowIdx in range(rowCount):
        features = dataSet[rowIdx, :-1]
        dataSet[rowIdx, -1] = getLabelFromClosestCentroid(features, centroids)

def getLabelFromClosestCentroid(dataSetRow, centroids):
    """Return the label of the centroid nearest to ``dataSetRow``.

    Distance is the Euclidean norm between ``dataSetRow`` and each centroid's
    feature columns (every column except the last, which holds the label).
    When several centroids are equally close, the first one wins.

    Args:
        dataSetRow: 1-D array with the features of one data point.
        centroids: 2-D array with one centroid per row; the last column is
            the centroid's label.

    Returns:
        The label stored in the last column of the nearest centroid.
    """
    # Start from the first centroid, then look for a strictly closer one.
    closest = centroids[0]
    closestDist = np.linalg.norm(dataSetRow - closest[:-1])
    for candidate in centroids[1:]:
        candidateDist = np.linalg.norm(dataSetRow - candidate[:-1])
        # Strict "<" keeps the earliest centroid on ties.
        if candidateDist < closestDist:
            closest, closestDist = candidate, candidateDist

    print ("minDist:", closestDist)
    return closest[-1]

# Function: Get Centroids

# -------------

# Returns the k centroids, each recomputed as the mean of the points carrying its label.

def getCentroids(dataSet, k):
    """Recompute the ``k`` centroids from the labelled data points.

    Each centroid is the arithmetic mean of the feature columns of the rows
    carrying its label. A label that no row carries gets its centroid
    re-seeded from a randomly chosen data point, because the mean of an empty
    selection is NaN.

    Args:
        dataSet: 2-D array whose last column stores each row's label (1..k).
        k: Number of clusters.

    Returns:
        Array of shape ``(k, dataSet.shape[1])``; row ``i - 1`` holds the
        centroid for label ``i``, with the label itself in the last column.

    Raises:
        ValueError: If a cluster is empty and ``dataSet`` has no rows to
            re-seed it from.
    """
    result = np.zeros((k, dataSet.shape[1]))
    for i in range(1, k + 1):
        oneCluster = dataSet[dataSet[:, -1] == i, :-1]
        if oneCluster.shape[0] > 0:
            result[i - 1, :-1] = np.mean(oneCluster, axis=0)
        else:
            # Empty cluster: restart this centroid on a random data point.
            seedRow = np.random.randint(dataSet.shape[0])
            result[i - 1, :-1] = dataSet[seedRow, :-1]
        result[i - 1, -1] = i
    # The recomputed centroids must be handed back to the caller.
    return result

# Demo: cluster four hand-picked 2-D points into two groups, allowing at
# most ten iterations, then show the labelled data set.
x1, x2, x3, x4 = (np.array(point) for point in ([1, 1], [2, 1], [4, 3], [5, 4]))

# One point per row.
testX = np.vstack([x1, x2, x3, x4])

result = kmeans(testX, 2, 10)

print ("final result:")
print (result)

|

1928 0
|
16天前
|
Python
python 随机划分图片数据集以及移动标注

25 0
|
2月前
|

【7月更文挑战第22天】创建一个训练函数。
21 4
|
2月前
|

GridSearchCV 是一种穷举搜索方法，它会对指定的参数网格中的每一个参数组合进行交叉验证，并返回最优的参数组合。
GridSearchCV 是一种穷举搜索方法，它会对指定的参数网格中的每一个参数组合进行交叉验证，并返回最优的参数组合。
31 0
|
4月前
|

【Python机器学习】Sklearn库中Kmeans类、超参数K值确定、特征归一化的讲解（图文解释）
【Python机器学习】Sklearn库中Kmeans类、超参数K值确定、特征归一化的讲解（图文解释）
255 0
|
10月前
|

base model初始化large model，造成的参数矩阵对不上权重不匹配问题+修改预训练权重形状和上采样
base model初始化large model，造成的参数矩阵对不上权重不匹配问题+修改预训练权重形状和上采样
172 0
|
11月前
|

这个问题苦恼我几个月，之前一直用替代方案。这次实在没替代方案了，transformers源码和文档看了一整天，终于在晚上12点找到了。。。
436 0
|

141 0
|

8种交叉验证类型的深入解释和可视化介绍
8种交叉验证类型的深入解释和可视化介绍
278 3
|

PyTorch实现随机傅里叶特征映射的示例代码

305 0