spectral clustering谱聚类.zip

  • LOUISLF
    Author
  • Python
    Development tool
  • 597KB
    File size
  • zip
    File format
  • 0
    Favorites
  • 1 point
    Download cost
  • 3
    Downloads
  • 2020-07-25 21:44
    Upload date
Spectral clustering implemented in Python. The Python source generates a decision graph and a plot of the clustering results.
spectral clustering谱聚类.zip
  • fig
    • sc.png
      638.5KB
  • .idea
    • inspectionProfiles
      • profiles_settings.xml
        174B
    • .gitignore
      184B
    • workspace.xml
      1.7KB
    • misc.xml
      294B
    • modules.xml
      315B
    • spectral clustering谱聚类.iml
      408B
  • SC.py
    3.5KB
  • iris.data
    4.4KB
Content overview (source of SC.py)
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import datasets
from itertools import cycle, islice
from sklearn import metrics


def genTwoCircles(n_samples=1000):
    # Synthetic two-circles data set (unused below, kept for experiments)
    X, y = datasets.make_circles(n_samples, factor=0.5, noise=0.05)
    return X, y


def distance(A, B):
    # Euclidean distance between two points
    return np.linalg.norm(A - B)


def adjacentMatrix_KNN(data, k, sigma=1.0):
    # Build the adjacency (affinity) matrix from the k nearest neighbours,
    # weighting each edge with a Gaussian kernel of width sigma
    n = len(data)
    dist_matrix = squareform(pdist(data, metric='euclidean'))
    AdjMat = np.zeros((n, n))
    for i in range(n):
        dist_with_index = sorted(zip(dist_matrix[i], range(n)), key=lambda x: x[0])
        neighbours_id = [dist_with_index[m][1] for m in range(k + 1)]
        for j in neighbours_id:
            AdjMat[i, j] = np.exp(-(dist_matrix[i, j] ** 2) / (2 * sigma * sigma))
            AdjMat[j, i] = AdjMat[i, j]
    return AdjMat
    # Alternative: unweighted (0/1) k-NN graph, symmetrized by averaging
    # for idx, each in enumerate(dist_matrix):
    #     index_array = np.argsort(each)
    #     AdjMat[idx][index_array[1:k + 1]] = 1
    # AdjMatrix = (AdjMat + AdjMat.T) / 2
    # return AdjMatrix


def LaplacianMatrix(adjacentMatrix):
    # Normalized graph Laplacian
    # Degree matrix: D = diag(row sums of the adjacency matrix)
    degreeMatrix = np.sum(adjacentMatrix, axis=1)
    # Unnormalized Laplacian: L = D - A
    laplacianMatrix = np.diag(degreeMatrix) - adjacentMatrix
    # Symmetric normalization: D^(-1/2) L D^(-1/2)
    sqrtDegreeMatrix = np.diag(1.0 / (degreeMatrix ** 0.5))
    return np.dot(np.dot(sqrtDegreeMatrix, laplacianMatrix), sqrtDegreeMatrix)


def getEigVec(LaMat, n_cluster):
    # Eigenvectors belonging to the n_cluster smallest eigenvalues
    eigVal, eigVec = np.linalg.eig(LaMat)
    index_eigVal = np.argsort(eigVal)
    O_index = index_eigVal[0:n_cluster]
    O_eigVec = eigVec[:, O_index]
    return np.real(O_eigVec)


def plot(X, y_sp, y_km):
    # Side-by-side scatter plots of the spectral-clustering and k-means labels
    colors = np.array(list(islice(cycle(
        ['#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628',
         '#984ea3', '#999999', '#e41a1c', '#dede00']),
        int(max(y_km) + 1))))
    plt.subplot(121)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_sp])
    plt.title("Spectral Clustering")
    plt.subplot(122)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_km])
    plt.title("Kmeans Clustering")
    plt.show()


if __name__ == "__main__":
    # Read the iris data set (the UCI iris.data file is comma-separated, no header row)
    data_full = pd.read_csv("iris.data", header=None)
    # Column labels of the table
    columns = list(data_full.columns)
    # Feature columns: everything except the last column, which holds the class label
    features = columns[:len(columns) - 1]
    data = data_full[features]
    DATA = np.array(data)
    DATA_FULL = np.array(data_full)
    DATA_FINAL_COLUMN = DATA_FULL[:, len(columns) - 1]
    # X, y = genTwoCircles(n_samples=1000)
    k = 3
    # Spectral clustering: k-NN affinity -> normalized Laplacian ->
    # k smallest eigenvectors -> k-means in the spectral embedding
    A = adjacentMatrix_KNN(DATA, 5, sigma=1.0)
    LapMat = LaplacianMatrix(A)
    H = getEigVec(LapMat, n_cluster=k)
    y_sp = KMeans(n_clusters=k).fit_predict(H)
    # Plain k-means on the raw features, for comparison
    y_km = KMeans(n_clusters=k).fit_predict(DATA)
    print('NMI (spectral clustering):', metrics.normalized_mutual_info_score(y_sp, DATA_FINAL_COLUMN))
    print('NMI (k-means):', metrics.normalized_mutual_info_score(y_km, DATA_FINAL_COLUMN))
    plot(DATA, y_sp, y_km)
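
As a quick cross-check of the script above, one could run scikit-learn's built-in SpectralClustering on the same iris features and compare the NMI scores. The snippet below is a minimal sketch and is not part of the archive; it assumes iris.data (the standard UCI file, no header row) sits in the working directory, and it uses a k-nearest-neighbour affinity with n_neighbors=5 and n_clusters=3 to mirror adjacentMatrix_KNN(DATA, 5) in SC.py.

import pandas as pd
from sklearn.cluster import SpectralClustering
from sklearn.metrics import normalized_mutual_info_score

# Load the same features and true species labels used by SC.py
df = pd.read_csv("iris.data", header=None)
X = df.iloc[:, :-1].to_numpy()
labels_true = df.iloc[:, -1].to_numpy()

# k-NN affinity with 5 neighbours, 3 clusters, k-means label assignment
model = SpectralClustering(n_clusters=3, affinity="nearest_neighbors",
                           n_neighbors=5, assign_labels="kmeans",
                           random_state=0)
labels_pred = model.fit_predict(X)
print("NMI (sklearn SpectralClustering):",
      normalized_mutual_info_score(labels_true, labels_pred))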