# 谱聚类.zip

• 桌子非
了解作者
• Python
开发工具
• 1KB
文件大小
• zip
文件格式
• 0
收藏次数
• 10 积分
下载积分
• 1
下载次数
• 2019-07-18 09:04
上传日期

• 谱聚类.py
3KB

# -*- coding: utf-8 -*-
"""
Spectral clustering of city feature data, compared against plain K-means.

Pipeline:
  1. Load feature vectors from a CSV (one row per city).
  2. Max-normalize each feature column.
  3. Build a pairwise squared-Euclidean distance matrix.
  4. Convert distances to a symmetric k-NN Gaussian-kernel affinity matrix.
  5. Form the symmetrically normalized graph Laplacian D^(-1/2) L D^(-1/2).
  6. Embed points via the eigenvectors of the smallest eigenvalues and
     run K-means in that embedding; also run K-means on the raw data.
  7. Plot both labelings side by side and save the figure.

Created on Thu Jul 11 20:25:05 2019
@author: Administrator
"""
import sys

sys.path.append("..")  # kept from the original script; presumably for a local package — TODO confirm

from itertools import cycle, islice

import numpy as np
import pandas as pd

np.random.seed(1)  # deterministic K-means initialization across runs


def euclidDistance(x1, x2, sqrt_flag=False):
    """Return the squared Euclidean distance between x1 and x2.

    If sqrt_flag is True, return the true (square-rooted) distance instead.
    """
    res = np.sum((x1 - x2) ** 2)
    if sqrt_flag:
        res = np.sqrt(res)
    return res


def calEuclidDistanceMatrix(X):
    """Return the symmetric N x N matrix of pairwise SQUARED distances of rows of X."""
    X = np.array(X)
    n = len(X)
    S = np.zeros((n, n))
    # Fill only the upper triangle and mirror it; the diagonal stays 0.
    for i in range(n):
        for j in range(i + 1, n):
            S[i][j] = 1.0 * euclidDistance(X[i], X[j])
            S[j][i] = S[i][j]
    return S


def myKNN(S, k, sigma=1.0):
    """Build a symmetric k-nearest-neighbour Gaussian affinity matrix.

    Parameters
    ----------
    S : (N, N) array of pairwise squared distances.
    k : number of neighbours to connect (each point's k+1 smallest
        distances are used, which includes the point itself at distance 0).
    sigma : Gaussian kernel bandwidth.

    Returns
    -------
    (N, N) array A with A[i][j] = exp(-S[i][j] / (2 * sigma^2)) for
    neighbouring pairs (made mutual/symmetric), 0 elsewhere.
    """
    N = len(S)
    A = np.zeros((N, N))
    for i in range(N):
        # Pair each distance with its column index, then sort ascending.
        dist_with_index = sorted(zip(S[i], range(N)), key=lambda x: x[0])
        # k+1 because the nearest "neighbour" is the point itself (distance 0).
        neighbours_id = [dist_with_index[m][1] for m in range(k + 1)]
        for j in neighbours_id:
            A[i][j] = np.exp(-S[i][j] / 2 / sigma / sigma)
            A[j][i] = A[i][j]  # enforce symmetry (mutual k-NN graph)
    return A


def calLaplacianMatrix(adjacentMatrix):
    """Return the symmetrically normalized Laplacian D^(-1/2) (D - A) D^(-1/2).

    Isolated nodes (degree 0) get a zero scaling factor instead of
    dividing by zero, which would otherwise poison the eigendecomposition
    with inf/nan entries.
    """
    # Degree of each node: row sums of the affinity matrix.
    degreeMatrix = np.sum(adjacentMatrix, axis=1)

    # Unnormalized Laplacian: L = D - A
    laplacianMatrix = np.diag(degreeMatrix) - adjacentMatrix

    # D^(-1/2), guarding against zero-degree (isolated) nodes.
    with np.errstate(divide="ignore"):
        inv_sqrt_degree = 1.0 / np.sqrt(degreeMatrix)
    inv_sqrt_degree[~np.isfinite(inv_sqrt_degree)] = 0.0
    sqrtDegreeMatrix = np.diag(inv_sqrt_degree)

    return np.dot(np.dot(sqrtDegreeMatrix, laplacianMatrix), sqrtDegreeMatrix)


def plot(X, y_sp, y_km):
    """Scatter-plot spectral-clustering vs K-means labels side by side and save to disk."""
    # Imported here so the numeric helpers above don't require matplotlib.
    from matplotlib import pyplot as plt

    colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                         '#f781bf', '#a65628', '#984ea3',
                                         '#999999', '#e41a1c', '#dede00']),
                                  int(max(y_km) + 1))))
    plt.subplot(121)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_sp])
    plt.title("Spectral Clustering")
    plt.subplot(122)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_km])
    plt.title("Kmeans Clustering")
    # plt.show()
    plt.savefig("F:/tjxt/2019 new_research/result/now/figures/spectral_clustering.png")


def main(n_clusters=10, knn_k=2, n_components=500):
    """Run the full pipeline on the hard-coded city CSV.

    n_components caps how many of the smallest-eigenvalue eigenvectors
    form the spectral embedding (the original script used 500, i.e.
    effectively all eigenvectors when N < 500).
    """
    # sklearn is only needed by the driver, not by the numeric helpers.
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import normalize

    data = pd.read_csv('F:/tjxt/2019 new_research/result/now/city_class.csv',
                       encoding='ANSI')
    del data['city']  # drop the label column, keep only numeric features
    data = data.values.astype(np.float64)
    # NOTE: axis=0 scales each feature COLUMN to max 1 (the original
    # comment claimed per-row normalization, which was incorrect).
    data = normalize(data, axis=0, norm='max')

    Similarity = calEuclidDistanceMatrix(data)
    Adjacent = myKNN(Similarity, k=knn_k)
    Laplacian = calLaplacianMatrix(Adjacent)

    # eigh (not eig): the normalized Laplacian is symmetric, so eigh
    # guarantees real eigenvalues sorted ascending — np.linalg.eig could
    # return complex/unordered results that crash KMeans.fit.
    eigvals, eigvecs = np.linalg.eigh(Laplacian)
    H = eigvecs[:, :min(n_components, eigvecs.shape[1])]

    sp_kmeans = KMeans(n_clusters=n_clusters).fit(H)
    pure_kmeans = KMeans(n_clusters=n_clusters).fit(data)
    plot(data, sp_kmeans.labels_, pure_kmeans.labels_)


if __name__ == "__main__":
    main()

相关推荐