# 谱聚类.zip

• 2019-07-18 09:04
上传日期

• 谱聚类.py
3KB

# -*- coding: utf-8 -*-
"""
Created on Thu Jul 11 20:25:05 2019

@author: Administrator

Spectral clustering on a k-nearest-neighbour graph, compared against plain
k-means on the same data.  Running the file as a script reads the city CSV,
clusters it both ways and saves a side-by-side scatter plot.
"""
import sys
sys.path.append("..")

from itertools import cycle, islice

import numpy as np
import pandas as pd

np.random.seed(1)

# Input table and output figure used when the file is run as a script.
DATA_PATH = 'F:/tjxt/2019 new_research/result/now/city_class.csv'
FIGURE_PATH = "F:/tjxt/2019 new_research/result/now/figures/spectral_clustering.png"

N_CLUSTERS = 10
# NOTE(review): the original script keeps the 500 smallest eigenvectors, not
# N_CLUSTERS of them as is usual for spectral clustering -- confirm intent.
N_EIGENVECTORS = 500

PALETTE = ['#377eb8', '#ff7f00', '#4daf4a',
           '#f781bf', '#a65628', '#984ea3',
           '#999999', '#e41a1c', '#dede00']


def myKNN(S, k, sigma=1.0):
    """Build a symmetric Gaussian-weighted k-nearest-neighbour adjacency matrix.

    Parameters
    ----------
    S : array-like, shape (N, N)
        Pairwise distance matrix; entries are used as-is in the exponent.
    k : int
        Number of neighbours per point.  ``k + 1`` smallest entries of each
        row are linked, so when a point's own zero distance is the row
        minimum it receives a self-loop of weight 1.  Values larger than
        ``N - 1`` are clamped instead of raising ``IndexError``.
    sigma : float
        Kernel width; the weight is ``exp(-S[i, j] / (2 * sigma**2))``.

    Returns
    -------
    numpy.ndarray, shape (N, N)
        Adjacency matrix in which an edge exists if either endpoint lists
        the other among its nearest neighbours.
    """
    S = np.asarray(S, dtype=float)
    N = len(S)
    A = np.zeros((N, N))
    n_neighbours = max(0, min(k + 1, N))
    for i in range(N):
        # A stable sort keeps the lower index first on ties, matching
        # sorted() on (distance, index) pairs keyed by distance.
        neighbours_id = np.argsort(S[i], kind="stable")[:n_neighbours]
        weights = np.exp(-S[i, neighbours_id] / 2 / sigma / sigma)
        A[i, neighbours_id] = weights
        A[neighbours_id, i] = weights  # mutually
    return A


def calLaplacianMatrix(adjacentMatrix):
    """Return the symmetric normalized Laplacian D^(-1/2) (D - A) D^(-1/2).

    Parameters
    ----------
    adjacentMatrix : array-like, shape (N, N)
        Adjacency (affinity) matrix A.

    Returns
    -------
    numpy.ndarray, shape (N, N)
        Normalized Laplacian.  Nodes whose degree is not positive get a
        zero row and column instead of inf/NaN from a division by zero.
    """
    adjacentMatrix = np.asarray(adjacentMatrix, dtype=float)

    # Degree matrix: D = diag(row sums of A).
    degree = np.sum(adjacentMatrix, axis=1)

    # Unnormalized Laplacian: L = D - A.
    laplacian = np.diag(degree) - adjacentMatrix

    # D^(-1/2), guarding isolated nodes.
    inv_sqrt_degree = np.zeros(len(degree))
    positive = degree > 0
    inv_sqrt_degree[positive] = 1.0 / np.sqrt(degree[positive])

    # Row/column scaling is equivalent to multiplying by the diagonal
    # matrices on both sides, without building them.
    return inv_sqrt_degree[:, None] * laplacian * inv_sqrt_degree[None, :]


def euclidDistance(x1, x2, sqrt_flag=False):
    """Return the squared Euclidean distance between ``x1`` and ``x2``.

    With ``sqrt_flag=True`` the square root is taken, giving the ordinary
    Euclidean distance.
    """
    res = np.sum((x1 - x2) ** 2)
    if sqrt_flag:
        res = np.sqrt(res)
    return res


def calEuclidDistanceMatrix(X):
    """Return the matrix of pairwise squared Euclidean distances of ``X``.

    Parameters
    ----------
    X : array-like, shape (N, d) or (N,)
        One sample per row; a 1-D input is treated as N scalar samples.

    Returns
    -------
    numpy.ndarray, shape (N, N)
        Symmetric matrix with a zero diagonal.  An empty input gives a
        ``(0, 0)`` matrix.  Uses an ``N * N * d`` temporary array.
    """
    X = np.asarray(X, dtype=float)
    n = len(X)
    if n == 0:
        return np.zeros((0, 0))
    X = X.reshape(n, X.size // n)
    diff = X[:, None, :] - X[None, :, :]
    return np.sum(diff ** 2, axis=-1)


def spectralEmbedding(laplacian, n_components):
    """Return eigenvectors of the smallest eigenvalues of a symmetric matrix.

    ``numpy.linalg.eigh`` is used because the Laplacian is symmetric: it
    returns real eigenvalues already sorted in ascending order, whereas the
    general ``eig`` solver may return complex values in arbitrary order.

    Parameters
    ----------
    laplacian : array-like, shape (N, N)
        Symmetric matrix.
    n_components : int
        Number of eigenvectors to keep; values above N keep all of them.

    Returns
    -------
    numpy.ndarray, shape (N, min(n_components, N))
        Eigenvectors as columns, ordered by ascending eigenvalue.
    """
    laplacian = np.asarray(laplacian, dtype=float)
    _, eigenvectors = np.linalg.eigh(laplacian)
    return eigenvectors[:, :n_components]


def plot(X, y_sp, y_km, save_path=FIGURE_PATH):
    """Save side-by-side scatter plots of the two clusterings.

    Parameters
    ----------
    X : numpy.ndarray, shape (N, >=2)
        Data; only the first two columns are drawn.
    y_sp, y_km : array-like of int, shape (N,)
        Cluster labels from spectral clustering and from plain k-means.
    save_path : str
        Destination of the figure.
    """
    # Imported here so the numeric helpers remain importable without
    # matplotlib installed.
    from matplotlib import pyplot as plt

    y_sp = np.asarray(y_sp)
    y_km = np.asarray(y_km)
    # Size the palette for both label sets; it cycles when there are more
    # clusters than base colours.
    n_colors = int(max(y_sp.max(), y_km.max())) + 1
    colors = np.array(list(islice(cycle(PALETTE), n_colors)))

    plt.subplot(121)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_sp])
    plt.title("Spectral Clustering")
    plt.subplot(122)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_km])
    plt.title("Kmeans Clustering")
    # plt.show()
    plt.savefig(save_path)


def main():
    """Cluster the city table with spectral clustering and k-means, then plot."""
    # Imported here so the numeric helpers remain importable without
    # scikit-learn installed.
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import normalize

    # NOTE(review): the 'ANSI' codec name is documented as a Windows-only
    # alias of 'mbcs' -- confirm before running on another platform.
    data = pd.read_csv(DATA_PATH, encoding='ANSI')
    del data['city']
    data = data.values
    data = data.astype(np.float64)
    # axis=0 scales each column (feature) by its maximum absolute value.
    data = normalize(data, axis=0, norm='max')

    distances = calEuclidDistanceMatrix(data)
    adjacent = myKNN(distances, k=2)
    laplacian = calLaplacianMatrix(adjacent)
    H = spectralEmbedding(laplacian, N_EIGENVECTORS)

    sp_kmeans = KMeans(n_clusters=N_CLUSTERS).fit(H)
    pure_kmeans = KMeans(n_clusters=N_CLUSTERS).fit(data)
    plot(data, sp_kmeans.labels_, pure_kmeans.labels_)


if __name__ == "__main__":
    main()

