谱聚类.zip

  • 桌子非
    了解作者
  • Python
    开发工具
  • 1KB
    文件大小
  • zip
    文件格式
  • 0
    收藏次数
  • 10 积分
    下载积分
  • 1
    下载次数
  • 2019-07-18 09:04
    上传日期
谱聚类的目的便是要找到一种合理的分割图的方法,使得分割后形成若干个子图:连接不同子图的边的权重(相似度)尽可能低,同一子图内的边的权重(相似度)尽可能高。
谱聚类.zip
  • 谱聚类.py
    3KB
内容介绍
# -*- coding: utf-8 -*-
"""
Spectral clustering demo.

Created on Thu Jul 11 20:25:05 2019
@author: Administrator

Builds a k-nearest-neighbour affinity graph from a CSV of city features,
computes the symmetrically normalized graph Laplacian, embeds the data in
the space of its smallest eigenvectors, and compares KMeans on that
embedding ("spectral clustering") against plain KMeans on the raw data.
"""
import sys
from itertools import cycle, islice

import numpy as np

sys.path.append("..")
np.random.seed(1)  # reproducible KMeans initialisation


def euclidDistance(x1, x2, sqrt_flag=False):
    """Return the squared Euclidean distance between x1 and x2.

    If sqrt_flag is True, return the true (square-rooted) distance instead.
    """
    res = np.sum((x1 - x2) ** 2)
    if sqrt_flag:
        res = np.sqrt(res)
    return res


def calEuclidDistanceMatrix(X):
    """Return the symmetric pairwise squared-Euclidean distance matrix of rows of X."""
    X = np.array(X)
    S = np.zeros((len(X), len(X)))
    for i in range(len(X)):
        # Only the upper triangle is computed; mirrored below (S is symmetric,
        # diagonal stays 0).
        for j in range(i + 1, len(X)):
            S[i][j] = 1.0 * euclidDistance(X[i], X[j])
            S[j][i] = S[i][j]
    return S


def myKNN(S, k, sigma=1.0):
    """Build a symmetric kNN affinity matrix from squared-distance matrix S.

    For each point, its k nearest neighbours (plus itself — hence ``k + 1``
    below, since the point's own distance 0 always sorts first) receive a
    Gaussian affinity exp(-d / (2 * sigma^2)); all other entries stay 0.
    The matrix is symmetrized by mirroring each assignment.
    """
    N = len(S)
    A = np.zeros((N, N))
    for i in range(N):
        dist_with_index = zip(S[i], range(N))
        dist_with_index = sorted(dist_with_index, key=lambda x: x[0])
        # xi's k nearest neighbours (index 0 is xi itself, distance 0)
        neighbours_id = [dist_with_index[m][1] for m in range(k + 1)]
        for j in neighbours_id:  # xj is xi's neighbour
            A[i][j] = np.exp(-S[i][j] / 2 / sigma / sigma)
            A[j][i] = A[i][j]  # mutually: keep A symmetric
    return A


def calLaplacianMatrix(adjacentMatrix):
    """Return the symmetrically normalized Laplacian D^(-1/2) (D - A) D^(-1/2).

    NOTE(review): an isolated node (zero degree) would divide by zero here;
    upstream myKNN always gives every node a self-affinity of 1, so degrees
    are positive in this pipeline — confirm before reusing standalone.
    """
    # Degree of each node: row sums of the adjacency matrix
    degreeMatrix = np.sum(adjacentMatrix, axis=1)
    # Unnormalized Laplacian: L = D - A
    laplacianMatrix = np.diag(degreeMatrix) - adjacentMatrix
    # Normalize: D^(-1/2) L D^(-1/2)
    sqrtDegreeMatrix = np.diag(1.0 / (degreeMatrix ** 0.5))
    return np.dot(np.dot(sqrtDegreeMatrix, laplacianMatrix), sqrtDegreeMatrix)


def plot(X, y_sp, y_km):
    """Scatter-plot spectral vs plain KMeans labels side by side and save a PNG."""
    # Local import so importing this module does not require matplotlib.
    from matplotlib import pyplot as plt

    colors = np.array(list(islice(cycle(['#377eb8', '#ff7f00', '#4daf4a',
                                         '#f781bf', '#a65628', '#984ea3',
                                         '#999999', '#e41a1c', '#dede00']),
                                  int(max(y_km) + 1))))
    plt.subplot(121)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_sp])
    plt.title("Spectral Clustering")
    plt.subplot(122)
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y_km])
    plt.title("Kmeans Clustering")
    # plt.show()
    plt.savefig("F:/tjxt/2019 new_research/result/now/figures/spectral_clustering.png")


def main():
    """Load the city data, run spectral and plain KMeans clustering, plot both."""
    # Local imports: only the driver needs pandas/sklearn.
    import pandas as pd
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import normalize

    data = pd.read_csv('F:/tjxt/2019 new_research/result/now/city_class.csv',
                       encoding='ANSI')
    del data['city']
    data = data.values
    data = data.astype(np.float64)
    # axis=0 scales each feature COLUMN by its max (the original comment
    # claimed row normalization, which axis=0 does not do).
    data = normalize(data, axis=0, norm='max')

    Similarity = calEuclidDistanceMatrix(data)
    Adjacent = myKNN(Similarity, k=2)
    Laplacian = calLaplacianMatrix(Adjacent)

    # BUG FIX: np.linalg.eig on a (numerically) symmetric matrix can return
    # complex eigenvalues/eigenvectors from round-off noise, which crashes
    # KMeans.fit. eigh is the correct routine for symmetric matrices and
    # guarantees real output.
    x, V = np.linalg.eigh(Laplacian)
    eig_order = sorted(zip(x, range(len(x))), key=lambda t: t[0])
    # Embed in the (up to) 500 smallest eigenvectors; with fewer samples the
    # slice simply takes them all.
    H = np.vstack([V[:, i] for (v, i) in eig_order[:500]]).T

    sp_kmeans = KMeans(n_clusters=10).fit(H)
    pure_kmeans = KMeans(n_clusters=10).fit(data)
    plot(data, sp_kmeans.labels_, pure_kmeans.labels_)


if __name__ == "__main__":
    main()
评论
    相关推荐