kmeans_cluster聚类算法的实现.rar

  • Author: javale
  • Development tool: Visual C++
  • File size: 3KB
  • File format: rar
  • Favorites: 0
  • Download points: 10
  • Downloads: 170
  • Upload date: 2005-03-18 18:51
Source code of an implementation of the k-means clustering algorithm used in data mining.
kmeans_cluster聚类算法的实现.rar
  • KMEANS.CPP (8.9KB)
  • www.pudn.com.txt (218B)
Description
/****************************************************************************
 *                                                                          *
 *                                  KMEANS                                  *
 *                                                                          *
 ****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>

// FUNCTION PROTOTYPES

// DEFINES
#define SUCCESS    1
#define FAILURE    0
#define TRUE       1
#define FALSE      0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10

// Format a double as text. fcvt() is the old MSVC runtime routine.
char *f2a(double x, int width){
   static char cbuf[255];          // static so the returned pointer stays valid after return
   char *cp;
   int i,k;
   int d,s;
   cp=fcvt(x,width,&d,&s);
   if (s) {
      strcpy(cbuf,"-");
   } else {
      strcpy(cbuf," ");
   } /* endif */
   if (d>0) {
      for (i=0; i<d; i++) {
         cbuf[i+1]=cp[i];
      } /* endfor */
      cbuf[d+1]=0;
      cp+=d;
      strcat(cbuf,".");
      strcat(cbuf,cp);
   } else {
      if (d==0) {
         strcat(cbuf,".");
         strcat(cbuf,cp);
      } else {
         k=-d;
         strcat(cbuf,".");
         for (i=0; i<k; i++) {
            strcat(cbuf,"0");
         } /* endfor */
         strcat(cbuf,cp);
      } /* endif */
   } /* endif */
   cp=&cbuf[0];
   return cp;
}

// ***** Defined structures & classes *****
struct aCluster {
   double Center[MAXVECTDIM];
   int    Member[MAXPATTERN];   // Index of Vectors belonging to this cluster
   int    NumMembers;
};

struct aVector {
   double Center[MAXVECTDIM];
   int    Size;
};

class System {
private:
   double   Pattern[MAXPATTERN][MAXVECTDIM+1];
   aCluster Cluster[MAXCLUSTER];
   int      NumPatterns;            // Number of patterns
   int      SizeVector;             // Number of dimensions in vector
   int      NumClusters;            // Number of clusters
   void   DistributeSamples();      // Step 2 of K-means algorithm
   int    CalcNewClustCenters();    // Step 3 of K-means algorithm
   double EucNorm(int, int);        // Calc Euclidean norm vector
   int    FindClosestCluster(int);  // ret indx of clust closest to pattern
                                    // whose index is arg
public:
   System();                        // constructor (not shown in this preview)
   int  LoadPatterns(char *fname);  // Get pattern data to be clustered
   void InitClusters();             // Step 1 of K-means algorithm
   void RunKMeans();                // Overall control K-means process
   void ShowClusters();             // Show results on screen
   void SaveClusters(char *fname);  // Save results to file
   void ShowCenters();
};

void System::ShowCenters(){
   int i,j;
   printf("Cluster centers:\n");
   for (i=0; i<NumClusters; i++) {
      Cluster[i].Member[0]=i;
      printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
   } /* endfor */
   printf("\n");
}

int System::LoadPatterns(char *fname){
   FILE *InFilePtr;
   int i,j;
   double x;
   if((InFilePtr = fopen(fname, "r")) == NULL)
      return FAILURE;
   fscanf(InFilePtr, "%d", &NumPatterns);   // Read # of patterns
   fscanf(InFilePtr, "%d", &SizeVector);    // Read dimension of vector
   fscanf(InFilePtr, "%d", &NumClusters);   // Read # of clusters for K-Means
   for (i=0; i<NumPatterns; i++) {          // For each vector
      for (j=0; j<SizeVector; j++) {        // create a pattern
         fscanf(InFilePtr,"%lg",&x);        // consisting of all elements
         Pattern[i][j]=x;
      } /* endfor */
   } /* endfor */
   printf("Input patterns:\n");
   for (i=0; i<NumPatterns; i++) {
      printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
   } /* endfor */
   printf("\n--------------------\n");
   return SUCCESS;
}

//***************************************************************************
// InitClusters                                                             *
//   Arbitrarily assign a vector to each of the K clusters                  *
//   We choose the first K vectors to do this                               *
//***************************************************************************
void System::InitClusters(){
   int i,j;
   printf("Initial cluster centers:\n");
   for (i=0; i<NumClusters; i++) {
      Cluster[i].Member[0]=i;
      for (j=0; j<SizeVector; j++) {
         Cluster[i].Center[j]=Pattern[i][j];
      } /* endfor */
   } /* endfor */
   for (i=0; i<NumClusters; i++) {
      printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
   } /* endfor */
   printf("\n");
}

void System::RunKMeans(){
   int converged;
   int pass;
   pass=1;
   converged=FALSE;
   while (converged==FALSE) {
      printf("PASS=%d\n",pass++);
      DistributeSamples();
      converged=CalcNewClustCenters();
      ShowCenters();
   } /* endwhile */
}

double System::EucNorm(int p, int c){    // Calc squared Euclidean norm of vector difference
   double dist,x;                        // between pattern vector, p, and cluster
   int i;                                // center, c.
   char zout[128];
   char znum[40];
   char *pnum;
   pnum=&znum[0];
   strcpy(zout,"d=sqrt(");
   printf("The distance from pattern %d to cluster %d is calculated as:\n",p,c);
   dist=0;
   for (i=0; i<SizeVector; i++){
      x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
      strcat(zout,f2a(x,4));
      if (i==0)
         strcat(zout,"+");               // demo assumes 2-D vectors, so one '+' is enough
      dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
   } /* endfor */
   printf("%s)\n",zout);
   return dist;
}

int System::FindClosestCluster(int pat){
   int i, ClustID;
   double MinDist, d;
   MinDist=9.9e+99;
   ClustID=-1;
   for (i=0; i<NumClusters; i++) {
      d=EucNorm(pat,i);
      printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d));
      if (d<MinDist) {
         MinDist=d;
         ClustID=i;
      } /* endif */
   } /* endfor */
   if (ClustID<0) {
      printf("Aaargh");
      exit(0);
   } /* endif */
   return ClustID;
}

void System::DistributeSamples(){
   int i,pat,Clustid,MemberIndex;
   // Clear membership list for all current clusters
   for (i=0; i<NumClusters; i++){
      Cluster[i].NumMembers=0;
   }
   for (pat=0; pat<NumPatterns; pat++) {
      // Find cluster center to which the pattern is closest
      Clustid=FindClosestCluster(pat);
      printf("pattern %d assigned to cluster %d\n\n",pat,Clustid);
      // post this pattern to the cluster
      MemberIndex=Cluster[Clustid].NumMembers;
      Cluster[Clustid].Member[MemberIndex]=pat;
      Cluster[Clustid].NumMembers++;
   } /* endfor */
}

int System::CalcNewClustCenters(){
   int ConvFlag,VectID,i,j,k;
   double tmp[MAXVECTDIM];
   char xs[255];
   char ys[255];
   char nc1[20];
   char nc2[20];
   char *pnc1;
   char *pnc2;
   char *fpv;
   pnc1=&nc1[0];
   pnc2=&nc2[0];
   ConvFlag=TRUE;
   printf("The new cluster centers are now calculated as:\n");
   for (i=0; i<NumClusters; i++) {               // for each cluster
      pnc1=itoa(Cluster[i].NumMembers,nc1,10);   // itoa() is the old MSVC runtime routine
      pnc2=itoa(i,nc2,10);
      strcpy(xs,"Cluster Center");
      strcat(xs,nc2);
      strcat(xs,"(1/");
      strcpy(ys,"(1/");
      strcat(xs,nc1);
      strcat(ys,nc1);
      strcat(xs,")(");
      strcat(ys,")(");
      for (j=0; j<SizeVector; j++) {             // clear workspace
         tmp[j]=0.0;
      } /* endfor */
      for (j=0; j<Cluster[i].NumMembers; j++) {  // traverse member vectors
         VectID=Cluster[i].Member[j];
         for (k=0; k<SizeVector; k++) {          // traverse elements of vector
            tmp[k] += Pattern[VectID][k];        // add (member) pattern elmnt into temp
            if (k==0) {
               strcat(xs,f2a(Pattern[VectID][k],3));
            } else {
               strcat(ys,f2a(Pattern[VectID][k],3));
            } /* endif */
         } /* endfor */
// (listing truncated here in the site preview; the rest of KMEANS.CPP is in the download)
    Related downloads
    • k-means 聚类.rar
      A k-means clustering algorithm that implements clustering of distinct points; it can ...
    • k-means clustering algorithm
      A visual implementation of the k-means clustering algorithm on a two-dimensional plane; the number of clusters and the iteration threshold can be set, and the clustering result is displayed clearly with colors and cluster circles...
    • k-means clustering algorithm
      One of the K-Means family of clustering algorithms, where K is the number of classes and Means refers to the mean. As the name suggests, K-Means clusters data points by their means. The algorithm partitions similar data points using a preset value of K and an initial centroid for each class, and through the resulting partition...
    • K-means clustering algorithm
      Source code for a K-means clustering algorithm that can cluster text data.
    • k-means clustering algorithm
      An improved k-means clustering algorithm that removes the drawback of randomly selecting the initial sample points.
    • K-means clustering algorithm
      A k-means clustering algorithm written in Python, with a dataset included.
    • K-means clustering algorithm
      It is not only the simplest clustering algorithm but also the most widespread and commonly used. k-means is a centroid-based method for partitioning data: given a dataset D and a number of clusters k, the algorithm partitions the dataset into k clusters. In general, each data item can belong only to...
    • Genetic-algorithm-based k-means clustering
      K-means is a typical distance-based clustering algorithm: it uses distance as the measure of similarity, so the closer two objects are, the more similar they are considered. The algorithm treats a cluster as a group of objects that lie close together, and its final goal is to produce compact, well-separated clusters (the sketch after this list illustrates these two rules).
    • k-means clustering algorithm
      A k-means clustering algorithm written in Java that reads data from a file and has a visual interface.
    • k-means clustering algorithm
      A k-means clustering algorithm that also includes some user data; the functions are clearly annotated, so it is a good reference, worth studying, and verified to work.
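Several of the descriptions above explain the core of centroid-based k-means: assign each point to the nearest center by Euclidean distance, then recompute each center as the mean of its members. The sketch below is a self-contained illustration of one such pass over a 2-D dataset; the names Point, sqDist and kmeansPass are illustrative and do not come from any of the packages listed.

#include <vector>
#include <cstddef>

struct Point { double x, y; };

// Squared Euclidean distance: the closer two points are, the more similar they are considered.
static double sqDist(const Point &a, const Point &b) {
    double dx = a.x - b.x, dy = a.y - b.y;
    return dx * dx + dy * dy;
}

// One k-means pass: assign every point in D to its nearest center,
// then return the centers recomputed as the mean of their members.
std::vector<Point> kmeansPass(const std::vector<Point> &D,
                              const std::vector<Point> &centers) {
    std::size_t k = centers.size();
    std::vector<Point> sum(k, Point{0.0, 0.0});
    std::vector<std::size_t> count(k, 0);
    for (const Point &p : D) {
        std::size_t best = 0;                       // each point joins exactly one cluster
        for (std::size_t j = 1; j < k; ++j)
            if (sqDist(p, centers[j]) < sqDist(p, centers[best]))
                best = j;
        sum[best].x += p.x;
        sum[best].y += p.y;
        ++count[best];
    }
    std::vector<Point> next = centers;              // empty clusters keep their old center
    for (std::size_t j = 0; j < k; ++j)
        if (count[j] > 0) {
            next[j].x = sum[j].x / count[j];
            next[j].y = sum[j].y / count[j];
        }
    return next;
}

Repeating kmeansPass until the centers stop moving corresponds to the loop that RunKMeans() drives in the KMEANS.CPP listing above.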