# k近邻算法.zip

• PUDN用户
了解作者
• Python
开发工具
• 15KB
文件大小
• zip
文件格式
• 0
收藏次数
• 10 积分
下载积分
• 1
下载次数
• 2019-03-31 19:38
上传日期
k近邻（KNN）算法模拟 K-nearest neighbor (KNN) algorithm simulation
k近邻算法.zip
• k近邻算法
• knn.csv
25.8KB
• knn.txt
1.7KB
• k近邻算法(knn).py
1.8KB
• k近邻算法.ipynb
6.3KB

# K-nearest-neighbour (KNN) regression demo.
#
# Loads 'knn.csv', standardises the feature columns, splits the rows 70/30
# into a training and a test set, predicts the 'danjia' (unit price) column
# of each test row as the mean over its nearest training rows, reports the
# RMSE, and finally predicts the unit price of one hand-written sample.
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from scipy.spatial import distance

shujv = pd.read_csv('knn.csv')
# Drop incomplete rows.  The original assigned the result to a misspelled
# name ('suhjv'), so the cleaned frame was silently discarded.
shujv = shujv.dropna()
print(shujv.head())

# Feature columns used for the distance computation.
features = ['mianji', 'xxx', 'geshu']

# Standardise each feature to zero mean / unit variance and remember the
# statistics so that new samples can be scaled exactly the same way.
# NOTE(review): the statistics are taken over the whole data set (train and
# test rows together), as in the original script.
# Equivalent alternative:
#   preprocessing.StandardScaler().fit_transform(shujv[features])
scaling = {}
for x in features:
    means = np.mean(shujv[x])
    stds = np.std(shujv[x])
    print(means, stds)
    scaling[x] = (means, stds)
    shujv[x] = (shujv[x] - means) / stds

# First 70% of the rows form the training set, the remaining 30% the test
# set.  Copy *after* slicing so both frames own their data and can be
# modified without touching 'shujv' or each other.
fenge = int(np.floor(np.shape(shujv)[0] * 0.7))
train_df = shujv.iloc[:fenge].copy()
test_df = shujv.iloc[fenge:].copy()


def predict(new_listing_value, feature_column, k=5):
    """Predict the unit price of one sample with k-nearest-neighbour regression.

    Args:
        new_listing_value: Row (e.g. a pandas Series) holding the already
            standardised feature values of the sample, indexable by the
            names in ``feature_column``.
        feature_column: List of feature column names used for the distance.
        k: Number of nearest training rows to average over (default 5).

    Returns:
        Mean of ``danjia`` over the ``k`` training rows closest to the
        sample (Euclidean distance, the ``cdist`` default).
    """
    query = np.asarray(new_listing_value[feature_column], dtype=float).reshape(1, -1)
    train_points = train_df[feature_column].to_numpy(dtype=float)
    # cdist returns an (n, 1) matrix; flatten it to one distance per row.
    # The distances stay local: the original wrote them into the shared
    # global train_df as a 'distance' column on every call.
    dists = distance.cdist(train_points, query).ravel()
    nearest = np.argsort(dists, kind='stable')[:k]
    return train_df['danjia'].iloc[nearest].mean()


# Evaluate on the held-out test set.
test_df['predicted_price'] = test_df[features].apply(
    predict, feature_column=features, axis=1
)
test_df['squared_error'] = (test_df['predicted_price'] - test_df['danjia']) ** 2
rmse = test_df['squared_error'].mean() ** 0.5
print(rmse)  # root-mean-square error

# Predict the unit price for a new sample with raw feature values
# [33600, 1, 2].  It is scaled with the statistics computed above rather
# than with hand-copied (rounded) constants.
a = [33600, 1, 2]
a = [
    (value - scaling[name][0]) / scaling[name][1]
    for name, value in zip(features, a)
]
temp_df = train_df.copy()
temp_df['distance'] = distance.cdist(temp_df[features], [a]).ravel()
temp_df = temp_df.sort_values('distance')
print(temp_df.head())  # the five nearest training rows
print(temp_df['danjia'].iloc[:5].mean())  # predicted unit price of the sample

相关推荐