# -*- coding: utf-8 -*-
"""sklearn: kmeans

Blog post published 2017-01-03, category: ai.
Source: http://blog.itpub.net/12199764/viewspace-1479320/

Example script: cluster tab-separated samples with scikit-learn's KMeans,
print the fitted model's attributes, then save and reload the model with
joblib.
"""
from sklearn.cluster import KMeans

try:
    # `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and
    # removed in 0.23; the standalone package is the supported import.
    import joblib
except ImportError:
    # Fallback for old scikit-learn installs that only ship the bundled copy.
    from sklearn.externals import joblib

# Read the tab-separated input. The context manager guarantees the file is
# closed, and blank lines are skipped so they cannot produce empty feature
# rows that would make `fit` fail.
with open('c:/test/final.dat', 'r') as final:
    data = [line.strip().split('\t') for line in final if line.strip()]

# Columns from index 3 onward are parsed as the numeric features.
feature = [[float(x) for x in row[3:]] for row in data]

# Instantiate the KMeans class with 9 clusters and fit it.
clf = KMeans(n_clusters=9)
s = clf.fit(feature)
print(s)

# The 9 cluster centers.
print(clf.cluster_centers_)

# The cluster each sample belongs to.
print(clf.labels_)

# Used to judge whether the number of clusters is appropriate: the smaller
# the distance, the better the clustering; choose the cluster count at the
# inflection point.
print(clf.inertia_)

# Run prediction.
print(clf.predict(feature))

# Save the model.
joblib.dump(clf, 'c:/km.pkl')

# Load the saved model.
# NOTE(review): joblib.load unpickles the file; only load files you trust.
clf = joblib.load('c:/km.pkl')

# Optional scan (disabled in the original post): judge whether the number of
# clusters is appropriate by printing the inertia for each candidate count.
# The smaller the distance, the better the clustering; choose the cluster
# count at the inflection point.
#
# for i in range(5, 30, 1):
#     clf = KMeans(n_clusters=i)
#     s = clf.fit(feature)
#     print("%s %s" % (i, clf.inertia_))