import numpy as np
from numpy import *
from operator import *
def filemattrix(filename, delimiter='\t'):
    """Parse a delimited text file into a feature matrix and a label list.

    Each non-blank line is expected to hold at least three numeric feature
    fields, with an integer class label in the last field.

    Args:
        filename: Path of the text file to read.
        delimiter: Field separator handed to ``str.split``. Defaults to a
            tab; pass ``None`` to split on any run of whitespace.

    Returns:
        A tuple ``(returnMat, labels)``. ``returnMat`` is a float NumPy array
        of shape ``(n_rows, 3)`` holding the first three fields of every row;
        ``labels`` is a list of ``int`` built from the last field of every
        row.

    Raises:
        ValueError: If a row has fewer than three fields, or if a field
            cannot be converted to a number.
    """
    rows = []
    labels = []
    # The context manager closes the file even when parsing raises.
    with open(filename) as fr:
        for line_number, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline) carry no data.
                continue
            fields = line.split(delimiter)
            if len(fields) < 3:
                raise ValueError(
                    'line %d of %s has %d field(s); at least 3 are required'
                    % (line_number, filename, len(fields))
                )
            # Only the first three fields are features.
            rows.append([float(value) for value in fields[:3]])
            # The class label is always the last field of the row.
            labels.append(int(fields[-1]))
    # reshape keeps the (0, 3) shape when the file has no data rows.
    returnMat = np.array(rows, dtype=float).reshape(-1, 3)
    return returnMat, labels
# Draw the scatter plot
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Load the feature matrix and class labels from the dataset stored in
# datingDataSet.txt.
returnMat, labels = filemattrix('datingDataSet.txt')
fig = plt.figure()
axes = fig.add_subplot(111)
# Plot column 0 (x) against column 2 (y). A boolean mask selects every row of
# one class at once, so each class needs a single scatter call instead of one
# call (and one artist) per data point. Rows whose label is not 1, 2 or 3 are
# not drawn.
labels_arr = np.asarray(labels)
for label_value, color in ((1, 'red'), (2, 'green'), (3, 'black')):
    mask = labels_arr == label_value
    axes.scatter(returnMat[mask, 0], returnMat[mask, 2], color=color)
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Epic Chart')  # NOTE: Chinese text is poorly supported here, so the title is in English.
plt.show()
# ************* Data file ******************
40920 8.326976 0.953952 3
14488 7.153469 1.673904 2
26052 1.441871 0.805124 1
75136 13.147394 0.428964 1
38344 1.669788 0.134296 1
72993 10.141740 1.032955 1
35948 6.830792 1.213192 3
42666 13.276369 0.543880 3
67497 8.631577 0.749278 1
35483 12.273169 1.508053 3
50242 3.723498 0.831917 1
63275 8.385879 1.669485 1
5569 4.875435 0.728658 2
51052 4.680098 0.625224 1
77372 15.299570 0.331351 1
43673 1.889461 0.191283 1
61364 7.516754 1.269164 1
69673 14.239195 0.261333 1
15669 0.000000 1.250185 2
28488 10.528555 1.304844 3 #或者去https://github.com/ 找