import csv
import numpy as np
from numpy.linalg import inv
import random
import math
import sys
#读数据
data = []
# 每一个维度储存一种污染物的数据
for i in range(18):
data.append([])
n_row = 0
text = open('C:/Users/liky/Desktop/PY/2017fall-ml-hw1-master/2017fall-ml-hw1-master/train.csv', 'r', encoding='big5')
row = csv.reader(text , delimiter=",")
for r in row:
# 第0行沒有数据
if n_row != 0:
# 每一行只有第3-27格有值(1天內24小时的数值)
for i in range(3,27):
if r[i] != "NR":
data[(n_row-1)%18].append(float(r[i]))
else:
data[(n_row-1)%18].append(float(0))
n_row = n_row+1
text.close()
#解析数据到(x,y)
x = []
y = []
# 每12个月
for i in range(12):
# 一個月取连续10小时的data可以有471组
for j in range(471):
x.append([])
# 18种污染物
for t in range(18):
# 连续9小時
for s in range(9):
x[471*i+j].append(data[t][480*i+j+s] ) #一个x中的值为1到9时的值(18行9列) 对应的y的值为第10时的PM2.5的值
y.append(data[9][480*i+j+9])
#创建x和y的数组
x = np.array(x)
y = np.array(y)
# add square term
# x = np.concatenate((x,x**2), axis=1)
# add bias
x = np.concatenate((np.ones((x.shape[0],1)),x), axis=1) # bias
#初始参数
w = np.zeros(len(x[0]))
l_rate = 10
repeat = 10000
#check answer
# use close form to check whether ur gradient descent is good
# however, this cannot be used in hw1.sh
# w = np.matmul(np.matmul(inv(np.matmul(x.transpose(),x)),x.transpose()),y)
#开始训练
x_t = x.transpose() #转置
s_gra = np.zeros(len(x[0]))
for i in range(2):
hypo = np.dot(x,w) #矩阵相乘,得到一个行数和x相同,列数为1的矩阵
loss = hypo - y # hypo = w1*x1 + w2*x2 + ... + wt*xt + bias
cost = np.sum(loss**2) / len(x) #...
cost_a = math.sqrt(cost) #求loss值,对结果无影响
gra = np.dot(x_t,loss) # 求微分 loss = (hypo - y) ---> &gra/&w1 = (hypo - y) * x1 = loss * x1
s_gra += gra**2 #求 adagrad
ada = np.sqrt(s_gra) #...
w = w - l_rate * gra/ada #得出下一步的点
print('iteration: %d | Cost: %f ' % ( i,cost_a))
# save model
np.save('model.npy',w)
# read model
w = np.load('model.npy')
#读测试数据
test_x = []
n_row = 0
text = open('C:/Users/liky/Desktop/PY/2017fall-ml-hw1-master/2017fall-ml-hw1-master/test.csv' ,"r")
row = csv.reader(text , delimiter= ",")
for r in row:
if n_row %18 == 0:
test_x.append([])
for i in range(2,11):
test_x[n_row//18].append(float(r[i]) ) #把每天的九个小时的数据作为一组存到一个test_x里
else :
for i in range(2,11):
if r[i] !="NR":
test_x[n_row//18].append(float(r[i]))
else:
test_x[n_row//18].append(0)
n_row = n_row+1
text.close()
test_x = np.array(test_x)
# add square term
# test_x = np.concatenate((test_x,test_x**2), axis=1)
# add bias
test_x = np.concatenate((np.ones((test_x.shape[0],1)),test_x), axis=1)
#得到结果
ans = []
for i in range(len(test_x)):
ans.append(["id_"+str(i)])
a = np.dot(w,test_x[i])
ans[i].append(a)
filename = "C:/Users/liky/Desktop/PY/2017fall-ml-hw1-master/2017fall-ml-hw1-master/predict.csv"
text = open(filename, "w+")
s = csv.writer(text,delimiter=',',lineterminator='\n')
s.writerow(["id","value"])
for i in range(len(ans)):
s.writerow(ans[i])
text.close()
HW1 PM2.5 代码逐行解析
猜你喜欢
转载自blog.csdn.net/li_k_y/article/details/84104103
今日推荐
周排行