开窗时刻模型

只考虑开关窗时刻的模型

思路

在前期数据处理时，需要根据开关窗状态发生的时刻，到其他表格中查找对应时刻的其他参数，比如天气栏中的室外温度，ikair卧室栏中的室内温度、CO2浓度，以及pm栏中的AQI。
实现提取过程的具体思路为：
1、先把所有的时间类型都转化为形如“年月日时分秒”（例如20170630230000）的纯数字字符串（str）；各字段位数固定，所以按字符串比较大小就等价于按时间先后比较
2、比较开窗时刻的采集时间与其他待提取因素栏中的采集时间
这里需要说明的是，我用正则遍历了所有的sheet表格，把所有记录了开关窗场所的信息（比如卧室、客厅、阳台、厨房；厨房的数据要不要剔除还在考虑中）都提取出来放在总列表里面了
3、处理时先遍历开窗时刻的采集时间t1，每取出一个t1，就依次与其他因素对应的采集时间t2比较；遇到第一个满足t1<=t2的t2时，就认为该t2时刻的因素值是开窗时刻的因素值
这里假设：环境参数变化比较平缓，1~2个小时内各个参数变化不是很大
有可能会出现一个问题，开关窗对应的时刻t1有可能 > 各个参数记录的时刻t2(t1>t2)，比如6月30号23点记录了一次关窗，但参数记录时刻很可能就只截止到6月30号22点。
这时就需要我们最后再判断一下开关窗状态列表和提取后的各参数列表中元素的数量是否相等，若不等，需要把参数栏中最后一个时刻的参数加入列表中。
4、将提取出来的各个参数列表放在同一个表格中

代码

这是测试版的代码，里面很多注释掉的print没有删掉，显得比较乱，也当给自己做了备份
这里只提取了开关窗时刻和状态以及对应的天气栏中温度和湿度

复习摘要

值得再复习一遍的代码

# Measure how long a piece of code takes to run.
# time.clock() was removed in Python 3.8; time.perf_counter() is the replacement.
import time
module_name = 'demo'  # placeholder label shown in the timing message
t_start = time.perf_counter()
# ... the code being timed goes here ...
t_end = time.perf_counter()
run_time = t_end - t_start
# str.format() needs an argument for the '{}' field, otherwise it raises IndexError.
print('{}模块运行时间:%ds'.format(module_name)%run_time)

# Take the first weather record whose time is at or after the window event,
# then really leave the loop instead of scanning the remaining records.
for position, weather_time in enumerate(weather_times):
	if all_opentime <= weather_time:
		# enumerate() already gives the index, so no list.index() rescan is needed.
		new_outtemps.append(weather_temps[position])
		new_humiditys.append(weather_humiditys[position])
		break

# Turn every timestamp in the first column into a digits-only string:
# cut the text into its date part and its time part, split those on '-' and ':',
# then glue year, month, day, hour, minute and second back together.
for raw_stamp in df.iloc[:len(df['采集时间']), 0]:
	stamp_text = str(raw_stamp)
	date_fields = stamp_text[:10].split('-')
	clock_fields = stamp_text[11:].split(':')
	compact = (
		date_fields[0] + date_fields[1] + date_fields[2]
		+ clock_fields[0] + clock_fields[1] + clock_fields[2]
	)
	weather_times.append(compact)

# Use regular expressions to pick out every window sheet and count them.
place_number = 0  # number of sheets that hold window open/close records
for Sheet_name in Sheet_names:
	# A window sheet has '窗' in its name and is not an ikair sensor sheet.
	if re.search('窗', Sheet_name) is not None and re.search('ikair', Sheet_name) is None:
		place_number += 1
		print('有开窗记录的sheet数量：%d次'%place_number)

完整代码

# -*- coding: utf-8 -*-  
import numpy as np
import pandas as pd
import os
from datetime import datetime
import matplotlib.pyplot as plt
import re
import time


def readfile(months,last_days):
	#months = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
	#last_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
	t_start = time.clock()  #起始时间，用于监测代码的运行时间
	# months = ['06']
	# last_days = [30]
	for month,last_day in zip(months,last_days):
		path = 'C:\\Users\\hao\\Desktop\\time开窗模型\\源数据\\48NNJZ03_M_2017-{}.xlsx'.format(month) #
		b_1 = pd.ExcelFile(path)
		Sheet_names = b_1.sheet_names
		place_number = 0 #有开窗记录的地点数目
		all_opentimes = [] #所有的开窗时刻记录 
		all_openstates = [] #所有的开窗状态记录
		for Sheet_name in Sheet_names:
			'''提取开窗时刻的采集时间和状态'''
			#if (not re.search('卧', Sheet_name) == None) and (not re.search('窗', Sheet_name) == None) and (re.search('阳台', Sheet_name) == None) and (re.search('厨房', Sheet_name) == None):
			if  (not re.search('窗', Sheet_name) == None) and (re.search('ikair', Sheet_name) == None):
				place_number += 1
				print('有开窗记录的sheet数量：%d次'%place_number)
				df = pd.read_excel(path, sheet_name=Sheet_name)
				temp_columns = ['采集时间', '状态']
				df = df[temp_columns].dropna()
				#all_openstates.append(x for x in df['状态'].tolist())  #将出现的状态都放到一个列表中 为啥这个不可以？？？
				df_zt = df['状态'].tolist()
				#print(df_zt)
				for i in df_zt:  #将open和close改写成1,0
					if i == 'open':
						all_openstates.append(1)
					elif i == 'close':
						all_openstates.append(0)
				#print(df)
				for i in range(len(df['采集时间'])):
					q_1 = str(df.iloc[i, 0])[:10]
					q_1 = q_1.split('-')
					#print(q_1)
					q_2 = str(df.iloc[i, 0])[11:]
					q_2 = q_2.split(':')
					#print(q_2)
					q_3 = q_1[0] + q_1[1] + q_1[2] + q_2[0] + q_2[1] + q_2[2]
					#print(q_3)
					all_opentimes.append(q_3)
					#print(all_opentimes)
			print('all_opentimes:%d长度'%len(all_opentimes))
			#如果汇集几个地点的开窗数据，可能会涉及到排序的问题
			#return all_opentimes, all_openstates 

		#提取天气栏里面的信息,
		weather_times = []  #天气栏中的时刻
		df = pd.read_excel(path, sheet_name='天气')
		temp_columns = ['采集时间', '温度(℃)', '湿度(%)']
		df = df[temp_columns].dropna()
		#weather_temps = df['温度(℃)'].tolist()
		'''
		这里尝试用np.array，而不用list,看运算速度能否提高
		'''
		weather_temps = np.array(df['温度(℃)'])
		weather_humiditys = np.array(df['湿度(%)'])
		for i in range(len(df['采集时间'])):
			q_1 = str(df.iloc[i, 0])[:10]
			q_1 = q_1.split('-')
			#print(q_1)
			q_2 = str(df.iloc[i, 0])[11:]
			q_2 = q_2.split(':')
			#print(q_2)
			q_3 = q_1[0] + q_1[1] + q_1[2] + q_2[0] + q_2[1] + q_2[2]
			#print(q_3)	
			weather_times.append(q_3)
		#print('weather_times长度%d'%len(weather_times))	
		'''
		提取ikair卧室的室内温度和二氧化碳
		'''		
	t_end = time.clock()
	run_time = t_end - t_start
	print('{}函数耗时：%ds'.format('readfile')%run_time)
	return all_opentimes, all_openstates, weather_times, weather_temps, weather_humiditys

def dealwithfile():
	"""Look up outdoor temperature and humidity for every window event.

	Calls ``readfile(months, last_days)`` using the module-level ``months`` and
	``last_days``. For each event time, the first weather record (in list
	order) whose time is at or after the event supplies the values. An event
	later than every weather record gets the last weather record instead; if
	there are no weather records at all it gets NaN.

	Returns:
		``(new_outtemps, new_humiditys)``: two lists with exactly one entry per
		event, in the same order as the event list returned by ``readfile``.
	"""
	# time.clock() was removed in Python 3.8; perf_counter() is its replacement.
	t_start = time.perf_counter()
	all_opentimes, all_openstates, weather_times, weather_temps, weather_humiditys = readfile(months,last_days)
	if len(weather_times) > 0:
		fallback_temp, fallback_humidity = weather_temps[-1], weather_humiditys[-1]
	else:
		fallback_temp = fallback_humidity = float('nan')
	new_outtemps, new_humiditys = [], []
	for all_opentime in all_opentimes:
		# Index of the first weather record at or after this event, if any.
		match = next(
			(position for position, weather_time in enumerate(weather_times)
			 if all_opentime <= weather_time),
			None,
		)
		if match is None:
			# Fill the gap in place so results stay aligned with the events,
			# however many events are late and wherever they sit in the list.
			new_outtemps.append(fallback_temp)
			new_humiditys.append(fallback_humidity)
		else:
			new_outtemps.append(weather_temps[match])
			new_humiditys.append(weather_humiditys[match])
	t_end = time.perf_counter()
	run_time = t_end - t_start
	# The label now names this function (it used to say 'findfile').
	print('{}函数耗时：%ds'.format('dealwithfile')%run_time)
	return new_outtemps, new_humiditys

def savefile(output_path=r'C:\Users\hao\Desktop\test5.xlsx'):
	"""Write window events and their matched weather values to an Excel file.

	Calls ``readfile(months, last_days)`` (module-level ``months`` and
	``last_days``) for event times and states, and ``dealwithfile()`` for the
	matched temperature and humidity, then saves the four columns.

	Args:
		output_path: destination workbook; defaults to the path this script
			has always written to.

	Raises:
		ValueError: from pandas when the four lists differ in length.
	"""
	# time.clock() was removed in Python 3.8; perf_counter() is its replacement.
	t_start = time.perf_counter()
	# NOTE: dealwithfile() calls readfile() again, so the workbooks are read twice.
	all_opentimes, all_openstates, weather_times, weather_temps, weather_humiditys = readfile(months,last_days)
	new_outtemps, new_humiditys = dealwithfile()
	columns = {
		"all_opentimes": all_opentimes,
		"new_outtemps": new_outtemps,
		"new_humiditys": new_humiditys,
		"all_openstates": all_openstates,
	}
	pd.DataFrame(columns).to_excel(output_path, index=False)
	t_end = time.perf_counter()
	run_time = t_end - t_start
	# The label now spells the function name correctly (it used to say 'sivefile').
	print('{}函数耗时：%ds'.format('savefile')%run_time)

if __name__ == '__main__':
	# dealwithfile() and savefile() read these two names as module globals,
	# so they must be set before savefile() runs.
	months, last_days = ['06'], [30]
	savefile()
#print(all_opentime.index(76))