# -*- coding: utf-8 -*-
import gzip
import json
import os
import sys
import time as t
import urllib.request
import zlib
from io import StringIO

import pandas as pd
import requests
url_auth = "http://****/api-token-auth/" #隐去
url_data ="http://***/stocks/"
def get_token():
    """Authenticate against the web API and return a JWT token string.

    Credentials are read from the environment variables
    ``my_webapi_username`` and ``my_webapi_password``.

    Raises:
        KeyError: if either environment variable is missing.
        urllib.error.URLError: if the auth endpoint is unreachable.
    """
    username = os.environ['my_webapi_username']
    pwd = os.environ['my_webapi_password']
    payload = json.dumps({"username": username, "password": pwd}).encode('utf-8')
    headers = {"Content-Type": "application/json"}
    req = urllib.request.Request(url_auth, data=payload, headers=headers)
    # Use a context manager so the HTTP response is closed deterministically
    # (the original leaked the response object returned by urlopen).
    with urllib.request.urlopen(req) as resp:
        result = resp.read().decode('utf-8')
    return json.loads(result)['token']
def get_price_json(token, code, start_date, close_date, freq):
    """Fetch price data from the API and return it as a JSON string.

    A plain ``str`` is returned (rather than a DataFrame) because a string
    is the convenient form for subsequent compression.

    Args:
        token: JWT token from ``get_token``.
        code: security code, e.g. ``'000001.XSHE'``.
        start_date / close_date: date range as ``'YYYY-MM-DD'`` strings.
        freq: bar frequency, e.g. ``'1m'``.
    """
    headers = {"Content-Type": "application/json",
               "Authorization": "JWT " + token}
    params = {'security': code, 'startdate': start_date,
              'enddate': close_date, 'method': 'get_price', 'unit': freq}
    payload = json.dumps(params)
    # NOTE(review): a GET request with a JSON body is unusual but is what
    # this API expects — confirm before changing to POST.
    req = requests.get(url_data, data=payload, headers=headers)
    # Wrap in StringIO: passing a literal JSON string to read_json is
    # deprecated in modern pandas (StringIO was imported for this purpose).
    data = pd.read_json(StringIO(req.text))
    return data.to_json()
def get_price_df(token, code, start_date, close_date, freq):
    """Fetch price data from the API and return it as a pandas DataFrame.

    Args:
        token: JWT token from ``get_token``.
        code: security code, e.g. ``'000001.XSHE'``.
        start_date / close_date: date range as ``'YYYY-MM-DD'`` strings.
        freq: bar frequency, e.g. ``'1m'``.
    """
    headers = {"Content-Type": "application/json",
               "Authorization": "JWT " + token}
    params = {'security': code, 'startdate': start_date,
              'enddate': close_date, 'method': 'get_price', 'unit': freq}
    payload = json.dumps(params)
    req = requests.get(url_data, data=payload, headers=headers)
    # StringIO avoids the pandas deprecation of reading a literal JSON string.
    data = pd.read_json(StringIO(req.text))
    # The API nests each column's payload inside row 0; unpack per column.
    # assumes row 0 holds list/dict-like column values — TODO confirm with API
    df = pd.DataFrame({col: data[col][0] for col in data.columns})
    return df
def get_data_df_test():
    """Fetch one year of 1-minute bars for 000001.XSHE as a DataFrame."""
    token = get_token()
    return get_price_df(
        token,
        '000001.XSHE',
        '2018-01-01',
        '2019-01-01',
        '1m',
    )
def get_data_json_test():
    """Fetch one year of 1-minute bars for 000001.XSHE as a JSON string."""
    token = get_token()
    return get_price_json(
        token,
        '000001.XSHE',
        '2018-01-01',
        '2019-01-01',
        '1m',
    )
# Test the speed of the API.
def test():
    """Time five consecutive API fetches and print shape and latency."""
    for i in range(5):
        t0 = t.time()
        # Fixed: the original called get_data_test(), which does not exist.
        data = get_data_df_test()
        print("i: ", i, " row: ", data.shape)
        rows, cols = data.shape
        print("cost time:", t.time() - t0, 's')
        # Rough per-cell character count of the payload (kept for reference;
        # distinct loop names so they don't read as shadowing the outer i).
        d = [len(str(data.iloc[r, c]))
             for r in range(rows) for c in range(cols)]
        #print("compute bit", sum(d))
#print("compute bit",sum(d))
# Test compression efficiency.
# File size ordering: dataframe > json > gzip. So when a DataFrame is on hand,
# it is generally best to convert it to JSON first and then gzip the JSON —
# that combination gives the best results.
# zlib or gzip? Better to compare the two libraries directly.
def json_gzip_test():
    """Measure gzip compress/decompress time and size on the JSON payload."""
    print("gzip =>compress")
    data = get_data_json_test()
    print("json data: ", sys.getsizeof(data))
    t0 = t.time()
    # str must be encoded to bytes first; compresslevel defaults to 9,
    # lowered to 6 here for the comparison.
    # Fixed: the original line had a stray ')' (SyntaxError).
    g_data = gzip.compress(data.encode(), compresslevel=6)
    print("gzip compress cost time:", t.time() - t0, 's')
    print("gzip compress size: ", sys.getsizeof(g_data))
    print("=>gzip decompress")
    t1 = t.time()
    d_data = gzip.decompress(g_data).decode("utf-8")  # decompression has no level
    print("gzip decompress cost time:", t.time() - t1, 's')
    print("gzip d_data: ", type(d_data), "size: ", sys.getsizeof(d_data))
    #print(d_data)
def json_zlib_test():
    """Measure zlib compress/decompress time and size on the JSON payload.

    Requires ``import zlib`` at the top of the file (the original never
    imported it, so this function raised NameError when called).
    """
    print("zlib =>compress")
    data = get_data_json_test()
    print("json data: ", sys.getsizeof(data))
    t0 = t.time()
    # Encode to UTF-8 bytes, then compress; level 6 is zlib's default,
    # stated explicitly so it matches json_gzip_test.
    comp_data = zlib.compress(data.encode('utf-8'), level=6)
    print("zlib compress cost time: ", t.time() - t0, 's')
    print("zlib compress size: ", sys.getsizeof(comp_data))
    print("zlib =>decompress")
    t1 = t.time()
    decom_data = zlib.decompress(comp_data)  # decompression has no level
    print("zlib decompress cost time:", t.time() - t1, 's')
    #print(decom_data.decode('utf-8'))
def json_zlib_byte_test():
    """Like json_zlib_test, but converts the str via bytes() first.

    Requires ``import zlib`` at the top of the file (the original never
    imported it, so this function raised NameError when called).
    """
    print("zlib byte => compress")
    data = get_data_json_test()
    print("json data: ", sys.getsizeof(data))
    # bytes(s, encoding=...) is equivalent to s.encode(...); kept to show
    # the size difference between the str and bytes objects.
    json_bytes = bytes(data, encoding='utf-8')
    print("json bytes data: ", sys.getsizeof(json_bytes))
    t0 = t.time()
    comp_data = zlib.compress(json_bytes, level=6)
    print("zlib +byte compress cost time: ", t.time() - t0, 's')
    print("zlib +byte compress size: ", sys.getsizeof(comp_data))
    print("zlib +byte =>decompress")
    t1 = t.time()
    decom_data = zlib.decompress(comp_data)  # decompression has no level
    print("zlib +byte decompress cost time:", t.time() - t1, 's')
# Comparison results:
#
# json_gzip_test()
#   gzip =>compress
#   json data:  8322743
#   gzip compress cost time: 0.4303134775161743 s
#   gzip compress size:  1150013
#   =>gzip decompress
#   gzip decompress cost time: 0.06250691413879395 s
#   gzip d_data:  <class 'str'> size:  8322743
#
# json_zlib_test()
#   zlib =>compress
#   json data:  8322743
#   zlib compress cost time:  0.4296891689300537 s
#   zlib compress size:  1170816
#   zlib =>decompress
#   zlib decompress cost time: 0.029301881790161133 s
#
# json_zlib_byte_test()
#   zlib byte =>compress
#   json data:  8322743
#   json bytes data:  8322727
#   zlib +byte compress cost time:  0.42188024520874023 s
#   zlib +byte compress size:  1170816
#   zlib +byte =>decompress
#   zlib +byte decompress cost time: 0.03125166893005371 s
#
# Conclusion: at the same compression level, zlib and gzip compress and
# decompress at roughly the same speed. Note that raising the compression
# level makes compression take longer.