DeBank和非小号网站的数据分析-实习工作小结

 暑期实习的时候,我们部门开始对DeBank和非小号等网站进行金融数据分析,相关数据的处理工作就交给了我。好些天没写博客了,今天写一篇,算是对暑期实习的一个小结。所有数据都取自DeBank和非小号这两个网站,主要处理以下四个字段:日期(近12个月)、锁仓量、24H额、收盘价,最终要交给数据分析师一个Excel表格。

我看了一眼,这两个网站的表格数据都是通过JSON接口传输的,还是挺好处理的。最后要输出Excel表格,直接用pandas库的DataFrame数据结构来处理即可。代码的逻辑是先从DeBank上获取12个月的锁仓量,而24H额和收盘价这两个字段都在非小号网站上,分开处理即可。按F12打开浏览器的开发者工具,就能直接看到返回JSON的接口URL,十分方便,下面贴上代码:

import time
import requests
import pandas as pd
import random

# Shared configuration. The browser User-Agent is picked at random (see `headers`).
# DeBank chart endpoint; the literal 'example' in the query string is a
# placeholder that the fetch loop replaces with one project id from `names`.
base_url = 'https://api.debank.com/project/chart?id=example&type=tvl'
# Project ids substituted into `base_url`. The order matters: `base_urls` is
# indexed in parallel with this list, and each id is also used as the prefix of
# the generated column names.
names = [
    'heco_mdex',
    'bsc_mdex',
    'synthetix',
    'bancor',
    'bsc_bakeryswap',
    'curve',
    'matic_curve',
    'uma',
    '1inch2',
    'ren',
    'reserve',
    'gnosis',
]
# Coin-history endpoints, one per entry of `names` and in the same order
# (the second fetch loop pairs names[i] with base_urls[i]).
#
# The URLs differ only in `coincode`, so they are generated from a single
# template. This also fixes the third URL, which previously carried the
# malformed begin date '202006024' instead of '20200624'.
_HISTORY_URL_TEMPLATE = (
    'https://dncapi.bqrank.net/api/v3/coin/history'
    '?coincode={code}&begintime={begin}&endtime={end}'
    '&page=1&per_page=1000&webp=1'
)
# Query window as YYYYMMDD strings, shared by every request.
_HISTORY_BEGIN = '20200624'
_HISTORY_END = '20210624'
# `coincode` values; the trailing comment names the matching `names` entry.
# Two pairs of projects share one coin code, so their price/volume columns
# come from the same endpoint.
_COIN_CODES = [
    'mdex',                     # heco_mdex
    'mdex',                     # bsc_mdex
    'synthetix-network-token',  # synthetix
    'bancor',                   # bancor
    'bakeryswap',               # bsc_bakeryswap
    'curve',                    # curve
    'curve',                    # matic_curve
    'uma',                      # uma
    '1inchtoken',               # 1inch2
    'republic-protocol',        # ren
    'rsr',                      # reserve
    'gnosis-gno',               # gnosis
]
base_urls = [
    _HISTORY_URL_TEMPLATE.format(
        code=code, begin=_HISTORY_BEGIN, end=_HISTORY_END
    )
    for code in _COIN_CODES
]
# Pool of browser User-Agent strings; one is chosen at random for `headers`.
# Adjacent string literals on consecutive lines are concatenated by Python,
# so each entry below is a single string even when it spans two lines.
User_Agent = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR "
    "2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center "
    "PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET "
    "CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR "
    "3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR "
    "2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; "
    ".NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) "
    "Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 "
    "Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 "
    "Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 "
    "TaoBrowser/2.0 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 "
    "Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET "
    "CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 "
    "Safari/535.11 LBBROWSER",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET "
    "CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET "
    "CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; "
    "360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET "
    "CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) "
    "Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 "
    "Safari/537.11",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) "
    "Firefox/3.6.10 "
]
# Request headers shared by every HTTP call in this script. The choice is made
# once, when this statement runs, so all requests of one run use the same
# User-Agent.
# The header name must be spelled "User-Agent" (hyphen): the previous
# "User_Agent" key was sent as an unrelated custom header, so the randomly
# chosen browser string never replaced the HTTP client's default user agent.
headers = {
    "User-Agent": random.choice(User_Agent),
}


# Module-level placeholders kept so existing names stay defined. They are no
# longer shared between the entries of `tmp_dic` (see below).
date = []
value = []
suo_cang_jia = []
one_day = []

# Column-name suffixes appended to each project id. The leading space is part
# of the column name. They are runtime strings that end up as Excel headers
# (date, locked amount, 24h volume, closing price), so they are left untouched.
name_date = ' 日期'
name_value = ' 锁仓量'
name_one_day = ' 24H额'
name_souPanJia = ' 收盘价'

# Pre-register every column so the final sheet has a fixed column order:
# for each project, date -> locked amount -> 24h volume -> closing price.
# The fetch loops later overwrite these entries with the real data.
tmp_dic = {}
tmp_execel_name = []
for name in names:
    for suffix in (name_date, name_value, name_one_day, name_souPanJia):
        # A fresh list per key: previously all keys of one kind pointed at
        # the same list object, so an in-place append through any one of
        # them would have leaked into every project's column.
        tmp_dic[name + suffix] = []


# Fetch the locked-amount series for every project from DeBank and store it
# under the "<name> 日期" / "<name> 锁仓量" keys of `tmp_dic`.
for name in names:
    real_url = base_url.replace('example', name)
    print(real_url)
    # Random pause of up to 3 seconds between requests.
    time.sleep(random.random() * 3)
    # A timeout keeps one stalled connection from hanging the whole run;
    # raise_for_status() turns an HTTP error page into an explicit failure.
    response = requests.get(url=real_url, headers=headers, timeout=30)
    response.raise_for_status()
    payload = response.json()
    try:
        points = payload['data']['data']
    except (KeyError, TypeError) as err:
        raise ValueError(
            f'unexpected DeBank response for {name!r}: {payload!r}'
        ) from err

    # The series is stored in the reverse of the order the API returns it.
    # NOTE(review): presumably this matches the row order of the Feixiaohao
    # history data - confirm against live responses.
    # Dates are rewritten from 'YYYY-MM-DD' style dashes to slashes.
    tmp_dic[name + name_date] = [
        point['date_at'].replace('-', '/') for point in reversed(points)
    ]
    tmp_dic[name + name_value] = [
        point['value'] for point in reversed(points)
    ]


# Fetch the 24h volume and closing price for every project from Feixiaohao
# and store them under the "<name> 24H额" / "<name> 收盘价" keys of `tmp_dic`.
# `names` and `base_urls` are parallel lists; fail loudly if they drift apart
# instead of silently pairing a project with the wrong endpoint.
if len(names) != len(base_urls):
    raise ValueError(
        f'names has {len(names)} entries but base_urls has {len(base_urls)}'
    )
for name, url in zip(names, base_urls):
    print(url)
    # Random pause of up to 3 seconds between requests.
    time.sleep(random.random() * 3)
    # A timeout keeps one stalled connection from hanging the whole run;
    # raise_for_status() turns an HTTP error page into an explicit failure.
    response = requests.get(url=url, headers=headers, timeout=30)
    response.raise_for_status()
    payload = response.json()
    try:
        rows = payload['data']['list']
    except (KeyError, TypeError) as err:
        raise ValueError(
            f'unexpected Feixiaohao response for {name!r}: {payload!r}'
        ) from err

    # Each column starts with a None placeholder, so the fetched values are
    # shifted down by one row relative to the DeBank columns.
    # NOTE(review): presumably this compensates for a one-day offset between
    # the two sources - confirm against live data.
    tmp_dic[name + name_one_day] = [None] + [row['vol'] for row in rows]
    tmp_dic[name + name_souPanJia] = [None] + [
        row['closeprice'] for row in rows
    ]


# Console display settings, used only by the print() below:
# show every column, show every row, and widen cell text to 100 characters.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('max_colwidth', 100)

# Build a frame with one row per `tmp_dic` key, then swap the axes so that
# each key becomes a column of the final sheet.
by_key = pd.DataFrame.from_dict(tmp_dic, orient='index')
execl_df = pd.DataFrame(
    by_key.values.T,
    index=by_key.columns,
    columns=by_key.index,
)
print(execl_df)

# Write the Excel file without the row index.
execl_df.to_excel('result.xlsx', index=False)

其中那些print函数基本都是调试用的,就是写到一半时,看看爬到的东西对不对、是不是空的,大可以直接删掉。总之,逻辑不是很难。

  以下是最后的结果展示:

END

猜你喜欢

转载自blog.csdn.net/qq_41938259/article/details/123150014
今日推荐