目标:前十数据
过程:
# -*- coding: utf-8 -*-
"""Scrape the margin-trading (rzrq) summary table and save it as a CSV file.

The script downloads the page at ``URL``, takes the markup selected by
``TABLE_SELECTOR``, pulls every numeric token out of it, groups the tokens
into fixed-width rows and writes them to ``OUTPUT_PATH`` together with the
date on which the data was collected.
"""
import csv
import datetime
import re

URL = 'http://data.10jqka.com.cn/market/rzrq/'
TABLE_SELECTOR = '#table1 > table > tbody'
OUTPUT_PATH = 'rzrq.csv'

# One table row = trade date + 12 figures; the collection date is appended
# when writing, which gives the 14 columns listed in HEADER.
VALUES_PER_ROW = 13

# Raw string with an escaped dot: one or more digits, an optional literal
# decimal point, then two digits.  (An unescaped "." would match any
# character and could glue two neighbouring numbers together.)
NUMBER_PATTERN = re.compile(r'(\d+\.?\d\d)')

HEADER = (
    '交易日期',
    '本日融资余额(亿元)上海',
    '本日融资余额(亿元)深圳',
    '本日融资余额(亿元)沪深合计',
    '本日融资买入额(亿元)上海',
    '本日融资买入额(亿元)深圳',
    '本日融资买入额(亿元)沪深合计',
    '本日融券余量余额(亿元)上海',
    '本日融券余量余额(亿元)深圳',
    '本日融券余量余额(亿元)沪深合计',
    '本日融资融券余额(亿元)上海',
    '本日融资融券余额(亿元)深圳',
    '本日融资融券余额(亿元)沪深合计',
    '采集日期',
)


def fetch_table_markup(url=URL, timeout=30):
    """Download *url* and return the selected table body as a string.

    The return value is ``str()`` of the list produced by
    ``BeautifulSoup.select``, i.e. the matched tags wrapped in ``[...]``.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the script forever.
    """
    # Imported lazily so the pure parsing helpers below can be imported
    # (and tested) without the third-party packages installed.
    import requests
    from bs4 import BeautifulSoup

    res = requests.get(url, timeout=timeout)
    # Let requests guess the real charset from the body instead of trusting
    # the HTTP header.
    res.encoding = res.apparent_encoding
    html = BeautifulSoup(res.text, 'lxml')
    return str(html.select(TABLE_SELECTOR))


def extract_rows(markup, width=VALUES_PER_ROW):
    """Return the numeric tokens found in *markup*, grouped *width* per row.

    Hyphens are removed first, so a date such as ``2019-01-02`` is captured
    as the single token ``20190102``.  The last row may be shorter than
    *width* when the number of tokens is not a multiple of it.
    """
    values = NUMBER_PATTERN.findall(markup.replace('-', ''))
    return [values[i:i + width] for i in range(0, len(values), width)]


def write_csv(rows, collected_on, path=OUTPUT_PATH):
    """Write HEADER and *rows* to *path*, appending *collected_on* to each row.

    The input rows are not modified.  The file is opened with the platform
    default encoding, exactly as the original script did.
    """
    # The context manager guarantees the file is flushed and closed.
    with open(path, 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(HEADER)
        writer.writerows(row + [collected_on] for row in rows)


def main():
    """Fetch the table, parse it and write ``rzrq.csv``."""
    today = datetime.date.today().strftime('%Y%m%d')  # collection date
    write_csv(extract_rows(fetch_table_markup()), today)


if __name__ == '__main__':
    main()
结果: