import requests
import re
from bs4 import BeautifulSoup
def getHTMLText(url, code):
    """Download *url* and return its body decoded with the encoding *code*.

    Args:
        url: Address of the page to fetch.
        code: Encoding name assigned to the response before its text is
            read (for example 'utf-8' or 'GB2312').

    Returns:
        The decoded page text, or the sentinel string '爬取异常' when the
        request fails or the server answers with an HTTP error status.
    """
    try:
        # A timeout keeps one unresponsive server from hanging the whole run.
        r = requests.get(url, timeout=30)
        # Must be *called*: the bare attribute reference never checks the status.
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException:
        # Callers rely on this sentinel instead of an exception.
        return '爬取异常'
def getStocklist(ulist, stocklistURL):
    """Append the stock codes found on the listing page to *ulist*.

    The page at *stocklistURL* is fetched as GB2312 text. Every
    ``<a target="_blank">`` element is inspected, and the first substring of
    its ``href`` that looks like ``sz``/``sh`` followed by six digits is
    appended to *ulist*. Anchors without an ``href`` or without such a
    substring are skipped.

    Args:
        ulist: List that receives the codes; it is modified in place.
        stocklistURL: Address of the listing page.

    Returns:
        None.
    """
    html = getHTMLText(stocklistURL, 'GB2312')
    soup = BeautifulSoup(html, 'html.parser')
    # Compile once instead of re-parsing the pattern for every anchor.
    code_pattern = re.compile(r'[s][zh]\d{6}')
    for anchor in soup.find_all('a', attrs={'target': "_blank"}):
        # A missing href becomes '' so it simply fails to match, rather than
        # relying on a swallowed exception to skip the anchor.
        found = code_pattern.search(anchor.get('href', ''))
        if found:
            ulist.append(found.group(0))
def _parseStockPage(html):
    """Return the name and dt/dd fields parsed from one stock detail page.

    Raises:
        IndexError: if the page has no ``<a class="bets-name">`` element or
            that element contains no text.
    """
    soup = BeautifulSoup(html, 'html.parser')
    name = soup.find_all('a', attrs={'class': "bets-name"})[0].text
    info = {"股票名称": name.split()[0]}
    for block in soup.find_all("div", attrs={'class': "bets-content"}):
        # Calling a tag is shorthand for find_all(); zip pairs each <dt> with
        # its <dd> and yields nothing for a block that has no <dt> entries.
        for term, value in zip(block('dt'), block('dd')):
            info[term.text] = value.text
    return info


def getStockinfo(ulist, StockinfoURL, stockDic, fpath):
    """Fetch each stock's detail page and append its fields to *fpath*.

    For every code in *ulist* the page ``StockinfoURL + code + '.html'`` is
    downloaded, parsed, and written as one ``str(dict)`` line appended to
    *fpath*. Progress is printed on a single, continuously rewritten line.
    A stock whose page cannot be downloaded, parsed or written is skipped.

    Args:
        ulist: Stock codes to process.
        StockinfoURL: Base address the code and '.html' are appended to.
        stockDic: Dict reused for the record being written. Entries present
            on entry are kept in every record; fields scraped for one stock
            are not carried over to the next. After the call it holds the
            last record written.
        fpath: Path of the UTF-8 text file the records are appended to.

    Returns:
        None.
    """
    total = len(ulist)
    # Snapshot the caller-supplied entries so they survive the per-stock reset.
    seed = dict(stockDic)
    for count, stock in enumerate(ulist, start=1):
        html = getHTMLText(StockinfoURL + stock + '.html', 'utf-8')
        # '爬取异常' is the sentinel getHTMLText returns for a failed download;
        # testing only for '' would never notice a failure.
        if html and html != '爬取异常':
            try:
                info = _parseStockPage(html)
                # Rebuild the record from scratch so fields of a previous
                # stock cannot leak into this one.
                stockDic.clear()
                stockDic.update(seed)
                stockDic.update(info)
                with open(fpath, 'a', encoding='utf-8') as f:
                    f.write(str(stockDic) + '\n')
            except (IndexError, OSError):
                # Best effort: an unparsable page or a failed write skips
                # this stock without aborting the run.
                pass
        print("\r当前进度:{:.2f}%".format(count * 100 / total), end='')
def main():
    """Collect the stock codes, then append every stock's details to a file.

    The listing URL, the detail-page base URL and the output path are fixed
    inside this function.
    """
    list_page = 'http://quote.eastmoney.com/stocklist.html'
    detail_base = 'https://gupiao.baidu.com/stock/'
    output_path = r'C:\Users\lenovo\Desktop\stock.txt'

    codes = []
    getStocklist(codes, list_page)
    print('获取列表成功!')

    # getStockinfo opens and closes the output file itself for each record.
    getStockinfo(codes, detail_base, {}, output_path)
# Start the crawl only when the file is executed as a script, so importing
# the module does not trigger network requests and file writes.
if __name__ == "__main__":
    main()
# Python crawler: scraping stock pages, DAY 6
# You may also like
# Reposted from blog.csdn.net/u011451186/article/details/81407491
# Today's recommendations
# Weekly ranking