python 第二题 重置为多线程100t版本
爬虫练习网站: GlidedSky官网
GlidedSky 第一题
完整程序见GlidedSky 第二题 实现多页爬取数据
# GlidedSky 第二题 基础2
# @Author: xiaozhu_sai
# Date: 2020/11/28
"""
# Question 2, multithreaded version (100 worker threads by default)
def Q2_100t(self, *, last_page=3, thread_count=100):
    # Sum every number shown on pages 1..last_page of question 2 and add the
    # total to self._answer, then print the elapsed seconds and self._answer.
    #
    # Parameters:
    #   last_page    -- highest page number to crawl (pages start at 1);
    #                   a value below 1 crawls nothing.
    #   thread_count -- upper bound on the number of worker threads.
    #
    # All workers share the single browser held in self._brower, so the
    # navigate-and-read step is serialized with a lock; only the HTML
    # parsing and summing can overlap between threads.

    # Make sure the browser session is on question 2 before crawling.
    self.clickQuestion(2, False)
    pending_pages = list(range(1, last_page + 1))
    url = 'http://www.glidedsky.com/level/web/crawler-basic-2?page='
    browser_lock = threading.Lock()
    answer_lock = threading.Lock()

    def worker():
        while True:
            # Claim a page with a single pop instead of "check, then pop":
            # another thread may empty the list between those two steps.
            try:
                page = pending_pages.pop()
            except IndexError:
                return
            # Keep get() and the page_source read together so no other
            # thread can navigate the shared browser in between.
            with browser_lock:
                self._brower.get(url + str(page))
                page_source = self._brower.page_source
            soup = BeautifulSoup(page_source, 'html.parser')
            page_total = sum(
                int(cell.text.strip())
                for cell in soup.select('.row .col-md-1')
            )
            # "+=" on an attribute is a read-modify-write; guard it so
            # concurrent workers cannot lose each other's updates.
            with answer_lock:
                self._answer += page_total

    # Never start more threads than there are pages to fetch.
    worker_count = min(thread_count, len(pending_pages))
    thread_list = [
        threading.Thread(target=worker) for _ in range(worker_count)
    ]
    start_time = time.perf_counter()
    for thread in thread_list:
        thread.start()
    for thread in thread_list:
        thread.join()
    end_time = time.perf_counter()
    print(end_time - start_time, self._answer)