初始代码:
async def myRequest(data):
    """Download one URL and save the raw response body as an ``.xlsx`` file.

    Args:
        data: Mapping that provides the keys ``'url'``, ``'header'``,
            ``'country'`` and ``'category'``. ``'header'`` is passed to
            ``aiohttp.ClientSession`` as the request headers.

    The body is written to ``<dir>/<today>/<country>/<category>.xlsx``.
    ``open(..., 'wb')`` does not create directories, so the target directory
    must already exist.

    NOTE(review): ``dir`` is not defined inside this function; presumably a
    module-level base directory exists elsewhere in the file -- confirm.
    """
    url = data['url']
    header = data['header']
    country = data['country']
    category = data['category']

    async with aiohttp.ClientSession(headers=header) as session:
        # session.get() is already an async context manager; no extra
        # `await` is needed in front of it.
        async with session.get(url=url) as resp:
            html = await resp.read()

    today = str(datetime.date.today())
    filePath = '%s.xlsx' % (category)
    absPath = os.path.join(dir, today, country, filePath)
    print(absPath, 3333)

    # The context manager closes the file even when write() raises.
    with open(absPath, 'wb') as f:
        f.write(html)
def main():
    """Run ``myRequest`` concurrently for every entry of ``requestDataList``.

    NOTE(review): the original looped over ``urls`` but called
    ``myRequest(requestDataList, headers)``, which fails because
    ``myRequest`` accepts a single ``data`` argument. This version assumes
    ``requestDataList`` is an iterable of per-request dicts (each carrying
    its own ``'header'``) -- confirm against where it is built.
    """
    async def _run_all():
        # Tasks are created inside the running loop, so no implicit
        # event-loop lookup is needed.
        tasks = [asyncio.create_task(myRequest(data)) for data in requestDataList]
        if tasks:  # asyncio.wait() raises ValueError on an empty collection
            await asyncio.wait(tasks)

    asyncio.run(_run_all())
# Run the downloads only when this file is executed as a script.
if __name__ == '__main__':
    main()
async def download_page(url):
    """Fetch *url* with a dedicated aiohttp session and return the body text.

    Args:
        url: Address to request with HTTP GET.

    Returns:
        The response body decoded by ``resp.text()``.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # `await` applies to the coroutine call, not to the assignment.
            result = await resp.text()
    return result
async def main(urls):
    """Download every URL in *urls* concurrently and wait for all of them.

    Args:
        urls: Iterable of addresses handed to ``download_page``.
    """
    # Tested by the original author on Python 3.9.6.
    tasks = [asyncio.create_task(download_page(url)) for url in urls]
    if not tasks:
        # asyncio.wait() raises ValueError on an empty collection.
        return
    await asyncio.wait(tasks)
if __name__ == '__main__':
    # Placeholder list: url1, url2, ... stand for the real URL strings.
    urls = [url1, url2]  # …… add further URLs here
    asyncio.run(main(urls))
这是最基本的一个异步协程框架,在数据量不大的情况下可以基本满足要求;但是数据量稍大一些,就会报错。我收集到的报错信息有以下几种:
- aiohttp.client_exceptions.ClientOSError: [WinError 64] 指定的网络名不再可用。
  Task exception was never retrieved
- aiohttp.client_exceptions.ClientOSError: [WinError 121] 信号灯超时时间已到
  Task exception was never retrieved
- aiohttp.client_exceptions.ServerDisconnectedError: Server disconnected
  Task exception was never retrieved
解决思路:
上述报错的主要原因在于每一个任务都单独创建了一个 session,当创建的 session 过多时就会报错。
解决办法:
尝试只创建一个session
import asyncio
import aiohttp
async def download_page(url, session):
    """Fetch *url* through an existing session and print the raw body.

    Args:
        url: Address to request with HTTP GET.
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (an ``aiohttp.ClientSession`` in this
            file). It is supplied by the caller and is not closed here.
    """
    async with session.get(url) as resp:
        result = await resp.content.read()
        print(result)
async def main(urls):
    """Download every URL in *urls* concurrently over one shared session.

    Args:
        urls: Iterable of addresses handed to ``download_page``.
    """
    url_list = list(urls)
    if not url_list:
        # asyncio.wait() raises ValueError on an empty collection.
        return

    # Create the session once here in main() and pass it into
    # download_page() so that all tasks share it.
    async with aiohttp.ClientSession() as session:
        # Tested on Python 3.9.6. On Python 3.8+ the coroutines should be
        # wrapped with asyncio.create_task(); passing bare coroutines to
        # asyncio.wait() still runs there but emits a deprecation warning.
        tasks = [
            asyncio.create_task(download_page(url, session))
            for url in url_list
        ]
        # Wait inside the `async with` so the session stays open until
        # every download has finished.
        await asyncio.wait(tasks)
if __name__ == '__main__':
    # Placeholder list: url1, url2, ... stand for the real URL strings.
    urls = [url1, url2]  # …… add further URLs here
    asyncio.run(main(urls))