"""Compare serial and gevent-based concurrent downloads of a few web pages."""
from gevent import monkey

# Patch the standard library BEFORE urllib is imported.  Without patch_all()
# gevent cannot detect the blocking socket I/O, so the greenlets would run one
# after another and the "concurrent" timing would be about the same as the
# serial one.  Importing urllib first would also pull in ssl unpatched, which
# gevent warns about (MonkeyPatchWarning).
monkey.patch_all()

import time  # noqa: E402

import gevent  # noqa: E402
from urllib import request  # noqa: E402


def f(url):
    """Download *url* and print how many bytes were received.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    print('GET:%s' % url)
    # The context manager closes the HTTP response even if read() raises.
    with request.urlopen(url) as resp:
        data = resp.read()
    print('%d bytes received from %s' % (len(data), url))


urls = ['https://www.python.org/', 'https://www.yahoo.com/']

# Serial crawl: fetch the pages one after another in a plain loop.
start_time = time.perf_counter()
for url in urls:
    f(url)
print('The total time is {time}'.format(time=time.perf_counter() - start_time))

# Coroutine crawl: one greenlet per URL, then wait for all of them to finish.
async_time = time.perf_counter()
gevent.joinall([gevent.spawn(f, url) for url in urls])
print('The asynchronous total time is {time}'.format(
    time=time.perf_counter() - async_time))
运行的结果如下(前半部分是串行循环的输出,后半部分是协程并发执行的输出):
GET:https://www.python.org/
48835 bytes received from https://www.python.org/
GET:https://www.yahoo.com/
498399 bytes received from https://www.yahoo.com/
The total time is 12.665598630905151
GET:https://www.python.org/
GET:https://www.yahoo.com/
48835 bytes received from https://www.python.org/
498546 bytes received from https://www.yahoo.com/
The asynchronous total time is 5.80000114440918