## NOTE(review): this entire script arrived commented out (scraped blog content).
## The original indentation was flattened to a uniform "# " prefix by the scrape,
## so the code would NOT run even if uncommented — block structure must be
## reconstructed first. Annotations below describe intent and latent defects.
##
## Purpose: scrape free HTTP proxies from xicidaili.com, then validate each one
## by fetching baidu.com through it, collecting working proxies in `alive_ip`.
# import requests,re
# from multiprocessing import Pool
#
## Module-level state: `proxy` holds scraped candidates, `alive_ip` the verified ones.
# proxy=[]
# alive_ip=[]
# headers={'Connection':'keep-alive',
## NOTE(review): 'Cookie' value is a placeholder ("your cookie" in Chinese) —
## the reader is expected to paste their own session cookie here.
# 'Cookie':'你的cookie',
# 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5383.400 QQBrowser/10.0.1313.400'
# }
## get_url(url): fetch one listing page and append {'http': ..., 'https': ...}
## proxy dicts (the shape `requests` expects for its `proxies=` argument) to `proxy`.
# def get_url(url):
# global proxy
## NOTE(review): requests.get sits OUTSIDE the try block, so a timeout or
## connection error here propagates to the caller instead of being skipped.
# res=requests.get(url,headers=headers,timeout=1)
# try:
## NOTE(review): variable names are swapped relative to what the patterns match —
## `ips` actually captures PORT numbers (`\d+`) and `https` captures IPv4
## addresses (`\d+\.\d+\.\d+\.\d+`). The assembled URL http+':'+ip below is
## therefore "address:port", so the result is correct, but the names mislead.
## Also: \d in a non-raw string relies on Python tolerating unknown escapes —
## these patterns should be raw strings (r'<td>(\d+)</td>').
# ips=re.findall('<td>(\d+)</td>',res.text,re.S)
# https=re.findall('<td>(\d+\.\d+\.\d+\.\d+)</td>',res.text,re.S)
## Pairs each port with its address; zip silently truncates if the two regexes
## matched different counts, which can mispair rows — TODO confirm page layout.
# for ip,http in zip(ips,https):
# info={'https':'https://' + http + ':' + ip,
# 'http':'http://'+http+':'+ip}
# proxy.append(info)
## NOTE(review): bare except silently swallows ALL errors (including typos like
## a NameError) — should catch the specific requests/re exceptions instead.
# except:
# pass
#
## yanzheng ("validate" in Chinese): try fetching `url` through every scraped
## proxy; proxies answering HTTP 200 within 1s are appended to `alive_ip`.
# def yanzheng(url):
## NOTE(review): `global proxy` is unnecessary here — the function only reads
## `proxy`; likewise `alive_ip` is mutated without (or needing) a global statement.
# global proxy
# for proxys in proxy:
# print('正在测试IP:',proxys)
# try:
# html=requests.get(url,proxies=proxys,timeout=1)
## NOTE(review): non-200 responses fall through silently — neither printed
## as invalid nor recorded. Only exceptions are reported as "invalid".
# if html.status_code==200:
# print('该IP有效',proxys)
# alive_ip.append(proxys)
## NOTE(review): bare except again — treats every failure (timeout, DNS, bug)
## identically as "invalid IP".
# except:
# print('该IP无效',proxys)
#
# if __name__=='__main__':
## NOTE(review): xicidaili.com appears to be a long-defunct proxy-list site —
## these URLs are presumably dead; verify before reuse.
# urls=['http://www.xicidaili.com/nn/%d'%i for i in range(1,10)]
## NOTE(review): the abandoned multiprocessing variant below is buggy —
## `pool=map(get_url,urls)` builds a lazy builtin map (never consumed, so
## nothing runs) and clobbers `pool`; it was presumably meant to be
## `pool.map(get_url, urls)`.
# # pool=Pool(processes=2)
# # pool=map(get_url,urls)
# for url in urls:
# get_url(url)
# print(proxy)
# ur='https://www.baidu.com/'
# yanzheng(ur)
# print(alive_ip)
简单的 Python 代理爬取及验证（Simple Python proxy scraping and validation）
猜你喜欢
转载自blog.csdn.net/weixin_42557907/article/details/81151802
今日推荐
周排行