## NOTE(review): this entire script arrived commented out (scraped blog content).
## The original indentation was flattened to a uniform "# " prefix by the scrape,
## so the code would NOT run even if uncommented — block structure must be
## reconstructed first. Annotations below describe intent and latent defects.
##
## Purpose: scrape free HTTP proxies from xicidaili.com, then validate each one
## by fetching baidu.com through it, collecting working proxies in `alive_ip`.
# import requests,re
# from multiprocessing import Pool
#
## Module-level state: `proxy` holds scraped candidates, `alive_ip` the verified ones.
# proxy=[]
# alive_ip=[]
# headers={'Connection':'keep-alive',
## NOTE(review): 'Cookie' value is a placeholder ("your cookie" in Chinese) —
## the reader is expected to paste their own session cookie here.
# 'Cookie':'你的cookie',
# 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5383.400 QQBrowser/10.0.1313.400'
# }
## get_url(url): fetch one listing page and append {'http': ..., 'https': ...}
## proxy dicts (the shape `requests` expects for its `proxies=` argument) to `proxy`.
# def get_url(url):
# global proxy
## NOTE(review): requests.get sits OUTSIDE the try block, so a timeout or
## connection error here propagates to the caller instead of being skipped.
# res=requests.get(url,headers=headers,timeout=1)
# try:
## NOTE(review): variable names are swapped relative to what the patterns match —
## `ips` actually captures PORT numbers (`\d+`) and `https` captures IPv4
## addresses (`\d+\.\d+\.\d+\.\d+`). The assembled URL http+':'+ip below is
## therefore "address:port", so the result is correct, but the names mislead.
## Also: \d in a non-raw string relies on Python tolerating unknown escapes —
## these patterns should be raw strings (r'<td>(\d+)</td>').
# ips=re.findall('<td>(\d+)</td>',res.text,re.S)
# https=re.findall('<td>(\d+\.\d+\.\d+\.\d+)</td>',res.text,re.S)
## Pairs each port with its address; zip silently truncates if the two regexes
## matched different counts, which can mispair rows — TODO confirm page layout.
# for ip,http in zip(ips,https):
# info={'https':'https://' + http + ':' + ip,
# 'http':'http://'+http+':'+ip}
# proxy.append(info)
## NOTE(review): bare except silently swallows ALL errors (including typos like
## a NameError) — should catch the specific requests/re exceptions instead.
# except:
# pass
#
## yanzheng ("validate" in Chinese): try fetching `url` through every scraped
## proxy; proxies answering HTTP 200 within 1s are appended to `alive_ip`.
# def yanzheng(url):
## NOTE(review): `global proxy` is unnecessary here — the function only reads
## `proxy`; likewise `alive_ip` is mutated without (or needing) a global statement.
# global proxy
# for proxys in proxy:
# print('正在测试IP:',proxys)
# try:
# html=requests.get(url,proxies=proxys,timeout=1)
## NOTE(review): non-200 responses fall through silently — neither printed
## as invalid nor recorded. Only exceptions are reported as "invalid".
# if html.status_code==200:
# print('该IP有效',proxys)
# alive_ip.append(proxys)
## NOTE(review): bare except again — treats every failure (timeout, DNS, bug)
## identically as "invalid IP".
# except:
# print('该IP无效',proxys)
#
# if __name__=='__main__':
## NOTE(review): xicidaili.com appears to be a long-defunct proxy-list site —
## these URLs are presumably dead; verify before reuse.
# urls=['http://www.xicidaili.com/nn/%d'%i for i in range(1,10)]
## NOTE(review): the abandoned multiprocessing variant below is buggy —
## `pool=map(get_url,urls)` builds a lazy builtin map (never consumed, so
## nothing runs) and clobbers `pool`; it was presumably meant to be
## `pool.map(get_url, urls)`.
# # pool=Pool(processes=2)
# # pool=map(get_url,urls)
# for url in urls:
# get_url(url)
# print(proxy)
# ur='https://www.baidu.com/'
# yanzheng(ur)
# print(alive_ip)
简单的 Python 代理爬取及验证（Simple Python proxy scraping and validation）
猜你喜欢
转载自blog.csdn.net/weixin_42557907/article/details/81151802
今日推荐
周排行