版权声明: https://blog.csdn.net/dashoumeixi/article/details/86678193
不同的网站不同解析方式.
以下示例使用 XPath 解析; 拿到代理列表后, 可以再利用 requests 的 timeout 参数按响应速度过滤.
# Request headers carrying a desktop-Chrome User-Agent string.
# NOTE(review): presumably the site rejects the default requests UA - confirm.
heads = {'user-agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"}

# Fetch the free-proxy listing page. requests applies no timeout by default,
# so an explicit one keeps a stalled connection from blocking forever.
r = requests.get("https://www.kuaidaili.com/free/", headers=heads, timeout=10)

# This only influences how r.text is decoded; r.content (used below) is the
# raw byte payload, so the HTML parser does its own charset detection.
r.encoding = r.apparent_encoding
html = etree.HTML(r.content)

# Every <tr> found under a <tbody>; getip() reads cells 1, 2 and 4 of each row.
trs = html.xpath(".//tbody/tr")
def getip(rows=None):
    """Build proxy URLs from parsed table rows.

    Args:
        rows: Iterable of row elements exposing ``xpath()``. When omitted,
            the module-level ``trs`` is used, which matches the original
            zero-argument call.

    Returns:
        A list of ``"protocol://ip:port"`` strings in row order. The
        protocol text is used exactly as it appears in the cell (only
        surrounding whitespace is removed). Rows where the ip, port or
        protocol cell yields no text are skipped.
    """
    if rows is None:
        rows = trs

    # A set would also work here if duplicate entries must be dropped;
    # the function could equally be written as a generator that yields.
    httpiplist = []
    for row in rows:
        ip = row.xpath("./td[1]/text()")
        port = row.xpath("./td[2]/text()")
        protocol = row.xpath("./td[4]/text()")
        # xpath() returns an empty list when a cell has no text node;
        # indexing it blindly would raise IndexError and abort the whole scan.
        if not (ip and port and protocol):
            continue
        httpiplist.append(
            "{}://{}:{}".format(
                protocol[0].strip(), ip[0].strip(), port[0].strip()
            )
        )
    return httpiplist