python3.6抓取代理ip

有时候我们会用到代理ip，下面我们便抓取代理ip网站的ip

import requests
import random
from bs4 import BeautifulSoup

class Spider_Ips():
    url = 'http://www.xicidaili.com/wn/'
    headers = [
        {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:34.0) Gecko/20100101 Firefox/34.0'},
        {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'},
        {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11'},
        {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)'},
        {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0'},
        {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36'}
    ]
    MAX_RETRIES = 20
    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=MAX_RETRIES)
    session.mount('https://', adapter)
    session.mount('http://', adapter)

    def list_ip(self):
        req = self.session.get(self.url, headers=random.choice(self.headers))
        response = BeautifulSoup(req.text, 'html.parser')
        res_obj = response.findAll('table')
        tab = res_obj[0]
        for item in tab.findAll('tr'):
            tds = item.findAll('td')    
            if tds:
                with open('HTTPS.txt', 'a', encoding='utf-8') as f:
                    print(tds[1].text+':'+tds[2].text)
                    f.write(tds[1].text+':'+tds[2].text+'\n')                    

    def __init__(self):
        self.list_ip()


s_ip = Spider_Ips()

运行便可，抓取到txt文本中，结果如下图所示，我们便可以使用下面的ip了

当然我们也可以对这些ip存入到数据库等其他操作

python3.6抓取代理ip

猜你喜欢