Python 请求网页【requests】封装版

函数版

import requests, random
# User-Agent strings; one is picked at random for each request.
ua = [
    'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0)',  # IE9.0
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',  # 360 browser
]

def write_log(*args):
    """Append every argument to today's log file, one ``repr`` per line.

    The file is named ``logYYYYMMDD.txt`` (local date) and is created in the
    current working directory if it does not exist yet.  Each call writes the
    ``repr`` of every positional argument on its own line, followed by one
    blank line that separates this record from the next.

    Args:
        *args: Arbitrary objects to record (URL, exception, retry count, ...).
    """
    from time import strftime
    ymd = strftime('%Y%m%d')
    with open('log%s.txt' % ymd, 'a', encoding='utf-8') as f:
        # repr() is applied explicitly: '%r' % item would try to unpack a
        # tuple argument and raise TypeError.
        f.writelines('%s\n' % repr(item) for item in args)
        f.write('\n')

def get(url, encode='utf-8', times=5, timeout=10):
    """Fetch *url* with HTTP GET and return the decoded body, retrying on failure.

    Every attempt sends a User-Agent chosen at random from ``ua``.  An attempt
    fails when the request raises or when the status code is not 200; each
    failure is recorded with ``write_log`` together with the number of
    attempts still left.

    Args:
        url: Address to request.
        encode: Encoding assigned to the response before reading ``.text``.
        times: Maximum number of attempts.
        timeout: Seconds to wait for the server on each attempt; without it a
            stalled connection would block forever.

    Returns:
        The response text of the first attempt that answers 200, or ``None``
        when all attempts fail (or ``times`` is not positive).
    """
    while True:
        times -= 1
        if times < 0:
            # Attempts exhausted.
            return None
        try:
            r = requests.get(url, headers={'User-Agent': random.choice(ua)},
                             timeout=timeout)
        except Exception as error:
            # Best-effort fetch: record the failure and try again.
            write_log(url, error, times)
            continue
        if r.status_code == 200:
            r.encoding = encode
            return r.text
        write_log(url, r.status_code, times)

def post(url, data, encode='utf-8', times=5, timeout=10):
    """Send *data* to *url* with HTTP POST and return the decoded body, retrying on failure.

    Every attempt sends a User-Agent chosen at random from ``ua``.  An attempt
    fails when the request raises or when the status code is not 200; each
    failure is recorded with ``write_log`` (URL and payload, the error or
    status code, and the number of attempts still left).

    Args:
        url: Address to request.
        data: Payload passed to ``requests.post`` as ``data``.
        encode: Encoding assigned to the response before reading ``.text``.
        times: Maximum number of attempts.
        timeout: Seconds to wait for the server on each attempt; without it a
            stalled connection would block forever.

    Returns:
        The response text of the first attempt that answers 200, or ``None``
        when all attempts fail (or ``times`` is not positive).
    """
    while True:
        times -= 1
        if times < 0:
            # Attempts exhausted.
            return None
        try:
            r = requests.post(url, headers={'User-Agent': random.choice(ua)},
                              data=data, timeout=timeout)
        except Exception as error:
            # Best-effort request: record the failure and try again.
            write_log('%r\n%r' % (url, data), error, times)
            continue
        if r.status_code == 200:
            r.encoding = encode
            return r.text
        write_log('%r\n%r' % (url, data), r.status_code, times)

面向对象版

import requests, random, re
# Browser disguise: User-Agent strings, one is picked at random per request.
UA = [
    'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0)',  # IE9.0
    'Mozilla/4.0(compatible;MSIE8.0;WindowsNT6.0;Trident/4.0)',  # IE8.0
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT6.0)',  # IE7.0
    'Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.1)',  # IE6.0
    'Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1',  # Firefox4.0.1–MAC
    'Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1',  # Firefox4.0.1–Windows
    'Opera/9.80(Macintosh;IntelMacOSX10.6.8;U;en)Presto/2.8.131Version/11.11',  # Opera11.11–MAC
    'Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11',  # Opera11.11–Windows
    'Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11',  # Chrome17.0–MAC
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Maxthon2.0)',  # Maxthon
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TencentTraveler4.0)',  # Tencent TT
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',  # 360 browser
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TheWorld)',  # TheWorld 3.x
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
]
# Proxy pool
def get_proxies(timeout=10):
    """Scrape the xicidaili listing page and return the proxies found on it.

    The page is fetched with a fixed Opera User-Agent and matched against a
    regular expression that captures three table cells per row: an address,
    a port, and the cell that follows the "高匿" marker.

    Args:
        timeout: Seconds to wait for the listing page before giving up, so a
            stalled server cannot block the caller forever.

    Returns:
        A list of single-entry dicts ``{<third cell, lower-cased>: 'ip:port'}``.
        NOTE(review): the third cell is presumably the protocol (HTTP/HTTPS),
        which would make each dict usable as a ``requests`` ``proxies``
        mapping - confirm against the live page.  The list is empty when
        nothing matches.
    """
    url = 'http://www.xicidaili.com/nn/'  # domestic high-anonymity proxy IPs
    header = {'User-Agent': 'Opera/8.0 (Windows NT 5.1; U; en)'}
    html = requests.get(url, headers=header, timeout=timeout).text
    # Raw string: \s and \d must reach the regex engine untouched.
    pt = r'<td>([0-9.]{7,15})</td>\s+<td>(\d+)</td>\s+<td>[\s\S]+?</td>\s+<td class="country">高匿</td>\s+<td>(.+?)</td>'
    return [{kind.lower(): ip + ':' + port}
            for ip, port, kind in re.findall(pt, html)]
# Build the proxy pool once when the module is loaded; get_proxies() performs
# a network request, so this line runs at import time.
proxies = get_proxies()
# Show how many proxies were scraped, followed by the list itself.
print(len(proxies), proxies)
# Request wrapper
class Request:
    """Retrying HTTP client that rotates User-Agent strings and proxies.

    Every attempt picks a random User-Agent from ``self.ua`` and, when the
    pool is not empty, a random proxy from ``self.proxies``.  Failed attempts
    are appended to a dated log file by ``write_log``.
    """

    def __init__(self, ua=None, proxy_pool=None):
        """Create a client.

        Args:
            ua: Sequence of User-Agent strings; defaults to the module-level
                ``UA`` list.
            proxy_pool: Sequence of ``requests``-style proxy dicts; defaults
                to the module-level ``proxies`` list.
        """
        self.ua = UA if ua is None else ua  # user agent
        self.proxies = proxies if proxy_pool is None else proxy_pool

    def write_log(self, *args):
        """Append every argument to ``logYYYYMMDD.txt``, one ``repr`` per line.

        The file lives in the current working directory; each call ends its
        record with one blank line.
        """
        from time import strftime
        ymd = strftime('%Y%m%d')
        with open('log%s.txt' % ymd, 'a', encoding='utf-8') as f:
            # repr() is applied explicitly: '%r' % item would try to unpack a
            # tuple argument and raise TypeError.
            f.writelines('%s\n' % repr(item) for item in args)
            f.write('\n')

    def _request(self, method, url, encode, times, timeout, data=None):
        """Run the shared retry loop behind ``get`` and ``post``."""
        while True:
            times -= 1
            if times < 0:
                # Attempts exhausted.
                return None
            headers = {'User-Agent': random.choice(self.ua)}
            # An empty pool means "connect directly"; random.choice([]) would
            # raise IndexError.
            proxy = random.choice(self.proxies) if self.proxies else None
            try:
                r = requests.request(method, url, headers=headers, data=data,
                                     proxies=proxy, timeout=timeout)
            except Exception as error:
                # Best-effort request: record the failure and try again.
                failure = error
            else:
                if r.status_code == 200:
                    r.encoding = encode
                    return r.text
                failure = r.status_code
            # GET logs the bare URL, POST logs URL and payload.
            target = url if method == 'GET' else '%r\n%r' % (url, data)
            self.write_log(target, failure, times)

    def get(self, url, encode='utf-8', times=5, timeout=10):
        """Fetch *url* with HTTP GET, retrying up to *times* attempts.

        Args:
            url: Address to request.
            encode: Encoding assigned to the response before reading ``.text``.
            times: Maximum number of attempts.
            timeout: Seconds to wait for the server on each attempt.

        Returns:
            The response text of the first attempt that answers 200, or
            ``None`` when every attempt fails.
        """
        return self._request('GET', url, encode, times, timeout)

    def post(self, url, data, encode='utf-8', times=5, timeout=10):
        """Send *data* to *url* with HTTP POST, retrying up to *times* attempts.

        Args:
            url: Address to request.
            data: Payload passed to ``requests`` as ``data``.
            encode: Encoding assigned to the response before reading ``.text``.
            times: Maximum number of attempts.
            timeout: Seconds to wait for the server on each attempt.

        Returns:
            The response text of the first attempt that answers 200, or
            ``None`` when every attempt fails.
        """
        return self._request('POST', url, encode, times, timeout, data=data)


if __name__ == '__main__':
    # Manual smoke test: request two httpbin endpoints through the wrapper
    # and print whatever comes back (None when every attempt failed).
    client = Request()
    for endpoint in ('http://httpbin.org/get', 'http://httpbin.org/ip'):
        print(client.get(endpoint))

备注

基础补充
requests模块

猜你喜欢

转载自 https://blog.csdn.net/Yellow_python/article/details/81280529