函数版
import requests, random
# Pool of User-Agent header values; get() and post() pick one at random.
ua = [
    'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0;',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',
]
def write_log(*args):
    """Append the repr of every argument to today's log file.

    The file is ``log<YYYYMMDD>.txt`` in the current working directory,
    opened in append mode as UTF-8. Each argument is written on its own
    line and the record is closed with an empty line.

    This is a plain module-level function, so it takes no ``self``: every
    positional argument, including the first one, ends up in the log.

    Args:
        *args: Values to record; each one is formatted with ``%r``.
    """
    from time import strftime

    ymd = strftime('%Y%m%d')
    with open('log%s.txt' % ymd, 'a', encoding='utf-8') as f:
        for item in args:
            # Wrap in a 1-tuple so a tuple argument is logged as a whole
            # instead of being unpacked as %-format operands.
            f.write('%r\n' % (item,))
        f.write('\n')
def get(url, encode='utf-8', times=5, timeout=10):
    """Fetch ``url`` with HTTP GET, retrying on failure.

    Every attempt sends a User-Agent picked at random from ``ua``. A failed
    attempt (an exception or a status other than 200) is recorded through
    ``write_log`` before the next attempt starts.

    Args:
        url: Address to request.
        encode: Encoding assigned to the response before its text is read.
        times: Maximum number of attempts; values below 1 make no request.
        timeout: Seconds to wait for the server on each attempt. Without a
            timeout a stalled connection would block the call forever.

    Returns:
        The response text of the first attempt answered with status 200,
        or ``None`` when all attempts failed.
    """
    # Count down so the number written to the log is how many attempts
    # remain after the one that just failed.
    for remaining in range(times - 1, -1, -1):
        headers = {'User-Agent': random.choice(ua)}
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
        except Exception as error:
            # Deliberately broad: any failure is logged and retried.
            write_log(url, error, remaining)
            continue
        if r.status_code == 200:
            r.encoding = encode
            return r.text
        write_log(url, r.status_code, remaining)
    return None
def post(url, data, encode='utf-8', times=5, timeout=10):
    """Send ``data`` to ``url`` with HTTP POST, retrying on failure.

    Every attempt sends a User-Agent picked at random from ``ua``. A failed
    attempt (an exception or a status other than 200) is recorded through
    ``write_log`` together with the URL and the payload.

    Args:
        url: Address to request.
        data: Payload handed to ``requests.post`` as ``data``.
        encode: Encoding assigned to the response before its text is read.
        times: Maximum number of attempts; values below 1 make no request.
        timeout: Seconds to wait for the server on each attempt. Without a
            timeout a stalled connection would block the call forever.

    Returns:
        The response text of the first attempt answered with status 200,
        or ``None`` when all attempts failed.
    """
    # Count down so the number written to the log is how many attempts
    # remain after the one that just failed.
    for remaining in range(times - 1, -1, -1):
        headers = {'User-Agent': random.choice(ua)}
        try:
            r = requests.post(url, headers=headers, data=data,
                              timeout=timeout)
        except Exception as error:
            # Deliberately broad: any failure is logged and retried.
            write_log('%r\n%r' % (url, data), error, remaining)
            continue
        if r.status_code == 200:
            r.encoding = encode
            return r.text
        write_log('%r\n%r' % (url, data), r.status_code, remaining)
    return None
面向对象版
import requests, random, re
# Pool of User-Agent header values; Request picks one at random per attempt.
UA = [
    'Mozilla/5.0(compatible;MSIE9.0;WindowsNT6.1;Trident/5.0;',
    'Mozilla/4.0(compatible;MSIE8.0;WindowsNT6.0;Trident/4.0)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT6.0)',
    'Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.1)',
    'Mozilla/5.0(Macintosh;IntelMacOSX10.6;rv:2.0.1)Gecko/20100101Firefox/4.0.1',
    'Mozilla/5.0(WindowsNT6.1;rv:2.0.1)Gecko/20100101Firefox/4.0.1',
    'Opera/9.80(Macintosh;IntelMacOSX10.6.8;U;en)Presto/2.8.131Version/11.11',
    'Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11',
    'Mozilla/5.0(Macintosh;IntelMacOSX10_7_0)AppleWebKit/535.11(KHTML,likeGecko)Chrome/17.0.963.56Safari/535.11',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;Maxthon2.0)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TencentTraveler4.0)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;360SE)',
    'Mozilla/4.0(compatible;MSIE7.0;WindowsNT5.1;TheWorld)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
]
def get_proxies(timeout=10):
    """Scrape proxy addresses from the xicidaili listing page.

    Only table rows whose "country" cell reads 高匿 (high anonymity) are
    matched by the pattern below.

    Args:
        timeout: Seconds to wait for the listing page before giving up.

    Returns:
        A list of single-entry dicts ``{kind: 'ip:port'}``, where ``kind``
        is the lower-cased text of the cell after the anonymity cell
        (presumably the protocol, e.g. ``http``/``https`` -- confirm
        against the live page). The list is empty when nothing matches.
    """
    url = 'http://www.xicidaili.com/nn/'
    header = {'User-Agent': 'Opera/8.0 (Windows NT 5.1; U; en)'}
    html = requests.get(url, headers=header, timeout=timeout).text
    # Raw strings keep \s, \d and \S as regex escapes; in a normal string
    # literal they are invalid escape sequences that Python warns about.
    pt = (r'<td>([0-9.]{7,15})</td>\s+<td>(\d+)</td>\s+<td>[\s\S]+?</td>\s+'
          r'<td class="country">高匿</td>\s+<td>(.+?)</td>')
    return [{kind.lower(): ip + ':' + port}
            for ip, port, kind in re.findall(pt, html)]
# Fetch the proxy pool once, while the module is being loaded (this performs
# a network request at import time); Request.__init__ stores this list.
proxies = get_proxies()
# Show how many proxies were scraped, followed by the list itself.
print(len(proxies), proxies)
class Request:
    """HTTP helper that retries requests with a random User-Agent and proxy.

    The candidate User-Agent strings and proxies come from the module-level
    ``UA`` and ``proxies`` lists.
    """

    def __init__(self):
        self.ua = UA  # candidate User-Agent strings
        self.proxies = proxies  # candidate proxy mappings; may be empty

    def write_log(self, *args):
        """Append the repr of every argument to today's log file.

        The file is ``log<YYYYMMDD>.txt`` in the current working directory,
        opened in append mode as UTF-8. Each argument is written on its own
        line and the record is closed with an empty line.
        """
        from time import strftime

        ymd = strftime('%Y%m%d')
        with open('log%s.txt' % ymd, 'a', encoding='utf-8') as f:
            for item in args:
                # Wrap in a 1-tuple so a tuple argument is logged as a
                # whole instead of being unpacked as %-format operands.
                f.write('%r\n' % (item,))
            f.write('\n')

    @staticmethod
    def _describe(url, kwargs):
        """Return the first log field: the URL, plus the payload if any."""
        if 'data' in kwargs:
            return '%r\n%r' % (url, kwargs['data'])
        return url

    def _request(self, method, url, encode, times, timeout, **kwargs):
        """Run the retry loop shared by get() and post().

        Returns the response text of the first attempt answered with
        status 200, or ``None`` when all attempts failed.
        """
        # Count down so the number written to the log is how many attempts
        # remain after the one that just failed.
        for remaining in range(times - 1, -1, -1):
            headers = {'User-Agent': random.choice(self.ua)}
            # random.choice raises IndexError on an empty sequence, so go
            # without a proxy when none were scraped.
            proxy = random.choice(self.proxies) if self.proxies else None
            try:
                r = requests.request(method, url, headers=headers,
                                     proxies=proxy, timeout=timeout,
                                     **kwargs)
            except Exception as error:
                # Deliberately broad: any failure is logged and retried.
                self.write_log(self._describe(url, kwargs), error, remaining)
                continue
            if r.status_code == 200:
                r.encoding = encode
                return r.text
            self.write_log(self._describe(url, kwargs), r.status_code,
                           remaining)
        return None

    def get(self, url, encode='utf-8', times=5, timeout=10):
        """Fetch ``url`` with HTTP GET, retrying on failure.

        Args:
            url: Address to request.
            encode: Encoding assigned to the response before its text is
                read.
            times: Maximum number of attempts; values below 1 make no
                request.
            timeout: Seconds to wait for the server on each attempt.

        Returns:
            The response text of the first attempt answered with status
            200, or ``None`` when all attempts failed.
        """
        return self._request('get', url, encode, times, timeout)

    def post(self, url, data, encode='utf-8', times=5, timeout=10):
        """Send ``data`` to ``url`` with HTTP POST, retrying on failure.

        Args:
            url: Address to request.
            data: Payload sent as the request's ``data``.
            encode: Encoding assigned to the response before its text is
                read.
            times: Maximum number of attempts; values below 1 make no
                request.
            timeout: Seconds to wait for the server on each attempt.

        Returns:
            The response text of the first attempt answered with status
            200, or ``None`` when all attempts failed.
        """
        return self._request('post', url, encode, times, timeout, data=data)
if __name__ == '__main__':
    # Manual check: fetch two httpbin endpoints and print each response body.
    client = Request()
    for endpoint in ('http://httpbin.org/get', 'http://httpbin.org/ip'):
        print(client.get(endpoint))
备注
-
基础补充
-
requests模块