HTTP请求
urllib模块
GET请求
import urllib.request

# Fetch a page with a plain GET request and print the raw (bytes) body.
# ``import urllib`` alone does not load the ``urllib.request`` submodule, so
# the submodule is imported explicitly.
with urllib.request.urlopen('http://www.zhihu.com') as response:
    html = response.read()
print(html)
import urllib.request

# Same GET request, but built explicitly as a Request object first.
# NOTE(review): the argument list of Request(...) was missing in this copy;
# the zhihu.com address used by the other urllib examples is assumed -- confirm.
request = urllib.request.Request('http://www.zhihu.com')
with urllib.request.urlopen(request) as response:
    html = response.read()
print(html)
POST请求
import urllib.parse
import urllib.request

# Submit form fields with POST: supplying ``data`` to Request makes urllib
# send a POST instead of a GET.
url = 'https://weibo.com/login'
# NOTE(review): the account value looks redacted in this copy; substitute
# real credentials before running.
postdata = {
    'uname': ' [email protected]',
    'password': 'suhangshispz ',
}
# urlencode() returns str, but the request body has to be bytes.
data = urllib.parse.urlencode(postdata).encode('utf-8')
req = urllib.request.Request(url, data)
with urllib.request.urlopen(req) as response:
    html = response.read()
print(html)
import urllib.parse
import urllib.request

# POST the login form while presenting browser-like request headers.
url = 'http://www.weibo.com/login'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0'
referer = 'http://s.weibo.com/'
# NOTE(review): the account value looks redacted in this copy; substitute
# real credentials before running.
postdata = {
    'uname': ' [email protected]',
    'password': 'suhangshispz ',
}
# The HTTP header is spelled "User-Agent" (hyphen). With an underscore the
# value is sent as an unrelated header and the default urllib User-Agent is
# still used.
headers = {'User-Agent': user_agent, 'Referer': referer}
# urlencode() returns str, but the request body has to be bytes.
data = urllib.parse.urlencode(postdata).encode('utf-8')
req = urllib.request.Request(url, data, headers)
with urllib.request.urlopen(req) as response:
    html = response.read()
print(html)
Cookie处理
import urllib.request
from http import cookiejar

# Collect the cookies a site sets: the CookieJar is filled in by the
# HTTPCookieProcessor installed in the opener.
cookie = cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookie))
response = opener.open('http://www.zhihu.com')
for item in cookie:
    # format() instead of ``+`` so a cookie whose value is None still prints.
    print('{}:{}'.format(item.name, item.value))
import urllib.request

# Send a hand-written Cookie header: every (name, value) pair in
# ``opener.addheaders`` is added to each request made through the opener.
opener = urllib.request.build_opener()
# NOTE(review): the e-mail value looks redacted in this copy.
opener.addheaders.append(('Cookie', 'email=' + '[email protected]'))
req = urllib.request.Request('http://www.zhihu.com/')
response = opener.open(req)
print(response.headers)
retdata = response.read()
Timeout设置
import socket
import urllib.request

# Set a process-wide default timeout (seconds) for newly created sockets.
socket.setdefaulttimeout(10)
# ``urllib.request.socket`` is the same ``socket`` module re-exposed by
# urllib.request, so this second spelling has the same effect. It only works
# once ``urllib.request`` has actually been imported.
urllib.request.socket.setdefaulttimeout(10)
import urllib.request

# Per-request timeout: give up if the server does not answer within 2 seconds.
# NOTE(review): the argument list of Request(...) was missing in this copy;
# the zhihu.com address used by the other urllib examples is assumed -- confirm.
request = urllib.request.Request('http://www.zhihu.com')
with urllib.request.urlopen(request, timeout=2) as response:
    html = response.read()
print(html)
HTTP响应码
import urllib.error
import urllib.request

# Request a page that is expected to fail and report why.
try:
    response = urllib.request.urlopen('http://www.python.org/fish.html')
    print(response)
except (urllib.error.HTTPError, urllib.error.URLError) as e:
    # HTTPError carries the HTTP status in ``code``; a plain URLError
    # (e.g. DNS or connection failure) only has ``reason``.
    if hasattr(e, 'code'):
        print('Error code:', e.code)
    else:
        print('Error reason:', e.reason)
重定向
import urllib.request

# Detect whether the request was redirected: geturl() returns the URL of the
# resource actually retrieved, so it differs from the requested URL only when
# a redirect was followed.
url = 'http://www.zhihu.com'
response = urllib.request.urlopen(url)
isRedirected = response.geturl() != url
import urllib.request


class RedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that ignores 301 and annotates followed 302 responses."""

    def http_error_301(self, req, fp, code, msg, headers):
        """Do nothing for a 301 response; returns ``None``."""
        return None

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect and tag the resulting response.

        The standard redirect logic of the base class performs the follow-up
        request. The response it returns gets two extra attributes:
        ``status`` (the redirect status code received) and ``newurl`` (the URL
        finally retrieved).

        Returns the annotated response, or ``None`` when the base class did
        not follow the redirect (for example, no Location header).
        """
        result = super().http_error_302(req, fp, code, msg, headers)
        if result is None:
            return None
        result.status = code
        result.newurl = result.geturl()
        return result
import urllib.request

# Build an opener that uses the custom redirect handler. build_opener()
# accepts handler classes as well as instances. The response is closed right
# away because only the redirect handling is being exercised here.
opener = urllib.request.build_opener(RedirectHandler)
with opener.open('http://www.zhihu.cn'):
    pass
Proxy设置
import urllib.request

# Route plain-HTTP requests made through this opener via a proxy server.
# NOTE(review): the proxy address carries no port -- confirm the intended
# host:port before relying on it.
proxy = urllib.request.ProxyHandler({'http': '121.42.167.160'})
opener = urllib.request.build_opener(proxy)
with opener.open('http://www.zhihu.com/') as response:
    print(response.read())
Requests模块
GET请求
import requests
# Issue a GET request and print the undecoded (bytes) response body.
r = requests.get(url='http://www.baidu.com')
print(r.content)
POST请求
import requests
# POST the login form; a dict passed as ``data`` is sent form-encoded.
postdata = dict([
    ('uname', ' [email protected]'),
    ('password', 'suhangshispz '),
])
r = requests.post(url='http://weibo.com/login', data=postdata)
print(r.content)
复杂请求
import requests
# Let requests build the query string from a dict, then show the final URL.
payload = dict(Keywords='blog:qiyeboy', pageindex=1)
r = requests.get(
    'http://zzk.cnblogs.com/s/blogpost',
    params=payload,
)
print(r.url)
响应
import requests
# Compare the decoded text before and after overriding the response encoding.
r = requests.get('http://www.baidu.com')
# r.content is bytes and cannot be concatenated to a str label; use
# print(r.content) on its own to inspect the raw body.
print('text-->' + r.text)
# r.encoding can be None when the response declares no usable charset, so it
# is converted explicitly instead of being concatenated directly.
print('encoding-->' + str(r.encoding))
r.encoding = 'utf-8'
print('new text-->' + r.text)
import requests
import chardet
# Guess the body's character set with chardet and decode the text with it.
r = requests.get('http://www.baidu.com')
# Run the detection once and reuse the result instead of scanning the whole
# body twice.
detected = chardet.detect(r.content)
print(detected)
r.encoding = detected['encoding']
print(r.text)
import requests
# Read the first 10 bytes straight from the underlying raw stream.
# With stream=True the connection is held until the body is consumed or the
# response is closed, so the response is used as a context manager.
with requests.get('http://www.baidu.com', stream=True) as r:
    print(r.raw.read(10))
import requests
# Send the request with a browser-like User-Agent header.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 '
    '2345Explorer/9.3.2.17331'
)
headers = dict([('User-Agent', user_agent)])
r = requests.get('http://www.baidu.com', headers=headers)
print(r.content)
响应码&响应头
import requests
# Print the status code and headers on success; otherwise raise the matching
# HTTP error.
r = requests.get('http://www.baidu.com')
if r.status_code == requests.codes.ok:
    # Both header lookups below read the same 'content-type' field.
    print(
        r.status_code, '\n',
        r.headers, '\n',
        r.headers.get('content-type'), '\n',
        r.headers['content-type'],
    )
else:
    r.raise_for_status()
Cookie处理
import requests
# List the cookies set by the response.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.3.2.17331'
headers = {'User-Agent': user_agent}
r = requests.get('http://www.baidu.com', headers=headers)
# items() yields (name, value) pairs directly, avoiding a second lookup by
# name for every cookie.
for cookie, value in r.cookies.items():
    print(cookie + ':' + value)
import requests
# Attach custom cookies (and a browser-like User-Agent) to the request.
user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 '
    '2345Explorer/9.3.2.17331'
)
headers = dict([('User-Agent', user_agent)])
cookies = {'name': 'qiye', 'age': '10'}
r = requests.get(
    'http://www.baidu.com',
    headers=headers,
    cookies=cookies,
)
print(r.text)
import requests
# Log in through a Session so cookies are carried from one request to the next.
loginUrl = 'http://www.weibo.com/login'
# NOTE(review): the account value looks redacted in this copy; substitute
# real credentials before running.
datas = {
    'uname': ' [email protected]',
    'password': 'suhangshispz ',
}
# The context manager closes the session (and its pooled connections) when
# done.
with requests.Session() as s:
    # Visit the login page first so the session holds whatever cookies the
    # site sets before the form is posted.
    r = s.get(loginUrl, allow_redirects=True)
    r = s.post(loginUrl, data=datas, allow_redirects=True)
print(r.text)
重定向&历史信息
import requests
# Show where the request ended up, its final status, and the chain of
# intermediate responses recorded in ``history``.
r = requests.get(url='http://github.com')
for detail in (r.url, r.status_code, r.history):
    print(detail)
Timeout设置
import requests
# Pass a 2-second timeout with the request, then print the raw body.
r = requests.get(url='http://github.com', timeout=2)
print(r.content)
Proxy设置
import requests
# Map each URL scheme to the proxy that requests for it should go through.
proxies = dict(
    http='http://111.121.193.214',
    https='http://121.201.33.100',
)
requests.get(url='http://example.org', proxies=proxies)