19.4 get和post请求
get请求
# Demo: URL-encode a search keyword into a GET query string.
from urllib import request
from urllib import parse

url = "https://www.baidu.com/s?"
keyword = input("请输入你要搜索的关键字:")
# urlencode turns the dict into a percent-encoded "wd=..." query string.
query = parse.urlencode({"wd": keyword})
print(query)
# Send a GET request to Baidu search with a browser User-Agent header
# and save the raw response body to get.html.
from urllib import request
from urllib import parse

headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
}
url = "https://www.baidu.com/s?"
wd = input("请输入你要搜索的关键字:")
params = {
    "wd": wd
}
# URL-encode the query parameters
ps = parse.urlencode(params)
print(ps)
url = url + ps
print(url)
# Bug fix: the Request construction and the urlopen call were fused onto a
# single line, which is a syntax error; they are now separate statements.
req = request.Request(url=url, headers=headers)
resp = request.urlopen(req)
data = resp.read()
print(data)
with open("get.html", "wb") as f:
    f.write(data)
post请求
# POST the word to the Youdao translate endpoint and print the JSON reply.
import urllib.request
import urllib

url = "https://fanyi.youdao.com/translate?"
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
}
word = input("请输入你要翻译的单词:")
from_data = {
    "i": word,
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_REALTlME",  # value copied from the site's JS (lowercase 'l')
}
# POST bodies must be bytes, so encode the urlencoded form.
data = urllib.parse.urlencode(from_data)
data = data.encode(encoding="utf-8")
# Fix: renamed the variable from `request` to `req` so it no longer
# shadows the urllib.request module name.
req = urllib.request.Request(url, data=data, headers=headers)
response = urllib.request.urlopen(req)
html = response.read().decode(encoding="utf-8").strip()
print(html)
19.5 requests库
requests 库底层封装urllib
安装:
pip install requests
# Compare the encoding reported by the response headers with the encoding
# chardet detects from the raw bytes of the Sina homepage.
import requests
import chardet

url = "http://www.sina.com.cn"
# Fetch the page
response = requests.get(url=url)
# Encoding taken from the response headers
print(response.encoding)
# Bug fix: this print was fused onto the end of the comment line above it,
# so it never executed. It shows the encoding chardet detects for the page.
print(chardet.detect(response.content))
ISO-8859-1 -------- latin1 编码
# Detect the Sina page's real encoding with chardet, re-decode the
# response with it, then save the decoded HTML to sina.html.
import requests
import chardet

url = "http://www.sina.com.cn"
resp = requests.get(url=url)

# chardet inspects the raw bytes; the header-declared encoding can be wrong.
detected = chardet.detect(resp.content).get("encoding")
print(detected)

# Re-decode the body using the detected encoding before reading .text.
resp.encoding = detected
print(resp.text)

with open("sina.html", "w", encoding=detected) as f:
    f.write(resp.text)
需求:爬取新浪页面的图片
# Requirement: crawl the Sina homepage and download every image referenced
# by a src="..." attribute into the image/ directory.
import re
import requests
import chardet

url = "http://www.sina.com.cn"
response = requests.get(url=url)
# Detect the real page encoding from the raw bytes and re-decode with it.
charset = chardet.detect(response.content).get("encoding")
print(charset)
response.encoding = charset

# Bug fixes:
# * four statements were fused onto one line (a syntax error);
# * the pattern r"src=\"(.*?jpg|png|gif|jepg)\"" put the alternation across
#   the whole group (only "...jpg" URLs could match; "png"/"gif" had to be
#   the entire attribute value) and misspelled "jpeg" as "jepg".
html = response.text
images = re.findall(r"src=\"(.*?\.(?:jpg|png|gif|jpeg))\"", html)
print(images)
print(len(images))

for index, item in enumerate(images):
    print("开始从{}下载图片".format(item))
    # Protocol-relative URLs (//...) need an explicit scheme prepended.
    real_url = "http:" + item
    print("开始从{}下载图片".format(real_url))
    resp = requests.get(real_url)
    with open("image/" + str(index) + ".jpg", "wb") as f:
        # The payload is bytes, so write resp.content
        f.write(resp.content)
# Crawl the Sina homepage and download every image referenced by a
# src="..." attribute into the image/ directory, handling both absolute
# and protocol-relative URLs.
import re
import requests
import chardet

url = "http://www.sina.com.cn"
response = requests.get(url=url)
# Detect the real page encoding from the raw bytes and re-decode with it.
charset = chardet.detect(response.content).get("encoding")
print(charset)
response.encoding = charset

html = response.text
# Bug fix: the pattern r"src=\"(.*?jpg|png|gif|jepg)\"" put the alternation
# across the whole group (only "...jpg" URLs could match; "png"/"gif" had
# to be the entire attribute value) and misspelled "jpeg" as "jepg".
images = re.findall(r"src=\"(.*?\.(?:jpg|png|gif|jpeg))\"", html)
print(images)
print(len(images))

for index, item in enumerate(images):
    print("开始从{}下载图片".format(item))
    # Bug fix: when the URL already started with "http", real_url silently
    # reused the value from the previous iteration (and was undefined on
    # the first one). Absolute URLs are now used as-is.
    if not item.startswith("http"):
        real_url = "http:" + item
    else:
        real_url = item
    print("开始从{}下载图片".format(real_url))
    resp = requests.get(real_url)
    with open("image/" + str(index) + ".jpg", "wb") as f:
        # The payload is bytes, so write resp.content
        f.write(resp.content)
手动伪造请求头
fake-useragent
pip install fake-useragent
python -m pip install fake-useragent
# Search Baidu using a randomly generated User-Agent header.
# Bug fixes: "import requests from fake_useragent" / "import UserAgent"
# was a mangled pair of import statements (a syntax error), and the
# closing brace of the headers dict was fused onto the input() line.
import fake_useragent
import requests
from fake_useragent import UserAgent

# UserAgent() exposes .ie / .chrome / .random pre-baked UA strings.
headers = {
    "User-Agent": UserAgent().random
}
kw = input("请输入你要搜索的文字:")
parms = {
    "wd": kw
}
url = "http://www.baidu.com/s?"
# requests URL-encodes `params` and appends them to the query string itself.
response = requests.get(url=url, params=parms, headers=headers)
print(response.text)
post请求:
var t = n.md5(navigator.appVersion), r = "" + (new Date).getTime(), i = r + parseInt(10 * Math.random(), 10);
return {
ts: r, bv: t, salt: i, sign: n.md5("fanyideskweb" + e + i + "Tbh5E8=q6U3EXe+&L[4c@")}
{
ts: r, bv: t, salt: i, sign: n.md5("fanyideskweb" + e + i + "Tbh5E8=q6U3EXe+&L[4c@")}
i = r + parseInt(10 * Math.random()
salt == i
r = "" + (new Date).getTime()
time.time()
lvs = ts =r =time.time()
salt = i =time.time()+random.randint(0,10)=lvs+random.randint(0,10)
sign: n.md5("fanyideskweb" + e + i + "Tbh5E8=q6U3EXe+&L[4c@")
sign = hashlib.md5("fanyideskweb" + word + salt + "Tbh5E8=q6U3EXe+&L[4c@")
var t = n.md5(navigator.appVersion)
bv = t =hashlib.md5(“5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like
Gecko) Chrome/90.0.4430.85 Safari/537.36”
# POST to the Youdao translate endpoint with the lts/salt/sign/bv fields
# the site's JavaScript computes, so the request passes its anti-spider
# check (see the JS snippet in the notes above this script).
# Bug fix: "import requests from fake_useragent" / "import UserAgent" was
# a mangled pair of import statements (a syntax error).
import time
import hashlib
import random
import requests
from fake_useragent import UserAgent

url = "https://fanyi.youdao.com/translate?"
headers = {
    "User-Agent": UserAgent().random
}
word = input("请输入你要翻译的单词:")

# lts: millisecond timestamp; salt: timestamp plus a random digit;
# sign: md5 over the client id, the word, the salt and the secret suffix
# taken from the site's JS.
lvs = time.time() * 1000
salt = lvs + random.randint(1, 10)
sign = hashlib.md5(("fanyideskweb" + word + str(salt) + "Tbh5E8=q6U3EXe+&L[4c@").encode("utf-8")).hexdigest()
# bv: md5 of the browser's navigator.appVersion string.
bv = hashlib.md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36".encode("utf-8")).hexdigest()

from_data = {
    "i": word,
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": salt,
    "sign": sign,
    "lts": lvs,
    "bv": bv,
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_REALTlME",  # value copied from the site's JS (lowercase 'l')
}
response = requests.post(url=url, data=from_data, headers=headers)
print(response.text)