版权声明:欢迎转载,注明出处 https://blog.csdn.net/jklcl/article/details/82528014
爬虫 requests.post
可以模拟网页向服务器发送消息,获取想要的内容
1.无返回值
打开并登录豆瓣
模拟豆瓣登录
import requests
# Simulate a Douban login by POSTing the login form straight to the server.
postUrl = 'https://www.douban.com/accounts/login'
account = '******'  # account name (placeholder, fill in before running)
passwd = '*****'  # password (placeholder, fill in before running)
headers = {
    'Referer': 'https://www.douban.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
}
postData = {
    'source': 'index_nav',
    'form_email': account,
    'form_password': passwd,
    # NOTE(review): the captcha answer and id are hard-coded sample values;
    # presumably they must match a captcha issued for the current session.
    'captcha-solution': 'sponge',
    'captcha-id': 'T65SuHhM8GeYaQb8QFGsmI2H:ens',
}
# Without a timeout requests waits indefinitely on an unresponsive server.
responseRes = requests.post(postUrl, data=postData, headers=headers, timeout=10)
# NOTE(review): only the HTTP status is checked here; the response body is
# never inspected, so this does not confirm the credentials were accepted.
if responseRes.status_code == 200:
    print("模拟登陆成功")
2.返回html
爬取某大学本学期的成绩
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import csv
# Browser identification string sent with each request.
userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "

# HTTP headers shared by every request to the grade-query site; filled in
# key by key so the insertion order stays Referer, then User-Agent.
header = {}
header["Referer"] = "http://210.44.176.116/cjcx/zcjcx_login.html"
header['User-Agent'] = userAgent
def To_csv(id, html):
    """Print the student's basic info and write the grade table to a CSV file.

    The page is expected to contain an outer ``<table>`` with at least two
    nested tables: the first nested table holds the student info (``th``
    labels paired with ``td`` values), and the second table found inside
    the outer one holds the grades, with a header row of ``th`` cells
    followed by one ``tr`` per course.

    Args:
        id: Student number; used as the prefix of the output file name
            ``<id>(本学期).csv`` written to the current directory.
        html: HTML text of the grade-report page.

    Rows are paired with the header labels via ``zip``, so a row with more
    or fewer cells than there are headers is truncated / left partly blank
    instead of raising ``IndexError``.
    """
    soup = BeautifulSoup(html, features="html.parser")
    outer_table = soup.table
    info_table = outer_table.table

    print("学生基本信息:")
    info_labels = info_table.find_all("th")
    info_values = info_table.find_all("td")
    for label_cell, value_cell in zip(info_labels, info_values):
        # Cells whose text is a single space are treated as blank and skipped.
        if value_cell.text != " ":
            print(label_cell.text + ":" + value_cell.text)

    score_table = outer_table.find_all("table")[1]
    label_list = [th.text for th in score_table.find_all("th")]

    # The first <tr> is the header row; every later row is one course.
    score_list = [
        dict(zip(label_list, (td.text for td in course.find_all("td"))))
        for course in score_table.find_all("tr")[1:]
    ]

    print("开始写入csv")
    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line appears after every row on Windows.
    with open(id + '(本学期).csv', 'w', encoding='utf-8-sig', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=label_list)
        writer.writeheader()
        writer.writerows(score_list)
    print("写入成功")
def Login(account):
    """POST the student number to the grade-query page and return its HTML.

    Args:
        account: Student number, sent as the ``post_xuehao`` form field.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None``.
    """
    print("开始获取" + account + "的成绩")
    postUrl = "http://210.44.176.116/cjcx/dqcjcx_list.php"
    postData = {
        "post_xuehao": account,
        "Submit": "提交",
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    responseRes = requests.post(postUrl, data=postData, headers=header, timeout=10)
    if responseRes.status_code != 200:
        return None
    print("成绩爬取成功")
    return responseRes.text
if __name__ == "__main__":
    student_id = "******"  # student number (placeholder, fill in before running)
    text = Login(student_id)
    # Login returns None on a non-200 response; parsing None would crash
    # inside To_csv, so report the failure instead.
    if text is None:
        print("成绩爬取失败")
    else:
        To_csv(student_id, text)
3.返回json
爬取某旅游网站的列表信息
import requests
import json
# Fetch content with a POST request.
post_url = 'http://www.mafengwo.cn/mdd/base/list/pagedata_citylist'
form = {
    'mddid': '13061',
    'page': 1,
}
# Send the form as a POST request; without a timeout requests waits
# indefinitely on an unresponsive server.
response_json = requests.post(post_url, data=form, timeout=10).text
# The body is parsed as JSON and its 'list' entry is printed as-is.
text = json.loads(response_json)
li_text = text['list']
print(li_text)