爬虫 requests.post

版权声明:欢迎转载,注明出处 https://blog.csdn.net/jklcl/article/details/82528014

爬虫 requests.post

requests.post 可以模拟网页向服务器发送 POST 请求(例如提交表单),从而获取想要的内容

1.无返回值

打开并登录豆瓣
这里写图片描述

这里写代码片

这里写图片描述

模拟豆瓣登录

import requests

# Simulate submitting the Douban login form with a single POST request.
postUrl = 'https://www.douban.com/accounts/login'
id = '******'  # account (NOTE: shadows the builtin id(); name kept for compatibility)
passwd = '*****'  # password
# Browser-like headers sent along with the login request.
headers = {
    'Referer': 'https://www.douban.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
}
# Form fields posted to the login endpoint.
# NOTE(review): the captcha values are hard-coded; presumably they were copied
# from one captured login attempt and will not stay valid -- confirm.
postData = {
    'source': 'index_nav',
    'form_email': id,
    'form_password': passwd,
    'captcha-solution': 'sponge',
    'captcha-id': 'T65SuHhM8GeYaQb8QFGsmI2H:ens',
}
# requests applies no timeout by default, so without one an unresponsive
# server would block this script forever.
responseRes = requests.post(postUrl, data=postData, headers=headers, timeout=10)
# A 200 status only shows that the HTTP exchange succeeded; it does not by
# itself prove that the credentials were accepted.
if responseRes.status_code == 200:
    print("模拟登陆成功")

2.返回html

爬取某大学本学期的成绩

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import csv

# Browser-like User-Agent string sent with every request (note the trailing
# space, which is part of the original value).
userAgent = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
)
# Request headers shared by the score-query calls.
header = {"Referer": "http://210.44.176.116/cjcx/zcjcx_login.html", "User-Agent": userAgent}


def To_csv(id, html):
    """Print the student's basic info and save the score table as a CSV file.

    The page is expected to contain an outer ``<table>`` that nests at least
    two tables: the first holds the student's basic information (``th``
    labels paired with ``td`` values), the second holds the scores (``th``
    column headers and one ``tr`` per course).

    Args:
        id: Prefix of the output file name; the scores are written to
            ``<id>(本学期).csv`` in the current working directory.
        html: HTML text of the score page.
    """
    soup = BeautifulSoup(html, features="html.parser")
    outer_table = soup.table
    info_table = outer_table.table

    print("学生基本信息:")
    # zip() pairs every label with its value and stops at the shorter list, so
    # a missing cell no longer raises IndexError.
    for label, value in zip(info_table.find_all("th"), info_table.find_all("td")):
        # Cells containing only a single space are treated as empty and skipped.
        if value.text != " ":
            print(label.text + ":" + value.text)

    score_table = outer_table.find_all("table")[1]
    label_list = [th.text for th in score_table.find_all("th")]

    # Row 0 is skipped (presumably the header row); each remaining row becomes
    # a dict keyed by column header. Cells beyond the last header are ignored.
    score_list = [
        dict(zip(label_list, (cell.text for cell in row.find_all("td"))))
        for row in score_table.find_all("tr")[1:]
    ]

    print("开始写入csv")
    # newline='' is required by the csv module; without it every row is
    # followed by a blank line on Windows. utf-8-sig prepends a BOM.
    with open(id + '(本学期).csv', 'w', encoding='utf-8-sig', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=label_list)
        writer.writeheader()
        writer.writerows(score_list)
    print("写入成功")


def Login(account, timeout=10):
    """POST a student number to the score-query endpoint and return the page.

    Args:
        account: Student number, sent as the ``post_xuehao`` form field.
        timeout: Seconds to wait for the server. requests applies no timeout
            by default, so without one an unresponsive server would block the
            call forever.

    Returns:
        The response body as text. It is returned whatever the HTTP status
        is; the success message is printed only for status 200.
    """
    print("开始获取" + account + "的成绩")

    postUrl = "http://210.44.176.116/cjcx/dqcjcx_list.php"

    # Form fields submitted with the query.
    postData = {
        "post_xuehao": account,
        "Submit": "提交",
    }
    responseRes = requests.post(postUrl, data=postData, headers=header, timeout=timeout)
    if responseRes.status_code == 200:
        print("成绩爬取成功")
    return responseRes.text


if __name__ == "__main__":
    # Named student_id rather than `id` so the builtin id() is not shadowed.
    student_id = "******"  # student number
    page_html = Login(student_id)
    To_csv(student_id, page_html)

3.返回json

爬取某旅游网站的列表信息

import requests
import json

# Fetch the list data with a POST request.
post_url = 'http://www.mafengwo.cn/mdd/base/list/pagedata_citylist'

# Form fields sent with the request.
form = {
    'mddid': '13061',
    'page': 1,
}
# Simulate the form POST. requests applies no timeout by default, so one is
# set here to keep an unresponsive server from blocking the script forever.
response_json = requests.post(post_url, data=form, timeout=10).text
# The response body is JSON text; decode it into Python objects.
text = json.loads(response_json)

# The decoded payload is indexed by its 'list' key and that value is printed.
li_text = text['list']
print(li_text)

猜你喜欢

转载自blog.csdn.net/jklcl/article/details/82528014