import requests
import re
from lxml import html
from bs4 import BeautifulSoup
# Form fields posted to the wiki login endpoint.
# The credential values below are placeholders; replace them before running.
payload = {
    "os_username": "xxxxx",
    "os_password": "xxxxxxx",
    "login": "Log in",
    "os_destination": "",
}

# Seconds to wait for each HTTP request; without a timeout, requests can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Log in through a session so that the same session object (and whatever
# cookies the login response sets) is used for the page request below.
session_requests = requests.session()
login_url = "https://wiki.*********"
result = session_requests.post(
    login_url,
    data=payload,
    headers=dict(referer=login_url),
    timeout=REQUEST_TIMEOUT,
)
# Stop on an HTTP error status instead of going on to scrape an error page.
result.raise_for_status()

# Fetch the page to scrape.
url = 'https://wiki*********/pages/viewinfo.action?pageId=35343810'
result = session_requests.get(url, timeout=REQUEST_TIMEOUT)
result.raise_for_status()
soup = BeautifulSoup(result.text, 'lxml')

# Print every <a> tag whose string matches the pattern "-".
# NOTE(review): newer BeautifulSoup releases name this argument `string=`;
# `text=` is kept here for compatibility with the installed version - confirm.
for title in soup.find_all("a", text=re.compile("-")):
    print(title)
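# Tags: Python, web scraping
# (page residue) "You may also like"
# Reposted from blog.csdn.net/jonwu0102/article/details/81239415
# (page residue) "Today's recommendations"
# (page residue) "Weekly ranking"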