import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded page source as a string.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    # Present a well-formed mobile-browser User-Agent; the previous literal
    # had stray spaces and missing separators.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/70.0.3538.25 Mobile Safari/537.36'
        ),
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of handing an error page to the parser.
    response.raise_for_status()
    # Without a declared charset, requests falls back to ISO-8859-1 for
    # text responses, which garbles non-Latin pages; detect from the body.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
        response.encoding = response.apparent_encoding
    return response.text
# Script body: fetch the news index page and print the <li> items found
# inside each div with class "list_e".
print("瓯海区新闻网")
# Parse the downloaded page source with the lxml parser.
soup = BeautifulSoup(get_html('http://www.ohnews.cn/news/oh/index.shtml'), 'lxml')
for list_block in soup.find_all('div', class_='list_e'):
    # Search within the current container only. Querying the whole document
    # here would print every <li> on the page once per matching div.
    for item in list_block.find_all('li'):
        print(item)
# python爬虫获取温州瓯海区新闻网的指定内容 (Python crawler that fetches selected content from the Wenzhou Ouhai District news site)
# 猜你喜欢
# 转载自blog.csdn.net/fdsgfd43432/article/details/107740559
# 今日推荐
# 周排行