1.以爬取简书首页标题为例
# coding:utf-8
import requests
from bs4 import BeautifulSoup


class SoupSpider:
    """Scrape article titles from the Jianshu home page."""

    def __init__(self):
        # A single session is kept so every request made by this spider
        # reuses the same connection pool and cookie jar.
        self.session = requests.Session()

    def jian_shu_spider(self, url, headers, timeout=10):
        """Fetch ``url`` and print the text of every element with class "title".

        Args:
            url: Address of the page to fetch.
            headers: Mapping of HTTP headers sent with the request.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The list of title strings, in document order.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                4xx/5xx HTTP status.
        """
        # Go through the session created in __init__ (the module-level
        # requests.get would bypass it), and never wait forever on a
        # stalled server.
        response = self.session.get(url, headers=headers, timeout=timeout)
        # Fail loudly on an error status instead of parsing an error page.
        response.raise_for_status()

        # Parse the returned HTML with the lxml parser.
        soup = BeautifulSoup(response.text, "lxml")
        # Collect the text of every element whose class is "title".
        titles = [tag.text for tag in soup.find_all(class_="title")]
        for title in titles:
            print("文章标题:{}".format(title))
        return titles


if __name__ == '__main__':
    soup_spider = SoupSpider()
    soup_spider.jian_shu_spider(
        "http://www.jianshu.com",
        {
            "Referer": "https://www.jianshu.com/",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
        }
    )
2.爬取结果