# -*- coding: utf-8 -*-
"""Scrape the Douban "小说" (fiction) tag book listing.

For every result page the script prints each book's title, author info and
rating, and appends the same text to ``txt.txt`` in the working directory.
"""
import re
import urllib
from urllib import request

import requests
from bs4 import BeautifulSoup


def _stripped_text(tag):
    """Return the text of *tag* with surrounding whitespace removed.

    ``tag.string`` is ``None`` when the tag holds more than one child node;
    in that case fall back to the concatenated text instead of crashing.
    """
    text = tag.string
    if text is None:
        text = tag.get_text()
    return text.strip()


def book():
    """Fetch the listing pages and save title / author info / rating.

    Pages are requested with ``start`` offsets 0, 15, ..., 285.  Each offset
    is printed before the request, every formatted entry is printed as it is
    built, and the entries of a page are appended to ``txt.txt`` encoded as
    UTF-8.

    Raises:
        urllib.error.URLError: if a page cannot be fetched (propagated from
            ``urlopen``; nothing is caught here).
    """
    # Mobile browser User-Agent sent with every request.
    head = {
        'User-Agent': (
            'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) '
            'AppleWebKit/535.19 (KHTML, like Gecko) '
            'Chrome/18.0.1025.133 Mobile Safari/535.19'
        ),
    }

    for num in range(0, 290, 15):
        print(num)
        url = 'https://www.douban.com/tag/%E5%B0%8F%E8%AF%B4/book?start=' + str(num)
        req = request.Request(url, headers=head)
        # Close the HTTP response deterministically and never hang forever
        # on a stalled connection.
        with request.urlopen(req, timeout=30) as html:
            data = BeautifulSoup(html, 'html.parser')

        book_names = data.find(attrs={'class': 'mod book-list'})
        if book_names is None:
            # find() returns None when the container is absent; skip the
            # page rather than fail with AttributeError on None.
            print('no book list found at start={}'.format(num))
            continue

        names = book_names.find_all('a', attrs={'class': 'title'})
        pre_names = book_names.find_all('div', attrs={'class': 'desc'})
        nums = book_names.find_all('span', attrs={'class': 'rating_nums'})

        # NOTE(review): the three lists are paired purely by position; if an
        # item on the page lacks one of the elements (e.g. no rating span),
        # later entries would be mismatched -- confirm against the page HTML.
        entries = []
        for a, b, c in zip(names, pre_names, nums):
            aa = '\n书名:{}\n作者信息:{}\n评分:{}'.format(
                str(a.string), _stripped_text(b), str(c.string))
            print(aa)
            entries.append(aa + '\r\n\r\n')

        if entries:
            # One append per page; binary mode keeps the explicit '\r\n'
            # separators untouched on every platform.
            with open('txt.txt', 'ab') as fo:
                fo.write(''.join(entries).encode('UTF-8'))


if __name__ == '__main__':
    book()
Python编写一个简单的爬虫-下载保存在本地
猜你喜欢
转载自blog.csdn.net/longfei_2010/article/details/79745248
今日推荐
周排行