Python编写一个简单的爬虫-下载保存在本地

# -*- coding: utf-8 -*-
from urllib import request
from bs4 import BeautifulSoup
import urllib
import re
import requests

def book():
    """Crawl a Douban tag's book listing and append the results to ``txt.txt``.

    Requests the listing pages at ``start`` offsets 0, 15, ..., 285 (the tag in
    the URL is the percent-encoded form of "小说"). For every entry found, a
    record with the title, the description line and the rating is printed and
    appended to ``txt.txt`` as UTF-8, followed by a blank line.

    Returns:
        None. Output goes to stdout and to ``txt.txt`` in the working
        directory.

    Raises:
        urllib.error.URLError: if a page cannot be fetched (HTTP error
            statuses are reported through its ``HTTPError`` subclass).
        TimeoutError: if the server stops responding while a page is read.
    """
    # A mobile-browser User-Agent is sent with every request.
    # NOTE(review): presumably the site rejects urllib's default agent - confirm.
    head = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19',
    }

    # Same offsets as the original ``while num < 290: ... num += 15`` loop.
    for num in range(0, 290, 15):
        print(num)
        url = 'https://www.douban.com/tag/%E5%B0%8F%E8%AF%B4/book?start=' + str(num)
        res = request.Request(url, headers=head)

        # Close the HTTP response deterministically; the timeout keeps a stalled
        # connection from hanging the crawl forever. BeautifulSoup reads the
        # whole response while it is being constructed.
        with request.urlopen(res, timeout=30) as html:
            data = BeautifulSoup(html, 'html.parser')

        book_names = data.find(attrs={'class': 'mod book-list'})
        if book_names is None:
            # No listing container on this page: skip it instead of failing
            # with AttributeError on the lookups below.
            continue

        names = book_names.find_all('a', attrs={'class': 'title'})
        pre_names = book_names.find_all('div', attrs={'class': 'desc'})
        nums = book_names.find_all('span', attrs={'class': 'rating_nums'})

        # NOTE(review): the three lists are paired purely by position; if an
        # entry lacks one of the elements the pairing shifts for the rest of
        # the page - verify against the actual page structure.
        entries = list(zip(names, pre_names, nums))
        if not entries:
            # Nothing to record; do not create or touch the output file.
            continue

        # Open the output file once per page and let ``with`` close it even if
        # printing or writing raises.
        with open('txt.txt', 'ab') as fo:
            for a, b, c in entries:
                # ``b.string`` is None when the description holds nested tags,
                # which made ``str.strip`` raise TypeError; ``get_text()``
                # always returns a string.
                aa = (
                    '\n' + '书名:{}'.format(str(a.string))
                    + '\n' + '作者信息:{}'.format(b.get_text().strip())
                    + '\n' + '评分:{}'.format(str(c.string))
                )
                print(aa)
                fo.write((aa + '\r\n\r\n').encode('UTF-8'))


# Script entry point: start the crawl only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    book()

猜你喜欢

转载自blog.csdn.net/longfei_2010/article/details/79745248