# 版权声明:2018/4/10重启blog;转载请注明出处 https://blog.csdn.net/zhaiqiming2010/article/details/86512662
import requests
from lxml import html
def spider(sn):
    """Fetch Dangdang search results for a book and print prices, cheapest first.

    Args:
        sn: Book number used as the Dangdang search key.

    Returns:
        A list of dicts with the keys ``name``, ``link``, ``price`` (the
        price text with the currency sign removed) and ``business`` (the
        seller name as a string), sorted by ascending numeric price.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build the query string so the search key is URL-encoded,
    # and bound the wait so a stalled connection cannot hang the script.
    response = requests.get(
        'http://search.dangdang.com/',
        params={'key': sn, 'act': 'input'},
        timeout=10,
    )
    response.raise_for_status()

    # Parse the HTML and select one <li> per search result.
    selector = html.fromstring(response.text)
    result_items = selector.xpath('//div[@id="search_nature_rg"]/ul/li')

    priced_books = []
    for li in result_items:
        names = li.xpath('a/@title')
        links = li.xpath('a/@href')
        prices = li.xpath('p[3]/span[1]/text()')
        # Skip entries that lack the expected markup instead of crashing
        # the whole run on a single malformed list item.
        if not (names and links and prices):
            continue

        price = prices[0].replace('¥', '').strip()
        try:
            price_value = float(price)
        except ValueError:
            # Price text that is not a plain number cannot be ranked.
            continue

        # The seller link is only present for third-party shops; without it
        # the default seller label is used. Always store a string.
        sellers = li.xpath('p[@class="search_shangjia"]/a/text()')
        business = sellers[0] if sellers else '当当自营'

        priced_books.append((
            price_value,
            {
                'name': names[0],
                'link': links[0],
                'price': price,
                'business': business,
            },
        ))

    # list.sort is stable, so equally priced books keep their page order.
    priced_books.sort(key=lambda pair: pair[0])
    book_list = [book for _, book in priced_books]

    for item in book_list:
        print(item['price'])
    return book_list
if __name__ == '__main__':
    # Run the spider for a fixed sample book number when executed as a script.
    spider("9787115428028")