import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
class Jingdong:
    """Scrape jd.com search results with a headless Chrome and append them to a CSV file.

    Typical use is ``Jingdong().workOn()``: it opens the site, asks for a
    search keyword on stdin, then walks the result pages and writes one CSV
    row per product.

    Args:
        max_pages: Upper bound on the number of result pages to scrape.
        load_wait: Seconds to wait after scrolling so lazily loaded items appear.
        page_wait: Seconds to wait after clicking "next page".
        csv_path: Output file; rows are appended, encoded as gb18030.
    """

    def __init__(self, max_pages=3, load_wait=3, page_wait=2, csv_path='京东.csv'):
        self.url = 'https://www.jd.com/'
        # Not referenced by any method in this class; kept for backward compatibility.
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'}
        self.max_pages = max_pages
        self.load_wait = load_wait
        self.page_wait = page_wait
        self.csv_path = csv_path
        # Created by getPage(); workOn() quits it when the crawl ends.
        self.driver = None

    def getPage(self):
        """Start headless Chrome, open the home page and submit a search.

        The search keyword is read interactively from stdin.
        """
        opt = webdriver.ChromeOptions()
        # Plain command-line flag instead of the Selenium 3-only set_headless()
        # helper, so this works regardless of the installed Selenium version.
        opt.add_argument('--headless')
        self.driver = webdriver.Chrome(options=opt)
        self.driver.get(self.url)
        key = input('请输入商品:')
        # find_element(By..., ...) replaces the find_element_by_* shortcuts,
        # which newer Selenium releases no longer provide.
        self.driver.find_element(By.CLASS_NAME, 'text').send_keys(key)
        # Click the search button.
        self.driver.find_element(By.CLASS_NAME, 'button').click()

    @staticmethod
    def _parse_item(text):
        """Turn the visible text of one product tile into a CSV row.

        The expected layout is one field per line, e.g.::

            ¥52.80
            <product name>
            500+条评价
            <shop name>

        with an optional ``拍拍`` marker line inserted after the price.

        Returns:
            ``[price, comments, shop, name]``, or ``None`` when the tile has
            too few lines to contain all four fields.
        """
        info = text.split('\n')
        # A '拍拍' marker on the second line shifts the remaining fields by one.
        start = 2 if len(info) > 1 and info[1] == '拍拍' else 1
        if len(info) < start + 3:
            return None
        price = info[0]
        name, comments, shop = info[start:start + 3]
        return [price, comments, shop, name]

    def parsePage(self):
        """Scrape the current result page and advance to the next one.

        Returns:
            bool: ``True`` if the "next page" button was clicked, ``False``
            when there is no further page (button missing or disabled).
        """
        # Scroll to the bottom so the rest of the page is triggered to load.
        self.driver.execute_script(
            'window.scrollTo(0,document.body.scrollHeight)'
        )
        # Give the page time to finish loading.
        time.sleep(self.load_wait)
        # One <li> node per product.
        items = self.driver.find_elements(By.XPATH, '//div[@id="J_goodsList"]/ul/li')
        for item in items:
            row = self._parse_item(item.text)
            # Tiles that do not follow the expected layout are skipped rather
            # than aborting the whole crawl with an IndexError.
            if row is not None:
                self.saveCsv(row)

        if 'pn-next disabled' in self.driver.page_source:
            return False
        # find_elements returns an empty list instead of raising when the
        # button is absent (e.g. a single page of results).
        next_buttons = self.driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_buttons:
            return False
        next_buttons[0].click()
        time.sleep(self.page_wait)
        return True

    def saveCsv(self,L):
        """Append the row ``L`` (a list of field strings) to the CSV file."""
        with open(self.csv_path,'a',newline='',encoding='gb18030') as f:
            writer = csv.writer(f)
            writer.writerow(L)

    def workOn(self):
        """Run the whole crawl: search, then scrape up to ``max_pages`` pages.

        The browser is always shut down afterwards, even if scraping fails.
        """
        try:
            self.getPage()
            for page in range(1, self.max_pages + 1):
                advanced = self.parsePage()
                print('第%s页爬取成功'%page)
                # Stop on the last page instead of scraping it again.
                if not advanced:
                    break
        finally:
            if self.driver is not None:
                self.driver.quit()
                self.driver = None
# Script entry point: run one interactive crawl with the default settings.
if __name__ == '__main__':
    print('start')
    spider = Jingdong()
    spider.workOn()
# python京东商品爬取 (JD.com product scraper in Python)
# Reposted from blog.csdn.net/lichong2259/article/details/87940091