用 Python 爬取京东商品信息
话不多说,直接上代码:
import requests
import re
from openpyxl import workbook
from openpyxl import load_workbook
# Search endpoint and request headers shared by every page request.
SEARCH_URL = 'https://search.jd.com/Search'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'}
PAGE_COUNT = 10  # result pages 1..PAGE_COUNT are fetched
OUTPUT_FILE = 'text.xlsx'

# Compiled once at import time instead of once per fetched page.
PATTERN_NAME = re.compile(r'<em>\s?(.*?)<font class="skcolor_ljg">')
PATTERN_SKU = re.compile(r'<li.data-sku.+"(\d+)".+gl-item">')
# Accepts any decimal price; the previous pattern only matched prices ending
# in ".00", so every other product silently lost its price.
PATTERN_PRICE = re.compile(r'<em>.</em><i>(\d+(?:\.\d+)?)</i></strong>')


def extract_items(html):
    """Extract one ``(name, sku, price)`` tuple per product found in *html*.

    Each product is delimited by the ``<li ... data-sku ... gl-item">`` tag
    matched by ``PATTERN_SKU``; the name and price are searched only in the
    text between that tag and the next product's tag. Keeping the three
    fields together per product means a missing name or price leaves that
    field as ``None`` instead of shifting every following row.

    Args:
        html: Text of a search result page.

    Returns:
        A list of ``(name, sku, price)`` tuples in document order. ``name``
        is the text between ``<em>`` and the first
        ``<font class="skcolor_ljg">`` (presumably the highlighted keyword --
        confirm against the live markup). ``price`` is a string; a trailing
        ``.00`` is stripped so whole-number prices keep their previous form.
        ``name`` and ``price`` are ``None`` when not found.
    """
    sku_matches = list(PATTERN_SKU.finditer(html))
    # Each product's slice ends where the next product's tag begins.
    slice_ends = [m.start() for m in sku_matches[1:]] + [len(html)]

    rows = []
    for sku_match, slice_end in zip(sku_matches, slice_ends):
        chunk = html[sku_match.end():slice_end]

        name_match = PATTERN_NAME.search(chunk)
        name = name_match.group(1) if name_match else None

        price_match = PATTERN_PRICE.search(chunk)
        price = price_match.group(1) if price_match else None
        if price is not None and price.endswith('.00'):
            price = price[:-3]

        rows.append((name, sku_match.group(1), price))
    return rows


def fetch_page(keyword, page):
    """Download one search result page and return its decoded text.

    Args:
        keyword: Search term sent as the ``keyword`` query parameter.
        page: 1-based page number.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    params = {
        'keyword': keyword,
        'enc': 'utf-8',
        'qrst': '1',
        'rt': '1',
        'stop': '1',
        'vt': '2',
        'wq': '',
        'page': str(page),
        's': str((page - 1) * 30 + 1),
        'click': '0',
    }
    resp = requests.get(SEARCH_URL, headers=HEADERS, params=params, timeout=20)
    # Fail before doing any decoding work on an error page.
    resp.raise_for_status()
    resp.encoding = resp.apparent_encoding
    return resp.text


def save_rows(rows, path):
    """Write *rows* to a new workbook at *path* below a fixed header row."""
    wb = workbook.Workbook()
    ws = wb.active
    ws.append(['商品名称', 'SKU', '价格'])
    for row in rows:
        ws.append(row)
    wb.save(path)


def main():
    """Prompt for a keyword, scrape the result pages and save them to Excel."""
    keyword = input('请输入关键词:')
    rows = []
    for page in range(1, PAGE_COUNT + 1):
        rows.extend(extract_items(fetch_page(keyword, page)))
    save_rows(rows, OUTPUT_FILE)
    print('写入完成!')


if __name__ == '__main__':
    main()