import requests
from lxml import etree
import xlwt
def getData(url='https://weixin.qq.com/', timeout=10):
    """Download a page and extract ``[title, date]`` rows from its list.

    Args:
        url: Address of the page to scrape. Defaults to the address the
            script originally hard-coded.
        timeout: Seconds to wait on the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        A list of two-item lists ``[title, date]``, one for every ``<li>``
        matched by the XPath below that has both a direct ``<a>`` text node
        and a direct ``<span>`` text node. Items missing either are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }

    # Fetch the page source. Without a timeout a stalled server would block
    # the script indefinitely.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as real data.
    response.raise_for_status()

    # Parse the HTML and select the <li> elements that hold the entries.
    tree = etree.HTML(response.text)
    if tree is None:
        # Guard for the case where the parser produced no root element.
        return []
    li_list = tree.xpath('/html/body/div/div/div[2]/div[3]/ul/li')

    datalist = []  # all rows collected so far
    for li in li_list:
        # ``/text()`` only returns text that is a direct child of the tag.
        titles = li.xpath('./a/text()')
        dates = li.xpath('./span/text()')
        if not titles or not dates:
            # Indexing [0] on an empty result would raise IndexError; skip
            # list items that do not have the expected link/date layout.
            continue
        datalist.append([titles[0], dates[0]])
    return datalist
def saveData(datalist, savepath):
    """Write the collected rows into a workbook and save it to ``savepath``.

    The workbook gets a single sheet named ``sht1``. Row 0 holds the two
    column headings; each entry of ``datalist`` is written to the following
    rows, one cell per item of the entry.

    Args:
        datalist: Sequence of rows, each row a sequence of cell values.
        savepath: Destination path passed to the workbook's ``save``.
    """
    print('save....')
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('sht1', cell_overwrite_ok=True)

    # Header row with the column names.
    for col_idx, heading in enumerate(('标题', '日期')):
        worksheet.write(0, col_idx, heading)

    # Data rows start at sheet row 1, directly below the header.
    for row_idx, row in enumerate(datalist, start=1):
        for col_idx, cell in enumerate(row):
            worksheet.write(row_idx, col_idx, cell)

    workbook.save(savepath)
    print('save ok....')
if __name__ == "__main__":
    # Script entry point: scrape the rows, then write them to the workbook.
    datalist = getData()
    savepath = 'aaa2.xls'
    saveData(datalist=datalist, savepath=savepath)
# 将数据保存到 Excel 的 Python 代码重构
# 猜你喜欢
# 转载自 blog.csdn.net/zbguolei/article/details/110305267
# 今日推荐
# 周排行