解析excel文件python源码模板(加密与未加密两类)
业务说明:此脚本负责自动化解析 Excel 文件并将其写入数据库。用户只需根据自己的业务需求重写 define_json 函数,并补全代码中标有 '''后续添加''' 的部分即可。本文主要讲解思路,代码需自行补全。脚本未对合并单元格做处理;解决思路是在合并单元格的列坐标基础上加一,并添加一层异常捕获。
excel样式:
'''
解析excel文件模板
'''
import xlrd
import os
import shutil
import re
import win32com.client as win32
class Excel_Model(object):
    '''Template for parsing stock workbooks and handing the rows to a database hook.

    Workbooks that xlrd can open are read through xlrd; workbooks it cannot
    open are treated as password-protected and read through Excel's COM
    interface instead.  Override ``define_json`` (and ``save_DB``) to adapt
    the template to a concrete workbook layout.
    '''

    # Directory scanned for ``.xls`` files (placeholder value).
    path = 'XXX'
    # Directory that processed files are moved into (placeholder value).
    move_path = 'XXX'
    # Password used when a workbook has to be opened through Excel (placeholder).
    password = 'XXX'
    # Regular expression a first-column cell must match to count as a data row
    # (placeholder; customise per workbook layout).
    row_pattern = r'^XXX$'

    def load_files(self):
        '''Yield the full path of every ``.xls`` file found directly in ``self.path``.'''
        for file_name in os.listdir(self.path):
            # Compare the real extension, case-insensitively, instead of the
            # last three characters of the name.
            if file_name.lower().endswith('.xls'):
                yield os.path.join(self.path, file_name)

    def define_json(self):
        '''Describe the supported workbook layouts.

        Returns a 3-tuple ``(target_row1, row_data_type1, row_data_type2)``:
        the first-column value that identifies the header row, followed by the
        header names of two alternative layouts (code, short name, market value).
        Further layouts can be added by overriding this method.
        '''
        target_row1 = '股票代码'
        row_data_type1 = ['股票代码', '简称', '市值']
        row_data_type2 = ['代码', '股票简称', '市 值']
        return target_row1, row_data_type1, row_data_type2

    def find_data(self, file_path):
        '''Extract (code, name) pairs from one workbook, print them and call ``save_DB``.

        Only a failure to *open* the workbook with xlrd triggers the Excel COM
        fallback; errors raised while parsing an opened workbook propagate to
        the caller instead of being mistaken for an encrypted file.
        '''
        try:
            workbook = xlrd.open_workbook(file_path)
        except Exception:
            # xlrd could not open the file: treat it as password-protected.
            rows = self._read_encrypted_rows(file_path)
        else:
            # Prefer 'Sheet1' and fall back to 'Sheet2' when it is absent.
            if 'Sheet1' in workbook.sheet_names():
                sheet_name = 'Sheet1'
            else:
                sheet_name = 'Sheet2'
            rows = self._read_plain_rows(workbook.sheet_by_name(sheet_name))

        for target_code, target_name in rows:
            print('股票代码:%s, 股票简称:%s' % (target_code, target_name))
            self.save_DB()

    @staticmethod
    def _first_index(values, candidates):
        '''Return the index in ``values`` of the first candidate that is present.

        Raises ``ValueError`` when none of the candidates occurs.
        '''
        for candidate in candidates:
            if candidate in values:
                return values.index(candidate)
        raise ValueError('none of %r found' % (candidates,))

    @staticmethod
    def _cell_value(sheet, row_index, col_index):
        '''Return one cell value, or ``None`` (after printing the error) if the read fails.'''
        try:
            return sheet.cell(row_index, col_index).value
        except Exception as exc:
            print(exc)
            return None

    def _read_plain_rows(self, sheet):
        '''Return a list of ``(code, name)`` tuples read from an xlrd-style sheet.

        ``sheet`` must provide ``col_values``, ``row_values`` and ``cell``.
        Raises ``ValueError`` if the header row or a required column is missing.
        '''
        target_row1, row_data_type1, row_data_type2 = self.define_json()
        first_column = sheet.col_values(0)
        # The header row is the one whose first cell equals the full marker text.
        header = sheet.row_values(first_column.index(target_row1))
        # Each column may be labelled according to either layout.
        code_col = self._first_index(header, (row_data_type1[0], row_data_type2[0]))
        name_col = self._first_index(header, (row_data_type1[1], row_data_type2[1]))

        matcher = re.compile(self.row_pattern)
        rows = []
        # enumerate() keeps the real row number even when values repeat.
        for row_index, cell_value in enumerate(first_column):
            if not matcher.search(str(cell_value)):
                continue
            rows.append((
                self._cell_value(sheet, row_index, code_col),
                self._cell_value(sheet, row_index, name_col),
            ))
        return rows

    def _read_encrypted_rows(self, file_path):
        '''Return a list of ``(code, name)`` tuples read through Excel's COM interface.

        The workbook is opened read-only with ``self.password``; the code is
        taken from column 1 and the name from column 2 of the first worksheet.
        The workbook and the Excel application are always closed afterwards.
        '''
        matcher = re.compile(self.row_pattern)
        rows = []
        xl_app = win32.gencache.EnsureDispatch('Excel.Application')
        try:
            xl_app.Visible = True
            xl_app.DisplayAlerts = 0
            # An absolute path is passed because Excel does not resolve
            # relative paths against this process's working directory.
            workbook = xl_app.Workbooks.Open(
                os.path.abspath(file_path), False, True, None,
                Password=self.password, WriteResPassword=self.password)
            try:
                sheet = workbook.Worksheets(1)
                # Last used row in column A, found by searching upwards.
                last_row = sheet.Range('A65536').End(win32.constants.xlUp).Row
                # Excel rows are 1-based; include the last used row itself.
                for row_number in range(1, last_row + 1):
                    first_value = sheet.Cells(row_number, 1).Value
                    if not matcher.search(str(first_value)):
                        continue
                    try:
                        target_name = sheet.Cells(row_number, 2).Value
                    except Exception as exc:
                        print(exc)
                        target_name = None
                    rows.append((first_value, target_name))
            finally:
                workbook.Close(False)
        finally:
            xl_app.Application.Quit()
        return rows

    def save_DB(self):
        '''Persist the extracted data; empty hook to be implemented by the user.'''
        pass

    def main(self):
        '''Process every workbook in ``self.path`` and then move it to ``self.move_path``.

        A file of the same name already present in ``self.move_path`` is
        replaced by the newly processed one.
        '''
        for file_path in self.load_files():
            self.find_data(file_path)
            try:
                shutil.move(file_path, self.move_path)
            except shutil.Error:
                # The destination already holds a file with this name:
                # delete it and move again.
                os.remove(os.path.join(self.move_path, os.path.basename(file_path)))
                shutil.move(file_path, self.move_path)
if __name__ == '__main__':
    # Use a separate name for the instance so the Excel_Model class is not
    # rebound to one of its own instances.
    model = Excel_Model()
    model.main()