策划提出需求:统计所有 xlsx 文件中的数据,并将其统一处理、归纳到一个文件中,方便阅读。
这时候 Python 相对 C# 的优势就体现出来了。当然 C# 还是很强的,只是针对不同的功能,应当选择合适的工具。
思路:读取所有的 xlsx 文件,将其数据处理为统一的数据结构,再利用数据集合的特性将其合并,最后导出为 JSON 文件。
以下为 Python 读取 xlsx 文件的代码,以防日后忘记或找不到,特作记录!
import json
import base64
import xlrd
class ReadExcel:
    """Convenience wrapper around a workbook opened with ``xlrd``.

    Exactly one sheet is "current" at any time (initially the first sheet of
    the workbook).  The single-sheet ``get_*`` helpers read from the current
    sheet; ``read_sheet_name`` / ``read_sheet_index`` switch it and refresh
    ``row_num`` / ``col_num``.

    NOTE(review): xlrd 2.0+ is reported to read only ``.xls`` files; opening
    ``.xlsx`` may require xlrd < 2.0 -- confirm against the installed version.
    """

    def __init__(self, file_path):
        """Open the workbook at *file_path* and select its first sheet.

        Raises:
            Exception: whatever ``xlrd.open_workbook`` raised.  The failure
                is reported on stdout and then propagated.
        """
        try:
            self.book = xlrd.open_workbook(file_path)
        except Exception:
            # Report and propagate the real cause.  Swallowing it here would
            # only defer the crash to the next line as an unrelated
            # AttributeError on ``self.book``.
            print('No File %s' % file_path)
            raise
        self.sheet_names = self.book.sheet_names()
        self.sheet_num = self.book.nsheets
        self.sheet = self.book.sheet_by_index(0)
        self._refresh_dimensions()

    def _refresh_dimensions(self):
        """Cache the row and column counts of the current sheet."""
        self.row_num = self.sheet.nrows
        self.col_num = self.sheet.ncols

    def read_sheet_name(self, sheet_name):
        """Make the sheet called *sheet_name* the current sheet.

        Best effort: if the lookup fails, a message is printed and the
        previously selected sheet stays current.
        """
        try:
            self.sheet = self.book.sheet_by_name(sheet_name)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            print("No Sheet %s" % sheet_name)
        # Refresh the cached row/column counts.
        self._refresh_dimensions()

    def read_sheet_index(self, sheet_index):
        """Make the sheet at position *sheet_index* the current sheet.

        Best effort: if the lookup fails, a message is printed and the
        previously selected sheet stays current.
        """
        try:
            self.sheet = self.book.sheet_by_index(sheet_index)
        except Exception:
            print("No Sheet Index %s" % sheet_index)
        # Refresh the cached row/column counts.
        self._refresh_dimensions()

    def get_sheet_names(self):
        """Return the sheet names captured when the workbook was opened."""
        return self.sheet_names

    def get_cell_value(self, row, col):
        """Return the value of the cell at (*row*, *col*) of the current sheet."""
        return self.sheet.cell_value(row, col)

    def get_row_data(self, row):
        """Return the values of row *row* of the current sheet."""
        return self.sheet.row_values(row)

    def get_col_data(self, col):
        """Return the values of column *col* of the current sheet."""
        return self.sheet.col_values(col)

    def get_sheet_rows_data(self):
        """Return every row of the current sheet as a list of row-value lists."""
        return [self.sheet.row_values(row) for row in range(self.row_num)]

    def get_sheet_cols_data(self):
        """Return every column of the current sheet as a list of column-value lists."""
        return [self.sheet.col_values(col) for col in range(self.col_num)]

    def get_file_rows_data(self):
        """Return the row data of every sheet in the workbook.

        Each entry is one sheet's rows followed by a trailing
        ``['SheetName', <name>]`` marker row.  Side effect: the last sheet
        visited is left as the current sheet.
        """
        data = []
        for name in self.sheet_names:
            self.read_sheet_name(name)
            sheet_data = self.get_sheet_rows_data()
            sheet_data.append(['SheetName', name])
            data.append(sheet_data)
        return data

    def get_file_cols_data(self):
        """Return the column data of every sheet in the workbook.

        Side effect: the last sheet visited is left as the current sheet.
        """
        data = []
        for index in range(self.sheet_num):
            self.read_sheet_index(index)
            data.append(self.get_sheet_cols_data())
        return data

    def get_choose_rows_data(self, start_index, end_index):
        """Return a column range of every row except row 0.

        *start_index* and *end_index* are forwarded to ``row_values`` as the
        column bounds (presumably end-exclusive, as with slicing -- confirm
        against the xlrd documentation).  Row 0 is skipped.
        """
        return [
            self.sheet.row_values(row, start_index, end_index)
            for row in range(1, self.row_num)
        ]

    def get_keys_rows_data(self, keys):
        """Return, for every row except row 0, the cells under the named columns.

        Row 0 is used as the header.  Columns are emitted in the order of
        *keys*; a key absent from the header is ignored, and a key matching
        several header cells contributes every matching column.  An empty
        sheet yields ``[]``, like the other row readers.
        """
        if self.row_num == 0:
            # No header row to read; avoid the IndexError from row_values(0).
            return []
        header = self.sheet.row_values(0)
        key_indexes = []
        for key in keys:
            for col, title in enumerate(header):
                if title == key:
                    key_indexes.append(col)
        return [
            [self.sheet.cell_value(row, col) for col in key_indexes]
            for row in range(1, self.row_num)
        ]
if __name__ == '__main__':
    # Load every row of the workbook's first sheet.
    xlsx = ReadExcel(R"D:\WorkFiles\项目工程\新疆援疆\第九批照片\第九批名单.xlsx")
    templi = xlsx.get_sheet_rows_data()
    # Drop the first two rows and the last row (presumably title/header rows
    # and a trailing footer -- confirm against the workbook).
    templi = templi[2:-1]
    # Keep only the rows whose second column is filled in.  The rows are
    # exported as-is; a per-cell base64 encoding used to be computed here but
    # its result was never stored or written, so it has been removed.
    li = [row for row in templi if row[1] != ""]
    print(li)
    # Write the JSON file.  The path is a raw string so the backslashes are
    # taken literally instead of relying on invalid escape sequences
    # (``\P``, ``\D``, ``\H``, ``\w``), which newer Python versions warn about.
    with open(R'D:\Program Files (x86)\DeskTop\Demo\HotfixDemo\wecatdata.json', 'w', encoding='utf-8') as f_obj:
        json.dump(li, f_obj, ensure_ascii=False)