python_合并列名不完全相同的Excel文件
意思是:待合并的各个文件之间有一部分列名相同,但每个文件的列数并不相同。
from openpyxl import *
import xlrd
import xlwt
import pandas as pd
import csv
import os
# 合并表
from utils.read_write import eachFile, writeOneCSV, writeCSV, readCsv
# 子表
# wb2=load_workbook(fileName)
# #获取sheet
# table=wb2.get_sheet_by_name('Sheet1')
# #获取行列数
# rows=table.max_row
# cols=table.max_column
# #获取第一行单元格的值
# rowdata=[]
# for i in range(1,cols+1):
# cellvalue=table.cell(row=rows,column=i).value
# rowdata.append(cellvalue)
class excel():
    """Wrapper around the first worksheet of an openpyxl workbook.

    Row and column numbers taken by the read methods are handed straight to
    ``Worksheet.cell`` and are therefore 1-based.
    """

    def __init__(self, file):
        """Load ``file`` with ``load_workbook`` and select its first sheet.

        Args:
            file: Path of the workbook; also the path ``writeCellValue``
                saves back to.
        """
        self.file = file
        self.wb = load_workbook(self.file)
        # ``sheetnames`` is the supported replacement for the deprecated
        # ``get_sheet_names()`` method.
        self.sheet = self.wb.sheetnames[0]
        self.ws = self.wb[self.sheet]

    def getRowsClosNum(self):
        """Return ``(max_row, max_column)`` of the selected worksheet."""
        return self.ws.max_row, self.ws.max_column

    def getCellValue(self, row, column):
        """Return the value stored in the cell at ``row`` / ``column``."""
        return self.ws.cell(row=row, column=column).value

    def getColValues(self, column):
        """Return the values of ``column`` for rows 1 through ``max_row``."""
        return [
            self.ws.cell(row=row, column=column).value
            for row in range(1, self.ws.max_row + 1)
        ]

    def getRowValues(self, row):
        """Return the values of ``row`` for columns 1 through ``max_column``."""
        return [
            self.ws.cell(row=row, column=col).value
            for col in range(1, self.ws.max_column + 1)
        ]

    def writeCellValue(self, readCellvalue):
        """Write one value just past the used area and save the workbook.

        The value is placed in the active sheet at
        ``(max_row + 1, max_column + 1)``, i.e. one row below and one column
        to the right of the current extent.

        Args:
            readCellvalue: The value to store in that cell.
        """
        sheetActive = self.wb.active
        sheetActive.cell(
            row=sheetActive.max_row + 1,
            column=sheetActive.max_column + 1,
            value=readCellvalue,
        )
        # Save to the path this instance was opened from rather than to a
        # module-level ``fileName`` that only exists when run as a script.
        self.wb.save(self.file)
# def writeCellValue2(self,readCellvalue,cloumn_k):
# sheetActive=self.wb.active
# rows = sheetActive.max_row+1
# column=sheetActive.max_column+1
# # for data_cell in range(1,rows):
# # # sheetActive.cell(rows=rows,column=data_cell,value=readCellvalue)
# # sheetActive.cell(row=rows,column=data_cell,value=readCellvalue)
# # self.wb.save(fileName)
# sheetActive.cell(row=1,column=cloumn_k+1,value=readCellvalue)
# self.wb.save(fileName)
# return cloumn_k
class subexcel():
    """Read-only wrapper around the first sheet of an xlrd workbook.

    Row and column indices are forwarded unchanged to the xlrd sheet, which
    counts from 0.
    """

    def __init__(self, subfile):
        """Open ``subfile`` with ``xlrd.open_workbook`` and keep sheet 0."""
        self.subfile = subfile
        self.book = xlrd.open_workbook(self.subfile)
        self.sheets = self.book.sheet_by_index(0)

    def getRowsClosNum(self):
        """Return the number of rows in the sheet.

        Despite the name, only the row count is returned; callers in this
        file use the result directly as a ``range`` bound.
        """
        return self.sheets.nrows

    def getCellValue(self, row, column):
        """Return the value of the cell at ``row`` / ``column``."""
        return self.sheets.cell_value(rowx=row, colx=column)

    def getRowValues(self, row):
        """Return all values of ``row`` as a list."""
        return self.sheets.row_values(row)

    def getColValues(self, column):
        """Return the values of ``column`` starting at row index 2.

        The first two rows (indices 0 and 1) are skipped.
        """
        return self.sheets.col_values(column, 2)
def mergeCsv(src, writeFile):
    """Pass the non-header rows of every CSV under ``src`` to ``writeCSV``.

    Args:
        src: Directory prefix; each name from ``eachFile(src)`` is appended
            to it by plain string concatenation, so it must already end with
            a path separator.
        writeFile: Destination handed to ``writeCSV`` once per input file
            (presumably appended to; confirm against ``utils.read_write``).
    """
    for name in eachFile(src):
        table = readCsv(src + name)
        # Row 0 is left out; everything after it is forwarded unchanged.
        body_rows = [table[idx] for idx in range(1, len(table))]
        writeCSV(body_rows, writeFile)
def merge(src, writeFile, titles=None):
    """Merge workbooks whose columns are a subset of the master titles.

    For every workbook under ``src`` the header is read from row index 2 and
    the data from row index 3 onwards. Each output row has one entry per
    master title: the cell of the source column carrying the same title, or
    ``0`` when the workbook has no such column.

    Args:
        src: Directory prefix; file names from ``eachFile(src)`` are appended
            by string concatenation, so it must end with a path separator.
        writeFile: Destination handed to ``writeCSV`` once per workbook
            (presumably appended to; confirm against ``utils.read_write``).
        titles: Master column titles defining the output layout. When
            omitted, the module-level ``combine_title`` built by the script
            entry point is used.
    """
    if titles is None:
        # Backward-compatible fallback for the script entry point, which
        # defines ``combine_title`` as a module global before calling in.
        titles = combine_title
    # Newline plus the private-use glyphs that are removed from text cells.
    junk = str.maketrans('', '', '\n\ue5e5\ue2e8\ue134\ue128\ue3b8\ue370')
    for file in eachFile(src):
        sheet = subexcel(src + file)
        sub_title = sheet.getRowValues(2)
        rows = sheet.getRowsClosNum()
        # Map master column index -> source column index. Looking the source
        # column up directly keeps the mapping correct even when the workbook
        # contains columns that match no master title. First match wins.
        source_col = {}
        for i, sub_name in enumerate(sub_title):
            for k, master_name in enumerate(titles):
                if master_name == sub_name:
                    source_col.setdefault(k, i)
        merged = []
        for n in range(3, rows):
            record = []
            # One output column per master title, so rows line up with the
            # header that is written from the same title list.
            for k in range(len(titles)):
                if k not in source_col:
                    record.append(0)
                    continue
                value = sheet.getCellValue(n, source_col[k])
                if isinstance(value, str):
                    value = value.translate(junk)
                record.append(value)
            merged.append(record)
        writeCSV(merged, writeFile)
def mergeSame(src, writeFile, columns=61):
    """Concatenate workbooks that all share the same column layout.

    For every workbook under ``src`` the rows from index 3 onwards are read,
    text cells are stripped of newlines and a fixed set of private-use
    glyphs, and the rows are handed to ``writeCSV``.

    Args:
        src: Directory prefix; file names from ``eachFile(src)`` are appended
            by string concatenation, so it must end with a path separator.
        writeFile: Destination handed to ``writeCSV`` once per workbook
            (presumably appended to; confirm against ``utils.read_write``).
        columns: Number of leading columns to copy from each row. Defaults
            to 61, the width previously hard-coded here.
    """
    # Newline plus the private-use glyphs that are removed from text cells.
    junk = str.maketrans('', '', '\n\ue5e5\ue2e8\ue134\ue128\ue3b8\ue370')
    for file in eachFile(src):
        sheet = subexcel(src + file)
        rows = sheet.getRowsClosNum()
        collected = []
        for n in range(3, rows):
            record = []
            for m in range(columns):
                value = sheet.getCellValue(n, m)
                if isinstance(value, str):
                    value = value.translate(junk)
                record.append(value)
            collected.append(record)
        writeCSV(collected, writeFile)
def getAllFile():
    """Run ``mergeSame`` over the hard-coded source directory into '数据.csv'."""
    # Every backslash must be doubled: a bare '\2019' would be parsed as the
    # octal escape '\201' followed by '9', silently corrupting the path.
    src = 'C:\\Users\\2019年\\all\\'
    mergeSame(src, '数据.csv')
def writeWrong():
    """Read the CSV at 'D' and hand its rows to ``writeCSV`` for 'luanma.csv'."""
    writeCSV(readCsv('D'), 'luanma.csv')
if __name__ == "__main__":
    # Alternative entry point left disabled by the author: writeWrong()
    # Master workbook (noted by the author as having 89 columns); row 2 of
    # its first sheet supplies the column titles for the merged output.
    fileName = r'D:\projec.xlsx'
    list1 = excel(fileName).getRowValues(2)
    # Keep only the non-empty header cells.
    combine_title = [title for title in list1 if title]
    # Write the header row first, then run the merge.
    writeOneCSV(combine_title, '人口数据.csv')
    getAllFile()