# encoding=utf-8
import os
import time
def mkSubFile(lines, head, srcName, sub):
    """Write one chunk of a split file and return the next chunk number.

    The output path is built from ``srcName`` by inserting ``_<sub>`` before
    the extension (``data.csv`` with ``sub=1`` becomes ``data_1.csv``). An
    existing file at that path is overwritten.

    Args:
        lines: Iterable of text lines for this chunk; written as-is, no
            newline is appended.
        head: Header text written before ``lines``.
        srcName: Path of the file being split; only used to derive the
            output file name.
        sub: Sequence number of this chunk.

    Returns:
        ``sub + 1``, i.e. the sequence number to use for the next chunk.
    """
    des_filename, extname = os.path.splitext(srcName)
    filename = des_filename + '_' + str(sub) + extname
    print('make file: %s' % filename)
    # The source is read as UTF-8 by splitByLineCount, so write UTF-8 too
    # instead of relying on the platform's default encoding.
    with open(filename, 'w', encoding='utf-8') as fout:
        fout.write(head)
        fout.writelines(lines)
    return sub + 1
def splitByLineCount(filename, count):
    """Split a UTF-8 text file into numbered chunk files.

    The first line of ``filename`` is treated as a header and repeated at the
    top of every chunk. The remaining lines are grouped ``count`` at a time
    and each group is handed to ``mkSubFile``, starting with chunk number 1.
    A final, shorter group is written if any lines are left over.

    Args:
        filename: Path of the file to split.
        count: Number of non-header lines per chunk. A chunk is flushed only
            when its length equals ``count`` exactly, so a value below 1
            puts every line into a single chunk.
    """
    with open(filename, encoding="utf-8") as source:
        header = source.readline()
        part_number = 1
        pending = []
        for row in source:
            pending.append(row)
            if len(pending) != count:
                continue
            # Chunk is full: write it out and start collecting the next one.
            part_number = mkSubFile(pending, header, filename, part_number)
            pending = []
        # Flush whatever is left after the last full chunk.
        if pending:
            mkSubFile(pending, header, filename, part_number)
if __name__ == '__main__':
    # Split the hard-coded CSV into chunks of 600 data lines and report how
    # long it took. perf_counter() is monotonic, so the measured duration is
    # not affected by system clock adjustments the way time.time() is.
    begin = time.perf_counter()
    splitByLineCount('lidar40_list.json.csv', 600)
    end = time.perf_counter()
    print('time is %d seconds ' % (end - begin))
# 正则匹配字符 (matching characters with regular expressions) -- a separate script follows
# -*- coding: utf-8 -*-
import xlrd
import re
import json
# Collect pairs of horizontally adjacent cells from the first sheet of
# result.xlsx into a dict (left cell -> right cell) and save it as JSON.
# NOTE(review): xlrd 2.0+ no longer reads .xlsx files; this presumably
# relies on xlrd < 2.0 being installed -- confirm.
data = xlrd.open_workbook("result.xlsx")
table = data.sheets()[0]

# Leading run of characters that are NOT ASCII letters.
cn_pattern = re.compile(r"[^a-zA-Z]*")
# Leading ASCII letters, optional whitespace, then more ASCII letters.
en_pattern = re.compile(r"^[a-zA-Z]*\s{0,}[a-zA-Z]*")

result = {}
nrows = table.nrows
ncols = table.ncols
for i in range(nrows):
    for j in range(ncols - 1):
        cn = table.cell(i, j).value
        en = table.cell(i, j + 1).value
        # Cell values that are not text cannot be regex-matched; skip them
        # instead of failing with TypeError.
        if not (isinstance(cn, str) and isinstance(en, str)):
            continue
        # Both patterns can match the empty string, so match() never returns
        # None here; an empty match means the cell does not start with the
        # expected kind of character.
        if cn_pattern.match(cn).group() and en_pattern.match(en).group():
            result[cn] = en

print(result)
with open("result.json", "w", encoding="utf-8") as f:
    json.dump(result, f, indent=4, ensure_ascii=False)