版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/my_xxh/article/details/81837686
需求:从文本文件中,提取由符号 | 和 ─ 拼接而成的表格里的数字。
#!/usr/bin/python2.7
# coding=utf-8
import re
import xlsxwriter
import sys
# Python 2 only idiom: reload ``sys`` so that ``setdefaultencoding`` is
# callable again, then make UTF-8 the default codec for implicit
# str <-> unicode conversions. ``reload`` is not a builtin on Python 3.
reload(sys)
sys.setdefaultencoding('utf8')
def _clean_cell(cell):
    """Strip surrounding whitespace and drop thousands separators from *cell*."""
    return cell.strip().replace(',', '')


def _extract_fields(table_line):
    """Return the cleaned fields 7 and 9 of a ``|``-separated table line.

    Returns ``None`` when the field right after the leading ``|`` is blank
    or when the line has fewer than ten fields, so callers never index past
    the end of a short row.
    """
    fields = table_line.split('|')
    if len(fields) < 10 or not fields[1].strip():
        return None
    return _clean_cell(fields[7]), _clean_cell(fields[9])


def _as_number(text):
    """Return *text* as a float if it is made of digits and dots, else *text*."""
    if not text.replace('.', '').isdigit():
        return text
    try:
        return float(text)
    except ValueError:
        # Digits and dots only, but not a valid number (e.g. "1.2.3"):
        # keep the text instead of aborting the whole run.
        return text


def Search_TextData(source_path=u'./需求/201801',
                    excel_path=u'./excel/Text_Data.xlsx',
                    log_path='./201802.txt'):
    """Extract two columns from a ``|``-delimited text table into an Excel file.

    Every non-blank line of *source_path* that starts with ``|`` is echoed to
    stdout and appended to *log_path* (the processing log). When the field
    right after the leading ``|`` is non-empty, fields 7 and 9 (0-based,
    after splitting on ``|``) are cleaned of surrounding whitespace and
    commas and stored as one worksheet row: column 0 holds field 7 (as a
    float when it consists only of digits and dots, otherwise as text) and
    column 1 holds field 9 as text.

    Table lines with fewer than ten fields are logged but not written to the
    worksheet. The workbook is written only after the input has been read
    completely.

    Args:
        source_path: Text file containing the table.
        excel_path: Destination ``.xlsx`` file.
        log_path: File that receives a copy of every table line read.
    """
    extracted = []
    with open(source_path, "r") as source, open(log_path, 'a+') as log:
        # NOTE(review): the original listing lost its indentation; the
        # printed counter is assumed to advance once per input line
        # (i.e. it is the 1-based line number) -- confirm.
        for rows, raw_line in enumerate(source, 1):
            line = raw_line.strip()
            if not line.startswith('|'):
                continue
            print(rows)
            print("读取数据为:%s" % line)
            log.write(line + "\n")

            pair = _extract_fields(line)
            if pair is None:
                continue
            first, second = pair
            print("%s %s %s" % (rows, first, second))
            extracted.append(pair)

    workbook = xlsxwriter.Workbook(excel_path)
    try:
        worksheet = workbook.add_worksheet()
        for nrows, (first, second) in enumerate(extracted):
            worksheet.write(nrows, 0, _as_number(first))
            worksheet.write(nrows, 1, second)
    finally:
        # Always finalize the workbook so its resources are released.
        workbook.close()
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    Search_TextData()
[1] python正则表达式split()方法详解
[2] python 过滤中文、英文标点特殊符号