版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/warrah/article/details/83509019
指导小孩拼音真是一件非常折磨人心志的事情,儿子刚上一年级,先学拼音、认字,但是一年级的课本内容也就那么多。有时候我会让他背书,书是背会了,但是字要注音,或者写,就没法训练了。所以我想了个办法,最近让他背诸葛亮的《诫子书》,字不多,背完我写个py脚本直接生成田字格,打印出来,让他注音。
首先执行命令 pip install python-docx docxtpl(注意:代码中 import docx 对应的安装包是 python-docx,而不是 docx;模板渲染还需要 docxtpl),其他代码应该写得比较清楚,就不说明了。
模板截图如下:
'''
Pinyin practice sheet helper: reads a Word document, collects its unique
characters and renders them into a Word template.
'''
import docx
import re
import time
from docxtpl import DocxTemplate
# Directory that holds the input Word documents
IN_FILE_PATH = "G:\\dzmfile\\pythonwork\\small_routine\\others\\in\\"
# Directory that holds the Word templates
TPL_FILE_PATH = "G:\\dzmfile\\pythonwork\\small_routine\\others\\tpl\\"
# Directory the generated documents are written to
OUT_FILE_PATH = "G:\\dzmfile\\pythonwork\\small_routine\\others\\out\\"
def load_doc(file_name):
    '''
    Load a Word file from the input directory and collect its paragraph texts.

    :param file_name: name of the Word file inside IN_FILE_PATH
    :return: list holding the text of each paragraph, in document order
    '''
    document = docx.Document(IN_FILE_PATH + file_name)
    print("段落数:" + str(len(document.paragraphs)))
    # Keep only the plain text of every paragraph
    return [paragraph.text for paragraph in document.paragraphs]
# Whitespace plus ASCII and full-width punctuation that must not end up on the
# practice sheet. Raw string so the regex escapes are not (invalid) string
# escapes; compiled once instead of per paragraph.
_PUNCTUATION_RE = re.compile(
    r"[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()]+"
)


def duplicate_removal(paragraphs=None):
    '''
    Collect the distinct characters of all paragraphs, ignoring whitespace
    and punctuation.

    :param paragraphs: iterable of paragraph strings; None or empty yields
        an empty set
    :return: set of the unique characters found (unordered)
    '''
    words = set()
    # `None` default instead of a shared mutable list
    for para in paragraphs or ():
        # A string iterates character by character, so update() adds each one
        words.update(_PUNCTUATION_RE.sub("", para))
    print(words)
    print("单词去重后长度为:" + str(len(words)))
    return words
def _split_rows(words, row_size=8):
    '''Split *words* into consecutive rows of at most *row_size* items.'''
    return [
        {'cols': words[start:start + row_size]}
        for start in range(0, len(words), row_size)
    ]


def to_doc(words, file_name):
    '''
    Render the characters into the Word template and save the result in the
    output directory.

    The characters are laid out 8 per table row. Every row, including a
    final shorter one, is passed to the template, and each row is a list so
    the characters keep the order they were given in.

    :param words: sequence of characters to place on the sheet
    :param file_name: name of the source file; the output name is derived
        from it by inserting a timestamp before the extension
    :return: None
    '''
    # Build the output name: <stem>_<timestamp>.<extension>
    stem, dot, extension = file_name.rpartition('.')
    if not dot:
        # No extension given: the rendered file is a Word document
        stem, extension = file_name, 'docx'
    timestamp = time.strftime('%Y%m%d%H%M%S', time.localtime())
    name = stem + '_' + timestamp + '.' + extension
    print("文件名称为:" + name)

    # Generate from the template
    tpl = DocxTemplate(TPL_FILE_PATH + "注音.docx")
    doc_contents = {'tbl_contents': _split_rows(list(words))}
    tpl.render(doc_contents)
    tpl.save(OUT_FILE_PATH + name)
    print("文件生成成功")
def gen_train_form(file_name):
    '''
    Build the practice sheet for one input document: load its paragraphs,
    reduce them to unique characters and write them into the template.

    :param file_name: name of the input Word file
    :return: None
    '''
    unique_chars = duplicate_removal(load_doc(file_name=file_name))
    to_doc(words=list(unique_chars), file_name=file_name)
if __name__ == "__main__":
    # Script entry point: generate the sheet for the hard-coded input file
    file_name = "戒子书.docx"
    gen_train_form(file_name=file_name)
诸葛亮的诫子书内容并不多,去重后生成的内容截图如下: