python操作word实现小学拼音本功能

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/warrah/article/details/83509019

指导小孩拼音真是一件非常折磨人心志的事情,儿子刚上一年级,先学拼音、认字,但是一年级的课本内容也就那么多。有时候我会让他背书,书是背会了,但是字要注音,或者写,就没法训练了。所以我想了个办法,最近让他背诸葛亮的《诫子书》,字不多,背完我写个py脚本直接生成田字格,打印出来,让他注音。
首先执行命令pip install python-docx docxtpl(注意:代码中的import docx对应的安装包是python-docx,而不是docx;模板渲染还需要docxtpl),其他代码应该写得比较清楚,就不说明了。
模板截图如下:
1

'''
拼写处理
'''
import docx
import re
import time
from docxtpl import DocxTemplate
# Directory the input Word files are read from
IN_FILE_PATH = "G:\\dzmfile\\pythonwork\\small_routine\\others\\in\\"
# Directory holding the Word template files
TPL_FILE_PATH = "G:\\dzmfile\\pythonwork\\small_routine\\others\\tpl\\"
# Directory the generated Word files are written to
OUT_FILE_PATH = "G:\\dzmfile\\pythonwork\\small_routine\\others\\out\\"

def load_doc(file_name):
    '''
    Load a Word file from the input directory and return its paragraph texts.

    :param file_name: name of the Word file, appended to IN_FILE_PATH
    :return: list holding the text of each paragraph, in document order
    '''
    document = docx.Document(IN_FILE_PATH + file_name)
    # Keep only the plain text of every paragraph.
    texts = [paragraph.text for paragraph in document.paragraphs]
    print("段落数:" + str(len(texts)))
    return texts

def duplicate_removal(paragraphs=None):
    '''
    Collect the distinct characters that occur in the given paragraphs.

    Whitespace and the punctuation listed in the pattern below are removed
    from each paragraph first; every remaining character goes into the result.

    :param paragraphs: iterable of paragraph strings; None is treated as empty
    :return: set of the unique single characters that remain
    '''
    if paragraphs is None:
        # Avoid a shared mutable default argument.
        paragraphs = []
    # Raw string: the same pattern written as a normal string literal relies on
    # invalid escape sequences (\s, \., \!, \/), which newer Python versions
    # warn about. Compiled once, outside the loop.
    punctuation = re.compile(r"[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()]+")
    words = set()
    for para in paragraphs:
        # Iterating the cleaned string yields its individual characters.
        words.update(punctuation.sub("", para))
    print(words)
    print("单词去重后长度为:"+str(len(words)))
    return words

def to_doc(words,file_name):
    '''
    Render the characters into the Word template, eight per row, and save it.

    The output file is written to OUT_FILE_PATH and is named after
    ``file_name`` with a ``_YYYYmmddHHMMSS`` timestamp inserted before the
    extension.

    :param words: iterable of characters to place in the document
    :param file_name: name of the source file, used to derive the output name
    :return: None
    '''
    # Build the output file name. Split on the LAST dot only, so names that
    # contain several dots keep their real extension and names without any
    # dot no longer raise IndexError.
    stem, dot, ext = file_name.rpartition('.')
    if not dot:
        stem, ext = file_name, ''
    name = stem + '_' + time.strftime('%Y%m%d%H%M%S',time.localtime()) + dot + ext
    print("文件名称为:" + name)
    # Generate from the template
    tpl = DocxTemplate(TPL_FILE_PATH+"注音.docx")
    chars = list(words)
    per_row = 8
    # Slice into rows of `per_row` characters. Slicing also emits the final,
    # possibly shorter row, which the previous accumulate-and-flush loop
    # dropped. Rows are lists so the characters keep their order.
    rows = [
        {'cols': chars[start:start + per_row]}
        for start in range(0, len(chars), per_row)
    ]
    # NOTE(review): the template presumably loops over 'tbl_contents' and each
    # row's 'cols' -- confirm against 注音.docx.
    tpl.render({'tbl_contents': rows})
    tpl.save(OUT_FILE_PATH+name)
    print("文件生成成功")

def gen_train_form(file_name):
    '''
    Generate the practice sheet for one input Word file.

    Loads the file's paragraphs, reduces them to their unique characters and
    passes those characters on to be written out as a Word document.

    :param file_name: name of the input Word file
    :return: None
    '''
    unique_chars = duplicate_removal(load_doc(file_name))
    to_doc(list(unique_chars), file_name)

if __name__ == '__main__':
    # Script entry point: build the practice sheet for the sample document.
    gen_train_form("戒子书.docx")

诸葛亮的诫子书内容并不多,去重后生成的内容截图如下:
2

猜你喜欢

转载自blog.csdn.net/warrah/article/details/83509019