版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Kangyucheng/article/details/80948028
3. 在“3_人民日报语料”中统计“日语借词”的词频;
Python方法
# -*- coding: utf-8 -*-
"""Count how often Japanese loanwords occur in the People's Daily corpus.

The script reads a loanword list (one word per line) and a corpus whose lines
are whitespace-separated tokens, then prints the number of occurrences of
each loanword found in the corpus as a single JSON object.
"""
import io
import json
from collections import Counter

LOANWORDS_PATH = 'japanese_words.txt'  # list of Japanese loanwords
CORPUS_PATH = '3.txt'  # People's Daily corpus

# The inputs are treated as UTF-8 text.  The "-sig" variant also accepts a
# leading byte-order mark, which would otherwise stick to the first word of
# the list and keep it from ever matching.
INPUT_ENCODING = 'utf-8-sig'


def load_loanwords(path):
    """Return the set of loanwords stored one per line in the file *path*.

    Surrounding whitespace is stripped from every entry, so both LF and CRLF
    line endings are handled.  Corpus tokens come from ``str.split()`` and
    therefore never carry whitespace themselves, so stripping cannot lose a
    possible match.  Blank lines are skipped; otherwise the empty string
    would count as a loanword and match tokens that begin with ``/``.
    """
    with io.open(path, encoding=INPUT_ENCODING) as handle:
        stripped = (line.strip() for line in handle)
        return {word for word in stripped if word}


def count_loanwords(lines, loanwords):
    """Count the occurrences of *loanwords* in an iterable of corpus lines.

    Each line is split on whitespace into tokens, and the text before the
    first ``/`` of a token is taken as the word (the remainder -- presumably
    a part-of-speech tag -- is ignored).  Only words contained in *loanwords*
    are counted.

    *loanwords* should be a set so each membership test is O(1).
    Returns a ``collections.Counter`` mapping word -> number of occurrences.
    """
    counts = Counter()
    for line in lines:
        for token in line.split():
            word = token.split('/')[0]
            if word in loanwords:
                counts[word] += 1
    return counts


def main(loanwords_path=LOANWORDS_PATH, corpus_path=CORPUS_PATH):
    """Print the loanword frequencies of the corpus as a JSON object."""
    loanwords = load_loanwords(loanwords_path)
    # The corpus is streamed line by line rather than read into memory.
    with io.open(corpus_path, encoding=INPUT_ENCODING) as corpus:
        counts = count_loanwords(corpus, loanwords)
    # ensure_ascii=False keeps the Chinese words readable in the output
    # instead of escaping them as \uXXXX sequences.
    print(json.dumps(counts, ensure_ascii=False))


if __name__ == '__main__':
    main()