概述
import os
with open('DouJuan', encoding='utf-8') as file_obj:
contents = file_obj.read()
print(contents)
string = contents.lower()
string = string.replace("n"," ")
# 删除字符串内除了字母与空格的字符 例如@#$%^&*()_+:<>.|}""?这些特殊符号
newstring = ""
for i in string:
if((i >= 'a' and i <='z') or i == ' '):
newstring += i
# print(newstring)
words = newstring.split(sep=' ')
#虚词数组,用于存放虚词这种没有统计意义的词,再用于过滤掉这些词
FunctionWord = [""," ","the","a","an","and","of","to","in","at","but","as","with","or"]
# print(words)
statisticsWords = ["start"]
for i in words:
if(not(i in FunctionWord)):
statisticsWords.append(i)
print(statisticsWords)
print(type(words))
# 统计单词个数并存到字典中
wordsDict = {}
for i in statisticsWords:
if(not(i in wordsDict.keys())):
wordsDict[i] = 1
else:
wordsDict[i] += 1
print(wordsDict)
# z=zip(wordsDict.values(),wordsDict.keys())
# sorted(z)
wordsDict = sorted(wordsDict.items(),key=lambda x:x[1],reverse=True)
statisticsDescirbe = ""
for i in wordsDict:
statisticsDescirbe += str(i[0]) + " : " + str(i[1]) + 'n'
print(statisticsDescirbe)
with open("/root/test1/XXXXXXXXXXXX","w") as file_object:
file_object.write(statisticsDescirbe)
print("done")
最后
以上就是现实香烟为你收集整理的python 统计小说词数(完整,无bug,优化版)的全部内容,希望文章能够帮你解决python 统计小说词数(完整,无bug,优化版)所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复