概述
>>> import re
>>> import jieba.posseg as pseg
>>> f = open('E:/序言.txt','r').read()
>>> words = pseg.cut(f)
>>> l = []
>>> m = []
>>> for w in words:
... x = w.word
... y = w.flag
... l.append((x))
... m.append((y))
...
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\oil\AppData\Local\Temp\jieba.cache
Loading model cost 0.893 seconds.
Prefix dict has been built succesfully.
>>> print(l)
['美国版', '序言', '\n', '\n', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '\n', '\n', '\u3000', '\u3000', ' 目前', '已经', '有', '不少', '部', '哲学史', '了', ',', '我', '的', '目的', '并', '不是', '要', '仅仅', '在', '它们', '之中', '再', '加上', '一部', '。', '我', '的', '目的', '是', '要', '揭示', ',', '哲学', '乃是', '社会', '生活', '与', '政治', '生活', '的', '一个', '组成部分', ':', '它', '并', '不是', '卓越', '的', '个人', '所', '做出', '的', '孤立', '的', '思考', ',', '而是', '曾经', '有', '各种', '体系', '盛行', '过', '的', '各种', '社会', '性格', '的', '产物', '与', '成因', '。', '这', '一', '目的', '就', '要求', '我们', '对于', '一般', '历史',
----------------------------------------------
>>> for line in lines:
... words = pseg.cut(line)
... print(words)
...
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
<generator object cut at 0x0000019655658048>
<generator object cut at 0x00000196556580A0>
>>>
--------------------------------------------
>>> import jieba.posseg as pseg
>>> import re
>>> l = []
>>> m = []
>>> f = open("E:/序言.txt",'r').read()
>>> c = "。"
>>> lines = f.split(c)
>>> s = open("E:/序言++.txt",'a+')
>>> for line in lines:
... words = pseg.cut(line)
... for w in words:
... x = w.word
... y = w.flag
... print(x,y,file = s)
...
Building prefix dict from the default dictionary ...
Dumping model to file cache C:UsersoilAppDataLocalTempjieba.cache
Loading model cost 1.096 seconds.
Prefix dict has been built succesfully.
>>> s.close()
>>>
难道要把 txt 分割吗?越来越麻烦了 = =,暂时没有解决,也就先放一下了。这样的词性标注对我来说就一点用都没有了 = =(暂时)
最后
以上就是快乐含羞草为你收集整理的python【】词性标注横排的全部内容,希望文章能够帮你解决python【】词性标注横排所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复