01 | # -*- coding: utf-8 -*- |
02 | import jieba.posseg as pseg |
# NOTE(review): this listing was copied from a rendered web page. Each line
# still carries the page's gutter number and a trailing '|', and the gutter
# numbers skip (e.g. 02 -> 08, 10 -> 14), so some original lines are not shown
# here. It is not runnable as-is.
08 | f_content = open (r 'D:/a/test.txt' , 'r' ) |
10 | for count, line in enumerate (f_content): |
# English rendering of the bare string below: "Read one line, replace the first
# noun in that line with the keyword, then write line by line to another file."
14 | '''读取一行,把一行里面的第一个名词替换成关键词,再逐行写入另一个文件''' |
17 | for j in range (lineNum + 1 ): # loop count equals the number of lines, i.e. the code below runs once per line |
# NOTE(review): the line index used is i, not the loop variable j; presumably
# i is initialised and incremented on lines not shown -- confirm.
18 | line_content = linecache.getline(r 'D:/a/test.txt' , i) # fetch the content of line i |
# NOTE(review): calling .decode() on the text read suggests Python 2 -- confirm.
19 | string = line_content.decode( 'gbk' ) |
22 | words = pseg.cut(string) |
24 | amount = 1 # amount acts as a flag so that only the first noun in a line is replaced; later ones are left alone |
26 | if w.flag = = 'n' and amount = = 1 : # 'n' is the word's part-of-speech flag (noun, per the description above) |
28 | w.word = 'haha' # 'haha' is the replacement keyword |
29 | result + = str (w.word) # neat trick: accumulate the words back into one string |
# NOTE(review): the output file is opened in append mode; no write()/close()
# call is visible in this excerpt.
30 | f = open ( 'd:/a/new.txt' , 'a' ) |
下面这段代码是在网络上看到的,原作者不详(并非本人)。它对了解 jieba 的用法很有帮助。
03 | import jieba.posseg as pseg |
# NOTE(review): extracted web listing; gutter numbers skip, so the assignments
# of result, t1 and t2 and the loop over words are on lines not shown here.
06 | f = open ( "t_with_splitter.txt" , "r" ) # open the input text for reading |
07 | string = f.read().decode( "utf-8" ) |
09 | words = pseg.cut(string) # run word segmentation |
12 | result + = str (w.word) + "/" + str (w.flag) # append the word followed by its part-of-speech tag |
14 | f = open ( "t_with_POS_tag.txt" , "w" ) # save the result to another file |
18 | print ( "分词及词性标注完成,耗时:" + str (t2 - t1) + "秒。" ) # report the result (elapsed time) |