#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import json
import re
import sys
from collections import Counter

import jieba
# Python 2 only: make UTF-8 the default codec for implicit str/unicode
# conversions (e.g. when printing unicode text). Python 3 has no `reload`
# builtin and no `sys.setdefaultencoding`, so the branch is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Input text file; it is read as UTF-8.
filename = "rowss.txt"

# Runs of whitespace and of the listed punctuation marks; every match is
# deleted before segmentation. Backslashes are doubled so the compiled
# pattern is identical to the one the script has always used.
# NOTE(review): the second class lists ASCII "!", ",", "?", "(", ")" next to
# CJK marks -- confirm whether full-width forms were intended as well.
PUNCTUATION_RE = re.compile(
    u"[\\s+\\.\\!\\/_,$%^*(+\"']+|[+——!,。?、|~@#¥%……&*()]+"
)


def strip_punctuation(text):
    """Return *text* with all whitespace and listed punctuation removed.

    Args:
        text: Unicode text to clean.

    Returns:
        The text with every match of ``PUNCTUATION_RE`` deleted.
    """
    return PUNCTUATION_RE.sub(u"", text)


def count_words(words):
    """Count occurrences of each word.

    Args:
        words: Iterable of hashable tokens.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
    """
    return Counter(words).most_common()


def main():
    """Segment the input text with jieba and report word frequencies.

    Reads ``filename``, strips whitespace/punctuation, segments the result
    with ``jieba.cut`` and then:

    * writes the space-separated tokens to ``row2.txt``;
    * prints ``word count`` for every distinct token and writes the same
      lines to ``row4.txt``, most frequent first;
    * prints the five most frequent tokens as a JSON array.

    Raises:
        IOError/OSError: if the input cannot be read or an output file
            cannot be written.
        UnicodeDecodeError: if the input file is not valid UTF-8.
    """
    with io.open(filename, encoding="utf-8") as source:
        text = strip_punctuation(source.read())

    words = list(jieba.cut(text))

    # Each file is closed by its `with` block, so row2.txt is complete on
    # disk as soon as this block exits.
    with io.open("row2.txt", "w", encoding="utf-8") as segmented_out:
        segmented_out.write(u" ".join(words))

    # Count from the in-memory tokens rather than re-reading row2.txt:
    # all whitespace was stripped above, so splitting that file on spaces
    # would yield exactly these tokens.
    ranked = count_words(words)

    with io.open("row4.txt", "w", encoding="utf-8") as counts_out:
        for word, count in ranked:
            line = u"{0} {1}".format(word, count)
            print(line)
            counts_out.write(line + u"\n")

    # ensure_ascii=False keeps CJK characters readable in the output.
    print(json.dumps(ranked[:5], ensure_ascii=False))


if __name__ == "__main__":
    main()
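# Copyright notice: this is an original article by zhangmary, licensed under
# CC 4.0 BY-SA. Reproductions must include a link to the original source and
# this notice.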