import jieba
import jieba.analyse
import sys
import codecs
reload(sys)
sys.setdefaultencoding('utf-8')
#使用其他編碼讀取停用詞表
#stoplist = codecs.open('../../file/stopword.txt','r',encoding='utf8').readlines()
#stoplist = set(w.strip() for w in stoplist)
#停用詞文件是utf8編碼
stoplist = {}.fromkeys([ line.strip() for line in open("../../file/stopword.txt") ])
#經過分詞得到的應該是unicode編碼,先將其轉成utf8編碼