當前位置:成語大全網 - 新華字典 - Python編程實現csv文件某一列的詞頻統計

Python編程實現csv文件某一列的詞頻統計

import re

import collections

import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

# Build a TF-IDF matrix from the text found in selected columns of a CSV file.

# To avoid path problems, pass the CSV file name as a full (absolute) path.
data = pd.read_csv('XXX.csv')

# Positional column slice [m, n) to analyse: a single column or a run of
# adjacent columns.  These are placeholder values that select only the first
# column -- adjust them to the column(s) you actually need.
m, n = 0, 1

# One document per CSV row: the selected cells are converted to strings and
# joined with single spaces.
trainheadlines = [
    ' '.join(str(x) for x in data.iloc[row, m:n])
    for row in range(len(data.index))
]

advancedvectorizer = TfidfVectorizer(
    # Keep every term that occurs in at least one document.  The integer 0 is
    # equivalent in meaning but is outside the range that recent scikit-learn
    # parameter validation accepts for integer min_df.
    min_df=1,
    # Must be the float 1.0 (a proportion: "at most 100% of documents").  The
    # integer 1 is an absolute count and would silently discard every term
    # that appears in more than one row.
    max_df=1.0,
    max_features=20000,
    ngram_range=(1, 1),  # unigrams only
)
advancedtrain = advancedvectorizer.fit_transform(trainheadlines)

# Shape is (number of documents, number of terms kept in the vocabulary).
print(advancedtrain.shape)