基本知识
- 分词(`jieba.cut`)
- 关键字提取(`jieba.analyse.extract_tags(sentence, topK=20, withWeight=False, allowPOS=())`)
- 情感分析
可视化
词云
- Tableau
- pyecharts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
def plot_word_cloud(data, swords):
    """Build a word cloud of the 100 most frequent words in ``data['content']``.

    Every entry of ``data['content']`` is segmented with ``jieba.cut``.
    Tokens of length 1 and tokens contained in ``swords`` are discarded
    before the remaining tokens are counted.

    Args:
        data: Object whose ``'content'`` item iterates over the text entries
            to analyse (presumably a pandas DataFrame with a string column;
            confirm against the caller).
        swords: Container of stop words to exclude from the cloud.

    Returns:
        The ``WordCloud`` object with a single unnamed series added. It is
        not rendered here; the caller chooses how to display it.
    """
    # A list/tuple of stop words would cost a linear scan per token; a
    # frozenset makes the test O(1). Other container types are left alone so
    # their own ``in`` semantics are preserved.
    if isinstance(swords, (list, tuple)):
        swords = frozenset(swords)

    # Segment each entry separately: concatenating all entries with '' lets
    # the tokenizer form spurious words out of the last characters of one
    # entry and the first characters of the next.
    counts = Counter(
        word
        for text in data['content']
        for word in jieba.cut(text)
        if len(word) > 1 and word not in swords
    )

    # [word, count] pairs, most frequent first, capped at 100 entries.
    top_words = [[word, count] for word, count in counts.most_common(100)]

    wordcloud = WordCloud()
    wordcloud.add("", top_words, word_size_range=[20, 100])
    return wordcloud
# Word cloud restricted to rows whose 'score' is greater than 6; the chart is
# handed to render_notebook() and, as the final expression, its result is the
# value of this cell.
plot_word_cloud(
    data=data[data['score'] > 6],
    swords=swords,
).render_notebook()