-
Notifications
You must be signed in to change notification settings - Fork 21
Expand file tree
/
Copy pathcreate_word_cloud.py
More file actions
69 lines (55 loc) · 2.26 KB
/
create_word_cloud.py
File metadata and controls
69 lines (55 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import jieba
import codecs
import sys
import pandas
import numpy as np
from wordcloud import WordCloud
import imageio
from wordcloud import WordCloud, ImageColorGenerator
from os import listdir
from os.path import isfile, join
# UTF-8 text file read line by line in main(); each stripped line is treated
# as a stop word and excluded from the frequency count.
stopwords_filename = 'data/stopwords.txt'
# Font file handed to WordCloud as font_path.
font_filename = 'fonts/STFangSong.ttf'
# Directory that main() scans for .png/.jpg images to use as cloud masks.
template_dir = 'data/templates/'
def main(input_filename):
    """Render one word-cloud PNG per template image for a UTF-8 text file.

    The text is segmented with jieba and lower-cased; tokens of length <= 1
    and tokens listed in ``stopwords_filename`` are dropped.  The 100 most
    frequent remaining words are drawn once for every ``.png``/``.jpg`` file
    found in ``template_dir``, using that image both as the mask and as the
    colour source.  Each picture is saved in the current working directory
    as ``<template>_<input>.png`` where ``<template>`` is the template file
    name up to its first dot and ``<input>`` is the input file's base name
    without its final extension.

    Args:
        input_filename: Path of the UTF-8 text file to analyse.
    """
    # Function-scope import: the module header only imports ``isfile`` and
    # ``join`` from os.path.
    from os.path import basename, splitext

    # Context managers make sure both files are closed once they are read.
    with codecs.open(input_filename, 'r', 'utf-8') as infile:
        stripped_lines = (line.strip() for line in infile)
        content = '\n'.join(line for line in stripped_lines if line)
    with codecs.open(stopwords_filename, 'r', 'utf-8') as stopfile:
        stopwords = {line.strip() for line in stopfile}

    tokens = (seg.strip().lower() for seg in jieba.cut(content))
    words = [word for word in tokens
             if len(word) > 1 and word not in stopwords]

    if not words:
        # Nothing survived filtering; fit_words() cannot draw an empty
        # frequency table, so report the count and stop here.
        print('# of different words =', 0)
        return

    # Count occurrences per word and order them from most to least frequent.
    words_df = pandas.DataFrame({'word': words})
    words_stat = words_df.groupby(by=['word'])['word'].agg(np.size)
    words_stat = words_stat.to_frame()
    words_stat.columns = ['number']
    words_stat = words_stat.reset_index().sort_values(by="number", ascending=False)
    print('# of different words =', len(words_stat))

    # Use only the file's base name so that an input such as 'data/foo.txt'
    # or '../foo' does not leak directory parts into the output file name.
    input_prefix = splitext(basename(input_filename))[0]

    # The frequency table is the same for every template; build it once.
    top_words = dict(words_stat.head(100).itertuples(index=False))

    for template_name in listdir(template_dir):
        # Accept the extensions regardless of letter case (.PNG, .Jpg, ...).
        if not template_name.lower().endswith(('.png', '.jpg')):
            continue
        background_picture_filename = join(template_dir, template_name)
        if not isfile(background_picture_filename):
            continue

        prefix = template_name.split('.')[0]
        bimg = imageio.imread(background_picture_filename)
        # random_state is fixed so repeated runs produce the same layout.
        wordcloud = WordCloud(font_path=font_filename, background_color='white',
                              mask=bimg, max_font_size=600, random_state=100)
        wordcloud = wordcloud.fit_words(top_words)
        # Recolour the words with the colours of the template image.
        bimgColors = ImageColorGenerator(bimg)
        wordcloud.recolor(color_func=bimgColors)

        output_filename = prefix + '_' + input_prefix + '.png'
        print('Saving', output_filename)
        wordcloud.to_file(output_filename)
if __name__ == '__main__':
    # Exactly one command-line argument (the input text file) is expected;
    # anything else just prints the usage line.
    if len(sys.argv) != 2:
        print('[usage] <input>')
    else:
        main(sys.argv[1])