wordcloud_fromtxt.py
# The non-interactive "Agg" backend is selected before pyplot is imported so the
# script also runs on machines without a display.
from wordcloud import WordCloud
from datetime import datetime
import csv
import json
import random
import neologdn
import re
from os import path
import MeCab
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
def color_func(word, font_size, position, orientation, random_state, font_path, **kwargs):
    colors = ["#1E88E5", "#FDD835", "#f44336", "#66BB6A"]  # colors used for the words in the image
    return random.choice(colors)
def analyzeTweet(dfile):
    fname = dfile.replace("'", "")  # drop any quotes pasted around the path
    sname = datetime.now().strftime("%Y%m%d%H%M%S")  # file name used when saving
    mecab = MeCab.Tagger()
    words = []
    with open(fname, 'r', encoding="utf-8") as f:
        reader = f.readline()
        while reader:
            normalized_text = neologdn.normalize(reader)
            text_without_url = re.sub(
                r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+', '', normalized_text)
            node = mecab.parseToNode(text_without_url)
            while node:
                word_type = node.feature.split(",")[0]
                if word_type in ["動詞", "形容詞", "形容動詞"]:
                    # verbs, adjectives and adjectival verbs are collected by their
                    # base form (feature index 7; depends on the MeCab dictionary in use)
                    words.append(node.feature.split(",")[7])
                elif word_type in ["名詞", "副詞"]:
                    # nouns and adverbs are collected by their surface form
                    words.append(node.surface)
                node = node.next
            reader = f.readline()
    with open('config.json', 'r', encoding="utf-8") as json_file:
        json_obj = json.load(json_file)
    font_path = json_obj['Font_path']
    txt = " ".join(words)
    stop_words = []  # words to keep out of the image
    wordcloud = WordCloud(color_func=color_func,
                          font_path=font_path,
                          width=1920,        # output image width
                          height=1080,       # output image height
                          min_font_size=6,   # smallest font size
                          stopwords=set(stop_words),
                          collocations=False,
                          background_color="white").generate(txt)  # background color
    sname = sname + ".png"  # change the extension to save in a different format
    wordcloud.to_file(path.join(path.dirname(__file__), sname))
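# A minimal config.json this script assumes; the font path below is only an
# illustrative example and depends on the machine:
#
#   {
#       "Font_path": "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc"
#   }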
if __name__ == '__main__':
    print('====== Enter Tweet Data file =====')
    dfile = input('> ')
    analyzeTweet(dfile)
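# Example session (the input file name is hypothetical):
#
#   $ python wordcloud_fromtxt.py
#   ====== Enter Tweet Data file =====
#   > tweets.txt
#
# The word cloud is written next to this script as a timestamped PNG,
# e.g. 20240101123456.png.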