From 5ed36788ca29aaf7015ebf5c5a0ada6f27328ad3 Mon Sep 17 00:00:00 2001 From: Osamu Takiya Date: Fri, 17 Jun 2022 22:22:50 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=8E=B8=202021=E5=B9=B4=E3=81=AE?= =?UTF-8?q?=E3=82=A2=E3=83=97=E3=83=AA=E3=81=8B=E3=82=89DB=E3=81=A8?= =?UTF-8?q?=E3=83=A2=E3=83=87=E3=83=AB=E3=82=92=E7=A7=BB=E6=A4=8D=20(#43)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/models/analyze_syntax.rb | 358 ++++++++++++++++++ .../analyze_syntax_response/sentence.rb | 15 + app/models/analyze_syntax_response/token.rb | 22 ++ app/models/asset.rb | 5 + app/models/character.rb | 7 + app/models/character_nickname.rb | 4 + app/models/character_product.rb | 4 + app/models/direct_message.rb | 48 +++ app/models/hashtag.rb | 7 + app/models/in_tweet_url.rb | 3 + app/models/mention.rb | 7 + app/models/nickname.rb | 4 + app/models/product.rb | 20 + app/models/tweet.rb | 146 +++++++ app/models/user.rb | 41 ++ db/migrate/20210418071020_create_users.rb | 17 + db/migrate/20210530044047_create_tweets.rb | 31 ++ db/migrate/20210530053416_create_hashtags.rb | 13 + .../20210530053438_create_in_tweet_urls.rb | 13 + db/migrate/20210530054154_create_assets.rb | 14 + .../20210530054844_create_direct_messages.rb | 18 + db/migrate/20210530080905_create_versions.rb | 36 ++ db/migrate/20210530085016_create_mentions.rb | 13 + ...name_column_from_sent_at_to_messaged_at.rb | 5 + ...8_add_response_column_to_direct_message.rb | 5 + ...210607090631_add_born_at_column_to_user.rb | 5 + .../20210607214306_create_analyze_syntaxes.rb | 15 + .../20210608083648_create_characters.rb | 15 + db/migrate/20210608083656_create_products.rb | 16 + db/migrate/20210608083702_create_nicknames.rb | 11 + ...0210608083713_create_character_products.rb | 10 + ...210608083720_create_character_nicknames.rb | 10 + ...e_from_direct_message_to_analize_syntax.rb | 5 + db/schema.rb | 169 +++++++++ 34 files changed, 1112 insertions(+) create mode 100644 
app/models/analyze_syntax.rb create mode 100644 app/models/analyze_syntax_response/sentence.rb create mode 100644 app/models/analyze_syntax_response/token.rb create mode 100644 app/models/asset.rb create mode 100644 app/models/character.rb create mode 100644 app/models/character_nickname.rb create mode 100644 app/models/character_product.rb create mode 100644 app/models/direct_message.rb create mode 100644 app/models/hashtag.rb create mode 100644 app/models/in_tweet_url.rb create mode 100644 app/models/mention.rb create mode 100644 app/models/nickname.rb create mode 100644 app/models/product.rb create mode 100644 app/models/tweet.rb create mode 100644 app/models/user.rb create mode 100644 db/migrate/20210418071020_create_users.rb create mode 100644 db/migrate/20210530044047_create_tweets.rb create mode 100644 db/migrate/20210530053416_create_hashtags.rb create mode 100644 db/migrate/20210530053438_create_in_tweet_urls.rb create mode 100644 db/migrate/20210530054154_create_assets.rb create mode 100644 db/migrate/20210530054844_create_direct_messages.rb create mode 100644 db/migrate/20210530080905_create_versions.rb create mode 100644 db/migrate/20210530085016_create_mentions.rb create mode 100644 db/migrate/20210607054640_rename_column_from_sent_at_to_messaged_at.rb create mode 100644 db/migrate/20210607054908_add_response_column_to_direct_message.rb create mode 100644 db/migrate/20210607090631_add_born_at_column_to_user.rb create mode 100644 db/migrate/20210607214306_create_analyze_syntaxes.rb create mode 100644 db/migrate/20210608083648_create_characters.rb create mode 100644 db/migrate/20210608083656_create_products.rb create mode 100644 db/migrate/20210608083702_create_nicknames.rb create mode 100644 db/migrate/20210608083713_create_character_products.rb create mode 100644 db/migrate/20210608083720_create_character_nicknames.rb create mode 100644 db/migrate/20210611051011_add_reference_from_direct_message_to_analize_syntax.rb create mode 100644 db/schema.rb diff 
--git a/app/models/analyze_syntax.rb b/app/models/analyze_syntax.rb new file mode 100644 index 00000000..88731e14 --- /dev/null +++ b/app/models/analyze_syntax.rb @@ -0,0 +1,358 @@ +require 'nkf' + +class AnalyzeSyntax < ApplicationRecord + belongs_to :tweet, optional: true + belongs_to :direct_message, optional: true + + def convert_analyze_syntax_response_sentence_objects + hashed_sentences.map do |hashed_sentence| + hashed_sentence.merge!(analyze_syntax_id: id) + + # インスタンスが持つ属性が hashed_sentence の keys となる + AnalyzeSyntaxResponse::Sentence.new(hashed_sentence) + end + end + + def convert_analyze_syntax_response_token_objects + hashed_tokens.map do |hashed_token| + hashed_token.merge!(analyze_syntax_id: id) + + # インスタンスが持つ属性が hashed_token の keys となる + AnalyzeSyntaxResponse::Token.new(hashed_token) + end + end + + def hashed_tokens + tokens.map { |token| JSON.parse(token) } + end + + def hashed_sentences + sentences.map { |sentence| JSON.parse(sentence) } + end + + # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity + # 完全一致でキャラ名一覧と比較するので、最大限に広く word を持つようにしている + def check_words + # TODO: 検知できていない単語例 + # 「主人公」(単体で現れると前後関係や他の単語と併せて検討する必要がある) + ( + words_with_noun_and_punct_and_noun_tags + + words_with_noun_and_punct_and_noun_tags.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_noun_and_punct_and_noun_tags.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_noun_and_punct_and_noun_tags.map { |word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_noun_and_punct_and_noun_tags.map { |word| remove_beginning_unnecesary_strings(word) } + + words_with_basic_filters + + words_with_basic_filters.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_basic_filters.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_basic_filters.map { |word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_basic_filters.map { 
|word| remove_beginning_unnecesary_strings(word) } + + words_with_noun_and_affix_tags + + words_with_noun_and_affix_tags.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_noun_and_affix_tags.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_noun_and_affix_tags.map { |word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_noun_and_affix_tags.map { |word| remove_beginning_unnecesary_strings(word) } + + words_with_num_and_affix_tags + + words_with_num_and_affix_tags.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_num_and_affix_tags.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_num_and_affix_tags.map { |word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_num_and_affix_tags.map { |word| remove_beginning_unnecesary_strings(word) } + + words_with_noun_and_noun_tags + + words_with_noun_and_noun_tags.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_noun_and_noun_tags.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_noun_and_noun_tags.map { |word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_noun_and_noun_tags.map { |word| remove_beginning_unnecesary_strings(word) } + + words_with_noun_and_x_tags + + words_with_noun_and_x_tags.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_noun_and_x_tags.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_noun_and_x_tags.map { |word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_noun_and_x_tags.map { |word| remove_beginning_unnecesary_strings(word) } + + words_with_affix_and_affix_tags + + words_with_affix_and_affix_tags.map { |word| remove_all_three_point_readers_from_word(word) } + + words_with_affix_and_affix_tags.map { |word| convert_hankaku_katakana_to_zenkaku_katakana(word) } + + words_with_affix_and_affix_tags.map { 
|word| convert_zenkaku_numbers_to_hankaku_numbers(word) } + + words_with_affix_and_affix_tags.map { |word| remove_beginning_unnecesary_strings(word) } + ).uniq.reject(&:empty?) + end + # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity + + private + + ################################################################### + # NOUN - PUNCT - NOUN という並びのタグの部分を抽出する + # 「ヤム・クー」などを抽出する + ################################################################### + def words_with_noun_and_punct_and_noun_tags + words_with_noun_and_punct_and_noun_tags = [] + target_start_index_numbers = token_start_index_numbers_with_noun_and_punct_and_noun_tags + + target_start_index_numbers.each do |index_number| + word = hashed_tokens[index_number]['lemma'] + hashed_tokens[index_number + 1]['lemma'] + hashed_tokens[index_number + 2]['lemma'] + + words_with_noun_and_punct_and_noun_tags << word + end + + words_with_noun_and_punct_and_noun_tags + end + + def token_start_index_numbers_with_noun_and_punct_and_noun_tags + target_tags = ['NOUN', 'PUNCT', 'NOUN'].freeze + tokens = convert_analyze_syntax_response_token_objects + tags_array = tokens.map(&:tag) + token_start_index_numbers = [] + + # 3つの要素の配列の判別をするために、配列の大きさから 2 を引いた index まで調べる + (tags_array.count - 2).times.each do |i| + target_array_in_tokens = [ + tags_array[i], + tags_array[i + 1], + tags_array[i + 2], + ] + + token_start_index_numbers << i if target_array_in_tokens == target_tags + end + + token_start_index_numbers + end + + ################################################################### + # NOUN - X という並びのタグの部分を抽出する + # 「テンガアール」などを抽出する + ################################################################### + def words_with_noun_and_x_tags + words_with_noun_and_x_tags = [] + target_start_index_numbers = token_start_index_numbers_with_noun_and_x_tags + + target_start_index_numbers.each do |index_number| + word = hashed_tokens[index_number]['lemma'] + hashed_tokens[index_number + 1]['lemma'] + + 
words_with_noun_and_x_tags << word + end + + words_with_noun_and_x_tags + end + + def token_start_index_numbers_with_noun_and_x_tags + target_tags = ['NOUN', 'X'].freeze + tokens = convert_analyze_syntax_response_token_objects + tags_array = tokens.map(&:tag) + token_start_index_numbers = [] + + # 2つの要素の配列の判別をするために、配列の大きさから 1 を引いた index まで調べる + (tags_array.count - 1).times.each do |i| + target_array_in_tokens = [ + tags_array[i], + tags_array[i + 1], + ] + + token_start_index_numbers << i if target_array_in_tokens == target_tags + end + + token_start_index_numbers + end + + ################################################################### + # NOUN - NOUN という並びのタグの部分を抽出する + # 「ルカ様」などを抽出する + ################################################################### + def words_with_noun_and_noun_tags + words_with_noun_and_noun_tags = [] + target_start_index_numbers = token_start_index_numbers_with_noun_and_noun_tags + + target_start_index_numbers.each do |index_number| + word = hashed_tokens[index_number]['lemma'] + hashed_tokens[index_number + 1]['lemma'] + + words_with_noun_and_noun_tags << word + end + + words_with_noun_and_noun_tags + end + + def token_start_index_numbers_with_noun_and_noun_tags + target_tags = ['NOUN', 'NOUN'].freeze + tokens = convert_analyze_syntax_response_token_objects + tags_array = tokens.map(&:tag) + token_start_index_numbers = [] + + # 2つの要素の配列の判別をするために、配列の大きさから 1 を引いた index まで調べる + (tags_array.count - 1).times.each do |i| + target_array_in_tokens = [ + tags_array[i], + tags_array[i + 1], + ] + + token_start_index_numbers << i if target_array_in_tokens == target_tags + end + + token_start_index_numbers + end + + ################################################################### + # NOUN - AFFIX という並びのタグの部分を抽出する + # 「ルカ様」などを抽出する + ################################################################### + def words_with_noun_and_affix_tags + words_with_noun_and_affix_tags = [] + target_start_index_numbers = 
token_start_index_numbers_with_noun_and_affix_tags + + target_start_index_numbers.each do |index_number| + word = hashed_tokens[index_number]['lemma'] + hashed_tokens[index_number + 1]['lemma'] + + words_with_noun_and_affix_tags << word + end + + words_with_noun_and_affix_tags + end + + def token_start_index_numbers_with_noun_and_affix_tags + target_tags = ['NOUN', 'AFFIX'].freeze + tokens = convert_analyze_syntax_response_token_objects + tags_array = tokens.map(&:tag) + token_start_index_numbers = [] + + # 2つの要素の配列の判別をするために、配列の大きさから 1 を引いた index まで調べる + (tags_array.count - 1).times.each do |i| + target_array_in_tokens = [ + tags_array[i], + tags_array[i + 1], + ] + + token_start_index_numbers << i if target_array_in_tokens == target_tags + end + + token_start_index_numbers + end + + ################################################################### + # NUM - AFFIX という並びのタグの部分を抽出する + # 「4様」などを抽出する + ################################################################### + def words_with_num_and_affix_tags + words_with_num_and_affix_tags = [] + target_start_index_numbers = token_start_index_numbers_with_num_and_affix_tags + + target_start_index_numbers.each do |index_number| + word = hashed_tokens[index_number]['lemma'] + hashed_tokens[index_number + 1]['lemma'] + + words_with_num_and_affix_tags << word + end + + words_with_num_and_affix_tags + end + + def token_start_index_numbers_with_num_and_affix_tags + target_tags = ['NUM', 'AFFIX'].freeze + tokens = convert_analyze_syntax_response_token_objects + tags_array = tokens.map(&:tag) + token_start_index_numbers = [] + + # 2つの要素の配列の判別をするために、配列の大きさから 1 を引いた index まで調べる + (tags_array.count - 1).times.each do |i| + target_array_in_tokens = [ + tags_array[i], + tags_array[i + 1], + ] + + token_start_index_numbers << i if target_array_in_tokens == target_tags + end + + token_start_index_numbers + end + + ################################################################### + # AFFIX - AFFIX という並びのタグの部分を抽出する + # 
特定の文脈における「坊ちゃん」などを抽出する + ################################################################### + def words_with_affix_and_affix_tags + words_with_affix_and_affix_tags = [] + target_start_index_numbers = token_start_index_numbers_with_affix_and_affix_tags + + target_start_index_numbers.each do |index_number| + word = hashed_tokens[index_number]['lemma'] + hashed_tokens[index_number + 1]['lemma'] + + words_with_affix_and_affix_tags << word + end + + words_with_affix_and_affix_tags + end + + def token_start_index_numbers_with_affix_and_affix_tags + target_tags = ['AFFIX', 'AFFIX'].freeze + tokens = convert_analyze_syntax_response_token_objects + tags_array = tokens.map(&:tag) + token_start_index_numbers = [] + + # 2つの要素の配列の判別をするために、配列の大きさから 1 を引いた index まで調べる + (tags_array.count - 1).times.each do |i| + target_array_in_tokens = [ + tags_array[i], + tags_array[i + 1], + ] + + token_start_index_numbers << i if target_array_in_tokens == target_tags + end + + token_start_index_numbers + end + + ################################################################### + # NOUN タグだけに絞ろうとしたが「ベルクート」が VERB だったので例外的に追加 + # 「シュウ」が AFFIX だったので追加 + ################################################################### + def words_with_basic_filters + filtered_tokens = convert_analyze_syntax_response_token_objects.select do |token| + token.tag == 'NOUN' || token.tag == 'VERB' || token.tag == 'AFFIX' + end + + filtered_tokens.map(&:lemma) + end + + ################################################################### + # 以下はヘルパ的メソッドなので、ここに書かなくてもいい + ################################################################### + + ################################################################### + # 得られた単語群から三点リーダを除外した単語群を作る + # 「…ルカ」などで一つの NOUN として認識されてしまうため + ################################################################### + def remove_all_three_point_readers_from_word(word) + word.gsub(/…/, '') + end + + ################################################################### + # 
Convert half-width katakana in an extracted word to full-width katakana,
+  # because words are matched against character names by exact string comparison
+  ###################################################################
+  def convert_hankaku_katakana_to_zenkaku_katakana(word)
+    NKF.nkf('-WwXm0', word)
+  end
+
+  ###################################################################
+  # Convert full-width digits and Latin letters (U+FF10-FF19, U+FF41-FF5A, U+FF21-FF3A) to ASCII
+  # e.g. a full-width "2" (U+FF12) followed by "主" is unified to "2主"
+  ###################################################################
+  def convert_zenkaku_numbers_to_hankaku_numbers(word)
+    word.tr("\uFF10-\uFF19\uFF41-\uFF5A\uFF21-\uFF3A", '0-9a-zA-Z')
+  end
+
+  ###################################################################
+  # Remove unnecessary characters found at the beginning of an extracted word
+  # e.g. ":リオン" is unified to "リオン"
+  # e.g. "★ナナミ" is unified to "ナナミ"
+  ###################################################################
+  def remove_beginning_unnecesary_strings(word)
+    # Handles a leading "2:" (id_number: 1396459824892710913): the "2" is dropped here, the ":" by the last sub
+    removed_beginning_unnecesary_strings = word.sub(/\A2/, '')
+
+    # Handles a leading "★" (id_number: 1403442321144750081)
+    removed_beginning_unnecesary_strings = removed_beginning_unnecesary_strings.sub(/\A★/, '')
+
+    removed_beginning_unnecesary_strings.sub(/\A:/, '')
+  end
+
+  ###################################################################
+  # Convert array elements to a comma-separated string with each element wrapped in double quotes
+  ###################################################################
+  def convert_array_to_comma_separated_with_double_quote(array)
+    array.map { |element| "\"#{element}\"" }.join(",")
+  end
+end
diff --git a/app/models/analyze_syntax_response/sentence.rb b/app/models/analyze_syntax_response/sentence.rb
new file mode 100644
index 00000000..e39c7789
--- /dev/null
+++ b/app/models/analyze_syntax_response/sentence.rb
@@ -0,0 +1,15 @@
+module AnalyzeSyntaxResponse
+  class Sentence
+    include ActiveModel::Model
+
+    attr_accessor :text, :analyze_syntax_id
+
+    def begin_offset
+      text['beginOffset']
+    end
+
+    def content
+      text['content']
+    end
+  end
+end
diff --git a/app/models/analyze_syntax_response/token.rb b/app/models/analyze_syntax_response/token.rb
new file mode 100644
index 00000000..e361cff7
---
/dev/null
+++ b/app/models/analyze_syntax_response/token.rb
@@ -0,0 +1,22 @@
+module AnalyzeSyntaxResponse
+  class Token
+    include ActiveModel::Model
+
+    # rubocop:disable Naming/MethodName, Layout/EmptyLinesAroundAttributeAccessor
+    attr_accessor :text, :partOfSpeech, :dependencyEdge, :lemma, :analyze_syntax_id
+    # rubocop:enable Naming/MethodName, Layout/EmptyLinesAroundAttributeAccessor
+
+    def tag
+      # The value is a String here, whereas Google::Cloud::Language::V1::AnalyzeSyntaxResponse gives a Symbol
+      part_of_speech['tag']
+    end
+
+    def part_of_speech
+      partOfSpeech
+    end
+
+    def dependency_edge
+      dependencyEdge
+    end
+  end
+end
diff --git a/app/models/asset.rb b/app/models/asset.rb
new file mode 100644
index 00000000..dce12594
--- /dev/null
+++ b/app/models/asset.rb
@@ -0,0 +1,5 @@
+class Asset < ApplicationRecord
+  has_paper_trail
+
+  belongs_to :tweet
+end
diff --git a/app/models/character.rb b/app/models/character.rb
new file mode 100644
index 00000000..2ae6543d
--- /dev/null
+++ b/app/models/character.rb
@@ -0,0 +1,7 @@
+class Character < ApplicationRecord
+  has_many :character_products, dependent: :destroy
+  has_many :products, through: :character_products
+
+  has_many :character_nicknames, dependent: :destroy
+  has_many :nicknames, through: :character_nicknames
+end
diff --git a/app/models/character_nickname.rb b/app/models/character_nickname.rb
new file mode 100644
index 00000000..ce42c629
--- /dev/null
+++ b/app/models/character_nickname.rb
@@ -0,0 +1,4 @@
+class CharacterNickname < ApplicationRecord
+  belongs_to :character
+  belongs_to :nickname
+end
diff --git a/app/models/character_product.rb b/app/models/character_product.rb
new file mode 100644
index 00000000..2e49b6d3
--- /dev/null
+++ b/app/models/character_product.rb
@@ -0,0 +1,4 @@
+class CharacterProduct < ApplicationRecord
+  belongs_to :character
+  belongs_to :product
+end
diff --git a/app/models/direct_message.rb b/app/models/direct_message.rb
new file mode 100644
index 00000000..33e80677
--- /dev/null
+++
b/app/models/direct_message.rb
@@ -0,0 +1,48 @@
+class DirectMessage < ApplicationRecord
+  has_paper_trail
+
+  serialize :api_response, JSON
+  belongs_to :user
+  has_one :analyze_syntax
+
+  validates :id_number, uniqueness: true
+
+  # Intended as a synonym of self.user (looked up here by sender_id_number)
+  def sender
+    User.find_by(id_number: sender_id_number)
+  end
+
+  def recipient
+    User.find_by(id_number: recipient_id_number)
+  end
+
+  def self.valid_term_votes
+    begin_datetime = Time.zone.parse('2021-06-11 21:00:00')
+    end_datetime = Time.zone.parse('2021-06-13 11:59:59')
+
+    where(messaged_at: begin_datetime..end_datetime)
+  end
+
+  def self.extend_valid_term_votes
+    begin_datetime = Time.zone.parse('2021-06-11 21:00:00')
+    end_datetime = Time.zone.parse('2021-06-13 12:59:59')
+
+    where(messaged_at: begin_datetime..end_datetime)
+  end
+
+  def self.only_beginning_valid_term_votes
+    begin_datetime = Time.zone.parse('2021-06-11 21:00:00')
+
+    where(messaged_at: begin_datetime..)
+  end
+
+  # gensosenkyo: 1471724029,
+  # sub_gensosenkyo: 1388758231825018881
+  def self.from_gensosenkyo_main
+    where(sender_id_number: 1471724029)
+  end
+
+  def self.to_gensosenkyo_main
+    where(recipient_id_number: 1471724029)
+  end
+end
diff --git a/app/models/hashtag.rb b/app/models/hashtag.rb
new file mode 100644
index 00000000..781f7591
--- /dev/null
+++ b/app/models/hashtag.rb
@@ -0,0 +1,7 @@
+class Hashtag < ApplicationRecord
+  belongs_to :tweet
+
+  def convert_to_search_word
+    "##{text}"
+  end
+end
diff --git a/app/models/in_tweet_url.rb b/app/models/in_tweet_url.rb
new file mode 100644
index 00000000..14b7614d
--- /dev/null
+++ b/app/models/in_tweet_url.rb
@@ -0,0 +1,3 @@
+class InTweetUrl < ApplicationRecord
+  belongs_to :tweet
+end
diff --git a/app/models/mention.rb b/app/models/mention.rb
new file mode 100644
index 00000000..42ff7083
--- /dev/null
+++ b/app/models/mention.rb
@@ -0,0 +1,7 @@
+class Mention < ApplicationRecord
+  belongs_to :tweet
+
+  def user
+    User.find_by(id_number: user_id_number)
+  end
+end
diff --git
a/app/models/nickname.rb b/app/models/nickname.rb new file mode 100644 index 00000000..0c2aeb49 --- /dev/null +++ b/app/models/nickname.rb @@ -0,0 +1,4 @@ +class Nickname < ApplicationRecord + has_many :character_nicknames, dependent: :destroy + has_many :characters, through: :character_nicknames +end diff --git a/app/models/product.rb b/app/models/product.rb new file mode 100644 index 00000000..76b13799 --- /dev/null +++ b/app/models/product.rb @@ -0,0 +1,20 @@ +class Product < ApplicationRecord + has_many :character_products, dependent: :destroy + has_many :characters, through: :character_products + + def shorten_name(lang: 'ja') + if lang == 'en' + return { + 'Suikoden' => 'S1', + 'Suikoden II' => 'S2', + 'SuikodenII' => 'S2', + 'Suikoden 2' => 'S2', + 'Suikoden2' => 'S2' + }.fetch(name_en, name_en) + end + + { + '幻想水滸伝' => '幻水I' + }.fetch(name, name) + end +end diff --git a/app/models/tweet.rb b/app/models/tweet.rb new file mode 100644 index 00000000..dab1f9ae --- /dev/null +++ b/app/models/tweet.rb @@ -0,0 +1,146 @@ +class Tweet < ApplicationRecord + has_paper_trail + + has_one :analyze_syntax + belongs_to :user + has_many :assets + has_many :hashtags + has_many :in_tweet_urls + has_many :mentions + + validates :id_number, uniqueness: true + + scope :not_retweet, -> { where(is_retweet: false) } + scope :be_retweet, -> { where(is_retweet: true) } + scope :contains_hashtag, ->(hashtag) { joins(:hashtags).where(hashtags: { text: hashtag }) } + scope :mentioned_user, ->(user) { joins(:mentions).where(mentions: { user_id_number: user.id_number }) } + scope :is_public, -> { where(is_public: true) } + + # TODO: TweetStorage の方にも書く + def self.filter_by_tweeted_at(from, to) + where(tweeted_at: from..to) + end + + def self.gensosenkyo_2021_votes + valid_term_votes + .not_retweet + .contains_hashtag('幻水総選挙2021') + .not_by_gensosenkyo_main + .order(tweeted_at: :asc) + .order(id_number: :asc) + end + + def self.not_by_gensosenkyo_family + # gensosenkyo: 1471724029, + # 
sub_gensosenkyo: 1388758231825018881 + + target_user_ids = [ + User.find_by(id_number: 1471724029)&.id, + User.find_by(id_number: 1388758231825018881)&.id, + ].compact + + where.not(user_id: target_user_ids) + end + + def self.not_by_gensosenkyo_main + target_user_ids = [ + User.find_by(id_number: 1471724029)&.id, + ].compact + + where.not(user_id: target_user_ids) + end + + def self.not_by_gensosenkyo_sub + target_user_ids = [ + User.find_by(id_number: 1388758231825018881)&.id, + ].compact + + where.not(user_id: target_user_ids) + end + + def self.valid_term_votes + begin_datetime = Time.zone.parse('2021-06-11 21:00:00') + end_datetime = Time.zone.parse('2021-06-13 11:59:59') + + where(tweeted_at: begin_datetime..end_datetime) + end + + def self.odai_shosetsu + not_retweet + .not_by_gensosenkyo_main + .contains_hashtag('幻水総選挙お題小説') + .where(tweeted_at: ..Time.zone.parse('2021-06-07 02:20:00')) + .order(tweeted_at: :asc) + .order(id_number: :asc) + end + + def self.oshi_serifu + not_retweet + .not_by_gensosenkyo_main + .contains_hashtag('幻水総選挙推し台詞') + .where(tweeted_at: ..Time.zone.parse('2021-06-10 23:59:59')) + .order(tweeted_at: :asc) + .order(id_number: :asc) + end + + def valid_term_vote? + begin_datetime = Time.zone.parse('2021-06-11 21:00:00') + end_datetime = Time.zone.parse('2021-06-13 11:59:59') + + tweeted_at >= begin_datetime && tweeted_at <= end_datetime + end + + def has_this_hashtag?(hashtag) + hashtags.any? { |h| h.text == hashtag } + end + + def public? + is_public + end + + def source_app_name + source + end + + def in_reply_to_tweet + Tweet.find_by(id_number: in_reply_to_tweet_id_number) + end + + def in_reply_to_user + User.find_by(id_number: in_reply_to_user_id_number) + end + + def retweet? + is_retweet + end + + def url + "https://twitter.com/#{user.screen_name}/status/#{id_number}" + end + + def url_by_id_number_only + "https://twitter.com/twitter/status/#{id_number}" + end + + def has_hashtags? + hashtags.present? + end + + def has_assets? 
+ assets.present? + end + + def has_in_tweet_urls? + in_tweet_urls.present? + end + + def is_mentioned_to_gensosenkyo_admin? + # gensosenkyo: 1471724029, sub_gensosenkyo: 1388758231825018881 + gensosenkyo_admin_user_id_numbers = { + gensosenkyo: 1471724029, + sub_gensosenkyo: 1388758231825018881 + } + + mentions.any? { |mention| mention.user_id_number.in?(gensosenkyo_admin_user_id_numbers.values) } + end +end diff --git a/app/models/user.rb b/app/models/user.rb new file mode 100644 index 00000000..0405cbf4 --- /dev/null +++ b/app/models/user.rb @@ -0,0 +1,41 @@ +class User < ApplicationRecord + has_paper_trail + + has_many :tweets, dependent: :destroy + has_many :direct_messages, dependent: :destroy + + validates :id_number, uniqueness: true + + scope :by_tweets, ->(target_tweets) { joins(:tweets).where(tweets: target_tweets) } + + def assets; end + + def protected? + is_protected + end + + def url + "https://twitter.com/#{screen_name}" + end + + def url_by_id_number_only + "https://twitter.com/i/user/#{id_number}" + end + + def gensosenkyo_admin? 
+ gensosenkyo_admin_user_id_numbers = { + gensosenkyo: 1471724029, + sub_gensosenkyo: 1388758231825018881 + } + + id_number.in?(gensosenkyo_admin_user_id_numbers.values) + end + + def self.who_vote_two_or_more_without_not_public + self.select { |user| user.tweets.gensosenkyo_2021_votes.is_public.count > 1 } + end + + def self.did_vote_without_not_public + self.select { |user| user.tweets.gensosenkyo_2021_votes.is_public.count > 0 } + end +end diff --git a/db/migrate/20210418071020_create_users.rb b/db/migrate/20210418071020_create_users.rb new file mode 100644 index 00000000..5c31e2fe --- /dev/null +++ b/db/migrate/20210418071020_create_users.rb @@ -0,0 +1,17 @@ +class CreateUsers < ActiveRecord::Migration[6.1] + def change + create_table :users do |t| + t.bigint :id_number, null: false + t.string :name, null: false + t.string :screen_name, null: false + t.string :profile_image_url_https + t.boolean :is_protected + + t.timestamps + end + + add_index :users, :id_number, unique: true + add_index :users, :name + add_index :users, :screen_name + end +end diff --git a/db/migrate/20210530044047_create_tweets.rb b/db/migrate/20210530044047_create_tweets.rb new file mode 100644 index 00000000..76f14bcd --- /dev/null +++ b/db/migrate/20210530044047_create_tweets.rb @@ -0,0 +1,31 @@ +class CreateTweets < ActiveRecord::Migration[6.1] + def change + create_table :tweets do |t| + t.bigint :id_number, null: false + t.string :full_text + t.string :source + t.bigint :in_reply_to_tweet_id_number + t.bigint :in_reply_to_user_id_number + t.boolean :is_retweet + t.string :language + t.boolean :is_public + t.datetime :tweeted_at + + t.references :user + + # t.string 'geo' + # t.string 'coordinates' + # t.string 'place' + # t.boolean 'is_quote_status' + # t.integer 'retweet_count' + # t.integer 'favorite_count' + # t.boolean 'favorited' + # t.boolean 'possibly_sensitive' + # t.boolean 'possibly_sensitive_appealable' + + t.timestamps + end + + add_index :tweets, :id_number, unique: true 
+  end
+end
diff --git a/db/migrate/20210530053416_create_hashtags.rb b/db/migrate/20210530053416_create_hashtags.rb
new file mode 100644
index 00000000..760d6f75
--- /dev/null
+++ b/db/migrate/20210530053416_create_hashtags.rb
@@ -0,0 +1,13 @@
+class CreateHashtags < ActiveRecord::Migration[6.1]
+  def change
+    create_table :hashtags do |t|
+      t.string :text
+
+      t.references :tweet
+
+      t.timestamps
+    end
+
+    add_index :hashtags, [:tweet_id, :text], unique: true
+  end
+end
diff --git a/db/migrate/20210530053438_create_in_tweet_urls.rb b/db/migrate/20210530053438_create_in_tweet_urls.rb
new file mode 100644
index 00000000..7fe1230d
--- /dev/null
+++ b/db/migrate/20210530053438_create_in_tweet_urls.rb
@@ -0,0 +1,13 @@
+class CreateInTweetUrls < ActiveRecord::Migration[6.1]
+  def change
+    create_table :in_tweet_urls do |t|
+      t.string :text
+
+      t.references :tweet
+
+      t.timestamps
+    end
+
+    add_index :in_tweet_urls, [:tweet_id, :text], unique: true
+  end
+end
diff --git a/db/migrate/20210530054154_create_assets.rb b/db/migrate/20210530054154_create_assets.rb
new file mode 100644
index 00000000..3d023dc1
--- /dev/null
+++ b/db/migrate/20210530054154_create_assets.rb
@@ -0,0 +1,14 @@
+class CreateAssets < ActiveRecord::Migration[6.1]
+  def change
+    create_table :assets do |t|
+      t.bigint :id_number
+      t.string :url
+      t.string :asset_type
+      t.boolean :is_public
+
+      t.references :tweet
+
+      t.timestamps
+    end
+  end
+end
diff --git a/db/migrate/20210530054844_create_direct_messages.rb b/db/migrate/20210530054844_create_direct_messages.rb
new file mode 100644
index 00000000..57ee65e7
--- /dev/null
+++ b/db/migrate/20210530054844_create_direct_messages.rb
@@ -0,0 +1,18 @@
+class CreateDirectMessages < ActiveRecord::Migration[6.1]
+  def change
+    create_table :direct_messages do |t|
+      t.bigint :id_number
+      t.datetime :sent_at
+      t.string :text
+      t.bigint :sender_id_number
+      t.bigint :recipient_id_number
+      t.boolean :is_visible
+
+      t.references :user
+
+      t.timestamps
+    end
+
+    add_index :direct_messages, :id_number, unique: true
+  end
+end
diff --git a/db/migrate/20210530080905_create_versions.rb b/db/migrate/20210530080905_create_versions.rb
new file mode 100644
index 00000000..dd37b99d
--- /dev/null
+++ b/db/migrate/20210530080905_create_versions.rb
@@ -0,0 +1,36 @@
+# This migration creates the `versions` table, the only schema PT requires.
+# All other migrations PT provides are optional.
+class CreateVersions < ActiveRecord::Migration[6.1]
+
+  # The largest text column available in all supported RDBMS is
+  # 1024^3 - 1 bytes, roughly one gibibyte. We specify a size
+  # so that MySQL will use `longtext` instead of `text`. Otherwise,
+  # when serializing very large objects, `text` might not be big enough.
+  TEXT_BYTES = 1_073_741_823
+
+  def change
+    create_table :versions do |t|
+      t.string :item_type, null: false # keyword options; a braced Hash is taken as a second column name
+      t.bigint :item_id, null: false
+      t.string :event, null: false
+      t.string :whodunnit
+      t.text :object, limit: TEXT_BYTES
+
+      # Known issue in MySQL: fractional second precision
+      # -------------------------------------------------
+      #
+      # MySQL timestamp columns do not support fractional seconds unless
+      # defined with "fractional seconds precision". MySQL users should manually
+      # add fractional seconds precision to this migration, specifically, to
+      # the `created_at` column.
+      # (https://dev.mysql.com/doc/refman/5.6/en/fractional-seconds.html)
+      #
+      # MySQL users should also upgrade to at least rails 4.2, which is the first
+      # version of ActiveRecord with support for fractional seconds in MySQL.
+      # (https://github.com/rails/rails/pull/14359)
+      #
+      t.datetime :created_at
+    end
+    add_index :versions, %i(item_type item_id)
+  end
+end
diff --git a/db/migrate/20210530085016_create_mentions.rb b/db/migrate/20210530085016_create_mentions.rb
new file mode 100644
index 00000000..28960cfd
--- /dev/null
+++ b/db/migrate/20210530085016_create_mentions.rb
@@ -0,0 +1,13 @@
+class CreateMentions < ActiveRecord::Migration[6.1]
+  def change
+    create_table :mentions do |t|
+      t.bigint :user_id_number
+
+      t.references :tweet
+
+      t.timestamps
+    end
+
+    add_index :mentions, [:tweet_id, :user_id_number], unique: true
+  end
+end
diff --git a/db/migrate/20210607054640_rename_column_from_sent_at_to_messaged_at.rb b/db/migrate/20210607054640_rename_column_from_sent_at_to_messaged_at.rb
new file mode 100644
index 00000000..6d7c11bd
--- /dev/null
+++ b/db/migrate/20210607054640_rename_column_from_sent_at_to_messaged_at.rb
@@ -0,0 +1,5 @@
+class RenameColumnFromSentAtToMessagedAt < ActiveRecord::Migration[6.1]
+  def change
+    rename_column :direct_messages, :sent_at, :messaged_at
+  end
+end
diff --git a/db/migrate/20210607054908_add_response_column_to_direct_message.rb b/db/migrate/20210607054908_add_response_column_to_direct_message.rb
new file mode 100644
index 00000000..af37f89f
--- /dev/null
+++ b/db/migrate/20210607054908_add_response_column_to_direct_message.rb
@@ -0,0 +1,5 @@
+class AddResponseColumnToDirectMessage < ActiveRecord::Migration[6.1]
+  def change
+    add_column :direct_messages, :api_response, :text
+  end
+end
diff --git a/db/migrate/20210607090631_add_born_at_column_to_user.rb b/db/migrate/20210607090631_add_born_at_column_to_user.rb
new file mode 100644
index 00000000..dd50752d
--- /dev/null
+++ b/db/migrate/20210607090631_add_born_at_column_to_user.rb
@@ -0,0 +1,5 @@
+class AddBornAtColumnToUser < ActiveRecord::Migration[6.1]
+  def change
+    add_column :users, :born_at, :datetime
+  end
+end
diff --git a/db/migrate/20210607214306_create_analyze_syntaxes.rb b/db/migrate/20210607214306_create_analyze_syntaxes.rb
new file mode 100644
index 00000000..64beaf7c
--- /dev/null
+++ b/db/migrate/20210607214306_create_analyze_syntaxes.rb
@@ -0,0 +1,15 @@
+class CreateAnalyzeSyntaxes < ActiveRecord::Migration[6.1]
+  def change
+    create_table :analyze_syntaxes do |t|
+      t.string :language
+
+      # https://googleapis.dev/ruby/google-cloud-language-v1/latest/Google/Cloud/Language/V1/AnalyzeSyntaxResponse.html
+      t.text :sentences, array: true # kept in order to store the raw response log
+      t.text :tokens, array: true # kept in order to store the raw response log
+
+      t.references :tweet
+
+      t.timestamps
+    end
+  end
+end
diff --git a/db/migrate/20210608083648_create_characters.rb b/db/migrate/20210608083648_create_characters.rb
new file mode 100644
index 00000000..571e26f9
--- /dev/null
+++ b/db/migrate/20210608083648_create_characters.rb
@@ -0,0 +1,15 @@
+class CreateCharacters < ActiveRecord::Migration[6.1]
+  def change
+    create_table :characters do |t|
+      # name and name_en here are the names used in the Gensui general election (not necessarily official)
+      t.string :name
+      t.string :name_en
+
+      t.timestamps
+    end
+
+    # Characters with identical names exist, so adding UNIQUE is on hold for now
+    # add_index :characters, :name, unique: true
+    # add_index :characters, :name_en, unique: true
+  end
+end
diff --git a/db/migrate/20210608083656_create_products.rb b/db/migrate/20210608083656_create_products.rb
new file mode 100644
index 00000000..321f2c95
--- /dev/null
+++ b/db/migrate/20210608083656_create_products.rb
@@ -0,0 +1,16 @@
+class CreateProducts < ActiveRecord::Migration[6.1]
+  def change
+    create_table :products do |t|
+      t.string :name
+      t.string :name_en
+
+      # Release date, hardware platform, web page, and so on...?
+      # Collaboration titles too? (in that case the naming should be reconsidered)
+
+      t.timestamps
+    end
+
+    add_index :products, :name, unique: true
+    add_index :products, :name_en, unique: true
+  end
+end
diff --git a/db/migrate/20210608083702_create_nicknames.rb b/db/migrate/20210608083702_create_nicknames.rb
new file mode 100644
index 00000000..18986bd9
--- /dev/null
+++ b/db/migrate/20210608083702_create_nicknames.rb
@@ -0,0 +1,11 @@
+class CreateNicknames < ActiveRecord::Migration[6.1]
+  def change
+    create_table :nicknames do |t|
+      # Having a column called `name` on Nickname is a bit awkward
+      t.string :name
+      t.string :name_en
+
+      t.timestamps
+    end
+  end
+end
diff --git a/db/migrate/20210608083713_create_character_products.rb b/db/migrate/20210608083713_create_character_products.rb
new file mode 100644
index 00000000..bdf8aabd
--- /dev/null
+++ b/db/migrate/20210608083713_create_character_products.rb
@@ -0,0 +1,10 @@
+class CreateCharacterProducts < ActiveRecord::Migration[6.1]
+  def change
+    create_table :character_products do |t|
+      t.references :character, foreign_key: true
+      t.references :product, foreign_key: true
+
+      t.timestamps
+    end
+  end
+end
diff --git a/db/migrate/20210608083720_create_character_nicknames.rb b/db/migrate/20210608083720_create_character_nicknames.rb
new file mode 100644
index 00000000..0cb42883
--- /dev/null
+++ b/db/migrate/20210608083720_create_character_nicknames.rb
@@ -0,0 +1,10 @@
+class CreateCharacterNicknames < ActiveRecord::Migration[6.1]
+  def change
+    create_table :character_nicknames do |t|
+      t.references :character, foreign_key: true
+      t.references :nickname, foreign_key: true
+
+      t.timestamps
+    end
+  end
+end
diff --git a/db/migrate/20210611051011_add_reference_from_direct_message_to_analize_syntax.rb b/db/migrate/20210611051011_add_reference_from_direct_message_to_analize_syntax.rb
new file mode 100644
index 00000000..12008b1f
--- /dev/null
+++ b/db/migrate/20210611051011_add_reference_from_direct_message_to_analize_syntax.rb
@@ -0,0 +1,5 @@
+class AddReferenceFromDirectMessageToAnalizeSyntax < ActiveRecord::Migration[6.1]
+  def change
+    add_reference :analyze_syntaxes, :direct_message
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
new file mode 100644
index 00000000..70481bee
--- /dev/null
+++ b/db/schema.rb
@@ -0,0 +1,168 @@
+# This file is auto-generated from the current state of the database. Instead
+# of editing this file, please use the migrations feature of Active Record to
+# incrementally modify your database, and then regenerate this schema definition.
+#
+# This file is the source Rails uses to define your schema when running `bin/rails
+# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to
+# be faster and is potentially less error prone than running all of your
+# migrations from scratch. Old migrations may fail to apply correctly if those
+# migrations use external dependencies or application code.
+#
+# It's strongly recommended that you check this file into your version control system.
+
+ActiveRecord::Schema[7.0].define(version: 2021_06_11_051011) do
+  # These are extensions that must be enabled in order to support this database
+  enable_extension "plpgsql"
+
+  create_table "analyze_syntaxes", force: :cascade do |t|
+    t.string "language"
+    t.text "sentences", array: true
+    t.text "tokens", array: true
+    t.bigint "tweet_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.bigint "direct_message_id"
+    t.index ["direct_message_id"], name: "index_analyze_syntaxes_on_direct_message_id"
+    t.index ["tweet_id"], name: "index_analyze_syntaxes_on_tweet_id"
+  end
+
+  create_table "assets", force: :cascade do |t|
+    t.bigint "id_number"
+    t.string "url"
+    t.string "asset_type"
+    t.boolean "is_public"
+    t.bigint "tweet_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["tweet_id"], name: "index_assets_on_tweet_id"
+  end
+
+  create_table "character_nicknames", force: :cascade do |t|
+    t.bigint "character_id"
+    t.bigint "nickname_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["character_id"], name: "index_character_nicknames_on_character_id"
+    t.index ["nickname_id"], name: "index_character_nicknames_on_nickname_id"
+  end
+
+  create_table "character_products", force: :cascade do |t|
+    t.bigint "character_id"
+    t.bigint "product_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["character_id"], name: "index_character_products_on_character_id"
+    t.index ["product_id"], name: "index_character_products_on_product_id"
+  end
+
+  create_table "characters", force: :cascade do |t|
+    t.string "name"
+    t.string "name_en"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+  end
+
+  create_table "direct_messages", force: :cascade do |t|
+    t.bigint "id_number"
+    t.datetime "messaged_at", precision: nil
+    t.string "text"
+    t.bigint "sender_id_number"
+    t.bigint "recipient_id_number"
+    t.boolean "is_visible"
+    t.bigint "user_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.text "api_response"
+    t.index ["id_number"], name: "index_direct_messages_on_id_number", unique: true
+    t.index ["user_id"], name: "index_direct_messages_on_user_id"
+  end
+
+  create_table "hashtags", force: :cascade do |t|
+    t.string "text"
+    t.bigint "tweet_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["tweet_id", "text"], name: "index_hashtags_on_tweet_id_and_text", unique: true
+    t.index ["tweet_id"], name: "index_hashtags_on_tweet_id"
+  end
+
+  create_table "in_tweet_urls", force: :cascade do |t|
+    t.string "text"
+    t.bigint "tweet_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["tweet_id", "text"], name: "index_in_tweet_urls_on_tweet_id_and_text", unique: true
+    t.index ["tweet_id"], name: "index_in_tweet_urls_on_tweet_id"
+  end
+
+  create_table "mentions", force: :cascade do |t|
+    t.bigint "user_id_number"
+    t.bigint "tweet_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["tweet_id", "user_id_number"], name: "index_mentions_on_tweet_id_and_user_id_number", unique: true
+    t.index ["tweet_id"], name: "index_mentions_on_tweet_id"
+  end
+
+  create_table "nicknames", force: :cascade do |t|
+    t.string "name"
+    t.string "name_en"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+  end
+
+  create_table "products", force: :cascade do |t|
+    t.string "name"
+    t.string "name_en"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["name"], name: "index_products_on_name", unique: true
+    t.index ["name_en"], name: "index_products_on_name_en", unique: true
+  end
+
+  create_table "tweets", force: :cascade do |t|
+    t.bigint "id_number", null: false
+    t.string "full_text"
+    t.string "source"
+    t.bigint "in_reply_to_tweet_id_number"
+    t.bigint "in_reply_to_user_id_number"
+    t.boolean "is_retweet"
+    t.string "language"
+    t.boolean "is_public"
+    t.datetime "tweeted_at", precision: nil
+    t.bigint "user_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["id_number"], name: "index_tweets_on_id_number", unique: true
+    t.index ["user_id"], name: "index_tweets_on_user_id"
+  end
+
+  create_table "users", force: :cascade do |t|
+    t.bigint "id_number", null: false
+    t.string "name", null: false
+    t.string "screen_name", null: false
+    t.string "profile_image_url_https"
+    t.boolean "is_protected"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.datetime "born_at", precision: nil
+    t.index ["id_number"], name: "index_users_on_id_number", unique: true
+    t.index ["name"], name: "index_users_on_name"
+    t.index ["screen_name"], name: "index_users_on_screen_name"
+  end
+
+  create_table "versions", force: :cascade do |t|
+    t.string "item_type", null: false
+    t.bigint "item_id", null: false
+    t.string "event", null: false
+    t.string "whodunnit"
+    t.text "object"
+    t.datetime "created_at", precision: nil
+    t.index ["item_type", "item_id"], name: "index_versions_on_item_type_and_item_id"
+  end
+
+  add_foreign_key "character_nicknames", "characters"
+  add_foreign_key "character_nicknames", "nicknames"
+  add_foreign_key "character_products", "characters"
+  add_foreign_key "character_products", "products"
+end