Skip to content

Commit

Permalink
Merge pull request #2220 from internetee/optimize-verification-model-v2
Browse files Browse the repository at this point in the history
decrease db load
  • Loading branch information
vohmar authored Nov 24, 2021
2 parents 7477cf8 + 7e188a3 commit 6b88511
Show file tree
Hide file tree
Showing 11 changed files with 246 additions and 128 deletions.
4 changes: 4 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,7 @@ group :test do
end

gem 'aws-sdk-sesv2', '~> 1.19'

# Database profiling: PgHero dashboard (pg_query is used by PgHero for query analysis)
gem 'pghero'
gem 'pg_query', '>= 0.9.0'
10 changes: 9 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ GEM
thor (>= 0.14.0, < 2)
globalid (0.5.2)
activesupport (>= 5.0)
google-protobuf (3.19.1)
google-protobuf (3.19.1-x86_64-linux)
gyoku (1.3.1)
builder (>= 2.1.2)
haml (5.2.2)
Expand Down Expand Up @@ -338,6 +340,10 @@ GEM
request_store (~> 1.1)
pdfkit (0.8.5)
pg (1.2.3)
pg_query (2.1.2)
google-protobuf (>= 3.17.1)
pghero (2.8.1)
activerecord (>= 5)
pry (0.14.1)
coderay (~> 1.1)
method_source (~> 1.0)
Expand Down Expand Up @@ -555,6 +561,8 @@ DEPENDENCIES
paper_trail (~> 12.1)
pdfkit
pg (= 1.2.3)
pg_query (>= 0.9.0)
pghero
pry (= 0.14.1)
puma
que
Expand All @@ -579,4 +587,4 @@ DEPENDENCIES
wkhtmltopdf-binary (~> 0.12.5.1)

BUNDLED WITH
2.2.27
2.2.31
18 changes: 2 additions & 16 deletions app/jobs/verify_emails_job.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
class VerifyEmailsJob < ApplicationJob
discard_on StandardError

def perform(contact_id:, check_level: 'regex')
contact = Contact.find_by(id: contact_id)

return if check_contact_for_duplicate_mail(contact_id)

contact_not_found(contact_id) unless contact
def perform(contact:, check_level: 'regex')
contact_not_found(contact.id) unless contact
validate_check_level(check_level)
action = Actions::EmailCheck.new(email: contact.email,
validation_eventable: contact,
Expand All @@ -19,16 +15,6 @@ def perform(contact_id:, check_level: 'regex')

private

# Reports whether any validation event already exists for a recently created
# contact that shares the given contact's email address.
#
# "Recently created" means the contact's created_at is later than
# now - ValidationEvent::VALIDATION_PERIOD.
#
# @param contact_id [Integer] id of the contact whose email is compared
# @return [Boolean] true when at least one matching validation event exists
# @raise [ActiveRecord::RecordNotFound] from Contact.find when the id is unknown
def check_contact_for_duplicate_mail(contact_id)
  cutoff = Time.zone.now - ValidationEvent::VALIDATION_PERIOD
  contact = Contact.find(contact_id)
  same_email_contact_ids = Contact.where(email: contact.email)
                                  .where('created_at > ?', cutoff)
                                  .pluck(:id)

  # Only a yes/no answer is needed, so ask the database for existence instead
  # of loading (and sorting) every matching event into memory.
  # NOTE(review): the lookup matches on validation_eventable_id only, not on
  # validation_eventable_type — confirm ids cannot collide with other models.
  ValidationEvent.where(validation_eventable_id: same_email_contact_ids).exists?
end

# Signals that no contact could be located for the given identifier.
#
# @param contact_id [Object] identifier interpolated into the error message
# @raise [StandardError] always, carrying the "not found" message
def contact_not_found(contact_id)
  message = "Contact with contact_id #{contact_id} not found"
  raise StandardError, message
end
Expand Down
3 changes: 2 additions & 1 deletion app/models/validation_event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ class ValidationEvent < ApplicationRecord
VALIDATION_PERIOD = 1.year.freeze
VALID_CHECK_LEVELS = %w[regex mx smtp].freeze
VALID_EVENTS_COUNT_THRESHOLD = 5
MX_CHECK = 3

INVALID_EVENTS_COUNT_BY_LEVEL = {
regex: 1,
mx: 3,
mx: MX_CHECK,
smtp: 1,
}.freeze

Expand Down
6 changes: 6 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
get 'practice/index'
get 'practice/contact'
# https://github.com/internetee/epp_proxy#translation-of-epp-calls
#
# PgHero database profiling dashboard (mounted in development and staging only)
if Rails.env.development? || Rails.env.staging?
mount PgHero::Engine, at: "pghero"
end

namespace :epp do
constraints(EppConstraint.new(:session)) do
get 'session/hello', to: 'sessions#hello', as: 'hello'
Expand Down
15 changes: 15 additions & 0 deletions db/migrate/20211124071418_create_pghero_query_stats.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Creates the pghero_query_stats table together with a composite index on
# (database, captured_at).
class CreatePgheroQueryStats < ActiveRecord::Migration[6.1]
  def change
    create_table :pghero_query_stats do |t|
      t.text :database
      t.text :user
      t.text :query
      # 8-byte integer columns
      t.bigint :query_hash
      t.float :total_time
      t.bigint :calls
      t.timestamp :captured_at

      t.index %i[database captured_at]
    end
  end
end
86 changes: 74 additions & 12 deletions db/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ COMMENT ON EXTENSION pgcrypto IS 'cryptographic functions';

CREATE TYPE public.validation_type AS ENUM (
'email_validation',
'manual_force_delete'
'manual_force_delete',
'nameserver_validation'
);


Expand Down Expand Up @@ -2260,6 +2261,41 @@ CREATE SEQUENCE public.payment_orders_id_seq
ALTER SEQUENCE public.payment_orders_id_seq OWNED BY public.payment_orders.id;


--
-- Name: pghero_query_stats; Type: TABLE; Schema: public; Owner: -
--

CREATE TABLE public.pghero_query_stats (
id bigint NOT NULL,
database text,
"user" text,
query text,
query_hash bigint,
total_time double precision,
calls bigint,
captured_at timestamp without time zone
);


--
-- Name: pghero_query_stats_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--

CREATE SEQUENCE public.pghero_query_stats_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;


--
-- Name: pghero_query_stats_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
--

ALTER SEQUENCE public.pghero_query_stats_id_seq OWNED BY public.pghero_query_stats.id;


--
-- Name: prices; Type: TABLE; Schema: public; Owner: -
--
Expand Down Expand Up @@ -2619,8 +2655,7 @@ CREATE TABLE public.validation_events (
validation_eventable_type character varying,
validation_eventable_id bigint,
created_at timestamp(6) without time zone NOT NULL,
updated_at timestamp(6) without time zone NOT NULL,
event_type public.validation_type
updated_at timestamp(6) without time zone NOT NULL
);


Expand Down Expand Up @@ -3162,6 +3197,13 @@ ALTER TABLE ONLY public.notifications ALTER COLUMN id SET DEFAULT nextval('publi
ALTER TABLE ONLY public.payment_orders ALTER COLUMN id SET DEFAULT nextval('public.payment_orders_id_seq'::regclass);


--
-- Name: pghero_query_stats id; Type: DEFAULT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.pghero_query_stats ALTER COLUMN id SET DEFAULT nextval('public.pghero_query_stats_id_seq'::regclass);


--
-- Name: prices id; Type: DEFAULT; Schema: public; Owner: -
--
Expand Down Expand Up @@ -3685,6 +3727,14 @@ ALTER TABLE ONLY public.payment_orders
ADD CONSTRAINT payment_orders_pkey PRIMARY KEY (id);


--
-- Name: pghero_query_stats pghero_query_stats_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.pghero_query_stats
ADD CONSTRAINT pghero_query_stats_pkey PRIMARY KEY (id);


--
-- Name: prices prices_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
Expand Down Expand Up @@ -4049,6 +4099,13 @@ CREATE INDEX index_domain_transfers_on_domain_id ON public.domain_transfers USIN
CREATE INDEX index_domains_on_delete_date ON public.domains USING btree (delete_date);


--
-- Name: index_domains_on_json_statuses_history; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX index_domains_on_json_statuses_history ON public.domains USING gin (json_statuses_history);


--
-- Name: index_domains_on_name; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -4441,6 +4498,13 @@ CREATE INDEX index_notifications_on_registrar_id ON public.notifications USING b
CREATE INDEX index_payment_orders_on_invoice_id ON public.payment_orders USING btree (invoice_id);


--
-- Name: index_pghero_query_stats_on_database_and_captured_at; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX index_pghero_query_stats_on_database_and_captured_at ON public.pghero_query_stats USING btree (database, captured_at);


--
-- Name: index_prices_on_zone_id; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -4490,13 +4554,6 @@ CREATE INDEX index_users_on_identity_code ON public.users USING btree (identity_
CREATE INDEX index_users_on_registrar_id ON public.users USING btree (registrar_id);


--
-- Name: index_validation_events_on_event_type; Type: INDEX; Schema: public; Owner: -
--

CREATE INDEX index_validation_events_on_event_type ON public.validation_events USING btree (event_type);


--
-- Name: index_validation_events_on_validation_eventable; Type: INDEX; Schema: public; Owner: -
--
Expand Down Expand Up @@ -5232,7 +5289,12 @@ INSERT INTO "schema_migrations" (version) VALUES
('20210215101019'),
('20210616112332'),
('20210629074044'),
('20210628090353'),
('20210708131814'),
('20210729131100'),
('20210729134625');
('20210729134625'),
('20211028122103'),
('20211028125245'),
('20211029082225'),
('20211124071418');


28 changes: 28 additions & 0 deletions lib/tasks/generate_mock_contacts.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# namespace :generate_mock do
# task contacts: :environment do
# 1000.times do
# c = Contact.new
# c.name = generate_random_string
# c.email = generate_random_string + "@" + generate_random_string + ".ee"
# c.registrar_id = registrar
# c.street = generate_random_string
# c.city = generate_random_string
# c.zip = '12323'
# c.country_code = 'EE'
# c.phone = "+372.59813318"
# c.ident_country_code = 'EE'
# c.ident_type = 'priv'
# c.ident = '38903110313'
# c.code = generate_random_string + ":" + generate_random_string
# c.save
# end
# end
#
# def generate_random_string
# (0...10).map { (65 + rand(26)).chr }.join
# end
#
# def registrar
# Registrar.last.id
# end
# end
50 changes: 29 additions & 21 deletions lib/tasks/verify_email.rake
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
require 'optparse'
require 'rake_option_parser_boilerplate'
require 'syslog/logger'
require 'active_record'

namespace :verify_email do
# bundle exec rake verify_email:check_all -- --domain_name=shop.test --check_level=mx --spam_protect=true
Expand All @@ -18,14 +19,16 @@ namespace :verify_email do
banner: banner,
hash: opts_hash)

contacts = prepare_contacts(options)
logger.info 'No contacts to check email selected' and next if contacts.blank?
batch_contacts = prepare_contacts(options)
logger.info 'No contacts to check email selected' and next if batch_contacts.blank?

contacts.each do |contact|
VerifyEmailsJob.set(wait_until: spam_protect_timeout(options)).perform_later(
contact_id: contact.id,
check_level: check_level(options)
)
batch_contacts.find_in_batches(batch_size: 10_000) do |contacts|
contacts.each do |contact|
VerifyEmailsJob.set(wait_until: spam_protect_timeout(options)).perform_later(
contact: contact,
check_level: check_level(options)
)
end
end
end
end
Expand Down Expand Up @@ -55,35 +58,40 @@ def prepare_contacts(options)
contacts_by_domain(options[:domain_name])
else
time = Time.zone.now - ValidationEvent::VALIDATION_PERIOD
validation_events_ids = ValidationEvent.where('created_at > ?', time).pluck(:validation_eventable_id)
validation_events_ids = ValidationEvent.where('created_at > ?', time).distinct.pluck(:validation_eventable_id)

# Contact.where.not(id: validation_events_ids) + Contact.where(id: failed_contacts)
Contact.where.not(id: validation_events_ids) | failed_contacts
contacts_ids = Contact.where.not(id: validation_events_ids).pluck(:id)
Contact.where(id: contacts_ids + failed_contacts)
end
end

def failed_contacts
failed_contacts = []
failed_validations_ids = ValidationEvent.failed.pluck(:validation_eventable_id)
contacts = Contact.where(id: failed_validations_ids)
contacts.each do |contact|
failed_validations_ids = ValidationEvent.failed.distinct.pluck(:validation_eventable_id)
contacts = Contact.where(id: failed_validations_ids).includes(:validation_events)
contacts.find_each(batch_size: 10_000) do |contact|

if contact.validation_events.mx.order(created_at: :asc).present?
failed_contacts << contact unless contact.validation_events.mx.order(created_at: :asc).last.success
end
data = contact.validation_events.order(created_at: :asc).last

if contact.validation_events.regex.order(created_at: :asc).present?
failed_contacts << contact unless contact.validation_events.regex.order(created_at: :asc).last.success
end
if data.failed?
next if data.event_data['check_level'] == 'regex'

next if data.event_data['check_level'] == 'smtp'

if contact.validation_events.smtp.order(created_at: :asc).present?
failed_contacts << contact unless contact.validation_events.mx.order(created_at: :asc).last.success
next if check_mx_contact_validation(contact)

failed_contacts << contact.id
end
end

failed_contacts.uniq
end

# Tells whether the contact's most recent MX validation events form a full
# run of failures.
#
# Looks at the last ValidationEvent::MX_CHECK events of the `mx` scope
# (ordered by created_at ascending) and returns true only when that many
# events exist and every one of them reports failed?.
#
# @param contact [#validation_events] contact whose MX events are inspected
# @return [Boolean] true when the last MX_CHECK mx events all failed
def check_mx_contact_validation(contact)
  recent_mx_events = contact.validation_events.mx.order(created_at: :asc).last(ValidationEvent::MX_CHECK)

  # `all?` is vacuously true for an empty list and would also accept a shorter
  # run of failures, so fewer than MX_CHECK events must not count as a full run.
  return false if recent_mx_events.size < ValidationEvent::MX_CHECK

  recent_mx_events.all?(&:failed?)
end

def contacts_by_domain(domain_name)
domain = ::Domain.find_by(name: domain_name)
return unless domain
Expand Down
Loading

0 comments on commit 6b88511

Please sign in to comment.