From 5810b3f3736de1002f15b5452a959bfbf0720346 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Fri, 23 Feb 2018 13:31:00 -0800 Subject: [PATCH] Add rake task for importing annotations Use the import_annotations_from_sheet to import all the annotations an analyst has created in a given Google Sheet. This can be used to solve #61. Arguments are: 1. Google sheet ID, e.g. 1-Rq-AclS2GI_yxLmkYVY7FvTfN21KoJtxXtOXXXXXX 2. E-mail of user to attribute the annotation to 3. (optional) Name of spreadsheet tabs to import (comma-separated). If unset, all tabs will be imported. 4. (optional) Row to start at (defaults to 7) 5. (optional) Row to end at. If unset, reads all rows. When done, it'll output summary information of how many rows were added, skipped, or errored across how many tabs. --- .env.example | 4 + Gemfile | 1 + Gemfile.lock | 39 +++++++ lib/tasks/import_from_sheets.rake | 171 ++++++++++++++++++++++++++++++ 4 files changed, 215 insertions(+) create mode 100644 lib/tasks/import_from_sheets.rake diff --git a/.env.example b/.env.example index a5a57253..7d4f64b2 100644 --- a/.env.example +++ b/.env.example @@ -53,3 +53,7 @@ TOKEN_PRIVATE_KEY='MIIEogIBAAKCAQEAufNrDQRl6Gj1yuga0DVHeJ4fi+lNWtn4S8XRU8/nBwm9v # In production, set up Sentry.io for error tracking # SENTRY_DSN= + +# Set these if you are running rake tasks to import data from Google Sheets +# GOOGLE_CLIENT_ID=XYZ +# GOOGLE_CLIENT_SECRET=XYZ diff --git a/Gemfile b/Gemfile index 487e2e20..72e9e289 100644 --- a/Gemfile +++ b/Gemfile @@ -22,6 +22,7 @@ gem 'oj', '~> 3.4' gem 'sentry-raven' gem 'readthis' gem 'hiredis' +gem 'google-api-client' # See https://github.com/rails/execjs#readme for more supported runtimes # gem 'therubyracer', platforms: :ruby diff --git a/Gemfile.lock b/Gemfile.lock index 47f2733e..6b2da55e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -65,6 +65,8 @@ GEM crack (0.4.3) safe_yaml (~> 1.0.0) crass (1.0.3) + declarative (0.0.10) + declarative-option (0.1.0) devise (4.4.1) bcrypt (~> 3.0) 
orm_adapter (~> 0.1) @@ -82,10 +84,26 @@ GEM ffi (1.9.21) globalid (0.4.1) activesupport (>= 4.2.0) + google-api-client (0.19.8) + addressable (~> 2.5, >= 2.5.1) + googleauth (>= 0.5, < 0.7.0) + httpclient (>= 2.8.1, < 3.0) + mime-types (~> 3.0) + representable (~> 3.0) + retriable (>= 2.0, < 4.0) + googleauth (0.6.2) + faraday (~> 0.12) + jwt (>= 1.4, < 3.0) + logging (~> 2.0) + memoist (~> 0.12) + multi_json (~> 1.11) + os (~> 0.9) + signet (~> 0.7) hashdiff (0.3.7) hiredis (0.6.1) httparty (0.16.0) multi_xml (>= 0.5.2) + httpclient (2.8.3) i18n (0.9.5) concurrent-ruby (~> 1.0) jmespath (1.3.1) @@ -95,12 +113,20 @@ GEM rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) ruby_dep (~> 1.2) + little-plugger (1.1.4) + logging (2.2.2) + little-plugger (~> 1.1) + multi_json (~> 1.10) loofah (2.2.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.0) mini_mime (>= 0.1.1) + memoist (0.16.0) method_source (0.9.0) + mime-types (3.1) + mime-types-data (~> 3.2015) + mime-types-data (3.2016.0521) mini_mime (1.0.0) mini_portile2 (2.3.0) minitest (5.11.3) @@ -115,6 +141,7 @@ GEM mini_portile2 (~> 2.3.0) oj (3.4.0) orm_adapter (0.5.0) + os (0.9.6) parallel (1.12.1) parser (2.5.0.0) ast (~> 2.4.0) @@ -173,6 +200,10 @@ GEM redis (4.0.1) redis-namespace (1.6.0) redis (>= 3.0.4) + representable (3.0.4) + declarative (< 0.1.0) + declarative-option (< 0.2.0) + uber (< 0.2.0) responders (2.4.0) actionpack (>= 4.2.0, < 5.3) railties (>= 4.2.0, < 5.3) @@ -182,6 +213,7 @@ GEM redis-namespace (~> 1.3) sinatra (>= 0.9.2) vegas (~> 0.1.2) + retriable (3.1.1) rubocop (0.52.1) parallel (~> 1.10) parser (>= 2.4.0.2, < 3.0) @@ -205,6 +237,11 @@ GEM tilt (>= 1.1, < 3) sentry-raven (2.7.2) faraday (>= 0.7.6, < 1.0) + signet (0.8.1) + addressable (~> 2.3) + faraday (~> 0.9) + jwt (>= 1.5, < 3.0) + multi_json (~> 1.10) sinatra (2.0.1) mustermann (~> 1.0) rack (~> 2.0) @@ -227,6 +264,7 @@ GEM tilt (2.0.8) tzinfo (1.2.5) thread_safe (~> 0.1) + uber (0.1.0) uglifier (4.1.6) execjs (>= 0.3.0, < 3) 
# NOTE: this file was recovered from a flattened `git format-patch` e-mail. The lines
# below are the patch text that shared a physical line with the start of the Ruby
# source (tail of the Gemfile.lock hunk + the diff header for this file). They are
# kept verbatim as comments so that no patch content is lost.
#
#        unicode-display_width (1.3.0)
#   @@ -255,6 +293,7 @@ DEPENDENCIES
#      byebug
#      devise
#      dotenv-rails
#   +  google-api-client
#      hiredis
#      httparty
#      jwt (~> 2.1)
#   diff --git a/lib/tasks/import_from_sheets.rake b/lib/tasks/import_from_sheets.rake
#   new file mode 100644
#   index 00000000..21b0040e
#   --- /dev/null
#   +++ b/lib/tasks/import_from_sheets.rake
#   @@ -0,0 +1,171 @@

require 'tempfile'
require 'google/apis/sheets_v4'
require 'googleauth'
require 'googleauth/stores/file_token_store'

# Value stored under the `_importer` key of every annotation created by this task.
IMPORT_TYPE = 'rake_task_v1'.freeze
# Redirect URI for the copy-and-paste ("out of band") OAuth flow used by `authorize_google`.
OOB_URI = 'urn:ietf:wg:oauth:2.0:oob'.freeze
APPLICATION_NAME = 'Web Monitoring DB Importer'.freeze

# Zero-based index of the row cell that holds the change ("latest-to-base") URL.
CHANGE_URL_COLUMN = 9
# Zero-based index of the row cell that holds the first annotation field.
ANNOTATION_START_COLUMN = 17

# Annotation fields (names come from the UI project), in the order their cells
# appear in a row starting at ANNOTATION_START_COLUMN. Each entry is [name, type].
ANNOTATION_FIELDS = (
  %w[
    indiv_1 indiv_2 indiv_3 indiv_4 indiv_5 indiv_6
    repeat_7 repeat_8 repeat_9 repeat_10 repeat_11 repeat_12
    sig_1 sig_2 sig_3 sig_4 sig_5 sig_6
  ].map { |name| [name, :boolean] } + [['notes', :text]]
).freeze

# Change URLs as produced by Versionista; captures are (to version id, from version id).
VERSIONISTA_CHANGE_URL = %r{versionista\.com/\d+/\d+/(\d+):(\d+)}
# Change URLs from our own UI; captures are (from version id, to version id).
MONITORING_CHANGE_URL = %r{monitoring\.envirodatagov\.org/page/[^/]+/([^/.]+)\.\.([^/.]+)}

# Imports annotations from a Google Sheet.
#
# Arguments:
#   sheet_id   - ID of the Google Sheet to read.
#   user_email - E-mail of the User the annotations are attributed to
#                (the lookup raises if no such user exists).
#   tabs       - Optional comma-separated tab names. Blank/unset means every tab.
#   start_row  - Optional first row to read. Blank/unset means 7.
#   end_row    - Optional last row to read. Unset means read to the end.
#
# Set the VERBOSE environment variable to print a line per created annotation.
# A summary of created/skipped/errored rows is always printed, even if the
# import aborts part-way through.
desc 'Create annotations from data in analysts’ Google sheets -- only the sheet ID and user e-mail are required.'
task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, :end_row] => [:environment] do |_t, args|
  verbose = ENV['VERBOSE']
  sheet_id = args[:sheet_id]
  # Treat a blank positional argument the same as an omitted one; otherwise
  # `''.to_i` would silently turn into row 0.
  start_row = (args[:start_row].presence || 7).to_i
  end_row = args[:end_row] || ''

  # Look the user up before authorizing so a mistyped e-mail fails before the
  # interactive OAuth prompt rather than after it.
  user = User.find_by!(email: args[:user_email])
  client = get_client

  tab_count = 0
  annotated_count = 0
  skipped_count = 0
  error_count = 0

  tabs =
    if args[:tabs].present?
      args[:tabs].split(',').map(&:strip).reject(&:empty?)
    else
      client.get_spreadsheet(sheet_id).sheets.map { |sheet| sheet.properties.title }
    end

  begin
    tabs.each do |tab_title|
      puts "Importing spreadsheet tab '#{tab_title}'"

      # `values` may be nil when the requested range contains no data.
      rows = client.get_spreadsheet_values(
        sheet_id,
        "#{tab_title}!A#{start_row}:AL#{end_row}"
      ).values || []

      rows.each_with_index do |row, index|
        begin
          change = change_for_version_url(row[CHANGE_URL_COLUMN])
          change.annotate(annotation_data_for_row(row), user) if change
        rescue StandardError => error
          # A single bad row is reported and counted, but does not stop the import.
          puts "Row #{start_row + index}: #{error.message}"
          error_count += 1
          next
        end

        if change
          annotated_count += 1
          puts "Annotated '#{change.version.page.url}' change '#{change.api_id}'" if verbose
        else
          # Rows without a change URL are not errors, just nothing to import.
          skipped_count += 1
        end
      end

      tab_count += 1
    end
  ensure
    puts "\nRESULTS:"
    puts "--------"
    puts "Created #{annotated_count} annotations"
    puts "Skipped #{skipped_count} rows"
    puts "Errored #{error_count} rows"
    puts "In #{tab_count} spreadsheet tabs"
    puts ""
  end
end

# Finds (creating it if needed, via `create: true`) the Change that a URL from
# the spreadsheet points at.
#
# Returns nil when `url` is blank. Raises StandardError when the URL matches
# neither known format. The Version lookups use `find_by!`/`find`, so a URL
# that references an unknown version also raises (presumably
# ActiveRecord::RecordNotFound -- confirm against the Version model).
def change_for_version_url(url)
  return nil unless url.present?

  # Handle versionista URLs: ".../<to version id>:<from version id>"
  match = VERSIONISTA_CHANGE_URL.match(url)
  if match
    to_version, from_version = match.captures.map do |version_id|
      Version.find_by!(
        "source_type = 'versionista' AND source_metadata->>'version_id' = ?",
        version_id
      )
    end
    return Change.between(from: from_version, to: to_version, create: true)
  end

  # Handle our URLs: ".../page/<page id>/<from version id>..<to version id>"
  match = MONITORING_CHANGE_URL.match(url)
  if match
    from_version = Version.find(match[1])
    to_version = Version.find(match[2])
    return Change.between(from: from_version, to: to_version, create: true)
  end

  raise StandardError, "Unknown change URL format: '#{url}'"
end

# Builds the annotation hash for one spreadsheet row (an Array of cell values).
#
# The result always has `_importer: IMPORT_TYPE` plus one string key per entry
# in ANNOTATION_FIELDS. Boolean fields are true when the cell is non-blank;
# text fields carry the raw cell value (nil when the row has no such cell).
#
# NOTE(review): any non-blank text counts as true for boolean fields, including
# values such as "FALSE" or "no" -- confirm the sheets only mark positives.
def annotation_data_for_row(row)
  data = { _importer: IMPORT_TYPE }

  ANNOTATION_FIELDS.each_with_index do |(field_name, field_type), offset|
    value = row[ANNOTATION_START_COLUMN + offset]
    value = value.present? if field_type == :boolean
    data[field_name] = value
  end

  data
end

# Returns a Google Sheets API service object that is named APPLICATION_NAME and
# authorized with the credentials from `authorize_google`.
def get_client
  service = Google::Apis::SheetsV4::SheetsService.new
  service.client_options.application_name = APPLICATION_NAME
  service.authorization = authorize_google
  service
end

# Obtains read-only Google Sheets credentials for the OAuth client configured
# through the GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET environment variables.
#
# When the token store has no credentials, prints an authorization URL and
# reads the resulting code from standard input. Raises if either environment
# variable is missing or if no code is entered.
#
# NOTE(review): the token store is backed by a brand-new Tempfile on every
# call, so presumably nothing is ever cached between runs and the prompt is
# shown each time -- confirm whether that is intended.
def authorize_google
  unless ENV['GOOGLE_CLIENT_ID'] && ENV['GOOGLE_CLIENT_SECRET']
    raise "You must have both `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables set."
  end

  scope = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY
  client_id = Google::Auth::ClientId.new(
    ENV['GOOGLE_CLIENT_ID'], ENV['GOOGLE_CLIENT_SECRET']
  )
  token_store = Google::Auth::Stores::FileTokenStore.new(file: Tempfile.new)
  authorizer = Google::Auth::UserAuthorizer.new(client_id, scope, token_store)

  user_id = 'default'
  credentials = authorizer.get_credentials(user_id)
  return credentials unless credentials.nil?

  url = authorizer.get_authorization_url(base_url: OOB_URI)
  puts 'Open the following URL in the browser and enter the ' \
       'resulting code after authorization:'
  puts url

  # `gets` returns nil at end of input (e.g. when run non-interactively).
  code = STDIN.gets.to_s.strip
  raise 'No authorization code was entered.' if code.empty?

  authorizer.get_and_store_credentials_from_code(
    user_id: user_id, code: code, base_url: OOB_URI
  )
end