diff --git a/.env.example b/.env.example index a5a57253..7d4f64b2 100644 --- a/.env.example +++ b/.env.example @@ -53,3 +53,7 @@ TOKEN_PRIVATE_KEY='MIIEogIBAAKCAQEAufNrDQRl6Gj1yuga0DVHeJ4fi+lNWtn4S8XRU8/nBwm9v # In production, set up Sentry.io for error tracking # SENTRY_DSN= + +# Set these if you are running rake tasks to import data from Google Sheets +# GOOGLE_CLIENT_ID=XYZ +# GOOGLE_CLIENT_SECRET=XYZ diff --git a/Gemfile b/Gemfile index 487e2e20..72e9e289 100644 --- a/Gemfile +++ b/Gemfile @@ -22,6 +22,7 @@ gem 'oj', '~> 3.4' gem 'sentry-raven' gem 'readthis' gem 'hiredis' +gem 'google-api-client' # See https://github.com/rails/execjs#readme for more supported runtimes # gem 'therubyracer', platforms: :ruby diff --git a/Gemfile.lock b/Gemfile.lock index 47f2733e..6b2da55e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -65,6 +65,8 @@ GEM crack (0.4.3) safe_yaml (~> 1.0.0) crass (1.0.3) + declarative (0.0.10) + declarative-option (0.1.0) devise (4.4.1) bcrypt (~> 3.0) orm_adapter (~> 0.1) @@ -82,10 +84,26 @@ GEM ffi (1.9.21) globalid (0.4.1) activesupport (>= 4.2.0) + google-api-client (0.19.8) + addressable (~> 2.5, >= 2.5.1) + googleauth (>= 0.5, < 0.7.0) + httpclient (>= 2.8.1, < 3.0) + mime-types (~> 3.0) + representable (~> 3.0) + retriable (>= 2.0, < 4.0) + googleauth (0.6.2) + faraday (~> 0.12) + jwt (>= 1.4, < 3.0) + logging (~> 2.0) + memoist (~> 0.12) + multi_json (~> 1.11) + os (~> 0.9) + signet (~> 0.7) hashdiff (0.3.7) hiredis (0.6.1) httparty (0.16.0) multi_xml (>= 0.5.2) + httpclient (2.8.3) i18n (0.9.5) concurrent-ruby (~> 1.0) jmespath (1.3.1) @@ -95,12 +113,20 @@ GEM rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) ruby_dep (~> 1.2) + little-plugger (1.1.4) + logging (2.2.2) + little-plugger (~> 1.1) + multi_json (~> 1.10) loofah (2.2.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.0) mini_mime (>= 0.1.1) + memoist (0.16.0) method_source (0.9.0) + mime-types (3.1) + mime-types-data (~> 3.2015) + mime-types-data (3.2016.0521) 
mini_mime (1.0.0) mini_portile2 (2.3.0) minitest (5.11.3) @@ -115,6 +141,7 @@ GEM mini_portile2 (~> 2.3.0) oj (3.4.0) orm_adapter (0.5.0) + os (0.9.6) parallel (1.12.1) parser (2.5.0.0) ast (~> 2.4.0) @@ -173,6 +200,10 @@ GEM redis (4.0.1) redis-namespace (1.6.0) redis (>= 3.0.4) + representable (3.0.4) + declarative (< 0.1.0) + declarative-option (< 0.2.0) + uber (< 0.2.0) responders (2.4.0) actionpack (>= 4.2.0, < 5.3) railties (>= 4.2.0, < 5.3) @@ -182,6 +213,7 @@ GEM redis-namespace (~> 1.3) sinatra (>= 0.9.2) vegas (~> 0.1.2) + retriable (3.1.1) rubocop (0.52.1) parallel (~> 1.10) parser (>= 2.4.0.2, < 3.0) @@ -205,6 +237,11 @@ GEM tilt (>= 1.1, < 3) sentry-raven (2.7.2) faraday (>= 0.7.6, < 1.0) + signet (0.8.1) + addressable (~> 2.3) + faraday (~> 0.9) + jwt (>= 1.5, < 3.0) + multi_json (~> 1.10) sinatra (2.0.1) mustermann (~> 1.0) rack (~> 2.0) @@ -227,6 +264,7 @@ GEM tilt (2.0.8) tzinfo (1.2.5) thread_safe (~> 0.1) + uber (0.1.0) uglifier (4.1.6) execjs (>= 0.3.0, < 3) unicode-display_width (1.3.0) @@ -255,6 +293,7 @@ DEPENDENCIES byebug devise dotenv-rails + google-api-client hiredis httparty jwt (~> 2.1) diff --git a/lib/tasks/import_from_sheets.rake b/lib/tasks/import_from_sheets.rake new file mode 100644 index 00000000..21b0040e --- /dev/null +++ b/lib/tasks/import_from_sheets.rake @@ -0,0 +1,171 @@ +require 'google/apis/sheets_v4' +require 'googleauth' +require 'googleauth/stores/file_token_store' + +IMPORT_TYPE = 'rake_task_v1' +OOB_URI = 'urn:ietf:wg:oauth:2.0:oob' +APPLICATION_NAME = 'Web Monitoring DB Importer' + + +desc 'Create annotations from data in analysts’ Google sheets -- only the sheet ID and user e-mail are required.' 
# Imports analyst annotations from a Google spreadsheet.
#
# Task arguments:
#   sheet_id   - ID of the spreadsheet to read (required).
#   user_email - e-mail of the User the annotations are recorded under
#                (required; the task fails if no such user exists).
#   tabs       - optional comma-separated list of tab titles; when omitted,
#                every tab in the spreadsheet is imported.
#   start_row  - first spreadsheet row to read (default 7).
#   end_row    - last spreadsheet row to read; when omitted, no end row is
#                included in the requested range.
#
# Set the VERBOSE environment variable to log every annotation created.
# A summary of the run is always printed, even if the import aborts.
task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, :end_row] => [:environment] do |_t, args|
  verbose = ENV['VERBOSE']
  client = get_client
  sheet_id = args[:sheet_id]
  start_row = args.fetch(:start_row, 7).to_i
  end_row = args[:end_row] || ''

  user = User.find_by!(email: args[:user_email])

  tab_count = 0
  annotated_count = 0
  skipped_count = 0
  error_count = 0

  tabs =
    if args[:tabs]
      args[:tabs].split(',').map(&:strip)
    else
      client.get_spreadsheet(sheet_id).sheets.map { |sheet| sheet.properties.title }
    end

  begin
    tabs.each do |tab_title|
      puts "Importing spreadsheet tab '#{tab_title}'"

      # `values` is nil when the requested range contains no data, so fall
      # back to an empty list instead of crashing on an empty tab.
      rows = client.get_spreadsheet_values(
        sheet_id,
        "#{tab_title}!A#{start_row}:AL#{end_row}"
      ).values || []

      rows.each_with_index do |row, index|
        # Column 9 is latest-to-base
        begin
          change = change_for_version_url(row[9])
        rescue StandardError => error
          puts "Row #{start_row + index}: #{error.message}"
          error_count += 1
          next
        end

        # A blank URL cell yields no change; count the row as skipped so the
        # summary reflects it.
        unless change
          skipped_count += 1
          next
        end

        change.annotate(annotation_data_for_row(row), user)
        annotated_count += 1

        puts "Annotated '#{change.version.page.url}' change '#{change.api_id}'" if verbose
      end

      tab_count += 1
    end
  ensure
    puts "\nRESULTS:"
    puts "--------"
    puts "Created #{annotated_count} annotations"
    puts "Skipped #{skipped_count} rows"
    puts "Errored #{error_count} rows"
    puts "In #{tab_count} spreadsheet tabs"
    puts ""
  end
end

# Resolves a comparison URL from the spreadsheet to a Change record.
#
# Two URL formats are recognized:
#   * Versionista URLs ending in "<to_id>:<from_id>", looked up through the
#     'version_id' stored in each Version's source_metadata.
#   * monitoring.envirodatagov.org page URLs ending in "<from_id>..<to_id>",
#     looked up by Version ID.
#
# Returns nil when url is blank. Raises StandardError for an unrecognized URL
# format; the Version lookups raise when a version cannot be found.
def change_for_version_url(url)
  return nil unless url.present?

  # Handle versionista URLs
  match = /versionista\.com\/\d+\/\d+\/(\d+):(\d+)/.match(url)
  if match
    to_version = Version.find_by!(
      "source_type = 'versionista' AND source_metadata->>'version_id' = ?",
      match[1]
    )
    from_version = Version.find_by!(
      "source_type = 'versionista' AND source_metadata->>'version_id' = ?",
      match[2]
    )
    return Change.between(from: from_version, to: to_version, create: true)
  end

  # Handle our URLs
  match = /monitoring\.envirodatagov\.org\/page\/[^\/]+\/([^\/.]+)\.\.([^\/.]+)/.match(url)
  if match
    from_version = Version.find(match[1])
    to_version = Version.find(match[2])
    return Change.between(from: from_version, to: to_version, create: true)
  end

  raise StandardError, "Unknown change URL format: '#{url}'"
end

# Builds the annotation hash for one spreadsheet row.
#
# Fields are read from consecutive cells starting at index 17. Boolean fields
# are true when their cell has any content; 'notes' keeps the raw cell value
# (nil when the cell is missing). The result is tagged with the importer type
# under :_importer.
def annotation_data_for_row(row)
  start_index = 17
  # fields from UI project
  fields = [
    ['indiv_1', :boolean],
    ['indiv_2', :boolean],
    ['indiv_3', :boolean],
    ['indiv_4', :boolean],
    ['indiv_5', :boolean],
    ['indiv_6', :boolean],
    ['repeat_7', :boolean],
    ['repeat_8', :boolean],
    ['repeat_9', :boolean],
    ['repeat_10', :boolean],
    ['repeat_11', :boolean],
    ['repeat_12', :boolean],
    ['sig_1', :boolean],
    ['sig_2', :boolean],
    ['sig_3', :boolean],
    ['sig_4', :boolean],
    ['sig_5', :boolean],
    ['sig_6', :boolean],
    'notes'
  ]

  data = { _importer: IMPORT_TYPE }
  fields.each_with_index do |field, index|
    # A bare string entry is shorthand for a text field.
    field_name, field_type = field.is_a?(Array) ? field : [field, :text]

    value = row[start_index + index]
    value = value.present? if field_type == :boolean

    data[field_name] = value
  end

  data
end

# Returns a Sheets API service configured with the application name and
# authorized through authorize_google.
def get_client
  service = Google::Apis::SheetsV4::SheetsService.new
  service.client_options.application_name = APPLICATION_NAME
  service.authorization = authorize_google
  service
end

# Obtains read-only Google Sheets credentials through the out-of-band OAuth
# flow: prints an authorization URL and reads the resulting code from STDIN.
#
# Raises if GOOGLE_CLIENT_ID or GOOGLE_CLIENT_SECRET is not set.
def authorize_google
  unless ENV['GOOGLE_CLIENT_ID'] && ENV['GOOGLE_CLIENT_SECRET']
    raise "You must have both `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables set."
  end

  scope = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY
  client_id = Google::Auth::ClientId.new(
    ENV['GOOGLE_CLIENT_ID'], ENV['GOOGLE_CLIENT_SECRET']
  )
  # NOTE(review): the token store is backed by a fresh Tempfile, so stored
  # credentials presumably do not survive between runs -- confirm intended.
  token_store = Google::Auth::Stores::FileTokenStore.new(file: Tempfile.new)
  authorizer = Google::Auth::UserAuthorizer.new(client_id, scope, token_store)

  user_id = 'default'
  credentials = authorizer.get_credentials(user_id)
  if credentials.nil?
    url = authorizer.get_authorization_url(base_url: OOB_URI)
    puts "Open the following URL in the browser and enter the resulting code after authorization:"
    puts url
    code = STDIN.gets.strip
    credentials = authorizer.get_and_store_credentials_from_code(
      user_id: user_id, code: code, base_url: OOB_URI
    )
  end

  credentials
end