Skip to content

Commit

Permalink
Add rake task for importing annotations
Browse files Browse the repository at this point in the history
Use the `import_annotations_from_sheet` rake task to import all the annotations an analyst has created in a given Google Sheet. This can be used to solve #61.

Arguments are:
1. Google sheet ID, e.g. 1-Rq-AclS2GI_yxLmkYVY7FvTfN21KoJtxXtOXXXXXX
2. E-mail of user to attribute the annotation to
3. (optional) Name of spreadsheet tabs to import (comma-separated). If unset, all tabs will be imported.
4. (optional) Row to start at (defaults to 7)
5. (optional) Row to end at. If unset, reads all rows.

When done, it'll output summary information of how many rows were added, skipped, or errored across how many tabs.
  • Loading branch information
Mr0grog committed Feb 23, 2018
1 parent 2eeb329 commit 5810b3f
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,7 @@ TOKEN_PRIVATE_KEY='MIIEogIBAAKCAQEAufNrDQRl6Gj1yuga0DVHeJ4fi+lNWtn4S8XRU8/nBwm9v

# In production, set up Sentry.io for error tracking
# SENTRY_DSN=

# Set these if you are running rake tasks to import data from Google Sheets
# GOOGLE_CLIENT_ID=XYZ
# GOOGLE_CLIENT_SECRET=XYZ
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ gem 'oj', '~> 3.4'
gem 'sentry-raven'
gem 'readthis'
gem 'hiredis'
gem 'google-api-client'

# See https://github.com/rails/execjs#readme for more supported runtimes
# gem 'therubyracer', platforms: :ruby
Expand Down
39 changes: 39 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ GEM
crack (0.4.3)
safe_yaml (~> 1.0.0)
crass (1.0.3)
declarative (0.0.10)
declarative-option (0.1.0)
devise (4.4.1)
bcrypt (~> 3.0)
orm_adapter (~> 0.1)
Expand All @@ -82,10 +84,26 @@ GEM
ffi (1.9.21)
globalid (0.4.1)
activesupport (>= 4.2.0)
google-api-client (0.19.8)
addressable (~> 2.5, >= 2.5.1)
googleauth (>= 0.5, < 0.7.0)
httpclient (>= 2.8.1, < 3.0)
mime-types (~> 3.0)
representable (~> 3.0)
retriable (>= 2.0, < 4.0)
googleauth (0.6.2)
faraday (~> 0.12)
jwt (>= 1.4, < 3.0)
logging (~> 2.0)
memoist (~> 0.12)
multi_json (~> 1.11)
os (~> 0.9)
signet (~> 0.7)
hashdiff (0.3.7)
hiredis (0.6.1)
httparty (0.16.0)
multi_xml (>= 0.5.2)
httpclient (2.8.3)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
jmespath (1.3.1)
Expand All @@ -95,12 +113,20 @@ GEM
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
ruby_dep (~> 1.2)
little-plugger (1.1.4)
logging (2.2.2)
little-plugger (~> 1.1)
multi_json (~> 1.10)
loofah (2.2.0)
crass (~> 1.0.2)
nokogiri (>= 1.5.9)
mail (2.7.0)
mini_mime (>= 0.1.1)
memoist (0.16.0)
method_source (0.9.0)
mime-types (3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2016.0521)
mini_mime (1.0.0)
mini_portile2 (2.3.0)
minitest (5.11.3)
Expand All @@ -115,6 +141,7 @@ GEM
mini_portile2 (~> 2.3.0)
oj (3.4.0)
orm_adapter (0.5.0)
os (0.9.6)
parallel (1.12.1)
parser (2.5.0.0)
ast (~> 2.4.0)
Expand Down Expand Up @@ -173,6 +200,10 @@ GEM
redis (4.0.1)
redis-namespace (1.6.0)
redis (>= 3.0.4)
representable (3.0.4)
declarative (< 0.1.0)
declarative-option (< 0.2.0)
uber (< 0.2.0)
responders (2.4.0)
actionpack (>= 4.2.0, < 5.3)
railties (>= 4.2.0, < 5.3)
Expand All @@ -182,6 +213,7 @@ GEM
redis-namespace (~> 1.3)
sinatra (>= 0.9.2)
vegas (~> 0.1.2)
retriable (3.1.1)
rubocop (0.52.1)
parallel (~> 1.10)
parser (>= 2.4.0.2, < 3.0)
Expand All @@ -205,6 +237,11 @@ GEM
tilt (>= 1.1, < 3)
sentry-raven (2.7.2)
faraday (>= 0.7.6, < 1.0)
signet (0.8.1)
addressable (~> 2.3)
faraday (~> 0.9)
jwt (>= 1.5, < 3.0)
multi_json (~> 1.10)
sinatra (2.0.1)
mustermann (~> 1.0)
rack (~> 2.0)
Expand All @@ -227,6 +264,7 @@ GEM
tilt (2.0.8)
tzinfo (1.2.5)
thread_safe (~> 0.1)
uber (0.1.0)
uglifier (4.1.6)
execjs (>= 0.3.0, < 3)
unicode-display_width (1.3.0)
Expand Down Expand Up @@ -255,6 +293,7 @@ DEPENDENCIES
byebug
devise
dotenv-rails
google-api-client
hiredis
httparty
jwt (~> 2.1)
Expand Down
171 changes: 171 additions & 0 deletions lib/tasks/import_from_sheets.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
require 'google/apis/sheets_v4'
require 'googleauth'
require 'googleauth/stores/file_token_store'

# Value recorded under the `_importer` key of every annotation built by this file.
IMPORT_TYPE = 'rake_task_v1'.freeze
# Redirect URI handed to the Google authorizer as `base_url` when requesting
# and redeeming an authorization code.
OOB_URI = 'urn:ietf:wg:oauth:2.0:oob'.freeze
# Application name set on the Sheets service's client options.
APPLICATION_NAME = 'Web Monitoring DB Importer'.freeze


# Imports annotations from a Google Sheet and attributes them to one user.
#
# Task arguments:
#   sheet_id   - ID of the Google Sheet to read (required).
#   user_email - E-mail of the user the annotations are attributed to
#                (required; the lookup raises if no such user exists).
#   tabs       - Comma-separated tab names; when omitted, every tab of the
#                spreadsheet is imported.
#   start_row  - First row to read (defaults to 7).
#   end_row    - Last row to read; when omitted the range is left open-ended.
#
# Set the VERBOSE environment variable to print a line per created annotation.
# A summary of created/skipped/errored rows is always printed, even if the
# import aborts part-way through.
desc 'Create annotations from data in analysts’ Google sheets -- only the sheet ID and user e-mail are required.'
task :import_annotations_from_sheet, [:sheet_id, :user_email, :tabs, :start_row, :end_row] => [:environment] do |_t, args|
  verbose = ENV['VERBOSE']
  client = get_client
  sheet_id = args[:sheet_id]
  start_row = args.fetch(:start_row, 7).to_i
  end_row = args[:end_row] || ''

  user = User.find_by!(email: args[:user_email])

  tab_count = 0
  annotated_count = 0
  skipped_count = 0
  error_count = 0

  tabs =
    if args[:tabs]
      args[:tabs].split(',').map(&:strip)
    else
      client.get_spreadsheet(sheet_id).sheets.map { |sheet| sheet.properties.title }
    end

  begin
    tabs.each do |tab_title|
      puts "Importing spreadsheet tab '#{tab_title}'"

      # `values` is nil (not an empty array) when the requested range holds no
      # data, so fall back to an empty list instead of crashing on such tabs.
      rows = client.get_spreadsheet_values(
        sheet_id,
        "#{tab_title}!A#{start_row}:AL#{end_row}"
      ).values || []

      rows.each_with_index do |row, index|
        # Column 9 is latest-to-base
        begin
          change = change_for_version_url(row[9])
        rescue StandardError => error
          puts "Row #{start_row + index}: #{error.message}"
          error_count += 1
          next
        end

        # A nil change means the row had no change URL: count it as skipped so
        # the summary below reflects what actually happened.
        unless change
          skipped_count += 1
          next
        end

        # Failures while annotating are not rescued: they abort the import,
        # and the `ensure` below still reports the progress made so far.
        change.annotate(annotation_data_for_row(row), user)
        annotated_count += 1

        puts "Annotated '#{change.version.page.url}' change '#{change.api_id}'" if verbose
      end

      tab_count += 1
    end
  ensure
    puts "\nRESULTS:"
    puts "--------"
    puts "Created #{annotated_count} annotations"
    puts "Skipped #{skipped_count} rows"
    puts "Errored #{error_count} rows"
    puts "In #{tab_count} spreadsheet tabs"
    puts ""
  end
end

# Resolves a change URL to the Change between the two versions it names,
# creating that Change if needed (`create: true`).
#
# Two URL shapes are recognized:
#   * Versionista URLs ending in `<id>:<id>`; the first ID is looked up as the
#     "to" version and the second as the "from" version.
#   * monitoring.envirodatagov.org page URLs ending in `<from>..<to>`.
#
# Returns nil when `url` is not present. Raises StandardError for a URL in
# neither format; errors from the version lookups are not rescued here.
def change_for_version_url(url)
  return unless url.present?

  if (versionista_ids = /versionista\.com\/\d+\/\d+\/(\d+):(\d+)/.match(url))
    # Versionista versions are matched on the ID stored in source_metadata.
    newer = Version.find_by!(
      "source_type = 'versionista' AND source_metadata->>'version_id' = ?",
      versionista_ids[1]
    )
    older = Version.find_by!(
      "source_type = 'versionista' AND source_metadata->>'version_id' = ?",
      versionista_ids[2]
    )
    Change.between(from: older, to: newer, create: true)
  elsif (own_ids = /monitoring\.envirodatagov\.org\/page\/[^\/]+\/([^\/.]+)\.\.([^\/.]+)/.match(url))
    # Our own URLs carry the version IDs directly, in from..to order.
    older = Version.find(own_ids[1])
    newer = Version.find(own_ids[2])
    Change.between(from: older, to: newer, create: true)
  else
    raise StandardError, "Unknown change URL format: '#{url}'"
  end
end

# Builds the annotation hash for one spreadsheet row.
#
# Starting at column index 17, the next 18 cells are read as checkboxes (any
# present value becomes true, anything else false) and the cell after them is
# copied verbatim as 'notes'. The result also carries the `_importer` marker.
def annotation_data_for_row(row)
  first_column = 17
  # fields from UI project, in spreadsheet column order
  checkbox_fields = %w[
    indiv_1 indiv_2 indiv_3 indiv_4 indiv_5 indiv_6
    repeat_7 repeat_8 repeat_9 repeat_10 repeat_11 repeat_12
    sig_1 sig_2 sig_3 sig_4 sig_5 sig_6
  ]

  annotation = { _importer: IMPORT_TYPE }
  checkbox_fields.each_with_index do |name, offset|
    annotation[name] = row[first_column + offset].present?
  end
  # The free-text notes cell sits immediately after the checkbox columns.
  annotation['notes'] = row[first_column + checkbox_fields.length]
  annotation
end

# Returns a Sheets service configured with this importer's application name
# and the credentials produced by `authorize_google`.
def get_client
  Google::Apis::SheetsV4::SheetsService.new.tap do |sheets|
    sheets.client_options.application_name = APPLICATION_NAME
    sheets.authorization = authorize_google
  end
end

# Obtains Google credentials with the read-only spreadsheets scope.
#
# Raises unless both GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET are set. The
# token store is backed by a fresh Tempfile; when the authorizer has no
# credentials for the 'default' user, an authorization URL is printed and the
# resulting code is read from STDIN and exchanged for credentials.
def authorize_google
  if ENV['GOOGLE_CLIENT_ID'].nil? || ENV['GOOGLE_CLIENT_SECRET'].nil?
    raise "You must have both `GOOGLE_CLIENT_ID` and `GOOGLE_CLIENT_SECRET` environment variables set."
  end

  authorizer = Google::Auth::UserAuthorizer.new(
    Google::Auth::ClientId.new(ENV['GOOGLE_CLIENT_ID'], ENV['GOOGLE_CLIENT_SECRET']),
    Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY,
    Google::Auth::Stores::FileTokenStore.new(file: Tempfile.new)
  )

  account = 'default'
  existing = authorizer.get_credentials(account)
  return existing unless existing.nil?

  # No usable credentials yet: walk the user through the copy-and-paste flow.
  authorization_url = authorizer.get_authorization_url(base_url: OOB_URI)
  puts "Open the following URL in the browser and enter the resulting code after authorization:"
  puts authorization_url
  pasted_code = STDIN.gets.strip
  authorizer.get_and_store_credentials_from_code(
    user_id: account,
    code: pasted_code,
    base_url: OOB_URI
  )
end

0 comments on commit 5810b3f

Please sign in to comment.