From 20ed3a0b323a4803aaea229e262ec522a09fb8f7 Mon Sep 17 00:00:00 2001 From: Ben Sheldon Date: Tue, 21 May 2019 07:46:21 -0700 Subject: [PATCH 1/2] Delete a newly created page if the version import fails --- app/jobs/import_versions_job.rb | 52 ++++++++++++++++----------- test/jobs/import_versions_job_test.rb | 36 +++++++++++++++++++ test/test_helper.rb | 1 + 3 files changed, 68 insertions(+), 21 deletions(-) diff --git a/app/jobs/import_versions_job.rb b/app/jobs/import_versions_job.rb index 227b8db8..e36b6464 100644 --- a/app/jobs/import_versions_job.rb +++ b/app/jobs/import_versions_job.rb @@ -76,33 +76,43 @@ def import_record(record) return if existing && @import.skip_existing_records? - version = version_for_record(record, existing, @import.update_behavior) - version.page = page + begin + version = version_for_record(record, existing, @import.update_behavior) + version.page = page - if version.uri.nil? - if record['content'] - # TODO: upload content - raise Api::NotImplementedError, 'Raw content uploading not implemented yet.' + if version.uri.nil? + if record['content'] + # TODO: upload content + raise Api::NotImplementedError, 'Raw content uploading not implemented yet.' + end + elsif !Archiver.already_archived?(version.uri) || !version.version_hash + result = Archiver.archive(version.uri, expected_hash: version.version_hash) + version.version_hash = result[:hash] + if result[:url] != version.uri + version.source_metadata['original_url'] = version.uri + version.uri = result[:url] + end end - elsif !Archiver.already_archived?(version.uri) || !version.version_hash - result = Archiver.archive(version.uri, expected_hash: version.version_hash) - version.version_hash = result[:hash] - if result[:url] != version.uri - version.source_metadata['original_url'] = version.uri - version.uri = result[:url] + + if @import.skip_unchanged_versions? && version_changed?(version) + warn "Skipped version identical to previous. URL: #{page.url}, capture_time: #{version.capture_time}, source_type: #{version.source_type}" + return end - end - if @import.skip_unchanged_versions? && version_changed?(version) - warn "Skipped version identical to previous. URL: #{page.url}, capture_time: #{version.capture_time}, source_type: #{version.source_type}" - return - end + version.validate! + version.update_different_attribute(save: false) + version.save - version.validate! - version.update_different_attribute(save: false) - version.save + @added << version unless existing + rescue StandardError + # Delete a newly created page if the version import fails + if page.uuid_previously_changed? && page.versions.empty? + page.maintainers = [] + page.destroy! + end - @added << version unless existing + raise + end end def version_for_record(record, existing_version = nil, update_behavior = 'replace') diff --git a/test/jobs/import_versions_job_test.rb b/test/jobs/import_versions_job_test.rb index 7772e87b..ca4e9ea1 100644 --- a/test/jobs/import_versions_job_test.rb +++ b/test/jobs/import_versions_job_test.rb @@ -39,6 +39,42 @@ def teardown assert_equal(original_data['source_metadata'], versions(:page1_v1).source_metadata, 'source_metadata was changed') end + test 'does not add a page if there is an error processing versions' do + bad_page_url = 'http://badpage.com' + + import = Import.create_with_data( + { + user: users(:alice) + }, + [ + { + page_url: bad_page_url, + page_title: 'Bad Page', + site_agency: 'Bad Agency', + site_name: 'Bad Administration', + capture_time: 5.days.ago, + uri: 'https://test-bucket.s3.amazonaws.com/example-v1', + version_hash: 'INVALID_HASH', + source_type: 'versionista', + source_metadata: { test_meta: 'data' } + } + ].map(&:to_json).join("\n") + ) + + archive_stub = proc do |_url| + raise StandardError + end + + Archiver.stub :already_archived?, false do + Archiver.stub :archive, archive_stub do + ImportVersionsJob.perform_now(import) + + imported_page = Page.find_by(url: bad_page_url) + assert_nil imported_page, 'Should not import page with bad versions' + end + end + end + test 'replaces an existing version if requested' do page_versions_count = pages(:home_page).versions.count diff --git a/test/test_helper.rb b/test/test_helper.rb index 709e5cac..f0a6b25b 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,6 +1,7 @@ ENV['RAILS_ENV'] ||= 'test' require File.expand_path('../config/environment', __dir__) require 'rails/test_help' +require 'minitest/autorun' require 'webmock/minitest' class ActiveSupport::TestCase From 9372f58b36dc57e1c467f497fb955a82c7e3e83f Mon Sep 17 00:00:00 2001 From: Ben Sheldon Date: Tue, 21 May 2019 19:49:23 -0700 Subject: [PATCH 2/2] Improve empty versions check --- app/jobs/import_versions_job.rb | 2 +- test/jobs/import_versions_job_test.rb | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/app/jobs/import_versions_job.rb b/app/jobs/import_versions_job.rb index e36b6464..866ad82c 100644 --- a/app/jobs/import_versions_job.rb +++ b/app/jobs/import_versions_job.rb @@ -106,7 +106,7 @@ def import_record(record) @added << version unless existing rescue StandardError # Delete a newly created page if the version import fails - if page.uuid_previously_changed? && page.versions.empty? + if page.uuid_previously_changed? && page.versions.count.zero? page.maintainers = [] page.destroy! end diff --git a/test/jobs/import_versions_job_test.rb b/test/jobs/import_versions_job_test.rb index ca4e9ea1..c59727b6 100644 --- a/test/jobs/import_versions_job_test.rb +++ b/test/jobs/import_versions_job_test.rb @@ -70,7 +70,13 @@ def teardown ImportVersionsJob.perform_now(import) imported_page = Page.find_by(url: bad_page_url) - assert_nil imported_page, 'Should not import page with bad versions' + assert_nil imported_page, 'Should delete newly created page on version error' + + Page.create! url: bad_page_url + ImportVersionsJob.perform_now(import) + + imported_page = Page.find_by(url: bad_page_url) + assert imported_page, 'Should NOT delete existing page on version error' end end end