From c5dc30f57cfbb8d2471c05c6c68c99ab0a526ee2 Mon Sep 17 00:00:00 2001 From: Barry Gordon Date: Mon, 11 Jul 2022 16:16:23 +0100 Subject: [PATCH 1/4] Move RegistryClient from maven to common --- common/lib/dependabot/registry_client.rb | 55 ++++++++++++++++++ .../spec/dependabot}/registry_client_spec.rb | 4 +- maven/lib/dependabot/maven.rb | 1 - .../file_parser/property_value_finder.rb | 3 +- .../maven/file_parser/repositories_finder.rb | 3 +- maven/lib/dependabot/maven/metadata_finder.rb | 5 +- maven/lib/dependabot/maven/registry_client.rb | 57 ------------------- .../maven/update_checker/version_finder.rb | 5 +- 8 files changed, 67 insertions(+), 66 deletions(-) create mode 100644 common/lib/dependabot/registry_client.rb rename {maven/spec/dependabot/maven => common/spec/dependabot}/registry_client_spec.rb (98%) delete mode 100644 maven/lib/dependabot/maven/registry_client.rb diff --git a/common/lib/dependabot/registry_client.rb b/common/lib/dependabot/registry_client.rb new file mode 100644 index 00000000000..a1fa7bad4e6 --- /dev/null +++ b/common/lib/dependabot/registry_client.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +require "dependabot/shared_helpers" + +# This class provides a thin wrapper around our normal usage of Excon as a simple HTTP client in order to +# provide some minor caching functionality. +# +# This is not used to support full response caching currently, we just use it to ensure we detect unreachable +# hosts and fast-fail on any subsequent requests to them to avoid excessive use of retries and connect- or +# read-timeouts as Maven jobs tend to be sensitive to exceeding our overall 45 minute timeout. +module Dependabot + class RegistryClient + @cached_errors = {} + + def self.get(url:, headers: {}, options: {}) + raise cached_error_for(url) if cached_error_for(url) + + Excon.get( + url, + idempotent: true, + **SharedHelpers.excon_defaults({ headers: headers }.merge(options)) + ) + rescue Excon::Error::Timeout => e + cache_error(url, e) + raise e + end + + def self.head(url:, headers: {}, options: {}) + raise cached_error_for(url) if cached_error_for(url) + + Excon.head( + url, + idempotent: true, + **SharedHelpers.excon_defaults({ headers: headers }.merge(options)) + ) + rescue Excon::Error::Timeout => e + cache_error(url, e) + raise e + end + + def self.clear_cache! + @cached_errors = {} + end + + private_class_method def self.cache_error(url, error) + host = URI(url).host + @cached_errors[host] = error + end + + private_class_method def self.cached_error_for(url) + host = URI(url).host + @cached_errors.fetch(host, nil) + end + end +end diff --git a/maven/spec/dependabot/maven/registry_client_spec.rb b/common/spec/dependabot/registry_client_spec.rb similarity index 98% rename from maven/spec/dependabot/maven/registry_client_spec.rb rename to common/spec/dependabot/registry_client_spec.rb index 6f12984f18a..5d578346b82 100644 --- a/maven/spec/dependabot/maven/registry_client_spec.rb +++ b/common/spec/dependabot/registry_client_spec.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true require "spec_helper" -require "dependabot/maven/registry_client" +require "dependabot/registry_client" -RSpec.describe Dependabot::Maven::RegistryClient do +RSpec.describe Dependabot::RegistryClient do let(:url) { "https://example.com" } let(:maven_defaults) do { idempotent: true } diff --git a/maven/lib/dependabot/maven.rb b/maven/lib/dependabot/maven.rb index 90dbc23f1ed..cf9f2a23901 100644 --- a/maven/lib/dependabot/maven.rb +++ b/maven/lib/dependabot/maven.rb @@ -9,7 +9,6 @@ require "dependabot/maven/metadata_finder" require "dependabot/maven/requirement" require "dependabot/maven/version" -require "dependabot/maven/registry_client" require "dependabot/pull_request_creator/labeler" Dependabot::PullRequestCreator::Labeler. diff --git a/maven/lib/dependabot/maven/file_parser/property_value_finder.rb b/maven/lib/dependabot/maven/file_parser/property_value_finder.rb index bfe03352373..e165262a347 100644 --- a/maven/lib/dependabot/maven/file_parser/property_value_finder.rb +++ b/maven/lib/dependabot/maven/file_parser/property_value_finder.rb @@ -4,6 +4,7 @@ require "dependabot/dependency_file" require "dependabot/maven/file_parser" +require "dependabot/registry_client" # For documentation, see: # - http://maven.apache.org/guides/introduction/introduction-to-the-pom.html @@ -127,7 +128,7 @@ def fetch_remote_parent_pom(group_id, artifact_id, version, pom) url = remote_pom_url(group_id, artifact_id, version, base_url) @maven_responses ||= {} - @maven_responses[url] ||= RegistryClient.get(url: url) + @maven_responses[url] ||= Dependabot::RegistryClient.get(url: url) next unless @maven_responses[url].status == 200 next unless pom?(@maven_responses[url].body) diff --git a/maven/lib/dependabot/maven/file_parser/repositories_finder.rb b/maven/lib/dependabot/maven/file_parser/repositories_finder.rb index 316cb4e9e9c..20b5b3e7d1d 100644 --- a/maven/lib/dependabot/maven/file_parser/repositories_finder.rb +++ b/maven/lib/dependabot/maven/file_parser/repositories_finder.rb @@ -4,6 +4,7 @@ require "dependabot/dependency_file" require "dependabot/maven/file_parser" +require "dependabot/registry_client" require "dependabot/errors" # For documentation, see: @@ -109,7 +110,7 @@ def fetch_remote_parent_pom(group_id, artifact_id, version, repo_urls) url = remote_pom_url(group_id, artifact_id, version, base_url) @maven_responses ||= {} - @maven_responses[url] ||= RegistryClient.get( + @maven_responses[url] ||= Dependabot::RegistryClient.get( url: url, # We attempt to find dependencies in private repos before failing over to the CENTRAL_REPO_URL, # but this can burn a lot of a job's time against slow servers due to our `read_timeout` being 20 seconds. diff --git a/maven/lib/dependabot/maven/metadata_finder.rb b/maven/lib/dependabot/maven/metadata_finder.rb index 7844a2ff0a7..efc7f58b0b0 100644 --- a/maven/lib/dependabot/maven/metadata_finder.rb +++ b/maven/lib/dependabot/maven/metadata_finder.rb @@ -7,6 +7,7 @@ require "dependabot/maven/file_parser" require "dependabot/maven/file_parser/repositories_finder" require "dependabot/maven/utils/auth_headers_finder" +require "dependabot/registry_client" module Dependabot module Maven @@ -104,7 +105,7 @@ def source_from_anywhere_in_pom(pom) def dependency_pom_file return @dependency_pom_file unless @dependency_pom_file.nil? - response = RegistryClient.get( + response = Dependabot::RegistryClient.get( url: "#{maven_repo_dependency_url}/#{dependency.version}/#{dependency_artifact_id}-#{dependency.version}.pom", headers: auth_headers ) @@ -134,7 +135,7 @@ def parent_pom_file(pom) "#{version}/"\ "#{artifact_id}-#{version}.pom" - response = RegistryClient.get( + response = Dependabot::RegistryClient.get( url: substitute_properties_in_source_url(url, pom), headers: auth_headers ) diff --git a/maven/lib/dependabot/maven/registry_client.rb b/maven/lib/dependabot/maven/registry_client.rb deleted file mode 100644 index a96b3be5cf1..00000000000 --- a/maven/lib/dependabot/maven/registry_client.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -require "dependabot/shared_helpers" - -# This class provides a thin wrapper around our normal usage of Excon as a simple HTTP client in order to -# provide some minor caching functionality. -# -# This is not used to support full response caching currently, we just use it to ensure we detect unreachable -# hosts and fast-fail on any subsequent requests to them to avoid excessive use of retries and connect- or -# read-timeouts as Maven jobs tend to be sensitive to exceeding our overall 45 minute timeout. -module Dependabot - module Maven - class RegistryClient - @cached_errors = {} - - def self.get(url:, headers: {}, options: {}) - raise cached_error_for(url) if cached_error_for(url) - - Excon.get( - url, - idempotent: true, - **SharedHelpers.excon_defaults({ headers: headers }.merge(options)) - ) - rescue Excon::Error::Timeout => e - cache_error(url, e) - raise e - end - - def self.head(url:, headers: {}, options: {}) - raise cached_error_for(url) if cached_error_for(url) - - Excon.head( - url, - idempotent: true, - **SharedHelpers.excon_defaults({ headers: headers }.merge(options)) - ) - rescue Excon::Error::Timeout => e - cache_error(url, e) - raise e - end - - def self.clear_cache! - @cached_errors = {} - end - - private_class_method def self.cache_error(url, error) - host = URI(url).host - @cached_errors[host] = error - end - - private_class_method def self.cached_error_for(url) - host = URI(url).host - @cached_errors.fetch(host, nil) - end - end - end -end diff --git a/maven/lib/dependabot/maven/update_checker/version_finder.rb b/maven/lib/dependabot/maven/update_checker/version_finder.rb index 40197c238ed..d72f4accead 100644 --- a/maven/lib/dependabot/maven/update_checker/version_finder.rb +++ b/maven/lib/dependabot/maven/update_checker/version_finder.rb @@ -7,6 +7,7 @@ require "dependabot/maven/version" require "dependabot/maven/requirement" require "dependabot/maven/utils/auth_headers_finder" +require "dependabot/registry_client" module Dependabot module Maven @@ -138,7 +139,7 @@ def released?(version) @released_check[version] = repositories.any? do |repository_details| url = repository_details.fetch("url") - response = RegistryClient.head( + response = Dependabot::RegistryClient.head( url: dependency_files_url(url, version), headers: repository_details.fetch("auth_headers") ) @@ -160,7 +161,7 @@ def dependency_metadata(repository_details) end def fetch_dependency_metadata(repository_details) - response = RegistryClient.get( + response = Dependabot::RegistryClient.get( url: dependency_metadata_url(repository_details.fetch("url")), headers: repository_details.fetch("auth_headers") ) From f815e99f3c59cce7ac66b4444004ef7622b381eb Mon Sep 17 00:00:00 2001 From: Barry Gordon Date: Wed, 13 Jul 2022 11:26:10 +0100 Subject: [PATCH 2/4] Clear the RegistryClient after each test --- common/spec/spec_helper.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/common/spec/spec_helper.rb b/common/spec/spec_helper.rb index 1e8d99c9452..7fbeacdd511 100644 --- a/common/spec/spec_helper.rb +++ b/common/spec/spec_helper.rb @@ -10,6 +10,7 @@ require "uri" require "dependabot/dependency_file" +require "dependabot/registry_client" require_relative "dummy_package_manager/dummy" require_relative "warning_monkey_patch" @@ -38,6 +39,11 @@ config.mock_with(:rspec) { |mocks| mocks.verify_partial_doubles = true } config.raise_errors_for_deprecations! + config.after do + # Ensure we clear any cached timeouts between tests + Dependabot::RegistryClient.clear_cache! + end + config.around do |example| if example.metadata[:profile] example_name = example.metadata[:full_description].strip.gsub(/[\s#\.-]/, "_").gsub("::", "_").downcase From 86f6941ca49e117e6b976be380dca5a0d99cb12f Mon Sep 17 00:00:00 2001 From: Barry Gordon Date: Wed, 13 Jul 2022 14:13:58 +0100 Subject: [PATCH 3/4] [npm/yarn] Cache unreachable registry hosts --- .../npm_and_yarn/metadata_finder.rb | 16 ++----- .../update_checker/latest_version_finder.rb | 46 ++++++++----------- .../update_checker/library_detector.rb | 7 +-- .../update_checker/registry_finder.rb | 12 ++--- 4 files changed, 27 insertions(+), 54 deletions(-) diff --git a/npm_and_yarn/lib/dependabot/npm_and_yarn/metadata_finder.rb b/npm_and_yarn/lib/dependabot/npm_and_yarn/metadata_finder.rb index c9fccfc95e3..2f6a4ef276c 100644 --- a/npm_and_yarn/lib/dependabot/npm_and_yarn/metadata_finder.rb +++ b/npm_and_yarn/lib/dependabot/npm_and_yarn/metadata_finder.rb @@ -5,7 +5,7 @@ require "dependabot/metadata_finders" require "dependabot/metadata_finders/base" -require "dependabot/shared_helpers" +require "dependabot/registry_client" require "dependabot/npm_and_yarn/update_checker/registry_finder" require "dependabot/npm_and_yarn/version" @@ -136,12 +136,7 @@ def find_source_from_git_url def latest_version_listing return @latest_version_listing if defined?(@latest_version_listing) - response = Excon.get( - "#{dependency_url}/latest", - idempotent: true, - **SharedHelpers.excon_defaults(headers: registry_auth_headers) - ) - + response = Dependabot::RegistryClient.get(url: "#{dependency_url}/latest", headers: registry_auth_headers) return @latest_version_listing = JSON.parse(response.body) if response.status == 200 @latest_version_listing = {} @@ -161,12 +156,7 @@ def all_version_listings def npm_listing return @npm_listing unless @npm_listing.nil? - response = Excon.get( - dependency_url, - idempotent: true, - **SharedHelpers.excon_defaults(headers: registry_auth_headers) - ) - + response = Dependabot::RegistryClient.get(url: dependency_url, headers: registry_auth_headers) return @npm_listing = {} if response.status >= 500 begin diff --git a/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/latest_version_finder.rb b/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/latest_version_finder.rb index dc7ebc0a502..92ffc78854c 100644 --- a/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/latest_version_finder.rb +++ b/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/latest_version_finder.rb @@ -227,18 +227,16 @@ def yanked?(version) @yanked[version] = begin - status = Excon.get( - dependency_url + "/#{version}", - idempotent: true, - **SharedHelpers.excon_defaults(headers: registry_auth_headers) + status = Dependabot::RegistryClient.get( + url: dependency_url + "/#{version}", + headers: registry_auth_headers ).status if status == 404 && dependency_registry != "registry.npmjs.org" # Some registries don't handle escaped package names properly - status = Excon.get( - dependency_url.gsub("%2F", "/") + "/#{version}", - idempotent: true, - **SharedHelpers.excon_defaults(headers: registry_auth_headers) + status = Dependabot::RegistryClient.get( + url: dependency_url.gsub("%2F", "/") + "/#{version}", + headers: registry_auth_headers ).status end @@ -257,10 +255,9 @@ def version_endpoint_working? @version_endpoint_working = begin - Excon.get( - dependency_url + "/latest", - idempotent: true, - **SharedHelpers.excon_defaults(headers: registry_auth_headers) + Dependabot::RegistryClient.get( + url: dependency_url + "/latest", + headers: registry_auth_headers ).status < 400 rescue Excon::Error::Timeout, Excon::Error::Socket # Give the benefit of the doubt if the registry is playing up @@ -291,10 +288,9 @@ def npm_details end def fetch_npm_response - response = Excon.get( - dependency_url, - idempotent: true, - **SharedHelpers.excon_defaults(headers: registry_auth_headers) + response = Dependabot::RegistryClient.get( + url: dependency_url, + headers: registry_auth_headers ) return response unless response.status == 500 @@ -307,12 +303,12 @@ def fetch_npm_response return unless decoded_token.include?(":") username, password = decoded_token.split(":") - Excon.get( - dependency_url, - user: username, - password: password, - idempotent: true, - **SharedHelpers.excon_defaults + Dependabot::RegistryClient.get( + url: dependency_url, + options: { + user: username, + password: password + } ) end @@ -349,11 +345,7 @@ def private_dependency_not_reachable?(npm_response) if dependency_registry == "registry.npmjs.org" return false unless dependency.name.start_with?("@") - web_response = Excon.get( - "https://www.npmjs.com/package/#{dependency.name}", - idempotent: true, - **SharedHelpers.excon_defaults - ) + web_response = Dependabot::RegistryClient.get(url: "https://www.npmjs.com/package/#{dependency.name}") # NOTE: returns 429 when the login page is rate limited return web_response.body.include?("Forgot password?") || web_response.status == 429 diff --git a/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/library_detector.rb b/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/library_detector.rb index 96170d64f5e..6244797e63c 100644 --- a/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/library_detector.rb +++ b/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/library_detector.rb @@ -36,12 +36,7 @@ def npm_response_matches_package_json? return false unless project_description # Check if the project is listed on npm. If it is, it's a library - @project_npm_response ||= Excon.get( - "https://registry.npmjs.org/#{escaped_project_name}", - idempotent: true, - **SharedHelpers.excon_defaults - ) - + @project_npm_response ||= Dependabot::RegistryClient.get(url: "https://registry.npmjs.org/#{escaped_project_name}") return false unless @project_npm_response.status == 200 @project_npm_response.body.force_encoding("UTF-8").encode. diff --git a/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/registry_finder.rb b/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/registry_finder.rb index 86127003026..90deb85b94e 100644 --- a/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/registry_finder.rb +++ b/npm_and_yarn/lib/dependabot/npm_and_yarn/update_checker/registry_finder.rb @@ -2,7 +2,7 @@ require "excon" require "dependabot/npm_and_yarn/update_checker" -require "dependabot/shared_helpers" +require "dependabot/registry_client" module Dependabot module NpmAndYarn @@ -53,13 +53,9 @@ def self.central_registry?(registry) def first_registry_with_dependency_details @first_registry_with_dependency_details ||= known_registries.find do |details| - response = Excon.get( - "https://#{details['registry'].gsub(%r{/+$}, '')}/"\ - "#{escaped_dependency_name}", - idempotent: true, - **SharedHelpers.excon_defaults( - headers: auth_header_for(details["token"]) - ) + response = Dependabot::RegistryClient.get( + url: "https://#{details['registry'].gsub(%r{/+$}, '')}/#{escaped_dependency_name}", + headers: auth_header_for(details["token"]) ) response.status < 400 && JSON.parse(response.body) rescue Excon::Error::Timeout, From 514efcdfc78c0d7cf8b4938fe975b6b822369a5c Mon Sep 17 00:00:00 2001 From: Barry Gordon <896971+brrygrdn@users.noreply.github.com> Date: Thu, 14 Jul 2022 11:01:58 +0100 Subject: [PATCH 4/4] Fix comment Co-authored-by: Jurre --- common/lib/dependabot/registry_client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/dependabot/registry_client.rb b/common/lib/dependabot/registry_client.rb index a1fa7bad4e6..baafd05f4f1 100644 --- a/common/lib/dependabot/registry_client.rb +++ b/common/lib/dependabot/registry_client.rb @@ -7,7 +7,7 @@ # # This is not used to support full response caching currently, we just use it to ensure we detect unreachable # hosts and fast-fail on any subsequent requests to them to avoid excessive use of retries and connect- or -# read-timeouts as Maven jobs tend to be sensitive to exceeding our overall 45 minute timeout. +# read-timeouts as some jobs tend to be sensitive to exceeding our overall 45 minute timeout. module Dependabot class RegistryClient @cached_errors = {}