From 933ece35ab48c7bdd5c14becfe5e6aa50149a9e0 Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Sat, 18 Feb 2023 16:22:08 +0100 Subject: [PATCH 1/2] Add healthcheck before deploy --- README.md | 14 +++++++- lib/mrsk/cli/healthcheck.rb | 29 ++++++++++++++++ lib/mrsk/cli/main.rb | 9 +++++ lib/mrsk/commander.rb | 4 +++ lib/mrsk/commands/app.rb | 4 --- lib/mrsk/commands/base.rb | 4 +++ lib/mrsk/commands/healthcheck.rb | 46 ++++++++++++++++++++++++++ lib/mrsk/configuration.rb | 6 ++++ lib/mrsk/configuration/role.rb | 2 +- test/commands/app_test.rb | 8 +++++ test/commands/healthcheck_test.rb | 55 +++++++++++++++++++++++++++++++ 11 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 lib/mrsk/cli/healthcheck.rb create mode 100644 lib/mrsk/commands/healthcheck.rb create mode 100644 test/commands/healthcheck_test.rb diff --git a/README.md b/README.md index 4cabbee0e..d5c2b9c9e 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ env: Then edit your `.env` file to add your registry password as `MRSK_REGISTRY_PASSWORD` (and your `RAILS_MASTER_KEY` for production with a Rails app). -Finally, you have to ensure your application can answer `200 OK` to a `GET /up` request. That's how the zero-downtime deploy process knows that your new version is ready to serve traffic. +Finally, you have to ensure your application can answer `200 OK` to a `GET /up` request (or configure an alternative health path). That's how the zero-downtime deploy process knows that your new version is ready to serve traffic. Now you're ready to deploy to the servers: @@ -370,6 +370,18 @@ That'll post a line like follows to a preconfigured chatbot in Basecamp: [My App] [2023-02-18 11:29:52] [dhh] Rolled back to version d264c4e92470ad1bd18590f04466787262f605de ``` +### Using custom healthcheck path or port + +MRSK defaults to checking the health of your application against `/up` on port 3000. 
You can tailor both with the `healthcheck` setting: + +```yaml +healthcheck: + path: /healthz + port: 4000 +``` + +This will ensure your application is configured with a traefik label for the healthcheck against `/healthz` and that the pre-deploy healthcheck that MRSK performs is done against the same path on port 4000. + ## Commands ### Running commands on servers diff --git a/lib/mrsk/cli/healthcheck.rb b/lib/mrsk/cli/healthcheck.rb new file mode 100644 index 000000000..fcaf6bc85 --- /dev/null +++ b/lib/mrsk/cli/healthcheck.rb @@ -0,0 +1,29 @@ +class Mrsk::Cli::Healthcheck < Mrsk::Cli::Base + desc "perform", "Health check the current version of the app" + def perform + on(MRSK.primary_host) do + begin + execute *MRSK.healthcheck.run + + target = "Health check against #{MRSK.config.healthcheck["path"]}" + + if capture_with_info(*MRSK.healthcheck.curl) == "200" + info "#{target} succeeded with 200 OK!" + else + # Catches any non-200 status code; curl exits 0 on HTTP errors since --fail is not passed + raise SSHKit::Command::Failed, "#{target} failed to return 200 OK!" + end + rescue SSHKit::Command::Failed => e + if e.message =~ /curl/ + # Catches curl itself failing (e.g. connection refused), not HTTP error statuses + raise SSHKit::Command::Failed, "#{target} failed to return 200 OK!" 
+ else + raise + end + ensure + execute *MRSK.healthcheck.stop, raise_on_non_zero_exit: false + execute *MRSK.healthcheck.remove, raise_on_non_zero_exit: false + end + end + end +end diff --git a/lib/mrsk/cli/main.rb b/lib/mrsk/cli/main.rb index d69e64fcf..8dac14ac6 100644 --- a/lib/mrsk/cli/main.rb +++ b/lib/mrsk/cli/main.rb @@ -23,6 +23,9 @@ def deploy say "Ensure Traefik is running...", :magenta invoke "mrsk:cli:traefik:boot" + say "Ensure app can pass healthcheck...", :magenta + invoke "mrsk:cli:healthcheck:perform" + invoke "mrsk:cli:app:boot" say "Prune old containers and images...", :magenta @@ -38,6 +41,9 @@ def redeploy say "Build and push app image...", :magenta invoke "mrsk:cli:build:deliver" + say "Ensure app can pass healthcheck...", :magenta + invoke "mrsk:cli:healthcheck:perform" + invoke "mrsk:cli:app:boot" end @@ -147,6 +153,9 @@ def version desc "build", "Build the application image" subcommand "build", Mrsk::Cli::Build + desc "healthcheck", "Healthcheck the application" + subcommand "healthcheck", Mrsk::Cli::Healthcheck + desc "prune", "Prune old application images and containers" subcommand "prune", Mrsk::Cli::Prune diff --git a/lib/mrsk/commander.rb b/lib/mrsk/commander.rb index d6f88b252..acab2c949 100644 --- a/lib/mrsk/commander.rb +++ b/lib/mrsk/commander.rb @@ -73,6 +73,10 @@ def auditor @auditor ||= Mrsk::Commands::Auditor.new(config) end + def healthcheck + @healthcheck ||= Mrsk::Commands::Healthcheck.new(config) + end + def with_verbosity(level) old_level = self.verbosity diff --git a/lib/mrsk/commands/app.rb b/lib/mrsk/commands/app.rb index 711be42e5..6deb49e84 100644 --- a/lib/mrsk/commands/app.rb +++ b/lib/mrsk/commands/app.rb @@ -75,10 +75,6 @@ def current_container_id docker :ps, "-q", *service_filter end - def container_id_for(container_name:) - docker :container, :ls, "-a", "-f", "name=#{container_name}", "-q" - end - def current_running_version # FIXME: Find more graceful way to extract the version from "app-version" than using 
sed and tail! pipe \ diff --git a/lib/mrsk/commands/base.rb b/lib/mrsk/commands/base.rb index f5d25c7cd..bcb47a615 100644 --- a/lib/mrsk/commands/base.rb +++ b/lib/mrsk/commands/base.rb @@ -17,6 +17,10 @@ def run_over_ssh(*command, host:) end end + def container_id_for(container_name:) + docker :container, :ls, "-a", "-f", "name=#{container_name}", "-q" + end + private def combine(*commands, by: "&&") commands diff --git a/lib/mrsk/commands/healthcheck.rb b/lib/mrsk/commands/healthcheck.rb new file mode 100644 index 000000000..8fd3b23c3 --- /dev/null +++ b/lib/mrsk/commands/healthcheck.rb @@ -0,0 +1,46 @@ +class Mrsk::Commands::Healthcheck < Mrsk::Commands::Base + EXPOSED_PORT = 3999 + + def run + web = config.role(:web) + + docker :run, + "-d", + "--name", container_name_with_version, + "-p", "#{EXPOSED_PORT}:#{config.healthcheck["port"]}", + "--label", "service=#{container_name}", + *web.env_args, + *config.volume_args, + config.absolute_image, + web.cmd + end + + def curl + [ :curl, "--silent", "--output", "/dev/null", "--write-out", "'%{http_code}'", health_url ] + end + + def stop + pipe \ + container_id_for(container_name: container_name), + xargs(docker(:stop)) + end + + def remove + pipe \ + container_id_for(container_name: container_name), + xargs(docker(:container, :rm)) + end + + private + def container_name + "healthcheck-#{config.service}" + end + + def container_name_with_version + "healthcheck-#{config.service_with_version}" + end + + def health_url + "http://localhost:#{EXPOSED_PORT}#{config.healthcheck["path"]}" + end +end diff --git a/lib/mrsk/configuration.rb b/lib/mrsk/configuration.rb index b17bb8c00..4043e951e 100644 --- a/lib/mrsk/configuration.rb +++ b/lib/mrsk/configuration.rb @@ -107,6 +107,7 @@ def volume_args end end + def ssh_user if raw_config.ssh.present? 
raw_config.ssh["user"] || "root" @@ -126,10 +127,15 @@ def ssh_options { user: ssh_user, proxy: ssh_proxy, auth_methods: [ "publickey" ] }.compact end + def audit_broadcast_cmd raw_config.audit_broadcast_cmd end + def healthcheck + { "path" => "/up", "port" => "3000" }.merge(raw_config.healthcheck || {}) + end + def valid? ensure_required_keys_present && ensure_env_available diff --git a/lib/mrsk/configuration/role.rb b/lib/mrsk/configuration/role.rb index dbac24bc7..5c07ac3a4 100644 --- a/lib/mrsk/configuration/role.rb +++ b/lib/mrsk/configuration/role.rb @@ -59,7 +59,7 @@ def traefik_labels if running_traefik? { "traefik.http.routers.#{config.service}.rule" => "'PathPrefix(`/`)'", - "traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => "/up", + "traefik.http.services.#{config.service}.loadbalancer.healthcheck.path" => config.healthcheck["path"], "traefik.http.services.#{config.service}.loadbalancer.healthcheck.interval" => "1s", "traefik.http.middlewares.#{config.service}.retry.attempts" => "3", "traefik.http.middlewares.#{config.service}.retry.initialinterval" => "500ms" diff --git a/test/commands/app_test.rb b/test/commands/app_test.rb index fd484bc8e..1523dbd7b 100644 --- a/test/commands/app_test.rb +++ b/test/commands/app_test.rb @@ -26,6 +26,14 @@ class CommandsAppTest < ActiveSupport::TestCase @app.run.join(" ") end + test "run with custom healthcheck path" do + @config[:healthcheck] = { "path" => "/healthz" } + + assert_equal \ + "docker run -d --restart unless-stopped --log-opt max-size=10m --name app-999 -e RAILS_MASTER_KEY=456 --label service=app --label role=web --label traefik.http.routers.app.rule='PathPrefix(`/`)' --label traefik.http.services.app.loadbalancer.healthcheck.path=/healthz --label traefik.http.services.app.loadbalancer.healthcheck.interval=1s --label traefik.http.middlewares.app.retry.attempts=3 --label traefik.http.middlewares.app.retry.initialinterval=500ms dhh/app:999", + @app.run.join(" ") + end + test "start" 
do assert_equal \ "docker start app-999", diff --git a/test/commands/healthcheck_test.rb b/test/commands/healthcheck_test.rb new file mode 100644 index 000000000..0216fa1fb --- /dev/null +++ b/test/commands/healthcheck_test.rb @@ -0,0 +1,55 @@ +require "test_helper" + +class CommandsHealthcheckTest < ActiveSupport::TestCase + setup do + @config = { + service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: [ "1.1.1.1" ], + traefik: { "args" => { "accesslog.format" => "json", "metrics.prometheus.buckets" => "0.1,0.3,1.2,5.0" } } + } + end + + test "run" do + assert_equal \ + "docker run -d --name healthcheck-app-123 -p 3999:3000 --label service=healthcheck-app dhh/app:123", + new_command.run.join(" ") + end + + test "run with custom port" do + @config[:healthcheck] = { "port" => 3001 } + + assert_equal \ + "docker run -d --name healthcheck-app-123 -p 3999:3001 --label service=healthcheck-app dhh/app:123", + new_command.run.join(" ") + end + + test "curl" do + assert_equal \ + "curl --silent --output /dev/null --write-out '%{http_code}' http://localhost:3999/up", + new_command.curl.join(" ") + end + + test "curl with custom path" do + @config[:healthcheck] = { "path" => "/healthz" } + + assert_equal \ + "curl --silent --output /dev/null --write-out '%{http_code}' http://localhost:3999/healthz", + new_command.curl.join(" ") + end + + test "stop" do + assert_equal \ + "docker container ls -a -f name=healthcheck-app -q | xargs docker stop", + new_command.stop.join(" ") + end + + test "remove" do + assert_equal \ + "docker container ls -a -f name=healthcheck-app -q | xargs docker container rm", + new_command.remove.join(" ") + end + + private + def new_command + Mrsk::Commands::Healthcheck.new(Mrsk::Configuration.new(@config, version: "123")) + end +end From 7afa9e0815f712c1293b325c6262c6158ea63009 Mon Sep 17 00:00:00 2001 From: David Heinemeier Hansson Date: Sat, 18 Feb 2023 16:23:46 +0100 Subject: [PATCH 2/2] Mention 
healthcheck as part of steps instead --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d5c2b9c9e..199e410f8 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,6 @@ env: Then edit your `.env` file to add your registry password as `MRSK_REGISTRY_PASSWORD` (and your `RAILS_MASTER_KEY` for production with a Rails app). -Finally, you have to ensure your application can answer `200 OK` to a `GET /up` request (or configure an alternative health path). That's how the zero-downtime deploy process knows that your new version is ready to serve traffic. - Now you're ready to deploy to the servers: ``` @@ -39,9 +37,10 @@ This will: 5. Push the image to the registry. 6. Pull the image from the registry on the servers. 7. Ensure Traefik is running and accepting traffic on port 80. -8. Stop any containers running a previous versions of the app. -9. Start a new container with the version of the app that matches the current git version hash. -10. Prune unused images and stopped containers to ensure servers don't fill up. +8. Ensure your app responds with `200 OK` to `GET /up`. +9. Stop any containers running previous versions of the app. +10. Start a new container with the version of the app that matches the current git version hash. +11. Prune unused images and stopped containers to ensure servers don't fill up. Voila! All the servers are now serving the app on port 80. If you're just running a single server, you're ready to go. If you're running multiple servers, you need to put a load balancer in front of them.