Skip to content

Commit

Permalink
update_pgcluster.yml: Wait for caches to warm up after reboot (#580)
Browse files Browse the repository at this point in the history
  • Loading branch information
vitabaks authored Feb 21, 2024
1 parent a2fe5cb commit 97e60ab
Show file tree
Hide file tree
Showing 23 changed files with 115 additions and 68 deletions.
4 changes: 4 additions & 0 deletions config_pgcluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@
- name: Include system variables
ansible.builtin.include_vars: "vars/system.yml"
tags: always

- name: Include OS-specific variables
ansible.builtin.include_vars: "vars/{{ ansible_os_family }}.yml"
tags: always
roles:
# finish (info)
- role: deploy-finish
2 changes: 1 addition & 1 deletion roles/patroni/tasks/custom_wal_dir.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
become: true
become_user: postgres
ansible.builtin.command: >
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "CHECKPOINT"
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "CHECKPOINT"
- name: Stop patroni service on the Replica (for create symlink)
become: true
Expand Down
29 changes: 21 additions & 8 deletions roles/update/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,30 @@ Update all system packages:
- `target`
- Defines the target for the update.
- Available values: 'postgres', 'patroni', 'system'
- Default value: postgres
- Default value: `postgres`
- `max_replication_lag_bytes`
- Determines the size of the replication lag above which the update will not be performed.
- If the lag is high, you will be prompted to try again later.
- Default value: 10485760 (10 MiB)
- Note: If the lag is high, you will be prompted to try again later.
- Default value: `10485760` (10 MiB)
- `max_transaction_sec`
- Determines the maximum transaction time, in the presence of which the update will not be performed.
- If long-running transactions are present, you will be prompted to try again later.
- Default value: 15 (seconds)
- Note: If long-running transactions are present, you will be prompted to try again later.
- Default value: `15` (seconds)
- `update_extensions`
- If 'true', an attempt will be made to automatically update all extensions for all databases.
- Specify 'false', to avoid updating extensions.
- Default value: true
- Attempt to automatically update all PostgreSQL extensions in all databases.
- Note: Specify 'false' to avoid updating extensions.
- Default value: `true`
- `reboot_host_after_update`
- Restart the server if it is required after the update.
- Default value: `true`
- `reboot_host_timeout`
- Maximum seconds to wait for machine to reboot and respond to a test command.
- Default value: `1800` (30 minutes)
- `reboot_host_post_delay`
- The waiting time (in minutes) for the caches to warm up after restarting the server before updating the next server.
- Note: Applicable when there are multiple replicas.
- Default value: `5` (minutes)

---

## Plan:
Expand Down Expand Up @@ -82,6 +93,8 @@ When using load balancing for read-only traffic (the "Type A" and "Type C" schem
- Disable `noloadbalance`, `nosync`, `nofailover` parameters in the patroni.yml
- Reload patroni service
- Make sure replica endpoint is available
- Wait N minutes for caches to warm up after reboot
- Note: variable `reboot_host_post_delay`
- Perform the same steps for the next replica server.
#### 3. UPDATE: Primary
- Switchover Patroni leader role
Expand Down
2 changes: 1 addition & 1 deletion roles/update/tasks/extensions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

- name: Get a list of databases
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select datname from pg_catalog.pg_database where datname <> 'template0'"
register: databases_list
changed_when: false
Expand Down
8 changes: 4 additions & 4 deletions roles/update/tasks/pre_checks.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---
- name: '[Pre-Check] (ALL) Test PostgreSQL DB Access'
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc 'select 1'
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc 'select 1'
changed_when: false

- name: '[Pre-Check] Make sure that physical replication is active'
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select count(*) from pg_stat_replication
where application_name != 'pg_basebackup'"
register: pg_replication_state
Expand All @@ -24,7 +24,7 @@

- name: '[Pre-Check] Make sure there is no high replication lag (more than {{ max_replication_lag_bytes | human_readable }})'
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select pg_wal_lsn_diff(pg_current_wal_lsn(),replay_lsn) pg_lag_bytes
from pg_stat_replication
order by pg_lag_bytes desc limit 1"
Expand Down Expand Up @@ -53,7 +53,7 @@

- name: '[Pre-Check] Make sure there are no long-running transactions (more than {{ max_transaction_sec }} seconds)'
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select pid, usename, client_addr, clock_timestamp() - xact_start as xact_age,
state, wait_event_type ||':'|| wait_event as wait_events,
left(regexp_replace(query, E'[ \\t\\n\\r]+', ' ', 'g'),100) as query
Expand Down
10 changes: 10 additions & 0 deletions roles/update/tasks/start_traffic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,14 @@
delay: 2
environment:
no_proxy: "{{ inventory_hostname }}"

# Warming up caches after reboot (if 'reboot_host_post_delay' is defined)
# Pauses the play for 'reboot_host_post_delay' minutes so that the rebooted
# replica can warm up its caches before the next server is updated.
- name: "Wait {{ reboot_host_post_delay }} minutes for caches to warm up after reboot"
ansible.builtin.pause:
minutes: "{{ reboot_host_post_delay }}"
when:
# only if the reboot task was executed and actually rebooted this host
- (reboot_result.rebooted is defined and reboot_result.rebooted)
# only if a positive delay is configured
- (reboot_host_post_delay is defined and reboot_host_post_delay | int > 0)
# only for hosts in the 'secondary' group, and only when that group has more than one member
- (inventory_hostname in groups['secondary'] and groups['secondary'] | length > 1)

...
4 changes: 2 additions & 2 deletions roles/update/tasks/stop_services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
become: true
become_user: postgres
ansible.builtin.command: >
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "CHECKPOINT"
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "CHECKPOINT"
- name: "Stop Patroni service on the Cluster Replica ({{ ansible_hostname }})"
become: true
Expand All @@ -32,7 +32,7 @@
become: true
become_user: postgres
ansible.builtin.command: >
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "CHECKPOINT"
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc "CHECKPOINT"
- name: "Stop Patroni service on the old Cluster Leader ({{ ansible_hostname }})"
become: true
Expand Down
2 changes: 1 addition & 1 deletion roles/update/tasks/stop_traffic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
become: true
become_user: postgres
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select count(*)
from pg_stat_activity
where pid <> pg_backend_pid()
Expand Down
6 changes: 5 additions & 1 deletion roles/update/tasks/system.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
- ansible_os_family == "RedHat"
- ansible_distribution_major_version >= '8'

# Reboot (if 'reboot_host_after_update' is 'true')
- name: Check if a reboot is required
ansible.builtin.stat:
path: /var/run/reboot-required
Expand All @@ -76,6 +77,7 @@
when:
- ansible_os_family == "Debian"
- ansible_virtualization_type not in ['container', 'docker', 'lxc', 'podman'] # exclude for containers to prevent test failures in CI.
- reboot_host_after_update | bool

- name: Check if a reboot is required
ansible.builtin.command: needs-restarting -r
Expand All @@ -85,12 +87,14 @@
when:
- ansible_os_family == "RedHat"
- ansible_virtualization_type not in ['container', 'docker', 'lxc', 'podman'] # exclude for containers to prevent test failures in CI.
- reboot_host_after_update | bool

- name: Rebooting host
ansible.builtin.reboot:
msg: "Reboot initiated by Ansible due to required system updates"
reboot_timeout: 1800 # 30 minutes
reboot_timeout: "{{ reboot_host_timeout | int }}"
test_command: uptime
register: reboot_result
when: (reboot_required_debian.stat.exists is defined and reboot_required_debian.stat.exists) or
(reboot_required_rhel.rc is defined and reboot_required_rhel.rc != 0)

Expand Down
10 changes: 5 additions & 5 deletions roles/update/tasks/update_extensions.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
- name: Get a list of old PostgreSQL extensions
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
"select extname from pg_extension e
join pg_available_extensions ae on extname = ae.name
where installed_version <> default_version"
Expand All @@ -21,7 +21,7 @@
# excluding: 'pg_repack' (if it exists), as it requires re-creation to update
- name: Update old PostgreSQL extensions
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
"ALTER EXTENSION {{ item }} UPDATE"
ignore_errors: true
loop: "{{ pg_old_extensions.stdout_lines | reject('match', '^pg_repack$') | list }}"
Expand All @@ -36,7 +36,7 @@
# excluding: 'pg_stat_statements', because extension pg_stat_kcache depends on it (will be re-created)
- name: Update old PostgreSQL extensions
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
"ALTER EXTENSION {{ item }} UPDATE"
ignore_errors: true
loop: "{{ pg_old_extensions.stdout_lines | reject('match', '^(pg_repack|pg_stat_statements|pg_stat_kcache)$') | list }}"
Expand All @@ -45,7 +45,7 @@
# re-create 'pg_stat_statements' and 'pg_stat_kcache' if an update is required
- name: Recreate old pg_stat_statements and pg_stat_kcache extensions to update
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
"DROP EXTENSION pg_stat_statements CASCADE;
CREATE EXTENSION pg_stat_statements;
CREATE EXTENSION pg_stat_kcache"
Expand All @@ -58,7 +58,7 @@
# re-create the 'pg_repack' if it exists and an update is required
- name: Recreate old pg_repack extension to update
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d {{ pg_target_dbname }} -tAXc
"DROP EXTENSION pg_repack;
CREATE EXTENSION pg_repack;"
when:
Expand Down
14 changes: 14 additions & 0 deletions roles/update/vars/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
# Variables of the 'update' role.
# NOTE(review): this file lives in the role's vars/ directory; role vars take
# precedence over inventory variables in Ansible - confirm that overriding
# these values is expected to be done via extra-vars.

# Defines the target for the update.
# Available values: 'postgres', 'patroni', 'system'
target: postgres

# Attempt will be made to automatically update all PostgreSQL extensions
# in all databases.
update_extensions: true

# if target=system
# Restart the server if it is required after the update.
reboot_host_after_update: true
# Maximum seconds to wait for machine to reboot and respond to a test command.
reboot_host_timeout: 1800
# The waiting time (in minutes) for the caches to warm up after restarting
# the server before updating the next server.
reboot_host_post_delay: 5

# pre-checks vars
# (10 MiB) Determines the size of the replication lag above which the update
# will not be performed.
max_replication_lag_bytes: 10485760
# (seconds) Determines the maximum transaction time, in the presence of which
# the update will not be performed.
max_transaction_sec: 15
2 changes: 1 addition & 1 deletion roles/upgrade/tasks/extensions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

- name: Get a list of databases
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select datname from pg_catalog.pg_database where datname <> 'template0'"
register: databases_list
changed_when: false
Expand Down
4 changes: 2 additions & 2 deletions roles/upgrade/tasks/initdb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

- name: Get the current install user
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select rolname from pg_roles where oid = 10"
changed_when: false
register: pg_install_user
Expand All @@ -49,7 +49,7 @@

- name: Get the current encodig and data_checksums settings
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ postgresql_bin_dir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"show {{ item }}"
changed_when: false
register: pg_settings
Expand Down
8 changes: 4 additions & 4 deletions roles/upgrade/tasks/post_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

- name: Make sure that physical replication is active
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select count(*) from pg_stat_replication
where application_name != 'pg_basebackup'"
register: pg_replication_state
Expand All @@ -28,15 +28,15 @@

- name: Create a table "test_replication" with 10000 rows on the Primary
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"drop table IF EXISTS test_replication;
create table test_replication as select generate_series(1, 10000)"
when:
- inventory_hostname in groups['primary']

- name: Wait until the PostgreSQL replica is synchronized
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select count(*) from test_replication"
register: count_test
until: count_test.stdout | int == 10000
Expand All @@ -49,7 +49,7 @@

- name: Drop a table "test_replication"
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"drop table IF EXISTS test_replication"
when:
- inventory_hostname in groups['primary']
Expand Down
4 changes: 2 additions & 2 deletions roles/upgrade/tasks/post_upgrade.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

- name: Ensure the current data directory is the new data directory
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"show data_directory"
changed_when: false
register: pg_current_datadir
Expand Down Expand Up @@ -165,7 +165,7 @@
- name: Check the current PostgreSQL version
run_once: true
ansible.builtin.command: >-
psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
{{ pg_new_bindir }}/psql -p {{ postgresql_port }} -U {{ patroni_superuser_username }} -d postgres -tAXc
"select current_setting('server_version')"
register: postgres_version
changed_when: false
Expand Down
Loading

0 comments on commit 97e60ab

Please sign in to comment.