Skip to content

Commit

Permalink
Inventory refresh for redeploy - fix (#60)
Browse files Browse the repository at this point in the history
* Inventory refresh for redeploy - fix

+ Some client playbooks will require that the dynamic inventory is accurate during redeploy.  Perform a re-acquisition of the inventory when hosts are removed/ added during redeploy.  Also refresh (clean) the inventory prior to adding hosts to the inventory, because this is called multiple times now.
+ Tidy up - allow cluster_vars.custom_tagslabels to be absent
+ Tidy up - replace deprecated _facts modules with _info
+ Tidy up - remove duplicated debug logs

* + Only run predelete role on retired hosts that are running.
+ Re-acquire the dynamic inventory after removing a host too.
+ Make the redeploy host list deterministic by sorting it pre-slice (bulletproofing for canary=start/finish).

* Fixes for _scheme_rmvm_rmdisk_only

* Remove accidentally added file

* Fix array-of-dict sort to work with Python3

* fix _scheme_addnewvm_rmdisk_rollback with canary=start
  • Loading branch information
dseeley-sky authored May 18, 2020
1 parent 908ec37 commit 59a0a48
Show file tree
Hide file tree
Showing 16 changed files with 67 additions and 30 deletions.
4 changes: 2 additions & 2 deletions EXAMPLE/group_vars/_skel/cluster_vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within
### AWS example
#cluster_vars:
# type: &cloud_type "aws"
# image: "ami-07042e91d04b1c30d" #eu-west-1, 18.04, amd64, hvm:ebs-ssd, 20200131. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/
# image: "ami-0964eb2dc8b836eb6" # eu-west-1, 18.04, amd64, hvm-ssd, 20200430. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/
# region: &region "eu-west-1"
# dns_zone_internal: "{{_region}}.compute.internal" # eu-west-1, us-west-2
# dns_zone_external: "{%- if dns_tld_external -%}{{_cloud_type}}-{{_region}}.{{app_class}}.{{buildenv}}.{{dns_tld_external}} {%- endif -%}"
Expand Down Expand Up @@ -86,7 +86,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within
### GCP example
#cluster_vars:
# type: &cloud_type "gcp"
# image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20191113"
# image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20200430"
# region: &region "europe-west1"
# dns_zone_internal: "c.{{gcp_credentials_json.project_id}}.internal"
# dns_zone_external: "{%- if dns_tld_external -%}{{_cloud_type}}-{{_region}}.{{app_class}}.{{buildenv}}.{{dns_tld_external}} {%- endif -%}"
Expand Down
3 changes: 2 additions & 1 deletion EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within

cluster_vars:
type: &cloud_type "aws"
image: "ami-07042e91d04b1c30d" #eu-west-1, 18.04, amd64, hvm:ebs-ssd, 20200131. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/
image: "ami-0964eb2dc8b836eb6" # eu-west-1, 18.04, amd64, hvm-ssd, 20200430. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/
region: &region "eu-west-1"
dns_zone_internal: "{{_region}}.compute.internal" # eu-west-1, us-west-2
dns_zone_external: "{%- if dns_tld_external -%}{{_cloud_type}}-{{_region}}.{{app_class}}.{{buildenv}}.{{dns_tld_external}} {%- endif -%}"
Expand All @@ -50,6 +50,7 @@ cluster_vars:
inv_service_id: "{{app_class}}"
inv_cluster_id: "{{cluster_name}}"
inv_cluster_type: "{{app_name}}"
inv_cost_centre: "1234"
secgroups_existing: []
secgroup_new:
- proto: "tcp"
Expand Down
3 changes: 2 additions & 1 deletion EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within

cluster_vars:
type: &cloud_type "gcp"
image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20191113"
image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20200430"
region: &region "europe-west1"
dns_zone_internal: "c.{{gcp_credentials_json.project_id}}.internal"
dns_zone_external: "{%- if dns_tld_external -%}{{_cloud_type}}-{{_region}}.{{app_class}}.{{buildenv}}.{{dns_tld_external}} {%- endif -%}"
Expand All @@ -55,6 +55,7 @@ cluster_vars:
inv_service_id: "{{app_class}}"
inv_cluster_id: "{{cluster_name}}"
inv_cluster_type: "{{app_name}}"
inv_cost_centre: "1234"
network_fw_tags: ["{{cluster_name}}-nwtag"]
firewall_rules:
- name: "{{cluster_name}}-extssh"
Expand Down
1 change: 1 addition & 0 deletions _dependencies/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." }
when: cluster_suffix is defined
- assert: { that: "'{%- for label in cluster_vars.custom_tagslabels -%}{% if not cluster_vars.custom_tagslabels[label] is regex('^[a-z\\d\\-_]{0,63}$') %}{{label}}: {{cluster_vars.custom_tagslabels[label]}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure all cluster_vars.custom_tagslabels are in the set [a-z\\d\\-_], and <63 characters long." }
when: "'custom_tagslabels' in cluster_vars"
- assert: { that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars -%}{% if ('version' in cluster_vars[buildenv].hosttype_vars[hosttype]) and (not cluster_vars[buildenv].hosttype_vars[hosttype].version is regex('^[a-z\\d\\-_]{0,63}$')) %}{{cluster_vars[buildenv].hosttype_vars[hosttype].version}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure cluster_vars[{{buildenv}}].hosttype_vars[hosttype].version is in the set [a-z\\d\\-_], and <63 characters long." }

- assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" }
Expand Down
20 changes: 11 additions & 9 deletions cluster_hosts/tasks/get_cluster_hosts_target.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,33 @@
{%- endfor %}
{{ res }}
- name: get_cluster_hosts_target | cluster_hosts_target
debug: msg="{{cluster_hosts_target}}"

- name: get_cluster_hosts_target/aws | AWS-specific modifications to cluster_hosts_target - add subnets.
block:
# Dynamically look up VPC ID by name from aws
- name: get_cluster_hosts_target | Looking up VPC facts to extract ID
ec2_vpc_net_facts:
ec2_vpc_net_info:
region: "{{ cluster_vars.region }}"
aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}"
aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}"
filters:
"tag:Name": "{{ cluster_vars[buildenv].vpc_name }}"
register: r__ec2_vpc_net_facts
register: r__ec2_vpc_net_info
delegate_to: localhost
run_once: true

- name: get_cluster_hosts_target/aws | Set VPC ID in variable
set_fact:
vpc_id: "{{ r__ec2_vpc_net_facts.vpcs[0].id }}"
vpc_id: "{{ r__ec2_vpc_net_info.vpcs[0].id }}"

- name: get_cluster_hosts_target/aws | Look up proxy subnet facts
ec2_vpc_subnet_facts:
ec2_vpc_subnet_info:
region: "{{ cluster_vars.region }}"
aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}"
aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}"
filters:
"tag:Name": "{{ cluster_vars[buildenv].vpc_subnet_name_prefix }}{{item}}"
vpc-id: "{{ vpc_id }}"
register: r__ec2_vpc_subnet_facts
register: r__ec2_vpc_subnet_info
with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}"
delegate_to: localhost
run_once: true
Expand All @@ -63,7 +60,7 @@
{%- endfor %}
{{ res }}
vars:
subnet_ids_per_az: "{{r__ec2_vpc_subnet_facts.results | json_query('[*].{az_name: item, subnet_id: subnets[0].id, subnet_name: subnets[0].tags.Name, vpc_id: subnets[0].vpc_id}') | dict_agg('az_name')}}"
subnet_ids_per_az: "{{r__ec2_vpc_subnet_info.results | json_query('[*].{az_name: item, subnet_id: subnets[0].id, subnet_name: subnets[0].tags.Name, vpc_id: subnets[0].vpc_id}') | dict_agg('az_name')}}"

- block:
- name: get_cluster_hosts_target/aws | Get snapshots info
Expand All @@ -84,7 +81,10 @@
fail_msg: "There are {{ _available_snapshots|length }} available snapshots and {{ cluster_hosts_target|length }} nodes. Snapshot restore available only to the same infrastructure size."
vars:
_available_snapshots: "{{ r__ebs_snapshots.snapshots|json_query('[].snapshot_id') }}"
delegate_to: localhost
run_once: true

## [ See github.com/ansible/ansible/issues/27299 for reason for '| to_json | from_json' ]
- name: get_cluster_hosts_target/aws | update cluster_hosts_target with snapshot_id
set_fact:
cluster_hosts_target: |
Expand Down Expand Up @@ -119,3 +119,5 @@

- name: get_cluster_hosts_target | cluster_hosts_target
debug: msg={{cluster_hosts_target}}
delegate_to: localhost
run_once: true
2 changes: 1 addition & 1 deletion create/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

- debug: msg="release_version = {{release_version}}"
vars:
current_release_versions: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current'].tagslabels.release\") | default([]) }}"
current_release_versions: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current' && tagslabels.release].tagslabels.release\") | default([]) }}"


- name: Create AWS cluster
Expand Down
2 changes: 1 addition & 1 deletion dynamic_inventory/tasks/aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
register: r__ec2_instance_info
delegate_to: localhost

- debug: msg={{r__ec2_instance_info}}
#- debug: msg={{r__ec2_instance_info}}

- name: dynamic_inventory/aws | Set dynamic_inventory_flat
set_fact:
Expand Down
2 changes: 1 addition & 1 deletion dynamic_inventory/tasks/gcp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
delegate_to: localhost
run_once: true

- debug: msg={{r__gcp_compute_instance_info}}
#- debug: msg={{r__gcp_compute_instance_info}}

- name: dynamic_inventory/gcp | Set dynamic_inventory_flat
set_fact:
Expand Down
5 changes: 5 additions & 0 deletions dynamic_inventory/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
with_items: "{{ dynamic_inventory_flat }}"
retries: 12

- name: dynamic_inventory | Refresh (clean it, because there is no file or plugin inventory defined) the in-memory inventory prior to building it (this is in case this module is called multiple times, and we otherwise only add hosts to existing inventory)
meta: refresh_inventory

- name: dynamic_inventory | Add hosts to dynamic inventory
add_host:
name: "{{ item.hostname }}"
Expand Down Expand Up @@ -52,3 +55,5 @@
vars:
new_inventory_file: "{{ inventory_file if (((stat_inventory_file.stat is defined and stat_inventory_file.stat.exists) or (stat_inventory_file.skipped is defined and stat_inventory_file.skipped)) and inventory_dir is defined and inventory_dir==playbook_dir) else playbook_dir + '/inventory_' + cluster_name }}"

- name: dynamic_inventory | current inventory_hostnames
debug: msg="{{ lookup('inventory_hostnames','all').split(',') }}"
11 changes: 8 additions & 3 deletions redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,14 @@
when: testfail is defined and testfail == "fail_1"
when: canary=="start" or canary=="none"

- name: re-acquire cluster_hosts_target and cluster_hosts_state
import_role:
name: clusterverse/cluster_hosts
- block:
- name: re-acquire cluster_hosts_target and cluster_hosts_state
import_role:
name: clusterverse/cluster_hosts

- name: re-acquire the dynamic inventory
include_role:
name: clusterverse/dynamic_inventory
when: canary=="none"

- name: canary==finish or canary==none
Expand Down
7 changes: 4 additions & 3 deletions redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,24 @@
cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}"
myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}"

- name: Retire any other retiring VM(s) that might exist if we're redeploying to a smaller topology.
- name: Remove any other retiring VM(s) that might exist if we're redeploying to a smaller topology.
block:
- name: run predeleterole role on any other retiring VM(s) that might exist if we're redeploying to a smaller topology.
include_role:
name: "{{predeleterole}}"
when: predeleterole is defined and predeleterole != ""
vars:
hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}"
hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && (contains('RUNNING,running', instance_state))]\") }}"

- name: Power off any other retiring VM(s) that might exist if we're redeploying to a smaller topology.
include_role:
name: clusterverse/redeploy/__common
tasks_from: poweroff_vms.yml
vars:
hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}"
when: (canary=="finish" or canary=="none")

- name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy)
- name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy - can't be in the tidy block because the block depends on this info being correct)
import_role:
name: clusterverse/cluster_hosts
when: (canary_tidy_on_success is defined and canary_tidy_on_success|bool)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
---

- name: set hosts_to_redeploy if canary==start
set_fact: hosts_to_redeploy={{cluster_hosts_target_by_hosttype[hosttype][:1]}}
set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[:1]}}
when: (canary is defined and canary=="start")

- name: set hosts_to_redeploy if canary==finish
set_fact: hosts_to_redeploy={{cluster_hosts_target_by_hosttype[hosttype][1:]}}
set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[1:]}}
when: (canary is defined and canary=="finish")

- name: set hosts_to_redeploy if canary==none
set_fact: hosts_to_redeploy={{cluster_hosts_target_by_hosttype[hosttype]}}
set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))}}
when: (canary is defined and canary=="none")

- debug: msg="Canary redeploy ({{canary}}) selected; deleting and redeploying [{{hosts_to_redeploy | json_query('[].hostname') | join(', ')}}]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
- fail:
when: testfail is defined and testfail == "fail_1"

- name: re-acquire the dynamic inventory
include_role:
name: clusterverse/dynamic_inventory

- name: re-acquire cluster_hosts_target and cluster_hosts_state
import_role:
name: clusterverse/cluster_hosts
Expand All @@ -30,6 +34,14 @@
vars:
hosts_to_stop: "{{ hosts_to_remove }}"

- name: re-acquire the dynamic inventory
include_role:
name: clusterverse/dynamic_inventory

- name: re-acquire cluster_hosts_target and cluster_hosts_state
import_role:
name: clusterverse/cluster_hosts

- fail:
when: testfail is defined and testfail == "fail_2"
vars:
Expand Down
6 changes: 3 additions & 3 deletions redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
---

- name: set hosts_to_del if canary==start
set_fact: hosts_to_del={{cluster_hosts_dict[hosttype][:1]}}
set_fact: hosts_to_del={{(cluster_hosts_dict[hosttype] | sort(attribute='hostname'))[:1]}}
when: (canary is defined and canary=="start")

- name: set hosts_to_del if canary==finish
set_fact: hosts_to_del={{cluster_hosts_dict[hosttype][1:]}}
set_fact: hosts_to_del={{(cluster_hosts_dict[hosttype] | sort(attribute='hostname'))[1:]}}
when: (canary is defined and canary=="finish")

- name: set hosts_to_del if canary==none
set_fact: hosts_to_del={{cluster_hosts_dict[hosttype]}}
set_fact: hosts_to_del={{(cluster_hosts_dict[hosttype] | sort(attribute='hostname'))}}
when: (canary is defined and canary=="none")

- debug: msg="Canary redeploy ({{canary}}) selected; deleting and redeploying [{{hosts_to_del | json_query('[].hostname') | join(', ')}}]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
include_role:
name: "{{predeleterole}}"
vars:
hosts_to_remove: ["{{ host_to_del }}"]
hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?name==`\" + host_to_del.hostname + \"`]\") }}"
when: predeleterole is defined and predeleterole != ""

- import_role:
Expand All @@ -23,3 +23,7 @@
- debug: msg="{{[r__mainclusteryml.stdout_lines] + [r__mainclusteryml.stderr_lines]}}"
failed_when: r__mainclusteryml is failed
when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool)

- name: re-acquire the dynamic inventory
include_role:
name: clusterverse/dynamic_inventory
7 changes: 6 additions & 1 deletion redeploy/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,9 @@
- name: "Run the {{redeploy_scheme}} redploy scheme"
include_role:
name: "{{role_path}}/{{redeploy_scheme}}"
when: redeploy_scheme is defined
when: redeploy_scheme is defined


- name: Get the final dynamic inventory (to write out current)
include_role:
name: clusterverse/dynamic_inventory

0 comments on commit 59a0a48

Please sign in to comment.