Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert ds identify optimization #4797

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 103 additions & 144 deletions tests/unittests/test_ds_identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from textwrap import dedent
from uuid import uuid4

import pytest

from cloudinit import atomic_helper, safeyaml, subp, util
from cloudinit.sources import DataSourceIBMCloud as ds_ibm
from cloudinit.sources import DataSourceOracle as ds_oracle
Expand Down Expand Up @@ -59,145 +61,6 @@
]


DEFAULT_CLOUD_CONFIG = """\
# The top level settings are used as module
# and base configuration.
# A set of users which may be applied and/or used by various modules
# when a 'default' entry is found it will reference the 'default_user'
# from the distro configuration specified below
users:
- default

# If this is set, 'root' will not be able to ssh in and they
# will get a message to login instead as the default $user
disable_root: true

# This will cause the set+update hostname module to not operate (if true)
preserve_hostname: false

# If you use datasource_list array, keep array items in a single line.
# If you use multi line array, ds-identify script won't read array items.
# Example datasource config
# datasource:
# Ec2:
# metadata_urls: [ 'blah.com' ]
# timeout: 5 # (defaults to 50 seconds)
# max_wait: 10 # (defaults to 120 seconds)

# The modules that run in the 'init' stage
cloud_init_modules:
- seed_random
- bootcmd
- write-files
- growpart
- resizefs
- disk_setup
- mounts
- set_hostname
- update_hostname
- update_etc_hosts
- ca-certs
- rsyslog
- users-groups
- ssh

# The modules that run in the 'config' stage
cloud_config_modules:
- wireguard
- snap
- ubuntu_autoinstall
- ssh-import-id
- keyboard
- locale
- set-passwords
- grub-dpkg
- apt-pipelining
- apt-configure
- ubuntu-advantage
- ntp
- timezone
- disable-ec2-metadata
- runcmd
- byobu

# The modules that run in the 'final' stage
cloud_final_modules:
- package-update-upgrade-install
- fan
- landscape
- lxd
- ubuntu-drivers
- write-files-deferred
- puppet
- chef
- ansible
- mcollective
- salt-minion
- reset_rmc
- refresh_rmc_and_interface
- rightscale_userdata
- scripts-vendor
- scripts-per-once
- scripts-per-boot
- scripts-per-instance
- scripts-user
- ssh-authkey-fingerprints
- keys-to-console
- install-hotplug
- phone-home
- final-message
- power-state-change

# System and/or distro specific settings
# (not accessible to handlers/transforms)
system_info:
# This will affect which distro class gets used
distro: ubuntu
# Default user name + that default users groups (if added/used)
default_user:
name: ubuntu
lock_passwd: True
gecos: Ubuntu
groups: [adm, audio, cdrom, floppy, lxd, netdev, plugdev, sudo, video]
sudo: ["ALL=(ALL) NOPASSWD:ALL"]
shell: /bin/bash
network:
renderers: ['netplan', 'eni', 'sysconfig']
activators: ['netplan', 'eni', 'network-manager', 'networkd']
# Automatically discover the best ntp_client
ntp_client: auto
# Other config here will be given to the distro class and/or path classes
paths:
cloud_dir: /var/lib/cloud/
templates_dir: /etc/cloud/templates/
package_mirrors:
- arches: [i386, amd64]
failsafe:
primary: http://archive.ubuntu.com/ubuntu
security: http://security.ubuntu.com/ubuntu
search:
primary:
- http://%(ec2_region)s.ec2.archive.ubuntu.com/ubuntu/
- http://%(availability_zone)s.clouds.archive.ubuntu.com/ubuntu/
- http://%(region)s.clouds.archive.ubuntu.com/ubuntu/
security: []
- arches: [arm64, armel, armhf]
failsafe:
primary: http://ports.ubuntu.com/ubuntu-ports
security: http://ports.ubuntu.com/ubuntu-ports
search:
primary:
- http://%(ec2_region)s.ec2.ports.ubuntu.com/ubuntu-ports/
- http://%(availability_zone)s.clouds.ports.ubuntu.com/ubuntu-ports/
- http://%(region)s.clouds.ports.ubuntu.com/ubuntu-ports/
security: []
- arches: [default]
failsafe:
primary: http://ports.ubuntu.com/ubuntu-ports
security: http://ports.ubuntu.com/ubuntu-ports
ssh_svcname: ssh
"""

POLICY_FOUND_ONLY = "search,found=all,maybe=none,notfound=disabled"
POLICY_FOUND_OR_MAYBE = "search,found=all,maybe=all,notfound=disabled"
DI_DEFAULT_POLICY = "search,found=all,maybe=all,notfound=disabled"
Expand Down Expand Up @@ -283,10 +146,6 @@ def call(
if files is None:
files = {}

cloudcfg = "etc/cloud/cloud.cfg"
if cloudcfg not in files:
files[cloudcfg] = DEFAULT_CLOUD_CONFIG

if rootd is None:
rootd = self.tmp_dir()

Expand Down Expand Up @@ -460,6 +319,65 @@ def test_wb_print_variables(self):
for var in expected_vars:
self.assertIn("{0}=".format(var), err)

@pytest.mark.xfail(reason="GH-4796")
def test_maas_not_detected_1(self):
"""Don't incorrectly identify maas

In ds-identify the function check_config() attempts to parse yaml keys
in bash, but it sometimes introduces false positives. The maas
datasource uses check_config() and the existence of a "MAAS" key to
identify itself (which is a very poor identifier - clouds should have
stricter identifiers). Since the MAAS datasource is at the begining of
the list, this is particularly troublesome and more concerning than
NoCloud false positives, for example.
config = "LXD-kvm-not-MAAS-1"
self._test_ds_found(config)
"""

def test_maas_not_detected_2(self):
"""Don't incorrectly identify maas
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think it's worth putting an issue/commit reference in all of these tests (not just the xfail)? They just feel a bit random without the underlying context and if approaching otherwise I would think "why are we only testing maas here and not every other single datasource?"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think it's worth putting an issue/commit reference in all of these tests

I think you missed the rest of the docstring?

    The bug reported in 4794 combined with the previously existing bug
    reported in 4796 made for very loose MAAS false-positives.

Or maybe I misunderstand what you are asking for.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added more context to the tests.


The bug reported in 4794 combined with the previously existing bug
reported in 4796 made for very loose MAAS false-positives.

In ds-identify the function check_config() attempts to parse yaml keys
in bash, but it sometimes introduces false positives. The maas
datasource uses check_config() and the existence of a "MAAS" key to
identify itself (which is a very poor identifier - clouds should have
stricter identifiers). Since the MAAS datasource is at the begining of
the list, this is particularly troublesome and more concerning than
NoCloud false positives, for example.
"""
config = "LXD-kvm-not-MAAS-2"
self._test_ds_found(config)

def test_maas_not_detected_3(self):
"""Don't incorrectly identify maas

The bug reported in 4794 combined with the previously existing bug
reported in 4796 made for very loose MAAS false-positives.

In ds-identify the function check_config() attempts to parse yaml keys
in bash, but it sometimes introduces false positives. The maas
datasource uses check_config() and the existence of a "MAAS" key to
identify itself (which is a very poor identifier - clouds should have
stricter identifiers). Since the MAAS datasource is at the begining of
the list, this is particularly troublesome and more concerning than
NoCloud false positives, for example.
"""
config = "LXD-kvm-not-MAAS-3"
self._test_ds_found(config)

def test_azure_invalid_configuration(self):
"""Don't detect incorrect config when invalid datasource_list provided

If unparsable list is provided we just ignore it. Some users
might assume that since the rest of the configuration is yaml that
multi-line yaml lists are valid (they aren't). When this happens, just
run ds-identify and figure it out for ourselves which platform to run.
"""
self._test_ds_found("Azure-parse-invalid")

def test_azure_dmi_detection_from_chassis_asset_tag(self):
"""Azure datasource is detected from DMI chassis-asset-tag"""
self._test_ds_found("Azure-dmi-detection")
Expand Down Expand Up @@ -1256,6 +1174,15 @@ def _print_run_output(rc, out, err, cfg, files):
os.path.join(P_SEED_DIR, "azure", "ovf-env.xml"): "present\n",
},
},
"Azure-parse-invalid": {
"ds": "Azure",
"files": {
P_CHASSIS_ASSET_TAG: "7783-7084-3265-9085-8269-3286-77\n",
"etc/cloud/cloud.cfg.d/91-azure_datasource.cfg": (
"datasource_list:\n - Azure"
),
},
},
"Ec2-hvm": {
"ds": "Ec2",
"mocks": [{"name": "detect_virt", "RET": "kvm", "ret": 0}],
Expand Down Expand Up @@ -1296,6 +1223,38 @@ def _print_run_output(rc, out, err, cfg, files):
"mocks": [{"name": "is_socket_file", "ret": 1}, MOCK_VIRT_IS_KVM],
"no_mocks": ["dscheck_LXD"], # Don't default mock dscheck_LXD
},
"LXD-kvm-not-MAAS-1": {
"ds": "LXD",
"files": {
P_BOARD_NAME: "LXD\n",
"etc/cloud/cloud.cfg.d/92-broken-maas.cfg": (
"datasource:\n MAAS:\n metadata_urls: [ 'blah.com' ]"
),
},
# /dev/lxd/sock does not exist and KVM virt-type
"mocks": [{"name": "is_socket_file", "ret": 1}, MOCK_VIRT_IS_KVM],
"no_mocks": ["dscheck_LXD"], # Don't default mock dscheck_LXD
},
"LXD-kvm-not-MAAS-2": {
"ds": "LXD",
"files": {
P_BOARD_NAME: "LXD\n",
"etc/cloud/cloud.cfg.d/92-broken-maas.cfg": ("#MAAS: None"),
},
# /dev/lxd/sock does not exist and KVM virt-type
"mocks": [{"name": "is_socket_file", "ret": 1}, MOCK_VIRT_IS_KVM],
"no_mocks": ["dscheck_LXD"], # Don't default mock dscheck_LXD
},
"LXD-kvm-not-MAAS-3": {
"ds": "LXD",
"files": {
P_BOARD_NAME: "LXD\n",
"etc/cloud/cloud.cfg.d/92-broken-maas.cfg": ("MAAS: None"),
},
# /dev/lxd/sock does not exist and KVM virt-type
"mocks": [{"name": "is_socket_file", "ret": 1}, MOCK_VIRT_IS_KVM],
"no_mocks": ["dscheck_LXD"], # Don't default mock dscheck_LXD
},
"LXD-kvm-qemu-kernel-gt-5.10": { # LXD host > 5.10 kvm launch virt==qemu
"ds": "LXD",
"files": {P_BOARD_NAME: "LXD\n"},
Expand Down Expand Up @@ -1338,7 +1297,7 @@ def _print_run_output(rc, out, err, cfg, files):
# Also include a datasource list of more than just
# [NoCloud, None], because that would automatically select
# NoCloud without checking
"etc/cloud/cloud.cfg": dedent(
"/etc/cloud/cloud.cfg": dedent(
"""\
datasource_list: [ Azure, Openstack, NoCloud, None ]
datasource:
Expand Down
31 changes: 14 additions & 17 deletions tools/ds-identify
Original file line number Diff line number Diff line change
Expand Up @@ -799,24 +799,21 @@ check_config() {
if [ "$1" = "$files" -a ! -f "$1" ]; then
return 1
fi
local line="" ret="" found=0 found_fn="" oifs="$IFS" out=""
out=$(grep "$key\"\?:" "$@" 2>/dev/null)
IFS=${CR}
for line in $out; do
# drop '# comment'
line=${line%%#*}
# if more than one file was 'grep'ed, then grep will output filename:
# but if only one file, line will not be prefixed.
if [ $# -eq 1 ]; then
found_fn="$1"
else
found_fn="${line%%:*}"
line=${line#*:}
fi
ret=${line#*: };
found=$((found+1))
local fname="" line="" ret="" found=0 found_fn=""
# shellcheck disable=2094
for fname in "$@"; do
[ -f "$fname" ] || continue
while read line; do
line=${line%%#*}
case "$line" in
$key:\ *|"${key}":)
ret=${line#*:};
ret=${ret# };
found=$((found+1))
found_fn="$fname";;
esac
done <"$fname"
done
IFS="$oifs"
if [ $found -ne 0 ]; then
_RET="$ret"
_RET_fname="$found_fn"
Expand Down
Loading