Skip to content

Commit

Permalink
chore: Make exception handling more explicit
Browse files Browse the repository at this point in the history
Cloud-init's codebase makes extensive use of exception handlers which
catch Exception and then do something based on the assumed exception
type. This is bad practice and makes it possible for exceptions to be
unexpectedly ignored. To remedy this, this commit updates cloud-init to
make `Exception` always be an unknown exception which therefore should
always produce a log of level `WARN` or higher.

Require a minimum of one of the following in each Exception handler:

- LOG.warning("Unhandled exception: %s", e)
- util.logexc() - with a log level of WARN or higher
- re-raise to be caught by a different handler

Make cloud-init's code adhere to the above set of rules with the
following changes:

- Limit the scope where Exception is handled to only unexpected error paths.
- Add new handlers for more specific exception types.
- Add warning logs or increase log level to WARN in some cases.
- Add typing where exceptions are returned.
- Replace various Exception handlers with ProcessExecutionError.
- Remove handling that does nothing.
- Remove unused code which handles Exception.
  • Loading branch information
holmanb committed Apr 23, 2024
1 parent aa412a7 commit 7cee29f
Show file tree
Hide file tree
Showing 38 changed files with 269 additions and 113 deletions.
14 changes: 10 additions & 4 deletions cloudinit/analyze/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import datetime
import json
import logging
import os
import sys
import time
Expand Down Expand Up @@ -53,6 +54,7 @@
FAIL_CODE = "failure"
CONTAINER_CODE = "container"
TIMESTAMP_UNKNOWN = (FAIL_CODE, -1, -1, -1)
LOG = logging.getLogger(__name__)


def format_record(msg, event):
Expand Down Expand Up @@ -217,10 +219,14 @@ def gather_timestamps_using_dmesg():
# systemd wont start cloud-init in this case,
# so we cannot get that timestamp
return SUCCESS_CODE, kernel_start, kernel_end, kernel_end

except Exception:
pass
return TIMESTAMP_UNKNOWN
except subp.ProcessExecutionError:
return TIMESTAMP_UNKNOWN
except ValueError:
# float("??")
return TIMESTAMP_UNKNOWN
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
return TIMESTAMP_UNKNOWN


def gather_timestamps_using_systemd():
Expand Down
6 changes: 6 additions & 0 deletions cloudinit/cmd/devel/hotplug_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,12 @@ def handle_hotplug(hotplug_init: Init, devpath, subsystem, udevaction):
event_handler.success()
break
except Exception as e:
# The number of possible exceptions handled here is large and
# difficult to audit. If retries fail this exception is always
# re-raised. Therefore, this callsite is allowed to be an exception
# to the rule that handle Exception must log a warning or reraise
# or call util.logexc() - since it does technically do this on
# timeout.
LOG.debug("Exception while processing hotplug event. %s", e)
time.sleep(wait)
last_exception = e
Expand Down
8 changes: 6 additions & 2 deletions cloudinit/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,8 +745,12 @@ def status_wrapper(name, args):
else:
try:
status = json.loads(util.load_text_file(status_path))
except Exception:
pass
except OSError:
LOG.warning("File %s not found %s.", status_path, mode)
except json.JSONDecodeError as e:
LOG.warning("File %s not valid json %s: %s", status_path, mode, e)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)

nullstatus = {
"errors": [],
Expand Down
29 changes: 18 additions & 11 deletions cloudinit/config/cc_disk_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def enumerate_disk(device, nodeps=False):
info = None
try:
info, _err = subp.subp(lsblk_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -309,7 +309,11 @@ def is_device_valid(name, partition=False):
d_type = ""
try:
d_type = device_type(name)
except Exception:
except RuntimeError:
LOG.warning("Query against device %s failed", name)
return False
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning("Query against device %s failed", name)
return False

Expand All @@ -334,7 +338,7 @@ def check_fs(device):
blkid_cmd = [BLKID_CMD, "-c", "/dev/null", device]
try:
out, _err = subp.subp(blkid_cmd, rcs=[0, 2])
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed during disk check for %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -440,7 +444,7 @@ def get_hdd_size(device):
try:
size_in_bytes, _ = subp.subp([BLKDEV_CMD, "--getsize64", device])
sector_size, _ = subp.subp([BLKDEV_CMD, "--getss", device])
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError("Failed to get %s size\n%s" % (device, e)) from e

return int(size_in_bytes) / int(sector_size)
Expand All @@ -458,7 +462,7 @@ def check_partition_mbr_layout(device, layout):
prt_cmd = [SFDISK_CMD, "-l", device]
try:
out, _err = subp.subp(prt_cmd, data="%s\n" % layout)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -489,7 +493,7 @@ def check_partition_gpt_layout(device, layout):
prt_cmd = [SGDISK_CMD, "-p", device]
try:
out, _err = subp.subp(prt_cmd, update_env=LANG_C_ENV)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Error running partition command on %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -680,7 +684,7 @@ def purge_disk(device):
try:
LOG.info("Purging filesystem on /dev/%s", d["name"])
subp.subp(wipefs_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed FS purge of /dev/%s" % d["name"]
) from e
Expand Down Expand Up @@ -716,7 +720,7 @@ def read_parttbl(device):
util.udevadm_settle()
try:
subp.subp(probe_cmd)
except Exception as e:
except subp.ProcessExecutionError as e:
util.logexc(LOG, "Failed reading the partition table %s" % e)

util.udevadm_settle()
Expand All @@ -731,7 +735,7 @@ def exec_mkpart_mbr(device, layout):
prt_cmd = [SFDISK_CMD, "--force", device]
try:
subp.subp(prt_cmd, data="%s\n" % layout)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError(
"Failed to partition device %s\n%s" % (device, e)
) from e
Expand Down Expand Up @@ -759,7 +763,10 @@ def exec_mkpart_gpt(device, layout):
subp.subp(
[SGDISK_CMD, "-t", "{}:{}".format(index, pinput), device]
)
except Exception:
except subp.ProcessExecutionError:
LOG.warning("Failed to partition device %s", device)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning("Failed to partition device %s", device)
raise

Expand Down Expand Up @@ -1057,5 +1064,5 @@ def mkfs(fs_cfg):
LOG.debug(" Using cmd: %s", str(fs_cmd))
try:
subp.subp(fs_cmd, shell=shell)
except Exception as e:
except subp.ProcessExecutionError as e:
raise RuntimeError("Failed to exec of '%s':\n%s" % (fs_cmd, e)) from e
9 changes: 9 additions & 0 deletions cloudinit/config/cc_growpart.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,16 @@ def resize_devices(resizer, devices, distro: Distro):
"as it is not encrypted.",
)
)
except OSError as e:
info.append(
(
devent,
RESIZE.FAILED,
f"Resizing encrypted device ({blockdev}) failed: {e}",
)
)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
info.append(
(
devent,
Expand Down
2 changes: 1 addition & 1 deletion cloudinit/config/cc_grub_dpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:

try:
subp.subp(["debconf-set-selections"], data=dconf_sel)
except Exception as e:
except subp.ProcessExecutionError as e:
util.logexc(
LOG, "Failed to run debconf-set-selections for grub_dpkg: %s", e
)
Expand Down
22 changes: 17 additions & 5 deletions cloudinit/config/cc_mounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,13 @@ def handle_swapcfg(swapcfg):
LOG.debug("swap file %s already in use", fname)
return fname
LOG.debug("swap file %s exists, but not in /proc/swaps", fname)
except Exception:
except OSError:
LOG.warning(
"swap file %s exists. Error reading /proc/swaps", fname
)
return fname
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning(
"swap file %s exists. Error reading /proc/swaps", fname
)
Expand All @@ -415,7 +421,10 @@ def handle_swapcfg(swapcfg):
maxsize = util.human2bytes(maxsize)
return setup_swapfile(fname=fname, size=size, maxsize=maxsize)

except OSError as e:
LOG.warning("failed to setup swap: %s", e)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning("failed to setup swap: %s", e)

return None
Expand Down Expand Up @@ -457,10 +466,13 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:

try:
toks = WS.split(line)
except Exception:
pass
fstab_devs[toks[0]] = line
fstab_lines.append(line)
fstab_devs[toks[0]] = line
fstab_lines.append(line)
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
util.logexc(
LOG, "Failed to parse devs from file %s", FSTAB_PATH
)

device_aliases = cfg.get("device_aliases", {})

Expand Down
4 changes: 4 additions & 0 deletions cloudinit/config/cc_power_state_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,11 @@ def check_condition(cond):
else:
LOG.warning("%sunexpected exit %s. do not apply change.", pre, ret)
return False
except OSError as e:
LOG.warning("%sUnexpected error: %s", pre, e)
return False
except Exception as e:
LOG.warning("Unhandled exception: %s", e)
LOG.warning("%sUnexpected error: %s", pre, e)
return False

Expand Down
3 changes: 2 additions & 1 deletion cloudinit/config/cc_runcmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,5 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:
content = util.shellify(cmd)
util.write_file(out_fn, content, 0o700)
except Exception as e:
raise type(e)("Failed to shellify {} into file {}".format(cmd, out_fn))
util.logexc(LOG, "Failed to shellify runcmd: %s", e)
raise
20 changes: 14 additions & 6 deletions cloudinit/config/cc_ssh_import_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import logging
import pwd
from textwrap import dedent
from typing import List

from cloudinit import subp, util
from cloudinit.cloud import Cloud
Expand Down Expand Up @@ -80,33 +81,40 @@ def handle(name: str, cfg: Config, cloud: Cloud, args: list) -> None:

# import for cloudinit created users
(users, _groups) = ug_util.normalize_users_groups(cfg, cloud.distro)
elist = []
elist: List[Exception] = []
for (user, user_cfg) in users.items():
import_ids = []
if user_cfg["default"]:
import_ids = util.get_cfg_option_list(cfg, "ssh_import_id", [])
else:
try:
import_ids = user_cfg["ssh_import_id"]
except Exception:
import_ids = user_cfg.get("ssh_import_id")
if not import_ids:
LOG.debug("User %s is not configured for ssh_import_id", user)
continue

try:
import_ids = util.uniq_merge(import_ids)
import_ids = [str(i) for i in import_ids]
except Exception:
LOG.debug(
except (AttributeError, TypeError):
LOG.warning(
"User %s is not correctly configured for ssh_import_id", user
)
except Exception:
util.logexc(LOG, "Unhandled configuration for user %s", user)
continue

if not len(import_ids):
continue

try:
import_ssh_ids(import_ids, user)
except subp.ProcessExecutionError as exc:
util.logexc(
LOG, "ssh-import-id failed for: %s %s", user, import_ids
)
elist.append(exc)
except Exception as exc:
LOG.warning("Unhandled exception: %s", exc)
util.logexc(
LOG, "ssh-import-id failed for: %s %s", user, import_ids
)
Expand Down
28 changes: 23 additions & 5 deletions cloudinit/config/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import logging
from inspect import signature
from types import ModuleType
from typing import Dict, List, NamedTuple, Optional
from typing import Dict, List, NamedTuple, Optional, Tuple

from cloudinit import config, importer, type_utils, util
from cloudinit.distros import ALL_DISTROS
Expand Down Expand Up @@ -246,7 +246,9 @@ def _fixup_modules(self, raw_mods) -> List[ModuleDetails]:
)
return mostly_mods

def _run_modules(self, mostly_mods: List[ModuleDetails]):
def _run_modules(
self, mostly_mods: List[ModuleDetails]
) -> Tuple[List[str], List[Tuple[str, Exception]]]:
cc = self.init.cloudify()
# Return which ones ran
# and which ones failed + the exception of why it failed
Expand Down Expand Up @@ -296,7 +298,16 @@ def _run_modules(self, mostly_mods: List[ModuleDetails]):
failures.append((name, e))
return (which_ran, failures)

def run_single(self, mod_name, args=None, freq=None):
def run_single(
self, mod_name: str, args=None, freq=None
) -> Tuple[List[str], List[Tuple[str, Exception]]]:
"""Run a single module
return: a tuple containing two lists:
- string names of modules which ran
- a list of tuples of modules which failed while running and
the exception that was raised
"""
# Form the users module 'specs'
mod_to_be = {
"mod": mod_name,
Expand All @@ -308,14 +319,21 @@ def run_single(self, mod_name, args=None, freq=None):
mostly_mods = self._fixup_modules(raw_mods)
return self._run_modules(mostly_mods)

def run_section(self, section_name):
def run_section(
self, section_name: str
) -> Tuple[List[str], List[Tuple[str, Exception]]]:
"""Runs all modules in the given section.
section_name - One of the modules lists as defined in
/etc/cloud/cloud.cfg. One of:
- cloud_init_modules
- cloud_config_modules
- cloud_final_modules
return: a tuple containing two lists:
- string names of modules which ran
- a list of tuples of modules which failed while running and
the exception that was raised
"""
raw_mods = self._read_modules(section_name)
mostly_mods = self._fixup_modules(raw_mods)
Expand Down Expand Up @@ -345,7 +363,7 @@ def run_section(self, section_name):
skipped.append(name)
continue
forced.append(name)
active_mods.append([mod, name, _freq, _args])
active_mods.append(module_details)

if inapplicable_mods:
LOG.info(
Expand Down
Loading

0 comments on commit 7cee29f

Please sign in to comment.