diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
new file mode 100644
index 0000000..c264694
--- /dev/null
+++ b/.github/workflows/check.yml
@@ -0,0 +1,32 @@
+# workflow which lints and checks the code on every push and pull request
+name: Check
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    steps:
+      # https://github.com/actions/checkout
+      - name: checkout
+        uses: actions/checkout@v4
+
+      # https://github.com/actions/setup-python
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: 'pip'  # caching pip dependencies
+
+      - name: Install Deps
+        run: pip install -r requirements.txt -r requirements.dev.txt
+
+      # https://github.com/pre-commit/action
+      - name: run pre-commit
+        uses: pre-commit/action@v3.0.0
+        env:
+          SKIP: no-commit-to-branch
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 2d1e34d..ac53f89 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -1,4 +1,4 @@
-name: Docker Image Builds
+name: Docker
 on:
   push:
     branches: [main]
@@ -20,15 +20,15 @@ jobs:
     steps:
       # https://github.com/docker/setup-qemu-action
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3
 
       # https://github.com/docker/setup-buildx-action
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
 
       # https://github.com/docker/login-action
       - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
         with:
           registry: docker.io
           username: ${{ github.actor }}
@@ -36,7 +36,7 @@ jobs:
 
       # https://github.com/docker/login-action
       - name: Login to GitHub Container Registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
@@ -45,7 +45,7 @@ jobs:
       # https://github.com/docker/metadata-action
       - name: Extract Docker metadata
         id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
         with:
           images: |
             docker.io/${{ github.repository }}
@@ -58,7 +58,7 @@ jobs:
       # Build and push Docker image with Buildx (don't push on PR)
       # https://github.com/docker/build-push-action
       - name: Build and push Docker image
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v5
         with:
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
diff --git a/.github/workflows/yamllint.yml b/.github/workflows/yamllint.yml
deleted file mode 100644
index cc2e1d2..0000000
--- a/.github/workflows/yamllint.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: YAML Lint
-
-on:
-  push:
-    branches: ['main']
-  pull_request:
-    branches: ['main']
-
-jobs:
-  yamllint:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout the code
-        uses: actions/checkout@v3
-      - name: Check the YAML in .github and containers
-        run: |
-          docker run \
-            --rm \
-            --volume "$(pwd):/work" \
-            backplane/yamllint \
-            -f github \
-            .
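Reviewer note: the standalone yamllint workflow deleted above is replaced rather than dropped — YAML checking now happens through the `check-yaml` hook (plus `actionlint-docker` for the workflow files themselves) in the pre-commit configuration added below, which the new Check workflow runs on every push and pull request against main. Assuming `pre-commit` is installed locally (it is not pinned in requirements.dev.txt), the CI job can be reproduced with something like:

```sh
pip install -r requirements.txt -r requirements.dev.txt pre-commit
# SKIP mirrors the env var the Check workflow sets for pre-commit/action
SKIP=no-commit-to-branch pre-commit run --all-files
```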
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..6a7cabd
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,105 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-added-large-files
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: mixed-line-ending
+        args:
+          - --fix=lf
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: detect-private-key
+      - id: no-commit-to-branch
+        args:
+          - --branch
+          - main
+
+  - repo: https://github.com/rhysd/actionlint
+    rev: v1.6.26
+    hooks:
+      - id: actionlint-docker
+        name: check github workflows with actionlint
+
+  - repo: https://github.com/koalaman/shellcheck-precommit
+    rev: v0.9.0
+    hooks:
+      - id: shellcheck
+
+  - repo: https://github.com/psf/black
+    rev: 23.11.0
+    hooks:
+      - id: black
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args:
+          - "--profile"
+          - "black"
+          - "--filter-files"
+
+  - repo: https://github.com/PyCQA/bandit
+    rev: 1.7.5
+    hooks:
+      - id: bandit
+
+  - repo: https://github.com/PyCQA/flake8
+    rev: 6.1.0
+    hooks:
+      - id: flake8
+        args:
+          - "--max-line-length=88"
+          - "--extend-ignore=E203,E501"
+
+  # - repo: https://github.com/pycqa/pylint
+  #   rev: "v3.0.1"
+  #   hooks:
+  #     - id: pylint
+
+  # https://pylint.pycqa.org/en/latest/user_guide/installation/pre-commit-integration.html
+  - repo: local
+    hooks:
+      - id: pylint
+        name: pylint
+        entry: pylint
+        language: system
+        types: [python]
+        args:
+          [
+            "-rn", # Only display messages
+            "-sn", # Don't display the score
+          ]
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.7.0
+    hooks:
+      - id: mypy
+        # additional_dependencies:
+        #   - types-requests
+
+  # - repo: https://github.com/jendrikseipp/vulture
+  #   rev: v2.7
+  #   hooks:
+  #     - id: vulture
+
+  - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
+    rev: v1.3.2
+    hooks:
+      - id: python-safety-dependencies-check
+        files: "requirements.txt"
+
+  # - repo: local
+  #   hooks:
+  #     - id: pytest-check
+  #       name: pytest-check
+  #       entry: pytest
+  #       language: system
+  #       pass_filenames: false
+  #       always_run: true
diff --git a/requirements.dev.txt b/requirements.dev.txt
new file mode 100644
index 0000000..a809c14
--- /dev/null
+++ b/requirements.dev.txt
@@ -0,0 +1,10 @@
+bandit~=1.7.5
+black~=23.11.0
+boto3-stubs~=1.29.6
+bpython~=0.24
+flake8~=6.1.0
+isort~=5.12.0
+mypy~=1.7.0
+pycodestyle~=2.11.1
+pylint~=3.0.2
+pytest~=7.4.3
diff --git a/requirements.txt b/requirements.txt
index 8c36739..563f715 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-boto3~=1.28.16
+boto3~=1.29.6
diff --git a/src/aws_reporter/__main__.py b/src/aws_reporter/__main__.py
index a17bc89..b5560a5 100755
--- a/src/aws_reporter/__main__.py
+++ b/src/aws_reporter/__main__.py
@@ -1,189 +1,14 @@
 #!/usr/bin/env python3
 """ utility which generates a CSV report from AWS API data on stdout """
 import argparse
-import csv
-import json
 import logging
 import os
-import shlex
 import sys
-from typing import Any, Dict, Final, List, Optional, Union
+from typing import Optional
 
 import boto3
 
-# some constants used later
-EC2: Final = "ec2"
-RDS: Final = "rds"
-SG: Final = "sg"
-
-
-class KeyPathNoDefault:
-    """
-    A token used by get_keypath to represent the absence of a default argument
-    """
-
-    pass
-
-
-def get_keypath(
-    obj: Dict,
-    keypath_str: str,
-    delimiter: str = ".",
-    default: Any = KeyPathNoDefault,
-) -> Any:
-    """
-    given a deeply nested object and a delimited keypath, retrieve the deep value at
-    that keypath
-    """
-    keypath: List[str] = keypath_str.split(delimiter)
-    sub_obj: Any = obj
-    key: Union[str, int]
-    for depth, key in enumerate(keypath):
-        try:
-            if isinstance(sub_obj, list):
-                key = int(key)
-            sub_obj = sub_obj[key]
-        except KeyError:
-            if default is not KeyPathNoDefault:
-                return default
-            raise KeyError(
-                f"unable to resolve keypath '{keypath_str}'; failed to "
-                f"retrieve '{key}' component (depth: {depth})"
-            ) from None
-    return sub_obj
-
-
-def load_json_file(path: str, encoding: str = "utf-8") -> Any:
-    """load the json file at the given path and return the parsed structure"""
-    with open(path, "rt", encoding=encoding) as jsonfh:
-        return json.load(jsonfh)
-
-
-def cskv(data: Dict, kv_delimiter: str = "=", item_delimiter: str = ", ") -> str:
-    """
-    comma-separated key/value string: converts a dict into a comma-separated list of key
-    and value pairs
-    """
-    # {'name': 'test', 'version': 2} -> "name=test, version=2"
-    return item_delimiter.join(
-        [f"{shlex.quote(k)}{kv_delimiter}{shlex.quote(v)}" for k, v in data.items()]
-    )
-
-
-def ec2_report(data: Any) -> int:
-    """
-    report on the ec2 instances
-    """
-    fields: Final = (
-        "NameTag",
-        "InstanceId",
-        "InstanceType",
-        "Placement.AvailabilityZone",
-        "PlatformDetails",
-        "PublicIpAddress",
-        "LaunchTime",
-        "State.Name",
-        "StateTransitionReason",
-        "Tags",
-    )
-    # this is a subset of the above, it identifies values that aren't directly
-    # at a keypath
-    computed_fields: Final = ("NameTag", "Tags")
-
-    csvout = csv.DictWriter(sys.stdout, fields)
-    csvout.writeheader()
-    rowcount = 0
-
-    for reservation in data["Reservations"]:
-        for instance in reservation["Instances"]:
-            outputs: Dict[str, str] = {
-                key: get_keypath(instance, key, default="--")
-                for key in fields
-                if key not in computed_fields
-            }
-            tags = {kv["Key"]: kv["Value"] for kv in instance["Tags"]}
-            # Promote the "Name" tag if there is one
-            if "Name" in tags:
-                outputs["NameTag"] = tags["Name"]
-                del tags["Name"]
-            outputs["Tags"] = cskv(tags)
-            csvout.writerow(outputs)
-            rowcount += 1
-
-    logging.info("ec2_report: wrote %s rows", rowcount)
-    return rowcount
-
-
-def rds_report(data: Any) -> int:
-    """
-    report on the rds instances
-    """
-    fields: Final = (
-        "DBInstanceIdentifier",
-        "DBInstanceClass",
-        "Engine",
-        "DBInstanceStatus",
-        "Endpoint.Address",
-        "Endpoint.Port",
-        "InstanceCreateTime",
-        "AvailabilityZone",
-        "MultiAZ",
-        "StorageType",
-        "DBInstanceArn",
-        # "TagList",
-    )
-    # this is a subset of the above, it identifies values that aren't directly
-    # at a keypath
-    computed_fields: Final = ()
-
-    csvout = csv.DictWriter(sys.stdout, fields)
-    csvout.writeheader()
-    rowcount = 0
-
-    for instance in data["DBInstances"]:
-        outputs: Dict[str, str] = {
-            key: get_keypath(instance, key, default="--")
-            for key in fields
-            if key not in computed_fields
-        }
-        csvout.writerow(outputs)
-        rowcount += 1
-
-    logging.info("rds_report: wrote %s rows", rowcount)
-    return rowcount
-
-
-def sg_report(data: Any) -> int:
-    fields: Final = (
-        "GroupId",
-        "SecurityGroupRuleId",
-        "CidrIpv4",
-        "Description",
-        "ToPort",
-    )
-    # this is a subset of the above, it identifies values that aren't directly
-    # at a keypath
-    computed_fields: Final = ()
-
-    csvout = csv.DictWriter(sys.stdout, fields)
-    csvout.writeheader()
-    rowcount = 0
-
-    for rule in sorted(
-        data["SecurityGroupRules"],
key=lambda x: (x["GroupId"], x["SecurityGroupRuleId"]), - ): - # logging.debug("sg_report: rule %s", json.dumps(rule)) - outputs: Dict[str, str] = { - key: get_keypath(rule, key, default="--") - for key in fields - if key not in computed_fields - } - csvout.writerow(outputs) - rowcount += 1 - - logging.info("sg_report: wrote %s rows", rowcount) - return rowcount +from .aws import EC2, RDS, SG, Reporter def main() -> int: @@ -225,11 +50,7 @@ def main() -> int: ) argp.add_argument( "mode", - choices=( - EC2, - RDS, - SG, - ), + choices=(EC2, RDS, SG), help="select a report to run", ) args = argp.parse_args() @@ -241,23 +62,12 @@ def main() -> int: level=logging.DEBUG if args.debug else logging.INFO, ) - # get the data - if args.inputjson: - data = load_json_file(args.inputjson) - else: - session = boto3.session.Session(profile_name=args.profile) - data = { - EC2: lambda: session.client(EC2).describe_instances(), - RDS: lambda: session.client(RDS).describe_db_instances(), - SG: lambda: session.client(EC2).describe_security_group_rules(), - }[args.mode]() - - # run the report on the data + reporter = Reporter(args.profile, args.inputjson) { - EC2: lambda x: ec2_report(x), - RDS: lambda x: rds_report(x), - SG: lambda x: sg_report(x), - }[args.mode](data) + EC2: reporter.ec2_report, + RDS: reporter.rds_report, + SG: reporter.sg_report, + }[args.mode]() return 0 diff --git a/src/aws_reporter/aws.py b/src/aws_reporter/aws.py new file mode 100644 index 0000000..2db2e4b --- /dev/null +++ b/src/aws_reporter/aws.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +""" class for reporting on aws instances """ +import csv +import datetime +import logging +import sys +from collections import defaultdict +from typing import Any, Dict, Final, Optional, Union + +import boto3 + +from .utils import cskv, get_keypath, hash_args_kwargs, load_json_file, utcnow + +# aws-related constants + +# services +EC2: Final = "ec2" +RDS: Final = "rds" +SG: Final = "sg" +CLOUDWATCH: Final = "cloudwatch" + +# methods +DESCRIBE_DB_INSTANCES: Final = "describe_db_instances" +DESCRIBE_INSTANCES: Final = "describe_instances" +DESCRIBE_SECURITY_GROUP_RULES: Final = "describe_security_group_rules" +GET_METRIC_STATISTICS: Final = "get_metric_statistics" + + +logger = logging.getLogger(__name__) + + +class Reporter: + """class for reporting on AWS resources""" + + session: boto3.Session + cache: Dict[str, Dict[str, Dict[str, Any]]] + + def __init__(self, profile_name: str, input_json: Optional[str] = None) -> None: + self.session = boto3.session.Session(profile_name=profile_name) + self.cache = defaultdict(lambda: defaultdict(dict)) + if input_json: + data = load_json_file(input_json) + for client_name in data: + for method_name in client_name: + self.cache[client_name][method_name] = data[client_name][ + method_name + ] + + def get_data( + self, + client: str, + method: str, + *args, + **kwargs, + ) -> Optional[Dict[str, Any]]: + """make an API call utilizing the cache""" + + cache_key = hash_args_kwargs(*args, **kwargs) + + if data := self.cache[client][method].get(cache_key): + return data + data = getattr(self.session.client(client), method)(*args, **kwargs) + if data: + self.cache[client][method][cache_key] = data + return data + + def ec2_report(self) -> int: + """ + report on the ec2 instances + """ + data = self.get_data(EC2, DESCRIBE_INSTANCES) + if data is None: + raise ValueError("no result returned from get_data") + + fields: Final = ( + "NameTag", + "InstanceId", + "InstanceType", + "Placement.AvailabilityZone", + 
"PlatformDetails", + "PublicIpAddress", + "LaunchTime", + "State.Name", + "StateTransitionReason", + "Tags", + ) + # this is a subset of the above, it identifies values that aren't directly + # at a keypath + computed_fields: Final = ("NameTag", "Tags") + + csvout = csv.DictWriter(sys.stdout, fields) + csvout.writeheader() + rowcount = 0 + + for reservation in data["Reservations"]: + for instance in reservation["Instances"]: + outputs: Dict[str, str] = { + key: get_keypath(instance, key, default="--") + for key in fields + if key not in computed_fields + } + tags = {kv["Key"]: kv["Value"] for kv in instance["Tags"]} + # Promote the "Name" tag if there is one + if "Name" in tags: + outputs["NameTag"] = tags["Name"] + del tags["Name"] + outputs["Tags"] = cskv(tags) + csvout.writerow(outputs) + rowcount += 1 + + logger.info("ec2_report: wrote %s rows", rowcount) + return rowcount + + def get_rds_free_storage(self, db_instance_id: str) -> int: + """return the amount of space available on the given RDS instance (in bytes)""" + data = self.get_data( + CLOUDWATCH, + GET_METRIC_STATISTICS, + Namespace="AWS/RDS", + MetricName="FreeStorageSpace", + Dimensions=[ + { + "Name": "DBInstanceIdentifier", + "Value": db_instance_id, + }, + ], + StartTime=utcnow() - datetime.timedelta(hours=1), + EndTime=utcnow(), + Period=3600, + Statistics=["Average"], + ) + if data is None: + raise ValueError("no result returned from get_data") + + # Print the free storage space + if data["Datapoints"]: + return data["Datapoints"][0]["Average"] + + return -1 + + def rds_report(self) -> int: + """ + report on the rds instances + """ + data = self.get_data(RDS, DESCRIBE_DB_INSTANCES) + if data is None: + raise ValueError("no result returned from get_data") + + fields: Final = ( + "DBInstanceIdentifier", + "DBInstanceClass", + "Engine", + "DBInstanceStatus", + "Endpoint.Address", + "Endpoint.Port", + "InstanceCreateTime", + "AvailabilityZone", + "MultiAZ", + "StorageType", + "DBInstanceArn", + "AllocatedStorage", + # "TagList", + ) + # this is a subset of the above, it identifies values that aren't directly + # at a keypath + computed_fields: Final = () + + csvout = csv.DictWriter( + sys.stdout, + fields + + ( + "AllocatedStorageGiB", + "UsedStorageGiB", + "FreeStorageGiB", + "StorageUtilizationPercentage", + ), + ) + csvout.writeheader() + rowcount = 0 + + for instance in data["DBInstances"]: + outputs: Dict[str, Union[str, int, float]] = { + key: get_keypath(instance, key, default="--") + for key in fields + if key not in computed_fields + } + allocated_bytes: int = int(outputs["AllocatedStorage"]) * 1073742000 + free_bytes: int = self.get_rds_free_storage( + instance["DBInstanceIdentifier"] + ) + free_gib = free_bytes * 9.313226e-10 + if free_bytes < 1: + free_gib = -1 + storage_used = allocated_bytes - free_bytes + + outputs["AllocatedStorageGiB"] = outputs["AllocatedStorage"] # an alias + outputs["UsedStorageGiB"] = storage_used * 9.313226e-10 + outputs["FreeStorageGiB"] = free_gib + outputs["StorageUtilizationPercentage"] = round( + (storage_used / allocated_bytes) * 100, 2 + ) + + csvout.writerow(outputs) + rowcount += 1 + + logger.info("rds_report: wrote %s rows", rowcount) + return rowcount + + def sg_report(self) -> int: + """ + report on security groups + """ + data = self.get_data(EC2, DESCRIBE_SECURITY_GROUP_RULES) + if data is None: + raise ValueError("no result returned from get_data") + + fields: Final = ( + "GroupId", + "SecurityGroupRuleId", + "CidrIpv4", + "Description", + "ToPort", + ) + # this is a 
+        # this is a subset of the above, it identifies values that aren't directly
+        # at a keypath
+        computed_fields: Final = ()
+
+        csvout = csv.DictWriter(sys.stdout, fields)
+        csvout.writeheader()
+        rowcount = 0
+
+        for rule in sorted(
+            data["SecurityGroupRules"],
+            key=lambda x: (x["GroupId"], x["SecurityGroupRuleId"]),
+        ):
+            # logger.debug("sg_report: rule %s", json.dumps(rule))
+            outputs: Dict[str, str] = {
+                key: get_keypath(rule, key, default="--")
+                for key in fields
+                if key not in computed_fields
+            }
+            csvout.writerow(outputs)
+            rowcount += 1
+
+        logger.info("sg_report: wrote %s rows", rowcount)
+        return rowcount
diff --git a/src/aws_reporter/utils.py b/src/aws_reporter/utils.py
new file mode 100644
index 0000000..7d31fbc
--- /dev/null
+++ b/src/aws_reporter/utils.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+""" misc utility functions """
+import datetime
+import hashlib
+import json
+import pickle  # nosec: ephemeral use for hash computation
+import shlex
+from typing import Any, Dict, List, Union
+
+
+def hash_args_kwargs(*args, **kwargs) -> str:
+    """return the hash of the given args/kwargs"""
+    pickled_args = pickle.dumps(args)
+    pickled_kwargs = pickle.dumps(kwargs)
+    return hashlib.sha256(pickled_args + pickled_kwargs).hexdigest()
+
+
+def load_json_file(path: str, encoding: str = "utf-8") -> Any:
+    """load the json file at the given path and return the parsed structure"""
+    with open(path, "rt", encoding=encoding) as jsonfh:
+        return json.load(jsonfh)
+
+
+def cskv(data: Dict, kv_delimiter: str = "=", item_delimiter: str = ", ") -> str:
+    """
+    comma-separated key/value string: converts a dict into a comma-separated list of key
+    and value pairs
+    """
+    # {'name': 'test', 'version': 2} -> "name=test, version=2"
+    return item_delimiter.join(
+        [f"{shlex.quote(str(k))}{kv_delimiter}{shlex.quote(str(v))}" for k, v in data.items()]
+    )
+
+
+def utcnow() -> datetime.datetime:
+    """reimplementation of deprecated datetime.datetime.utcnow"""
+    return datetime.datetime.now(datetime.UTC)
+
+
+class KeyPathNoDefault(Exception):
+    """
+    A token used by get_keypath to represent the absence of a default argument
+    """
+
+
+def get_keypath(
+    obj: Dict,
+    keypath_str: str,
+    delimiter: str = ".",
+    default: Any = KeyPathNoDefault,
+) -> Any:
+    """
+    given a deeply nested object and a delimited keypath, retrieve the deep value at
+    that keypath
+    """
+    keypath: List[str] = keypath_str.split(delimiter)
+    sub_obj: Any = obj
+    key: Union[str, int]
+    for depth, key in enumerate(keypath):
+        try:
+            if isinstance(sub_obj, list):
+                key = int(key)
+            sub_obj = sub_obj[key]
+        except (KeyError, IndexError, TypeError, ValueError):
+            if default is not KeyPathNoDefault:
+                return default
+            raise KeyError(
+                f"unable to resolve keypath '{keypath_str}'; failed to "
+                f"retrieve '{key}' component (depth: {depth})"
+            ) from None
+    return sub_obj
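Reviewer note: a minimal sketch of how the new `utils` helpers behave (illustrative values only; assumes `src/` is on `PYTHONPATH` so the package imports as `aws_reporter`):

```python
from aws_reporter.utils import cskv, get_keypath, hash_args_kwargs

# hypothetical instance data, shaped like a describe_instances entry
instance = {
    "InstanceId": "i-0abc12345",
    "Placement": {"AvailabilityZone": "us-east-1a"},
    "Tags": [{"Key": "Name", "Value": "web-1"}],
}

# dotted keypaths walk nested dicts; numeric components index into lists
assert get_keypath(instance, "Placement.AvailabilityZone") == "us-east-1a"
assert get_keypath(instance, "Tags.0.Value") == "web-1"
assert get_keypath(instance, "No.Such.Path", default="--") == "--"

# dict -> "k=v, k=v" string with shell quoting, as used for the Tags column
assert cskv({"env": "prod", "team": "data"}) == "env=prod, team=data"

# Reporter.get_data's cache key: identical call arguments hash identically
assert hash_args_kwargs(Namespace="AWS/RDS") == hash_args_kwargs(Namespace="AWS/RDS")
```

Note that a file passed via `--inputjson` is expected to follow the same `client -> method -> cache-key` nesting that `Reporter.__init__` loads into its cache.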