Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clp-package: Add support for extracting files as IR streams through the CLI. #472

Merged
merged 22 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3be1e72
Support ir extraction in decompression script
haiqi96 Jul 5, 2024
531c378
Merge branch 'main' into ir_extraction_backup
haiqi96 Jul 6, 2024
e2aa05f
Linter
haiqi96 Jul 6, 2024
e403604
Apply suggestions from code review
haiqi96 Jul 11, 2024
93e23dd
fixes
haiqi96 Jul 11, 2024
adbffc5
Merge branch 'main' into ir_extraction_script
haiqi96 Jul 15, 2024
ef47de3
revert fix that has already been done.
haiqi96 Jul 16, 2024
84f2993
Merge branch 'main' into ir_extraction_script
haiqi96 Jul 16, 2024
4eb049a
Refactor some comments, strings and whitespace; Add missing fmt: off;…
kirkrodrigues Jul 18, 2024
ed65ed3
address review concerns
haiqi96 Jul 18, 2024
ae13f27
linter
haiqi96 Jul 18, 2024
32ea43f
address code review concerns
haiqi96 Jul 19, 2024
63e9693
Update components/clp-package-utils/clp_package_utils/scripts/decompr…
haiqi96 Jul 19, 2024
74917e3
Bug-fix: is not None -> is None; Make other None checks consistent.
kirkrodrigues Jul 19, 2024
56987a7
Rename job DECMOPRESSION -> FILE_EXTRACTION.
kirkrodrigues Jul 19, 2024
72aeafe
Rename more instances of decompression to extract.
kirkrodrigues Jul 19, 2024
070aa1e
revert unexpected change
haiqi96 Jul 19, 2024
75a5f15
Merge branch 'ir_extraction_script' of https://github.com/haiqi96/clp…
haiqi96 Jul 19, 2024
4e1e667
revert unexpected change and apply linter
haiqi96 Jul 19, 2024
b3d3000
Add missing docstrings and error check
haiqi96 Jul 19, 2024
da1deef
Add missing docstrings and error check
haiqi96 Jul 19, 2024
65e5631
fix
haiqi96 Jul 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions components/clp-package-utils/clp_package_utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing
import uuid
from enum import auto
from typing import List, Tuple
from typing import List, Optional, Tuple

import yaml
from clp_py_utils.clp_config import (
Expand All @@ -30,6 +30,9 @@
from strenum import KebabCaseStrEnum

# CONSTANTS
DECOMPRESSION_COMMAND = "x"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about using an enum similar to JobType, but we would need to manually assign each value instead of using auto().

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about:

  • EXTRACT_FILE_CMD and EXTRACT_IR_CMD
  • handle_extract_file_cmd and handle_extract_ir_cmd

IR_EXTRACTION_COMMAND = "i"

# Paths
CONTAINER_CLP_HOME = pathlib.Path("/") / "opt" / "clp"
CONTAINER_INPUT_LOGS_ROOT_DIR = pathlib.Path("/") / "mnt" / "logs"
Expand All @@ -45,6 +48,7 @@ class DockerMountType(enum.IntEnum):
class JobType(KebabCaseStrEnum):
COMPRESSION = auto()
DECOMPRESSION = auto()
IR_EXTRACTION = auto()
SEARCH = auto()


Expand Down Expand Up @@ -282,7 +286,7 @@ def dump_container_config(


def generate_container_start_cmd(
container_name: str, container_mounts: List[CLPDockerMounts], container_image: str
container_name: str, container_mounts: List[Optional[DockerMount]], container_image: str
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a type fix

) -> List[str]:
"""
Generates the command to start a container with the given mounts and name.
Expand Down
142 changes: 119 additions & 23 deletions components/clp-package-utils/clp_package_utils/scripts/decompress.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@
import pathlib
import subprocess
import sys
from typing import Optional

from clp_py_utils.clp_config import CLPConfig

from clp_package_utils.general import (
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
DECOMPRESSION_COMMAND,
DockerMount,
DockerMountType,
dump_container_config,
generate_container_config,
generate_container_name,
generate_container_start_cmd,
get_clp_home,
IR_EXTRACTION_COMMAND,
JobType,
load_config_file,
validate_and_load_db_credentials_file,
Expand All @@ -32,37 +35,33 @@
logger.addHandler(logging_console_handler)


def main(argv):
clp_home = get_clp_home()
default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH

args_parser = argparse.ArgumentParser(description="Decompresses logs")
args_parser.add_argument(
"--config",
"-c",
type=str,
default=str(default_config_file_path),
help="CLP package configuration file.",
)
args_parser.add_argument("paths", metavar="PATH", nargs="*", help="Files to decompress.")
args_parser.add_argument("-f", "--files-from", help="A file listing all files to decompress.")
args_parser.add_argument(
"-d", "--extraction-dir", metavar="DIR", default=".", help="Decompress files into DIR"
)
parsed_args = args_parser.parse_args(argv[1:])

# Validate and load config file
def validate_and_load_config(
    clp_home: pathlib.Path,
    config_file_path: pathlib.Path,
    default_config_file_path: pathlib.Path,
) -> Optional[CLPConfig]:
    """
    Validates and loads the config file, along with the database credentials it needs.
    :param clp_home:
    :param config_file_path: Path of the config file to load.
    :param default_config_file_path: Passed through to `load_config_file` together with
    `config_file_path`.
    :return: clp_config on success, None otherwise (the failure is logged).
    """
    try:
        # NOTE: `config_file_path` comes from the caller; this function must not read the parsed
        # command-line arguments itself.
        clp_config = load_config_file(config_file_path, default_config_file_path, clp_home)
        clp_config.validate_logs_dir()

        # Validate and load necessary credentials
        validate_and_load_db_credentials_file(clp_config, clp_home, False)
        return clp_config
    except Exception:
        # Catch `Exception` rather than using a bare `except` so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        logger.exception("Failed to load config.")
        return None


def handle_decompression_command(
parsed_args, clp_home: pathlib.Path, default_config_file_path: pathlib.Path
):
paths_to_decompress_file_path = None
if parsed_args.files_from:
paths_to_decompress_file_path = pathlib.Path(parsed_args.files_from)
Expand All @@ -76,6 +75,13 @@ def main(argv):
return -1
extraction_dir.mkdir(exist_ok=True)
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved

# Validate and load config file
clp_config = validate_and_load_config(
clp_home, pathlib.Path(parsed_args.config), default_config_file_path
)
if not clp_config:
haiqi96 marked this conversation as resolved.
Show resolved Hide resolved
return -1

container_name = generate_container_name(JobType.DECOMPRESSION)
container_clp_config, mounts = generate_container_config(clp_config, clp_home)
generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
Expand Down Expand Up @@ -112,6 +118,7 @@ def main(argv):
"python3",
"-m", "clp_package_utils.scripts.native.decompress",
"--config", str(generated_config_path_on_container),
DECOMPRESSION_COMMAND,
"-d", str(container_extraction_dir),
]
# fmt: on
Expand All @@ -127,7 +134,96 @@ def main(argv):
# Remove generated files
generated_config_path_on_host.unlink()

return 0

def handle_extraction(parsed_args, clp_home: pathlib.Path, default_config_file_path: pathlib.Path):
    """
    Handles the IR extraction command by running the native decompression script's IR extraction
    subcommand inside a container.
    :param parsed_args: Parsed command-line arguments; reads `config`, `msg_ix`, `orig_file_id`,
    `path` and `target_uncompressed_size`.
    :param clp_home:
    :param default_config_file_path:
    :return: 0 on success, -1 if the config could not be loaded.
    :raise subprocess.CalledProcessError: If the containerized command exits with a non-zero code.
    """
    # Validate and load config file
    clp_config = validate_and_load_config(
        clp_home, pathlib.Path(parsed_args.config), default_config_file_path
    )
    if clp_config is None:
        return -1

    container_name = generate_container_name(JobType.IR_EXTRACTION)
    container_clp_config, mounts = generate_container_config(clp_config, clp_home)
    generated_config_path_on_container, generated_config_path_on_host = dump_container_config(
        container_clp_config, clp_config, container_name
    )

    try:
        necessary_mounts = [mounts.clp_home, mounts.logs_dir]
        container_start_cmd = generate_container_start_cmd(
            container_name, necessary_mounts, clp_config.execution_container
        )

        # fmt: off
        extract_cmd = [
            "python3",
            "-m", "clp_package_utils.scripts.native.decompress",
            "--config", str(generated_config_path_on_container),
            IR_EXTRACTION_COMMAND,
            str(parsed_args.msg_ix),
        ]
        # fmt: on

        # Compare against None explicitly so that falsy-but-valid values (e.g., a target size of 0)
        # are still forwarded to the native script.
        if parsed_args.orig_file_id is not None:
            extract_cmd.append("--orig-file-id")
            extract_cmd.append(str(parsed_args.orig_file_id))
        else:
            extract_cmd.append("--path")
            extract_cmd.append(str(parsed_args.path))
        if parsed_args.target_uncompressed_size is not None:
            extract_cmd.append("--target-uncompressed-size")
            extract_cmd.append(str(parsed_args.target_uncompressed_size))

        cmd = container_start_cmd + extract_cmd
        subprocess.run(cmd, check=True)
    finally:
        # Remove generated files, even if the command failed
        generated_config_path_on_host.unlink()

    return 0


def main(argv):
    """
    Parses the command line and dispatches to the handler for the selected subcommand.
    :param argv: Full argument vector; the first element (the program name) is skipped.
    :return: The selected handler's return value, or -1 if the command is not recognized.
    """
    clp_home = get_clp_home()
    default_config_file_path = clp_home / CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH

    args_parser = argparse.ArgumentParser(description="Decompresses logs")
    args_parser.add_argument(
        "--config",
        "-c",
        type=str,
        default=str(default_config_file_path),
        help="CLP package configuration file.",
    )
    command_args_parser = args_parser.add_subparsers(dest="command", required=True)

    # Decompression command parser
    decompression_job_parser = command_args_parser.add_parser(DECOMPRESSION_COMMAND)
    decompression_job_parser.add_argument(
        "paths", metavar="PATH", nargs="*", help="Files to decompress."
    )
    decompression_job_parser.add_argument(
        "-f", "--files-from", help="A file listing all files to decompress."
    )
    decompression_job_parser.add_argument(
        "-d", "--extraction-dir", metavar="DIR", default=".", help="Decompress files into DIR"
    )

    # IR extraction command parser
    ir_extraction_parser = command_args_parser.add_parser(IR_EXTRACTION_COMMAND)
    ir_extraction_parser.add_argument("msg_ix", type=int, help="Message index.")
    ir_extraction_parser.add_argument(
        "--target-uncompressed-size", type=int, help="Target uncompressed IR size."
    )

    # The file to extract from is identified by exactly one of its original file ID or its path.
    group = ir_extraction_parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--orig-file-id", type=str, help="Original file ID.")
    group.add_argument("--path", type=str, help="Path to the file.")

    parsed_args = args_parser.parse_args(argv[1:])

    command = parsed_args.command
    if DECOMPRESSION_COMMAND == command:
        return handle_decompression_command(parsed_args, clp_home, default_config_file_path)
    elif IR_EXTRACTION_COMMAND == command:
        return handle_extraction(parsed_args, clp_home, default_config_file_path)
    else:
        # No exception is being handled here, so use `error` rather than `exception` (which would
        # append a meaningless "NoneType: None" traceback to the log record).
        logger.error(f"Unexpected command: {command}")
        return -1


if "__main__" == __name__:
Expand Down
Loading