Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Taskfile: Add cmake and remote utils; update checksum data path to an array. #16

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
250 changes: 228 additions & 22 deletions taskfiles/utils.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,42 @@ tasks:
# CHECKSUM UTILS
# ===

# @param {string} DATA_DIR The directory to compute the checksum for.
# @param {string[]} DATA_PATHS List of paths to compute the checksum for.
# @param {string} OUTPUT_FILE
# @param {[]string} [EXCLUDE_PATHS] A list of paths, relative to `DATA_DIR`, to exclude from the
# checksum.
# @param {string[]} [EXCLUDE_PATTERNS] Path wildcard patterns, relative to any `DATA_PATHS`, to
# exclude from the checksum.
# the checksum.
compute-checksum:
desc: "Tries to compute a checksum for the given directory and output it to a file."
desc: "Tries to compute a checksum for the given paths and output it to a file."
internal: true
silent: true
requires:
vars: ["DATA_DIR", "OUTPUT_FILE"]
vars: ["DATA_PATHS", "OUTPUT_FILE"]
cmds:
- >-
tar cf -
davidlion marked this conversation as resolved.
Show resolved Hide resolved
--directory "{{.DATA_DIR}}"
--group 0
--mtime "UTC 1970-01-01"
--numeric-owner
--owner 0
--sort name
{{- range .EXCLUDE_PATHS}}
--no-anchored
--wildcards
{{- range .EXCLUDE_PATTERNS}}
--exclude="{{.}}"
{{- end}}
. 2> /dev/null
{{- range .DATA_PATHS}}
"{{.}}"
davidlion marked this conversation as resolved.
Show resolved Hide resolved
{{- end}}
2> /dev/null
| md5sum > {{.OUTPUT_FILE}}
# Ignore errors so that dependent tasks don't fail
ignore_error: true

# @param {string} DATA_DIR The directory to validate the checksum for.
# @param {string[]} DATA_PATHS List of paths to validate the checksum for.
# @param {string} OUTPUT_FILE
# @param {[]string} [EXCLUDE_PATHS] A list of paths, relative to `DATA_DIR`, to exclude from the
# checksum.
# @param {string[]} [EXCLUDE_PATTERNS] Path wildcard patterns, relative to any `DATA_PATHS`, to
# exclude from the checksum.
validate-checksum:
desc: "Validates the checksum of the given directory matches the checksum in the given file, or
deletes the checksum file otherwise."
Expand All @@ -44,21 +49,24 @@ tasks:
vars:
TMP_CHECKSUM_FILE: "{{.CHECKSUM_FILE}}.tmp"
requires:
vars: ["CHECKSUM_FILE", "DATA_DIR"]
vars: ["CHECKSUM_FILE", "DATA_PATHS"]
cmds:
- task: "compute-checksum"
vars:
DATA_DIR: "{{.DATA_DIR}}"
EXCLUDE_PATHS:
ref: "default (list) .EXCLUDE_PATHS"
DATA_PATHS:
ref: ".DATA_PATHS"
EXCLUDE_PATTERNS:
ref: "default (list) .EXCLUDE_PATTERNS"
OUTPUT_FILE: "{{.TMP_CHECKSUM_FILE}}"
- defer: "rm -f '{{.TMP_CHECKSUM_FILE}}'"
# Check that the directory exists and the checksum matches; otherwise delete the checksum file
# Check that the paths exist and the checksum matches; otherwise delete the checksum file.
- >-
(
test -d "{{.DATA_DIR}}"
&& diff -q '{{.TMP_CHECKSUM_FILE}}' '{{.CHECKSUM_FILE}}' 2> /dev/null
) || rm -f '{{.CHECKSUM_FILE}}'
{{- range .DATA_PATHS}}
test -e "{{.}}" &&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Ignoring the quotes) I probably should've anticipated this, but this won't work for patterns since if a wildcard path glob returns multiple files, test -e will fail because it only expects one argument.

{{- end}}
diff -q "{{.TMP_CHECKSUM_FILE}}" "{{.CHECKSUM_FILE}}" 2> /dev/null
) || rm -f "{{.CHECKSUM_FILE}}"

# ===
# STRING UTILS
Expand All @@ -75,7 +83,7 @@ tasks:
# 2. We can't use `--regexp` instead of `-E` since `--regexp` is not supported on macOS
src="{{.FILE_PATH}}"
dst="{{.FILE_PATH}}.tmp"
sed -E '{{.SED_EXP}}' "${src}" > "${dst}"
sed -E "{{.SED_EXP}}" "${src}" > "${dst}"
mv "${dst}" "${src}"

# ===
Expand Down Expand Up @@ -111,7 +119,7 @@ tasks:
# Runs clang-format on C++ files at the given paths.
#
# @param {string} FLAGS Any flags to pass to clang-format.
# @param {[]string} SRC_PATHS The paths on which to run clang-format.
# @param {string[]} SRC_PATHS The paths on which to run clang-format.
# @param {string} VENV_DIR Python virtual environment where clang-format is installed.
clang-format:
internal: true
Expand All @@ -128,7 +136,7 @@ tasks:
# Runs clang-tidy on C++ files at the given paths.
#
# @param {string} FLAGS Any flags to pass to clang-tidy.
# @param {[]string} SRC_PATHS The paths on which to run clang-tidy.
# @param {string[]} SRC_PATHS The paths on which to run clang-tidy.
# @param {string} VENV_DIR Python virtual environment where clang-tidy is installed.
clang-tidy:
internal: true
Expand All @@ -141,3 +149,201 @@ tasks:
\( -iname "*.cpp" -o -iname "*.h" -o -iname "*.hpp" \) \
-print0 | \
xargs -0 clang-tidy {{.FLAGS}}

# ===
# CMAKE UTILS
# ===

# Runs CMake's configure and build steps for the given source and build directories.
#
# The build directory's checksum is validated before the task runs (as a dependency) and
# recomputed after a successful build; both steps exclude `install_manifest.txt`.
#
# @param {string} BUILD_DIR CMake build directory to create.
# @param {string} SOURCE_DIR Project source directory containing the CMakeLists.txt file.
# @param {string={{.BUILD_DIR}}.md5} [CHECKSUM_FILE] Path to store the checksum of built files.
# @param {string=""} [CMAKE_ARGS] Any additional arguments to pass to CMake's configure step.
cmake-config-and-build:
  label: "{{.TASK}}-{{.SOURCE_DIR}}-{{.BUILD_DIR}}"
  internal: true
  vars:
    CHECKSUM_FILE: >-
      {{default (printf "%s.md5" .BUILD_DIR) .CHECKSUM_FILE}}
    CMAKE_ARGS: >-
      {{default "" .CMAKE_ARGS}}
  requires:
    vars: ["BUILD_DIR", "SOURCE_DIR"]
  sources:
    - "{{.SOURCE_DIR}}/**/*"
  generates: ["{{.CHECKSUM_FILE}}"]
  deps:
    - task: "validate-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        DATA_PATHS: ["{{.BUILD_DIR}}"]
        EXCLUDE_PATTERNS: ["install_manifest.txt"]
  cmds:
    # Configure step. CMAKE_ARGS is intentionally unquoted so it can expand to several arguments.
    - >-
      cmake
      -S "{{.SOURCE_DIR}}"
      -B "{{.BUILD_DIR}}"
      {{.CMAKE_ARGS}}
    # Build step.
    - >-
      cmake
      --build "{{.BUILD_DIR}}"
      --parallel
    # This command must be last
    - task: "compute-checksum"
      vars:
        DATA_PATHS: ["{{.BUILD_DIR}}"]
        # Must be `EXCLUDE_PATTERNS` (the variable `compute-checksum` reads) and must match the
        # patterns given to `validate-checksum` above; otherwise the stored checksum and the
        # validated checksum are computed over different file sets.
        EXCLUDE_PATTERNS: ["install_manifest.txt"]
        OUTPUT_FILE: "{{.CHECKSUM_FILE}}"

# Installs an already-built CMake project by running CMake's install step.
#
# @param {string} BUILD_DIR CMake build directory.
# @param {string} INSTALL_PREFIX Path prefix of where the project should be installed.
# @param {string={{.INSTALL_PREFIX}}.md5} [CHECKSUM_FILE] Path to store the checksum of installed
# files.
# @param {string[]=[{{.INSTALL_PREFIX}}]} [DATA_PATHS] Paths to compute the checksum for.
cmake-install:
  label: "{{.TASK}}-{{.BUILD_DIR}}-{{.INSTALL_PREFIX}}"
  internal: true
  vars:
    CHECKSUM_FILE: '{{default (printf "%s.md5" .INSTALL_PREFIX) .CHECKSUM_FILE}}'
    # Falls back to a one-element list holding INSTALL_PREFIX when the caller passes no DATA_PATHS.
    DATA_PATHS:
      ref: "default (list .INSTALL_PREFIX) .DATA_PATHS"
  requires:
    vars:
      - "BUILD_DIR"
      - "INSTALL_PREFIX"
  sources:
    - "{{.BUILD_DIR}}/**/*"
    # NOTE(review): presumably excluded because the install step itself writes this file into
    # the build directory -- confirm.
    - exclude: "{{.BUILD_DIR}}/install_manifest.txt"
  generates:
    - "{{.CHECKSUM_FILE}}"
  deps:
    # Validate the existing checksum file against DATA_PATHS before the task's commands run.
    - task: "validate-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        DATA_PATHS:
          ref: ".DATA_PATHS"
  cmds:
    - 'cmake --install "{{.BUILD_DIR}}" --prefix "{{.INSTALL_PREFIX}}"'
    # This command must be last
    - task: "compute-checksum"
      vars:
        DATA_PATHS:
          ref: ".DATA_PATHS"
        OUTPUT_FILE: "{{.CHECKSUM_FILE}}"

# ===
# REMOTE UTILS
# ===

# Runs curl to download a file from the given URL.
#
# @param {string} URL
# @param {string} FILE_SHA256 Content hash to verify downloaded file against.
davidlion marked this conversation as resolved.
Show resolved Hide resolved
# @param {string={{(base .URL)}}} [OUTPUT_FILE] Path where the file should be stored.
curl:
label: "{{.TASK}}-{{.OUTPUT_FILE}}"
internal: true
vars:
OUTPUT_FILE: "{{default (base .URL) .OUTPUT_FILE}}"
requires:
vars: ["URL", "FILE_SHA256"]
generates: ["{{.OUTPUT_FILE}}"]
status:
- >-
diff
<(echo "{{.FILE_SHA256}}")
<(openssl dgst -sha256 "{{.OUTPUT_FILE}}"
| awk '{print $2}')
cmds:
- |-
mkdir -p "{{dir .OUTPUT_FILE}}"
max_attempts=3
attempt=1
while [ $attempt -le $max_attempts ]; do
if curl \
--fail \
--location \
--show-error \
--connect-timeout 10 \
--max-time 300 \
"{{.URL}}" \
--output "{{.OUTPUT_FILE}}";
Comment on lines +230 to +237
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Restrict the curl command to secure transport.

Consider restricting curl to HTTPS with a minimum TLS version and strong ciphers to protect against potential MITM attacks.

           if curl \
               --fail \
               --location \
               --show-error \
               --connect-timeout 10 \
               --max-time 300 \
+              --proto '=https' \
+              --tlsv1.2 \
+              --ciphers HIGH \
               "{{.URL}}" \
               --output "{{.OUTPUT_FILE}}";
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if curl \
--fail \
--location \
--show-error \
--connect-timeout 10 \
--max-time 300 \
"{{.URL}}" \
--output "{{.OUTPUT_FILE}}";
if curl \
--fail \
--location \
--show-error \
--connect-timeout 10 \
--max-time 300 \
--proto '=https' \
--tlsv1.2 \
--ciphers HIGH \
"{{.URL}}" \
--output "{{.OUTPUT_FILE}}";

then
break
fi
echo "Attempt $attempt failed. Retrying..."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this print too many times? Another way to write it is to move the loop breaking condition down here. (Fwiw, I feel like I've seen this code before and commented on the same issue. I guess it's from a StackOverflow post, lol.)

Copy link
Member Author

@davidlion davidlion Nov 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just going to remove the log. The final log covers what happened in a failure anyway.

Another way to write it is to move the loop breaking condition down here.

Fwiw, I'm pretty sure we'd need another condition to know whether we will retry again or not.

(Fwiw, I feel like I've seen this code before and commented on the same issue. I guess it's from a StackOverflow post, lol.)

ya... it is from the rabbit

attempt=$((attempt + 1))
sleep 5
done
if [ $attempt -gt $max_attempts ]; then
echo "Failed to download after $max_attempts attempts."
exit 1
fi

# Runs curl to download the tarball from the given URL and extracts its contents.
#
# The tarball is fetched by the `curl` task (run as a dependency), then OUTPUT_DIR is recreated
# from scratch and the archive is extracted into it. A checksum of OUTPUT_DIR is written to
# CHECKSUM_FILE afterwards.
#
# @param {string} OUTPUT_DIR Directory in which to extract the tarball.
# @param {string} URL
# @param {string} FILE_SHA256 Content hash to verify downloaded tar file against.
# @param {string={{.OUTPUT_DIR}}.md5} [CHECKSUM_FILE] File path to store the checksum of the
# extracted files.
# @param {string[]=[]} [EXCLUDE_PATTERNS] Wildcard patterns for paths that shouldn't be
# extracted.
# @param {string[]=[]} [INCLUDE_PATHS] Wildcard patterns for paths to extract.
# @param {int=1} [NUM_COMPONENTS_TO_STRIP] Number of leading path components to strip from the
# extracted files.
# @param {string={{.OUTPUT_DIR}}.tar.gz} [TAR_FILE] Path where the tarball should be stored.
download-and-extract-tar:
  label: "{{.TASK}}-{{.OUTPUT_DIR}}"
  internal: true
  vars:
    CHECKSUM_FILE: >-
      {{default (printf "%s.md5" .OUTPUT_DIR) .CHECKSUM_FILE}}
    EXCLUDE_PATTERNS:
      ref: "default (list) .EXCLUDE_PATTERNS"
    INCLUDE_PATHS:
      ref: "default (list) .INCLUDE_PATHS"
    NUM_COMPONENTS_TO_STRIP: "{{default 1 .NUM_COMPONENTS_TO_STRIP}}"
    TAR_FILE: >-
      {{default (printf "%s.tar.gz" .OUTPUT_DIR) .TAR_FILE}}
  requires:
    vars: ["OUTPUT_DIR", "URL", "FILE_SHA256"]
  sources: ["{{.TASKFILE}}"]
  generates: ["{{.CHECKSUM_FILE}}", "{{.TAR_FILE}}"]
  deps:
    - task: "curl"
      vars:
        URL: "{{.URL}}"
        FILE_SHA256: "{{.FILE_SHA256}}"
        OUTPUT_FILE: "{{.TAR_FILE}}"
    - task: "validate-checksum"
      vars:
        CHECKSUM_FILE: "{{.CHECKSUM_FILE}}"
        DATA_PATHS: ["{{.OUTPUT_DIR}}"]
  cmds:
    # Start from an empty directory so files from a previous extraction can't linger.
    - |-
      rm -rf "{{.OUTPUT_DIR}}"
      mkdir -p "{{.OUTPUT_DIR}}"
    # Every line of this folded scalar must stay at the same indentation so that it folds into a
    # single command line.
    # The trailing member arguments come from INCLUDE_PATHS, the variable this task declares;
    # ranging over an undeclared variable here would silently drop the caller's include patterns.
    - >-
      tar
      --extract
      --strip-components="{{.NUM_COMPONENTS_TO_STRIP}}"
      --directory "{{.OUTPUT_DIR}}"
      --file "{{.TAR_FILE}}"
      --no-anchored
      --wildcards
      {{- range .EXCLUDE_PATTERNS}}
      --exclude="{{.}}"
      {{- end}}
      {{- range .INCLUDE_PATHS}}
      "{{.}}"
      {{- end}}
      2> /dev/null
    # This command must be last
    - task: "compute-checksum"
      vars:
        DATA_PATHS: ["{{.OUTPUT_DIR}}"]
        OUTPUT_FILE: "{{.CHECKSUM_FILE}}"