diff --git a/.github/matchers/clang-tidy.json b/.github/matchers/clang-tidy.json new file mode 100644 index 00000000000..1ae1bffc022 --- /dev/null +++ b/.github/matchers/clang-tidy.json @@ -0,0 +1,17 @@ +{ + "problemMatcher": [ + { + "owner": "clang-tidy", + "pattern": [ + { + "regexp": "^(.*):(\\d+):(\\d+):\\s+(error|warning):\\s+(.*) \\[([a-z0-9,\\-]+)\\]\\s*$", + "file": 1, + "line": 2, + "column": 3, + "severity": 4, + "message": 5 + } + ] + } + ] +} diff --git a/.github/matchers/gcc.json b/.github/matchers/gcc.json new file mode 100644 index 00000000000..899239f8160 --- /dev/null +++ b/.github/matchers/gcc.json @@ -0,0 +1,18 @@ +{ + "problemMatcher": [ + { + "owner": "gcc", + "severity": "error", + "pattern": [ + { + "regexp": "^(.*):(\\d+):(\\d+):\\s+(?:fatal\\s+)?(warning|error):\\s+(.*)$", + "file": 1, + "line": 2, + "column": 3, + "severity": 4, + "message": 5 + } + ] + } + ] +} diff --git a/.github/matchers/shellcheck.json b/.github/matchers/shellcheck.json new file mode 100644 index 00000000000..431d91ddd9f --- /dev/null +++ b/.github/matchers/shellcheck.json @@ -0,0 +1,18 @@ +{ + "problemMatcher": [ + { + "owner": "shellcheck", + "severity": "error", + "pattern": [ + { + "regexp": "^(.*):(\\d+):(\\d+):\\s+(?:fatal\\s+)?(warning|error):\\s+(.*)$", + "file": 1, + "line": 2, + "column": 3, + "severity": 4, + "message": 5 + } + ] + } + ] +} diff --git a/.github/workflows/preliminary_checks.yml b/.github/workflows/preliminary_checks.yml index 9be23e9c0a5..2e2cc422209 100644 --- a/.github/workflows/preliminary_checks.yml +++ b/.github/workflows/preliminary_checks.yml @@ -38,13 +38,104 @@ jobs: - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + get-changes: + runs-on: ubuntu-latest + outputs: + run-clang-tidy: ${{ steps.changes.outputs.run_clang_tidy }} + changed-files: ${{ steps.changes.outputs.files}} + diff-range: ${{ steps.changes.outputs.range }} + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + persist-credentials: false + fetch-depth: 0 + + - name: Get changed files + id: changes + env: + GH_TOKEN: ${{ github.token }} + BASE_REF: ${{ github.base_ref }} + HEAD_REF: ${{ github.head_ref }} + PR_OWNER: ${{ github.event.pull_request.head.repo.owner.login }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + merge_base_commit=$(gh api -q '.merge_base_commit.sha' \ + /repos/facebookincubator/velox/compare/facebookincubator:$BASE_REF...$PR_OWNER:$HEAD_REF \ + ) + + range="$merge_base_commit..$HEAD_SHA" + echo "range=$range" >> "$GITHUB_OUTPUT" + + git diff --name-only $range > changed_files.txt + + cpp_files='.+\.(cpp|h|hpp)$' + + { + echo 'files<> "$GITHUB_OUTPUT" + + if grep -qE $cpp_files changed_files.txt; then + echo "run_clang_tidy=true" >> "$GITHUB_OUTPUT" + fi + + clang-tidy: + needs: get-changes + if: ${{ needs.get-changes.outputs.run-clang-tidy == 'true' }} + runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:adapters + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + persist-credentials: false + fetch-depth: 0 + + - run: uv tool install clang-tidy==18.1.8 + + - name: Configure Maximal Build + env: + CUDA_VERSION: "12.8" + CUDA_ARCHITECTURES: 70 + CUDA_COMPILER: /usr/local/cuda-${CUDA_VERSION}/bin/nvcc + # Set compiler to GCC 12 + CUDA_FLAGS: -ccbin /opt/rh/gcc-toolset-12/root/usr/bin + faiss_SOURCE: BUNDLED + EXTRA_CMAKE_FLAGS: >- + -DVELOX_ENABLE_BENCHMARKS=ON + -DVELOX_ENABLE_EXAMPLES=ON + -DVELOX_ENABLE_ARROW=ON + -DVELOX_ENABLE_GEO=ON + -DVELOX_ENABLE_PARQUET=ON + -DVELOX_ENABLE_HDFS=ON + -DVELOX_ENABLE_S3=ON + -DVELOX_ENABLE_GCS=ON + -DVELOX_ENABLE_ABFS=ON + -DVELOX_ENABLE_WAVE=ON + -DVELOX_MONO_LIBRARY=ON + -DVELOX_BUILD_SHARED=ON + -DVELOX_ENABLE_CUDF=ON + -DVELOX_ENABLE_FAISS=ON + -DVELOX_ENABLE_REMOTE_FUNCTIONS=ON + run: | + make cmake BUILD_DIR=release BUILD_TYPE=Release + + - name: Run clang-tidy + env: + FILES: ${{ needs.get-changes.outputs.changed-files }} + RANGE: ${{ needs.get-changes.outputs.diff-range }} + run: | + git config --global --add safe.directory /__w/velox/velox + + python ./scripts/checks/run-clang-tidy.py --commit $RANGE $FILES + title-check: name: PR Title Format runs-on: ubuntu-latest steps: - shell: python env: - title: '${{ github.event.pull_request.title }}' + title: "${{ github.event.pull_request.title }}" run: | import re import os diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca6e651b7f2..2a6563fdc5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,10 +37,14 @@ repos: hooks: - id: clang-tidy name: clang-tidy - description: Run clang-tidy on C/C++ files + description: >- + Run clang-tidy on C/C++ files, requires 'compile_commands.json' + to be available in the repo root (e.g. symlinked) or BUILD_PATH + set to it's path e.g. _build/release. stages: - - manual # Needs compile_commands.json - entry: clang-tidy + - pre-commit + entry: ./scripts/checks/run-clang-tidy.py + args: [--commit, HEAD] language: python types_or: [c++, c] additional_dependencies: [clang-tidy==18.1.8] diff --git a/scripts/checks/license-header.py b/scripts/checks/license-header.py index 6f2ebb52e80..03dcdaff48f 100755 --- a/scripts/checks/license-header.py +++ b/scripts/checks/license-header.py @@ -100,7 +100,7 @@ def wrapper_hash(header, args): "*.cmake": attrdict({"wrapper": wrapper_hash, "hashbang": False}), "*.cpp": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), "*.hpp": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), - "*.dockfile": attrdict({"wrapper": wrapper_hash, "hashbang": False}), + "*.dockerfile": attrdict({"wrapper": wrapper_hash, "hashbang": False}), "*.h": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), "*.inc": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), "*.java": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), @@ -110,6 +110,7 @@ def wrapper_hash(header, args): "*.thrift": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), "*.txt": attrdict({"wrapper": wrapper_hash, "hashbang": True}), "*.yml": attrdict({"wrapper": wrapper_hash, "hashbang": False}), + "*.yaml": attrdict({"wrapper": wrapper_hash, "hashbang": False}), "*.cu": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), "*.cuh": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), "*.clcpp": attrdict({"wrapper": wrapper_chpp, "hashbang": False}), @@ -228,7 +229,13 @@ def main(): # If removing the header text, zero it out there. header_comment = "" - message(log_to, "Fix : " + filepath) + if os.environ.get("GITHUB_ACTIONS"): + print( + f"::error file={filepath},line=1,endLine=1,title=Missing license header::" + + "Please add the properly commented license header." + ) + else: + message(log_to, "Fix : " + filepath) if args.check: fail = True diff --git a/scripts/checks/run-clang-tidy.py b/scripts/checks/run-clang-tidy.py index 5d7016c3d9e..9d529fd40b6 100755 --- a/scripts/checks/run-clang-tidy.py +++ b/scripts/checks/run-clang-tidy.py @@ -15,75 +15,12 @@ import argparse import json -import regex +import re import sys +import os import util -CODE_CHECKS = """* - -abseil-* - -android-* - -cert-err58-cpp - -clang-analyzer-osx-* - -cppcoreguidelines-avoid-c-arrays - -cppcoreguidelines-avoid-magic-numbers - -cppcoreguidelines-pro-bounds-array-to-pointer-decay - -cppcoreguidelines-pro-bounds-pointer-arithmetic - -cppcoreguidelines-pro-type-reinterpret-cast - -cppcoreguidelines-pro-type-vararg - -fuchsia-* - -google-* - -hicpp-avoid-c-arrays - -hicpp-deprecated-headers - -hicpp-no-array-decay - -hicpp-use-equals-default - -hicpp-vararg - -llvmlibc-* - -llvm-header-guard - -llvm-include-order - -mpi-* - -misc-non-private-member-variables-in-classes - -misc-no-recursion - -misc-unused-parameters - -modernize-avoid-c-arrays - -modernize-deprecated-headers - -modernize-use-nodiscard - -modernize-use-trailing-return-type - -objc-* - -openmp-* - -readability-avoid-const-params-in-decls - -readability-convert-member-functions-to-static - -readability-magic-numbers - -zircon-* -""" - -# Additional opt-outs because googletest macros trip too many things. -# -TEST_CHECKS = ( - CODE_CHECKS - + """ - -cert-err58-cpp - -cppcoreguidelines-avoid-goto - -cppcoreguidelines-avoid-non-const-global-variables - -cppcoreguidelines-owning-memory - -cppcoreguidelines-pro-type-vararg - -cppcoreguidelines-special-member-functions - -hicpp-avoid-goto - -hicpp-special-member-functions - -hicpp-vararg - -misc-no-recursion - -readability-implicit-bool-conversion -""" -) - - -def check_list(check_string): - return ",".join([check.strip() for check in check_string.strip().splitlines()]) - - -CODE_CHECKS = check_list(CODE_CHECKS) -TEST_CHECKS = check_list(TEST_CHECKS) - class Multimap(dict): def __setitem__(self, key, value): @@ -101,15 +38,15 @@ def git_changed_lines(commit): line = line.rstrip("\n") fields = line.split() - match = regex.match(r"^\+\+\+ b/.*", line) + match = re.match(r"^\+\+\+ b/.*", line) if match: file = "" - match = regex.match(r"^\+\+\+ b/(.*(\.cpp|\.h))$", line) + match = re.match(r"^\+\+\+ b/(.*(\.cpp|\.h|\.hpp))$", line) if match: file = match.group(1) - match = regex.match(r"^@@", line) + match = re.match(r"^@@", line) if match and file != "": lspan = fields[2].split(",") if len(lspan) <= 1: @@ -117,60 +54,71 @@ def git_changed_lines(commit): changed_lines[file] = [int(lspan[0]), int(lspan[0]) + int(lspan[1])] - return json.dumps( - [{"name": key, "lines": value} for key, value in changed_lines.items()] - ) - - -def checks(args): - status, stdout, stderr = util.run( - f"clang-tidy -checks='{CODE_CHECKS}' --list-checks" - ) - print(stdout) + return changed_lines def check_output(output): - return regex.match(r"^/.* warning: ", output) + return re.match(r"^/.* warning: ", output) def tidy(args): files = util.input_files(args.files) + files = [file for file in files if re.match(r".*(\.cpp|\.h|\.hpp)$", file)] - groups = Multimap() + in_gha = os.environ.get("GITHUB_ACTIONS") is not None - for file in files: - groups["test" if "/tests/" in file else "main"] = file + changed_lines = git_changed_lines(args.commit) - fix = "--fix" if args.fix == "fix" else "" - lines = ( - ("'--line-filter=" + git_changed_lines(args.commit)) + "'" - if args.commit is not None - else "" + line_filter = json.dumps( + [{"name": key, "lines": value} for key, value in changed_lines.items()] ) + filtered_files = [*changed_lines.keys()] + if len(filtered_files) == 0: + return 0 + + fix = "--fix" if args.fix == "fix" else "" + lines = f"'--line-filter={line_filter}'" if args.commit is not None else "" ok = True - if groups.get("main", None): - status, stdout, stderr = util.run( - f"xargs clang-tidy -p=build/release/ --format-style=file -header-filter='.*' --checks='{CODE_CHECKS}' --quiet {fix} {lines}", - input=groups["main"], - ) - ok = check_output(stdout) and ok + build_path = args.p or os.getenv("BUILD_PATH") + build_path_str = f"-p {build_path}" if build_path else "" + + if build_path_str == "" and not os.path.isfile( + os.getcwd().join("compile_commands.json") + ): + print("compile_commands.json not found, skipping clang-tidy") + return 0 - if groups.get("test", None): - status, stdout, stderr = util.run( - f"xargs clang-tidy -p=build/release/ --format-style=file -header-filter='.*' --checks='{TEST_CHECKS}' --quiet {fix} {lines}", - input=groups["test"], + status, stdout, stderr = util.run( + f"xargs clang-tidy --format-style=file -header-filter='.*' --quiet {build_path_str} {fix} {lines}", + input=filtered_files, + ) + + if in_gha: + clang_tidy_pattern = ( + r"^(.*):(\d+):(\d+):\s+(error|warning):\s+(.*) \[([a-z0-9,\-]+)\]\s*$" ) - ok = check_output(stdout) and ok + + for stdout_line in stdout.split("\n"): + m = re.match(clang_tidy_pattern, stdout_line) + if m is not None: + file, line, col, severity, message, rule = m.groups() + file = file.removeprefix("/__w/velox/velox/") + print( + f"::{severity} file={file},line={line},col={col},title={rule}::{message}" + ) + + ok = check_output(stdout) return 0 if ok else 1 def parse_args(): global parser - parser = argparse.ArgumentParser(description="CircliCi Utility") + parser = argparse.ArgumentParser(description="Clang Tidy Utility") parser.add_argument("--commit") parser.add_argument("--fix") + parser.add_argument("-p", help="Path containing 'compile_commands.json'") parser.add_argument("files", metavar="FILES", nargs="+", help="files to process") diff --git a/scripts/checks/util.py b/scripts/checks/util.py index c0b34532a94..fe7f05c86dd 100644 --- a/scripts/checks/util.py +++ b/scripts/checks/util.py @@ -15,7 +15,7 @@ import gzip import json import os -import regex +import re import subprocess import sys @@ -27,7 +27,7 @@ class attrdict(dict): class string(str): def extract(self, rexp): - return regex.match(rexp, self).group(1) + return re.match(rexp, self).group(1) def json(self): return json.loads(self, object_hook=attrdict)