Skip to content

Commit

Permalink
ci(check-fixtures): parallelise fixture check-fixtures
Browse files Browse the repository at this point in the history
Introduces several fixture-checking commands (`--check-jsonlint`,
`--check-record-ids`, `--check-record-dois`, `--check-docs-slugs`,
`--check-secondary-types`, `--check-trailing-whitespace`) in order to
make the lengthy `--check-fixtures` process parallelisable.

Amends CI accordingly, and renames formatters and linters for clarity.

Amends `.editorconfig` to add rules for shell scripts and to remove ReST
file rules as we have switched to Markdown.

BREAKING CHANGE: Drops `run-tests.sh --check-fixtures` command.
  • Loading branch information
tiborsimko committed Jan 10, 2025
1 parent 952b104 commit 3c61e45
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 100 deletions.
6 changes: 3 additions & 3 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Open Data Portal.
# Copyright (C) 2016 CERN.
# Copyright (C) 2016, 2025 CERN.
#
# CERN Open Data Portal is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -40,8 +40,8 @@ known_first_party = cernopendata
multi_line_output = 2
default_section = THIRDPARTY

# RST files (used by sphinx)
[*.rst]
# Shell script files
[*.sh]
indent_size = 4

# CSS, HTML, JS, JSON, YML
Expand Down
127 changes: 92 additions & 35 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file is part of CERN Open Data Portal.
# Copyright (C) 2020, 2023, 2024 CERN.
# Copyright (C) 2020, 2023, 2024, 2025 CERN.
#
# CERN Open Data Portal is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand All @@ -20,36 +20,56 @@
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

name: CI
name: ci

on: [push, pull_request]

jobs:
lint-shellcheck:
format-black:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v2
- uses: actions/checkout@v2

- name: Runs shell script static analysis
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.9

- name: Check Python code formatting
run: |
sudo apt-get install shellcheck
./run-tests.sh --check-shellscript
pip install black
./run-tests.sh --check-black
lint-black:
format-isort:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- name: Checkout
uses: actions/checkout@v2

- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.9

- name: Check Python code formatting
- name: Check isort
run: |
pip install black
./run-tests.sh --check-black
pip install --upgrade pip
pip install isort
./run-tests.sh --check-isort
lint-jsonlint:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Node
uses: actions/setup-node@v4

- name: Lint JSON files
run: |
npm install jsonlint --global
./run-tests.sh --check-jsonlint
lint-pycodestyle:
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -85,39 +105,28 @@ jobs:
pip install pydocstyle
./run-tests.sh --check-pydocstyle
check-fixtures:
lint-shellcheck:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Setup node
uses: actions/setup-node@v1
with:
node-version: "14"

- name: Install Node dependencies
run: npm install -g jsonlint

- name: Check fixtures
run: ./run-tests.sh --check-fixtures
# The flag must match an entry in run-tests.sh's argument dispatcher,
# which names this check `--check-shellcheck`.
- name: Runs shell script static analysis
  run: |
    sudo apt-get install shellcheck
    ./run-tests.sh --check-shellcheck
check-isort:
runs-on: ubuntu-20.04
check-docs-slugs:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Setup node
uses: actions/setup-node@v4

- name: Check isort
run: |
pip install --upgrade pip
pip install isort
./run-tests.sh --check-isort
- name: Check docs slugs
run: ./run-tests.sh --check-docs-slugs

check-licenses:
runs-on: ubuntu-20.04
Expand All @@ -134,3 +143,51 @@ jobs:
run: |
pip install --upgrade pip
./run-tests.sh --check-licenses
check-record-dois:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Setup node
uses: actions/setup-node@v4

- name: Check record DOIs
run: ./run-tests.sh --check-record-dois

check-record-ids:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Setup node
uses: actions/setup-node@v4

- name: Check record IDs
run: ./run-tests.sh --check-record-ids

check-secondary-types:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Setup node
uses: actions/setup-node@v4

- name: Check secondary types
run: ./run-tests.sh --check-secondary-types

check-trailing-whitespace:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Setup node
uses: actions/setup-node@v4

- name: Check trailing whitespace
run: ./run-tests.sh --check-trailing-whitespace
135 changes: 73 additions & 62 deletions run-tests.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
#
# This file is part of CERN Open Data Portal.
# Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020, 2024 CERN.
# Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020, 2024, 2025 CERN.
#
# CERN Open Data Portal is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand All @@ -22,62 +22,62 @@
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

# quit on errors and potentially unbound symbols:
set -o errexit
set -o nounset

check_script () {
shellcheck run-tests.sh
}

check_black () {
# Run black in check-only mode over the current directory; the exit
# status of black becomes the exit status of this function.
check_black() {
    local -a black_args=(--check .)
    black "${black_args[@]}"
}


check_fixtures () {
# check for possibly incorrect JSON files:
find data/ -name "*.json" -exec jsonlint -q {} \;

# check record ID uniqueness:
dupes=$(jq '.[].recid' data/{records,skeletons}/*.json | sort | uniq -d)
# Ensure that no two documents under data/docs share the same slug.
#
# Extracts `.[].slug` from every *.json file below data/docs, ignores
# entries without a slug (jq prints them as `null`) and reports any value
# that occurs more than once.
#
# Outputs: an error header followed by the duplicated slugs, on stdout.
# Exits:   with status 1 when duplicates are found.
check_docs_slugs() {
    local file dupes
    dupes=$(
        # NUL-delimited hand-over keeps file names with blanks intact.
        find data/docs -name "*.json" -print0 |
            while IFS= read -r -d '' file; do
                jq '.[].slug' "${file}" </dev/null
            done |
            # -x: drop only lines that are exactly `null`, not slugs that
            # merely contain the substring "null".
            sort | grep -vx null | uniq -d
    )
    if [ -n "${dupes}" ]; then
        echo "[ERROR] Found duplicate docs slugs:"
        echo "${dupes}"
        exit 1
    fi
}

# Check the import ordering of the Python sources with isort, using its
# black-compatible profile and printing a diff of any pending changes.
check_isort() {
    # Hand isort the directory and let it walk the tree itself. A
    # `**/*.py` glob only recurses when bash's `globstar` option is on
    # (it is off by default); otherwise it behaves like `*/*.py`, and the
    # unexpanded pattern is passed through when nothing matches.
    isort --check-only --diff --profile black .
}

# Lint every *.json file below the current directory with jsonlint.
#
# Each file is linted in its own jsonlint invocation, because the
# jsonlint CLI accepts a single positional file; batching several paths
# into one call would leave all but the first unchecked.
#
# Returns: 0 when every file passes, 1 when at least one file fails.
check_jsonlint() {
    local file failed=0
    # The file list arrives on fd 3 so that stdin stays untouched.
    while IFS= read -r -d '' -u 3 file; do
        # Keep going after a failure so that all broken files are reported.
        jsonlint -q "${file}" || failed=1
    done 3< <(find . -name "*.json" -print0)
    return "${failed}"
}

# check DOI uniqueness:
# Run the repository's licence checker script, relative to the current
# directory; its exit status is passed through unchanged.
check_licenses() {
    local checker="scripts/check_licenses.py"
    "${checker}"
}

# Run pycodestyle on the `scripts` directory, allowing lines of up to
# 120 characters.
check_pycodestyle() {
    local -a options=(--max-line-length=120)
    pycodestyle "${options[@]}" scripts
}

# Run pydocstyle on the `scripts` directory.
check_pydocstyle() {
    local target="scripts"
    pydocstyle "${target}"
}

# Ensure that no DOI is used by more than one record.
#
# Extracts `.[].doi` from every JSON file in data/records and
# data/skeletons, ignores entries without a DOI (jq prints them as
# `null`) and reports any value that occurs more than once.
#
# Outputs: an error header followed by the duplicated DOIs, on stdout.
# Exits:   with status 1 when duplicates are found.
check_record_dois() {
    local dupes
    # -x: drop only lines that are exactly `null`, not DOIs that merely
    # contain the substring "null".
    dupes=$(jq '.[].doi' data/{records,skeletons}/*.json | sort | grep -vx null | uniq -d)
    if [ -n "${dupes}" ]; then
        echo "[ERROR] Found duplicate record DOIs:"
        echo "${dupes}"
        exit 1
    fi
}

# check docs slug uniqueness:
# shellcheck disable=SC2044
dupes=$(for file in $(find data/docs -name "*.json"); do jq '.[].slug' "$file"; done | sort | grep -v null | uniq -d)
# Ensure that no record ID is used by more than one record.
#
# Extracts `.[].recid` from every JSON file in data/records and
# data/skeletons and reports any value that occurs more than once.
#
# Outputs: an error header followed by the duplicated IDs, on stdout.
# Exits:   with status 1 when duplicates are found.
check_record_ids() {
    local dupes
    dupes=$(jq '.[].recid' data/{records,skeletons}/*.json | sort | uniq -d)
    if [ -n "${dupes}" ]; then
        echo "[ERROR] Found duplicate record IDs:"
        echo "${dupes}"
        exit 1
    fi
}

# check trailing whitespace:
whitespace_found_p=0
for file in $(git ls-files | grep -E '.(py|html|css|json|md|sh|txt|yml)$'); do
if grep -q ' $' "$file"; then
whitespace_found_p=1
echo "[ERROR] Found trailing whitespace in ${file}."
fi
done

if [ "${whitespace_found_p}" != "0" ]; then
exit 1
fi

# check for empty secondary type in fixtures
check_secondary_types() {
# shellcheck disable=SC2044
for file in $(find data/{records,docs}/ -name "*.json"); do
secondaries=$(jq '.[].type.secondary' "$file" -c | sort | uniq)
Expand All @@ -87,46 +87,57 @@ check_fixtures () {
done
}

check_pycodestyle () {
pycodestyle --max-line-length=120 scripts
}

check_pydocstyle () {
pydocstyle scripts
# Run shellcheck on run-tests.sh (path relative to the current directory).
check_shellcheck() {
    local script="run-tests.sh"
    shellcheck "${script}"
}

check_isort () {
isort -rc -c -df --profile black -- **/*.py
}
# Report git-tracked text files that contain lines ending in a space.
#
# Only files whose name ends in one of the listed extensions are
# inspected. Every offending file is reported before the function gives
# up, so a single run lists all of them.
#
# Outputs: one "[ERROR] ..." line per offending file, on stdout.
# Exits:   with status 1 when at least one offending file is found.
check_trailing_whitespace() {
    local file whitespace_found_p=0
    # Read names line by line so that paths containing blanks stay whole.
    while IFS= read -r file; do
        if grep -q ' $' -- "${file}"; then
            whitespace_found_p=1
            echo "[ERROR] Found trailing whitespace in ${file}."
        fi
    # The dot is escaped so that only real extensions match; a bare `.`
    # would also select names such as `copy` or `cash`.
    done < <(git ls-files | grep -E '\.(py|html|css|json|md|sh|txt|yml)$')

    if [ "${whitespace_found_p}" != "0" ]; then
        exit 1
    fi
}

check_all () {
check_script
check_fixtures
check_pycodestyle
# Run every available check exactly once, in alphabetical order.
# Relies on each check (or the shell's errexit setting) to stop the run
# on the first failure.
check_all() {
    check_black
    check_docs_slugs
    check_isort
    check_jsonlint
    check_licenses
    check_pycodestyle
    check_pydocstyle
    check_record_dois
    check_record_ids
    check_secondary_types
    check_shellcheck
    check_trailing_whitespace
}

# Called without any command-line argument: run the whole suite, then stop.
if (( $# == 0 )); then
    check_all
    exit 0
fi

for arg in "$@"
do
case $arg in
--check-shellscript) check_script;;
--check-fixtures) check_fixtures;;
--check-pycodestyle) check_pycodestyle;;
--check-pydocstyle) check_pydocstyle;;
--check-isort) check_isort;;
--check-licenses) check_licenses;;
*)
esac
done
# Run each check named on the command line, in the order given. Every
# argument is honoured, not just the first one; an unknown argument
# aborts the run with an error on stderr.
for arg in "$@"; do
    case "${arg}" in
        --check-black) check_black ;;
        --check-docs-slugs) check_docs_slugs ;;
        --check-isort) check_isort ;;
        --check-jsonlint) check_jsonlint ;;
        --check-licenses) check_licenses ;;
        --check-pycodestyle) check_pycodestyle ;;
        --check-pydocstyle) check_pydocstyle ;;
        --check-record-dois) check_record_dois ;;
        --check-record-ids) check_record_ids ;;
        --check-secondary-types) check_secondary_types ;;
        # `--check-shellscript` is the earlier spelling of this flag; it is
        # still accepted so that callers using it keep working.
        --check-shellcheck | --check-shellscript) check_shellcheck ;;
        --check-trailing-whitespace) check_trailing_whitespace ;;
        *)
            echo "[ERROR] Invalid argument '${arg}'. Exiting." >&2
            exit 1
            ;;
    esac
done

0 comments on commit 3c61e45

Please sign in to comment.