diff --git a/.github/actions/restore-mtime/action.yaml b/.github/actions/restore-mtime/action.yaml new file mode 100644 index 0000000000..6e82492289 --- /dev/null +++ b/.github/actions/restore-mtime/action.yaml @@ -0,0 +1,55 @@ +name: Restore file mtimes from git history +description: > + Restore each file's mtime to its last-commit timestamp so that cargo + fingerprints remain valid when the target/ directory is cached across + CI runs. Requires fetch-depth: 0 on the checkout step. +runs: + using: composite + steps: + - name: Restore file mtimes for cargo fingerprinting + shell: bash + run: | + # actions/checkout sets all file mtimes to the checkout time, which + # invalidates cargo fingerprints and forces a full rebuild even when + # the cached target/ dir is restored. + # + # This pipeline walks the full git history to restore each file's + # mtime to its last-commit timestamp so cargo fingerprints match. + # + # git log outputs commits oldest-first (--reverse), each as: + # <epoch> (--pretty=%ct: committer date in epoch seconds) + # :<old-mode> <new-mode> <old-sha> <new-sha> M\tfile (--raw: one line per file touched in that commit) + # + # --no-merges: skips merge commits, whose combined diff omits cleanly + # merged files — regular commits on each branch reliably list all + # files they touched. + # --no-renames: prevents git from collapsing renames into a single + # "R old\tnew" line that would break the awk tab parsing. + # + # awk processes line by line: + # /^[0-9]+$/ — timestamp line: save in variable t + # /^:[0-9]/ — raw diff line: extract filename (everything after + # the first tab) and map it to t in associative array c. + # Later commits overwrite earlier ones, so each file + # ends up with its most recent commit timestamp. + # END — print all "timestamp\tfilename" pairs. + # + # while loop reads each pair and touches the file with that timestamp. 
+ # IFS=$'\t' — split on tab into ts and file + # -r — don't interpret backslashes in filenames as escapes + # [ -f ] — skip files that no longer exist on disk (deletions); wrapped in if/fi so a skipped final entry cannot leave the loop, and thus the step, with a non-zero exit status + # + # touch -d "@epoch" is GNU-specific (Linux). macOS/BSD touch doesn't + # support -d, so we fall back to touch -t with a formatted timestamp. + # Detection uses a temp file because /dev/null is root-owned and + # utimensat() requires ownership to change timestamps. + _tf=$(mktemp) + if touch -d @0 "$_tf" 2>/dev/null; then + touch_epoch() { touch -d "@$1" "$2"; } + else + touch_epoch() { touch -t "$(date -r "$1" +%Y%m%d%H%M.%S)" "$2"; } + fi + rm -f "$_tf" + git log --raw --no-renames --no-merges --pretty=%ct --reverse \ + | awk '/^[0-9]+$/{t=$0;next} /^:[0-9]/{f=substr($0,index($0,"\t")+1); c[f]=t} END{for(f in c) printf "%s\t%s\n",c[f],f}' \ + | while IFS=$'\t' read -r ts file; do if [ -f "$file" ]; then touch_epoch "$ts" "$file"; fi; done diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index 50fbc9070b..d68dcd110e 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -41,13 +41,18 @@ jobs: sudo rm -rf /opt/hostedtoolcache/Ruby || true sudo rm -rf /opt/hostedtoolcache/node || true - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - key: ${{ runner.os }}-build + shared-key: ${{ runner.os }}-dev-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Setup Python and uv uses: astral-sh/setup-uv@v7 with: diff --git a/.github/workflows/daft-profiling.yml b/.github/workflows/daft-profiling.yml index 620d3fdd4d..3c4519cb52 100644 --- a/.github/workflows/daft-profiling.yml +++ b/.github/workflows/daft-profiling.yml @@ -22,13 +22,18 @@ jobs: timeout-minutes: 45 steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: 
./.github/actions/restore-mtime - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - key: ${{ runner.os }}-profile + shared-key: ${{ runner.os }}-integration-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Setup Python and uv uses: astral-sh/setup-uv@v7 diff --git a/.github/workflows/pr-test-suite.yml b/.github/workflows/pr-test-suite.yml index fb3bc35237..fa8bd592e8 100644 --- a/.github/workflows/pr-test-suite.yml +++ b/.github/workflows/pr-test-suite.yml @@ -121,13 +121,18 @@ jobs: sudo rm -rf /opt/hostedtoolcache/node || true - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - prefix-key: ${{ runner.os }}-build + shared-key: ${{ runner.os }}-dev-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Install cargo-llvm-cov if: matrix.coverage != false uses: taiki-e/install-action@cargo-llvm-cov @@ -284,41 +289,7 @@ jobs: with: submodules: true fetch-depth: 0 - - name: Restore file mtimes for cargo fingerprinting - run: | - # actions/checkout sets all file mtimes to the checkout time, which - # invalidates cargo fingerprints and forces a full rebuild even when - # the cached target/ dir is restored. - # - # This pipeline walks the full git history to restore each file's - # mtime to its last-commit timestamp so cargo fingerprints match. - # - # git log outputs commits oldest-first (--reverse), each as: - # (--pretty=%ct: committer date in epoch seconds) - # : M\tfile (--raw: one line per file touched in that commit) - # - # --no-merges: skips merge commits, whose combined diff omits cleanly - # merged files — regular commits on each branch reliably list all - # files they touched. 
- # --no-renames: prevents git from collapsing renames into a single - # "R old\tnew" line that would break the awk tab parsing. - # - # awk processes line by line: - # /^[0-9]+$/ — timestamp line: save in variable t - # /^:[0-9]/ — raw diff line: extract filename (everything after - # the first tab) and map it to t in associative array c. - # Later commits overwrite earlier ones, so each file - # ends up with its most recent commit timestamp. - # END — print all "timestamp\tfilename" pairs. - # - # while loop reads each pair and touches the file with that timestamp. - # IFS=$'\t' — split on tab into ts and file - # -r — don't interpret backslashes in filenames as escapes - # [ -f ] — skip files that no longer exist on disk (deletions) - # @$ts — the @ tells touch to interpret the value as epoch seconds - git log --raw --no-renames --no-merges --pretty=%ct --reverse \ - | awk '/^[0-9]+$/{t=$0;next} /^:[0-9]/{f=substr($0,index($0,"\t")+1); c[f]=t} END{for(f in c) printf "%s\t%s\n",c[f],f}' \ - | while IFS=$'\t' read -r ts file; do [ -f "$file" ] && touch -d "@$ts" "$file"; done + - uses: ./.github/actions/restore-mtime - name: Setup Python and uv uses: astral-sh/setup-uv@v7 with: @@ -1255,15 +1226,19 @@ jobs: RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2 steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - prefix-key: dev-bench - key: ${{ runner.os }}-benchmark-build + shared-key: ${{ runner.os }}-dev-bench-build cache-all-crates: "true" + cache-workspace-crates: "true" cache-provider: buildjet + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Setup Python and uv uses: astral-sh/setup-uv@v7 with: @@ -1339,6 +1314,10 @@ jobs: on-main: false steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime + if: ${{ matrix.os != 'Windows' }} - uses: 
moonrepo/setup-rust@v1 with: cache: false @@ -1348,8 +1327,10 @@ jobs: # skip on windows because it misses the cache and is very slow (~15 minutes) if: ${{ matrix.os != 'Windows' }} with: - key: ${{ runner.os }}-rust-build + shared-key: ${{ runner.os }}-dev-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Free Disk Space (Ubuntu) if: ${{ matrix.os == 'ubuntu' }} uses: jlumbroso/free-disk-space@main @@ -1447,13 +1428,18 @@ jobs: sudo rm -rf /opt/hostedtoolcache/Ruby || true sudo rm -rf /opt/hostedtoolcache/node || true - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - prefix-key: ${{ runner.os }}-build + shared-key: ${{ runner.os }}-dev-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Setup Python and uv uses: astral-sh/setup-uv@v7 with: @@ -1538,13 +1524,19 @@ jobs: on-main: false steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime + if: ${{ runner.os != 'Windows' }} - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - prefix-key: ${{ runner.os }}-build + shared-key: ${{ runner.os }}-dev-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Setup Python and uv uses: astral-sh/setup-uv@v7 @@ -1632,6 +1624,9 @@ jobs: python-version: "3.10" steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime - name: Setup Python and uv uses: astral-sh/setup-uv@v7 with: @@ -1647,8 +1642,10 @@ jobs: cache: false - uses: Swatinem/rust-cache@v2 with: - prefix-key: ${{ runner.os }}-build + shared-key: ${{ runner.os }}-dev-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 
'refs/heads/main' }} - uses: actions/cache@v5 id: pre-commit-cache diff --git a/.github/workflows/property-based-tests.yml b/.github/workflows/property-based-tests.yml index 42b6754c26..9341404466 100644 --- a/.github/workflows/property-based-tests.yml +++ b/.github/workflows/property-based-tests.yml @@ -22,13 +22,18 @@ jobs: daft_runner: [ray, native] steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: ./.github/actions/restore-mtime - uses: moonrepo/setup-rust@v1 with: cache: false - uses: Swatinem/rust-cache@v2 with: - key: ${{ runner.os }}-build + shared-key: ${{ runner.os }}-integration-build cache-all-crates: "true" + cache-workspace-crates: "true" + save-if: ${{ github.ref == 'refs/heads/main' }} - name: Setup Python ${{ matrix.python-version }} and UV uses: astral-sh/setup-uv@v7