Skip to content

Commit

Permalink
Merge 831c6ea into e5404a1
Browse files Browse the repository at this point in the history
  • Loading branch information
gruuya committed Mar 7, 2024
2 parents e5404a1 + 831c6ea commit 3bf178a
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 5 deletions.
123 changes: 123 additions & 0 deletions .github/workflows/pr_benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Benchmarks workflow.
#
# For every pull request this workflow:
#   1. benchmarks the PR's checked-out code with `benchmarks/bench.sh run tpch`,
#   2. checks out the PR's base commit and benchmarks it the same way,
#   3. renders the `bench.sh compare` output into `message.md`,
#   4. uploads the message and the PR number as artifacts, and
#   5. posts the message as a comment on the PR.
#
# Security note: values derived from the PR (branch names in particular) are
# attacker-controlled. They are passed to shell steps through `env:` and read
# as shell variables; they are never expanded with `${{ }}` inside `run:`
# scripts, where they would be pasted into the script text before the shell
# parses it.
name: Benchmarks

on:
  pull_request:

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    steps:
      # Debugging aid: prints the whole `github` context to the job log.
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJSON(github) }}
        run: echo "$GITHUB_CONTEXT"

      - name: Checkout PR changes
        uses: actions/checkout@v4

      - name: Setup data and generate unique result names
        env:
          # Passed via the environment so the shell treats them as data.
          HEAD_REF: ${{ github.head_ref }}
          HEAD_SHA: ${{ github.sha }}
          BASE_REF: ${{ github.base_ref }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          cd benchmarks
          mkdir data
          # Generate the TPC-H data set used by the benchmark runs.
          # NOTE(review): this comment previously claimed a scale factor of
          # 10 - confirm against what `bench.sh data tpch` actually produces.
          ./bench.sh data tpch
          # Generate a unique-ish identifier for the results using
          # branch name (first 20 chars) and commit sha (first 7 chars).
          short_ref=$(printf '%s\n' "$HEAD_REF" | cut -c1-20)
          short_sha=$(printf '%s\n' "$HEAD_SHA" | cut -c1-7)
          echo "HEAD_REF_SHA=$short_ref-$short_sha" >> "$GITHUB_ENV"
          short_sha=$(printf '%s\n' "$BASE_SHA" | cut -c1-7)
          echo "BASE_REF_SHA=$BASE_REF-$short_sha" >> "$GITHUB_ENV"

      - name: Benchmark PR changes
        env:
          # bench.sh stores its results under results/$RESULTS_NAME.
          RESULTS_NAME: ${{ env.HEAD_REF_SHA }}
        run: |
          cd benchmarks
          ./bench.sh run tpch

      - name: Checkout base commit
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.base.sha }}
          # Keep untracked files (generated data and the PR's results).
          clean: false

      - name: Benchmark baseline and generate comparison message
        env:
          RESULTS_NAME: ${{ env.BASE_REF_SHA }}
        run: |
          cd benchmarks
          ./bench.sh run tpch
          # Temporary workaround, until `RESULTS_NAME` var lands into main
          # HEAD_REF_SHA / BASE_REF_SHA were exported through GITHUB_ENV by an
          # earlier step, so they are ordinary shell variables here.
          mv -f results/HEAD "results/$BASE_REF_SHA"
          echo ${{ github.event.pull_request.number }} > pr
          pip3 install rich
          # Unquoted heredoc: the $(...) below is expanded by the shell and
          # the escaped backticks become a literal Markdown code fence. The
          # blank lines around the fence let GitHub render it as Markdown
          # inside the <details> element.
          cat > message.md <<EOF
          # Benchmark results

          <details>
          <summary>Benchmarks comparing ${{ github.event.pull_request.base.sha }} and ${{ github.sha }}</summary>

          \`\`\`
          $(./bench.sh compare "$BASE_REF_SHA" "$HEAD_REF_SHA")
          \`\`\`

          </details>
          EOF
          cat message.md

      - name: Upload benchmark comparison message
        uses: actions/upload-artifact@v4
        with:
          name: message
          path: benchmarks/message.md

      - name: Upload PR number
        uses: actions/upload-artifact@v4
        with:
          name: pr
          path: benchmarks/pr

  # NOTE(review): for pull requests opened from forks the default token of a
  # `pull_request` run is presumably read-only, in which case this job cannot
  # post the comment; confirm whether it is still needed alongside any
  # `workflow_run`-based commenting workflow.
  comment:
    name: Post benchmarks comment
    runs-on: ubuntu-latest
    needs: [benchmark]
    steps:
      - name: Download comment message
        uses: actions/download-artifact@v4
        with:
          name: message

      - name: Download pr number
        uses: actions/download-artifact@v4
        with:
          name: pr

      - name: Print message and pr number
        run: |
          cat pr
          echo "PR_NUMBER=$(cat pr)" >> "$GITHUB_ENV"
          cat message.md

      - name: Post a comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const content = fs.readFileSync('message.md', 'utf8');
            // Await the request so an API failure fails this step instead of
            // surfacing as an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: process.env.PR_NUMBER,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: content,
            });
53 changes: 53 additions & 0 deletions .github/workflows/pr_comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# PR Comment workflow.
#
# Triggered when a run of the "Benchmarks" workflow completes. If that run
# was started by a pull request and succeeded, this workflow downloads the
# `message` and `pr` artifacts it produced and posts the message as a comment
# on the corresponding pull request.
#
# Security note: the downloaded artifacts were produced by a run that
# executed pull-request code, so their contents must be treated as untrusted
# input. In particular the `pr` file is validated before it is written to
# GITHUB_ENV; otherwise a multi-line file could define arbitrary environment
# variables for the following steps.
name: PR Comment

on:
  workflow_run:
    workflows: ["Benchmarks"]
    types:
      - completed

jobs:
  comment:
    name: PR Comment
    runs-on: ubuntu-latest
    # Only comment for successful, pull-request-triggered benchmark runs.
    if: >
      github.event.workflow_run.event == 'pull_request' &&
      github.event.workflow_run.conclusion == 'success'
    steps:
      # Debugging aid: prints the whole `github` context to the job log.
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJSON(github) }}
        run: echo "$GITHUB_CONTEXT"

      # `run-id` + `github-token` make download-artifact fetch from the
      # triggering Benchmarks run instead of the current run.
      - name: Download comment message
        uses: actions/download-artifact@v4
        with:
          name: message
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Download pr number
        uses: actions/download-artifact@v4
        with:
          name: pr
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Print message and pr number
        run: |
          cat pr
          pr_number=$(cat pr)
          # The artifact is untrusted: accept nothing but a plain decimal
          # number so its content cannot smuggle extra lines (and therefore
          # extra variables) into GITHUB_ENV.
          if ! [[ "$pr_number" =~ ^[0-9]+$ ]]; then
            echo "Refusing to use invalid PR number from artifact" >&2
            exit 1
          fi
          echo "PR_NUMBER=$pr_number" >> "$GITHUB_ENV"
          cat message.md

      - name: Post the comment
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const content = fs.readFileSync('message.md', 'utf8');
            // Await the request so an API failure fails this step instead of
            // surfacing as an unhandled promise rejection.
            await github.rest.issues.createComment({
              issue_number: process.env.PR_NUMBER,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: content,
            });
12 changes: 7 additions & 5 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ clickbench_extended: ClickBench "inspired" queries against a single parquet (
DATA_DIR directory to store datasets
CARGO_COMMAND command that runs the benchmark binary
DATAFUSION_DIR directory to use (default $DATAFUSION_DIR)
RESULTS_NAME folder where the benchmark files are stored
"
exit 1
}
Expand Down Expand Up @@ -166,18 +167,19 @@ main() {
esac
;;
run)
# Parse positional paraleters
# Parse positional parameters
BENCHMARK=${ARG2:-"${BENCHMARK}"}
BRANCH_NAME=$(cd ${DATAFUSION_DIR} && git rev-parse --abbrev-ref HEAD)
BRANCH_NAME=${BRANCH_NAME//\//_} # mind blowing syntax to replace / with _
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$BRANCH_NAME"}
RESULTS_NAME=${RESULTS_NAME:-"${BRANCH_NAME}"}
RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$RESULTS_NAME"}

echo "***************************"
echo "DataFusion Benchmark Script"
echo "COMMAND: ${COMMAND}"
echo "BENCHMARK: ${BENCHMARK}"
echo "DATAFUSION_DIR: ${DATAFUSION_DIR}"
echo "BRACH_NAME: ${BRANCH_NAME}"
echo "BRANCH_NAME: ${BRANCH_NAME}"
echo "DATA_DIR: ${DATA_DIR}"
echo "RESULTS_DIR: ${RESULTS_DIR}"
echo "CARGO_COMMAND: ${CARGO_COMMAND}"
Expand Down Expand Up @@ -278,7 +280,7 @@ data_tpch() {
echo " tbl files exist ($FILE exists)."
else
echo " creating tbl files with tpch_dbgen..."
docker run -v "${TPCH_DIR}":/data -it --rm ghcr.io/scalytics/tpch-docker:main -vf -s ${SCALE_FACTOR}
docker run -v "${TPCH_DIR}":/data --rm ghcr.io/scalytics/tpch-docker:main -vf -s ${SCALE_FACTOR}
fi

# Copy expected answers into the ./data/answers directory if it does not already exist
Expand All @@ -288,7 +290,7 @@ data_tpch() {
else
echo " Copying answers to ${TPCH_DIR}/answers"
mkdir -p "${TPCH_DIR}/answers"
docker run -v "${TPCH_DIR}":/data -it --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
docker run -v "${TPCH_DIR}":/data --entrypoint /bin/bash --rm ghcr.io/scalytics/tpch-docker:main -c "cp -f /opt/tpch/2.18.0_rc2/dbgen/answers/* /data/answers/"
fi

# Create 'parquet' files from tbl
Expand Down
3 changes: 3 additions & 0 deletions datafusion/physical-plan/src/sorts/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,9 @@ impl ExecutionPlan for SortExec {
let batch = batch?;
sorter.insert_batch(batch).await?;
}
// Test whether benchmarks catch this
// TODO: remove before merge!
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
sorter.sort()
})
.try_flatten(),
Expand Down

0 comments on commit 3bf178a

Please sign in to comment.