diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 5fccebbca15..abf2eb517eb 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -46,7 +46,7 @@ jobs: name: AMD64 Debian 11 Go ${{ matrix.go }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 15 + timeout-minutes: 30 strategy: fail-fast: false matrix: @@ -68,7 +68,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: '3.10' - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -77,6 +77,16 @@ jobs: if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' continue-on-error: true run: archery docker push debian-go + - name: Run Benchmarks + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' + env: + CONBENCH_URL: https://conbench.ursa.dev + CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} + CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} + CONBENCH_REF: ${{ github.ref_name }} + run: | + pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python + python ci/scripts/go_bench_adapt.py docker_cgo: name: AMD64 Debian 11 GO ${{ matrix.go }} - CGO @@ -164,7 +174,7 @@ jobs: staticcheck: v0.2.2 - go: 1.18 staticcheck: latest - steps: + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: @@ -175,7 +185,7 @@ jobs: with: go-version: ${{ matrix.go }} cache: true - cache-dependency-path: go/go.sum + cache-dependency-path: go/go.sum - name: Install staticcheck run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} - name: Build @@ -189,7 +199,7 @@ jobs: name: AMD64 macOS 11 Go ${{ matrix.go }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 15 + timeout-minutes: 30 strategy: fail-fast: false matrix: @@ -199,7 +209,7 @@ jobs: staticcheck: v0.2.2 - go: 1.18 staticcheck: latest - steps: + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: @@ -219,6 +229,23 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) + - name: Setup Python + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Run Benchmarks + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' + shell: bash + env: + CONBENCH_URL: 'https://conbench.ursa.dev' + CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} + CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} + CONBENCH_REF: ${{ github.ref_name }} + run: | + pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python + python ci/scripts/go_bench_adapt.py + macos-cgo: name: AMD64 macOS 11 Go ${{ matrix.go }} - CGO @@ -236,7 +263,7 @@ jobs: staticcheck: latest env: ARROW_GO_TESTCGO: "1" - steps: + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh new file mode 100644 index 00000000000..5347b42524e --- /dev/null +++ b/ci/scripts/go_bench.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this will output the benchmarks to STDOUT but if `-json` is passed +# as the second argument, it will create a file "bench_stats.json" +# in the directory this is called from containing a json representation + +set -ex + +# simplistic semver comparison +verlte() { + [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ] +} +verlt() { + [ "$1" = "$2" ] && return 1 || verlte $1 $2 +} + +ver=`go env GOVERSION` + +source_dir=${1}/go + +export PARQUET_TEST_DATA=${1}/cpp/submodules/parquet-testing/data +pushd ${source_dir} + +go test -bench=. -benchmem -run=^$ ./... | tee bench_stat.dat + +if verlte "1.18" "${ver#go}"; then + go test -bench=. -benchmem -run=^$ ./arrow/compute | tee bench_stat_compute.dat +fi + +popd + +if [[ "$2" = "-json" ]]; then + go install go.bobheadxi.dev/gobenchdata@latest + export PATH=`go env GOPATH`/bin:$PATH + cat ${source_dir}/bench_*.dat | gobenchdata --json bench_stats.json +fi + +rm ${source_dir}/bench_*.dat \ No newline at end of file diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py new file mode 100644 index 00000000000..db1c09cbc59 --- /dev/null +++ b/ci/scripts/go_bench_adapt.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import os +import uuid +import logging +from pathlib import Path +from typing import List + +from benchadapt import BenchmarkResult +from benchadapt.adapters import BenchmarkAdapter +from benchadapt.log import log + +log.setLevel(logging.DEBUG) + +ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() +SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" +RUN_REASON = "commit" if os.environ.get("CONBENCH_REF") == "master" else "branch" + +class GoAdapter(BenchmarkAdapter): + result_file = "bench_stats.json" + command = ["bash", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"] + + def __init__(self, *args, **kwargs) -> None: + super().__init__(command=self.command, *args, **kwargs) + + def _transform_results(self) -> List[BenchmarkResult]: + with open(self.result_file, "r") as f: + raw_results = json.load(f) + + run_id = uuid.uuid4().hex + parsed_results = [] + for suite in raw_results[0]["Suites"]: + batch_id = uuid.uuid4().hex + pkg = suite["Pkg"] + + for benchmark in suite["Benchmarks"]: + data = benchmark["Mem"]["MBPerSec"] * 1e6 + time = 1 / benchmark["NsPerOp"] * 1e9 + + name = benchmark["Name"].removeprefix('Benchmark') + ncpu = name[name.rfind('-')+1:] + pieces = name[:-(len(ncpu)+1)].split('/') + + parsed = BenchmarkResult( + run_id=run_id, + batch_id=batch_id, + stats={ + "data": [data], + "unit": "b/s", + "times": [time], + "time_unit": "i/s", + "iterations": benchmark["Runs"], + }, + context={ + "benchmark_language": "Go", + "goos": suite["Goos"], + "goarch": suite["Goarch"], + }, + tags={ + "pkg": pkg, + "num_cpu": ncpu, + "name": pieces[0], + "params": '/'.join(pieces[1:]), + }, + run_reason=RUN_REASON, + ) + parsed.run_name = f"{parsed.run_reason}: {parsed.github['commit']}" + parsed_results.append(parsed) + + return parsed_results + + +if __name__ == "__main__": + go_adapter = GoAdapter(result_fields_override={"info":{}}) + go_adapter() \ No newline at end of file diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index 9b735ba6ddc..1bbc697a3b2 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -257,19 +257,19 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool) { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, - arrow.Field{Name: "i8", Type: arrow.PrimitiveTypes.Int8}, - arrow.Field{Name: "i16", Type: arrow.PrimitiveTypes.Int16}, - arrow.Field{Name: "i32", Type: arrow.PrimitiveTypes.Int32}, - arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "u8", Type: arrow.PrimitiveTypes.Uint8}, - arrow.Field{Name: "u16", Type: arrow.PrimitiveTypes.Uint16}, - arrow.Field{Name: "u32", Type: arrow.PrimitiveTypes.Uint32}, - arrow.Field{Name: "u64", Type: arrow.PrimitiveTypes.Uint64}, - arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32}, - arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "str", Type: arrow.BinaryTypes.String}, - arrow.Field{Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms}, + {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, + {Name: "i8", Type: arrow.PrimitiveTypes.Int8}, + {Name: "i16", Type: arrow.PrimitiveTypes.Int16}, + {Name: "i32", Type: arrow.PrimitiveTypes.Int32}, + {Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "u8", Type: arrow.PrimitiveTypes.Uint8}, + {Name: "u16", Type: arrow.PrimitiveTypes.Uint16}, + {Name: "u32", Type: arrow.PrimitiveTypes.Uint32}, + {Name: "u64", Type: arrow.PrimitiveTypes.Uint64}, + {Name: "f32", Type: arrow.PrimitiveTypes.Float32}, + {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + {Name: "str", Type: arrow.BinaryTypes.String}, + {Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms}, }, nil, ) @@ -379,9 +379,9 @@ func TestCSVReaderWithChunk(t *testing.T) { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "str", Type: arrow.BinaryTypes.String}, + {Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + {Name: "str", Type: arrow.BinaryTypes.String}, }, nil, ) @@ -632,7 +632,7 @@ func BenchmarkRead(b *testing.B) { return buf.Bytes() } - for _, rows := range []int{10, 1e2, 1e3, 1e4, 1e5} { + for _, rows := range []int{10, 1e2, 1e3, 1e4} { for _, cols := range []int{1, 10, 100, 1000} { raw := gen(rows, cols) for _, chunks := range []int{-1, 0, 10, 100, 1000} { @@ -651,9 +651,9 @@ func benchRead(b *testing.B, raw []byte, rows, cols, chunks int) { var fields []arrow.Field for i := 0; i < cols; i++ { fields = append(fields, []arrow.Field{ - arrow.Field{Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String}, + {Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64}, + {Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64}, + {Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String}, }...) } diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go index 91dd6b6b7ec..3821f591d2e 100644 --- a/go/parquet/pqarrow/reader_writer_test.go +++ b/go/parquet/pqarrow/reader_writer_test.go @@ -177,7 +177,7 @@ func benchReadTable(b *testing.B, name string, tbl arrow.Table, nbytes int64) { b.SetBytes(nbytes) for i := 0; i < b.N; i++ { - pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()), nil, nil) + pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) if err != nil { b.Error(err) }