apache · zeroshade · Oct 21, 2022 · Sep 15, 2022 · Sep 16, 2022 · Sep 16, 2022
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
@@ -46,7 +46,7 @@ jobs:
     name: AMD64 Debian 11 Go ${{ matrix.go }}
     runs-on: ubuntu-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-    timeout-minutes: 15
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
@@ -68,7 +68,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: '3.10'
       - name: Setup Archery
         run: pip install -e dev/archery[docker]
       - name: Execute Docker Build
@@ -77,6 +77,16 @@ jobs:
         if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
         continue-on-error: true
         run: archery docker push debian-go
+      # Benchmarks only run for pushes to apache/arrow, and only when every
+      # earlier step of the job has succeeded.
+      - name: Run Benchmarks
+        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
+        env:
+          # Conbench connection settings; presumably consumed by benchadapt
+          # when results are submitted -- TODO confirm.
+          CONBENCH_URL: https://conbench.ursa.dev
+          CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }}
+          CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }}
+          # go_bench_adapt.py reports a "commit" run when this is "master"
+          # and a "branch" run otherwise.
+          CONBENCH_REF: ${{ github.ref_name }}
+        # NOTE(review): benchadapt is installed from the moving `main` branch
+        # while the Conbench secrets are in the environment; consider pinning
+        # it to a tag or commit.
+        run: |
+          pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
+          python ci/scripts/go_bench_adapt.py
 
   docker_cgo:
     name: AMD64 Debian 11 GO ${{ matrix.go }} - CGO
@@ -164,7 +174,7 @@ jobs:
             staticcheck: v0.2.2
           - go: 1.18
             staticcheck: latest
-    steps:      
+    steps:
       - name: Checkout Arrow
         uses: actions/checkout@v3
         with:
@@ -175,7 +185,7 @@ jobs:
         with:
           go-version: ${{ matrix.go }}
           cache: true
-          cache-dependency-path: go/go.sum          
+          cache-dependency-path: go/go.sum
       - name: Install staticcheck
         run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }}
       - name: Build
@@ -189,7 +199,7 @@ jobs:
     name: AMD64 macOS 11 Go ${{ matrix.go }}
     runs-on: macos-latest
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
-    timeout-minutes: 15
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
@@ -199,7 +209,7 @@ jobs:
             staticcheck: v0.2.2
           - go: 1.18
             staticcheck: latest
-    steps:      
+    steps:
       - name: Checkout Arrow
         uses: actions/checkout@v3
         with:
@@ -219,6 +229,23 @@ jobs:
       - name: Test
         shell: bash
         run: ci/scripts/go_test.sh $(pwd)
+      # Python is only needed by the benchmark step that follows, so it is
+      # set up under the same condition as that step.
+      - name: Setup Python
+        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      # Benchmarks only run for pushes to apache/arrow, and only when every
+      # earlier step of the job has succeeded.
+      - name: Run Benchmarks
+        if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
+        shell: bash
+        env:
+          # Conbench connection settings; presumably consumed by benchadapt
+          # when results are submitted -- TODO confirm.
+          CONBENCH_URL: 'https://conbench.ursa.dev'
+          CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }}
+          CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }}
+          # go_bench_adapt.py reports a "commit" run when this is "master"
+          # and a "branch" run otherwise.
+          CONBENCH_REF: ${{ github.ref_name }}
+        # NOTE(review): benchadapt is installed from the moving `main` branch
+        # while the Conbench secrets are in the environment; consider pinning
+        # it to a tag or commit.
+        run: |
+          pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
+          python ci/scripts/go_bench_adapt.py
+
 
   macos-cgo:
     name: AMD64 macOS 11 Go ${{ matrix.go }} - CGO
@@ -236,7 +263,7 @@ jobs:
             staticcheck: latest
     env:
       ARROW_GO_TESTCGO: "1"
-    steps:      
+    steps:
       - name: Checkout Arrow
         uses: actions/checkout@v3
         with:

diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Usage: go_bench.sh <arrow-source-root> [-json]
+#
+# Runs the Go benchmarks and prints their output to STDOUT. If `-json` is
+# passed as the second argument, a file "bench_stats.json" containing a
+# JSON representation of the results is also created in the directory
+# this script is called from.
+
+set -ex
+
+# simplistic semver comparison
+verlte() {
+    [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ]
+}
+verlt() {
+    [ "$1" = "$2" ] && return 1 || verlte $1 $2
+}
+
+ver=`go env GOVERSION`
+
+source_dir=${1}/go
+
+export PARQUET_TEST_DATA=${1}/cpp/submodules/parquet-testing/data
+pushd ${source_dir}
+
+go test -bench=. -benchmem -run=^$ ./... | tee bench_stat.dat
+
+if verlte "1.18" "${ver#go}"; then
+    go test -bench=. -benchmem -run=^$ ./arrow/compute | tee bench_stat_compute.dat
+fi
+
+popd
+
+if [[ "$2" = "-json" ]]; then
+    go install go.bobheadxi.dev/gobenchdata@latest
+    export PATH=`go env GOPATH`/bin:$PATH
+    cat ${source_dir}/bench_*.dat | gobenchdata --json bench_stats.json
+fi    
+
+rm ${source_dir}/bench_*.dat
diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import os
+import uuid
+import logging
+from pathlib import Path
+from typing import List
+
+from benchadapt import BenchmarkResult
+from benchadapt.adapters import BenchmarkAdapter
+from benchadapt.log import log
+
+log.setLevel(logging.DEBUG)
+
+ARROW_ROOT = Path(__file__).parent.parent.parent.resolve()
+SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts"
+RUN_REASON = "commit" if os.environ.get("CONBENCH_REF") == "master" else "branch"
+
+class GoAdapter(BenchmarkAdapter):
+    result_file = "bench_stats.json"
+    command = ["bash", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"]
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(command=self.command, *args, **kwargs)
+
+    def _transform_results(self) -> List[BenchmarkResult]:
+        with open(self.result_file, "r") as f:
+            raw_results = json.load(f)
+
+        run_id = uuid.uuid4().hex
+        parsed_results = []
+        for suite in raw_results[0]["Suites"]:
+            batch_id = uuid.uuid4().hex
+            pkg = suite["Pkg"]
+
+            for benchmark in suite["Benchmarks"]:
+                data = benchmark["Mem"]["MBPerSec"] * 1e6
+                time = 1 / benchmark["NsPerOp"] * 1e9
+
+                name = benchmark["Name"].removeprefix('Benchmark')
+                ncpu = name[name.rfind('-')+1:]
+                pieces = name[:-(len(ncpu)+1)].split('/')
+
+                parsed = BenchmarkResult(
+                    run_id=run_id,
+                    batch_id=batch_id,
+                    stats={
+                        "data": [data],
+                        "unit": "b/s",
+                        "times": [time],
+                        "time_unit": "i/s",
+                        "iterations": benchmark["Runs"],
+                    },
+                    context={
+                        "benchmark_language": "Go",
+                        "goos": suite["Goos"],
+                        "goarch": suite["Goarch"],
+                    },
+                    tags={
+                        "pkg": pkg,
+                        "num_cpu": ncpu,
+                        "name": pieces[0],
+                        "params": '/'.join(pieces[1:]),
+                    },
+                    run_reason=RUN_REASON,
+                )
+                parsed.run_name = f"{parsed.run_reason}: {parsed.github['commit']}"
+                parsed_results.append(parsed)
+
+        return parsed_results
+
+
+if __name__ == "__main__":
+    go_adapter = GoAdapter(result_fields_override={"info":{}})
+    go_adapter()
diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go
@@ -257,19 +257,19 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool) {
 
 	schema := arrow.NewSchema(
 		[]arrow.Field{
-			arrow.Field{Name: "bool", Type: arrow.FixedWidthTypes.Boolean},
-			arrow.Field{Name: "i8", Type: arrow.PrimitiveTypes.Int8},
-			arrow.Field{Name: "i16", Type: arrow.PrimitiveTypes.Int16},
-			arrow.Field{Name: "i32", Type: arrow.PrimitiveTypes.Int32},
-			arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
-			arrow.Field{Name: "u8", Type: arrow.PrimitiveTypes.Uint8},
-			arrow.Field{Name: "u16", Type: arrow.PrimitiveTypes.Uint16},
-			arrow.Field{Name: "u32", Type: arrow.PrimitiveTypes.Uint32},
-			arrow.Field{Name: "u64", Type: arrow.PrimitiveTypes.Uint64},
-			arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32},
-			arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
-			arrow.Field{Name: "str", Type: arrow.BinaryTypes.String},
-			arrow.Field{Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms},
+			{Name: "bool", Type: arrow.FixedWidthTypes.Boolean},
+			{Name: "i8", Type: arrow.PrimitiveTypes.Int8},
+			{Name: "i16", Type: arrow.PrimitiveTypes.Int16},
+			{Name: "i32", Type: arrow.PrimitiveTypes.Int32},
+			{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
+			{Name: "u8", Type: arrow.PrimitiveTypes.Uint8},
+			{Name: "u16", Type: arrow.PrimitiveTypes.Uint16},
+			{Name: "u32", Type: arrow.PrimitiveTypes.Uint32},
+			{Name: "u64", Type: arrow.PrimitiveTypes.Uint64},
+			{Name: "f32", Type: arrow.PrimitiveTypes.Float32},
+			{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
+			{Name: "str", Type: arrow.BinaryTypes.String},
+			{Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms},
 		},
 		nil,
 	)
@@ -379,9 +379,9 @@ func TestCSVReaderWithChunk(t *testing.T) {
 
 	schema := arrow.NewSchema(
 		[]arrow.Field{
-			arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
-			arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
-			arrow.Field{Name: "str", Type: arrow.BinaryTypes.String},
+			{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
+			{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
+			{Name: "str", Type: arrow.BinaryTypes.String},
 		},
 		nil,
 	)
@@ -632,7 +632,7 @@ func BenchmarkRead(b *testing.B) {
 		return buf.Bytes()
 	}
 
-	for _, rows := range []int{10, 1e2, 1e3, 1e4, 1e5} {
+	for _, rows := range []int{10, 1e2, 1e3, 1e4} {
 		for _, cols := range []int{1, 10, 100, 1000} {
 			raw := gen(rows, cols)
 			for _, chunks := range []int{-1, 0, 10, 100, 1000} {
@@ -651,9 +651,9 @@ func benchRead(b *testing.B, raw []byte, rows, cols, chunks int) {
 	var fields []arrow.Field
 	for i := 0; i < cols; i++ {
 		fields = append(fields, []arrow.Field{
-			arrow.Field{Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64},
-			arrow.Field{Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64},
-			arrow.Field{Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String},
+			{Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64},
+			{Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64},
+			{Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String},
 		}...)
 	}
 

diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go
@@ -177,7 +177,7 @@ func benchReadTable(b *testing.B, name string, tbl arrow.Table, nbytes int64) {
 		b.SetBytes(nbytes)
 
 		for i := 0; i < b.N; i++ {
-			pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()), nil, nil)
+			pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
 			if err != nil {
 				b.Error(err)
 			}