From 6426da45d1bfd79e998fe160f86dfcfebd2cb372 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 15 Sep 2022 16:31:06 -0400 Subject: [PATCH 01/24] ARROW-17751: [Go][Benchmarking] Add Go Benchmark Script --- ci/scripts/go_bench.sh | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 ci/scripts/go_bench.sh diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh new file mode 100644 index 00000000000..aaeb5928cea --- /dev/null +++ b/ci/scripts/go_bench.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this will output the benchmarks to STDOUT but if `-json` is passed +# as the second argument, it will create a file "bench_stats.json" +# in the directory this is called from containing a json representation + +set -ex + +# simplistic semver comparison +verlte() { + [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ] +} +verlt() { + [ "$1" = "$2" ] && return 1 || verlte $1 $2 +} + +ver=`go env GOVERSION` + +source_dir=${1}/go + +export PARQUET_TEST_DATA=${1}/cpp/submodules/parquet-testing/data +pushd ${source_dir} + +go test -bench=. -benchmem -run=^$ ./... | tee bench_stat.dat + +if verlte "1.18" "${ver#go}"; then + go test -bench=. -benchmem -run=^$ ./arrow/compute | tee bench_stat_compute.dat +fi + +popd + +if [[ "$2" = "-json" ]]; then + go install go.bobheadxi.dev/gobenchdata@latest + cat ${source_dir}/bench_*.dat | gobenchdata --json bench_stats.json +fi + +rm ${source_dir}/bench_*.dat \ No newline at end of file From 86f4d6c12ae797cf7de233c45b186ec8b8b30a74 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 16 Sep 2022 16:13:01 -0400 Subject: [PATCH 02/24] add go-bench-adapt --- .github/workflows/go.yml | 32 +++++++++++++++++ ci/scripts/go_bench_adapt.py | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 ci/scripts/go_bench_adapt.py diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 5fccebbca15..c1c9a84272f 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -77,6 +77,10 @@ jobs: if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' continue-on-error: true run: archery docker push debian-go + - name: Run Benchmarks + run: | + pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python + python ci/scripts/go_bench_adapt.py docker_cgo: name: AMD64 Debian 11 GO ${{ matrix.go }} - CGO @@ -184,6 +188,15 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Run Benchmarks + shell: bash + run: | + pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python + python ci/scripts/go_bench_adapt.py macos: name: AMD64 macOS 11 Go ${{ matrix.go }} @@ -219,6 +232,16 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Run Benchmarks + shell: bash + run: | + pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python + python ci/scripts/go_bench_adapt.py + macos-cgo: name: AMD64 macOS 11 Go ${{ matrix.go }} - CGO @@ -316,3 +339,12 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Run Benchmarks + shell: bash + run: | + pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python + python ci/scripts/go_bench_adapt.py diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py new file mode 100644 index 00000000000..a0ce615e314 --- /dev/null +++ b/ci/scripts/go_bench_adapt.py @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import uuid +from pathlib import Path +from typing import List + +from benchadapt import BenchmarkResult +from benchadapt.adapters import BenchmarkAdapter + +ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() +SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" + + +class GoAdapter(BenchmarkAdapter): + result_file = "bench_stats.json" + command = ["sh", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"] + + def __init__(self) -> None: + super().__init__(command=self.command) + + def transform_results(self) -> List[BenchmarkResult]: + with open(self.result_file, "r") as f: + raw_results = json.load(f) + + parsed_results = [] + for suite in raw_results[0]["Suites"]: + batch_id = uuid.uuid4().hex + + for benchmark in suite["Benchmarks"]: + data = benchmark["Mem"]["MBPerSec"] * 1e6 + time = 1 / benchmark["NsPerOp"] * 1e9 + + parsed = BenchmarkResult( + run_name=benchmark["Name"], + batch_id=batch_id, + stats={ + "data": [data], + "units": "b/s", + "times": [time], + "times_unit": "i/s", + }, + context={"benchmark_language": "Go"}, + ) + + parsed_results.append(parsed) + + return parsed_results + + +if __name__ == "__main__": + go_adapter = GoAdapter() + go_adapter.run() \ No newline at end of file From 1e7c87d516f4e69fbfa571e6ddd5c99210de8518 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 16 Sep 2022 16:25:24 -0400 Subject: [PATCH 03/24] bump to python 3.10 for benchadapt --- .github/workflows/go.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index c1c9a84272f..f9772661999 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -68,7 +68,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.10 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -191,7 +191,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.10 - name: Run Benchmarks shell: bash run: | @@ -235,7 +235,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.10 - name: Run Benchmarks shell: bash run: | @@ -342,7 +342,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.10 - name: Run Benchmarks shell: bash run: | From 8cd25235306d18db2c93dc566dff4462f5db950d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 16 Sep 2022 16:27:41 -0400 Subject: [PATCH 04/24] fix python version specification --- .github/workflows/go.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index f9772661999..6181588144b 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -68,7 +68,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: '3.10' - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -191,7 +191,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: '3.10' - name: Run Benchmarks shell: bash run: | @@ -235,7 +235,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: '3.10' - name: Run Benchmarks shell: bash run: | @@ -342,7 +342,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: 3.10 + python-version: '3.10' - name: Run Benchmarks shell: bash run: | From 5986de0e90e8664c0640637b4218d44dcf38271d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 16 Sep 2022 16:38:03 -0400 Subject: [PATCH 05/24] use bash because pushd --- ci/scripts/go_bench_adapt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index a0ce615e314..c3d32436660 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -29,7 +29,7 @@ class GoAdapter(BenchmarkAdapter): result_file = "bench_stats.json" - command = ["sh", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"] + command = ["bash", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"] def __init__(self) -> None: super().__init__(command=self.command) From 6a1789cd71436f606c6f997d54eb476ad142609c Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 16 Sep 2022 16:55:40 -0400 Subject: [PATCH 06/24] increase timeout to run benchmarks --- .github/workflows/go.yml | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 6181588144b..bf3770670f8 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -46,7 +46,7 @@ jobs: name: AMD64 Debian 11 Go ${{ matrix.go }} runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 15 + timeout-minutes: 30 strategy: fail-fast: false matrix: @@ -187,22 +187,13 @@ jobs: run: ci/scripts/go_build.sh $(pwd) - name: Test shell: bash - run: ci/scripts/go_test.sh $(pwd) - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - name: Run Benchmarks - shell: bash - run: | - pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python - python ci/scripts/go_bench_adapt.py + run: ci/scripts/go_test.sh $(pwd) macos: name: AMD64 macOS 11 Go ${{ matrix.go }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 15 + timeout-minutes: 30 strategy: fail-fast: false matrix: @@ -339,12 +330,4 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - name: Run Benchmarks - shell: bash - run: | - pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python - python ci/scripts/go_bench_adapt.py + \ No newline at end of file From 41c41888291909c25a23ff1cd74e6fe3862570bf Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 16 Sep 2022 17:28:22 -0400 Subject: [PATCH 07/24] fix bench tests --- go/arrow/csv/reader_test.go | 40 ++++++++++++------------ go/parquet/pqarrow/reader_writer_test.go | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index 9b735ba6ddc..1bbc697a3b2 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -257,19 +257,19 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool) { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, - arrow.Field{Name: "i8", Type: arrow.PrimitiveTypes.Int8}, - arrow.Field{Name: "i16", Type: arrow.PrimitiveTypes.Int16}, - arrow.Field{Name: "i32", Type: arrow.PrimitiveTypes.Int32}, - arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "u8", Type: arrow.PrimitiveTypes.Uint8}, - arrow.Field{Name: "u16", Type: arrow.PrimitiveTypes.Uint16}, - arrow.Field{Name: "u32", Type: arrow.PrimitiveTypes.Uint32}, - arrow.Field{Name: "u64", Type: arrow.PrimitiveTypes.Uint64}, - arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32}, - arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "str", Type: arrow.BinaryTypes.String}, - arrow.Field{Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms}, + {Name: "bool", Type: arrow.FixedWidthTypes.Boolean}, + {Name: "i8", Type: arrow.PrimitiveTypes.Int8}, + {Name: "i16", Type: arrow.PrimitiveTypes.Int16}, + {Name: "i32", Type: arrow.PrimitiveTypes.Int32}, + {Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "u8", Type: arrow.PrimitiveTypes.Uint8}, + {Name: "u16", Type: arrow.PrimitiveTypes.Uint16}, + {Name: "u32", Type: arrow.PrimitiveTypes.Uint32}, + {Name: "u64", Type: arrow.PrimitiveTypes.Uint64}, + {Name: "f32", Type: arrow.PrimitiveTypes.Float32}, + {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + {Name: "str", Type: arrow.BinaryTypes.String}, + {Name: "ts", Type: arrow.FixedWidthTypes.Timestamp_ms}, }, nil, ) @@ -379,9 +379,9 @@ func TestCSVReaderWithChunk(t *testing.T) { schema := arrow.NewSchema( []arrow.Field{ - arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "str", Type: arrow.BinaryTypes.String}, + {Name: "i64", Type: arrow.PrimitiveTypes.Int64}, + {Name: "f64", Type: arrow.PrimitiveTypes.Float64}, + {Name: "str", Type: arrow.BinaryTypes.String}, }, nil, ) @@ -632,7 +632,7 @@ func BenchmarkRead(b *testing.B) { return buf.Bytes() } - for _, rows := range []int{10, 1e2, 1e3, 1e4, 1e5} { + for _, rows := range []int{10, 1e2, 1e3, 1e4} { for _, cols := range []int{1, 10, 100, 1000} { raw := gen(rows, cols) for _, chunks := range []int{-1, 0, 10, 100, 1000} { @@ -651,9 +651,9 @@ func benchRead(b *testing.B, raw []byte, rows, cols, chunks int) { var fields []arrow.Field for i := 0; i < cols; i++ { fields = append(fields, []arrow.Field{ - arrow.Field{Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String}, + {Name: fmt.Sprintf("i64-%d", i), Type: arrow.PrimitiveTypes.Int64}, + {Name: fmt.Sprintf("f64-%d", i), Type: arrow.PrimitiveTypes.Float64}, + {Name: fmt.Sprintf("str-%d", i), Type: arrow.BinaryTypes.String}, }...) } diff --git a/go/parquet/pqarrow/reader_writer_test.go b/go/parquet/pqarrow/reader_writer_test.go index 91dd6b6b7ec..3821f591d2e 100644 --- a/go/parquet/pqarrow/reader_writer_test.go +++ b/go/parquet/pqarrow/reader_writer_test.go @@ -177,7 +177,7 @@ func benchReadTable(b *testing.B, name string, tbl arrow.Table, nbytes int64) { b.SetBytes(nbytes) for i := 0; i < b.N; i++ { - pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()), nil, nil) + pf, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) if err != nil { b.Error(err) } From b2aff8199e2882e4cfb928c7715202ddd828dd2f Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 19 Sep 2022 17:27:54 -0400 Subject: [PATCH 08/24] add GOPATH to path for gobenchdata --- ci/scripts/go_bench.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh index aaeb5928cea..5347b42524e 100644 --- a/ci/scripts/go_bench.sh +++ b/ci/scripts/go_bench.sh @@ -48,6 +48,7 @@ popd if [[ "$2" = "-json" ]]; then go install go.bobheadxi.dev/gobenchdata@latest + export PATH=`go env GOPATH`/bin:$PATH cat ${source_dir}/bench_*.dat | gobenchdata --json bench_stats.json fi From b98196b61da9700fdee526505bec56f32a216045 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 21 Sep 2022 16:00:18 -0400 Subject: [PATCH 09/24] test with local secrets for conbench --- .github/workflows/go.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index bf3770670f8..b8d5e867503 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -78,6 +78,10 @@ jobs: continue-on-error: true run: archery docker push debian-go - name: Run Benchmarks + env: + CONBENCH_URL: ${{ secrets.CONBENCH_URL }} + CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} + CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} run: | pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python ci/scripts/go_bench_adapt.py @@ -229,6 +233,10 @@ jobs: python-version: '3.10' - name: Run Benchmarks shell: bash + env: + CONBENCH_URL: ${{ secrets.CONBENCH_URL }} + CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} + CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} run: | pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python ci/scripts/go_bench_adapt.py From b3f56deddb0d8d37cce8f7bd9a2c1786b171624e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 21 Sep 2022 16:13:49 -0400 Subject: [PATCH 10/24] should be _transform_results --- ci/scripts/go_bench_adapt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index c3d32436660..49683d304dc 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -34,7 +34,7 @@ class GoAdapter(BenchmarkAdapter): def __init__(self) -> None: super().__init__(command=self.command) - def transform_results(self) -> List[BenchmarkResult]: + def _transform_results(self) -> List[BenchmarkResult]: with open(self.result_file, "r") as f: raw_results = json.load(f) From e54199964ee70942929a8919aeb0c749148e70b7 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 22 Sep 2022 15:00:03 -0400 Subject: [PATCH 11/24] set logging for benchadapt to debug --- ci/scripts/go_bench_adapt.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 49683d304dc..300bbd4f56e 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -17,11 +17,15 @@ import json import uuid +import logging from pathlib import Path from typing import List from benchadapt import BenchmarkResult from benchadapt.adapters import BenchmarkAdapter +from benchadapt.log import log + +log.setLevel(logging.DEBUG) ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" From 65e02db7cbf79ef43dd4433eb0e4f11a20a71a02 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 22 Sep 2022 15:10:48 -0400 Subject: [PATCH 12/24] oops, gotta call it correctly! --- ci/scripts/go_bench_adapt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 300bbd4f56e..a5808183df1 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -69,4 +69,4 @@ def _transform_results(self) -> List[BenchmarkResult]: if __name__ == "__main__": go_adapter = GoAdapter() - go_adapter.run() \ No newline at end of file + go_adapter() \ No newline at end of file From 465988270e7861b373c3a2d2da425015ae624deb Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 22 Sep 2022 17:28:02 -0400 Subject: [PATCH 13/24] override field --- ci/scripts/go_bench_adapt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index a5808183df1..80931e76322 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -68,5 +68,5 @@ def _transform_results(self) -> List[BenchmarkResult]: if __name__ == "__main__": - go_adapter = GoAdapter() + go_adapter = GoAdapter(result_fields_override={"info":{}}) go_adapter() \ No newline at end of file From b51a58969603dff0aa9a7ea7c9ecf2f44db0cb5d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 22 Sep 2022 17:33:19 -0400 Subject: [PATCH 14/24] add a run_reason --- ci/scripts/go_bench_adapt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 80931e76322..0d4a650f1cc 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -68,5 +68,5 @@ def _transform_results(self) -> List[BenchmarkResult]: if __name__ == "__main__": - go_adapter = GoAdapter(result_fields_override={"info":{}}) + go_adapter = GoAdapter(result_fields_override={"run_reason": "commit", "info":{}}) go_adapter() \ No newline at end of file From d1b37b2830ffa0f3d1034d910352f249c9541917 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 22 Sep 2022 17:43:30 -0400 Subject: [PATCH 15/24] pass args to super --- ci/scripts/go_bench_adapt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 0d4a650f1cc..ca75deea560 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -35,8 +35,8 @@ class GoAdapter(BenchmarkAdapter): result_file = "bench_stats.json" command = ["bash", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"] - def __init__(self) -> None: - super().__init__(command=self.command) + def __init__(self, *args, **kwargs) -> None: + super().__init__(command=self.command, *args, **kwargs) def _transform_results(self) -> List[BenchmarkResult]: with open(self.result_file, "r") as f: From cc2a3bf446e3aa8b44ed071d3777c3cfa56d9f15 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 23 Sep 2022 11:29:21 -0400 Subject: [PATCH 16/24] add tags --- ci/scripts/go_bench_adapt.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index ca75deea560..5c81d3d2c03 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -45,13 +45,18 @@ def _transform_results(self) -> List[BenchmarkResult]: parsed_results = [] for suite in raw_results[0]["Suites"]: batch_id = uuid.uuid4().hex + pkg = suite["Pkg"] for benchmark in suite["Benchmarks"]: data = benchmark["Mem"]["MBPerSec"] * 1e6 time = 1 / benchmark["NsPerOp"] * 1e9 + name = benchmark["Name"].removeprefix('Benchmark') + ncpu = name[name.rfind('-')+1:] + pieces = name[:-(len(ncpu)+1)].split('/') + parsed = BenchmarkResult( - run_name=benchmark["Name"], + run_name=name, batch_id=batch_id, stats={ "data": [data], @@ -59,7 +64,17 @@ def _transform_results(self) -> List[BenchmarkResult]: "times": [time], "times_unit": "i/s", }, - context={"benchmark_language": "Go"}, + context={ + "benchmark_language": "Go", + "goos": suite["Goos"], + "goarch": suite["Goarch"], + }, + tags={ + "pkg": pkg, + "num_cpu": ncpu, + "name": pieces[0], + "params": '/'.join(pieces[1:]), + }, ) parsed_results.append(parsed) From 0c9b43673f3fa7762569e032a100f8b2c70995ab Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 23 Sep 2022 12:01:59 -0400 Subject: [PATCH 17/24] updating fields --- ci/scripts/go_bench_adapt.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 5c81d3d2c03..47db7ebfbc0 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -60,9 +60,10 @@ def _transform_results(self) -> List[BenchmarkResult]: batch_id=batch_id, stats={ "data": [data], - "units": "b/s", - "times": [time], - "times_unit": "i/s", + "unit": "b/s", + "time": [time], + "time_unit": "i/s", + "iterations": benchmark["Runs"], }, context={ "benchmark_language": "Go", From d7806c8ac2511980cbff0d54e80f720ce33a373e Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 23 Sep 2022 14:13:16 -0400 Subject: [PATCH 18/24] "times" field --- ci/scripts/go_bench_adapt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 47db7ebfbc0..9bb1dcfa742 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -61,7 +61,7 @@ def _transform_results(self) -> List[BenchmarkResult]: stats={ "data": [data], "unit": "b/s", - "time": [time], + "times": [time], "time_unit": "i/s", "iterations": benchmark["Runs"], }, From 023a99415da74707f15a82cc4f48cb8d260bd7d6 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 23 Sep 2022 15:08:26 -0400 Subject: [PATCH 19/24] fix run id, hopefully --- ci/scripts/go_bench_adapt.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index 9bb1dcfa742..aed1e3f608d 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -42,6 +42,7 @@ def _transform_results(self) -> List[BenchmarkResult]: with open(self.result_file, "r") as f: raw_results = json.load(f) + run_id = uuid.uuid4().hex parsed_results = [] for suite in raw_results[0]["Suites"]: batch_id = uuid.uuid4().hex @@ -55,8 +56,8 @@ def _transform_results(self) -> List[BenchmarkResult]: ncpu = name[name.rfind('-')+1:] pieces = name[:-(len(ncpu)+1)].split('/') - parsed = BenchmarkResult( - run_name=name, + parsed = BenchmarkResult( + run_id=run_id, batch_id=batch_id, stats={ "data": [data], @@ -76,13 +77,14 @@ def _transform_results(self) -> List[BenchmarkResult]: "name": pieces[0], "params": '/'.join(pieces[1:]), }, + run_reason='commit', ) - + parsed.run_name = f"{parsed.run_reason}: {parsed.github['commit']}" parsed_results.append(parsed) return parsed_results if __name__ == "__main__": - go_adapter = GoAdapter(result_fields_override={"run_reason": "commit", "info":{}}) + go_adapter = GoAdapter(result_fields_override={"info":{}}) go_adapter() \ No newline at end of file From 9f7cedc28b57b17dc4948ecdd2ea670bac8c5833 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 18 Oct 2022 13:04:12 -0400 Subject: [PATCH 20/24] update conbench url --- .github/workflows/go.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index b8d5e867503..b1b61562bc4 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -79,7 +79,7 @@ jobs: run: archery docker push debian-go - name: Run Benchmarks env: - CONBENCH_URL: ${{ secrets.CONBENCH_URL }} + CONBENCH_URL: https://conbench.ursa.dev CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} run: | @@ -234,7 +234,7 @@ jobs: - name: Run Benchmarks shell: bash env: - CONBENCH_URL: ${{ secrets.CONBENCH_URL }} + CONBENCH_URL: 'https://conbench.ursa.dev' CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} run: | From d23f79e227d90a201e227c0a6ea397b17d460270 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 19 Oct 2022 11:28:04 -0400 Subject: [PATCH 21/24] limit benchmarks to push events for arrow repo where the secrets are allowed. --- .github/workflows/go.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index b1b61562bc4..020df74c724 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -78,6 +78,7 @@ jobs: continue-on-error: true run: archery docker push debian-go - name: Run Benchmarks + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: CONBENCH_URL: https://conbench.ursa.dev CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} @@ -228,10 +229,12 @@ jobs: shell: bash run: ci/scripts/go_test.sh $(pwd) - name: Setup Python + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' uses: actions/setup-python@v4 with: python-version: '3.10' - name: Run Benchmarks + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' shell: bash env: CONBENCH_URL: 'https://conbench.ursa.dev' From d23e513d8852c5ca3508be00182288f75ea47d8a Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 19 Oct 2022 14:25:19 -0400 Subject: [PATCH 22/24] add ref name to run_reason --- .github/workflows/go.yml | 1 + ci/scripts/go_bench_adapt.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 020df74c724..3eeb2c5b831 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -240,6 +240,7 @@ jobs: CONBENCH_URL: 'https://conbench.ursa.dev' CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} + CONBENCH_REF_NAME: ${{ github.ref_name }} run: | pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python ci/scripts/go_bench_adapt.py diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index aed1e3f608d..df4da24602b 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -16,6 +16,7 @@ # under the License. import json +import os import uuid import logging from pathlib import Path @@ -29,7 +30,7 @@ ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" - +REF_NAME = os.environ.get("CONBENCH_REF_NAME") class GoAdapter(BenchmarkAdapter): result_file = "bench_stats.json" @@ -77,7 +78,7 @@ def _transform_results(self) -> List[BenchmarkResult]: "name": pieces[0], "params": '/'.join(pieces[1:]), }, - run_reason='commit', + run_reason=f'commit to {REF_NAME}', ) parsed.run_name = f"{parsed.run_reason}: {parsed.github['commit']}" parsed_results.append(parsed) From 72bcf9d564d7f0b00406e304721bdfe441adaa40 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 19 Oct 2022 14:44:44 -0400 Subject: [PATCH 23/24] leave it hardcoded to "commit" --- .github/workflows/go.yml | 3 +-- ci/scripts/go_bench_adapt.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3eeb2c5b831..676121f00cd 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -239,8 +239,7 @@ jobs: env: CONBENCH_URL: 'https://conbench.ursa.dev' CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} - CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} - CONBENCH_REF_NAME: ${{ github.ref_name }} + CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} run: | pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python ci/scripts/go_bench_adapt.py diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index df4da24602b..f20785f4b48 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -30,7 +30,6 @@ ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" -REF_NAME = os.environ.get("CONBENCH_REF_NAME") class GoAdapter(BenchmarkAdapter): result_file = "bench_stats.json" @@ -78,7 +77,7 @@ def _transform_results(self) -> List[BenchmarkResult]: "name": pieces[0], "params": '/'.join(pieces[1:]), }, - run_reason=f'commit to {REF_NAME}', + run_reason=f'commit', ) parsed.run_name = f"{parsed.run_reason}: {parsed.github['commit']}" parsed_results.append(parsed) From 2ffda34e174458b34b120411326240be8754bbc8 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 21 Oct 2022 13:56:33 -0400 Subject: [PATCH 24/24] check branch name for reason --- .github/workflows/go.yml | 15 ++++++++------- ci/scripts/go_bench_adapt.py | 9 +++++---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 676121f00cd..abf2eb517eb 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -83,6 +83,7 @@ jobs: CONBENCH_URL: https://conbench.ursa.dev CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} + CONBENCH_REF: ${{ github.ref_name }} run: | pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python ci/scripts/go_bench_adapt.py @@ -173,7 +174,7 @@ jobs: staticcheck: v0.2.2 - go: 1.18 staticcheck: latest - steps: + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: @@ -184,7 +185,7 @@ jobs: with: go-version: ${{ matrix.go }} cache: true - cache-dependency-path: go/go.sum + cache-dependency-path: go/go.sum - name: Install staticcheck run: go install honnef.co/go/tools/cmd/staticcheck@${{ matrix.staticcheck }} - name: Build @@ -192,7 +193,7 @@ jobs: run: ci/scripts/go_build.sh $(pwd) - name: Test shell: bash - run: ci/scripts/go_test.sh $(pwd) + run: ci/scripts/go_test.sh $(pwd) macos: name: AMD64 macOS 11 Go ${{ matrix.go }} @@ -208,7 +209,7 @@ jobs: staticcheck: v0.2.2 - go: 1.18 staticcheck: latest - steps: + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: @@ -239,7 +240,8 @@ jobs: env: CONBENCH_URL: 'https://conbench.ursa.dev' CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} - CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} + CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} + CONBENCH_REF: ${{ github.ref_name }} run: | pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python ci/scripts/go_bench_adapt.py @@ -261,7 +263,7 @@ jobs: staticcheck: latest env: ARROW_GO_TESTCGO: "1" - steps: + steps: - name: Checkout Arrow uses: actions/checkout@v3 with: @@ -341,4 +343,3 @@ jobs: - name: Test shell: bash run: ci/scripts/go_test.sh $(pwd) - \ No newline at end of file diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py index f20785f4b48..db1c09cbc59 100644 --- a/ci/scripts/go_bench_adapt.py +++ b/ci/scripts/go_bench_adapt.py @@ -30,6 +30,7 @@ ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" +RUN_REASON = "commit" if os.environ.get("CONBENCH_REF") == "master" else "branch" class GoAdapter(BenchmarkAdapter): result_file = "bench_stats.json" @@ -42,7 +43,7 @@ def _transform_results(self) -> List[BenchmarkResult]: with open(self.result_file, "r") as f: raw_results = json.load(f) - run_id = uuid.uuid4().hex + run_id = uuid.uuid4().hex parsed_results = [] for suite in raw_results[0]["Suites"]: batch_id = uuid.uuid4().hex @@ -54,9 +55,9 @@ def _transform_results(self) -> List[BenchmarkResult]: name = benchmark["Name"].removeprefix('Benchmark') ncpu = name[name.rfind('-')+1:] - pieces = name[:-(len(ncpu)+1)].split('/') + pieces = name[:-(len(ncpu)+1)].split('/') - parsed = BenchmarkResult( + parsed = BenchmarkResult( run_id=run_id, batch_id=batch_id, stats={ @@ -77,7 +78,7 @@ def _transform_results(self) -> List[BenchmarkResult]: "name": pieces[0], "params": '/'.join(pieces[1:]), }, - run_reason=f'commit', + run_reason=RUN_REASON, ) parsed.run_name = f"{parsed.run_reason}: {parsed.github['commit']}" parsed_results.append(parsed)