From 99f1ea61a09bfdaafec7cf19b8cd2e8f5096612b Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Wed, 27 Mar 2024 14:19:09 +0100 Subject: [PATCH 1/2] Run TPC-H SF10 during PR benchmarks --- .github/workflows/pr_benchmarks.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr_benchmarks.yml b/.github/workflows/pr_benchmarks.yml index b7b85c9fcf14b..f1c944a8f450f 100644 --- a/.github/workflows/pr_benchmarks.yml +++ b/.github/workflows/pr_benchmarks.yml @@ -28,8 +28,9 @@ jobs: cd benchmarks mkdir data - # Setup the TPC-H data set with a scale factor of 10 + # Setup the TPC-H data sets for scale factors 1 and 10 ./bench.sh data tpch + ./bench.sh data tpch10 - name: Generate unique result names run: | @@ -44,6 +45,7 @@ jobs: cd benchmarks ./bench.sh run tpch + ./bench.sh run tpch10 # For some reason this step doesn't seem to propagate the env var down into the script if [ -d "results/HEAD" ]; then @@ -64,6 +66,7 @@ jobs: cd benchmarks ./bench.sh run tpch + ./bench.sh run tpch10 echo ${{ github.event.issue.number }} > pr From bc5da258add983861d89dc78db825884702cbb04 Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Wed, 27 Mar 2024 21:40:51 +0100 Subject: [PATCH 2/2] Add memory benchmarks to the workflow Also distinguish the output file by the SF used. --- .github/workflows/pr_benchmarks.yml | 6 +++++- benchmarks/bench.sh | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr_benchmarks.yml b/.github/workflows/pr_benchmarks.yml index f1c944a8f450f..29d001783b17c 100644 --- a/.github/workflows/pr_benchmarks.yml +++ b/.github/workflows/pr_benchmarks.yml @@ -31,7 +31,7 @@ jobs: # Setup the TPC-H data sets for scale factors 1 and 10 ./bench.sh data tpch ./bench.sh data tpch10 - + - name: Generate unique result names run: | echo "HEAD_LONG_SHA=$(git log -1 --format='%H')" >> "$GITHUB_ENV" @@ -45,7 +45,9 @@ jobs: cd benchmarks ./bench.sh run tpch + ./bench.sh run tpch_mem ./bench.sh run tpch10 + ./bench.sh run tpch_mem10 # For some reason this step doesn't seem to propagate the env var down into the script if [ -d "results/HEAD" ]; then @@ -66,7 +68,9 @@ jobs: cd benchmarks ./bench.sh run tpch + ./bench.sh run tpch_mem ./bench.sh run tpch10 + ./bench.sh run tpch_mem10 echo ${{ github.event.issue.number }} > pr diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index 039f4790acb0d..a72400892752e 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -314,7 +314,7 @@ run_tpch() { fi TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}" - RESULTS_FILE="${RESULTS_DIR}/tpch.json" + RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json" echo "RESULTS_FILE: ${RESULTS_FILE}" echo "Running tpch benchmark..." $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --format parquet -o ${RESULTS_FILE} @@ -329,7 +329,7 @@ run_tpch_mem() { fi TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}" - RESULTS_FILE="${RESULTS_DIR}/tpch_mem.json" + RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json" echo "RESULTS_FILE: ${RESULTS_FILE}" echo "Running tpch_mem benchmark..." # -m means in memory