Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions .github/workflows/pr_benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ jobs:
cd benchmarks
mkdir data

# Setup the TPC-H data set with a scale factor of 10
# Setup the TPC-H data sets for scale factors 1 and 10
./bench.sh data tpch
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to do SF=1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point; I guess it can be beneficial to detect minor systemic/non-linear regressions that are larger than the noise level but smaller than the sensitivity of SF 10?


./bench.sh data tpch10

- name: Generate unique result names
run: |
echo "HEAD_LONG_SHA=$(git log -1 --format='%H')" >> "$GITHUB_ENV"
Expand All @@ -44,6 +45,9 @@ jobs:
cd benchmarks

./bench.sh run tpch
./bench.sh run tpch_mem
./bench.sh run tpch10
./bench.sh run tpch_mem10

# For some reason this step doesn't seem to propagate the env var down into the script
if [ -d "results/HEAD" ]; then
Expand All @@ -64,6 +68,9 @@ jobs:
cd benchmarks

./bench.sh run tpch
./bench.sh run tpch_mem
./bench.sh run tpch10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could run tpch_mem10 as well if it doesn't run out of memory (OOM), or tpch_mem otherwise, which should have less variance.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I can add tpch_mem10 as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I've added both tpch_mem10 and tpch_mem, so that we can observe and compare the noise level for each one.

./bench.sh run tpch_mem10

echo ${{ github.event.issue.number }} > pr

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ run_tpch() {
fi
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"

RESULTS_FILE="${RESULTS_DIR}/tpch.json"
RESULTS_FILE="${RESULTS_DIR}/tpch_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch benchmark..."
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --format parquet -o ${RESULTS_FILE}
Expand All @@ -329,7 +329,7 @@ run_tpch_mem() {
fi
TPCH_DIR="${DATA_DIR}/tpch_sf${SCALE_FACTOR}"

RESULTS_FILE="${RESULTS_DIR}/tpch_mem.json"
RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch_mem benchmark..."
# -m means in memory
Expand Down