From 15e20aee92990e25c5aa4d18a1754baa45326d03 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 28 Aug 2025 05:41:13 +0800 Subject: [PATCH 1/4] add a ci job for typo checking Signed-off-by: Ruihang Xia --- .github/workflows/rust.yml | 8 +++++++ typos.toml | 46 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 typos.toml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index d03bbe43f745..5e4d0f06dfe0 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -781,3 +781,11 @@ jobs: - name: Check datafusion-proto working-directory: datafusion/proto run: cargo msrv --output-format json --log-target stdout verify + typos: + name: Spell Check with Typos + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + with: + persist-credentials: false + - uses: crate-ci/typos@1.35.5 diff --git a/typos.toml b/typos.toml new file mode 100644 index 000000000000..46f21febcf86 --- /dev/null +++ b/typos.toml @@ -0,0 +1,46 @@ +[default.extend-words] +# random words from unit tests +Pn = "Pn" +fo = "fo" +nd = "nd" +Nd = "Nd" +ba = "ba" +ECT = "ECT" +Ue = "Ue" +Iy = "Iy" +hte = "hte" +numer = "numer" +abd = "abd" +aroun = "aroun" +abov = "abov" +Ois = "Ois" +alo = "alo" + +# abbreviations, common words, etc. 
+typ = "typ" +datas = "datas" +YOUY = "YOUY" +lits = "lits" + +# exposed to public API +Serializeable = "Serializeable" + +# from test cases like TPC-* or ClickBench +carefull = "carefull" +precentage = "precentage" +flate = "flate" +hom = "hom" +alph = "alph" +wih = "wih" +Ded = "Ded" + +[files] +extend-exclude = [ + "*.slt", + "*.slt.part", + "*.svg", + "*.sql", + "dev/changelog/**", + "benchmarks/**", + "*.csv" +] From 7116861bdd94aedbaecabf4bd5d379c538360120 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 28 Aug 2025 05:43:58 +0800 Subject: [PATCH 2/4] correct version number Signed-off-by: Ruihang Xia --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 5e4d0f06dfe0..18758eb69c3e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -788,4 +788,4 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 with: persist-credentials: false - - uses: crate-ci/typos@1.35.5 + - uses: crate-ci/typos@v1 From bd829320072f2dfb7d58fd2c62859a66898329ba Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 28 Aug 2025 05:49:05 +0800 Subject: [PATCH 3/4] fix new occurs Signed-off-by: Ruihang Xia --- .../core/tests/physical_optimizer/filter_pushdown/mod.rs | 2 +- datafusion/physical-plan/src/joins/join_filter.rs | 2 +- datafusion/physical-plan/src/joins/nested_loop_join.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index 94710a14cdf8..02f5b6b1e133 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -1095,7 +1095,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { // Top-level CoalesceBatchesExec let cb = Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as 
Arc<dyn ExecutionPlan>; - // Top-level CoalesceParititionsExec + // Top-level CoalescePartitionsxec let cp = Arc::new(CoalescePartitionsExec::new(cb)) as Arc<dyn ExecutionPlan>; // Add a sort for determistic output let plan = Arc::new(SortExec::new( diff --git a/datafusion/physical-plan/src/joins/join_filter.rs b/datafusion/physical-plan/src/joins/join_filter.rs index e026f1f860b9..de5df2be5565 100644 --- a/datafusion/physical-plan/src/joins/join_filter.rs +++ b/datafusion/physical-plan/src/joins/join_filter.rs @@ -34,7 +34,7 @@ pub struct JoinFilter { } /// For display in `EXPLAIN` plans, only expression with column names is needed, -/// it output expresion like `(col1 + col2) = 0` +/// it output expression like `(col1 + col2) = 0` impl Display for JoinFilter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.expression.fmt_sql(f) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 596e890b879c..be39c0990c3c 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -827,7 +827,7 @@ impl Stream for NestedLoopJoinStream { // side batch, before start joining. NLJState::BufferingLeft => { debug!("[NLJState] Entering: {:?}", self.state); - // inside `collect_left_input` (the rountine to buffer build + // inside `collect_left_input` (the routine to buffer build // -side batches), related metrics except build time will be // updated. 
// stop on drop @@ -1583,7 +1583,7 @@ fn apply_filter_to_row_join_batch( /// 30 /// 40 /// -/// # After applying it, only index 1 and 3 elemnt in probe_side_batch will be +/// # After applying it, only index 1 and 3 elements in probe_side_batch will be /// # kept /// probe_side_filter: /// false From 03068763f4b00e258910e42855e5a5063e95acb9 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 27 Aug 2025 22:33:00 -0700 Subject: [PATCH 4/4] Update datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs Co-authored-by: Jeffrey Vo --- datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index 02f5b6b1e133..7b04694792e5 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -1095,7 +1095,7 @@ async fn test_hashjoin_dynamic_filter_pushdown_partitioned() { // Top-level CoalesceBatchesExec let cb = Arc::new(CoalesceBatchesExec::new(hash_join, 8192)) as Arc<dyn ExecutionPlan>; - // Top-level CoalescePartitionsxec + // Top-level CoalescePartitionsExec let cp = Arc::new(CoalescePartitionsExec::new(cb)) as Arc<dyn ExecutionPlan>; // Add a sort for determistic output let plan = Arc::new(SortExec::new(