From 342c3c091151927e048f54049b1d045749b0e4c5 Mon Sep 17 00:00:00 2001
From: Sasha Krassovsky
Date: Tue, 22 Feb 2022 21:00:05 -0800
Subject: [PATCH 01/11] Add TPC-H Generator

---
 cpp/src/arrow/CMakeLists.txt                 |    1 +
 cpp/src/arrow/compute/exec/CMakeLists.txt    |    2 +
 cpp/src/arrow/compute/exec/tpch_benchmark.cc |  175 +
 cpp/src/arrow/compute/exec/tpch_node.cc      | 3704 ++++++++++++++++++
 cpp/src/arrow/compute/exec/tpch_node.h       |   69 +
 cpp/src/arrow/compute/kernels/vector_sort.cc |    5 +-
 6 files changed, 3954 insertions(+), 2 deletions(-)
 create mode 100644 cpp/src/arrow/compute/exec/tpch_benchmark.cc
 create mode 100644 cpp/src/arrow/compute/exec/tpch_node.cc
 create mode 100644 cpp/src/arrow/compute/exec/tpch_node.h

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index b984bc10425..d73de73565f 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -394,6 +394,7 @@ if(ARROW_COMPUTE)
        compute/exec/sink_node.cc
        compute/exec/source_node.cc
        compute/exec/task_util.cc
+       compute/exec/tpch_node.cc
        compute/exec/union_node.cc
        compute/exec/util.cc
        compute/function.cc
diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt
index 3832273593d..cf725667107 100644
--- a/cpp/src/arrow/compute/exec/CMakeLists.txt
+++ b/cpp/src/arrow/compute/exec/CMakeLists.txt
@@ -32,6 +32,8 @@ add_arrow_compute_test(util_test PREFIX "arrow-compute")
 
 add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute")
 
+add_arrow_benchmark(tpch_benchmark PREFIX "arrow-compute")
+
 if(ARROW_BUILD_OPENMP_BENCHMARKS)
   find_package(OpenMP REQUIRED)
   add_arrow_benchmark(hash_join_benchmark
diff --git a/cpp/src/arrow/compute/exec/tpch_benchmark.cc b/cpp/src/arrow/compute/exec/tpch_benchmark.cc
new file mode 100644
index 00000000000..963782333cf
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/tpch_benchmark.cc
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
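+
+// Benchmark of TPC-H Query 1 (the "pricing summary report" query) running
+// against the TPC-H generator node. Plan_Q1 builds the plan:
+//   TpchGen Lineitem source -> filter (L_SHIPDATE <= 1998-09-02)
+//   -> project -> grouped aggregate keyed on (L_RETURNFLAG, L_LINESTATUS)
+//   -> order_by_sink, which BM_Tpch_Q1 drains via StartAndCollect.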
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/testing/future_util.h"
+#include "arrow/compute/exec/test_util.h"
+#include "arrow/compute/exec/tpch_node.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/compute/cast.h"
+
+namespace arrow
+{
+namespace compute
+{
+
+std::shared_ptr<ExecPlan> Plan_Q1(AsyncGenerator<util::optional<ExecBatch>> &sink_gen, int scale_factor)
+{
+  ExecContext *ctx = default_exec_context();
+  *ctx = ExecContext(default_memory_pool(), arrow::internal::GetCpuThreadPool());
+  std::shared_ptr<ExecPlan> plan = *ExecPlan::Make(ctx);
+  TpchGen gen = *TpchGen::Make(plan.get(), scale_factor);
+
+  ExecNode *lineitem = *gen.Lineitem(
+      {
+          "L_QUANTITY",
+          "L_EXTENDEDPRICE",
+          "L_TAX",
+          "L_DISCOUNT",
+          "L_SHIPDATE",
+          "L_RETURNFLAG",
+          "L_LINESTATUS"
+      });
+
+  std::shared_ptr<Date32Scalar> sept_2_1998 = std::make_shared<Date32Scalar>(10471);  // September 2, 1998 is 10471 days after January 1, 1970
+  Expression filter = less_equal(field_ref("L_SHIPDATE"), literal(std::move(sept_2_1998)));
+  FilterNodeOptions filter_opts(filter);
+
+  Expression l_returnflag = field_ref("L_RETURNFLAG");
+  Expression l_linestatus = field_ref("L_LINESTATUS");
+  Expression quantity = field_ref("L_QUANTITY");
+  Expression base_price = field_ref("L_EXTENDEDPRICE");
+
+  std::shared_ptr<Decimal128Scalar> decimal_1 = std::make_shared<Decimal128Scalar>(Decimal128{0, 100}, decimal(12, 2));
+  Expression discount_multiplier = call("subtract", { literal(decimal_1), field_ref("L_DISCOUNT") });
+  Expression tax_multiplier = call("add", { literal(decimal_1), field_ref("L_TAX") });
+  Expression disc_price = call("multiply", { field_ref("L_EXTENDEDPRICE"), discount_multiplier });
+  Expression charge = call("multiply",
+      {
+          call("cast",
+              {
+                  call("multiply", { field_ref("L_EXTENDEDPRICE"), discount_multiplier })
+              }, compute::CastOptions::Unsafe(decimal(12, 2))),
+          tax_multiplier
+      });
+  Expression discount = field_ref("L_DISCOUNT");
+
+  std::vector<Expression> projection_list =
+  {
+      l_returnflag,
+      l_linestatus,
+      quantity,
+      base_price,
+      disc_price,
+      charge,
+      quantity,
+      base_price,
+      discount
+  };
+  std::vector<std::string> project_names =
+  {
+      "l_returnflag",
+      "l_linestatus",
+      "sum_qty",
+      "sum_base_price",
+      "sum_disc_price",
+      "sum_charge",
+      "avg_qty",
+      "avg_price",
+      "avg_disc"
+  };
+  ProjectNodeOptions project_opts(std::move(projection_list));
+
+  ScalarAggregateOptions sum_opts = ScalarAggregateOptions::Defaults();
+  CountOptions count_opts(CountOptions::CountMode::ALL);
+  std::vector<internal::Aggregate> aggs =
+  {
+      { "hash_sum", &sum_opts },
+      { "hash_sum", &sum_opts },
+      { "hash_sum", &sum_opts },
+      { "hash_sum", &sum_opts },
+      { "hash_mean", &sum_opts },
+      { "hash_mean", &sum_opts },
+      { "hash_mean", &sum_opts },
+      { "hash_count", &count_opts }
+  };
+
+  std::vector<FieldRef> cols =
+  {
+      2, 3, 4, 5, 6, 7, 8, 2
+  };
+
+  std::vector<std::string> names =
+  {
+      "sum_qty",
+      "sum_base_price",
+      "sum_disc_price",
+      "sum_charge",
+      "avg_qty",
+      "avg_price",
+      "avg_disc",
+      "count_order"
+  };
+
+  std::vector<FieldRef> keys = { "L_RETURNFLAG", "L_LINESTATUS" };
+  AggregateNodeOptions agg_opts(aggs, cols, names, keys);
+
+  SortKey l_returnflag_key("L_RETURNFLAG");
+  SortKey l_linestatus_key("L_LINESTATUS");
+  SortOptions sort_opts({ l_returnflag_key, l_linestatus_key });
+  OrderBySinkNodeOptions order_by_opts(sort_opts, &sink_gen);
+
+  Declaration filter_decl("filter", { Declaration::Input(lineitem) }, filter_opts);
+  Declaration project_decl("project", project_opts);
+  Declaration aggregate_decl("aggregate", agg_opts);
+  Declaration orderby_decl("order_by_sink", order_by_opts);
+
+  Declaration q1 = Declaration::Sequence(
+      {
+          filter_decl,
+          project_decl,
+          aggregate_decl,
+          orderby_decl
+      });
+  std::ignore = *q1.AddToPlan(plan.get());
+  return plan;
+}
+
+static void BM_Tpch_Q1(benchmark::State &st)
+{
+  for(auto _ : st)
+  {
+    st.PauseTiming();
+    AsyncGenerator<util::optional<ExecBatch>> sink_gen;
+    std::shared_ptr<ExecPlan> plan = Plan_Q1(sink_gen, st.range(0));
+    st.ResumeTiming();
+    auto fut = StartAndCollect(plan.get(), sink_gen);
+    auto res = *fut.MoveResult();
+#ifndef NDEBUG
+    st.PauseTiming();
+    for(auto &batch : res)
+      std::cout << batch.ToString() << std::endl;
+    st.ResumeTiming();
+#endif
+  }
+}
+
+//BENCHMARK(BM_Tpch_Q1)->RangeMultiplier(10)->Range(1, 1000)->ArgNames({ "SF" });
+BENCHMARK(BM_Tpch_Q1)->RangeMultiplier(10)->Range(1, 10)->ArgNames({ "SF" });
+}
+}
diff --git a/cpp/src/arrow/compute/exec/tpch_node.cc b/cpp/src/arrow/compute/exec/tpch_node.cc
new file mode 100644
index 00000000000..842bf828574
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/tpch_node.cc
@@ -0,0 +1,3704 @@
+#include "arrow/compute/exec/tpch_node.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/future.h"
+#include "arrow/util/unreachable.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace arrow
+{
+  using internal::checked_cast;
+
+  namespace compute
+  {
+    class TpchText
+    {
+    public:
+      Status Init();
+      Result<Datum> GenerateComments(
+          size_t num_comments,
+          size_t min_length,
+          size_t max_length,
+          random::pcg32_fast &rng);
+
+    private:
+      void GenerateWord(size_t &offset, const char **words, size_t num_choices);
+      void GenerateNoun(size_t &offset);
+      void GenerateVerb(size_t &offset);
+      void GenerateAdjective(size_t &offset);
+      void GenerateAdverb(size_t &offset);
+      void GeneratePreposition(size_t &offset);
+      void GenerateAuxiliary(size_t &offset);
+      void GenerateTerminator(size_t &offset);
+
+      void GenerateNounPhrase(size_t &offset);
+      void GenerateVerbPhrase(size_t &offset);
+      void GeneratePrepositionalPhrase(size_t &offset);
+
+      void GenerateSentence(size_t &offset);
+
+      std::unique_ptr<Buffer> text_;
+      random::pcg32_fast rng_;
+      static constexpr size_t kTextBytes = 300 * 1024 * 1024;  // 300 MB
+    };
+
+    class TpchTableGenerator
+    {
+    public:
+      using OutputBatchCallback = std::function;
+      using FinishedCallback = std::function;
+      using GenerateFn = std::function;
+      using ScheduleCallback = std::function;
+      using AbortCallback = std::function<void()>;
+
+      virtual Status Init(
+          std::vector<std::string> columns,
+          int scale_factor,
+          int64_t batch_size) = 0;
+
+      virtual Status StartProducing(
+          size_t num_threads,
+          OutputBatchCallback output_callback,
+          FinishedCallback finished_callback,
+          ScheduleCallback schedule_callback) = 0;
+
+      void Abort(AbortCallback abort_callback)
+      {
+        bool expected = false;
+        if(done_.compare_exchange_strong(expected, true))
+        {
+          abort_callback();
+        }
+      }
+
+      virtual std::shared_ptr<Schema> schema() const = 0;
+
+      virtual ~TpchTableGenerator() = default;
+
+    protected:
+      std::atomic<bool> done_ = { false };
+      std::atomic batches_generated_ = { 0 };
+    };
+
+    int GetNumDigits(int64_t x)
+    {
+      // This if statement chain is for MAXIMUM SPEED
+      /*
+             .,
+         .  _,'f----.._
+         |\ ,-'"/  |   ,'
+         |,_  ,--.      /
+         /,-. ,'`.     (_
+         f  o|  o|__    "`-.
+         ,-._.,--'_ `.
_.,-` + `"' ___.,'` j,-' + `-.__.,--' + */ + // Source: https://stackoverflow.com/questions/1068849/how-do-i-determine-the-number-of-digits-of-an-integer-in-c + ARROW_DCHECK(x >= 0); + if(x < 10ll) return 1; + if(x < 100ll) return 2; + if(x < 1000ll) return 3; + if(x < 10000ll) return 4; + if(x < 100000ll) return 5; + if(x < 1000000ll) return 6; + if(x < 10000000ll) return 7; + if(x < 100000000ll) return 8; + if(x < 1000000000ll) return 9; + if(x < 10000000000ll) return 10; + if(x < 100000000000ll) return 11; + if(x < 1000000000000ll) return 12; + if(x < 10000000000000ll) return 13; + if(x < 100000000000000ll) return 14; + if(x < 1000000000000000ll) return 15; + if(x < 10000000000000000ll) return 16; + if(x < 100000000000000000ll) return 17; + if(x < 1000000000000000000ll) return 18; + return -1; + } + + void AppendNumberPaddedToNineDigits(char *out, int64_t x) + { + // We do all of this to avoid calling snprintf, which does a lot of crazy + // locale stuff. On Windows and MacOS this can get suuuuper slow + int num_digits = GetNumDigits(x); + int num_padding_zeros = std::max(9 - num_digits, 0); + std::memset(out, '0', static_cast(num_padding_zeros)); + while(x > 0) + { + *(out + num_padding_zeros + num_digits - 1) = ('0' + x % 10); + num_digits -= 1; + x /= 10; + } + } + + Result> SetOutputColumns( + const std::vector &columns, + const std::vector> &types, + const std::unordered_map &name_map, + std::vector &gen_list) + { + gen_list.clear(); + std::vector> fields; + if(columns.empty()) + { + for(auto pair : name_map) + { + int col_idx = pair.second; + fields.push_back(field(pair.first, types[col_idx])); + gen_list.push_back(col_idx); + } + return schema(std::move(fields)); + } + else + { + for(const std::string &col : columns) + { + auto entry = name_map.find(col); + if(entry == name_map.end()) + return Status::Invalid("Not a valid column name"); + int col_idx = static_cast(entry->second); + fields.push_back(field(col, types[col_idx])); + gen_list.push_back(col_idx); + } + return schema(std::move(fields)); + } + } + + static TpchText g_text; + + Status TpchText::Init() + { + ARROW_ASSIGN_OR_RAISE(text_, AllocateBuffer(kTextBytes)); + size_t offset = 0; + while(offset < kTextBytes) + GenerateSentence(offset); + return Status::OK(); + } + + Result TpchText::GenerateComments( + size_t num_comments, + size_t min_length, + size_t max_length, + random::pcg32_fast &rng) + { + std::uniform_int_distribution length_dist(min_length, max_length); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr offset_buffer, AllocateBuffer(sizeof(int32_t) * (num_comments + 1))); + int32_t *offsets = reinterpret_cast(offset_buffer->mutable_data()); + offsets[0] = 0; + for(size_t i = 1; i <= num_comments; i++) + offsets[i] = offsets[i - 1] + length_dist(rng); + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr comment_buffer, AllocateBuffer(offsets[num_comments])); + char *comments = reinterpret_cast(comment_buffer->mutable_data()); + for(size_t i = 0; i < num_comments; i++) + { + size_t length = offsets[i + 1] - offsets[i]; + std::uniform_int_distribution offset_dist(0, kTextBytes - length); + size_t offset_in_text = offset_dist(rng); + std::memcpy(comments + offsets[i], text_->data() + offset_in_text, length); + } + ArrayData ad(utf8(), num_comments, { nullptr, std::move(comment_buffer), std::move(offset_buffer) }); + return std::move(ad); + } + + Result RandomVString( + random::pcg32_fast &rng, + int64_t num_rows, + int32_t min_length, + int32_t max_length) + { + std::uniform_int_distribution length_dist(min_length, max_length); + 
ARROW_ASSIGN_OR_RAISE(std::unique_ptr offset_buff, AllocateBuffer((num_rows + 1) * sizeof(int32_t))); + int32_t *offsets = reinterpret_cast(offset_buff->mutable_data()); + offsets[0] = 0; + for(int64_t i = 1; i <= num_rows; i++) + offsets[i] = offsets[i - 1] + length_dist(rng); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr str_buff, AllocateBuffer(offsets[num_rows])); + char *str = reinterpret_cast(str_buff->mutable_data()); + + // Spec says to pick random alphanumeric characters from a set of at least + // 64 symbols. Now, let's think critically here: 26 letters in the alphabet, + // so 52 total for upper and lower case, and 10 possible digits gives 62 + // characters... + // dbgen solves this by including a space and a comma as well, so we'll + // copy that. + const char alpha_numerics[65] = + "0123456789abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ,"; + std::uniform_int_distribution char_dist(0, 63); + for(int32_t i = 0; i < offsets[num_rows]; i++) + str[i] = alpha_numerics[char_dist(rng)]; + + ArrayData ad(utf8(), num_rows, { nullptr, std::move(str_buff), std::move(offset_buff) }); + return std::move(ad); + } + + void AppendNumber(char *&out, int num_digits, int32_t x) + { + out += (num_digits - 1); + while(x > 0) + { + *out-- = x % 10; + x /= 10; + } + x += num_digits; + } + + void GeneratePhoneNumber( + char *out, + random::pcg32_fast &rng, + int32_t country) + { + std::uniform_int_distribution three_digit(100, 999); + std::uniform_int_distribution four_digit(1000, 9999); + + int32_t country_code = country + 10; + int32_t l1 = three_digit(rng); + int32_t l2 = three_digit(rng); + int32_t l3 = four_digit(rng); + AppendNumber(out, 2, country_code); + *out++ = '-'; + AppendNumber(out, 3, l1); + *out++ = '-'; + AppendNumber(out, 3, l2); + *out++ = '-'; + AppendNumber(out, 4, l3); + } + + static constexpr uint32_t STARTDATE = 8035; // January 1, 1992 is 8035 days after January 1, 1970 + static constexpr uint32_t CURRENTDATE = 9298; // June 17, 1995 is 9298 days after January 1, 1970 + static constexpr uint32_t ENDDATE = 10591; // December 12, 1998 is 10591 days after January 1, 1970 + + const char *NameParts[] = + { + "almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue", + "blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral", + "cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick", + "floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew", + "hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen", + "magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo", + "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder", + "puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", "sienna", + "sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato", "turquoise", "violet", + "wheat", "white", "yellow", + }; + static constexpr size_t kNumNameParts = sizeof(NameParts) / sizeof(NameParts[0]); + + const char *Types_1[] = + { + "STANDARD ", "SMALL ", "MEDIUM ", "LARGE ", "ECONOMY ", "PROMO ", + }; + static constexpr size_t kNumTypes_1 = sizeof(Types_1) / sizeof(Types_1[0]); + + const char *Types_2[] = + { + "ANODIZED ", "BURNISHED ", "PLATED ", "POLISHED ", "BRUSHED ", + }; + static constexpr size_t kNumTypes_2 = sizeof(Types_2) / sizeof(Types_2[0]); + + const char *Types_3[] = + 
{ + "TIN", "NICKEL", "BRASS", "STEEL", "COPPER", + }; + static constexpr size_t kNumTypes_3 = sizeof(Types_3) / sizeof(Types_3[0]); + + const char *Containers_1[] = + { + "SM ", "LG ", "MD ", "JUMBO ", "WRAP ", + }; + static constexpr size_t kNumContainers_1 = sizeof(Containers_1) / sizeof(Containers_1[0]); + + const char *Containers_2[] = + { + "CASE", "BOX", "BAG", "JAR", "PKG", "PACK", "CAN", "DRUM", + }; + static constexpr size_t kNumContainers_2 = sizeof(Containers_2) / sizeof(Containers_2[0]); + + const char *Segments[] = + { + "AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD", + }; + static constexpr size_t kNumSegments = sizeof(Segments) / sizeof(Segments[0]); + + const char *Priorities[] = + { + "1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", "5-LOW", + }; + static constexpr size_t kNumPriorities = sizeof(Priorities) / sizeof(Priorities[0]); + + const char *Instructions[] = + { + "DELIVER IN PERSON", "COLLECT COD", "NONE", "TAKE BACK RETURN", + }; + static constexpr size_t kNumInstructions = sizeof(Instructions) / sizeof(Instructions[0]); + + const char *Modes[] = + { + "REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB", + }; + static constexpr size_t kNumModes = sizeof(Modes) / sizeof(Modes[0]); + + const char *Nouns[] = + { + "foxes ", "ideas ", "theodolites ", "pinto beans ", "instructions ", "dependencies ", "excuses ", + "platelets ", "asymptotes ", "courts ", "dolphins ", "multipliers ", "sautemes ", "warthogs ", "frets ", + "dinos ", "attainments ", "somas ", "Tiresias '", "patterns ", "forges ", "braids ", "hockey players ", "frays ", + "warhorses ", "dugouts ", "notomis ", "epitaphs ", "pearls ", "tithes ", "waters ", "orbits ", "gifts ", "sheaves ", + "depths ", "sentiments ", "decoys ", "realms ", "pains ", "grouches ", "escapades ", + }; + static constexpr size_t kNumNouns = sizeof(Nouns) / sizeof(Nouns[0]); + + const char *Verbs[] = + { + "sleep ", "wake ", "are ", "cajole ", "haggle ", "nag ", "use ", "boost ", "affix ", "detect ", "integrate ", + "maintain ", "nod ", "was ", "lose ", "sublate ", "solve ", "thrash ", "promise ", "engage ", "hinder ", + "print ", "x-ray ", "breach ", "eat ", "grow ", "impress ", "mold ", "poach ", "serve ", "run ", "dazzle ", + "snooze ", "doze ", "unwind ", "kindle ", "play ", "hang ", "believe ", "doubt ", + }; + static constexpr size_t kNumVerbs = sizeof(Verbs) / sizeof(Verbs[0]); + + const char *Adjectives[] = + { + "furious ", "sly ", "careful ", "blithe ", "quick ", "fluffy ", "slow ", "quiet ", "ruthless ", "thin ", + "close ", "dogged ", "daring ", "brave ", "stealthy ", "permanent ", "enticing ", "idle ", "busy ", + "regular ", "final ", "ironic ", "even ", "bold ", "silent ", + }; + static constexpr size_t kNumAdjectives = sizeof(Adjectives) / sizeof(Adjectives[0]); + + const char *Adverbs[] = + { + "sometimes ", "always ", "never ", "furiously ", "slyly ", "carefully ", "blithely ", "quickly ", "fluffily ", + "slowly ", "quietly ", "ruthlessly ", "thinly ", "closely ", "doggedly ", "daringly ", "bravely ", "stealthily ", + "permanently ", "enticingly ", "idly ", "busily ", "regularly ", "finally ", "ironically ", "evenly ", "boldly ", + "silently ", + }; + static constexpr size_t kNumAdverbs = sizeof(Adverbs) / sizeof(Adverbs[0]); + + const char *Prepositions[] = + { + "about ", "above ", "according to ", "across ", "after ", "against ", "along ", "alongside of ", "among ", + "around ", "at ", "atop ", "before ", "behind ", "beneath ", "beside ", "besides ", "between ", "beyond ", + "beyond ", "by 
", "despite ", "during ", "except ", "for ", "from ", "in place of ", "inside ", "instead of ", + "into ", "near ", "of ", "on ", "outside ", "over ", "past ", "since ", "through ", "throughout ", "to ", + "toward ", "under ", "until ", "up ", "upon ", "without ", "with ", "within ", + }; + static constexpr size_t kNumPrepositions = sizeof(Prepositions) / sizeof(Prepositions[0]); + + const char *Auxiliaries[] = + { + "do ", "may ", "might ", "shall ", "will ", "would ", "can ", "could ", "should ", "ought to ", "must ", + "will have to ", "shall have to ", "could have to ", "should have to ", "must have to ", "need to ", "try to ", + }; + static constexpr size_t kNumAuxiliaries = sizeof(Auxiliaries) / sizeof(Auxiliaries[0]); + + const char *Terminators[] = + { + ".", ";", ":", "?", "!", "--", + }; + static constexpr size_t kNumTerminators = sizeof(Terminators) / sizeof(Terminators[0]); + + void TpchText::GenerateWord(size_t &offset, const char **words, size_t num_choices) + { + std::uniform_int_distribution dist(0, num_choices - 1); + const char *word = words[dist(rng_)]; + size_t bytes_left = kTextBytes - offset; + size_t length = std::strlen(word); + size_t bytes_to_copy = std::min(bytes_left, length); + std::memcpy(text_->mutable_data() + offset, word, bytes_to_copy); + offset += bytes_to_copy; + } + + void TpchText::GenerateNoun(size_t &offset) + { + GenerateWord(offset, Nouns, kNumNouns); + } + + void TpchText::GenerateVerb(size_t &offset) + { + GenerateWord(offset, Verbs, kNumVerbs); + } + + void TpchText::GenerateAdjective(size_t &offset) + { + GenerateWord(offset, Adjectives, kNumAdjectives); + } + + void TpchText::GenerateAdverb(size_t &offset) + { + GenerateWord(offset, Adverbs, kNumAdverbs); + } + + void TpchText::GeneratePreposition(size_t &offset) + { + GenerateWord(offset, Prepositions, kNumPrepositions); + } + + void TpchText::GenerateAuxiliary(size_t &offset) + { + GenerateWord(offset, Auxiliaries, kNumAuxiliaries); + } + + void TpchText::GenerateTerminator(size_t &offset) + { + GenerateWord(offset, Terminators, kNumTerminators); + } + + void TpchText::GenerateNounPhrase(size_t &offset) + { + std::uniform_int_distribution dist(0, 3); + const char *comma_space = ", "; + switch(dist(rng_)) + { + case 0: + GenerateNoun(offset); + break; + case 1: + GenerateAdjective(offset); + GenerateNoun(offset); + break; + case 2: + GenerateAdjective(offset); + GenerateWord(offset, &comma_space, 1); + GenerateAdjective(offset); + GenerateNoun(offset); + break; + case 3: + GenerateAdverb(offset); + GenerateAdjective(offset); + GenerateNoun(offset); + break; + default: + Unreachable("Random number should be between 0 and 3 inclusive"); + break; + } + } + + void TpchText::GenerateVerbPhrase(size_t &offset) + { + std::uniform_int_distribution dist(0, 3); + switch(dist(rng_)) + { + case 0: + GenerateVerb(offset); + break; + case 1: + GenerateAuxiliary(offset); + GenerateVerb(offset); + break; + case 2: + GenerateVerb(offset); + GenerateAdverb(offset); + break; + case 3: + GenerateAuxiliary(offset); + GenerateVerb(offset); + GenerateAdverb(offset); + break; + default: + Unreachable("Random number should be between 0 and 3 inclusive"); + break; + } + } + + void TpchText::GeneratePrepositionalPhrase(size_t &offset) + { + const char *the_space = "the "; + GeneratePreposition(offset); + GenerateWord(offset, &the_space, 1); + GenerateNounPhrase(offset); + } + + void TpchText::GenerateSentence(size_t &offset) + { + std::uniform_int_distribution dist(0, 4); + switch(dist(rng_)) + { + case 0: + 
GenerateNounPhrase(offset); + GenerateVerbPhrase(offset); + GenerateTerminator(offset); + break; + case 1: + GenerateNounPhrase(offset); + GenerateVerbPhrase(offset); + GeneratePrepositionalPhrase(offset); + GenerateTerminator(offset); + break; + case 2: + GenerateNounPhrase(offset); + GenerateVerbPhrase(offset); + GenerateNounPhrase(offset); + GenerateTerminator(offset); + break; + case 3: + GenerateNounPhrase(offset); + GenerateVerbPhrase(offset); + GenerateNounPhrase(offset); + GenerateTerminator(offset); + break; + case 4: + GenerateNounPhrase(offset); + GeneratePrepositionalPhrase(offset); + GenerateVerbPhrase(offset); + GenerateNounPhrase(offset); + GenerateTerminator(offset); + break; + case 5: + GenerateNounPhrase(offset); + GeneratePrepositionalPhrase(offset); + GenerateVerbPhrase(offset); + GeneratePrepositionalPhrase(offset); + GenerateTerminator(offset); + break; + default: + Unreachable("Random number should be between 0 and 5 inclusive"); + break; + } + } + + using GenerateColumnFn = std::function; + class PartAndPartSupplierGenerator + { + public: + Status Init( + size_t num_threads, + int64_t batch_size, + int scale_factor) + { + if(!inited_) + { + inited_ = true; + batch_size_ = batch_size; + scale_factor_ = scale_factor; + + thread_local_data_.resize(num_threads); + for(ThreadLocalData &tld : thread_local_data_) + { + // 5 is the maximum number of different strings we need to concatenate + tld.string_indices.resize(5 * batch_size_); + } + part_rows_to_generate_ = scale_factor_ * 200000; + } + return Status::OK(); + } + + Result> SetPartOutputColumns(const std::vector &cols) + { + return SetOutputColumns(cols, part_types_, part_name_map_, part_cols_); + } + + Result> SetPartSuppOutputColumns(const std::vector &cols) + { + return SetOutputColumns(cols, partsupp_types_, partsupp_name_map_, partsupp_cols_); + } + + Result> NextPartBatch(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + { + std::lock_guard lock(part_output_queue_mutex_); + if(!part_output_queue_.empty()) + { + ExecBatch batch = std::move(part_output_queue_.front()); + part_output_queue_.pop(); + return std::move(batch); + } + else if(part_rows_generated_ == part_rows_to_generate_) + { + return util::nullopt; + } + else + { + tld.partkey_start = part_rows_generated_; + tld.part_to_generate = std::min( + batch_size_, + part_rows_to_generate_ - part_rows_generated_); + part_rows_generated_ += tld.part_to_generate; + ARROW_DCHECK(part_rows_generated_ <= part_rows_to_generate_); + } + } + tld.part.clear(); + tld.part.resize(PART::kNumCols); + RETURN_NOT_OK(InitPartsupp(thread_index)); + + for(int col : part_cols_) + RETURN_NOT_OK(part_generators_[col](thread_index)); + for(int col : partsupp_cols_) + RETURN_NOT_OK(partsupp_generators_[col](thread_index)); + + std::vector part_result(part_cols_.size()); + for(size_t i = 0; i < part_cols_.size(); i++) + { + int col_idx = part_cols_[i]; + part_result[i] = tld.part[col_idx]; + } + if(!partsupp_cols_.empty()) + { + std::vector partsupp_results; + for(size_t ibatch = 0; ibatch < tld.partsupp.size(); ibatch++) + { + std::vector partsupp_result(partsupp_cols_.size()); + for(size_t icol = 0; icol < partsupp_cols_.size(); icol++) + { + int col_idx = partsupp_cols_[icol]; + partsupp_result[icol] = tld.partsupp[ibatch][col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(partsupp_result))); + partsupp_results.emplace_back(std::move(eb)); + } + { + std::lock_guard guard(partsupp_output_queue_mutex_); + for(ExecBatch 
&eb : partsupp_results) + { + partsupp_output_queue_.emplace(std::move(eb)); + } + } + } + return ExecBatch::Make(std::move(part_result)); + } + + Result> NextPartSuppBatch(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + { + std::lock_guard lock(part_output_queue_mutex_); + if(!part_output_queue_.empty()) + { + ExecBatch batch = std::move(part_output_queue_.front()); + part_output_queue_.pop(); + return std::move(batch); + } + else if(part_rows_generated_ == part_rows_to_generate_) + { + return util::nullopt; + } + else + { + tld.partkey_start = part_rows_generated_; + tld.part_to_generate = std::min( + batch_size_, + part_rows_to_generate_ - part_rows_generated_); + part_rows_generated_ += tld.part_to_generate; + ARROW_DCHECK(part_rows_generated_ <= part_rows_to_generate_); + } + } + tld.part.clear(); + tld.part.resize(PART::kNumCols); + RETURN_NOT_OK(InitPartsupp(thread_index)); + + for(int col : part_cols_) + RETURN_NOT_OK(part_generators_[col](thread_index)); + for(int col : partsupp_cols_) + RETURN_NOT_OK(partsupp_generators_[col](thread_index)); + if(!part_cols_.empty()) + { + std::vector part_result(part_cols_.size()); + for(size_t i = 0; i < part_cols_.size(); i++) + { + int col_idx = part_cols_[i]; + part_result[i] = tld.part[col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch part_batch, ExecBatch::Make(std::move(part_result))); + { + std::lock_guard lock(part_output_queue_mutex_); + part_output_queue_.emplace(std::move(part_batch)); + } + } + std::vector partsupp_results; + for(size_t ibatch = 0; ibatch < tld.partsupp.size(); ibatch++) + { + std::vector partsupp_result(partsupp_cols_.size()); + for(size_t icol = 0; icol < partsupp_cols_.size(); icol++) + { + int col_idx = partsupp_cols_[icol]; + partsupp_result[icol] = tld.partsupp[ibatch][col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(partsupp_result))); + partsupp_results.emplace_back(std::move(eb)); + } + // Return the first batch, enqueue the rest. 
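+            // Each call materializes one batch of parts, which expands to
+            // kPartSuppRowsPerPart (4) PARTSUPP rows per part, chunked into
+            // batch_size_-row batches; everything after partsupp_results[0]
+            // is pushed onto partsupp_output_queue_ below for later
+            // NextPartSuppBatch calls, possibly on other threads.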
+ { + std::lock_guard lock(partsupp_output_queue_mutex_); + for(size_t i = 1; i < partsupp_results.size(); i++) + partsupp_output_queue_.emplace(std::move(partsupp_results[i])); + } + return std::move(partsupp_results[0]); + } + + private: +#define FOR_EACH_PART_COLUMN(F) \ + F(P_PARTKEY) \ + F(P_NAME) \ + F(P_MFGR) \ + F(P_BRAND) \ + F(P_TYPE) \ + F(P_SIZE) \ + F(P_CONTAINER) \ + F(P_RETAILPRICE) \ + F(P_COMMENT) + +#define FOR_EACH_PARTSUPP_COLUMN(F) \ + F(PS_PARTKEY) \ + F(PS_SUPPKEY) \ + F(PS_AVAILQTY) \ + F(PS_SUPPLYCOST) \ + F(PS_COMMENT) \ + +#define MAKE_ENUM(col) col, + struct PART + { + enum + { + FOR_EACH_PART_COLUMN(MAKE_ENUM) + kNumCols, + }; + }; + struct PARTSUPP + { + enum + { + FOR_EACH_PARTSUPP_COLUMN(MAKE_ENUM) + kNumCols, + }; + }; + +#define MAKE_STRING_MAP(col) \ + { #col, PART::col }, + const std::unordered_map part_name_map_ = + { + FOR_EACH_PART_COLUMN(MAKE_STRING_MAP) + }; +#undef MAKE_STRING_MAP +#define MAKE_STRING_MAP(col) \ + { #col, PARTSUPP::col }, + const std::unordered_map partsupp_name_map_ = + { + FOR_EACH_PARTSUPP_COLUMN(MAKE_STRING_MAP) + }; +#undef MAKE_STRING_MAP +#define MAKE_FN_ARRAY(col) \ + [this](size_t thread_index) { return this->col(thread_index); }, + std::vector part_generators_ = + { + FOR_EACH_PART_COLUMN(MAKE_FN_ARRAY) + }; + std::vector partsupp_generators_ = + { + FOR_EACH_PARTSUPP_COLUMN(MAKE_FN_ARRAY) + }; +#undef MAKE_FN_ARRAY +#undef FOR_EACH_LINEITEM_COLUMN +#undef FOR_EACH_ORDERS_COLUMN + + const std::vector> part_types_ = + { + int32(), + utf8(), + fixed_size_binary(25), + fixed_size_binary(10), + utf8(), + int32(), + fixed_size_binary(10), + decimal(12, 2), + utf8(), + }; + + const std::vector> partsupp_types_ = + { + int32(), + int32(), + int32(), + decimal(12, 2), + utf8(), + }; + + Status AllocatePartBatch(size_t thread_index, int column) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + ARROW_DCHECK(tld.part[column].kind() == Datum::NONE); + int32_t byte_width = arrow::internal::GetByteWidth(*part_types_[column]); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(tld.part_to_generate * byte_width)); + ArrayData ad(part_types_[column], tld.part_to_generate, { nullptr, std::move(buff) }); + tld.part[column] = std::move(ad); + return Status::OK(); + } + + Status P_PARTKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_PARTKEY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_PARTKEY)); + int32_t *p_partkey = reinterpret_cast( + tld.part[PART::P_PARTKEY].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.part_to_generate; i++) + { + p_partkey[i] = (tld.partkey_start + i + 1); + ARROW_DCHECK(1 <= p_partkey[i] && p_partkey[i] <= part_rows_to_generate_); + } + } + return Status::OK(); + } + + Status P_NAME(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_NAME].kind() == Datum::NONE) + { + std::uniform_int_distribution dist(0, static_cast(kNumNameParts - 1)); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr offset_buff, AllocateBuffer((tld.part_to_generate + 1) * sizeof(int32_t))); + int32_t *offsets = reinterpret_cast(offset_buff->mutable_data()); + offsets[0] = 0; + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + size_t string_length = 0; + for(int ipart = 0; ipart < 5; ipart++) + { + uint8_t name_part_index = dist(tld.rng); + tld.string_indices[irow * 5 + ipart] = name_part_index; + string_length += 
std::strlen(NameParts[name_part_index]); + } + // Add 4 because there is a space between each word (i.e. four spaces) + offsets[irow + 1] = offsets[irow] + string_length + 4; + } + // Add an extra byte for the space after in the very last string. + ARROW_ASSIGN_OR_RAISE(std::unique_ptr string_buffer, AllocateBuffer(offsets[tld.part_to_generate] + 1)); + char *strings = reinterpret_cast(string_buffer->mutable_data()); + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + char *row = strings + offsets[irow]; + for(int ipart = 0; ipart < 5; ipart++) + { + uint8_t name_part_index = tld.string_indices[irow * 5 + ipart]; + const char *part = NameParts[name_part_index]; + size_t length = std::strlen(part); + std::memcpy(row, part, length); + row += length; + *row++ = ' '; + } + } + ArrayData ad(part_types_[PART::P_NAME], tld.part_to_generate, { nullptr, std::move(string_buffer), std::move(offset_buff) }); + Datum datum(ad); + tld.part[PART::P_NAME] = std::move(datum); + } + return Status::OK(); + } + + Status P_MFGR(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_MFGR].kind() == Datum::NONE) + { + std::uniform_int_distribution dist(1, 5); + const char *manufacturer = "Manufacturer#"; + const size_t manufacturer_length = std::strlen(manufacturer); + RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_MFGR)); + char *p_mfgr = reinterpret_cast(tld.part[PART::P_MFGR].array()->buffers[1]->mutable_data()); + int32_t byte_width = arrow::internal::GetByteWidth(*part_types_[PART::P_MFGR]); + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + std::strncpy(p_mfgr + byte_width * irow, manufacturer, byte_width); + char mfgr_id = '0' + dist(tld.rng); + *(p_mfgr + byte_width * irow + manufacturer_length) = mfgr_id; + } + } + return Status::OK(); + } + + Status P_BRAND(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_MFGR].kind() == Datum::NONE) + { + RETURN_NOT_OK(P_MFGR(thread_index)); + std::uniform_int_distribution dist(1, 5); + const char *brand = "Brand#"; + const size_t brand_length = std::strlen(brand); + RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_BRAND)); + const char *p_mfgr = reinterpret_cast( + tld.part[PART::P_MFGR].array()->buffers[1]->data()); + char *p_brand = reinterpret_cast(tld.part[PART::P_BRAND].array()->buffers[1]->mutable_data()); + int32_t byte_width = arrow::internal::GetByteWidth(*part_types_[PART::P_BRAND]); + int32_t mfgr_byte_width = arrow::internal::GetByteWidth(*part_types_[PART::P_MFGR]); + const size_t mfgr_id_offset = std::strlen("Manufacturer#"); + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + char mfgr_id = *(p_mfgr + irow * mfgr_byte_width + mfgr_id_offset); + char brand_id = '0' + dist(tld.rng); + std::strncpy(p_brand + byte_width * irow, brand, byte_width); + *(p_brand + byte_width * irow + brand_length) = mfgr_id; + *(p_brand + byte_width * irow + brand_length + 1) = brand_id; + } + } + return Status::OK(); + } + + Status P_TYPE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_TYPE].kind() == Datum::NONE) + { + using D = std::uniform_int_distribution; + D dists[] = + { + D{ 0, static_cast(kNumTypes_1 - 1) }, + D{ 0, static_cast(kNumTypes_2 - 1) }, + D{ 0, static_cast(kNumTypes_3 - 1) }, + }; + + const char **types[] = { Types_1, Types_2, Types_3 }; + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr offset_buff, AllocateBuffer((tld.part_to_generate + 1) * 
sizeof(int32_t))); + int32_t *offsets = reinterpret_cast(offset_buff->mutable_data()); + offsets[0] = 0; + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + size_t string_length = 0; + for(int ipart = 0; ipart < 3; ipart++) + { + uint8_t name_part_index = dists[ipart](tld.rng); + tld.string_indices[irow * 3 + ipart] = name_part_index; + string_length += std::strlen(types[ipart][name_part_index]); + } + // Add 4 because there is a space between each word (i.e. 2 spaces) + offsets[irow + 1] = offsets[irow] + string_length + 2; + } + // Add an extra byte for the space after in the very last string. + ARROW_ASSIGN_OR_RAISE(std::unique_ptr string_buffer, AllocateBuffer(offsets[tld.part_to_generate] + 1)); + char *strings = reinterpret_cast(string_buffer->mutable_data()); + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + char *row = strings + offsets[irow]; + for(int ipart = 0; ipart < 3; ipart++) + { + uint8_t name_part_index = tld.string_indices[irow * 3 + ipart]; + const char *part = types[ipart][name_part_index]; + size_t length = std::strlen(part); + std::memcpy(row, part, length); + row += length; + *row++ = ' '; + } + } + ArrayData ad(part_types_[PART::P_TYPE], tld.part_to_generate, { nullptr, std::move(string_buffer), std::move(offset_buff) }); + Datum datum(ad); + tld.part[PART::P_TYPE] = std::move(datum); + } + return Status::OK(); + } + + Status P_SIZE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_SIZE].kind() == Datum::NONE) + { + std::uniform_int_distribution dist(1, 50); + RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_SIZE)); + int32_t *p_size = reinterpret_cast( + tld.part[PART::P_SIZE].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.part_to_generate; i++) + p_size[i] = dist(tld.rng); + } + return Status::OK(); + } + + Status P_CONTAINER(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_CONTAINER].kind() == Datum::NONE) + { + std::uniform_int_distribution dist1(0, static_cast(kNumContainers_1 - 1)); + std::uniform_int_distribution dist2(0, static_cast(kNumContainers_2 - 1)); + RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_CONTAINER)); + char *p_container = reinterpret_cast( + tld.part[PART::P_CONTAINER].array()->buffers[1]->mutable_data()); + int32_t byte_width = arrow::internal::GetByteWidth(*part_types_[PART::P_CONTAINER]); + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + int container1_idx = dist1(tld.rng); + int container2_idx = dist2(tld.rng); + const char *container1 = Containers_1[container1_idx]; + const char *container2 = Containers_2[container2_idx]; + size_t container1_length = std::strlen(container1); + size_t container2_length = std::strlen(container2); + + char *row = p_container + byte_width * irow; + // Abuse strncpy to zero out the rest of the array + std::strncpy(row, container1, byte_width); + row[container1_length] = ' '; + std::memcpy(row + container1_length + 1, container2, container2_length); + } + } + return Status::OK(); + } + + Status P_RETAILPRICE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_RETAILPRICE].kind() == Datum::NONE) + { + RETURN_NOT_OK(P_PARTKEY(thread_index)); + RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_RETAILPRICE)); + const int32_t *p_partkey = reinterpret_cast( + tld.part[PART::P_PARTKEY].array()->buffers[1]->data()); + Decimal128 *p_retailprice = reinterpret_cast( + 
tld.part[PART::P_RETAILPRICE].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.part_to_generate; irow++) + { + int32_t partkey = p_partkey[irow]; + int64_t retail_price = (90000 + ((partkey / 10) % 20001) + 100 * (partkey % 1000)); + p_retailprice[irow] = { retail_price }; + } + } + return Status::OK(); + } + + Status P_COMMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PART::P_COMMENT].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE(tld.part[PART::P_COMMENT], g_text.GenerateComments(batch_size_, 5, 22, tld.rng)); + } + return Status::OK(); + } + + Status InitPartsupp(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + tld.generated_partsupp.reset(); + tld.partsupp.clear(); + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + int64_t num_batches = (ps_to_generate + batch_size_ - 1) / batch_size_; + tld.partsupp.resize(num_batches); + for(std::vector &batch : tld.partsupp) + { + batch.clear(); + batch.resize(PARTSUPP::kNumCols); + } + return Status::OK(); + } + + Status AllocatePartSuppBatch(size_t thread_index, size_t ibatch, int column) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + int32_t byte_width = arrow::internal::GetByteWidth(*partsupp_types_[column]); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(batch_size_ * byte_width)); + ArrayData ad(partsupp_types_[column], batch_size_, { nullptr, std::move(buff) }); + tld.partsupp[ibatch][column] = std::move(ad); + return Status::OK(); + } + + Status PS_PARTKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_partsupp[PARTSUPP::PS_PARTKEY]) + { + tld.generated_partsupp[PARTSUPP::PS_PARTKEY] = true; + RETURN_NOT_OK(P_PARTKEY(thread_index)); + const int32_t *p_partkey = reinterpret_cast( + tld.part[PART::P_PARTKEY].array()->buffers[1]->data()); + + size_t ibatch = 0; + int64_t ipartsupp = 0; + int64_t ipart = 0; + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + for(int64_t irow = 0; irow < ps_to_generate; ibatch++) + { + RETURN_NOT_OK(AllocatePartSuppBatch(thread_index, ibatch, PARTSUPP::PS_PARTKEY)); + int32_t *ps_partkey = reinterpret_cast( + tld.partsupp[ibatch][PARTSUPP::PS_PARTKEY].array()->buffers[1]->mutable_data()); + int64_t next_run = std::min(batch_size_, ps_to_generate - irow); + + int64_t batch_offset = 0; + for(int64_t irun = 0; irun < next_run;) + { + for(; ipartsupp < kPartSuppRowsPerPart && irun < next_run; ipartsupp++, irun++) + ps_partkey[batch_offset++] = p_partkey[ipart]; + if(ipartsupp == kPartSuppRowsPerPart) + { + ipartsupp = 0; + ipart++; + } + } + irow += next_run; + tld.partsupp[ibatch][PARTSUPP::PS_PARTKEY].array()->length = batch_offset; + } + } + return Status::OK(); + } + + Status PS_SUPPKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_partsupp[PARTSUPP::PS_SUPPKEY]) + { + tld.generated_partsupp[PARTSUPP::PS_SUPPKEY] = true; + RETURN_NOT_OK(P_PARTKEY(thread_index)); + const int32_t *p_partkey = reinterpret_cast( + tld.part[PART::P_PARTKEY].array()->buffers[1]->data()); + + size_t ibatch = 0; + int64_t ipartsupp = 0; + int64_t ipart = 0; + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + const int32_t S = scale_factor_ * 10000; + for(int64_t irow = 0; irow < ps_to_generate; ibatch++) + { + RETURN_NOT_OK(AllocatePartSuppBatch(thread_index, ibatch, PARTSUPP::PS_SUPPKEY)); + int32_t 
*ps_suppkey = reinterpret_cast( + tld.partsupp[ibatch][PARTSUPP::PS_PARTKEY].array()->buffers[1]->mutable_data()); + int64_t next_run = std::min(batch_size_, ps_to_generate - irow); + + int64_t batch_offset = 0; + for(int64_t irun = 0; irun < next_run;) + { + for(; ipartsupp < kPartSuppRowsPerPart && irun < next_run; ipartsupp++, irun++) + { + int32_t supplier = static_cast(ipartsupp); + int32_t partkey = p_partkey[ipart]; + ps_suppkey[batch_offset++] = (partkey + (supplier * ((S / 4) + (partkey - 1) / S))) % S + 1; + } + if(ipartsupp == kPartSuppRowsPerPart) + { + ipartsupp = 0; + ipart++; + } + } + irow += next_run; + tld.partsupp[ibatch][PARTSUPP::PS_SUPPKEY].array()->length = batch_offset; + } + } + return Status::OK(); + } + + Status PS_AVAILQTY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_partsupp[PARTSUPP::PS_AVAILQTY]) + { + tld.generated_partsupp[PARTSUPP::PS_AVAILQTY] = true; + std::uniform_int_distribution dist(1, 9999); + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + int64_t ibatch = 0; + for(int64_t irow = 0; irow < ps_to_generate; ibatch++) + { + RETURN_NOT_OK(AllocatePartSuppBatch(thread_index, ibatch, PARTSUPP::PS_AVAILQTY)); + int32_t *ps_availqty = reinterpret_cast( + tld.partsupp[ibatch][PARTSUPP::PS_AVAILQTY].array()->buffers[1]->mutable_data()); + int64_t next_run = std::min(batch_size_, ps_to_generate - irow); + for(int64_t irun = 0; irun < next_run; irun++) + ps_availqty[irun] = dist(tld.rng); + + tld.partsupp[ibatch][PARTSUPP::PS_AVAILQTY].array()->length = next_run; + irow += next_run; + } + } + return Status::OK(); + } + + Status PS_SUPPLYCOST(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_partsupp[PARTSUPP::PS_SUPPLYCOST]) + { + tld.generated_partsupp[PARTSUPP::PS_SUPPLYCOST] = true; + std::uniform_int_distribution dist(100, 100000); + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + int64_t ibatch = 0; + for(int64_t irow = 0; irow < ps_to_generate; ibatch++) + { + RETURN_NOT_OK(AllocatePartSuppBatch(thread_index, ibatch, PARTSUPP::PS_SUPPLYCOST)); + Decimal128 *ps_supplycost = reinterpret_cast( + tld.partsupp[ibatch][PARTSUPP::PS_SUPPLYCOST].array()->buffers[1]->mutable_data()); + int64_t next_run = std::min(batch_size_, ps_to_generate - irow); + for(int64_t irun = 0; irun < next_run; irun++) + ps_supplycost[irun] = { dist(tld.rng) }; + + tld.partsupp[ibatch][PARTSUPP::PS_AVAILQTY].array()->length = next_run; + irow += next_run; + } + } + return Status::OK(); + } + + Status PS_COMMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.part[PARTSUPP::PS_COMMENT].kind() == Datum::NONE) + { + int64_t irow = 0; + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + for(size_t ibatch = 0; ibatch < tld.partsupp.size(); ibatch++) + { + int64_t num_rows = std::min(batch_size_, ps_to_generate - irow); + ARROW_ASSIGN_OR_RAISE( + tld.partsupp[ibatch][PARTSUPP::PS_COMMENT], g_text.GenerateComments(num_rows, 49, 198, tld.rng)); + irow += num_rows; + } + } + return Status::OK(); + } + + struct ThreadLocalData + { + std::vector part; + std::vector string_indices; + int64_t part_to_generate; + int64_t partkey_start; + + std::vector> partsupp; + std::bitset generated_partsupp; + random::pcg32_fast rng; + }; + std::vector thread_local_data_; + + bool inited_ = false; + std::mutex part_output_queue_mutex_; + std::mutex partsupp_output_queue_mutex_; + 
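+        // Batches produced for whichever table was *not* requested are
+        // buffered in these queues (guarded by the mutexes above) until a
+        // matching NextPartBatch / NextPartSuppBatch call drains them.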
std::queue part_output_queue_; + std::queue partsupp_output_queue_; + int64_t batch_size_; + int scale_factor_; + int64_t part_rows_to_generate_; + int64_t part_rows_generated_; + std::vector part_cols_; + std::vector partsupp_cols_; + + static constexpr int64_t kPartSuppRowsPerPart = 4; + }; + + class OrdersAndLineItemGenerator + { + public: + Status Init( + size_t num_threads, + int64_t batch_size, + int scale_factor) + { + if(!inited_) + { + inited_ = true; + batch_size_ = batch_size; + scale_factor_ = scale_factor; + + thread_local_data_.resize(num_threads); + for(ThreadLocalData &tld : thread_local_data_) + { + tld.items_per_order.resize(batch_size_); + } + orders_rows_to_generate_ = scale_factor_ * 150000 * 10; + } + return Status::OK(); + } + + Result> SetOrdersOutputColumns(const std::vector &cols) + { + return SetOutputColumns(cols, orders_types_, orders_name_map_, orders_cols_); + } + + Result> SetLineItemOutputColumns(const std::vector &cols) + { + return SetOutputColumns(cols, lineitem_types_, lineitem_name_map_, lineitem_cols_); + } + + Result> NextOrdersBatch(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + { + std::lock_guard lock(orders_output_queue_mutex_); + if(!orders_output_queue_.empty()) + { + ExecBatch batch = std::move(orders_output_queue_.front()); + orders_output_queue_.pop(); + return std::move(batch); + } + else if(orders_rows_generated_ == orders_rows_to_generate_) + { + return util::nullopt; + } + else + { + tld.orderkey_start = orders_rows_generated_; + tld.orders_to_generate = std::min( + batch_size_, + orders_rows_to_generate_ - orders_rows_generated_); + orders_rows_generated_ += tld.orders_to_generate; + ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); + } + } + tld.orders.clear(); + tld.orders.resize(ORDERS::kNumCols); + RETURN_NOT_OK(GenerateRowCounts(thread_index)); + tld.first_batch_offset = 0; + tld.generated_lineitem.reset(); + + for(int col : orders_cols_) + RETURN_NOT_OK(orders_generators_[col](thread_index)); + for(int col : lineitem_cols_) + RETURN_NOT_OK(lineitem_generators_[col](thread_index)); + + std::vector orders_result(orders_cols_.size()); + for(size_t i = 0; i < orders_cols_.size(); i++) + { + int col_idx = orders_cols_[i]; + orders_result[i] = tld.orders[col_idx]; + } + if(!lineitem_cols_.empty()) + { + std::vector lineitem_results; + for(size_t ibatch = 0; ibatch < tld.lineitem.size(); ibatch++) + { + std::vector lineitem_result(lineitem_cols_.size()); + for(size_t icol = 0; icol < lineitem_cols_.size(); icol++) + { + int col_idx = lineitem_cols_[icol]; + lineitem_result[icol] = tld.lineitem[ibatch][col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(lineitem_result))); + lineitem_results.emplace_back(std::move(eb)); + } + { + std::lock_guard guard(lineitem_output_queue_mutex_); + for(ExecBatch &eb : lineitem_results) + { + lineitem_output_queue_.emplace(std::move(eb)); + } + } + } + return ExecBatch::Make(std::move(orders_result)); + } + + Result> NextLineItemBatch(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + ExecBatch queued; + bool from_queue = false; + { + std::lock_guard lock(lineitem_output_queue_mutex_); + if(!lineitem_output_queue_.empty()) + { + queued = std::move(lineitem_output_queue_.front()); + lineitem_output_queue_.pop(); + from_queue = true; + } + } + tld.first_batch_offset = 0; + if(from_queue) + { + ARROW_DCHECK(queued.length <= batch_size_); + tld.first_batch_offset = queued.length; + 
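+            // A partially filled batch pulled from the queue becomes the first
+            // output batch: first_batch_offset records how many rows it already
+            // holds, and the column generators below append rows after that point.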
if(queued.length == batch_size_) + return std::move(queued); + } + { + std::lock_guard lock(orders_output_queue_mutex_); + tld.orderkey_start = orders_rows_generated_; + tld.orders_to_generate = std::min( + batch_size_, + orders_rows_to_generate_ - orders_rows_generated_); + orders_rows_generated_ += tld.orders_to_generate; + ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); + if(orders_rows_generated_ == orders_rows_to_generate_) + { + if(from_queue) + return std::move(queued); + return util::nullopt; + } + } + tld.orders.clear(); + tld.orders.resize(ORDERS::kNumCols); + RETURN_NOT_OK(GenerateRowCounts(thread_index)); + tld.generated_lineitem.reset(); + if(from_queue) + { + for(size_t i = 0; i < lineitem_cols_.size(); i++) + if(tld.lineitem[0][lineitem_cols_[i]].kind() == Datum::NONE) + tld.lineitem[0][lineitem_cols_[i]] = std::move(queued[i]); + } + + for(int col : orders_cols_) + RETURN_NOT_OK(orders_generators_[col](thread_index)); + for(int col : lineitem_cols_) + RETURN_NOT_OK(lineitem_generators_[col](thread_index)); + + if(!orders_cols_.empty()) + { + std::vector orders_result(orders_cols_.size()); + for(size_t i = 0; i < orders_cols_.size(); i++) + { + int col_idx = orders_cols_[i]; + orders_result[i] = tld.orders[col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch orders_batch, ExecBatch::Make(std::move(orders_result))); + { + std::lock_guard lock(orders_output_queue_mutex_); + orders_output_queue_.emplace(std::move(orders_batch)); + } + } + std::vector lineitem_results; + for(size_t ibatch = 0; ibatch < tld.lineitem.size(); ibatch++) + { + std::vector lineitem_result(lineitem_cols_.size()); + for(size_t icol = 0; icol < lineitem_cols_.size(); icol++) + { + int col_idx = lineitem_cols_[icol]; + lineitem_result[icol] = tld.lineitem[ibatch][col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(lineitem_result))); + lineitem_results.emplace_back(std::move(eb)); + } + // Return the first batch, enqueue the rest. 
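+            // As with PART/PARTSUPP, ORDERS and LINEITEM are generated together:
+            // each order gets a random number of line items, so one batch of
+            // orders spills into several LINEITEM batches; the extras beyond the
+            // first are pushed onto lineitem_output_queue_ below for later calls.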
+ { + std::lock_guard lock(lineitem_output_queue_mutex_); + for(size_t i = 1; i < lineitem_results.size(); i++) + lineitem_output_queue_.emplace(std::move(lineitem_results[i])); + } + return std::move(lineitem_results[0]); + } + + private: +#define FOR_EACH_ORDERS_COLUMN(F) \ + F(O_ORDERKEY) \ + F(O_CUSTKEY) \ + F(O_ORDERSTATUS) \ + F(O_TOTALPRICE) \ + F(O_ORDERDATE) \ + F(O_ORDERPRIORITY) \ + F(O_CLERK) \ + F(O_SHIPPRIORITY) \ + F(O_COMMENT) + +#define FOR_EACH_LINEITEM_COLUMN(F) \ + F(L_ORDERKEY) \ + F(L_PARTKEY) \ + F(L_SUPPKEY) \ + F(L_LINENUMBER) \ + F(L_QUANTITY) \ + F(L_EXTENDEDPRICE) \ + F(L_DISCOUNT) \ + F(L_TAX) \ + F(L_RETURNFLAG) \ + F(L_LINESTATUS) \ + F(L_SHIPDATE) \ + F(L_COMMITDATE) \ + F(L_RECEIPTDATE) \ + F(L_SHIPINSTRUCT) \ + F(L_SHIPMODE) \ + F(L_COMMENT) + +#define MAKE_ENUM(col) col, + struct ORDERS + { + enum + { + FOR_EACH_ORDERS_COLUMN(MAKE_ENUM) + kNumCols, + }; + }; + struct LINEITEM + { + enum + { + FOR_EACH_LINEITEM_COLUMN(MAKE_ENUM) + kNumCols, + }; + }; + +#define MAKE_STRING_MAP(col) \ + { #col, ORDERS::col }, + const std::unordered_map orders_name_map_ = + { + FOR_EACH_ORDERS_COLUMN(MAKE_STRING_MAP) + }; +#undef MAKE_STRING_MAP +#define MAKE_STRING_MAP(col) \ + { #col, LINEITEM::col }, + const std::unordered_map lineitem_name_map_ = + { + FOR_EACH_LINEITEM_COLUMN(MAKE_STRING_MAP) + }; +#undef MAKE_STRING_MAP +#define MAKE_FN_ARRAY(col) \ + [this](size_t thread_index) { return this->col(thread_index); }, + std::vector orders_generators_ = + { + FOR_EACH_ORDERS_COLUMN(MAKE_FN_ARRAY) + }; + std::vector lineitem_generators_ = + { + FOR_EACH_LINEITEM_COLUMN(MAKE_FN_ARRAY) + }; +#undef MAKE_FN_ARRAY +#undef FOR_EACH_LINEITEM_COLUMN +#undef FOR_EACH_ORDERS_COLUMN + + const std::vector> orders_types_ = + { + int32(), + int32(), + fixed_size_binary(1), + decimal(12, 2), + date32(), + fixed_size_binary(15), + fixed_size_binary(15), + int32(), + utf8() + }; + + const std::vector> lineitem_types_ = + { + int32(), + int32(), + int32(), + int32(), + decimal(12, 2), + decimal(12, 2), + decimal(12, 2), + decimal(12, 2), + fixed_size_binary(1), + fixed_size_binary(1), + date32(), + date32(), + date32(), + fixed_size_binary(25), + fixed_size_binary(10), + utf8(), + }; + + Status AllocateOrdersBatch(size_t thread_index, int column) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + ARROW_DCHECK(tld.orders[column].kind() == Datum::NONE); + int32_t byte_width = arrow::internal::GetByteWidth(*orders_types_[column]); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(tld.orders_to_generate * byte_width)); + ArrayData ad(orders_types_[column], tld.orders_to_generate, { nullptr, std::move(buff) }); + tld.orders[column] = std::move(ad); + return Status::OK(); + } + + Status O_ORDERKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_ORDERKEY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_ORDERKEY)); + int32_t *o_orderkey = reinterpret_cast( + tld.orders[ORDERS::O_ORDERKEY].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.orders_to_generate; i++) + { + o_orderkey[i] = (tld.orderkey_start + i + 1); + ARROW_DCHECK(1 <= o_orderkey[i] && o_orderkey[i] <= orders_rows_to_generate_); + } + } + return Status::OK(); + } + + Status O_CUSTKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_CUSTKEY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, 
ORDERS::O_CUSTKEY)); + + // Spec says it must be a random number between 1 and SF*150000 that is not + // divisible by 3. Rather than repeatedly generating numbers until we get to + // a non-divisible-by-3 number, we just generate a number between + // 0 and SF * 50000 - 1, multiply by 3, and then add either 1 or 2. + std::uniform_int_distribution base_dist(0, scale_factor_ * 50000 - 1); + std::uniform_int_distribution offset_dist(1, 2); + int32_t *o_custkey = reinterpret_cast( + tld.orders[ORDERS::O_CUSTKEY].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.orders_to_generate; i++) + o_custkey[i] = 3 * base_dist(tld.rng) + offset_dist(tld.rng); + } + return Status::OK(); + } + + Status O_ORDERSTATUS(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_ORDERSTATUS].kind() == Datum::NONE) + { + RETURN_NOT_OK(L_LINESTATUS(thread_index)); + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_ORDERSTATUS)); + + char *o_orderstatus = reinterpret_cast( + tld.orders[ORDERS::O_ORDERSTATUS].array()->buffers[1]->mutable_data()); + + size_t batch_offset = tld.first_batch_offset; + size_t ibatch = 0; + size_t iorder = 0; + int32_t iline = 0; + bool all_f = true; + bool all_o = true; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + const char *l_linestatus = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_LINESTATUS].array()->buffers[1]->data()); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + for(int64_t irun = 0; irun < next_run;) + { + for(; iline < tld.items_per_order[iorder] && irun < next_run; iline++, irun++, batch_offset++) + { + all_f &= l_linestatus[batch_offset] == 'F'; + all_o &= l_linestatus[batch_offset] == 'O'; + } + if(iline == tld.items_per_order[iorder]) + { + iline = 0; + ARROW_DCHECK(!(all_f && all_o)); + if(all_f) + o_orderstatus[iorder] = 'F'; + else if(all_o) + o_orderstatus[iorder] = 'O'; + else + o_orderstatus[iorder] = 'P'; + iorder++; + } + } + irow += next_run; + batch_offset = 0; + } + } + return Status::OK(); + } + + Status O_TOTALPRICE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_TOTALPRICE].kind() == Datum::NONE) + { + RETURN_NOT_OK(L_EXTENDEDPRICE(thread_index)); + RETURN_NOT_OK(L_TAX(thread_index)); + RETURN_NOT_OK(L_DISCOUNT(thread_index)); + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_TOTALPRICE)); + + size_t batch_offset = tld.first_batch_offset; + size_t ibatch = 0; + size_t iorder = 0; + int32_t iline = 0; + int64_t sum = 0; + Decimal128 *o_totalprice = reinterpret_cast( + tld.orders[ORDERS::O_TOTALPRICE].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + + const Decimal128 *l_extendedprice = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_EXTENDEDPRICE].array()->buffers[1]->data()); + const Decimal128 *l_tax = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_TAX].array()->buffers[1]->data()); + const Decimal128 *l_discount = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_DISCOUNT].array()->buffers[1]->data()); + + for(int64_t irun = 0; irun < next_run;) + { + for(; iline < tld.items_per_order[iorder] && irun < next_run; iline++, irun++, 
batch_offset++) + { + int64_t eprice = static_cast(l_extendedprice[batch_offset]); + int64_t tax = static_cast(l_tax[batch_offset]); + int64_t discount = static_cast(l_discount[batch_offset]); + sum += (eprice * (100 + tax) * (100 - discount)); + } + if(iline == tld.items_per_order[iorder]) + { + sum /= 100 * 100; + o_totalprice[iorder] = { sum }; + iline = 0; + iorder++; + } + } + irow += next_run; + batch_offset = 0; + } + } + return Status::OK(); + } + + Status O_ORDERDATE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_ORDERDATE].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_ORDERDATE)); + + std::uniform_int_distribution dist(STARTDATE, ENDDATE - 151); + uint32_t *o_orderdate = reinterpret_cast( + tld.orders[ORDERS::O_ORDERDATE].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.orders_to_generate; i++) + o_orderdate[i] = dist(tld.rng); + } + return Status::OK(); + } + + Status O_ORDERPRIORITY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_ORDERPRIORITY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_ORDERPRIORITY)); + int32_t byte_width = arrow::internal::GetByteWidth(*orders_types_[ORDERS::O_ORDERPRIORITY]); + std::uniform_int_distribution dist(0, kNumPriorities - 1); + char *o_orderpriority = reinterpret_cast( + tld.orders[ORDERS::O_ORDERPRIORITY].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.orders_to_generate; i++) + { + const char *str = Priorities[dist(tld.rng)]; + std::strncpy(o_orderpriority + i * byte_width, str, byte_width); + } + } + return Status::OK(); + } + + Status O_CLERK(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_CLERK].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_CLERK)); + int32_t byte_width = arrow::internal::GetByteWidth(*orders_types_[ORDERS::O_CLERK]); + std::uniform_int_distribution dist(1, scale_factor_ * 1000); + char *o_clerk = reinterpret_cast( + tld.orders[ORDERS::O_CLERK].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < tld.orders_to_generate; i++) + { + const char *clerk = "Clerk#"; + const size_t clerk_length = std::strlen(clerk); + int64_t clerk_number = dist(tld.rng); + char *output = o_clerk + i * byte_width; + std::strncpy(output, clerk, byte_width); + AppendNumberPaddedToNineDigits(output + clerk_length, clerk_number); + } + } + return Status::OK(); + } + + Status O_SHIPPRIORITY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_SHIPPRIORITY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_SHIPPRIORITY)); + int32_t *o_shippriority = reinterpret_cast( + tld.orders[ORDERS::O_SHIPPRIORITY].array()->buffers[1]->mutable_data()); + std::memset(o_shippriority, 0, tld.orders_to_generate * sizeof(int32_t)); + } + return Status::OK(); + } + + Status O_COMMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.orders[ORDERS::O_COMMENT].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE(tld.orders[ORDERS::O_COMMENT], g_text.GenerateComments(batch_size_, 19, 78, tld.rng)); + } + return Status::OK(); + } + + Status GenerateRowCounts(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + std::uniform_int_distribution length_dist(1, 7); 
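// Illustrative sketch (not part of the patch): the O_TOTALPRICE arithmetic above, done
// directly on decimal(12, 2) values stored as integers scaled by 100. (100 + tax) and
// (100 - discount) are the scaled forms of (1 + L_TAX) and (1 - L_DISCOUNT), so the
// product carries a factor of 100^3 and dividing by 100 * 100 brings it back to the
// cents scale. Note that the running `sum` above does not appear to be reset between
// orders; this sketch accumulates per order.
#include <cstdint>
#include <cstdio>
#include <vector>

struct LineItemCents {
  int64_t extendedprice;  // 123456 represents 1234.56
  int64_t tax;            // 8 represents 0.08
  int64_t discount;       // 5 represents 0.05
};

int64_t TotalPriceCents(const std::vector<LineItemCents>& items) {
  int64_t sum = 0;
  for (const LineItemCents& li : items)
    sum += li.extendedprice * (100 + li.tax) * (100 - li.discount);
  return sum / (100 * 100);
}

int main() {
  // 1000.00 * 1.08 * 0.95 = 1026.00, i.e. 102600 at the cents scale.
  std::printf("%lld\n", static_cast<long long>(TotalPriceCents({{100000, 8, 5}})));
}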
+ tld.lineitem_to_generate = 0; + tld.items_per_order.clear(); + for(int64_t i = 0; i < tld.orders_to_generate; i++) + { + int64_t length = length_dist(tld.rng); + tld.items_per_order.push_back(length); + tld.lineitem_to_generate += length; + } + size_t num_batches = (tld.first_batch_offset + tld.lineitem_to_generate + batch_size_ - 1) / batch_size_; + tld.lineitem.clear(); + tld.lineitem.resize(num_batches); + for(std::vector &batch : tld.lineitem) + { + batch.clear(); + batch.resize(LINEITEM::kNumCols); + } + return Status::OK(); + } + + Status AllocateLineItemBufferIfNeeded(size_t thread_index, size_t ibatch, int column, size_t &out_batch_offset) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.lineitem[ibatch][column].kind() == Datum::NONE) + { + int32_t byte_width = arrow::internal::GetByteWidth(*lineitem_types_[column]); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(batch_size_ * byte_width)); + ArrayData ad(lineitem_types_[column], batch_size_, { nullptr, std::move(buff) }); + tld.lineitem[ibatch][column] = std::move(ad); + out_batch_offset = 0; + } + if(ibatch == 0) + out_batch_offset = tld.first_batch_offset; + return Status::OK(); + } + + Status L_ORDERKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_ORDERKEY]) + { + tld.generated_lineitem[LINEITEM::L_ORDERKEY] = true; + RETURN_NOT_OK(O_ORDERKEY(thread_index)); + const int32_t *o_orderkey = reinterpret_cast( + tld.orders[ORDERS::O_ORDERKEY].array()->buffers[1]->data()); + + size_t ibatch = 0; + size_t iorder = 0; + int32_t iline = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_ORDERKEY, batch_offset)); + int32_t *l_linenumber = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_ORDERKEY].array()->buffers[1]->mutable_data()); + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + for(int64_t irun = 0; irun < next_run;) + { + for(; iline < tld.items_per_order[iorder] && irun < next_run; iline++, irun++) + l_linenumber[batch_offset++] = o_orderkey[iorder]; + if(iline == tld.items_per_order[iorder]) + { + iline = 0; + iorder++; + } + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_ORDERKEY].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_PARTKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_PARTKEY]) + { + tld.generated_lineitem[LINEITEM::L_PARTKEY] = true; + + size_t ibatch = 0; + std::uniform_int_distribution dist(1, scale_factor_ * 200000); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_PARTKEY, batch_offset)); + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + int32_t *l_partkey = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_PARTKEY].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < next_run; i++, batch_offset++) + l_partkey[batch_offset] = dist(tld.rng); + + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_PARTKEY].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + 
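// Illustrative sketch (not part of the patch): how one thread's chunk of lineitem rows
// is laid out across fixed-size batches, as GenerateRowCounts() above sets up. The
// first batch may already be partially filled (first_batch_offset), so the batch count
// comes from first_batch_offset + total rows, and every batch then takes a run of
// min(rows remaining, space remaining). All sizes below are demo values.
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t batch_size = 32768;
  const int64_t first_batch_offset = 1000;  // rows carried over from the previous chunk
  const int64_t lineitem_to_generate = 70000;

  const int64_t num_batches =
      (first_batch_offset + lineitem_to_generate + batch_size - 1) / batch_size;
  std::printf("batches needed: %lld\n", static_cast<long long>(num_batches));

  int64_t batch_offset = first_batch_offset;
  for (int64_t irow = 0, ibatch = 0; irow < lineitem_to_generate; ibatch++) {
    const int64_t remaining_in_batch = batch_size - batch_offset;
    const int64_t next_run = std::min(lineitem_to_generate - irow, remaining_in_batch);
    std::printf("batch %lld: rows [%lld, %lld)\n", static_cast<long long>(ibatch),
                static_cast<long long>(batch_offset),
                static_cast<long long>(batch_offset + next_run));
    irow += next_run;
    batch_offset = 0;  // every batch after the first starts at offset 0
  }
}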
+ Status L_SUPPKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_SUPPKEY]) + { + tld.generated_lineitem[LINEITEM::L_SUPPKEY] = true; + RETURN_NOT_OK(L_PARTKEY(thread_index)); + + size_t ibatch = 0; + std::uniform_int_distribution dist(0, 3); + const int32_t S = scale_factor_ * 10000; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset = 0; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_SUPPKEY, batch_offset)); + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + int32_t *l_suppkey = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_SUPPKEY].array()->buffers[1]->mutable_data()); + const int32_t *l_partkey = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_PARTKEY].array()->buffers[1]->data()); + for(int64_t i = 0; i < next_run; i++) + { + int32_t supplier = dist(tld.rng); + int32_t partkey = l_partkey[batch_offset]; + // Fun fact: the parentheses for this expression are unbalanced in the TPC-H spec. + l_suppkey[batch_offset++] = (partkey + (supplier * ((S / 4) + (partkey - 1) / S))) % S + 1; + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_SUPPKEY].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_LINENUMBER(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_LINENUMBER]) + { + tld.generated_lineitem[LINEITEM::L_LINENUMBER] = true; + size_t ibatch = 0; + size_t iorder = 0; + int32_t iline = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_LINENUMBER, batch_offset)); + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + int32_t *l_linenumber = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_LINENUMBER].array()->buffers[1]->mutable_data()); + for(int64_t irun = 0; irun < next_run;) + { + for(; iline < tld.items_per_order[iorder] && irun < next_run; iline++, irun++) + { + l_linenumber[batch_offset++] = (iline + 1); + ARROW_DCHECK(1 <= (iline + 1) && (iline + 1) <= 7); + } + if(iline == tld.items_per_order[iorder]) + { + iline = 0; + iorder++; + } + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_LINENUMBER].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_QUANTITY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_QUANTITY]) + { + tld.generated_lineitem[LINEITEM::L_QUANTITY] = true; + + size_t ibatch = 0; + std::uniform_int_distribution dist(1, 50); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_QUANTITY, batch_offset)); + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + Decimal128 *l_quantity = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_QUANTITY].array()->buffers[1]->mutable_data()); + for(int64_t i = 0; i < next_run; i++) + { + // Multiply by 100 because the type is decimal(12, 2), so the decimal goes after 
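// Illustrative sketch (not part of the patch): the L_SUPPKEY formula above (TPC-H
// section 4.2.3), which maps a part key and a supplier slot in [0, 3] to one of the
// S = SF * 10000 supplier keys, always landing in [1, S]. Scale factor is a demo value.
#include <cassert>
#include <cstdint>
#include <cstdio>

int32_t SuppKeyFor(int32_t partkey, int32_t supplier /* 0..3 */, int32_t S) {
  return (partkey + (supplier * ((S / 4) + (partkey - 1) / S))) % S + 1;
}

int main() {
  const int32_t scale_factor = 1;  // demo assumption
  const int32_t S = scale_factor * 10000;
  for (int32_t partkey = 1; partkey <= scale_factor * 200000; partkey++)
    for (int32_t supplier = 0; supplier < 4; supplier++) {
      const int32_t k = SuppKeyFor(partkey, supplier, S);
      assert(1 <= k && k <= S);
    }
  std::printf("suppliers of part 42: %d %d %d %d\n", SuppKeyFor(42, 0, S),
              SuppKeyFor(42, 1, S), SuppKeyFor(42, 2, S), SuppKeyFor(42, 3, S));
}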
two digits + int64_t quantity = dist(tld.rng) * 100; + l_quantity[batch_offset++] = { quantity }; + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_QUANTITY].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_EXTENDEDPRICE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_EXTENDEDPRICE]) + { + tld.generated_lineitem[LINEITEM::L_EXTENDEDPRICE] = true; + RETURN_NOT_OK(L_PARTKEY(thread_index)); + RETURN_NOT_OK(L_QUANTITY(thread_index)); + size_t ibatch = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_EXTENDEDPRICE, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + const int32_t *l_partkey = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_PARTKEY].array()->buffers[1]->data()); + const Decimal128 *l_quantity = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_QUANTITY].array()->buffers[1]->data()); + Decimal128 *l_extendedprice = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_EXTENDEDPRICE].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + { + int64_t partkey = static_cast(l_partkey[batch_offset]); + // Divide by 100 to recover the integer representation (not Decimal). + int64_t quantity = static_cast(l_quantity[batch_offset]) / 100; + + // Spec says to divide by 100, but that happens automatically due to this being stored + // to two decimal points. + int64_t retail_price = (90000 + ((partkey / 10) % 20001) + 100 * (partkey % 1000)); + int64_t extended_price = retail_price * quantity; + l_extendedprice[batch_offset] = { extended_price }; + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_EXTENDEDPRICE].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_DISCOUNT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_DISCOUNT]) + { + tld.generated_lineitem[LINEITEM::L_DISCOUNT] = true; + size_t ibatch = 0; + std::uniform_int_distribution dist(0, 10); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_DISCOUNT, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + Decimal128 *l_discount = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_DISCOUNT].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + l_discount[batch_offset] = { dist(tld.rng) }; + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_DISCOUNT].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_TAX(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_TAX]) + { + tld.generated_lineitem[LINEITEM::L_TAX] = true; + size_t ibatch = 0; + std::uniform_int_distribution dist(0, 8); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_TAX, batch_offset)); + int64_t 
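// Illustrative sketch (not part of the patch): the retail-price / extended-price
// arithmetic above, carried out on the cents-scaled decimal(12, 2) representation. The
// spec's "divide by 100" never has to happen explicitly because the stored value keeps
// two decimal places.
#include <cstdint>
#include <cstdio>

int64_t RetailPriceCents(int64_t partkey) {
  return 90000 + ((partkey / 10) % 20001) + 100 * (partkey % 1000);
}

int main() {
  const int64_t partkey = 12345;
  const int64_t quantity = 7;                         // L_QUANTITY as a plain integer
  const int64_t retail = RetailPriceCents(partkey);   // 90000 + 1234 + 34500 = 125734
  const int64_t extended = retail * quantity;         // 880138, i.e. 8801.38
  std::printf("retail %lld extended %lld\n", static_cast<long long>(retail),
              static_cast<long long>(extended));
}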
remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + Decimal128 *l_tax = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_TAX].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + l_tax[batch_offset] = { dist(tld.rng) }; + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_TAX].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_RETURNFLAG(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_RETURNFLAG]) + { + tld.generated_lineitem[LINEITEM::L_RETURNFLAG] = true; + RETURN_NOT_OK(L_RECEIPTDATE(thread_index)); + size_t ibatch = 0; + std::uniform_int_distribution dist; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_RETURNFLAG, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + char *l_returnflag = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_RETURNFLAG].array()->buffers[1]->mutable_data()); + const uint32_t *l_receiptdate = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_RECEIPTDATE].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + { + if(l_receiptdate[batch_offset] <= CURRENTDATE) + { + uint32_t r = dist(tld.rng); + l_returnflag[batch_offset] = (r % 2 == 1) ? 'R' : 'A'; + } + else + { + l_returnflag[batch_offset] = 'N'; + } + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_RETURNFLAG].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_LINESTATUS(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_LINESTATUS]) + { + tld.generated_lineitem[LINEITEM::L_LINESTATUS] = true; + RETURN_NOT_OK(L_SHIPDATE(thread_index)); + size_t ibatch = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_LINESTATUS, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + char *l_linestatus = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_LINESTATUS].array()->buffers[1]->mutable_data()); + const uint32_t *l_shipdate = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_SHIPDATE].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + { + if(l_shipdate[batch_offset] > CURRENTDATE) + l_linestatus[batch_offset] = 'O'; + else + l_linestatus[batch_offset] = 'F'; + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_LINESTATUS].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_SHIPDATE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_SHIPDATE]) + { + tld.generated_lineitem[LINEITEM::L_SHIPDATE] = true; + RETURN_NOT_OK(O_ORDERDATE(thread_index)); + const int32_t *o_orderdate = reinterpret_cast( + tld.orders[ORDERS::O_ORDERDATE].array()->buffers[1]->data()); + std::uniform_int_distribution dist(1, 121); + size_t ibatch = 0; + 
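// Illustrative sketch (not part of the patch): the L_RETURNFLAG / L_LINESTATUS rules
// above as plain functions of the date columns. Dates are date32 day numbers since
// 1970-01-01; kCurrentDate is a stand-in for the CURRENTDATE constant defined elsewhere
// in this file (TPC-H pins it at 1995-06-17, which is day 9298).
#include <cstdint>
#include <cstdio>
#include <random>

constexpr uint32_t kCurrentDate = 9298;

char LineStatus(uint32_t shipdate) { return shipdate > kCurrentDate ? 'O' : 'F'; }

char ReturnFlag(uint32_t receiptdate, std::mt19937& rng) {
  if (receiptdate > kCurrentDate) return 'N';  // not yet received
  return (rng() % 2 == 1) ? 'R' : 'A';         // received: randomly returned or accepted
}

int main() {
  std::mt19937 rng(0);
  std::printf("%c %c\n", ReturnFlag(9200, rng), LineStatus(9400));
}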
size_t iorder = 0; + int32_t iline = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_SHIPDATE, batch_offset)); + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + uint32_t *l_shipdate = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_SHIPDATE].array()->buffers[1]->mutable_data()); + for(int64_t irun = 0; irun < next_run;) + { + for(; iline < tld.items_per_order[iorder] && irun < next_run; iline++, irun++) + l_shipdate[batch_offset++] = o_orderdate[iorder] + dist(tld.rng); + if(iline == tld.items_per_order[iorder]) + { + iline = 0; + iorder++; + } + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_SHIPDATE].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_COMMITDATE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_COMMITDATE]) + { + tld.generated_lineitem[LINEITEM::L_COMMITDATE] = true; + const int32_t *o_orderdate = reinterpret_cast( + tld.orders[ORDERS::O_ORDERDATE].array()->buffers[1]->data()); + std::uniform_int_distribution dist(30, 90); + size_t ibatch = 0; + size_t iorder = 0; + int32_t iline = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_COMMITDATE, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + uint32_t *l_commitdate = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_COMMITDATE].array()->buffers[1]->mutable_data()); + for(int64_t irun = 0; irun < next_run;) + { + for(; iline < tld.items_per_order[iorder] && irun < next_run; iline++, irun++) + l_commitdate[batch_offset++] = o_orderdate[iorder] + dist(tld.rng); + if(iline == tld.items_per_order[iorder]) + { + iline = 0; + iorder++; + } + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_COMMITDATE].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_RECEIPTDATE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_RECEIPTDATE]) + { + tld.generated_lineitem[LINEITEM::L_RECEIPTDATE] = true; + RETURN_NOT_OK(L_SHIPDATE(thread_index)); + size_t ibatch = 0; + std::uniform_int_distribution dist(1, 30); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_RECEIPTDATE, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + uint32_t *l_receiptdate = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_RECEIPTDATE].array()->buffers[1]->mutable_data()); + const uint32_t *l_shipdate = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_SHIPDATE].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + l_receiptdate[batch_offset] = l_shipdate[batch_offset] + dist(tld.rng); + + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_RECEIPTDATE].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + 
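// Illustrative sketch (not part of the patch): the date relationships above, with every
// value a date32 day number. SHIPDATE falls 1-121 days after O_ORDERDATE, COMMITDATE
// 30-90 days after O_ORDERDATE, and RECEIPTDATE 1-30 days after SHIPDATE.
#include <cstdint>
#include <cstdio>
#include <random>

int main() {
  std::mt19937 rng(7);
  const uint32_t orderdate = 9000;  // arbitrary demo day number
  const uint32_t shipdate = orderdate + std::uniform_int_distribution<uint32_t>(1, 121)(rng);
  const uint32_t commitdate = orderdate + std::uniform_int_distribution<uint32_t>(30, 90)(rng);
  const uint32_t receiptdate = shipdate + std::uniform_int_distribution<uint32_t>(1, 30)(rng);
  std::printf("order %u ship %u commit %u receipt %u\n", orderdate, shipdate, commitdate,
              receiptdate);
}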
Status L_SHIPINSTRUCT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_SHIPINSTRUCT]) + { + tld.generated_lineitem[LINEITEM::L_SHIPINSTRUCT] = true; + int32_t byte_width = arrow::internal::GetByteWidth(*lineitem_types_[LINEITEM::L_SHIPINSTRUCT]); + size_t ibatch = 0; + std::uniform_int_distribution dist(0, kNumInstructions - 1); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_SHIPINSTRUCT, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + char *l_shipinstruct = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_SHIPINSTRUCT].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + { + const char *str = Instructions[dist(tld.rng)]; + // Note that we don't have to memset the buffer to 0 because strncpy pads each string + // with 0's anyway + std::strncpy(l_shipinstruct + batch_offset * byte_width, str, byte_width); + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_SHIPINSTRUCT].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_SHIPMODE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_SHIPMODE]) + { + tld.generated_lineitem[LINEITEM::L_SHIPMODE] = true; + int32_t byte_width = arrow::internal::GetByteWidth(*lineitem_types_[LINEITEM::L_SHIPMODE]); + size_t ibatch = 0; + std::uniform_int_distribution dist(0, kNumModes - 1); + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + size_t batch_offset; + RETURN_NOT_OK(AllocateLineItemBufferIfNeeded(thread_index, ibatch, LINEITEM::L_SHIPMODE, batch_offset)); + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + char *l_shipmode = reinterpret_cast( + tld.lineitem[ibatch][LINEITEM::L_SHIPMODE].array()->buffers[1]->mutable_data()); + + for(int64_t i = 0; i < next_run; i++, batch_offset++) + { + const char *str = Modes[dist(tld.rng)]; + std::strncpy(l_shipmode + batch_offset * byte_width, str, byte_width); + } + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_SHIPMODE].array()->length = static_cast(batch_offset); + } + } + return Status::OK(); + } + + Status L_COMMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(!tld.generated_lineitem[LINEITEM::L_COMMENT]) + { + tld.generated_lineitem[LINEITEM::L_COMMENT] = true; + + size_t batch_offset = tld.first_batch_offset; + size_t ibatch = 0; + for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) + { + // Comments are kind of sneaky: we always generate the full batch and then just bump the length + if(tld.lineitem[ibatch][LINEITEM::L_COMMENT].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE(tld.lineitem[ibatch][LINEITEM::L_COMMENT], g_text.GenerateComments(batch_size_, 10, 43, tld.rng)); + batch_offset = 0; + } + + int64_t remaining_in_batch = static_cast(batch_size_ - batch_offset); + int64_t next_run = std::min(tld.lineitem_to_generate - irow, remaining_in_batch); + + batch_offset += next_run; + irow += next_run; + tld.lineitem[ibatch][LINEITEM::L_COMMENT].array()->length = batch_offset; + } + } + return Status::OK(); + } + + struct 
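// Illustrative sketch (not part of the patch): why the fixed_size_binary columns above
// can skip a memset. std::strncpy copies the source and then fills the rest of the
// destination with '\0' up to the requested width, so every fixed-width slot ends up
// fully initialized by the single call.
#include <cstdio>
#include <cstring>

int main() {
  char slot[10];
  std::memset(slot, 'x', sizeof(slot));      // pretend the buffer holds garbage
  std::strncpy(slot, "RAIL", sizeof(slot));  // copies "RAIL", zero-fills the remainder
  for (size_t i = 0; i < sizeof(slot); i++)
    std::printf("%d ", slot[i]);             // 82 65 73 76 0 0 0 0 0 0
  std::printf("\n");
}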
ThreadLocalData + { + std::vector orders; + int64_t orders_to_generate; + int64_t orderkey_start; + + std::vector> lineitem; + std::vector items_per_order; + int64_t lineitem_to_generate; + int64_t first_batch_offset; + std::bitset generated_lineitem; + random::pcg32_fast rng; + }; + std::vector thread_local_data_; + + bool inited_ = false; + std::mutex orders_output_queue_mutex_; + std::mutex lineitem_output_queue_mutex_; + std::queue orders_output_queue_; + std::queue lineitem_output_queue_; + int64_t batch_size_; + int scale_factor_; + int64_t orders_rows_to_generate_; + int64_t orders_rows_generated_; + std::vector orders_cols_; + std::vector lineitem_cols_; + }; + + class SupplierGenerator : public TpchTableGenerator + { + public: + Status Init( + std::vector columns, + int scale_factor, + int64_t batch_size) override + { + scale_factor_ = scale_factor; + batch_size_ = batch_size; + rows_to_generate_ = scale_factor_ * 10000; + rows_generated_.store(0); + ARROW_ASSIGN_OR_RAISE(schema_, SetOutputColumns( + columns, + types_, + name_map_, + gen_list_)); + + random::pcg32_fast rng; + std::uniform_int_distribution dist(0, rows_to_generate_ - 1); + size_t num_special_rows = static_cast(5 * scale_factor_); + std::unordered_set good_rows_set; + while(good_rows_set.size() < num_special_rows) + { + good_rows_set.insert(dist(rng)); + } + std::unordered_set bad_rows_set; + while(bad_rows_set.size() < num_special_rows) + { + int64_t bad_row; + do + { + bad_row = dist(rng); + } while(good_rows_set.find(bad_row) != good_rows_set.end()); + } + good_rows_.clear(); + bad_rows_.clear(); + good_rows_.insert(good_rows_.end(), good_rows_set.begin(), good_rows_set.end()); + bad_rows_.insert(bad_rows_.end(), bad_rows_set.begin(), bad_rows_set.end()); + std::sort(good_rows_.begin(), good_rows_.end()); + std::sort(bad_rows_.begin(), bad_rows_.end()); + return Status::OK(); + } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback schedule_callback) override + { + thread_local_data_.resize(num_threads); + output_callback_ = std::move(output_callback); + finished_callback_ = std::move(finished_callback); + schedule_callback_ = std::move(schedule_callback); + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: +#define FOR_EACH_COLUMN(F) \ + F(S_SUPPKEY) \ + F(S_NAME) \ + F(S_ADDRESS) \ + F(S_NATIONKEY) \ + F(S_PHONE) \ + F(S_ACCTBAL) \ + F(S_COMMENT) + +#define MAKE_ENUM(col) col, + struct SUPPLIER + { + enum + { + FOR_EACH_COLUMN(MAKE_ENUM) + kNumCols, + }; + }; +#undef MAKE_ENUM +#define MAKE_STRING_MAP(col) \ + { #col, SUPPLIER::col }, + const std::unordered_map name_map_ = + { + FOR_EACH_COLUMN(MAKE_STRING_MAP) + }; +#undef MAKE_STRING_MAP +#define MAKE_FN_ARRAY(col) \ + [this](size_t thread_index) { return this->col(thread_index); }, + std::vector generators_ = + { + FOR_EACH_COLUMN(MAKE_FN_ARRAY) + }; +#undef MAKE_FN_ARRAY +#undef FOR_EACH_COLUMN + + std::vector> types_ = + { + int32(), + fixed_size_binary(25), + utf8(), + int32(), + fixed_size_binary(15), + decimal(12, 2), + utf8(), + }; + + Status ProduceCallback(size_t thread_index) + { + if(done_.load()) + return Status::OK(); + ThreadLocalData &tld = thread_local_data_[thread_index]; + tld.suppkey_start = rows_generated_.fetch_add(batch_size_); + if(tld.suppkey_start >= rows_to_generate_) + return Status::OK(); + + 
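// Illustrative sketch (not part of the patch): drawing the two disjoint sets of
// "special" supplier rows that Init() above intends for the Recommends/Complaints
// comments. Note that the second while loop in the draft never inserts bad_row into
// bad_rows_set, so as written it would not terminate; the sketch includes the missing
// insert. Sizes are demo values.
#include <algorithm>
#include <cstdint>
#include <random>
#include <unordered_set>
#include <vector>

int main() {
  const int64_t rows = 10000;    // stands in for SF * 10000
  const size_t num_special = 5;  // stands in for 5 * SF
  std::mt19937_64 rng(123);
  std::uniform_int_distribution<int64_t> dist(0, rows - 1);

  std::unordered_set<int64_t> good_set;
  while (good_set.size() < num_special) good_set.insert(dist(rng));

  std::unordered_set<int64_t> bad_set;
  while (bad_set.size() < num_special) {
    const int64_t candidate = dist(rng);
    if (good_set.count(candidate) == 0) bad_set.insert(candidate);  // keep the sets disjoint
  }

  // Sorted vectors enable the std::lower_bound lookup done per batch later on.
  std::vector<int64_t> good_rows(good_set.begin(), good_set.end());
  std::vector<int64_t> bad_rows(bad_set.begin(), bad_set.end());
  std::sort(good_rows.begin(), good_rows.end());
  std::sort(bad_rows.begin(), bad_rows.end());
  return 0;
}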
tld.to_generate = std::min(batch_size_, + rows_to_generate_ - tld.suppkey_start); + bool is_last_batch = tld.to_generate < batch_size_; + + tld.batch.clear(); + tld.batch.resize(SUPPLIER::kNumCols); + for(int col : gen_list_) + RETURN_NOT_OK(generators_[col](thread_index)); + + std::vector result(gen_list_.size()); + for(size_t i = 0; i < gen_list_.size(); i++) + { + int col_idx = gen_list_[i]; + result[i] = tld.batch[col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(result))); + batches_generated_++; + output_callback_(std::move(eb)); + if(is_last_batch) + { + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + { + finished_callback_(batches_generated_.load()); + } + return Status::OK(); + } + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + Status AllocateColumn(size_t thread_index, int column) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + ARROW_DCHECK(tld.batch[column].kind() == Datum::NONE); + int32_t byte_width = arrow::internal::GetByteWidth(*types_[column]); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(tld.to_generate * byte_width)); + ArrayData ad(types_[column], tld.to_generate, { nullptr, std::move(buff) }); + tld.batch[column] = std::move(ad); + return Status::OK(); + } + + Status S_SUPPKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_SUPPKEY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateColumn(thread_index, SUPPLIER::S_SUPPKEY)); + int32_t *s_suppkey = reinterpret_cast( + tld.batch[SUPPLIER::S_SUPPKEY].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + s_suppkey[irow] = (tld.suppkey_start + irow + 1); + } + } + return Status::OK(); + } + + Status S_NAME(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_NAME].kind() == Datum::NONE) + { + RETURN_NOT_OK(S_SUPPKEY(thread_index)); + const int32_t *s_suppkey = reinterpret_cast( + tld.batch[SUPPLIER::S_SUPPKEY].array()->buffers[1]->data()); + RETURN_NOT_OK(AllocateColumn(thread_index, SUPPLIER::S_NAME)); + int32_t byte_width = arrow::internal::GetByteWidth(*types_[SUPPLIER::S_NAME]); + char *s_name = reinterpret_cast( + tld.batch[SUPPLIER::S_NAME].array()->buffers[1]->mutable_data()); + // Look man, I'm just following the spec ok? 
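// Illustrative sketch (not part of the patch): the lock-free work distribution used by
// ProduceCallback() above. Each call claims the next block of row ids with fetch_add; a
// start at or past the total row count means nothing is left, and a short final block
// is what signals the last batch (the generators rely on the final block being shorter
// than batch_size).
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdio>

std::atomic<int64_t> rows_generated{0};
constexpr int64_t kRowsToGenerate = 100000;
constexpr int64_t kBatchSize = 32768;

void ProduceOneChunk() {
  const int64_t start = rows_generated.fetch_add(kBatchSize);
  if (start >= kRowsToGenerate) return;  // another call already claimed the tail
  const int64_t to_generate = std::min(kBatchSize, kRowsToGenerate - start);
  const bool is_last_batch = to_generate < kBatchSize;
  std::printf("rows [%lld, %lld) last=%d\n", static_cast<long long>(start),
              static_cast<long long>(start + to_generate), is_last_batch ? 1 : 0);
}

int main() {
  for (int i = 0; i < 5; i++) ProduceOneChunk();  // single-threaded demo of the claim loop
}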
Section 4.2.3 as of March 1 2022 + const char *supplier = "Supplie#r"; + const size_t supplier_length = std::strlen(supplier); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + char *out = s_name + byte_width * irow; + std::memcpy(out, supplier, supplier_length); + AppendNumberPaddedToNineDigits(out + supplier_length, s_suppkey[irow]); + } + } + return Status::OK(); + } + + Status S_ADDRESS(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_ADDRESS].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE( + tld.batch[SUPPLIER::S_ADDRESS], + RandomVString(tld.rng, tld.to_generate, 10, 40)); + } + return Status::OK(); + } + + Status S_NATIONKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_NATIONKEY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateColumn(thread_index, SUPPLIER::S_NATIONKEY)); + std::uniform_int_distribution dist(0, 24); + int32_t *s_nationkey = reinterpret_cast( + tld.batch[SUPPLIER::S_NATIONKEY].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + s_nationkey[irow] = dist(tld.rng); + } + return Status::OK(); + } + + Status S_PHONE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_PHONE].kind() == Datum::NONE) + { + RETURN_NOT_OK(S_NATIONKEY(thread_index)); + RETURN_NOT_OK(AllocateColumn(thread_index, SUPPLIER::S_PHONE)); + int32_t byte_width = arrow::internal::GetByteWidth(*types_[SUPPLIER::S_PHONE]); + const int32_t *s_nationkey = reinterpret_cast( + tld.batch[SUPPLIER::S_NATIONKEY].array()->buffers[1]->data()); + char *s_phone = reinterpret_cast( + tld.batch[SUPPLIER::S_PHONE].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + GeneratePhoneNumber( + s_phone + irow * byte_width, + tld.rng, + s_nationkey[irow]); + } + } + return Status::OK(); + } + + Status S_ACCTBAL(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_ACCTBAL].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateColumn(thread_index, SUPPLIER::S_ACCTBAL)); + Decimal128 *s_acctbal = reinterpret_cast( + tld.batch[SUPPLIER::S_ACCTBAL].array()->buffers[1]->mutable_data()); + std::uniform_int_distribution dist(-99999, 999999); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + s_acctbal[irow] = { dist(tld.rng) }; + } + return Status::OK(); + } + + Status S_COMMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[SUPPLIER::S_COMMENT].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE(tld.batch[SUPPLIER::S_COMMENT], g_text.GenerateComments(batch_size_, 25, 100, tld.rng)); + ModifyComments(thread_index, "Recommends", good_rows_); + ModifyComments(thread_index, "Complaints", bad_rows_); + } + return Status::OK(); + } + + void ModifyComments( + size_t thread_index, + const char *review, + const std::vector &indices) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + const int32_t *offsets = reinterpret_cast( + tld.batch[SUPPLIER::S_COMMENT].array()->buffers[2]->data()); + char *str = reinterpret_cast( + tld.batch[SUPPLIER::S_COMMENT].array()->buffers[1]->mutable_data()); + const char *customer = "Customer"; + const size_t customer_length = std::strlen(customer); + const size_t review_length = std::strlen(review); + + auto it = std::lower_bound(indices.begin(), indices.end(), tld.suppkey_start); + for(; it != 
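// Illustrative sketch (not part of the patch): the "prefix + number padded to nine
// digits" convention used for S_NAME above and for O_CLERK / C_NAME elsewhere in this
// file. PadToNineDigits is a standalone stand-in for the patch's
// AppendNumberPaddedToNineDigits helper (defined earlier in tpch_node.cc), shown here
// with the O_CLERK prefix.
#include <cstdint>
#include <cstdio>
#include <cstring>

void PadToNineDigits(char* out, int64_t value) {
  char digits[10];
  std::snprintf(digits, sizeof(digits), "%09lld", static_cast<long long>(value));
  std::memcpy(out, digits, 9);  // fixed-width column: no trailing '\0' required
}

int main() {
  char o_clerk[16] = {};  // O_CLERK is fixed_size_binary(15); one extra byte for printing
  const char* prefix = "Clerk#";
  std::memcpy(o_clerk, prefix, std::strlen(prefix));
  PadToNineDigits(o_clerk + std::strlen(prefix), 42);
  std::printf("%s\n", o_clerk);  // Clerk#000000042
}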
indices.end() && *it < tld.suppkey_start + tld.to_generate; it++) + { + int64_t idx_in_batch = *it - tld.suppkey_start; + char *out = str + offsets[idx_in_batch]; + int32_t str_length = offsets[idx_in_batch + 1] - offsets[idx_in_batch]; + std::uniform_int_distribution gap_dist(0, str_length - customer_length - review_length); + int32_t gap = gap_dist(tld.rng); + int32_t total_length = customer_length + gap + review_length; + std::uniform_int_distribution start_dist(0, str_length - total_length); + int32_t start = start_dist(tld.rng); + std::memcpy(out + start, customer, customer_length); + std::memcpy(out + start + gap, review, review_length); + } + } + + struct ThreadLocalData + { + random::pcg32_fast rng; + int64_t suppkey_start; + int64_t to_generate; + std::vector batch; + }; + std::vector thread_local_data_; + std::vector good_rows_; + std::vector bad_rows_; + + OutputBatchCallback output_callback_; + FinishedCallback finished_callback_; + ScheduleCallback schedule_callback_; + int64_t rows_to_generate_; + std::atomic rows_generated_; + int scale_factor_; + int64_t batch_size_; + std::vector gen_list_; + std::shared_ptr schema_; + }; + + class PartGenerator : public TpchTableGenerator + { + public: + PartGenerator(std::shared_ptr gen) + : gen_(std::move(gen)) + { + batches_generated_.store(0); + } + + Status Init( + std::vector columns, + int scale_factor, + int64_t batch_size) override + { + scale_factor_ = scale_factor; + batch_size_ = batch_size; + ARROW_ASSIGN_OR_RAISE(schema_, + gen_->SetPartOutputColumns(columns)); + return Status::OK(); + } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback schedule_callback) override + { + RETURN_NOT_OK(gen_->Init(num_threads, batch_size_, scale_factor_)); + output_callback_ = std::move(output_callback); + finished_callback_ = std::move(finished_callback); + schedule_callback_ = std::move(schedule_callback); + + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: + Status ProduceCallback(size_t thread_index) + { + ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, + gen_->NextPartBatch(thread_index)); + if(done_.load() || !maybe_batch.has_value()) + { + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + { + finished_callback_(batches_generated_.load()); + } + return Status::OK(); + } + ExecBatch batch = std::move(*maybe_batch); + batches_generated_++; + output_callback_(std::move(batch)); + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + OutputBatchCallback output_callback_; + FinishedCallback finished_callback_; + ScheduleCallback schedule_callback_; + int64_t batch_size_; + int64_t scale_factor_; + std::shared_ptr gen_; + std::shared_ptr schema_; + }; + + class PartSuppGenerator : public TpchTableGenerator + { + public: + PartSuppGenerator(std::shared_ptr gen) + : gen_(std::move(gen)) + { + batches_generated_.store(0); + } + + Status Init( + std::vector columns, + int scale_factor, + int64_t batch_size) override + { + scale_factor_ = scale_factor; + batch_size_ = batch_size; + ARROW_ASSIGN_OR_RAISE(schema_, + gen_->SetPartSuppOutputColumns(columns)); + return Status::OK(); + } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback 
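// Illustrative sketch (not part of the patch): embedding "Customer ... Recommends" or
// "Customer ... Complaints" into an existing comment, as ModifyComments() above does.
// The spec wants the literal "Customer", some gap characters, then the review word.
// The draft writes the review at out + start + gap, which can overlap the "Customer"
// prefix whenever gap < strlen("Customer"); this sketch assumes the intent is
// start + strlen("Customer") + gap.
#include <cstdio>
#include <cstring>
#include <random>
#include <string>

void EmbedReview(std::string& comment, const char* review, std::mt19937& rng) {
  const char* customer = "Customer";
  const int cust_len = static_cast<int>(std::strlen(customer));
  const int rev_len = static_cast<int>(std::strlen(review));
  const int len = static_cast<int>(comment.size());

  const int gap = std::uniform_int_distribution<int>(0, len - cust_len - rev_len)(rng);
  const int total = cust_len + gap + rev_len;
  const int start = std::uniform_int_distribution<int>(0, len - total)(rng);

  comment.replace(start, cust_len, customer);
  comment.replace(start + cust_len + gap, rev_len, review);
}

int main() {
  std::mt19937 rng(1);
  std::string comment(40, '.');  // stand-in for a generated comment string
  EmbedReview(comment, "Recommends", rng);
  std::printf("%s\n", comment.c_str());
}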
schedule_callback) override + { + RETURN_NOT_OK(gen_->Init(num_threads, batch_size_, scale_factor_)); + output_callback_ = std::move(output_callback); + finished_callback_ = std::move(finished_callback); + schedule_callback_ = std::move(schedule_callback); + + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: + Status ProduceCallback(size_t thread_index) + { + ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, + gen_->NextPartSuppBatch(thread_index)); + if(done_.load() || !maybe_batch.has_value()) + { + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + { + finished_callback_(batches_generated_.load()); + } + return Status::OK(); + } + ExecBatch batch = std::move(*maybe_batch); + batches_generated_++; + output_callback_(std::move(batch)); + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + OutputBatchCallback output_callback_; + FinishedCallback finished_callback_; + ScheduleCallback schedule_callback_; + int64_t batch_size_; + int64_t scale_factor_; + std::shared_ptr gen_; + std::shared_ptr schema_; + }; + + class CustomerGenerator : public TpchTableGenerator + { + public: + Status Init( + std::vector columns, + int scale_factor, + int64_t batch_size) override + { + scale_factor_ = scale_factor; + batch_size_ = batch_size; + rows_to_generate_ = scale_factor_ * 150000; + rows_generated_.store(0); + ARROW_ASSIGN_OR_RAISE(schema_, SetOutputColumns( + columns, + types_, + name_map_, + gen_list_)); + return Status::OK(); + } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback schedule_callback) override + { + thread_local_data_.resize(num_threads); + output_callback_ = std::move(output_callback); + finished_callback_ = std::move(finished_callback); + schedule_callback_ = std::move(schedule_callback); + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: +#define FOR_EACH_COLUMN(F) \ + F(C_CUSTKEY) \ + F(C_NAME) \ + F(C_ADDRESS) \ + F(C_NATIONKEY) \ + F(C_PHONE) \ + F(C_ACCTBAL) \ + F(C_MKTSEGMENT) \ + F(C_COMMENT) + +#define MAKE_ENUM(col) col, + struct CUSTOMER + { + enum + { + FOR_EACH_COLUMN(MAKE_ENUM) + kNumCols, + }; + }; +#undef MAKE_ENUM +#define MAKE_STRING_MAP(col) \ + { #col, CUSTOMER::col }, + const std::unordered_map name_map_ = + { + FOR_EACH_COLUMN(MAKE_STRING_MAP) + }; +#undef MAKE_STRING_MAP +#define MAKE_FN_ARRAY(col) \ + [this](size_t thread_index) { return this->col(thread_index); }, + std::vector generators_ = + { + FOR_EACH_COLUMN(MAKE_FN_ARRAY) + }; +#undef MAKE_FN_ARRAY +#undef FOR_EACH_COLUMN + + std::vector> types_ = + { + int32(), + utf8(), + utf8(), + int32(), + fixed_size_binary(15), + decimal(12, 2), + fixed_size_binary(10), + utf8(), + }; + + Status ProduceCallback(size_t thread_index) + { + if(done_.load()) + return Status::OK(); + ThreadLocalData &tld = thread_local_data_[thread_index]; + tld.custkey_start = rows_generated_.fetch_add(batch_size_); + if(tld.custkey_start >= rows_to_generate_) + return Status::OK(); + + tld.to_generate = std::min(batch_size_, + rows_to_generate_ - tld.custkey_start); + bool is_last_batch = tld.to_generate < batch_size_; + + tld.batch.clear(); + 
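// Illustrative sketch (not part of the patch): the termination handshake used by the
// wrapper generators above. Several concurrent callbacks can observe "no more batches"
// at once, so the finished callback is guarded by a compare-exchange to guarantee it
// fires exactly once.
#include <atomic>
#include <cstdint>
#include <cstdio>

std::atomic<bool> done{false};

void OnStreamExhausted(int64_t batches_generated) {
  bool expected = false;
  if (done.compare_exchange_strong(expected, true)) {
    std::printf("finished after %lld batches\n", static_cast<long long>(batches_generated));
  }
}

int main() {
  OnStreamExhausted(10);
  OnStreamExhausted(10);  // later calls are no-ops; the callback already ran
}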
tld.batch.resize(CUSTOMER::kNumCols); + for(int col : gen_list_) + RETURN_NOT_OK(generators_[col](thread_index)); + + std::vector result(gen_list_.size()); + for(size_t i = 0; i < gen_list_.size(); i++) + { + int col_idx = gen_list_[i]; + result[i] = tld.batch[col_idx]; + } + ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(result))); + batches_generated_++; + output_callback_(std::move(eb)); + if(is_last_batch) + { + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + { + finished_callback_(batches_generated_.load()); + } + return Status::OK(); + } + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + Status AllocateColumn(size_t thread_index, int column) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + ARROW_DCHECK(tld.batch[column].kind() == Datum::NONE); + int32_t byte_width = arrow::internal::GetByteWidth(*types_[column]); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(tld.to_generate * byte_width)); + ArrayData ad(types_[column], tld.to_generate, { nullptr, std::move(buff) }); + tld.batch[column] = std::move(ad); + return Status::OK(); + } + + Status C_CUSTKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_CUSTKEY].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateColumn(thread_index, CUSTOMER::C_CUSTKEY)); + int32_t *c_custkey = reinterpret_cast( + tld.batch[CUSTOMER::C_CUSTKEY].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + c_custkey[irow] = (tld.custkey_start + irow + 1); + } + } + return Status::OK(); + } + + Status C_NAME(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_NAME].kind() == Datum::NONE) + { + RETURN_NOT_OK(C_CUSTKEY(thread_index)); + const int32_t *c_custkey = reinterpret_cast( + tld.batch[CUSTOMER::C_CUSTKEY].array()->buffers[1]->data()); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr offset_buff, AllocateBuffer((tld.to_generate + 1) * sizeof(int32_t))); + int32_t *offsets = reinterpret_cast(offset_buff->mutable_data()); + const char *customer = "Customer#"; + const size_t customer_length = std::strlen(customer); + offsets[0] = 0; + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + int num_digits = GetNumDigits(c_custkey[irow]); + int num_chars = std::max(num_digits, 9); + offsets[irow + 1] = offsets[irow] + num_chars + customer_length; + } + ARROW_ASSIGN_OR_RAISE(std::unique_ptr str_buff, AllocateBuffer(offsets[tld.to_generate])); + char *str = reinterpret_cast(str_buff->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + char *out = str + offsets[irow]; + std::memcpy(out, customer, customer_length); + AppendNumberPaddedToNineDigits(out + customer_length, c_custkey[irow]); + } + ArrayData ad(utf8(), tld.to_generate, { nullptr, std::move(str_buff), std::move(offset_buff) }); + tld.batch[CUSTOMER::C_NAME] = std::move(ad); + } + return Status::OK(); + } + + Status C_ADDRESS(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_ADDRESS].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE( + tld.batch[CUSTOMER::C_ADDRESS], + RandomVString(tld.rng, tld.to_generate, 10, 40)); + } + return Status::OK(); + } + + Status C_NATIONKEY(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_NATIONKEY].kind() == Datum::NONE) + { + 
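// Illustrative sketch (not part of the patch): the two-pass construction C_NAME above
// uses for a variable-length string column: first compute the offsets (running total of
// string lengths), then fill one contiguous data buffer at those positions. Plain
// std::vector stands in for the Arrow buffers. For reference, Arrow's utf8 layout keeps
// the offsets buffer at ArrayData::buffers[1] and the character data at buffers[2]; the
// { nullptr, str_buff, offset_buff } order passed above looks reversed relative to that.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> values = {"Customer#000000001", "Customer#000000002"};

  // Pass 1: offsets. offsets[i + 1] - offsets[i] is the length of value i.
  std::vector<int32_t> offsets(values.size() + 1, 0);
  for (size_t i = 0; i < values.size(); i++)
    offsets[i + 1] = offsets[i] + static_cast<int32_t>(values[i].size());

  // Pass 2: one contiguous character buffer, filled at the precomputed positions.
  std::vector<char> data(offsets.back());
  for (size_t i = 0; i < values.size(); i++)
    std::memcpy(data.data() + offsets[i], values[i].data(), values[i].size());

  for (size_t i = 0; i < values.size(); i++)
    std::printf("[%d, %d) %.*s\n", offsets[i], offsets[i + 1],
                offsets[i + 1] - offsets[i], data.data() + offsets[i]);
}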
RETURN_NOT_OK(AllocateColumn(thread_index, CUSTOMER::C_NATIONKEY)); + std::uniform_int_distribution dist(0, 24); + int32_t *c_nationkey = reinterpret_cast( + tld.batch[CUSTOMER::C_NATIONKEY].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + c_nationkey[irow] = dist(tld.rng); + } + return Status::OK(); + } + + Status C_PHONE(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_PHONE].kind() == Datum::NONE) + { + RETURN_NOT_OK(C_NATIONKEY(thread_index)); + RETURN_NOT_OK(AllocateColumn(thread_index, CUSTOMER::C_PHONE)); + int32_t byte_width = arrow::internal::GetByteWidth(*types_[CUSTOMER::C_PHONE]); + const int32_t *c_nationkey = reinterpret_cast( + tld.batch[CUSTOMER::C_NATIONKEY].array()->buffers[1]->data()); + char *c_phone = reinterpret_cast( + tld.batch[CUSTOMER::C_PHONE].array()->buffers[1]->mutable_data()); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + GeneratePhoneNumber( + c_phone + irow * byte_width, + tld.rng, + c_nationkey[irow]); + } + } + return Status::OK(); + } + + Status C_ACCTBAL(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_ACCTBAL].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateColumn(thread_index, CUSTOMER::C_ACCTBAL)); + Decimal128 *c_acctbal = reinterpret_cast( + tld.batch[CUSTOMER::C_ACCTBAL].array()->buffers[1]->mutable_data()); + std::uniform_int_distribution dist(-99999, 999999); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + c_acctbal[irow] = { dist(tld.rng) }; + } + return Status::OK(); + } + + Status C_MKTSEGMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_MKTSEGMENT].kind() == Datum::NONE) + { + RETURN_NOT_OK(AllocateColumn(thread_index, CUSTOMER::C_MKTSEGMENT)); + int32_t byte_width = arrow::internal::GetByteWidth(*types_[CUSTOMER::C_MKTSEGMENT]); + char *c_mktsegment = reinterpret_cast( + tld.batch[CUSTOMER::C_MKTSEGMENT].array()->buffers[1]->mutable_data()); + std::uniform_int_distribution dist(0, kNumSegments - 1); + for(int64_t irow = 0; irow < tld.to_generate; irow++) + { + char *out = c_mktsegment + irow * byte_width; + int str_idx = dist(tld.rng); + std::strncpy(out, Segments[str_idx], byte_width); + } + } + return Status::OK(); + } + + Status C_COMMENT(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + if(tld.batch[CUSTOMER::C_COMMENT].kind() == Datum::NONE) + { + ARROW_ASSIGN_OR_RAISE(tld.batch[CUSTOMER::C_COMMENT], g_text.GenerateComments(batch_size_, 29, 116, tld.rng)); + } + return Status::OK(); + } + + struct ThreadLocalData + { + random::pcg32_fast rng; + int64_t custkey_start; + int64_t to_generate; + std::vector batch; + }; + std::vector thread_local_data_; + + OutputBatchCallback output_callback_; + FinishedCallback finished_callback_; + ScheduleCallback schedule_callback_; + int64_t rows_to_generate_; + std::atomic rows_generated_; + int scale_factor_; + int64_t batch_size_; + std::vector gen_list_; + std::shared_ptr schema_; + }; + + class OrdersGenerator : public TpchTableGenerator + { + public: + OrdersGenerator(std::shared_ptr gen) + : gen_(std::move(gen)) + { + batches_generated_.store(0); + } + + Status Init( + std::vector columns, + int scale_factor, + int64_t batch_size) override + { + scale_factor_ = scale_factor; + batch_size_ = batch_size; + ARROW_ASSIGN_OR_RAISE(schema_, + gen_->SetOrdersOutputColumns(columns)); + return Status::OK(); 
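// Illustrative sketch (not part of the patch): the account-balance columns above
// (C_ACCTBAL, and S_ACCTBAL earlier) draw a uniform integer in [-99999, 999999], which
// is the cents-scaled form of the spec's [-999.99, 9999.99] range for a decimal(12, 2)
// value.
#include <cstdio>
#include <random>

int main() {
  std::mt19937 rng(3);
  std::uniform_int_distribution<int> dist(-99999, 999999);
  const int cents = dist(rng);
  std::printf("%d at the cents scale == %.2f\n", cents, cents / 100.0);
}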
+ } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback schedule_callback) override + { + RETURN_NOT_OK(gen_->Init(num_threads, batch_size_, scale_factor_)); + output_callback_ = std::move(output_callback); + finished_callback_ = std::move(finished_callback); + schedule_callback_ = std::move(schedule_callback); + + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: + Status ProduceCallback(size_t thread_index) + { + ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, + gen_->NextOrdersBatch(thread_index)); + if(done_.load() || !maybe_batch.has_value()) + { + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + { + finished_callback_(batches_generated_.load()); + } + return Status::OK(); + } + ExecBatch batch = std::move(*maybe_batch); + batches_generated_++; + output_callback_(std::move(batch)); + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + OutputBatchCallback output_callback_; + FinishedCallback finished_callback_; + ScheduleCallback schedule_callback_; + int64_t batch_size_; + int64_t scale_factor_; + std::shared_ptr gen_; + std::shared_ptr schema_; + }; + + class LineitemGenerator : public TpchTableGenerator + { + public: + LineitemGenerator(std::shared_ptr gen) + : gen_(std::move(gen)) + {} + + Status Init( + std::vector columns, + int scale_factor, + int64_t batch_size) override + { + scale_factor_ = scale_factor; + batch_size_ = batch_size; + ARROW_ASSIGN_OR_RAISE(schema_, + gen_->SetLineItemOutputColumns(columns)); + return Status::OK(); + } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback schedule_callback) override + { + RETURN_NOT_OK(gen_->Init(num_threads, batch_size_, scale_factor_)); + output_callback_ = std::move(output_callback); + finished_callback_ = std::move(finished_callback); + schedule_callback_ = std::move(schedule_callback); + + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: + Status ProduceCallback(size_t thread_index) + { + ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, + gen_->NextLineItemBatch(thread_index)); + if(!maybe_batch.has_value()) + { + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + { + finished_callback_(batches_generated_.load()); + } + return Status::OK(); + } + ExecBatch batch = std::move(*maybe_batch); + batches_generated_++; + output_callback_(std::move(batch)); + return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + } + + OutputBatchCallback output_callback_; + FinishedCallback finished_callback_; + ScheduleCallback schedule_callback_; + int64_t batch_size_; + int64_t scale_factor_; + std::shared_ptr gen_; + std::shared_ptr schema_; + }; + + class NationGenerator : public TpchTableGenerator + { + public: + Status Init( + std::vector columns, + int /*scale_factor*/, + int64_t /*batch_size*/) override + { + ARROW_ASSIGN_OR_RAISE(schema_, + SetOutputColumns( + columns, + types_, + name_map_, + column_indices_)); + return Status::OK(); + } + + Status StartProducing( + size_t /*num_threads*/, + OutputBatchCallback 
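// Illustrative sketch (not part of the patch): why OrdersGenerator and
// LineitemGenerator above both wrap one shared OrdersAndLineItemGenerator (and Part /
// PartSupp share a PartAndPartSupplierGenerator): the lineitem rows are derived from
// the same per-order state (keys, order dates, items-per-order counts), so two
// independent generators could not stay consistent. Minimal stand-in:
#include <cstdio>
#include <memory>
#include <numeric>
#include <vector>

struct SharedOrderState {
  std::vector<int> items_per_order = {3, 1, 2};  // demo: three orders
};

struct OrdersView {
  std::shared_ptr<SharedOrderState> state;
  size_t NumRows() const { return state->items_per_order.size(); }
};

struct LineitemView {
  std::shared_ptr<SharedOrderState> state;
  int NumRows() const {
    return std::accumulate(state->items_per_order.begin(), state->items_per_order.end(), 0);
  }
};

int main() {
  auto state = std::make_shared<SharedOrderState>();
  OrdersView orders{state};
  LineitemView lineitem{state};
  std::printf("%zu orders, %d lineitems\n", orders.NumRows(), lineitem.NumRows());
}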
output_callback, + FinishedCallback finished_callback, + ScheduleCallback /*schedule_task_callback*/) override + { + std::shared_ptr N_NATIONKEY_buffer = Buffer::Wrap(N_NATIONKEY, sizeof(N_NATIONKEY)); + ArrayData N_NATIONKEY_arraydata(int32(), kRowCount, { nullptr, std::move(N_NATIONKEY_buffer) }); + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr N_NAME_buffer, AllocateBuffer(kRowCount * kNameByteWidth)); + char *N_NAME = reinterpret_cast(N_NAME_buffer->mutable_data()); + for(size_t i = 0; i < kRowCount; i++) + std::strncpy(N_NAME + kNameByteWidth * i, country_names_[i], kNameByteWidth); + ArrayData N_NAME_arraydata(fixed_size_binary(kNameByteWidth), kRowCount, { nullptr, std::move(N_NAME_buffer) }); + + std::shared_ptr N_REGIONKEY_buffer = Buffer::Wrap(N_REGIONKEY, sizeof(N_REGIONKEY)); + ArrayData N_REGIONKEY_arraydata(int32(), kRowCount, { nullptr, std::move(N_REGIONKEY_buffer) }); + + ARROW_ASSIGN_OR_RAISE(Datum N_COMMENT_datum, g_text.GenerateComments(kRowCount, 31, 114, rng_)); + + std::vector fields = + { + std::move(N_NATIONKEY_arraydata), + std::move(N_NAME_arraydata), + std::move(N_REGIONKEY_arraydata), + std::move(N_COMMENT_datum) + }; + + std::vector result; + for(const int &col : column_indices_) + result.push_back(fields[col]); + ARROW_ASSIGN_OR_RAISE(ExecBatch batch, ExecBatch::Make(std::move(result))); + output_callback(std::move(batch)); + finished_callback(static_cast(1)); + return Status::OK(); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + private: + random::pcg32_fast rng_; + + static constexpr size_t kRowCount = 25; + static constexpr int32_t kNameByteWidth = 25; + const int32_t N_NATIONKEY[kRowCount] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }; + const char *country_names_[kRowCount] = + { + "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE", "GERMANY", + "INDONESIA", "IRAQ", "IRAN", "JAPAN", "JORDAN", "KENYA", "MOROCCO", "MOZAMBIQUE", "PERU", + "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM", "RUSSIA", "UNITED KINGDOM", "UNITED STATES" + }; + const int32_t N_REGIONKEY[kRowCount] = { 0, 1, 1, 1, 4, 0, 3, 3, 2, 2, 4, 4, 2, 4, 0, 0, 0, 1, 2, 3, 4, 2, 3, 3, 1 }; + + struct NATION + { + enum + { + N_NATIONKEY, + N_NAME, + N_REGIONKEY, + N_COMMENT, + }; + }; + + const std::unordered_map name_map_ = + { + { "N_NATIONKEY", NATION::N_NATIONKEY }, + { "N_NAME", NATION::N_NAME }, + { "N_REGIONKEY", NATION::N_REGIONKEY }, + { "N_COMMENT", NATION::N_COMMENT }, + }; + + std::vector> types_ = + { + int32(), + fixed_size_binary(kNameByteWidth), + int32(), + utf8(), + }; + + std::shared_ptr schema_; + std::vector column_indices_; + }; + + class RegionGenerator : public TpchTableGenerator + { + public: + Status Init( + std::vector columns, + int /*scale_factor*/, + int64_t /*batch_size*/) override + { + ARROW_ASSIGN_OR_RAISE(schema_, + SetOutputColumns( + columns, + types_, + name_map_, + column_indices_)); + return Status::OK(); + } + + Status StartProducing( + size_t num_threads, + OutputBatchCallback output_callback, + FinishedCallback finished_callback, + ScheduleCallback /*schedule_task_callback*/) override + { + std::shared_ptr R_REGIONKEY_buffer = Buffer::Wrap(R_REGIONKEY, sizeof(R_REGIONKEY)); + ArrayData R_REGIONKEY_arraydata(int32(), kRowCount, { nullptr, std::move(R_REGIONKEY_buffer) }); + + ARROW_ASSIGN_OR_RAISE(std::unique_ptr R_NAME_buffer, AllocateBuffer(kRowCount * kNameByteWidth)); + char *R_NAME_data = reinterpret_cast(R_NAME_buffer->mutable_data()); + for(size_t i = 0; 
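// Illustrative sketch (not part of the patch): the TPC-H nation table has 25 rows, and
// the N_REGIONKEY array above does have 25 entries, but country_names_ appears to list
// only 24 names -- INDIA (nation key 8, region 2) seems to be missing, and IRAN/IRAQ
// appear swapped relative to the canonical dbgen ordering shown below. A compile-time
// size check catches that kind of drift.
#include <cstddef>

constexpr const char* kNationNames[] = {
    "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE",
    "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", "KENYA",
    "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM",
    "RUSSIA", "UNITED KINGDOM", "UNITED STATES"};
constexpr int kNationRegion[] = {0, 1, 1, 1, 4, 0, 3, 3, 2, 2, 4, 4, 2,
                                 4, 0, 0, 0, 1, 2, 3, 4, 2, 3, 3, 1};

static_assert(sizeof(kNationNames) / sizeof(kNationNames[0]) == 25,
              "TPC-H defines exactly 25 nations");
static_assert(sizeof(kNationNames) / sizeof(kNationNames[0]) ==
                  sizeof(kNationRegion) / sizeof(kNationRegion[0]),
              "every nation needs a region key");

int main() { return 0; }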
i < kRowCount; i++) + std::strncpy(R_NAME_data + kNameByteWidth * i, region_names_[i], kNameByteWidth); + ArrayData R_NAME_arraydata(types_[static_cast(REGION::R_NAME)], kRowCount, { nullptr, std::move(R_NAME_buffer) }); + + ARROW_ASSIGN_OR_RAISE(Datum R_COMMENT_datum, g_text.GenerateComments(kRowCount, 31, 115, rng_)); + + std::vector fields = { std::move(R_REGIONKEY_arraydata), std::move(R_NAME_arraydata), std::move(R_COMMENT_datum) }; + std::vector result; + for(const int &col : column_indices_) + result.push_back(fields[col]); + ARROW_ASSIGN_OR_RAISE(ExecBatch batch, ExecBatch::Make(std::move(result))); + output_callback(std::move(batch)); + finished_callback(static_cast(1)); + return Status::OK(); + } + + std::shared_ptr schema() const override + { + return schema_; + } + + random::pcg32_fast rng_; + + static constexpr size_t kRowCount = 5; + static constexpr int32_t kNameByteWidth = 25; + const int32_t R_REGIONKEY[kRowCount] = { 0, 1, 2, 3, 4 }; + const char *region_names_[kRowCount] = + { + "AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST" + }; + + struct REGION + { + enum + { + R_REGIONKEY, + R_NAME, + R_COMMENT, + kNumColumns, + }; + }; + + const std::unordered_map name_map_ = + { + { "R_REGIONKEY", REGION::R_REGIONKEY }, + { "R_NAME", REGION::R_NAME }, + { "R_COMMENT", REGION::R_COMMENT }, + }; + + const std::vector> types_ = + { + int32(), + fixed_size_binary(kNameByteWidth), + utf8(), + }; + + std::shared_ptr schema_; + std::vector column_indices_; + }; + + class TpchNode : public ExecNode + { + public: + TpchNode(ExecPlan *plan, + std::unique_ptr generator) + : ExecNode(plan, {}, {}, generator->schema(), /*num_outputs=*/1), + generator_(std::move(generator)) + { + } + + const char *kind_name() const override + { + return "TpchNode"; + } + + [[noreturn]] + static void NoInputs() + { + Unreachable("TPC-H node should never have any inputs"); + } + + [[noreturn]] + void InputReceived(ExecNode *, ExecBatch) override + { + NoInputs(); + } + + [[noreturn]] + void ErrorReceived(ExecNode *, Status) override + { + NoInputs(); + } + + [[noreturn]] + void InputFinished(ExecNode *, int) override + { + NoInputs(); + } + + Status StartProducing() override + { + finished_ = Future<>::Make(); + return generator_->StartProducing( + thread_indexer_.Capacity(), + [this](ExecBatch batch) { this->OutputBatchCallback(std::move(batch)); }, + [this](int64_t num_batches) { this->FinishedCallback(num_batches); }, + [this](std::function func) -> Status { return this->ScheduleTaskCallback(std::move(func)); } + ); + } + + void PauseProducing(ExecNode *output) override {} + void ResumeProducing(ExecNode *output) override {} + + void StopProducing(ExecNode *output) override + { + DCHECK_EQ(output, outputs_[0]); + StopProducing(); + } + + void StopProducing() override + { + generator_->Abort([this]() { this->finished_.MarkFinished(); }); + } + + Future<> finished() override + { + return finished_; + } + + private: + void OutputBatchCallback(ExecBatch batch) + { + outputs_[0]->InputReceived(this, std::move(batch)); + } + + void FinishedCallback(int64_t total_num_batches) + { + outputs_[0]->InputFinished(this, static_cast(total_num_batches)); + finished_.MarkFinished(); + } + + Status ScheduleTaskCallback(std::function func) + { + auto executor = plan_->exec_context()->executor(); + if (executor) + { + RETURN_NOT_OK(executor->Spawn([this, func] + { + size_t thread_index = thread_indexer_(); + Status status = func(thread_index); + if (!status.ok()) + { + StopProducing(); + ErrorIfNotOk(status); + return; + } 
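// Illustrative sketch (not part of the patch): the dispatch policy of
// ScheduleTaskCallback() above -- hand the task to the plan's executor when one exists,
// otherwise run it inline as "thread 0" (the serial path). DemoExecContext and the
// joined std::thread are stand-ins used only to keep the demo self-contained.
#include <cstddef>
#include <functional>
#include <thread>

struct DemoExecContext { bool has_executor; };

void ScheduleTask(const DemoExecContext& ctx, std::function<void(size_t)> task) {
  if (ctx.has_executor) {
    std::thread worker([task] { task(/*thread_index=*/1); });  // async path
    worker.join();  // joined immediately only to keep the demo simple
  } else {
    task(/*thread_index=*/0);  // serial fallback when the exec context has no executor
  }
}

int main() {
  ScheduleTask({false}, [](size_t) { /* generate one batch */ });
  ScheduleTask({true}, [](size_t) { /* generate one batch */ });
}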
+ })); + } + else + { + return func(0); + } + return Status::OK(); + } + + std::unique_ptr generator_; + + Future<> finished_ = Future<>::MakeFinished(); + ThreadIndexer thread_indexer_; + }; + + Result TpchGen::Make(ExecPlan *plan, int scale_factor, int64_t batch_size) + { + static bool has_inited_text = false; + if(!has_inited_text) + { + RETURN_NOT_OK(g_text.Init()); + has_inited_text = true; + } + TpchGen result(plan, scale_factor, batch_size); + return result; + } + + template + Result TpchGen::CreateNode(std::vector columns) + { + std::unique_ptr generator = arrow::internal::make_unique(); + RETURN_NOT_OK(generator->Init(std::move(columns), scale_factor_, batch_size_)); + return plan_->EmplaceNode(plan_, std::move(generator)); + } + + Result TpchGen::Supplier(std::vector columns) + { + return CreateNode(std::move(columns)); + } + + Result TpchGen::Part(std::vector columns) + { + if(!part_and_part_supp_generator_) + { + part_and_part_supp_generator_ = std::make_shared(); + } + std::unique_ptr generator = arrow::internal::make_unique(part_and_part_supp_generator_); + RETURN_NOT_OK(generator->Init(std::move(columns), scale_factor_, batch_size_)); + return plan_->EmplaceNode(plan_, std::move(generator)); + } + + Result TpchGen::PartSupp(std::vector columns) + { + if(!part_and_part_supp_generator_) + { + part_and_part_supp_generator_ = std::make_shared(); + } + std::unique_ptr generator = arrow::internal::make_unique(part_and_part_supp_generator_); + RETURN_NOT_OK(generator->Init(std::move(columns), scale_factor_, batch_size_)); + return plan_->EmplaceNode(plan_, std::move(generator)); + } + + Result TpchGen::Customer(std::vector columns) + { + return CreateNode(std::move(columns)); + } + + Result TpchGen::Orders(std::vector columns) + { + if(!orders_and_line_item_generator_) + { + orders_and_line_item_generator_ = std::make_shared(); + } + std::unique_ptr generator = arrow::internal::make_unique(orders_and_line_item_generator_); + RETURN_NOT_OK(generator->Init(std::move(columns), scale_factor_, batch_size_)); + return plan_->EmplaceNode(plan_, std::move(generator)); + } + + Result TpchGen::Lineitem(std::vector columns) + { + if(!orders_and_line_item_generator_) + { + orders_and_line_item_generator_ = std::make_shared(); + } + std::unique_ptr generator = arrow::internal::make_unique(orders_and_line_item_generator_); + RETURN_NOT_OK(generator->Init(std::move(columns), scale_factor_, batch_size_)); + return plan_->EmplaceNode(plan_, std::move(generator)); + } + + Result TpchGen::Nation(std::vector columns) + { + return CreateNode(std::move(columns)); + } + + Result TpchGen::Region(std::vector columns) + { + return CreateNode(std::move(columns)); + } + } +} diff --git a/cpp/src/arrow/compute/exec/tpch_node.h b/cpp/src/arrow/compute/exec/tpch_node.h new file mode 100644 index 00000000000..dc282aae981 --- /dev/null +++ b/cpp/src/arrow/compute/exec/tpch_node.h @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/compute/exec/options.h" +#include "arrow/compute/exec/exec_plan.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/util/pcg_random.h" +#include +#include + +namespace arrow +{ + namespace compute + { + class OrdersAndLineItemGenerator; + class PartAndPartSupplierGenerator; + + class TpchGen + { + public: + static Result Make(ExecPlan *plan, int scale_factor = 1, int64_t batch_size = 4096); + + Result Supplier(std::vector columns = {}); + Result Part(std::vector columns = {}); + Result PartSupp(std::vector columns = {}); + Result Customer(std::vector columns = {}); + Result Orders(std::vector columns = {}); + Result Lineitem(std::vector columns = {}); + Result Nation(std::vector columns = {}); + Result Region(std::vector columns = {}); + + private: + TpchGen(ExecPlan *plan, int scale_factor, int64_t batch_size) + : plan_(plan), + scale_factor_(scale_factor), + batch_size_(batch_size), + orders_and_line_item_generator_(nullptr) + {} + + template + Result CreateNode(std::vector columns); + + ExecPlan *plan_; + int scale_factor_; + int64_t batch_size_; + + std::shared_ptr part_and_part_supp_generator_; + std::shared_ptr orders_and_line_item_generator_; + }; + } +} diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index dd5bead58aa..0bf7e5422b2 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -893,7 +893,8 @@ class TableSorter { TableSorter(ExecContext* ctx, uint64_t* indices_begin, uint64_t* indices_end, const Table& table, const SortOptions& options) - : ctx_(ctx), + : status_(), + ctx_(ctx), table_(table), batches_(MakeBatches(table, &status_)), options_(options), @@ -1138,6 +1139,7 @@ class TableSorter { MergeNullsOnly(range_begin, range_middle, range_end, temp_indices, null_count); } + Status status_; ExecContext* ctx_; const Table& table_; const RecordBatchVector batches_; @@ -1148,7 +1150,6 @@ class TableSorter { uint64_t* indices_begin_; uint64_t* indices_end_; Comparator comparator_; - Status status_; }; // ---------------------------------------------------------------------- From c4495dcd003614734d08cf6d77d2d8b09dacaf65 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Wed, 2 Mar 2022 15:08:17 -0600 Subject: [PATCH 02/11] Draft of R bindings --- r/DESCRIPTION | 1 + r/NAMESPACE | 1 + r/R/arrowExports.R | 5 +++ r/R/tpch.R | 36 ++++++++++++++++++++++ r/man/tpch_dbgen.Rd | 20 ++++++++++++ r/src/arrowExports.cpp | 18 +++++++++++ r/src/compute-exec.cpp | 56 ++++++++++++++++++++++++++++++++++ r/tests/testthat/test-tpch.R | 59 ++++++++++++++++++++++++++++++++++++ 8 files changed, 196 insertions(+) create mode 100644 r/R/tpch.R create mode 100644 r/man/tpch_dbgen.Rd create mode 100644 r/tests/testthat/test-tpch.R diff --git a/r/DESCRIPTION b/r/DESCRIPTION index ae4bbcb8c38..17d97bebe08 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -125,4 +125,5 @@ Collate: 'reexports-bit64.R' 'reexports-tidyselect.R' 'schema.R' + 'tpch.R' 'util.R' diff --git a/r/NAMESPACE b/r/NAMESPACE 
index d841bb29072..029177df0aa 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -282,6 +282,7 @@ export(time64) export(timestamp) export(to_arrow) export(to_duckdb) +export(tpch_dbgen) export(type) export(uint16) export(uint32) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index d6cf785a650..8bfd08b7a1e 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -412,6 +412,10 @@ ExecNode_ReadFromRecordBatchReader <- function(plan, reader) { .Call(`_arrow_ExecNode_ReadFromRecordBatchReader`, plan, reader) } +Tpch_Dbgen <- function(plan, scale_factor, table_name) { + .Call(`_arrow_Tpch_Dbgen`, plan, scale_factor, table_name) +} + RecordBatch__cast <- function(batch, schema, options) { .Call(`_arrow_RecordBatch__cast`, batch, schema, options) } @@ -1851,3 +1855,4 @@ SetIOThreadPoolCapacity <- function(threads) { Array__infer_type <- function(x) { .Call(`_arrow_Array__infer_type`, x) } + diff --git a/r/R/tpch.R b/r/R/tpch.R new file mode 100644 index 00000000000..78c2d112584 --- /dev/null +++ b/r/R/tpch.R @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
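# Usage sketch, mirroring r/tests/testthat/test-tpch.R (read_table() drains the
# returned RecordBatchReader into an arrow Table):
#
#   reader <- tpch_dbgen("region", scale_factor = 1)
#   tab <- reader$read_table()   # REGION at scale factor 1 is 5 rows x 3 columns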
+ +tpch_tables <- c("customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier") + + +#' Generate a RecordBatchReader with TPC-H data in it +#' +#' @param table the table to generate +#' @param scale_factor the scale factor to generate +#' +#' @return a RecordBatchReader that will contain the generated data +#' @export +#' +#' @keywords internal +tpch_dbgen <- function(table = tpch_tables, scale_factor) { + table <- match.arg(table) + + Tpch_Dbgen(arrow:::ExecPlan$create(), scale_factor, table) +} + + diff --git a/r/man/tpch_dbgen.Rd b/r/man/tpch_dbgen.Rd new file mode 100644 index 00000000000..88cc1cf1857 --- /dev/null +++ b/r/man/tpch_dbgen.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tpch.R +\name{tpch_dbgen} +\alias{tpch_dbgen} +\title{Generate a RecordBatchReader with TPC-H data in it} +\usage{ +tpch_dbgen(table = tpch_tables, scale_factor) +} +\arguments{ +\item{table}{the table to generate} + +\item{scale_factor}{the scale factor to generate} +} +\value{ +a RecordBatchReader that will contain the generated data +} +\description{ +Generate a RecordBatchReader with TPC-H data in it +} +\keyword{internal} diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 3e4196421c9..e3cc6d79933 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1626,6 +1626,23 @@ extern "C" SEXP _arrow_ExecNode_ReadFromRecordBatchReader(SEXP plan_sexp, SEXP r } #endif +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Tpch_Dbgen(const std::shared_ptr& plan, int scale_factor, std::string table_name); +extern "C" SEXP _arrow_Tpch_Dbgen(SEXP plan_sexp, SEXP scale_factor_sexp, SEXP table_name_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type plan(plan_sexp); + arrow::r::Input::type scale_factor(scale_factor_sexp); + arrow::r::Input::type table_name(table_name_sexp); + return cpp11::as_sexp(Tpch_Dbgen(plan, scale_factor, table_name)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Tpch_Dbgen(SEXP plan_sexp, SEXP scale_factor_sexp, SEXP table_name_sexp){ + Rf_error("Cannot call Tpch_Dbgen(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // compute.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__cast(const std::shared_ptr& batch, const std::shared_ptr& schema, cpp11::list options); @@ -7472,6 +7489,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 5}, { "_arrow_ExecNode_Join", (DL_FUNC) &_arrow_ExecNode_Join, 7}, { "_arrow_ExecNode_ReadFromRecordBatchReader", (DL_FUNC) &_arrow_ExecNode_ReadFromRecordBatchReader, 2}, + { "_arrow_Tpch_Dbgen", (DL_FUNC) &_arrow_Tpch_Dbgen, 3}, { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 3982af4f7f5..0d556d102a7 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -277,4 +278,59 @@ std::shared_ptr ExecNode_ReadFromRecordBatchReader( return MakeExecNodeOrStop("source", plan.get(), {}, options); } +// [[arrow::export]] +std::shared_ptr Tpch_Dbgen( + const std::shared_ptr& plan, + int scale_factor, + std::string table_name + ) { + + auto gen = ValueOrStop(arrow::compute::TpchGen::Make(plan.get(), scale_factor)); + + compute::ExecNode *table; + if (table_name == "part") { + table = ValueOrStop(gen.Part()); + } else if (table_name == "supplier") { + table = ValueOrStop(gen.Supplier()); + } else if (table_name == "partsupp") { + table = ValueOrStop(gen.PartSupp()); + } else if (table_name == "customer") { + table = ValueOrStop(gen.Customer()); + } else if (table_name == "nation") { + table = ValueOrStop(gen.Nation()); + } else if (table_name == "lineitem") { + table = ValueOrStop(gen.Lineitem()); + } else if (table_name == "region") { + table = ValueOrStop(gen.Region()); + } else if (table_name == "orders") { + table = ValueOrStop(gen.Orders()); + } else { + cpp11::stop("That's not a valid table name"); + } + + arrow::AsyncGenerator> sink_gen; + + MakeExecNodeOrStop("sink", plan.get(), {table}, + compute::SinkNodeOptions{&sink_gen}); + + StopIfNotOk(plan->Validate()); + StopIfNotOk(plan->StartProducing()); + + // If the generator is destroyed before being completely drained, inform plan + std::shared_ptr stop_producing{nullptr, [plan](...) { + bool not_finished_yet = + plan->finished().TryAddCallback([&plan] { + return [plan](const arrow::Status&) {}; + }); + + if (not_finished_yet) { + plan->StopProducing(); + } + }}; + + return compute::MakeGeneratorReader( + table->output_schema(), + [stop_producing, plan, sink_gen] { return sink_gen(); }, gc_memory_pool()); +} + #endif diff --git a/r/tests/testthat/test-tpch.R b/r/tests/testthat/test-tpch.R new file mode 100644 index 00000000000..8077f76e4fd --- /dev/null +++ b/r/tests/testthat/test-tpch.R @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test_that("tpch_dbgen()", { + lineitem_rbr <- tpch_dbgen("lineitem", 1) + lineitem_tab <- lineitem_rbr$read_table() + expect_identical(ncol(lineitem_tab), 16L) + + # and check a handful of types + expect_type_equal(lineitem_tab[["L_ORDERKEY"]], int32()) + expect_type_equal(lineitem_tab[["L_RECEIPTDATE"]], date32()) + + region_rbr <- tpch_dbgen("region", 1) + region_tab <- region_rbr$read_table() + expect_identical(dim(region_tab), c(5L, 3L)) + + # and check a handful of types + expect_type_equal(region_tab[["R_REGIONKEY"]], int32()) + expect_type_equal(region_tab[["R_COMMENT"]], string()) + + part_rbr <- tpch_dbgen("part", 1) + part_tab <- part_rbr$read_table() + expect_identical(dim(part_tab), c(200000L, 9L)) + + # and check a handful of types + expect_type_equal(part_tab[["R_PARTKEY"]], int32()) +}) + +# these two are tested above +tpch_tables_up <- setdiff(tpch_tables, c("lineitem", "region")) + +# nation segfaults +# supplier hangs +tpch_tables_up <- setdiff(tpch_tables_up, c("nation", "supplier")) + +# all of the rest below have an error with: +# Invalid: Arrays used to construct an ExecBatch must have equal length + +for (table_name in tpch_tables_up) { + test_that(paste0("Generating table: ", table_name), { + rbr <- tpch_dbgen(table_name, 1) + tab <- rbr$read_table() + expect_r6_class(tab, "Table") + }) +} From d7c508c36467c9d97fbfc8c5c7ec4cd4b5c1e871 Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Wed, 2 Mar 2022 18:08:41 -0800 Subject: [PATCH 03/11] Fix bugs, parallel text generation, rudimentary tests --- cpp/src/arrow/compute/exec/CMakeLists.txt | 1 + cpp/src/arrow/compute/exec/tpch_node.cc | 302 +++++++++++-------- cpp/src/arrow/compute/exec/tpch_node_test.cc | 203 +++++++++++++ 3 files changed, 382 insertions(+), 124 deletions(-) create mode 100644 cpp/src/arrow/compute/exec/tpch_node_test.cc diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index cf725667107..452cda8b914 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -26,6 +26,7 @@ add_arrow_compute_test(expression_test add_arrow_compute_test(plan_test PREFIX "arrow-compute") add_arrow_compute_test(hash_join_node_test PREFIX "arrow-compute") +add_arrow_compute_test(tpch_node_test PREFIX "arrow-compute") add_arrow_compute_test(union_node_test PREFIX "arrow-compute") add_arrow_compute_test(util_test PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/exec/tpch_node.cc b/cpp/src/arrow/compute/exec/tpch_node.cc index 842bf828574..445df7d08b9 100644 --- a/cpp/src/arrow/compute/exec/tpch_node.cc +++ b/cpp/src/arrow/compute/exec/tpch_node.cc @@ -22,7 +22,7 @@ namespace arrow class TpchText { public: - Status Init(); + Status InitIfNeeded(random::pcg32_fast &rng); Result GenerateComments( size_t num_comments, size_t min_length, @@ -30,24 +30,28 @@ namespace arrow random::pcg32_fast &rng); private: - void GenerateWord(size_t &offset, const char **words, size_t num_choices); - void GenerateNoun(size_t &offset); - void GenerateVerb(size_t &offset); - void GenerateAdjective(size_t &offset); - void 
GenerateAdverb(size_t &offset); - void GeneratePreposition(size_t &offset); - void GenerateAuxiliary(size_t &offset); - void GenerateTerminator(size_t &offset); + bool GenerateWord(int64_t &offset, random::pcg32_fast &rng, char *arr, const char **words, size_t num_choices); + bool GenerateNoun(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GenerateVerb(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GenerateAdjective(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GenerateAdverb(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GeneratePreposition(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GenerateAuxiliary(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GenerateTerminator(int64_t &offset, random::pcg32_fast &rng, char *arr); - void GenerateNounPhrase(size_t &offset); - void GenerateVerbPhrase(size_t &offset); - void GeneratePrepositionalPhrase(size_t &offset); + bool GenerateNounPhrase(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GenerateVerbPhrase(int64_t &offset, random::pcg32_fast &rng, char *arr); + bool GeneratePrepositionalPhrase(int64_t &offset, random::pcg32_fast &rng, char *arr); - void GenerateSentence(size_t &offset); + bool GenerateSentence(int64_t &offset, random::pcg32_fast &rng, char *arr); + std::atomic done_ = { false }; + int64_t generated_offset_ = 0; + std::mutex text_guard_; std::unique_ptr text_; random::pcg32_fast rng_; - static constexpr size_t kTextBytes = 300 * 1024 * 1024; // 300 MB + static constexpr int64_t kChunkSize = 8192; + static constexpr int64_t kTextBytes = 300 * 1024 * 1024; // 300 MB }; class TpchTableGenerator @@ -150,11 +154,13 @@ namespace arrow std::vector> fields; if(columns.empty()) { + fields.resize(name_map.size()); + gen_list.resize(name_map.size()); for(auto pair : name_map) { int col_idx = pair.second; - fields.push_back(field(pair.first, types[col_idx])); - gen_list.push_back(col_idx); + fields[col_idx] = field(pair.first, types[col_idx]); + gen_list[col_idx] = col_idx; } return schema(std::move(fields)); } @@ -175,12 +181,39 @@ namespace arrow static TpchText g_text; - Status TpchText::Init() + Status TpchText::InitIfNeeded(random::pcg32_fast &rng) { - ARROW_ASSIGN_OR_RAISE(text_, AllocateBuffer(kTextBytes)); - size_t offset = 0; - while(offset < kTextBytes) - GenerateSentence(offset); + if(done_.load()) + return Status::OK(); + + { + std::lock_guard lock(text_guard_); + if(!text_) + { + ARROW_ASSIGN_OR_RAISE(text_, AllocateBuffer(kTextBytes)); + } + } + char *out = reinterpret_cast(text_->mutable_data()); + char temp_buff[kChunkSize]; + while(done_.load() == false) + { + int64_t current_offset = 0; + int64_t offset = 0; + while(GenerateSentence(offset, rng, temp_buff)) + current_offset = offset; + + { + std::lock_guard lock(text_guard_); + if(done_.load()) + return Status::OK(); + int64_t bytes_remaining = kTextBytes - generated_offset_; + int64_t memcpy_size = std::min(offset, bytes_remaining); + std::memcpy(out + generated_offset_, temp_buff, memcpy_size); + generated_offset_ += memcpy_size; + if(generated_offset_ == kTextBytes) + done_.store(true); + } + } return Status::OK(); } @@ -190,6 +223,7 @@ namespace arrow size_t max_length, random::pcg32_fast &rng) { + RETURN_NOT_OK(InitIfNeeded(rng)); std::uniform_int_distribution length_dist(min_length, max_length); ARROW_ASSIGN_OR_RAISE(std::unique_ptr offset_buffer, AllocateBuffer(sizeof(int32_t) * (num_comments + 1))); int32_t *offsets = reinterpret_cast(offset_buffer->mutable_data()); @@ 
-206,7 +240,7 @@ namespace arrow size_t offset_in_text = offset_dist(rng); std::memcpy(comments + offsets[i], text_->data() + offset_in_text, length); } - ArrayData ad(utf8(), num_comments, { nullptr, std::move(comment_buffer), std::move(offset_buffer) }); + ArrayData ad(utf8(), num_comments, { nullptr, std::move(offset_buffer), std::move(comment_buffer) }); return std::move(ad); } @@ -237,7 +271,7 @@ namespace arrow for(int32_t i = 0; i < offsets[num_rows]; i++) str[i] = alpha_numerics[char_dist(rng)]; - ArrayData ad(utf8(), num_rows, { nullptr, std::move(str_buff), std::move(offset_buff) }); + ArrayData ad(utf8(), num_rows, { nullptr, std::move(offset_buff), std::move(str_buff) }); return std::move(ad); } @@ -246,10 +280,10 @@ namespace arrow out += (num_digits - 1); while(x > 0) { - *out-- = x % 10; + *out-- = '0' + (x % 10); x /= 10; } - x += num_digits; + out += num_digits; } void GeneratePhoneNumber( @@ -405,163 +439,176 @@ namespace arrow }; static constexpr size_t kNumTerminators = sizeof(Terminators) / sizeof(Terminators[0]); - void TpchText::GenerateWord(size_t &offset, const char **words, size_t num_choices) + bool TpchText::GenerateWord(int64_t &offset, random::pcg32_fast &rng, char *arr, const char **words, size_t num_choices) { std::uniform_int_distribution dist(0, num_choices - 1); - const char *word = words[dist(rng_)]; - size_t bytes_left = kTextBytes - offset; + const char *word = words[dist(rng)]; size_t length = std::strlen(word); - size_t bytes_to_copy = std::min(bytes_left, length); - std::memcpy(text_->mutable_data() + offset, word, bytes_to_copy); - offset += bytes_to_copy; + if(offset + length > kChunkSize) + return false; + std::memcpy(arr + offset, word, length); + offset += length; + return true; } - void TpchText::GenerateNoun(size_t &offset) + bool TpchText::GenerateNoun(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Nouns, kNumNouns); + return GenerateWord(offset, rng, arr, Nouns, kNumNouns); } - void TpchText::GenerateVerb(size_t &offset) + bool TpchText::GenerateVerb(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Verbs, kNumVerbs); + return GenerateWord(offset, rng, arr, Verbs, kNumVerbs); } - void TpchText::GenerateAdjective(size_t &offset) + bool TpchText::GenerateAdjective(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Adjectives, kNumAdjectives); + return GenerateWord(offset, rng, arr, Adjectives, kNumAdjectives); } - void TpchText::GenerateAdverb(size_t &offset) + bool TpchText::GenerateAdverb(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Adverbs, kNumAdverbs); + return GenerateWord(offset, rng, arr, Adverbs, kNumAdverbs); } - void TpchText::GeneratePreposition(size_t &offset) + bool TpchText::GeneratePreposition(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Prepositions, kNumPrepositions); + return GenerateWord(offset, rng, arr, Prepositions, kNumPrepositions); } - void TpchText::GenerateAuxiliary(size_t &offset) + bool TpchText::GenerateAuxiliary(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Auxiliaries, kNumAuxiliaries); + return GenerateWord(offset, rng, arr, Auxiliaries, kNumAuxiliaries); } - void TpchText::GenerateTerminator(size_t &offset) + bool TpchText::GenerateTerminator(int64_t &offset, random::pcg32_fast &rng, char *arr) { - GenerateWord(offset, Terminators, kNumTerminators); + bool result = GenerateWord(offset, rng, arr, Terminators, kNumTerminators); + 
// Swap the space with the terminator + if(result) + std::swap(*(arr + offset - 2), *(arr + offset - 1)); + return result; } - void TpchText::GenerateNounPhrase(size_t &offset) + bool TpchText::GenerateNounPhrase(int64_t &offset, random::pcg32_fast &rng, char *arr) { std::uniform_int_distribution dist(0, 3); const char *comma_space = ", "; + bool success = true; switch(dist(rng_)) { case 0: - GenerateNoun(offset); + success &= GenerateNoun(offset, rng, arr); break; case 1: - GenerateAdjective(offset); - GenerateNoun(offset); + success &= GenerateAdjective(offset, rng, arr); + success &= GenerateNoun(offset, rng, arr); break; case 2: - GenerateAdjective(offset); - GenerateWord(offset, &comma_space, 1); - GenerateAdjective(offset); - GenerateNoun(offset); + success &= GenerateAdjective(offset, rng, arr); + success &= GenerateWord(offset, rng, arr, &comma_space, 1); + success &= GenerateAdjective(offset, rng, arr); + success &= GenerateNoun(offset, rng, arr); break; case 3: - GenerateAdverb(offset); - GenerateAdjective(offset); - GenerateNoun(offset); + GenerateAdverb(offset, rng, arr); + GenerateAdjective(offset, rng, arr); + GenerateNoun(offset, rng, arr); break; default: Unreachable("Random number should be between 0 and 3 inclusive"); break; } + return success; } - void TpchText::GenerateVerbPhrase(size_t &offset) + bool TpchText::GenerateVerbPhrase(int64_t &offset, random::pcg32_fast &rng, char *arr) { std::uniform_int_distribution dist(0, 3); + bool success = true; switch(dist(rng_)) { case 0: - GenerateVerb(offset); + success &= GenerateVerb(offset, rng, arr); break; case 1: - GenerateAuxiliary(offset); - GenerateVerb(offset); + success &= GenerateAuxiliary(offset, rng, arr); + success &= GenerateVerb(offset, rng, arr); break; case 2: - GenerateVerb(offset); - GenerateAdverb(offset); + success &= GenerateVerb(offset, rng, arr); + success &= GenerateAdverb(offset, rng, arr); break; case 3: - GenerateAuxiliary(offset); - GenerateVerb(offset); - GenerateAdverb(offset); + success &= GenerateAuxiliary(offset, rng, arr); + success &= GenerateVerb(offset, rng, arr); + success &= GenerateAdverb(offset, rng, arr); break; default: Unreachable("Random number should be between 0 and 3 inclusive"); break; } + return success; } - void TpchText::GeneratePrepositionalPhrase(size_t &offset) + bool TpchText::GeneratePrepositionalPhrase(int64_t &offset, random::pcg32_fast &rng, char *arr) { const char *the_space = "the "; - GeneratePreposition(offset); - GenerateWord(offset, &the_space, 1); - GenerateNounPhrase(offset); + bool success = true; + success &= GeneratePreposition(offset, rng, arr); + success &= GenerateWord(offset, rng, arr, &the_space, 1); + success &= GenerateNounPhrase(offset, rng, arr); + return success; } - void TpchText::GenerateSentence(size_t &offset) + bool TpchText::GenerateSentence(int64_t &offset, random::pcg32_fast &rng, char *arr) { std::uniform_int_distribution dist(0, 4); + bool success = true; switch(dist(rng_)) { case 0: - GenerateNounPhrase(offset); - GenerateVerbPhrase(offset); - GenerateTerminator(offset); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateVerbPhrase(offset, rng, arr); + success &= GenerateTerminator(offset, rng, arr); break; case 1: - GenerateNounPhrase(offset); - GenerateVerbPhrase(offset); - GeneratePrepositionalPhrase(offset); - GenerateTerminator(offset); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateVerbPhrase(offset, rng, arr); + success &= GeneratePrepositionalPhrase(offset, rng, arr); + success &= 
GenerateTerminator(offset, rng, arr); break; case 2: - GenerateNounPhrase(offset); - GenerateVerbPhrase(offset); - GenerateNounPhrase(offset); - GenerateTerminator(offset); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateVerbPhrase(offset, rng, arr); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateTerminator(offset, rng, arr); break; case 3: - GenerateNounPhrase(offset); - GenerateVerbPhrase(offset); - GenerateNounPhrase(offset); - GenerateTerminator(offset); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateVerbPhrase(offset, rng, arr); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateTerminator(offset, rng, arr); break; case 4: - GenerateNounPhrase(offset); - GeneratePrepositionalPhrase(offset); - GenerateVerbPhrase(offset); - GenerateNounPhrase(offset); - GenerateTerminator(offset); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GeneratePrepositionalPhrase(offset, rng, arr); + success &= GenerateVerbPhrase(offset, rng, arr); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GenerateTerminator(offset, rng, arr); break; case 5: - GenerateNounPhrase(offset); - GeneratePrepositionalPhrase(offset); - GenerateVerbPhrase(offset); - GeneratePrepositionalPhrase(offset); - GenerateTerminator(offset); + success &= GenerateNounPhrase(offset, rng, arr); + success &= GeneratePrepositionalPhrase(offset, rng, arr); + success &= GenerateVerbPhrase(offset, rng, arr); + success &= GeneratePrepositionalPhrase(offset, rng, arr); + success &= GenerateTerminator(offset, rng, arr); break; default: Unreachable("Random number should be between 0 and 5 inclusive"); break; } + return success; } using GenerateColumnFn = std::function; @@ -669,14 +716,17 @@ namespace arrow { ThreadLocalData &tld = thread_local_data_[thread_index]; { - std::lock_guard lock(part_output_queue_mutex_); - if(!part_output_queue_.empty()) + std::lock_guard lock(partsupp_output_queue_mutex_); + if(!partsupp_output_queue_.empty()) { - ExecBatch batch = std::move(part_output_queue_.front()); - part_output_queue_.pop(); - return std::move(batch); + ExecBatch result = std::move(partsupp_output_queue_.front()); + partsupp_output_queue_.pop(); + return std::move(result); } - else if(part_rows_generated_ == part_rows_to_generate_) + } + { + std::lock_guard lock(part_output_queue_mutex_); + if(part_rows_generated_ == part_rows_to_generate_) { return util::nullopt; } @@ -885,7 +935,7 @@ namespace arrow *row++ = ' '; } } - ArrayData ad(part_types_[PART::P_NAME], tld.part_to_generate, { nullptr, std::move(string_buffer), std::move(offset_buff) }); + ArrayData ad(part_types_[PART::P_NAME], tld.part_to_generate, { nullptr, std::move(offset_buff), std::move(string_buffer) }); Datum datum(ad); tld.part[PART::P_NAME] = std::move(datum); } @@ -916,7 +966,7 @@ namespace arrow Status P_BRAND(size_t thread_index) { ThreadLocalData &tld = thread_local_data_[thread_index]; - if(tld.part[PART::P_MFGR].kind() == Datum::NONE) + if(tld.part[PART::P_BRAND].kind() == Datum::NONE) { RETURN_NOT_OK(P_MFGR(thread_index)); std::uniform_int_distribution dist(1, 5); @@ -987,7 +1037,7 @@ namespace arrow *row++ = ' '; } } - ArrayData ad(part_types_[PART::P_TYPE], tld.part_to_generate, { nullptr, std::move(string_buffer), std::move(offset_buff) }); + ArrayData ad(part_types_[PART::P_TYPE], tld.part_to_generate, { nullptr, std::move(offset_buff), std::move(string_buffer) }); Datum datum(ad); tld.part[PART::P_TYPE] = std::move(datum); } @@ -1065,7 +1115,7 @@ 
namespace arrow ThreadLocalData &tld = thread_local_data_[thread_index]; if(tld.part[PART::P_COMMENT].kind() == Datum::NONE) { - ARROW_ASSIGN_OR_RAISE(tld.part[PART::P_COMMENT], g_text.GenerateComments(batch_size_, 5, 22, tld.rng)); + ARROW_ASSIGN_OR_RAISE(tld.part[PART::P_COMMENT], g_text.GenerateComments(tld.part_to_generate, 5, 22, tld.rng)); } return Status::OK(); } @@ -1222,7 +1272,7 @@ namespace arrow for(int64_t irun = 0; irun < next_run; irun++) ps_supplycost[irun] = { dist(tld.rng) }; - tld.partsupp[ibatch][PARTSUPP::PS_AVAILQTY].array()->length = next_run; + tld.partsupp[ibatch][PARTSUPP::PS_SUPPLYCOST].array()->length = next_run; irow += next_run; } } @@ -1594,8 +1644,11 @@ namespace arrow tld.orders[ORDERS::O_ORDERKEY].array()->buffers[1]->mutable_data()); for(int64_t i = 0; i < tld.orders_to_generate; i++) { - o_orderkey[i] = (tld.orderkey_start + i + 1); - ARROW_DCHECK(1 <= o_orderkey[i] && o_orderkey[i] <= orders_rows_to_generate_); + int32_t orderkey_index = tld.orderkey_start + i; + int32_t index_of_run = orderkey_index / 8; + int32_t index_in_run = orderkey_index % 8; + o_orderkey[i] = (index_of_run * 32 + index_in_run + 1); + ARROW_DCHECK(1 <= o_orderkey[i] && o_orderkey[i] <= 4 * orders_rows_to_generate_); } } return Status::OK(); @@ -1802,7 +1855,7 @@ namespace arrow ThreadLocalData &tld = thread_local_data_[thread_index]; if(tld.orders[ORDERS::O_COMMENT].kind() == Datum::NONE) { - ARROW_ASSIGN_OR_RAISE(tld.orders[ORDERS::O_COMMENT], g_text.GenerateComments(batch_size_, 19, 78, tld.rng)); + ARROW_ASSIGN_OR_RAISE(tld.orders[ORDERS::O_COMMENT], g_text.GenerateComments(tld.orders_to_generate, 19, 78, tld.rng)); } return Status::OK(); } @@ -2444,6 +2497,7 @@ namespace arrow { bad_row = dist(rng); } while(good_rows_set.find(bad_row) != good_rows_set.end()); + bad_rows_set.insert(bad_row); } good_rows_.clear(); bad_rows_.clear(); @@ -2680,7 +2734,7 @@ namespace arrow ThreadLocalData &tld = thread_local_data_[thread_index]; if(tld.batch[SUPPLIER::S_COMMENT].kind() == Datum::NONE) { - ARROW_ASSIGN_OR_RAISE(tld.batch[SUPPLIER::S_COMMENT], g_text.GenerateComments(batch_size_, 25, 100, tld.rng)); + ARROW_ASSIGN_OR_RAISE(tld.batch[SUPPLIER::S_COMMENT], g_text.GenerateComments(tld.to_generate, 25, 100, tld.rng)); ModifyComments(thread_index, "Recommends", good_rows_); ModifyComments(thread_index, "Complaints", bad_rows_); } @@ -2694,9 +2748,9 @@ namespace arrow { ThreadLocalData &tld = thread_local_data_[thread_index]; const int32_t *offsets = reinterpret_cast( - tld.batch[SUPPLIER::S_COMMENT].array()->buffers[2]->data()); + tld.batch[SUPPLIER::S_COMMENT].array()->buffers[1]->data()); char *str = reinterpret_cast( - tld.batch[SUPPLIER::S_COMMENT].array()->buffers[1]->mutable_data()); + tld.batch[SUPPLIER::S_COMMENT].array()->buffers[2]->mutable_data()); const char *customer = "Customer"; const size_t customer_length = std::strlen(customer); const size_t review_length = std::strlen(review); @@ -3057,7 +3111,7 @@ namespace arrow std::memcpy(out, customer, customer_length); AppendNumberPaddedToNineDigits(out + customer_length, c_custkey[irow]); } - ArrayData ad(utf8(), tld.to_generate, { nullptr, std::move(str_buff), std::move(offset_buff) }); + ArrayData ad(utf8(), tld.to_generate, { nullptr, std::move(offset_buff), std::move(str_buff) }); tld.batch[CUSTOMER::C_NAME] = std::move(ad); } return Status::OK(); @@ -3153,7 +3207,7 @@ namespace arrow ThreadLocalData &tld = thread_local_data_[thread_index]; if(tld.batch[CUSTOMER::C_COMMENT].kind() == Datum::NONE) { - 
ARROW_ASSIGN_OR_RAISE(tld.batch[CUSTOMER::C_COMMENT], g_text.GenerateComments(batch_size_, 29, 116, tld.rng)); + ARROW_ASSIGN_OR_RAISE(tld.batch[CUSTOMER::C_COMMENT], g_text.GenerateComments(tld.to_generate, 29, 116, tld.rng)); } return Status::OK(); } @@ -3381,9 +3435,15 @@ namespace arrow const int32_t N_NATIONKEY[kRowCount] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }; const char *country_names_[kRowCount] = { - "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE", "GERMANY", - "INDONESIA", "IRAQ", "IRAN", "JAPAN", "JORDAN", "KENYA", "MOROCCO", "MOZAMBIQUE", "PERU", - "CHINA", "ROMANIA", "SAUDI ARABIA", "VIETNAM", "RUSSIA", "UNITED KINGDOM", "UNITED STATES" + "ALGERIA", "ARGENTINA", "BRAZIL", + "CANADA", "EGYPT", "ETHIOPIA", + "FRANCE", "GERMANY", "INDIA", + "INDONESIA", "IRAN", "IRAQ", + "JAPAN", "JORDAN", "KENYA", + "MOROCCO", "MOZAMBIQUE", "PERU", + "CHINA", "ROMANIA", "SAUDI ARABIA", + "VIETNAM", "RUSSIA", "UNITED KINGDOM", + "UNITED STATES" }; const int32_t N_REGIONKEY[kRowCount] = { 0, 1, 1, 1, 4, 0, 3, 3, 2, 2, 4, 4, 2, 4, 0, 0, 0, 1, 2, 3, 4, 2, 3, 3, 1 }; @@ -3619,12 +3679,6 @@ namespace arrow Result TpchGen::Make(ExecPlan *plan, int scale_factor, int64_t batch_size) { - static bool has_inited_text = false; - if(!has_inited_text) - { - RETURN_NOT_OK(g_text.Init()); - has_inited_text = true; - } TpchGen result(plan, scale_factor, batch_size); return result; } @@ -3659,7 +3713,7 @@ namespace arrow { part_and_part_supp_generator_ = std::make_shared(); } - std::unique_ptr generator = arrow::internal::make_unique(part_and_part_supp_generator_); + std::unique_ptr generator = arrow::internal::make_unique(part_and_part_supp_generator_); RETURN_NOT_OK(generator->Init(std::move(columns), scale_factor_, batch_size_)); return plan_->EmplaceNode(plan_, std::move(generator)); } diff --git a/cpp/src/arrow/compute/exec/tpch_node_test.cc b/cpp/src/arrow/compute/exec/tpch_node_test.cc new file mode 100644 index 00000000000..c844d7e88c1 --- /dev/null +++ b/cpp/src/arrow/compute/exec/tpch_node_test.cc @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
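// Rudimentary coverage for the TPC-H generator node: each test wires a single
// table generator into an ExecPlan on the CPU thread pool, drains it through a
// sink with StartAndCollect, validates every produced array, and (except for
// LINEITEM, whose row count varies) checks the expected scale-factor-1 sizes:
// SUPPLIER 10,000, PART 200,000, PARTSUPP 800,000, CUSTOMER 150,000,
// ORDERS 1,500,000, NATION 25, REGION 5 rows.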
+ +#include + +#include "arrow/api.h" +#include "arrow/compute/exec/options.h" +#include "arrow/compute/exec/test_util.h" +#include "arrow/compute/exec/util.h" +#include "arrow/compute/kernels/row_encoder.h" +#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/exec/tpch_node.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/matchers.h" +#include "arrow/testing/random.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/make_unique.h" +#include "arrow/util/pcg_random.h" +#include "arrow/util/thread_pool.h" +#include "arrow/array/validate.h" + +namespace arrow +{ + namespace compute + { + void ValidateBatch(const ExecBatch &batch) + { + for(const Datum &d : batch.values) + ASSERT_OK(arrow::internal::ValidateArray(*d.array())); + } + + TEST(TpchNode, Supplier) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Supplier(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 10000); + } + + TEST(TpchNode, Part) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Part(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 200000); + } + + TEST(TpchNode, PartSupp) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.PartSupp(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 800000); + } + + TEST(TpchNode, Customer) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Customer(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 150000); + } + + TEST(TpchNode, Orders) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Orders(); + AsyncGenerator> 
sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 1500000); + } + + TEST(TpchNode, Lineitem) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Lineitem(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + for(auto &batch : res) + { + ValidateBatch(batch); + } + } + + TEST(TpchNode, Nation) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Nation(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 25); + } + + TEST(TpchNode, Region) + { + ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); + std::shared_ptr plan = *ExecPlan::Make(&ctx); + TpchGen gen = *TpchGen::Make(plan.get()); + ExecNode *table = *gen.Region(); + AsyncGenerator> sink_gen; + Declaration sink("sink", { Declaration::Input(table) }, SinkNodeOptions{&sink_gen}); + std::ignore = *sink.AddToPlan(plan.get()); + auto fut = StartAndCollect(plan.get(), sink_gen); + auto res = *fut.MoveResult(); + int64_t num_rows = 0; + for(auto &batch : res) + { + ValidateBatch(batch); + num_rows += batch.length; + } + ASSERT_EQ(num_rows, 5); + } + } +} From 289337ea518c3617f7db41cf7851b89eaa3eb9df Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Thu, 3 Mar 2022 08:14:37 -0600 Subject: [PATCH 04/11] Uncommenting R tests, and a first stab at the filewriter C++ --- r/R/arrowExports.R | 4 ++ r/R/tpch.R | 20 ++++++++- r/src/arrowExports.cpp | 23 ++++++++++ r/src/compute-exec.cpp | 85 ++++++++++++++++++++++++++++++++++++ r/tests/testthat/test-tpch.R | 14 ++---- 5 files changed, 135 insertions(+), 11 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 8bfd08b7a1e..c20ecd188bc 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -416,6 +416,10 @@ Tpch_Dbgen <- function(plan, scale_factor, table_name) { .Call(`_arrow_Tpch_Dbgen`, plan, scale_factor, table_name) } +Tpch_Dbgen_Write <- function(plan, scale_factor, table_name, filesystem, base_dir, existing_data_behavior, max_partitions) { + invisible(.Call(`_arrow_Tpch_Dbgen_Write`, plan, scale_factor, table_name, filesystem, base_dir, existing_data_behavior, max_partitions)) +} + RecordBatch__cast <- function(batch, schema, options) { .Call(`_arrow_RecordBatch__cast`, batch, schema, options) } diff --git a/r/R/tpch.R b/r/R/tpch.R index 78c2d112584..ef0e002a6e5 100644 --- a/r/R/tpch.R +++ b/r/R/tpch.R @@ -30,7 +30,25 @@ tpch_tables <- c("customer", "lineitem", "nation", "orders", "part", "partsupp", tpch_dbgen <- function(table = tpch_tables, 
scale_factor) { table <- match.arg(table) - Tpch_Dbgen(arrow:::ExecPlan$create(), scale_factor, table) + Tpch_Dbgen(ExecPlan$create(), scale_factor, table) } +tpch_dbgen_write <- function(table = tpch_tables, scale_factor, path, ...) { + table <- match.arg(table) + + path_and_fs <- get_path_and_filesystem(path) + + existing_data_behavior <- 0L + max_partitions <- 1024L + + Tpch_Dbgen_Write( + ExecPlan$create(), + scale_factor, + table, + path_and_fs$fs, + path_and_fs$path, + existing_data_behavior, + max_partitions + ) +} diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index e3cc6d79933..bce8a52a7f9 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1643,6 +1643,28 @@ extern "C" SEXP _arrow_Tpch_Dbgen(SEXP plan_sexp, SEXP scale_factor_sexp, SEXP t } #endif +// compute-exec.cpp +#if defined(ARROW_R_WITH_ARROW) +void Tpch_Dbgen_Write(const std::shared_ptr& plan, int scale_factor, std::string table_name, const std::shared_ptr& filesystem, std::string base_dir, arrow::dataset::ExistingDataBehavior existing_data_behavior, int max_partitions); +extern "C" SEXP _arrow_Tpch_Dbgen_Write(SEXP plan_sexp, SEXP scale_factor_sexp, SEXP table_name_sexp, SEXP filesystem_sexp, SEXP base_dir_sexp, SEXP existing_data_behavior_sexp, SEXP max_partitions_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type plan(plan_sexp); + arrow::r::Input::type scale_factor(scale_factor_sexp); + arrow::r::Input::type table_name(table_name_sexp); + arrow::r::Input&>::type filesystem(filesystem_sexp); + arrow::r::Input::type base_dir(base_dir_sexp); + arrow::r::Input::type existing_data_behavior(existing_data_behavior_sexp); + arrow::r::Input::type max_partitions(max_partitions_sexp); + Tpch_Dbgen_Write(plan, scale_factor, table_name, filesystem, base_dir, existing_data_behavior, max_partitions); + return R_NilValue; +END_CPP11 +} +#else +extern "C" SEXP _arrow_Tpch_Dbgen_Write(SEXP plan_sexp, SEXP scale_factor_sexp, SEXP table_name_sexp, SEXP filesystem_sexp, SEXP base_dir_sexp, SEXP existing_data_behavior_sexp, SEXP max_partitions_sexp){ + Rf_error("Cannot call Tpch_Dbgen_Write(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); +} +#endif + // compute.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr RecordBatch__cast(const std::shared_ptr& batch, const std::shared_ptr& schema, cpp11::list options); @@ -7490,6 +7512,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecNode_Join", (DL_FUNC) &_arrow_ExecNode_Join, 7}, { "_arrow_ExecNode_ReadFromRecordBatchReader", (DL_FUNC) &_arrow_ExecNode_ReadFromRecordBatchReader, 2}, { "_arrow_Tpch_Dbgen", (DL_FUNC) &_arrow_Tpch_Dbgen, 3}, + { "_arrow_Tpch_Dbgen_Write", (DL_FUNC) &_arrow_Tpch_Dbgen_Write, 7}, { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index 0d556d102a7..075cb030fcd 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -24,6 +24,11 @@ #include #include #include +// TODO: We probably don't want to add dataset + filesystem here, so instead we'll probably +// want to move the definition of Tpch_Dbgen_Write if it works +#include +#include +#include #include #include #include @@ -33,6 +38,10 @@ #include namespace compute = ::arrow::compute; +// TODO: We probably don't want to add dataset + fs here, so instead we'll probably +// want to move the definition of Tpch_Dbgen_Write if it works +namespace ds = ::arrow::dataset; +namespace fs = ::arrow::fs; std::shared_ptr make_compute_options(std::string func_name, cpp11::list options); @@ -333,4 +342,80 @@ std::shared_ptr Tpch_Dbgen( [stop_producing, plan, sink_gen] { return sink_gen(); }, gc_memory_pool()); } +// [[arrow::export]] +void Tpch_Dbgen_Write( + const std::shared_ptr& plan, + int scale_factor, + std::string table_name, + const std::shared_ptr& filesystem, std::string base_dir, + arrow::dataset::ExistingDataBehavior existing_data_behavior, int max_partitions +) { + auto gen = ValueOrStop(arrow::compute::TpchGen::Make(plan.get(), scale_factor)); + + compute::ExecNode *table; + if (table_name == "part") { + table = ValueOrStop(gen.Part()); + } else if (table_name == "supplier") { + table = ValueOrStop(gen.Supplier()); + } else if (table_name == "partsupp") { + table = ValueOrStop(gen.PartSupp()); + } else if (table_name == "customer") { + table = ValueOrStop(gen.Customer()); + } else if (table_name == "nation") { + table = ValueOrStop(gen.Nation()); + } else if (table_name == "lineitem") { + table = ValueOrStop(gen.Lineitem()); + } else if (table_name == "region") { + table = ValueOrStop(gen.Region()); + } else if (table_name == "orders") { + table = ValueOrStop(gen.Orders()); + } else { + cpp11::stop("That's not a valid table name"); + } + + // TODO: unhardcode this once it's working + auto base_path = base_dir + "/parquet_dataset"; + filesystem->CreateDir(base_path); + + auto format = std::make_shared(); + + ds::FileSystemDatasetWriteOptions write_options; + write_options.file_write_options = format->DefaultWriteOptions(); + write_options.existing_data_behavior = ds::ExistingDataBehavior::kDeleteMatchingPartitions; + write_options.filesystem = filesystem; + write_options.base_dir = base_path; + write_options.partitioning = arrow::dataset::Partitioning::Default(); + write_options.basename_template = "part{i}.parquet"; + write_options.max_partitions = 1024; + + // TODO: this had a checked_cast in front of it in the code I adapted it from + // but I ran into namespace issues when doing it so I took it out to see if it + // worked, but maybe that's what's 
causing the sefault? + const ds::WriteNodeOptions options = + ds::WriteNodeOptions{write_options, table->output_schema()}; + + + MakeExecNodeOrStop("consuming_sink", plan.get(), {table}, options); + + cpp11::message("Just after consume"); + + StopIfNotOk(plan->Validate()); + + cpp11::message("Just after validate"); + + StopIfNotOk(plan->StartProducing()); + + // If the generator is destroyed before being completely drained, inform plan + std::shared_ptr stop_producing{nullptr, [plan](...) { + bool not_finished_yet = + plan->finished().TryAddCallback([&plan] { + return [plan](const arrow::Status&) {}; + }); + + if (not_finished_yet) { + plan->StopProducing(); + } + }}; +} + #endif diff --git a/r/tests/testthat/test-tpch.R b/r/tests/testthat/test-tpch.R index 8077f76e4fd..eedf8954807 100644 --- a/r/tests/testthat/test-tpch.R +++ b/r/tests/testthat/test-tpch.R @@ -37,18 +37,12 @@ test_that("tpch_dbgen()", { expect_identical(dim(part_tab), c(200000L, 9L)) # and check a handful of types - expect_type_equal(part_tab[["R_PARTKEY"]], int32()) + expect_type_equal(part_tab[["P_PARTKEY"]], int32()) + expect_type_equal(part_tab[["P_NAME"]], string()) }) -# these two are tested above -tpch_tables_up <- setdiff(tpch_tables, c("lineitem", "region")) - -# nation segfaults -# supplier hangs -tpch_tables_up <- setdiff(tpch_tables_up, c("nation", "supplier")) - -# all of the rest below have an error with: -# Invalid: Arrays used to construct an ExecBatch must have equal length +# these three are tested above, but test that we can get tables for all the rest +tpch_tables_up <- setdiff(tpch_tables, c("lineitem", "region", "part")) for (table_name in tpch_tables_up) { test_that(paste0("Generating table: ", table_name), { From 2c580acee7786cf4f027fb1eb8a862298b5d480b Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Fri, 4 Mar 2022 23:55:24 -0800 Subject: [PATCH 05/11] Make it actually multithreaded --- cpp/src/arrow/compute/exec/tpch_benchmark.cc | 3 +- cpp/src/arrow/compute/exec/tpch_node.cc | 221 +++++++++++++------ cpp/src/arrow/compute/exec/tpch_node_test.cc | 1 + 3 files changed, 153 insertions(+), 72 deletions(-) diff --git a/cpp/src/arrow/compute/exec/tpch_benchmark.cc b/cpp/src/arrow/compute/exec/tpch_benchmark.cc index 963782333cf..9b4fad177e4 100644 --- a/cpp/src/arrow/compute/exec/tpch_benchmark.cc +++ b/cpp/src/arrow/compute/exec/tpch_benchmark.cc @@ -170,6 +170,7 @@ static void BM_Tpch_Q1(benchmark::State &st) } //BENCHMARK(BM_Tpch_Q1)->RangeMultiplier(10)->Range(1, 1000)->ArgNames({ "SF" }); -BENCHMARK(BM_Tpch_Q1)->RangeMultiplier(10)->Range(1, 10)->ArgNames({ "SF" }); +//BENCHMARK(BM_Tpch_Q1)->RangeMultiplier(10)->Range(1, 10)->ArgNames({ "SF" }); +BENCHMARK(BM_Tpch_Q1)->Args({1})->ArgNames({ "SF" }); } } diff --git a/cpp/src/arrow/compute/exec/tpch_node.cc b/cpp/src/arrow/compute/exec/tpch_node.cc index 445df7d08b9..f9367b1131a 100644 --- a/cpp/src/arrow/compute/exec/tpch_node.cc +++ b/cpp/src/arrow/compute/exec/tpch_node.cc @@ -89,7 +89,7 @@ namespace arrow protected: std::atomic done_ = { false }; - std::atomic batches_generated_ = { 0 }; + std::atomic batches_outputted_ = { 0 }; }; int GetNumDigits(int64_t x) @@ -197,17 +197,17 @@ namespace arrow char temp_buff[kChunkSize]; while(done_.load() == false) { - int64_t current_offset = 0; - int64_t offset = 0; - while(GenerateSentence(offset, rng, temp_buff)) - current_offset = offset; + int64_t known_valid_offset = 0; + int64_t try_offset = 0; + while(GenerateSentence(try_offset, rng, temp_buff)) + known_valid_offset = try_offset; { 
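                // Each worker fills its thread-local temp_buff with whole sentences,
                // then appends only that complete prefix (known_valid_offset bytes)
                // to the shared corpus under the mutex; the thread whose copy reaches
                // kTextBytes flips done_ so the remaining workers stop.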
std::lock_guard lock(text_guard_); if(done_.load()) return Status::OK(); int64_t bytes_remaining = kTextBytes - generated_offset_; - int64_t memcpy_size = std::min(offset, bytes_remaining); + int64_t memcpy_size = std::min(known_valid_offset, bytes_remaining); std::memcpy(out + generated_offset_, temp_buff, memcpy_size); generated_offset_ += memcpy_size; if(generated_offset_ == kTextBytes) @@ -283,7 +283,7 @@ namespace arrow *out-- = '0' + (x % 10); x /= 10; } - out += num_digits; + out += (num_digits + 1); } void GeneratePhoneNumber( @@ -506,7 +506,7 @@ namespace arrow break; case 2: success &= GenerateAdjective(offset, rng, arr); - success &= GenerateWord(offset, rng, arr, &comma_space, 1); + success &= GenerateWord(--offset, rng, arr, &comma_space, 1); success &= GenerateAdjective(offset, rng, arr); success &= GenerateNoun(offset, rng, arr); break; @@ -637,6 +637,16 @@ namespace arrow return Status::OK(); } + int64_t part_batches_generated() const + { + return part_batches_generated_.load(); + } + + int64_t partsupp_batches_generated() const + { + return partsupp_batches_generated_.load(); + } + Result> SetPartOutputColumns(const std::vector &cols) { return SetOutputColumns(cols, part_types_, part_name_map_, part_cols_); @@ -647,18 +657,20 @@ namespace arrow return SetOutputColumns(cols, partsupp_types_, partsupp_name_map_, partsupp_cols_); } - Result> NextPartBatch(size_t thread_index) + Result> NextPartBatch() { + size_t thread_index = thread_indexer_(); ThreadLocalData &tld = thread_local_data_[thread_index]; { std::lock_guard lock(part_output_queue_mutex_); + bool all_generated = part_rows_generated_ == part_rows_to_generate_; if(!part_output_queue_.empty()) { ExecBatch batch = std::move(part_output_queue_.front()); part_output_queue_.pop(); return std::move(batch); } - else if(part_rows_generated_ == part_rows_to_generate_) + else if(all_generated) { return util::nullopt; } @@ -669,6 +681,10 @@ namespace arrow batch_size_, part_rows_to_generate_ - part_rows_generated_); part_rows_generated_ += tld.part_to_generate; + + int64_t num_ps_batches = PartsuppBatchesToGenerate(thread_index); + part_batches_generated_.fetch_add(1); + partsupp_batches_generated_.fetch_add(num_ps_batches); ARROW_DCHECK(part_rows_generated_ <= part_rows_to_generate_); } } @@ -712,8 +728,9 @@ namespace arrow return ExecBatch::Make(std::move(part_result)); } - Result> NextPartSuppBatch(size_t thread_index) + Result> NextPartSuppBatch() { + size_t thread_index = thread_indexer_(); ThreadLocalData &tld = thread_local_data_[thread_index]; { std::lock_guard lock(partsupp_output_queue_mutex_); @@ -737,6 +754,9 @@ namespace arrow batch_size_, part_rows_to_generate_ - part_rows_generated_); part_rows_generated_ += tld.part_to_generate; + int64_t num_ps_batches = PartsuppBatchesToGenerate(thread_index); + part_batches_generated_.fetch_add(1); + partsupp_batches_generated_.fetch_add(num_ps_batches); ARROW_DCHECK(part_rows_generated_ <= part_rows_to_generate_); } } @@ -1120,13 +1140,20 @@ namespace arrow return Status::OK(); } + int64_t PartsuppBatchesToGenerate(size_t thread_index) + { + ThreadLocalData &tld = thread_local_data_[thread_index]; + int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; + int64_t num_batches = (ps_to_generate + batch_size_ - 1) / batch_size_; + return num_batches; + } + Status InitPartsupp(size_t thread_index) { ThreadLocalData &tld = thread_local_data_[thread_index]; tld.generated_partsupp.reset(); tld.partsupp.clear(); - int64_t ps_to_generate = kPartSuppRowsPerPart * 
tld.part_to_generate; - int64_t num_batches = (ps_to_generate + batch_size_ - 1) / batch_size_; + int64_t num_batches = PartsuppBatchesToGenerate(thread_index); tld.partsupp.resize(num_batches); for(std::vector &batch : tld.partsupp) { @@ -1321,7 +1348,10 @@ namespace arrow int64_t part_rows_generated_; std::vector part_cols_; std::vector partsupp_cols_; - + ThreadIndexer thread_indexer_; + + std::atomic part_batches_generated_ = { 0 }; + std::atomic partsupp_batches_generated_ = { 0 }; static constexpr int64_t kPartSuppRowsPerPart = 4; }; @@ -1349,6 +1379,16 @@ namespace arrow return Status::OK(); } + int64_t orders_batches_generated() const + { + return orders_batches_generated_.load(); + } + + int64_t lineitem_batches_generated() const + { + return lineitem_batches_generated_.load(); + } + Result> SetOrdersOutputColumns(const std::vector &cols) { return SetOutputColumns(cols, orders_types_, orders_name_map_, orders_cols_); @@ -1359,8 +1399,9 @@ namespace arrow return SetOutputColumns(cols, lineitem_types_, lineitem_name_map_, lineitem_cols_); } - Result> NextOrdersBatch(size_t thread_index) + Result> NextOrdersBatch() { + size_t thread_index = thread_indexer_(); ThreadLocalData &tld = thread_local_data_[thread_index]; { std::lock_guard lock(orders_output_queue_mutex_); @@ -1381,6 +1422,7 @@ namespace arrow batch_size_, orders_rows_to_generate_ - orders_rows_generated_); orders_rows_generated_ += tld.orders_to_generate; + orders_batches_generated_.fetch_add(1); ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); } } @@ -1426,8 +1468,9 @@ namespace arrow return ExecBatch::Make(std::move(orders_result)); } - Result> NextLineItemBatch(size_t thread_index) + Result> NextLineItemBatch() { + size_t thread_index = thread_indexer_(); ThreadLocalData &tld = thread_local_data_[thread_index]; ExecBatch queued; bool from_queue = false; @@ -1450,18 +1493,20 @@ namespace arrow } { std::lock_guard lock(orders_output_queue_mutex_); - tld.orderkey_start = orders_rows_generated_; - tld.orders_to_generate = std::min( - batch_size_, - orders_rows_to_generate_ - orders_rows_generated_); - orders_rows_generated_ += tld.orders_to_generate; - ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); if(orders_rows_generated_ == orders_rows_to_generate_) { if(from_queue) return std::move(queued); return util::nullopt; } + + tld.orderkey_start = orders_rows_generated_; + tld.orders_to_generate = std::min( + batch_size_, + orders_rows_to_generate_ - orders_rows_generated_); + orders_rows_generated_ += tld.orders_to_generate; + orders_batches_generated_.fetch_add(1ll); + ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); } tld.orders.clear(); tld.orders.resize(ORDERS::kNumCols); @@ -1469,6 +1514,7 @@ namespace arrow tld.generated_lineitem.reset(); if(from_queue) { + lineitem_batches_generated_.fetch_sub(1); for(size_t i = 0; i < lineitem_cols_.size(); i++) if(tld.lineitem[0][lineitem_cols_[i]].kind() == Datum::NONE) tld.lineitem[0][lineitem_cols_[i]] = std::move(queued[i]); @@ -1505,6 +1551,7 @@ namespace arrow ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(lineitem_result))); lineitem_results.emplace_back(std::move(eb)); } + lineitem_batches_generated_.fetch_add(static_cast(lineitem_results.size())); // Return the first batch, enqueue the rest. 
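        // lineitem_batches_generated_ now counts every batch materialized above;
        // the lineitem generator's ProduceCallback compares this total against the
        // number of batches it has already output to decide when to report the
        // table as finished.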
{ std::lock_guard lock(lineitem_output_queue_mutex_); @@ -1872,7 +1919,7 @@ namespace arrow tld.items_per_order.push_back(length); tld.lineitem_to_generate += length; } - size_t num_batches = (tld.first_batch_offset + tld.lineitem_to_generate + batch_size_ - 1) / batch_size_; + int64_t num_batches = (tld.first_batch_offset + tld.lineitem_to_generate + batch_size_ - 1) / batch_size_; tld.lineitem.clear(); tld.lineitem.resize(num_batches); for(std::vector &batch : tld.lineitem) @@ -1889,13 +1936,17 @@ namespace arrow if(tld.lineitem[ibatch][column].kind() == Datum::NONE) { int32_t byte_width = arrow::internal::GetByteWidth(*lineitem_types_[column]); + std::printf("Thread %lu, byte size %d\n", thread_index, byte_width); ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(batch_size_ * byte_width)); ArrayData ad(lineitem_types_[column], batch_size_, { nullptr, std::move(buff) }); tld.lineitem[ibatch][column] = std::move(ad); out_batch_offset = 0; } - if(ibatch == 0) + else + { + ARROW_DCHECK(ibatch == 0); out_batch_offset = tld.first_batch_offset; + } return Status::OK(); } @@ -2461,6 +2512,10 @@ namespace arrow int64_t orders_rows_generated_; std::vector orders_cols_; std::vector lineitem_cols_; + ThreadIndexer thread_indexer_; + + std::atomic orders_batches_generated_ = { 0 }; + std::atomic lineitem_batches_generated_ = { 0 }; }; class SupplierGenerator : public TpchTableGenerator @@ -2518,7 +2573,9 @@ namespace arrow output_callback_ = std::move(output_callback); finished_callback_ = std::move(finished_callback); schedule_callback_ = std::move(schedule_callback); - return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + for(size_t i = 0; i < num_threads; i++) + RETURN_NOT_OK(schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); })); + return Status::OK(); } std::shared_ptr schema() const override @@ -2584,7 +2641,6 @@ namespace arrow tld.to_generate = std::min(batch_size_, rows_to_generate_ - tld.suppkey_start); - bool is_last_batch = tld.to_generate < batch_size_; tld.batch.clear(); tld.batch.resize(SUPPLIER::kNumCols); @@ -2598,15 +2654,14 @@ namespace arrow result[i] = tld.batch[col_idx]; } ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(result))); - batches_generated_++; + int64_t batches_to_generate = (rows_to_generate_ + batch_size_ - 1) / batch_size_; + int64_t batches_outputted_before_this_one = batches_outputted_.fetch_add(1); + bool is_last_batch = batches_outputted_before_this_one == (batches_to_generate - 1); output_callback_(std::move(eb)); if(is_last_batch) { - bool expected = false; - if(done_.compare_exchange_strong(expected, true)) - { - finished_callback_(batches_generated_.load()); - } + done_.store(true); + finished_callback_(batches_outputted_.load()); return Status::OK(); } return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); @@ -2657,7 +2712,7 @@ namespace arrow for(int64_t irow = 0; irow < tld.to_generate; irow++) { char *out = s_name + byte_width * irow; - std::memcpy(out, supplier, supplier_length); + std::strncpy(out, supplier, byte_width); AppendNumberPaddedToNineDigits(out + supplier_length, s_suppkey[irow]); } } @@ -2799,7 +2854,6 @@ namespace arrow PartGenerator(std::shared_ptr gen) : gen_(std::move(gen)) { - batches_generated_.store(0); } Status Init( @@ -2825,7 +2879,9 @@ namespace arrow finished_callback_ = std::move(finished_callback); schedule_callback_ = std::move(schedule_callback); - return 
schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + for(size_t i = 0; i < num_threads; i++) + RETURN_NOT_OK(schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); })); + return Status::OK(); } std::shared_ptr schema() const override @@ -2834,22 +2890,26 @@ namespace arrow } private: - Status ProduceCallback(size_t thread_index) + Status ProduceCallback(size_t) { + if(done_.load()) + return Status::OK(); ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, - gen_->NextPartBatch(thread_index)); - if(done_.load() || !maybe_batch.has_value()) + gen_->NextPartBatch()); + if(!maybe_batch.has_value()) { - bool expected = false; - if(done_.compare_exchange_strong(expected, true)) + int64_t batches_generated = gen_->part_batches_generated(); + if(batches_generated == batches_outputted_.load()) { - finished_callback_(batches_generated_.load()); + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + finished_callback_(batches_outputted_.load()); } return Status::OK(); } ExecBatch batch = std::move(*maybe_batch); - batches_generated_++; output_callback_(std::move(batch)); + batches_outputted_++; return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); } @@ -2868,7 +2928,6 @@ namespace arrow PartSuppGenerator(std::shared_ptr gen) : gen_(std::move(gen)) { - batches_generated_.store(0); } Status Init( @@ -2894,7 +2953,9 @@ namespace arrow finished_callback_ = std::move(finished_callback); schedule_callback_ = std::move(schedule_callback); - return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + for(size_t i = 0; i < num_threads; i++) + RETURN_NOT_OK(schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); })); + return Status::OK(); } std::shared_ptr schema() const override @@ -2903,22 +2964,26 @@ namespace arrow } private: - Status ProduceCallback(size_t thread_index) + Status ProduceCallback(size_t) { + if(done_.load()) + return Status::OK(); ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, - gen_->NextPartSuppBatch(thread_index)); - if(done_.load() || !maybe_batch.has_value()) + gen_->NextPartSuppBatch()); + if(!maybe_batch.has_value()) { - bool expected = false; - if(done_.compare_exchange_strong(expected, true)) + int64_t batches_generated = gen_->partsupp_batches_generated(); + if(batches_generated == batches_outputted_.load()) { - finished_callback_(batches_generated_.load()); + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + finished_callback_(batches_outputted_.load()); } return Status::OK(); } ExecBatch batch = std::move(*maybe_batch); - batches_generated_++; output_callback_(std::move(batch)); + batches_outputted_++; return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); } @@ -2961,7 +3026,9 @@ namespace arrow output_callback_ = std::move(output_callback); finished_callback_ = std::move(finished_callback); schedule_callback_ = std::move(schedule_callback); - return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + for(size_t i = 0; i < num_threads; i++) + RETURN_NOT_OK(schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); })); + return Status::OK(); } std::shared_ptr schema() const override @@ -3029,7 +3096,6 @@ namespace arrow tld.to_generate = std::min(batch_size_, rows_to_generate_ - 
tld.custkey_start); - bool is_last_batch = tld.to_generate < batch_size_; tld.batch.clear(); tld.batch.resize(CUSTOMER::kNumCols); @@ -3043,14 +3109,16 @@ namespace arrow result[i] = tld.batch[col_idx]; } ARROW_ASSIGN_OR_RAISE(ExecBatch eb, ExecBatch::Make(std::move(result))); - batches_generated_++; + int64_t batches_to_generate = (rows_to_generate_ + batch_size_ - 1) / batch_size_; + int64_t batches_generated_before_this_one = batches_outputted_.fetch_add(1); + bool is_last_batch = batches_generated_before_this_one == (batches_to_generate - 1); output_callback_(std::move(eb)); if(is_last_batch) { bool expected = false; if(done_.compare_exchange_strong(expected, true)) { - finished_callback_(batches_generated_.load()); + finished_callback_(batches_outputted_.load()); } return Status::OK(); } @@ -3238,7 +3306,6 @@ namespace arrow OrdersGenerator(std::shared_ptr gen) : gen_(std::move(gen)) { - batches_generated_.store(0); } Status Init( @@ -3264,7 +3331,9 @@ namespace arrow finished_callback_ = std::move(finished_callback); schedule_callback_ = std::move(schedule_callback); - return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + for(size_t i = 0; i < num_threads; i++) + RETURN_NOT_OK(schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); })); + return Status::OK(); } std::shared_ptr schema() const override @@ -3273,22 +3342,26 @@ namespace arrow } private: - Status ProduceCallback(size_t thread_index) + Status ProduceCallback(size_t) { + if(done_.load()) + return Status::OK(); ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, - gen_->NextOrdersBatch(thread_index)); - if(done_.load() || !maybe_batch.has_value()) + gen_->NextOrdersBatch()); + if(!maybe_batch.has_value()) { - bool expected = false; - if(done_.compare_exchange_strong(expected, true)) + int64_t batches_generated = gen_->orders_batches_generated(); + if(batches_generated == batches_outputted_.load()) { - finished_callback_(batches_generated_.load()); + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + finished_callback_(batches_outputted_.load()); } return Status::OK(); } ExecBatch batch = std::move(*maybe_batch); - batches_generated_++; output_callback_(std::move(batch)); + batches_outputted_++; return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); } @@ -3331,7 +3404,9 @@ namespace arrow finished_callback_ = std::move(finished_callback); schedule_callback_ = std::move(schedule_callback); - return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); + for(size_t i = 0; i < num_threads; i++) + RETURN_NOT_OK(schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); })); + return Status::OK(); } std::shared_ptr schema() const override @@ -3340,22 +3415,26 @@ namespace arrow } private: - Status ProduceCallback(size_t thread_index) + Status ProduceCallback(size_t) { + if(done_.load()) + return Status::OK(); ARROW_ASSIGN_OR_RAISE(util::optional maybe_batch, - gen_->NextLineItemBatch(thread_index)); + gen_->NextLineItemBatch()); if(!maybe_batch.has_value()) { - bool expected = false; - if(done_.compare_exchange_strong(expected, true)) + int64_t batches_generated = gen_->lineitem_batches_generated(); + if(batches_generated == batches_outputted_.load()) { - finished_callback_(batches_generated_.load()); + bool expected = false; + if(done_.compare_exchange_strong(expected, true)) + 
finished_callback_(batches_outputted_.load()); } return Status::OK(); } ExecBatch batch = std::move(*maybe_batch); - batches_generated_++; output_callback_(std::move(batch)); + batches_outputted_++; return schedule_callback_([this](size_t thread_index) { return this->ProduceCallback(thread_index); }); } diff --git a/cpp/src/arrow/compute/exec/tpch_node_test.cc b/cpp/src/arrow/compute/exec/tpch_node_test.cc index c844d7e88c1..4273e18d4eb 100644 --- a/cpp/src/arrow/compute/exec/tpch_node_test.cc +++ b/cpp/src/arrow/compute/exec/tpch_node_test.cc @@ -58,6 +58,7 @@ namespace arrow for(auto &batch : res) { ValidateBatch(batch); + std::cout << batch.ToString() << std::endl; num_rows += batch.length; } ASSERT_EQ(num_rows, 10000); From de2305a81cd40b5633f931fc571d7e20b943066a Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Sat, 5 Mar 2022 12:47:39 -0800 Subject: [PATCH 06/11] Fill new arrays with empty Datums explicitly --- cpp/src/arrow/compute/exec/tpch_node.cc | 25 ++++++++------------ cpp/src/arrow/compute/exec/tpch_node_test.cc | 1 - 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/compute/exec/tpch_node.cc b/cpp/src/arrow/compute/exec/tpch_node.cc index f9367b1131a..877fc85ab63 100644 --- a/cpp/src/arrow/compute/exec/tpch_node.cc +++ b/cpp/src/arrow/compute/exec/tpch_node.cc @@ -688,8 +688,8 @@ namespace arrow ARROW_DCHECK(part_rows_generated_ <= part_rows_to_generate_); } } - tld.part.clear(); tld.part.resize(PART::kNumCols); + std::fill(tld.part.begin(), tld.part.end(), Datum()); RETURN_NOT_OK(InitPartsupp(thread_index)); for(int col : part_cols_) @@ -760,8 +760,8 @@ namespace arrow ARROW_DCHECK(part_rows_generated_ <= part_rows_to_generate_); } } - tld.part.clear(); tld.part.resize(PART::kNumCols); + std::fill(tld.part.begin(), tld.part.end(), Datum()); RETURN_NOT_OK(InitPartsupp(thread_index)); for(int col : part_cols_) @@ -1152,13 +1152,12 @@ namespace arrow { ThreadLocalData &tld = thread_local_data_[thread_index]; tld.generated_partsupp.reset(); - tld.partsupp.clear(); int64_t num_batches = PartsuppBatchesToGenerate(thread_index); tld.partsupp.resize(num_batches); for(std::vector &batch : tld.partsupp) { - batch.clear(); batch.resize(PARTSUPP::kNumCols); + std::fill(batch.begin(), batch.end(), Datum()); } return Status::OK(); } @@ -1426,8 +1425,8 @@ namespace arrow ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); } } - tld.orders.clear(); tld.orders.resize(ORDERS::kNumCols); + std::fill(tld.orders.begin(), tld.orders.end(), Datum()); RETURN_NOT_OK(GenerateRowCounts(thread_index)); tld.first_batch_offset = 0; tld.generated_lineitem.reset(); @@ -1508,8 +1507,8 @@ namespace arrow orders_batches_generated_.fetch_add(1ll); ARROW_DCHECK(orders_rows_generated_ <= orders_rows_to_generate_); } - tld.orders.clear(); tld.orders.resize(ORDERS::kNumCols); + std::fill(tld.orders.begin(), tld.orders.end(), Datum()); RETURN_NOT_OK(GenerateRowCounts(thread_index)); tld.generated_lineitem.reset(); if(from_queue) @@ -1920,12 +1919,11 @@ namespace arrow tld.lineitem_to_generate += length; } int64_t num_batches = (tld.first_batch_offset + tld.lineitem_to_generate + batch_size_ - 1) / batch_size_; - tld.lineitem.clear(); tld.lineitem.resize(num_batches); for(std::vector &batch : tld.lineitem) { - batch.clear(); batch.resize(LINEITEM::kNumCols); + std::fill(batch.begin(), batch.end(), Datum()); } return Status::OK(); } @@ -1936,17 +1934,14 @@ namespace arrow if(tld.lineitem[ibatch][column].kind() == Datum::NONE) { int32_t byte_width = 
arrow::internal::GetByteWidth(*lineitem_types_[column]); - std::printf("Thread %lu, byte size %d\n", thread_index, byte_width); ARROW_ASSIGN_OR_RAISE(std::unique_ptr buff, AllocateBuffer(batch_size_ * byte_width)); ArrayData ad(lineitem_types_[column], batch_size_, { nullptr, std::move(buff) }); tld.lineitem[ibatch][column] = std::move(ad); out_batch_offset = 0; } - else - { - ARROW_DCHECK(ibatch == 0); + if(ibatch == 0) out_batch_offset = tld.first_batch_offset; - } + return Status::OK(); } @@ -2642,8 +2637,8 @@ namespace arrow tld.to_generate = std::min(batch_size_, rows_to_generate_ - tld.suppkey_start); - tld.batch.clear(); tld.batch.resize(SUPPLIER::kNumCols); + std::fill(tld.batch.begin(), tld.batch.end(), Datum()); for(int col : gen_list_) RETURN_NOT_OK(generators_[col](thread_index)); @@ -3097,8 +3092,8 @@ namespace arrow tld.to_generate = std::min(batch_size_, rows_to_generate_ - tld.custkey_start); - tld.batch.clear(); tld.batch.resize(CUSTOMER::kNumCols); + std::fill(tld.batch.begin(), tld.batch.end(), Datum()); for(int col : gen_list_) RETURN_NOT_OK(generators_[col](thread_index)); diff --git a/cpp/src/arrow/compute/exec/tpch_node_test.cc b/cpp/src/arrow/compute/exec/tpch_node_test.cc index 4273e18d4eb..c844d7e88c1 100644 --- a/cpp/src/arrow/compute/exec/tpch_node_test.cc +++ b/cpp/src/arrow/compute/exec/tpch_node_test.cc @@ -58,7 +58,6 @@ namespace arrow for(auto &batch : res) { ValidateBatch(batch); - std::cout << batch.ToString() << std::endl; num_rows += batch.length; } ASSERT_EQ(num_rows, 10000); From 3eb99c6f31fdd0fafc34d4109940bc2676219316 Mon Sep 17 00:00:00 2001 From: Sasha Krassovsky Date: Tue, 8 Mar 2022 12:01:15 -0800 Subject: [PATCH 07/11] Add some tests, fix some bugs --- cpp/src/arrow/compute/exec/tpch_node.cc | 95 +++--- cpp/src/arrow/compute/exec/tpch_node.h | 6 +- cpp/src/arrow/compute/exec/tpch_node_test.cc | 288 ++++++++++++++++++- 3 files changed, 338 insertions(+), 51 deletions(-) diff --git a/cpp/src/arrow/compute/exec/tpch_node.cc b/cpp/src/arrow/compute/exec/tpch_node.cc index 877fc85ab63..496b44a1dc0 100644 --- a/cpp/src/arrow/compute/exec/tpch_node.cc +++ b/cpp/src/arrow/compute/exec/tpch_node.cc @@ -49,7 +49,6 @@ namespace arrow int64_t generated_offset_ = 0; std::mutex text_guard_; std::unique_ptr text_; - random::pcg32_fast rng_; static constexpr int64_t kChunkSize = 8192; static constexpr int64_t kTextBytes = 300 * 1024 * 1024; // 300 MB }; @@ -65,7 +64,7 @@ namespace arrow virtual Status Init( std::vector columns, - int scale_factor, + float scale_factor, int64_t batch_size) = 0; virtual Status StartProducing( @@ -495,7 +494,7 @@ namespace arrow std::uniform_int_distribution dist(0, 3); const char *comma_space = ", "; bool success = true; - switch(dist(rng_)) + switch(dist(rng)) { case 0: success &= GenerateNoun(offset, rng, arr); @@ -526,7 +525,7 @@ namespace arrow { std::uniform_int_distribution dist(0, 3); bool success = true; - switch(dist(rng_)) + switch(dist(rng)) { case 0: success &= GenerateVerb(offset, rng, arr); @@ -565,7 +564,7 @@ namespace arrow { std::uniform_int_distribution dist(0, 4); bool success = true; - switch(dist(rng_)) + switch(dist(rng)) { case 0: success &= GenerateNounPhrase(offset, rng, arr); @@ -618,7 +617,7 @@ namespace arrow Status Init( size_t num_threads, int64_t batch_size, - int scale_factor) + float scale_factor) { if(!inited_) { @@ -632,7 +631,7 @@ namespace arrow // 5 is the maximum number of different strings we need to concatenate tld.string_indices.resize(5 * batch_size_); } - part_rows_to_generate_ = 
scale_factor_ * 200000; + part_rows_to_generate_ = static_cast(scale_factor_ * 200000); } return Status::OK(); } @@ -693,7 +692,9 @@ namespace arrow RETURN_NOT_OK(InitPartsupp(thread_index)); for(int col : part_cols_) + { RETURN_NOT_OK(part_generators_[col](thread_index)); + } for(int col : partsupp_cols_) RETURN_NOT_OK(partsupp_generators_[col](thread_index)); @@ -995,17 +996,20 @@ namespace arrow RETURN_NOT_OK(AllocatePartBatch(thread_index, PART::P_BRAND)); const char *p_mfgr = reinterpret_cast( tld.part[PART::P_MFGR].array()->buffers[1]->data()); - char *p_brand = reinterpret_cast(tld.part[PART::P_BRAND].array()->buffers[1]->mutable_data()); + char *p_brand = reinterpret_cast( + tld.part[PART::P_BRAND].array()->buffers[1]->mutable_data()); int32_t byte_width = arrow::internal::GetByteWidth(*part_types_[PART::P_BRAND]); int32_t mfgr_byte_width = arrow::internal::GetByteWidth(*part_types_[PART::P_MFGR]); const size_t mfgr_id_offset = std::strlen("Manufacturer#"); for(int64_t irow = 0; irow < tld.part_to_generate; irow++) { + char *row = p_brand + byte_width * irow; char mfgr_id = *(p_mfgr + irow * mfgr_byte_width + mfgr_id_offset); char brand_id = '0' + dist(tld.rng); - std::strncpy(p_brand + byte_width * irow, brand, byte_width); - *(p_brand + byte_width * irow + brand_length) = mfgr_id; - *(p_brand + byte_width * irow + brand_length + 1) = brand_id; + std::strncpy(row, brand, byte_width); + *(row + brand_length) = mfgr_id; + *(row + brand_length + 1) = brand_id; + irow += 0; } } return Status::OK(); @@ -1038,11 +1042,9 @@ namespace arrow tld.string_indices[irow * 3 + ipart] = name_part_index; string_length += std::strlen(types[ipart][name_part_index]); } - // Add 4 because there is a space between each word (i.e. 2 spaces) - offsets[irow + 1] = offsets[irow] + string_length + 2; + offsets[irow + 1] = offsets[irow] + string_length; } - // Add an extra byte for the space after in the very last string. 
- ARROW_ASSIGN_OR_RAISE(std::unique_ptr string_buffer, AllocateBuffer(offsets[tld.part_to_generate] + 1)); + ARROW_ASSIGN_OR_RAISE(std::unique_ptr string_buffer, AllocateBuffer(offsets[tld.part_to_generate])); char *strings = reinterpret_cast(string_buffer->mutable_data()); for(int64_t irow = 0; irow < tld.part_to_generate; irow++) { @@ -1054,7 +1056,6 @@ namespace arrow size_t length = std::strlen(part); std::memcpy(row, part, length); row += length; - *row++ = ' '; } } ArrayData ad(part_types_[PART::P_TYPE], tld.part_to_generate, { nullptr, std::move(offset_buff), std::move(string_buffer) }); @@ -1100,10 +1101,8 @@ namespace arrow size_t container2_length = std::strlen(container2); char *row = p_container + byte_width * irow; - // Abuse strncpy to zero out the rest of the array std::strncpy(row, container1, byte_width); - row[container1_length] = ' '; - std::memcpy(row + container1_length + 1, container2, container2_length); + std::memcpy(row + container1_length, container2, container2_length); } } return Status::OK(); @@ -1225,7 +1224,7 @@ namespace arrow int64_t ipartsupp = 0; int64_t ipart = 0; int64_t ps_to_generate = kPartSuppRowsPerPart * tld.part_to_generate; - const int32_t S = scale_factor_ * 10000; + const int32_t S = static_cast(scale_factor_ * 10000); for(int64_t irow = 0; irow < ps_to_generate; ibatch++) { RETURN_NOT_OK(AllocatePartSuppBatch(thread_index, ibatch, PARTSUPP::PS_SUPPKEY)); @@ -1342,7 +1341,7 @@ namespace arrow std::queue part_output_queue_; std::queue partsupp_output_queue_; int64_t batch_size_; - int scale_factor_; + float scale_factor_; int64_t part_rows_to_generate_; int64_t part_rows_generated_; std::vector part_cols_; @@ -1360,7 +1359,7 @@ namespace arrow Status Init( size_t num_threads, int64_t batch_size, - int scale_factor) + float scale_factor) { if(!inited_) { @@ -1373,7 +1372,7 @@ namespace arrow { tld.items_per_order.resize(batch_size_); } - orders_rows_to_generate_ = scale_factor_ * 150000 * 10; + orders_rows_to_generate_ = static_cast(scale_factor_ * 150000 * 10); } return Status::OK(); } @@ -1711,7 +1710,8 @@ namespace arrow // divisible by 3. Rather than repeatedly generating numbers until we get to // a non-divisible-by-3 number, we just generate a number between // 0 and SF * 50000 - 1, multiply by 3, and then add either 1 or 2. 
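            // For example, at SF = 1 the base is drawn from [0, 49999], so
            // 3 * base + {1, 2} yields exactly the keys in [1, 149999] that are
            // not multiples of 3.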
- std::uniform_int_distribution base_dist(0, scale_factor_ * 50000 - 1); + int32_t sf_50k = static_cast(scale_factor_ * 50000); + std::uniform_int_distribution base_dist(0, sf_50k - 1); std::uniform_int_distribution offset_dist(1, 2); int32_t *o_custkey = reinterpret_cast( tld.orders[ORDERS::O_CUSTKEY].array()->buffers[1]->mutable_data()); @@ -1867,7 +1867,8 @@ namespace arrow { RETURN_NOT_OK(AllocateOrdersBatch(thread_index, ORDERS::O_CLERK)); int32_t byte_width = arrow::internal::GetByteWidth(*orders_types_[ORDERS::O_CLERK]); - std::uniform_int_distribution dist(1, scale_factor_ * 1000); + int64_t max_clerk_id = static_cast(scale_factor_ * 1000); + std::uniform_int_distribution dist(1, max_clerk_id); char *o_clerk = reinterpret_cast( tld.orders[ORDERS::O_CLERK].array()->buffers[1]->mutable_data()); for(int64_t i = 0; i < tld.orders_to_generate; i++) @@ -1991,7 +1992,8 @@ namespace arrow tld.generated_lineitem[LINEITEM::L_PARTKEY] = true; size_t ibatch = 0; - std::uniform_int_distribution dist(1, scale_factor_ * 200000); + int32_t max_partkey = static_cast(scale_factor_ * 200000); + std::uniform_int_distribution dist(1, max_partkey); for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) { size_t batch_offset; @@ -2020,7 +2022,7 @@ namespace arrow size_t ibatch = 0; std::uniform_int_distribution dist(0, 3); - const int32_t S = scale_factor_ * 10000; + const int32_t S = static_cast(scale_factor_ * 10000); for(int64_t irow = 0; irow < tld.lineitem_to_generate; ibatch++) { size_t batch_offset = 0; @@ -2502,7 +2504,7 @@ namespace arrow std::queue orders_output_queue_; std::queue lineitem_output_queue_; int64_t batch_size_; - int scale_factor_; + float scale_factor_; int64_t orders_rows_to_generate_; int64_t orders_rows_generated_; std::vector orders_cols_; @@ -2518,12 +2520,12 @@ namespace arrow public: Status Init( std::vector columns, - int scale_factor, + float scale_factor, int64_t batch_size) override { scale_factor_ = scale_factor; batch_size_ = batch_size; - rows_to_generate_ = scale_factor_ * 10000; + rows_to_generate_ = static_cast(scale_factor_ * 10000); rows_generated_.store(0); ARROW_ASSIGN_OR_RAISE(schema_, SetOutputColumns( columns, @@ -2537,7 +2539,8 @@ namespace arrow std::unordered_set good_rows_set; while(good_rows_set.size() < num_special_rows) { - good_rows_set.insert(dist(rng)); + int64_t row = dist(rng); + good_rows_set.insert(row); } std::unordered_set bad_rows_set; while(bad_rows_set.size() < num_special_rows) @@ -2817,7 +2820,7 @@ namespace arrow std::uniform_int_distribution start_dist(0, str_length - total_length); int32_t start = start_dist(tld.rng); std::memcpy(out + start, customer, customer_length); - std::memcpy(out + start + gap, review, review_length); + std::memcpy(out + start + customer_length + gap, review, review_length); } } @@ -2837,7 +2840,7 @@ namespace arrow ScheduleCallback schedule_callback_; int64_t rows_to_generate_; std::atomic rows_generated_; - int scale_factor_; + float scale_factor_; int64_t batch_size_; std::vector gen_list_; std::shared_ptr schema_; @@ -2853,7 +2856,7 @@ namespace arrow Status Init( std::vector columns, - int scale_factor, + float scale_factor, int64_t batch_size) override { scale_factor_ = scale_factor; @@ -2912,7 +2915,7 @@ namespace arrow FinishedCallback finished_callback_; ScheduleCallback schedule_callback_; int64_t batch_size_; - int64_t scale_factor_; + float scale_factor_; std::shared_ptr gen_; std::shared_ptr schema_; }; @@ -2927,7 +2930,7 @@ namespace arrow Status Init( std::vector columns, - int 
scale_factor, + float scale_factor, int64_t batch_size) override { scale_factor_ = scale_factor; @@ -2986,7 +2989,7 @@ namespace arrow FinishedCallback finished_callback_; ScheduleCallback schedule_callback_; int64_t batch_size_; - int64_t scale_factor_; + float scale_factor_; std::shared_ptr gen_; std::shared_ptr schema_; }; @@ -2996,7 +2999,7 @@ namespace arrow public: Status Init( std::vector columns, - int scale_factor, + float scale_factor, int64_t batch_size) override { scale_factor_ = scale_factor; @@ -3289,7 +3292,7 @@ namespace arrow ScheduleCallback schedule_callback_; int64_t rows_to_generate_; std::atomic rows_generated_; - int scale_factor_; + float scale_factor_; int64_t batch_size_; std::vector gen_list_; std::shared_ptr schema_; @@ -3305,7 +3308,7 @@ namespace arrow Status Init( std::vector columns, - int scale_factor, + float scale_factor, int64_t batch_size) override { scale_factor_ = scale_factor; @@ -3364,7 +3367,7 @@ namespace arrow FinishedCallback finished_callback_; ScheduleCallback schedule_callback_; int64_t batch_size_; - int64_t scale_factor_; + float scale_factor_; std::shared_ptr gen_; std::shared_ptr schema_; }; @@ -3378,7 +3381,7 @@ namespace arrow Status Init( std::vector columns, - int scale_factor, + float scale_factor, int64_t batch_size) override { scale_factor_ = scale_factor; @@ -3437,7 +3440,7 @@ namespace arrow FinishedCallback finished_callback_; ScheduleCallback schedule_callback_; int64_t batch_size_; - int64_t scale_factor_; + float scale_factor_; std::shared_ptr gen_; std::shared_ptr schema_; }; @@ -3447,7 +3450,7 @@ namespace arrow public: Status Init( std::vector columns, - int /*scale_factor*/, + float /*scale_factor*/, int64_t /*batch_size*/) override { ARROW_ASSIGN_OR_RAISE(schema_, @@ -3557,7 +3560,7 @@ namespace arrow public: Status Init( std::vector columns, - int /*scale_factor*/, + float /*scale_factor*/, int64_t /*batch_size*/) override { ARROW_ASSIGN_OR_RAISE(schema_, @@ -3751,7 +3754,7 @@ namespace arrow ThreadIndexer thread_indexer_; }; - Result TpchGen::Make(ExecPlan *plan, int scale_factor, int64_t batch_size) + Result TpchGen::Make(ExecPlan *plan, float scale_factor, int64_t batch_size) { TpchGen result(plan, scale_factor, batch_size); return result; diff --git a/cpp/src/arrow/compute/exec/tpch_node.h b/cpp/src/arrow/compute/exec/tpch_node.h index dc282aae981..1d904a2b5f0 100644 --- a/cpp/src/arrow/compute/exec/tpch_node.h +++ b/cpp/src/arrow/compute/exec/tpch_node.h @@ -36,7 +36,7 @@ namespace arrow class TpchGen { public: - static Result Make(ExecPlan *plan, int scale_factor = 1, int64_t batch_size = 4096); + static Result Make(ExecPlan *plan, float scale_factor = 1.0f, int64_t batch_size = 4096); Result Supplier(std::vector columns = {}); Result Part(std::vector columns = {}); @@ -48,7 +48,7 @@ namespace arrow Result Region(std::vector columns = {}); private: - TpchGen(ExecPlan *plan, int scale_factor, int64_t batch_size) + TpchGen(ExecPlan *plan, float scale_factor, int64_t batch_size) : plan_(plan), scale_factor_(scale_factor), batch_size_(batch_size), @@ -59,7 +59,7 @@ namespace arrow Result CreateNode(std::vector columns); ExecPlan *plan_; - int scale_factor_; + float scale_factor_; int64_t batch_size_; std::shared_ptr part_and_part_supp_generator_; diff --git a/cpp/src/arrow/compute/exec/tpch_node_test.cc b/cpp/src/arrow/compute/exec/tpch_node_test.cc index c844d7e88c1..6253075b85f 100644 --- a/cpp/src/arrow/compute/exec/tpch_node_test.cc +++ b/cpp/src/arrow/compute/exec/tpch_node_test.cc @@ -33,6 +33,8 @@ #include 
"arrow/util/thread_pool.h" #include "arrow/array/validate.h" +#include + namespace arrow { namespace compute @@ -43,6 +45,227 @@ namespace arrow ASSERT_OK(arrow::internal::ValidateArray(*d.array())); } + void VerifyUniqueKey( + std::unordered_set &seen, + const Datum &d, + int32_t min, + int32_t max) + { + const int32_t *keys = reinterpret_cast(d.array()->buffers[1]->data()); + int64_t num_keys = d.length(); + for(int64_t i = 0; i < num_keys; i++) + { + ASSERT_TRUE(seen.find(keys[i]) == seen.end()); + ASSERT_LE(keys[i], max); + ASSERT_GE(keys[i], min); + seen.insert(keys[i]); + } + } + + void VerifyStringAndNumber_FixedWidth( + const Datum &strings, + const Datum &numbers, + int byte_width, + const char *prefix, + bool verify_padding = true) + { + int64_t length = strings.length(); + const char *str = reinterpret_cast( + strings.array()->buffers[1]->data()); + + const int32_t *nums = nullptr; + if(numbers.kind() != Datum::NONE) + { + ASSERT_EQ(length, numbers.length()); + nums = reinterpret_cast( + numbers.array()->buffers[1]->data()); + } + + size_t num_offset = std::strlen(prefix); + for(int64_t i = 0; i < length; i++) + { + const char *row = str + i * byte_width; + ASSERT_EQ(std::memcmp(row, prefix, num_offset), 0) << row << ", prefix=" << prefix << ", i=" << i; + const char *num_str = row + num_offset; + int64_t num = 0; + int ibyte = static_cast(num_offset); + for(; *num_str && ibyte < byte_width; ibyte++) + { + num *= 10; + ASSERT_TRUE(std::isdigit(*num_str)); + num += *num_str++ - '0'; + } + if(nums) + { + ASSERT_EQ(static_cast(num), nums[i]); + } + if(verify_padding) + { + int num_chars = ibyte - num_offset; + ASSERT_GE(num_chars, 9); + } + } + } + + void VerifyVString(const Datum &d, int min_length, int max_length) + { + int64_t length = d.length(); + const int32_t *off = reinterpret_cast( + d.array()->buffers[1]->data()); + const char *str = reinterpret_cast( + d.array()->buffers[2]->data()); + for(int64_t i = 0; i < length; i++) + { + int32_t start = off[i]; + int32_t end = off[i + 1]; + int32_t length = end - start; + ASSERT_LE(length, max_length); + ASSERT_GE(length, min_length); + for(int32_t i = start; i < end; i++) + { + bool is_valid = std::isdigit(str[i]) || std::isalpha(str[i]) || str[i] == ',' || str[i] == ' '; + ASSERT_TRUE(is_valid) << "Character " << str[i] << " is not a digit, a letter, a comma, or a space"; + } + } + } + + void VerifyAllBetween(const Datum &d, int32_t min, int32_t max) + { + int64_t length = d.length(); + const int32_t *n = reinterpret_cast(d.array()->buffers[1]->data()); + for(int64_t i = 0; i < length; i++) + { + ASSERT_GE(n[i], min) << "Value must be between " << min << " and " << max << ", got " << n[i]; + ASSERT_LE(n[i], max) << "Value must be between " << min << " and " << max << ", got " << n[i]; + } + } + + void VerifyNationKey(const Datum &d) + { + VerifyAllBetween(d, 0, 24); + } + + void VerifyPhone(const Datum &d) + { + int64_t length = d.length(); + const char *phones = reinterpret_cast(d.array()->buffers[1]->data()); + constexpr int kByteWidth = 15; // This is common for all PHONE columns + for(int64_t i = 0; i < length; i++) + { + const char *row = phones + i * kByteWidth; + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_EQ(*row++, '-'); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_EQ(*row++, '-'); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_EQ(*row++, 
'-'); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + ASSERT_TRUE(std::isdigit(*row++)); + } + } + + void VerifyDecimalsBetween(const Datum &d, int64_t min, int64_t max) + { + int64_t length = d.length(); + const Decimal128 *decs = reinterpret_cast( + d.array()->buffers[1]->data()); + for(int64_t i = 0; i < length; i++) + { + int64_t val = static_cast(decs[i]); + ASSERT_LE(val, max); + ASSERT_GE(val, min); + } + } + + void VerifyCorrectNumberOfWords_Varlen(const Datum &d, int num_words) + { + int expected_num_spaces = num_words - 1; + int64_t length = d.length(); + const int32_t *offsets = reinterpret_cast( + d.array()->buffers[1]->data()); + const char *str = reinterpret_cast( + d.array()->buffers[2]->data()); + + for(int64_t i = 0; i < length; i++) + { + int actual_num_spaces = 0; + + int32_t start = offsets[i]; + int32_t end = offsets[i + 1]; + int32_t str_len = end - start; + char tmp_str[256] = {}; + std::memcpy(tmp_str, str + start, str_len); + bool is_only_alphas_or_spaces = true; + for(int32_t j = offsets[i]; j < offsets[i + 1]; j++) + { + bool is_space = str[j] == ' '; + actual_num_spaces += is_space; + is_only_alphas_or_spaces &= (is_space || std::isalpha(str[j])); + } + ASSERT_TRUE(is_only_alphas_or_spaces) << "Words must be composed only of letters, got " << tmp_str; + ASSERT_EQ(actual_num_spaces, expected_num_spaces) << "Wrong number of spaces in " << tmp_str; + } + } + + void VerifyCorrectNumberOfWords_FixedWidth(const Datum &d, int num_words, int byte_width) + { + int expected_num_spaces = num_words - 1; + int64_t length = d.length(); + const char *str = reinterpret_cast( + d.array()->buffers[1]->data()); + + for(int64_t i = 0; i < length; i++) + { + int actual_num_spaces = 0; + const char *row = str + i * byte_width; + bool is_only_alphas_or_spaces = true; + for(int32_t j = 0; j < byte_width && row[j]; j++) + { + bool is_space = row[j] == ' '; + actual_num_spaces += is_space; + is_only_alphas_or_spaces &= (is_space || std::isalpha(row[j])); + } + ASSERT_TRUE(is_only_alphas_or_spaces) << "Words must be composed only of letters, got " << row; + ASSERT_EQ(actual_num_spaces, expected_num_spaces) << "Wrong number of spaces in " << row; + } + } + + void CountModifiedComments(const Datum &d, int &good_count, int &bad_count) + { + int64_t length = d.length(); + const int32_t *offsets = reinterpret_cast( + d.array()->buffers[1]->data()); + const char *str = reinterpret_cast( + d.array()->buffers[2]->data()); + // Length of S_COMMENT is at most 100 + char tmp_string[101]; + for(int64_t i = 0; i < length; i++) + { + const char *row = str + offsets[i]; + int32_t row_length = offsets[i + 1] - offsets[i]; + std::memset(tmp_string, 0, sizeof(tmp_string)); + std::memcpy(tmp_string, row, row_length); + char *customer = std::strstr(tmp_string, "Customer"); + char *recommends = std::strstr(tmp_string, "Recommends"); + char *complaints = std::strstr(tmp_string, "Complaints"); + if(customer) + { + ASSERT_TRUE((recommends != nullptr) ^ (complaints != nullptr)); + if(recommends) + good_count++; + if(complaints) + bad_count++; + } + } + } + TEST(TpchNode, Supplier) { ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool()); @@ -54,13 +277,34 @@ namespace arrow std::ignore = *sink.AddToPlan(plan.get()); auto fut = StartAndCollect(plan.get(), sink_gen); auto res = *fut.MoveResult(); + + int64_t kExpectedRows = 10000; int64_t num_rows = 0; + + std::unordered_set seen_suppkey; + int good_count = 0; + int bad_count = 0; 
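        // The loop below validates each batch column by column: unique supplier keys
        // in [1, 10000], the fixed-width name and phone formats, the account-balance
        // range, and the planted comment strings; the assertions after the loop expect
        // exactly five "Recommends" and five "Complaints" comments at the default SF = 1.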
for(auto &batch : res) { ValidateBatch(batch); + VerifyUniqueKey( + seen_suppkey, + batch[0], + /*min=*/1, + /*max=*/static_cast(kExpectedRows)); + VerifyStringAndNumber_FixedWidth(batch[1], batch[0], /*byte_width=*/25, "Supplie#r"); + VerifyVString(batch[2], /*min_length=*/10, /*max_length=*/40); + VerifyNationKey(batch[3]); + VerifyPhone(batch[4]); + VerifyDecimalsBetween(batch[5], -99999, 999999); + CountModifiedComments(batch[6], good_count, bad_count); num_rows += batch.length; } - ASSERT_EQ(num_rows, 10000); + ASSERT_EQ(seen_suppkey.size(), kExpectedRows); + ASSERT_EQ(num_rows, kExpectedRows); + ASSERT_EQ(good_count, 5); + ASSERT_EQ(bad_count, 5); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, Part) @@ -74,13 +318,47 @@ namespace arrow std::ignore = *sink.AddToPlan(plan.get()); auto fut = StartAndCollect(plan.get(), sink_gen); auto res = *fut.MoveResult(); + + int64_t kExpectedRows = 200000; int64_t num_rows = 0; + + std::unordered_set seen_partkey; for(auto &batch : res) { ValidateBatch(batch); + VerifyUniqueKey( + seen_partkey, + batch[0], + /*min=*/1, + /*max=*/static_cast(kExpectedRows)); + VerifyCorrectNumberOfWords_Varlen( + batch[1], + /*num_words*=*/5); + VerifyStringAndNumber_FixedWidth( + batch[2], + Datum(), + /*byte_width=*/25, + "Manufacturer#", + /*verify_padding=*/false); + VerifyStringAndNumber_FixedWidth( + batch[3], + Datum(), + /*byte_width=*/10, + "Brand#", + /*verify_padding=*/false); + VerifyCorrectNumberOfWords_Varlen( + batch[4], + /*num_words=*/3); + VerifyAllBetween(batch[5], /*min=*/1, /*max=*/50); + VerifyCorrectNumberOfWords_FixedWidth( + batch[6], + /*num_words=*/2, + /*byte_width=*/10); num_rows += batch.length; } - ASSERT_EQ(num_rows, 200000); + ASSERT_EQ(seen_partkey.size(), kExpectedRows); + ASSERT_EQ(num_rows, kExpectedRows); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, PartSupp) @@ -101,6 +379,7 @@ namespace arrow num_rows += batch.length; } ASSERT_EQ(num_rows, 800000); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, Customer) @@ -121,6 +400,7 @@ namespace arrow num_rows += batch.length; } ASSERT_EQ(num_rows, 150000); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, Orders) @@ -141,6 +421,7 @@ namespace arrow num_rows += batch.length; } ASSERT_EQ(num_rows, 1500000); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, Lineitem) @@ -158,6 +439,7 @@ namespace arrow { ValidateBatch(batch); } + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, Nation) @@ -178,6 +460,7 @@ namespace arrow num_rows += batch.length; } ASSERT_EQ(num_rows, 25); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } TEST(TpchNode, Region) @@ -198,6 +481,7 @@ namespace arrow num_rows += batch.length; } ASSERT_EQ(num_rows, 5); + arrow::internal::GetCpuThreadPool()->WaitForIdle(); } } } From 7f3e6bc57b6ad0bfd9fcad694faa7a090c9091ae Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 8 Mar 2022 12:50:14 -1000 Subject: [PATCH 08/11] First pass at a query testing tool. 
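The tool resolves queries by name through the registry declared in builtin_queries.h below. As an illustration only, and not part of this patch, a helper that looks up a built-in query and builds its plan could look like the sketch that follows; MakeBuiltinPlan and its error message are hypothetical, while GetBuiltinQueries, QueryPlanFactory, and the factory's consumer argument come from the files added here (the sketch assumes builtin_queries.h pulls in the Arrow compute headers it needs).

#include <memory>
#include <string>
#include <utility>

#include "builtin_queries.h"

namespace cp = arrow::compute;

namespace arrow::qtest {

// Hypothetical helper: resolve a built-in query by name and build its ExecPlan.
Result<std::shared_ptr<cp::ExecPlan>> MakeBuiltinPlan(
    const std::string& name, std::shared_ptr<cp::SinkNodeConsumer> consumer) {
  const auto& queries = GetBuiltinQueries();
  auto it = queries.find(name);
  if (it == queries.end()) {
    return Status::Invalid("No built-in query named '", name, "'");
  }
  // Each QueryPlanFactory receives the consumer that will absorb the results.
  return it->second(std::move(consumer));
}

}  // namespace arrow::qtest

With the flags defined in query_tester.cc below, an invocation along the lines of "query_tester tpch-1 --validate --num-iterations 2" would pass a registered query name together with the iteration and validation options that the program parses.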
--- dev/qtester/.clang-tidy | 22 + dev/qtester/.gitignore | 45 ++ dev/qtester/CMakeLists.txt | 59 ++ dev/qtester/builtin_queries.cc | 101 +++ dev/qtester/builtin_queries.h | 17 + dev/qtester/queries/tpch1.substrait.pb.json | 749 ++++++++++++++++++++ dev/qtester/query_tester.cc | 51 ++ dev/qtester/test_runner.cc | 219 ++++++ dev/qtester/test_runner.h | 113 +++ 9 files changed, 1376 insertions(+) create mode 100644 dev/qtester/.clang-tidy create mode 100644 dev/qtester/.gitignore create mode 100644 dev/qtester/CMakeLists.txt create mode 100644 dev/qtester/builtin_queries.cc create mode 100644 dev/qtester/builtin_queries.h create mode 100644 dev/qtester/queries/tpch1.substrait.pb.json create mode 100644 dev/qtester/query_tester.cc create mode 100644 dev/qtester/test_runner.cc create mode 100644 dev/qtester/test_runner.h diff --git a/dev/qtester/.clang-tidy b/dev/qtester/.clang-tidy new file mode 100644 index 00000000000..bcdacd174be --- /dev/null +++ b/dev/qtester/.clang-tidy @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +--- +Checks: '*,-llvmlibc*,-cert-err58-cpp,-modernize-use-trailing-return-type,-fuchsia-*,-cppcoreguidelines-*, + -readability-magic-numbers,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-cplusplus.NewDeleteLeaks, + -readability-function-cognitive-complexity, -hicpp-special-member-functions, -bugprone-exception-escape' +WarningsAsErrors: '*' +FormatStyle: 'file' diff --git a/dev/qtester/.gitignore b/dev/qtester/.gitignore new file mode 100644 index 00000000000..e1e921762f9 --- /dev/null +++ b/dev/qtester/.gitignore @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +thirdparty/*.tar* +CMakeFiles/ +CMakeCache.txt +CMakeUserPresets.json +CTestTestfile.cmake +Makefile +cmake_install.cmake +build/ +*-build/ +Testing/ +build-support/boost_* +vcpkg_installed/ + +# Build directories created by Clion +cmake-build-*/ + +######################################### +# Editor temporary/working/backup files # +.#* +*\#*\# +[#]*# +*~ +*$ +*.bak +*flymake* +*.kdev4 +*.log +*.swp diff --git a/dev/qtester/CMakeLists.txt b/dev/qtester/CMakeLists.txt new file mode 100644 index 00000000000..b35260f4134 --- /dev/null +++ b/dev/qtester/CMakeLists.txt @@ -0,0 +1,59 @@ +cmake_minimum_required(VERSION 3.19) +project(arrow-query-tester) + +set(CMAKE_CXX_STANDARD 17) +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++") +endif() +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +include(ExternalProject) + +# Add Arrow +find_package(Arrow REQUIRED COMPONENTS dataset parquet engine) +# Argparse is a modern library for interpreting CLI args +set(ARGPARSE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/argparse_ep-install") +set(ARGPARSE_CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${ARGPARSE_PREFIX}") +set(ARGPARSE_INCLUDE_DIR "${ARGPARSE_PREFIX}/include") +externalproject_add(argparse + CMAKE_ARGS ${ARGPARSE_CMAKE_ARGS} + INSTALL_DIR ${ARGPARSE_PREFIX} + URL https://github.com/p-ranav/argparse/archive/refs/tags/v2.2.tar.gz + URL_HASH "SHA256=f0fc6ab7e70ac24856c160f44ebb0dd79dc1f7f4a614ee2810d42bb73799872b") + +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_CXX_CLANG_TIDY "clang-tidy-12") +endif() + +function(ADD_PROGRAM TARGET) + set(options) + set(one_value_args) + set(multi_value_args EXTRA_SOURCES) + cmake_parse_arguments(ARG + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN}) + add_executable( + ${TARGET} + ${TARGET}.cc + ${ARG_EXTRA_SOURCES} + ) + add_dependencies(${TARGET} argparse) + target_include_directories(${TARGET} SYSTEM PRIVATE "${ARGPARSE_INCLUDE_DIR}") + target_link_libraries( + ${TARGET} + arrow_shared + arrow_dataset + arrow_engine + parquet + ) + if (MSVC) + target_compile_options(${TARGET} PRIVATE /W4 /WX) + else () + target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror) + endif () + +endfunction() + +add_program(query_tester EXTRA_SOURCES builtin_queries.cc test_runner.cc) diff --git a/dev/qtester/builtin_queries.cc b/dev/qtester/builtin_queries.cc new file mode 100644 index 00000000000..d4a6da5f404 --- /dev/null +++ b/dev/qtester/builtin_queries.cc @@ -0,0 +1,101 @@ +#include "builtin_queries.h" + +#include +#include +#include + +namespace cp = arrow::compute; + +namespace arrow::qtest { + +namespace { + +Result> Tpch1( + std::shared_ptr consumer) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, cp::ExecPlan::Make()); + ARROW_ASSIGN_OR_RAISE(cp::TpchGen gen, cp::TpchGen::Make(plan.get(), 1)); + + ARROW_ASSIGN_OR_RAISE( + cp::ExecNode * lineitem, + gen.Lineitem({"L_QUANTITY", "L_EXTENDEDPRICE", "L_TAX", "L_DISCOUNT", "L_SHIPDATE", + "L_RETURNFLAG", "L_LINESTATUS"})); + + std::shared_ptr sept_2_1998 = std::make_shared( + 10471); // September 2, 1998 is 10471 days after January 1, 1970 + cp::Expression filter = + cp::less_equal(cp::field_ref("L_SHIPDATE"), cp::literal(std::move(sept_2_1998))); + cp::FilterNodeOptions filter_opts(filter); + + cp::Expression l_returnflag = cp::field_ref("L_RETURNFLAG"); + cp::Expression l_linestatus = cp::field_ref("L_LINESTATUS"); + cp::Expression quantity = cp::field_ref("L_QUANTITY"); + cp::Expression base_price = cp::field_ref("L_EXTENDEDPRICE"); + + 
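  // Decimal128{0, 100} with type decimal(12, 2) encodes the value 1.00 (high word 0,
  // low word 100, interpreted at scale 2); it is used below to build the
  // (1 - L_DISCOUNT) and (1 + L_TAX) factors.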
std::shared_ptr decimal_1 = + std::make_shared(Decimal128{0, 100}, decimal(12, 2)); + cp::Expression discount_multiplier = + cp::call("subtract", {cp::literal(decimal_1), cp::field_ref("L_DISCOUNT")}); + cp::Expression tax_multiplier = + cp::call("add", {cp::literal(decimal_1), cp::field_ref("L_TAX")}); + cp::Expression disc_price = + cp::call("multiply", {cp::field_ref("L_EXTENDEDPRICE"), discount_multiplier}); + cp::Expression charge = cp::call( + "multiply", {cp::call("cast", + {cp::call("multiply", {cp::field_ref("L_EXTENDEDPRICE"), + discount_multiplier})}, + cp::CastOptions::Unsafe(decimal(12, 2))), + tax_multiplier}); + cp::Expression discount = cp::field_ref("L_DISCOUNT"); + + std::vector projection_list = {l_returnflag, l_linestatus, quantity, + base_price, disc_price, charge, + quantity, base_price, discount}; + std::vector project_names = { + "l_returnflag", "l_linestatus", "sum_qty", "sum_base_price", "sum_disc_price", + "sum_charge", "avg_qty", "avg_price", "avg_disc"}; + cp::ProjectNodeOptions project_opts(std::move(projection_list)); + + cp::ScalarAggregateOptions sum_opts = cp::ScalarAggregateOptions::Defaults(); + cp::CountOptions count_opts(cp::CountOptions::CountMode::ALL); + std::vector aggs = { + {"hash_sum", &sum_opts}, {"hash_sum", &sum_opts}, {"hash_sum", &sum_opts}, + {"hash_sum", &sum_opts}, {"hash_mean", &sum_opts}, {"hash_mean", &sum_opts}, + {"hash_mean", &sum_opts}, {"hash_count", &count_opts}}; + + std::vector cols = {2, 3, 4, 5, 6, 7, 8, 2}; + + std::vector names = {"sum_qty", "sum_base_price", "sum_disc_price", + "sum_charge", "avg_qty", "avg_price", + "avg_disc", "count_order"}; + + std::vector keys = {"L_RETURNFLAG", "L_LINESTATUS"}; + cp::AggregateNodeOptions agg_opts(aggs, cols, names, keys); + + cp::ConsumingSinkNodeOptions sink_opts(std::move(consumer)); + + cp::Declaration filter_decl("filter", {cp::Declaration::Input(lineitem)}, filter_opts); + cp::Declaration project_decl("project", project_opts); + cp::Declaration aggregate_decl("aggregate", agg_opts); + cp::Declaration sink_decl("consuming_sink", sink_opts); + + cp::Declaration q1 = + cp::Declaration::Sequence({filter_decl, project_decl, aggregate_decl, sink_decl}); + std::ignore = *q1.AddToPlan(plan.get()); + return plan; +} + +std::unordered_map CreateBuiltinQueriesMap() { + std::unordered_map builtin_queries_map; + builtin_queries_map.insert({"tpch-1", Tpch1}); + return builtin_queries_map; +} + +} // namespace + +const std::unordered_map& GetBuiltinQueries() { + static std::unordered_map builtin_queries_map = + CreateBuiltinQueriesMap(); + return builtin_queries_map; +} + +} // namespace arrow::qtest \ No newline at end of file diff --git a/dev/qtester/builtin_queries.h b/dev/qtester/builtin_queries.h new file mode 100644 index 00000000000..b84e8c98f0e --- /dev/null +++ b/dev/qtester/builtin_queries.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace arrow::qtest { + +using QueryPlanFactory = std::function>( + std::shared_ptr)>; + +const std::unordered_map& GetBuiltinQueries(); + +} // namespace arrow::qtest \ No newline at end of file diff --git a/dev/qtester/queries/tpch1.substrait.pb.json b/dev/qtester/queries/tpch1.substrait.pb.json new file mode 100644 index 00000000000..4b0ddaa6bc2 --- /dev/null +++ b/dev/qtester/queries/tpch1.substrait.pb.json @@ -0,0 +1,749 @@ +{ + "extensionUris": [{ + "extensionUriAnchor": 3, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_arithmetic_decimal.yaml" + 
}, { + "extensionUriAnchor": 1, + "uri": "/functions_datetime.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "lte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "subtract:date_day" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "multiply:opt_decimal_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "subtract:opt_decimal_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "add:opt_decimal_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 5, + "name": "sum:opt_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 6, + "name": "avg:opt_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 7, + "name": "count:opt" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17, 18, 19, 20, 21, 22] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 1, + "args": [{ + "literal": { + "date": 10561, + "nullable": false + } + }, { + "literal": { + "intervalDayToSecond": { + "days": 120, + "seconds": 0 + }, + "nullable": false + } + }], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + } + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 3, + "args": [{ + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false + } + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [{ + "scalarFunction": { + "functionReference": 2, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 3, + "args": [{ + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false + } + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "scalarFunction": { + "functionReference": 4, + "args": [{ + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false + } + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + }], + "outputType": { + "decimal": { + "scale": 
0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + }, { + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 5, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 5, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 5, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 6, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 6, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 6, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + 
"outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["L_RETURNFLAG", "L_LINESTATUS", "SUM_QTY", "SUM_BASE_PRICE", "SUM_DISC_PRICE", "SUM_CHARGE", "AVG_QTY", "AVG_PRICE", "AVG_DISC", "COUNT_ORDER"] + } + }], + "expectedTypeUrls": [] +} diff --git a/dev/qtester/query_tester.cc b/dev/qtester/query_tester.cc new file mode 100644 index 00000000000..f9d924532a3 --- /dev/null +++ b/dev/qtester/query_tester.cc @@ -0,0 +1,51 @@ +#include + +#include "test_runner.h" + +int main(int argc, char* argv[]) { + argparse::ArgumentParser program("query_tester"); + + program.add_argument("query").required().help("name of the query to run"); + program.add_argument("--num-iterations").default_value(1).scan<'i', int>(); + program.add_argument("--cpu-threads") + .help("size to use for the CPU thread pool, default controlled by Arrow") + .scan<'i', int>(); + program.add_argument("--io-threads") + .help("size to use for the I/O thread pool, default controlled by Arrow") + .scan<'i', int>(); + program.add_argument("--validate") + .help("if set the program will validate the query results") + .default_value(false) + .implicit_value(true); + + try { + program.parse_args(argc, argv); + } catch (const std::runtime_error& err) { + std::cerr << err.what() << std::endl; + std::cerr << program; + return 1; + } + + arrow::qtest::QueryTestOptions options; + options.query_name = program.get("query"); + options.cpu_threads = program.present("--cpu-threads"); + options.io_threads = program.present("--io-threads"); + options.validate = program.get("--validate"); + options.num_iterations = program.get("--num-iterations"); + options.executable_path = argv[0]; + + arrow::Result result = + arrow::qtest::RunQueryTest(options); + if (!result.ok()) { + std::cout << "Error encountered running test: " << result.status() << std::endl; + return 1; + } + + arrow::Status report_status = arrow::qtest::ReportResult(*result); + if (!report_status.ok()) { + std::cout << "Error encountered reporting status: " << result.status() << std::endl; + return 1; + } + + return 0; +} \ No newline at end of file diff --git a/dev/qtester/test_runner.cc b/dev/qtester/test_runner.cc new file mode 100644 index 00000000000..72f3eaf1a23 --- /dev/null +++ b/dev/qtester/test_runner.cc @@ -0,0 +1,219 @@ +#include "test_runner.h" +#include "builtin_queries.h" + +#include + +#include +#include +#include + +namespace std_fs = std::filesystem; +namespace cp = arrow::compute; + +namespace arrow::qtest { + +Status ValidateOptions(const QueryTestOptions& options) { + if (options.cpu_threads && *options.cpu_threads <= 0) { + return Status::Invalid("cpu-threads must be > 0"); + } + if (options.io_threads && *options.io_threads <= 0) { + return Status::Invalid("io-threads must be > 0"); + } + if (options.num_iterations <= 0) { + return 
Status::Invalid("num-iterations must be > 0"); + } + if (options.validate) { + return Status::NotImplemented("validation has not yet been implemented"); + } + return Status::OK(); +} + +namespace { +Result DoGetRootDirectory(const std::string& executable_path) { + std_fs::path path = std_fs::absolute(std_fs::path(executable_path)); + while (true) { + if (std_fs::is_directory(path / "queries") && + std_fs::is_directory(path / "datasets")) { + return path; + } + if (path.has_parent_path() && path != path.parent_path()) { + path = path.parent_path(); + } else { + return Status::Invalid( + "Could not locate the root directory. Did you perhaps move or copy the " + "query_tester executable outside of the project directory?"); + } + } +} + +Result GetRootDirectory(const std::string& executable) { + static Result cached_root_directory = DoGetRootDirectory(executable); + return cached_root_directory; +} + +Result> PathToBuffer(const std_fs::path& path) { + fs::LocalFileSystem local_fs; + ARROW_ASSIGN_OR_RAISE(fs::FileInfo file_info, local_fs.GetFileInfo(path)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr in_stream, + local_fs.OpenInputStream(path)); + return in_stream->Read(file_info.size()); +} + +Result> DeclsToPlan( + const std::vector& decls) { + ARROW_ASSIGN_OR_RAISE(auto plan, compute::ExecPlan::Make()); + for (const auto& decl : decls) { + ARROW_RETURN_NOT_OK(decl.AddToPlan(plan.get())); + } + return plan; +} + +Result> LoadQueryFromSubstraitJson( + const std_fs::path& path, const engine::ConsumerFactory& consumer_factory) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr json_bytes, PathToBuffer(path)); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr plan_bytes, + engine::internal::SubstraitFromJSON("Plan", json_bytes->ToString())); + ARROW_ASSIGN_OR_RAISE(std::vector decls, + engine::DeserializePlan(*plan_bytes, consumer_factory)); + return DeclsToPlan(decls); +} + +Result> LoadQueryFromSubstraitBinary( + const std_fs::path& path, const engine::ConsumerFactory& consumer_factory) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan_bytes, PathToBuffer(path)); + ARROW_ASSIGN_OR_RAISE(std::vector decls, + engine::DeserializePlan(*plan_bytes, consumer_factory)); + return DeclsToPlan(decls); +} + +Result> LoadQueryFromPath( + const std_fs::path& path, const std::string& extension, + const engine::ConsumerFactory& consumer_factory) { + if (extension == "substrait.pb.json") { + return LoadQueryFromSubstraitJson(path, consumer_factory); + } + if (extension == "substrait.pb") { + return LoadQueryFromSubstraitBinary(path, consumer_factory); + } + + return Status::Invalid("No handler for query file format ", extension); +} + +class QueryResultUpdatingConsumer : public cp::SinkNodeConsumer { + public: + explicit QueryResultUpdatingConsumer(QueryTestResult* result) : result_(result) {} + + arrow::Status Consume(cp::ExecBatch batch) override { + std::lock_guard lg(mutex_); + result_->iterations[iteration_].num_rows_processed += batch.length; + result_->iterations[iteration_].num_bytes_processed += batch.TotalBufferSize(); + return arrow::Status::OK(); + } + + arrow::Future<> Finish() override { + result_->iterations[iteration_].end_time = std::chrono::high_resolution_clock::now(); + return arrow::Future<>::MakeFinished(); + } + + void Start(std::size_t iteration) { + iteration_ = iteration; + result_->iterations.emplace_back(); + result_->iterations[iteration_].start_time = + std::chrono::high_resolution_clock::now(); + } + + private: + QueryTestResult* result_; + std::mutex mutex_; + std::size_t iteration_ = 0; +}; + 
+Result>> LoadQueryFromFiles( + const std::string& root_path, const std::string& query_name, + const engine::ConsumerFactory& consumer_factory) { + for (const auto& entry : + std_fs::directory_iterator(std_fs::path(root_path) / "queries")) { + auto entry_path_str = entry.path().filename().string(); + auto first_dot_idx = entry_path_str.find('.'); + if (first_dot_idx != std::string::npos) { + auto stem = entry_path_str.substr(0, first_dot_idx); + if (stem == query_name) { + auto extension = entry_path_str.substr(first_dot_idx + 1); + return LoadQueryFromPath(entry.path(), extension, consumer_factory); + } + } + } + return std::nullopt; +} + +Result>> LoadQueryFromBuiltin( + const std::string& query_name, const engine::ConsumerFactory& consumer_factory) { + const auto& builtin_queries_map = GetBuiltinQueries(); + const auto& query = builtin_queries_map.find(query_name); + if (query == builtin_queries_map.end()) { + return std::nullopt; + } + std::shared_ptr consumer = consumer_factory(); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, query->second(consumer)); + return plan; +} + +Status InitializeArrow(const QueryTestOptions& options) { + if (options.cpu_threads) { + ARROW_RETURN_NOT_OK( + arrow::internal::GetCpuThreadPool()->SetCapacity(*options.cpu_threads)); + } + if (options.io_threads) { + ARROW_RETURN_NOT_OK(arrow::io::SetIOThreadPoolCapacity(*options.io_threads)); + } + return Status::OK(); +} + +} // namespace + +Result> LoadQuery( + const std::string& root_path, const std::string& query_name, + const engine::ConsumerFactory& consumer_factory) { + ARROW_ASSIGN_OR_RAISE(std::optional> maybe_query, + LoadQueryFromFiles(root_path, query_name, consumer_factory)); + if (maybe_query) { + return *maybe_query; + } + + ARROW_ASSIGN_OR_RAISE(maybe_query, LoadQueryFromBuiltin(query_name, consumer_factory)); + if (maybe_query) { + return *maybe_query; + } + + return Status::Invalid("Could not find any query file or builtin query named ", + query_name); +} + +Result RunQueryTest(const QueryTestOptions& options) { + ARROW_ASSIGN_OR_RAISE(auto root_path, GetRootDirectory(options.executable_path)); + ARROW_RETURN_NOT_OK(ValidateOptions(options)); + ARROW_RETURN_NOT_OK(InitializeArrow(options)); + QueryTestResult result; + auto consumer = std::make_shared(&result); + auto consumer_factory = [consumer] { return consumer; }; + for (int i = 0; i < options.num_iterations; i++) { + consumer->Start(i); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, + LoadQuery(root_path, options.query_name, consumer_factory)); + ARROW_RETURN_NOT_OK(plan->StartProducing()); + ARROW_RETURN_NOT_OK(plan->finished().status()); + } + + return result; +} + +Status ReportResult(const QueryTestResult& result) { + std::cout << "Average Duration: " << result.average_duration_seconds() + << "s (+/- " << result.stderr_duration_seconds() << "s)" << std::endl; + std::cout << "Average Output Rows/S: " << result.average_rps() << "rps" << std::endl; + std::cout << "Average Output Bytes/S: " << result.average_bps() << "bps" << std::endl; + return Status::OK(); +} + +} // namespace arrow::qtest \ No newline at end of file diff --git a/dev/qtester/test_runner.h b/dev/qtester/test_runner.h new file mode 100644 index 00000000000..74685cd471e --- /dev/null +++ b/dev/qtester/test_runner.h @@ -0,0 +1,113 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace arrow { +namespace qtest { + +struct QueryTestOptions { + /// Name of the query to run, will look for a query input file in the queries folder + std::string 
query_name; + /// Number of CPU threads to initialize Arrow with. By default Arrow will base this + /// on std::thread::hardware_concurrency + std::optional cpu_threads; + /// Number of I/O threads to initialize Arrow with. By default Arrow will use 8 + std::optional io_threads; + /// Number of iterations of the query to run, defaults to a single run + int num_iterations = 1; + /// If true, validate the query results, if possible + bool validate = false; + /// Path to the query_tester executable, used to locate queries & datasets + std::string executable_path; +}; + +struct QueryIterationResult { + uint64_t num_rows_processed = 0; + uint64_t num_bytes_processed = 0; + std::chrono::high_resolution_clock::time_point start_time; + std::chrono::high_resolution_clock::time_point end_time; + + double duration_seconds() const { + return std::chrono::duration(end_time - start_time).count(); + } +}; + +struct QueryTestResult { + std::vector iterations; + + inline uint64_t total_bytes_processed() const { + uint64_t sum = 0; + for (const auto& iteration : iterations) { + sum += iteration.num_bytes_processed; + } + return sum; + } + + inline uint64_t total_rows_processed() const { + uint64_t sum = 0; + for (const auto& iteration : iterations) { + sum += iteration.num_rows_processed; + } + return sum; + } + + inline double total_duration_seconds() const { + double sum = 0; + for (const auto& iteration : iterations) { + sum += iteration.duration_seconds(); + } + return sum; + } + + inline double average_duration_seconds() const { + return total_duration_seconds() / iterations.size(); + } + + inline double stderr_duration_seconds() const { + double avg = average_duration_seconds(); + double err_sum = 0; + for (const auto& iteration : iterations) { + err_sum += std::abs(iteration.duration_seconds() - avg); + } + return err_sum / iterations.size(); + } + + inline double average_bps() const { + return total_bytes_processed() / total_duration_seconds(); + } + + inline double average_rps() const { + return total_rows_processed() / total_duration_seconds(); + } +}; + +/// Load a query and return the execution plan +/// +/// The folder ${root_path}/queries will be searched for a file whose basename (everything +/// before the first '.') matches query_name. The extension will be used to figure +/// out how to convert the file to an execution plan. Supported extensions are: +/// +/// .substrait.pb.json - Loads a Substrait plan using the JSON protobuf format +/// .substrait.pb - Loads a Substrait plan using the binary protobuf format +Result> LoadQuery( + const std::string& root_path, const std::string& query_name, + const engine::ConsumerFactory& consumer_factory); +/// Validate the options (will be run automatically by RunQueryTest) +Status ValidateOptions(const QueryTestOptions& options); +/// Run a query test. +/// +/// This will load the query, download and prepare any necessary data, +/// run the query the specified number of times, and then generate a report +Result RunQueryTest(const QueryTestOptions& options); + +/// Print a query test result +Status ReportResult(const QueryTestResult& result); + +} // namespace qtest +} // namespace arrow From 6848a8b60eaf661813e7a78d1f9ed58e646975de Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 8 Mar 2022 15:40:11 -1000 Subject: [PATCH 09/11] Added an empty datasets directory. It will be a destination for downloaded datasets in the future and is needed for the query tester to recognize the root directory.
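The datasets/ directory matters because the root-directory probe in test_runner.cc only treats a directory as the project root when it contains both a queries/ and a datasets/ subdirectory, so an otherwise empty datasets/ directory has to be committed. A minimal restatement of that check, for illustration only (the helper name LooksLikeQueryTesterRoot is not taken from the patch):

#include <filesystem>

// A directory qualifies as the query-tester root only when both expected
// subdirectories are present; datasets/ may be empty but must exist.
bool LooksLikeQueryTesterRoot(const std::filesystem::path& dir) {
  return std::filesystem::is_directory(dir / "queries") &&
         std::filesystem::is_directory(dir / "datasets");
}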
--- dev/qtester/datasets/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 dev/qtester/datasets/.gitignore diff --git a/dev/qtester/datasets/.gitignore b/dev/qtester/datasets/.gitignore new file mode 100644 index 00000000000..5e7d2734cfc --- /dev/null +++ b/dev/qtester/datasets/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore From 239b20fd9d9db8055912873147c406e2b072468e Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 8 Mar 2022 17:38:04 -1000 Subject: [PATCH 10/11] ARROW-15877: Moved the standalone query-tester executable into the cpp directory --- cpp/CMakeLists.txt | 8 ++ cpp/cmake_modules/DefineOptions.cmake | 2 + .../tools/query-tester}/.gitignore | 0 .../tools/query-tester}/CMakeLists.txt | 28 +------ .../tools/query-tester}/builtin_queries.cc | 6 +- .../tools/query-tester}/builtin_queries.h | 6 +- .../tools/query-tester}/datasets/.gitignore | 0 .../queries/tpch1.substrait.pb.json | 0 .../tools/query-tester}/query_tester.cc | 17 +++- .../tools/query-tester}/test_runner.cc | 82 ++++++++++++------- .../tools/query-tester}/test_runner.h | 9 +- dev/qtester/.clang-tidy | 22 ----- 12 files changed, 96 insertions(+), 84 deletions(-) rename {dev/qtester => cpp/tools/query-tester}/.gitignore (100%) rename {dev/qtester => cpp/tools/query-tester}/CMakeLists.txt (65%) rename {dev/qtester => cpp/tools/query-tester}/builtin_queries.cc (98%) rename {dev/qtester => cpp/tools/query-tester}/builtin_queries.h (82%) rename {dev/qtester => cpp/tools/query-tester}/datasets/.gitignore (100%) rename {dev/qtester => cpp/tools/query-tester}/queries/tpch1.substrait.pb.json (100%) rename {dev/qtester => cpp/tools/query-tester}/query_tester.cc (79%) rename {dev/qtester => cpp/tools/query-tester}/test_runner.cc (72%) rename {dev/qtester => cpp/tools/query-tester}/test_runner.h (96%) delete mode 100644 dev/qtester/.clang-tidy diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c350787bfe9..5c5bf96fa52 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -330,6 +330,10 @@ if(ARROW_BUILD_BENCHMARKS set(ARROW_TESTING ON) endif() +if(ARROW_BUILD_QUERY_TESTER) + set(ARROW_ENGINE ON) +endif() + if(ARROW_GANDIVA) set(ARROW_WITH_RE2 ON) endif() @@ -967,6 +971,10 @@ if(ARROW_SKYHOOK) add_subdirectory(src/skyhook) endif() +if(ARROW_BUILD_QUERY_TESTER) + add_subdirectory(tools/query-tester) +endif() + if(ARROW_BUILD_EXAMPLES) add_custom_target(runexample ctest -L example) add_subdirectory(examples/arrow) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 05fc14bbc72..bf3d778dde0 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -163,6 +163,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_BUILD_BENCHMARKS "Build the Arrow micro benchmarks" OFF) + define_option(ARROW_BUILD_QUERY_TESTER "Build the Arrow engine query testing tool" OFF) + # Reference benchmarks are used to compare to naive implementation, or # discover various hardware limits. 
define_option(ARROW_BUILD_BENCHMARKS_REFERENCE diff --git a/dev/qtester/.gitignore b/cpp/tools/query-tester/.gitignore similarity index 100% rename from dev/qtester/.gitignore rename to cpp/tools/query-tester/.gitignore diff --git a/dev/qtester/CMakeLists.txt b/cpp/tools/query-tester/CMakeLists.txt similarity index 65% rename from dev/qtester/CMakeLists.txt rename to cpp/tools/query-tester/CMakeLists.txt index b35260f4134..93a2abb51bd 100644 --- a/dev/qtester/CMakeLists.txt +++ b/cpp/tools/query-tester/CMakeLists.txt @@ -1,16 +1,5 @@ -cmake_minimum_required(VERSION 3.19) -project(arrow-query-tester) - -set(CMAKE_CXX_STANDARD 17) -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libstdc++") -endif() -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - include(ExternalProject) -# Add Arrow -find_package(Arrow REQUIRED COMPONENTS dataset parquet engine) # Argparse is a modern library for interpreting CLI args set(ARGPARSE_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/argparse_ep-install") set(ARGPARSE_CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${ARGPARSE_PREFIX}") @@ -21,10 +10,6 @@ externalproject_add(argparse URL https://github.com/p-ranav/argparse/archive/refs/tags/v2.2.tar.gz URL_HASH "SHA256=f0fc6ab7e70ac24856c160f44ebb0dd79dc1f7f4a614ee2810d42bb73799872b") -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_CLANG_TIDY "clang-tidy-12") -endif() - function(ADD_PROGRAM TARGET) set(options) set(one_value_args) @@ -39,21 +24,16 @@ function(ADD_PROGRAM TARGET) ${TARGET}.cc ${ARG_EXTRA_SOURCES} ) + add_dependencies(${TARGET} arrow) add_dependencies(${TARGET} argparse) target_include_directories(${TARGET} SYSTEM PRIVATE "${ARGPARSE_INCLUDE_DIR}") target_link_libraries( ${TARGET} arrow_shared - arrow_dataset - arrow_engine - parquet + arrow_engine_shared + parquet_shared ) - if (MSVC) - target_compile_options(${TARGET} PRIVATE /W4 /WX) - else () - target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wpedantic -Werror) - endif () - + set_property(TARGET ${TARGET} PROPERTY CXX_STANDARD 17) endfunction() add_program(query_tester EXTRA_SOURCES builtin_queries.cc test_runner.cc) diff --git a/dev/qtester/builtin_queries.cc b/cpp/tools/query-tester/builtin_queries.cc similarity index 98% rename from dev/qtester/builtin_queries.cc rename to cpp/tools/query-tester/builtin_queries.cc index d4a6da5f404..0c11e29d5c5 100644 --- a/dev/qtester/builtin_queries.cc +++ b/cpp/tools/query-tester/builtin_queries.cc @@ -6,7 +6,8 @@ namespace cp = arrow::compute; -namespace arrow::qtest { +namespace arrow { +namespace qtest { namespace { @@ -98,4 +99,5 @@ const std::unordered_map& GetBuiltinQueries() { return builtin_queries_map; } -} // namespace arrow::qtest \ No newline at end of file +} // namespace qtest +} // namespace arrow diff --git a/dev/qtester/builtin_queries.h b/cpp/tools/query-tester/builtin_queries.h similarity index 82% rename from dev/qtester/builtin_queries.h rename to cpp/tools/query-tester/builtin_queries.h index b84e8c98f0e..450644a9575 100644 --- a/dev/qtester/builtin_queries.h +++ b/cpp/tools/query-tester/builtin_queries.h @@ -7,11 +7,13 @@ #include #include -namespace arrow::qtest { +namespace arrow { +namespace qtest { using QueryPlanFactory = std::function>( std::shared_ptr)>; const std::unordered_map& GetBuiltinQueries(); -} // namespace arrow::qtest \ No newline at end of file +} // namespace qtest +} // namespace arrow diff --git a/dev/qtester/datasets/.gitignore b/cpp/tools/query-tester/datasets/.gitignore similarity index 100% rename from 
dev/qtester/datasets/.gitignore rename to cpp/tools/query-tester/datasets/.gitignore diff --git a/dev/qtester/queries/tpch1.substrait.pb.json b/cpp/tools/query-tester/queries/tpch1.substrait.pb.json similarity index 100% rename from dev/qtester/queries/tpch1.substrait.pb.json rename to cpp/tools/query-tester/queries/tpch1.substrait.pb.json diff --git a/dev/qtester/query_tester.cc b/cpp/tools/query-tester/query_tester.cc similarity index 79% rename from dev/qtester/query_tester.cc rename to cpp/tools/query-tester/query_tester.cc index f9d924532a3..914ef0799c1 100644 --- a/dev/qtester/query_tester.cc +++ b/cpp/tools/query-tester/query_tester.cc @@ -1,7 +1,18 @@ #include +#include +#include + #include "test_runner.h" +template +arrow::util::optional ToArrow(std::optional std_opt) { + if (std_opt) { + return *std_opt; + } + return arrow::util::nullopt; +} + int main(int argc, char* argv[]) { argparse::ArgumentParser program("query_tester"); @@ -28,11 +39,11 @@ int main(int argc, char* argv[]) { arrow::qtest::QueryTestOptions options; options.query_name = program.get("query"); - options.cpu_threads = program.present("--cpu-threads"); - options.io_threads = program.present("--io-threads"); + options.cpu_threads = ToArrow(program.present("--cpu-threads")); + options.io_threads = ToArrow(program.present("--io-threads")); options.validate = program.get("--validate"); options.num_iterations = program.get("--num-iterations"); - options.executable_path = argv[0]; + options.executable_path = std::filesystem::absolute(argv[0]); arrow::Result result = arrow::qtest::RunQueryTest(options); diff --git a/dev/qtester/test_runner.cc b/cpp/tools/query-tester/test_runner.cc similarity index 72% rename from dev/qtester/test_runner.cc rename to cpp/tools/query-tester/test_runner.cc index 72f3eaf1a23..c0475dce9a0 100644 --- a/dev/qtester/test_runner.cc +++ b/cpp/tools/query-tester/test_runner.cc @@ -2,15 +2,16 @@ #include "builtin_queries.h" #include +#include #include #include #include -namespace std_fs = std::filesystem; namespace cp = arrow::compute; -namespace arrow::qtest { +namespace arrow { +namespace qtest { Status ValidateOptions(const QueryTestOptions& options) { if (options.cpu_threads && *options.cpu_threads <= 0) { @@ -29,20 +30,38 @@ Status ValidateOptions(const QueryTestOptions& options) { } namespace { + +fs::LocalFileSystem* local_fs() { + static std::unique_ptr local_fs = + std::unique_ptr(new fs::LocalFileSystem()); + return local_fs.get(); +} + +bool IsDirectory(const std::string& path) { + Result maybe_file_info = local_fs()->GetFileInfo(path); + if (!maybe_file_info.ok()) { + return false; + } + return maybe_file_info->IsDirectory(); +} + Result DoGetRootDirectory(const std::string& executable_path) { - std_fs::path path = std_fs::absolute(std_fs::path(executable_path)); + std::string path = executable_path; while (true) { - if (std_fs::is_directory(path / "queries") && - std_fs::is_directory(path / "datasets")) { - return path; + std::string potential_root = fs::internal::JoinAbstractPath( + std::vector{path, "tools", "query-tester"}); + if (IsDirectory(fs::internal::JoinAbstractPath( + std::vector{potential_root, "queries"}))) { + return potential_root; } - if (path.has_parent_path() && path != path.parent_path()) { - path = path.parent_path(); - } else { + std::pair parent_info = + fs::internal::GetAbstractPathParent(path); + if (parent_info.first.empty()) { return Status::Invalid( - "Could not locate the root directory. 
Did you perhaps move or copy the " - "query_tester executable outside of the project directory?"); + "Could not locate the tools/query-tester directory. Did you perhaps move or " + "copy the query_tester executable outside of the project directory?"); } + path = parent_info.first; } } @@ -51,7 +70,7 @@ Result GetRootDirectory(const std::string& executable) { return cached_root_directory; } -Result> PathToBuffer(const std_fs::path& path) { +Result> PathToBuffer(const std::string& path) { fs::LocalFileSystem local_fs; ARROW_ASSIGN_OR_RAISE(fs::FileInfo file_info, local_fs.GetFileInfo(path)); ARROW_ASSIGN_OR_RAISE(std::shared_ptr in_stream, @@ -69,7 +88,7 @@ Result> DeclsToPlan( } Result> LoadQueryFromSubstraitJson( - const std_fs::path& path, const engine::ConsumerFactory& consumer_factory) { + const std::string& path, const engine::ConsumerFactory& consumer_factory) { ARROW_ASSIGN_OR_RAISE(std::shared_ptr json_bytes, PathToBuffer(path)); ARROW_ASSIGN_OR_RAISE( std::shared_ptr plan_bytes, @@ -80,7 +99,7 @@ Result> LoadQueryFromSubstraitJson( } Result> LoadQueryFromSubstraitBinary( - const std_fs::path& path, const engine::ConsumerFactory& consumer_factory) { + const std::string& path, const engine::ConsumerFactory& consumer_factory) { ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan_bytes, PathToBuffer(path)); ARROW_ASSIGN_OR_RAISE(std::vector decls, engine::DeserializePlan(*plan_bytes, consumer_factory)); @@ -88,7 +107,7 @@ Result> LoadQueryFromSubstraitBinary( } Result> LoadQueryFromPath( - const std_fs::path& path, const std::string& extension, + const std::string& path, const std::string& extension, const engine::ConsumerFactory& consumer_factory) { if (extension == "substrait.pb.json") { return LoadQueryFromSubstraitJson(path, consumer_factory); @@ -129,30 +148,36 @@ class QueryResultUpdatingConsumer : public cp::SinkNodeConsumer { std::size_t iteration_ = 0; }; -Result>> LoadQueryFromFiles( +Result>> LoadQueryFromFiles( const std::string& root_path, const std::string& query_name, const engine::ConsumerFactory& consumer_factory) { - for (const auto& entry : - std_fs::directory_iterator(std_fs::path(root_path) / "queries")) { - auto entry_path_str = entry.path().filename().string(); - auto first_dot_idx = entry_path_str.find('.'); + std::string queries_path = + fs::internal::JoinAbstractPath(std::vector{root_path, "queries"}); + fs::FileSelector selector; + selector.base_dir = queries_path; + selector.recursive = false; + ARROW_ASSIGN_OR_RAISE(std::vector query_files, + local_fs()->GetFileInfo(selector)); + for (const auto& query_file : query_files) { + auto query_file_str = query_file.base_name(); + auto first_dot_idx = query_file_str.find('.'); if (first_dot_idx != std::string::npos) { - auto stem = entry_path_str.substr(0, first_dot_idx); + auto stem = query_file_str.substr(0, first_dot_idx); if (stem == query_name) { - auto extension = entry_path_str.substr(first_dot_idx + 1); - return LoadQueryFromPath(entry.path(), extension, consumer_factory); + auto extension = query_file_str.substr(first_dot_idx + 1); + return LoadQueryFromPath(query_file.path(), extension, consumer_factory); } } } - return std::nullopt; + return util::nullopt; } -Result>> LoadQueryFromBuiltin( +Result>> LoadQueryFromBuiltin( const std::string& query_name, const engine::ConsumerFactory& consumer_factory) { const auto& builtin_queries_map = GetBuiltinQueries(); const auto& query = builtin_queries_map.find(query_name); if (query == builtin_queries_map.end()) { - return std::nullopt; + return util::nullopt; } 
std::shared_ptr consumer = consumer_factory(); ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, query->second(consumer)); @@ -175,7 +200,7 @@ Status InitializeArrow(const QueryTestOptions& options) { Result> LoadQuery( const std::string& root_path, const std::string& query_name, const engine::ConsumerFactory& consumer_factory) { - ARROW_ASSIGN_OR_RAISE(std::optional> maybe_query, + ARROW_ASSIGN_OR_RAISE(util::optional> maybe_query, LoadQueryFromFiles(root_path, query_name, consumer_factory)); if (maybe_query) { return *maybe_query; @@ -216,4 +241,5 @@ Status ReportResult(const QueryTestResult& result) { return Status::OK(); } -} // namespace arrow::qtest \ No newline at end of file +} // namespace qtest +} // namespace arrow diff --git a/dev/qtester/test_runner.h b/cpp/tools/query-tester/test_runner.h similarity index 96% rename from dev/qtester/test_runner.h rename to cpp/tools/query-tester/test_runner.h index 74685cd471e..d90b5f75b5f 100644 --- a/dev/qtester/test_runner.h +++ b/cpp/tools/query-tester/test_runner.h @@ -5,8 +5,11 @@ #include #include -#include +#include +#include +#include #include +#include namespace arrow { namespace qtest { @@ -16,9 +19,9 @@ struct QueryTestOptions { std::string query_name; /// Number of CPU threads to initialize Arrow with. By default Arrow will base this /// on std::thread::hardware_concurrency - std::optional cpu_threads; + util::optional cpu_threads; /// Number of I/O threads to initialize Arrow with. By default Arrow will use 8 - std::optional io_threads; + util::optional io_threads; /// Number of iterations of the query to run, defaults to a single run int num_iterations = 1; /// If true, validate the query results, if possible diff --git a/dev/qtester/.clang-tidy b/dev/qtester/.clang-tidy deleted file mode 100644 index bcdacd174be..00000000000 --- a/dev/qtester/.clang-tidy +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
---- -Checks: '*,-llvmlibc*,-cert-err58-cpp,-modernize-use-trailing-return-type,-fuchsia-*,-cppcoreguidelines-*, - -readability-magic-numbers,-clang-analyzer-cplusplus.NewDelete,-clang-analyzer-cplusplus.NewDeleteLeaks, - -readability-function-cognitive-complexity, -hicpp-special-member-functions, -bugprone-exception-escape' -WarningsAsErrors: '*' -FormatStyle: 'file' From a3b4362b0b4302d46e966fe7d115f45ff363a41d Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 8 Mar 2022 17:58:43 -1000 Subject: [PATCH 11/11] ARROW-15877: ExecContext was not using the thread pool --- cpp/tools/query-tester/builtin_queries.cc | 5 +++-- cpp/tools/query-tester/builtin_queries.h | 2 +- cpp/tools/query-tester/test_runner.cc | 17 +++++++++++------ cpp/tools/query-tester/test_runner.h | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/cpp/tools/query-tester/builtin_queries.cc b/cpp/tools/query-tester/builtin_queries.cc index 0c11e29d5c5..7ec799a357a 100644 --- a/cpp/tools/query-tester/builtin_queries.cc +++ b/cpp/tools/query-tester/builtin_queries.cc @@ -12,8 +12,9 @@ namespace qtest { namespace { Result> Tpch1( - std::shared_ptr consumer) { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, cp::ExecPlan::Make()); + std::shared_ptr consumer, cp::ExecContext* exec_context) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, + cp::ExecPlan::Make(exec_context)); ARROW_ASSIGN_OR_RAISE(cp::TpchGen gen, cp::TpchGen::Make(plan.get(), 1)); ARROW_ASSIGN_OR_RAISE( diff --git a/cpp/tools/query-tester/builtin_queries.h b/cpp/tools/query-tester/builtin_queries.h index 450644a9575..1584d00d5a6 100644 --- a/cpp/tools/query-tester/builtin_queries.h +++ b/cpp/tools/query-tester/builtin_queries.h @@ -11,7 +11,7 @@ namespace arrow { namespace qtest { using QueryPlanFactory = std::function>( - std::shared_ptr)>; + std::shared_ptr, compute::ExecContext*)>; const std::unordered_map& GetBuiltinQueries(); diff --git a/cpp/tools/query-tester/test_runner.cc b/cpp/tools/query-tester/test_runner.cc index c0475dce9a0..ad64e32fa46 100644 --- a/cpp/tools/query-tester/test_runner.cc +++ b/cpp/tools/query-tester/test_runner.cc @@ -173,14 +173,16 @@ Result>> LoadQueryFromFiles( } Result>> LoadQueryFromBuiltin( - const std::string& query_name, const engine::ConsumerFactory& consumer_factory) { + const std::string& query_name, const engine::ConsumerFactory& consumer_factory, + cp::ExecContext* exec_context) { const auto& builtin_queries_map = GetBuiltinQueries(); const auto& query = builtin_queries_map.find(query_name); if (query == builtin_queries_map.end()) { return util::nullopt; } std::shared_ptr consumer = consumer_factory(); - ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, query->second(consumer)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, + query->second(consumer, exec_context)); return plan; } @@ -199,14 +201,15 @@ Status InitializeArrow(const QueryTestOptions& options) { Result> LoadQuery( const std::string& root_path, const std::string& query_name, - const engine::ConsumerFactory& consumer_factory) { + const engine::ConsumerFactory& consumer_factory, cp::ExecContext* exec_context) { ARROW_ASSIGN_OR_RAISE(util::optional> maybe_query, LoadQueryFromFiles(root_path, query_name, consumer_factory)); if (maybe_query) { return *maybe_query; } - ARROW_ASSIGN_OR_RAISE(maybe_query, LoadQueryFromBuiltin(query_name, consumer_factory)); + ARROW_ASSIGN_OR_RAISE(maybe_query, + LoadQueryFromBuiltin(query_name, consumer_factory, exec_context)); if (maybe_query) { return *maybe_query; } @@ -219,13 +222,15 @@ Result RunQueryTest(const 
QueryTestOptions& options) { ARROW_ASSIGN_OR_RAISE(auto root_path, GetRootDirectory(options.executable_path)); ARROW_RETURN_NOT_OK(ValidateOptions(options)); ARROW_RETURN_NOT_OK(InitializeArrow(options)); + cp::ExecContext exec_context(default_memory_pool(), internal::GetCpuThreadPool()); QueryTestResult result; auto consumer = std::make_shared(&result); auto consumer_factory = [consumer] { return consumer; }; for (int i = 0; i < options.num_iterations; i++) { consumer->Start(i); - ARROW_ASSIGN_OR_RAISE(std::shared_ptr plan, - LoadQuery(root_path, options.query_name, consumer_factory)); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr plan, + LoadQuery(root_path, options.query_name, consumer_factory, &exec_context)); ARROW_RETURN_NOT_OK(plan->StartProducing()); ARROW_RETURN_NOT_OK(plan->finished().status()); } diff --git a/cpp/tools/query-tester/test_runner.h b/cpp/tools/query-tester/test_runner.h index d90b5f75b5f..8eb4789986f 100644 --- a/cpp/tools/query-tester/test_runner.h +++ b/cpp/tools/query-tester/test_runner.h @@ -100,7 +100,7 @@ struct QueryTestResult { /// .substrait.pb - Loads a Substrait plan using the binary protobuf format Result> LoadQuery( const std::string& root_path, const std::string& query_name, - const engine::ConsumerFactory& consumer_factory); + const engine::ConsumerFactory& consumer_factory, compute::ExecContext* exec_context); /// Validate the options (will be run automatically by RunQueryTest) Status ValidateOptions(const QueryTestOptions& options); /// Run a query test.