diff --git a/CMakeLists.txt b/CMakeLists.txt index a8c5ecd0a49b..e7bb96d706bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,7 +160,7 @@ option(ENABLE_EXTENSION_AUTOLOADING "Enable extension auto-loading by default." option(ENABLE_EXTENSION_AUTOINSTALL "Enable extension auto-installing by default." FALSE) option(EXTENSION_TESTS_ONLY "Only load the tests for extensions, don't actually build them; useful for testing loadable extensions" FALSE) option(WASM_LOADABLE_EXTENSIONS "WebAssembly build with loadable extensions." FALSE) -option(ENABLE_SANITIZER "Enable address sanitizer." TRUE) +option(ENABLE_SANITIZER "Enable address sanitizer." FALSE) option(ENABLE_THREAD_SANITIZER "Enable thread sanitizer." FALSE) option(ENABLE_UBSAN "Enable undefined behavior sanitizer." TRUE) option(DISABLE_VPTR_SANITIZER "Disable vptr sanitizer; work-around for sanitizer false positive on Macbook M1" FALSE) @@ -1574,3 +1574,22 @@ if(EXISTS ${CMAKE_CONFIG_TEMPLATE} AND EXISTS ${CMAKE_CONFIG_VERSION_TEMPLATE}) "${PROJECT_BINARY_DIR}/DuckDBConfigVersion.cmake" DESTINATION "${INSTALL_CMAKE_DIR}") endif() +# TODO: This is the old way of Loading OOTEs can be removed after all OOTES in extensions.csv play ball with the new way +# build out-of-tree extensions on demand +if(NOT "${EXTERNAL_EXTENSION_DIRECTORIES}" STREQUAL "") + separate_arguments(EXTERNAL_EXTENSION_DIRECTORIES) + + foreach(EXTERNAL_EXTENSION_DIRECTORY IN LISTS EXTERNAL_EXTENSION_DIRECTORIES) + + # the build path seems to get ignored on windows in just the right way. no idea why. 
+ get_filename_component(EXTERNAL_EXTENSION_NAME ${EXTERNAL_EXTENSION_DIRECTORY} NAME) + add_subdirectory(${EXTERNAL_EXTENSION_DIRECTORY} "extension/${EXTERNAL_EXTENSION_NAME}") + endforeach() +endif() + +# Add a pixels example executable +add_subdirectory(examples/pixels-example) + +# Add a parquet example executable +add_subdirectory(examples/parquet-example) + diff --git a/benchmark/benchmark_runner.cpp b/benchmark/benchmark_runner.cpp index fe140e282d0c..5a4e97192da9 100644 --- a/benchmark/benchmark_runner.cpp +++ b/benchmark/benchmark_runner.cpp @@ -57,8 +57,6 @@ void BenchmarkRunner::InitializeBenchmarkDirectory() { atomic is_active; atomic timeout; -atomic summarize; -std::vector summary; void sleep_thread(Benchmark *benchmark, BenchmarkRunner *runner, BenchmarkState *state, bool hotrun, const optional_idx &optional_timeout) { @@ -87,7 +85,7 @@ void sleep_thread(Benchmark *benchmark, BenchmarkRunner *runner, BenchmarkState if (!hotrun) { runner->Log(StringUtil::Format("%s\t%d\t", benchmark->name, 0)); } - runner->LogResult("Benchmark timeout reached; Interrupt failed. 
Benchmark killed by benchmark runner"); + runner->LogResult("KILLED"); exit(1); } } @@ -118,100 +116,53 @@ void BenchmarkRunner::LogOutput(string message) { } } -void BenchmarkRunner::LogSummary(string benchmark, string message, size_t i) { - string log_result_line = StringUtil::Format("%s\t%d\t", benchmark, i) + "\tINCORRECT\n"; - string failure_message = benchmark + "\nname\trun\ttiming\n" + log_result_line + message; - summary.push_back(failure_message); -} - void BenchmarkRunner::RunBenchmark(Benchmark *benchmark) { Profiler profiler; auto display_name = benchmark->DisplayName(); - duckdb::unique_ptr state; - try { - state = benchmark->Initialize(configuration); - benchmark->Assert(state.get()); - } catch (std::exception &ex) { - Log(StringUtil::Format("%s\t1\t", benchmark->name)); - LogResult("ERROR"); - duckdb::ErrorData error_data(ex); - LogLine(error_data.Message()); - return; - } - // auto nruns = benchmark->NRuns(); - auto nruns=1; + auto state = benchmark->Initialize(configuration); + auto nruns = benchmark->NRuns(); LogLine("NRuns: "+std::to_string(nruns)+"\n"); - string error; + for (size_t i = 0; i < nruns; i++) { + bool hotrun = i >= 0; + if (hotrun) { + Log(StringUtil::Format("%s\t%d\t", benchmark->name, i)); + } + if (hotrun && benchmark->RequireReinit()) { + state = benchmark->Initialize(configuration); + } + is_active = true; + timeout = false; + std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun, + benchmark->Timeout(configuration)); - try { profiler.Start(); benchmark->Run(state.get()); profiler.End(); - } catch (std::exception &ex) { - duckdb::ErrorData error_data(ex); - error = error_data.Message(); - } - auto verify = benchmark->Verify(state.get()); - if (!verify.empty()) { - LogResult("INCORRECT"); - LogLine("INCORRECT RESULT: " + verify); - LogOutput("INCORRECT RESULT: " + verify); - LogSummary(benchmark->name, "INCORRECT RESULT: " + verify, 0); - // break; - } else { - LogResult("Result: 
"+std::to_string(profiler.Elapsed())); + + is_active = false; + interrupt_thread.join(); + if (hotrun) { + LogOutput(benchmark->GetLogOutput(state.get())); + if (timeout) { + // write timeout + LogResult("TIMEOUT"); + break; + } else { + // write time + //auto verify = benchmark->Verify(state.get()); + //if (!verify.empty()) { + // LogResult("INCORRECT"); + // LogLine("INCORRECT RESULT: " + verify); + // LogOutput("INCORRECT RESULT: " + verify); + // break; + //} else { + LogResult("\nResult: "+std::to_string(profiler.Elapsed())); + //} + } + } + benchmark->Cleanup(state.get()); } - // for (size_t i = 0; i < nruns; i++) { - // bool hotrun = i >= 0; - // if (hotrun) { - // Log(StringUtil::Format("%s\t%d\t", benchmark->name, i)); - // } - // if (hotrun && benchmark->RequireReinit()) { - // state = benchmark->Initialize(configuration); - // } - // is_active = true; - // timeout = false; - // std::thread interrupt_thread(sleep_thread, benchmark, this, state.get(), hotrun, - // benchmark->Timeout(configuration)); - // - // string error; - // try { - // profiler.Start(); - // benchmark->Run(state.get()); - // profiler.End(); - // } catch (std::exception &ex) { - // duckdb::ErrorData error_data(ex); - // error = error_data.Message(); - // } - // - // is_active = false; - // interrupt_thread.join(); - // if (hotrun) { - // LogOutput(benchmark->GetLogOutput(state.get())); - // if (!error.empty()) { - // LogResult("ERROR"); - // LogLine(error); - // break; - // } else if (timeout) { - // LogResult("TIMEOUT"); - // break; - // } else { - // // write time - // auto verify = benchmark->Verify(state.get()); - // if (!verify.empty()) { - // LogResult("INCORRECT"); - // LogLine("INCORRECT RESULT: " + verify); - // LogOutput("INCORRECT RESULT: " + verify); - // LogSummary(benchmark->name, "INCORRECT RESULT: " + verify, i); - // break; - // } else { - // LogResult("Result: "+std::to_string(profiler.Elapsed())); - // } - // } - // } - // benchmark->Cleanup(state.get()); - // } 
benchmark->Finalize(); } @@ -282,8 +233,6 @@ void parse_arguments(const int arg_counter, char const *const *arg_values) { auto &instance = BenchmarkRunner::GetInstance(); auto &benchmarks = instance.benchmarks; for (int arg_index = 1; arg_index < arg_counter; ++arg_index) { - // make it summarize failures by default - summarize = true; string arg = arg_values[arg_index]; if (arg == "--list") { // list names of all benchmarks @@ -324,19 +273,6 @@ void parse_arguments(const int arg_counter, char const *const *arg_values) { fprintf(stderr, "Could not open file %s for writing\n", splits[1].c_str()); exit(1); } - } else if (arg == "--no-summary") { - summarize = false; - } else if (StringUtil::StartsWith(arg, "--")) { - // custom argument - auto arg_name = arg.substr(2); - if (arg_index + 1 >= arg_counter) { - fprintf(stderr, "Benchmark argument %s requires an argument\n", arg_name.c_str()); - print_help(); - exit(1); - } - arg_index++; - auto arg_value = arg_values[arg_index]; - instance.custom_arguments.emplace(std::move(arg_name), std::move(arg_value)); } else { if (!instance.configuration.name_pattern.empty()) { fprintf(stderr, "Only one benchmark can be specified.\n"); @@ -431,17 +367,6 @@ int main(int argc, char **argv) { LoadInterpretedBenchmarks(*fs); parse_arguments(argc, argv); const auto configuration_error = run_benchmarks(); - - if (!summary.empty() && summarize) { - std::cout << "\n====================================================" << std::endl; - std::cout << "================ FAILURES SUMMARY ================" << std::endl; - std::cout << "====================================================\n" << std::endl; - for (size_t i = 0; i < summary.size(); i++) { - std::cout << i + 1 << ": " << summary[i] << std::endl; - std::cout << "----------------------------------------------------" << std::endl; - } - } - if (configuration_error != ConfigurationError::None) { print_error_message(configuration_error); exit(1); diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q01.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q01.benchmark new file mode 100644 index 000000000000..a6db07bdbd63 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q01.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q02.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q02.benchmark new file mode 100644 index 000000000000..711291db6c3d --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q02.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q03.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q03.benchmark new file mode 100644 index 000000000000..f7676a3e054b --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q03.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q04.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q04.benchmark new file mode 100644 index 000000000000..8d349361b976 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q04.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q05.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q05.benchmark new file mode 100644 index 000000000000..98dacafefe89 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q05.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q06.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q06.benchmark new file mode 100644 index 000000000000..53c8aac481a9 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q06.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q07.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q07.benchmark new file mode 100644 index 000000000000..e1c241e65376 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q07.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q08.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q08.benchmark new file mode 100644 index 000000000000..50b1d5148fe7 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q08.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q09.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q09.benchmark new file mode 100644 index 000000000000..10a501af0300 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q09.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=09 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q10.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q10.benchmark new file mode 100644 index 000000000000..761ef4154c09 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q10.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q11.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q11.benchmark new file mode 100644 index 000000000000..6a85c6ca8d32 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q11.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q12.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q12.benchmark new file mode 100644 index 000000000000..d4353cdbe35d --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q12.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q13.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q13.benchmark new file mode 100644 index 000000000000..8dae9a09c8d5 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q13.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q14.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q14.benchmark new file mode 100644 index 000000000000..89403b67f1f0 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q14.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q15.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q15.benchmark new file mode 100644 index 000000000000..f960792bd44a --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q15.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q16.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q16.benchmark new file mode 100644 index 000000000000..57677ea1b4de --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q16.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q17.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q17.benchmark new file mode 100644 index 000000000000..f9b5abd97b51 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q17.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q18.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q18.benchmark new file mode 100644 index 000000000000..097feeb8ba86 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q18.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=18 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q19.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q19.benchmark new file mode 100644 index 000000000000..4379986ef889 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q19.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q20.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q20.benchmark new file mode 100644 index 000000000000..233e920828b2 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q20.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q21.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q21.benchmark new file mode 100644 index 000000000000..d53e0e7ec2ba --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q21.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q22.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q22.benchmark new file mode 100644 index 000000000000..1c9096fe67e7 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q22.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q23.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q23.benchmark new file mode 100644 index 000000000000..00fff1f125f7 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q23.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q24.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q24.benchmark new file mode 100644 index 000000000000..76e82788ec6c --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q24.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q25.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q25.benchmark new file mode 100644 index 000000000000..f01177fb4348 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q25.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q26.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q26.benchmark new file mode 100644 index 000000000000..5d57b627668a --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q26.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q27.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q27.benchmark new file mode 100644 index 000000000000..f0d3fa93d27e --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q27.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=27 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q28.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q28.benchmark new file mode 100644 index 000000000000..b8880bbbd143 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q28.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q29.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q29.benchmark new file mode 100644 index 000000000000..57358849f5a1 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q29.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q30.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q30.benchmark new file mode 100644 index 000000000000..9b1ccdd809c7 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q30.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q31.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q31.benchmark new file mode 100644 index 000000000000..cf885452eb10 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q31.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=31 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q32.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q32.benchmark new file mode 100644 index 000000000000..3bd257ef2c50 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q32.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q33.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q33.benchmark new file mode 100644 index 000000000000..a07e41542260 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q33.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q34.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q34.benchmark new file mode 100644 index 000000000000..2b4108fb85ba --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q34.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q35.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q35.benchmark new file mode 100644 index 000000000000..771b1db172ca --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q35.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q36.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q36.benchmark new file mode 100644 index 000000000000..c1d3815208eb --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q36.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=36 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q37.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q37.benchmark new file mode 100644 index 000000000000..7137d4752b93 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q37.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q38.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q38.benchmark new file mode 100644 index 000000000000..8727073cfb21 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q38.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q39.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q39.benchmark new file mode 100644 index 000000000000..80052705f180 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q39.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q40.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q40.benchmark new file mode 100644 index 000000000000..1f80206cbaf4 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q40.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q41.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q41.benchmark new file mode 100644 index 000000000000..b150e99fdbf9 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q41.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q42.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q42.benchmark new file mode 100644 index 000000000000..fc20f17040bd --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q42.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q43.benchmark b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q43.benchmark new file mode 100644 index 000000000000..eff271803c52 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd-withoutoptimizer/q43.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark-withoutoptimizer.in b/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark-withoutoptimizer.in new file mode 100644 index 000000000000..dca9458363fd --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark-withoutoptimizer.in @@ -0,0 +1,15 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [clickbench] + +require pixels + +name Q${QUERY_NUMBER_PADDED} +group Clickbench + +load +CREATE VIEW hits AS SELECT * FROM parquet_scan(["/data/9a3-01/clickbench/parquet-e0/hits/*"]); + +run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql + +# result benchmark/clickbench/answers/q${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in b/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in index dca9458363fd..4c5d4745e483 100644 --- 
a/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-parquet-e0-1ssd.benchmark.in @@ -8,7 +8,7 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM parquet_scan(["/data/9a3-01/clickbench/parquet-e0/hits/*"]); +CREATE VIEW hits AS SELECT * FROM parquet_scan(["/nvme1/liyu/parquet-data/clickbench-e0/hits/*"]); run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in new file mode 100644 index 000000000000..2fd2a149efdf --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in @@ -0,0 +1,43 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [clickbench] + +require pixels + +name Q${QUERY_NUMBER_PADDED} +group Clickbench + +load +CREATE VIEW hits AS SELECT * FROM parquet_scan([ + "/data/9a3-01/clickbench/parquet-e0/hits/*", + "/data/9a3-02/clickbench/parquet-e0/hits/*", + "/data/9a3-03/clickbench/parquet-e0/hits/*", + "/data/9a3-04/clickbench/parquet-e0/hits/*", + "/data/9a3-05/clickbench/parquet-e0/hits/*", + "/data/9a3-06/clickbench/parquet-e0/hits/*", + "/data/9a3-07/clickbench/parquet-e0/hits/*", + "/data/9a3-08/clickbench/parquet-e0/hits/*", + "/data/9a3-09/clickbench/parquet-e0/hits/*", + "/data/9a3-10/clickbench/parquet-e0/hits/*", + "/data/9a3-11/clickbench/parquet-e0/hits/*", + "/data/9a3-12/clickbench/parquet-e0/hits/*", + "/data/9a3-13/clickbench/parquet-e0/hits/*", + "/data/9a3-14/clickbench/parquet-e0/hits/*", + "/data/9a3-15/clickbench/parquet-e0/hits/*", + "/data/9a3-16/clickbench/parquet-e0/hits/*", + "/data/9a3-17/clickbench/parquet-e0/hits/*", + "/data/9a3-18/clickbench/parquet-e0/hits/*", + "/data/9a3-19/clickbench/parquet-e0/hits/*", + "/data/9a3-20/clickbench/parquet-e0/hits/*", + "/data/9a3-21/clickbench/parquet-e0/hits/*", + 
"/data/9a3-22/clickbench/parquet-e0/hits/*", + "/data/9a3-23/clickbench/parquet-e0/hits/*", + "/data/9a3-24/clickbench/parquet-e0/hits/*" + ] +) +load +PRAGMA disable_optimizer + +run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql + +# result benchmark/clickbench/answers-24ssd/q${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q01.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q01.benchmark new file mode 100644 index 000000000000..fb37687d696e --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q01.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q02.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q02.benchmark new file mode 100644 index 000000000000..9d37319eeb97 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q02.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q03.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q03.benchmark new file mode 100644 index 000000000000..068ee719c0f1 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q03.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q04.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q04.benchmark new file mode 100644 index 000000000000..d0598d2bf115 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q04.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q05.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q05.benchmark new file mode 100644 index 000000000000..76b9fe39ae87 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q05.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q06.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q06.benchmark new file mode 100644 index 000000000000..f7c232999cfa --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q06.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q07.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q07.benchmark new file mode 100644 index 000000000000..dec4a34c497a --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q07.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q08.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q08.benchmark new file mode 100644 index 000000000000..2385478a1656 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q08.benchmark @@ -0,0 +1,2 @@ +template 
benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q09.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q09.benchmark new file mode 100644 index 000000000000..061b8e50a9cd --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q09.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q10.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q10.benchmark new file mode 100644 index 000000000000..a9ef13fc8f1e --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q10.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q11.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q11.benchmark new file mode 100644 index 000000000000..e405795fab0b --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q11.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q12.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q12.benchmark new file mode 100644 index 000000000000..1af45f563e1a --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q12.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=12 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q13.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q13.benchmark new file mode 100644 index 000000000000..f8503ccc1ea6 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q13.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q14.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q14.benchmark new file mode 100644 index 000000000000..e541c19bf753 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q14.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q15.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q15.benchmark new file mode 100644 index 000000000000..aa3e2371ae05 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q15.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q16.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q16.benchmark new file mode 100644 index 000000000000..9a4fe39035cd --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q16.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q17.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q17.benchmark new 
file mode 100644 index 000000000000..5bcbc3cf8d8e --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q17.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q18.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q18.benchmark new file mode 100644 index 000000000000..ede54f432f98 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q18.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q19.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q19.benchmark new file mode 100644 index 000000000000..6b25cf815cf8 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q19.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q20.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q20.benchmark new file mode 100644 index 000000000000..7e412b8105e6 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q20.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q21.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q21.benchmark new file mode 100644 index 000000000000..c3a1ca2d0c86 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q21.benchmark @@ -0,0 +1,2 @@ 
+template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q22.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q22.benchmark new file mode 100644 index 000000000000..d9a5aa899061 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q22.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q23.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q23.benchmark new file mode 100644 index 000000000000..af5c55f0f32b --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q23.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q24.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q24.benchmark new file mode 100644 index 000000000000..de6af07739c4 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q24.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q25.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q25.benchmark new file mode 100644 index 000000000000..156b30138f53 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q25.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=25 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q26.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q26.benchmark new file mode 100644 index 000000000000..36a3f3e5820b --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q26.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q27.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q27.benchmark new file mode 100644 index 000000000000..9f3631d0ce93 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q27.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q28.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q28.benchmark new file mode 100644 index 000000000000..f75413e74766 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q28.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q29.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q29.benchmark new file mode 100644 index 000000000000..4f562ed16c6f --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q29.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q30.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q30.benchmark new 
file mode 100644 index 000000000000..35b3f6981042 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q30.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q31.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q31.benchmark new file mode 100644 index 000000000000..6b84ae64df28 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q31.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=31 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q32.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q32.benchmark new file mode 100644 index 000000000000..c2f1db181d03 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q32.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q33.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q33.benchmark new file mode 100644 index 000000000000..1faace8cb2f9 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q33.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q34.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q34.benchmark new file mode 100644 index 000000000000..c05208a0ede4 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q34.benchmark @@ -0,0 +1,2 @@ 
+template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q35.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q35.benchmark new file mode 100644 index 000000000000..6658d794962e --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q35.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q36.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q36.benchmark new file mode 100644 index 000000000000..931cf2bffef9 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q36.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q37.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q37.benchmark new file mode 100644 index 000000000000..40d04d0d87ef --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q37.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q38.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q38.benchmark new file mode 100644 index 000000000000..7c1ac33469ef --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q38.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=38 diff --git 
a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q39.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q39.benchmark new file mode 100644 index 000000000000..728e0ab8ee4c --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q39.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q40.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q40.benchmark new file mode 100644 index 000000000000..1830e4914cd8 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q40.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q41.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q41.benchmark new file mode 100644 index 000000000000..80d3445fcade --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q41.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q42.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q42.benchmark new file mode 100644 index 000000000000..45f52dd87af9 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q42.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q43.benchmark b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q43.benchmark new 
file mode 100644 index 000000000000..be6be4e9aad2 --- /dev/null +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer/q43.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-parquet-e0-24ssd-withoutoptimizer.benchmark.in +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/clickbench/clickbench-parquet-e0-24ssd.benchmark.in b/benchmark/clickbench/clickbench-parquet-e0-24ssd.benchmark.in index dc11269a9b18..ecaa9f9fa357 100644 --- a/benchmark/clickbench/clickbench-parquet-e0-24ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-parquet-e0-24ssd.benchmark.in @@ -8,34 +8,8 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM parquet_scan([ - "/data/9a3-01/clickbench/parquet-e0/hits/*", - "/data/9a3-02/clickbench/parquet-e0/hits/*", - "/data/9a3-03/clickbench/parquet-e0/hits/*", - "/data/9a3-04/clickbench/parquet-e0/hits/*", - "/data/9a3-05/clickbench/parquet-e0/hits/*", - "/data/9a3-06/clickbench/parquet-e0/hits/*", - "/data/9a3-07/clickbench/parquet-e0/hits/*", - "/data/9a3-08/clickbench/parquet-e0/hits/*", - "/data/9a3-09/clickbench/parquet-e0/hits/*", - "/data/9a3-10/clickbench/parquet-e0/hits/*", - "/data/9a3-11/clickbench/parquet-e0/hits/*", - "/data/9a3-12/clickbench/parquet-e0/hits/*", - "/data/9a3-13/clickbench/parquet-e0/hits/*", - "/data/9a3-14/clickbench/parquet-e0/hits/*", - "/data/9a3-15/clickbench/parquet-e0/hits/*", - "/data/9a3-16/clickbench/parquet-e0/hits/*", - "/data/9a3-17/clickbench/parquet-e0/hits/*", - "/data/9a3-18/clickbench/parquet-e0/hits/*", - "/data/9a3-19/clickbench/parquet-e0/hits/*", - "/data/9a3-20/clickbench/parquet-e0/hits/*", - "/data/9a3-21/clickbench/parquet-e0/hits/*", - "/data/9a3-22/clickbench/parquet-e0/hits/*", - "/data/9a3-23/clickbench/parquet-e0/hits/*", - "/data/9a3-24/clickbench/parquet-e0/hits/*" - ] -) +CREATE VIEW hits AS SELECT * FROM 
parquet_scan(["/nvme1/liyu/parquet-data/clickbench-e0/hits/*","/nvme2/liyu/parquet-data/clickbench-e0/hits/*","/nvme3/liyu/parquet-data/clickbench-e0/hits/*","/nvme4/liyu/parquet-data/clickbench-e0/hits/*","/nvme5/liyu/parquet-data/clickbench-e0/hits/*","/nvme6/liyu/parquet-data/clickbench-e0/hits/*","/nvme7/liyu/parquet-data/clickbench-e0/hits/*","/nvme9/liyu/parquet-data/clickbench-e0/hits/*","/nvme10/liyu/parquet-data/clickbench-e0/hits/*","/nvme11/liyu/parquet-data/clickbench-e0/hits/*","/nvme14/liyu/parquet-data/clickbench-e0/hits/*","/nvme15/liyu/parquet-data/clickbench-e0/hits/*","/nvme16/liyu/parquet-data/clickbench-e0/hits/*","/nvme17/liyu/parquet-data/clickbench-e0/hits/*","/nvme18/liyu/parquet-data/clickbench-e0/hits/*","/nvme19/liyu/parquet-data/clickbench-e0/hits/*","/nvme20/liyu/parquet-data/clickbench-e0/hits/*","/nvme21/liyu/parquet-data/clickbench-e0/hits/*","/nvme22/liyu/parquet-data/clickbench-e0/hits/*","/nvme23/liyu/parquet-data/clickbench-e0/hits/*","/nvme24/liyu/parquet-data/clickbench-e0/hits/*","/nvme25/liyu/parquet-data/clickbench-e0/hits/*","/nvme26/liyu/parquet-data/clickbench-e0/hits/*","/nvme27/liyu/parquet-data/clickbench-e0/hits/*"]); run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql -# result benchmark/clickbench/answers-24ssd/q${QUERY_NUMBER_PADDED}.csv +# result benchmark/clickbench/answers/q${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/clickbench/clickbench-parquet-e2-1ssd.benchmark.in b/benchmark/clickbench/clickbench-parquet-e2-1ssd.benchmark.in index ef5aa705a6fa..4c5d4745e483 100644 --- a/benchmark/clickbench/clickbench-parquet-e2-1ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-parquet-e2-1ssd.benchmark.in @@ -8,7 +8,7 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM parquet_scan(["/data/9a3-01/clickbench/parquet-e2/hits/*"]); +CREATE VIEW hits AS SELECT * FROM parquet_scan(["/nvme1/liyu/parquet-data/clickbench-e0/hits/*"]); run 
benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/clickbench/clickbench-parquet-e2-24ssd.benchmark.in b/benchmark/clickbench/clickbench-parquet-e2-24ssd.benchmark.in index 6d0382730784..7d5a42bf707d 100644 --- a/benchmark/clickbench/clickbench-parquet-e2-24ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-parquet-e2-24ssd.benchmark.in @@ -8,33 +8,7 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM parquet_scan([ - "/data/9a3-01/clickbench/parquet-e2/hits/*", - "/data/9a3-02/clickbench/parquet-e2/hits/*", - "/data/9a3-03/clickbench/parquet-e2/hits/*", - "/data/9a3-04/clickbench/parquet-e2/hits/*", - "/data/9a3-05/clickbench/parquet-e2/hits/*", - "/data/9a3-06/clickbench/parquet-e2/hits/*", - "/data/9a3-07/clickbench/parquet-e2/hits/*", - "/data/9a3-08/clickbench/parquet-e2/hits/*", - "/data/9a3-09/clickbench/parquet-e2/hits/*", - "/data/9a3-10/clickbench/parquet-e2/hits/*", - "/data/9a3-11/clickbench/parquet-e2/hits/*", - "/data/9a3-12/clickbench/parquet-e2/hits/*", - "/data/9a3-13/clickbench/parquet-e2/hits/*", - "/data/9a3-14/clickbench/parquet-e2/hits/*", - "/data/9a3-15/clickbench/parquet-e2/hits/*", - "/data/9a3-16/clickbench/parquet-e2/hits/*", - "/data/9a3-17/clickbench/parquet-e2/hits/*", - "/data/9a3-18/clickbench/parquet-e2/hits/*", - "/data/9a3-19/clickbench/parquet-e2/hits/*", - "/data/9a3-20/clickbench/parquet-e2/hits/*", - "/data/9a3-21/clickbench/parquet-e2/hits/*", - "/data/9a3-22/clickbench/parquet-e2/hits/*", - "/data/9a3-23/clickbench/parquet-e2/hits/*", - "/data/9a3-24/clickbench/parquet-e2/hits/*" - ] -) +CREATE VIEW hits AS SELECT * FROM 
parquet_scan(["/nvme1/liyu/parquet-data/clickbench-e2/hits/*","/nvme2/liyu/parquet-data/clickbench-e2/hits/*","/nvme3/liyu/parquet-data/clickbench-e2/hits/*","/nvme4/liyu/parquet-data/clickbench-e2/hits/*","/nvme5/liyu/parquet-data/clickbench-e2/hits/*","/nvme6/liyu/parquet-data/clickbench-e2/hits/*","/nvme7/liyu/parquet-data/clickbench-e2/hits/*","/nvme9/liyu/parquet-data/clickbench-e2/hits/*","/nvme10/liyu/parquet-data/clickbench-e2/hits/*","/nvme11/liyu/parquet-data/clickbench-e2/hits/*","/nvme14/liyu/parquet-data/clickbench-e2/hits/*","/nvme15/liyu/parquet-data/clickbench-e2/hits/*","/nvme16/liyu/parquet-data/clickbench-e2/hits/*","/nvme17/liyu/parquet-data/clickbench-e2/hits/*","/nvme18/liyu/parquet-data/clickbench-e2/hits/*","/nvme19/liyu/parquet-data/clickbench-e2/hits/*","/nvme20/liyu/parquet-data/clickbench-e2/hits/*","/nvme21/liyu/parquet-data/clickbench-e2/hits/*","/nvme22/liyu/parquet-data/clickbench-e2/hits/*","/nvme23/liyu/parquet-data/clickbench-e2/hits/*","/nvme24/liyu/parquet-data/clickbench-e2/hits/*","/nvme25/liyu/parquet-data/clickbench-e2/hits/*","/nvme26/liyu/parquet-data/clickbench-e2/hits/*","/nvme27/liyu/parquet-data/clickbench-e2/hits/*"]); run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in new file mode 100644 index 000000000000..79b19474dfd7 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in @@ -0,0 +1,15 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [clickbench] + +require pixels + +name Q${QUERY_NUMBER_PADDED} +group Clickbench + +load +CREATE VIEW hits AS SELECT * FROM pixels_scan(["/data/9a3-01/clickbench/pixels-e0/hits/v-0-ordered/*"]); + +run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql + +# result benchmark/clickbench/answers/q${QUERY_NUMBER_PADDED}.csv diff --git 
a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q01.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q01.benchmark new file mode 100644 index 000000000000..ad994383db61 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q01.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q02.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q02.benchmark new file mode 100644 index 000000000000..0062059ff2f1 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q02.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q03.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q03.benchmark new file mode 100644 index 000000000000..3fe5cd61494e --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q03.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q04.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q04.benchmark new file mode 100644 index 000000000000..e420a4ff48a1 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q04.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q05.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q05.benchmark new file mode 100644 index 000000000000..803c71003490 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q05.benchmark @@ -0,0 +1,2 @@ +template
benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q06.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q06.benchmark new file mode 100644 index 000000000000..87bad5aa7941 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q06.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q07.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q07.benchmark new file mode 100644 index 000000000000..20a5697f99cd --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q07.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q08.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q08.benchmark new file mode 100644 index 000000000000..11749be12974 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q08.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q09.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q09.benchmark new file mode 100644 index 000000000000..f58e906396ad --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q09.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q10.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q10.benchmark new file mode 100644 index 000000000000..d8afef9957af --- /dev/null +++ 
b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q10.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q11.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q11.benchmark new file mode 100644 index 000000000000..8327b0bb9fbb --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q11.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q12.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q12.benchmark new file mode 100644 index 000000000000..bff44a9ac911 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q12.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q13.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q13.benchmark new file mode 100644 index 000000000000..b887a6a4bfd6 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q13.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q14.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q14.benchmark new file mode 100644 index 000000000000..5b840db523f4 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q14.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q15.benchmark 
b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q15.benchmark new file mode 100644 index 000000000000..e919d70b87d0 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q15.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q16.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q16.benchmark new file mode 100644 index 000000000000..8e8a00033b1d --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q16.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q17.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q17.benchmark new file mode 100644 index 000000000000..7fbb956ac465 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q17.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q18.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q18.benchmark new file mode 100644 index 000000000000..a1d600c4baa5 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q18.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q19.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q19.benchmark new file mode 100644 index 000000000000..17ecd5fd00aa --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q19.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool.benchmark.in +QUERY_NUMBER_PADDED=19 diff 
--git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q20.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q20.benchmark new file mode 100644 index 000000000000..10dbc81bd3ea --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q20.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q21.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q21.benchmark new file mode 100644 index 000000000000..4a2d1ca13867 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q21.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q22.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q22.benchmark new file mode 100644 index 000000000000..9b83b9700c70 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q22.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q23.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q23.benchmark new file mode 100644 index 000000000000..cb31390122af --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q23.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q24.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q24.benchmark new file mode 100644 index 000000000000..bde917968b8e --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q24.benchmark @@ -0,0 +1,2 @@ +template 
benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q25.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q25.benchmark new file mode 100644 index 000000000000..45d5cccd147c --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q25.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q26.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q26.benchmark new file mode 100644 index 000000000000..83b804e4815e --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q26.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q27.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q27.benchmark new file mode 100644 index 000000000000..9ab4e660a356 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q27.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q28.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q28.benchmark new file mode 100644 index 000000000000..bd8a2d474a23 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q28.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q29.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q29.benchmark new file mode 100644 index 000000000000..8ebd18395ed1 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q29.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q30.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q30.benchmark new file mode 100644 index 000000000000..cd69e8e7317a --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q30.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q31.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q31.benchmark new file mode 100644 index 000000000000..f0eb4c134906 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q31.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=31 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q32.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q32.benchmark new file mode 100644 index 000000000000..47421688f3db --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q32.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q33.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q33.benchmark new file mode 100644 index 000000000000..34240a76ea02 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q33.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q34.benchmark 
b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q34.benchmark new file mode 100644 index 000000000000..9b056d34936a --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q34.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q35.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q35.benchmark new file mode 100644 index 000000000000..de34eb43f00e --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q35.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q36.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q36.benchmark new file mode 100644 index 000000000000..7ac0b2d3e47b --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q36.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q37.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q37.benchmark new file mode 100644 index 000000000000..c1294d856b4a --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q37.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q38.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q38.benchmark new file mode 100644 index 000000000000..2befdd03c7db --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q38.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=38 diff 
--git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q39.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q39.benchmark new file mode 100644 index 000000000000..2cabc8a0d7d3 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q39.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q40.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q40.benchmark new file mode 100644 index 000000000000..679310572da9 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q40.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q41.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q41.benchmark new file mode 100644 index 000000000000..a5f7030ebea9 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q41.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q42.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q42.benchmark new file mode 100644 index 000000000000..0457648191e9 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q42.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q43.benchmark b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q43.benchmark new file mode 100644 index 000000000000..d69845f50615 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd-bufferpool/q43.benchmark @@ -0,0 +1,2 @@ +template 
benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in b/benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in index 79b19474dfd7..86570ab334cd 100644 --- a/benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-pixels-e0-1ssd.benchmark.in @@ -8,7 +8,7 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM pixels_scan(["/data/9a3-01/clickbench/pixels-e0/hits/v-0-ordered/*"]); +CREATE VIEW hits AS SELECT * FROM pixels_scan(["/nvme1/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*"]); run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool.benchmark.in b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool.benchmark.in new file mode 100644 index 000000000000..0eee1255bb73 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool.benchmark.in @@ -0,0 +1,41 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [clickbench] + +require pixels + +name Q${QUERY_NUMBER_PADDED} +group Clickbench + +load +CREATE VIEW hits AS SELECT * FROM pixels_scan([ + "/data/9a3-01/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-02/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-03/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-04/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-05/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-06/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-07/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-08/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-09/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-10/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-11/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-12/clickbench/pixels-e0/hits/v-0-ordered/*", + 
"/data/9a3-13/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-14/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-15/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-16/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-17/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-18/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-19/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-20/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-21/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-22/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-23/clickbench/pixels-e0/hits/v-0-ordered/*", + "/data/9a3-24/clickbench/pixels-e0/hits/v-0-ordered/*" + ] +) + +run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql + +# result benchmark/clickbench/answers-24ssd/q${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q01.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q01.benchmark new file mode 100644 index 000000000000..2bfac4f64814 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q01.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q02.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q02.benchmark new file mode 100644 index 000000000000..e0050b8dce78 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q02.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q03.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q03.benchmark new file mode 100644 index 000000000000..945328f09737 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q03.benchmark @@ -0,0 +1,2 
@@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q04.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q04.benchmark new file mode 100644 index 000000000000..0bc656ef165c --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q04.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q05.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q05.benchmark new file mode 100644 index 000000000000..355a024e3755 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q05.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q06.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q06.benchmark new file mode 100644 index 000000000000..57dd303698e8 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q06.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q07.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q07.benchmark new file mode 100644 index 000000000000..eed799c5c14a --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q07.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q08.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q08.benchmark new file mode 100644 index 000000000000..5cd6f70b60cd --- 
/dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q08.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q09.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q09.benchmark new file mode 100644 index 000000000000..91163d401c41 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q09.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q10.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q10.benchmark new file mode 100644 index 000000000000..223f1dc36887 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q10.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q11.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q11.benchmark new file mode 100644 index 000000000000..fc7b0749f2ce --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q11.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q12.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q12.benchmark new file mode 100644 index 000000000000..94539f3e3bf4 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q12.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q13.benchmark 
b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q13.benchmark new file mode 100644 index 000000000000..9da8656a2b1e --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q13.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q14.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q14.benchmark new file mode 100644 index 000000000000..5e0d07b7bfdd --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q14.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q15.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q15.benchmark new file mode 100644 index 000000000000..1515f4b5a5bc --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q15.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q16.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q16.benchmark new file mode 100644 index 000000000000..acb3402dfd70 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q16.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q17.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q17.benchmark new file mode 100644 index 000000000000..7f1eef7b3fff --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q17.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in 
+QUERY_NUMBER_PADDED=17 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q18.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q18.benchmark new file mode 100644 index 000000000000..37409bd0f0b0 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q18.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q19.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q19.benchmark new file mode 100644 index 000000000000..3790f11940cb --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q19.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q20.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q20.benchmark new file mode 100644 index 000000000000..c5ac5489dca5 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q20.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q21.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q21.benchmark new file mode 100644 index 000000000000..f3882d3ab9c9 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q21.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q22.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q22.benchmark new file mode 100644 index 000000000000..b5a94c4326a3 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q22.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q23.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q23.benchmark new file mode 100644 index 000000000000..ec49150274be --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q23.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q24.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q24.benchmark new file mode 100644 index 000000000000..49879a9e9e70 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q24.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q25.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q25.benchmark new file mode 100644 index 000000000000..580b7e430397 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q25.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q26.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q26.benchmark new file mode 100644 index 000000000000..cd264e273d50 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q26.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q27.benchmark 
b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q27.benchmark new file mode 100644 index 000000000000..103dfa768211 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q27.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q28.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q28.benchmark new file mode 100644 index 000000000000..b0c2a5fd64da --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q28.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q29.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q29.benchmark new file mode 100644 index 000000000000..59a2d0ff56ff --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q29.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q30.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q30.benchmark new file mode 100644 index 000000000000..ffb26a715c99 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q30.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q31.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q31.benchmark new file mode 100644 index 000000000000..a64f2a9464fe --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q31.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in 
+QUERY_NUMBER_PADDED=31 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q32.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q32.benchmark new file mode 100644 index 000000000000..25117b0afe92 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q32.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q33.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q33.benchmark new file mode 100644 index 000000000000..851b881b2592 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q33.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q34.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q34.benchmark new file mode 100644 index 000000000000..7d56d94d96c8 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q34.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q35.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q35.benchmark new file mode 100644 index 000000000000..1f4bd50c2146 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q35.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q36.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q36.benchmark new file mode 100644 index 000000000000..7092dabaf02a --- /dev/null +++ 
b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q36.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q37.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q37.benchmark new file mode 100644 index 000000000000..427d00bc444e --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q37.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q38.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q38.benchmark new file mode 100644 index 000000000000..8a110d7d2b7f --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q38.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q39.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q39.benchmark new file mode 100644 index 000000000000..db7fadbcac5d --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q39.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q40.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q40.benchmark new file mode 100644 index 000000000000..96051d942b91 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q40.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q41.benchmark 
b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q41.benchmark new file mode 100644 index 000000000000..490d4cf7181f --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q41.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q42.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q42.benchmark new file mode 100644 index 000000000000..893dcaf78566 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q42.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q43.benchmark b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q43.benchmark new file mode 100644 index 000000000000..dac0c969c336 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd-bufferpool/q43.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in b/benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in index 0eee1255bb73..e67d0c99551c 100644 --- a/benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-pixels-e0-24ssd.benchmark.in @@ -8,34 +8,8 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM pixels_scan([ - "/data/9a3-01/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-02/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-03/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-04/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-05/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-06/clickbench/pixels-e0/hits/v-0-ordered/*", - 
"/data/9a3-07/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-08/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-09/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-10/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-11/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-12/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-13/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-14/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-15/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-16/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-17/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-18/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-19/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-20/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-21/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-22/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-23/clickbench/pixels-e0/hits/v-0-ordered/*", - "/data/9a3-24/clickbench/pixels-e0/hits/v-0-ordered/*" - ] -) +CREATE VIEW hits AS SELECT * FROM 
pixels_scan(["/nvme1/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme2/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme3/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme4/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme5/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme6/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme7/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme9/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme10/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme11/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme14/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme15/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme16/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme17/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme18/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme19/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme20/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme21/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme22/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme23/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme24/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme25/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme26/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*","/nvme27/liyu/pixels-data/clickbench-e0/hits/v-0-ordered/*"]); run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql -# result benchmark/clickbench/answers-24ssd/q${QUERY_NUMBER_PADDED}.csv +# result benchmark/clickbench/answers/q${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/clickbench/clickbench-pixels-e1-1ssd.benchmark.in b/benchmark/clickbench/clickbench-pixels-e1-1ssd.benchmark.in index 5708d62e996d..e272c8895914 100644 --- a/benchmark/clickbench/clickbench-pixels-e1-1ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-pixels-e1-1ssd.benchmark.in @@ -8,7 +8,7 @@ name 
Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM pixels_scan(["/data/9a3-01/clickbench/pixels-e1/hits/v-0-ordered/*"]); +CREATE VIEW hits AS SELECT * FROM pixels_scan(["/nvme1/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*"]); run benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q01.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q01.benchmark new file mode 100644 index 000000000000..c189b9955a0c --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q01.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q02.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q02.benchmark new file mode 100644 index 000000000000..9c413f5793a1 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q02.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q03.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q03.benchmark new file mode 100644 index 000000000000..d69c88490738 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q03.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q04.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q04.benchmark new file mode 100644 index 000000000000..08293bbe3376 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q04.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=04 
diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q05.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q05.benchmark new file mode 100644 index 000000000000..8798fd0542f8 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q05.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q06.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q06.benchmark new file mode 100644 index 000000000000..8049dc907eda --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q06.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q07.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q07.benchmark new file mode 100644 index 000000000000..84f30101772f --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q07.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q08.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q08.benchmark new file mode 100644 index 000000000000..82ee1dbc75db --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q08.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q09.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q09.benchmark new file mode 100644 index 000000000000..919014bdb530 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q09.benchmark @@ -0,0 +1,2 @@ 
+template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q10.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q10.benchmark new file mode 100644 index 000000000000..cb1fc0b8c986 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q10.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q11.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q11.benchmark new file mode 100644 index 000000000000..25e0b7e9abde --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q11.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q12.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q12.benchmark new file mode 100644 index 000000000000..83cba3410f0d --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q12.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q13.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q13.benchmark new file mode 100644 index 000000000000..e645b2935312 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q13.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q14.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q14.benchmark new file mode 100644 index 000000000000..973329fde962 --- /dev/null 
+++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q14.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q15.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q15.benchmark new file mode 100644 index 000000000000..686af333f90a --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q15.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q16.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q16.benchmark new file mode 100644 index 000000000000..ae886967b595 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q16.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q17.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q17.benchmark new file mode 100644 index 000000000000..eb1fdc0cea01 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q17.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q18.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q18.benchmark new file mode 100644 index 000000000000..f628b789aaf9 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q18.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q19.benchmark 
b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q19.benchmark new file mode 100644 index 000000000000..ba4c70997312 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q19.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q20.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q20.benchmark new file mode 100644 index 000000000000..3970f032963b --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q20.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q21.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q21.benchmark new file mode 100644 index 000000000000..353af2e28945 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q21.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q22.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q22.benchmark new file mode 100644 index 000000000000..cb13d45599ba --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q22.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q23.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q23.benchmark new file mode 100644 index 000000000000..7bfa9d2fae83 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q23.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in 
+QUERY_NUMBER_PADDED=23 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q24.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q24.benchmark new file mode 100644 index 000000000000..95edb0f484a0 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q24.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q25.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q25.benchmark new file mode 100644 index 000000000000..95768279c349 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q25.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q26.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q26.benchmark new file mode 100644 index 000000000000..3ffe9f0b3fa1 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q26.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q27.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q27.benchmark new file mode 100644 index 000000000000..7610417d2c10 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q27.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q28.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q28.benchmark new file mode 100644 index 000000000000..f0c3f635157e --- /dev/null +++ 
b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q28.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q29.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q29.benchmark new file mode 100644 index 000000000000..1701570d75a4 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q29.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q30.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q30.benchmark new file mode 100644 index 000000000000..37bb19d176e3 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q30.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q31.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q31.benchmark new file mode 100644 index 000000000000..a27ae7f4210d --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q31.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=31 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q32.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q32.benchmark new file mode 100644 index 000000000000..d704faa824ca --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q32.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q33.benchmark 
b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q33.benchmark new file mode 100644 index 000000000000..eda8dea56360 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q33.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q34.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q34.benchmark new file mode 100644 index 000000000000..c8fc84ceb20d --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q34.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q35.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q35.benchmark new file mode 100644 index 000000000000..6a26a02ffa56 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q35.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q36.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q36.benchmark new file mode 100644 index 000000000000..9689ee5990f7 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q36.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q37.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q37.benchmark new file mode 100644 index 000000000000..566ee5cf0910 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q37.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in 
+QUERY_NUMBER_PADDED=37 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q38.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q38.benchmark new file mode 100644 index 000000000000..d90d14452829 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q38.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q39.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q39.benchmark new file mode 100644 index 000000000000..a1205b9d0c72 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q39.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q40.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q40.benchmark new file mode 100644 index 000000000000..93db0c62a762 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q40.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q41.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q41.benchmark new file mode 100644 index 000000000000..b2321964e2d5 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q41.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q42.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q42.benchmark new file mode 100644 index 000000000000..f2e935cdaf37 --- /dev/null +++ 
b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q42.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q43.benchmark b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q43.benchmark new file mode 100644 index 000000000000..72853c6d61e3 --- /dev/null +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd-bufferpool/q43.benchmark @@ -0,0 +1,2 @@ +template benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in b/benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in index 1301821eeff1..60470ca3187b 100644 --- a/benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in +++ b/benchmark/clickbench/clickbench-pixels-e1-24ssd.benchmark.in @@ -8,34 +8,7 @@ name Q${QUERY_NUMBER_PADDED} group Clickbench load -CREATE VIEW hits AS SELECT * FROM pixels_scan( -[ - "/data/9a3-01/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-02/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-03/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-04/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-05/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-06/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-07/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-08/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-09/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-10/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-11/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-12/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-13/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-14/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-15/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-16/clickbench/pixels-e1/hits/v-0-ordered/*", - 
"/data/9a3-17/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-18/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-19/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-20/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-21/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-22/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-23/clickbench/pixels-e1/hits/v-0-ordered/*", - "/data/9a3-24/clickbench/pixels-e1/hits/v-0-ordered/*" -] -) +CREATE VIEW hits AS SELECT * FROM pixels_scan(["/nvme1/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme2/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme3/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme4/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme5/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme6/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme7/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme9/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme10/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme11/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme14/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme15/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme16/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme17/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme18/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme19/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme20/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme21/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme22/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme23/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme24/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme25/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme26/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*","/nvme27/liyu/pixels-data/clickbench-e1/hits/v-0-ordered/*"]); run 
benchmark/clickbench/queries/q${QUERY_NUMBER_PADDED}.sql diff --git a/benchmark/clickbench/queries-1/load.sql b/benchmark/clickbench/queries-1/load.sql new file mode 100644 index 000000000000..5d0025ad33d7 --- /dev/null +++ b/benchmark/clickbench/queries-1/load.sql @@ -0,0 +1,110 @@ +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT, + Referer TEXT, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT, + Params TEXT, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, 
+ IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT, + BrowserCountry TEXT, + SocialNetwork TEXT, + SocialAction TEXT, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT, + ParamCurrency TEXT, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT, + OpenstatCampaignID TEXT, + OpenstatAdID TEXT, + OpenstatSourceID TEXT, + UTMSource TEXT, + UTMMedium TEXT, + UTMCampaign TEXT, + UTMContent TEXT, + UTMTerm TEXT, + FromTag TEXT, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) +); +INSERT INTO hits SELECT * FROM read_parquet('https://github.com/duckdb/duckdb-data/releases/download/v1.0/hits.parquet'); diff --git a/benchmark/clickbench/queries-1/q00.sql b/benchmark/clickbench/queries-1/q00.sql new file mode 100644 index 000000000000..c70aa7a844d7 --- /dev/null +++ b/benchmark/clickbench/queries-1/q00.sql @@ -0,0 +1 @@ +SELECT COUNT(*) FROM hits; diff --git a/benchmark/clickbench/queries-1/q01.sql b/benchmark/clickbench/queries-1/q01.sql new file mode 100644 index 
000000000000..c70aa7a844d7 --- /dev/null +++ b/benchmark/clickbench/queries-1/q01.sql @@ -0,0 +1 @@ +SELECT COUNT(*) FROM hits; diff --git a/benchmark/clickbench/queries-test/q01.sql b/benchmark/clickbench/queries-test/q01.sql index 200afa446255..5d2d9cd9787b 100644 --- a/benchmark/clickbench/queries-test/q01.sql +++ b/benchmark/clickbench/queries-test/q01.sql @@ -1 +1 @@ -SELECT * FROM hits; \ No newline at end of file +SELECT watchid FROM hits; \ No newline at end of file diff --git a/benchmark/clickbench/test.py b/benchmark/clickbench/test.py new file mode 100644 index 000000000000..b699a8b47779 --- /dev/null +++ b/benchmark/clickbench/test.py @@ -0,0 +1,59 @@ +import os +import shutil + +def add_pragma_to_sql_files(source_dir, dest_dir): + """ + 在所有SQL文件开头添加PRAGMA disable_optimizer; + 并保持目录结构复制到新目录 + + 参数: + source_dir: 源SQL文件目录 + dest_dir: 目标目录 + """ + # 确保源目录存在 + if not os.path.exists(source_dir): + print(f"错误: 源目录 '{source_dir}' 不存在") + return + + # 创建目标目录(如果不存在) + os.makedirs(dest_dir, exist_ok=True) + + # 遍历源目录中的所有文件和子目录 + for root, dirs, files in os.walk(source_dir): + # 为每个子目录在目标目录中创建对应的目录 + for dir_name in dirs: + source_subdir = os.path.join(root, dir_name) + relative_path = os.path.relpath(source_subdir, source_dir) + dest_subdir = os.path.join(dest_dir, relative_path) + os.makedirs(dest_subdir, exist_ok=True) + + # 处理每个SQL文件 + for file in files: + if file.endswith('.sql'): + # 构建源文件和目标文件的路径 + source_file = os.path.join(root, file) + relative_path = os.path.relpath(root, source_dir) + dest_file = os.path.join(dest_dir, relative_path, file) + + # 读取源文件内容 + with open(source_file, 'r', encoding='utf-8') as f: + content = f.read() + + # 在内容开头添加PRAGMA语句 + new_content = f"PRAGMA disable_optimizer;\n{content}" + + # 写入目标文件 + with open(dest_file, 'w', encoding='utf-8') as f: + f.write(new_content) + + print(f"已处理: {dest_file}") + + print(f"所有SQL文件处理完成,已保存至: {os.path.abspath(dest_dir)}") + +if __name__ == "__main__": + # 源目录和目标目录设置 + source_directory = 
"queries" + destination_directory = "queries-withoutoptimizer" + + # 执行处理 + add_pragma_to_sql_files(source_directory, destination_directory) diff --git a/benchmark/include/benchmark.hpp b/benchmark/include/benchmark.hpp index 1fe4b7df79f6..7780e6d55264 100644 --- a/benchmark/include/benchmark.hpp +++ b/benchmark/include/benchmark.hpp @@ -28,7 +28,8 @@ struct BenchmarkState { //! The base Benchmark class is a base class that is used to create and register //! new benchmarks class Benchmark { - constexpr static size_t DEFAULT_NRUNS = 5; + + constexpr static size_t DEFAULT_NRUNS = 1; Benchmark(Benchmark &) = delete; public: diff --git a/benchmark/include/benchmark_runner.hpp b/benchmark/include/benchmark_runner.hpp index 59457cac99fb..c275597ebd98 100644 --- a/benchmark/include/benchmark_runner.hpp +++ b/benchmark/include/benchmark_runner.hpp @@ -51,6 +51,7 @@ class BenchmarkRunner { ofstream out_file; ofstream log_file; uint32_t threads = MaxValue(std::thread::hardware_concurrency(), 1u); + uint32_t nRuns=1;// default unordered_map custom_arguments; }; diff --git a/benchmark/interpreted_benchmark.cpp b/benchmark/interpreted_benchmark.cpp index b16bd68a6024..7e25b8757a9e 100644 --- a/benchmark/interpreted_benchmark.cpp +++ b/benchmark/interpreted_benchmark.cpp @@ -8,9 +8,11 @@ #include "duckdb/main/extension_helper.hpp" #include "duckdb/main/query_profiler.hpp" #include "test_helpers.hpp" + #include "duckdb/common/helper.hpp" #include "duckdb/execution/operator/helper/physical_result_collector.hpp" #include "duckdb/common/arrow/physical_arrow_collector.hpp" +//#incoude #include #include @@ -43,32 +45,23 @@ struct InterpretedBenchmarkState : public BenchmarkState { Connection con; duckdb::unique_ptr result; - explicit InterpretedBenchmarkState(string path, const string &version) - : benchmark_config(GetBenchmarkConfig(version)), - db(path.empty() ? 
nullptr : path.c_str(), benchmark_config.get()), con(db) { + explicit InterpretedBenchmarkState(string path) + : benchmark_config(GetBenchmarkConfig()), db(path.empty() ? nullptr : path.c_str(), benchmark_config.get()), + con(db) { auto &instance = BenchmarkRunner::GetInstance(); auto res = con.Query("PRAGMA threads=" + to_string(instance.threads)); D_ASSERT(!res->HasError()); } - duckdb::unique_ptr GetBenchmarkConfig(const string &version = "") { + duckdb::unique_ptr GetBenchmarkConfig() { auto result = make_uniq(); - if (!version.empty()) { - result->options.serialization_compatibility = SerializationCompatibility::FromString(version); - } result->options.load_extensions = false; return result; } }; -void ProcessReplacements(string &str, const unordered_map &replacement_map) { - for (auto &replacement : replacement_map) { - str = StringUtil::Replace(str, "${" + replacement.first + "}", replacement.second); - } -} - struct BenchmarkFileReader { - BenchmarkFileReader(string path_, const unordered_map &replacement_map) + BenchmarkFileReader(string path_, unordered_map replacement_map) : path(path_), infile(path), linenr(0), replacements(replacement_map) { } @@ -78,7 +71,9 @@ struct BenchmarkFileReader { return false; } linenr++; - ProcessReplacements(line, replacements); + for (auto &replacement : replacements) { + line = StringUtil::Replace(line, "${" + replacement.first + "}", replacement.second); + } StringUtil::Trim(line); return true; } @@ -95,7 +90,7 @@ struct BenchmarkFileReader { std::string path; std::ifstream infile; int linenr; - const unordered_map &replacements; + unordered_map replacements; }; InterpretedBenchmark::InterpretedBenchmark(string full_path) @@ -103,33 +98,24 @@ InterpretedBenchmark::InterpretedBenchmark(string full_path) replacement_mapping["BENCHMARK_DIR"] = BenchmarkRunner::DUCKDB_BENCHMARK_DIRECTORY; } -BenchmarkQuery InterpretedBenchmark::ReadQueryFromFile(BenchmarkFileReader &reader, string file) { +void 
InterpretedBenchmark::ReadResultFromFile(BenchmarkFileReader &reader, const string &file) { // read the results from the file - BenchmarkQuery query; - query.query = ""; - - ProcessReplacements(file, replacement_mapping); - DuckDB db; Connection con(db); - auto result = con.Query("FROM read_csv('" + file + - "', delim='|', header=1, nullstr='NULL', all_varchar=1, quote ='\"', escape ='\"')"); - query.column_count = result->ColumnCount(); + auto result = + con.Query("SELECT * FROM read_csv_auto('" + file + "', delim='|', header=1, nullstr='NULL', all_varchar=1)"); + result_column_count = result->ColumnCount(); for (auto &row : *result) { vector row_values; for (idx_t col_idx = 0; col_idx < result->ColumnCount(); col_idx++) { row_values.push_back(row.GetValue(col_idx)); } - query.expected_result.push_back(std::move(row_values)); + result_values.push_back(std::move(row_values)); } - return query; } -BenchmarkQuery InterpretedBenchmark::ReadQueryFromReader(BenchmarkFileReader &reader, const string &sql, - const string &header) { - BenchmarkQuery query; - query.query = sql; - query.column_count = header.size(); +void InterpretedBenchmark::ReadResultFromReader(BenchmarkFileReader &reader, const string &header) { + result_column_count = header.size(); // keep reading results until eof string line; while (reader.ReadLine(line)) { @@ -137,13 +123,12 @@ BenchmarkQuery InterpretedBenchmark::ReadQueryFromReader(BenchmarkFileReader &re break; } auto result_splits = StringUtil::Split(line, "\t"); - if (result_splits.size() != query.column_count) { + if ((int64_t)result_splits.size() != result_column_count) { throw std::runtime_error(reader.FormatException("expected " + std::to_string(result_splits.size()) + - " values but got " + std::to_string(query.column_count))); + " values but got " + std::to_string(result_column_count))); } - query.expected_result.push_back(std::move(result_splits)); + result_values.push_back(std::move(result_splits)); } - return query; } static void 
ThrowResultModeError(BenchmarkFileReader &reader) { @@ -153,8 +138,11 @@ static void ThrowResultModeError(BenchmarkFileReader &reader) { throw std::runtime_error(reader.FormatException(error)); } -void InterpretedBenchmark::ProcessFile(const string &path) { - BenchmarkFileReader reader(path, replacement_mapping); +void InterpretedBenchmark::LoadBenchmark() { + if (is_loaded) { + return; + } + BenchmarkFileReader reader(benchmark_path, replacement_mapping); string line; while (reader.ReadLine(line)) { // skip blank lines and comments @@ -163,12 +151,10 @@ void InterpretedBenchmark::ProcessFile(const string &path) { } // look for a command in this line auto splits = StringUtil::Split(StringUtil::Lower(line), ' '); - if (splits[0] == "load" || splits[0] == "run" || splits[0] == "init" || splits[0] == "cleanup" || - splits[0] == "reload") { + if (splits[0] == "load" || splits[0] == "run" || splits[0] == "init" || splits[0] == "cleanup") { if (queries.find(splits[0]) != queries.end()) { throw std::runtime_error("Multiple calls to " + splits[0] + " in the same benchmark file"); } - // load command: keep reading until we find a blank line or EOF string query; while (reader.ReadLine(line)) { @@ -199,18 +185,10 @@ void InterpretedBenchmark::ProcessFile(const string &path) { } queries[splits[0]] = query; } else if (splits[0] == "require") { - if (splits.size() < 2 || splits.size() > 3) { + if (splits.size() != 2) { throw std::runtime_error(reader.FormatException("require requires a single parameter")); } - if (splits.size() == 3) { - if (splits[2] != "load_only") { - throw std::runtime_error( - reader.FormatException("require only supports load_only as a second parameter")); - } - load_extensions.insert(splits[1]); - } else { - extensions.insert(splits[1]); - } + extensions.insert(splits[1]); } else if (splits[0] == "resultmode") { if (splits.size() < 2) { ThrowResultModeError(reader); @@ -256,19 +234,9 @@ void InterpretedBenchmark::ProcessFile(const string &path) { 
cache_file = cache_db; cache_db = string(); } - - ProcessReplacements(cache_db, replacement_mapping); - ProcessReplacements(cache_file, replacement_mapping); - } else if (splits[0] == "cache_file") { - if (splits.size() == 2) { - cache_file = splits[1]; - ProcessReplacements(cache_file, replacement_mapping); - } else { - throw std::runtime_error(reader.FormatException("cache_file requires a single file")); - } } else if (splits[0] == "storage") { - if (splits.size() < 2) { - throw std::runtime_error(reader.FormatException("storage requires at least one parameter")); + if (splits.size() != 2) { + throw std::runtime_error(reader.FormatException("storage requires a single parameter")); } if (splits[1] == "transient") { in_memory = true; @@ -277,10 +245,6 @@ void InterpretedBenchmark::ProcessFile(const string &path) { } else { throw std::runtime_error(reader.FormatException("Invalid argument for storage")); } - - if (splits.size() == 3) { - storage_version = splits[2]; - } } else if (splits[0] == "require_reinit") { if (splits.size() != 1) { throw std::runtime_error(reader.FormatException("require_reinit does not take any parameters")); @@ -299,13 +263,25 @@ void InterpretedBenchmark::ProcessFile(const string &path) { } else { subgroup = result; } - } else if (splits[0] == "assert") { + } else if (splits[0] == "result_query") { + if (result_column_count > 0) { + throw std::runtime_error(reader.FormatException("multiple results found")); + } // count the amount of columns if (splits.size() <= 1 || splits[1].size() == 0) { throw std::runtime_error( - reader.FormatException("assert must be followed by a column count (e.g. result III)")); + reader.FormatException("result_query must be followed by a column count (e.g. 
result III)")); + } + bool is_file = false; + for (idx_t i = 0; i < splits[1].size(); i++) { + if (splits[1][i] != 'i') { + is_file = true; + break; + } + } + if (is_file) { + ReadResultFromFile(reader, splits[1]); } - // read the actual query bool found_end = false; string sql; @@ -316,17 +292,23 @@ void InterpretedBenchmark::ProcessFile(const string &path) { } sql += "\n" + line; } + result_query = sql; if (!found_end) { throw std::runtime_error(reader.FormatException( "result_query must be followed by a query and a result (separated by ----)")); } - - assert_queries.push_back(ReadQueryFromReader(reader, sql, splits[1])); - } else if (splits[0] == "result_query" || splits[0] == "result") { + if (!is_file) { + ReadResultFromReader(reader, splits[1]); + } + } else if (splits[0] == "result") { + if (result_column_count > 0) { + throw std::runtime_error(reader.FormatException("multiple results found")); + } // count the amount of columns - if (splits.size() <= 1 || splits[1].empty()) { + if (splits.size() <= 1 || splits[1].size() == 0) { throw std::runtime_error( - reader.FormatException("result must be followed by a column count (e.g. result III)")); + reader.FormatException("result must be followed by a column count (e.g. result III) or a file " + "(e.g. 
result /path/to/file.csv)")); } bool is_file = false; for (idx_t i = 0; i < splits[1].size(); i++) { @@ -335,74 +317,19 @@ void InterpretedBenchmark::ProcessFile(const string &path) { break; } } - bool matches_condition = true; - if (splits.size() > 2) { - // conditional result - for (idx_t split_idx = 2; split_idx < splits.size(); split_idx++) { - auto &condition = splits[split_idx]; - if (!StringUtil::Contains(condition, "=")) { - throw InvalidInputException("result with condition - only = is supported currently"); - } - auto condition_splits = StringUtil::Split(condition, '='); - if (condition_splits.size() != 2) { - throw InvalidInputException("result with condition must have one equality"); - } - auto &condition_arg = condition_splits[0]; - auto &condition_val = condition_splits[1]; - auto entry = replacement_mapping.find(condition_arg); - if (entry == replacement_mapping.end()) { - throw InvalidInputException("Condition argument %s not found in benchmark", condition_arg); - } - if (entry->second != condition_val) { - matches_condition = false; - break; - } - } - } - string result_query; - if (splits[0] == "result_query") { - // read the actual query - bool found_end = false; - string sql; + if (is_file) { + ReadResultFromFile(reader, splits[1]); + + // read the main file until we encounter an empty line + string line; while (reader.ReadLine(line)) { - if (line == "----") { - found_end = true; + if (line.empty()) { break; } - sql += "\n" + line; - } - if (!found_end) { - throw std::runtime_error(reader.FormatException( - "result_query must be followed by a query and a result (separated by ----)")); } - result_query = sql; } else { - //! 
Read directly from the answer - result_query = "select * from __answer"; - } - BenchmarkQuery result_check; - if (is_file) { - if (matches_condition) { - result_check = ReadQueryFromFile(reader, splits[1]); - result_check.query = result_query; - } - } else { - result_check = ReadQueryFromReader(reader, result_query, splits[1]); - } - if (matches_condition) { - if (!result_queries.empty()) { - throw std::runtime_error(reader.FormatException("multiple results found")); - } - result_queries.push_back(std::move(result_check)); - } - } else if (splits[0] == "retry") { - if (splits.size() != 3) { - throw std::runtime_error(reader.FormatException(splits[0] + " requires two parameters")); - } - if (splits[1] != "load") { - throw std::runtime_error("Only retry load is supported"); + ReadResultFromReader(reader, splits[1]); } - retry_load = std::stoull(splits[2]); } else if (splits[0] == "template") { // template: update the path to read benchmark_path = splits[1]; @@ -421,50 +348,10 @@ void InterpretedBenchmark::ProcessFile(const string &path) { // restart the load from the template file LoadBenchmark(); return; - } else if (splits[0] == "argument") { - if (splits.size() != 3) { - throw std::runtime_error( - reader.FormatException(splits[0] + " requires two parameters (name and default)")); - } - auto &arg_name = splits[1]; - string arg_value = splits[2]; - auto &instance = BenchmarkRunner::GetInstance(); - auto entry = instance.custom_arguments.find(arg_name); - if (entry != instance.custom_arguments.end()) { - arg_value = entry->second; - } - if (handled_arguments.count(arg_name) > 0) { - // argument is already defined - ignore this definition - continue; - } - handled_arguments.insert(arg_name); - replacement_mapping[arg_name] = std::move(arg_value); - } else if (splits[0] == "include") { - if (splits.size() != 2) { - throw InvalidInputException("include requires a single argument"); - } - ProcessFile(splits[1]); } else { throw 
std::runtime_error(reader.FormatException("unrecognized command " + splits[0])); } } -} - -void InterpretedBenchmark::LoadBenchmark() { - if (is_loaded) { - return; - } - - ProcessFile(benchmark_path); - // throw an error if an argument was not handled - auto &instance = BenchmarkRunner::GetInstance(); - for (auto &entry : instance.custom_arguments) { - auto &custom_arg = entry.first; - if (handled_arguments.count(custom_arg) == 0) { - throw InvalidInputException("Invalid benchmark argument %s: argument was not specified in benchmark %s", - custom_arg, benchmark_path); - } - } // set up the queries if (queries.find("run") == queries.end()) { throw InvalidInputException("Invalid benchmark file: no \"run\" query specified"); @@ -473,47 +360,36 @@ void InterpretedBenchmark::LoadBenchmark() { is_loaded = true; } -void LoadExtensions(InterpretedBenchmarkState &state, const std::unordered_set &extensions_to_load) { - for (auto &extension : extensions_to_load) { - auto result = ExtensionHelper::LoadExtension(state.db, extension); - if (result == ExtensionLoadResult::EXTENSION_UNKNOWN) { - throw InvalidInputException("Unknown extension " + extension); - } else if (result == ExtensionLoadResult::NOT_LOADED) { - throw InvalidInputException("Extension " + extension + - " is not available/was not compiled. 
Cannot run this benchmark."); - } - } -} - -unique_ptr InterpretedBenchmark::RunLoadQuery(InterpretedBenchmarkState &state, const string &load_query) { - LoadExtensions(state, load_extensions); - auto result = state.con.Query(load_query); - for (idx_t i = 0; i < retry_load; i++) { - if (!result->HasError()) { - break; - } - result = state.con.Query(load_query); - } - return unique_ptr_cast(std::move(result)); -} - unique_ptr InterpretedBenchmark::Initialize(BenchmarkConfiguration &config) { duckdb::unique_ptr result; LoadBenchmark(); duckdb::unique_ptr state; auto full_db_path = GetDatabasePath(); try { - state = make_uniq(full_db_path, storage_version); + state = make_uniq(full_db_path); } catch (Exception &e) { // if the connection throws an error, chances are it's a storage format error. // In this case delete the file and connect again. DeleteDatabase(full_db_path); - state = make_uniq(full_db_path, storage_version); + state = make_uniq(full_db_path); } - extensions.insert("core_functions"); +// for(auto &extension:extensions){ +//// std::cout<<"Extension: "+extension<db, extension); + if (result == ExtensionLoadResult::EXTENSION_UNKNOWN) { + throw InvalidInputException("Unknown extension " + extension); + } else if (result == ExtensionLoadResult::NOT_LOADED) { + throw InvalidInputException("Extension " + extension + + " is not available/was not compiled. 
Cannot run this benchmark."); + } + } - LoadExtensions(*state, extensions); if (queries.find("init") != queries.end()) { string init_query = queries["init"]; result = state->con.Query(init_query); @@ -529,23 +405,16 @@ unique_ptr InterpretedBenchmark::Initialize(BenchmarkConfigurati if (queries.find("load") != queries.end()) { load_query = queries["load"]; } - string reload_query; - if (queries.find("reload") != queries.end()) { - reload_query = queries["reload"]; - } if (!cache_file.empty()) { auto fs = FileSystem::CreateLocal(); if (!fs->FileExists(fs->JoinPath(BenchmarkRunner::DUCKDB_BENCHMARK_DIRECTORY, cache_file))) { // no cache or db_path specified: just run the initialization code - result = RunLoadQuery(*state, load_query); - } else if (!reload_query.empty()) { - // run reload query - result = RunLoadQuery(*state, reload_query); + result = state->con.Query(load_query); } } else if (cache_db.empty() && cache_db.compare(DEFAULT_DB_PATH) != 0) { // no cache or db_path specified: just run the initialization code - result = RunLoadQuery(*state, load_query); + result = state->con.Query(load_query); } else { // cache or db_path is specified: try to load from one of them bool in_memory_db_has_data = false; @@ -563,10 +432,7 @@ unique_ptr InterpretedBenchmark::Initialize(BenchmarkConfigurati } if (!in_memory_db_has_data) { // failed to load: write the cache - result = RunLoadQuery(*state, load_query); - } else if (!reload_query.empty()) { - // succeeded: run the reload query - result = RunLoadQuery(*state, reload_query); + result = state->con.Query(load_query); } } while (result) { @@ -607,32 +473,15 @@ ScopedConfigSetting PrepareResultCollector(ClientConfig &config, InterpretedBenc return ScopedConfigSetting( config, [&benchmark](ClientConfig &config) { - config.get_result_collector = [&benchmark](ClientContext &context, - PreparedStatementData &data) -> PhysicalOperator & { + config.result_collector = [&benchmark](ClientContext &context, PreparedStatementData 
&data) { return PhysicalArrowCollector::Create(context, data, benchmark.ArrowBatchSize()); }; }, - [](ClientConfig &config) { config.get_result_collector = nullptr; }); + [](ClientConfig &config) { config.result_collector = nullptr; }); } return ScopedConfigSetting(config); } -void InterpretedBenchmark::Assert(BenchmarkState *state_p) { - auto &state = (InterpretedBenchmarkState &)*state_p; - - for (auto &assert_query : assert_queries) { - auto &query = assert_query.query; - auto result = state.con.Query(query); - if (result->HasError()) { - result->ThrowError(); - } - auto verify_result = VerifyInternal(state_p, assert_query, *result); - if (!verify_result.empty()) { - throw InvalidInputException("Assertion query failed:\n%s", verify_result); - } - } -} - void InterpretedBenchmark::Run(BenchmarkState *state_p) { auto &state = (InterpretedBenchmarkState &)*state_p; auto &context = state.con.context; @@ -684,25 +533,21 @@ string InterpretedBenchmark::GetDatabasePath() { return db_path; } -string InterpretedBenchmark::VerifyInternal(BenchmarkState *state_p, const BenchmarkQuery &query, - MaterializedQueryResult &result) { +string InterpretedBenchmark::VerifyInternal(BenchmarkState *state_p, MaterializedQueryResult &result) { auto &state = (InterpretedBenchmarkState &)*state_p; - - auto &result_values = query.expected_result; - D_ASSERT(query.column_count >= 1); - if (query.column_count != result.ColumnCount()) { + // compare the column count + if (result_column_count >= 0 && (int64_t)result.ColumnCount() != result_column_count) { return StringUtil::Format("Error in result: expected %lld columns but got %lld\nObtained result: %s", - (int64_t)query.column_count, (int64_t)result.ColumnCount(), result.ToString()); + (int64_t)result_column_count, (int64_t)result.ColumnCount(), result.ToString()); } - // compare row count - if (result.RowCount() != query.expected_result.size()) { + if (result.RowCount() != result_values.size()) { return StringUtil::Format("Error in result: 
expected %lld rows but got %lld\nObtained result: %s", (int64_t)result_values.size(), (int64_t)result.RowCount(), result.ToString()); } // compare values - for (idx_t r = 0; r < result_values.size(); r++) { - for (idx_t c = 0; c < query.column_count; c++) { + for (int64_t r = 0; r < (int64_t)result_values.size(); r++) { + for (int64_t c = 0; c < result_column_count; c++) { auto value = result.GetValue(c, r); if (result_values[r][c] == "NULL" && value.IsNull()) { continue; @@ -732,6 +577,7 @@ string InterpretedBenchmark::VerifyInternal(BenchmarkState *state_p, const Bench string InterpretedBenchmark::Verify(BenchmarkState *state_p) { auto &state = (InterpretedBenchmarkState &)*state_p; + if (!state.result) { D_ASSERT(result_type != QueryResultType::MATERIALIZED_RESULT); return string(); @@ -740,55 +586,49 @@ string InterpretedBenchmark::Verify(BenchmarkState *state_p) { if (state.result->HasError()) { return state.result->GetError(); } - if (result_queries.empty()) { + + if (result_column_count == 0) { // no result specified return string(); } - D_ASSERT(result_queries.size() == 1); - auto &query = result_queries[0]; - auto result_query = query.query; - if (result_query.empty()) { - result_query = "select * from __answer"; - } - - // we are running a result query - // store the current result in a table called "__answer" - auto &collection = state.result->Collection(); - auto &names = state.result->names; - auto &types = state.result->types; - case_insensitive_set_t name_set; - // first create the (empty) table - string create_tbl = "CREATE OR REPLACE TEMP TABLE __answer("; - for (idx_t i = 0; i < names.size(); i++) { - if (!name_set.insert(names[i]).second) { - auto err_str = StringUtil::Format("Duplicate column name \"%s\" in benchmark query", names[i]); - throw std::runtime_error(err_str); + + if (!result_query.empty()) { + // we are running a result query + // store the current result in a table called "__answer" + auto &collection = state.result->Collection(); 
+ auto &names = state.result->names; + auto &types = state.result->types; + // first create the (empty) table + string create_tbl = "CREATE OR REPLACE TEMP TABLE __answer("; + for (idx_t i = 0; i < names.size(); i++) { + if (i > 0) { + create_tbl += ", "; + } + create_tbl += KeywordHelper::WriteOptionallyQuoted(names[i]); + create_tbl += " "; + create_tbl += types[i].ToString(); } - if (i > 0) { - create_tbl += ", "; + create_tbl += ")"; + auto new_result = state.con.Query(create_tbl); + if (new_result->HasError()) { + return new_result->GetError(); } - create_tbl += KeywordHelper::WriteOptionallyQuoted(names[i]); - create_tbl += " "; - create_tbl += types[i].ToString(); - } - create_tbl += ")"; - auto new_result = state.con.Query(create_tbl); - if (new_result->HasError()) { - return new_result->GetError(); - } - // now append the result to the answer table - auto table_info = state.con.TableInfo("__answer"); - if (table_info == nullptr) { - throw std::runtime_error("Received a nullptr when querying table info of __answer"); - } - state.con.Append(*table_info, collection); + // now append the result to the answer table + auto table_info = state.con.TableInfo("__answer"); + if (table_info == nullptr) { + throw std::runtime_error("Received a nullptr when querying table info of __answer"); + } + state.con.Append(*table_info, collection); - // finally run the result query and verify the result of that query - new_result = state.con.Query(result_query); - if (new_result->HasError()) { - return new_result->GetError(); + // finally run the result query and verify the result of that query + new_result = state.con.Query(result_query); + if (new_result->HasError()) { + return new_result->GetError(); + } + return VerifyInternal(state_p, *new_result); + } else { + return VerifyInternal(state_p, *state.result); } - return VerifyInternal(state_p, query, *new_result); } void InterpretedBenchmark::Interrupt(BenchmarkState *state_p) { diff --git 
a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp index fff277c4745d..2700a9991fdc 100644 --- a/extension/parquet/parquet_extension.cpp +++ b/extension/parquet/parquet_extension.cpp @@ -1,8 +1,10 @@ +#define DUCKDB_EXTENSION_MAIN + #include "parquet_extension.hpp" +#include "cast_column_reader.hpp" #include "duckdb.hpp" #include "duckdb/parser/expression/positional_reference_expression.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" #include "duckdb/planner/operator/logical_projection.hpp" @@ -12,22 +14,22 @@ #include "parquet_metadata.hpp" #include "parquet_reader.hpp" #include "parquet_writer.hpp" -#include "reader/struct_column_reader.hpp" +#include "struct_column_reader.hpp" #include "zstd_file_system.hpp" -#include "writer/primitive_column_writer.hpp" #include #include #include #include #include +#ifndef DUCKDB_AMALGAMATION #include "duckdb/catalog/catalog.hpp" #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" #include "duckdb/common/constants.hpp" #include "duckdb/common/enums/file_compression_type.hpp" #include "duckdb/common/file_system.hpp" #include "duckdb/common/helper.hpp" -#include "duckdb/common/multi_file/multi_file_reader.hpp" +#include "duckdb/common/multi_file_reader.hpp" #include "duckdb/common/serializer/deserializer.hpp" #include "duckdb/common/serializer/serializer.hpp" #include "duckdb/common/type_visitor.hpp" @@ -36,7 +38,7 @@ #include "duckdb/function/table_function.hpp" #include "duckdb/main/client_context.hpp" #include "duckdb/main/config.hpp" -#include "duckdb/main/extension/extension_loader.hpp" +#include "duckdb/main/extension_util.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/parsed_data/create_copy_function_info.hpp" @@ -46,12 +48,986 @@ #include 
"duckdb/planner/operator/logical_get.hpp" #include "duckdb/storage/statistics/base_statistics.hpp" #include "duckdb/storage/table/row_group.hpp" -#include "duckdb/common/multi_file/multi_file_function.hpp" -#include "duckdb/common/primitive_dictionary.hpp" -#include "parquet_multi_file_info.hpp" +#endif namespace duckdb { +struct ParquetReadBindData : public TableFunctionData { + shared_ptr file_list; + unique_ptr multi_file_reader; + + shared_ptr initial_reader; + atomic chunk_count; + vector names; + vector types; + //! Table column names - set when using COPY tbl FROM file.parquet + vector table_columns; + + // The union readers are created (when parquet union_by_name option is on) during binding + // Those readers can be re-used during ParquetParallelStateNext + vector> union_readers; + + // These come from the initial_reader, but need to be stored in case the initial_reader is removed by a filter + idx_t initial_file_cardinality; + idx_t initial_file_row_groups; + ParquetOptions parquet_options; + + MultiFileReaderBindData reader_bind; + + void Initialize(shared_ptr reader) { + initial_reader = std::move(reader); + initial_file_cardinality = initial_reader->NumRows(); + initial_file_row_groups = initial_reader->NumRowGroups(); + parquet_options = initial_reader->parquet_options; + } + void Initialize(ClientContext &, unique_ptr &union_data) { + Initialize(std::move(union_data->reader)); + } +}; + +struct ParquetReadLocalState : public LocalTableFunctionState { + shared_ptr reader; + ParquetReaderScanState scan_state; + bool is_parallel; + idx_t batch_index; + idx_t file_index; + //! 
The DataChunk containing all read columns (even columns that are immediately removed) + DataChunk all_columns; +}; + +enum class ParquetFileState : uint8_t { UNOPENED, OPENING, OPEN, CLOSED }; + +struct ParquetFileReaderData { + // Create data for an unopened file + explicit ParquetFileReaderData(const string &file_to_be_opened) + : reader(nullptr), file_state(ParquetFileState::UNOPENED), file_mutex(make_uniq()), + file_to_be_opened(file_to_be_opened) { + } + // Create data for an existing reader + explicit ParquetFileReaderData(shared_ptr reader_p) + : reader(std::move(reader_p)), file_state(ParquetFileState::OPEN), file_mutex(make_uniq()) { + } + // Create data for an existing reader + explicit ParquetFileReaderData(unique_ptr union_data_p) : file_mutex(make_uniq()) { + if (union_data_p->reader) { + reader = std::move(union_data_p->reader); + file_state = ParquetFileState::OPEN; + } else { + union_data = std::move(union_data_p); + file_state = ParquetFileState::UNOPENED; + } + } + + //! Currently opened reader for the file + shared_ptr reader; + //! Flag to indicate the file is being opened + ParquetFileState file_state; + //! Mutexes to wait for the file when it is being opened + unique_ptr file_mutex; + //! Parquet options for opening the file + unique_ptr union_data; + + //! (only set when file_state is UNOPENED) the file to be opened + string file_to_be_opened; +}; + +struct ParquetReadGlobalState : public GlobalTableFunctionState { + explicit ParquetReadGlobalState(MultiFileList &file_list_p) : file_list(file_list_p) { + } + explicit ParquetReadGlobalState(unique_ptr owned_file_list_p) + : file_list(*owned_file_list_p), owned_file_list(std::move(owned_file_list_p)) { + } + + //! The file list to scan + MultiFileList &file_list; + //! The scan over the file_list + MultiFileListScanData file_list_scan; + //! 
Owned multi file list - if filters have been dynamically pushed into the reader + unique_ptr owned_file_list; + + unique_ptr multi_file_reader_state; + + mutex lock; + + //! The current set of parquet readers + vector> readers; + + //! Signal to other threads that a file failed to open, letting every thread abort. + bool error_opening_file = false; + + //! Index of file currently up for scanning + atomic file_index; + //! Index of row group within file currently up for scanning + idx_t row_group_index; + //! Batch index of the next row group to be scanned + idx_t batch_index; + + idx_t max_threads; + vector projection_ids; + vector scanned_types; + vector column_ids; + optional_ptr filters; + + idx_t MaxThreads() const override { + return max_threads; + } + + bool CanRemoveColumns() const { + return !projection_ids.empty(); + } +}; + +struct ParquetWriteBindData : public TableFunctionData { + vector sql_types; + vector column_names; + duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY; + vector> kv_metadata; + idx_t row_group_size = Storage::ROW_GROUP_SIZE; + + //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW + static constexpr const idx_t BYTES_PER_ROW = 1024; + idx_t row_group_size_bytes; + + //! How/Whether to encrypt the data + shared_ptr encryption_config; + bool debug_use_openssl = true; + + //! Dictionary compression is applied only if the compression ratio exceeds this threshold + double dictionary_compression_ratio_threshold = 1.0; + + //! After how many row groups to rotate to a new file + optional_idx row_groups_per_file; + + ChildFieldIDs field_ids; + //! 
The compression level, higher value is more + optional_idx compression_level; +}; + +struct ParquetWriteGlobalState : public GlobalFunctionData { + unique_ptr writer; +}; + +struct ParquetWriteLocalState : public LocalFunctionData { + explicit ParquetWriteLocalState(ClientContext &context, const vector &types) + : buffer(context, types, ColumnDataAllocatorType::HYBRID) { + buffer.InitializeAppend(append_state); + } + + ColumnDataCollection buffer; + ColumnDataAppendState append_state; +}; + +BindInfo ParquetGetBindInfo(const optional_ptr bind_data) { + auto bind_info = BindInfo(ScanType::PARQUET); + auto &parquet_bind = bind_data->Cast(); + + vector file_path; + for (const auto &file : parquet_bind.file_list->Files()) { + file_path.emplace_back(file); + } + + // LCOV_EXCL_START + bind_info.InsertOption("file_path", Value::LIST(LogicalType::VARCHAR, file_path)); + bind_info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_bind.parquet_options.binary_as_string)); + bind_info.InsertOption("file_row_number", Value::BOOLEAN(parquet_bind.parquet_options.file_row_number)); + bind_info.InsertOption("debug_use_openssl", Value::BOOLEAN(parquet_bind.parquet_options.debug_use_openssl)); + parquet_bind.parquet_options.file_options.AddBatchInfo(bind_info); + // LCOV_EXCL_STOP + return bind_info; +} + +static void ParseFileRowNumberOption(MultiFileReaderBindData &bind_data, ParquetOptions &options, + vector &return_types, vector &names) { + if (options.file_row_number) { + if (StringUtil::CIFind(names, "file_row_number") != DConstants::INVALID_INDEX) { + throw BinderException( + "Using file_row_number option on file with column named file_row_number is not supported"); + } + + bind_data.file_row_number_idx = names.size(); + return_types.emplace_back(LogicalType::BIGINT); + names.emplace_back("file_row_number"); + } +} + +static MultiFileReaderBindData BindSchema(ClientContext &context, vector &return_types, + vector &names, ParquetReadBindData &result, ParquetOptions 
&options) { + D_ASSERT(!options.schema.empty()); + + options.file_options.AutoDetectHivePartitioning(*result.file_list, context); + + auto &file_options = options.file_options; + if (file_options.union_by_name || file_options.hive_partitioning) { + throw BinderException("Parquet schema cannot be combined with union_by_name=true or hive_partitioning=true"); + } + + vector schema_col_names; + vector schema_col_types; + schema_col_names.reserve(options.schema.size()); + schema_col_types.reserve(options.schema.size()); + for (const auto &column : options.schema) { + schema_col_names.push_back(column.name); + schema_col_types.push_back(column.type); + } + + // perform the binding on the obtained set of names + types + MultiFileReaderBindData bind_data; + result.multi_file_reader->BindOptions(options.file_options, *result.file_list, schema_col_types, schema_col_names, + bind_data); + + names = schema_col_names; + return_types = schema_col_types; + D_ASSERT(names.size() == return_types.size()); + + ParseFileRowNumberOption(bind_data, options, return_types, names); + + return bind_data; +} + +static void InitializeParquetReader(ParquetReader &reader, const ParquetReadBindData &bind_data, + const vector &global_column_ids, + optional_ptr table_filters, ClientContext &context, + optional_idx file_idx, optional_ptr reader_state) { + auto &parquet_options = bind_data.parquet_options; + auto &reader_data = reader.reader_data; + + reader.table_columns = bind_data.table_columns; + // Mark the file in the file list we are scanning here + reader_data.file_list_idx = file_idx; + + if (bind_data.parquet_options.schema.empty()) { + bind_data.multi_file_reader->InitializeReader( + reader, parquet_options.file_options, bind_data.reader_bind, bind_data.types, bind_data.names, + global_column_ids, table_filters, bind_data.file_list->GetFirstFile(), context, reader_state); + return; + } + + // a fixed schema was supplied, initialize the MultiFileReader settings here so we can read using 
the schema + + // this deals with hive partitioning and filename=true + bind_data.multi_file_reader->FinalizeBind(parquet_options.file_options, bind_data.reader_bind, reader.GetFileName(), + reader.GetNames(), bind_data.types, bind_data.names, global_column_ids, + reader_data, context, reader_state); + + // create a mapping from field id to column index in file + unordered_map field_id_to_column_index; + auto &column_readers = reader.root_reader->Cast().child_readers; + for (idx_t column_index = 0; column_index < column_readers.size(); column_index++) { + auto &column_schema = column_readers[column_index]->Schema(); + if (column_schema.__isset.field_id) { + field_id_to_column_index[column_schema.field_id] = column_index; + } + } + + // loop through the schema definition + for (idx_t i = 0; i < global_column_ids.size(); i++) { + auto global_column_index = global_column_ids[i]; + + // check if this is a constant column + bool constant = false; + for (auto &entry : reader_data.constant_map) { + if (entry.column_id == i) { + constant = true; + break; + } + } + if (constant) { + // this column is constant for this file + continue; + } + + // Handle any generate columns that are not in the schema (currently only file_row_number) + if (global_column_index >= parquet_options.schema.size()) { + if (bind_data.reader_bind.file_row_number_idx == global_column_index) { + reader_data.column_mapping.push_back(i); + reader_data.column_ids.push_back(reader.file_row_number_idx); + } + continue; + } + + const auto &column_definition = parquet_options.schema[global_column_index]; + auto it = field_id_to_column_index.find(column_definition.field_id); + if (it == field_id_to_column_index.end()) { + // field id not present in file, use default value + reader_data.constant_map.emplace_back(i, column_definition.default_value); + continue; + } + + const auto &local_column_index = it->second; + auto &column_reader = column_readers[local_column_index]; + if (column_reader->Type() != 
column_definition.type) { + // differing types, wrap in a cast column reader + reader_data.cast_map[local_column_index] = column_definition.type; + } + + reader_data.column_mapping.push_back(i); + reader_data.column_ids.push_back(local_column_index); + } + reader_data.empty_columns = reader_data.column_ids.empty(); + + // Finally, initialize the filters + bind_data.multi_file_reader->CreateFilterMap(bind_data.types, table_filters, reader_data, reader_state); + reader_data.filters = table_filters; +} + +static bool GetBooleanArgument(const pair> &option) { + if (option.second.empty()) { + return true; + } + Value boolean_value; + string error_message; + if (!option.second[0].DefaultTryCastAs(LogicalType::BOOLEAN, boolean_value, &error_message)) { + throw InvalidInputException("Unable to cast \"%s\" to BOOLEAN for Parquet option \"%s\"", + option.second[0].ToString(), option.first); + } + return BooleanValue::Get(boolean_value); +} + +class ParquetScanFunction { +public: + static TableFunctionSet GetFunctionSet() { + TableFunction table_function("parquet_scan", {LogicalType::VARCHAR}, ParquetScanImplementation, ParquetScanBind, + ParquetScanInitGlobal, ParquetScanInitLocal); + table_function.statistics = ParquetScanStats; + table_function.cardinality = ParquetCardinality; + table_function.table_scan_progress = ParquetProgress; + table_function.named_parameters["binary_as_string"] = LogicalType::BOOLEAN; + table_function.named_parameters["file_row_number"] = LogicalType::BOOLEAN; + table_function.named_parameters["debug_use_openssl"] = LogicalType::BOOLEAN; + table_function.named_parameters["compression"] = LogicalType::VARCHAR; + table_function.named_parameters["schema"] = + LogicalType::MAP(LogicalType::INTEGER, LogicalType::STRUCT({{{"name", LogicalType::VARCHAR}, + {"type", LogicalType::VARCHAR}, + {"default_value", LogicalType::VARCHAR}}})); + table_function.named_parameters["encryption_config"] = LogicalTypeId::ANY; + table_function.get_batch_index = 
ParquetScanGetBatchIndex; + table_function.serialize = ParquetScanSerialize; + table_function.deserialize = ParquetScanDeserialize; + table_function.get_bind_info = ParquetGetBindInfo; + table_function.projection_pushdown = true; + table_function.filter_pushdown = true; + table_function.filter_prune = true; + table_function.pushdown_complex_filter = ParquetComplexFilterPushdown; + + MultiFileReader::AddParameters(table_function); + + return MultiFileReader::CreateFunctionSet(table_function); + } + + static unique_ptr ParquetReadBind(ClientContext &context, CopyInfo &info, + vector &expected_names, + vector &expected_types) { + D_ASSERT(expected_names.size() == expected_types.size()); + ParquetOptions parquet_options(context); + + for (auto &option : info.options) { + auto loption = StringUtil::Lower(option.first); + if (loption == "compression" || loption == "codec" || loption == "row_group_size") { + // CODEC/COMPRESSION and ROW_GROUP_SIZE options have no effect on parquet read. + // These options are determined from the file. + continue; + } else if (loption == "binary_as_string") { + parquet_options.binary_as_string = GetBooleanArgument(option); + } else if (loption == "file_row_number") { + parquet_options.file_row_number = GetBooleanArgument(option); + } else if (loption == "debug_use_openssl") { + parquet_options.debug_use_openssl = GetBooleanArgument(option); + } else if (loption == "encryption_config") { + if (option.second.size() != 1) { + throw BinderException("Parquet encryption_config cannot be empty!"); + } + parquet_options.encryption_config = ParquetEncryptionConfig::Create(context, option.second[0]); + } else { + throw NotImplementedException("Unsupported option for COPY FROM parquet: %s", option.first); + } + } + + // TODO: Allow overriding the MultiFileReader for COPY FROM? 
+ auto multi_file_reader = MultiFileReader::CreateDefault("ParquetCopy"); + vector paths = {info.file_path}; + auto file_list = multi_file_reader->CreateFileList(context, paths); + + return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), expected_types, + expected_names, parquet_options); + } + + static unique_ptr ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p, + column_t column_index) { + auto &bind_data = bind_data_p->Cast(); + + if (IsRowIdColumnId(column_index)) { + return nullptr; + } + + // NOTE: we do not want to parse the Parquet metadata for the sole purpose of getting column statistics + + auto &config = DBConfig::GetConfig(context); + + if (bind_data.file_list->GetExpandResult() != FileExpandResult::MULTIPLE_FILES) { + if (bind_data.initial_reader) { + // most common path, scanning single parquet file + return bind_data.initial_reader->ReadStatistics(bind_data.names[column_index]); + } else if (!config.options.object_cache_enable) { + // our initial reader was reset + return nullptr; + } + } else if (config.options.object_cache_enable) { + // multiple files, object cache enabled: merge statistics + unique_ptr overall_stats; + + auto &cache = ObjectCache::GetObjectCache(context); + // for more than one file, we could be lucky and metadata for *every* file is in the object cache (if + // enabled at all) + FileSystem &fs = FileSystem::GetFileSystem(context); + + for (const auto &file_name : bind_data.file_list->Files()) { + auto metadata = cache.Get(file_name); + if (!metadata) { + // missing metadata entry in cache, no usable stats + return nullptr; + } + if (!fs.IsRemoteFile(file_name)) { + auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); + // we need to check if the metadata cache entries are current + if (fs.GetLastModifiedTime(*handle) >= metadata->read_time) { + // missing or invalid metadata entry in cache, no usable stats overall + return nullptr; + } + } else { + // for 
remote files we just avoid reading stats entirely + return nullptr; + } + // get and merge stats for file + auto file_stats = ParquetReader::ReadStatistics(context, bind_data.parquet_options, metadata, + bind_data.names[column_index]); + if (!file_stats) { + return nullptr; + } + if (overall_stats) { + overall_stats->Merge(*file_stats); + } else { + overall_stats = std::move(file_stats); + } + } + // success! + return overall_stats; + } + + // multiple files and no object cache, no luck! + return nullptr; + } + + static unique_ptr ParquetScanBindInternal(ClientContext &context, + unique_ptr multi_file_reader, + unique_ptr file_list, + vector &return_types, vector &names, + ParquetOptions parquet_options) { + auto result = make_uniq(); + result->multi_file_reader = std::move(multi_file_reader); + result->file_list = std::move(file_list); + + bool bound_on_first_file = true; + if (result->multi_file_reader->Bind(parquet_options.file_options, *result->file_list, result->types, + result->names, result->reader_bind)) { + result->multi_file_reader->BindOptions(parquet_options.file_options, *result->file_list, result->types, + result->names, result->reader_bind); + // Enable the parquet file_row_number on the parquet options if the file_row_number_idx was set + if (result->reader_bind.file_row_number_idx != DConstants::INVALID_INDEX) { + parquet_options.file_row_number = true; + } + bound_on_first_file = false; + } else if (!parquet_options.schema.empty()) { + // A schema was supplied: use the schema for binding + result->reader_bind = BindSchema(context, result->types, result->names, *result, parquet_options); + } else { + parquet_options.file_options.AutoDetectHivePartitioning(*result->file_list, context); + // Default bind + result->reader_bind = result->multi_file_reader->BindReader( + context, result->types, result->names, *result->file_list, *result, parquet_options); + } + + if (return_types.empty()) { + // no expected types - just copy the types + return_types = 
result->types; + names = result->names; + } else { + if (return_types.size() != result->types.size()) { + auto file_string = bound_on_first_file ? result->file_list->GetFirstFile() + : StringUtil::Join(result->file_list->GetPaths(), ","); + string extended_error; + extended_error = "Table schema: "; + for (idx_t col_idx = 0; col_idx < return_types.size(); col_idx++) { + if (col_idx > 0) { + extended_error += ", "; + } + extended_error += names[col_idx] + " " + return_types[col_idx].ToString(); + } + extended_error += "\nParquet schema: "; + for (idx_t col_idx = 0; col_idx < result->types.size(); col_idx++) { + if (col_idx > 0) { + extended_error += ", "; + } + extended_error += result->names[col_idx] + " " + result->types[col_idx].ToString(); + } + extended_error += "\n\nPossible solutions:"; + extended_error += "\n* Manually specify which columns to insert using \"INSERT INTO tbl SELECT ... " + "FROM read_parquet(...)\""; + throw ConversionException( + "Failed to read file(s) \"%s\" - column count mismatch: expected %d columns but found %d\n%s", + file_string, return_types.size(), result->types.size(), extended_error); + } + // expected types - overwrite the types we want to read instead + result->types = return_types; + result->table_columns = names; + } + result->parquet_options = parquet_options; + return std::move(result); + } + + static unique_ptr ParquetScanBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto multi_file_reader = MultiFileReader::Create(input.table_function); + + ParquetOptions parquet_options(context); + for (auto &kv : input.named_parameters) { + if (kv.second.IsNull()) { + throw BinderException("Cannot use NULL as function argument"); + } + auto loption = StringUtil::Lower(kv.first); + if (multi_file_reader->ParseOption(kv.first, kv.second, parquet_options.file_options, context)) { + continue; + } + if (loption == "binary_as_string") { + parquet_options.binary_as_string = 
BooleanValue::Get(kv.second); + } else if (loption == "file_row_number") { + parquet_options.file_row_number = BooleanValue::Get(kv.second); + } else if (loption == "debug_use_openssl") { + parquet_options.debug_use_openssl = BooleanValue::Get(kv.second); + } else if (loption == "schema") { + // Argument is a map that defines the schema + const auto &schema_value = kv.second; + const auto column_values = ListValue::GetChildren(schema_value); + if (column_values.empty()) { + throw BinderException("Parquet schema cannot be empty"); + } + parquet_options.schema.reserve(column_values.size()); + for (idx_t i = 0; i < column_values.size(); i++) { + parquet_options.schema.emplace_back( + ParquetColumnDefinition::FromSchemaValue(context, column_values[i])); + } + + // cannot be combined with hive_partitioning=true, so we disable auto-detection + parquet_options.file_options.auto_detect_hive_partitioning = false; + } else if (loption == "encryption_config") { + parquet_options.encryption_config = ParquetEncryptionConfig::Create(context, kv.second); + } + } + + auto file_list = multi_file_reader->CreateFileList(context, input.inputs[0]); + + auto files=file_list->GetPaths(); + return ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), return_types, names, + parquet_options); + } + + static double ParquetProgress(ClientContext &context, const FunctionData *bind_data_p, + const GlobalTableFunctionState *global_state) { + auto &bind_data = bind_data_p->Cast(); + auto &gstate = global_state->Cast(); + + auto total_count = gstate.file_list.GetTotalFileCount(); + if (total_count == 0) { + return 100.0; + } + if (bind_data.initial_file_cardinality == 0) { + return (100.0 * (static_cast(gstate.file_index) + 1.0)) / static_cast(total_count); + } + auto percentage = MinValue(100.0, (static_cast(bind_data.chunk_count) * STANDARD_VECTOR_SIZE * + 100.0 / static_cast(bind_data.initial_file_cardinality))); + return (percentage + 100.0 * 
static_cast(gstate.file_index)) / static_cast(total_count); + } + + static unique_ptr + ParquetScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *gstate_p) { + auto &bind_data = input.bind_data->Cast(); + auto &gstate = gstate_p->Cast(); + + auto result = make_uniq(); + result->is_parallel = true; + result->batch_index = 0; + + if (gstate.CanRemoveColumns()) { + result->all_columns.Initialize(context.client, gstate.scanned_types); + } + if (!ParquetParallelStateNext(context.client, bind_data, *result, gstate)) { + return nullptr; + } + return std::move(result); + } + + static unique_ptr ParquetDynamicFilterPushdown(ClientContext &context, + const ParquetReadBindData &data, + const vector &column_ids, + optional_ptr filters) { + if (!filters) { + return nullptr; + } + auto new_list = data.multi_file_reader->DynamicFilterPushdown( + context, *data.file_list, data.parquet_options.file_options, data.names, data.types, column_ids, *filters); + return new_list; + } + + static unique_ptr ParquetScanInitGlobal(ClientContext &context, + TableFunctionInitInput &input) { + auto &bind_data = input.bind_data->CastNoConst(); + unique_ptr result; + + // before instantiating a scan trigger a dynamic filter pushdown if possible + auto new_list = ParquetDynamicFilterPushdown(context, bind_data, input.column_ids, input.filters); + if (new_list) { + result = make_uniq(std::move(new_list)); + } else { + result = make_uniq(*bind_data.file_list); + } + auto &file_list = result->file_list; + file_list.InitializeScan(result->file_list_scan); + + result->multi_file_reader_state = bind_data.multi_file_reader->InitializeGlobalState( + context, bind_data.parquet_options.file_options, bind_data.reader_bind, file_list, bind_data.types, + bind_data.names, input.column_ids); + if (file_list.IsEmpty()) { + result->readers = {}; + } else if (!bind_data.union_readers.empty()) { + // TODO: confirm we are not changing behaviour by modifying the order here? 
+ for (auto &reader : bind_data.union_readers) { + if (!reader) { + break; + } + result->readers.push_back(make_uniq(std::move(reader))); + } + if (result->readers.size() != file_list.GetTotalFileCount()) { + // This case happens with recursive CTEs: the first execution the readers have already + // been moved out of the bind data. + // FIXME: clean up this process and make it more explicit + result->readers = {}; + } + } else if (bind_data.initial_reader) { + // we can only use the initial reader if it was constructed from the first file + if (bind_data.initial_reader->file_name == file_list.GetFirstFile()) { + result->readers.push_back(make_uniq(std::move(bind_data.initial_reader))); + } + } + + // Ensure all readers are initialized and FileListScan is sync with readers list + for (auto &reader_data : result->readers) { + string file_name; + idx_t file_idx = result->file_list_scan.current_file_idx; + file_list.Scan(result->file_list_scan, file_name); + if (reader_data->union_data) { + if (file_name != reader_data->union_data->GetFileName()) { + throw InternalException("Mismatch in filename order and union reader order in parquet scan"); + } + } else { + D_ASSERT(reader_data->reader); + if (file_name != reader_data->reader->file_name) { + throw InternalException("Mismatch in filename order and reader order in parquet scan"); + } + InitializeParquetReader(*reader_data->reader, bind_data, input.column_ids, input.filters, context, + file_idx, result->multi_file_reader_state); + } + } + + result->column_ids = input.column_ids; + result->filters = input.filters.get(); + result->row_group_index = 0; + result->file_index = 0; + result->batch_index = 0; + result->max_threads = ParquetScanMaxThreads(context, input.bind_data.get()); + + bool require_extra_columns = + result->multi_file_reader_state && result->multi_file_reader_state->RequiresExtraColumns(); + if (input.CanRemoveFilterColumns() || require_extra_columns) { + if (!input.projection_ids.empty()) { + 
result->projection_ids = input.projection_ids; + } else { + result->projection_ids.resize(input.column_ids.size()); + iota(begin(result->projection_ids), end(result->projection_ids), 0); + } + + const auto table_types = bind_data.types; + for (const auto &col_idx : input.column_ids) { + if (IsRowIdColumnId(col_idx)) { + result->scanned_types.emplace_back(LogicalType::ROW_TYPE); + } else { + result->scanned_types.push_back(table_types[col_idx]); + } + } + } + + if (require_extra_columns) { + for (const auto &column_type : result->multi_file_reader_state->extra_columns) { + result->scanned_types.push_back(column_type); + } + } + + return std::move(result); + } + + static idx_t ParquetScanGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p, + LocalTableFunctionState *local_state, + GlobalTableFunctionState *global_state) { + auto &data = local_state->Cast(); + return data.batch_index; + } + + static void ParquetScanSerialize(Serializer &serializer, const optional_ptr bind_data_p, + const TableFunction &function) { + auto &bind_data = bind_data_p->Cast(); + + serializer.WriteProperty(100, "files", bind_data.file_list->GetAllFiles()); + serializer.WriteProperty(101, "types", bind_data.types); + serializer.WriteProperty(102, "names", bind_data.names); + serializer.WriteProperty(103, "parquet_options", bind_data.parquet_options); + if (serializer.ShouldSerialize(3)) { + serializer.WriteProperty(104, "table_columns", bind_data.table_columns); + } + } + + static unique_ptr ParquetScanDeserialize(Deserializer &deserializer, TableFunction &function) { + auto &context = deserializer.Get(); + auto files = deserializer.ReadProperty>(100, "files"); + auto types = deserializer.ReadProperty>(101, "types"); + auto names = deserializer.ReadProperty>(102, "names"); + auto parquet_options = deserializer.ReadProperty(103, "parquet_options"); + auto table_columns = + deserializer.ReadPropertyWithExplicitDefault>(104, "table_columns", vector {}); + + vector file_path; + 
for (auto &path : files) { + file_path.emplace_back(path); + } + + auto multi_file_reader = MultiFileReader::Create(function); + auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path), + FileGlobOptions::DISALLOW_EMPTY); + auto bind_data = ParquetScanBindInternal(context, std::move(multi_file_reader), std::move(file_list), types, + names, parquet_options); + bind_data->Cast().table_columns = std::move(table_columns); + return bind_data; + } + + static void ParquetScanImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + if (!data_p.local_state) { + return; + } + auto &data = data_p.local_state->Cast(); + auto &gstate = data_p.global_state->Cast(); + auto &bind_data = data_p.bind_data->CastNoConst(); + + do { + if (gstate.CanRemoveColumns()) { + data.all_columns.Reset(); + data.reader->Scan(data.scan_state, data.all_columns); + bind_data.multi_file_reader->FinalizeChunk(context, bind_data.reader_bind, data.reader->reader_data, + data.all_columns, gstate.multi_file_reader_state); + output.ReferenceColumns(data.all_columns, gstate.projection_ids); + } else { + data.reader->Scan(data.scan_state, output); + bind_data.multi_file_reader->FinalizeChunk(context, bind_data.reader_bind, data.reader->reader_data, + output, gstate.multi_file_reader_state); + } + + bind_data.chunk_count++; + if (output.size() > 0) { + return; + } + if (!ParquetParallelStateNext(context, bind_data, data, gstate)) { + return; + } + } while (true); + } + + static unique_ptr ParquetCardinality(ClientContext &context, const FunctionData *bind_data) { + auto &data = bind_data->Cast(); + + auto file_list_cardinality_estimate = data.file_list->GetCardinality(context); + if (file_list_cardinality_estimate) { + return file_list_cardinality_estimate; + } + + return make_uniq(data.initial_file_cardinality * data.file_list->GetTotalFileCount()); + } + + static idx_t ParquetScanMaxThreads(ClientContext &context, const 
FunctionData *bind_data) { + auto &data = bind_data->Cast(); + + if (data.file_list->GetExpandResult() == FileExpandResult::MULTIPLE_FILES) { + return TaskScheduler::GetScheduler(context).NumberOfThreads(); + } + + return MaxValue(data.initial_file_row_groups, (idx_t)1); + } + + // Queries the metadataprovider for another file to scan, updating the files/reader lists in the process. + // Returns true if resized + static bool ResizeFiles(ParquetReadGlobalState ¶llel_state) { + string scanned_file; + if (!parallel_state.file_list.Scan(parallel_state.file_list_scan, scanned_file)) { + return false; + } + + // Push the file in the reader data, to be opened later + parallel_state.readers.push_back(make_uniq(scanned_file)); + + return true; + } + + // This function looks for the next available row group. If not available, it will open files from bind_data.files + // until there is a row group available for scanning or the files runs out + static bool ParquetParallelStateNext(ClientContext &context, const ParquetReadBindData &bind_data, + ParquetReadLocalState &scan_data, ParquetReadGlobalState ¶llel_state) { + unique_lock parallel_lock(parallel_state.lock); + + while (true) { + if (parallel_state.error_opening_file) { + return false; + } + + if (parallel_state.file_index >= parallel_state.readers.size() && !ResizeFiles(parallel_state)) { + return false; + } + + auto ¤t_reader_data = *parallel_state.readers[parallel_state.file_index]; + if (current_reader_data.file_state == ParquetFileState::OPEN) { + if (parallel_state.row_group_index < current_reader_data.reader->NumRowGroups()) { + // The current reader has rowgroups left to be scanned + scan_data.reader = current_reader_data.reader; + vector group_indexes {parallel_state.row_group_index}; + scan_data.reader->InitializeScan(context, scan_data.scan_state, group_indexes); + scan_data.batch_index = parallel_state.batch_index++; + scan_data.file_index = parallel_state.file_index; + parallel_state.row_group_index++; + 
return true; + } else { + // Close current file + current_reader_data.file_state = ParquetFileState::CLOSED; + current_reader_data.reader = nullptr; + + // Set state to the next file + parallel_state.file_index++; + parallel_state.row_group_index = 0; + + continue; + } + } + + if (TryOpenNextFile(context, bind_data, scan_data, parallel_state, parallel_lock)) { + continue; + } + + // Check if the current file is being opened, in that case we need to wait for it. + if (current_reader_data.file_state == ParquetFileState::OPENING) { + WaitForFile(parallel_state.file_index, parallel_state, parallel_lock); + } + } + } + + static void ParquetComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p, + vector> &filters) { + auto &data = bind_data_p->Cast(); + + MultiFilePushdownInfo info(get); + auto new_list = data.multi_file_reader->ComplexFilterPushdown(context, *data.file_list, + data.parquet_options.file_options, info, filters); + + if (new_list) { + data.file_list = std::move(new_list); + MultiFileReader::PruneReaders(data, *data.file_list); + } + } + + //! Wait for a file to become available. Parallel lock should be locked when calling. + static void WaitForFile(idx_t file_index, ParquetReadGlobalState ¶llel_state, + unique_lock ¶llel_lock) { + while (true) { + // Get pointer to file mutex before unlocking + auto &file_mutex = *parallel_state.readers[file_index]->file_mutex; + + // To get the file lock, we first need to release the parallel_lock to prevent deadlocking. 
Note that this + // requires getting the ref to the file mutex pointer with the lock stil held: readers get be resized + parallel_lock.unlock(); + unique_lock current_file_lock(file_mutex); + parallel_lock.lock(); + + // Here we have both locks which means we can stop waiting if: + // - the thread opening the file is done and the file is available + // - the thread opening the file has failed + // - the file was somehow scanned till the end while we were waiting + if (parallel_state.file_index >= parallel_state.readers.size() || + parallel_state.readers[parallel_state.file_index]->file_state != ParquetFileState::OPENING || + parallel_state.error_opening_file) { + return; + } + } + } + + //! Helper function that try to start opening a next file. Parallel lock should be locked when calling. + static bool TryOpenNextFile(ClientContext &context, const ParquetReadBindData &bind_data, + ParquetReadLocalState &scan_data, ParquetReadGlobalState ¶llel_state, + unique_lock ¶llel_lock) { + const auto file_index_limit = + parallel_state.file_index + TaskScheduler::GetScheduler(context).NumberOfThreads(); + + for (idx_t i = parallel_state.file_index; i < file_index_limit; i++) { + // We check if we can resize files in this loop too otherwise we will only ever open 1 file ahead + if (i >= parallel_state.readers.size() && !ResizeFiles(parallel_state)) { + return false; + } + + auto ¤t_reader_data = *parallel_state.readers[i]; + if (current_reader_data.file_state == ParquetFileState::UNOPENED) { + current_reader_data.file_state = ParquetFileState::OPENING; + auto pq_options = bind_data.parquet_options; + + // Get pointer to file mutex before unlocking + auto ¤t_file_lock = *current_reader_data.file_mutex; + + // Now we switch which lock we are holding, instead of locking the global state, we grab the lock on + // the file we are opening. This file lock allows threads to wait for a file to be opened. 
+ parallel_lock.unlock(); + unique_lock file_lock(current_file_lock); + + shared_ptr reader; + try { + if (current_reader_data.union_data) { + auto &union_data = *current_reader_data.union_data; + reader = make_shared_ptr(context, union_data.file_name, union_data.options, + union_data.metadata); + } else { + reader = + make_shared_ptr(context, current_reader_data.file_to_be_opened, pq_options); + } + InitializeParquetReader(*reader, bind_data, parallel_state.column_ids, parallel_state.filters, + context, i, parallel_state.multi_file_reader_state); + } catch (...) { + parallel_lock.lock(); + parallel_state.error_opening_file = true; + throw; + } + + // Now re-lock the state and add the reader + parallel_lock.lock(); + current_reader_data.reader = std::move(reader); + current_reader_data.file_state = ParquetFileState::OPEN; + + return true; + } + } + + return false; + } +}; + static case_insensitive_map_t GetChildNameToTypeMap(const LogicalType &type) { case_insensitive_map_t name_to_type_map; switch (type.id()) { @@ -202,79 +1178,10 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids, } } -struct ParquetWriteBindData : public TableFunctionData { - vector sql_types; - vector column_names; - duckdb_parquet::CompressionCodec::type codec = duckdb_parquet::CompressionCodec::SNAPPY; - vector> kv_metadata; - idx_t row_group_size = DEFAULT_ROW_GROUP_SIZE; - idx_t row_group_size_bytes = NumericLimits::Maximum(); - - //! How/Whether to encrypt the data - shared_ptr encryption_config; - bool debug_use_openssl = true; - - //! After how many distinct values should we abandon dictionary compression and bloom filters? - idx_t dictionary_size_limit = row_group_size / 20; - - void SetToDefaultDictionarySizeLimit() { - // This depends on row group size so we should "reset" if the row group size is changed - dictionary_size_limit = row_group_size / 20; - } - - //! 
This is huge but we grow it starting from 1 MB - idx_t string_dictionary_page_size_limit = PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE; - - bool enable_bloom_filters = true; - //! What false positive rate are we willing to accept for bloom filters - double bloom_filter_false_positive_ratio = 0.01; - - //! After how many row groups to rotate to a new file - optional_idx row_groups_per_file; - - ChildFieldIDs field_ids; - //! The compression level, higher value is more - int64_t compression_level = ZStdFileSystem::DefaultCompressionLevel(); - - //! Which encodings to include when writing - ParquetVersion parquet_version = ParquetVersion::V1; -}; - -struct ParquetWriteGlobalState : public GlobalFunctionData { - unique_ptr writer; - optional_ptr op; - - void LogFlushingRowGroup(const ColumnDataCollection &buffer, const string &reason) { - if (!op) { - return; - } - DUCKDB_LOG(writer->GetContext(), PhysicalOperatorLogType, *op, "ParquetWriter", "FlushRowGroup", - {{"file", writer->GetFileName()}, - {"rows", to_string(buffer.Count())}, - {"size", to_string(buffer.SizeInBytes())}, - {"reason", reason}}); - } - - mutex lock; - unique_ptr combine_buffer; -}; - -struct ParquetWriteLocalState : public LocalFunctionData { - explicit ParquetWriteLocalState(ClientContext &context, const vector &types) : buffer(context, types) { - buffer.SetPartitionIndex(0); // Makes the buffer manager less likely to spill this data - buffer.InitializeAppend(append_state); - } - - ColumnDataCollection buffer; - ColumnDataAppendState append_state; -}; - -static unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBindInput &input, - const vector &names, const vector &sql_types) { +unique_ptr ParquetWriteBind(ClientContext &context, CopyFunctionBindInput &input, + const vector &names, const vector &sql_types) { D_ASSERT(names.size() == sql_types.size()); bool row_group_size_bytes_set = false; - bool compression_level_set = false; - bool dictionary_size_limit_set = false; auto 
bind_data = make_uniq(); for (auto &option : input.info.options) { const auto loption = StringUtil::Lower(option.first); @@ -284,9 +1191,6 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun } if (loption == "row_group_size" || loption == "chunk_size") { bind_data->row_group_size = option.second[0].GetValue(); - if (!dictionary_size_limit_set) { - bind_data->SetToDefaultDictionarySizeLimit(); - } } else if (loption == "row_group_size_bytes") { auto roption = option.second[0]; if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) { @@ -300,23 +1204,22 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun } else if (loption == "compression" || loption == "codec") { const auto roption = StringUtil::Lower(option.second[0].ToString()); if (roption == "uncompressed") { - bind_data->codec = duckdb_parquet::CompressionCodec::UNCOMPRESSED; + bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED; } else if (roption == "snappy") { - bind_data->codec = duckdb_parquet::CompressionCodec::SNAPPY; + bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY; } else if (roption == "gzip") { - bind_data->codec = duckdb_parquet::CompressionCodec::GZIP; + bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP; } else if (roption == "zstd") { - bind_data->codec = duckdb_parquet::CompressionCodec::ZSTD; + bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD; } else if (roption == "brotli") { - bind_data->codec = duckdb_parquet::CompressionCodec::BROTLI; + bind_data->codec = duckdb_parquet::format::CompressionCodec::BROTLI; } else if (roption == "lz4" || roption == "lz4_raw") { /* LZ4 is technically another compression scheme, but deprecated and arrow also uses them * interchangeably */ - bind_data->codec = duckdb_parquet::CompressionCodec::LZ4_RAW; + bind_data->codec = duckdb_parquet::format::CompressionCodec::LZ4_RAW; } else { - throw BinderException( - "Expected %s argument to be any of 
[uncompressed, brotli, gzip, snappy, lz4, lz4_raw or zstd]", - loption); + throw BinderException("Expected %s argument to be either [uncompressed, brotli, gzip, snappy, or zstd]", + loption); } } else if (loption == "field_ids") { if (option.second[0].type().id() == LogicalTypeId::VARCHAR && @@ -343,7 +1246,7 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun } auto values = StructValue::GetChildren(kv_struct); for (idx_t i = 0; i < values.size(); i++) { - auto &value = values[i]; + auto value = values[i]; auto key = StructType::GetChildName(kv_struct_type, i); // If the value is a blob, write the raw blob bytes // otherwise, cast to string @@ -356,30 +1259,14 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun } else if (loption == "encryption_config") { bind_data->encryption_config = ParquetEncryptionConfig::Create(context, option.second[0]); } else if (loption == "dictionary_compression_ratio_threshold") { - // deprecated, ignore setting - } else if (loption == "dictionary_size_limit") { - auto val = option.second[0].GetValue(); - if (val < 0) { - throw BinderException("dictionary_size_limit must be greater than 0 or 0 to disable"); - } - bind_data->dictionary_size_limit = val; - dictionary_size_limit_set = true; - } else if (loption == "string_dictionary_page_size_limit") { - auto val = option.second[0].GetValue(); - if (val > PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE || val == 0) { - throw BinderException( - "string_dictionary_page_size_limit cannot be 0 and must be less than or equal to %llu", - PrimitiveColumnWriter::MAX_UNCOMPRESSED_DICT_PAGE_SIZE); - } - bind_data->string_dictionary_page_size_limit = val; - } else if (loption == "write_bloom_filter") { - bind_data->enable_bloom_filters = BooleanValue::Get(option.second[0].DefaultCastAs(LogicalType::BOOLEAN)); - } else if (loption == "bloom_filter_false_positive_ratio") { auto val = option.second[0].GetValue(); - if (val <= 0) { - throw 
BinderException("bloom_filter_false_positive_ratio must be greater than 0"); + if (val == -1) { + val = NumericLimits::Maximum(); + } else if (val < 0) { + throw BinderException("dictionary_compression_ratio_threshold must be greater than 0, or -1 to disable " + "dictionary compression"); } - bind_data->bloom_filter_false_positive_ratio = val; + bind_data->dictionary_compression_ratio_threshold = val; } else if (loption == "debug_use_openssl") { auto val = StringUtil::Lower(option.second[0].GetValue()); if (val == "false") { @@ -390,23 +1277,7 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun throw BinderException("Expected debug_use_openssl to be a BOOLEAN"); } } else if (loption == "compression_level") { - const auto val = option.second[0].GetValue(); - if (val < ZStdFileSystem::MinimumCompressionLevel() || val > ZStdFileSystem::MaximumCompressionLevel()) { - throw BinderException("Compression level must be between %lld and %lld", - ZStdFileSystem::MinimumCompressionLevel(), - ZStdFileSystem::MaximumCompressionLevel()); - } - bind_data->compression_level = val; - compression_level_set = true; - } else if (loption == "parquet_version") { - const auto roption = StringUtil::Upper(option.second[0].ToString()); - if (roption == "V1") { - bind_data->parquet_version = ParquetVersion::V1; - } else if (roption == "V2") { - bind_data->parquet_version = ParquetVersion::V2; - } else { - throw BinderException("Expected parquet_version 'V1' or 'V2'"); - } + bind_data->compression_level = option.second[0].GetValue(); } else { throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str()); } @@ -416,10 +1287,9 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun throw BinderException("ROW_GROUP_SIZE_BYTES does not work while preserving insertion order. 
Use \"SET " "preserve_insertion_order=false;\" to disable preserving insertion order."); } - } - - if (compression_level_set && bind_data->codec != CompressionCodec::ZSTD) { - throw BinderException("Compression level is only supported for the ZSTD compression codec"); + } else { + // We always set a max row group size bytes so we don't use too much memory + bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW; } bind_data->sql_types = sql_types; @@ -427,29 +1297,22 @@ static unique_ptr ParquetWriteBind(ClientContext &context, CopyFun return std::move(bind_data); } -static unique_ptr ParquetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data, - const string &file_path) { +unique_ptr ParquetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data, + const string &file_path) { auto global_state = make_uniq(); auto &parquet_bind = bind_data.Cast(); auto &fs = FileSystem::GetFileSystem(context); - global_state->writer = make_uniq( - context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec, - parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config, - parquet_bind.dictionary_size_limit, parquet_bind.string_dictionary_page_size_limit, - parquet_bind.enable_bloom_filters, parquet_bind.bloom_filter_false_positive_ratio, - parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version); + global_state->writer = + make_uniq(context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, + parquet_bind.codec, parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, + parquet_bind.encryption_config, parquet_bind.dictionary_compression_ratio_threshold, + parquet_bind.compression_level, parquet_bind.debug_use_openssl); return std::move(global_state); } -static void ParquetWriteGetWrittenStatistics(ClientContext &context, FunctionData &bind_data, - GlobalFunctionData &gstate, 
CopyFunctionFileStatistics &statistics) { - auto &global_state = gstate.Cast(); - global_state.writer->SetWrittenStatistics(statistics); -} - -static void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input) { +void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate, + LocalFunctionData &lstate, DataChunk &input) { auto &bind_data = bind_data_p.Cast(); auto &global_state = gstate.Cast(); auto &local_state = lstate.Cast(); @@ -459,9 +1322,6 @@ static void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_ if (local_state.buffer.Count() >= bind_data.row_group_size || local_state.buffer.SizeInBytes() >= bind_data.row_group_size_bytes) { - const string reason = - local_state.buffer.Count() >= bind_data.row_group_size ? "ROW_GROUP_SIZE" : "ROW_GROUP_SIZE_BYTES"; - global_state.LogFlushingRowGroup(local_state.buffer, reason); // if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file local_state.append_state.current_chunk_state.handles.clear(); global_state.writer->Flush(local_state.buffer); @@ -469,53 +1329,21 @@ static void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_ } } -static void ParquetWriteCombine(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { - auto &bind_data = bind_data_p.Cast(); +void ParquetWriteCombine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + LocalFunctionData &lstate) { auto &global_state = gstate.Cast(); auto &local_state = lstate.Cast(); - - if (local_state.buffer.Count() >= bind_data.row_group_size / 2 || - local_state.buffer.SizeInBytes() >= bind_data.row_group_size_bytes / 2) { - // local state buffer is more than half of the row_group_size(_bytes), just flush it - global_state.LogFlushingRowGroup(local_state.buffer, "Combine"); - 
global_state.writer->Flush(local_state.buffer); - return; - } - - unique_lock guard(global_state.lock); - if (global_state.combine_buffer) { - // There is still some data, combine it - global_state.combine_buffer->Combine(local_state.buffer); - if (global_state.combine_buffer->Count() >= bind_data.row_group_size / 2 || - global_state.combine_buffer->SizeInBytes() >= bind_data.row_group_size_bytes / 2) { - // After combining, the combine buffer is more than half of the row_group_size(_bytes), so we flush - auto owned_combine_buffer = std::move(global_state.combine_buffer); - guard.unlock(); - global_state.LogFlushingRowGroup(*owned_combine_buffer, "Combine"); - // Lock free, of course - global_state.writer->Flush(*owned_combine_buffer); - } - return; - } - - global_state.combine_buffer = make_uniq(context.client, local_state.buffer.Types()); - global_state.combine_buffer->Combine(local_state.buffer); + // flush any data left in the local state to the file + global_state.writer->Flush(local_state.buffer); } -static void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { +void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { auto &global_state = gstate.Cast(); - // flush the combine buffer (if it's there) - if (global_state.combine_buffer) { - global_state.LogFlushingRowGroup(*global_state.combine_buffer, "Finalize"); - global_state.writer->Flush(*global_state.combine_buffer); - } - // finalize: write any additional metadata to the file here global_state.writer->Finalize(); } -static unique_ptr ParquetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p) { +unique_ptr ParquetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p) { auto &bind_data = bind_data_p.Cast(); return make_uniq(context.client, bind_data.sql_types); } @@ -524,7 +1352,8 @@ static unique_ptr ParquetWriteInitializeLocal(ExecutionContex // FIXME: Have these be 
generated instead template <> -const char *EnumUtil::ToChars(duckdb_parquet::CompressionCodec::type value) { +const char *EnumUtil::ToChars( + duckdb_parquet::format::CompressionCodec::type value) { switch (value) { case CompressionCodec::UNCOMPRESSED: return "UNCOMPRESSED"; @@ -556,7 +1385,8 @@ const char *EnumUtil::ToChars(duckdb_par } template <> -duckdb_parquet::CompressionCodec::type EnumUtil::FromString(const char *value) { +duckdb_parquet::format::CompressionCodec::type +EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "UNCOMPRESSED")) { return CompressionCodec::UNCOMPRESSED; } @@ -584,46 +1414,6 @@ duckdb_parquet::CompressionCodec::type EnumUtil::FromString -const char *EnumUtil::ToChars(ParquetVersion value) { - switch (value) { - case ParquetVersion::V1: - return "V1"; - case ParquetVersion::V2: - return "V2"; - default: - throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); - } -} - -template <> -ParquetVersion EnumUtil::FromString(const char *value) { - if (StringUtil::Equals(value, "V1")) { - return ParquetVersion::V1; - } - if (StringUtil::Equals(value, "V2")) { - return ParquetVersion::V2; - } - throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value)); -} - -static optional_idx SerializeCompressionLevel(const int64_t compression_level) { - return compression_level < 0 ? 
NumericLimits::Maximum() - NumericCast(AbsValue(compression_level)) - : NumericCast(compression_level); -} - -static int64_t DeserializeCompressionLevel(const optional_idx compression_level) { - // Was originally an optional_idx, now int64_t, so we still serialize as such - if (!compression_level.IsValid()) { - return ZStdFileSystem::DefaultCompressionLevel(); - } - if (compression_level.GetIndex() > NumericCast(ZStdFileSystem::MaximumCompressionLevel())) { - // restore the negative compression level - return -NumericCast(NumericLimits::Maximum() - compression_level.GetIndex()); - } - return NumericCast(compression_level.GetIndex()); -} - static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bind_data_p, const CopyFunction &function) { auto &bind_data = bind_data_p.Cast(); @@ -636,63 +1426,30 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin serializer.WriteProperty(106, "field_ids", bind_data.field_ids); serializer.WritePropertyWithDefault>(107, "encryption_config", bind_data.encryption_config, nullptr); - - // 108 was dictionary_compression_ratio_threshold, but was deleted - - // To avoid doubly defining the default values in both ParquetWriteBindData and here, - // and possibly making a mistake, we just get the values from ParquetWriteBindData. 
- // We have to std::move them, otherwise MSVC will complain that it's not a "const T &&" - const auto compression_level = SerializeCompressionLevel(bind_data.compression_level); - D_ASSERT(DeserializeCompressionLevel(compression_level) == bind_data.compression_level); - ParquetWriteBindData default_value; - serializer.WritePropertyWithDefault(109, "compression_level", compression_level); - serializer.WritePropertyWithDefault(110, "row_groups_per_file", bind_data.row_groups_per_file, - default_value.row_groups_per_file); - serializer.WritePropertyWithDefault(111, "debug_use_openssl", bind_data.debug_use_openssl, - default_value.debug_use_openssl); - serializer.WritePropertyWithDefault(112, "dictionary_size_limit", bind_data.dictionary_size_limit, - default_value.dictionary_size_limit); - serializer.WritePropertyWithDefault(113, "bloom_filter_false_positive_ratio", - bind_data.bloom_filter_false_positive_ratio, - default_value.bloom_filter_false_positive_ratio); - serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version, - default_value.parquet_version); - serializer.WritePropertyWithDefault(115, "string_dictionary_page_size_limit", - bind_data.string_dictionary_page_size_limit, - default_value.string_dictionary_page_size_limit); + serializer.WriteProperty(108, "dictionary_compression_ratio_threshold", + bind_data.dictionary_compression_ratio_threshold); + serializer.WritePropertyWithDefault(109, "compression_level", bind_data.compression_level); + serializer.WriteProperty(110, "row_groups_per_file", bind_data.row_groups_per_file); + serializer.WriteProperty(111, "debug_use_openssl", bind_data.debug_use_openssl); } static unique_ptr ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) { auto data = make_uniq(); data->sql_types = deserializer.ReadProperty>(100, "sql_types"); data->column_names = deserializer.ReadProperty>(101, "column_names"); - data->codec = deserializer.ReadProperty(102, "codec"); + data->codec = 
deserializer.ReadProperty(102, "codec"); data->row_group_size = deserializer.ReadProperty(103, "row_group_size"); data->row_group_size_bytes = deserializer.ReadProperty(104, "row_group_size_bytes"); data->kv_metadata = deserializer.ReadProperty>>(105, "kv_metadata"); data->field_ids = deserializer.ReadProperty(106, "field_ids"); - deserializer.ReadPropertyWithExplicitDefault>( - 107, "encryption_config", data->encryption_config, std::move(ParquetWriteBindData().encryption_config)); - deserializer.ReadDeletedProperty(108, "dictionary_compression_ratio_threshold"); - - optional_idx compression_level; - deserializer.ReadPropertyWithDefault(109, "compression_level", compression_level); - data->compression_level = DeserializeCompressionLevel(compression_level); - D_ASSERT(SerializeCompressionLevel(data->compression_level) == compression_level); - ParquetWriteBindData default_value; - data->row_groups_per_file = deserializer.ReadPropertyWithExplicitDefault( - 110, "row_groups_per_file", default_value.row_groups_per_file); - data->debug_use_openssl = - deserializer.ReadPropertyWithExplicitDefault(111, "debug_use_openssl", default_value.debug_use_openssl); - data->dictionary_size_limit = deserializer.ReadPropertyWithExplicitDefault( - 112, "dictionary_size_limit", default_value.dictionary_size_limit); - data->bloom_filter_false_positive_ratio = deserializer.ReadPropertyWithExplicitDefault( - 113, "bloom_filter_false_positive_ratio", default_value.bloom_filter_false_positive_ratio); - data->parquet_version = - deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version); - data->string_dictionary_page_size_limit = deserializer.ReadPropertyWithExplicitDefault( - 115, "string_dictionary_page_size_limit", default_value.string_dictionary_page_size_limit); - + deserializer.ReadPropertyWithExplicitDefault>(107, "encryption_config", + data->encryption_config, nullptr); + deserializer.ReadPropertyWithExplicitDefault(108, 
"dictionary_compression_ratio_threshold", + data->dictionary_compression_ratio_threshold, 1.0); + deserializer.ReadPropertyWithDefault(109, "compression_level", data->compression_level); + data->row_groups_per_file = + deserializer.ReadPropertyWithExplicitDefault(110, "row_groups_per_file", optional_idx::Invalid()); + data->debug_use_openssl = deserializer.ReadPropertyWithExplicitDefault(111, "debug_use_openssl", true); return std::move(data); } // LCOV_EXCL_STOP @@ -700,7 +1457,7 @@ static unique_ptr ParquetCopyDeserialize(Deserializer &deserialize //===--------------------------------------------------------------------===// // Execution Mode //===--------------------------------------------------------------------===// -static CopyFunctionExecutionMode ParquetWriteExecutionMode(bool preserve_insertion_order, bool supports_batch_index) { +CopyFunctionExecutionMode ParquetWriteExecutionMode(bool preserve_insertion_order, bool supports_batch_index) { if (!preserve_insertion_order) { return CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; } @@ -710,22 +1467,15 @@ static CopyFunctionExecutionMode ParquetWriteExecutionMode(bool preserve_inserti return CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE; } //===--------------------------------------------------------------------===// -// Initialize Logger -//===--------------------------------------------------------------------===// -static void ParquetWriteInitializeOperator(GlobalFunctionData &gstate, const PhysicalOperator &op) { - auto &global_state = gstate.Cast(); - global_state.op = &op; -} -//===--------------------------------------------------------------------===// // Prepare Batch //===--------------------------------------------------------------------===// struct ParquetWriteBatchData : public PreparedBatchData { PreparedRowGroup prepared_row_group; }; -static unique_ptr ParquetWritePrepareBatch(ClientContext &context, FunctionData &bind_data, - GlobalFunctionData &gstate, - unique_ptr collection) { 
+unique_ptr ParquetWritePrepareBatch(ClientContext &context, FunctionData &bind_data, + GlobalFunctionData &gstate, + unique_ptr collection) { auto &global_state = gstate.Cast(); auto result = make_uniq(); global_state.writer->PrepareRowGroup(*collection, result->prepared_row_group); @@ -735,8 +1485,8 @@ static unique_ptr ParquetWritePrepareBatch(ClientContext &con //===--------------------------------------------------------------------===// // Flush Batch //===--------------------------------------------------------------------===// -static void ParquetWriteFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - PreparedBatchData &batch_p) { +void ParquetWriteFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, + PreparedBatchData &batch_p) { auto &global_state = gstate.Cast(); auto &batch = batch_p.Cast(); global_state.writer->FlushRowGroup(batch.prepared_row_group); @@ -745,7 +1495,7 @@ static void ParquetWriteFlushBatch(ClientContext &context, FunctionData &bind_da //===--------------------------------------------------------------------===// // Desired Batch Size //===--------------------------------------------------------------------===// -static idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_data_p) { +idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_data_p) { auto &bind_data = bind_data_p.Cast(); return bind_data.row_group_size; } @@ -753,13 +1503,13 @@ static idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData & //===--------------------------------------------------------------------===// // File rotation //===--------------------------------------------------------------------===// -static bool ParquetWriteRotateFiles(FunctionData &bind_data_p, const optional_idx &file_size_bytes) { +bool ParquetWriteRotateFiles(FunctionData &bind_data_p, const optional_idx &file_size_bytes) { auto &bind_data = 
bind_data_p.Cast(); return file_size_bytes.IsValid() || bind_data.row_groups_per_file.IsValid(); } -static bool ParquetWriteRotateNextFile(GlobalFunctionData &gstate, FunctionData &bind_data_p, - const optional_idx &file_size_bytes) { +bool ParquetWriteRotateNextFile(GlobalFunctionData &gstate, FunctionData &bind_data_p, + const optional_idx &file_size_bytes) { auto &global_state = gstate.Cast(); auto &bind_data = bind_data_p.Cast(); if (file_size_bytes.IsValid() && global_state.writer->FileSize() > file_size_bytes.GetIndex()) { @@ -775,8 +1525,8 @@ static bool ParquetWriteRotateNextFile(GlobalFunctionData &gstate, FunctionData //===--------------------------------------------------------------------===// // Scan Replacement //===--------------------------------------------------------------------===// -static unique_ptr ParquetScanReplacement(ClientContext &context, ReplacementScanInput &input, - optional_ptr data) { +unique_ptr ParquetScanReplacement(ClientContext &context, ReplacementScanInput &input, + optional_ptr data) { auto table_name = ReplacementScan::GetFullPath(input); if (!ReplacementScan::CanReplace(table_name, {"parquet"})) { return nullptr; @@ -820,18 +1570,18 @@ static vector> ParquetWriteSelect(CopyToSelectInput &inpu for (auto &expr : input.select_list) { const auto &type = expr->return_type; - const auto &name = expr->GetAlias(); + const auto &name = expr->alias; // Spatial types need to be encoded into WKB when writing GeoParquet. 
// But dont perform this conversion if this is a EXPORT DATABASE statement if (input.copy_to_type == CopyToType::COPY_TO_FILE && type.id() == LogicalTypeId::BLOB && type.HasAlias() && - type.GetAlias() == "GEOMETRY" && GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context)) { + type.GetAlias() == "GEOMETRY") { LogicalType wkb_blob_type(LogicalTypeId::BLOB); wkb_blob_type.SetAlias("WKB_BLOB"); auto cast_expr = BoundCastExpression::AddCastToType(context, std::move(expr), wkb_blob_type, false); - cast_expr->SetAlias(name); + cast_expr->alias = name; result.push_back(std::move(cast_expr)); any_change = true; } @@ -843,7 +1593,7 @@ static vector> ParquetWriteSelect(CopyToSelectInput &inpu // Cast the column to the new type auto cast_expr = BoundCastExpression::AddCastToType(context, std::move(expr), new_type, false); - cast_expr->SetAlias(name); + cast_expr->alias = name; result.push_back(std::move(cast_expr)); any_change = true; } @@ -856,7 +1606,7 @@ static vector> ParquetWriteSelect(CopyToSelectInput &inpu }); auto cast_expr = BoundCastExpression::AddCastToType(context, std::move(expr), new_type, false); - cast_expr->SetAlias(name); + cast_expr->alias = name; result.push_back(std::move(cast_expr)); any_change = true; } @@ -874,49 +1624,43 @@ static vector> ParquetWriteSelect(CopyToSelectInput &inpu return {}; } -static void LoadInternal(ExtensionLoader &loader) { - auto &db_instance = loader.GetDatabaseInstance(); - auto &fs = db_instance.GetFileSystem(); +void ParquetExtension::Load(DuckDB &db) { + auto &db_instance = *db.instance; + auto &fs = db.GetFileSystem(); fs.RegisterSubSystem(FileCompressionType::ZSTD, make_uniq()); auto scan_fun = ParquetScanFunction::GetFunctionSet(); scan_fun.name = "read_parquet"; - loader.RegisterFunction(scan_fun); + ExtensionUtil::RegisterFunction(db_instance, scan_fun); scan_fun.name = "parquet_scan"; - loader.RegisterFunction(scan_fun); + ExtensionUtil::RegisterFunction(db_instance, scan_fun); // parquet_metadata 
ParquetMetaDataFunction meta_fun; - loader.RegisterFunction(MultiFileReader::CreateFunctionSet(meta_fun)); + ExtensionUtil::RegisterFunction(db_instance, MultiFileReader::CreateFunctionSet(meta_fun)); // parquet_schema ParquetSchemaFunction schema_fun; - loader.RegisterFunction(MultiFileReader::CreateFunctionSet(schema_fun)); + ExtensionUtil::RegisterFunction(db_instance, MultiFileReader::CreateFunctionSet(schema_fun)); // parquet_key_value_metadata ParquetKeyValueMetadataFunction kv_meta_fun; - loader.RegisterFunction(MultiFileReader::CreateFunctionSet(kv_meta_fun)); + ExtensionUtil::RegisterFunction(db_instance, MultiFileReader::CreateFunctionSet(kv_meta_fun)); // parquet_file_metadata ParquetFileMetadataFunction file_meta_fun; - loader.RegisterFunction(MultiFileReader::CreateFunctionSet(file_meta_fun)); - - // parquet_bloom_probe - ParquetBloomProbeFunction bloom_probe_fun; - loader.RegisterFunction(MultiFileReader::CreateFunctionSet(bloom_probe_fun)); + ExtensionUtil::RegisterFunction(db_instance, MultiFileReader::CreateFunctionSet(file_meta_fun)); CopyFunction function("parquet"); function.copy_to_select = ParquetWriteSelect; function.copy_to_bind = ParquetWriteBind; function.copy_to_initialize_global = ParquetWriteInitializeGlobal; function.copy_to_initialize_local = ParquetWriteInitializeLocal; - function.copy_to_get_written_statistics = ParquetWriteGetWrittenStatistics; function.copy_to_sink = ParquetWriteSink; function.copy_to_combine = ParquetWriteCombine; function.copy_to_finalize = ParquetWriteFinalize; function.execution_mode = ParquetWriteExecutionMode; - function.initialize_operator = ParquetWriteInitializeOperator; - function.copy_from_bind = MultiFileFunction::MultiFileBindCopy; + function.copy_from_bind = ParquetScanFunction::ParquetReadBind; function.copy_from_function = scan_fun.functions[0]; function.prepare_batch = ParquetWritePrepareBatch; function.flush_batch = ParquetWriteFlushBatch; @@ -927,33 +1671,17 @@ static void 
LoadInternal(ExtensionLoader &loader) { function.deserialize = ParquetCopyDeserialize; function.extension = "parquet"; - loader.RegisterFunction(function); + ExtensionUtil::RegisterFunction(db_instance, function); // parquet_key auto parquet_key_fun = PragmaFunction::PragmaCall("add_parquet_key", ParquetCrypto::AddKey, {LogicalType::VARCHAR, LogicalType::VARCHAR}); - loader.RegisterFunction(parquet_key_fun); + ExtensionUtil::RegisterFunction(db_instance, parquet_key_fun); - auto &config = DBConfig::GetConfig(db_instance); + auto &config = DBConfig::GetConfig(*db.instance); config.replacement_scans.emplace_back(ParquetScanReplacement); config.AddExtensionOption("binary_as_string", "In Parquet files, interpret binary data as a string.", LogicalType::BOOLEAN); - config.AddExtensionOption("disable_parquet_prefetching", "Disable the prefetching mechanism in Parquet", - LogicalType::BOOLEAN, Value(false)); - config.AddExtensionOption("prefetch_all_parquet_files", - "Use the prefetching mechanism for all types of parquet files", LogicalType::BOOLEAN, - Value(false)); - config.AddExtensionOption("parquet_metadata_cache", - "Cache Parquet metadata - useful when reading the same files multiple times", - LogicalType::BOOLEAN, Value(false)); - config.AddExtensionOption( - "enable_geoparquet_conversion", - "Attempt to decode/encode geometry data in/as GeoParquet files if the spatial extension is present.", - LogicalType::BOOLEAN, Value::BOOLEAN(true)); -} - -void ParquetExtension::Load(ExtensionLoader &loader) { - LoadInternal(loader); } std::string ParquetExtension::Name() { @@ -973,8 +1701,17 @@ std::string ParquetExtension::Version() const { #ifdef DUCKDB_BUILD_LOADABLE_EXTENSION extern "C" { -DUCKDB_CPP_EXTENSION_ENTRY(parquet, loader) { // NOLINT - duckdb::LoadInternal(loader); +DUCKDB_EXTENSION_API void parquet_init(duckdb::DatabaseInstance &db) { // NOLINT + duckdb::DuckDB db_wrapper(db); + db_wrapper.LoadExtension(); +} + +DUCKDB_EXTENSION_API const char 
*parquet_version() { // NOLINT + return duckdb::DuckDB::LibraryVersion(); } } #endif + +#ifndef DUCKDB_EXTENSION_MAIN +#error DUCKDB_EXTENSION_MAIN not defined +#endif diff --git a/output/clickbench_clickbench-pixels-e0-1ssd-bufferpool.csv b/output/clickbench_clickbench-pixels-e0-1ssd-bufferpool.csv new file mode 100644 index 000000000000..29e9996fdb8d --- /dev/null +++ b/output/clickbench_clickbench-pixels-e0-1ssd-bufferpool.csv @@ -0,0 +1,39 @@ +Benchmark,Result +q01.benchmark,14.170343 +q02.benchmark,18.235225 +q03.benchmark,17.918017 +q04.benchmark,18.590176 +q05.benchmark,18.085582 +q06.benchmark,21.398551 +q07.benchmark,14.662313 +q08.benchmark,18.452269 +q09.benchmark,25.964728 +q10.benchmark,23.207817 +q12.benchmark,29.326574 +q14.benchmark,26.034131 +q15.benchmark,25.995267 +q16.benchmark,21.036475 +q17.benchmark,23.881076 +q18.benchmark,19.610187 +q20.benchmark,1.892908 +q21.benchmark,18.713718 +q22.benchmark,20.040169 +q24.benchmark,35.990437 +q25.benchmark,17.327961 +q26.benchmark,18.674748 +q27.benchmark,18.2733 +q28.benchmark,21.131392 +q29.benchmark,32.309784 +q30.benchmark,16.285378 +q31.benchmark,24.287072 +q32.benchmark,23.296018 +q33.benchmark,24.719883 +q34.benchmark,26.134639 +q35.benchmark,24.139121 +q36.benchmark,20.501649 +q37.benchmark,21.182907 +q38.benchmark,19.607548 +q39.benchmark,20.993701 +q41.benchmark,29.706234 +q42.benchmark,20.672568 +q43.benchmark,20.375316 diff --git a/output/clickbench_clickbench-pixels-e0-1ssd.csv b/output/clickbench_clickbench-pixels-e0-1ssd.csv new file mode 100644 index 000000000000..e2ccb4c5b682 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e0-1ssd.csv @@ -0,0 +1,41 @@ +Benchmark,Result +q01.benchmark,15.757249 +q02.benchmark,17.88019 +q03.benchmark,17.803563 +q04.benchmark,18.881646 +q05.benchmark,19.278091 +q06.benchmark,25.791202 +q07.benchmark,17.348792 +q08.benchmark,13.90056 +q09.benchmark,24.698815 +q10.benchmark,18.513604 +q11.benchmark,21.241715 +q12.benchmark,22.487947 
+q13.benchmark,21.529094 +q14.benchmark,24.010304 +q15.benchmark,17.388465 +q16.benchmark,17.736482 +q17.benchmark,24.737086 +q18.benchmark,19.782378 +q20.benchmark,1.860769 +q21.benchmark,19.385729 +q22.benchmark,20.277437 +q24.benchmark,34.585307 +q25.benchmark,14.936159 +q26.benchmark,19.068776 +q27.benchmark,19.986898 +q28.benchmark,20.763615 +q29.benchmark,28.146574 +q30.benchmark,14.654793 +q31.benchmark,23.073658 +q32.benchmark,22.842035 +q33.benchmark,25.913948 +q34.benchmark,24.281734 +q35.benchmark,20.988711 +q36.benchmark,23.113085 +q37.benchmark,19.954513 +q38.benchmark,21.038312 +q39.benchmark,19.999905 +q41.benchmark,21.787307 +q42.benchmark,19.86512 +q43.benchmark,19.52756 diff --git a/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool-adaptiveBufferHuge.csv b/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool-adaptiveBufferHuge.csv new file mode 100644 index 000000000000..3a35d447c476 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool-adaptiveBufferHuge.csv @@ -0,0 +1,44 @@ +基准测试,运行1时间(s) +q01.benchmark,6.756715 +q02.benchmark,6.040534 +q03.benchmark,9.231798 +q04.benchmark,9.597661 +q05.benchmark,8.709126 +q06.benchmark,14.875222 +q07.benchmark,9.427406 +q08.benchmark,5.838311 +q09.benchmark,15.767517 +q10.benchmark,17.206131 +q11.benchmark,9.705007 +q12.benchmark,9.340654 +q13.benchmark,12.063004 +q14.benchmark,16.741211 +q15.benchmark,15.651642 +q16.benchmark,12.110086 +q17.benchmark,19.556765 +q18.benchmark,16.058539 +q19.benchmark,37.000104 +q20.benchmark,60.492741 +q21.benchmark,12.410252 +q22.benchmark,13.414562 +q23.benchmark,17.367481 +q24.benchmark,118.239985 +q25.benchmark,11.049821 +q26.benchmark,10.848098 +q27.benchmark,10.395859 +q28.benchmark,13.955817 +q29.benchmark,156.594763 +q30.benchmark,8.341072 +q31.benchmark,14.92558 +q32.benchmark,15.889191 +q33.benchmark,48.28977 +q34.benchmark,36.700977 +q35.benchmark,34.657199 +q36.benchmark,19.70051 +q37.benchmark,13.621804 +q38.benchmark,13.910702 
+q39.benchmark,13.953182 +q40.benchmark,16.301299 +q41.benchmark,10.188114 +q42.benchmark,9.724652 +q43.benchmark,9.757857 diff --git a/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool-hugePage.csv b/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool-hugePage.csv new file mode 100644 index 000000000000..d0b2f8ca3d04 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool-hugePage.csv @@ -0,0 +1,44 @@ +基准测试,运行1时间(s),运行2时间(s) +q01.benchmark,10.857089,8.663359 +q02.benchmark,10.108049,11.0025 +q03.benchmark,9.748754,8.61813 +q04.benchmark,9.656689,8.195546 +q05.benchmark,12.932583,9.762595 +q06.benchmark,16.104243,16.899872 +q07.benchmark,10.607191,9.244094 +q08.benchmark,10.658306,9.998812 +q09.benchmark,20.284643,19.40803 +q10.benchmark,21.725097,23.343475 +q11.benchmark,11.63739,10.156999 +q12.benchmark,14.188163,11.013176 +q13.benchmark,16.169114,16.534223 +q14.benchmark,17.420313,19.483231 +q15.benchmark,18.032115,20.303722 +q16.benchmark,11.176863,10.083203 +q17.benchmark,18.52442,25.228373 +q18.benchmark,22.454989,24.44547 +q19.benchmark,28.309607,46.393617 +q20.benchmark,47.935395,39.055504 +q21.benchmark,13.215374,10.954276 +q22.benchmark,16.407418,15.025946 +q23.benchmark,15.694893,13.58449 +q24.benchmark,99.23, +q25.benchmark,12.072065,11.790868 +q26.benchmark,11.104628,9.73385 +q27.benchmark,10.937917,9.422097 +q28.benchmark,13.223408,13.101511 +q29.benchmark,93.64,103.86 +q30.benchmark,8.651959,6.126088 +q31.benchmark,14.344111,17.28983 +q32.benchmark,18.339756,20.762325 +q33.benchmark,48.228582,64.819234 +q34.benchmark,33.609459,47.834219 +q35.benchmark,34.052621,46.116522 +q36.benchmark,21.919392,22.051715 +q37.benchmark,13.21605,10.642212 +q38.benchmark,13.46057,10.923967 +q39.benchmark,13.671733,11.598111 +q40.benchmark,17.421832,15.29518 +q41.benchmark,10.44098,9.055398 +q42.benchmark,11.918534,10.463748 +q43.benchmark,10.287251,9.263507 diff --git a/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool.csv 
b/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool.csv new file mode 100644 index 000000000000..6c48201c1193 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e0-24ssd-bufferpool.csv @@ -0,0 +1,44 @@ +基准测试,运行1时间(s) +q01.benchmark,9.960111 +q02.benchmark,9.972535 +q03.benchmark,11.144014 +q04.benchmark,8.934958 +q05.benchmark,12.652611 +q06.benchmark,18.552889 +q07.benchmark,10.257991 +q08.benchmark,9.810876 +q09.benchmark,19.290831 +q10.benchmark,21.360243 +q11.benchmark,12.649589 +q12.benchmark,13.794907 +q13.benchmark,18.888252 +q14.benchmark,19.255475 +q15.benchmark,19.139204 +q16.benchmark,11.486169 +q17.benchmark,20.913438 +q18.benchmark,25.072152 +q19.benchmark,33.801798 +q20.benchmark,32.860525 +q21.benchmark,15.045354 +q22.benchmark,13.899289 +q23.benchmark,17.618443 +q24.benchmark,106.777443 +q25.benchmark,11.600403 +q26.benchmark,10.809833 +q27.benchmark,11.979763 +q28.benchmark,13.756713 +q29.benchmark,148.865063 +q30.benchmark,9.891047 +q31.benchmark,16.881571 +q32.benchmark,19.141262 +q33.benchmark,50.366383 +q34.benchmark,36.172594 +q35.benchmark,32.697396 +q36.benchmark,20.23665 +q37.benchmark,14.141651 +q38.benchmark,15.836678 +q39.benchmark,14.522101 +q40.benchmark,18.172415 +q41.benchmark,11.7471 +q42.benchmark,13.362848 +q43.benchmark,12.260929 diff --git a/output/clickbench_clickbench-pixels-e0-24ssd.csv b/output/clickbench_clickbench-pixels-e0-24ssd.csv new file mode 100644 index 000000000000..e693e64c0d74 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e0-24ssd.csv @@ -0,0 +1,44 @@ +基准测试,运行1时间(s) +q01.benchmark,7.852805 +q02.benchmark,8.056973 +q03.benchmark,8.42737 +q04.benchmark,8.142538 +q05.benchmark,8.912361 +q06.benchmark,12.988397 +q07.benchmark,7.358807 +q08.benchmark,7.129896 +q09.benchmark,13.644677 +q10.benchmark,15.620047 +q11.benchmark,9.889439 +q12.benchmark,8.880246 +q13.benchmark,11.381375 +q14.benchmark,15.137141 +q15.benchmark,12.519452 +q16.benchmark,9.754749 +q17.benchmark,13.819838 
+q18.benchmark,14.804112 +q19.benchmark,23.974969 +q20.benchmark,31.479802 +q21.benchmark,14.282062 +q22.benchmark,15.192575 +q23.benchmark,16.515804 +q24.benchmark,103.895648 +q25.benchmark,17.252769 +q26.benchmark,8.13275 +q27.benchmark,9.600408 +q28.benchmark,15.778551 +q29.benchmark,132.772617 +q30.benchmark,10.111426 +q31.benchmark,12.721485 +q32.benchmark,14.116281 +q33.benchmark,47.025313 +q34.benchmark,34.308253 +q35.benchmark,31.836802 +q36.benchmark,13.61468 +q37.benchmark,13.983056 +q38.benchmark,12.801865 +q39.benchmark,12.660945 +q40.benchmark,16.978885 +q41.benchmark,9.500931 +q42.benchmark,10.057429 +q43.benchmark,8.213721 diff --git a/output/clickbench_clickbench-pixels-e1-24ssd-bufferpool.csv b/output/clickbench_clickbench-pixels-e1-24ssd-bufferpool.csv new file mode 100644 index 000000000000..340cd3193f14 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e1-24ssd-bufferpool.csv @@ -0,0 +1,32 @@ +基准测试,运行1时间(s),运行2时间(s) +q01.benchmark,10.609366,7.103682 +q02.benchmark,9.341101,7.010036 +q03.benchmark,9.771302,7.221 +q04.benchmark,7.00964,4.127526 +q05.benchmark,12.487794,9.607405 +q06.benchmark,15.129936,13.609044 +q07.benchmark,13.235259,9.664351 +q08.benchmark,11.86893,8.68719 +q09.benchmark,15.959701,14.218944 +q10.benchmark,14.765543,12.534628 +q11.benchmark,13.183948,10.618427 +q12.benchmark,13.12232,10.19688 +q13.benchmark,15.813632,15.569104 +q14.benchmark,18.101884,18.630187 +q15.benchmark,15.687634,13.984801 +q16.benchmark,13.069931,9.653705 +q17.benchmark,18.211465,22.227771 +q18.benchmark,19.633871,21.106532 +q19.benchmark,29.950502,42.228594 +q20.benchmark,31.977904,34.67877 +q25.benchmark,12.563817,9.720498 +q26.benchmark,11.10764,8.039424 +q27.benchmark,14.278821,10.738971 +q30.benchmark,13.422669,9.894111 +q31.benchmark,15.987175,17.039924 +q32.benchmark,17.505213,18.338463 +q33.benchmark,47.386751,60.108655 +q36.benchmark,15.291168,14.707605 +q41.benchmark,13.444679,10.446433 +q42.benchmark,14.091739,10.979192 
+q43.benchmark,13.921057,10.301261 diff --git a/output/clickbench_clickbench-pixels-e1-24ssd.csv b/output/clickbench_clickbench-pixels-e1-24ssd.csv new file mode 100644 index 000000000000..43b16cd2faa4 --- /dev/null +++ b/output/clickbench_clickbench-pixels-e1-24ssd.csv @@ -0,0 +1,44 @@ +基准测试,运行1时间(s) +q01.benchmark,8.657719 +q02.benchmark,7.70587 +q03.benchmark,8.418271 +q04.benchmark,8.8138 +q05.benchmark,9.754054 +q06.benchmark,12.316148 +q07.benchmark,7.644759 +q08.benchmark,7.887868 +q09.benchmark,13.204128 +q10.benchmark,14.085691 +q11.benchmark,9.818135 +q12.benchmark,10.296963 +q13.benchmark,12.811088 +q14.benchmark,14.232562 +q15.benchmark,12.929595 +q16.benchmark,10.050905 +q17.benchmark,15.193953 +q18.benchmark,15.566835 +q19.benchmark,24.152288 +q20.benchmark,31.869508 +q21.benchmark,13.534349 +q22.benchmark,13.838152 +q23.benchmark,17.155116 +q24.benchmark,116.922025 +q25.benchmark,17.160522 +q26.benchmark,9.491498 +q27.benchmark,9.41551 +q28.benchmark,11.445051 +q29.benchmark,137.49975 +q30.benchmark,9.91012 +q31.benchmark,12.98745 +q32.benchmark,13.786434 +q33.benchmark,44.239044 +q34.benchmark,38.880274 +q35.benchmark,34.800796 +q36.benchmark,12.805081 +q37.benchmark,14.119722 +q38.benchmark,12.66686 +q39.benchmark,14.031654 +q40.benchmark,15.573581 +q41.benchmark,11.976214 +q42.benchmark,8.082963 +q43.benchmark,9.839117 diff --git a/src/common/multi_file_reader.cpp b/src/common/multi_file_reader.cpp new file mode 100644 index 000000000000..8ee543de5b3d --- /dev/null +++ b/src/common/multi_file_reader.cpp @@ -0,0 +1,575 @@ +#include "duckdb/common/multi_file_reader.hpp" + +#include "duckdb/common/exception.hpp" +#include "duckdb/common/hive_partitioning.hpp" +#include "duckdb/common/types.hpp" +#include "duckdb/common/types/value.hpp" +#include "duckdb/function/function_set.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/main/config.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" +#include 
"duckdb/common/string_util.hpp" + +#include + +namespace duckdb { + +MultiFileReaderGlobalState::~MultiFileReaderGlobalState() { +} + +MultiFileReader::~MultiFileReader() { +} + +unique_ptr MultiFileReader::Create(const TableFunction &table_function) { + unique_ptr res; + if (table_function.get_multi_file_reader) { + res = table_function.get_multi_file_reader(); + res->function_name = table_function.name; + } else { + res = make_uniq(); + res->function_name = table_function.name; + } + return res; +} + +unique_ptr MultiFileReader::CreateDefault(const string &function_name) { + auto res = make_uniq(); + res->function_name = function_name; + return res; +} + +Value MultiFileReader::CreateValueFromFileList(const vector &file_list) { + vector files; + for (auto &file : file_list) { + files.push_back(file); + } + return Value::LIST(std::move(files)); +} + +void MultiFileReader::AddParameters(TableFunction &table_function) { + table_function.named_parameters["filename"] = LogicalType::ANY; + table_function.named_parameters["hive_partitioning"] = LogicalType::BOOLEAN; + table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN; + table_function.named_parameters["hive_types"] = LogicalType::ANY; + table_function.named_parameters["hive_types_autocast"] = LogicalType::BOOLEAN; +} + +vector MultiFileReader::ParsePaths(const Value &input) { + if (input.IsNull()) { + throw ParserException("%s cannot take NULL list as parameter", function_name); + } + + if (input.type().id() == LogicalTypeId::VARCHAR) { + return {StringValue::Get(input)}; + } else if (input.type().id() == LogicalTypeId::LIST) { + vector paths; + for (auto &val : ListValue::GetChildren(input)) { + if (val.IsNull()) { + throw ParserException("%s reader cannot take NULL input as parameter", function_name); + } + if (val.type().id() != LogicalTypeId::VARCHAR) { + throw ParserException("%s reader can only take a list of strings as a parameter", function_name); + } + 
paths.push_back(StringValue::Get(val)); + } + return paths; + } else { + throw InternalException("Unsupported type for MultiFileReader::ParsePaths called with: '%s'"); + } +} + +unique_ptr MultiFileReader::CreateFileList(ClientContext &context, const vector &paths, + FileGlobOptions options) { + auto &config = DBConfig::GetConfig(context); + if (!config.options.enable_external_access) { + throw PermissionException("Scanning %s files is disabled through configuration", function_name); + } +// vector result_files; + + auto res = make_uniq(context, paths, options); + if (res->GetExpandResult() == FileExpandResult::NO_FILES && options == FileGlobOptions::DISALLOW_EMPTY) { + throw IOException("%s needs at least one file to read", function_name); + } + return std::move(res); +} + +unique_ptr MultiFileReader::CreateFileList(ClientContext &context, const Value &input, + FileGlobOptions options) { + auto paths = ParsePaths(input); + vector files; + if(paths.size()==1){ + FileSystem &fs = FileSystem::GetFileSystem(context); + auto file_name=paths.get(0); + files = fs.GlobFiles(file_name, context, options); + } + return CreateFileList(context, files, options); +} + +bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options, + ClientContext &context) { + auto loption = StringUtil::Lower(key); + if (loption == "filename") { + if (val.type() == LogicalType::VARCHAR) { + // If not, we interpret it as the name of the column containing the filename + options.filename = true; + options.filename_column = StringValue::Get(val); + } else { + Value boolean_value; + string error_message; + if (val.DefaultTryCastAs(LogicalType::BOOLEAN, boolean_value, &error_message)) { + // If the argument can be cast to boolean, we just interpret it as a boolean + options.filename = BooleanValue::Get(boolean_value); + } + } + } else if (loption == "hive_partitioning") { + options.hive_partitioning = BooleanValue::Get(val); + 
options.auto_detect_hive_partitioning = false; + } else if (loption == "union_by_name") { + options.union_by_name = BooleanValue::Get(val); + } else if (loption == "hive_types_autocast" || loption == "hive_type_autocast") { + options.hive_types_autocast = BooleanValue::Get(val); + } else if (loption == "hive_types" || loption == "hive_type") { + if (val.type().id() != LogicalTypeId::STRUCT) { + throw InvalidInputException( + "'hive_types' only accepts a STRUCT('name':VARCHAR, ...), but '%s' was provided", + val.type().ToString()); + } + // verify that that all the children of the struct value are VARCHAR + auto &children = StructValue::GetChildren(val); + for (idx_t i = 0; i < children.size(); i++) { + const Value &child = children[i]; + if (child.type().id() != LogicalType::VARCHAR) { + throw InvalidInputException("hive_types: '%s' must be a VARCHAR, instead: '%s' was provided", + StructType::GetChildName(val.type(), i), child.type().ToString()); + } + // for every child of the struct, get the logical type + LogicalType transformed_type = TransformStringToLogicalType(child.ToString(), context); + const string &name = StructType::GetChildName(val.type(), i); + options.hive_types_schema[name] = transformed_type; + } + D_ASSERT(!options.hive_types_schema.empty()); + } else { + return false; + } + return true; +} + +unique_ptr MultiFileReader::ComplexFilterPushdown(ClientContext &context, MultiFileList &files, + const MultiFileReaderOptions &options, + MultiFilePushdownInfo &info, + vector> &filters) { + return files.ComplexFilterPushdown(context, options, info, filters); +} + +unique_ptr MultiFileReader::DynamicFilterPushdown(ClientContext &context, const MultiFileList &files, + const MultiFileReaderOptions &options, + const vector &names, + const vector &types, + const vector &column_ids, + TableFilterSet &filters) { + return files.DynamicFilterPushdown(context, options, names, types, column_ids, filters); +} + +bool MultiFileReader::Bind(MultiFileReaderOptions 
&options, MultiFileList &files, vector &return_types, + vector &names, MultiFileReaderBindData &bind_data) { + // The Default MultiFileReader can not perform any binding as it uses MultiFileLists with no schema information. + return false; +} + +void MultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList &files, + vector &return_types, vector &names, + MultiFileReaderBindData &bind_data) { + // Add generated constant column for filename + if (options.filename) { + if (std::find(names.begin(), names.end(), options.filename_column) != names.end()) { + throw BinderException("Option filename adds column \"%s\", but a column with this name is also in the " + "file. Try setting a different name: filename=''", + options.filename_column); + } + bind_data.filename_idx = names.size(); + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back(options.filename_column); + } + + // Add generated constant columns from hive partitioning scheme + if (options.hive_partitioning) { + D_ASSERT(files.GetExpandResult() != FileExpandResult::NO_FILES); + auto partitions = HivePartitioning::Parse(files.GetFirstFile()); + // verify that all files have the same hive partitioning scheme + for (const auto &file : files.Files()) { + auto file_partitions = HivePartitioning::Parse(file); + for (auto &part_info : partitions) { + if (file_partitions.find(part_info.first) == file_partitions.end()) { + string error = "Hive partition mismatch between file \"%s\" and \"%s\": key \"%s\" not found"; + if (options.auto_detect_hive_partitioning == true) { + throw InternalException(error + "(hive partitioning was autodetected)", files.GetFirstFile(), + file, part_info.first); + } + throw BinderException(error.c_str(), files.GetFirstFile(), file, part_info.first); + } + } + if (partitions.size() != file_partitions.size()) { + string error_msg = "Hive partition mismatch between file \"%s\" and \"%s\""; + if (options.auto_detect_hive_partitioning == true) { + throw 
InternalException(error_msg + "(hive partitioning was autodetected)", files.GetFirstFile(), + file); + } + throw BinderException(error_msg.c_str(), files.GetFirstFile(), file); + } + } + + if (!options.hive_types_schema.empty()) { + // verify that all hive_types are existing partitions + options.VerifyHiveTypesArePartitions(partitions); + } + + for (auto &part : partitions) { + idx_t hive_partitioning_index; + auto lookup = std::find(names.begin(), names.end(), part.first); + if (lookup != names.end()) { + // hive partitioning column also exists in file - override + auto idx = NumericCast(lookup - names.begin()); + hive_partitioning_index = idx; + return_types[idx] = options.GetHiveLogicalType(part.first); + } else { + // hive partitioning column does not exist in file - add a new column containing the key + hive_partitioning_index = names.size(); + return_types.emplace_back(options.GetHiveLogicalType(part.first)); + names.emplace_back(part.first); + } + bind_data.hive_partitioning_indexes.emplace_back(part.first, hive_partitioning_index); + } + } +} + +void MultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, + const string &filename, const vector &local_names, + const vector &global_types, const vector &global_names, + const vector &global_column_ids, MultiFileReaderData &reader_data, + ClientContext &context, optional_ptr global_state) { + + // create a map of name -> column index + case_insensitive_map_t name_map; + if (file_options.union_by_name) { + for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) { + name_map[local_names[col_idx]] = col_idx; + } + } + for (idx_t i = 0; i < global_column_ids.size(); i++) { + auto column_id = global_column_ids[i]; + if (IsRowIdColumnId(column_id)) { + // row-id + reader_data.constant_map.emplace_back(i, Value::BIGINT(42)); + continue; + } + if (column_id == options.filename_idx) { + // filename + reader_data.constant_map.emplace_back(i, 
Value(filename)); + continue; + } + if (!options.hive_partitioning_indexes.empty()) { + // hive partition constants + auto partitions = HivePartitioning::Parse(filename); + D_ASSERT(partitions.size() == options.hive_partitioning_indexes.size()); + bool found_partition = false; + for (auto &entry : options.hive_partitioning_indexes) { + if (column_id == entry.index) { + Value value = file_options.GetHivePartitionValue(partitions[entry.value], entry.value, context); + reader_data.constant_map.emplace_back(i, value); + found_partition = true; + break; + } + } + if (found_partition) { + continue; + } + } + if (file_options.union_by_name) { + auto &global_name = global_names[column_id]; + auto entry = name_map.find(global_name); + bool not_present_in_file = entry == name_map.end(); + if (not_present_in_file) { + // we need to project a column with name \"global_name\" - but it does not exist in the current file + // push a NULL value of the specified type + reader_data.constant_map.emplace_back(i, Value(global_types[column_id])); + continue; + } + } + } +} + +unique_ptr +MultiFileReader::InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options, + const MultiFileReaderBindData &bind_data, const MultiFileList &file_list, + const vector &global_types, const vector &global_names, + const vector &global_column_ids) { + // By default, the multifilereader does not require any global state + return nullptr; +} + +void MultiFileReader::CreateNameMapping(const string &file_name, const vector &local_types, + const vector &local_names, const vector &global_types, + const vector &global_names, const vector &global_column_ids, + MultiFileReaderData &reader_data, const string &initial_file, + optional_ptr global_state) { + D_ASSERT(global_types.size() == global_names.size()); + D_ASSERT(local_types.size() == local_names.size()); + // we have expected types: create a map of name -> column index + case_insensitive_map_t name_map; + for (idx_t col_idx = 0; 
col_idx < local_names.size(); col_idx++) { + name_map[local_names[col_idx]] = col_idx; + } + for (idx_t i = 0; i < global_column_ids.size(); i++) { + // check if this is a constant column + bool constant = false; + for (auto &entry : reader_data.constant_map) { + if (entry.column_id == i) { + constant = true; + break; + } + } + if (constant) { + // this column is constant for this file + continue; + } + // not constant - look up the column in the name map + auto global_id = global_column_ids[i]; + if (global_id >= global_types.size()) { + throw InternalException( + "MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file"); + } + auto &global_name = global_names[global_id]; + auto entry = name_map.find(global_name); + if (entry == name_map.end()) { + string candidate_names; + for (auto &local_name : local_names) { + if (!candidate_names.empty()) { + candidate_names += ", "; + } + candidate_names += local_name; + } + throw IOException( + StringUtil::Format("Failed to read file \"%s\": schema mismatch in glob: column \"%s\" was read from " + "the original file \"%s\", but could not be found in file \"%s\".\nCandidate names: " + "%s\nIf you are trying to " + "read files with different schemas, try setting union_by_name=True", + file_name, global_name, initial_file, file_name, candidate_names)); + } + // we found the column in the local file - check if the types are the same + auto local_id = entry->second; + D_ASSERT(global_id < global_types.size()); + D_ASSERT(local_id < local_types.size()); + auto &global_type = global_types[global_id]; + auto &local_type = local_types[local_id]; + if (global_type != local_type) { + reader_data.cast_map[local_id] = global_type; + } + // the types are the same - create the mapping + reader_data.column_mapping.push_back(i); + reader_data.column_ids.push_back(local_id); + } + + reader_data.empty_columns = reader_data.column_ids.empty(); +} + +void MultiFileReader::CreateMapping(const string 
&file_name, const vector &local_types, + const vector &local_names, const vector &global_types, + const vector &global_names, const vector &global_column_ids, + optional_ptr filters, MultiFileReaderData &reader_data, + const string &initial_file, const MultiFileReaderBindData &options, + optional_ptr global_state) { + CreateNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids, reader_data, + initial_file, global_state); + CreateFilterMap(global_types, filters, reader_data, global_state); +} + +void MultiFileReader::CreateFilterMap(const vector &global_types, optional_ptr filters, + MultiFileReaderData &reader_data, + optional_ptr global_state) { + if (filters) { + auto filter_map_size = global_types.size(); + if (global_state) { + filter_map_size += global_state->extra_columns.size(); + } + reader_data.filter_map.resize(filter_map_size); + + for (idx_t c = 0; c < reader_data.column_mapping.size(); c++) { + auto map_index = reader_data.column_mapping[c]; + reader_data.filter_map[map_index].index = c; + reader_data.filter_map[map_index].is_constant = false; + } + for (idx_t c = 0; c < reader_data.constant_map.size(); c++) { + auto constant_index = reader_data.constant_map[c].column_id; + reader_data.filter_map[constant_index].index = c; + reader_data.filter_map[constant_index].is_constant = true; + } + } +} + +void MultiFileReader::FinalizeChunk(ClientContext &context, const MultiFileReaderBindData &bind_data, + const MultiFileReaderData &reader_data, DataChunk &chunk, + optional_ptr global_state) { + // reference all the constants set up in MultiFileReader::FinalizeBind + for (auto &entry : reader_data.constant_map) { + chunk.data[entry.column_id].Reference(entry.value); + } + chunk.Verify(); +} + +TableFunctionSet MultiFileReader::CreateFunctionSet(TableFunction table_function) { + TableFunctionSet function_set(table_function.name); + function_set.AddFunction(table_function); + D_ASSERT(table_function.arguments.size() == 1 
&& table_function.arguments[0] == LogicalType::VARCHAR); + table_function.arguments[0] = LogicalType::LIST(LogicalType::VARCHAR); + function_set.AddFunction(std::move(table_function)); + return function_set; +} + +HivePartitioningIndex::HivePartitioningIndex(string value_p, idx_t index) : value(std::move(value_p)), index(index) { +} + +void MultiFileReaderOptions::AddBatchInfo(BindInfo &bind_info) const { + bind_info.InsertOption("filename", Value(filename_column)); + bind_info.InsertOption("hive_partitioning", Value::BOOLEAN(hive_partitioning)); + bind_info.InsertOption("auto_detect_hive_partitioning", Value::BOOLEAN(auto_detect_hive_partitioning)); + bind_info.InsertOption("union_by_name", Value::BOOLEAN(union_by_name)); + bind_info.InsertOption("hive_types_autocast", Value::BOOLEAN(hive_types_autocast)); +} + +void UnionByName::CombineUnionTypes(const vector &col_names, const vector &sql_types, + vector &union_col_types, vector &union_col_names, + case_insensitive_map_t &union_names_map) { + D_ASSERT(col_names.size() == sql_types.size()); + + for (idx_t col = 0; col < col_names.size(); ++col) { + auto union_find = union_names_map.find(col_names[col]); + + if (union_find != union_names_map.end()) { + // given same name , union_col's type must compatible with col's type + auto ¤t_type = union_col_types[union_find->second]; + auto compatible_type = LogicalType::ForceMaxLogicalType(current_type, sql_types[col]); + union_col_types[union_find->second] = compatible_type; + } else { + union_names_map[col_names[col]] = union_col_names.size(); + union_col_names.emplace_back(col_names[col]); + union_col_types.emplace_back(sql_types[col]); + } + } +} + +bool MultiFileReaderOptions::AutoDetectHivePartitioningInternal(MultiFileList &files, ClientContext &context) { + auto first_file = files.GetFirstFile(); + auto partitions = HivePartitioning::Parse(first_file); + if (partitions.empty()) { + // no partitions found in first file + return false; + } + + for (const auto &file : 
files.Files()) { + auto new_partitions = HivePartitioning::Parse(file); + if (new_partitions.size() != partitions.size()) { + // partition count mismatch + return false; + } + for (auto &part : new_partitions) { + auto entry = partitions.find(part.first); + if (entry == partitions.end()) { + // differing partitions between files + return false; + } + } + } + return true; +} +void MultiFileReaderOptions::AutoDetectHiveTypesInternal(MultiFileList &files, ClientContext &context) { + const LogicalType candidates[] = {LogicalType::DATE, LogicalType::TIMESTAMP, LogicalType::BIGINT}; + + unordered_map detected_types; + for (const auto &file : files.Files()) { + auto partitions = HivePartitioning::Parse(file); + if (partitions.empty()) { + return; + } + + for (auto &part : partitions) { + const string &name = part.first; + if (hive_types_schema.find(name) != hive_types_schema.end()) { + // type was explicitly provided by the user + continue; + } + LogicalType detected_type = LogicalType::VARCHAR; + Value value(part.second); + for (auto &candidate : candidates) { + const bool success = value.TryCastAs(context, candidate, true); + if (success) { + detected_type = candidate; + break; + } + } + auto entry = detected_types.find(name); + if (entry == detected_types.end()) { + // type was not yet detected - insert it + detected_types.insert(make_pair(name, std::move(detected_type))); + } else { + // type was already detected - check if the type matches + // if not promote to VARCHAR + if (entry->second != detected_type) { + entry->second = LogicalType::VARCHAR; + } + } + } + } + for (auto &entry : detected_types) { + hive_types_schema.insert(make_pair(entry.first, std::move(entry.second))); + } +} +void MultiFileReaderOptions::AutoDetectHivePartitioning(MultiFileList &files, ClientContext &context) { + D_ASSERT(files.GetExpandResult() != FileExpandResult::NO_FILES); + const bool hp_explicitly_disabled = !auto_detect_hive_partitioning && !hive_partitioning; + const bool ht_enabled 
= !hive_types_schema.empty(); + if (hp_explicitly_disabled && ht_enabled) { + throw InvalidInputException("cannot disable hive_partitioning when hive_types is enabled"); + } + if (ht_enabled && auto_detect_hive_partitioning && !hive_partitioning) { + // hive_types flag implies hive_partitioning + hive_partitioning = true; + auto_detect_hive_partitioning = false; + } + if (auto_detect_hive_partitioning) { + hive_partitioning = AutoDetectHivePartitioningInternal(files, context); + } + if (hive_partitioning && hive_types_autocast) { + AutoDetectHiveTypesInternal(files, context); + } +} +void MultiFileReaderOptions::VerifyHiveTypesArePartitions(const std::map &partitions) const { + for (auto &hive_type : hive_types_schema) { + if (partitions.find(hive_type.first) == partitions.end()) { + throw InvalidInputException("Unknown hive_type: \"%s\" does not appear to be a partition", hive_type.first); + } + } +} +LogicalType MultiFileReaderOptions::GetHiveLogicalType(const string &hive_partition_column) const { + if (!hive_types_schema.empty()) { + auto it = hive_types_schema.find(hive_partition_column); + if (it != hive_types_schema.end()) { + return it->second; + } + } + return LogicalType::VARCHAR; +} + +bool MultiFileReaderOptions::AnySet() { + return filename || hive_partitioning || union_by_name; +} + +Value MultiFileReaderOptions::GetHivePartitionValue(const string &value, const string &key, + ClientContext &context) const { + auto it = hive_types_schema.find(key); + if (it == hive_types_schema.end()) { + return HivePartitioning::GetValue(context, key, value, LogicalType::VARCHAR); + } + return HivePartitioning::GetValue(context, key, value, it->second); +} + +} // namespace duckdb diff --git a/src/common/types/vector.cpp b/src/common/types/vector.cpp index 7991f08b38fc..9ec81b048787 100644 --- a/src/common/types/vector.cpp +++ b/src/common/types/vector.cpp @@ -15,20 +15,20 @@ #include "duckdb/common/types/sel_cache.hpp" #include "duckdb/common/types/value.hpp" 
#include "duckdb/common/types/value_map.hpp" -#include "duckdb/common/types/varint.hpp" #include "duckdb/common/types/vector_cache.hpp" #include "duckdb/common/uhugeint.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/function/scalar/nested_functions.hpp" #include "duckdb/storage/buffer/buffer_handle.hpp" #include "duckdb/storage/string_uncompressed.hpp" -#include "duckdb/common/types/uuid.hpp" #include "fsst.h" +#include "duckdb/common/types/varint.hpp" #include // strlen() on Solaris + namespace duckdb { -UnifiedVectorFormat::UnifiedVectorFormat() : sel(nullptr), data(nullptr), physical_type(PhysicalType::INVALID) { +UnifiedVectorFormat::UnifiedVectorFormat() : sel(nullptr), data(nullptr) { } UnifiedVectorFormat::UnifiedVectorFormat(UnifiedVectorFormat &&other) noexcept : sel(nullptr), data(nullptr) { @@ -37,7 +37,6 @@ UnifiedVectorFormat::UnifiedVectorFormat(UnifiedVectorFormat &&other) noexcept : std::swap(data, other.data); std::swap(validity, other.validity); std::swap(owned_sel, other.owned_sel); - std::swap(physical_type, other.physical_type); if (refers_to_self) { sel = &owned_sel; } @@ -49,17 +48,16 @@ UnifiedVectorFormat &UnifiedVectorFormat::operator=(UnifiedVectorFormat &&other) std::swap(data, other.data); std::swap(validity, other.validity); std::swap(owned_sel, other.owned_sel); - std::swap(physical_type, other.physical_type); if (refers_to_self) { sel = &owned_sel; } return *this; } -Vector::Vector(LogicalType type_p, bool create_data, bool initialize_to_zero, idx_t capacity) +Vector::Vector(LogicalType type_p, bool create_data, bool zero_data, idx_t capacity) : vector_type(VectorType::FLAT_VECTOR), type(std::move(type_p)), data(nullptr), validity(capacity) { if (create_data) { - Initialize(initialize_to_zero, capacity); + Initialize(zero_data, capacity); } } @@ -73,9 +71,9 @@ Vector::Vector(LogicalType type_p, data_ptr_t dataptr) } } -Vector::Vector(LogicalType type_p, data_ptr_t dataptr, validity_t * 
validity_mask,idx_t capacity) +Vector::Vector(LogicalType type_p, data_ptr_t dataptr, validity_t * validity_mask) : vector_type(VectorType::FLAT_VECTOR), type(std::move(type_p)), data(dataptr), - validity(ValidityMask(validity_mask,capacity)) { + validity(validity_mask) { if (dataptr && !type.IsValid()) { throw InternalException("Cannot create a vector of type INVALID!"); } @@ -155,9 +153,10 @@ void Vector::ReferenceAndSetType(const Vector &other) { void Vector::Reinterpret(const Vector &other) { vector_type = other.vector_type; +#ifdef DEBUG auto &this_type = GetType(); auto &other_type = other.GetType(); -#ifdef DEBUG + auto type_is_same = other_type == this_type; bool this_is_nested = this_type.IsNested(); bool other_is_nested = other_type.IsNested(); @@ -170,13 +169,7 @@ void Vector::Reinterpret(const Vector &other) { D_ASSERT((not_nested && type_size_equal) || type_is_same); #endif AssignSharedPointer(buffer, other.buffer); - if (vector_type == VectorType::DICTIONARY_VECTOR && other_type != this_type) { - Vector new_vector(GetType(), nullptr); - new_vector.Reinterpret(DictionaryVector::Child(other)); - auxiliary = make_shared_ptr(std::move(new_vector)); - } else { - AssignSharedPointer(auxiliary, other.auxiliary); - } + AssignSharedPointer(auxiliary, other.auxiliary); data = other.data; validity = other.validity; } @@ -246,8 +239,6 @@ void Vector::Slice(const SelectionVector &sel, idx_t count) { if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { // already a dictionary, slice the current dictionary auto ¤t_sel = DictionaryVector::SelVector(*this); - auto dictionary_size = DictionaryVector::DictionarySize(*this); - auto dictionary_id = DictionaryVector::DictionaryId(*this); auto sliced_dictionary = current_sel.Slice(sel, count); buffer = make_buffer(std::move(sliced_dictionary)); if (GetType().InternalType() == PhysicalType::STRUCT) { @@ -257,11 +248,6 @@ void Vector::Slice(const SelectionVector &sel, idx_t count) { new_child.auxiliary = 
make_buffer(new_child, sel, count); auxiliary = make_buffer(std::move(new_child)); } - if (dictionary_size.IsValid()) { - auto &dict_buffer = buffer->Cast(); - dict_buffer.SetDictionarySize(dictionary_size.GetIndex()); - dict_buffer.SetDictionaryId(std::move(dictionary_id)); - } return; } @@ -282,25 +268,11 @@ void Vector::Slice(const SelectionVector &sel, idx_t count) { auxiliary = std::move(child_ref); } -void Vector::Dictionary(idx_t dictionary_size, const SelectionVector &sel, idx_t count) { - Slice(sel, count); - if (GetVectorType() == VectorType::DICTIONARY_VECTOR) { - buffer->Cast().SetDictionarySize(dictionary_size); - } -} - -void Vector::Dictionary(const Vector &dict, idx_t dictionary_size, const SelectionVector &sel, idx_t count) { - Reference(dict); - Dictionary(dictionary_size, sel, count); -} - void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) { if (GetVectorType() == VectorType::DICTIONARY_VECTOR && GetType().InternalType() != PhysicalType::STRUCT) { // dictionary vector: need to merge dictionaries // check if we have a cached entry auto ¤t_sel = DictionaryVector::SelVector(*this); - auto dictionary_size = DictionaryVector::DictionarySize(*this); - auto dictionary_id = DictionaryVector::DictionaryId(*this); auto target_data = current_sel.data(); auto entry = cache.cache.find(target_data); if (entry != cache.cache.end()) { @@ -311,17 +283,12 @@ void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) { Slice(sel, count); cache.cache[target_data] = this->buffer; } - if (dictionary_size.IsValid()) { - auto &dict_buffer = buffer->Cast(); - dict_buffer.SetDictionarySize(dictionary_size.GetIndex()); - dict_buffer.SetDictionaryId(std::move(dictionary_id)); - } } else { Slice(sel, count); } } -void Vector::Initialize(bool initialize_to_zero, idx_t capacity) { +void Vector::Initialize(bool zero_data, idx_t capacity) { auxiliary.reset(); validity.Reset(); auto &type = GetType(); @@ -340,13 +307,13 @@ void 
Vector::Initialize(bool initialize_to_zero, idx_t capacity) { if (type_size > 0) { buffer = VectorBuffer::CreateStandardVector(type, capacity); data = buffer->GetData(); - if (initialize_to_zero) { + if (zero_data) { memset(data, 0, capacity * type_size); } } - if (capacity > validity.Capacity()) { - validity.Resize(capacity); + if (capacity > validity.TargetCount()) { + validity.Resize(validity.TargetCount(), capacity); } } @@ -403,7 +370,7 @@ void Vector::Resize(idx_t current_size, idx_t new_size) { for (auto &resize_info_entry : resize_infos) { // Resize the validity mask. auto new_validity_size = new_size * resize_info_entry.multiplier; - resize_info_entry.vec.validity.Resize(new_validity_size); + resize_info_entry.vec.validity.Resize(current_size, new_validity_size); // For nested data types, we only need to resize the validity mask. if (!resize_info_entry.data) { @@ -422,9 +389,7 @@ void Vector::Resize(idx_t current_size, idx_t new_size) { } // Copy the data buffer to a resized buffer. - auto stored_allocator = resize_info_entry.buffer->GetAllocator(); - auto new_data = stored_allocator ? 
stored_allocator->Allocate(target_size) - : Allocator::DefaultAllocator().Allocate(target_size); + auto new_data = make_unsafe_uniq_array_uninitialized(target_size); memcpy(new_data.get(), resize_info_entry.data, old_size); resize_info_entry.buffer->SetData(std::move(new_data)); resize_info_entry.vec.data = resize_info_entry.buffer->GetData(); @@ -451,6 +416,7 @@ void Vector::SetValue(idx_t index, const Value &val) { } D_ASSERT(val.IsNull() || (val.type().InternalType() == GetType().InternalType())); + validity.EnsureWritable(); validity.Set(index, !val.IsNull()); auto physical_type = GetType().InternalType(); if (val.IsNull() && !IsStructOrArrayRecursive(GetType())) { @@ -596,8 +562,7 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { case VectorType::SEQUENCE_VECTOR: { int64_t start, increment; SequenceVector::GetSequence(*vector, start, increment); - return Value::Numeric(vector->GetType(), - start + static_cast(static_cast(increment) * index)); + return Value::Numeric(vector->GetType(), start + increment * NumericCast(index)); } default: throw InternalException("Unimplemented vector type for Vector::GetValue"); @@ -618,16 +583,9 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { auto str_compressed = reinterpret_cast(data)[index]; auto decoder = FSSTVector::GetDecoder(*vector); auto &decompress_buffer = FSSTVector::GetDecompressBuffer(*vector); - auto string_val = FSSTPrimitives::DecompressValue(decoder, str_compressed.GetData(), str_compressed.GetSize(), - decompress_buffer); - switch (vector->GetType().id()) { - case LogicalTypeId::VARCHAR: - return Value(std::move(string_val)); - case LogicalTypeId::BLOB: - return Value::BLOB_RAW(string_val); - default: - throw InternalException("Unsupported vector type for FSST vector"); - } + Value result = FSSTPrimitives::DecompressValue(decoder, str_compressed.GetData(), str_compressed.GetSize(), + decompress_buffer); + return result; } switch (vector->GetType().id()) { @@ -643,8 
+601,6 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { return Value::DATE(reinterpret_cast(data)[index]); case LogicalTypeId::TIME: return Value::TIME(reinterpret_cast(data)[index]); - case LogicalTypeId::TIME_NS: - return Value::TIME_NS(reinterpret_cast(data)[index]); case LogicalTypeId::TIME_TZ: return Value::TIMETZ(reinterpret_cast(data)[index]); case LogicalTypeId::BIGINT: @@ -660,13 +616,13 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { case LogicalTypeId::TIMESTAMP: return Value::TIMESTAMP(reinterpret_cast(data)[index]); case LogicalTypeId::TIMESTAMP_NS: - return Value::TIMESTAMPNS(reinterpret_cast(data)[index]); + return Value::TIMESTAMPNS(reinterpret_cast(data)[index]); case LogicalTypeId::TIMESTAMP_MS: - return Value::TIMESTAMPMS(reinterpret_cast(data)[index]); + return Value::TIMESTAMPMS(reinterpret_cast(data)[index]); case LogicalTypeId::TIMESTAMP_SEC: - return Value::TIMESTAMPSEC(reinterpret_cast(data)[index]); + return Value::TIMESTAMPSEC(reinterpret_cast(data)[index]); case LogicalTypeId::TIMESTAMP_TZ: - return Value::TIMESTAMPTZ(reinterpret_cast(data)[index]); + return Value::TIMESTAMPTZ(reinterpret_cast(data)[index]); case LogicalTypeId::HUGEINT: return Value::HUGEINT(reinterpret_cast(data)[index]); case LogicalTypeId::UHUGEINT: @@ -778,7 +734,7 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { for (idx_t i = offset; i < offset + stride; i++) { children.push_back(child_vec.GetValue(i)); } - return Value::ARRAY(ArrayType::GetChildType(type), std::move(children)); + return Value::ARRAY(std::move(children)); } default: throw InternalException("Unimplemented type for value access"); @@ -847,8 +803,7 @@ string Vector::ToString(idx_t count) const { int64_t start, increment; SequenceVector::GetSequence(*this, start, increment); for (idx_t i = 0; i < count; i++) { - retval += to_string(start + static_cast(static_cast(increment) * i)) + - (i == count - 1 ? 
"" : ", "); + retval += to_string(start + increment * UnsafeNumericCast(i)) + (i == count - 1 ? "" : ", "); } break; } @@ -940,27 +895,6 @@ void Vector::Flatten(idx_t count) { switch (GetVectorType()) { case VectorType::FLAT_VECTOR: // already a flat vector - switch (GetType().InternalType()) { - case PhysicalType::STRUCT: { - auto &entries = StructVector::GetEntries(*this); - for (auto &entry : entries) { - entry->Flatten(count); - } - break; - } - case PhysicalType::LIST: { - auto &entry = ListVector::GetEntry(*this); - entry.Flatten(ListVector::GetListSize(*this)); - break; - } - case PhysicalType::ARRAY: { - auto &entry = ArrayVector::GetEntry(*this); - entry.Flatten(ArrayVector::GetTotalSize(*this)); - break; - } - default: - break; - } break; case VectorType::FSST_VECTOR: { // Even though count may only be a part of the vector, we need to flatten the whole thing due to the way @@ -1171,7 +1105,6 @@ void Vector::Flatten(const SelectionVector &sel, idx_t count) { } void Vector::ToUnifiedFormat(idx_t count, UnifiedVectorFormat &format) { - format.physical_type = GetType().InternalType(); switch (GetVectorType()) { case VectorType::DICTIONARY_VECTOR: { auto &sel = DictionaryVector::SelVector(*this); @@ -1249,61 +1182,10 @@ void Vector::Sequence(int64_t start, int64_t increment, idx_t count) { } // FIXME: This should ideally be const -void Vector::Serialize(Serializer &serializer, idx_t count, bool compressed_serialization) { +void Vector::Serialize(Serializer &serializer, idx_t count) { auto &logical_type = GetType(); UnifiedVectorFormat vdata; - - // serialize compressed vectors to save space, but skip this if serializing into older versions - if (!serializer.ShouldSerialize(5)) { - compressed_serialization = false; - } - if (compressed_serialization) { - auto vtype = GetVectorType(); - if (vtype == VectorType::DICTIONARY_VECTOR && DictionaryVector::DictionarySize(*this).IsValid()) { - auto dict = DictionaryVector::Child(*this); - if (dict.GetVectorType() == 
VectorType::FLAT_VECTOR) { - idx_t dict_count = DictionaryVector::DictionarySize(*this).GetIndex(); - auto old_sel = DictionaryVector::SelVector(*this); - SelectionVector new_sel(count), used_sel(count), map_sel(dict_count); - - // dictionaries may be large (row-group level). A vector may use only a small part. - // So, restrict dict to the used_sel subset & remap old_sel into new_sel to the new dict positions - sel_t CODE_UNSEEN = static_cast(dict_count); - for (sel_t i = 0; i < dict_count; ++i) { - map_sel[i] = CODE_UNSEEN; // initialize with unused marker - } - idx_t used_count = 0; - for (idx_t i = 0; i < count; ++i) { - auto pos = old_sel[i]; - if (map_sel[pos] == CODE_UNSEEN) { - map_sel[pos] = static_cast(used_count); - used_sel[used_count++] = pos; - } - new_sel[i] = map_sel[pos]; - } - if (used_count * 2 < count) { // only serialize as a dict vector if that makes things smaller - auto sel_data = reinterpret_cast(new_sel.data()); - dict.Slice(used_sel, used_count); - serializer.WriteProperty(90, "vector_type", VectorType::DICTIONARY_VECTOR); - serializer.WriteProperty(91, "sel_vector", sel_data, sizeof(sel_t) * count); - serializer.WriteProperty(92, "dict_count", used_count); - return dict.Serialize(serializer, used_count, false); - } - } - } else if (vtype == VectorType::CONSTANT_VECTOR && count >= 1) { - serializer.WriteProperty(90, "vector_type", VectorType::CONSTANT_VECTOR); - return Vector::Serialize(serializer, 1, false); // just serialize one value - } else if (vtype == VectorType::SEQUENCE_VECTOR) { - serializer.WriteProperty(90, "vector_type", VectorType::SEQUENCE_VECTOR); - auto data = reinterpret_cast(buffer->GetData()); - serializer.WriteProperty(91, "seq_start", data[0]); - serializer.WriteProperty(92, "seq_increment", data[1]); - return; // for sequence vectors we do not serialize anything else - } else { - // TODO: other compressed vector types (FSST) - } - } ToUnifiedFormat(count, vdata); const bool has_validity_mask = (count > 0) && 
!vdata.validity.AllValid(); @@ -1342,8 +1224,7 @@ void Vector::Serialize(Serializer &serializer, idx_t count, bool compressed_seri // Serialize entries as a list serializer.WriteList(103, "children", entries.size(), [&](Serializer::List &list, idx_t i) { - list.WriteObject( - [&](Serializer &object) { entries[i]->Serialize(object, count, compressed_serialization); }); + list.WriteObject([&](Serializer &object) { entries[i]->Serialize(object, count); }); }); break; } @@ -1372,9 +1253,7 @@ void Vector::Serialize(Serializer &serializer, idx_t count, bool compressed_seri object.WriteProperty(101, "length", entries[i].length); }); }); - serializer.WriteObject(106, "child", [&](Serializer &object) { - child.Serialize(object, list_size, compressed_serialization); - }); + serializer.WriteObject(106, "child", [&](Serializer &object) { child.Serialize(object, list_size); }); break; } case PhysicalType::ARRAY: { @@ -1385,9 +1264,7 @@ void Vector::Serialize(Serializer &serializer, idx_t count, bool compressed_seri auto array_size = ArrayType::GetSize(serialized_vector.GetType()); auto child_size = array_size * count; serializer.WriteProperty(103, "array_size", array_size); - serializer.WriteObject(104, "child", [&](Serializer &object) { - child.Serialize(object, child_size, compressed_serialization); - }); + serializer.WriteObject(104, "child", [&](Serializer &object) { child.Serialize(object, child_size); }); break; } default: @@ -1398,34 +1275,12 @@ void Vector::Serialize(Serializer &serializer, idx_t count, bool compressed_seri void Vector::Deserialize(Deserializer &deserializer, idx_t count) { auto &logical_type = GetType(); - const auto vtype = // older versions that only supported flat vectors did not serialize vector_type, - deserializer.ReadPropertyWithExplicitDefault(90, "vector_type", VectorType::FLAT_VECTOR); - - // first handle deserialization of compressed vector types - if (vtype == VectorType::CONSTANT_VECTOR) { - Vector::Deserialize(deserializer, 1); // read a 
vector of size 1 - Vector::SetVectorType(VectorType::CONSTANT_VECTOR); - return; - } else if (vtype == VectorType::DICTIONARY_VECTOR) { - SelectionVector sel(count); - deserializer.ReadProperty(91, "sel_vector", reinterpret_cast(sel.data()), sizeof(sel_t) * count); - const auto dict_count = deserializer.ReadProperty(92, "dict_count"); - Vector::Deserialize(deserializer, dict_count); // deserialize the dictionary in this vector - Vector::Slice(sel, count); // will create a dictionary vector - return; - } else if (vtype == VectorType::SEQUENCE_VECTOR) { - const int64_t seq_start = deserializer.ReadProperty(91, "seq_start"); - const int64_t seq_increment = deserializer.ReadProperty(92, "seq_increment"); - Vector::Sequence(seq_start, seq_increment, count); - return; - } auto &validity = FlatVector::Validity(*this); - auto validity_count = MaxValue(count, STANDARD_VECTOR_SIZE); - validity.Reset(validity_count); + validity.Reset(); const auto has_validity_mask = deserializer.ReadProperty(100, "has_validity_mask"); if (has_validity_mask) { - validity.Initialize(validity_count); + validity.Initialize(MaxValue(count, STANDARD_VECTOR_SIZE)); deserializer.ReadProperty(101, "validity", data_ptr_cast(validity.GetData()), validity.ValidityMaskSize(count)); } @@ -1493,10 +1348,10 @@ void Vector::Deserialize(Deserializer &deserializer, idx_t count) { } void Vector::SetVectorType(VectorType vector_type_p) { - vector_type = vector_type_p; + this->vector_type = vector_type_p; auto physical_type = GetType().InternalType(); - auto flat_or_const = GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR; - if (TypeIsConstantSize(physical_type) && flat_or_const) { + if (TypeIsConstantSize(physical_type) && + (GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) { auxiliary.reset(); } if (vector_type == VectorType::CONSTANT_VECTOR && physical_type == PhysicalType::STRUCT) { @@ -1640,7 +1495,7 @@ void 
Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count) auto oidx = sel->get_index(i); if (validity.RowIsValid(oidx)) { auto buf = strings[oidx].GetData(); - D_ASSERT(idx_t(*buf) < 8); + D_ASSERT(*buf >= 0 && *buf < 8); Bit::Verify(strings[oidx]); } } @@ -1837,8 +1692,7 @@ void Vector::DebugTransformToDictionary(Vector &vector, idx_t count) { original_sel.set_index(offset++, verify_count - 1 - i * 2); } // now slice the inverted vector with the inverted selection vector - vector.Dictionary(inverted_vector, verify_count, original_sel, count); - DictionaryVector::SetDictionaryId(vector, UUID::ToString(UUID::GenerateRandomUUID())); + vector.Slice(inverted_vector, original_sel, count); vector.Verify(count); } @@ -1902,29 +1756,23 @@ void Vector::DebugShuffleNestedVector(Vector &vector, idx_t count) { void FlatVector::SetNull(Vector &vector, idx_t idx, bool is_null) { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); vector.validity.Set(idx, !is_null); - if (!is_null) { - return; - } - - auto &type = vector.GetType(); - auto internal_type = type.InternalType(); - - // Set all child entries to NULL. - if (internal_type == PhysicalType::STRUCT) { - auto &entries = StructVector::GetEntries(vector); - for (auto &entry : entries) { - FlatVector::SetNull(*entry, idx, is_null); - } - return; - } - - // Set all child entries to NULL. 
- if (internal_type == PhysicalType::ARRAY) { - auto &child = ArrayVector::GetEntry(vector); - auto array_size = ArrayType::GetSize(type); - auto child_offset = idx * array_size; - for (idx_t i = 0; i < array_size; i++) { - FlatVector::SetNull(child, child_offset + i, is_null); + if (is_null) { + auto &type = vector.GetType(); + auto internal_type = type.InternalType(); + if (internal_type == PhysicalType::STRUCT) { + // set all child entries to null as well + auto &entries = StructVector::GetEntries(vector); + for (auto &entry : entries) { + FlatVector::SetNull(*entry, idx, is_null); + } + } else if (internal_type == PhysicalType::ARRAY) { + // set the child element in the array to null as well + auto &child = ArrayVector::GetEntry(vector); + auto array_size = ArrayType::GetSize(type); + auto child_offset = idx * array_size; + for (idx_t i = 0; i < array_size; i++) { + FlatVector::SetNull(child, child_offset + i, is_null); + } } } } @@ -2085,30 +1933,17 @@ string_t StringVector::AddString(Vector &vector, const string &data) { return StringVector::AddString(vector, string_t(data.c_str(), UnsafeNumericCast(data.size()))); } -VectorStringBuffer &StringVector::GetStringBuffer(Vector &vector) { - if (vector.GetType().InternalType() != PhysicalType::VARCHAR) { - throw InternalException("StringVector::GetStringBuffer - vector is not of internal type VARCHAR but of type %s", - vector.GetType()); - } - if (!vector.auxiliary) { - auto stored_allocator = vector.buffer ? 
vector.buffer->GetAllocator() : nullptr; - if (stored_allocator) { - vector.auxiliary = make_buffer(*stored_allocator); - } else { - vector.auxiliary = make_buffer(); - } - } - D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); - return vector.auxiliary.get()->Cast(); -} - string_t StringVector::AddString(Vector &vector, string_t data) { D_ASSERT(vector.GetType().id() == LogicalTypeId::VARCHAR || vector.GetType().id() == LogicalTypeId::BIT); if (data.IsInlined()) { // string will be inlined: no need to store in string heap return data; } - auto &string_buffer = GetStringBuffer(vector); + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = vector.auxiliary.get()->Cast(); return string_buffer.AddString(data); } @@ -2118,7 +1953,11 @@ string_t StringVector::AddStringOrBlob(Vector &vector, string_t data) { // string will be inlined: no need to store in string heap return data; } - auto &string_buffer = GetStringBuffer(vector); + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = vector.auxiliary.get()->Cast(); return string_buffer.AddBlob(data); } @@ -2127,18 +1966,30 @@ string_t StringVector::EmptyString(Vector &vector, idx_t len) { if (len <= string_t::INLINE_LENGTH) { return string_t(UnsafeNumericCast(len)); } - auto &string_buffer = GetStringBuffer(vector); + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::STRING_BUFFER); + auto &string_buffer = vector.auxiliary.get()->Cast(); return string_buffer.EmptyString(len); } void StringVector::AddHandle(Vector &vector, BufferHandle handle) { - auto &string_buffer = GetStringBuffer(vector); + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); + if 
(!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + auto &string_buffer = vector.auxiliary->Cast(); string_buffer.AddHeapReference(make_buffer(std::move(handle))); } void StringVector::AddBuffer(Vector &vector, buffer_ptr buffer) { + D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR); D_ASSERT(buffer.get() != vector.auxiliary.get()); - auto &string_buffer = GetStringBuffer(vector); + if (!vector.auxiliary) { + vector.auxiliary = make_buffer(); + } + auto &string_buffer = vector.auxiliary->Cast(); string_buffer.AddHeapReference(std::move(buffer)); } @@ -2241,15 +2092,15 @@ void FSSTVector::DecompressVector(const Vector &src, Vector &dst, idx_t src_offs auto dst_mask = FlatVector::Validity(dst); auto ldata = FSSTVector::GetCompressedData(src); auto tdata = FlatVector::GetData(dst); - auto &str_buffer = StringVector::GetStringBuffer(dst); for (idx_t i = 0; i < copy_count; i++) { auto source_idx = sel->get_index(src_offset + i); auto target_idx = dst_offset + i; string_t compressed_string = ldata[source_idx]; if (dst_mask.RowIsValid(target_idx) && compressed_string.GetSize() > 0) { auto decoder = FSSTVector::GetDecoder(src); - tdata[target_idx] = FSSTPrimitives::DecompressValue(decoder, str_buffer, compressed_string.GetData(), - compressed_string.GetSize()); + auto &decompress_buffer = FSSTVector::GetDecompressBuffer(src); + tdata[target_idx] = FSSTPrimitives::DecompressValue(decoder, dst, compressed_string.GetData(), + compressed_string.GetSize(), decompress_buffer); } else { tdata[target_idx] = string_t(nullptr, 0); } diff --git a/src/include/duckdb/common/types/vector.hpp b/src/include/duckdb/common/types/vector.hpp index 6317a161a622..304d4ed53ca0 100644 --- a/src/include/duckdb/common/types/vector.hpp +++ b/src/include/duckdb/common/types/vector.hpp @@ -16,12 +16,10 @@ #include "duckdb/common/types/value.hpp" #include "duckdb/common/types/vector_buffer.hpp" #include "duckdb/common/vector_size.hpp" -#include "duckdb/common/type_util.hpp" 
namespace duckdb { class VectorCache; -class VectorStringBuffer; class VectorStructBuffer; class VectorListBuffer; struct SelCache; @@ -39,32 +37,13 @@ struct UnifiedVectorFormat { data_ptr_t data; ValidityMask validity; SelectionVector owned_sel; - PhysicalType physical_type; - template - void VerifyVectorType() const { -#ifdef DUCKDB_DEBUG_NO_SAFETY - D_ASSERT(StorageTypeCompatible(physical_type)); -#else - if (!StorageTypeCompatible(physical_type)) { - throw InternalException("Expected unified vector format of type %s, but found type %s", GetTypeId(), - physical_type); - } -#endif - } - - template - static inline const T *GetDataUnsafe(const UnifiedVectorFormat &format) { - return reinterpret_cast(format.data); - } template static inline const T *GetData(const UnifiedVectorFormat &format) { - format.VerifyVectorType(); - return GetDataUnsafe(format); + return reinterpret_cast(format.data); } template static inline T *GetDataNoConst(UnifiedVectorFormat &format) { - format.VerifyVectorType(); return reinterpret_cast(format.data); } }; @@ -127,15 +106,14 @@ class Vector { //! Create a non-owning vector that references the specified data DUCKDB_API Vector(LogicalType type, data_ptr_t dataptr); //! Create a non-owning vector that references the specified data with ValidityMask - DUCKDB_API explicit Vector(LogicalType type_p, data_ptr_t dataptr, validity_t * validity_mask,idx_t capacity); + DUCKDB_API explicit Vector(LogicalType type_p, data_ptr_t dataptr, validity_t * validity_mask); //! Create an owning vector that holds at most STANDARD_VECTOR_SIZE entries. /*! Create a new vector If create_data is true, the vector will be an owning empty vector. - If initialize_to_zero is true, the allocated data will be zero-initialized. + If zero_data is true, the allocated data will be zero-initialized. 
*/ - DUCKDB_API Vector(LogicalType type, bool create_data, bool initialize_to_zero, - idx_t capacity = STANDARD_VECTOR_SIZE); + DUCKDB_API Vector(LogicalType type, bool create_data, bool zero_data, idx_t capacity = STANDARD_VECTOR_SIZE); // implicit copying of Vectors is not allowed Vector(const Vector &) = delete; // but moving of vectors is allowed @@ -168,14 +146,10 @@ class Vector { DUCKDB_API void Slice(const SelectionVector &sel, idx_t count); //! Slice the vector, keeping the result around in a cache or potentially using the cache instead of slicing DUCKDB_API void Slice(const SelectionVector &sel, idx_t count, SelCache &cache); - //! Turn this vector into a dictionary vector - DUCKDB_API void Dictionary(idx_t dictionary_size, const SelectionVector &sel, idx_t count); - //! Creates a reference to a dictionary of the other vector - DUCKDB_API void Dictionary(const Vector &dict, idx_t dictionary_size, const SelectionVector &sel, idx_t count); //! Creates the data of this vector with the specified type. Any data that //! is currently in the vector is destroyed. - DUCKDB_API void Initialize(bool initialize_to_zero = false, idx_t capacity = STANDARD_VECTOR_SIZE); + DUCKDB_API void Initialize(bool zero_data = false, idx_t capacity = STANDARD_VECTOR_SIZE); //! Converts this Vector to a printable string representation DUCKDB_API string ToString(idx_t count) const; @@ -229,7 +203,7 @@ class Vector { //! Returns a vector of ResizeInfo containing each (nested) vector to resize. 
DUCKDB_API void FindResizeInfos(vector &resize_infos, const idx_t multiplier); - DUCKDB_API void Serialize(Serializer &serializer, idx_t count, bool compressed_serialization = true); + DUCKDB_API void Serialize(Serializer &serializer, idx_t count); DUCKDB_API void Deserialize(Deserializer &deserializer, idx_t count); idx_t GetAllocationSize(idx_t cardinality) const; @@ -241,7 +215,7 @@ class Vector { inline const LogicalType &GetType() const { return type; } - inline data_ptr_t GetData() const { + inline data_ptr_t GetData() { return data; } @@ -296,18 +270,6 @@ class VectorChildBuffer : public VectorBuffer { }; struct ConstantVector { - template - static void VerifyVectorType(const Vector &vector) { -#ifdef DUCKDB_DEBUG_NO_SAFETY - D_ASSERT(StorageTypeCompatible(vector.GetType().InternalType())); -#else - if (!StorageTypeCompatible(vector.GetType().InternalType())) { - throw InternalException("Expected vector of type %s, but found vector of type %s", GetTypeId(), - vector.GetType().InternalType()); - } -#endif - } - static inline const_data_ptr_t GetData(const Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR || vector.GetVectorType() == VectorType::FLAT_VECTOR); @@ -319,22 +281,12 @@ struct ConstantVector { return vector.data; } template - static inline const T *GetDataUnsafe(const Vector &vector) { - return reinterpret_cast(GetData(vector)); - } - template - static inline T *GetDataUnsafe(Vector &vector) { - return reinterpret_cast(GetData(vector)); - } - template static inline const T *GetData(const Vector &vector) { - VerifyVectorType(vector); - return GetDataUnsafe(vector); + return (const T *)ConstantVector::GetData(vector); } template static inline T *GetData(Vector &vector) { - VerifyVectorType(vector); - return GetDataUnsafe(vector); + return (T *)ConstantVector::GetData(vector); } static inline bool IsNull(const Vector &vector) { D_ASSERT(vector.GetVectorType() == VectorType::CONSTANT_VECTOR); @@ -354,44 +306,22 @@ struct 
ConstantVector { }; struct DictionaryVector { - static void VerifyDictionary(const Vector &vector) { -#ifdef DUCKDB_DEBUG_NO_SAFETY - D_ASSERT(vector.GetVectorType() == VectorType::DICTIONARY_VECTOR); -#else - if (vector.GetVectorType() != VectorType::DICTIONARY_VECTOR) { - throw InternalException( - "Operation requires a dictionary vector but a non-dictionary vector was encountered"); - } -#endif - } static inline const SelectionVector &SelVector(const Vector &vector) { - VerifyDictionary(vector); + D_ASSERT(vector.GetVectorType() == VectorType::DICTIONARY_VECTOR); return vector.buffer->Cast().GetSelVector(); } static inline SelectionVector &SelVector(Vector &vector) { - VerifyDictionary(vector); + D_ASSERT(vector.GetVectorType() == VectorType::DICTIONARY_VECTOR); return vector.buffer->Cast().GetSelVector(); } static inline const Vector &Child(const Vector &vector) { - VerifyDictionary(vector); + D_ASSERT(vector.GetVectorType() == VectorType::DICTIONARY_VECTOR); return vector.auxiliary->Cast().data; } static inline Vector &Child(Vector &vector) { - VerifyDictionary(vector); + D_ASSERT(vector.GetVectorType() == VectorType::DICTIONARY_VECTOR); return vector.auxiliary->Cast().data; } - static inline optional_idx DictionarySize(const Vector &vector) { - VerifyDictionary(vector); - return vector.buffer->Cast().GetDictionarySize(); - } - static inline const string &DictionaryId(const Vector &vector) { - VerifyDictionary(vector); - return vector.buffer->Cast().GetDictionaryId(); - } - static inline void SetDictionaryId(Vector &vector, string new_id) { - VerifyDictionary(vector); - vector.buffer->Cast().SetDictionaryId(std::move(new_id)); - } }; struct FlatVector { @@ -416,14 +346,6 @@ struct FlatVector { static inline T *GetData(Vector &vector) { return ConstantVector::GetData(vector); } - template - static inline const T *GetDataUnsafe(const Vector &vector) { - return ConstantVector::GetDataUnsafe(vector); - } - template - static inline T *GetDataUnsafe(Vector &vector) 
{ - return ConstantVector::GetDataUnsafe(vector); - } static inline void SetData(Vector &vector, data_ptr_t data) { D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR); vector.data = data; @@ -509,8 +431,6 @@ struct StringVector { //! Allocates an empty string of the specified size, and returns a writable pointer that can be used to store the //! result of an operation DUCKDB_API static string_t EmptyString(Vector &vector, idx_t len); - //! Returns a reference to the underlying VectorStringBuffer - throws an error if vector is not of type VARCHAR - DUCKDB_API static VectorStringBuffer &GetStringBuffer(Vector &vector); //! Adds a reference to a handle that stores strings of this vector DUCKDB_API static void AddHandle(Vector &vector, BufferHandle handle); //! Adds a reference to an unspecified vector buffer that stores strings of this vector diff --git a/src/include/duckdb/main/client_config.hpp b/src/include/duckdb/main/client_config.hpp index bad9e04e0224..fab1b57e8ccc 100644 --- a/src/include/duckdb/main/client_config.hpp +++ b/src/include/duckdb/main/client_config.hpp @@ -70,6 +70,7 @@ struct ClientConfig { //! Whether or not we should verify the serializer bool verify_serializer = false; //! Enable the running of optimizers + // We change it to false for testing, Notice!!! bool enable_optimizer = true; //! Enable caching operators bool enable_caching_operators = true;